This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Sparc exp(), expf() performance improvement


This PATCH is intended to improve exp() and expf() performance on Sparc.
These changes will only be active on Sparc platforms and only for
those platforms that support HWCAP_SPARC_CRYPTO (niagara4 and later).

Typical Performance Gains for new exp() code for Sparc

exp()
With -mcpu=niagara4,    8x to 14x (depending on input value)
Without -mcpu=niagara4, 5x to  8x (depending on input value)

expf()
With -mcpu=niagara4,    16x
Without -mcpu=niagara4, 15x

Some portion of these speedups are due to taking advantage of
specific features new to the niagara4 and later architectures.

Glibc correctness tests for exp() and expf() were run. Within the
test suite 3 input values were found to cause 1 bit differences (ulp)
when "FE_TONEAREST" rounding mode is set. No differences were
seen for the tested values for the other rounding modes.
Typical example:
exp(-0x1.760cd2p+0)  (-1.46113312244415283203125)
 new code:    2.31973271630014299393707e-01   0x1.db14cd799387ap-3
 old code:    2.31973271630014271638132e-01   0x1.db14cd7993879p-3
    exp    =  2.31973271630014285508337 (high precision)
Old delta: off by 0.49 ulp
New delta: off by 0.51 ulp
This is a difference of 0.02 ulp which is well within the 1 ulp requirement
although the test function reports the difference as a failure.

For expf(), no differences in the test suite were seen in
FE_TONEAREST, FE_DOWNWARD, FE_TOWARDZERO. Two differences were
seen with FE_UPWARD. For both cases, the precise value was less
than 0.00005 less than the value reported by the new code.
The difference of the old and new was 1 ulp.
Typical example:
Failure: Test: exp_upward (-0x1p-20)
Result:
 new code:  9.999990463256e-01 0x1.ffffe0p-1
 old code:  9.999991059303e-01 0x1.ffffe2p-1
precise     9.999990463261e-01 (high precision) (using bc -l at scale=60)

Changes to be committed:
    (use "git reset HEAD <file>..." to unstage)

    * sysdeps/sparc/configure.ac: manage -mcpu=niagara4 test
    * sysdeps/sparc/configure: Regenerate
    * sysdeps/sparc/fpu/libm_endian.h: New file
    * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile:
        add e_exp*-niagara4.c functions and -mcpu=niagara4 option
    * sysdeps/sparc/sparc64/fpu/multiarch/Makefile: Likewise

    * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/e_exp-niagara4.c: New file
    * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/e_expf-niagara4.c: New file
    * sysdeps/sparc/sparc64/fpu/multiarch/e_exp-niagara4.c: New file
    * sysdeps/sparc/sparc64/fpu/multiarch/e_expf-niagara4.c: New file

---
 sysdeps/sparc/configure                            |   24 +
 sysdeps/sparc/configure.ac                         |   12 +
 sysdeps/sparc/fpu/libm_endian.h                    |   32 ++
 .../sparc/sparc32/sparcv9/fpu/multiarch/Makefile   |    9 +-
 .../sparc32/sparcv9/fpu/multiarch/e_exp-niagara4.c |    1 +
 .../sparcv9/fpu/multiarch/e_expf-niagara4.c        |    1 +
 sysdeps/sparc/sparc64/fpu/multiarch/Makefile       |    9 +-
 .../sparc/sparc64/fpu/multiarch/e_exp-niagara4.c   |  433 +++++++++++++++++++
 .../sparc/sparc64/fpu/multiarch/e_expf-niagara4.c  |  445 ++++++++++++++++++++
 9 files changed, 964 insertions(+), 2 deletions(-)
 create mode 100644 sysdeps/sparc/fpu/libm_endian.h
 create mode 100644 sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/e_exp-niagara4.c
 create mode 100644 sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/e_expf-niagara4.c
 create mode 100644 sysdeps/sparc/sparc64/fpu/multiarch/e_exp-niagara4.c
 create mode 100644 sysdeps/sparc/sparc64/fpu/multiarch/e_expf-niagara4.c

diff --git a/sysdeps/sparc/configure b/sysdeps/sparc/configure
index 90a86f6..cf4d567 100644
--- a/sysdeps/sparc/configure
+++ b/sysdeps/sparc/configure
@@ -81,3 +81,27 @@ if test $libc_cv_sparc_gcc_gotdata = yes; then
   $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
 
 fi
+
+# Check for GCC support of -mcpu=niagara4
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for niagara4 compiler support" >&5
+$as_echo_n "checking for niagara4 compiler support... " >&6; }
+if ${libc_cv_sparc_niagara4+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  echo > conftest.S
+if { ac_try='${CC-cc} -mcpu=niagara4 conftest.S -S'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+  libc_cv_sparc_niagara4=yes
+else
+  libc_cv_sparc_niagara4=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_sparc_niagara4" >&5
+$as_echo "$libc_cv_sparc_niagara4" >&6; }
+config_vars="$config_vars
+sparc_n4flags = $libc_cv_sparc_niagara4"
diff --git a/sysdeps/sparc/configure.ac b/sysdeps/sparc/configure.ac
index 982077c..f9f5667 100644
--- a/sysdeps/sparc/configure.ac
+++ b/sysdeps/sparc/configure.ac
@@ -57,3 +57,15 @@ fi
 if test $libc_cv_sparc_gcc_gotdata = yes; then
   AC_DEFINE(PI_STATIC_AND_HIDDEN)
 fi
+
+# Check for GCC support of -mcpu=niagara4
+AC_CACHE_CHECK(for niagara4 compiler support, libc_cv_sparc_niagara4, [dnl
+echo > conftest.S
+dnl
+if AC_TRY_COMMAND([${CC-cc} -mcpu=niagara4 conftest.S -S]); then
+  libc_cv_sparc_niagara4=yes
+else
+  libc_cv_sparc_niagara4=no
+fi
+rm -f conftest*])
+LIBC_CONFIG_VAR([sparc_n4flags], [$libc_cv_sparc_niagara4])
diff --git a/sysdeps/sparc/fpu/libm_endian.h b/sysdeps/sparc/fpu/libm_endian.h
new file mode 100644
index 0000000..a49abcd
--- /dev/null
+++ b/sysdeps/sparc/fpu/libm_endian.h
@@ -0,0 +1,32 @@
+/*
+   Contributed by Oracle Inc.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _LIBM_ENDIAN_H
+#define	_LIBM_ENDIAN_H
+
+#define	HIWORD		0
+#define	LOWORD		1
+#define	HIXWORD		0		/* index of int containing exponent */
+#define	XSGNMSK		0x80000000	/* exponent bit mask within the int */
+/* XBIASED_EXP(x) must be an int expression; so ~0x80000000 is no good */
+#define	XBIASED_EXP(x)	((((int *)&x)[HIXWORD] & 0x7fffffff) >> 16)
+#define	ISZEROL(x)	(((((int *)&x)[0] & ~XSGNMSK) | ((int *)&x)[1] | \
+				((int *)&x)[2] | ((int *)&x)[3]) == 0)
+
+#endif	/* !defined(_LIBM_ENDIAN_H) */
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile
index 322e300..2d5bc0b 100644
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile
@@ -1,4 +1,5 @@
 ifeq ($(subdir),math)
+libm-sysdep_routines += e_exp-niagara4 e_expf-niagara4
 ifeq ($(have-as-vis3),yes)
 libm-sysdep_routines += m_copysignf-vis3 m_copysign-vis3 s_fabs-vis3 \
 			s_fabsf-vis3 s_llrintf-vis3 s_llrint-vis3 \
@@ -7,8 +8,14 @@ libm-sysdep_routines += m_copysignf-vis3 m_copysign-vis3 s_fabs-vis3 \
 			s_fmaf-vis3 s_fma-vis3 s_nearbyint-vis3 \
 			s_nearbyintf-vis3 s_fdimf-vis3 s_fdim-vis3
 sysdep_routines += s_copysignf-vis3 s_copysign-vis3
-
 CFLAGS-s_fdimf-vis3.c += -Wa,-Av9d -mvis3
 CFLAGS-s_fdim-vis3.c += -Wa,-Av9d -mvis3
 endif
+ifeq ($(sparc_n4flags),yes)
+N4_CFLAGS = -mcpu=niagara4
+else
+N4_CFLAGS =
+endif
+CFLAGS-e_exp-niagara4.c += ${N4_CFLAGS}
+CFLAGS-e_expf-niagara4.c += ${N4_CFLAGS}
 endif
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/e_exp-niagara4.c b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/e_exp-niagara4.c
new file mode 100644
index 0000000..fc29e48
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/e_exp-niagara4.c
@@ -0,0 +1 @@
+#include <sparc64/fpu/multiarch/e_exp-niagara4.c>
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/e_expf-niagara4.c b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/e_expf-niagara4.c
new file mode 100644
index 0000000..2cd92c2
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/e_expf-niagara4.c
@@ -0,0 +1 @@
+#include <sparc64/fpu/multiarch/e_expf-niagara4.c>
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/Makefile b/sysdeps/sparc/sparc64/fpu/multiarch/Makefile
index 03a271d..6379424 100644
--- a/sysdeps/sparc/sparc64/fpu/multiarch/Makefile
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/Makefile
@@ -1,4 +1,5 @@
 ifeq ($(subdir),math)
+libm-sysdep_routines +=  e_exp-niagara4 e_expf-niagara4
 ifeq ($(have-as-vis3),yes)
 libm-sysdep_routines += m_signbitf-vis3 m_signbit-vis3 m_finitef-vis3 \
 			m_finite-vis3 m_isinff-vis3 m_isinf-vis3 \
@@ -11,7 +12,6 @@ libm-sysdep_routines += m_signbitf-vis3 m_signbit-vis3 m_finitef-vis3 \
 sysdep_routines += s_signbitf-vis3 s_signbit-vis3 s_finitef-vis3 \
 		   s_finite-vis3 s_isinff-vis3 s_isinf-vis3 \
 		   s_isnanf-vis3 s_isnan-vis3
-
 CFLAGS-s_ceilf-vis3.c += -Wa,-Av9d -mvis3
 CFLAGS-s_ceil-vis3.c += -Wa,-Av9d -mvis3
 CFLAGS-s_floorf-vis3.c += -Wa,-Av9d -mvis3
@@ -19,4 +19,11 @@ CFLAGS-s_floor-vis3.c += -Wa,-Av9d -mvis3
 CFLAGS-s_truncf-vis3.c += -Wa,-Av9d -mvis3
 CFLAGS-s_trunc-vis3.c += -Wa,-Av9d -mvis3
 endif
+ifeq ($(sparc_n4flags),yes)
+N4_CFLAGS = -mcpu=niagara4
+else
+N4_CFLAGS =
+endif
+CFLAGS-e_exp-niagara4.c += ${N4_CFLAGS}
+CFLAGS-e_expf-niagara4.c += ${N4_CFLAGS}
 endif
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/e_exp-niagara4.c b/sysdeps/sparc/sparc64/fpu/multiarch/e_exp-niagara4.c
new file mode 100644
index 0000000..a179668
--- /dev/null
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/e_exp-niagara4.c
@@ -0,0 +1,433 @@
+/* EXP function
+   Contributed by Oracle Inc.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/*
+ * exp(x)
+ * Hybrid algorithm of Peter Tang's Table driven method (for large
+ * arguments) and an accurate table (for small arguments).
+ * Written by K.C. Ng, November 1988.
+ * Method (large arguments):
+ *	1. Argument Reduction: given the input x, find r and integer k
+ *	   and j such that
+ *	             x = (k+j/32)*(ln2) + r,  |r| <= (1/64)*ln2
+ *
+ *	2. exp(x) = 2^k * (2^(j/32) + 2^(j/32)*expm1(r))
+ *	   a. expm1(r) is approximated by a polynomial:
+ *	      expm1(r) ~ r + t1*r^2 + t2*r^3 + ... + t5*r^6
+ *	      Here t1 = 1/2 exactly.
+ *	   b. 2^(j/32) is represented to twice double precision
+ *	      as TBL[2j]+TBL[2j+1].
+ *
+ * Note: If divide were fast enough, we could use another approximation
+ *	 in 2.a:
+ *	      expm1(r) ~ (2r)/(2-R), R = r - r^2*(t1 + t2*r^2)
+ *	      (for the same t1 and t2 as above)
+ *
+ * Special cases:
+ *	exp(INF) is INF, exp(NaN) is NaN;
+ *	exp(-INF)=  0;
+ *	for finite argument, only exp(0)=1 is exact.
+ *
+ * Accuracy:
+ *	According to an error analysis, the error is always less than
+ *	an ulp (unit in the last place).  The largest errors observed
+ *	are less than 0.55 ulp for normal results and less than 0.75 ulp
+ *	for subnormal results.
+ *
+ * Misc. info.
+ *	For IEEE double
+ *		if x >  7.09782712893383973096e+02 then exp(x) overflow
+ *		if x < -7.45133219101941108420e+02 then exp(x) underflow
+ */
+
+/* #pragma weak exp = __exp_niagara4 */
+#include <sparc-ifunc.h>
+#include "libm_endian.h"
+#include <math.h>
+#include <math_private.h>
+#include <errno.h>
+
+extern double exp (double);
+extern double __exp_niagara4 (double);
+extern double __ieee754_exp (double);
+
+sparc_libm_ifunc (exp, hwcap & HWCAP_SPARC_CRYPTO ?
+		  __exp_niagara4 : __ieee754_exp);
+
+
+static const double TBL[] = {
+  1.00000000000000000000e+00, 0.00000000000000000000e+00,
+  1.02189714865411662714e+00, 5.10922502897344389359e-17,
+  1.04427378242741375480e+00, 8.55188970553796365958e-17,
+  1.06714040067682369717e+00, -7.89985396684158212226e-17,
+  1.09050773266525768967e+00, -3.04678207981247114697e-17,
+  1.11438674259589243221e+00, 1.04102784568455709549e-16,
+  1.13878863475669156458e+00, 8.91281267602540777782e-17,
+  1.16372485877757747552e+00, 3.82920483692409349872e-17,
+  1.18920711500272102690e+00, 3.98201523146564611098e-17,
+  1.21524735998046895524e+00, -7.71263069268148813091e-17,
+  1.24185781207348400201e+00, 4.65802759183693679123e-17,
+  1.26905095719173321989e+00, 2.66793213134218609523e-18,
+  1.29683955465100964055e+00, 2.53825027948883149593e-17,
+  1.32523664315974132322e+00, -2.85873121003886075697e-17,
+  1.35425554693689265129e+00, 7.70094837980298946162e-17,
+  1.38390988196383202258e+00, -6.77051165879478628716e-17,
+  1.41421356237309514547e+00, -9.66729331345291345105e-17,
+  1.44518080697704665027e+00, -3.02375813499398731940e-17,
+  1.47682614593949934623e+00, -3.48399455689279579579e-17,
+  1.50916442759342284141e+00, -1.01645532775429503911e-16,
+  1.54221082540794074411e+00, 7.94983480969762085616e-17,
+  1.57598084510788649659e+00, -1.01369164712783039808e-17,
+  1.61049033194925428347e+00, 2.47071925697978878522e-17,
+  1.64575547815396494578e+00, -1.01256799136747726038e-16,
+  1.68179283050742900407e+00, 8.19901002058149652013e-17,
+  1.71861929812247793414e+00, -1.85138041826311098821e-17,
+  1.75625216037329945351e+00, 2.96014069544887330703e-17,
+  1.79470907500310716820e+00, 1.82274584279120867698e-17,
+  1.83400808640934243066e+00, 3.28310722424562658722e-17,
+  1.87416763411029996256e+00, -6.12276341300414256164e-17,
+  1.91520656139714740007e+00, -1.06199460561959626376e-16,
+  1.95714412417540017941e+00, 8.96076779103666776760e-17,
+};
+
+/*
+ * For i = 0, ..., 66,
+ *   TBL2[2*i] is a double precision number near (i+1)*2^-6, and
+ *   TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less
+ *   than 2^-60.
+ *
+ * For i = 67, ..., 133,
+ *   TBL2[2*i] is a double precision number near -(i+1)*2^-6, and
+ *   TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less
+ *   than 2^-60.
+ */
+static const double TBL2[] = {
+  1.56249999999984491572e-02, 1.01574770858668417262e+00,
+  3.12499999999998716305e-02, 1.03174340749910253834e+00,
+  4.68750000000011102230e-02, 1.04799100201663386578e+00,
+  6.24999999999990632493e-02, 1.06449445891785843266e+00,
+  7.81249999999999444888e-02, 1.08125780744903954300e+00,
+  9.37500000000013322676e-02, 1.09828514030782731226e+00,
+  1.09375000000001346145e-01, 1.11558061464248226002e+00,
+  1.24999999999999417133e-01, 1.13314845306682565607e+00,
+  1.40624999999995337063e-01, 1.15099294469117108264e+00,
+  1.56249999999996141975e-01, 1.16911844616949989195e+00,
+  1.71874999999992894573e-01, 1.18752938276309216725e+00,
+  1.87500000000000888178e-01, 1.20623024942098178158e+00,
+  2.03124999999361649516e-01, 1.22522561187652545556e+00,
+  2.18750000000000416334e-01, 1.24452010776609567344e+00,
+  2.34375000000003524958e-01, 1.26411844775347081971e+00,
+  2.50000000000006328271e-01, 1.28402541668774961003e+00,
+  2.65624999999982791543e-01, 1.30424587476761533189e+00,
+  2.81249999999993727240e-01, 1.32478475872885725906e+00,
+  2.96875000000003275158e-01, 1.34564708304941493822e+00,
+  3.12500000000002886580e-01, 1.36683794117380030819e+00,
+  3.28124999999993394173e-01, 1.38836250675661765364e+00,
+  3.43749999999998612221e-01, 1.41022603492570874906e+00,
+  3.59374999999992450483e-01, 1.43243386356506730017e+00,
+  3.74999999999991395772e-01, 1.45499141461818881638e+00,
+  3.90624999999997613020e-01, 1.47790419541173490003e+00,
+  4.06249999999991895372e-01, 1.50117780000011058483e+00,
+  4.21874999999996613820e-01, 1.52481791053132154090e+00,
+  4.37500000000004607426e-01, 1.54883029863414023453e+00,
+  4.53125000000004274359e-01, 1.57322082682725961078e+00,
+  4.68750000000008326673e-01, 1.59799544995064657371e+00,
+  4.84374999999985456078e-01, 1.62316021661928200359e+00,
+  4.99999999999997335465e-01, 1.64872127070012375327e+00,
+  5.15625000000000222045e-01, 1.67468485281178436352e+00,
+  5.31250000000003441691e-01, 1.70105730184840653330e+00,
+  5.46874999999999111822e-01, 1.72784505652716169344e+00,
+  5.62499999999999333866e-01, 1.75505465696029738787e+00,
+  5.78124999999993338662e-01, 1.78269274625180318417e+00,
+  5.93749999999999666933e-01, 1.81076607211938656050e+00,
+  6.09375000000003441691e-01, 1.83928148854178719063e+00,
+  6.24999999999995559108e-01, 1.86824595743221411048e+00,
+  6.40625000000009103829e-01, 1.89766655033813602671e+00,
+  6.56249999999993782751e-01, 1.92755045016753268072e+00,
+  6.71875000000002109424e-01, 1.95790495294292221651e+00,
+  6.87499999999992450483e-01, 1.98873746958227681780e+00,
+  7.03125000000004996004e-01, 2.02005552770870666635e+00,
+  7.18750000000007105427e-01, 2.05186677348799140219e+00,
+  7.34375000000008770762e-01, 2.08417897349558689513e+00,
+  7.49999999999983901766e-01, 2.11700001661264058939e+00,
+  7.65624999999997002398e-01, 2.15033791595229351046e+00,
+  7.81250000000005884182e-01, 2.18420081081563077774e+00,
+  7.96874999999991451283e-01, 2.21859696867912603579e+00,
+  8.12500000000000000000e-01, 2.25353478721320854561e+00,
+  8.28125000000008215650e-01, 2.28902279633221983346e+00,
+  8.43749999999997890576e-01, 2.32506966027711614586e+00,
+  8.59374999999999444888e-01, 2.36168417973090827289e+00,
+  8.75000000000003219647e-01, 2.39887529396710563745e+00,
+  8.90625000000013433699e-01, 2.43665208303232461162e+00,
+  9.06249999999980571097e-01, 2.47502376996297712708e+00,
+  9.21874999999984456878e-01, 2.51399972303748420188e+00,
+  9.37500000000001887379e-01, 2.55358945806293169412e+00,
+  9.53125000000003330669e-01, 2.59380264069854327147e+00,
+  9.68749999999989119814e-01, 2.63464908881560244680e+00,
+  9.84374999999997890576e-01, 2.67613877489447116176e+00,
+  1.00000000000001154632e+00, 2.71828182845907662113e+00,
+  1.01562499999999333866e+00, 2.76108853855008318234e+00,
+  1.03124999999995980993e+00, 2.80456935623711389738e+00,
+  1.04687499999999933387e+00, 2.84873489717039740654e+00,
+  -1.56249999999999514277e-02, 9.84496437005408453480e-01,
+  -3.12499999999955972718e-02, 9.69233234476348348707e-01,
+  -4.68749999999993824384e-02, 9.54206665969188905230e-01,
+  -6.24999999999976130205e-02, 9.39413062813478028090e-01,
+  -7.81249999999989314103e-02, 9.24848813216205822840e-01,
+  -9.37499999999995975442e-02, 9.10510361380034494161e-01,
+  -1.09374999999998584466e-01, 8.96394206635151680196e-01,
+  -1.24999999999998556710e-01, 8.82496902584596676355e-01,
+  -1.40624999999999361622e-01, 8.68815056262843721235e-01,
+  -1.56249999999999111822e-01, 8.55345327307423297647e-01,
+  -1.71874999999924144012e-01, 8.42084427143446223596e-01,
+  -1.87499999999996752598e-01, 8.29029118180403035154e-01,
+  -2.03124999999988037347e-01, 8.16176213022349550386e-01,
+  -2.18749999999995947686e-01, 8.03522573689063990265e-01,
+  -2.34374999999996419531e-01, 7.91065110850298847112e-01,
+  -2.49999999999996280753e-01, 7.78800783071407765057e-01,
+  -2.65624999999999888978e-01, 7.66726596070820165529e-01,
+  -2.81249999999989397370e-01, 7.54839601989015340777e-01,
+  -2.96874999999996114219e-01, 7.43136898668761203268e-01,
+  -3.12499999999999555911e-01, 7.31615628946642115871e-01,
+  -3.28124999999993782751e-01, 7.20272979955444259126e-01,
+  -3.43749999999997946087e-01, 7.09106182437399867879e-01,
+  -3.59374999999994337863e-01, 6.98112510068129799023e-01,
+  -3.74999999999994615418e-01, 6.87289278790975899369e-01,
+  -3.90624999999999000799e-01, 6.76633846161729612945e-01,
+  -4.06249999999947264406e-01, 6.66143610703522903727e-01,
+  -4.21874999999988453681e-01, 6.55816011271509125002e-01,
+  -4.37499999999999111822e-01, 6.45648526427892610613e-01,
+  -4.53124999999999278355e-01, 6.35638673826052436056e-01,
+  -4.68749999999999278355e-01, 6.25784009604591573428e-01,
+  -4.84374999999992894573e-01, 6.16082127790682609891e-01,
+  -4.99999999999998168132e-01, 6.06530659712634534486e-01,
+  -5.15625000000000000000e-01, 5.97127273421627413619e-01,
+  -5.31249999999989785948e-01, 5.87869673122352498496e-01,
+  -5.46874999999972688514e-01, 5.78755598612500032907e-01,
+  -5.62500000000000000000e-01, 5.69782824730923009859e-01,
+  -5.78124999999992339461e-01, 5.60949160814475100700e-01,
+  -5.93749999999948707696e-01, 5.52252450163048691500e-01,
+  -6.09374999999552580121e-01, 5.43690569513243682209e-01,
+  -6.24999999999984789945e-01, 5.35261428518998383375e-01,
+  -6.40624999999983457677e-01, 5.26962969243379708573e-01,
+  -6.56249999999998334665e-01, 5.18793165653890220312e-01,
+  -6.71874999999943378626e-01, 5.10750023129039609771e-01,
+  -6.87499999999997002398e-01, 5.02831577970942467104e-01,
+  -7.03124999999991118216e-01, 4.95035896926202978463e-01,
+  -7.18749999999991340260e-01, 4.87361076713623331269e-01,
+  -7.34374999999985678123e-01, 4.79805243559684402310e-01,
+  -7.49999999999997335465e-01, 4.72366552741015965911e-01,
+  -7.65624999999993782751e-01, 4.65043188134059204408e-01,
+  -7.81249999999863220523e-01, 4.57833361771676883301e-01,
+  -7.96874999999998112621e-01, 4.50735313406363247157e-01,
+  -8.12499999999990119015e-01, 4.43747310081084256339e-01,
+  -8.28124999999996003197e-01, 4.36867645705559026759e-01,
+  -8.43749999999988120614e-01, 4.30094640640067360504e-01,
+  -8.59374999999994115818e-01, 4.23426641285265303871e-01,
+  -8.74999999999977129406e-01, 4.16862019678517936594e-01,
+  -8.90624999999983346655e-01, 4.10399173096376801428e-01,
+  -9.06249999999991784350e-01, 4.04036523663345414903e-01,
+  -9.21874999999994004796e-01, 3.97772517966614058693e-01,
+  -9.37499999999994337863e-01, 3.91605626676801210628e-01,
+  -9.53124999999999444888e-01, 3.85534344174578935682e-01,
+  -9.68749999999986677324e-01, 3.79557188183094640355e-01,
+  -9.84374999999992339461e-01, 3.73672699406045860648e-01,
+  -9.99999999999995892175e-01, 3.67879441171443832825e-01,
+  -1.01562499999994315658e+00, 3.62175999080846300338e-01,
+  -1.03124999999991096011e+00, 3.56560980663978732697e-01,
+  -1.04687499999999067413e+00, 3.51033015038813400732e-01,
+};
+
+static const double zC[] = {
+  0.5,
+  4.61662413084468283841e+01,	/* 0x40471547, 0x652b82fe */
+  2.16608493865351192653e-02,	/* 0x3f962e42, 0xfee00000 */
+  5.96317165397058656257e-12,	/* 0x3d9a39ef, 0x35793c76 */
+  1.6666666666526086527e-1,	/* 3fc5555555548f7c */
+  4.1666666666226079285e-2,	/* 3fa5555555545d4e */
+  8.3333679843421958056e-3,	/* 3f811115b7aa905e */
+  1.3888949086377719040e-3,	/* 3f56c1728d739765 */
+  1.0,
+  0.0,
+  7.09782712893383973096e+02,	/* 0x40862E42, 0xFEFA39EF */
+  7.45133219101941108420e+02,	/* 0x40874910, 0xD52D3051 */
+  5.55111512312578270212e-17,	/* 0x3c900000, 0x00000000 */
+  1.0e-100,
+  1.0e-300,
+  1.0e300
+};
+
+#ifndef DBL_MIN
+#define DBL_MIN 2.2250738585072014E-308
+#endif
+
+#define	half		zC[0]
+#define	invln2_32	zC[1]
+#define	ln2_32hi	zC[2]
+#define	ln2_32lo	zC[3]
+#define	t2		zC[4]
+#define	t3		zC[5]
+#define	t4		zC[6]
+#define	t5		zC[7]
+#define	one		zC[8]
+#define	zero		zC[9]
+#define	threshold1	zC[10]
+#define	threshold2	zC[11]
+#define	twom54		zC[12]
+#define	small		zC[13]
+#define	tiny		zC[14]
+#define	hhuge		zC[15]
+
+
+void
+__set_err_exp (double x, double result)
+{
+  if (_LIB_VERSION != _IEEE_)
+    {
+      if (_LIB_VERSION == _POSIX_)
+	{
+	  __set_errno (ERANGE);
+	}
+      else
+	{
+	  struct exception exc;
+	  exc.arg1 = x;
+	  exc.type = DOMAIN;
+	  exc.name = (char *) "exp";
+	  exc.retval = result;
+	  if (!matherr (&exc))
+	    {
+	      __set_errno (ERANGE);
+	    }
+	}
+    }
+}
+
+
+double
+__exp_niagara4 (double x_arg)
+{
+  double z, t;
+  double retval;
+  int hx, ix, k, j, m;
+  union
+  {
+    int i_part[2];
+    double x;
+  } xx;
+  union
+  {
+    int y_part[2];
+    double y;
+  } yy;
+  xx.x = x_arg;
+
+  ix = xx.i_part[HIWORD];
+  hx = ix & ~0x80000000;
+
+  if (hx < 0x3ff0a2b2)
+    {				/* |x| < 3/2 ln 2 */
+      if (hx < 0x3f862e42)
+	{			/* |x| < 1/64 ln 2 */
+	  if (hx < 0x3ed00000)
+	    {			/* |x| < 2^-18 */
+	      /* raise inexact if x != 0 */
+	      if (hx < 0x3e300000)
+		{
+		  retval = one + xx.x;
+		  return (retval);
+		}
+	      retval = one + xx.x * (one + half * xx.x);
+	      return (retval);
+	    }
+	  t = xx.x * xx.x;
+	  yy.y = xx.x + (t * (half + xx.x * t2) +
+			 (t * t) * (t3 + xx.x * t4 + t * t5));
+	  retval = one + yy.y;
+	  return (retval);
+	}
+
+      /* find the multiple of 2^-6 nearest x */
+      k = hx >> 20;
+      j = (0x00100000 | (hx & 0x000fffff)) >> (0x40c - k);
+      j = (j - 1) & ~1;
+      if (ix < 0)
+	j += 134;
+      z = xx.x - TBL2[j];
+      t = z * z;
+      /* the "small" term below guarantees inexact will be raised */
+      yy.y = z + (t * (half + (z * t2 + small)) +
+		  (t * t) * (t3 + z * t4 + t * t5));
+      retval = TBL2[j + 1] + TBL2[j + 1] * yy.y;
+      return (retval);
+    }
+
+  if (hx >= 0x40862e42)
+    {				/* x is large, infinite, or nan */
+      if (hx >= 0x7ff00000)
+	{
+	  if (ix == 0xfff00000 && xx.i_part[LOWORD] == 0)
+	    return (zero);	/* exp(-inf) = 0 */
+	  return (xx.x * xx.x);	/* exp(nan/inf) is nan or inf */
+	}
+      if (xx.x > threshold1)
+	{			/* set overflow error condition */
+	  retval = hhuge * hhuge;
+	  __set_err_exp (xx.x, retval);
+	  return retval;
+	}
+      if (-xx.x > threshold2)
+	{			/* set underflow error condition */
+	  double force_underflow = tiny * tiny;
+	  math_force_eval (force_underflow);
+	  retval = zero;
+	  __set_err_exp (xx.x, retval);
+	  return retval;
+	}
+    }
+
+  t = invln2_32 * xx.x;
+  if (ix < 0)
+    t -= half;
+  else
+    t += half;
+  k = (int) t;
+  j = (k & 0x1f) << 1;
+  m = k >> 5;
+  z = (xx.x - k * ln2_32hi) - k * ln2_32lo;
+
+  /* z is now in primary range */
+  t = z * z;
+  yy.y = z + (t * (half + z * t2) + (t * t) * (t3 + z * t4 + t * t5));
+  yy.y = TBL[j] + (TBL[j + 1] + TBL[j] * yy.y);
+  if (m < -1021)
+    {
+      yy.y_part[HIWORD] += (m + 54) << 20;
+      retval = twom54 * yy.y;
+      if (retval < DBL_MIN)
+	{
+	  double force_underflow = tiny * tiny;
+	  math_force_eval (force_underflow);
+	  __set_err_exp (xx.x, retval);
+	}
+      return (retval);
+    }
+  yy.y_part[HIWORD] += m << 20;
+  return (yy.y);
+}
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/e_expf-niagara4.c b/sysdeps/sparc/sparc64/fpu/multiarch/e_expf-niagara4.c
new file mode 100644
index 0000000..ba0b1e7
--- /dev/null
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/e_expf-niagara4.c
@@ -0,0 +1,445 @@
+/* EXPF function
+   Contributed by Oracle Inc.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+/* INDENT OFF */
+/*
+ * float expf(float x);
+ * Code by K.C. Ng for SUN 5.0 libmopt
+ * 11/5/99
+ * Method :
+ *	1. For |x| >= 2^7, either underflow/overflow.
+ *	   More precisely:
+ *		x > 88.722839355...(0x42B17218) => overflow;
+ *		x < -103.97207642..(0xc2CFF1B4) => underflow.
+ *	2. For |x| <  2^-6, use polynomail
+ *		exp(x) = 1 + x + p1*x^2 + p2*x^3
+ *      3. Otherwise, write |x|=(1+r)*2^n, where 0<=r<1.
+ *	   Let t = 2^n * (1+r) .... x > 0;
+ *	       t = 2^n * (1-r) .... x < 0. (x= -2**(n+1)+t)
+ *	   Since -6 <= n <= 6, we may break t into
+ *	   six 6-bits chunks:
+ *                    -5     -11     -17     -23     -29
+ *         t=j *2+j *2  +j *2   +j *2   +j *2   +j *2
+ *            1    2      3       4       5       6
+ *
+ *	   where 0 <= j  < 64 for i = 1,...,6.
+ *		       i
+ *	   Note that since t has only 24 significant bits,
+ *	   either j  or j  must be 0.
+ *		   1     6
+ *					       7-6i
+ *	   One may define j  by   (int) ( t * 2     ) mod 64
+ *			   i
+ *	   mathematically. In actual implementation, they can
+ *	   be obtained by manipulating the exponent and
+ *	   mantissa bits as follow:
+ *		Let ix = (HEX(x)&0x007fffff)|0x00800000.
+ *		If n>=0, let ix=ix<<n, then j =0 and
+ *					     6
+ *		    j  = ix>>(30-6i)) mod 64  ...i=1,...,5
+ *		     i
+ *		Otherwise, let ix=ix<<(j+6), then j = 0 and
+ *						   1
+ *		    j  = ix>>(36-6i)) mod 64  ...i=2,...,6
+ *		     i
+ *
+ *	4. Compute exp(t) by table look-up method.
+ *	   Precompute ET[k] = exp(j*2^(7-6i)), k=j+64*(6-i).
+ *	   Then
+ *	   exp(t) = ET[j +320]*ET[j +256]*ET[j +192]*
+ *		        1          2          3
+ *
+ *		    ET[j +128]*ET[j +64]*ET[j ]
+ *			4          5         6
+ *
+ *				  n+1
+ *	5. If x < 0, return exp(-2   )* exp(t). Note that
+ *	   -6 <= n <= 6. Let k = n - 6, then we can
+ *	   precompute
+ *	                 k-5          n+1
+ *         EN[k] = exp(-2   ) = exp(-2   ) for k=0,1,...,12.
+ *
+ *
+ * Special cases:
+ *	exp(INF) is INF, exp(NaN) is NaN;
+ *	exp(-INF) = 0;
+ *	for finite argument, only exp(0) = 1 is exact.
+ *
+ * Accuracy:
+ *      All calculations are done in double precision except for
+ *      the case |x| < 2^-6.  When |x| < 2^-6, the error is less
+ *      than 0.55 ulp.  When |x| >= 2^-6, the error is less than
+ *      0.51 ulp.
+ */
+/* INDENT ON */
+
+/* #pragma weak expf = __expf_niagara4 */
+#include <sparc-ifunc.h>
+#include "libm_endian.h"
+#include <math.h>
+#include <errno.h>
+
+extern float expf (float);
+extern float __expf_niagara4 (float);
+extern float __ieee754_expf (float);
+
+sparc_libm_ifunc (expf, hwcap & HWCAP_SPARC_CRYPTO ?
+		  __expf_niagara4 : __ieee754_expf);
+
+
+void
+__set_err_expf (float x, float result)
+{
+  if (_LIB_VERSION != _IEEE_)
+    {
+      if (_LIB_VERSION == _POSIX_)
+	{
+	  __set_errno (ERANGE);
+	}
+      else
+	{
+	  struct exception exc;
+	  exc.arg1 = x;
+	  exc.type = DOMAIN;
+	  exc.name = (char *) "exp";
+	  exc.retval = result;
+	  if (!matherr (&exc))
+	    {
+	      __set_errno (ERANGE);
+	    }
+	}
+    }
+}
+
+
+/*
+ * ET[k] = exp(j*2^(7-6i)) , where j = k mod 64, i = k/64
+ */
+static const double ET[] = {
+  1.00000000000000000000e+00, 1.00000000186264514923e+00,
+  1.00000000372529029846e+00, 1.00000000558793544769e+00,
+  1.00000000745058059692e+00, 1.00000000931322574615e+00,
+  1.00000001117587089539e+00, 1.00000001303851604462e+00,
+  1.00000001490116119385e+00, 1.00000001676380656512e+00,
+  1.00000001862645171435e+00, 1.00000002048909686359e+00,
+  1.00000002235174201282e+00, 1.00000002421438716205e+00,
+  1.00000002607703253332e+00, 1.00000002793967768255e+00,
+  1.00000002980232283178e+00, 1.00000003166496798102e+00,
+  1.00000003352761335229e+00, 1.00000003539025850152e+00,
+  1.00000003725290365075e+00, 1.00000003911554879998e+00,
+  1.00000004097819417126e+00, 1.00000004284083932049e+00,
+  1.00000004470348446972e+00, 1.00000004656612984100e+00,
+  1.00000004842877499023e+00, 1.00000005029142036150e+00,
+  1.00000005215406551073e+00, 1.00000005401671088201e+00,
+  1.00000005587935603124e+00, 1.00000005774200140252e+00,
+  1.00000005960464655175e+00, 1.00000006146729192302e+00,
+  1.00000006332993707225e+00, 1.00000006519258244353e+00,
+  1.00000006705522759276e+00, 1.00000006891787296404e+00,
+  1.00000007078051811327e+00, 1.00000007264316348454e+00,
+  1.00000007450580863377e+00, 1.00000007636845400505e+00,
+  1.00000007823109937632e+00, 1.00000008009374452556e+00,
+  1.00000008195638989683e+00, 1.00000008381903526811e+00,
+  1.00000008568168063938e+00, 1.00000008754432578861e+00,
+  1.00000008940697115989e+00, 1.00000009126961653116e+00,
+  1.00000009313226190244e+00, 1.00000009499490705167e+00,
+  1.00000009685755242295e+00, 1.00000009872019779422e+00,
+  1.00000010058284316550e+00, 1.00000010244548853677e+00,
+  1.00000010430813368600e+00, 1.00000010617077905728e+00,
+  1.00000010803342442856e+00, 1.00000010989606979983e+00,
+  1.00000011175871517111e+00, 1.00000011362136054238e+00,
+  1.00000011548400591366e+00, 1.00000011734665128493e+00,
+  1.00000000000000000000e+00, 1.00000011920929665621e+00,
+  1.00000023841860752327e+00, 1.00000035762793260119e+00,
+  1.00000047683727188996e+00, 1.00000059604662538959e+00,
+  1.00000071525599310007e+00, 1.00000083446537502141e+00,
+  1.00000095367477115360e+00, 1.00000107288418149665e+00,
+  1.00000119209360605055e+00, 1.00000131130304481530e+00,
+  1.00000143051249779091e+00, 1.00000154972196497738e+00,
+  1.00000166893144637470e+00, 1.00000178814094198287e+00,
+  1.00000190735045180190e+00, 1.00000202655997583179e+00,
+  1.00000214576951407253e+00, 1.00000226497906652412e+00,
+  1.00000238418863318657e+00, 1.00000250339821405987e+00,
+  1.00000262260780914403e+00, 1.00000274181741843904e+00,
+  1.00000286102704194491e+00, 1.00000298023667966163e+00,
+  1.00000309944633158921e+00, 1.00000321865599772764e+00,
+  1.00000333786567807692e+00, 1.00000345707537263706e+00,
+  1.00000357628508140806e+00, 1.00000369549480438991e+00,
+  1.00000381470454158261e+00, 1.00000393391429298617e+00,
+  1.00000405312405860059e+00, 1.00000417233383842586e+00,
+  1.00000429154363246198e+00, 1.00000441075344070896e+00,
+  1.00000452996326316679e+00, 1.00000464917309983548e+00,
+  1.00000476838295071502e+00, 1.00000488759281580542e+00,
+  1.00000500680269510667e+00, 1.00000512601258861878e+00,
+  1.00000524522249634174e+00, 1.00000536443241827556e+00,
+  1.00000548364235442023e+00, 1.00000560285230477575e+00,
+  1.00000572206226934213e+00, 1.00000584127224811937e+00,
+  1.00000596048224110746e+00, 1.00000607969224830640e+00,
+  1.00000619890226971620e+00, 1.00000631811230533685e+00,
+  1.00000643732235516836e+00, 1.00000655653241921073e+00,
+  1.00000667574249746394e+00, 1.00000679495258992802e+00,
+  1.00000691416269660294e+00, 1.00000703337281748873e+00,
+  1.00000715258295258536e+00, 1.00000727179310189285e+00,
+  1.00000739100326541120e+00, 1.00000751021344314040e+00,
+  1.00000000000000000000e+00, 1.00000762942363508046e+00,
+  1.00001525890547848796e+00, 1.00002288844553022251e+00,
+  1.00003051804379095024e+00, 1.00003814770026133729e+00,
+  1.00004577741494138365e+00, 1.00005340718783175546e+00,
+  1.00006103701893311886e+00, 1.00006866690824547383e+00,
+  1.00007629685576948653e+00, 1.00008392686150582307e+00,
+  1.00009155692545448346e+00, 1.00009918704761613384e+00,
+  1.00010681722799144033e+00, 1.00011444746658040295e+00,
+  1.00012207776338368781e+00, 1.00012970811840196106e+00,
+  1.00013733853163522269e+00, 1.00014496900308413885e+00,
+  1.00015259953274937565e+00, 1.00016023012063093311e+00,
+  1.00016786076672947736e+00, 1.00017549147104567453e+00,
+  1.00018312223357952462e+00, 1.00019075305433191581e+00,
+  1.00019838393330284809e+00, 1.00020601487049298761e+00,
+  1.00021364586590300050e+00, 1.00022127691953288675e+00,
+  1.00022890803138353455e+00, 1.00023653920145494389e+00,
+  1.00024417042974778091e+00, 1.00025180171626271175e+00,
+  1.00025943306099973640e+00, 1.00026706446395974304e+00,
+  1.00027469592514273167e+00, 1.00028232744454959047e+00,
+  1.00028995902218031944e+00, 1.00029759065803558471e+00,
+  1.00030522235211605242e+00, 1.00031285410442172257e+00,
+  1.00032048591495348333e+00, 1.00032811778371155675e+00,
+  1.00033574971069616488e+00, 1.00034338169590819589e+00,
+  1.00035101373934764979e+00, 1.00035864584101541475e+00,
+  1.00036627800091149076e+00, 1.00037391021903676602e+00,
+  1.00038154249539146257e+00, 1.00038917482997580244e+00,
+  1.00039680722279067382e+00, 1.00040443967383629875e+00,
+  1.00041207218311289928e+00, 1.00041970475062136359e+00,
+  1.00042733737636191371e+00, 1.00043497006033499375e+00,
+  1.00044260280254104778e+00, 1.00045023560298029786e+00,
+  1.00045786846165363215e+00, 1.00046550137856127272e+00,
+  1.00047313435370366363e+00, 1.00048076738708124900e+00,
+  1.00000000000000000000e+00, 1.00048840047869447289e+00,
+  1.00097703949241645383e+00, 1.00146591715766675179e+00,
+  1.00195503359100279717e+00, 1.00244438890903908579e+00,
+  1.00293398322844673487e+00, 1.00342381666595459322e+00,
+  1.00391388933834746489e+00, 1.00440420136246855165e+00,
+  1.00489475285521656645e+00, 1.00538554393354861993e+00,
+  1.00587657471447822211e+00, 1.00636784531507639251e+00,
+  1.00685935585247099411e+00, 1.00735110644384739942e+00,
+  1.00784309720644804642e+00, 1.00833532825757243856e+00,
+  1.00882779971457803292e+00, 1.00932051169487890796e+00,
+  1.00981346431594687374e+00, 1.01030665769531102782e+00,
+  1.01080009195055753324e+00, 1.01129376719933050666e+00,
+  1.01178768355933157430e+00, 1.01228184114831898377e+00,
+  1.01277624008410960244e+00, 1.01327088048457714109e+00,
+  1.01376576246765282008e+00, 1.01426088615132625748e+00,
+  1.01475625165364347069e+00, 1.01525185909270931894e+00,
+  1.01574770858668572693e+00, 1.01624380025379235093e+00,
+  1.01674013421230657883e+00, 1.01723671058056375216e+00,
+  1.01773352947695694404e+00, 1.01823059101993673714e+00,
+  1.01872789532801233392e+00, 1.01922544251975000229e+00,
+  1.01972323271377418585e+00, 1.02022126602876750390e+00,
+  1.02071954258347008526e+00, 1.02121806249668067856e+00,
+  1.02171682588725554197e+00, 1.02221583287410910934e+00,
+  1.02271508357621376817e+00, 1.02321457811260052573e+00,
+  1.02371431660235789884e+00, 1.02421429916463280207e+00,
+  1.02471452591863054771e+00, 1.02521499698361440167e+00,
+  1.02571571247890602763e+00, 1.02621667252388526492e+00,
+  1.02671787723799012859e+00, 1.02721932674071725344e+00,
+  1.02772102115162167202e+00, 1.02822296059031659254e+00,
+  1.02872514517647339893e+00, 1.02922757502982276101e+00,
+  1.02973025027015285815e+00, 1.03023317101731093359e+00,
+  1.03073633739120262831e+00, 1.03123974951179242510e+00,
+  1.00000000000000000000e+00, 1.03174340749910276038e+00,
+  1.06449445891785954288e+00, 1.09828514030782575794e+00,
+  1.13314845306682632220e+00, 1.16911844616950433284e+00,
+  1.20623024942098067136e+00, 1.24452010776609522935e+00,
+  1.28402541668774139438e+00, 1.32478475872886569675e+00,
+  1.36683794117379631139e+00, 1.41022603492571074746e+00,
+  1.45499141461820125087e+00, 1.50117780000012279729e+00,
+  1.54883029863413312910e+00, 1.59799544995063325104e+00,
+  1.64872127070012819416e+00, 1.70105730184840076014e+00,
+  1.75505465696029849809e+00, 1.81076607211938722664e+00,
+  1.86824595743222232613e+00, 1.92755045016754467113e+00,
+  1.98873746958229191684e+00, 2.05186677348797674725e+00,
+  2.11700001661267478426e+00, 2.18420081081561789915e+00,
+  2.25353478721320854561e+00, 2.32506966027712103084e+00,
+  2.39887529396709808793e+00, 2.47502376996302508871e+00,
+  2.55358945806292680913e+00, 2.63464908881563086851e+00,
+  2.71828182845904553488e+00, 2.80456935623722669604e+00,
+  2.89359594417176113623e+00, 2.98544853936535581340e+00,
+  3.08021684891803104733e+00, 3.17799342753883840018e+00,
+  3.27887376793867346692e+00, 3.38295639409246895468e+00,
+  3.49034295746184142217e+00, 3.60113833627217561073e+00,
+  3.71545073794110392029e+00, 3.83339180475841034834e+00,
+  3.95507672292057721464e+00, 4.08062433502646015882e+00,
+  4.21015725614395996956e+00, 4.34380199356104235164e+00,
+  4.48168907033806451778e+00, 4.62395315278208052234e+00,
+  4.77073318196760265408e+00, 4.92217250943229078786e+00,
+  5.07841903718008147450e+00, 5.23962536212848917216e+00,
+  5.40594892514116676097e+00, 5.57755216479125959239e+00,
+  5.75460267600573072144e+00, 5.93727337374560715233e+00,
+  6.12574266188198635064e+00, 6.32019460743274397174e+00,
+  6.52081912033011246166e+00, 6.72781213889469142941e+00,
+  6.94137582119703555605e+00, 7.16171874249371143151e+00,
+  1.00000000000000000000e+00, 7.38905609893065040694e+00,
+  5.45981500331442362040e+01, 4.03428793492735110249e+02,
+  2.98095798704172830185e+03, 2.20264657948067178950e+04,
+  1.62754791419003915507e+05, 1.20260428416477679275e+06,
+  8.88611052050787210464e+06, 6.56599691373305097222e+07,
+  4.85165195409790277481e+08, 3.58491284613159179688e+09,
+  2.64891221298434715271e+10, 1.95729609428838775635e+11,
+  1.44625706429147509766e+12, 1.06864745815244628906e+13,
+  7.89629601826806875000e+13, 5.83461742527454875000e+14,
+  4.31123154711519500000e+15, 3.18559317571137560000e+16,
+  2.35385266837020000000e+17, 1.73927494152050099200e+18,
+  1.28516001143593082880e+19, 9.49611942060244828160e+19,
+  7.01673591209763143680e+20, 5.18470552858707204506e+21,
+  3.83100800071657691546e+22, 2.83075330327469394756e+23,
+  2.09165949601299610311e+24, 1.54553893559010391826e+25,
+  1.14200738981568423454e+26, 8.43835666874145383188e+26,
+  6.23514908081161674391e+27, 4.60718663433129178064e+28,
+  3.40427604993174075827e+29, 2.51543867091916687979e+30,
+  1.85867174528412788702e+31, 1.37338297954017610775e+32,
+  1.01480038811388874615e+33, 7.49841699699012090701e+33,
+  5.54062238439350983445e+34, 4.09399696212745451138e+35,
+  3.02507732220114256223e+36, 2.23524660373471497416e+37,
+  1.65163625499400180987e+38, 1.22040329431784083418e+39,
+  9.01762840503429851945e+39, 6.66317621641089618500e+40,
+  4.92345828601205826106e+41, 3.63797094760880474988e+42,
+  2.68811714181613560943e+43, 1.98626483613765434356e+44,
+  1.46766223015544238535e+45, 1.08446385529002313207e+46,
+  8.01316426400059069850e+46, 5.92097202766466993617e+47,
+  4.37503944726134096988e+48, 3.23274119108485947460e+49,
+  2.38869060142499127023e+50, 1.76501688569176554670e+51,
+  1.30418087839363225614e+52, 9.63666567360320166416e+52,
+  7.12058632688933793173e+53, 5.26144118266638596909e+54,
+};
+
+/*
+ * EN[k] = exp(-2^(k-5))
+ */
+static const double EN[] = {
+  9.69233234476344129860e-01, 9.39413062813475807644e-01,
+  8.82496902584595455110e-01, 7.78800783071404878477e-01,
+  6.06530659712633424263e-01, 3.67879441171442334024e-01,
+  1.35335283236612702318e-01, 1.83156388887341786686e-02,
+  3.35462627902511853224e-04, 1.12535174719259116458e-07,
+  1.26641655490941755372e-14, 1.60381089054863792659e-28,
+  2.57220937264241481170e-56,
+};
+
+static const float zF[] = {
+  0.0f,
+  1.0f,
+  5.0000000951292138e-01F,
+  1.6666518897347284e-01F,
+  3.4028234663852885981170E+38F,
+  1.1754943508222875079688E-38F,
+  88.72283935546875,
+  -103.972076416
+};
+
+#define	zero	zF[0]
+#define	one	zF[1]
+#define	p1	zF[2]
+#define	p2	zF[3]
+#define	big	zF[4]
+#define	tiny	zF[5]
+#define	thresh1	zF[6]
+#define	thresh2	zF[7]
+
+
+float
+__expf_niagara4 (float xf)
+{
+  double w, p, q;
+  int hx, ix, n;
+  float retval;
+  union
+  {
+    int hx_part;
+    float my_xf;
+  } hxx;
+  hxx.my_xf = xf;
+  hx = hxx.hx_part;
+  ix = hx & ~0x80000000;
+
+  if (ix < 0x3c800000)
+    {				/* |x| < 2**-6 */
+      if (ix < 0x38800000)	/* |x| < 2**-14 */
+	return (one + xf);
+      return (one + (xf + (xf * xf) * (p1 + xf * p2)));
+    }
+
+  n = ix >> 23;			/* biased exponent */
+
+  if (n >= 0x85)
+    {				/* |x| >= 2^6 */
+      if (n >= 0x86)
+	{			/* |x| >= 2^7 */
+	  if (n >= 0xff)
+	    {			/* x is nan of +-inf */
+	      if (hx == 0xff800000)
+		return (zero);	/* exp(-inf)=0 */
+	      return (xf * xf);	/* exp(nan/inf) is nan or inf */
+	    }
+	  if (hx > 0)
+	    {
+	      retval = big * big;	/* overflow */
+	      __set_err_expf (hxx.my_xf, retval);
+	      return (retval);
+	    }
+	  else
+	    {
+	      retval = tiny * tiny;	/* underflow */
+	      __set_err_expf (hxx.my_xf, retval);
+	      return (retval);
+	    }
+	}
+      if (hxx.my_xf >= thresh1)
+	{
+	  retval = big * big;	/* overflow */
+	  __set_err_expf (hxx.my_xf, retval);
+	  return (retval);
+	}
+      if (hxx.my_xf <= thresh2)
+	{
+	  retval = tiny * tiny;	/* underflow */
+	  __set_err_expf (hxx.my_xf, retval);
+	  return (retval);
+	}
+    }
+  ix -= n << 23;
+  if (hx > 0)
+    ix += 0x800000;
+  else
+    ix = 0x800000 - ix;
+  if (n >= 0x7f)
+    {				/* n >= 0 */
+      ix <<= n - 0x7f;
+      w = ET[(ix & 0x3f) + 64] * ET[((ix >> 6) & 0x3f) + 128];
+      p = ET[((ix >> 12) & 0x3f) + 192] * ET[((ix >> 18) & 0x3f) + 256];
+      q = ET[((ix >> 24) & 0x3f) + 320];
+    }
+  else
+    {
+      ix <<= n - 0x79;
+      w = ET[ix & 0x3f] * ET[((ix >> 6) & 0x3f) + 64];
+      p = ET[((ix >> 12) & 0x3f) + 128] * ET[((ix >> 18) & 0x3f) + 192];
+      q = ET[((ix >> 24) & 0x3f) + 256];
+    }
+  xf = (float) ((w * p) * (hx < 0 ? q * EN[n - 0x79] : q));
+  return (xf);
+}
-- 
1.7.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]