This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH 2/2] Improves __ieee754_exp() performance by greater than 5x on sparc/x86.
- From: Patrick McGehearty <patrick dot mcgehearty at oracle dot com>
- To: libc-alpha at sourceware dot org
- Date: Thu, 2 Nov 2017 14:48:57 -0400
- Subject: [PATCH 2/2] Improves __ieee754_exp() performance by greater than 5x on sparc/x86.
- Authentication-results: sourceware.org; auth=none
- References: <1509648537-63658-1-git-send-email-patrick.mcgehearty@oracle.com>
Version 4 of proposed patch.
New comments revised to use GNU standard comment formating.
Limited comment added in eexp.tbl for TBL[]. The original src
used for porting to Linux did not have a comment about TBL[].
The new comment is limited to the current worker's level of
understanding.
The (-xx.x > threshold2) case is changed to return force_underflow.
For FE_TONEAREST, tiny*tiny will always be zero but for
FE_UPWARD, it will be the smallest representable value.
That change caused no change in the math test results for Sparc or x86.
---
sysdeps/ieee754/dbl-64/e_exp.c | 38 +++++++++++++++-----------------------
sysdeps/ieee754/dbl-64/eexp.tbl | 14 +++++++++-----
2 files changed, 24 insertions(+), 28 deletions(-)
diff --git a/sysdeps/ieee754/dbl-64/e_exp.c b/sysdeps/ieee754/dbl-64/e_exp.c
index f118f33..c1e6585 100644
--- a/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/sysdeps/ieee754/dbl-64/e_exp.c
@@ -34,8 +34,7 @@
/***************************************************************************/
/* IBM exp(x) replaced by following exp(x) in 2017. IBM exp1(x,xx) remains. */
-/*
- exp(x)
+/* exp(x)
Hybrid algorithm of Peter Tang's Table driven method (for large
arguments) and an accurate table (for small arguments).
Written by K.C. Ng, November 1988.
@@ -70,8 +69,7 @@
Misc. info.
For IEEE double
if x > 7.09782712893383973096e+02 then exp(x) overflow
- if x < -7.45133219101941108420e+02 then exp(x) underflow
- */
+ if x < -7.45133219101941108420e+02 then exp(x) underflow. */
#include <math.h>
#include <math-svid-compat.h>
@@ -130,10 +128,8 @@ __ieee754_exp (double x_arg)
retval = one + xx.x * (one + half * xx.x);
return (retval);
}
- /*
- Use FE_TONEAREST rounding mode for computing yy.y
- Avoid set/reset of rounding mode if already in FE_TONEAREST mode
- */
+ /* Use FE_TONEAREST rounding mode for computing yy.y.
+ Avoid set/reset of rounding mode if in FE_TONEAREST mode. */
fe_val = get_rounding_mode ();
if (fe_val == FE_TONEAREST) {
t = xx.x * xx.x;
@@ -151,16 +147,14 @@ __ieee754_exp (double x_arg)
return (retval);
}
- /* find the multiple of 2^-6 nearest x */
+ /* Find the multiple of 2^-6 nearest x. */
k = hx >> 20;
j = (0x00100000 | (hx & 0x000fffff)) >> (0x40c - k);
j = (j - 1) & ~1;
if (ix < 0)
j += 134;
- /*
- Use FE_TONEAREST rounding mode for computing yy.y
- Avoid set/reset of rounding mode if already in FE_TONEAREST mode
- */
+ /* Use FE_TONEAREST rounding mode for computing yy.y.
+ Avoid set/reset of rounding mode if in FE_TONEAREST mode. */
fe_val = get_rounding_mode ();
if (fe_val == FE_TONEAREST) {
z = xx.x - TBL2[j];
@@ -181,31 +175,29 @@ __ieee754_exp (double x_arg)
}
if (hx >= 0x40862e42)
- { /* x is large, infinite, or nan */
+ { /* x is large, infinite, or nan. */
if (hx >= 0x7ff00000)
{
if (ix == 0xfff00000 && xx.i_part[LOW_HALF] == 0)
- return (zero); /* exp(-inf) = 0 */
- return (xx.x * xx.x); /* exp(nan/inf) is nan or inf */
+ return (zero); /* exp(-inf) = 0. */
+ return (xx.x * xx.x); /* exp(nan/inf) is nan or inf. */
}
if (xx.x > threshold1)
- { /* set overflow error condition */
+ { /* set overflow error condition. */
retval = hhuge * hhuge;
return retval;
}
if (-xx.x > threshold2)
- { /* set underflow error condition */
+ { /* set underflow error condition. */
double force_underflow = tiny * tiny;
math_force_eval (force_underflow);
- retval = zero;
+ retval = force_underflow;
return retval;
}
}
- /*
- Use FE_TONEAREST rounding mode for computing yy.y
- Avoid set/reset of rounding mode if already in FE_TONEAREST mode
- */
+ /* Use FE_TONEAREST rounding mode for computing yy.y.
+ Avoid set/reset of rounding mode if already in FE_TONEAREST mode. */
fe_val = get_rounding_mode ();
if (fe_val == FE_TONEAREST) {
t = invln2_32 * xx.x;
diff --git a/sysdeps/ieee754/dbl-64/eexp.tbl b/sysdeps/ieee754/dbl-64/eexp.tbl
index ec48489..1fed9f4 100644
--- a/sysdeps/ieee754/dbl-64/eexp.tbl
+++ b/sysdeps/ieee754/dbl-64/eexp.tbl
@@ -16,6 +16,11 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+
+/* TBL[2*j] and TBL[2*j+1] are double precision numbers used to
+ approximate exp(x) using the formula given in the comments
+ for e_exp.c. */
+
static const double TBL[64] = {
0x1.0000000000000p+0, 0x0.0000000000000p+0,
0x1.059b0d3158574p+0, 0x1.d73e2a475b465p-55,
@@ -49,8 +54,8 @@ static const double TBL[64] = {
0x1.dfc97337b9b5fp+0, -0x1.1a5cd4f184b5cp-54,
0x1.ea4afa2a490dap+0, -0x1.e9c23179c2893p-54,
0x1.f50765b6e4540p+0, 0x1.9d3e12dd8a18bp-54};
-/*
- For i = 0, ..., 66,
+
+/* For i = 0, ..., 66,
TBL2[2*i] is a double precision number near (i+1)*2^-6, and
TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less
than 2^-60.
@@ -58,8 +63,7 @@ static const double TBL[64] = {
For i = 67, ..., 133,
TBL2[2*i] is a double precision number near -(i+1)*2^-6, and
TBL2[2*i+1] = exp(TBL2[2*i]) to within a relative error less
- than 2^-60.
-*/
+ than 2^-60. */
static const double TBL2[268] = {
0x1.ffffffffffc82p-7, 0x1.04080ab55de32p+0,
@@ -209,7 +213,7 @@ static const double
t5 = 0x1.6c1728d739765p-10, /* 1.3888949086377719040e-3 */
/* maximum value for x to not overflow */
threshold1 = 0x1.62e42fefa39efp+9, /* 7.09782712893383973096e+02 */
-/* maximum value for -x to not underflow */
+/* maximum value for -x to not underflow to zero in FE_NEAREST mode */
threshold2 = 0x1.74910d52d3051p+9, /* 7.45133219101941108420e+02 */
/* scaling factor used when result near zero*/
twom54 = 0x1.0000000000000p-54; /* 5.55111512312578270212e-17 */
--
1.7.1