This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch master updated. glibc-2.27.9000-280-ge88ecbb
- From: wilco at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 3 Apr 2018 15:53:01 -0000
- Subject: GNU C Library master sources branch master updated. glibc-2.27.9000-280-ge88ecbb
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master has been updated
via e88ecbbfe836db5b6da809108759de4ca56be5e7 (commit)
via aef3e2558a0ab0aff6d80f3e99ebe228321ab4b3 (commit)
via 72f6e9a3e34e2be76fd9a18ea1a427e7a713465e (commit)
via 649095838b85ae71f778338c210b4c1e519e1d16 (commit)
via d9469deb14ba6f55bd8af1652951ab306a8f63bd (commit)
via 7a5640f23a797d3be38a78fb899903699c3aa5a0 (commit)
via 19a8b9a300f2f1f0012aff0f2b70b09430f50d9e (commit)
from f72aa11d7e3008d608e1092abade16101fed8f35 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=e88ecbbfe836db5b6da809108759de4ca56be5e7
commit e88ecbbfe836db5b6da809108759de4ca56be5e7
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date: Tue Apr 3 16:49:33 2018 +0100
[PATCH 7/7] sin/cos slow paths: refactor sincos implementation
Refactor the sincos implementation - rather than rely on odd partial inlining
of preprocessed portions from sin and cos, explicitly write out the cases.
This makes sincos much easier to maintain and provides an additional 16-20%
speedup between 0 and 2^27. The overall speedup of sincos is 48% over this range.
Between 0 and PI it is 66% faster.
* sysdeps/ieee754/dbl-64/s_sin.c (__sin): Cleanup ifdefs.
(__cos): Likewise.
* sysdeps/ieee754/dbl-64/s_sin.c (__sincos): Refactor using the same
logic as sin and cos.
diff --git a/ChangeLog b/ChangeLog
index 2e5b570..36b022c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
2018-04-03 Wilco Dijkstra <wdijkstr@arm.com>
+ * sysdeps/ieee754/dbl-64/s_sin.c (__sin): Cleanup ifdefs.
+ (__cos): Likewise.
+ * sysdeps/ieee754/dbl-64/s_sin.c (__sincos): Refactor using the same
+ logic as sin and cos.
+
+2018-04-03 Wilco Dijkstra <wdijkstr@arm.com>
+
* sysdeps/ieee754/dbl-64/s_sin.c (do_sin): Use TAYLOR_SIN for small
inputs. Return correct sign.
(do_sincos): Remove small input check before do_sin, let do_sin set
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 6e261f2..ba1dbe2 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -197,27 +197,17 @@ do_sincos (double a, double da, int4 n)
/* An ultimate sin routine. Given an IEEE double machine number x */
/* it computes the correctly rounded (to nearest) value of sin(x) */
/*******************************************************************/
-#ifdef IN_SINCOS
-static double
-#else
+#ifndef IN_SINCOS
double
SECTION
-#endif
__sin (double x)
{
-#ifndef IN_SINCOS
double t, a, da;
mynumber u;
int4 k, m, n;
double retval = 0;
SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
-#else
- double xx, t, cor;
- mynumber u;
- int4 k, m;
- double retval = 0;
-#endif
u.x = x;
m = u.i[HIGH_HALF];
@@ -242,7 +232,6 @@ __sin (double x)
retval = __copysign (do_cos (t, hp1), x);
} /* else if (k < 0x400368fd) */
-#ifndef IN_SINCOS
/*-------------------------- 2.426265<|x|< 105414350 ----------------------*/
else if (k < 0x419921FB)
{
@@ -263,7 +252,6 @@ __sin (double x)
__set_errno (EDOM);
retval = x / x;
}
-#endif
return retval;
}
@@ -274,27 +262,17 @@ __sin (double x)
/* it computes the correctly rounded (to nearest) value of cos(x) */
/*******************************************************************/
-#ifdef IN_SINCOS
-static double
-#else
double
SECTION
-#endif
__cos (double x)
{
double y, a, da;
mynumber u;
-#ifndef IN_SINCOS
int4 k, m, n;
-#else
- int4 k, m;
-#endif
double retval = 0;
-#ifndef IN_SINCOS
SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
-#endif
u.x = x;
m = u.i[HIGH_HALF];
@@ -320,8 +298,6 @@ __cos (double x)
retval = do_sin (a, da);
} /* else if (k < 0x400368fd) */
-
-#ifndef IN_SINCOS
else if (k < 0x419921FB)
{ /* 2.426265<|x|< 105414350 */
n = reduce_sincos (x, &a, &da);
@@ -341,7 +317,6 @@ __cos (double x)
__set_errno (EDOM);
retval = x / x; /* |x| > 2^1024 */
}
-#endif
return retval;
}
@@ -352,3 +327,5 @@ libm_alias_double (__cos, cos)
#ifndef __sin
libm_alias_double (__sin, sin)
#endif
+
+#endif
diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_sincos.c
index 4335ecb..c746037 100644
--- a/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -23,9 +23,7 @@
#include <math_private.h>
#include <libm-alias-double.h>
-#define __sin __sin_local
-#define __cos __cos_local
-#define IN_SINCOS 1
+#define IN_SINCOS
#include "s_sin.c"
void
@@ -37,31 +35,63 @@ __sincos (double x, double *sinx, double *cosx)
SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
u.x = x;
- k = 0x7fffffff & u.i[HIGH_HALF];
+ k = u.i[HIGH_HALF] & 0x7fffffff;
if (k < 0x400368fd)
{
- *sinx = __sin_local (x);
- *cosx = __cos_local (x);
- return;
- }
- if (k < 0x419921FB)
- {
- double a, da;
- int4 n = reduce_sincos (x, &a, &da);
-
- *sinx = do_sincos (a, da, n);
- *cosx = do_sincos (a, da, n + 1);
+ double a, da, y;
+ /* |x| < 2^-27 => cos (x) = 1, sin (x) = x. */
+ if (k < 0x3e400000)
+ {
+ if (k < 0x3e500000)
+ math_check_force_underflow (x);
+ *sinx = x;
+ *cosx = 1.0;
+ return;
+ }
+ /* |x| < 0.855469. */
+ else if (k < 0x3feb6000)
+ {
+ *sinx = do_sin (x, 0);
+ *cosx = do_cos (x, 0);
+ return;
+ }
+ /* |x| < 2.426265. */
+ y = hp0 - fabs (x);
+ a = y + hp1;
+ da = (y - a) + hp1;
+ *sinx = __copysign (do_cos (a, da), x);
+ *cosx = do_sin (a, da);
return;
}
+ /* |x| < 2^1024. */
if (k < 0x7ff00000)
{
- double a, da;
- int4 n = __branred (x, &a, &da);
+ double a, da, xx;
+ unsigned int n;
- *sinx = do_sincos (a, da, n);
- *cosx = do_sincos (a, da, n + 1);
+ /* If |x| < 105414350 use simple range reduction. */
+ n = k < 0x419921FB ? reduce_sincos (x, &a, &da) : __branred (x, &a, &da);
+ n = n & 3;
+
+ if (n == 1 || n == 2)
+ {
+ a = -a;
+ da = -da;
+ }
+
+ if (n & 1)
+ {
+ double *temp = cosx;
+ cosx = sinx;
+ sinx = temp;
+ }
+
+ *sinx = do_sin (a, da);
+ xx = do_cos (a, da);
+ *cosx = (n & 2) ? -xx : xx;
+ return;
}
if (isinf (x))
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=aef3e2558a0ab0aff6d80f3e99ebe228321ab4b3
commit aef3e2558a0ab0aff6d80f3e99ebe228321ab4b3
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date: Tue Apr 3 16:46:10 2018 +0100
[PATCH 6/7] sin/cos slow paths: refactor duplicated code into dosin
Refactor duplicated code into do_sin. Since all calls to do_sin use copysign to
set the sign of the result, move it inside do_sin. Small inputs use a separate
polynomial, so move this into do_sin as well (the check is based on the more
conservative case when doing large range reduction, but could be relaxed).
* sysdeps/ieee754/dbl-64/s_sin.c (do_sin): Use TAYLOR_SIN for small
inputs. Return correct sign.
(do_sincos): Remove small input check before do_sin, let do_sin set
the sign.
(__sin): Likewise.
(__cos): Likewise.
diff --git a/ChangeLog b/ChangeLog
index 1b8b660..2e5b570 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
2018-04-03 Wilco Dijkstra <wdijkstr@arm.com>
+ * sysdeps/ieee754/dbl-64/s_sin.c (do_sin): Use TAYLOR_SIN for small
+ inputs. Return correct sign.
+ (do_sincos): Remove small input check before do_sin, let do_sin set
+ the sign.
+ (__sin): Likewise.
+ (__cos): Likewise.
+
+2018-04-03 Wilco Dijkstra <wdijkstr@arm.com>
+
* sysdeps/ieee754/dbl-64/s_sin.c (TAYLOR_SLOW): Remove.
(do_cos_slow): Likewise.
(do_sin_slow): Likewise.
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index fcb2e6b..6e261f2 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -124,6 +124,11 @@ static inline double
__always_inline
do_sin (double x, double dx)
{
+ double xold = x;
+ /* Max ULP is 0.501 if |x| < 0.126, otherwise ULP is 0.518. */
+ if (fabs (x) < 0.126)
+ return TAYLOR_SIN (x * x, x, dx);
+
mynumber u;
if (x <= 0)
@@ -137,7 +142,7 @@ do_sin (double x, double dx)
c = x * dx + xx * (cs2 + xx * (cs4 + xx * cs6));
SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ssn + s * ccs - sn * c) + cs * s;
- return sn + cor;
+ return __copysign (sn + cor, xold);
}
/* Reduce range of x to within PI/2 with abs (x) < 105414350. The high part
@@ -181,14 +186,8 @@ do_sincos (double a, double da, int4 n)
/* Max ULP is 0.513. */
retval = do_cos (a, da);
else
- {
- double xx = a * a;
- /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518. */
- if (xx < 0.01588)
- retval = TAYLOR_SIN (xx, a, da);
- else
- retval = __copysign (do_sin (a, da), a);
- }
+ /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518. */
+ retval = do_sin (a, da);
return (n & 2) ? -retval : retval;
}
@@ -207,7 +206,7 @@ SECTION
__sin (double x)
{
#ifndef IN_SINCOS
- double xx, t, a, da;
+ double t, a, da;
mynumber u;
int4 k, m, n;
double retval = 0;
@@ -228,20 +227,11 @@ __sin (double x)
math_check_force_underflow (x);
retval = x;
}
- /*---------------------------- 2^-26 < |x|< 0.25 ----------------------*/
- else if (k < 0x3fd00000)
- {
- xx = x * x;
- /* Taylor series. */
- t = POLYNOMIAL (xx) * (xx * x);
- /* Max ULP of x + t is 0.535. */
- retval = x + t;
- } /* else if (k < 0x3fd00000) */
-/*---------------------------- 0.25<|x|< 0.855469---------------------- */
+/*--------------------------- 2^-26<|x|< 0.855469---------------------- */
else if (k < 0x3feb6000)
{
/* Max ULP is 0.548. */
- retval = __copysign (do_sin (x, 0), x);
+ retval = do_sin (x, 0);
} /* else if (k < 0x3feb6000) */
/*----------------------- 0.855469 <|x|<2.426265 ----------------------*/
@@ -292,7 +282,7 @@ SECTION
#endif
__cos (double x)
{
- double y, xx, a, da;
+ double y, a, da;
mynumber u;
#ifndef IN_SINCOS
int4 k, m, n;
@@ -325,13 +315,9 @@ __cos (double x)
y = hp0 - fabs (x);
a = y + hp1;
da = (y - a) + hp1;
- xx = a * a;
/* Max ULP is 0.501 if xx < 0.01588 or 0.518 otherwise.
Range reduction uses 106 bits here which is sufficient. */
- if (xx < 0.01588)
- retval = TAYLOR_SIN (xx, a, da);
- else
- retval = __copysign (do_sin (a, da), a);
+ retval = do_sin (a, da);
} /* else if (k < 0x400368fd) */
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=72f6e9a3e34e2be76fd9a18ea1a427e7a713465e
commit 72f6e9a3e34e2be76fd9a18ea1a427e7a713465e
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date: Tue Apr 3 16:43:34 2018 +0100
[PATCH 5/7] sin/cos slow paths: remove unused slowpath functions
Remove all unused slowpath functions.
* sysdeps/ieee754/dbl-64/s_sin.c (TAYLOR_SLOW): Remove.
(do_cos_slow): Likewise.
(do_sin_slow): Likewise.
(reduce_and_compute): Likewise.
(slow): Likewise.
(slow1): Likewise.
(slow2): Likewise.
(sloww): Likewise.
(sloww1): Likewise.
(sloww2): Likewise.
(bslow): Likewise.
(bslow1): Likewise.
(bslow2): Likewise.
(cslow2): Likewise.
diff --git a/ChangeLog b/ChangeLog
index a4defe7..1b8b660 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@
2018-04-03 Wilco Dijkstra <wdijkstr@arm.com>
+ * sysdeps/ieee754/dbl-64/s_sin.c (TAYLOR_SLOW): Remove.
+ (do_cos_slow): Likewise.
+ (do_sin_slow): Likewise.
+ (reduce_and_compute): Likewise.
+ (slow): Likewise.
+ (slow1): Likewise.
+ (slow2): Likewise.
+ (sloww): Likewise.
+ (sloww1): Likewise.
+ (sloww2): Likewise.
+ (bslow): Likewise.
+ (bslow1): Likewise.
+ (bslow2): Likewise.
+ (cslow2): Likewise.
+
+2018-04-03 Wilco Dijkstra <wdijkstr@arm.com>
+
* sysdeps/ieee754/dbl-64/s_sin.c (TAYLOR_SIN): Remove cor parameter.
(do_cos): Remove corp parameter and calculations.
(do_sin): Likewise.
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 7d0f375..fcb2e6b 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -22,22 +22,11 @@
/* */
/* FUNCTIONS: usin */
/* ucos */
-/* slow */
-/* slow1 */
-/* slow2 */
-/* sloww */
-/* sloww1 */
-/* sloww2 */
-/* bsloww */
-/* bsloww1 */
-/* bsloww2 */
-/* cslow2 */
/* FILES NEEDED: dla.h endian.h mpa.h mydefs.h usncs.h */
-/* branred.c sincos32.c dosincos.c mpa.c */
-/* sincos.tbl */
+/* branred.c sincos.tbl */
/* */
-/* An ultimate sin and routine. Given an IEEE double machine number x */
-/* it computes the correctly rounded (to nearest) value of sin(x) or cos(x) */
+/* An ultimate sin and cos routine. Given an IEEE double machine number x */
+/* it computes sin(x) or cos(x) with ~0.55 ULP. */
/* Assumption: Machine arithmetic operations are performed in */
/* round to nearest mode of IEEE 754 standard. */
/* */
@@ -74,29 +63,6 @@
res; \
})
-/* This is again a variation of the Taylor series expansion with the term
- x^3/3! expanded into the following for better accuracy:
-
- bb * x ^ 3 + 3 * aa * x * x1 * x2 + aa * x1 ^ 3 + aa * x2 ^ 3
-
- The correction term is dx and bb + aa = -1/3!
- */
-#define TAYLOR_SLOW(x0, dx, cor) \
-({ \
- static const double th2_36 = 206158430208.0; /* 1.5*2**37 */ \
- double xx = (x0) * (x0); \
- double x1 = ((x0) + th2_36) - th2_36; \
- double y = aa * x1 * x1 * x1; \
- double r = (x0) + y; \
- double x2 = ((x0) - x1) + (dx); \
- double t = (((POLYNOMIAL2 (xx) + bb) * xx + 3.0 * aa * x1 * x2) \
- * (x0) + aa * x2 * x2 * x2 + (dx)); \
- t = (((x0) - r) + y) + t; \
- double res = r + t; \
- (cor) = (r - res) + t; \
- res; \
-})
-
#define SINCOS_TABLE_LOOKUP(u, sn, ssn, cs, ccs) \
({ \
int4 k = u.i[LOW_HALF] << 2; \
@@ -123,23 +89,7 @@ static const double
cs4 = -4.16666666666664434524222570944589E-02,
cs6 = 1.38888874007937613028114285595617E-03;
-static const double t22 = 0x1.8p22;
-
-void __dubsin (double x, double dx, double w[]);
-void __docos (double x, double dx, double w[]);
-double __mpsin (double x, double dx, bool reduce_range);
-double __mpcos (double x, double dx, bool reduce_range);
-static double slow (double x);
-static double slow1 (double x);
-static double slow2 (double x);
-static double sloww (double x, double dx, double orig, bool shift_quadrant);
-static double sloww1 (double x, double dx, double orig, bool shift_quadrant);
-static double sloww2 (double x, double dx, double orig, int n);
-static double bsloww (double x, double dx, double orig, int n);
-static double bsloww1 (double x, double dx, double orig, int n);
-static double bsloww2 (double x, double dx, double orig, int n);
int __branred (double x, double *a, double *aa);
-static double cslow2 (double x);
/* Given a number partitioned into X and DX, this function computes the cosine
of the number by combining the sin and cos of X (as computed by a variation
@@ -166,40 +116,6 @@ do_cos (double x, double dx)
return cs + cor;
}
-/* A more precise variant of DO_COS. EPS is the adjustment to the correction
- COR. */
-static inline double
-__always_inline
-do_cos_slow (double x, double dx, double eps, double *corp)
-{
- mynumber u;
-
- if (x <= 0)
- dx = -dx;
-
- u.x = big + fabs (x);
- x = fabs (x) - (u.x - big);
-
- double xx, y, x1, x2, e1, e2, res, cor;
- double s, sn, ssn, c, cs, ccs;
- xx = x * x;
- s = x * xx * (sn3 + xx * sn5);
- c = x * dx + xx * (cs2 + xx * (cs4 + xx * cs6));
- SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
- x1 = (x + t22) - t22;
- x2 = (x - x1) + dx;
- e1 = (sn + t22) - t22;
- e2 = (sn - e1) + ssn;
- cor = (ccs - cs * c - e1 * x2 - e2 * x) - sn * s;
- y = cs - e1 * x1;
- cor = cor + ((cs - y) - e1 * x1);
- res = y + cor;
- cor = (y - res) + cor;
- cor = 1.0005 * cor + __copysign (eps, cor);
- *corp = cor;
- return res;
-}
-
/* Given a number partitioned into X and DX, this function computes the sine of
the number by combining the sin and cos of X (as computed by a variation of
the Taylor series) with the values looked up from the sin/cos table to get
@@ -224,70 +140,6 @@ do_sin (double x, double dx)
return sn + cor;
}
-/* A more precise variant of DO_SIN. EPS is the adjustment to the correction
- COR. */
-static inline double
-__always_inline
-do_sin_slow (double x, double dx, double eps, double *corp)
-{
- mynumber u;
-
- if (x <= 0)
- dx = -dx;
- u.x = big + fabs (x);
- x = fabs (x) - (u.x - big);
-
- double xx, y, x1, x2, c1, c2, res, cor;
- double s, sn, ssn, c, cs, ccs;
- xx = x * x;
- s = x * xx * (sn3 + xx * sn5);
- c = xx * (cs2 + xx * (cs4 + xx * cs6));
- SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
- x1 = (x + t22) - t22;
- x2 = (x - x1) + dx;
- c1 = (cs + t22) - t22;
- c2 = (cs - c1) + ccs;
- cor = (ssn + s * ccs + cs * s + c2 * x + c1 * x2 - sn * x * dx) - sn * c;
- y = sn + c1 * x1;
- cor = cor + ((sn - y) + c1 * x1);
- res = y + cor;
- cor = (y - res) + cor;
- cor = 1.0005 * cor + __copysign (eps, cor);
- *corp = cor;
- return res;
-}
-
-/* Reduce range of X and compute sin of a + da. When SHIFT_QUADRANT is true,
- the routine returns the cosine of a + da by rotating the quadrant once and
- computing the sine of the result. */
-static inline double
-__always_inline
-reduce_and_compute (double x, bool shift_quadrant)
-{
- double retval = 0, a, da;
- unsigned int n = __branred (x, &a, &da);
- int4 k = (n + shift_quadrant) % 4;
- switch (k)
- {
- case 2:
- a = -a;
- da = -da;
- /* Fall through. */
- case 0:
- if (a * a < 0.01588)
- retval = bsloww (a, da, x, n);
- else
- retval = bsloww1 (a, da, x, n);
- break;
-
- case 1:
- case 3:
- retval = bsloww2 (a, da, x, n);
- break;
- }
- return retval;
-}
-
/* Reduce range of x to within PI/2 with abs (x) < 105414350. The high part
is written to *a, the low part to *da. Range reduction is accurate to 136
bits so that when x is large and *a very close to zero, all 53 bits of *a
@@ -508,299 +360,6 @@ __cos (double x)
return retval;
}
-/************************************************************************/
-/* Routine compute sin(x) for 2^-26 < |x|< 0.25 by Taylor with more */
-/* precision and if still doesn't accurate enough by mpsin or dubsin */
-/************************************************************************/
-
-static inline double
-__always_inline
-slow (double x)
-{
- double res, cor, w[2];
- res = TAYLOR_SLOW (x, 0, cor);
- if (res == res + 1.0007 * cor)
- return res;
-
- __dubsin (fabs (x), 0, w);
- if (w[0] == w[0] + 1.000000001 * w[1])
- return __copysign (w[0], x);
-
- return __copysign (__mpsin (fabs (x), 0, false), x);
-}
-
-/*******************************************************************************/
-/* Routine compute sin(x) for 0.25<|x|< 0.855469 by __sincostab.tbl and Taylor */
-/* and if result still doesn't accurate enough by mpsin or dubsin */
-/*******************************************************************************/
-
-static inline double
-__always_inline
-slow1 (double x)
-{
- double w[2], cor, res;
-
- res = do_sin_slow (x, 0, 0, &cor);
- if (res == res + cor)
- return res;
-
- __dubsin (fabs (x), 0, w);
- if (w[0] == w[0] + 1.000000005 * w[1])
- return w[0];
-
- return __mpsin (fabs (x), 0, false);
-}
-
-/**************************************************************************/
-/* Routine compute sin(x) for 0.855469 <|x|<2.426265 by __sincostab.tbl */
-/* and if result still doesn't accurate enough by mpsin or dubsin */
-/**************************************************************************/
-static inline double
-__always_inline
-slow2 (double x)
-{
- double w[2], y, y1, y2, cor, res;
-
- double t = hp0 - fabs (x);
- res = do_cos_slow (t, hp1, 0, &cor);
- if (res == res + cor)
- return res;
-
- y = fabs (x) - hp0;
- y1 = y - hp1;
- y2 = (y - y1) - hp1;
- __docos (y1, y2, w);
- if (w[0] == w[0] + 1.000000005 * w[1])
- return w[0];
-
- return __mpsin (fabs (x), 0, false);
-}
-
-/* Compute sin(x + dx) where X is small enough to use Taylor series around zero
- and (x + dx) in the first or third quarter of the unit circle. ORIG is the
- original value of X for computing error of the result. If the result is not
- accurate enough, the routine calls mpsin or dubsin. SHIFT_QUADRANT rotates
- the unit circle by 1 to compute the cosine instead of sine. */
-static inline double
-__always_inline
-sloww (double x, double dx, double orig, bool shift_quadrant)
-{
- double y, t, res, cor, w[2], a, da, xn;
- mynumber v;
- int4 n;
- res = TAYLOR_SLOW (x, dx, cor);
-
- double eps = fabs (orig) * 3.1e-30;
-
- cor = 1.0005 * cor + __copysign (eps, cor);
-
- if (res == res + cor)
- return res;
-
- a = fabs (x);
- da = (x > 0) ? dx : -dx;
- __dubsin (a, da, w);
- eps = fabs (orig) * 1.1e-30;
- cor = 1.000000001 * w[1] + __copysign (eps, w[1]);
-
- if (w[0] == w[0] + cor)
- return __copysign (w[0], x);
-
- t = (orig * hpinv + toint);
- xn = t - toint;
- v.x = t;
- y = (orig - xn * mp1) - xn * mp2;
- n = (v.i[LOW_HALF] + shift_quadrant) & 3;
- da = xn * pp3;
- t = y - da;
- da = (y - t) - da;
- y = xn * pp4;
- a = t - y;
- da = ((t - a) - y) + da;
-
- if (n & 2)
- {
- a = -a;
- da = -da;
- }
- x = fabs (a);
- dx = (a > 0) ? da : -da;
- __dubsin (x, dx, w);
- eps = fabs (orig) * 1.1e-40;
- cor = 1.000000001 * w[1] + __copysign (eps, w[1]);
-
- if (w[0] == w[0] + cor)
- return __copysign (w[0], a);
-
- return shift_quadrant ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
-}
-
-/* Compute sin(x + dx) where X is in the first or third quarter of the unit
- circle. ORIG is the original value of X for computing error of the result.
- If the result is not accurate enough, the routine calls mpsin or dubsin.
- SHIFT_QUADRANT rotates the unit circle by 1 to compute the cosine instead of
- sine. */
-static inline double
-__always_inline
-sloww1 (double x, double dx, double orig, bool shift_quadrant)
-{
- double w[2], cor, res;
-
- res = do_sin_slow (x, dx, 3.1e-30 * fabs (orig), &cor);
-
- if (res == res + cor)
- return __copysign (res, x);
-
- dx = (x > 0 ? dx : -dx);
- __dubsin (fabs (x), dx, w);
-
- double eps = 1.1e-30 * fabs (orig);
- cor = 1.000000005 * w[1] + __copysign (eps, w[1]);
-
- if (w[0] == w[0] + cor)
- return __copysign (w[0], x);
-
- return shift_quadrant ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
-}
-
-/***************************************************************************/
-/* Routine compute sin(x+dx) (Double-Length number) where x in second or */
-/* fourth quarter of unit circle.Routine receive also the original value */
-/* and quarter(n= 1or 3)of x for computing error of result.And if result not*/
-/* accurate enough routine calls mpsin1 or dubsin */
-/***************************************************************************/
-
-static inline double
-__always_inline
-sloww2 (double x, double dx, double orig, int n)
-{
- double w[2], cor, res;
-
- res = do_cos_slow (x, dx, 3.1e-30 * fabs (orig), &cor);
-
- if (res == res + cor)
- return (n & 2) ? -res : res;
-
- dx = x > 0 ? dx : -dx;
- __docos (fabs (x), dx, w);
-
- double eps = 1.1e-30 * fabs (orig);
- cor = 1.000000005 * w[1] + __copysign (eps, w[1]);
-
- if (w[0] == w[0] + cor)
- return (n & 2) ? -w[0] : w[0];
-
- return (n & 1) ? __mpsin (orig, 0, true) : __mpcos (orig, 0, true);
-}
-
-/***************************************************************************/
-/* Routine compute sin(x+dx) or cos(x+dx) (Double-Length number) where x */
-/* is small enough to use Taylor series around zero and (x+dx) */
-/* in first or third quarter of unit circle.Routine receive also */
-/* (right argument) the original value of x for computing error of */
-/* result.And if result not accurate enough routine calls other routines */
-/***************************************************************************/
-
-static inline double
-__always_inline
-bsloww (double x, double dx, double orig, int n)
-{
- double res, cor, w[2], a, da;
-
- res = TAYLOR_SLOW (x, dx, cor);
- cor = 1.0005 * cor + __copysign (1.1e-24, cor);
- if (res == res + cor)
- return res;
-
- a = fabs (x);
- da = (x > 0) ? dx : -dx;
- __dubsin (a, da, w);
- cor = 1.000000001 * w[1] + __copysign (1.1e-24, w[1]);
-
- if (w[0] == w[0] + cor)
- return __copysign (w[0], x);
-
- return (n & 1) ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
-}
-
-/***************************************************************************/
-/* Routine compute sin(x+dx) or cos(x+dx) (Double-Length number) where x */
-/* in first or third quarter of unit circle.Routine receive also */
-/* (right argument) the original value of x for computing error of result.*/
-/* And if result not accurate enough routine calls other routines */
-/***************************************************************************/
-
-static inline double
-__always_inline
-bsloww1 (double x, double dx, double orig, int n)
-{
- double w[2], cor, res;
-
- res = do_sin_slow (x, dx, 1.1e-24, &cor);
- if (res == res + cor)
- return (x > 0) ? res : -res;
-
- dx = (x > 0) ? dx : -dx;
- __dubsin (fabs (x), dx, w);
-
- cor = 1.000000005 * w[1] + __copysign (1.1e-24, w[1]);
-
- if (w[0] == w[0] + cor)
- return __copysign (w[0], x);
-
- return (n & 1) ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
-}
-
-/***************************************************************************/
-/* Routine compute sin(x+dx) or cos(x+dx) (Double-Length number) where x */
-/* in second or fourth quarter of unit circle.Routine receive also the */
-/* original value and quarter(n= 1or 3)of x for computing error of result. */
-/* And if result not accurate enough routine calls other routines */
-/***************************************************************************/
-
-static inline double
-__always_inline
-bsloww2 (double x, double dx, double orig, int n)
-{
- double w[2], cor, res;
-
- res = do_cos_slow (x, dx, 1.1e-24, &cor);
- if (res == res + cor)
- return (n & 2) ? -res : res;
-
- dx = (x > 0) ? dx : -dx;
- __docos (fabs (x), dx, w);
-
- cor = 1.000000005 * w[1] + __copysign (1.1e-24, w[1]);
-
- if (w[0] == w[0] + cor)
- return (n & 2) ? -w[0] : w[0];
-
- return (n & 1) ? __mpsin (orig, 0, true) : __mpcos (orig, 0, true);
-}
-
-/************************************************************************/
-/* Routine compute cos(x) for 2^-27 < |x|< 0.25 by Taylor with more */
-/* precision and if still doesn't accurate enough by mpcos or docos */
-/************************************************************************/
-
-static inline double
-__always_inline
-cslow2 (double x)
-{
- double w[2], cor, res;
-
- res = do_cos_slow (x, 0, 0, &cor);
- if (res == res + cor)
- return res;
-
- __docos (fabs (x), 0, w);
- if (w[0] == w[0] + 1.000000005 * w[1])
- return w[0];
-
- return __mpcos (x, 0, false);
-}
-
#ifndef __cos
libm_alias_double (__cos, cos)
#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=649095838b85ae71f778338c210b4c1e519e1d16
commit 649095838b85ae71f778338c210b4c1e519e1d16
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date: Tue Apr 3 16:41:36 2018 +0100
[PATCH 4/7] sin/cos slow paths: remove slow paths from huge range reduction
For huge inputs use the improved do_sincos function as well. Now no cases use
the correction factor returned by do_sin, do_cos and TAYLOR_SIN, so remove it.
* sysdeps/ieee754/dbl-64/s_sin.c (TAYLOR_SIN): Remove cor parameter.
(do_cos): Remove corp parameter and calculations.
(do_sin): Likewise.
(do_sincos): Remove cor variable.
(__sin): Use do_sincos for huge inputs.
(__cos): Likewise.
* sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Likewise.
(reduce_and_compute_sincos): Remove unused function.
diff --git a/ChangeLog b/ChangeLog
index a8d1ecb..a4defe7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,16 @@
2018-04-03 Wilco Dijkstra <wdijkstr@arm.com>
+ * sysdeps/ieee754/dbl-64/s_sin.c (TAYLOR_SIN): Remove cor parameter.
+ (do_cos): Remove corp parameter and calculations.
+ (do_sin): Likewise.
+ (do_sincos): Remove cor variable.
+ (__sin): Use do_sincos for huge inputs.
+ (__cos): Likewise.
+ * sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Likewise.
+ (reduce_and_compute_sincos): Remove unused function.
+
+2018-04-03 Wilco Dijkstra <wdijkstr@arm.com>
+
* sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_1): Rename to
reduce_sincos, improve accuracy to 136 bits.
(do_sincos_1): Rename to do_sincos, remove fallbacks to slow functions.
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index b8c366a..7d0f375 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -66,12 +66,11 @@
a - a^3/3! + a^5/5! - a^7/7! + a^9/9! + (1 - a^2) * da / 2
The constants s1, s2, s3, etc. are pre-computed values of 1/3!, 1/5! and so
- on. The result is returned to LHS and correction in COR. */
-#define TAYLOR_SIN(xx, a, da, cor) \
+ on. The result is returned to LHS. */
+#define TAYLOR_SIN(xx, a, da) \
({ \
double t = ((POLYNOMIAL (xx) * (a) - 0.5 * (da)) * (xx) + (da)); \
double res = (a) + t; \
- (cor) = ((a) - res) + t; \
res; \
})
@@ -145,10 +144,10 @@ static double cslow2 (double x);
/* Given a number partitioned into X and DX, this function computes the cosine
of the number by combining the sin and cos of X (as computed by a variation
of the Taylor series) with the values looked up from the sin/cos table to
- get the result in RES and a correction value in COR. */
+ get the result. */
static inline double
__always_inline
-do_cos (double x, double dx, double *corp)
+do_cos (double x, double dx)
{
mynumber u;
@@ -158,16 +157,13 @@ do_cos (double x, double dx, double *corp)
u.x = big + fabs (x);
x = fabs (x) - (u.x - big) + dx;
- double xx, s, sn, ssn, c, cs, ccs, res, cor;
+ double xx, s, sn, ssn, c, cs, ccs, cor;
xx = x * x;
s = x + x * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ccs - s * ssn - cs * c) - sn * s;
- res = cs + cor;
- cor = (cs - res) + cor;
- *corp = cor;
- return res;
+ return cs + cor;
}
/* A more precise variant of DO_COS. EPS is the adjustment to the correction
@@ -207,10 +203,10 @@ do_cos_slow (double x, double dx, double eps, double *corp)
/* Given a number partitioned into X and DX, this function computes the sine of
the number by combining the sin and cos of X (as computed by a variation of
the Taylor series) with the values looked up from the sin/cos table to get
- the result in RES and a correction value in COR. */
+ the result. */
static inline double
__always_inline
-do_sin (double x, double dx, double *corp)
+do_sin (double x, double dx)
{
mynumber u;
@@ -219,16 +215,13 @@ do_sin (double x, double dx, double *corp)
u.x = big + fabs (x);
x = fabs (x) - (u.x - big);
- double xx, s, sn, ssn, c, cs, ccs, cor, res;
+ double xx, s, sn, ssn, c, cs, ccs, cor;
xx = x * x;
s = x + (dx + x * xx * (sn3 + xx * sn5));
c = x * dx + xx * (cs2 + xx * (cs4 + xx * cs6));
SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ssn + s * ccs - sn * c) + cs * s;
- res = sn + cor;
- cor = (sn - res) + cor;
- *corp = cor;
- return res;
+ return sn + cor;
}
/* A more precise variant of DO_SIN. EPS is the adjustment to the correction
@@ -330,19 +323,19 @@ static double
__always_inline
do_sincos (double a, double da, int4 n)
{
- double retval, cor;
+ double retval;
if (n & 1)
/* Max ULP is 0.513. */
- retval = do_cos (a, da, &cor);
+ retval = do_cos (a, da);
else
{
double xx = a * a;
/* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518. */
if (xx < 0.01588)
- retval = TAYLOR_SIN (xx, a, da, cor);
+ retval = TAYLOR_SIN (xx, a, da);
else
- retval = __copysign (do_sin (a, da, &cor), a);
+ retval = __copysign (do_sin (a, da), a);
}
return (n & 2) ? -retval : retval;
@@ -362,7 +355,7 @@ SECTION
__sin (double x)
{
#ifndef IN_SINCOS
- double xx, t, a, da, cor;
+ double xx, t, a, da;
mynumber u;
int4 k, m, n;
double retval = 0;
@@ -396,7 +389,7 @@ __sin (double x)
else if (k < 0x3feb6000)
{
/* Max ULP is 0.548. */
- retval = __copysign (do_sin (x, 0, &cor), x);
+ retval = __copysign (do_sin (x, 0), x);
} /* else if (k < 0x3feb6000) */
/*----------------------- 0.855469 <|x|<2.426265 ----------------------*/
@@ -404,7 +397,7 @@ __sin (double x)
{
t = hp0 - fabs (x);
/* Max ULP is 0.51. */
- retval = __copysign (do_cos (t, hp1, &cor), x);
+ retval = __copysign (do_cos (t, hp1), x);
} /* else if (k < 0x400368fd) */
#ifndef IN_SINCOS
@@ -417,8 +410,10 @@ __sin (double x)
/* --------------------105414350 <|x| <2^1024------------------------------*/
else if (k < 0x7ff00000)
- retval = reduce_and_compute (x, false);
-
+ {
+ n = __branred (x, &a, &da);
+ retval = do_sincos (a, da, n);
+ }
/*--------------------- |x| > 2^1024 ----------------------------------*/
else
{
@@ -445,7 +440,7 @@ SECTION
#endif
__cos (double x)
{
- double y, xx, cor, a, da;
+ double y, xx, a, da;
mynumber u;
#ifndef IN_SINCOS
int4 k, m, n;
@@ -470,7 +465,7 @@ __cos (double x)
else if (k < 0x3feb6000)
{ /* 2^-27 < |x| < 0.855469 */
/* Max ULP is 0.51. */
- retval = do_cos (x, 0, &cor);
+ retval = do_cos (x, 0);
} /* else if (k < 0x3feb6000) */
else if (k < 0x400368fd)
@@ -482,9 +477,9 @@ __cos (double x)
/* Max ULP is 0.501 if xx < 0.01588 or 0.518 otherwise.
Range reduction uses 106 bits here which is sufficient. */
if (xx < 0.01588)
- retval = TAYLOR_SIN (xx, a, da, cor);
+ retval = TAYLOR_SIN (xx, a, da);
else
- retval = __copysign (do_sin (a, da, &cor), a);
+ retval = __copysign (do_sin (a, da), a);
} /* else if (k < 0x400368fd) */
@@ -497,7 +492,10 @@ __cos (double x)
/* 105414350 <|x| <2^1024 */
else if (k < 0x7ff00000)
- retval = reduce_and_compute (x, true);
+ {
+ n = __branred (x, &a, &da);
+ retval = do_sincos (a, da, n + 1);
+ }
else
{
diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_sincos.c
index 4f032d2..4335ecb 100644
--- a/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -28,37 +28,6 @@
#define IN_SINCOS 1
#include "s_sin.c"
-/* Consolidated version of reduce_and_compute in s_sin.c that does range
- reduction only once and computes sin and cos together. */
-static inline void
-__always_inline
-reduce_and_compute_sincos (double x, double *sinx, double *cosx)
-{
- double a, da;
- unsigned int n = __branred (x, &a, &da);
-
- n = n & 3;
-
- if (n == 1 || n == 2)
- {
- a = -a;
- da = -da;
- }
-
- if (n & 1)
- {
- double *temp = cosx;
- cosx = sinx;
- sinx = temp;
- }
-
- if (a * a < 0.01588)
- *sinx = bsloww (a, da, x, n);
- else
- *sinx = bsloww1 (a, da, x, n);
- *cosx = bsloww2 (a, da, x, n);
-}
-
void
__sincos (double x, double *sinx, double *cosx)
{
@@ -88,8 +57,11 @@ __sincos (double x, double *sinx, double *cosx)
}
if (k < 0x7ff00000)
{
- reduce_and_compute_sincos (x, sinx, cosx);
- return;
+ double a, da;
+ int4 n = __branred (x, &a, &da);
+
+ *sinx = do_sincos (a, da, n);
+ *cosx = do_sincos (a, da, n + 1);
}
if (isinf (x))
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=d9469deb14ba6f55bd8af1652951ab306a8f63bd
commit d9469deb14ba6f55bd8af1652951ab306a8f63bd
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date: Tue Apr 3 16:33:13 2018 +0100
[PATCH 3/7] sin/cos slow paths: remove slow paths from small range reduction
This patch improves the accuracy of the range reduction. When the input is
large (2^27) and very close to a multiple of PI/2, using 110 bits of PI is not
enough. Improve range reduction accuracy to 136 bits. As a result the special
checks for results close to zero can be removed. The ULP of the polynomials is
at worst 0.55ULP, so there is no reason for the slow functions, and they can be
removed.
* sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_1): Rename to
reduce_sincos, improve accuracy to 136 bits.
(do_sincos_1): Rename to do_sincos, remove fallbacks to slow functions.
(__sin): Use improved reduction and simplified do_sincos calculation.
(__cos): Likewise.
* sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Likewise.
diff --git a/ChangeLog b/ChangeLog
index a0b5228..a8d1ecb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
2018-04-03 Wilco Dijkstra <wdijkstr@arm.com>
+ * sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_1): Rename to
+ reduce_sincos, improve accuracy to 136 bits.
+ (do_sincos_1): Rename to do_sincos, remove fallbacks to slow functions.
+ (__sin): Use improved reduction and simplified do_sincos calculation.
+ (__cos): Likewise.
+ * sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Likewise.
+
+2018-04-03 Wilco Dijkstra <wdijkstr@arm.com>
+
* sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_2): Remove function.
(do_sincos_2): Likewise.
(__sin): Remove middle range reduction case.
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index c86fb9f..b8c366a 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -295,9 +295,13 @@ reduce_and_compute (double x, bool shift_quadrant)
return retval;
}
+/* Reduce range of x to within PI/2 with abs (x) < 105414350. The high part
+ is written to *a, the low part to *da. Range reduction is accurate to 136
+ bits so that when x is large and *a very close to zero, all 53 bits of *a
+ are correct. */
static inline int4
__always_inline
-reduce_sincos_1 (double x, double *a, double *da)
+reduce_sincos (double x, double *a, double *da)
{
mynumber v;
@@ -306,62 +310,45 @@ reduce_sincos_1 (double x, double *a, double *da)
v.x = t;
double y = (x - xn * mp1) - xn * mp2;
int4 n = v.i[LOW_HALF] & 3;
- double db = xn * mp3;
- double b = y - db;
- db = (y - b) - db;
+
+ double b, db, t1, t2;
+ t1 = xn * pp3;
+ t2 = y - t1;
+ db = (y - t2) - t1;
+
+ t1 = xn * pp4;
+ b = t2 - t1;
+ db += (t2 - b) - t1;
*a = b;
*da = db;
-
return n;
}
-/* Compute sin (A + DA). cos can be computed by passing SHIFT_QUADRANT as
- true, which results in shifting the quadrant N clockwise. */
+/* Compute sin or cos (A + DA) for the given quadrant N. */
static double
__always_inline
-do_sincos_1 (double a, double da, double x, int4 n, bool shift_quadrant)
+do_sincos (double a, double da, int4 n)
{
- double xx, retval, res, cor;
- double eps = fabs (x) * 1.2e-30;
+ double retval, cor;
- int k1 = (n + shift_quadrant) & 3;
- switch (k1)
- { /* quarter of unit circle */
- case 2:
- a = -a;
- da = -da;
- /* Fall through. */
- case 0:
- xx = a * a;
+ if (n & 1)
+ /* Max ULP is 0.513. */
+ retval = do_cos (a, da, &cor);
+ else
+ {
+ double xx = a * a;
+ /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518. */
if (xx < 0.01588)
- {
- /* Taylor series. */
- res = TAYLOR_SIN (xx, a, da, cor);
- cor = 1.02 * cor + __copysign (eps, cor);
- retval = (res == res + cor) ? res : sloww (a, da, x, shift_quadrant);
- }
+ retval = TAYLOR_SIN (xx, a, da, cor);
else
- {
- res = do_sin (a, da, &cor);
- cor = 1.035 * cor + __copysign (eps, cor);
- retval = ((res == res + cor) ? __copysign (res, a)
- : sloww1 (a, da, x, shift_quadrant));
- }
- break;
-
- case 1:
- case 3:
- res = do_cos (a, da, &cor);
- cor = 1.025 * cor + __copysign (eps, cor);
- retval = ((res == res + cor) ? ((n & 2) ? -res : res)
- : sloww2 (a, da, x, n));
- break;
+ retval = __copysign (do_sin (a, da, &cor), a);
}
- return retval;
+ return (n & 2) ? -retval : retval;
}
+
/*******************************************************************/
/* An ultimate sin routine. Given an IEEE double machine number x */
/* it computes the correctly rounded (to nearest) value of sin(x) */
@@ -374,13 +361,18 @@ SECTION
#endif
__sin (double x)
{
- double xx, t, cor;
+#ifndef IN_SINCOS
+ double xx, t, a, da, cor;
mynumber u;
- int4 k, m;
+ int4 k, m, n;
double retval = 0;
-#ifndef IN_SINCOS
SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
+#else
+ double xx, t, cor;
+ mynumber u;
+ int4 k, m;
+ double retval = 0;
#endif
u.x = x;
@@ -419,9 +411,8 @@ __sin (double x)
/*-------------------------- 2.426265<|x|< 105414350 ----------------------*/
else if (k < 0x419921FB)
{
- double a, da;
- int4 n = reduce_sincos_1 (x, &a, &da);
- retval = do_sincos_1 (a, da, x, n, false);
+ n = reduce_sincos (x, &a, &da);
+ retval = do_sincos (a, da, n);
} /* else if (k < 0x419921FB ) */
/* --------------------105414350 <|x| <2^1024------------------------------*/
@@ -456,7 +447,11 @@ __cos (double x)
{
double y, xx, cor, a, da;
mynumber u;
+#ifndef IN_SINCOS
+ int4 k, m, n;
+#else
int4 k, m;
+#endif
double retval = 0;
@@ -496,9 +491,8 @@ __cos (double x)
#ifndef IN_SINCOS
else if (k < 0x419921FB)
{ /* 2.426265<|x|< 105414350 */
- double a, da;
- int4 n = reduce_sincos_1 (x, &a, &da);
- retval = do_sincos_1 (a, da, x, n, true);
+ n = reduce_sincos (x, &a, &da);
+ retval = do_sincos (a, da, n + 1);
} /* else if (k < 0x419921FB ) */
/* 105414350 <|x| <2^1024 */
diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_sincos.c
index a9af8ce..4f032d2 100644
--- a/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -79,10 +79,10 @@ __sincos (double x, double *sinx, double *cosx)
if (k < 0x419921FB)
{
double a, da;
- int4 n = reduce_sincos_1 (x, &a, &da);
+ int4 n = reduce_sincos (x, &a, &da);
- *sinx = do_sincos_1 (a, da, x, n, false);
- *cosx = do_sincos_1 (a, da, x, n, true);
+ *sinx = do_sincos (a, da, n);
+ *cosx = do_sincos (a, da, n + 1);
return;
}
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=7a5640f23a797d3be38a78fb899903699c3aa5a0
commit 7a5640f23a797d3be38a78fb899903699c3aa5a0
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date: Tue Apr 3 16:28:03 2018 +0100
[PATCH 2/7] sin/cos slow paths: remove large range reduction
This patch removes the large range reduction code and defers to the huge range
reduction code. The first level range reducer supports inputs up to 2^27,
which is way too large given that inputs for sin/cos are typically small
(< 10), and optimizing for a smaller range would give a significant speedup.
Input values above 2^27 are practically never used, so there is no reason for
supporting range reduction between 2^27 and 2^48. Removing it significantly
simplifies code and enables further speedups. There is about a 2.3x slowdown
in this range due to __branred being extremely slow (a better algorithm could
easily more than double performance).
* sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_2): Remove function.
(do_sincos_2): Likewise.
(__sin): Remove middle range reduction case.
(__cos): Likewise.
* sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Remove middle range
reduction case.
diff --git a/ChangeLog b/ChangeLog
index 77e3477..a0b5228 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
2018-04-03 Wilco Dijkstra <wdijkstr@arm.com>
+ * sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_2): Remove function.
+ (do_sincos_2): Likewise.
+ (__sin): Remove middle range reduction case.
+ (__cos): Likewise.
+ * sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Remove middle range
+ reduction case.
+
+2018-04-03 Wilco Dijkstra <wdijkstr@arm.com>
+
* sysdeps/aarch64/libm-test-ulps: Update ULP for sin, cos, sincos.
* sysdeps/ieee754/dbl-64/s_sin.c (__sin): Remove slow paths for small
inputs.
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 0c16b72..c86fb9f 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -362,80 +362,6 @@ do_sincos_1 (double a, double da, double x, int4 n, bool shift_quadrant)
return retval;
}
-static inline int4
-__always_inline
-reduce_sincos_2 (double x, double *a, double *da)
-{
- mynumber v;
-
- double t = (x * hpinv + toint);
- double xn = t - toint;
- v.x = t;
- double xn1 = (xn + 8.0e22) - 8.0e22;
- double xn2 = xn - xn1;
- double y = ((((x - xn1 * mp1) - xn1 * mp2) - xn2 * mp1) - xn2 * mp2);
- int4 n = v.i[LOW_HALF] & 3;
- double db = xn1 * pp3;
- t = y - db;
- db = (y - t) - db;
- db = (db - xn2 * pp3) - xn * pp4;
- double b = t + db;
- db = (t - b) + db;
-
- *a = b;
- *da = db;
-
- return n;
-}
-
-/* Compute sin (A + DA). cos can be computed by passing SHIFT_QUADRANT as
- true, which results in shifting the quadrant N clockwise. */
-static double
-__always_inline
-do_sincos_2 (double a, double da, double x, int4 n, bool shift_quadrant)
-{
- double res, retval, cor, xx;
-
- double eps = 1.0e-24;
-
- int4 k = (n + shift_quadrant) & 3;
-
- switch (k)
- {
- case 2:
- a = -a;
- da = -da;
- /* Fall through. */
- case 0:
- xx = a * a;
- if (xx < 0.01588)
- {
- /* Taylor series. */
- res = TAYLOR_SIN (xx, a, da, cor);
- cor = 1.02 * cor + __copysign (eps, cor);
- retval = (res == res + cor) ? res : bsloww (a, da, x, n);
- }
- else
- {
- res = do_sin (a, da, &cor);
- cor = 1.035 * cor + __copysign (eps, cor);
- retval = ((res == res + cor) ? __copysign (res, a)
- : bsloww1 (a, da, x, n));
- }
- break;
-
- case 1:
- case 3:
- res = do_cos (a, da, &cor);
- cor = 1.025 * cor + __copysign (eps, cor);
- retval = ((res == res + cor) ? ((n & 2) ? -res : res)
- : bsloww2 (a, da, x, n));
- break;
- }
-
- return retval;
-}
-
/*******************************************************************/
/* An ultimate sin routine. Given an IEEE double machine number x */
/* it computes the correctly rounded (to nearest) value of sin(x) */
@@ -498,16 +424,7 @@ __sin (double x)
retval = do_sincos_1 (a, da, x, n, false);
} /* else if (k < 0x419921FB ) */
-/*---------------------105414350 <|x|< 281474976710656 --------------------*/
- else if (k < 0x42F00000)
- {
- double a, da;
-
- int4 n = reduce_sincos_2 (x, &a, &da);
- retval = do_sincos_2 (a, da, x, n, false);
- } /* else if (k < 0x42F00000 ) */
-
-/* -----------------281474976710656 <|x| <2^1024----------------------------*/
+/* --------------------105414350 <|x| <2^1024------------------------------*/
else if (k < 0x7ff00000)
retval = reduce_and_compute (x, false);
@@ -584,15 +501,7 @@ __cos (double x)
retval = do_sincos_1 (a, da, x, n, true);
} /* else if (k < 0x419921FB ) */
- else if (k < 0x42F00000)
- {
- double a, da;
-
- int4 n = reduce_sincos_2 (x, &a, &da);
- retval = do_sincos_2 (a, da, x, n, true);
- } /* else if (k < 0x42F00000 ) */
-
- /* 281474976710656 <|x| <2^1024 */
+ /* 105414350 <|x| <2^1024 */
else if (k < 0x7ff00000)
retval = reduce_and_compute (x, true);
diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_sincos.c
index e1977ea..a9af8ce 100644
--- a/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -86,16 +86,6 @@ __sincos (double x, double *sinx, double *cosx)
return;
}
- if (k < 0x42F00000)
- {
- double a, da;
- int4 n = reduce_sincos_2 (x, &a, &da);
-
- *sinx = do_sincos_2 (a, da, x, n, false);
- *cosx = do_sincos_2 (a, da, x, n, true);
-
- return;
- }
if (k < 0x7ff00000)
{
reduce_and_compute_sincos (x, sinx, cosx);
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=19a8b9a300f2f1f0012aff0f2b70b09430f50d9e
commit 19a8b9a300f2f1f0012aff0f2b70b09430f50d9e
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date: Tue Apr 3 16:24:29 2018 +0100
[PATCH 1/7] sin/cos slow paths: avoid slow paths for small inputs
This series of patches removes the slow patchs from sin, cos and sincos.
Besides greatly simplifying the implementation, the new version is also much
faster for inputs up to PI (41% faster) and for large inputs needing range
reduction (27% faster).
ULP is ~0.55 with no errors found after testing 1.6 billion inputs across most
of the range with mpsin and mpcos. The number of incorrectly rounded results
(ie. ULP >0.5) is at most ~2750 per million inputs between 0.125 and 0.5,
the average is ~850 per million between 0 and PI.
Tested on AArch64 and x86_64 with no regressions.
The first patch removes the slow paths for the cases where the input is small
and doesn't require range reduction. Update ULP tables for sin, cos and sincos
on AArch64 and x86_64.
* sysdeps/aarch64/libm-test-ulps: Update ULP for sin, cos, sincos.
* sysdeps/ieee754/dbl-64/s_sin.c (__sin): Remove slow paths for small
inputs.
(__cos): Likewise.
* sysdeps/x86_64/fpu/libm-test-ulps: Update ULP for sin, cos, sincos.
diff --git a/ChangeLog b/ChangeLog
index a9da4d4..77e3477 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,15 @@
+2018-04-03 Wilco Dijkstra <wdijkstr@arm.com>
+
+ * sysdeps/aarch64/libm-test-ulps: Update ULP for sin, cos, sincos.
+ * sysdeps/ieee754/dbl-64/s_sin.c (__sin): Remove slow paths for small
+ inputs.
+ (__cos): Likewise.
+ * sysdeps/x86_64/fpu/libm-test-ulps: Update ULP for sin, cos, sincos.
+
2018-04-03 Joseph Myers <joseph@codesourcery.com>
* scripts/build-many-glibcs.py (Context.checkout): Default Linux
- version to 4.16.
+ version to 4.16
2018-04-03 Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index 1f46980..be06085 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -1012,7 +1012,9 @@ ildouble: 2
ldouble: 2
Function: "cos":
+double: 1
float: 1
+idouble: 1
ifloat: 1
ildouble: 1
ldouble: 1
@@ -1970,7 +1972,9 @@ ildouble: 2
ldouble: 2
Function: "sin":
+double: 1
float: 1
+idouble: 1
ifloat: 1
ildouble: 1
ldouble: 1
@@ -2000,7 +2004,9 @@ ildouble: 3
ldouble: 3
Function: "sincos":
+double: 1
float: 1
+idouble: 1
ifloat: 1
ildouble: 1
ldouble: 1
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 8c589cb..0c16b72 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -448,7 +448,7 @@ SECTION
#endif
__sin (double x)
{
- double xx, res, t, cor;
+ double xx, t, cor;
mynumber u;
int4 k, m;
double retval = 0;
@@ -471,26 +471,22 @@ __sin (double x)
xx = x * x;
/* Taylor series. */
t = POLYNOMIAL (xx) * (xx * x);
- res = x + t;
- cor = (x - res) + t;
- retval = (res == res + 1.07 * cor) ? res : slow (x);
+ /* Max ULP of x + t is 0.535. */
+ retval = x + t;
} /* else if (k < 0x3fd00000) */
/*---------------------------- 0.25<|x|< 0.855469---------------------- */
else if (k < 0x3feb6000)
{
- res = do_sin (x, 0, &cor);
- retval = (res == res + 1.096 * cor) ? res : slow1 (x);
- retval = __copysign (retval, x);
+ /* Max ULP is 0.548. */
+ retval = __copysign (do_sin (x, 0, &cor), x);
} /* else if (k < 0x3feb6000) */
/*----------------------- 0.855469 <|x|<2.426265 ----------------------*/
else if (k < 0x400368fd)
{
-
t = hp0 - fabs (x);
- res = do_cos (t, hp1, &cor);
- retval = (res == res + 1.020 * cor) ? res : slow2 (x);
- retval = __copysign (retval, x);
+ /* Max ULP is 0.51. */
+ retval = __copysign (do_cos (t, hp1, &cor), x);
} /* else if (k < 0x400368fd) */
#ifndef IN_SINCOS
@@ -541,7 +537,7 @@ SECTION
#endif
__cos (double x)
{
- double y, xx, res, cor, a, da;
+ double y, xx, cor, a, da;
mynumber u;
int4 k, m;
@@ -561,8 +557,8 @@ __cos (double x)
else if (k < 0x3feb6000)
{ /* 2^-27 < |x| < 0.855469 */
- res = do_cos (x, 0, &cor);
- retval = (res == res + 1.020 * cor) ? res : cslow2 (x);
+ /* Max ULP is 0.51. */
+ retval = do_cos (x, 0, &cor);
} /* else if (k < 0x3feb6000) */
else if (k < 0x400368fd)
@@ -571,20 +567,12 @@ __cos (double x)
a = y + hp1;
da = (y - a) + hp1;
xx = a * a;
+ /* Max ULP is 0.501 if xx < 0.01588 or 0.518 otherwise.
+ Range reduction uses 106 bits here which is sufficient. */
if (xx < 0.01588)
- {
- res = TAYLOR_SIN (xx, a, da, cor);
- cor = 1.02 * cor + __copysign (1.0e-31, cor);
- retval = (res == res + cor) ? res : sloww (a, da, x, true);
- }
+ retval = TAYLOR_SIN (xx, a, da, cor);
else
- {
- res = do_sin (a, da, &cor);
- cor = 1.035 * cor + __copysign (1.0e-31, cor);
- retval = ((res == res + cor) ? __copysign (res, a)
- : sloww1 (a, da, x, true));
- }
-
+ retval = __copysign (do_sin (a, da, &cor), a);
} /* else if (k < 0x400368fd) */
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 48e53f7..bbb8a4d 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1262,7 +1262,9 @@ ildouble: 1
ldouble: 1
Function: "cos":
+double: 1
float128: 1
+idouble: 1
ifloat128: 1
ildouble: 1
ldouble: 1
@@ -2528,7 +2530,9 @@ Function: "pow_vlen8_avx2":
float: 3
Function: "sin":
+double: 1
float128: 1
+idouble: 1
ifloat128: 1
ildouble: 1
ldouble: 1
@@ -2578,7 +2582,9 @@ Function: "sin_vlen8_avx2":
float: 1
Function: "sincos":
+double: 1
float128: 1
+idouble: 1
ifloat128: 1
ildouble: 1
ldouble: 1
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 72 ++++-
sysdeps/aarch64/libm-test-ulps | 6 +
sysdeps/ieee754/dbl-64/s_sin.c | 733 ++++---------------------------------
sysdeps/ieee754/dbl-64/s_sincos.c | 108 +++---
sysdeps/x86_64/fpu/libm-test-ulps | 6 +
5 files changed, 205 insertions(+), 720 deletions(-)
hooks/post-receive
--
GNU C Library master sources