This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch master updated. glibc-2.27.9000-280-ge88ecbb


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  e88ecbbfe836db5b6da809108759de4ca56be5e7 (commit)
       via  aef3e2558a0ab0aff6d80f3e99ebe228321ab4b3 (commit)
       via  72f6e9a3e34e2be76fd9a18ea1a427e7a713465e (commit)
       via  649095838b85ae71f778338c210b4c1e519e1d16 (commit)
       via  d9469deb14ba6f55bd8af1652951ab306a8f63bd (commit)
       via  7a5640f23a797d3be38a78fb899903699c3aa5a0 (commit)
       via  19a8b9a300f2f1f0012aff0f2b70b09430f50d9e (commit)
      from  f72aa11d7e3008d608e1092abade16101fed8f35 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=e88ecbbfe836db5b6da809108759de4ca56be5e7

commit e88ecbbfe836db5b6da809108759de4ca56be5e7
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Tue Apr 3 16:49:33 2018 +0100

    [PATCH 7/7] sin/cos slow paths: refactor sincos implementation
    
    Refactor the sincos implementation - rather than rely on odd partial inlining
    of preprocessed portions from sin and cos, explicitly write out the cases.
    This makes sincos much easier to maintain and provides an additional 16-20%
    speedup between 0 and 2^27.  The overall speedup of sincos is 48% over this range.
    Between 0 and PI it is 66% faster.
    
    	* sysdeps/ieee754/dbl-64/s_sin.c (__sin): Cleanup ifdefs.
    	(__cos): Likewise.
    	* sysdeps/ieee754/dbl-64/s_sin.c (__sincos): Refactor using the same
    	logic as sin and cos.

diff --git a/ChangeLog b/ChangeLog
index 2e5b570..36b022c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
 2018-04-03  Wilco Dijkstra  <wdijkstr@arm.com>
 
+	* sysdeps/ieee754/dbl-64/s_sin.c (__sin): Cleanup ifdefs.
+	(__cos): Likewise.
+	* sysdeps/ieee754/dbl-64/s_sin.c (__sincos): Refactor using the same
+	logic as sin and cos.
+
+2018-04-03  Wilco Dijkstra  <wdijkstr@arm.com>
+
 	* sysdeps/ieee754/dbl-64/s_sin.c (do_sin): Use TAYLOR_SIN for small
 	inputs.  Return correct sign.
 	(do_sincos): Remove small input check before do_sin, let do_sin set
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 6e261f2..ba1dbe2 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -197,27 +197,17 @@ do_sincos (double a, double da, int4 n)
 /* An ultimate sin routine. Given an IEEE double machine number x   */
 /* it computes the correctly rounded (to nearest) value of sin(x)  */
 /*******************************************************************/
-#ifdef IN_SINCOS
-static double
-#else
+#ifndef IN_SINCOS
 double
 SECTION
-#endif
 __sin (double x)
 {
-#ifndef IN_SINCOS
   double t, a, da;
   mynumber u;
   int4 k, m, n;
   double retval = 0;
 
   SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
-#else
-  double xx, t, cor;
-  mynumber u;
-  int4 k, m;
-  double retval = 0;
-#endif
 
   u.x = x;
   m = u.i[HIGH_HALF];
@@ -242,7 +232,6 @@ __sin (double x)
       retval = __copysign (do_cos (t, hp1), x);
     }				/*   else  if (k < 0x400368fd)    */
 
-#ifndef IN_SINCOS
 /*-------------------------- 2.426265<|x|< 105414350 ----------------------*/
   else if (k < 0x419921FB)
     {
@@ -263,7 +252,6 @@ __sin (double x)
 	__set_errno (EDOM);
       retval = x / x;
     }
-#endif
 
   return retval;
 }
@@ -274,27 +262,17 @@ __sin (double x)
 /* it computes the correctly rounded (to nearest) value of cos(x)  */
 /*******************************************************************/
 
-#ifdef IN_SINCOS
-static double
-#else
 double
 SECTION
-#endif
 __cos (double x)
 {
   double y, a, da;
   mynumber u;
-#ifndef IN_SINCOS
   int4 k, m, n;
-#else
-  int4 k, m;
-#endif
 
   double retval = 0;
 
-#ifndef IN_SINCOS
   SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
-#endif
 
   u.x = x;
   m = u.i[HIGH_HALF];
@@ -320,8 +298,6 @@ __cos (double x)
       retval = do_sin (a, da);
     }				/*   else  if (k < 0x400368fd)    */
 
-
-#ifndef IN_SINCOS
   else if (k < 0x419921FB)
     {				/* 2.426265<|x|< 105414350 */
       n = reduce_sincos (x, &a, &da);
@@ -341,7 +317,6 @@ __cos (double x)
 	__set_errno (EDOM);
       retval = x / x;		/* |x| > 2^1024 */
     }
-#endif
 
   return retval;
 }
@@ -352,3 +327,5 @@ libm_alias_double (__cos, cos)
 #ifndef __sin
 libm_alias_double (__sin, sin)
 #endif
+
+#endif
diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_sincos.c
index 4335ecb..c746037 100644
--- a/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -23,9 +23,7 @@
 #include <math_private.h>
 #include <libm-alias-double.h>
 
-#define __sin __sin_local
-#define __cos __cos_local
-#define IN_SINCOS 1
+#define IN_SINCOS
 #include "s_sin.c"
 
 void
@@ -37,31 +35,63 @@ __sincos (double x, double *sinx, double *cosx)
   SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
 
   u.x = x;
-  k = 0x7fffffff & u.i[HIGH_HALF];
+  k = u.i[HIGH_HALF] & 0x7fffffff;
 
   if (k < 0x400368fd)
     {
-      *sinx = __sin_local (x);
-      *cosx = __cos_local (x);
-      return;
-    }
-  if (k < 0x419921FB)
-    {
-      double a, da;
-      int4 n = reduce_sincos (x, &a, &da);
-
-      *sinx = do_sincos (a, da, n);
-      *cosx = do_sincos (a, da, n + 1);
+      double a, da, y;
+      /* |x| < 2^-27 => cos (x) = 1, sin (x) = x.  */
+      if (k < 0x3e400000)
+	{
+	  if (k < 0x3e500000)
+	    math_check_force_underflow (x);
+	  *sinx = x;
+	  *cosx = 1.0;
+	  return;
+	}
+      /* |x| < 0.855469.  */
+      else if (k < 0x3feb6000)
+	{
+	  *sinx = do_sin (x, 0);
+	  *cosx = do_cos (x, 0);
+	  return;
+	}
 
+      /* |x| < 2.426265.  */
+      y = hp0 - fabs (x);
+      a = y + hp1;
+      da = (y - a) + hp1;
+      *sinx = __copysign (do_cos (a, da), x);
+      *cosx = do_sin (a, da);
       return;
     }
+  /* |x| < 2^1024.  */
   if (k < 0x7ff00000)
     {
-      double a, da;
-      int4 n = __branred (x, &a, &da);
+      double a, da, xx;
+      unsigned int n;
 
-      *sinx = do_sincos (a, da, n);
-      *cosx = do_sincos (a, da, n + 1);
+      /* If |x| < 105414350 use simple range reduction.  */
+      n = k < 0x419921FB ? reduce_sincos (x, &a, &da) : __branred (x, &a, &da);
+      n = n & 3;
+
+      if (n == 1 || n == 2)
+	{
+	  a = -a;
+	  da = -da;
+	}
+
+      if (n & 1)
+	{
+	  double *temp = cosx;
+	  cosx = sinx;
+	  sinx = temp;
+	}
+
+      *sinx = do_sin (a, da);
+      xx = do_cos (a, da);
+      *cosx = (n & 2) ? -xx : xx;
+      return;
     }
 
   if (isinf (x))

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=aef3e2558a0ab0aff6d80f3e99ebe228321ab4b3

commit aef3e2558a0ab0aff6d80f3e99ebe228321ab4b3
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Tue Apr 3 16:46:10 2018 +0100

    [PATCH 6/7] sin/cos slow paths: refactor duplicated code into dosin
    
    Refactor duplicated code into do_sin.  Since all calls to do_sin use copysign to
    set the sign of the result, move it inside do_sin.  Small inputs use a separate
    polynomial, so move this into do_sin as well (the check is based on the more
    conservative case when doing large range reduction, but could be relaxed).
    
    	* sysdeps/ieee754/dbl-64/s_sin.c (do_sin): Use TAYLOR_SIN for small
    	inputs.  Return correct sign.
    	(do_sincos): Remove small input check before do_sin, let do_sin set
    	the sign.
    	(__sin): Likewise.
    	(__cos): Likewise.

diff --git a/ChangeLog b/ChangeLog
index 1b8b660..2e5b570 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
 2018-04-03  Wilco Dijkstra  <wdijkstr@arm.com>
 
+	* sysdeps/ieee754/dbl-64/s_sin.c (do_sin): Use TAYLOR_SIN for small
+	inputs.  Return correct sign.
+	(do_sincos): Remove small input check before do_sin, let do_sin set
+	the sign.
+	(__sin): Likewise.
+	(__cos): Likewise.
+
+2018-04-03  Wilco Dijkstra  <wdijkstr@arm.com>
+
 	* sysdeps/ieee754/dbl-64/s_sin.c (TAYLOR_SLOW): Remove.
 	(do_cos_slow): Likewise.
 	(do_sin_slow): Likewise.
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index fcb2e6b..6e261f2 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -124,6 +124,11 @@ static inline double
 __always_inline
 do_sin (double x, double dx)
 {
+  double xold = x;
+  /* Max ULP is 0.501 if |x| < 0.126, otherwise ULP is 0.518.  */
+  if (fabs (x) < 0.126)
+    return TAYLOR_SIN (x * x, x, dx);
+
   mynumber u;
 
   if (x <= 0)
@@ -137,7 +142,7 @@ do_sin (double x, double dx)
   c = x * dx + xx * (cs2 + xx * (cs4 + xx * cs6));
   SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
   cor = (ssn + s * ccs - sn * c) + cs * s;
-  return sn + cor;
+  return __copysign (sn + cor, xold);
 }
 
 /* Reduce range of x to within PI/2 with abs (x) < 105414350.  The high part
@@ -181,14 +186,8 @@ do_sincos (double a, double da, int4 n)
     /* Max ULP is 0.513.  */
     retval = do_cos (a, da);
   else
-    {
-      double xx = a * a;
-      /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518.  */
-      if (xx < 0.01588)
-	retval = TAYLOR_SIN (xx, a, da);
-      else
-	retval = __copysign (do_sin (a, da), a);
-    }
+    /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518.  */
+    retval = do_sin (a, da);
 
   return (n & 2) ? -retval : retval;
 }
@@ -207,7 +206,7 @@ SECTION
 __sin (double x)
 {
 #ifndef IN_SINCOS
-  double xx, t, a, da;
+  double t, a, da;
   mynumber u;
   int4 k, m, n;
   double retval = 0;
@@ -228,20 +227,11 @@ __sin (double x)
       math_check_force_underflow (x);
       retval = x;
     }
- /*---------------------------- 2^-26 < |x|< 0.25 ----------------------*/
-  else if (k < 0x3fd00000)
-    {
-      xx = x * x;
-      /* Taylor series.  */
-      t = POLYNOMIAL (xx) * (xx * x);
-      /* Max ULP of x + t is 0.535.  */
-      retval = x + t;
-    }				/*  else  if (k < 0x3fd00000)    */
-/*---------------------------- 0.25<|x|< 0.855469---------------------- */
+/*--------------------------- 2^-26<|x|< 0.855469---------------------- */
   else if (k < 0x3feb6000)
     {
       /* Max ULP is 0.548.  */
-      retval = __copysign (do_sin (x, 0), x);
+      retval = do_sin (x, 0);
     }				/*   else  if (k < 0x3feb6000)    */
 
 /*----------------------- 0.855469  <|x|<2.426265  ----------------------*/
@@ -292,7 +282,7 @@ SECTION
 #endif
 __cos (double x)
 {
-  double y, xx, a, da;
+  double y, a, da;
   mynumber u;
 #ifndef IN_SINCOS
   int4 k, m, n;
@@ -325,13 +315,9 @@ __cos (double x)
       y = hp0 - fabs (x);
       a = y + hp1;
       da = (y - a) + hp1;
-      xx = a * a;
       /* Max ULP is 0.501 if xx < 0.01588 or 0.518 otherwise.
 	 Range reduction uses 106 bits here which is sufficient.  */
-      if (xx < 0.01588)
-	retval = TAYLOR_SIN (xx, a, da);
-      else
-	retval = __copysign (do_sin (a, da), a);
+      retval = do_sin (a, da);
     }				/*   else  if (k < 0x400368fd)    */
 
 

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=72f6e9a3e34e2be76fd9a18ea1a427e7a713465e

commit 72f6e9a3e34e2be76fd9a18ea1a427e7a713465e
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Tue Apr 3 16:43:34 2018 +0100

    [PATCH 5/7] sin/cos slow paths: remove unused slowpath functions
    
    Remove all unused slowpath functions.
    
    	* sysdeps/ieee754/dbl-64/s_sin.c (TAYLOR_SLOW): Remove.
    	(do_cos_slow): Likewise.
    	(do_sin_slow): Likewise.
    	(reduce_and_compute): Likewise.
    	(slow): Likewise.
    	(slow1): Likewise.
    	(slow2): Likewise.
    	(sloww): Likewise.
    	(sloww1): Likewise.
    	(sloww2): Likewise.
    	(bslow): Likewise.
    	(bslow1): Likewise.
    	(bslow2): Likewise.
    	(cslow2): Likewise.

diff --git a/ChangeLog b/ChangeLog
index a4defe7..1b8b660 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,22 @@
 2018-04-03  Wilco Dijkstra  <wdijkstr@arm.com>
 
+	* sysdeps/ieee754/dbl-64/s_sin.c (TAYLOR_SLOW): Remove.
+	(do_cos_slow): Likewise.
+	(do_sin_slow): Likewise.
+	(reduce_and_compute): Likewise.
+	(slow): Likewise.
+	(slow1): Likewise.
+	(slow2): Likewise.
+	(sloww): Likewise.
+	(sloww1): Likewise.
+	(sloww2): Likewise.
+	(bslow): Likewise.
+	(bslow1): Likewise.
+	(bslow2): Likewise.
+	(cslow2): Likewise.
+
+2018-04-03  Wilco Dijkstra  <wdijkstr@arm.com>
+
 	* sysdeps/ieee754/dbl-64/s_sin.c (TAYLOR_SIN): Remove cor parameter.
 	(do_cos): Remove corp parameter and calculations.
 	(do_sin): Likewise.
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 7d0f375..fcb2e6b 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -22,22 +22,11 @@
 /*                                                                          */
 /* FUNCTIONS: usin                                                          */
 /*            ucos                                                          */
-/*            slow                                                          */
-/*            slow1                                                         */
-/*            slow2                                                         */
-/*            sloww                                                         */
-/*            sloww1                                                        */
-/*            sloww2                                                        */
-/*            bsloww                                                        */
-/*            bsloww1                                                       */
-/*            bsloww2                                                       */
-/*            cslow2                                                        */
 /* FILES NEEDED: dla.h endian.h mpa.h mydefs.h  usncs.h                     */
-/*               branred.c sincos32.c dosincos.c mpa.c                      */
-/*               sincos.tbl                                                 */
+/*		 branred.c sincos.tbl					    */
 /*                                                                          */
-/* An ultimate sin and  routine. Given an IEEE double machine number x       */
-/* it computes the correctly rounded (to nearest) value of sin(x) or cos(x) */
+/* An ultimate sin and cos routine. Given an IEEE double machine number x   */
+/* it computes sin(x) or cos(x) with ~0.55 ULP.				    */
 /* Assumption: Machine arithmetic operations are performed in               */
 /* round to nearest mode of IEEE 754 standard.                              */
 /*                                                                          */
@@ -74,29 +63,6 @@
   res;									      \
 })
 
-/* This is again a variation of the Taylor series expansion with the term
-   x^3/3! expanded into the following for better accuracy:
-
-   bb * x ^ 3 + 3 * aa * x * x1 * x2 + aa * x1 ^ 3 + aa * x2 ^ 3
-
-   The correction term is dx and bb + aa = -1/3!
-   */
-#define TAYLOR_SLOW(x0, dx, cor) \
-({									      \
-  static const double th2_36 = 206158430208.0;	/*    1.5*2**37   */	      \
-  double xx = (x0) * (x0);						      \
-  double x1 = ((x0) + th2_36) - th2_36;					      \
-  double y = aa * x1 * x1 * x1;						      \
-  double r = (x0) + y;							      \
-  double x2 = ((x0) - x1) + (dx);					      \
-  double t = (((POLYNOMIAL2 (xx) + bb) * xx + 3.0 * aa * x1 * x2)	      \
-	      * (x0)  + aa * x2 * x2 * x2 + (dx));			      \
-  t = (((x0) - r) + y) + t;						      \
-  double res = r + t;							      \
-  (cor) = (r - res) + t;						      \
-  res;									      \
-})
-
 #define SINCOS_TABLE_LOOKUP(u, sn, ssn, cs, ccs) \
 ({									      \
   int4 k = u.i[LOW_HALF] << 2;						      \
@@ -123,23 +89,7 @@ static const double
   cs4 = -4.16666666666664434524222570944589E-02,
   cs6 = 1.38888874007937613028114285595617E-03;
 
-static const double t22 = 0x1.8p22;
-
-void __dubsin (double x, double dx, double w[]);
-void __docos (double x, double dx, double w[]);
-double __mpsin (double x, double dx, bool reduce_range);
-double __mpcos (double x, double dx, bool reduce_range);
-static double slow (double x);
-static double slow1 (double x);
-static double slow2 (double x);
-static double sloww (double x, double dx, double orig, bool shift_quadrant);
-static double sloww1 (double x, double dx, double orig, bool shift_quadrant);
-static double sloww2 (double x, double dx, double orig, int n);
-static double bsloww (double x, double dx, double orig, int n);
-static double bsloww1 (double x, double dx, double orig, int n);
-static double bsloww2 (double x, double dx, double orig, int n);
 int __branred (double x, double *a, double *aa);
-static double cslow2 (double x);
 
 /* Given a number partitioned into X and DX, this function computes the cosine
    of the number by combining the sin and cos of X (as computed by a variation
@@ -166,40 +116,6 @@ do_cos (double x, double dx)
   return cs + cor;
 }
 
-/* A more precise variant of DO_COS.  EPS is the adjustment to the correction
-   COR.  */
-static inline double
-__always_inline
-do_cos_slow (double x, double dx, double eps, double *corp)
-{
-  mynumber u;
-
-  if (x <= 0)
-    dx = -dx;
-
-  u.x = big + fabs (x);
-  x = fabs (x) - (u.x - big);
-
-  double xx, y, x1, x2, e1, e2, res, cor;
-  double s, sn, ssn, c, cs, ccs;
-  xx = x * x;
-  s = x * xx * (sn3 + xx * sn5);
-  c = x * dx + xx * (cs2 + xx * (cs4 + xx * cs6));
-  SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
-  x1 = (x + t22) - t22;
-  x2 = (x - x1) + dx;
-  e1 = (sn + t22) - t22;
-  e2 = (sn - e1) + ssn;
-  cor = (ccs - cs * c - e1 * x2 - e2 * x) - sn * s;
-  y = cs - e1 * x1;
-  cor = cor + ((cs - y) - e1 * x1);
-  res = y + cor;
-  cor = (y - res) + cor;
-  cor = 1.0005 * cor + __copysign (eps, cor);
-  *corp = cor;
-  return res;
-}
-
 /* Given a number partitioned into X and DX, this function computes the sine of
    the number by combining the sin and cos of X (as computed by a variation of
    the Taylor series) with the values looked up from the sin/cos table to get
@@ -224,70 +140,6 @@ do_sin (double x, double dx)
   return sn + cor;
 }
 
-/* A more precise variant of DO_SIN.  EPS is the adjustment to the correction
-   COR.  */
-static inline double
-__always_inline
-do_sin_slow (double x, double dx, double eps, double *corp)
-{
-  mynumber u;
-
-  if (x <= 0)
-    dx = -dx;
-  u.x = big + fabs (x);
-  x = fabs (x) - (u.x - big);
-
-  double xx, y, x1, x2, c1, c2, res, cor;
-  double s, sn, ssn, c, cs, ccs;
-  xx = x * x;
-  s = x * xx * (sn3 + xx * sn5);
-  c = xx * (cs2 + xx * (cs4 + xx * cs6));
-  SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
-  x1 = (x + t22) - t22;
-  x2 = (x - x1) + dx;
-  c1 = (cs + t22) - t22;
-  c2 = (cs - c1) + ccs;
-  cor = (ssn + s * ccs + cs * s + c2 * x + c1 * x2 - sn * x * dx) - sn * c;
-  y = sn + c1 * x1;
-  cor = cor + ((sn - y) + c1 * x1);
-  res = y + cor;
-  cor = (y - res) + cor;
-  cor = 1.0005 * cor + __copysign (eps, cor);
-  *corp = cor;
-  return res;
-}
-
-/* Reduce range of X and compute sin of a + da. When SHIFT_QUADRANT is true,
-   the routine returns the cosine of a + da by rotating the quadrant once and
-   computing the sine of the result.  */
-static inline double
-__always_inline
-reduce_and_compute (double x, bool shift_quadrant)
-{
-  double retval = 0, a, da;
-  unsigned int n = __branred (x, &a, &da);
-  int4 k = (n + shift_quadrant) % 4;
-  switch (k)
-    {
-    case 2:
-      a = -a;
-      da = -da;
-      /* Fall through.  */
-    case 0:
-      if (a * a < 0.01588)
-	retval = bsloww (a, da, x, n);
-      else
-	retval = bsloww1 (a, da, x, n);
-      break;
-
-    case 1:
-    case 3:
-      retval = bsloww2 (a, da, x, n);
-      break;
-    }
-  return retval;
-}
-
 /* Reduce range of x to within PI/2 with abs (x) < 105414350.  The high part
    is written to *a, the low part to *da.  Range reduction is accurate to 136
    bits so that when x is large and *a very close to zero, all 53 bits of *a
@@ -508,299 +360,6 @@ __cos (double x)
   return retval;
 }
 
-/************************************************************************/
-/*  Routine compute sin(x) for  2^-26 < |x|< 0.25 by  Taylor with more   */
-/* precision  and if still doesn't accurate enough by mpsin   or dubsin */
-/************************************************************************/
-
-static inline double
-__always_inline
-slow (double x)
-{
-  double res, cor, w[2];
-  res = TAYLOR_SLOW (x, 0, cor);
-  if (res == res + 1.0007 * cor)
-    return res;
-
-  __dubsin (fabs (x), 0, w);
-  if (w[0] == w[0] + 1.000000001 * w[1])
-    return __copysign (w[0], x);
-
-  return __copysign (__mpsin (fabs (x), 0, false), x);
-}
-
-/*******************************************************************************/
-/* Routine compute sin(x) for 0.25<|x|< 0.855469 by __sincostab.tbl and Taylor */
-/* and if result still doesn't accurate enough by mpsin   or dubsin            */
-/*******************************************************************************/
-
-static inline double
-__always_inline
-slow1 (double x)
-{
-  double w[2], cor, res;
-
-  res = do_sin_slow (x, 0, 0, &cor);
-  if (res == res + cor)
-    return res;
-
-  __dubsin (fabs (x), 0, w);
-  if (w[0] == w[0] + 1.000000005 * w[1])
-    return w[0];
-
-  return __mpsin (fabs (x), 0, false);
-}
-
-/**************************************************************************/
-/*  Routine compute sin(x) for   0.855469  <|x|<2.426265  by  __sincostab.tbl  */
-/* and if result still doesn't accurate enough by mpsin   or dubsin       */
-/**************************************************************************/
-static inline double
-__always_inline
-slow2 (double x)
-{
-  double w[2], y, y1, y2, cor, res;
-
-  double t = hp0 - fabs (x);
-  res = do_cos_slow (t, hp1, 0, &cor);
-  if (res == res + cor)
-    return res;
-
-  y = fabs (x) - hp0;
-  y1 = y - hp1;
-  y2 = (y - y1) - hp1;
-  __docos (y1, y2, w);
-  if (w[0] == w[0] + 1.000000005 * w[1])
-    return w[0];
-
-  return __mpsin (fabs (x), 0, false);
-}
-
-/* Compute sin(x + dx) where X is small enough to use Taylor series around zero
-   and (x + dx) in the first or third quarter of the unit circle.  ORIG is the
-   original value of X for computing error of the result.  If the result is not
-   accurate enough, the routine calls mpsin or dubsin.  SHIFT_QUADRANT rotates
-   the unit circle by 1 to compute the cosine instead of sine.  */
-static inline double
-__always_inline
-sloww (double x, double dx, double orig, bool shift_quadrant)
-{
-  double y, t, res, cor, w[2], a, da, xn;
-  mynumber v;
-  int4 n;
-  res = TAYLOR_SLOW (x, dx, cor);
-
-  double eps = fabs (orig) * 3.1e-30;
-
-  cor = 1.0005 * cor + __copysign (eps, cor);
-
-  if (res == res + cor)
-    return res;
-
-  a = fabs (x);
-  da = (x > 0) ? dx : -dx;
-  __dubsin (a, da, w);
-  eps = fabs (orig) * 1.1e-30;
-  cor = 1.000000001 * w[1] + __copysign (eps, w[1]);
-
-  if (w[0] == w[0] + cor)
-    return __copysign (w[0], x);
-
-  t = (orig * hpinv + toint);
-  xn = t - toint;
-  v.x = t;
-  y = (orig - xn * mp1) - xn * mp2;
-  n = (v.i[LOW_HALF] + shift_quadrant) & 3;
-  da = xn * pp3;
-  t = y - da;
-  da = (y - t) - da;
-  y = xn * pp4;
-  a = t - y;
-  da = ((t - a) - y) + da;
-
-  if (n & 2)
-    {
-      a = -a;
-      da = -da;
-    }
-  x = fabs (a);
-  dx = (a > 0) ? da : -da;
-  __dubsin (x, dx, w);
-  eps = fabs (orig) * 1.1e-40;
-  cor = 1.000000001 * w[1] + __copysign (eps, w[1]);
-
-  if (w[0] == w[0] + cor)
-    return __copysign (w[0], a);
-
-  return shift_quadrant ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
-}
-
-/* Compute sin(x + dx) where X is in the first or third quarter of the unit
-   circle.  ORIG is the original value of X for computing error of the result.
-   If the result is not accurate enough, the routine calls mpsin or dubsin.
-   SHIFT_QUADRANT rotates the unit circle by 1 to compute the cosine instead of
-   sine.  */
-static inline double
-__always_inline
-sloww1 (double x, double dx, double orig, bool shift_quadrant)
-{
-  double w[2], cor, res;
-
-  res = do_sin_slow (x, dx, 3.1e-30 * fabs (orig), &cor);
-
-  if (res == res + cor)
-    return __copysign (res, x);
-
-  dx = (x > 0 ? dx : -dx);
-  __dubsin (fabs (x), dx, w);
-
-  double eps = 1.1e-30 * fabs (orig);
-  cor = 1.000000005 * w[1] + __copysign (eps, w[1]);
-
-  if (w[0] == w[0] + cor)
-    return __copysign (w[0], x);
-
-  return shift_quadrant ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
-}
-
-/***************************************************************************/
-/*  Routine compute sin(x+dx)   (Double-Length number) where x in second or */
-/*  fourth quarter of unit circle.Routine receive also  the  original value */
-/* and quarter(n= 1or 3)of x for computing error of result.And if result not*/
-/* accurate enough routine calls  mpsin1   or dubsin                       */
-/***************************************************************************/
-
-static inline double
-__always_inline
-sloww2 (double x, double dx, double orig, int n)
-{
-  double w[2], cor, res;
-
-  res = do_cos_slow (x, dx, 3.1e-30 * fabs (orig), &cor);
-
-  if (res == res + cor)
-    return (n & 2) ? -res : res;
-
-  dx = x > 0 ? dx : -dx;
-  __docos (fabs (x), dx, w);
-
-  double eps = 1.1e-30 * fabs (orig);
-  cor = 1.000000005 * w[1] + __copysign (eps, w[1]);
-
-  if (w[0] == w[0] + cor)
-    return (n & 2) ? -w[0] : w[0];
-
-  return (n & 1) ? __mpsin (orig, 0, true) : __mpcos (orig, 0, true);
-}
-
-/***************************************************************************/
-/*  Routine compute sin(x+dx) or cos(x+dx) (Double-Length number) where x   */
-/* is small enough to use Taylor series around zero and   (x+dx)            */
-/* in first or third quarter of unit circle.Routine receive also            */
-/* (right argument) the  original   value of x for computing error of      */
-/* result.And if result not accurate enough routine calls other routines    */
-/***************************************************************************/
-
-static inline double
-__always_inline
-bsloww (double x, double dx, double orig, int n)
-{
-  double res, cor, w[2], a, da;
-
-  res = TAYLOR_SLOW (x, dx, cor);
-  cor = 1.0005 * cor + __copysign (1.1e-24, cor);
-  if (res == res + cor)
-    return res;
-
-  a = fabs (x);
-  da = (x > 0) ? dx : -dx;
-  __dubsin (a, da, w);
-  cor = 1.000000001 * w[1] + __copysign (1.1e-24, w[1]);
-
-  if (w[0] == w[0] + cor)
-    return __copysign (w[0], x);
-
-  return (n & 1) ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
-}
-
-/***************************************************************************/
-/*  Routine compute sin(x+dx)  or cos(x+dx) (Double-Length number) where x  */
-/* in first or third quarter of unit circle.Routine receive also            */
-/* (right argument) the original  value of x for computing error of result.*/
-/* And if result not  accurate enough routine calls  other routines         */
-/***************************************************************************/
-
-static inline double
-__always_inline
-bsloww1 (double x, double dx, double orig, int n)
-{
-  double w[2], cor, res;
-
-  res = do_sin_slow (x, dx, 1.1e-24, &cor);
-  if (res == res + cor)
-    return (x > 0) ? res : -res;
-
-  dx = (x > 0) ? dx : -dx;
-  __dubsin (fabs (x), dx, w);
-
-  cor = 1.000000005 * w[1] + __copysign (1.1e-24, w[1]);
-
-  if (w[0] == w[0] + cor)
-    return __copysign (w[0], x);
-
-  return (n & 1) ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
-}
-
-/***************************************************************************/
-/*  Routine compute sin(x+dx)  or cos(x+dx) (Double-Length number) where x  */
-/* in second or fourth quarter of unit circle.Routine receive also  the     */
-/* original value and quarter(n= 1or 3)of x for computing error of result.  */
-/* And if result not accurate enough routine calls  other routines          */
-/***************************************************************************/
-
-static inline double
-__always_inline
-bsloww2 (double x, double dx, double orig, int n)
-{
-  double w[2], cor, res;
-
-  res = do_cos_slow (x, dx, 1.1e-24, &cor);
-  if (res == res + cor)
-    return (n & 2) ? -res : res;
-
-  dx = (x > 0) ? dx : -dx;
-  __docos (fabs (x), dx, w);
-
-  cor = 1.000000005 * w[1] + __copysign (1.1e-24, w[1]);
-
-  if (w[0] == w[0] + cor)
-    return (n & 2) ? -w[0] : w[0];
-
-  return (n & 1) ? __mpsin (orig, 0, true) : __mpcos (orig, 0, true);
-}
-
-/************************************************************************/
-/*  Routine compute cos(x) for  2^-27 < |x|< 0.25 by  Taylor with more   */
-/* precision  and if still doesn't accurate enough by mpcos   or docos  */
-/************************************************************************/
-
-static inline double
-__always_inline
-cslow2 (double x)
-{
-  double w[2], cor, res;
-
-  res = do_cos_slow (x, 0, 0, &cor);
-  if (res == res + cor)
-    return res;
-
-  __docos (fabs (x), 0, w);
-  if (w[0] == w[0] + 1.000000005 * w[1])
-    return w[0];
-
-  return __mpcos (x, 0, false);
-}
-
 #ifndef __cos
 libm_alias_double (__cos, cos)
 #endif

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=649095838b85ae71f778338c210b4c1e519e1d16

commit 649095838b85ae71f778338c210b4c1e519e1d16
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Tue Apr 3 16:41:36 2018 +0100

    [PATCH 4/7] sin/cos slow paths: remove slow paths from huge range reduction
    
    For huge inputs use the improved do_sincos function as well.  Now no cases use
    the correction factor returned by do_sin, do_cos and TAYLOR_SIN, so remove it.
    
    	* sysdeps/ieee754/dbl-64/s_sin.c (TAYLOR_SIN): Remove cor parameter.
    	(do_cos): Remove corp parameter and calculations.
    	(do_sin): Likewise.
    	(do_sincos): Remove cor variable.
    	(__sin): Use do_sincos for huge inputs.
    	(__cos): Likewise.
    	* sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Likewise.
    	(reduce_and_compute_sincos): Remove unused function.

diff --git a/ChangeLog b/ChangeLog
index a8d1ecb..a4defe7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,16 @@
 2018-04-03  Wilco Dijkstra  <wdijkstr@arm.com>
 
+	* sysdeps/ieee754/dbl-64/s_sin.c (TAYLOR_SIN): Remove cor parameter.
+	(do_cos): Remove corp parameter and calculations.
+	(do_sin): Likewise.
+	(do_sincos): Remove cor variable.
+	(__sin): Use do_sincos for huge inputs.
+	(__cos): Likewise.
+	* sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Likewise.
+	(reduce_and_compute_sincos): Remove unused function.
+
+2018-04-03  Wilco Dijkstra  <wdijkstr@arm.com>
+
 	* sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_1): Rename to
 	reduce_sincos, improve accuracy to 136 bits.
 	(do_sincos_1): Rename to do_sincos, remove fallbacks to slow functions.
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index b8c366a..7d0f375 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -66,12 +66,11 @@
    a - a^3/3! + a^5/5! - a^7/7! + a^9/9! + (1 - a^2) * da / 2
 
    The constants s1, s2, s3, etc. are pre-computed values of 1/3!, 1/5! and so
-   on.  The result is returned to LHS and correction in COR.  */
-#define TAYLOR_SIN(xx, a, da, cor) \
+   on.  The result is returned to LHS.  */
+#define TAYLOR_SIN(xx, a, da) \
 ({									      \
   double t = ((POLYNOMIAL (xx)  * (a) - 0.5 * (da))  * (xx) + (da));	      \
   double res = (a) + t;							      \
-  (cor) = ((a) - res) + t;						      \
   res;									      \
 })
 
@@ -145,10 +144,10 @@ static double cslow2 (double x);
 /* Given a number partitioned into X and DX, this function computes the cosine
    of the number by combining the sin and cos of X (as computed by a variation
    of the Taylor series) with the values looked up from the sin/cos table to
-   get the result in RES and a correction value in COR.  */
+   get the result.  */
 static inline double
 __always_inline
-do_cos (double x, double dx, double *corp)
+do_cos (double x, double dx)
 {
   mynumber u;
 
@@ -158,16 +157,13 @@ do_cos (double x, double dx, double *corp)
   u.x = big + fabs (x);
   x = fabs (x) - (u.x - big) + dx;
 
-  double xx, s, sn, ssn, c, cs, ccs, res, cor;
+  double xx, s, sn, ssn, c, cs, ccs, cor;
   xx = x * x;
   s = x + x * xx * (sn3 + xx * sn5);
   c = xx * (cs2 + xx * (cs4 + xx * cs6));
   SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
   cor = (ccs - s * ssn - cs * c) - sn * s;
-  res = cs + cor;
-  cor = (cs - res) + cor;
-  *corp = cor;
-  return res;
+  return cs + cor;
 }
 
 /* A more precise variant of DO_COS.  EPS is the adjustment to the correction
@@ -207,10 +203,10 @@ do_cos_slow (double x, double dx, double eps, double *corp)
 /* Given a number partitioned into X and DX, this function computes the sine of
    the number by combining the sin and cos of X (as computed by a variation of
    the Taylor series) with the values looked up from the sin/cos table to get
-   the result in RES and a correction value in COR.  */
+   the result.  */
 static inline double
 __always_inline
-do_sin (double x, double dx, double *corp)
+do_sin (double x, double dx)
 {
   mynumber u;
 
@@ -219,16 +215,13 @@ do_sin (double x, double dx, double *corp)
   u.x = big + fabs (x);
   x = fabs (x) - (u.x - big);
 
-  double xx, s, sn, ssn, c, cs, ccs, cor, res;
+  double xx, s, sn, ssn, c, cs, ccs, cor;
   xx = x * x;
   s = x + (dx + x * xx * (sn3 + xx * sn5));
   c = x * dx + xx * (cs2 + xx * (cs4 + xx * cs6));
   SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
   cor = (ssn + s * ccs - sn * c) + cs * s;
-  res = sn + cor;
-  cor = (sn - res) + cor;
-  *corp = cor;
-  return res;
+  return sn + cor;
 }
 
 /* A more precise variant of DO_SIN.  EPS is the adjustment to the correction
@@ -330,19 +323,19 @@ static double
 __always_inline
 do_sincos (double a, double da, int4 n)
 {
-  double retval, cor;
+  double retval;
 
   if (n & 1)
     /* Max ULP is 0.513.  */
-    retval = do_cos (a, da, &cor);
+    retval = do_cos (a, da);
   else
     {
       double xx = a * a;
       /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518.  */
       if (xx < 0.01588)
-	retval = TAYLOR_SIN (xx, a, da, cor);
+	retval = TAYLOR_SIN (xx, a, da);
       else
-	retval = __copysign (do_sin (a, da, &cor), a);
+	retval = __copysign (do_sin (a, da), a);
     }
 
   return (n & 2) ? -retval : retval;
@@ -362,7 +355,7 @@ SECTION
 __sin (double x)
 {
 #ifndef IN_SINCOS
-  double xx, t, a, da, cor;
+  double xx, t, a, da;
   mynumber u;
   int4 k, m, n;
   double retval = 0;
@@ -396,7 +389,7 @@ __sin (double x)
   else if (k < 0x3feb6000)
     {
       /* Max ULP is 0.548.  */
-      retval = __copysign (do_sin (x, 0, &cor), x);
+      retval = __copysign (do_sin (x, 0), x);
     }				/*   else  if (k < 0x3feb6000)    */
 
 /*----------------------- 0.855469  <|x|<2.426265  ----------------------*/
@@ -404,7 +397,7 @@ __sin (double x)
     {
       t = hp0 - fabs (x);
       /* Max ULP is 0.51.  */
-      retval = __copysign (do_cos (t, hp1, &cor), x);
+      retval = __copysign (do_cos (t, hp1), x);
     }				/*   else  if (k < 0x400368fd)    */
 
 #ifndef IN_SINCOS
@@ -417,8 +410,10 @@ __sin (double x)
 
 /* --------------------105414350 <|x| <2^1024------------------------------*/
   else if (k < 0x7ff00000)
-    retval = reduce_and_compute (x, false);
-
+    {
+      n = __branred (x, &a, &da);
+      retval = do_sincos (a, da, n);
+    }
 /*--------------------- |x| > 2^1024 ----------------------------------*/
   else
     {
@@ -445,7 +440,7 @@ SECTION
 #endif
 __cos (double x)
 {
-  double y, xx, cor, a, da;
+  double y, xx, a, da;
   mynumber u;
 #ifndef IN_SINCOS
   int4 k, m, n;
@@ -470,7 +465,7 @@ __cos (double x)
   else if (k < 0x3feb6000)
     {				/* 2^-27 < |x| < 0.855469 */
       /* Max ULP is 0.51.  */
-      retval = do_cos (x, 0, &cor);
+      retval = do_cos (x, 0);
     }				/*   else  if (k < 0x3feb6000)    */
 
   else if (k < 0x400368fd)
@@ -482,9 +477,9 @@ __cos (double x)
       /* Max ULP is 0.501 if xx < 0.01588 or 0.518 otherwise.
 	 Range reduction uses 106 bits here which is sufficient.  */
       if (xx < 0.01588)
-	retval = TAYLOR_SIN (xx, a, da, cor);
+	retval = TAYLOR_SIN (xx, a, da);
       else
-	retval = __copysign (do_sin (a, da, &cor), a);
+	retval = __copysign (do_sin (a, da), a);
     }				/*   else  if (k < 0x400368fd)    */
 
 
@@ -497,7 +492,10 @@ __cos (double x)
 
   /* 105414350 <|x| <2^1024 */
   else if (k < 0x7ff00000)
-    retval = reduce_and_compute (x, true);
+    {
+      n = __branred (x, &a, &da);
+      retval = do_sincos (a, da, n + 1);
+    }
 
   else
     {
diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_sincos.c
index 4f032d2..4335ecb 100644
--- a/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -28,37 +28,6 @@
 #define IN_SINCOS 1
 #include "s_sin.c"
 
-/* Consolidated version of reduce_and_compute in s_sin.c that does range
-   reduction only once and computes sin and cos together.  */
-static inline void
-__always_inline
-reduce_and_compute_sincos (double x, double *sinx, double *cosx)
-{
-  double a, da;
-  unsigned int n = __branred (x, &a, &da);
-
-  n = n & 3;
-
-  if (n == 1 || n == 2)
-    {
-      a = -a;
-      da = -da;
-    }
-
-  if (n & 1)
-    {
-      double *temp = cosx;
-      cosx = sinx;
-      sinx = temp;
-    }
-
-  if (a * a < 0.01588)
-    *sinx = bsloww (a, da, x, n);
-  else
-    *sinx = bsloww1 (a, da, x, n);
-  *cosx = bsloww2 (a, da, x, n);
-}
-
 void
 __sincos (double x, double *sinx, double *cosx)
 {
@@ -88,8 +57,11 @@ __sincos (double x, double *sinx, double *cosx)
     }
   if (k < 0x7ff00000)
     {
-      reduce_and_compute_sincos (x, sinx, cosx);
-      return;
+      double a, da;
+      int4 n = __branred (x, &a, &da);
+
+      *sinx = do_sincos (a, da, n);
+      *cosx = do_sincos (a, da, n + 1);
     }
 
   if (isinf (x))

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=d9469deb14ba6f55bd8af1652951ab306a8f63bd

commit d9469deb14ba6f55bd8af1652951ab306a8f63bd
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Tue Apr 3 16:33:13 2018 +0100

    [PATCH 3/7] sin/cos slow paths: remove slow paths from small range reduction
    
    This patch improves the accuracy of the range reduction.  When the input is
    large (2^27) and very close to a multiple of PI/2, using 110 bits of PI is not
    enough.  Improve range reduction accuracy to 136 bits.  As a result the special
    checks for results close to zero can be removed.  The ULP of the polynomials is
    at worst 0.55ULP, so there is no reason for the slow functions, and they can be
    removed.
    
    	* sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_1): Rename to
    	reduce_sincos, improve accuracy to 136 bits.
    	(do_sincos_1): Rename to do_sincos, remove fallbacks to slow functions.
    	(__sin): Use improved reduction and simplified do_sincos calculation.
    	(__cos): Likewise.
    	* sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Likewise.

diff --git a/ChangeLog b/ChangeLog
index a0b5228..a8d1ecb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
 2018-04-03  Wilco Dijkstra  <wdijkstr@arm.com>
 
+	* sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_1): Rename to
+	reduce_sincos, improve accuracy to 136 bits.
+	(do_sincos_1): Rename to do_sincos, remove fallbacks to slow functions.
+	(__sin): Use improved reduction and simplified do_sincos calculation.
+	(__cos): Likewise.
+	* sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Likewise.
+
+2018-04-03  Wilco Dijkstra  <wdijkstr@arm.com>
+
 	* sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_2): Remove function.
 	(do_sincos_2): Likewise.
 	(__sin): Remove middle range reduction case.
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index c86fb9f..b8c366a 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -295,9 +295,13 @@ reduce_and_compute (double x, bool shift_quadrant)
   return retval;
 }
 
+/* Reduce range of x to within PI/2 with abs (x) < 105414350.  The high part
+   is written to *a, the low part to *da.  Range reduction is accurate to 136
+   bits so that when x is large and *a very close to zero, all 53 bits of *a
+   are correct.  */
 static inline int4
 __always_inline
-reduce_sincos_1 (double x, double *a, double *da)
+reduce_sincos (double x, double *a, double *da)
 {
   mynumber v;
 
@@ -306,62 +310,45 @@ reduce_sincos_1 (double x, double *a, double *da)
   v.x = t;
   double y = (x - xn * mp1) - xn * mp2;
   int4 n = v.i[LOW_HALF] & 3;
-  double db = xn * mp3;
-  double b = y - db;
-  db = (y - b) - db;
+
+  double b, db, t1, t2;
+  t1 = xn * pp3;
+  t2 = y - t1;
+  db = (y - t2) - t1;
+
+  t1 = xn * pp4;
+  b = t2 - t1;
+  db += (t2 - b) - t1;
 
   *a = b;
   *da = db;
-
   return n;
 }
 
-/* Compute sin (A + DA).  cos can be computed by passing SHIFT_QUADRANT as
-   true, which results in shifting the quadrant N clockwise.  */
+/* Compute sin or cos (A + DA) for the given quadrant N.  */
 static double
 __always_inline
-do_sincos_1 (double a, double da, double x, int4 n, bool shift_quadrant)
+do_sincos (double a, double da, int4 n)
 {
-  double xx, retval, res, cor;
-  double eps = fabs (x) * 1.2e-30;
+  double retval, cor;
 
-  int k1 = (n + shift_quadrant) & 3;
-  switch (k1)
-    {			/* quarter of unit circle */
-    case 2:
-      a = -a;
-      da = -da;
-      /* Fall through.  */
-    case 0:
-      xx = a * a;
+  if (n & 1)
+    /* Max ULP is 0.513.  */
+    retval = do_cos (a, da, &cor);
+  else
+    {
+      double xx = a * a;
+      /* Max ULP is 0.501 if xx < 0.01588, otherwise ULP is 0.518.  */
       if (xx < 0.01588)
-	{
-	  /* Taylor series.  */
-	  res = TAYLOR_SIN (xx, a, da, cor);
-	  cor = 1.02 * cor + __copysign (eps, cor);
-	  retval = (res == res + cor) ? res : sloww (a, da, x, shift_quadrant);
-	}
+	retval = TAYLOR_SIN (xx, a, da, cor);
       else
-	{
-	  res = do_sin (a, da, &cor);
-	  cor = 1.035 * cor + __copysign (eps, cor);
-	  retval = ((res == res + cor) ? __copysign (res, a)
-		    : sloww1 (a, da, x, shift_quadrant));
-	}
-      break;
-
-    case 1:
-    case 3:
-      res = do_cos (a, da, &cor);
-      cor = 1.025 * cor + __copysign (eps, cor);
-      retval = ((res == res + cor) ? ((n & 2) ? -res : res)
-		: sloww2 (a, da, x, n));
-      break;
+	retval = __copysign (do_sin (a, da, &cor), a);
     }
 
-  return retval;
+  return (n & 2) ? -retval : retval;
 }
 
+
 /*******************************************************************/
 /* An ultimate sin routine. Given an IEEE double machine number x   */
 /* it computes the correctly rounded (to nearest) value of sin(x)  */
@@ -374,13 +361,18 @@ SECTION
 #endif
 __sin (double x)
 {
-  double xx, t, cor;
+#ifndef IN_SINCOS
+  double xx, t, a, da, cor;
   mynumber u;
-  int4 k, m;
+  int4 k, m, n;
   double retval = 0;
 
-#ifndef IN_SINCOS
   SET_RESTORE_ROUND_53BIT (FE_TONEAREST);
+#else
+  double xx, t, cor;
+  mynumber u;
+  int4 k, m;
+  double retval = 0;
 #endif
 
   u.x = x;
@@ -419,9 +411,8 @@ __sin (double x)
 /*-------------------------- 2.426265<|x|< 105414350 ----------------------*/
   else if (k < 0x419921FB)
     {
-      double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
-      retval = do_sincos_1 (a, da, x, n, false);
+      n = reduce_sincos (x, &a, &da);
+      retval = do_sincos (a, da, n);
     }				/*   else  if (k <  0x419921FB )    */
 
 /* --------------------105414350 <|x| <2^1024------------------------------*/
@@ -456,7 +447,11 @@ __cos (double x)
 {
   double y, xx, cor, a, da;
   mynumber u;
+#ifndef IN_SINCOS
+  int4 k, m, n;
+#else
   int4 k, m;
+#endif
 
   double retval = 0;
 
@@ -496,9 +491,8 @@ __cos (double x)
 #ifndef IN_SINCOS
   else if (k < 0x419921FB)
     {				/* 2.426265<|x|< 105414350 */
-      double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
-      retval = do_sincos_1 (a, da, x, n, true);
+      n = reduce_sincos (x, &a, &da);
+      retval = do_sincos (a, da, n + 1);
     }				/*   else  if (k <  0x419921FB )    */
 
   /* 105414350 <|x| <2^1024 */
diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_sincos.c
index a9af8ce..4f032d2 100644
--- a/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -79,10 +79,10 @@ __sincos (double x, double *sinx, double *cosx)
   if (k < 0x419921FB)
     {
       double a, da;
-      int4 n = reduce_sincos_1 (x, &a, &da);
+      int4 n = reduce_sincos (x, &a, &da);
 
-      *sinx = do_sincos_1 (a, da, x, n, false);
-      *cosx = do_sincos_1 (a, da, x, n, true);
+      *sinx = do_sincos (a, da, n);
+      *cosx = do_sincos (a, da, n + 1);
 
       return;
     }

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=7a5640f23a797d3be38a78fb899903699c3aa5a0

commit 7a5640f23a797d3be38a78fb899903699c3aa5a0
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Tue Apr 3 16:28:03 2018 +0100

    [PATCH 2/7] sin/cos slow paths: remove large range reduction
    
    This patch removes the large range reduction code and defers to the huge range
    reduction code.  The first level range reducer supports inputs up to 2^27,
    which is way too large given that inputs for sin/cos are typically small
    (< 10), and optimizing for a smaller range would give a significant speedup.
    
    Input values above 2^27 are practically never used, so there is no reason for
    supporting range reduction between 2^27 and 2^48.  Removing it significantly
    simplifies code and enables further speedups.  There is about a 2.3x slowdown
    in this range due to __branred being extremely slow  (a better algorithm could
    easily more than double performance).
    
    	* sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_2): Remove function.
    	(do_sincos_2): Likewise.
    	(__sin): Remove middle range reduction case.
    	(__cos): Likewise.
    	* sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Remove middle range
    	reduction case.

diff --git a/ChangeLog b/ChangeLog
index 77e3477..a0b5228 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,14 @@
 2018-04-03  Wilco Dijkstra  <wdijkstr@arm.com>
 
+	* sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_2): Remove function.
+	(do_sincos_2): Likewise.
+	(__sin): Remove middle range reduction case.
+	(__cos): Likewise.
+	* sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Remove middle range
+	reduction case.
+
+2018-04-03  Wilco Dijkstra  <wdijkstr@arm.com>
+
 	* sysdeps/aarch64/libm-test-ulps: Update ULP for sin, cos, sincos.
 	* sysdeps/ieee754/dbl-64/s_sin.c (__sin): Remove slow paths for small
 	inputs.
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 0c16b72..c86fb9f 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -362,80 +362,6 @@ do_sincos_1 (double a, double da, double x, int4 n, bool shift_quadrant)
   return retval;
 }
 
-static inline int4
-__always_inline
-reduce_sincos_2 (double x, double *a, double *da)
-{
-  mynumber v;
-
-  double t = (x * hpinv + toint);
-  double xn = t - toint;
-  v.x = t;
-  double xn1 = (xn + 8.0e22) - 8.0e22;
-  double xn2 = xn - xn1;
-  double y = ((((x - xn1 * mp1) - xn1 * mp2) - xn2 * mp1) - xn2 * mp2);
-  int4 n = v.i[LOW_HALF] & 3;
-  double db = xn1 * pp3;
-  t = y - db;
-  db = (y - t) - db;
-  db = (db - xn2 * pp3) - xn * pp4;
-  double b = t + db;
-  db = (t - b) + db;
-
-  *a = b;
-  *da = db;
-
-  return n;
-}
-
-/* Compute sin (A + DA).  cos can be computed by passing SHIFT_QUADRANT as
-   true, which results in shifting the quadrant N clockwise.  */
-static double
-__always_inline
-do_sincos_2 (double a, double da, double x, int4 n, bool shift_quadrant)
-{
-  double res, retval, cor, xx;
-
-  double eps = 1.0e-24;
-
-  int4 k = (n + shift_quadrant) & 3;
-
-  switch (k)
-    {
-    case 2:
-      a = -a;
-      da = -da;
-      /* Fall through.  */
-    case 0:
-      xx = a * a;
-      if (xx < 0.01588)
-	{
-	  /* Taylor series.  */
-	  res = TAYLOR_SIN (xx, a, da, cor);
-	  cor = 1.02 * cor + __copysign (eps, cor);
-	  retval = (res == res + cor) ? res : bsloww (a, da, x, n);
-	}
-      else
-	{
-	  res = do_sin (a, da, &cor);
-	  cor = 1.035 * cor + __copysign (eps, cor);
-	  retval = ((res == res + cor) ? __copysign (res, a)
-		    : bsloww1 (a, da, x, n));
-	}
-      break;
-
-    case 1:
-    case 3:
-      res = do_cos (a, da, &cor);
-      cor = 1.025 * cor + __copysign (eps, cor);
-      retval = ((res == res + cor) ? ((n & 2) ? -res : res)
-		: bsloww2 (a, da, x, n));
-      break;
-    }
-
-  return retval;
-}
-
 /*******************************************************************/
 /* An ultimate sin routine. Given an IEEE double machine number x   */
 /* it computes the correctly rounded (to nearest) value of sin(x)  */
@@ -498,16 +424,7 @@ __sin (double x)
       retval = do_sincos_1 (a, da, x, n, false);
     }				/*   else  if (k <  0x419921FB )    */
 
-/*---------------------105414350 <|x|< 281474976710656 --------------------*/
-  else if (k < 0x42F00000)
-    {
-      double a, da;
-
-      int4 n = reduce_sincos_2 (x, &a, &da);
-      retval = do_sincos_2 (a, da, x, n, false);
-    }				/*   else  if (k <  0x42F00000 )   */
-
-/* -----------------281474976710656 <|x| <2^1024----------------------------*/
+/* --------------------105414350 <|x| <2^1024------------------------------*/
   else if (k < 0x7ff00000)
     retval = reduce_and_compute (x, false);
 
@@ -584,15 +501,7 @@ __cos (double x)
       retval = do_sincos_1 (a, da, x, n, true);
     }				/*   else  if (k <  0x419921FB )    */
 
-  else if (k < 0x42F00000)
-    {
-      double a, da;
-
-      int4 n = reduce_sincos_2 (x, &a, &da);
-      retval = do_sincos_2 (a, da, x, n, true);
-    }				/*   else  if (k <  0x42F00000 )    */
-
-  /* 281474976710656 <|x| <2^1024 */
+  /* 105414350 <|x| <2^1024 */
   else if (k < 0x7ff00000)
     retval = reduce_and_compute (x, true);
 
diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_sincos.c
index e1977ea..a9af8ce 100644
--- a/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -86,16 +86,6 @@ __sincos (double x, double *sinx, double *cosx)
 
       return;
     }
-  if (k < 0x42F00000)
-    {
-      double a, da;
-      int4 n = reduce_sincos_2 (x, &a, &da);
-
-      *sinx = do_sincos_2 (a, da, x, n, false);
-      *cosx = do_sincos_2 (a, da, x, n, true);
-
-      return;
-    }
   if (k < 0x7ff00000)
     {
       reduce_and_compute_sincos (x, sinx, cosx);

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=19a8b9a300f2f1f0012aff0f2b70b09430f50d9e

commit 19a8b9a300f2f1f0012aff0f2b70b09430f50d9e
Author: Wilco Dijkstra <wdijkstr@arm.com>
Date:   Tue Apr 3 16:24:29 2018 +0100

    [PATCH 1/7] sin/cos slow paths: avoid slow paths for small inputs
    
    This series of patches removes the slow patchs from sin, cos and sincos.
    Besides greatly simplifying the implementation, the new version is also much
    faster for inputs up to PI (41% faster) and for large inputs needing range
    reduction (27% faster).
    
    ULP is ~0.55 with no errors found after testing 1.6 billion inputs across most
    of the range with mpsin and mpcos.  The number of incorrectly rounded results
    (ie. ULP >0.5) is at most ~2750 per million inputs between 0.125 and 0.5,
    the average is ~850 per million between 0 and PI.
    
    Tested on AArch64 and x86_64 with no regressions.
    
    The first patch removes the slow paths for the cases where the input is small
    and doesn't require range reduction.  Update ULP tables for sin, cos and sincos
    on AArch64 and x86_64.
    
    	* sysdeps/aarch64/libm-test-ulps: Update ULP for sin, cos, sincos.
    	* sysdeps/ieee754/dbl-64/s_sin.c (__sin): Remove slow paths for small
    	inputs.
    	(__cos): Likewise.
    	* sysdeps/x86_64/fpu/libm-test-ulps: Update ULP for sin, cos, sincos.

diff --git a/ChangeLog b/ChangeLog
index a9da4d4..77e3477 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,15 @@
+2018-04-03  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	* sysdeps/aarch64/libm-test-ulps: Update ULP for sin, cos, sincos.
+	* sysdeps/ieee754/dbl-64/s_sin.c (__sin): Remove slow paths for small
+	inputs.
+	(__cos): Likewise.
+	* sysdeps/x86_64/fpu/libm-test-ulps: Update ULP for sin, cos, sincos.
+
 2018-04-03  Joseph Myers  <joseph@codesourcery.com>
 
 	* scripts/build-many-glibcs.py (Context.checkout): Default Linux
-	version to 4.16.
+	version to 4.16
 
 2018-04-03  Adhemerval Zanella  <adhemerval.zanella@linaro.org>
 
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index 1f46980..be06085 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -1012,7 +1012,9 @@ ildouble: 2
 ldouble: 2
 
 Function: "cos":
+double: 1
 float: 1
+idouble: 1
 ifloat: 1
 ildouble: 1
 ldouble: 1
@@ -1970,7 +1972,9 @@ ildouble: 2
 ldouble: 2
 
 Function: "sin":
+double: 1
 float: 1
+idouble: 1
 ifloat: 1
 ildouble: 1
 ldouble: 1
@@ -2000,7 +2004,9 @@ ildouble: 3
 ldouble: 3
 
 Function: "sincos":
+double: 1
 float: 1
+idouble: 1
 ifloat: 1
 ildouble: 1
 ldouble: 1
diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 8c589cb..0c16b72 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -448,7 +448,7 @@ SECTION
 #endif
 __sin (double x)
 {
-  double xx, res, t, cor;
+  double xx, t, cor;
   mynumber u;
   int4 k, m;
   double retval = 0;
@@ -471,26 +471,22 @@ __sin (double x)
       xx = x * x;
       /* Taylor series.  */
       t = POLYNOMIAL (xx) * (xx * x);
-      res = x + t;
-      cor = (x - res) + t;
-      retval = (res == res + 1.07 * cor) ? res : slow (x);
+      /* Max ULP of x + t is 0.535.  */
+      retval = x + t;
     }				/*  else  if (k < 0x3fd00000)    */
 /*---------------------------- 0.25<|x|< 0.855469---------------------- */
   else if (k < 0x3feb6000)
     {
-      res = do_sin (x, 0, &cor);
-      retval = (res == res + 1.096 * cor) ? res : slow1 (x);
-      retval = __copysign (retval, x);
+      /* Max ULP is 0.548.  */
+      retval = __copysign (do_sin (x, 0, &cor), x);
     }				/*   else  if (k < 0x3feb6000)    */
 
 /*----------------------- 0.855469  <|x|<2.426265  ----------------------*/
   else if (k < 0x400368fd)
     {
-
       t = hp0 - fabs (x);
-      res = do_cos (t, hp1, &cor);
-      retval = (res == res + 1.020 * cor) ? res : slow2 (x);
-      retval = __copysign (retval, x);
+      /* Max ULP is 0.51.  */
+      retval = __copysign (do_cos (t, hp1, &cor), x);
     }				/*   else  if (k < 0x400368fd)    */
 
 #ifndef IN_SINCOS
@@ -541,7 +537,7 @@ SECTION
 #endif
 __cos (double x)
 {
-  double y, xx, res, cor, a, da;
+  double y, xx, cor, a, da;
   mynumber u;
   int4 k, m;
 
@@ -561,8 +557,8 @@ __cos (double x)
 
   else if (k < 0x3feb6000)
     {				/* 2^-27 < |x| < 0.855469 */
-      res = do_cos (x, 0, &cor);
-      retval = (res == res + 1.020 * cor) ? res : cslow2 (x);
+      /* Max ULP is 0.51.  */
+      retval = do_cos (x, 0, &cor);
     }				/*   else  if (k < 0x3feb6000)    */
 
   else if (k < 0x400368fd)
@@ -571,20 +567,12 @@ __cos (double x)
       a = y + hp1;
       da = (y - a) + hp1;
       xx = a * a;
+      /* Max ULP is 0.501 if xx < 0.01588 or 0.518 otherwise.
+	 Range reduction uses 106 bits here which is sufficient.  */
       if (xx < 0.01588)
-	{
-	  res = TAYLOR_SIN (xx, a, da, cor);
-	  cor = 1.02 * cor + __copysign (1.0e-31, cor);
-	  retval = (res == res + cor) ? res : sloww (a, da, x, true);
-	}
+	retval = TAYLOR_SIN (xx, a, da, cor);
       else
-	{
-	  res = do_sin (a, da, &cor);
-	  cor = 1.035 * cor + __copysign (1.0e-31, cor);
-	  retval = ((res == res + cor) ? __copysign (res, a)
-		    : sloww1 (a, da, x, true));
-	}
-
+	retval = __copysign (do_sin (a, da, &cor), a);
     }				/*   else  if (k < 0x400368fd)    */
 
 
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 48e53f7..bbb8a4d 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1262,7 +1262,9 @@ ildouble: 1
 ldouble: 1
 
 Function: "cos":
+double: 1
 float128: 1
+idouble: 1
 ifloat128: 1
 ildouble: 1
 ldouble: 1
@@ -2528,7 +2530,9 @@ Function: "pow_vlen8_avx2":
 float: 3
 
 Function: "sin":
+double: 1
 float128: 1
+idouble: 1
 ifloat128: 1
 ildouble: 1
 ldouble: 1
@@ -2578,7 +2582,9 @@ Function: "sin_vlen8_avx2":
 float: 1
 
 Function: "sincos":
+double: 1
 float128: 1
+idouble: 1
 ifloat128: 1
 ildouble: 1
 ldouble: 1

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                         |   72 ++++-
 sysdeps/aarch64/libm-test-ulps    |    6 +
 sysdeps/ieee754/dbl-64/s_sin.c    |  733 ++++---------------------------------
 sysdeps/ieee754/dbl-64/s_sincos.c |  108 +++---
 sysdeps/x86_64/fpu/libm-test-ulps |    6 +
 5 files changed, 205 insertions(+), 720 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]