math: Remove slow paths from atan [BZ #15267]

author Wilco Dijkstra <wilco.dijkstra@arm.com>

Wed, 10 Mar 2021 12:40:56 +0000 (12:40 +0000)

committer Wilco Dijkstra <wdijkstr@arm.com>

Thu, 11 Mar 2021 14:26:36 +0000 (14:26 +0000)
author Wilco Dijkstra <wilco.dijkstra@arm.com>
Wed, 10 Mar 2021 12:40:56 +0000 (12:40 +0000)
committer Wilco Dijkstra <wdijkstr@arm.com>
Thu, 11 Mar 2021 14:26:36 +0000 (14:26 +0000)
diff --git a/sysdeps/ieee754/dbl-64/atnat.h b/sysdeps/ieee754/dbl-64/atnat.h

index af4374019f1fce9b56b387b93a69d1330b9c8666..2b12481f0491ade9379c71ae98935ae8e67cb755 100644 (file)
--- a/sysdeps/ieee754/dbl-64/atnat.h
+++ b/sysdeps/ieee754/dbl-64/atnat.h
@@ -29,7 +29,7 @@
  #define M 4
  
  #ifdef BIG_ENDI
-  static const number
+  static const mynumber
    /* polynomial I */
  /**/ d3             = {{0xbfd55555, 0x55555555} }, /* -0.333... */
  /**/ d5             = {{0x3fc99999, 0x999997fd} }, /*  0.199... */
@@ -79,7 +79,7 @@
  
  #else
  #ifdef LITTLE_ENDI
-  static const number
+  static const mynumber
    /* polynomial I */
  /**/ d3             = {{0x55555555, 0xbfd55555} }, /* -0.333... */
  /**/ d5             = {{0x999997fd, 0x3fc99999} }, /*  0.199... */
diff --git a/sysdeps/ieee754/dbl-64/s_atan.c b/sysdeps/ieee754/dbl-64/s_atan.c

index 291b988318a67444b473044b030619d2f7851585..977183eb928f62d110e2c9e9ca6dd1f9286b1b97 100644 (file)
--- a/sysdeps/ieee754/dbl-64/s_atan.c
+++ b/sysdeps/ieee754/dbl-64/s_atan.c
@@ -20,25 +20,15 @@
  /*  MODULE_NAME: atnat.c                                                */
  /*                                                                      */
  /*  FUNCTIONS:  uatan                                                   */
-/*              atanMp                                                  */
  /*              signArctan                                              */
  /*                                                                      */
-/*                                                                      */
-/*  FILES NEEDED: dla.h endian.h mpa.h mydefs.h atnat.h                 */
-/*                mpatan.c mpatan2.c mpsqrt.c                           */
+/*  FILES NEEDED: dla.h endian.h mydefs.h atnat.h                       */
  /*                uatan.tbl                                             */
  /*                                                                      */
-/* An ultimate atan() routine. Given an IEEE double machine number x    */
-/* it computes the correctly rounded (to nearest) value of atan(x).     */
-/*                                                                      */
-/* Assumption: Machine arithmetic operations are performed in           */
-/* round to nearest mode of IEEE 754 standard.                          */
-/*                                                                      */
  /************************************************************************/
  
  #include <dla.h>
-#include "mpa.h"
-#include "MathLib.h"
+#include "mydefs.h"
  #include "uatan.tbl"
  #include "atnat.h"
  #include <fenv.h>
@@ -47,10 +37,8 @@
  #include <math.h>
  #include <fenv_private.h>
  #include <math-underflow.h>
-#include <stap-probe.h>
  
-void __mpatan (mp_no *, mp_no *, int); /* see definition in mpatan.c */
-static double atanMp (double, const int[]);
+#define  TWO52     0x1.0p52
  
    /* Fix the sign of y and return */
  static double
@@ -59,17 +47,14 @@ __signArctan (double x, double y)
    return copysign (y, x);
  }
  
-
-/* An ultimate atan() routine. Given an IEEE double machine number x,    */
-/* routine computes the correctly rounded (to nearest) value of atan(x). */
+/* atan with max ULP of ~0.523 based on random sampling.  */
  double
  __atan (double x)
  {
-  double cor, s1, ss1, s2, ss2, t1, t2, t3, t4, u, u2, u3,
-        v, vv, w, ww, y, yy, z, zz;
+  double cor, t1, t2, t3, u,
+        v, w, ww, y, yy, z;
    int i, ux, dx;
-  static const int pr[M] = { 6, 8, 10, 32 };
-  number num;
+  mynumber num;
  
    num.d = x;
    ux = num.i[HIGH_HALF];
@@ -102,36 +87,14 @@ __atan (double x)
               yy = d3.d + v * yy;
               yy *= x * v;
  
-             if ((y = x + (yy - U1 * x)) == x + (yy + U1 * x))
-               return y;
-
-             EMULV (x, x, v, vv);      /* v+vv=x^2 */
-
-             s1 = f17.d + v * f19.d;
-             s1 = f15.d + v * s1;
-             s1 = f13.d + v * s1;
-             s1 = f11.d + v * s1;
-             s1 *= v;
-
-             ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
-             MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-             ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
-             MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-             ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
-             MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-             ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
-             MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-             MUL2 (x, 0, s1, ss1, s2, ss2, t1, t2);
-             ADD2 (x, 0, s2, ss2, s1, ss1, t1, t2);
-             if ((y = s1 + (ss1 - U5 * s1)) == s1 + (ss1 + U5 * s1))
-               return y;
-
-             return atanMp (x, pr);
+             y = x + yy;
+             /* Max ULP is 0.511.  */
+             return y;
             }
         }
        else
         {                       /* B <= u < C */
-         i = (TWO52 + TWO8 * u) - TWO52;
+         i = (TWO52 + 256 * u) - TWO52;
           i -= 16;
           z = u - cij[i][0].d;
           yy = cij[i][5].d + z * cij[i][6].d;
@@ -141,44 +104,9 @@ __atan (double x)
           yy *= z;
  
           t1 = cij[i][1].d;
-         if (i < 112)
-           {
-             if (i < 48)
-               u2 = U21;       /* u < 1/4        */
-             else
-               u2 = U22;
-           }                   /* 1/4 <= u < 1/2 */
-         else
-           {
-             if (i < 176)
-               u2 = U23;       /* 1/2 <= u < 3/4 */
-             else
-               u2 = U24;
-           }                   /* 3/4 <= u <= 1  */
-         if ((y = t1 + (yy - u2 * t1)) == t1 + (yy + u2 * t1))
-           return __signArctan (x, y);
-
-         z = u - hij[i][0].d;
-
-         s1 = hij[i][14].d + z * hij[i][15].d;
-         s1 = hij[i][13].d + z * s1;
-         s1 = hij[i][12].d + z * s1;
-         s1 = hij[i][11].d + z * s1;
-         s1 *= z;
-
-         ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
-         MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2);
-         ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
-         MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2);
-         ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
-         MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2);
-         ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
-         MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2);
-         ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
-         if ((y = s2 + (ss2 - U6 * s2)) == s2 + (ss2 + U6 * s2))
-           return __signArctan (x, y);
-
-         return atanMp (x, pr);
+         y = t1 + yy;
+         /* Max ULP is 0.56.  */
+         return __signArctan (x, y);
         }
      }
    else
@@ -188,7 +116,7 @@ __atan (double x)
           w = 1 / u;
           EMULV (w, u, t1, t2);
           ww = w * ((1 - t1) - t2);
-         i = (TWO52 + TWO8 * w) - TWO52;
+         i = (TWO52 + 256 * w) - TWO52;
           i -= 16;
           z = (w - cij[i][0].d) + ww;
  
@@ -199,37 +127,9 @@ __atan (double x)
           yy = HPI1 - z * yy;
  
           t1 = HPI - cij[i][1].d;
-         if (i < 112)
-           u3 = U31;           /* w <  1/2 */
-         else
-           u3 = U32;           /* w >= 1/2 */
-         if ((y = t1 + (yy - u3)) == t1 + (yy + u3))
-           return __signArctan (x, y);
-
-         DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4);
-         t1 = w - hij[i][0].d;
-         EADD (t1, ww, z, zz);
-
-         s1 = hij[i][14].d + z * hij[i][15].d;
-         s1 = hij[i][13].d + z * s1;
-         s1 = hij[i][12].d + z * s1;
-         s1 = hij[i][11].d + z * s1;
-         s1 *= z;
-
-         ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
-         MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2);
-         ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
-         MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2);
-         ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
-         MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2);
-         ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
-         MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2);
-         ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
-         SUB2 (HPI, HPI1, s2, ss2, s1, ss1, t1, t2);
-         if ((y = s1 + (ss1 - U7)) == s1 + (ss1 + U7))
-           return __signArctan (x, y);
-
-         return atanMp (x, pr);
+         y = t1 + yy;
+         /* Max ULP is 0.503.  */
+         return __signArctan (x, y);
         }
        else
         {
@@ -249,34 +149,9 @@ __atan (double x)
               ww = w * ((1 - t1) - t2);
               ESUB (HPI, w, t3, cor);
               yy = ((HPI1 + cor) - ww) - yy;
-             if ((y = t3 + (yy - U4)) == t3 + (yy + U4))
-               return __signArctan (x, y);
-
-             DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4);
-             MUL2 (w, ww, w, ww, v, vv, t1, t2);
-
-             s1 = f17.d + v * f19.d;
-             s1 = f15.d + v * s1;
-             s1 = f13.d + v * s1;
-             s1 = f11.d + v * s1;
-             s1 *= v;
-
-             ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
-             MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-             ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
-             MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-             ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
-             MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-             ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
-             MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-             MUL2 (w, ww, s1, ss1, s2, ss2, t1, t2);
-             ADD2 (w, ww, s2, ss2, s1, ss1, t1, t2);
-             SUB2 (HPI, HPI1, s1, ss1, s2, ss2, t1, t2);
-
-             if ((y = s2 + (ss2 - U8)) == s2 + (ss2 + U8))
-               return __signArctan (x, y);
-
-             return atanMp (x, pr);
+             y = t3 + yy;
+             /* Max ULP is 0.5003.  */
+             return __signArctan (x, y);
             }
           else
             {
@@ -290,35 +165,6 @@ __atan (double x)
      }
  }
  
- /* Final stages. Compute atan(x) by multiple precision arithmetic */
-static double
-atanMp (double x, const int pr[])
-{
-  mp_no mpx, mpy, mpy2, mperr, mpt1, mpy1;
-  double y1, y2;
-  int i, p;
-
-  for (i = 0; i < M; i++)
-    {
-      p = pr[i];
-      __dbl_mp (x, &mpx, p);
-      __mpatan (&mpx, &mpy, p);
-      __dbl_mp (u9[i].d, &mpt1, p);
-      __mul (&mpy, &mpt1, &mperr, p);
-      __add (&mpy, &mperr, &mpy1, p);
-      __sub (&mpy, &mperr, &mpy2, p);
-      __mp_dbl (&mpy1, &y1, p);
-      __mp_dbl (&mpy2, &y2, p);
-      if (y1 == y2)
-       {
-         LIBC_PROBE (slowatan, 3, &p, &x, &y1);
-         return y1;
-       }
-    }
-  LIBC_PROBE (slowatan_inexact, 3, &p, &x, &y1);
-  return y1;                   /*if impossible to do exact computing */
-}
-
  #ifndef __atan
  libm_alias_double (__atan, atan)
  #endif
diff --git a/sysdeps/ieee754/dbl-64/uatan.tbl b/sysdeps/ieee754/dbl-64/uatan.tbl

index 8a477caa385d3f447abacf8490a8b45278d86610..fdc3e53304112eb31863a3144d6acb2f65a77a2e 100644 (file)
--- a/sysdeps/ieee754/dbl-64/uatan.tbl
+++ b/sysdeps/ieee754/dbl-64/uatan.tbl
@@ -25,7 +25,7 @@
  
  #ifdef BIG_ENDI
  
-  static const number
+  static const mynumber
        cij[241][7] = {                             /* x0,cij for (1/16,1) */
  /**/                 {{{0X3FB04006, 0X65E0244E} },
  /**/                  {{0X3FB03A73, 0X7B53DD20} },
@@ -1716,7 +1716,7 @@
  /**/                  {{0XBF99B9A7, 0X18A3BA58} } },
    };
  
-  static const number
+  static const mynumber
       hij[241][16] = {                             /* x0,hij for (1/16,1) */
  /**/                 {{{0x3fb04000, 0x00000000} },
  /**/                  {{0x3fb03a6d, 0x1c06693d} },
@@ -5579,7 +5579,7 @@
  #else
  #ifdef LITTLE_ENDI
  
-  static const number
+  static const mynumber
        cij[241][7] = {                             /* x0,cij for (1/16,1) */
  /**/                 {{{0X65E0244E, 0X3FB04006} },
  /**/                  {{0X7B53DD20, 0X3FB03A73} },
@@ -7270,7 +7270,7 @@
  /**/                  {{0X18A3BA58, 0XBF99B9A7} } },
    };
  
-  static const number
+  static const mynumber
       hij[241][16] = {                             /* x0,hij for (1/16,1) */
  /**/                 {{{0x00000000, 0x3fb04000} },
  /**/                  {{0x1c06693d, 0x3fb03a6d} },
author	Wilco Dijkstra <wilco.dijkstra@arm.com>
	Wed, 10 Mar 2021 12:40:56 +0000 (12:40 +0000)
committer	Wilco Dijkstra <wdijkstr@arm.com>
	Thu, 11 Mar 2021 14:26:36 +0000 (14:26 +0000)
sysdeps/ieee754/dbl-64/atnat.h		patch | blob | blame | history
sysdeps/ieee754/dbl-64/s_atan.c		patch | blob | blame | history
sysdeps/ieee754/dbl-64/uatan.tbl		patch | blob | blame | history