fix inaccuracy of j0f for x >= 2^127 when sin(x)+cos(x) is tiny (v4)

Mon Aug 3 13:19:25 GMT 2020

Dear Florian,

> I suggest to leave it at 6, other CPU variants may still need the 6
> there.

here is a new version.

Paul

>From 971c832c2087e9463951d1b07b48d4d9a998e8c0 Mon Sep 17 00:00:00 2001
From: Paul Zimmermann <Paul.Zimmermann@inria.fr>
Date: Mon, 3 Aug 2020 15:16:39 +0200
Subject: [PATCH] fix inaccuracy of j0f for x >= 2^127 when sin(x)+cos(x) is
 tiny (v4)

---
 math/auto-libm-test-in         |  2 ++
 sysdeps/ieee754/flt-32/e_j0f.c | 17 ++++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in
index 4414e54d93..5d488a8711 100644
--- a/math/auto-libm-test-in
+++ b/math/auto-libm-test-in
@@ -5748,6 +5748,8 @@ j0 0x1p16382
 j0 0x1p16383
 # the next value generates larger error bounds on x86_64 (binary32)
 j0 0x2.602774p+0 xfail-rounding:ibm128-libgcc
+# the next value exercises the flt-32 code path for x >= 2^127
+j0 0x8.2f4ecp+124
 
 j1 -1.0
 j1 0.0
diff --git a/sysdeps/ieee754/flt-32/e_j0f.c b/sysdeps/ieee754/flt-32/e_j0f.c
index c89b9f2688..91e8de8fe3 100644
--- a/sysdeps/ieee754/flt-32/e_j0f.c
+++ b/sysdeps/ieee754/flt-32/e_j0f.c
@@ -55,7 +55,22 @@ __ieee754_j0f(float x)
 		    z = -__cosf(x+x);
 		    if ((s*c)<zero) cc = z/ss;
 		    else	    ss = z/cc;
-		}
+		} else {
+                  /* We subtract (exactly) a value x0 such that cos(x0)+sin(x0)
+                     is very near to 0, and use the identity
+                     sin(x-x0) = sin(x)*cos(x0)-cos(x)*sin(x0) to get
+                     sin(x) + cos(x) with extra accuracy. */
+                  float x0 = 0xe.d4108p+124f;
+                  float y = x - x0; /* exact */
+                  /* sin(y) = sin(x)*cos(x0)-cos(x)*sin(x0) */
+                  z = __sinf (y);
+                  float eps = 0x1.5f263ep-24f;
+                  /* cos(x0) ~ -sin(x0) + eps */
+                  z += eps * __cosf (x);
+                  /* now z ~ (sin(x)-cos(x))*cos(x0) */
+                  float cosx0 = -0xb.504f3p-4f;
+                  cc = z / cosx0;
+                }
 	/*
 	 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
 	 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
-- 
2.27.0