[PATCH] Fix the inaccuracy of j0f (BZ 14469) and y0f (BZ 14471)

Paul Zimmermann Paul.Zimmermann@inria.fr
Tue Jan 19 10:28:14 GMT 2021


thank you Joseph for your review. Here is an updated combined patch.

Best regards,
Paul

For both j0f and y0f, the largest error for all binary32 inputs is now less
then 9.5 ulps with respect to the infinite precision result, i.e., less than
9 ulps after rounding, which is the largest error allowed.  (This is for
rounding to nearest; for other rounding modes, the new code should not behave
worse than the old one.)

The new code is enabled only when there is a cancellation at the very end of
the j0f/y0f computation, or for very large inputs, thus should not give any
visible slowdown on average. Two different algorithms are used:

* around the first 64 zeros of j0/y0, approximation polynomials of degree 3
  are used, computed using the Sollya tool (https://www.sollya.org/)

* for large inputs, an asymptotic formula from [1] is used

[1] Fast and Accurate Bessel Function Computation,
John Harrison, Proceedings of Arith 19, 2009.

The largest error is now obtained for the following inputs respectively:

libm wrong by up to 9.50e+00 ulp(s) for x=0x1.8bde7ep+5
j0f     gives 0x1.e97c1ep-14
mpfr_j0 gives 0x1.e97c0cp-14

libm wrong by up to 9.49e+00 ulp(s) for x=0x1.3d4e56p+6
y0f     gives 0x1.b42876p-16
mpfr_y0 gives 0x1.b42864p-16
---
 math/auto-libm-test-in            |   8 +-
 math/auto-libm-test-out-j0        |  50 +--
 math/auto-libm-test-out-y0        |  50 +--
 sysdeps/ieee754/flt-32/e_j0f.c    | 564 +++++++++++++++++++++++++++---
 sysdeps/x86_64/fpu/libm-test-ulps |  28 +-
 5 files changed, 592 insertions(+), 108 deletions(-)

diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in
index 73840b8bef..2c7afe64ee 100644
--- a/math/auto-libm-test-in
+++ b/math/auto-libm-test-in
@@ -5756,8 +5756,8 @@ j0 -0x1.001000001p+593
 j0 0x1p1023
 j0 0x1p16382
 j0 0x1p16383
-# the next value generates larger error bounds on x86_64 (binary32)
-j0 0x2.602774p+0 xfail-rounding:ibm128-libgcc
+# the next value generates large error bounds on x86_64 (binary32)
+j0 0x1.8bde7ep+5
 # the next value exercises the flt-32 code path for x >= 2^127
 j0 0x8.2f4ecp+124
 
@@ -8225,8 +8225,8 @@ y0 0x1p-100
 y0 0x1p-110
 y0 0x1p-600
 y0 0x1p-10000
-# the next value generates larger error bounds on x86_64 (binary32)
-y0 0xd.3432bp-4
+# the next value generates large error bounds on x86_64 (binary32)
+y0 0x1.3d4e56p+6
 y0 min
 y0 min_subnorm
 
diff --git a/math/auto-libm-test-out-j0 b/math/auto-libm-test-out-j0
index cc1fea074e..32b9685de3 100644
--- a/math/auto-libm-test-out-j0
+++ b/math/auto-libm-test-out-j0
@@ -1334,31 +1334,31 @@ j0 0x1p16383
 = j0 tonearest ibm128 0xf.ffffffffffffbffffffffffffcp+1020 : -0xb.a80d0ee91ce259a722e1f01904p-516 : inexact-ok
 = j0 towardzero ibm128 0xf.ffffffffffffbffffffffffffcp+1020 : -0xb.a80d0ee91ce259a722e1f019p-516 : inexact-ok
 = j0 upward ibm128 0xf.ffffffffffffbffffffffffffcp+1020 : -0xb.a80d0ee91ce259a722e1f019p-516 : inexact-ok
-j0 0x2.602774p+0 xfail-rounding:ibm128-libgcc
-= j0 downward binary32 0x2.602774p+0 : 0x3.e83778p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 tonearest binary32 0x2.602774p+0 : 0x3.e83778p-8 : inexact-ok
-= j0 towardzero binary32 0x2.602774p+0 : 0x3.e83778p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 upward binary32 0x2.602774p+0 : 0x3.e8377cp-8 : xfail:ibm128-libgcc inexact-ok
-= j0 downward binary64 0x2.602774p+0 : 0x3.e83779fe1991p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 tonearest binary64 0x2.602774p+0 : 0x3.e83779fe19912p-8 : inexact-ok
-= j0 towardzero binary64 0x2.602774p+0 : 0x3.e83779fe1991p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 upward binary64 0x2.602774p+0 : 0x3.e83779fe19912p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 downward intel96 0x2.602774p+0 : 0x3.e83779fe19911fa8p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 tonearest intel96 0x2.602774p+0 : 0x3.e83779fe19911fa8p-8 : inexact-ok
-= j0 towardzero intel96 0x2.602774p+0 : 0x3.e83779fe19911fa8p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 upward intel96 0x2.602774p+0 : 0x3.e83779fe19911facp-8 : xfail:ibm128-libgcc inexact-ok
-= j0 downward m68k96 0x2.602774p+0 : 0x3.e83779fe19911fa8p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 tonearest m68k96 0x2.602774p+0 : 0x3.e83779fe19911fa8p-8 : inexact-ok
-= j0 towardzero m68k96 0x2.602774p+0 : 0x3.e83779fe19911fa8p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 upward m68k96 0x2.602774p+0 : 0x3.e83779fe19911facp-8 : xfail:ibm128-libgcc inexact-ok
-= j0 downward binary128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab7p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 tonearest binary128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab7p-8 : inexact-ok
-= j0 towardzero binary128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab7p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 upward binary128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab702p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 downward ibm128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab7p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 tonearest ibm128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab7p-8 : inexact-ok
-= j0 towardzero ibm128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab7p-8 : xfail:ibm128-libgcc inexact-ok
-= j0 upward ibm128 0x2.602774p+0 : 0x3.e83779fe19911fa806cee83ab8p-8 : xfail:ibm128-libgcc inexact-ok
+j0 0x1.8bde7ep+5
+= j0 downward binary32 0x3.17bcfcp+4 : 0x7.a5f028p-16 : inexact-ok
+= j0 tonearest binary32 0x3.17bcfcp+4 : 0x7.a5f03p-16 : inexact-ok
+= j0 towardzero binary32 0x3.17bcfcp+4 : 0x7.a5f028p-16 : inexact-ok
+= j0 upward binary32 0x3.17bcfcp+4 : 0x7.a5f03p-16 : inexact-ok
+= j0 downward binary64 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77cp-16 : inexact-ok
+= j0 tonearest binary64 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77cp-16 : inexact-ok
+= j0 towardzero binary64 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77cp-16 : inexact-ok
+= j0 upward binary64 0x3.17bcfcp+4 : 0x7.a5f02c0a5b78p-16 : inexact-ok
+= j0 downward intel96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0a8p-16 : inexact-ok
+= j0 tonearest intel96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0bp-16 : inexact-ok
+= j0 towardzero intel96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0a8p-16 : inexact-ok
+= j0 upward intel96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0bp-16 : inexact-ok
+= j0 downward m68k96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0a8p-16 : inexact-ok
+= j0 tonearest m68k96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0bp-16 : inexact-ok
+= j0 towardzero m68k96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0a8p-16 : inexact-ok
+= j0 upward m68k96 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0bp-16 : inexact-ok
+= j0 downward binary128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f168p-16 : inexact-ok
+= j0 tonearest binary128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f168p-16 : inexact-ok
+= j0 towardzero binary128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f168p-16 : inexact-ok
+= j0 upward binary128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f16cp-16 : inexact-ok
+= j0 downward ibm128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935fp-16 : inexact-ok
+= j0 tonearest ibm128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f2p-16 : inexact-ok
+= j0 towardzero ibm128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935fp-16 : inexact-ok
+= j0 upward ibm128 0x3.17bcfcp+4 : 0x7.a5f02c0a5b77d0af558d6935f2p-16 : inexact-ok
 j0 0x8.2f4ecp+124
 = j0 downward binary32 0x8.2f4ecp+124 : 0xd.331efp-68 : inexact-ok
 = j0 tonearest binary32 0x8.2f4ecp+124 : 0xd.331efp-68 : inexact-ok
diff --git a/math/auto-libm-test-out-y0 b/math/auto-libm-test-out-y0
index 8ebb585170..8148a56933 100644
--- a/math/auto-libm-test-out-y0
+++ b/math/auto-libm-test-out-y0
@@ -795,31 +795,31 @@ y0 0x1p-10000
 = y0 tonearest binary128 0x1p-10000 : -0x1.13cc92aab9d385d1d0f2693cb631p+12 : inexact-ok
 = y0 towardzero binary128 0x1p-10000 : -0x1.13cc92aab9d385d1d0f2693cb631p+12 : inexact-ok
 = y0 upward binary128 0x1p-10000 : -0x1.13cc92aab9d385d1d0f2693cb631p+12 : inexact-ok
-y0 0xd.3432bp-4
-= y0 downward binary32 0xd.3432bp-4 : -0xf.fdd88p-8 : inexact-ok
-= y0 tonearest binary32 0xd.3432bp-4 : -0xf.fdd87p-8 : inexact-ok
-= y0 towardzero binary32 0xd.3432bp-4 : -0xf.fdd87p-8 : inexact-ok
-= y0 upward binary32 0xd.3432bp-4 : -0xf.fdd87p-8 : inexact-ok
-= y0 downward binary64 0xd.3432bp-4 : -0xf.fdd871793bc78p-8 : inexact-ok
-= y0 tonearest binary64 0xd.3432bp-4 : -0xf.fdd871793bc7p-8 : inexact-ok
-= y0 towardzero binary64 0xd.3432bp-4 : -0xf.fdd871793bc7p-8 : inexact-ok
-= y0 upward binary64 0xd.3432bp-4 : -0xf.fdd871793bc7p-8 : inexact-ok
-= y0 downward intel96 0xd.3432bp-4 : -0xf.fdd871793bc71fap-8 : inexact-ok
-= y0 tonearest intel96 0xd.3432bp-4 : -0xf.fdd871793bc71f9p-8 : inexact-ok
-= y0 towardzero intel96 0xd.3432bp-4 : -0xf.fdd871793bc71f9p-8 : inexact-ok
-= y0 upward intel96 0xd.3432bp-4 : -0xf.fdd871793bc71f9p-8 : inexact-ok
-= y0 downward m68k96 0xd.3432bp-4 : -0xf.fdd871793bc71fap-8 : inexact-ok
-= y0 tonearest m68k96 0xd.3432bp-4 : -0xf.fdd871793bc71f9p-8 : inexact-ok
-= y0 towardzero m68k96 0xd.3432bp-4 : -0xf.fdd871793bc71f9p-8 : inexact-ok
-= y0 upward m68k96 0xd.3432bp-4 : -0xf.fdd871793bc71f9p-8 : inexact-ok
-= y0 downward binary128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b44118p-8 : inexact-ok
-= y0 tonearest binary128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b44118p-8 : inexact-ok
-= y0 towardzero binary128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b4411p-8 : inexact-ok
-= y0 upward binary128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b4411p-8 : inexact-ok
-= y0 downward ibm128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b444p-8 : inexact-ok
-= y0 tonearest ibm128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b44p-8 : inexact-ok
-= y0 towardzero ibm128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b44p-8 : inexact-ok
-= y0 upward ibm128 0xd.3432bp-4 : -0xf.fdd871793bc71f92d6b137b44p-8 : inexact-ok
+y0 0x1.3d4e56p+6
+= y0 downward binary32 0x4.f53958p+4 : 0x1.b42862p-16 : inexact-ok
+= y0 tonearest binary32 0x4.f53958p+4 : 0x1.b42864p-16 : inexact-ok
+= y0 towardzero binary32 0x4.f53958p+4 : 0x1.b42862p-16 : inexact-ok
+= y0 upward binary32 0x4.f53958p+4 : 0x1.b42864p-16 : inexact-ok
+= y0 downward binary64 0x4.f53958p+4 : 0x1.b428630651a17p-16 : inexact-ok
+= y0 tonearest binary64 0x4.f53958p+4 : 0x1.b428630651a17p-16 : inexact-ok
+= y0 towardzero binary64 0x4.f53958p+4 : 0x1.b428630651a17p-16 : inexact-ok
+= y0 upward binary64 0x4.f53958p+4 : 0x1.b428630651a18p-16 : inexact-ok
+= y0 downward intel96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 tonearest intel96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 towardzero intel96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 upward intel96 0x4.f53958p+4 : 0x1.b428630651a175aep-16 : inexact-ok
+= y0 downward m68k96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 tonearest m68k96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 towardzero m68k96 0x4.f53958p+4 : 0x1.b428630651a175acp-16 : inexact-ok
+= y0 upward m68k96 0x4.f53958p+4 : 0x1.b428630651a175aep-16 : inexact-ok
+= y0 downward binary128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104eeap-16 : inexact-ok
+= y0 tonearest binary128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104eebp-16 : inexact-ok
+= y0 towardzero binary128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104eeap-16 : inexact-ok
+= y0 upward binary128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104eebp-16 : inexact-ok
+= y0 downward ibm128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104e8p-16 : inexact-ok
+= y0 tonearest ibm128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104fp-16 : inexact-ok
+= y0 towardzero ibm128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104e8p-16 : inexact-ok
+= y0 upward ibm128 0x4.f53958p+4 : 0x1.b428630651a175ac20abf8104fp-16 : inexact-ok
 y0 min
 = y0 downward binary32 0x4p-128 : -0x3.7ac89cp+4 : inexact-ok
 = y0 tonearest binary32 0x4p-128 : -0x3.7ac89cp+4 : inexact-ok
diff --git a/sysdeps/ieee754/flt-32/e_j0f.c b/sysdeps/ieee754/flt-32/e_j0f.c
index 5d29611eb7..92da9e5642 100644
--- a/sysdeps/ieee754/flt-32/e_j0f.c
+++ b/sysdeps/ieee754/flt-32/e_j0f.c
@@ -37,6 +37,330 @@ S04  =  1.1661400734e-09; /* 0x30a045e8 */
 
 static const float zero = 0.0;
 
+#define FIRST_ZERO_J0 2.404825f
+
+#define SMALL_SIZE 64
+
+/* The following table contains successive zeros of j0 and degree-3 polynomial
+   approximations of j0 around these zeros: Pj[0] for the first zero (2.404825),
+   Pj[1] for the second one (5.520078), and so on.  Each line contains:
+              {x0, xmid, x1, p0, p1, p2, p3}
+   where [x0,x1] is the interval around the zero, xmid is the binary32 number closest
+   to the zero, and p0+p1*x+p2*x^2+p3*x^3 is the approximation polynomial.
+   Each polynomial was generated using Remez' algorithm on the interval [x0,x1]
+   around the corresponding zero where the error is larger than 9 ulps for the
+   default code below.  Degree 3 is enough to get an error less than 4 ulps.
+*/
+static float Pj[SMALL_SIZE][7] = {
+  { 0x2.63cb8cp+0, 0x2.67a2a4p+0, 0x2.6f5d28p+0, 0xf.26247p-28, -0x8.4e6d9p-4, 0x1.ba1c7p-4, 0xe.6c06ap-8 },
+/* The following polynomial was generated by Sollya. */
+  { 0x5.83abe8p+0, 0x5.8523d8p+0, 0x5.8a557p+0, 0x6.9205fp-28, 0x5.71b98p-4, -0x7.e3e798p-8, -0xd.87d1p-8 },
+  { 0x8.a66f1p+0, 0x8.a75abp+0, 0x8.a92a7p+0, 0x1.bcc1cap-24, -0x4.57de6p-4, 0x4.03debp-8, 0xb.44a4ap-8 },
+  { 0xb.c9905p+0, 0xb.caa2p+0, 0xb.ccc6bp+0, -0xf.2976fp-32, 0x3.b827ccp-4, -0x2.85fdb8p-8, -0x9.c5a97p-8 },
+  { 0xe.edb6ap+0, 0xe.ee50ap+0, 0xe.ef864p+0, -0x1.bd67d8p-28, -0x3.4e03ap-4, 0x1.c54b8ep-8, 0x8.bb70dp-8 },
+  { 0x1.2119fp+4, 0x1.212314p+4, 0x1.21375p+4, 0x1.62209cp-28, 0x3.00efecp-4, -0x1.5467fp-8, -0x7.f5a2p-8 },
+  { 0x1.535d28p+4, 0x1.5362dep+4, 0x1.536cbcp+4, -0x2.853f74p-24, -0x2.c5b274p-4, 0x1.0bab0ap-8, 0x7.5c05b8p-8 },
+  { 0x1.859df6p+4, 0x1.85a3bap+4, 0x1.85afcap+4, 0x2.19ed1cp-24, 0x2.96545cp-4, -0xd.995c9p-12, -0x6.e024cp-8 },
+  { 0x1.b7dfe6p+4, 0x1.b7e54ap+4, 0x1.b7ebccp+4, 0xe.959aep-28, -0x2.6f5594p-4, 0xb.55ff1p-12, 0x6.79cf58p-8 },
+  { 0x1.ea22bcp+4, 0x1.ea275ap+4, 0x1.ea2e2ep+4, 0x2.0c3964p-24, 0x2.4e80fcp-4, -0x9.a35abp-12, -0x6.234b08p-8 },
+  { 0x2.1c6638p+4, 0x2.1c69c4p+4, 0x2.1c6d7cp+4, -0x3.642554p-24, -0x2.325e48p-4, 0x8.534f1p-12, 0x5.d9048p-8 },
+  { 0x2.4ea8dcp+4, 0x2.4eac7p+4, 0x2.4eb39cp+4, 0x1.6c015ap-24, 0x2.19e7d8p-4, -0x7.4915f8p-12, -0x5.984698p-8 },
+  { 0x2.80ec2p+4, 0x2.80ef5p+4, 0x2.80f72cp+4, -0x4.b18c9p-28, -0x2.046174p-4, 0x6.720468p-12, 0x5.5f426p-8 },
+  { 0x2.b32e6p+4, 0x2.b33258p+4, 0x2.b33654p+4, -0x1.8b8792p-24, 0x1.f13fbp-4, -0x5.c149dp-12, -0x5.2c935p-8 },
+  { 0x2.e5736p+4, 0x2.e5758p+4, 0x2.e57894p+4, 0x3.a26e0cp-24, -0x1.e018dap-4, 0x5.2df918p-12, 0x4.ff0f68p-8 },
+  { 0x3.17b694p+4, 0x3.17b8c4p+4, 0x3.17bcecp+4, -0x2.18fabcp-24, 0x1.d09b22p-4, -0x4.b1c31p-12, -0x4.d5ecd8p-8 },
+  { 0x3.49f9d8p+4, 0x3.49fc1cp+4, 0x3.4a0084p+4, 0x3.2370e8p-24, -0x1.c28614p-4, 0x4.47bb78p-12, 0x4.b08458p-8 },
+  { 0x3.7c3d78p+4, 0x3.7c3f88p+4, 0x3.7c43ep+4, -0x5.9eae3p-28, 0x1.b5a622p-4, -0x3.ec892cp-12, -0x4.8e4d3p-8 },
+  { 0x3.ae80ap+4, 0x3.ae83p+4, 0x3.ae8528p+4, 0x2.9fa1e8p-24, -0x1.a9d184p-4, 0x3.9d2fa8p-12, 0x4.6edccp-8 },
+  { 0x3.e0c484p+4, 0x3.e0c688p+4, 0x3.e0c8a4p+4, 0x9.9ac67p-28, 0x1.9ee5eep-4, -0x3.57e9ep-12, -0x4.51d1e8p-8 },
+  { 0x4.1308f8p+4, 0x4.130a18p+4, 0x4.130c58p+4, 0xd.6ab94p-28, -0x1.94c6f6p-4, 0x3.1ac03p-12, 0x4.36e4bp-8 },
+  { 0x4.454cp+4, 0x4.454dbp+4, 0x4.45504p+4, -0x4.4cb2d8p-24, 0x1.8b5cccp-4, -0x2.e477ap-12, -0x4.1dd858p-8 },
+  { 0x4.778f6p+4, 0x4.779158p+4, 0x4.779368p+4, -0x4.4aa8c8p-24, -0x1.829356p-4, 0x2.b4726cp-12, 0x4.0676dp-8 },
+  { 0x4.a9d3ep+4, 0x4.a9d5p+4, 0x4.a9d6cp+4, 0x2.077c38p-24, 0x1.7a597ep-4, -0x2.891dbcp-12, -0x3.f09154p-8 },
+  { 0x4.dc175p+4, 0x4.dc18bp+4, 0x4.dc1a08p+4, -0x2.6a6cd8p-24, -0x1.72a09ap-4, 0x2.62315cp-12, 0x3.dc034p-8 },
+  { 0x5.0e5bb8p+4, 0x5.0e5c6p+4, 0x5.0e5dbp+4, -0x5.08287p-24, 0x1.6b5c06p-4, -0x2.3ec48p-12, -0x3.c8a91cp-8 },
+  { 0x5.409ebp+4, 0x5.40a02p+4, 0x5.40a188p+4, -0x3.4598dcp-24, -0x1.6480c4p-4, 0x2.1f1798p-12, 0x3.b667ccp-8 },
+  { 0x5.72e268p+4, 0x5.72e3ep+4, 0x5.72e54p+4, 0x5.4e74bp-24, 0x1.5e0544p-4, -0x2.021254p-12, -0x3.a5248cp-8 },
+  { 0x5.a5263p+4, 0x5.a527ap+4, 0x5.a528d8p+4, -0x2.05751cp-24, -0x1.57e12p-4, 0x1.e7643ep-12, 0x3.94c994p-8 },
+  { 0x5.d76acp+4, 0x5.d76b68p+4, 0x5.d76ccp+4, 0x4.c5e278p-24, 0x1.520ceep-4, -0x1.cf1d4ep-12, -0x3.85428cp-8 },
+  { 0x6.09ae88p+4, 0x6.09af3p+4, 0x6.09b0bp+4, -0x3.50e62cp-24, -0x1.4c822p-4, 0x1.b8ab9ap-12, 0x3.767f34p-8 },
+  { 0x6.3bf21p+4, 0x6.3bf2f8p+4, 0x6.3bf418p+4, -0x1.c39f38p-24, 0x1.473ae6p-4, -0x1.a3dccep-12, -0x3.68700cp-8 },
+  { 0x6.6e362p+4, 0x6.6e36c8p+4, 0x6.6e3818p+4, -0x1.9245b6p-28, -0x1.42320ap-4, 0x1.90d5f2p-12, 0x3.5b0634p-8 },
+  { 0x6.a079dp+4, 0x6.a07a98p+4, 0x6.a07b78p+4, -0x1.0acf8p-24, 0x1.3d62e6p-4, -0x1.7f1e42p-12, -0x3.4e3678p-8 },
+  { 0x6.d2bda8p+4, 0x6.d2be68p+4, 0x6.d2bfb8p+4, 0x4.cd92d8p-24, -0x1.38c94ap-4, 0x1.6e94e2p-12, 0x3.41f4acp-8 },
+  { 0x7.05018p+4, 0x7.05024p+4, 0x7.0503p+4, -0x1.37bf8ap-24, 0x1.34617p-4, -0x1.5f6a22p-12, -0x3.3637c8p-8 },
+  { 0x7.37459p+4, 0x7.374618p+4, 0x7.3747ap+4, -0x1.8f62dep-28, -0x1.3027fp-4, 0x1.51357ap-12, 0x3.2af594p-8 },
+  { 0x7.69892p+4, 0x7.6989fp+4, 0x7.698b98p+4, -0x9.81e79p-28, 0x1.2c19b4p-4, -0x1.43e0aep-12, -0x3.20271p-8 },
+  { 0x7.9bccf8p+4, 0x7.9bcdc8p+4, 0x7.9bceap+4, 0x3.103b3p-24, -0x1.2833eep-4, 0x1.37580ep-12, 0x3.15c484p-8 },
+  { 0x7.ce10dp+4, 0x7.ce11a8p+4, 0x7.ce136p+4, 0x2.07b058p-24, 0x1.24740ap-4, -0x1.2bd334p-12, -0x3.0bc628p-8 },
+  { 0x8.0054cp+4, 0x8.00558p+4, 0x8.00562p+4, 0x3.87576cp-24, -0x1.20d7b6p-4, 0x1.20af6cp-12, 0x3.022738p-8 },
+  { 0x8.32994p+4, 0x8.32996p+4, 0x8.329a4p+4, -0x1.691ecp-24, 0x1.1d5ccap-4, -0x1.167022p-12, -0x2.f8e084p-8 },
+  { 0x8.64dcdp+4, 0x8.64dd4p+4, 0x8.64dd9p+4, 0x9.b406dp-28, -0x1.1a015p-4, 0x1.0cbfd2p-12, 0x2.efee34p-8 },
+  { 0x8.97209p+4, 0x8.97212p+4, 0x8.9721bp+4, -0xf.bfd8fp-28, 0x1.16c37ap-4, -0x1.039356p-12, -0x2.e74a78p-8 },
+  { 0x8.c9649p+4, 0x8.c965p+4, 0x8.c965bp+4, 0x2.6d50c8p-24, -0x1.13a19ep-4, 0xf.ae0ap-16, 0x2.def13p-8 },
+  { 0x8.fba89p+4, 0x8.fba8ep+4, 0x8.fba9dp+4, -0x4.d475c8p-24, 0x1.109a32p-4, -0xf.29e9cp-16, -0x2.d6de4cp-8 },
+  { 0x9.2dec7p+4, 0x9.2deccp+4, 0x9.2dedp+4, 0x8.1982p-24, -0x1.0dabc8p-4, 0xe.ac514p-16, 0x2.cf0e6p-8 },
+  { 0x9.60306p+4, 0x9.6030bp+4, 0x9.60318p+4, 0x4.864518p-24, 0x1.0ad51p-4, -0xe.3d1fbp-16, -0x2.c77d28p-8 },
+  { 0x9.92743p+4, 0x9.92749p+4, 0x9.9274ep+4, 0x6.8917a8p-28, -0x1.0814d4p-4, 0xd.cb25ap-16, 0x2.c0283p-8 },
+  { 0x9.c4b81p+4, 0x9.c4b87p+4, 0x9.c4b8ep+4, -0x5.fa18fp-24, 0x1.0569fp-4, -0xd.5e6d8p-16, -0x2.b90bep-8 },
+  { 0x9.f6fc2p+4, 0x9.f6fc6p+4, 0x9.f6fcep+4, -0x4.0e5c98p-24, -0x1.02d354p-4, 0xc.feb37p-16, 0x2.b2259p-8 },
+  { 0xa.293f8p+4, 0xa.29404p+4, 0xa.29408p+4, -0x2.c3ddbp-24, 0x1.005004p-4, -0xc.9b641p-16, -0x2.ab72fp-8 },
+  { 0xa.5b83bp+4, 0xa.5b843p+4, 0xa.5b852p+4, -0x5.d052p-24, -0xf.ddf16p-8, 0xc.444dcp-16, 0x2.a4f0d4p-8 },
+  { 0xa.8dc7ap+4, 0xa.8dc81p+4, 0xa.8dc87p+4, -0x2.0b97dcp-24, 0xf.b7fafp-8, -0xb.e93a7p-16, -0x2.9e9dccp-8 },
+  { 0xa.c00b4p+4, 0xa.c00cp+4, 0xa.c00c4p+4, -0x5.4aab5p-24, -0xf.930fep-8, 0xb.99b61p-16, 0x2.98774p-8 },
+  { 0xa.f24f5p+4, 0xa.f24fep+4, 0xa.f2509p+4, -0x3.6dadd8p-24, 0xf.6f245p-8, -0xb.45e12p-16, -0x2.927b08p-8 },
+  { 0xb.24939p+4, 0xb.2493dp+4, 0xb.24948p+4, -0x2.d7e038p-24, -0xf.4c2cep-8, 0xa.fd076p-16, 0x2.8ca788p-8 },
+  { 0xb.56d73p+4, 0xb.56d7bp+4, 0xb.56d82p+4, -0x6.977a1p-24, 0xf.2a1fp-8, -0xa.af99ap-16, -0x2.86fb2p-8 },
+  { 0xb.891b3p+4, 0xb.891bap+4, 0xb.891c7p+4, 0x1.3cc95ep-24, -0xf.08f0ap-8, 0xa.6ca59p-16, 0x2.8173b8p-8 },
+  { 0xb.bb5f5p+4, 0xb.bb5f9p+4, 0xb.bb5fdp+4, 0x3.a4921p-24, 0xe.e8986p-8, -0xa.2c5b8p-16, -0x2.7c1024p-8 },
+  { 0xb.eda37p+4, 0xb.eda37p+4, 0xb.eda3bp+4, 0x6.b45a7p-24, -0xe.c90d8p-8, 0x9.e7307p-16, 0x2.76ceacp-8 },
+  { 0xc.1fe71p+4, 0xc.1fe76p+4, 0xc.1fe87p+4, -0x2.8f34a4p-24, 0xe.aa478p-8, -0x9.abd8fp-16, -0x2.71adecp-8 },
+  { 0xc.522aep+4, 0xc.522b5p+4, 0xc.522c4p+4, -0x1.325968p-24, -0xe.8c3eap-8, 0x9.72bf7p-16, 0x2.6cacd4p-8 },
+  { 0xc.846eep+4, 0xc.846f4p+4, 0xc.846fap+4, 0x4.96b808p-24, 0xe.6eeb5p-8, -0x9.3bc53p-16, -0x2.67ca04p-8 }
+};
+
+/* Argument reduction mod 2pi: T[i] ~ 2^i mod (2*pi). */
+static double T[128] = {
+  0x1p+0,
+  0x2p+0,
+  -0x2.487ed5110b462p+0,
+  0x1.b7812aeef4b9fp+0,
+  -0x2.d97c7f3321d24p+0,
+  0x9.585d6aac7a1a8p-4,
+  0x1.2b0bad558f435p+0,
+  0x2.56175aab1e86ap+0,
+  -0x1.9c501fbace38dp+0,
+  0x3.0fde959b6ed46p+0,
+  -0x2.8c1a9da2d9d3cp-4,
+  -0x5.18353b45b3a78p-4,
+  -0xa.306a768b674fp-4,
+  -0x1.460d4ed16ce9ep+0,
+  -0x2.8c1a9da2d9d3cp+0,
+  0x1.304999cb579e8p+0,
+  0x2.60933396af3dp+0,
+  -0x1.87586de3accc3p+0,
+  -0x3.0eb0dbc759986p+0,
+  0x2.b1d1d8258155p-4,
+  0x5.63a3b04b02aap-4,
+  0xa.c74760960554p-4,
+  0x1.58e8ec12c0aa8p+0,
+  0x2.b1d1d8258155p+0,
+  -0xe.4db24c6089bf8p-4,
+  -0x1.c9b6498c1137fp+0,
+  0x2.b51241f8e8d64p+0,
+  -0xd.e5a511f3999bp-4,
+  -0x1.bcb4a23e73336p+0,
+  0x2.cf15909424df4p+0,
+  -0xa.a53b3e8c1877p-4,
+  -0x1.54a767d1830eep+0,
+  -0x2.a94ecfa3061dcp+0,
+  0xf.5e135caff0a78p-4,
+  0x1.ebc26b95fe14fp+0,
+  -0x2.70f9fde50f1c4p+0,
+  0x1.668ad946ed0dap+0,
+  0x2.cd15b28dda1b4p+0,
+  -0xa.e536ff5570fbp-4,
+  -0x1.5ca6dfeaae1f6p+0,
+  -0x2.b94dbfd55c3ecp+0,
+  0xd.5e3556652c89p-4,
+  0x1.abc6aacca5912p+0,
+  -0x2.f0f17f77c023ep+0,
+  0x6.69bd6218afe64p-4,
+  0xc.d37ac4315fcc8p-4,
+  0x1.9a6f58862bf99p+0,
+  -0x3.13a02404b352ep+0,
+  0x2.13e8d07a4a046p-4,
+  0x4.27d1a0f49408cp-4,
+  0x8.4fa341e928118p-4,
+  0x1.09f4683d25023p+0,
+  0x2.13e8d07a4a046p+0,
+  -0x2.20ad341c773d4p+0,
+  0x2.07246cd81ccb8p+0,
+  -0x2.3a35fb60d1af2p+0,
+  0x1.d412de4f67e7ep+0,
+  -0x2.a05918723b764p+0,
+  0x1.07cca42c94599p+0,
+  0x2.0f99485928b32p+0,
+  -0x2.294c445eb9dfep+0,
+  0x1.f5e64c5397865p+0,
+  -0x2.5cb23c69dc396p+0,
+  0x1.8f1a5c3d52d34p+0,
+  0x3.1e34b87aa5a68p+0,
+  -0xc.15641bbff90bp-8,
+  -0x1.82ac8377ff216p-4,
+  -0x3.055906effe42cp-4,
+  -0x6.0ab20ddffc858p-4,
+  -0xc.15641bbff90bp-4,
+  -0x1.82ac8377ff216p+0,
+  -0x3.055906effe42cp+0,
+  0x3.dccc7310ec09ap-4,
+  0x7.b998e621d8134p-4,
+  0xf.7331cc43b0268p-4,
+  0x1.ee6639887604dp+0,
+  -0x2.6bb262001f3c6p+0,
+  0x1.711a1110cccd4p+0,
+  0x2.e2342221999a8p+0,
+  -0x8.41690cdd81128p-4,
+  -0x1.082d219bb0225p+0,
+  -0x2.105a43376044ap+0,
+  0x2.27ca4ea24abcep+0,
+  -0x1.f8ea37cc75cc6p+0,
+  0x2.56aa65781fad6p+0,
+  -0x1.9b2a0a20cbeb6p+0,
+  0x3.122ac0cf736f6p+0,
+  -0x2.4295372246764p-4,
+  -0x4.852a6e448cec8p-4,
+  -0x9.0a54dc8919d9p-4,
+  -0x1.214a9b91233b2p+0,
+  -0x2.4295372246764p+0,
+  0x1.c35466cc7e59ap+0,
+  -0x2.c1d607780e92ep+0,
+  0xc.4d2c620ee206p-4,
+  0x1.89a58c41dc40cp+0,
+  0x3.134b1883b8818p+0,
+  -0x2.1e8a4099a4324p-4,
+  -0x4.3d14813348648p-4,
+  -0x8.7a29026690c9p-4,
+  -0x1.0f45204cd2192p+0,
+  -0x2.1e8a4099a4324p+0,
+  0x2.0b6a53ddc2e18p+0,
+  -0x2.31aa2d558583p+0,
+  0x1.e52a7a6600402p+0,
+  -0x2.7e29e0450ac5cp+0,
+  0x1.4c2b1486f5ba8p+0,
+  0x2.9856290deb75p+0,
+  -0x1.17d282f5345bfp+0,
+  -0x2.2fa505ea68b7ep+0,
+  0x1.e934c93c39d64p+0,
+  -0x2.761542989799ap+0,
+  0x1.5c544fdfdc12fp+0,
+  0x2.b8a89fbfb825ep+0,
+  -0xd.72d95919afa58p-4,
+  -0x1.ae5b2b2335f4bp+0,
+  0x2.ebc87eca9f5cap+0,
+  -0x7.0edd77bcc8ccp-4,
+  -0xe.1dbaef799198p-4,
+  -0x1.c3b75def3233p+0,
+  0x2.c1101932a6e02p+0,
+  -0xc.65ea2abbd85fp-4,
+  -0x1.8cbd45577b0bep+0,
+  -0x3.197a8aaef617cp+0,
+  0x1.589bfb31f1687p-4,
+  0x2.b137f663e2d0ep-4,
+  0x5.626fecc7c5a1cp-4,
+  0xa.c4dfd98f8b438p-4
+};
+
+/* Return h such that x - pi/4 mod (2*pi) ~ h. */
+static double
+reduce_mod_twopi (double x)
+{
+  double sign = 1, h;
+  if (x < 0)
+    {
+      x = -x;
+      sign = -1;
+    }
+  h = 0; /* Invariant is x+h mod (2*pi). */
+  while (x >= 4)
+    {
+      int i = ilogb (x);
+      x -= ldexp (1.0f, i);
+      h += T[i];
+    }
+  /* Add the remainder x. */
+  h += x;
+  /* Subtract pi/4. */
+  double piover4 = 0xc.90fdaa22168cp-4;
+  h -= piover4;
+  /* Reduce mod 2*pi. */
+  double twopi = 0x6.487ed5110b46p+0;
+  while (fabs (h) > twopi * 0.5)
+    {
+      if (h > 0)
+        h -= twopi;
+      else
+        h += twopi;
+    }
+  return sign * h;
+}
+
+/* Formula page 5 of https://www.cl.cam.ac.uk/~jrh13/papers/bessel.pdf:
+   j0f(x) ~ sqrt(2/(pi*x))*beta0(x)*cos(x-pi/4-alpha0(x))
+   where beta0(x) = 1 - 16/x^2 + 53/(512*x^4)
+   and alpha0(x) = 1/(8*x) - 25/(384*x^3). */
+static float
+j0f_asympt (float x)
+{
+  /* The following code fails to give an error <= 9 ulps in only one case,
+     for which we tabulate the result. */
+  if (x == 0x1.4665d2p+24f)
+    return 0xa.50206p-52f;
+  double y = 1.0 / (double) x;
+  double y2 = y * y;
+  double beta0 = 1.0f + y2 * (-0x1p-4f + 0x1.a8p-4 * y2);
+  double alpha0 = y * (0x2p-4 - 0x1.0aaaaap-4 * y2);
+  double h;
+  h = reduce_mod_twopi ((double) x);
+  /* Subtract alpha0. */
+  h -= alpha0;
+  /* Now reduce mod pi/2. */
+  double piover2 = 0x1.921fb54442d18p+0;
+  int n = 0;
+  while (fabs (h) > piover2 / 2)
+    {
+      if (h > 0)
+        {
+          h -= piover2;
+          n ++;
+        }
+      else
+        {
+          h += piover2;
+          n --;
+        }
+    }
+  /* Now x - pi/4 - alpha0 = h + n*pi/2 mod (2*pi). */
+  float xr = (float) h;
+  n = n & 3;
+  float cst = 0xc.c422ap-4; /* sqrt(2/pi) rounded to nearest */
+  float t = cst / sqrtf (x) * (float) beta0;
+  if (n == 0)
+    return t * cosf (xr);
+  else if (n == 2) /* cos(x+pi) = -cos(x) */
+    return -t * cosf (xr);
+  else if (n == 1) /* cos(x+pi/2) = -sin(x) */
+    return -t * sinf (xr);
+  else             /* cos(x+3pi/2) = sin(x) */
+    return t * sinf (xr);
+}
+
+/* Special code for x near a root of j0.
+   z is the value computed by the generic code.
+   For small x, we use a polynomial approximating j0 around its root.
+   For large x, we use an asymptotic formula (j0f_asympt). */
+static float
+j0f_near_root (float x, float z)
+{
+  float index_f;
+  int index;
+  index_f = roundf ((x - FIRST_ZERO_J0) / (float) M_PI);
+  /* j0f_asympt fails to give an error <= 9 ulps for x=0x1.324e92p+7 (index 48)
+     thus we can't reduce SMALL_SIZE below 49. */
+  if (index_f >= SMALL_SIZE)
+    return j0f_asympt (x);
+  index = (int) index_f;
+  float *p = Pj[index];
+  float x0 = p[0];
+  float x1 = p[2];
+  /* If we are not in the interval [x0,x1] around xmid, we return the value z. */
+  if (! (x0 <= x && x <= x1))
+    return z;
+  float xmid = p[1];
+  float y = x - xmid;
+  return p[3] + y * (p[4] + y * (p[5] + y * p[6]));
+}
+
 float
 __ieee754_j0f(float x)
 {
@@ -51,36 +375,27 @@ __ieee754_j0f(float x)
 		__sincosf (x, &s, &c);
 		ss = s-c;
 		cc = s+c;
-		if(ix<0x7f000000) {  /* make sure x+x not overflow */
-		    z = -__cosf(x+x);
-		    if ((s*c)<zero) cc = z/ss;
-		    else	    ss = z/cc;
-		} else {
-		    /* We subtract (exactly) a value x0 such that
-		       cos(x0)+sin(x0) is very near to 0, and use the identity
-		       sin(x-x0) = sin(x)*cos(x0)-cos(x)*sin(x0) to get
-		       sin(x) + cos(x) with extra accuracy.  */
-		    float x0 = 0xe.d4108p+124f;
-		    float y = x - x0; /* exact  */
-		    /* sin(y) = sin(x)*cos(x0)-cos(x)*sin(x0)  */
-		    z = __sinf (y);
-		    float eps = 0x1.5f263ep-24f;
-		    /* cos(x0) ~ -sin(x0) + eps  */
-		    z += eps * __cosf (x);
-		    /* now z ~ (sin(x)-cos(x))*cos(x0)  */
-		    float cosx0 = -0xb.504f3p-4f;
-		    cc = z / cosx0;
-                }
+                if (ix >= 0x7f000000) /* x >= 2^127: use asymptotic expansion. */
+                  return j0f_asympt (x);
+                /* Now we are sure that x+x cannot overflow. */
+                z = -__cosf(x+x);
+                if ((s*c)<zero) cc = z/ss;
+                else	    ss = z/cc;
 	/*
 	 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
 	 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
 	 */
-		if(ix>0x5c000000) z = (invsqrtpi*cc)/sqrtf(x);
-		else {
-		    u = pzerof(x); v = qzerof(x);
-		    z = invsqrtpi*(u*cc-v*ss)/sqrtf(x);
-		}
-		return z;
+                if (ix <= 0x5c000000)
+                  {
+                    u = pzerof(x); v = qzerof(x);
+                    cc = u*cc-v*ss;
+                  }
+                z = (invsqrtpi * cc) / sqrtf(x);
+                float magic = 0x1.8p+20; /* 2^20 + 2^19 */
+                if (magic + cc != magic) /* Most likely. */
+                  return z;
+                else /* |cc| <= 2^-4 */
+                  return j0f_near_root (x, z);
 	}
 	if(ix<0x39000000) {	/* |x| < 2**-13 */
 	    math_force_eval(huge+x);		/* raise inexact if x != 0 */
@@ -112,6 +427,168 @@ v02  =  7.6006865129e-05, /* 0x389f65e0 */
 v03  =  2.5915085189e-07, /* 0x348b216c */
 v04  =  4.4111031494e-10; /* 0x2ff280c2 */
 
+#define FIRST_ZERO_Y0 0.893576f
+
+#define SMALL_SIZE 64
+
+/* The following table contains successive zeros of y0 and degree-3 polynomial
+   approximations of y0 around these zeros: Py[0] for the first zero (0.893576),
+   Py[1] for the second one (3.957678), and so on.  Each line contains:
+              {x0, xmid, x1, p0, p1, p2, p3}
+   where [x0,x1] is the interval around the zero, xmid is the binary32 number closest
+   to the zero, and p0+p1*x+p2*x^2+p3*x^3 is the approximation polynomial.
+   Each polynomial was generated using Remez' algorithm on the interval [x0,x1]
+   around the corresponding zero where the error is larger than 9 ulps for the
+   default code below.  Degree 3 is enough to get an error less than 4 ulps.
+*/
+static float Py[SMALL_SIZE][7] = {
+  /* For the first root we use a degree-4 polynomial since degree 3 is not enough,
+     where the coefficient of degree 4 is hard-coded in y0f_near_root(). */
+  { 0xd.bd613p-4, 0xe.4c176p-4, 0xe.e0897p-4, 0x3.274468p-28, 0xe.121b7p-4, -0x7.df8eap-4, 0x3.88cc2p-4 },
+  { 0x3.f2af8cp+0, 0x3.f52a68p+0, 0x3.fa1fa4p+0, 0xa.f1f83p-28, -0x6.70d098p-4, 0xd.04dc4p-8, 0xe.f2a5p-8 },
+  { 0x7.1464ap+0, 0x7.16077p+0, 0x7.191dap+0, -0x5.e2a51p-28, 0x4.cd3328p-4, -0x5.6bbb5p-8, -0xc.48cfbp-8 },
+  { 0xa.37ec2p+0, 0xa.38ebap+0, 0xa.3ad94p+0, -0x1.4b0aeep-24, -0x3.fec6b8p-4, 0x3.206ccp-8, 0xa.72401p-8 },
+  { 0xd.5bd7dp+0, 0xd.5c70ep+0, 0xd.5d94ap+0, -0x8.179d7p-28, 0x3.7e6544p-4, -0x2.178554p-8, -0x9.35f5bp-8 },
+  { 0x1.07f9aap+4, 0x1.0803c8p+4, 0x1.08170cp+4, -0x2.5b8078p-24, -0x3.24b868p-4, 0x1.86265ep-8, 0x8.51de2p-8 },
+  { 0x1.3a3d44p+4, 0x1.3a42cep+4, 0x1.3a4d8ap+4, 0x1.cd304ap-28, 0x2.e189ecp-4, -0x1.2c673ap-8, -0x7.a4726p-8 },
+  { 0x1.6c7d5ep+4, 0x1.6c833p+4, 0x1.6c99fp+4, -0x6.c63f1p-28, -0x2.acc9a8p-4, 0xf.077a1p-12, 0x7.1aba98p-8 },
+  { 0x1.9ebec4p+4, 0x1.9ec47p+4, 0x1.9ed016p+4, 0x1.e53838p-24, 0x2.81f2p-4, -0xc.61ccp-12, -0x6.aaa99p-8 },
+  { 0x1.d1008ep+4, 0x1.d10644p+4, 0x1.d10cf2p+4, 0x1.636feep-24, -0x2.5e40dcp-4, 0xa.6dfp-12, 0x6.4cd5a8p-8 },
+  { 0x2.0344f8p+4, 0x2.034884p+4, 0x2.034d4p+4, -0x4.04e1bp-28, 0x2.3febd8p-4, -0x8.f0ff9p-12, -0x5.fcd088p-8 },
+  { 0x2.358778p+4, 0x2.358b1p+4, 0x2.359224p+4, 0x3.6063d8p-24, -0x2.25baacp-4, 0x7.c6a088p-12, 0x5.b78ff8p-8 },
+  { 0x2.67ca1p+4, 0x2.67cdd8p+4, 0x2.67d434p+4, -0x3.f39ebcp-24, 0x2.0ed04cp-4, -0x6.d7eaf8p-12, -0x5.7aeaa8p-8 },
+  { 0x2.9a0d0cp+4, 0x2.9a10dp+4, 0x2.9a1828p+4, -0x4.ea23p-28, -0x1.fa8b4p-4, 0x6.158438p-12, 0x5.45324p-8 },
+  { 0x2.cc51ecp+4, 0x2.cc53e8p+4, 0x2.cc580cp+4, -0x3.5df0c8p-24, 0x1.e8727ep-4, -0x5.7460d8p-12, -0x5.1536p-8 },
+  { 0x2.fe94f8p+4, 0x2.fe972p+4, 0x2.fe9b18p+4, 0x1.1ef09ep-24, -0x1.d8293ap-4, 0x4.ed6058p-12, 0x4.e9fcc8p-8 },
+  { 0x3.30d8b8p+4, 0x3.30da7p+4, 0x3.30debcp+4, 0x1.b70cecp-24, 0x1.c967p-4, -0x4.7ad838p-12, -0x4.c2c9d8p-8 },
+  { 0x3.631b94p+4, 0x3.631ddp+4, 0x3.632244p+4, 0x1.abaadcp-24, -0x1.bbf246p-4, 0x4.187ba8p-12, 0x4.9f09f8p-8 },
+  { 0x3.955f7cp+4, 0x3.956144p+4, 0x3.9565fcp+4, 0x1.63989ep-24, 0x1.af9cb4p-4, -0x3.c397f8p-12, -0x4.7e4038p-8 },
+  { 0x3.c7a2bp+4, 0x3.c7a4c4p+4, 0x3.c7a878p+4, -0x1.68a8ecp-24, -0x1.a4407ep-4, 0x3.797fdcp-12, 0x4.600d3p-8 },
+  { 0x3.f9e62cp+4, 0x3.f9e85p+4, 0x3.f9ea7p+4, 0x1.e1bb5p-24, 0x1.99be74p-4, -0x3.38739cp-12, -0x4.441c5p-8 },
+  { 0x4.2c2a1p+4, 0x4.2c2be8p+4, 0x4.2c2e7p+4, -0x5.5bbcfp-24, -0x1.8ffc9ap-4, 0x2.ff0f5cp-12, 0x4.2a266p-8 },
+  { 0x4.5e6d98p+4, 0x4.5e6f8p+4, 0x4.5e71c8p+4, -0x4.9e34a8p-24, 0x1.86e51cp-4, -0x2.cba284p-12, -0x4.11f568p-8 },
+  { 0x4.90b17p+4, 0x4.90b328p+4, 0x4.90b5ap+4, 0x1.966706p-24, -0x1.7e657p-4, 0x2.9e0d44p-12, 0x3.fb56a4p-8 },
+  { 0x4.c2f59p+4, 0x4.c2f6d8p+4, 0x4.c2fac8p+4, 0x3.4b3b68p-24, 0x1.766dc2p-4, -0x2.752fbp-12, -0x3.e61fcp-8 },
+  { 0x4.f53968p+4, 0x4.f53a88p+4, 0x4.f53cb8p+4, 0x6.a99008p-28, -0x1.6ef07ep-4, 0x2.501294p-12, 0x3.d230ep-8 },
+  { 0x5.277dp+4, 0x5.277e4p+4, 0x5.278108p+4, -0x7.a9fa6p-32, 0x1.67e1dap-4, -0x2.2e9388p-12, -0x3.bf663cp-8 },
+  { 0x5.59c0e8p+4, 0x5.59c2p+4, 0x5.59c398p+4, -0x5.026808p-24, -0x1.613798p-4, 0x2.104558p-12, 0x3.ada76cp-8 },
+  { 0x5.8c0498p+4, 0x5.8c05cp+4, 0x5.8c0898p+4, 0x4.46aa2p-24, 0x1.5ae8c2p-4, -0x1.f474eep-12, -0x3.9cda1cp-8 },
+  { 0x5.be48dp+4, 0x5.be498p+4, 0x5.be4aap+4, 0x1.5cfccp-24, -0x1.54ed76p-4, 0x1.dad812p-12, 0x3.8cec8p-8 },
+  { 0x5.f08c08p+4, 0x5.f08d48p+4, 0x5.f08ecp+4, -0xb.4dc4cp-28, 0x1.4f3ebcp-4, -0x1.c38338p-12, -0x3.7dc9dp-8 },
+  { 0x6.22d05p+4, 0x6.22d11p+4, 0x6.22d428p+4, 0x3.f5343p-24, -0x1.49d668p-4, 0x1.ade97cp-12, 0x3.6f610cp-8 },
+  { 0x6.55137p+4, 0x6.5514ep+4, 0x6.551638p+4, -0x6.e6f32p-28, 0x1.44aefap-4, -0x1.9a2d3ep-12, -0x3.61a778p-8 },
+  { 0x6.8757e8p+4, 0x6.8758bp+4, 0x6.8759c8p+4, 0x1.f359c2p-28, -0x1.3fc386p-4, 0x1.87d25cp-12, 0x3.548be4p-8 },
+  { 0x6.b99bp+4, 0x6.b99c8p+4, 0x6.b99e2p+4, -0x2.9de748p-24, 0x1.3b0fa4p-4, -0x1.76b5aap-12, -0x3.48048p-8 },
+  { 0x6.ebdfb8p+4, 0x6.ebe058p+4, 0x6.ebe1bp+4, -0x2.24ccc8p-24, -0x1.368f5cp-4, 0x1.67061p-12, 0x3.3c0608p-8 },
+  { 0x7.1e2368p+4, 0x7.1e243p+4, 0x7.1e25bp+4, 0x4.7dcea8p-24, 0x1.323f16p-4, -0x1.5858c4p-12, -0x3.3087acp-8 },
+  { 0x7.50673p+4, 0x7.506808p+4, 0x7.5069ap+4, -0x4.b538p-24, -0x1.2e1b98p-4, 0x1.4a9624p-12, 0x3.258078p-8 },
+  { 0x7.82ab38p+4, 0x7.82abep+4, 0x7.82ad78p+4, 0x3.09dc4cp-24, 0x1.2a21ecp-4, -0x1.3da94p-12, -0x3.1ae88cp-8 },
+  { 0x7.b4eeep+4, 0x7.b4efb8p+4, 0x7.b4f158p+4, 0x4.d5a58p-28, -0x1.264f66p-4, 0x1.317f8cp-12, 0x3.10b8fcp-8 },
+  { 0x7.e732cp+4, 0x7.e73398p+4, 0x7.e73548p+4, 0x3.f4c44cp-24, 0x1.22a192p-4, -0x1.265128p-12, -0x3.06eb08p-8 },
+  { 0x8.1976bp+4, 0x8.19777p+4, 0x8.19783p+4, 0x2.4beae8p-24, -0x1.1f1634p-4, 0x1.1b7d2ap-12, 0x2.fd7934p-8 },
+  { 0x8.4bbbp+4, 0x8.4bbb5p+4, 0x8.4bbcep+4, -0xd.a581ep-28, 0x1.1bab3cp-4, -0x1.1186d6p-12, -0x2.f45cep-8 },
+  { 0x8.7dfe8p+4, 0x8.7dff3p+4, 0x8.7dffbp+4, 0xa.7c0bdp-28, -0x1.185eccp-4, 0x1.0819c2p-12, 0x2.eb92ccp-8 },
+  { 0x8.b042bp+4, 0x8.b0431p+4, 0x8.b043dp+4, -0x1.9452ecp-24, 0x1.152f26p-4, -0xf.f2b59p-16, -0x2.e314bp-8 },
+  { 0x8.e2868p+4, 0x8.e286fp+4, 0x8.e287cp+4, 0x3.83b7b8p-24, -0x1.121ab2p-4, 0xf.6b21p-16, 0x2.dadf34p-8 },
+  { 0x9.14ca8p+4, 0x9.14cadp+4, 0x9.14cbbp+4, -0x6.5ca3a8p-24, 0x1.0f1ff2p-4, -0xe.ea544p-16, -0x2.d2ee28p-8 },
+  { 0x9.470e6p+4, 0x9.470ecp+4, 0x9.470fp+4, -0x6.bb61ap-24, -0x1.0c3d8ap-4, 0xe.7833fp-16, 0x2.cb3e2cp-8 },
+  { 0x9.79524p+4, 0x9.7952ap+4, 0x9.79539p+4, 0x2.2438p-24, 0x1.097236p-4, -0xe.03747p-16, -0x2.c3cb48p-8 },
+  { 0x9.ab95cp+4, 0x9.ab968p+4, 0x9.ab971p+4, 0x3.1e0054p-24, -0x1.06bcc8p-4, 0xd.94272p-16, 0x2.bc9328p-8 },
+  { 0x9.ddd9fp+4, 0x9.ddda7p+4, 0x9.dddb5p+4, 0x7.46c908p-24, 0x1.041c28p-4, -0xd.320e5p-16, -0x2.b5920cp-8 },
+  { 0xa.101d7p+4, 0xa.101e5p+4, 0xa.101f3p+4, -0xb.4f158p-28, -0x1.018f52p-4, 0xc.cc7dfp-16, 0x2.aec5f4p-8 },
+  { 0xa.4261cp+4, 0xa.42623p+4, 0xa.4262bp+4, -0x6.5a89c8p-24, 0xf.f155p-8, -0xc.6b5cbp-16, -0x2.a82be4p-8 },
+  { 0xa.74a5cp+4, 0xa.74a62p+4, 0xa.74a6ap+4, -0x1.ef16c8p-24, -0xf.cad3fp-8, 0xc.16478p-16, 0x2.a1c1ap-8 },
+  { 0xa.a6e9bp+4, 0xa.a6eap+4, 0xa.a6ea9p+4, -0x6.1e7b68p-24, 0xf.a564cp-8, -0xb.bd1eap-16, -0x2.9b84f8p-8 },
+  { 0xa.d92d7p+4, 0xa.d92dfp+4, 0xa.d92eap+4, -0xf.8c858p-28, -0xf.80faep-8, 0xb.6f5dbp-16, 0x2.9573dcp-8 },
+  { 0xb.0b71ap+4, 0xb.0b71ep+4, 0xb.0b727p+4, 0x7.75d858p-24, 0xf.5d8abp-8, -0xb.24e88p-16, -0x2.8f8c4cp-8 },
+  { 0xb.3db58p+4, 0xb.3db5cp+4, 0xb.3db68p+4, 0x1.d78632p-24, -0xf.3b096p-8, 0xa.d5ef1p-16, 0x2.89cc8p-8 },
+  { 0xb.6ff96p+4, 0xb.6ff9bp+4, 0xb.6ffaap+4, 0x3.b24794p-24, 0xf.196c7p-8, -0xa.918e2p-16, -0x2.8432cp-8 },
+  { 0xb.a23d1p+4, 0xb.a23d9p+4, 0xb.a23e5p+4, 0x6.39cbc8p-24, -0xe.f8aa5p-8, 0xa.486fap-16, 0x2.7ebd9p-8 },
+  { 0xb.d481p+4, 0xb.d4818p+4, 0xb.d481cp+4, -0x1.820e3ap-24, 0xe.d8b9dp-8, -0xa.0973fp-16, -0x2.796b4cp-8 },
+  { 0xc.06c4fp+4, 0xc.06c57p+4, 0xc.06c5fp+4, -0x2.c7e038p-24, -0xe.b9925p-8, 0x9.cce94p-16, 0x2.743a5cp-8 },
+  { 0xc.3908ep+4, 0xc.39096p+4, 0xc.390a2p+4, 0x6.ab31c8p-24, 0xe.9b2bep-8, -0x9.92ad2p-16, -0x2.6f2994p-8 },
+  { 0xc.6b4cdp+4, 0xc.6b4d4p+4, 0xc.6b4d8p+4, 0x4.4ef25p-24, -0xe.7d7ecp-8, 0x9.5360fp-16, 0x2.6a37dp-8 }
+};
+
+/* Formula page 5 of https://www.cl.cam.ac.uk/~jrh13/papers/bessel.pdf:
+   y0(x) ~ sqrt(2/(pi*x))*beta0(x)*sin(x-pi/4-alpha0(x))
+   where beta0(x) = 1 - 16/x^2 + 53/(512*x^4)
+   and alpha0(x) = 1/(8*x) - 25/(384*x^3). */
+static float
+y0f_asympt (float x)
+{
+  /* The following code fails to give an error <= 9 ulps in only two cases,
+     for which we tabulate the correctly-rounded result. */
+  if (x == 0x1.bfad96p+7f)
+    return -0x7.f32bdp-32f;
+  if (x == 0x1.2e2a42p+17f)
+    return 0x1.a48974p-40f;
+  double y = 1.0 / (double) x;
+  double y2 = y * y;
+  double beta0 = 1.0f + y2 * (-0x1p-4f + 0x1.a8p-4 * y2);
+  double alpha0 = y * (0x2p-4 - 0x1.0aaaaap-4 * y2);
+  double h;
+  h = reduce_mod_twopi ((double) x);
+  /* Subtract alpha0. */
+  h -= alpha0;
+  /* Now reduce mod pi/2. */
+  double piover2 = 0x1.921fb54442d18p+0;
+  int n = 0;
+  while (fabs (h) > piover2 / 2)
+    {
+      if (h > 0)
+        {
+          h -= piover2;
+          n ++;
+        }
+      else
+        {
+          h += piover2;
+          n --;
+        }
+    }
+  /* Now x - pi/4 - alpha0 = h + n*pi/2 mod (2*pi). */
+  float xr = (float) h;
+  n = n & 3;
+  float cst = 0xc.c422ap-4; /* sqrt(2/pi) rounded to nearest */
+  float t = cst / sqrtf (x) * (float) beta0;
+  if (n == 0)
+    return t * sinf (xr);
+  else if (n == 2) /* sin(x+pi) = -sin(x) */
+    return -t * sinf (xr);
+  else if (n == 1) /* sin(x+pi/2) = cos(x) */
+    return t * cosf (xr);
+  else             /* sin(x+3pi/2) = -cos(x) */
+    return -t * cosf (xr);
+}
+
+/* Special code for x near a root of y0.
+   z is the value computed by the generic code.
+   For small x, we use a polynomial approximating y0 around its root.
+   For large x, we use an asymptotic formula (y0f_asympt). */
+static float
+y0f_near_root (float x, float z)
+{
+  float index_f;
+  int index;
+  index_f = roundf ((x - FIRST_ZERO_Y0) / (float) M_PI);
+  if (index_f >= SMALL_SIZE)
+    return y0f_asympt (x);
+  index = (int) index_f;
+  float *p = Py[index];
+  float x0 = p[0];
+  float x1 = p[2];
+  /* If we are not in the interval [x0,x1] around xmid, we return the value z. */
+  if (! (x0 <= x && x <= x1))
+    return z;
+  float xmid = p[1];
+  float y = x - xmid;
+  /* For degree 0 we use a degree-4 polynomial, where the coefficient of degree 4
+     is hard-coded here. */
+  float p6 = (index > 0) ? p[6] : p[6] + y * -0x3.9ce95p-4;
+  return p[3] + y * (p[4] + y * (p[5] + y * p6));
+}
+
 float
 __ieee754_y0f(float x)
 {
@@ -124,7 +601,8 @@ __ieee754_y0f(float x)
 	if(ix>=0x7f800000) return  one/(x+x*x);
 	if(ix==0) return -1/zero; /* -inf and divide by zero exception.  */
 	if(hx<0) return zero/(zero*x);
-	if(ix >= 0x40000000) {  /* |x| >= 2.0 */
+	if(ix >= 0x40000000 || (0x3f5bd613 <= ix && ix <= 0x3f6e0897)) {
+          /* |x| >= 2.0 or 0x1.b7ac26p-1 <= |x| <= 0x1.dc112ep-1 (around 1st zero) */
 	/* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
 	 * where x0 = x-pi/4
 	 *      Better formula:
@@ -143,17 +621,23 @@ __ieee754_y0f(float x)
 	 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
 	 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
 	 */
-		if(ix<0x7f000000) {  /* make sure x+x not overflow */
-		    z = -__cosf(x+x);
-		    if ((s*c)<zero) cc = z/ss;
-		    else            ss = z/cc;
-		}
-		if(ix>0x5c000000) z = (invsqrtpi*ss)/sqrtf(x);
-		else {
-		    u = pzerof(x); v = qzerof(x);
-		    z = invsqrtpi*(u*ss+v*cc)/sqrtf(x);
-		}
-		return z;
+                if (ix >= 0x7f000000) /* x >= 2^127: use asymptotic expansion. */
+                  return y0f_asympt (x);
+                /* Now we are sure that x+x cannot overflow. */
+                z = -__cosf(x+x);
+                if ((s*c)<zero) cc = z/ss;
+                else            ss = z/cc;
+                if (ix <= 0x5c000000)
+                  {
+                    u = pzerof(x); v = qzerof(x);
+                    ss = u*ss+v*cc;
+                  }
+                z = (invsqrtpi*ss)/sqrtf(x);
+                float magic = 0x1.8p+20; /* 2^20 + 2^19 */
+                if (magic + ss != magic) /* Most likely. */
+                  return z;
+                else /* |cc| <= 2^-4 */
+                  return y0f_near_root (x, z);
 	}
 	if(ix<=0x39800000) {	/* x < 2**-13 */
 	    return(u00 + tpi*__ieee754_logf(x));
@@ -165,7 +649,7 @@ __ieee754_y0f(float x)
 }
 libm_alias_finite (__ieee754_y0f, __y0f)
 
-/* The asymptotic expansions of pzero is
+/* The asymptotic expansion of pzero is
  *	1 - 9/128 s^2 + 11025/98304 s^4 - ...,	where s = 1/x.
  * For x >= 2, We approximate pzero by
  *	pzero(x) = 1 + (R/S)
@@ -257,7 +741,7 @@ pzerof(float x)
 }
 
 
-/* For x >= 8, the asymptotic expansions of qzero is
+/* For x >= 8, the asymptotic expansion of qzero is
  *	-1/8 s + 75/1024 s^3 - ..., where s = 1/x.
  * We approximate pzero by
  *	qzero(x) = s*(-1.25 + (R/S))
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 633d2ab8e4..425e1cc881 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1312,22 +1312,22 @@ float128: 1
 ldouble: 1
 
 Function: "j0":
-double: 2
-float: 8
-float128: 2
+double: 5
+float: 9
+float128: 4
 ldouble: 2
 
 Function: "j0_downward":
 double: 2
-float: 4
+float: 9
 float128: 4
 ldouble: 6
 
 Function: "j0_towardzero":
-double: 5
-float: 6
+double: 9
+float: 7
 float128: 4
-ldouble: 6
+ldouble: 9
 
 Function: "j0_upward":
 double: 4
@@ -1748,10 +1748,10 @@ float128: 4
 ldouble: 5
 
 Function: "y0":
-double: 3
-float: 8
-float128: 3
-ldouble: 1
+double: 8
+float: 9
+float128: 4
+ldouble: 2
 
 Function: "y0_downward":
 double: 3
@@ -1760,15 +1760,15 @@ float128: 4
 ldouble: 5
 
 Function: "y0_towardzero":
-double: 3
+double: 5
 float: 3
 float128: 3
-ldouble: 6
+ldouble: 7
 
 Function: "y0_upward":
 double: 3
 float: 6
-float128: 3
+float128: 7
 ldouble: 5
 
 Function: "y1":
-- 
2.29.2



More information about the Libc-alpha mailing list