[PATCH] Fix the inaccuracy of j1f (BZ 14470) and y1f (BZ 14472) [v2]
Paul Zimmermann
Paul.Zimmermann@inria.fr
Fri Jan 22 10:44:05 GMT 2021
For both j1f and y1f, the largest error for all binary32 inputs is now less
then 9.5 ulps with respect to the infinite precision result, i.e., less than
9 ulps after rounding, which is the largest error allowed. (This is for
rounding to nearest; for other rounding modes, the new code should not behave
worse than the old one.)
The new code is enabled only when there is a cancellation at the very end of
the j1f/y1f computation, or for very large inputs, thus should not give any
visible slowdown on average. Two different algorithms are used:
* around the first 64 zeros of j1/y1, approximation polynomials of degree 3
or 4 are used, computed using the Sollya tool (https://www.sollya.org/)
* for large inputs, an asymptotic formula from [1] is used
[1] Fast and Accurate Bessel Function Computation,
John Harrison, Proceedings of Arith 19, 2009.
The largest error is now obtained for the following inputs respectively:
libm wrong by up to 9.49e+00 ulp(s) for x=0x1.0fbe5ep+7
j1f gives -0x1.8b5cd2p-15
mpfr_j1 gives -0x1.8b5ccp-15
libm wrong by up to 9.49e+00 ulp(s) for x=0x1.405122p+5
y1f gives -0x1.a25b7cp-11
mpfr_y1 gives -0x1.a25b6ap-11
---
math/auto-libm-test-in | 5 +-
math/auto-libm-test-out-j1 | 25 ++
math/auto-libm-test-out-y1 | 25 ++
sysdeps/ieee754/flt-32/e_j1f.c | 582 ++++++++++++++++++++++++++++--
sysdeps/x86_64/fpu/libm-test-ulps | 24 +-
5 files changed, 619 insertions(+), 42 deletions(-)
diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in
index 73840b8bef..24abc984cd 100644
--- a/math/auto-libm-test-in
+++ b/math/auto-libm-test-in
@@ -5791,8 +5791,9 @@ j1 0x1p-60
j1 0x1p-100
j1 0x1p-600
j1 0x1p-10000
-# the next value generates larger error bounds on x86_64 (binary32)
+# the next values generate large error bounds on x86_64 (binary32)
j1 0x3.ae4b2p+0 xfail-rounding:ibm128-libgcc
+j1 0x1.0fbe5ep+7
j1 min
j1 -min
j1 min_subnorm
@@ -8255,6 +8256,8 @@ y1 0x1p-100
y1 0x1p-110
y1 0x1p-600
y1 0x1p-10000
+# the next value generates large error bounds on x86_64 (binary32)
+y1 0x1.405122p+5
y1 min
y1 min_subnorm
diff --git a/math/auto-libm-test-out-j1 b/math/auto-libm-test-out-j1
index 6bc3bbe5a4..d93c646eb1 100644
--- a/math/auto-libm-test-out-j1
+++ b/math/auto-libm-test-out-j1
@@ -993,6 +993,31 @@ j1 0x3.ae4b2p+0 xfail-rounding:ibm128-libgcc
= j1 tonearest ibm128 0x3.ae4b2p+0 : 0xf.d085c66e86f30267f22d6f787cp-8 : inexact-ok
= j1 towardzero ibm128 0x3.ae4b2p+0 : 0xf.d085c66e86f30267f22d6f787cp-8 : xfail:ibm128-libgcc inexact-ok
= j1 upward ibm128 0x3.ae4b2p+0 : 0xf.d085c66e86f30267f22d6f788p-8 : xfail:ibm128-libgcc inexact-ok
+j1 0x1.0fbe5ep+7
+= j1 downward binary32 0x8.7df2fp+4 : -0x3.16b98p-16 : inexact-ok
+= j1 tonearest binary32 0x8.7df2fp+4 : -0x3.16b98p-16 : inexact-ok
+= j1 towardzero binary32 0x8.7df2fp+4 : -0x3.16b97cp-16 : inexact-ok
+= j1 upward binary32 0x8.7df2fp+4 : -0x3.16b97cp-16 : inexact-ok
+= j1 downward binary64 0x8.7df2fp+4 : -0x3.16b97e0bd74b6p-16 : inexact-ok
+= j1 tonearest binary64 0x8.7df2fp+4 : -0x3.16b97e0bd74b6p-16 : inexact-ok
+= j1 towardzero binary64 0x8.7df2fp+4 : -0x3.16b97e0bd74b4p-16 : inexact-ok
+= j1 upward binary64 0x8.7df2fp+4 : -0x3.16b97e0bd74b4p-16 : inexact-ok
+= j1 downward intel96 0x8.7df2fp+4 : -0x3.16b97e0bd74b5494p-16 : inexact-ok
+= j1 tonearest intel96 0x8.7df2fp+4 : -0x3.16b97e0bd74b549p-16 : inexact-ok
+= j1 towardzero intel96 0x8.7df2fp+4 : -0x3.16b97e0bd74b549p-16 : inexact-ok
+= j1 upward intel96 0x8.7df2fp+4 : -0x3.16b97e0bd74b549p-16 : inexact-ok
+= j1 downward m68k96 0x8.7df2fp+4 : -0x3.16b97e0bd74b5494p-16 : inexact-ok
+= j1 tonearest m68k96 0x8.7df2fp+4 : -0x3.16b97e0bd74b549p-16 : inexact-ok
+= j1 towardzero m68k96 0x8.7df2fp+4 : -0x3.16b97e0bd74b549p-16 : inexact-ok
+= j1 upward m68k96 0x8.7df2fp+4 : -0x3.16b97e0bd74b549p-16 : inexact-ok
+= j1 downward binary128 0x8.7df2fp+4 : -0x3.16b97e0bd74b54917f09d8f15fd6p-16 : inexact-ok
+= j1 tonearest binary128 0x8.7df2fp+4 : -0x3.16b97e0bd74b54917f09d8f15fd6p-16 : inexact-ok
+= j1 towardzero binary128 0x8.7df2fp+4 : -0x3.16b97e0bd74b54917f09d8f15fd4p-16 : inexact-ok
+= j1 upward binary128 0x8.7df2fp+4 : -0x3.16b97e0bd74b54917f09d8f15fd4p-16 : inexact-ok
+= j1 downward ibm128 0x8.7df2fp+4 : -0x3.16b97e0bd74b54917f09d8f16p-16 : inexact-ok
+= j1 tonearest ibm128 0x8.7df2fp+4 : -0x3.16b97e0bd74b54917f09d8f16p-16 : inexact-ok
+= j1 towardzero ibm128 0x8.7df2fp+4 : -0x3.16b97e0bd74b54917f09d8f15fp-16 : inexact-ok
+= j1 upward ibm128 0x8.7df2fp+4 : -0x3.16b97e0bd74b54917f09d8f15fp-16 : inexact-ok
j1 min
= j1 downward binary32 0x4p-128 : 0x1.fffff8p-128 : inexact-ok underflow errno-erange-ok
= j1 tonearest binary32 0x4p-128 : 0x2p-128 : inexact-ok underflow errno-erange-ok
diff --git a/math/auto-libm-test-out-y1 b/math/auto-libm-test-out-y1
index af68e6c05a..642a2f00a6 100644
--- a/math/auto-libm-test-out-y1
+++ b/math/auto-libm-test-out-y1
@@ -795,6 +795,31 @@ y1 0x1p-10000
= y1 tonearest binary128 0x1p-10000 : -0xa.2f9836e4e441529fc2757d1f535p+9996 : inexact-ok
= y1 towardzero binary128 0x1p-10000 : -0xa.2f9836e4e441529fc2757d1f5348p+9996 : inexact-ok
= y1 upward binary128 0x1p-10000 : -0xa.2f9836e4e441529fc2757d1f5348p+9996 : inexact-ok
+y1 0x1.405122p+5
+= y1 downward binary32 0x2.80a244p+4 : -0x3.44b6d4p-12 : inexact-ok
+= y1 tonearest binary32 0x2.80a244p+4 : -0x3.44b6d4p-12 : inexact-ok
+= y1 towardzero binary32 0x2.80a244p+4 : -0x3.44b6dp-12 : inexact-ok
+= y1 upward binary32 0x2.80a244p+4 : -0x3.44b6dp-12 : inexact-ok
+= y1 downward binary64 0x2.80a244p+4 : -0x3.44b6d20bbfe2p-12 : inexact-ok
+= y1 tonearest binary64 0x2.80a244p+4 : -0x3.44b6d20bbfe1ep-12 : inexact-ok
+= y1 towardzero binary64 0x2.80a244p+4 : -0x3.44b6d20bbfe1ep-12 : inexact-ok
+= y1 upward binary64 0x2.80a244p+4 : -0x3.44b6d20bbfe1ep-12 : inexact-ok
+= y1 downward intel96 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb7cp-12 : inexact-ok
+= y1 tonearest intel96 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb78p-12 : inexact-ok
+= y1 towardzero intel96 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb78p-12 : inexact-ok
+= y1 upward intel96 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb78p-12 : inexact-ok
+= y1 downward m68k96 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb7cp-12 : inexact-ok
+= y1 tonearest m68k96 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb78p-12 : inexact-ok
+= y1 towardzero m68k96 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb78p-12 : inexact-ok
+= y1 upward m68k96 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb78p-12 : inexact-ok
+= y1 downward binary128 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb796a80a98e3808p-12 : inexact-ok
+= y1 tonearest binary128 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb796a80a98e3806p-12 : inexact-ok
+= y1 towardzero binary128 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb796a80a98e3806p-12 : inexact-ok
+= y1 upward binary128 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb796a80a98e3806p-12 : inexact-ok
+= y1 downward ibm128 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb796a80a98e39p-12 : inexact-ok
+= y1 tonearest ibm128 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb796a80a98e38p-12 : inexact-ok
+= y1 towardzero ibm128 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb796a80a98e38p-12 : inexact-ok
+= y1 upward ibm128 0x2.80a244p+4 : -0x3.44b6d20bbfe1eb796a80a98e38p-12 : inexact-ok
y1 min
= y1 downward binary32 0x4p-128 : -0x2.8be61p+124 : inexact-ok
= y1 tonearest binary32 0x4p-128 : -0x2.8be60cp+124 : inexact-ok
diff --git a/sysdeps/ieee754/flt-32/e_j1f.c b/sysdeps/ieee754/flt-32/e_j1f.c
index ac5bb76828..55ed5c478d 100644
--- a/sysdeps/ieee754/flt-32/e_j1f.c
+++ b/sysdeps/ieee754/flt-32/e_j1f.c
@@ -40,7 +40,350 @@ s03 = 1.1771846857e-06, /* 0x359dffc2 */
s04 = 5.0463624390e-09, /* 0x31ad6446 */
s05 = 1.2354227016e-11; /* 0x2d59567e */
-static const float zero = 0.0;
+static const float zero = 0.0;
+
+#define FIRST_ZERO_J1 3.831705f /* First positive zero of j1. */
+
+#define SMALL_SIZE 64
+
+/* The following table contains successive zeros of j1 and degree-3 polynomial
+ approximations of j1 around these zeros: Pj[0] for the first positive zero
+ (3.831705), Pj[1] for the second one (7.015586), and so on. Each line contains:
+ {x0, xmid, x1, p0, p1, p2, p3}
+ where [x0,x1] is the interval around the zero, xmid is the binary32 number closest
+ to the zero, and p0+p1*x+p2*x^2+p3*x^3 is the approximation polynomial.
+ Each polynomial was generated using Remez' algorithm on the interval [x0,x1]
+ around the corresponding zero where the error is larger than 9 ulps for the
+ default code below. Degree 3 is enough to get an error less than 4 ulps,
+ except around the first zero.
+*/
+static const float Pj[SMALL_SIZE][7] = {
+ /* For index 0, we use a degree-4 polynomial generated by Sollya, with the
+ coefficient of degree 4 hard-coded in j1f_near_root(). */
+ { 0x3.c14204p+0, 0x3.d4eabp+0, 0x3.dee2b4p+0, -0x8.4f069p-28, -0x6.71b3d8p-4, 0xd.744a3p-8, 0xd.acd9bp-8 },
+ { 0x6.f6d308p+0, 0x7.03fd8p+0, 0x7.09038p+0, 0xe.c2858p-28, 0x4.cd464p-4, -0x5.79cec8p-8, -0xc.12b1ep-8 },
+ { 0xa.24741p+0, 0xa.2c687p+0, 0xa.30667p+0, -0x1.e7acecp-24, -0x3.feca8cp-4, 0x3.2442b4p-8, 0xa.5c1e2p-8 },
+ { 0xd.4e4cp+0, 0xd.52dd8p+0, 0xd.55249p+0, 0x1.698158p-24, 0x3.7e666cp-4, -0x2.1906ap-8, -0x9.2a415p-8 },
+ { 0x1.073be4p+4, 0x1.0787b4p+4, 0x1.07aep+4, -0x1.f5f658p-24, -0x3.24b8ep-4, 0x1.86dd04p-8, 0x8.4b4f4p-8 },
+ { 0x1.39ae36p+4, 0x1.39da8ep+4, 0x1.39f064p+4, -0x1.4e744p-24, 0x2.e18a24p-4, -0x1.2cca26p-8, -0x7.9ff6cp-8 },
+ { 0x1.6bfdeep+4, 0x1.6c294ep+4, 0x1.6c40f2p+4, 0xa.3fb7fp-28, -0x2.acc9c4p-4, 0xf.0b205p-12, 0x7.17e4e8p-8 },
+ { 0x1.9e4ab2p+4, 0x1.9e757p+4, 0x1.9e8222p+4, -0x2.29f6f4p-24, 0x2.81f21p-4, -0xc.640cap-12, -0x6.a8aa2p-8 },
+ { 0x1.d0a4ep+4, 0x1.d0bfdp+4, 0x1.d0cd3cp+4, -0x1.b5d196p-24, -0x2.5e40e4p-4, 0xa.6f9dp-12, 0x6.4b1ad8p-8 },
+ { 0x2.02ef28p+4, 0x2.0308ecp+4, 0x2.0317p+4, -0x4.0e001p-24, 0x2.3febep-4, -0x8.f1faep-12, -0x5.fb7998p-8 },
+ { 0x2.35425cp+4, 0x2.35512p+4, 0x2.355f1p+4, 0x3.b26f2p-24, -0x2.25babp-4, 0x7.c76918p-12, 0x5.b66ddp-8 },
+ { 0x2.677bbcp+4, 0x2.6798a4p+4, 0x2.67a5c8p+4, -0xf.c8cdap-28, 0x2.0ed05p-4, -0x6.d899ep-12, -0x5.7a1fc8p-8 },
+ { 0x2.99cf8p+4, 0x2.99dfap+4, 0x2.99efa8p+4, -0x3.9940e4p-24, -0x1.fa8b4p-4, 0x6.1610ap-12, 0x5.447178p-8 },
+ { 0x2.cc07dp+4, 0x2.cc262cp+4, 0x2.cc2e54p+4, 0x9.da15dp-28, 0x1.e8727ep-4, -0x5.74db18p-12, -0x5.14b9dp-8 },
+ { 0x2.fe5d78p+4, 0x2.fe6c64p+4, 0x2.fe74ep+4, -0x3.39b218p-24, -0x1.d8293ap-4, 0x4.edc938p-12, 0x4.e97d68p-8 },
+ { 0x3.30a318p+4, 0x3.30b25p+4, 0x3.30bb28p+4, -0x3.7b5108p-24, 0x1.c96702p-4, -0x4.7ae6c8p-12, -0x4.c25f08p-8 },
+ { 0x3.62e5dp+4, 0x3.62f808p+4, 0x3.62ff0cp+4, -0x1.91e43ep-24, -0x1.bbf246p-4, 0x4.18c358p-12, 0x4.9eb48p-8 },
+ { 0x3.952ab4p+4, 0x3.953d8cp+4, 0x3.954334p+4, 0x1.28453cp-24, 0x1.af9cb4p-4, -0x3.c3bbe4p-12, -0x4.7dfa3p-8 },
+ { 0x3.c77928p+4, 0x3.c782e8p+4, 0x3.c787f4p+4, -0x2.e7fef4p-24, -0x1.a4407ep-4, 0x3.79aaecp-12, 0x4.5fc5e8p-8 },
+ { 0x3.f9be2cp+4, 0x3.f9c81cp+4, 0x3.f9cd08p+4, -0x3.23b2fcp-24, 0x1.99be76p-4, -0x3.386584p-12, -0x4.43dc7p-8 },
+ { 0x4.2c034p+4, 0x4.2c0d38p+4, 0x4.2c14c8p+4, -0xd.19e93p-28, -0x1.8ffc9cp-4, 0x2.ff00f8p-12, 0x4.29ec1p-8 },
+ { 0x4.5e4d1p+4, 0x4.5e5238p+4, 0x4.5e5688p+4, 0x1.c2ac48p-24, 0x1.86e51cp-4, -0x2.cbe84p-12, -0x4.11bfd8p-8 },
+ { 0x4.9090e8p+4, 0x4.90972p+4, 0x4.909b88p+4, -0xd.31027p-28, -0x1.7e656ep-4, 0x2.9e309cp-12, 0x3.fb27ep-8 },
+ { 0x4.c2d6ap+4, 0x4.c2dbf8p+4, 0x4.c2e138p+4, 0x5.b5e53p-24, 0x1.766dc2p-4, -0x2.7550d4p-12, -0x3.e5f5ecp-8 },
+ { 0x4.f51b3p+4, 0x4.f520b8p+4, 0x4.f52578p+4, -0x1.340a8ap-24, -0x1.6ef07ep-4, 0x2.502b88p-12, 0x3.d20a7p-8 },
+ { 0x5.275a2p+4, 0x5.276568p+4, 0x5.276af8p+4, -0x3.ba66p-24, 0x1.67e1dcp-4, -0x2.2e807p-12, -0x3.bf474p-8 },
+ { 0x5.59a458p+4, 0x5.59aa1p+4, 0x5.59afp+4, 0x6.47ca5p-28, -0x1.61379ap-4, 0x2.102354p-12, 0x3.ad8764p-8 },
+ { 0x5.8be4p+4, 0x5.8beea8p+4, 0x5.8bf27p+4, -0x2.12affp-24, 0x1.5ae8c4p-4, -0x1.f44a4ep-12, -0x3.9cc18p-8 },
+ { 0x5.be2868p+4, 0x5.be3338p+4, 0x5.be38dp+4, -0x7.4853ap-28, -0x1.54ed76p-4, 0x1.daedc8p-12, 0x3.8cd45p-8 },
+ { 0x5.f0721p+4, 0x5.f077b8p+4, 0x5.f07acp+4, -0x4.f0a998p-24, 0x1.4f3ebcp-4, -0x1.c367c2p-12, -0x3.7db22p-8 },
+ { 0x6.22b718p+4, 0x6.22bc38p+4, 0x6.22bf1p+4, -0x1.80c246p-24, -0x1.49d668p-4, 0x1.ae1adcp-12, 0x3.6f4c3cp-8 },
+ { 0x6.54f5cp+4, 0x6.5500a8p+4, 0x6.550298p+4, -0x2.22aff8p-24, 0x1.44aefap-4, -0x1.9a24dp-12, -0x3.61967p-8 },
+ { 0x6.874078p+4, 0x6.874518p+4, 0x6.874808p+4, -0x3.aad6d4p-24, -0x1.3fc386p-4, 0x1.87f54p-12, 0x3.5478ep-8 },
+ { 0x6.b983bp+4, 0x6.b98978p+4, 0x6.b98c88p+4, -0x2.010be4p-24, 0x1.3b0fa4p-4, -0x1.76beb4p-12, -0x3.47f348p-8 },
+ { 0x6.ebc8dp+4, 0x6.ebcdd8p+4, 0x6.ebd108p+4, -0xd.4fd17p-32, -0x1.368f5cp-4, 0x1.66f912p-12, 0x3.3bf5fp-8 },
+ { 0x7.1e0b98p+4, 0x7.1e123p+4, 0x7.1e1458p+4, -0xa.d662dp-28, 0x1.323f18p-4, -0x1.5832d6p-12, -0x3.3079d4p-8 },
+ { 0x7.505138p+4, 0x7.50568p+4, 0x7.505848p+4, 0x4.9f217p-24, -0x1.2e1b9ap-4, 0x1.4a4e9cp-12, 0x3.25733cp-8 },
+ { 0x7.82983p+4, 0x7.829adp+4, 0x7.829e2p+4, -0x2.d167p-24, 0x1.2a21eep-4, -0x1.3d7d36p-12, -0x3.1ada9p-8 },
+ { 0x7.b4dbb8p+4, 0x7.b4df2p+4, 0x7.b4e0fp+4, -0x4.15a83p-24, -0x1.264f66p-4, 0x1.31a534p-12, 0x3.10acb4p-8 },
+ { 0x7.e71dcp+4, 0x7.e7236p+4, 0x7.e726bp+4, -0x2.a5bbbp-24, 0x1.22a192p-4, -0x1.261ebp-12, -0x3.06dfd4p-8 },
+ { 0x8.19643p+4, 0x8.1967ap+4, 0x8.196b3p+4, 0x4.e828bp-24, -0x1.1f1634p-4, 0x1.1b6a7p-12, 0x2.fd6d78p-8 },
+ { 0x8.4ba86p+4, 0x8.4babep+4, 0x8.4bad9p+4, -0x3.28a3bcp-24, 0x1.1bab3ep-4, -0x1.11766p-12, -0x2.f452f8p-8 },
+ { 0x8.7dee6p+4, 0x8.7df02p+4, 0x8.7df1fp+4, -0x2.2f667p-24, -0x1.185eccp-4, 0x1.08324ep-12, 0x2.eb8854p-8 },
+ { 0x8.b031cp+4, 0x8.b0345p+4, 0x8.b0362p+4, -0x6.9097dp-24, 0x1.152f28p-4, -0xf.f0528p-16, -0x2.e30b48p-8 },
+ { 0x8.e274dp+4, 0x8.e2789p+4, 0x8.e27a5p+4, -0x5.1b408p-24, -0x1.121abp-4, 0xf.6f895p-16, 0x2.dad6acp-8 },
+ { 0x9.14b55p+4, 0x9.14bccp+4, 0x9.14be5p+4, 0x2.70d0dp-24, 0x1.0f1ffp-4, -0xe.eed25p-16, -0x2.d2e74cp-8 },
+ { 0x9.46fd2p+4, 0x9.4700fp+4, 0x9.4702ap+4, -0x2.7c176p-24, -0x1.0c3d8ap-4, 0xe.7629dp-16, 0x2.cb3674p-8 },
+ { 0x9.79415p+4, 0x9.79452p+4, 0x9.7946fp+4, 0x4.fd6368p-24, 0x1.097236p-4, -0xe.04f4fp-16, -0x2.c3c42p-8 },
+ { 0x9.ab858p+4, 0x9.ab894p+4, 0x9.ab8a8p+4, 0x6.b05f68p-24, -0x1.06bccap-4, 0xd.9270ap-16, 0x2.bc8c5p-8 },
+ { 0x9.ddc99p+4, 0x9.ddcd7p+4, 0x9.ddcf5p+4, 0x4.0d622p-28, 0x1.041c28p-4, -0xd.2e9ccp-16, -0x2.b58b94p-8 },
+ { 0xa.100dbp+4, 0xa.10119p+4, 0xa.10138p+4, 0x7.0d98cp-24, -0x1.018f52p-4, 0xc.c8acfp-16, 0x2.aebfbp-8 },
+ { 0xa.4253cp+4, 0xa.4255cp+4, 0xa.4256cp+4, 0x3.93d10cp-24, 0xf.f154fp-8, -0xc.7062ep-16, -0x2.a825bp-8 },
+ { 0xa.74964p+4, 0xa.7499ep+4, 0xa.749b4p+4, 0xd.88185p-32, -0xf.cad3fp-8, 0xc.15576p-16, 0x2.a1bc0cp-8 },
+ { 0xa.a6dc2p+4, 0xa.a6dep+4, 0xa.a6dfap+4, -0x1.fe6b92p-24, 0xf.a564cp-8, -0xb.bf3bep-16, -0x2.9b7f34p-8 },
+ { 0xa.d91e2p+4, 0xa.d9222p+4, 0xa.d9243p+4, 0x2.6137f4p-24, -0xf.80faep-8, 0xb.6dbd2p-16, 0x2.956ebcp-8 },
+ { 0xb.0b644p+4, 0xb.0b664p+4, 0xb.0b685p+4, -0x1.55468p-24, 0xf.5d8acp-8, -0xb.208ebp-16, -0x2.8f86fcp-8 },
+ { 0xb.3da7cp+4, 0xb.3daa6p+4, 0xb.3dac4p+4, -0x1.08c6bep-24, -0xf.3b096p-8, 0xa.d76a2p-16, 0x2.89c7ap-8 },
+ { 0xb.6fec7p+4, 0xb.6fee8p+4, 0xb.6fefdp+4, 0x4.9ebfa8p-24, 0xf.196c7p-8, -0xa.920eap-16, -0x2.842e2p-8 },
+ { 0xb.a230dp+4, 0xb.a2329p+4, 0xb.a2349p+4, 0x5.a4017p-24, -0xe.f8aa5p-8, 0xa.48c44p-16, 0x2.7eb8d8p-8 },
+ { 0xb.d474fp+4, 0xb.d476bp+4, 0xb.d4776p+4, 0x3.bcb2a8p-28, 0xe.d8b9dp-8, -0xa.0a5cp-16, -0x2.7966fp-8 },
+ { 0xc.06b8ap+4, 0xc.06badp+4, 0xc.06bc5p+4, -0x7.1091fp-24, -0xe.b9925p-8, 0x9.cf163p-16, 0x2.743628p-8 },
+ { 0xc.38facp+4, 0xc.38feep+4, 0xc.3900dp+4, 0x2.ca1cf4p-28, 0xe.9b2bep-8, -0x9.8f762p-16, -0x2.6f25e4p-8 },
+ { 0xc.6b41bp+4, 0xc.6b42fp+4, 0xc.6b441p+4, 0x5.aa8908p-24, -0xe.7d7ecp-8, 0x9.52baep-16, 0x2.6a33ccp-8 },
+ { 0xc.9d84ep+4, 0xc.9d871p+4, 0xc.9d887p+4, 0x3.d9cd9cp-24, 0xe.6083ap-8, -0x9.1feafp-16, -0x2.655fd8p-8 },
+};
+
+/* Argument reduction mod 2pi: T[i] ~ 2^i mod (2*pi). */
+static const double T[128] = {
+ 0x1p+0,
+ 0x2p+0,
+ -0x2.487ed5110b462p+0,
+ 0x1.b7812aeef4b9fp+0,
+ -0x2.d97c7f3321d24p+0,
+ 0x9.585d6aac7a1a8p-4,
+ 0x1.2b0bad558f435p+0,
+ 0x2.56175aab1e86ap+0,
+ -0x1.9c501fbace38dp+0,
+ 0x3.0fde959b6ed46p+0,
+ -0x2.8c1a9da2d9d3cp-4,
+ -0x5.18353b45b3a78p-4,
+ -0xa.306a768b674fp-4,
+ -0x1.460d4ed16ce9ep+0,
+ -0x2.8c1a9da2d9d3cp+0,
+ 0x1.304999cb579e8p+0,
+ 0x2.60933396af3dp+0,
+ -0x1.87586de3accc3p+0,
+ -0x3.0eb0dbc759986p+0,
+ 0x2.b1d1d8258155p-4,
+ 0x5.63a3b04b02aap-4,
+ 0xa.c74760960554p-4,
+ 0x1.58e8ec12c0aa8p+0,
+ 0x2.b1d1d8258155p+0,
+ -0xe.4db24c6089bf8p-4,
+ -0x1.c9b6498c1137fp+0,
+ 0x2.b51241f8e8d64p+0,
+ -0xd.e5a511f3999bp-4,
+ -0x1.bcb4a23e73336p+0,
+ 0x2.cf15909424df4p+0,
+ -0xa.a53b3e8c1877p-4,
+ -0x1.54a767d1830eep+0,
+ -0x2.a94ecfa3061dcp+0,
+ 0xf.5e135caff0a78p-4,
+ 0x1.ebc26b95fe14fp+0,
+ -0x2.70f9fde50f1c4p+0,
+ 0x1.668ad946ed0dap+0,
+ 0x2.cd15b28dda1b4p+0,
+ -0xa.e536ff5570fbp-4,
+ -0x1.5ca6dfeaae1f6p+0,
+ -0x2.b94dbfd55c3ecp+0,
+ 0xd.5e3556652c89p-4,
+ 0x1.abc6aacca5912p+0,
+ -0x2.f0f17f77c023ep+0,
+ 0x6.69bd6218afe64p-4,
+ 0xc.d37ac4315fcc8p-4,
+ 0x1.9a6f58862bf99p+0,
+ -0x3.13a02404b352ep+0,
+ 0x2.13e8d07a4a046p-4,
+ 0x4.27d1a0f49408cp-4,
+ 0x8.4fa341e928118p-4,
+ 0x1.09f4683d25023p+0,
+ 0x2.13e8d07a4a046p+0,
+ -0x2.20ad341c773d4p+0,
+ 0x2.07246cd81ccb8p+0,
+ -0x2.3a35fb60d1af2p+0,
+ 0x1.d412de4f67e7ep+0,
+ -0x2.a05918723b764p+0,
+ 0x1.07cca42c94599p+0,
+ 0x2.0f99485928b32p+0,
+ -0x2.294c445eb9dfep+0,
+ 0x1.f5e64c5397865p+0,
+ -0x2.5cb23c69dc396p+0,
+ 0x1.8f1a5c3d52d34p+0,
+ 0x3.1e34b87aa5a68p+0,
+ -0xc.15641bbff90bp-8,
+ -0x1.82ac8377ff216p-4,
+ -0x3.055906effe42cp-4,
+ -0x6.0ab20ddffc858p-4,
+ -0xc.15641bbff90bp-4,
+ -0x1.82ac8377ff216p+0,
+ -0x3.055906effe42cp+0,
+ 0x3.dccc7310ec09ap-4,
+ 0x7.b998e621d8134p-4,
+ 0xf.7331cc43b0268p-4,
+ 0x1.ee6639887604dp+0,
+ -0x2.6bb262001f3c6p+0,
+ 0x1.711a1110cccd4p+0,
+ 0x2.e2342221999a8p+0,
+ -0x8.41690cdd81128p-4,
+ -0x1.082d219bb0225p+0,
+ -0x2.105a43376044ap+0,
+ 0x2.27ca4ea24abcep+0,
+ -0x1.f8ea37cc75cc6p+0,
+ 0x2.56aa65781fad6p+0,
+ -0x1.9b2a0a20cbeb6p+0,
+ 0x3.122ac0cf736f6p+0,
+ -0x2.4295372246764p-4,
+ -0x4.852a6e448cec8p-4,
+ -0x9.0a54dc8919d9p-4,
+ -0x1.214a9b91233b2p+0,
+ -0x2.4295372246764p+0,
+ 0x1.c35466cc7e59ap+0,
+ -0x2.c1d607780e92ep+0,
+ 0xc.4d2c620ee206p-4,
+ 0x1.89a58c41dc40cp+0,
+ 0x3.134b1883b8818p+0,
+ -0x2.1e8a4099a4324p-4,
+ -0x4.3d14813348648p-4,
+ -0x8.7a29026690c9p-4,
+ -0x1.0f45204cd2192p+0,
+ -0x2.1e8a4099a4324p+0,
+ 0x2.0b6a53ddc2e18p+0,
+ -0x2.31aa2d558583p+0,
+ 0x1.e52a7a6600402p+0,
+ -0x2.7e29e0450ac5cp+0,
+ 0x1.4c2b1486f5ba8p+0,
+ 0x2.9856290deb75p+0,
+ -0x1.17d282f5345bfp+0,
+ -0x2.2fa505ea68b7ep+0,
+ 0x1.e934c93c39d64p+0,
+ -0x2.761542989799ap+0,
+ 0x1.5c544fdfdc12fp+0,
+ 0x2.b8a89fbfb825ep+0,
+ -0xd.72d95919afa58p-4,
+ -0x1.ae5b2b2335f4bp+0,
+ 0x2.ebc87eca9f5cap+0,
+ -0x7.0edd77bcc8ccp-4,
+ -0xe.1dbaef799198p-4,
+ -0x1.c3b75def3233p+0,
+ 0x2.c1101932a6e02p+0,
+ -0xc.65ea2abbd85fp-4,
+ -0x1.8cbd45577b0bep+0,
+ -0x3.197a8aaef617cp+0,
+ 0x1.589bfb31f1687p-4,
+ 0x2.b137f663e2d0ep-4,
+ 0x5.626fecc7c5a1cp-4,
+ 0xa.c4dfd98f8b438p-4
+};
+
+/* Return h such that x - pi/4 mod (2*pi) ~ h. */
+static double
+reduce_mod_twopi (double x)
+{
+ double sign = 1, h;
+ if (x < 0)
+ {
+ x = -x;
+ sign = -1;
+ }
+ h = 0; /* Invariant is x+h mod (2*pi). */
+ while (x >= 4)
+ {
+ int i = ilogb (x);
+ x -= ldexp (1.0f, i);
+ h += T[i];
+ }
+ /* Add the remainder x. */
+ h += x;
+ /* Subtract pi/4. */
+ double piover4 = 0xc.90fdaa22168cp-4;
+ h -= piover4;
+ /* Reduce mod 2*pi. */
+ double twopi = 0x6.487ed5110b46p+0;
+ while (fabs (h) > twopi * 0.5)
+ {
+ if (h > 0)
+ h -= twopi;
+ else
+ h += twopi;
+ }
+ return sign * h;
+}
+
+/* Reduce h mod (pi/2), and add k to n if k*pi/2 was subtracted from h. */
+static double
+reduce_mod_piover2 (double h, int *n)
+{
+ double piover2 = 0x1.921fb54442d18p+0;
+ while (fabs (h) > piover2 / 2)
+ {
+ if (h > 0)
+ {
+ h -= piover2;
+ (*n) ++;
+ }
+ else
+ {
+ h += piover2;
+ (*n) --;
+ }
+ }
+ return h;
+}
+
+/* Formula page 5 of https://www.cl.cam.ac.uk/~jrh13/papers/bessel.pdf:
+ j1f(x) ~ sqrt(2/(pi*x))*beta1(x)*cos(x-3pi/4-alpha1(x))
+ where beta1(x) = 1 + 3/(16*x^2) - 99/(512*x^4)
+ and alpha1(x) = -3/(8*x) + 21/(128*x^3) - 1899/(5120*x^5). */
+static float
+j1f_asympt (float x)
+{
+ float cst = 0xc.c422ap-4; /* sqrt(2/pi) rounded to nearest */
+ if (x < 0)
+ {
+ x = -x;
+ cst = -cst;
+ }
+ double y = 1.0 / (double) x;
+ double y2 = y * y;
+ double beta1 = 1.0f + y2 * (0x3p-4 - 0x3.18p-4 * y2);
+ double alpha1 = y * (-0x6p-4 + y2 * (0x2.ap-4 - 0x5.ef33333333334p-4 * y2));
+ double h;
+ h = reduce_mod_twopi ((double) x);
+ int n = -1; /* Accounts for -pi/2 (pi/4 was subtracted in reduce_mod_twopi). */
+ /* Now reduce mod pi/2. */
+ h = reduce_mod_piover2 (h, &n);
+ /* Subtract alpha1. */
+ h -= alpha1;
+ /* Reduce again mod pi/2 if needed. */
+ h = reduce_mod_piover2 (h, &n);
+ /* Now x - 3pi/4 - alpha1 = h + n*pi/2 mod (2*pi). */
+ float xr = (float) h;
+ n = n & 3;
+ float t = cst / sqrtf (x) * (float) beta1;
+ if (n == 0)
+ return t * cosf (xr);
+ else if (n == 2) /* cos(x+pi) = -cos(x) */
+ return -t * cosf (xr);
+ else if (n == 1) /* cos(x+pi/2) = -sin(x) */
+ return -t * sinf (xr);
+ else /* cos(x+3pi/2) = sin(x) */
+ return t * sinf (xr);
+}
+
+/* Special code for x near a root of j1.
+ z is the value computed by the generic code.
+ For small x, we use a polynomial approximating j1 around its root.
+ For large x, we use an asymptotic formula (j1f_asympt). */
+static float
+j1f_near_root (float x, float z)
+{
+ float index_f, sign = 1.0f;
+ int index;
+
+ if (x < 0)
+ {
+ x = -x;
+ sign = -1.0f;
+ }
+ index_f = roundf ((x - FIRST_ZERO_J1) / (float) M_PI);
+ /* SMALL_SIZE should be at least 21 */
+ if (index_f >= SMALL_SIZE)
+ return sign * j1f_asympt (x);
+ index = (int) index_f;
+ const float *p = Pj[index];
+ float x0 = p[0];
+ float x1 = p[2];
+ /* If we are not in the interval [x0,x1] around xmid, we return the value z. */
+ if (! (x0 <= x && x <= x1))
+ return z;
+ float xmid = p[1];
+ float y = x - xmid;
+ float p6 = (index > 0) ? p[6] : p[6] + y * -0x1.3e52fp-8;
+ return sign * (p[3] + y * (p[4] + y * (p[5] + y * p6)));
+}
float
__ieee754_j1f(float x)
@@ -56,22 +399,29 @@ __ieee754_j1f(float x)
__sincosf (y, &s, &c);
ss = -s-c;
cc = s-c;
- if(ix<0x7f000000) { /* make sure y+y not overflow */
- z = __cosf(y+y);
- if ((s*c)>zero) cc = z/ss;
- else ss = z/cc;
- }
+ if (ix >= 0x7f000000) /* x >= 2^127: use asymptotic expansion. */
+ return j1f_asympt (x);
+ /* Now we are sure that x+x cannot overflow. */
+ z = __cosf(y+y);
+ if ((s*c)>zero) cc = z/ss;
+ else ss = z/cc;
/*
* j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
* y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
*/
- if(ix>0x5c000000) z = (invsqrtpi*cc)/sqrtf(y);
- else {
+ if (ix <= 0x5c000000)
+ {
u = ponef(y); v = qonef(y);
- z = invsqrtpi*(u*cc-v*ss)/sqrtf(y);
- }
- if(hx<0) return -z;
- else return z;
+ cc = u*cc-v*ss;
+ }
+ z = (invsqrtpi * cc) / sqrtf(y);
+ /* Adjust sign of z. */
+ z = (hx < 0) ? -z : z;
+ float magic = 0x1.8p+21; /* 2^21 + 2^20 */
+ if (magic + cc != magic) /* Most likely. */
+ return z;
+ else /* |cc| <= 2^-3 */
+ return j1f_near_root (x, z);
}
if(__builtin_expect(ix<0x32000000, 0)) { /* |x|<2**-27 */
if(huge+x>one) { /* inexact if x!=0 necessary */
@@ -105,6 +455,160 @@ static const float V0[5] = {
1.6655924903e-11, /* 0x2d9281cf */
};
+#define FIRST_ZERO_Y1 2.197141f /* First zero of y1. */
+
+/* The following table contains successive zeros of y1 and degree-3 polynomial
+ approximations of y1 around these zeros: Py[0] for the first positive zero
+ (2.197141), Py[1] for the second one (5.429681), and so on. Each line contains:
+ {x0, xmid, x1, p0, p1, p2, p3}
+ where [x0,x1] is the interval around the zero, xmid is the binary32 number closest
+ to the zero, and p0+p1*x+p2*x^2+p3*x^3 is the approximation polynomial.
+ Each polynomial was generated using Remez' algorithm on the interval [x0,x1]
+ around the corresponding zero where the error is larger than 9 ulps for the
+ default code below. Degree 3 is enough (except around the first root).
+*/
+static const float Py[SMALL_SIZE][7] = {
+ /* For index 0, we use a degree-4 polynomial generated by Sollya, with the
+ coefficient of degree 4 hard-coded in y1f_near_root(). */
+ { 0x2.148768p+0, 0x2.3277dcp+0, 0x2.420bb8p+0, 0xb.96749p-28, 0x8.55242p-4, -0x1.e56a7ap-4, -0x8.6888p-8 },
+ { 0x5.625a8p+0, 0x5.6dff9p+0, 0x5.73e2b8p+0, 0x1.3c7822p-24, -0x5.71f17p-4, 0x8.05b18p-8, 0xd.16c1dp-8 },
+ { 0x8.914dp+0, 0x8.9893dp+0, 0x8.9c38cp+0, -0x1.f3ad8ep-24, 0x4.57e658p-4, -0x4.0ac6f8p-8, -0xb.21063p-8 },
+ { 0xb.b73efp+0, 0xb.bfc8ap+0, 0xb.c3535p+0, -0xd.5608fp-28, -0x3.b829d4p-4, 0x2.88544cp-8, 0x9.b7be2p-8 },
+ { 0xe.e13dfp+0, 0xe.e5becp+0, 0xe.e819p+0, -0xe.a7c04p-28, 0x3.4e0458p-4, -0x1.c64ed8p-8, -0x8.b2bd2p-8 },
+ { 0x1.20874p+4, 0x1.20b1c6p+4, 0x1.20c71p+4, 0x1.c2626p-24, -0x3.00f03cp-4, 0x1.54ec7cp-8, 0x7.f02b88p-8 },
+ { 0x1.52d934p+4, 0x1.530254p+4, 0x1.531962p+4, -0x1.9503ecp-24, 0x2.c5b29cp-4, -0x1.0bf4eep-8, -0x7.584198p-8 },
+ { 0x1.851f6ep+4, 0x1.854fa4p+4, 0x1.85675ap+4, -0x2.8d40fcp-24, -0x2.96547p-4, 0xd.9c4d1p-12, 0x6.ddacdp-8 },
+ { 0x1.b7808ep+4, 0x1.b79acep+4, 0x1.b7b034p+4, -0x2.36df5cp-24, 0x2.6f55ap-4, -0xb.57dcfp-12, -0x6.77afap-8 },
+ { 0x1.e9c916p+4, 0x1.e9e48p+4, 0x1.e9f24p+4, 0xd.e2eb7p-28, -0x2.4e8104p-4, 0x9.a4938p-12, 0x6.21cdbp-8 },
+ { 0x2.1c1454p+4, 0x2.1c2d2p+4, 0x2.1c3b24p+4, -0x2.3070f4p-24, 0x2.325e4cp-4, -0x8.5410cp-12, -0x5.d7d08p-8 },
+ { 0x2.4e663p+4, 0x2.4e74f8p+4, 0x2.4e83f8p+4, -0x3.525508p-24, -0x2.19e7dcp-4, 0x7.49d36p-12, 0x5.974078p-8 },
+ { 0x2.80ac64p+4, 0x2.80bc3p+4, 0x2.80caep+4, -0xe.6e158p-28, 0x2.046174p-4, -0x6.727d9p-12, -0x5.5e727p-8 },
+ { 0x2.b2e3b8p+4, 0x2.b302fp+4, 0x2.b30b24p+4, 0x1.e72698p-24, -0x1.f13fb2p-4, 0x5.c1ad88p-12, 0x5.2c075p-8 },
+ { 0x2.e5389cp+4, 0x2.e5495p+4, 0x2.e551b4p+4, -0x1.5bed9cp-24, 0x1.e018dcp-4, -0x5.2e5a3p-12, -0x4.fe8628p-8 },
+ { 0x3.177e9cp+4, 0x3.178f64p+4, 0x3.17982p+4, -0x3.6b654cp-24, -0x1.d09b2p-4, 0x4.b22ddp-12, 0x4.d57a68p-8 },
+ { 0x3.49cc2cp+4, 0x3.49d534p+4, 0x3.49dc9p+4, 0x1.6f11bp-24, 0x1.c28612p-4, -0x4.481288p-12, -0x4.b01a8p-8 },
+ { 0x3.7c117cp+4, 0x3.7c1adp+4, 0x3.7c211p+4, -0x2.0bc074p-24, -0x1.b5a622p-4, 0x3.ecc594p-12, 0x4.8df3fp-8 },
+ { 0x3.ae4e54p+4, 0x3.ae603cp+4, 0x3.ae64fp+4, -0x2.87dd4p-24, 0x1.a9d184p-4, -0x3.9d52dcp-12, -0x4.6e98p-8 },
+ { 0x3.e09334p+4, 0x3.e0a588p+4, 0x3.e0ad78p+4, -0x2.fb964p-24, -0x1.9ee5eep-4, 0x3.58195cp-12, 0x4.51938p-8 },
+ { 0x4.12e15p+4, 0x4.12eabp+4, 0x4.12f0fp+4, 0x2.cf5adp-24, 0x1.94c6f4p-4, -0x3.1af534p-12, -0x4.36a7e8p-8 },
+ { 0x4.451e78p+4, 0x4.452fb8p+4, 0x4.4534d8p+4, 0x3.6766fp-24, -0x1.8b5cccp-4, 0x2.e49344p-12, 0x4.1daa98p-8 },
+ { 0x4.776a3p+4, 0x4.7774bp+4, 0x4.7779e8p+4, 0x3.501424p-24, 0x1.829356p-4, -0x2.b47be8p-12, -0x4.0647d8p-8 },
+ { 0x4.a9afp+4, 0x4.a9b99p+4, 0x4.a9bea8p+4, -0x5.c05808p-24, -0x1.7a597ep-4, 0x2.894a64p-12, 0x3.f067e8p-8 },
+ { 0x4.dbf358p+4, 0x4.dbfe58p+4, 0x4.dc03d8p+4, 0x7.1e1478p-28, 0x1.72a09ap-4, -0x2.622e7cp-12, -0x3.dbdd9cp-8 },
+ { 0x5.0e3d88p+4, 0x5.0e431p+4, 0x5.0e4788p+4, 0x3.e36e6cp-24, -0x1.6b5c06p-4, 0x2.3ed8dcp-12, 0x3.c8847p-8 },
+ { 0x5.4082bp+4, 0x5.4087cp+4, 0x5.408d4p+4, 0x1.3f9e5ap-24, 0x1.6480c4p-4, -0x2.1f1138p-12, -0x3.b644f8p-8 },
+ { 0x5.72c1fp+4, 0x5.72cc6p+4, 0x5.72d19p+4, -0x2.39e41cp-24, -0x1.5e0544p-4, 0x2.020254p-12, 0x3.a5087p-8 },
+ { 0x5.a50598p+4, 0x5.a510fp+4, 0x5.a5165p+4, -0x2.912f84p-24, 0x1.57e12p-4, -0x1.e7472cp-12, -0x3.94b05p-8 },
+ { 0x5.d749fp+4, 0x5.d75578p+4, 0x5.d758dp+4, 0x3.d5b00cp-24, -0x1.520ceep-4, 0x1.cedf4cp-12, 0x3.852d54p-8 },
+ { 0x6.09959p+4, 0x6.0999f8p+4, 0x6.099dp+4, -0x3.1726ecp-24, 0x1.4c8222p-4, -0x1.b87e64p-12, -0x3.766828p-8 },
+ { 0x6.3bd32p+4, 0x6.3bde7p+4, 0x6.3be22p+4, 0x1.949e22p-24, -0x1.473ae6p-4, 0x1.a3e3b6p-12, 0x3.685db8p-8 },
+ { 0x6.6e1cap+4, 0x6.6e22ep+4, 0x6.6e25bp+4, -0x5.5553bp-28, 0x1.42320ap-4, -0x1.90d756p-12, -0x3.5af384p-8 },
+ { 0x6.a060fp+4, 0x6.a06748p+4, 0x6.a06a68p+4, 0x3.2df7ecp-28, -0x1.3d62e4p-4, 0x1.7f295cp-12, 0x3.4e24ccp-8 },
+ { 0x6.d2a87p+4, 0x6.d2aba8p+4, 0x6.d2aeep+4, -0x1.e13fcep-24, 0x1.38c948p-4, -0x1.6eb032p-12, -0x3.41e3p-8 },
+ { 0x7.04ea5p+4, 0x7.04f008p+4, 0x7.04f2cp+4, -0x3.ad9974p-24, -0x1.34616ep-4, 0x1.5f94dap-12, 0x3.36286cp-8 },
+ { 0x7.372dd8p+4, 0x7.373458p+4, 0x7.37379p+4, -0x3.6977e8p-24, 0x1.3027fp-4, -0x1.511ca2p-12, -0x3.2ae7d8p-8 },
+ { 0x7.6973c8p+4, 0x7.6978a8p+4, 0x7.697bcp+4, 0x4.654cbp-24, -0x1.2c19b6p-4, 0x1.43c536p-12, 0x3.201998p-8 },
+ { 0x7.9bb73p+4, 0x7.9bbcf8p+4, 0x7.9bc028p+4, 0x8.825c8p-32, 0x1.2833eep-4, -0x1.377382p-12, -0x3.15b7e4p-8 },
+ { 0x7.cdfe18p+4, 0x7.ce014p+4, 0x7.ce04a8p+4, -0x2.0d11d8p-28, -0x1.24740ap-4, 0x1.2bc672p-12, 0x3.0bb998p-8 },
+ { 0x8.00421p+4, 0x8.00458p+4, 0x8.00484p+4, -0x4.4e495p-24, 0x1.20d7b6p-4, -0x1.20ab76p-12, -0x3.021b64p-8 },
+ { 0x8.3282ep+4, 0x8.3289cp+4, 0x8.328cp+4, 0x4.81c1c8p-24, -0x1.1d5ccap-4, 0x1.165974p-12, 0x2.f8d71cp-8 },
+ { 0x8.64c77p+4, 0x8.64cep+4, 0x8.64d14p+4, -0xe.99386p-28, 0x1.1a015p-4, -0x1.0cbf48p-12, -0x2.efe49cp-8 },
+ { 0x8.97109p+4, 0x8.97124p+4, 0x8.97148p+4, -0x6.16f1c8p-24, -0x1.16c37ap-4, 0x1.03cdaep-12, 0x2.e7402cp-8 },
+ { 0x8.c9537p+4, 0x8.c9567p+4, 0x8.c9581p+4, -0x1.129336p-24, 0x1.13a19ep-4, -0xf.aed15p-16, -0x2.dee83p-8 },
+ { 0x8.fb938p+4, 0x8.fb9aap+4, 0x8.fb9c5p+4, 0x5.19c09p-24, -0x1.109a32p-4, 0xf.29df7p-16, 0x2.d6d728p-8 },
+ { 0x9.2ddcp+4, 0x9.2ddedp+4, 0x9.2de09p+4, -0x6.497dp-24, 0x1.0dabc8p-4, -0xe.ad4f9p-16, -0x2.cf0624p-8 },
+ { 0x9.602p+4, 0x9.6023p+4, 0x9.6025p+4, 0x4.e4f338p-24, -0x1.0ad512p-4, 0xe.387eep-16, 0x2.c77588p-8 },
+ { 0x9.92658p+4, 0x9.92673p+4, 0x9.9269p+4, -0x1.287c58p-24, 0x1.0814d4p-4, -0xd.cad8fp-16, -0x2.c02074p-8 },
+ { 0x9.c4a7bp+4, 0x9.c4ab6p+4, 0x9.c4acdp+4, -0x4.b594ep-24, -0x1.0569fp-4, 0xd.63d72p-16, 0x2.b9053p-8 },
+ { 0x9.f6eccp+4, 0x9.f6ef8p+4, 0x9.f6f15p+4, -0x3.a8f164p-24, 0x1.02d354p-4, -0xc.fae8ap-16, -0x2.b21efp-8 },
+ { 0xa.2931cp+4, 0xa.2933bp+4, 0xa.2935ap+4, -0x6.12505p-24, -0x1.005004p-4, 0xc.9fdebp-16, 0x2.ab6c2cp-8 },
+ { 0xa.5b74p+4, 0xa.5b77dp+4, 0xa.5b79ap+4, 0x1.8acf4ep-24, 0xf.ddf16p-8, -0xc.4239ap-16, -0x2.a4eb2p-8 },
+ { 0xa.8dba1p+4, 0xa.8dbbfp+4, 0xa.8dbd9p+4, 0x1.39cf86p-24, -0xf.b7faep-8, 0xb.e9b1p-16, 0x2.9e97dp-8 },
+ { 0xa.bffe2p+4, 0xa.c0001p+4, 0xa.c001ep+4, -0x2.5f8de8p-24, 0xf.930fep-8, -0xb.95eddp-16, -0x2.98716cp-8 },
+ { 0xa.f2422p+4, 0xa.f2443p+4, 0xa.f2464p+4, 0x2.073d64p-24, -0xf.6f245p-8, 0xb.46a06p-16, 0x2.92759p-8 },
+ { 0xb.24846p+4, 0xb.24885p+4, 0xb.24895p+4, -0x4.ed26ep-28, 0xf.4c2cep-8, -0xa.fb7f6p-16, -0x2.8ca308p-8 },
+ { 0xb.56c8fp+4, 0xb.56cc7p+4, 0xb.56cep+4, -0x2.ae5204p-24, -0xf.2a1efp-8, 0xa.b4472p-16, 0x2.86f67cp-8 },
+ { 0xb.890e7p+4, 0xb.89109p+4, 0xb.8911cp+4, 0x6.d72168p-24, 0xf.08f09p-8, -0xa.70b97p-16, -0x2.816f28p-8 },
+ { 0xb.bb506p+4, 0xb.bb54ap+4, 0xb.bb566p+4, 0x2.d3f294p-24, -0xe.e8986p-8, 0xa.2928cp-16, 0x2.7c0c0cp-8 },
+ { 0xb.ed974p+4, 0xb.ed98cp+4, 0xb.ed9adp+4, 0x3.88c0fp-24, 0xe.c90d7p-8, -0x9.ec57dp-16, -0x2.76ca28p-8 },
+ { 0xc.1fdafp+4, 0xc.1fdcdp+4, 0xc.1fdfp+4, 0x3.d94d34p-24, -0xe.aa478p-8, 0x9.ab3c5p-16, 0x2.71a9cp-8 },
+ { 0xc.521f7p+4, 0xc.5220fp+4, 0xc.5223p+4, 0x4.66b7ep-24, 0xe.8c3e9p-8, -0x9.74619p-16, -0x2.6ca8b8p-8 },
+ { 0xc.8463p+4, 0xc.8465p+4, 0xc.8466bp+4, 0xf.f7ac9p-28, -0xe.6eeb6p-8, 0x9.3901ap-16, 0x2.67c628p-8 },
+};
+
+/* Formula page 5 of https://www.cl.cam.ac.uk/~jrh13/papers/bessel.pdf:
+ y1f(x) ~ sqrt(2/(pi*x))*beta1(x)*sin(x-3pi/4-alpha1(x))
+ where beta1(x) = 1 + 3/(16*x^2) - 99/(512*x^4)
+ and alpha1(x) = -3/(8*x) + 21/(128*x^3) - 1899/(5120*x^5). */
+static float
+y1f_asympt (float x)
+{
+ float cst = 0xc.c422ap-4; /* sqrt(2/pi) rounded to nearest */
+ double y = 1.0 / (double) x;
+ double y2 = y * y;
+ double beta1 = 1.0f + y2 * (0x3p-4 - 0x3.18p-4 * y2);
+ double alpha1 = y * (-0x6p-4 + y2 * (0x2.ap-4 - 0x5.ef33333333334p-4 * y2));
+ double h;
+ h = reduce_mod_twopi ((double) x);
+ int n = -1; /* Accounts for -pi/2 (pi/4 was subtracted in reduce_mod_twopi). */
+ /* Now reduce mod pi/2. */
+ h = reduce_mod_piover2 (h, &n);
+ /* Subtract alpha1. */
+ h -= alpha1;
+ /* Reduce again mod pi/2 if needed. */
+ h = reduce_mod_piover2 (h, &n);
+ /* Now x - 3pi/4 - alpha1 = h + n*pi/2 mod (2*pi). */
+ float xr = (float) h;
+ n = n & 3;
+ float t = cst / sqrtf (x) * (float) beta1;
+ if (n == 0)
+ return t * sinf (xr);
+ else if (n == 2) /* sin(x+pi) = -sin(x) */
+ return -t * sinf (xr);
+ else if (n == 1) /* sin(x+pi/2) = cos(x) */
+ return t * cosf (xr);
+ else /* sin(x+3pi/2) = -cos(x) */
+ return -t * cosf (xr);
+}
+
+/* Special code for x near a root of y1.
+ z is the value computed by the generic code.
+ For small x, we use a polynomial approximating y1 around its root.
+ For large x, we use an asymptotic formula (y1f_asympt). */
+static float
+y1f_near_root (float x, float z)
+{
+ float index_f;
+ int index;
+
+ index_f = roundf ((x - FIRST_ZERO_Y1) / (float) M_PI);
+ /* SMALL_SIZE should be at least 18 */
+ if (index_f >= SMALL_SIZE)
+ return y1f_asympt (x);
+ index = (int) index_f;
+ const float *p = Py[index];
+ float x0 = p[0];
+ float x1 = p[2];
+ /* If we are not in the interval [x0,x1] around xmid, we return the value z. */
+ if (! (x0 <= x && x <= x1))
+ return z;
+ float xmid = p[1];
+ float y = x - xmid;
+ float p6 = (index > 0) ? p[6] : p[6] + y * -0x1.83758ep-8f;
+ return p[3] + y * (p[4] + y * (p[5] + y * p6));
+}
+
+/* Special code for 0x1.f7f7acp+0 <= x < 2, because the code for
+ 2^-25 <= x < 2 yields an error > 9 ulps in a few cases. */
+static float
+y1f_below_two (float x)
+{
+ float xmid = 0x1.fbfbd6p+0;
+ float y = x - xmid;
+ float p[3] = { -0x1.dabdeep-4, 0x9.1290ap-4, -0x1.98468cp-4 };
+ return p[0] + y * (p[1] + y * p[2]);
+}
+
float
__ieee754_y1f(float x)
{
@@ -123,11 +627,12 @@ __ieee754_y1f(float x)
__sincosf (x, &s, &c);
ss = -s-c;
cc = s-c;
- if(ix<0x7f000000) { /* make sure x+x not overflow */
- z = __cosf(x+x);
- if ((s*c)>zero) cc = z/ss;
- else ss = z/cc;
- }
+ if (ix >= 0x7f000000) /* x >= 2^127: use asymptotic expansion. */
+ return y1f_asympt (x);
+ /* Now we are sure that x+x cannot overflow. */
+ z = __cosf(x+x);
+ if ((s*c)>zero) cc = z/ss;
+ else ss = z/cc;
/* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0))
* where x0 = x-3pi/4
* Better formula:
@@ -139,12 +644,17 @@ __ieee754_y1f(float x)
* sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
* to compute the worse one.
*/
- if(ix>0x5c000000) z = (invsqrtpi*ss)/sqrtf(x);
- else {
- u = ponef(x); v = qonef(x);
- z = invsqrtpi*(u*ss+v*cc)/sqrtf(x);
- }
- return z;
+ if (ix <= 0x5c000000)
+ {
+ u = ponef(x); v = qonef(x);
+ ss = u*ss+v*cc;
+ }
+ z = (invsqrtpi * ss) / sqrtf(x);
+ float magic = 0x1.8p+22; /* 2^22 + 2^21 */
+ if (magic + ss != magic) /* Most likely. */
+ return z;
+ else /* |cc| <= 2^-2 */
+ return y1f_near_root (x, z);
}
if(__builtin_expect(ix<=0x33000000, 0)) { /* x < 2**-25 */
z = -tpi / x;
@@ -152,6 +662,9 @@ __ieee754_y1f(float x)
__set_errno (ERANGE);
return z;
}
+ /* Now 2**-25 <= x < 2. */
+ if (ix >= 0x3ffbfbd6) /* 0x1.f7f7acp+0 <= x < 2 */
+ return y1f_below_two (x);
z = x*x;
u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4])));
v = one+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4]))));
@@ -159,7 +672,7 @@ __ieee754_y1f(float x)
}
libm_alias_finite (__ieee754_y1f, __y1f)
-/* For x >= 8, the asymptotic expansions of pone is
+/* For x >= 8, the asymptotic expansion of pone is
* 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x.
* We approximate pone by
* pone(x) = 1 + (R/S)
@@ -252,8 +765,19 @@ ponef(float x)
return one+ r/s;
}
+/* FIXME: the comment below is wrong. It was most likely copied from
+ dbl-64/e_j1f.c, but not updated. Largest errors are the following (rounded up):
+ * for qr8/qs8: largest known value of | qone(x)/s -0.375-R/S | is 2^(-35.78)
+ (for x=0x8p+0)
+ * for qr5/qs5: largest value of | qone(x)/s -0.375-R/S | is 2^(-34.28)
+ (for x=0x7.b8e2cp+0)
+ * for qr3/qs3: largest value of | qone(x)/s -0.375-R/S | is 2^(-32.18)
+ (for x=0x3.4956bcp+0)
+ * for qr2/qs2: largest value of | qone(x)/s -0.375-R/S | is 2^(-33.10)
+ (for x=0x2.da487cp+0)
+*/
-/* For x >= 8, the asymptotic expansions of qone is
+/* For x >= 8, the asymptotic expansion of qone is
* 3/8 s - 105/1024 s^3 - ..., where s = 1/x.
* We approximate pone by
* qone(x) = s*(0.375 + (R/S))
@@ -340,10 +864,10 @@ qonef(float x)
GET_FLOAT_WORD(ix,x);
ix &= 0x7fffffff;
/* ix >= 0x40000000 for all calls to this function. */
- if(ix>=0x40200000) {p = qr8; q= qs8;}
- else if(ix>=0x40f71c58){p = qr5; q= qs5;}
- else if(ix>=0x4036db68){p = qr3; q= qs3;}
- else {p = qr2; q= qs2;}
+ if(ix>=0x41000000) {p = qr8; q= qs8;} /* x >= 8 */
+ else if(ix>=0x40f71c58){p = qr5; q= qs5;} /* x >= 7.722209930e+00 */
+ else if(ix>=0x4036db68){p = qr3; q= qs3;} /* x >= 2.857141495e+00 */
+ else {p = qr2; q= qs2;} /* x >= 2 */
z = one/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 633d2ab8e4..f63337112e 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1336,28 +1336,28 @@ float128: 5
ldouble: 6
Function: "j1":
-double: 2
+double: 6
float: 9
-float128: 4
+float128: 7
ldouble: 5
Function: "j1_downward":
-double: 3
+double: 9
float: 5
float128: 4
ldouble: 4
Function: "j1_towardzero":
double: 3
-float: 2
+float: 7
float128: 4
-ldouble: 4
+ldouble: 7
Function: "j1_upward":
double: 3
float: 5
float128: 3
-ldouble: 3
+ldouble: 5
Function: "jn":
double: 4
@@ -1778,21 +1778,21 @@ float128: 5
ldouble: 3
Function: "y1_downward":
-double: 3
-float: 2
+double: 6
+float: 9
float128: 5
-ldouble: 7
+ldouble: 9
Function: "y1_towardzero":
double: 4
float: 5
float128: 6
-ldouble: 5
+ldouble: 6
Function: "y1_upward":
-double: 7
+double: 9
float: 9
-float128: 6
+float128: 9
ldouble: 9
Function: "yn":
--
2.29.2
More information about the Libc-alpha
mailing list