From 8f1259a6ef03edfa0f8e8b28fcaa13410b4e6b10 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Thu, 16 Aug 2018 10:39:35 +0000 Subject: [PATCH] Improve sincosf comments Improve comments in sincosf implementation to make the code easier to understand. Rename the constant pi64 to pi63 since it's actually PI * 2^-63. Add comments for fields of sincos_t structure. Add comments describing implementation details to reduce_fast. --- newlib/libm/common/cosf.c | 7 +++---- newlib/libm/common/sincosf.c | 7 +++---- newlib/libm/common/sincosf.h | 26 +++++++++++++++++--------- newlib/libm/common/sinf.c | 7 +++---- 4 files changed, 26 insertions(+), 21 deletions(-) diff --git a/newlib/libm/common/cosf.c b/newlib/libm/common/cosf.c index f87186c68..2bdd47a14 100644 --- a/newlib/libm/common/cosf.c +++ b/newlib/libm/common/cosf.c @@ -32,11 +32,10 @@ #include "math_config.h" #include "sincosf.h" -/* Fast cosf implementation. Worst-case ULP is 0.56072, maximum relative - error is 0.5303p-23. A single-step signed range reduction is used for +/* Fast cosf implementation. Worst-case ULP is 0.5607, maximum relative + error is 0.5303 * 2^-23. A single-step range reduction is used for small values. Large inputs have their range reduced using fast integer - arithmetic. -*/ + arithmetic. */ float cosf (float y) { diff --git a/newlib/libm/common/sincosf.c b/newlib/libm/common/sincosf.c index 65dd05e6e..5584c95bd 100644 --- a/newlib/libm/common/sincosf.c +++ b/newlib/libm/common/sincosf.c @@ -32,11 +32,10 @@ #include "math_config.h" #include "sincosf.h" -/* Fast sincosf implementation. Worst-case ULP is 0.56072, maximum relative - error is 0.5303p-23. A single-step signed range reduction is used for +/* Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative + error is 0.5303 * 2^-23. A single-step range reduction is used for small values. Large inputs have their range reduced using fast integer - arithmetic. -*/ + arithmetic. */ void sincosf (float y, float *sinp, float *cosp) { diff --git a/newlib/libm/common/sincosf.h b/newlib/libm/common/sincosf.h index 3cac8e88f..8e241d70a 100644 --- a/newlib/libm/common/sincosf.h +++ b/newlib/libm/common/sincosf.h @@ -28,19 +28,25 @@ #include #include "math_config.h" -/* PI * 2^-64. */ -static const double pi64 = 0x1.921FB54442D18p-62; +/* 2PI * 2^-64. */ +static const double pi63 = 0x1.921FB54442D18p-62; /* PI / 4. */ static const double pio4 = 0x1.921FB54442D18p-1; +/* The constants and polynomials for sine and cosine. */ typedef struct { - double sign[4]; - double hpi_inv, hpi, c0, c1, c2, c3, c4, s1, s2, s3; + double sign[4]; /* Sign of sine in quadrants 0..3. */ + double hpi_inv; /* 2 / PI ( * 2^24 if !TOINT_INTRINSICS). */ + double hpi; /* PI / 2. */ + double c0, c1, c2, c3, c4; /* Cosine polynomial. */ + double s1, s2, s3; /* Sine polynomial. */ } sincos_t; +/* Polynomial data (the cosine polynomial is negated in the 2nd entry). */ extern const sincos_t __sincosf_table[2] HIDDEN; +/* Table with 4/PI to 192 bit precision. */ extern const uint32_t __inv_pio4[] HIDDEN; /* Top 12 bits of the float representation with the sign bit cleared. */ @@ -114,18 +120,20 @@ sinf_poly (double x, double x2, const sincos_t *p, int n) X as a value between -PI/4 and PI/4 and store the quadrant in NP. The values for PI/2 and 2/PI are accessed via P. Since PI/2 as a double is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4, - only 2 multiplies are required and the result is accurate for |X| <= 120.0. - Use round/lround if inlined, otherwise convert to int. To avoid inaccuracies - introduced by truncating negative values, compute the quadrant * 2^24. */ + the result is accurate for |X| <= 120.0. */ static inline double reduce_fast (double x, const sincos_t *p, int *np) { double r; #if TOINT_INTRINSICS + /* Use fast round and lround instructions when available. */ r = x * p->hpi_inv; *np = converttoint (r); return x - roundtoint (r) * p->hpi; #else + /* Use scaled float to int conversion with explicit rounding. + hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31. + This avoids inaccuracies introduced by truncating negative values. */ r = x * p->hpi_inv; int n = ((int32_t)r + 0x800000) >> 24; *np = n; @@ -133,7 +141,7 @@ reduce_fast (double x, const sincos_t *p, int *np) #endif } -/* Reduce the range of XI to a multiple of PI/4 using fast integer arithmetic. +/* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic. XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored). Return the modulo between -PI/4 and PI/4 and store the quadrant in NP. Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit @@ -160,5 +168,5 @@ reduce_large (uint32_t xi, int *np) res0 -= n << 62; double x = (int64_t)res0; *np = n; - return x * pi64; + return x * pi63; } diff --git a/newlib/libm/common/sinf.c b/newlib/libm/common/sinf.c index c2e61039b..0f6accfbc 100644 --- a/newlib/libm/common/sinf.c +++ b/newlib/libm/common/sinf.c @@ -31,11 +31,10 @@ #include "math_config.h" #include "sincosf.h" -/* Fast sinf implementation. Worst-case ULP is 0.56072, maximum relative - error is 0.5303p-23. A single-step signed range reduction is used for +/* Fast sinf implementation. Worst-case ULP is 0.5607, maximum relative + error is 0.5303 * 2^-23. A single-step range reduction is used for small values. Large inputs have their range reduced using fast integer - arithmetic. -*/ + arithmetic. */ float sinf (float y) { -- 2.43.5