This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCHv6] New generic sinf
- From: Paul Clarke <pc at us dot ibm dot com>
- To: Rajalakshmi Srinivasaraghavan <raji at linux dot vnet dot ibm dot com>, libc-alpha at sourceware dot org
- Date: Thu, 30 Nov 2017 19:17:34 -0600
- Subject: Re: [PATCHv6] New generic sinf
- Authentication-results: sourceware.org; auth=none
- References: <1512033760-18792-1-git-send-email-raji@linux.vnet.ibm.com>
On 11/30/2017 03:22 AM, Rajalakshmi Srinivasaraghavan wrote:
...snip...
> +float
> +SINF_FUNC (float x)
> +{
> + double cx;
> + double theta = x;
> + double abstheta = fabs (theta);
> + /* If |x|< Pi/4. */
> + if (abstheta < M_PI_4)
> + {
> + if (abstheta >= 0x1p-5) /* |x| >= 2^-5. */
> + {
> + const double theta2 = theta * theta;
> + /* Chebyshev polynomial of the form for sin
> + x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). */
> + cx = S3 + theta2 * S4;
> + cx = S2 + theta2 * cx;
> + cx = S1 + theta2 * cx;
> + cx = S0 + theta2 * cx;
> + cx = theta + theta * theta2 * cx;
> + return cx;
> + }
> + else if (abstheta >= 0x1p-27) /* |x| >= 2^-27. */
> + {
> + /* A simpler Chebyshev approximation is close enough for this range:
> + for sin: x+x^3*(SS0+x^2*SS1). */
> + const double theta2 = theta * theta;
> + cx = SS0 + theta2 * SS1;
> + cx = theta + theta * theta2 * cx;
> + return cx;
> + }
> + else
> + {
> + /* Handle some special cases. */
> + if (theta)
> + return theta - (theta * SMALL);
> + else
> + return theta;
> + }
> + }
> + else /* |x| >= Pi/4. */
> + {
> + unsigned long int signbit = (x < 0);
> + if (abstheta < 9 * M_PI_4) /* |x| < 9*Pi/4. */
> + {
> + /* There are cases where FE_UPWARD rounding mode can
> + produce a result of abstheta * inv_PI_4 == 9,
> + where abstheta < 9pi/4, so the domain for
> + pio2_table must go to 5 (9 / 2 + 1). */
> + unsigned long int n = (abstheta * inv_PI_4) + 1;
> + theta = abstheta - pio2_table[n / 2];
> + return reduced (theta, n, signbit);
> + }
> + else if (isless (abstheta, INFINITY))
> + {
> + if (abstheta < 0x1p+23) /* |x| < 2^23. */
> + {
> + unsigned long int n = floor (abstheta * inv_PI_4) + 1.0;
> + double x = floor (n / 2.0);
> + theta = x * PI_2_lo + (x * PI_2_hi + abstheta);
> + /* Argument reduction needed. */
> + return reduced (theta, n, signbit);
> + }
> + else /* |x| >= 2^23. */
> + {
> + x = fabsf (x);
> + int exponent;
> + GET_FLOAT_WORD (exponent, x);
> + exponent =
> + (exponent >> FLOAT_EXPONENT_SHIFT) - FLOAT_EXPONENT_BIAS;
> + exponent += 3;
> + exponent /= 28;
> + double a = invpio4_table[exponent] * x;
> + double b = invpio4_table[exponent + 1] * x;
> + double c = invpio4_table[exponent + 2] * x;
> + double d = invpio4_table[exponent + 3] * x;
> + uint64_t l = a;
> + l &= ~0x7;
> + a -= l;
> + double e = a + b;
> + l = e;
> + e = a - l;
> + if (l & 1)
The next 20+ lines have 8 spaces (after an initial tab) where a tab should go (if I understand whitespace conventions correctly). From here...
> + {
> + e -= 1.0;
> + e += b;
> + e += c;
> + e += d;
> + e *= M_PI_4;
> + return reduced (e, l + 1, signbit);
> + }
> + else
> + {
> + e += b;
> + e += c;
> + e += d;
> + if (e <= 1.0)
> + {
> + e *= M_PI_4;
> + return reduced (e, l + 1, signbit);
> + }
> + else
> + {
> + l++;
> + e -= 2.0;
> + e *= M_PI_4;
> + return reduced (e, l + 1, signbit);
> + }
> + }
...to here.
> }
> }
> + else
> + {
> + int32_t ix;
> + /* High word of x. */
> + GET_FLOAT_WORD (ix, abstheta);
> + /* Sin(Inf or NaN) is NaN. */
> + if (ix == 0x7f800000)
> + __set_errno (EDOM);
> + return x - x;
> + }
> + }
> }
PC