[PATCH 11/11] math: Use tanf from CORE-MATH
DJ Delorie
dj@redhat.com
Thu Nov 21 03:19:23 GMT 2024
A few comments below need fixing (details inline).
Otherwise LGTM.
Reviewed-by: DJ Delorie <dj@redhat.com>
Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
> The CORE-MATH implementation is correctly rounded (for any rounding mode)
> and shows better performance to the generic tanf.
> diff --git a/SHARED-FILES b/SHARED-FILES
> index 033ce7f092..580e6b231a 100644
> --- a/SHARED-FILES
> +++ b/SHARED-FILES
> @@ -288,3 +288,9 @@ sysdeps/ieee754/flt-32/e_lgammaf_r.c:
> - remove the errno stuff (this is done by the wrapper)
> - replace 0x1p127f * 0x1p127f by math_narrow_eval (x * 0x1p127f)
> - add libm_alias_finite (__ieee754_lgammaf_r, __lgammaf_r) at the end
> +sysdeps/ieee754/flt-32/s_tanf.c:
> + (src/binary32/tan/tanf.cc in CORE-MATH)
> + - The code was adapted to use glibc code style and internal
> + functions to handle errno, overflow, and underflow. It was changed
> + to use an internal wrapper for 128 bit unsigned integer operation
> + for ABIs that do not support the type natively.
Ok.
> diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
> diff --git a/sysdeps/alpha/fpu/libm-test-ulps b/sysdeps/alpha/fpu/libm-test-ulps
> diff --git a/sysdeps/arc/fpu/libm-test-ulps b/sysdeps/arc/fpu/libm-test-ulps
> diff --git a/sysdeps/arc/nofpu/libm-test-ulps b/sysdeps/arc/nofpu/libm-test-ulps
> diff --git a/sysdeps/arm/libm-test-ulps b/sysdeps/arm/libm-test-ulps
> diff --git a/sysdeps/csky/fpu/libm-test-ulps b/sysdeps/csky/fpu/libm-test-ulps
> diff --git a/sysdeps/csky/nofpu/libm-test-ulps b/sysdeps/csky/nofpu/libm-test-ulps
Ok.
> diff --git a/sysdeps/generic/math_int128.h b/sysdeps/generic/math_int128.h
If only unsigned is supported, this would make sense as math_uint128.h -
unless the plan is to add signed int128 in the future.
> +/* Internal 128 bit int support.
> + Copyright (C) 2024 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#ifndef _MATH_INT128_H
> +#define _MATH_INT128_H
> +
> +/* Limited support for internal 128 bit integer, used on some math
> + implementations. It uses compiler builtin type if supported, otherwise
> + it is emulate. Only unsigned and some operations are currently supported:
"it is emulated" (spelling)
> +
> + - u128_t: the 128 bit unsigned type.
> + - u128_high: return the high part of the number.
> + - u128_low: return the low part of the number.
> + - u128_from_u64: createa 128 bit number from a 64 bit one.
s/createa/create a/
> + - u128_mul: multiple two 128 bit numbers.
"multiply" (spelling)
> + - u128_add: add two 128 bit numbers.
> + - u128_lshift: left shift a number.
> + - u128_rshift: right shift a number.
> + */
Ok.
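For anyone following along, here is a minimal usage sketch of the proposed
helpers (hypothetical code, not part of the patch; it should behave the same
whether the builtin or the emulated path is selected):

  #include <stdint.h>
  #include <math_int128.h>

  /* Full 64x64 -> 128 bit unsigned product, split into two halves.  */
  static void
  mul64x64 (uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
  {
    u128 p = u128_mul (u128_from_u64 (a), u128_from_u64 (b));
    *hi = u128_high (p);
    *lo = u128_low (p);
  }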
> +#ifdef __SIZEOF_INT128__
> +typedef unsigned __int128 u128;
> +# define u128_high(__x) (uint64_t)((__x) >> 64)
> +# define u128_low(__x) (uint64_t)(__x)
> +# define u128_from_u64(__x) (u128)(__x)
> +# define u128_mul(__x, __y) (__x) * (__y)
> +# define u128_add(__x, __y) (__x) + (__y)
> +# define u128_lshift(__x, __y) (__x) << (__y)
> +# define u128_rshift(__x, __y) (__x) >> (__y)
Ok.
> +#else
> +typedef struct
> +{
> + uint64_t low;
> + uint64_t high;
> +} u128;
> +
> +# define u128_high(__x) (__x).high
> +# define u128_low(__x) (__x).low
> +
> +# define u128_from_u64(__x) (u128){.low = (__x), .high = 0}
> +
> +# define TOPBIT (UINT64_C(1) << 63)
> +# define MASK32 (UINT64_C(0xffffffff))
Ok.
> +static u128 u128_add (u128 x, u128 y)
> +{
> + uint64_t lower = (x.low & ~TOPBIT) + (y.low & ~TOPBIT);
> + bool carry = (lower >> 63) + (x.low >> 63) + (y.low >> 63) > 1;
> + return (u128) { .high = x.high + y.high + carry, .low = x.low + y.low };
> +}
Ok.
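FWIW the carry test took me a second read: it adds the two low words with
their top bits masked off (so that partial sum cannot wrap), then checks
whether the top bit of that partial sum plus the two original top bits
reaches 2, which is exactly the condition for a carry out of bit 63.  The
fallback could equivalently be written with the overflow builtin
(hypothetical sketch, assuming __builtin_add_overflow is acceptable in this
header):

  static u128
  u128_add (u128 x, u128 y)
  {
    u128 r;
    /* The carry out of the low-word addition feeds the high word.  */
    unsigned int carry = __builtin_add_overflow (x.low, y.low, &r.low);
    r.high = x.high + y.high + carry;
    return r;
  }

Either form is fine; no change requested.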
> +static u128 u128_lshift (u128 x, unsigned int n)
> +{
> + switch (n)
> + {
> + case 0: return x;
> + case 1 ... 63: return (u128) { .high = (x.high << n) | (x.low >> (64 - n)),
> + .low = x.low << n };
> + case 64 ...127: return (u128) { .high = x.low << (n - 64), .low = 0};
> + default: return (u128) { .high = 0, .low = 0 };
> + }
> +}
Ok.
> +static u128 u128_rshift (u128 x, unsigned int n)
> +{
> + switch (n)
> + {
> + case 0: return x;
> + case 1 ... 63: return (u128) { .high = x.high >> n,
> + .low = (x.high << (64 - n)) | (x.low >> n) };
> + case 64 ...127: return (u128) { .high = 0, .low = x.high >> (n - 64) };
> + default: return (u128) { .high = 0, .low = 0 };
> + }
> +}
Ok.
> +static u128 u128_mul (u128 x, u128 y)
> +{
> + if (x.high == 0 && y.high == 0)
> + {
> + uint64_t x0 = x.low & MASK32;
> + uint64_t x1 = x.low >> 32;
> + uint64_t y0 = y.low & MASK32;
> + uint64_t y1 = y.low >> 32;
> + u128 x0y0 = { .high = 0, .low = x0 * y0 };
> + u128 x0y1 = { .high = 0, .low = x0 * y1 };
> + u128 x1y0 = { .high = 0, .low = x1 * y0 };
> + u128 x1y1 = { .high = 0, .low = x1 * y1 };
> + /* x0y0 + ((x0y1 + x1y0) << 32) + (x1y1 << 64) */
> + return u128_add (u128_add (x0y0, u128_lshift (u128_add (x0y1,
> + x1y0),
> + 32)),
> + u128_lshift (x1y1, 64));
> + }
Ok.
> + else
> + {
> + uint64_t x0 = x.low & MASK32;
> + uint64_t x1 = x.low >> 32;
> + uint64_t x2 = x.high & MASK32;
> + uint64_t x3 = x.high >> 32;
> + uint64_t y0 = y.low & MASK32;
> + uint64_t y1 = y.low >> 32;
> + uint64_t y2 = y.high & MASK32;
> + uint64_t y3 = y.high >> 32;
> + u128 x0y0 = { .high = 0, .low = x0 * y0 };
> + u128 x0y1 = { .high = 0, .low = x0 * y1 };
> + u128 x0y2 = { .high = 0, .low = x0 * y2 };
> + u128 x0y3 = { .high = 0, .low = x0 * y3 };
> + u128 x1y0 = { .high = 0, .low = x1 * y0 };
> + u128 x1y1 = { .high = 0, .low = x1 * y1 };
> + u128 x1y2 = { .high = 0, .low = x1 * y2 };
> + u128 x2y0 = { .high = 0, .low = x2 * y0 };
> + u128 x2y1 = { .high = 0, .low = x2 * y1 };
> + u128 x3y0 = { .high = 0, .low = x3 * y0 };
> + /* x0y0 + ((x0y1 + x1y0) << 32) + ((x0y2 + x1y1 + x2y0) << 64) +
> + ((x0y3 + x1y2 + x2y1 + x3y0) << 96) */
> + u128 r0 = u128_add (x0y0,
> + u128_lshift (u128_add (x0y1, x1y0),
> + 32));
> + u128 r1 = u128_add (u128_lshift (u128_add (u128_add (x0y2, x1y1), x2y0),
> + 64),
> + u128_lshift (u128_add (u128_add (x0y3, x1y2),
> + u128_add (x2y1, x3y0)),
> + 96));
> + return u128_add (r0, r1);
> + }
> +}
> +#endif /* __SIZEOF_INT128__ */
> +
> +#endif
Ok.
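One optional suggestion: a tiny self-test of the fallback path would be
cheap to keep around.  A hypothetical standalone sketch (needs the right -I
paths for the internal header; the expected values are easy to verify by
hand):

  #include <assert.h>
  #include <stdint.h>
  #include <math_int128.h>

  int
  main (void)
  {
    /* (2^64 - 1)^2 = 2^128 - 2^65 + 1.  */
    u128 p = u128_mul (u128_from_u64 (UINT64_MAX),
                       u128_from_u64 (UINT64_MAX));
    assert (u128_high (p) == UINT64_C (0xfffffffffffffffe));
    assert (u128_low (p) == 1);

    /* Shifting bit 0 left by 96 lands it in bit 32 of the high word,
       and shifting back recovers the original value.  */
    u128 v = u128_lshift (u128_from_u64 (1), 96);
    assert (u128_high (v) == UINT64_C (1) << 32 && u128_low (v) == 0);
    u128 w = u128_rshift (v, 96);
    assert (u128_high (w) == 0 && u128_low (w) == 1);

    /* Carry propagation: (2^64 - 1) + 1 = 2^64.  */
    u128 s = u128_add (u128_from_u64 (UINT64_MAX), u128_from_u64 (1));
    assert (u128_high (s) == 1 && u128_low (s) == 0);
    return 0;
  }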
> diff --git a/sysdeps/hppa/fpu/libm-test-ulps b/sysdeps/hppa/fpu/libm-test-ulps
> diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
> diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
Ok.
> diff --git a/sysdeps/ieee754/flt-32/k_tanf.c b/sysdeps/ieee754/flt-32/k_tanf.c
> -/* k_tanf.c -- float version of k_tan.c
> - */
> . . .
> -}
> +/* Not needed. */
Ok.
> diff --git a/sysdeps/ieee754/flt-32/s_tanf.c b/sysdeps/ieee754/flt-32/s_tanf.c
> index ae6600bd57..41fd8fe496 100644
> --- a/sysdeps/ieee754/flt-32/s_tanf.c
> +++ b/sysdeps/ieee754/flt-32/s_tanf.c
> @@ -1,76 +1,176 @@
> -/* s_tanf.c -- float version of s_tan.c.
> - */
> +/* Correctly-rounded tangent of binary32 value.
>
> -/*
> - * ====================================================
> - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
> - *
> - * Developed at SunPro, a Sun Microsystems, Inc. business.
> - * Permission to use, copy, modify, and distribute this
> - * software is freely granted, provided that this notice
> - * is preserved.
> - * ====================================================
> - */
> +Copyright (c) 2022-2024 Alexei Sibidanov.
>
> -#if defined(LIBM_SCCS) && !defined(lint)
> -static char rcsid[] = "$NetBSD: s_tanf.c,v 1.4 1995/05/10 20:48:20 jtc Exp $";
> -#endif
> +The original version of this file was copied from the CORE-MATH
> +project (file src/binary32/tan/tanf.c, revision 59d21d7).
>
> -#include <errno.h>
> -#include <math.h>
> -#include <math_private.h>
> +Permission is hereby granted, free of charge, to any person obtaining a copy
> +of this software and associated documentation files (the "Software"), to deal
> +in the Software without restriction, including without limitation the rights
> +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> +copies of the Software, and to permit persons to whom the Software is
> +furnished to do so, subject to the following conditions:
> +
> +The above copyright notice and this permission notice shall be included in all
> +copies or substantial portions of the Software.
> +
> +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> +SOFTWARE.
> +*/
Ok.
> +
> +#include <array_length.h>
> +#include <stdint.h>
> #include <libm-alias-float.h>
> -#include "s_sincosf.h"
> +#include "math_config.h"
> +#include <math_int128.h>
Ok.
> -/* Reduce range of X to a multiple of PI/2. The modulo result is between
> - -PI/4 and PI/4 and returned as a high part y[0] and a low part y[1].
> - The low bit in the return value indicates the first or 2nd half of tanf. */
> -static inline int32_t
> -rem_pio2f (float x, float *y)
Ok.
> +static inline double
> +rltl (float z, int *q)
This function needs a comment saying what its purpose is; I can't guess it
from "rltl", nor from the contents or usage.
> {
> - double dx = x;
> - int n;
> - const sincos_t *p = &__sincosf_table[0];
Ok.
> + double x = z;
> + double idl = -0x1.b1bbead603d8bp-32 * x;
> + double idh = 0x1.45f306ep-1 * x;
> + double id = roundeven (idh);
> + *q = (int64_t) id;
> + return (idh - id) + idl;
> +}
Ok.
> - if (__glibc_likely (abstop12 (x) < abstop12 (120.0f)))
> - dx = reduce_fast (dx, p, &n);
> - else
Ok.
> +static double __attribute__ ((noinline))
> +rbig (uint32_t u, int *q)
Same here (comment needed). Radians Bigger? Rotate Bigwise? Rhythm
and Blues Ignores Generations?
> +{
> + static const uint64_t ipi[] =
> {
> - uint32_t xi = asuint (x);
> - int sign = xi >> 31;
> -
> - dx = reduce_large (xi, &n);
> - dx = sign ? -dx : dx;
Ok.
> + 0xfe5163abdebbc562, 0xdb6295993c439041,
> + 0xfc2757d1f534ddc0, 0xa2f9836e4e441529
> + };
> + int e = (u >> 23) & 0xff, i;
> + uint64_t m = (u & (~0u >> 9)) | 1 << 23;
> + u128 p0 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[0]));
> + u128 p1 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[1]));
> + p1 = u128_add (p1, u128_rshift (p0, 64));
> + u128 p2 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[2]));
> + p2 = u128_add (p2, u128_rshift (p1, 64));
> + u128 p3 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[3]));
> + p3 = u128_add (p3, u128_rshift (p2, 64));
> + uint64_t p3h = u128_high (p3);
> + uint64_t p3l = u128_low (p3);
> + uint64_t p2l = u128_low (p2);
> + uint64_t p1l = u128_low (p1);
> + int64_t a;
> + int k = e - 127, s = k - 23;
Ok.
> + /* in ctanf(), rbig() is called in the case 127+28 <= e < 0xff
> + thus 155 <= e <= 254, which yields 28 <= k <= 127 and 5 <= s <= 104 */
I'm guessing this comment goes with the lines above it, not the
comparison below? If so, please include a blank line here ;-)
> + if (s < 64)
> + {
> + i = p3h << s | p3l >> (64 - s);
> + a = p3l << s | p2l >> (64 - s);
> }
Ok.
> -
> - y[0] = dx;
> - y[1] = dx - y[0];
> - return n;
Ok.
> + else if (s == 64)
> + {
> + i = p3l;
> + a = p2l;
> + }
> + else
> + { /* s > 64 */
> + i = p3l << (s - 64) | p2l >> (128 - s);
> + a = p2l << (s - 64) | p1l >> (128 - s);
> + }
Ok.
> + int sgn = u;
> + sgn >>= 31;
Ok.
> + int64_t sm = a >> 63;
> + i -= sm;
> + double z = (a ^ sgn) * 0x1p-64;
> + i = (i ^ sgn) - sgn;
> + *q = i;
> + return z;
> }
Ok.
> -float __tanf(float x)
> +float
> +__tanf (float x)
> {
Ok.
> - float y[2],z=0.0;
> - int32_t n, ix;
> -
> - GET_FLOAT_WORD(ix,x);
> -
> - /* |x| ~< pi/4 */
> - ix &= 0x7fffffff;
> - if(ix <= 0x3f490fda) return __kernel_tanf(x,z,1);
> -
> - /* tan(Inf or NaN) is NaN */
> - else if (ix>=0x7f800000) {
> - if (ix==0x7f800000)
> - __set_errno (EDOM);
> - return x-x; /* NaN */
Ok.
> + uint32_t t = asuint (x);
> + int e = (t >> 23) & 0xff;
> + int i;
> + double z;
> + if (__glibc_likely (e < 127 + 28))
> + {
> + if (__glibc_unlikely (e < 115))
> + {
> + if (__glibc_unlikely (e < 102))
> + return fmaf (x, fabsf (x), x);
> + float x2 = x * x;
> + return fmaf (x, 0x1.555556p-2f * x2, x);
> }
Ok.
> -
> - /* argument reduction needed */
> - else {
> - n = rem_pio2f(x,y);
> - return __kernel_tanf(y[0],y[1],1-((n&1)<<1)); /* 1 -- n even
> - -1 -- n odd */
Ok.
> + z = rltl (x, &i);
> + }
> + else if (e < 0xff)
> + z = rbig (t, &i);
> + else
> + {
> + if (t << 9)
> + return x + x; /* nan */
> + return __math_invalidf (x);
> + }
Ok.
> + double z2 = z * z;
> + double z4 = z2 * z2;
> + static const double cn[] =
> + {
> + 0x1.921fb54442d18p+0, -0x1.fd226e573289fp-2,
> + 0x1.b7a60c8dac9f6p-6, -0x1.725beb40f33e5p-13
> + };
> + static const double cd[] =
> + {
> + 0x1p+0, -0x1.2395347fb829dp+0,
> + 0x1.2313660f29c36p-3, -0x1.9a707ab98d1c1p-9
> + };
Ok.
> + static const double s[] = { 0, 1 };
> + double n = cn[0] + z2 * cn[1];
> + double n2 = cn[2] + z2 * cn[3];
> + n += z4 * n2;
> + double d = cd[0] + z2 * cd[1];
> + double d2 = cd[2] + z2 * cd[3];
> + d += z4 * d2;
> + n *= z;
> + double s0 = s[i & 1];
> + double s1 = s[1 - (i & 1)];
> + double r1 = (n * s1 - d * s0) / (n * s0 + d * s1);
Ok.
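One note for the archive, since the s[] indexing is a bit opaque on a first
read: s0/s1 select on the parity of the quadrant count i, so r1 is n/d for
even i and -d/n for odd i.  That appears to rely on the identity
tan(a + pi/2) = -1/tan(a).  A quick standalone illustration of the identity
(hypothetical, link with -lm; nothing to change in the patch):

  #include <math.h>
  #include <stdio.h>

  int
  main (void)
  {
    /* tan (a + pi/2) == -1/tan (a); the difference printed below should
       be on the order of 1e-15 or smaller (limited by the rounding of
       M_PI_2).  */
    double a = 0.3;
    printf ("%g\n", tan (a + M_PI_2) + 1.0 / tan (a));
    return 0;
  }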
> + uint64_t tail = (asuint64 (r1) + 7) & (~UINT64_C(0) >> 35);
> + if (__glibc_unlikely (tail <= 14))
> + {
> + static const struct
> + {
> + float arg;
> + float rh;
> + float rl;
> + } st[] = {
> + { 0x1.143ec4p+0f, 0x1.ddf9f6p+0f, -0x1.891d24p-52f },
> + { 0x1.ada6aap+27f, 0x1.e80304p-3f, 0x1.419f46p-58f },
> + { 0x1.af61dap+48f, 0x1.60d1c8p-2f, -0x1.2d6c3ap-55f },
> + { 0x1.0088bcp+52f, 0x1.ca1edp+0f, 0x1.f6053p-53f },
> + { 0x1.f90dfcp+72f, 0x1.597f9cp-1f, 0x1.925978p-53f },
> + { 0x1.cc4e22p+85f, -0x1.f33584p+1f, 0x1.d7254ap-51f },
> + { 0x1.a6ce12p+86f, -0x1.c5612ep-1f, -0x1.26c33ep-53f },
> + { 0x1.6a0b76p+102f, -0x1.e42a1ep+0f, -0x1.1dc906p-52f },
> + };
> + uint32_t ax = t & (~0u >> 1);
> + uint32_t sgn = t >> 31;
> + for (int j = 0; j < array_length (st); j++)
> + {
> + if (__glibc_unlikely (asfloat (st[j].arg) == ax))
> + {
> + if (sgn)
> + return -st[j].rh - st[j].rl;
> + else
> + return st[j].rh + st[j].rl;
> + }
Ok.
> }
> + }
> + return r1;
> }
> libm_alias_float (__tan, tan)
Ok.
> diff --git a/sysdeps/loongarch/lp64/libm-test-ulps b/sysdeps/loongarch/lp64/libm-test-ulps
> diff --git a/sysdeps/microblaze/libm-test-ulps b/sysdeps/microblaze/libm-test-ulps
> diff --git a/sysdeps/mips/mips32/libm-test-ulps b/sysdeps/mips/mips32/libm-test-ulps
> diff --git a/sysdeps/mips/mips64/libm-test-ulps b/sysdeps/mips/mips64/libm-test-ulps
> diff --git a/sysdeps/nios2/libm-test-ulps b/sysdeps/nios2/libm-test-ulps
> diff --git a/sysdeps/or1k/fpu/libm-test-ulps b/sysdeps/or1k/fpu/libm-test-ulps
> diff --git a/sysdeps/or1k/nofpu/libm-test-ulps b/sysdeps/or1k/nofpu/libm-test-ulps
> diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps
> diff --git a/sysdeps/powerpc/nofpu/libm-test-ulps b/sysdeps/powerpc/nofpu/libm-test-ulps
> diff --git a/sysdeps/riscv/nofpu/libm-test-ulps b/sysdeps/riscv/nofpu/libm-test-ulps
> diff --git a/sysdeps/riscv/rvd/libm-test-ulps b/sysdeps/riscv/rvd/libm-test-ulps
> diff --git a/sysdeps/s390/fpu/libm-test-ulps b/sysdeps/s390/fpu/libm-test-ulps
> diff --git a/sysdeps/sh/libm-test-ulps b/sysdeps/sh/libm-test-ulps
> diff --git a/sysdeps/sparc/fpu/libm-test-ulps b/sysdeps/sparc/fpu/libm-test-ulps
> diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
Ok.