[PATCH 11/11] math: Use tanf from CORE-MATH
Adhemerval Zanella Netto
adhemerval.zanella@linaro.org
Thu Nov 21 14:08:13 GMT 2024
On 21/11/24 00:19, DJ Delorie wrote:
>
> Various comments that need fixing.
>
> Otherwise LGTM
> Reviewed-by: DJ Delorie <dj@redhat.com>
>
> Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
>> The CORE-MATH implementation is correctly rounded (for any rounding mode)
>> and shows better performance to the generic tanf.
>> diff --git a/SHARED-FILES b/SHARED-FILES
>> index 033ce7f092..580e6b231a 100644
>> --- a/SHARED-FILES
>> +++ b/SHARED-FILES
>> @@ -288,3 +288,9 @@ sysdeps/ieee754/flt-32/e_lgammaf_r.c:
>> - remove the errno stuff (this is done by the wrapper)
>> - replace 0x1p127f * 0x1p127f by math_narrow_eval (x * 0x1p127f)
>> - add libm_alias_finite (__ieee754_lgammaf_r, __lgammaf_r) at the end
>> +sysdeps/ieee754/flt-32/s_tanf.c:
>> + (src/binary32/tan/tanf.cc in CORE-MATH)
>> + - The code was adapted to use glibc code style and internal
>> + functions to handle errno, overflow, and underflow. It was changed
>> + to use an internal wrapper for 128 bit unsigned integer operation
>> + for ABIs that do not support the type natively.
>
> Ok.
>
>> diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
>> diff --git a/sysdeps/alpha/fpu/libm-test-ulps b/sysdeps/alpha/fpu/libm-test-ulps
>> diff --git a/sysdeps/arc/fpu/libm-test-ulps b/sysdeps/arc/fpu/libm-test-ulps
>> diff --git a/sysdeps/arc/nofpu/libm-test-ulps b/sysdeps/arc/nofpu/libm-test-ulps
>> diff --git a/sysdeps/arm/libm-test-ulps b/sysdeps/arm/libm-test-ulps
>> diff --git a/sysdeps/csky/fpu/libm-test-ulps b/sysdeps/csky/fpu/libm-test-ulps
>> diff --git a/sysdeps/csky/nofpu/libm-test-ulps b/sysdeps/csky/nofpu/libm-test-ulps
>
> Ok.
>
>> diff --git a/sysdeps/generic/math_int128.h b/sysdeps/generic/math_int128.h
>
> If only unsigned is supported, this would make sense as math_uint128.h -
> unless the plan is to add signed int128 in the future.
Not right now; I will change it to math_uint128.h.
>
>> +/* Internal 128 bit int support.
>> + Copyright (C) 2024 Free Software Foundation, Inc.
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +#ifndef _MATH_INT128_H
>> +#define _MATH_INT128_H
>> +
>> +/* Limited support for internal 128 bit integer, used on some math
>> + implementations. It uses compiler builtin type if supported, otherwise
>> + it is emulate. Only unsigned and some operations are currently supported:
>
> "it is emulated" (spelling)
Ack.
>
>> +
>> + - u128_t: the 128 bit unsigned type.
>> + - u128_high: return the high part of the number.
>> + - u128_low: return the low part of the number.
>> + - u128_from_u64: createa 128 bit number from a 64 bit one.
>
> s/createa/create a/
Ack.
>
>> + - u128_mul: multiple two 128 bit numbers.
>
> "multiply" (spelling)
Ack.
(Paul has already pointed these out earlier.)
>
>> + - u128_add: add two 128 bit numbers.
>> + - u128_lshift: left shift a number.
>> + - u128_rshift: right shift a number.
>> + */
>
> Ok.
>
>> +#ifdef __SIZEOF_INT128__
>> +typedef unsigned __int128 u128;
>> +# define u128_high(__x) (uint64_t)((__x) >> 64)
>> +# define u128_low(__x) (uint64_t)(__x)
>> +# define u128_from_u64(__x) (u128)(__x)
>> +# define u128_mul(__x, __y) (__x) * (__y)
>> +# define u128_add(__x, __y) (__x) + (__y)
>> +# define u128_lshift(__x, __y) (__x) << (__y)
>> +# define u128_rshift(__x, __y) (__x) >> (__y)
>
> Ok.
>
>> +#else
>> +typedef struct
>> +{
>> + uint64_t low;
>> + uint64_t high;
>> +} u128;
>> +
>> +# define u128_high(__x) (__x).high
>> +# define u128_low(__x) (__x).low
>> +
>> +# define u128_from_u64(__x) (u128){.low = (__x), .high = 0}
>> +
>> +# define TOPBIT (UINT64_C(1) << 63)
>> +# define MASK32 (UINT64_C(0xffffffff))
>
> Ok.
>
>> +static u128 u128_add (u128 x, u128 y)
>> +{
>> + uint64_t lower = (x.low & ~TOPBIT) + (y.low & ~TOPBIT);
>> + bool carry = (lower >> 63) + (x.low >> 63) + (y.low >> 63) > 1;
>> + return (u128) { .high = x.high + y.high + carry, .low = x.low + y.low };
>> +}
>
> Ok.
>
>> +static u128 u128_lshift (u128 x, unsigned int n)
>> +{
>> + switch (n)
>> + {
>> + case 0: return x;
>> + case 1 ... 63: return (u128) { .high = (x.high << n) | (x.low >> (64 - n)),
>> + .low = x.low << n };
>> + case 64 ...127: return (u128) { .high = x.low << (n - 64), .low = 0};
>> + default: return (u128) { .high = 0, .low = 0 };
>> + }
>> +}
>
> Ok.
>
>> +static u128 u128_rshift (u128 x, unsigned int n)
>> +{
>> + switch (n)
>> + {
>> + case 0: return x;
>> + case 1 ... 63: return (u128) { .high = x.high >> n,
>> + .low = (x.high << (64 - n)) | (x.low >> n) };
>> + case 64 ...127: return (u128) { .high = 0, .low = x.high >> (n - 64) };
>> + default: return (u128) { .high = 0, .low = 0 };
>> + }
>> +}
>
> Ok.
>
>> +static u128 u128_mul (u128 x, u128 y)
>> +{
>> + if (x.high == 0 && y.high == 0)
>> + {
>> + uint64_t x0 = x.low & MASK32;
>> + uint64_t x1 = x.low >> 32;
>> + uint64_t y0 = y.low & MASK32;
>> + uint64_t y1 = y.low >> 32;
>> + u128 x0y0 = { .high = 0, .low = x0 * y0 };
>> + u128 x0y1 = { .high = 0, .low = x0 * y1 };
>> + u128 x1y0 = { .high = 0, .low = x1 * y0 };
>> + u128 x1y1 = { .high = 0, .low = x1 * y1 };
>> + /* x0y0 + ((x0y1 + x1y0) << 32) + (x1y1 << 64) */
>> + return u128_add (u128_add (x0y0, u128_lshift (u128_add (x0y1,
>> + x1y0),
>> + 32)),
>> + u128_lshift (x1y1, 64));
>> + }
>
> Ok.
>
>> + else
>> + {
>> + uint64_t x0 = x.low & MASK32;
>> + uint64_t x1 = x.low >> 32;
>> + uint64_t x2 = x.high & MASK32;
>> + uint64_t x3 = x.high >> 32;
>> + uint64_t y0 = y.low & MASK32;
>> + uint64_t y1 = y.low >> 32;
>> + uint64_t y2 = y.high & MASK32;
>> + uint64_t y3 = y.high >> 32;
>> + u128 x0y0 = { .high = 0, .low = x0 * y0 };
>> + u128 x0y1 = { .high = 0, .low = x0 * y1 };
>> + u128 x0y2 = { .high = 0, .low = x0 * y2 };
>> + u128 x0y3 = { .high = 0, .low = x0 * y3 };
>> + u128 x1y0 = { .high = 0, .low = x1 * y0 };
>> + u128 x1y1 = { .high = 0, .low = x1 * y1 };
>> + u128 x1y2 = { .high = 0, .low = x1 * y2 };
>> + u128 x2y0 = { .high = 0, .low = x2 * y0 };
>> + u128 x2y1 = { .high = 0, .low = x2 * y1 };
>> + u128 x3y0 = { .high = 0, .low = x3 * y0 };
>> + /* x0y0 + ((x0y1 + x1y0) << 32) + ((x0y2 + x1y1 + x2y0) << 64) +
>> + ((x0y3 + x1y2 + x2y1 + x3y0) << 96) */
>> + u128 r0 = u128_add (x0y0,
>> + u128_lshift (u128_add (x0y1, x1y0),
>> + 32));
>> + u128 r1 = u128_add (u128_lshift (u128_add (u128_add (x0y2, x1y1), x2y0),
>> + 64),
>> + u128_lshift (u128_add (u128_add (x0y3, x1y2),
>> + u128_add (x2y1, x3y0)),
>> + 96));
>> + return u128_add (r0, r1);
>> + }
>> +}
>> +#endif /* __SIZEOF_INT128__ */
>> +
>> +#endif
>
> Ok.
>
>> diff --git a/sysdeps/hppa/fpu/libm-test-ulps b/sysdeps/hppa/fpu/libm-test-ulps
>> diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
>> diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
>
> Ok.
>
>> diff --git a/sysdeps/ieee754/flt-32/k_tanf.c b/sysdeps/ieee754/flt-32/k_tanf.c
>> -/* k_tanf.c -- float version of k_tan.c
>> - */
>> . . .
>> -}
>> +/* Not needed. */
>
> Ok.
>
>> diff --git a/sysdeps/ieee754/flt-32/s_tanf.c b/sysdeps/ieee754/flt-32/s_tanf.c
>> index ae6600bd57..41fd8fe496 100644
>> --- a/sysdeps/ieee754/flt-32/s_tanf.c
>> +++ b/sysdeps/ieee754/flt-32/s_tanf.c
>> @@ -1,76 +1,176 @@
>> -/* s_tanf.c -- float version of s_tan.c.
>> - */
>> +/* Correctly-rounded tangent of binary32 value.
>>
>> -/*
>> - * ====================================================
>> - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
>> - *
>> - * Developed at SunPro, a Sun Microsystems, Inc. business.
>> - * Permission to use, copy, modify, and distribute this
>> - * software is freely granted, provided that this notice
>> - * is preserved.
>> - * ====================================================
>> - */
>> +Copyright (c) 2022-2024 Alexei Sibidanov.
>>
>> -#if defined(LIBM_SCCS) && !defined(lint)
>> -static char rcsid[] = "$NetBSD: s_tanf.c,v 1.4 1995/05/10 20:48:20 jtc Exp $";
>> -#endif
>> +The original version of this file was copied from the CORE-MATH
>> +project (file src/binary32/tan/tanf.c, revision 59d21d7).
>>
>> -#include <errno.h>
>> -#include <math.h>
>> -#include <math_private.h>
>> +Permission is hereby granted, free of charge, to any person obtaining a copy
>> +of this software and associated documentation files (the "Software"), to deal
>> +in the Software without restriction, including without limitation the rights
>> +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>> +copies of the Software, and to permit persons to whom the Software is
>> +furnished to do so, subject to the following conditions:
>> +
>> +The above copyright notice and this permission notice shall be included in all
>> +copies or substantial portions of the Software.
>> +
>> +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
>> +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
>> +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
>> +SOFTWARE.
>> +*/
>
> Ok.
>
>> +
>> +#include <array_length.h>
>> +#include <stdint.h>
>> #include <libm-alias-float.h>
>> -#include "s_sincosf.h"
>> +#include "math_config.h"
>> +#include <math_int128.h>
>
> Ok.
>
>> -/* Reduce range of X to a multiple of PI/2. The modulo result is between
>> - -PI/4 and PI/4 and returned as a high part y[0] and a low part y[1].
>> - The low bit in the return value indicates the first or 2nd half of tanf. */
>> -static inline int32_t
>> -rem_pio2f (float x, float *y)
>
> Ok.
>
>> +static inline double
>> +rltl (float z, int *q)
>
> Function needs a comment saying what its purpose is? I can't guess from
> "rltl" nor from the contents or usage.
>
>> {
>> - double dx = x;
>> - int n;
>> - const sincos_t *p = &__sincosf_table[0];
>
> Ok.
>
>> + double x = z;
>> + double idl = -0x1.b1bbead603d8bp-32 * x;
>> + double idh = 0x1.45f306ep-1 * x;
>> + double id = roundeven (idh);
>> + *q = (int64_t) id;
>> + return (idh - id) + idl;
>> +}
>
> Ok.
>
>> - if (__glibc_likely (abstop12 (x) < abstop12 (120.0f)))
>> - dx = reduce_fast (dx, p, &n);
>> - else
>
> Ok.
>
>> +static double __attribute__ ((noinline))
>> +rbig (uint32_t u, int *q)
>
> Same here (comment needed). Radians Bigger? Rotate Bigwise? Rhythm
> and Blues Ignores Generations?
Paul has improved the comments on CORE-MATH commit d59182eaf842164626465186427e18d9924ec80b,
and I have added:
+/* argument reduction
+ for |z| < 2^28, return r such that 2/pi*x = q + r */
static inline double
rltl (float z, int *q)
{
@@ -41,6 +43,8 @@ rltl (float z, int *q)
return (idh - id) + idl;
}
+/* argument reduction
+ same as rltl, but for |x| >= 2^28 */
static double __attribute__ ((noinline))
rbig (uint32_t u, int *q)
{
>
>> +{
>> + static const uint64_t ipi[] =
>> {
>> - uint32_t xi = asuint (x);
>> - int sign = xi >> 31;
>> -
>> - dx = reduce_large (xi, &n);
>> - dx = sign ? -dx : dx;
>
> Ok.
>
>> + 0xfe5163abdebbc562, 0xdb6295993c439041,
>> + 0xfc2757d1f534ddc0, 0xa2f9836e4e441529
>> + };
>> + int e = (u >> 23) & 0xff, i;
>> + uint64_t m = (u & (~0u >> 9)) | 1 << 23;
>> + u128 p0 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[0]));
>> + u128 p1 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[1]));
>> + p1 = u128_add (p1, u128_rshift (p0, 64));
>> + u128 p2 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[2]));
>> + p2 = u128_add (p2, u128_rshift (p1, 64));
>> + u128 p3 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[3]));
>> + p3 = u128_add (p3, u128_rshift (p2, 64));
>> + uint64_t p3h = u128_high (p3);
>> + uint64_t p3l = u128_low (p3);
>> + uint64_t p2l = u128_low (p2);
>> + uint64_t p1l = u128_low (p1);
>> + int64_t a;
>> + int k = e - 127, s = k - 23;
>
> Ok.
>
>> + /* in ctanf(), rbig() is called in the case 127+28 <= e < 0xff
>> + thus 155 <= e <= 254, which yields 28 <= k <= 127 and 5 <= s <= 104 */
>
> I'm guessing this comment goes with the lines above it, not the
> comparison below? If so, please include a blank line here ;-)
>
>> + if (s < 64)
>> + {
>> + i = p3h << s | p3l >> (64 - s);
>> + a = p3l << s | p2l >> (64 - s);
>> }
>
> Ok.
>
>> -
>> - y[0] = dx;
>> - y[1] = dx - y[0];
>> - return n;
>
> Ok.
>
>> + else if (s == 64)
>> + {
>> + i = p3l;
>> + a = p2l;
>> + }
>> + else
>> + { /* s > 64 */
>> + i = p3l << (s - 64) | p2l >> (128 - s);
>> + a = p2l << (s - 64) | p1l >> (128 - s);
>> + }
>
> Ok.
>
>> + int sgn = u;
>> + sgn >>= 31;
>
> Ok.
>
>> + int64_t sm = a >> 63;
>> + i -= sm;
>> + double z = (a ^ sgn) * 0x1p-64;
>> + i = (i ^ sgn) - sgn;
>> + *q = i;
>> + return z;
>> }
>
> Ok.
>
>> -float __tanf(float x)
>> +float
>> +__tanf (float x)
>> {
>
> Ok.
>
>> - float y[2],z=0.0;
>> - int32_t n, ix;
>> -
>> - GET_FLOAT_WORD(ix,x);
>> -
>> - /* |x| ~< pi/4 */
>> - ix &= 0x7fffffff;
>> - if(ix <= 0x3f490fda) return __kernel_tanf(x,z,1);
>> -
>> - /* tan(Inf or NaN) is NaN */
>> - else if (ix>=0x7f800000) {
>> - if (ix==0x7f800000)
>> - __set_errno (EDOM);
>> - return x-x; /* NaN */
>
> Ok.
>
>> + uint32_t t = asuint (x);
>> + int e = (t >> 23) & 0xff;
>> + int i;
>> + double z;
>> + if (__glibc_likely (e < 127 + 28))
>> + {
>> + if (__glibc_unlikely (e < 115))
>> + {
>> + if (__glibc_unlikely (e < 102))
>> + return fmaf (x, fabsf (x), x);
>> + float x2 = x * x;
>> + return fmaf (x, 0x1.555556p-2f * x2, x);
>> }
>
> Ok.
>
>> -
>> - /* argument reduction needed */
>> - else {
>> - n = rem_pio2f(x,y);
>> - return __kernel_tanf(y[0],y[1],1-((n&1)<<1)); /* 1 -- n even
>> - -1 -- n odd */
>
> Ok.
>
>> + z = rltl (x, &i);
>> + }
>> + else if (e < 0xff)
>> + z = rbig (t, &i);
>> + else
>> + {
>> + if (t << 9)
>> + return x + x; /* nan */
>> + return __math_invalidf (x);
>> + }
>
> Ok.
>
>> + double z2 = z * z;
>> + double z4 = z2 * z2;
>> + static const double cn[] =
>> + {
>> + 0x1.921fb54442d18p+0, -0x1.fd226e573289fp-2,
>> + 0x1.b7a60c8dac9f6p-6, -0x1.725beb40f33e5p-13
>> + };
>> + static const double cd[] =
>> + {
>> + 0x1p+0, -0x1.2395347fb829dp+0,
>> + 0x1.2313660f29c36p-3, -0x1.9a707ab98d1c1p-9
>> + };
>
> Ok.
>
>> + static const double s[] = { 0, 1 };
>> + double n = cn[0] + z2 * cn[1];
>> + double n2 = cn[2] + z2 * cn[3];
>> + n += z4 * n2;
>> + double d = cd[0] + z2 * cd[1];
>> + double d2 = cd[2] + z2 * cd[3];
>> + d += z4 * d2;
>> + n *= z;
>> + double s0 = s[i & 1];
>> + double s1 = s[1 - (i & 1)];
>> + double r1 = (n * s1 - d * s0) / (n * s0 + d * s1);
>
> Ok.
>
>> + uint64_t tail = (asuint64 (r1) + 7) & (~UINT64_C(0) >> 35);
>> + if (__glibc_unlikely (tail <= 14))
>> + {
>> + static const struct
>> + {
>> + float arg;
>> + float rh;
>> + float rl;
>> + } st[] = {
>> + { 0x1.143ec4p+0f, 0x1.ddf9f6p+0f, -0x1.891d24p-52f },
>> + { 0x1.ada6aap+27f, 0x1.e80304p-3f, 0x1.419f46p-58f },
>> + { 0x1.af61dap+48f, 0x1.60d1c8p-2f, -0x1.2d6c3ap-55f },
>> + { 0x1.0088bcp+52f, 0x1.ca1edp+0f, 0x1.f6053p-53f },
>> + { 0x1.f90dfcp+72f, 0x1.597f9cp-1f, 0x1.925978p-53f },
>> + { 0x1.cc4e22p+85f, -0x1.f33584p+1f, 0x1.d7254ap-51f },
>> + { 0x1.a6ce12p+86f, -0x1.c5612ep-1f, -0x1.26c33ep-53f },
>> + { 0x1.6a0b76p+102f, -0x1.e42a1ep+0f, -0x1.1dc906p-52f },
>> + };
>> + uint32_t ax = t & (~0u >> 1);
>> + uint32_t sgn = t >> 31;
>> + for (int j = 0; j < array_length (st); j++)
>> + {
>> + if (__glibc_unlikely (asfloat (st[j].arg) == ax))
>> + {
>> + if (sgn)
>> + return -st[j].rh - st[j].rl;
>> + else
>> + return st[j].rh + st[j].rl;
>> + }
>
> Ok.
>
>> }
>> + }
>> + return r1;
>> }
>> libm_alias_float (__tan, tan)
>
> Ok.
>
>> diff --git a/sysdeps/loongarch/lp64/libm-test-ulps b/sysdeps/loongarch/lp64/libm-test-ulps
>> diff --git a/sysdeps/microblaze/libm-test-ulps b/sysdeps/microblaze/libm-test-ulps
>> diff --git a/sysdeps/mips/mips32/libm-test-ulps b/sysdeps/mips/mips32/libm-test-ulps
>> diff --git a/sysdeps/mips/mips64/libm-test-ulps b/sysdeps/mips/mips64/libm-test-ulps
>> diff --git a/sysdeps/nios2/libm-test-ulps b/sysdeps/nios2/libm-test-ulps
>> diff --git a/sysdeps/or1k/fpu/libm-test-ulps b/sysdeps/or1k/fpu/libm-test-ulps
>> diff --git a/sysdeps/or1k/nofpu/libm-test-ulps b/sysdeps/or1k/nofpu/libm-test-ulps
>> diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps
>> diff --git a/sysdeps/powerpc/nofpu/libm-test-ulps b/sysdeps/powerpc/nofpu/libm-test-ulps
>> diff --git a/sysdeps/riscv/nofpu/libm-test-ulps b/sysdeps/riscv/nofpu/libm-test-ulps
>> diff --git a/sysdeps/riscv/rvd/libm-test-ulps b/sysdeps/riscv/rvd/libm-test-ulps
>> diff --git a/sysdeps/s390/fpu/libm-test-ulps b/sysdeps/s390/fpu/libm-test-ulps
>> diff --git a/sysdeps/sh/libm-test-ulps b/sysdeps/sh/libm-test-ulps
>> diff --git a/sysdeps/sparc/fpu/libm-test-ulps b/sysdeps/sparc/fpu/libm-test-ulps
>> diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
>
> Ok.
>
More information about the Libc-alpha
mailing list