+++ /dev/null
-/* ix87 specific implementation of exp(x)-1.
- Copyright (C) 1996-2024 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
- /* Using: e^x - 1 = 2^(x * log2(e)) - 1 */
-
-#include <sysdep.h>
-#include <machine/asm.h>
-#include <i386-math-asm.h>
-#include <libm-alias-float.h>
-
- .section .rodata
-
- .align ALIGNARG(4)
- .type minus1,@object
-minus1: .double -1.0
- ASM_SIZE_DIRECTIVE(minus1)
- .type one,@object
-one: .double 1.0
- ASM_SIZE_DIRECTIVE(one)
- .type l2e,@object
-l2e: .quad 0xb8aa3b295c17f0bc /* 1.442695040888963407359924681002 */
- .short 0x3fff
- ASM_SIZE_DIRECTIVE(l2e)
-
-DEFINE_FLT_MIN
-
-#ifdef PIC
-#define MO(op) op##@GOTOFF(%edx)
-#else
-#define MO(op) op
-#endif
-
- .text
-ENTRY(__expm1f)
- movzwl 4+2(%esp), %eax
- xorb $0x80, %ah // invert sign bit (now 1 is "positive")
- cmpl $0xc2b1, %eax // is num >= 88.5?
- jae HIDDEN_JUMPTARGET (__expf)
-
- flds 4(%esp) // x
- fxam // Is NaN, +-Inf or +-0?
- xorb $0x80, %ah
- cmpl $0xc190, %eax // is num <= -18.0?
- fstsw %ax
- movb $0x45, %ch
- jb 4f
-
- // Below -18.0 (may be -NaN or -Inf).
- andb %ah, %ch
-#ifdef PIC
- LOAD_PIC_REG (dx)
-#endif
- cmpb $0x01, %ch
- je 5f // If -NaN, jump.
- jmp 2f // -large, possibly -Inf.
-
-4: // In range -18.0 to 88.5 (may be +-0 but not NaN or +-Inf).
- andb %ah, %ch
- cmpb $0x40, %ch
- je 3f // If +-0, jump.
-#ifdef PIC
- LOAD_PIC_REG (dx)
-#endif
-
-5: fldt MO(l2e) // log2(e) : x
- fmulp // log2(e)*x
- fld %st // log2(e)*x : log2(e)*x
- // Set round-to-nearest temporarily.
- subl $8, %esp
- cfi_adjust_cfa_offset (8)
- fstcw 4(%esp)
- movl $0xf3ff, %ecx
- andl 4(%esp), %ecx
- movl %ecx, (%esp)
- fldcw (%esp)
- frndint // int(log2(e)*x) : log2(e)*x
- fldcw 4(%esp)
- addl $8, %esp
- cfi_adjust_cfa_offset (-8)
- fsubr %st, %st(1) // int(log2(e)*x) : fract(log2(e)*x)
- fxch // fract(log2(e)*x) : int(log2(e)*x)
- f2xm1 // 2^fract(log2(e)*x)-1 : int(log2(e)*x)
- fscale // 2^(log2(e)*x)-2^int(log2(e)*x) : int(log2(e)*x)
- fxch // int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
- fldl MO(one) // 1 : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
- fscale // 2^int(log2(e)*x) : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
- fsubrl MO(one) // 1-2^int(log2(e)*x) : int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
- fstp %st(1) // 1-2^int(log2(e)*x) : 2^(log2(e)*x)-2^int(log2(e)*x)
- fsubrp %st, %st(1) // 2^(log2(e)*x)
- FLT_CHECK_FORCE_UFLOW
- ret
-
-2: fstp %st
- fldl MO(minus1) // Set result to -1.0.
-3: ret
-END(__expm1f)
-libm_alias_float (__expm1, expm1)
-/* s_expm1f.c -- float version of s_expm1.c.
- */
+/* Correctly-rounded natural exponent function biased by 1 for binary32 value.
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
+Copyright (c) 2022-2024 Alexei Sibidanov.
+
+This file is part of the CORE-MATH project
+project (file src/binary32/expm1/expm1f.c, revision bc385c2).
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
-#include <errno.h>
-#include <float.h>
#include <math.h>
-#include <math-barriers.h>
-#include <math_private.h>
#include <math-underflow.h>
#include <libm-alias-float.h>
-
-static const float huge = 1.0e+30;
-static const float tiny = 1.0e-30;
-
-static const float
-one = 1.0,
-o_threshold = 8.8721679688e+01,/* 0x42b17180 */
-ln2_hi = 6.9313812256e-01,/* 0x3f317180 */
-ln2_lo = 9.0580006145e-06,/* 0x3717f7d1 */
-invln2 = 1.4426950216e+00,/* 0x3fb8aa3b */
- /* scaled coefficients related to expm1 */
-Q1 = -3.3333335072e-02, /* 0xbd088889 */
-Q2 = 1.5873016091e-03, /* 0x3ad00d01 */
-Q3 = -7.9365076090e-05, /* 0xb8a670cd */
-Q4 = 4.0082177293e-06, /* 0x36867e54 */
-Q5 = -2.0109921195e-07; /* 0xb457edbb */
+#include "math_config.h"
float
-__expm1f(float x)
+__expm1f (float x)
{
- float y,hi,lo,c,t,e,hxs,hfx,r1;
- int32_t k,xsb;
- uint32_t hx;
-
- GET_FLOAT_WORD(hx,x);
- xsb = hx&0x80000000; /* sign bit of x */
- if(xsb==0) y=x; else y= -x; /* y = |x| */
- hx &= 0x7fffffff; /* high word of |x| */
-
- /* filter out huge and non-finite argument */
- if(hx >= 0x4195b844) { /* if |x|>=27*ln2 */
- if(hx >= 0x42b17218) { /* if |x|>=88.721... */
- if(hx>0x7f800000)
- return x+x; /* NaN */
- if(hx==0x7f800000)
- return (xsb==0)? x:-1.0;/* exp(+-inf)={inf,-1} */
- if(x > o_threshold) {
- __set_errno (ERANGE);
- return huge*huge; /* overflow */
- }
- }
- if(xsb!=0) { /* x < -27*ln2, return -1.0 with inexact */
- math_force_eval(x+tiny);/* raise inexact */
- return tiny-one; /* return -1 */
- }
- }
-
- /* argument reduction */
- if(hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
- if(hx < 0x3F851592) { /* and |x| < 1.5 ln2 */
- if(xsb==0)
- {hi = x - ln2_hi; lo = ln2_lo; k = 1;}
- else
- {hi = x + ln2_hi; lo = -ln2_lo; k = -1;}
- } else {
- k = invln2*x+((xsb==0)?(float)0.5:(float)-0.5);
- t = k;
- hi = x - t*ln2_hi; /* t*ln2_hi is exact here */
- lo = t*ln2_lo;
- }
- x = hi - lo;
- c = (hi-x)-lo;
- }
- else if(hx < 0x33000000) { /* when |x|<2**-25, return x */
- math_check_force_underflow (x);
- t = huge+x; /* return x with inexact flags when x!=0 */
- return x - (t-(huge+x));
+ static const double c[] =
+ {
+ 1, 0x1.62e42fef4c4e7p-6, 0x1.ebfd1b232f475p-13, 0x1.c6b19384ecd93p-20
+ };
+ static const double ch[] =
+ {
+ 0x1.62e42fefa39efp-6, 0x1.ebfbdff82c58fp-13, 0x1.c6b08d702e0edp-20,
+ 0x1.3b2ab6fb92e5ep-27, 0x1.5d886e6d54203p-35, 0x1.430976b8ce6efp-43
+ };
+ static const double td[] =
+ {
+ 0x1p+0, 0x1.059b0d3158574p+0, 0x1.0b5586cf9890fp+0,
+ 0x1.11301d0125b51p+0, 0x1.172b83c7d517bp+0, 0x1.1d4873168b9aap+0,
+ 0x1.2387a6e756238p+0, 0x1.29e9df51fdee1p+0, 0x1.306fe0a31b715p+0,
+ 0x1.371a7373aa9cbp+0, 0x1.3dea64c123422p+0, 0x1.44e086061892dp+0,
+ 0x1.4bfdad5362a27p+0, 0x1.5342b569d4f82p+0, 0x1.5ab07dd485429p+0,
+ 0x1.6247eb03a5585p+0, 0x1.6a09e667f3bcdp+0, 0x1.71f75e8ec5f74p+0,
+ 0x1.7a11473eb0187p+0, 0x1.82589994cce13p+0, 0x1.8ace5422aa0dbp+0,
+ 0x1.93737b0cdc5e5p+0, 0x1.9c49182a3f09p+0, 0x1.a5503b23e255dp+0,
+ 0x1.ae89f995ad3adp+0, 0x1.b7f76f2fb5e47p+0, 0x1.c199bdd85529cp+0,
+ 0x1.cb720dcef9069p+0, 0x1.d5818dcfba487p+0, 0x1.dfc97337b9b5fp+0,
+ 0x1.ea4afa2a490dap+0, 0x1.f50765b6e454p+0
+ };
+ const double iln2 = 0x1.71547652b82fep+5;
+ const double big = 0x1.8p52;
+ double z = x;
+ uint32_t ux = asuint (x);
+ uint32_t ax = ux << 1;
+ if (__glibc_likely (ax < 0x7c400000u))
+ { /* |x| < 0.15625 */
+ if (__glibc_unlikely (ax < 0x676a09e8u))
+ { /* |x| < 0x1.6a09e8p-24 */
+ if (__glibc_unlikely (ax == 0x0u))
+ return x; /* x = +-0 */
+ return fmaf (fabsf (x), 0x1p-25f, x);
}
- else k = 0;
-
- /* x is now in primary range */
- hfx = (float)0.5*x;
- hxs = x*hfx;
- r1 = one+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5))));
- t = (float)3.0-r1*hfx;
- e = hxs*((r1-t)/((float)6.0 - x*t));
- if(k==0) return x - (x*e-hxs); /* c is 0 */
- else {
- e = (x*(e-c)-c);
- e -= hxs;
- if(k== -1) return (float)0.5*(x-e)-(float)0.5;
- if(k==1) {
- if(x < (float)-0.25) return -(float)2.0*(e-(x+(float)0.5));
- else return one+(float)2.0*(x-e);
- }
- if (k <= -2 || k>56) { /* suffice to return exp(x)-1 */
- int32_t i;
- y = one-(e-x);
- GET_FLOAT_WORD(i,y);
- SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */
- return y-one;
- }
- t = one;
- if(k<23) {
- int32_t i;
- SET_FLOAT_WORD(t,0x3f800000 - (0x1000000>>k)); /* t=1-2^-k */
- y = t-(e-x);
- GET_FLOAT_WORD(i,y);
- SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */
- } else {
- int32_t i;
- SET_FLOAT_WORD(t,((0x7f-k)<<23)); /* 2^-k */
- y = x-(e+t);
- y += one;
- GET_FLOAT_WORD(i,y);
- SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */
- }
+ static const double b[] =
+ {
+ 0x1.fffffffffffc2p-2, 0x1.55555555555fep-3, 0x1.555555559767fp-5,
+ 0x1.1111111098dc1p-7, 0x1.6c16bca988aa9p-10, 0x1.a01a07658483fp-13,
+ 0x1.a05b04d2c3503p-16, 0x1.71de3a960b5e3p-19
+ };
+ double z2 = z * z, z4 = z2 * z2;
+ double r = z + z2
+ * ((b[0] + z * b[1]) + z2 * (b[2] + z * b[3])
+ + z4 * ((b[4] + z * b[5]) + z2 * (b[6] + z * b[7])));
+ return r;
+ }
+ if (__glibc_unlikely (ax >= 0x8562e430u))
+ { /* |x| > 88.72 */
+ if (ax > (0xffu << 24))
+ return x + x; /* nan */
+ if (__glibc_unlikely (ux >> 31))
+ { /* x < 0 */
+ if (ax == (0xffu << 24))
+ return -1.0f;
+ return -1.0f + 0x1p-26f;
}
- return y;
+ if (ax == (0xffu << 24))
+ return INFINITY;
+ return __math_oflowf (0);
+ }
+ double a = iln2 * z;
+ double ia = roundeven (a);
+ double h = a - ia;
+ double h2 = h * h;
+ uint64_t u = asuint64 (ia + big);
+ double c2 = c[2] + h * c[3], c0 = c[0] + h * c[1];
+ const uint64_t *tdl = (uint64_t *) ((void *) td);
+ double sv = asdouble (tdl[u & 0x1f] + ((u >> 5) << 52));
+ double r = (c0 + h2 * c2) * sv - 1.0;
+ float ub = r, lb = r - sv * 0x1.3b3p-33;
+ if (__glibc_unlikely (ub != lb))
+ {
+ if (__glibc_unlikely (ux > 0xc18aa123u)) /* x < -17.32 */
+ return -1.0f + 0x1p-26f;
+ const double iln2h = 0x1.7154765p+5;
+ const double iln2l = 0x1.5c17f0bbbe88p-26;
+ double s = sv;
+ h = (iln2h * z - ia) + iln2l * z;
+ h2 = h * h;
+ double w = s * h;
+ r = (s - 1) + w
+ * ((ch[0] + h * ch[1])
+ + h2 * ((ch[2] + h * ch[3]) + h2 * (ch[4] + h * ch[5])));
+ ub = r;
+ }
+ return ub;
}
libm_alias_float (__expm1, expm1)