double and single precision arithmetics has similar latency and it
has no legacy SVID matherr support, only POSIX errno and fenv
exception based error handling.
-
- __HAVE_FAST_FMA_DEFAULT
-
- Default value for __HAVE_FAST_FMA if that's not set by the user.
- It should be set here based on predefined feature macros.
-
- __HAVE_FAST_FMA
-
- It should be set to 1 if the compiler can inline an fma call as a
- single instruction. Some math code has a separate faster code
- path assuming the target has single instruction fma.
*/
#if (defined(__arm__) || defined(__thumb__)) && !defined(__MAVERICK__)
# endif
# if __ARM_FP & 0x8
# define __OBSOLETE_MATH_DEFAULT 0
-# if __ARM_FEATURE_FMA
-# define __HAVE_FAST_FMA_DEFAULT 1
-# endif
# endif
#else
# define __IEEE_BIG_ENDIAN
#define __IEEE_BIG_ENDIAN
#endif
#define __OBSOLETE_MATH_DEFAULT 0
-#define __HAVE_FAST_FMA_DEFAULT 1
#endif
#ifdef __epiphany__
#define __OBSOLETE_MATH __OBSOLETE_MATH_DEFAULT
#endif
-#ifndef __HAVE_FAST_FMA_DEFAULT
-/* Assume slow fma by default. */
-#define __HAVE_FAST_FMA_DEFAULT 0
-#endif
-#ifndef __HAVE_FAST_FMA
-#define __HAVE_FAST_FMA __HAVE_FAST_FMA_DEFAULT
-#endif
-
#ifndef __IEEE_BIG_ENDIAN
#ifndef __IEEE_LITTLE_ENDIAN
#error Endianess not declared!!
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
-#if __HAVE_FAST_FMA
+#if HAVE_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = fma (z, invc, -1.0);
#else
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
return 0;
r = x - 1.0;
-#if __HAVE_FAST_FMA
+#if HAVE_FAST_FMA
hi = r * InvLn2hi;
lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
#else
/* log2(x) = log2(z/c) + log2(c) + k. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
-#if __HAVE_FAST_FMA
+#if HAVE_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = fma (z, invc, -1.0);
t1 = r * InvLn2hi;
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
#endif
},
-#if !__HAVE_FAST_FMA
+#if !HAVE_FAST_FMA
.tab2 = {
# if N == 64
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
# endif
},
-#endif /* !__HAVE_FAST_FMA */
+#endif /* !HAVE_FAST_FMA */
};
#endif /* __OBSOLETE_MATH */
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
#endif
},
-#if !__HAVE_FAST_FMA
+#if !HAVE_FAST_FMA
.tab2 = {
# if N == 64
{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
#endif
},
-#endif /* !__HAVE_FAST_FMA */
+#endif /* !HAVE_FAST_FMA */
};
#endif /* __OBSOLETE_MATH */
# endif
#endif
+/* Compiler can inline a double-precision fma call as a single instruction.
+   May be predefined by the user to override the detection below.
+   On 32-bit Arm, __ARM_FEATURE_FMA alone is not sufficient because it can
+   also be set for single-precision-only FPUs, so hardware double precision
+   (bit 3 of __ARM_FP) is required as well; this matches the guard of the
+   former __HAVE_FAST_FMA_DEFAULT logic.  Macros that are not defined
+   evaluate to 0 inside #if.  */
+#ifndef HAVE_FAST_FMA
+# if __aarch64__ || (__ARM_FEATURE_FMA && (__ARM_FP & 0x8))
+# define HAVE_FAST_FMA 1
+# else
+# define HAVE_FAST_FMA 0
+# endif
+#endif
+
#if HAVE_FAST_ROUND
# define TOINT_INTRINSICS 1
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
double poly1[LOG_POLY1_ORDER - 1];
struct {double invc, logc;} tab[1 << LOG_TABLE_BITS];
-#if !__HAVE_FAST_FMA
+#if !HAVE_FAST_FMA
struct {double chi, clo;} tab2[1 << LOG_TABLE_BITS];
#endif
} __log_data HIDDEN;
double poly[LOG2_POLY_ORDER - 1];
double poly1[LOG2_POLY1_ORDER - 1];
struct {double invc, logc;} tab[1 << LOG2_TABLE_BITS];
-#if !__HAVE_FAST_FMA
+#if !HAVE_FAST_FMA
struct {double chi, clo;} tab2[1 << LOG2_TABLE_BITS];
#endif
} __log2_data HIDDEN;
logctail = T[i].logctail;
/* r = z/c - 1, arranged to be exact. */
-#if __HAVE_FAST_FMA
+#if HAVE_FAST_FMA
r = fma (z, invc, -1.0);
#else
double_t zhi = asdouble (iz & (-1ULL << 32));
ar2 = r * ar;
ar3 = r * ar2;
/* k*Ln2 + log(c) + r + A[0]*r*r. */
-#if __HAVE_FAST_FMA
+#if HAVE_FAST_FMA
hi = t2 + ar2;
lo3 = fma (ar, r, -ar2);
lo4 = t2 - hi + ar2;
double_t lo;
double_t hi = log_inline (ix, &lo);
double_t ehi, elo;
-#if __HAVE_FAST_FMA
+#if HAVE_FAST_FMA
ehi = y * hi;
elo = y * lo + fma (y, hi, -ehi);
#else