This is the mail archive of the newlib@sourceware.org mailing list for the newlib project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[PATCH RFC] allow inline intrinsics for __ieee754_sqrt/f

From: "Jon Beniston" <jon at beniston dot com>
To: <newlib at sourceware dot org>
Date: Fri, 22 Jun 2018 11:49:03 +0100
Subject: [PATCH RFC] allow inline intrinsics for __ieee754_sqrt/f

Hi,

Most functions in libm call __ieee754_sqrt when needing to perform a square
root. For most targets, this results in the s/w implementation in
math/e_sqrt.c being used, even if the target has a h/w sqrt instruction.
There are some targets that have machine specific implementations in
machine/*/, but even if a single instruction, that code typically doesn't
get inlined.

The following patch is one possible way to allow a sqrt instruction to be
used and for the calls to be inlined. I've just done this for x86/arm for
now. I've put this in include/machine/ieeefp.h, rather than fdlibm.h, as
that's where most of the other target specific code seems to be.

Not sure if using the __IEEE754_INLINE_SQRT* macros is the best way to
prevent redefinition errors. Perhaps someone has a better idea?

Cheers,
Jon

diff --git a/newlib/libc/include/machine/ieeefp.h b/newlib/libc/include/machine/ieeefp.h
index 2fb2268ce..e917d74b0 100644
--- a/newlib/libc/include/machine/ieeefp.h
+++ b/newlib/libc/include/machine/ieeefp.h
@@ -87,6 +87,39 @@
 #  define __IEEE_BYTES_LITTLE_ENDIAN
 # endif
 #endif
+
+#if (__ARM_FP & 0x8) && !defined(__SOFTFP__)
+#define __IEEE754_INLINE_SQRT
+static inline double
+__ieee754_sqrt(double x)
+{
+  double result;
+#if __ARM_ARCH >= 6
+  __asm__ ("vsqrt.f64 %P0, %P1" : "=w" (result) : "w" (x));
+#else
+  /* VFP9 Erratum 760019, see GCC sources "gcc/config/arm/vfp.md" */
+  __asm__ ("vsqrt.f64 %P0, %P1" : "=&w" (result) : "w" (x));
+#endif
+  return result;
+}
+#endif
+
+#if (__ARM_FP & 0x4) && !defined(__SOFTFP__)
+#define __IEEE754_INLINE_SQRTF
+static inline float
+__ieee754_sqrtf(float x)
+{
+  float result;
+#if __ARM_ARCH >= 6
+  __asm__ ("vsqrt.f32 %0, %1" : "=w" (result) : "w" (x));
+#else
+  /* VFP9 Erratum 760019, see GCC sources "gcc/config/arm/vfp.md" */
+  __asm__ ("vsqrt.f32 %0, %1" : "=&w" (result) : "w" (x));
+#endif
+  return result;
+}
+#endif
+
 #endif
 
 #if defined (__aarch64__)
@@ -189,6 +222,25 @@
 
 #ifdef __i386__
 #define __IEEE_LITTLE_ENDIAN
+
+#define __IEEE754_INLINE_SQRT
+static inline double
+__ieee754_sqrt (double x)
+{
+  double result;
+  __asm__ ("fsqrt" : "=t" (result) : "0" (x));
+  return result;
+}
+
+#define __IEEE754_INLINE_SQRTF
+static inline float
+__ieee754_sqrtf (float x)
+{
+  float result;
+  __asm__ ("fsqrt" : "=t" (result) : "0" (x));
+  return result;
+}
+
 #endif
 
 #ifdef __riscv
diff --git a/newlib/libm/common/fdlibm.h b/newlib/libm/common/fdlibm.h
index 4523e8b2a..7eccce2b6 100644
--- a/newlib/libm/common/fdlibm.h
+++ b/newlib/libm/common/fdlibm.h
@@ -149,7 +149,9 @@ extern double significand __P((double));
 extern long double __ieee754_hypotl __P((long double, long double));
 
 /* ieee style elementary functions */
+#ifndef __IEEE754_INLINE_SQRT
 extern double __ieee754_sqrt __P((double));			
+#endif
 extern double __ieee754_acos __P((double));			
 extern double __ieee754_acosh __P((double));			
 extern double __ieee754_log __P((double));			
@@ -195,7 +197,9 @@ extern float scalbf __P((float, float));
 extern float significandf __P((float));
 
 /* ieee style elementary float functions */
+#ifndef __IEEE754_INLINE_SQRTF
 extern float __ieee754_sqrtf __P((float));			
+#endif
 extern float __ieee754_acosf __P((float));			
 extern float __ieee754_acoshf __P((float));			
 extern float __ieee754_logf __P((float));			
diff --git a/newlib/libm/machine/arm/e_sqrt.c b/newlib/libm/machine/arm/e_sqrt.c
index 6f3eb8301..8d50ae234 100644
--- a/newlib/libm/machine/arm/e_sqrt.c
+++ b/newlib/libm/machine/arm/e_sqrt.c
@@ -24,7 +24,7 @@
  * SUCH DAMAGE.
  */
 
-#if (__ARM_FP & 0x8) && !defined(__SOFTFP__)
+#if (__ARM_FP & 0x8) && !defined(__SOFTFP__) && !defined(__IEEE754_INLINE_SQRT)
 #include <math.h>
 
 double
diff --git a/newlib/libm/machine/arm/ef_sqrt.c b/newlib/libm/machine/arm/ef_sqrt.c
index 3a1ba6cb4..3d8fd1191 100644
--- a/newlib/libm/machine/arm/ef_sqrt.c
+++ b/newlib/libm/machine/arm/ef_sqrt.c
@@ -24,7 +24,7 @@
  * SUCH DAMAGE.
  */
 
-#if (__ARM_FP & 0x4) && !defined(__SOFTFP__)
+#if (__ARM_FP & 0x4) && !defined(__SOFTFP__) && !defined(__IEEE754_INLINE_SQRTF)
 #include <math.h>
 
 float
diff --git a/newlib/libm/math/e_sqrt.c b/newlib/libm/math/e_sqrt.c
index 78fc52417..313ae972c 100644
--- a/newlib/libm/math/e_sqrt.c
+++ b/newlib/libm/math/e_sqrt.c
@@ -83,6 +83,8 @@
 
 #include "fdlibm.h"
 
+#ifndef __IEEE754_INLINE_SQRT
+
 #ifndef _DOUBLE_IS_32BITS
 
 #ifdef __STDC__
@@ -194,6 +196,8 @@ static	double	one	= 1.0, tiny=1.0e-300;
  
 #endif /* defined(_DOUBLE_IS_32BITS) */
 
+#endif /* __IEEE754_INLINE_SQRT */
+
 /*
 Other methods  (use floating-point arithmetic)
 -------------
diff --git a/newlib/libm/math/ef_sqrt.c b/newlib/libm/math/ef_sqrt.c
index 80e7f360e..9940bad32 100644
--- a/newlib/libm/math/ef_sqrt.c
+++ b/newlib/libm/math/ef_sqrt.c
@@ -15,6 +15,8 @@
 
 #include "fdlibm.h"
 
+#ifndef __IEEE754_INLINE_SQRTF
+
 #ifdef __STDC__
 static	const float	one	= 1.0, tiny=1.0e-30;
 #else
@@ -87,3 +89,5 @@ static	float	one	= 1.0, tiny=1.0e-30;
 	SET_FLOAT_WORD(z,ix);
 	return z;
 }
+
+#endif /* __IEEE754_INLINE_SQRTF */

Follow-Ups:
- Re: [PATCH RFC] allow inline intrinsics for __ieee754_sqrt/f
  - From: Corinna Vinschen

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]