This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[PATCH] Cleanup __ieee754_sqrt(f)

From: Wilco Dijkstra <Wilco dot Dijkstra at arm dot com>
To: "libc-alpha at sourceware dot org" <libc-alpha at sourceware dot org>
Cc: nd <nd at arm dot com>
Date: Fri, 15 Sep 2017 17:15:52 +0000
Subject: [PATCH] Cleanup __ieee754_sqrt(f)
Authentication-results: sourceware.org; auth=none
Authentication-results: spf=none (sender IP is ) smtp.mailfrom=Wilco dot Dijkstra at arm dot com;
Nodisclaimer: True
Spamdiagnosticmetadata: NSPM
Spamdiagnosticoutput: 1:99
This patch cleans up the many uses of  __ieee754_sqrt(f) in GLIBC.
We can compile all of the math functions with -fno-math-errno which 
means GCC will inline sqrt as a single instruction.  This means targets
are no longer forced to add a special inline, and the resulting code is
better without asm() constructs.

I verified there is no diff in the math libs except if there are new opportunities
for optimization (for example fewer callee-saves are needed in some
functions and sqrt was CSEd in a few cases).

I have left __ieee754_sqrtl alone for now, however this could also
be removed in principle. Also given pretty much every target has
a hardware sqrt instruction that GCC will inline, it would be feasible
to change all the target sqrt implementations into a single generic one
that does something like:

#if USE_SQRT_BUILTIN
  return __builtin_sqrt (x)
#else
  // generic implementation
#endif

ChangeLog:

	* Makeconfig (math-flags): Add -fno-math-errno.
	* math/w_sqrt_compat.c (__sqrt): Call __builtin_sqrt.
	* math/w_sqrtf_compat.c (__sqrtf): Call __builtin_sqrtf.
	* sysdeps/aarch64/fpu/e_sqrt.c (__ieee754_sqrt): Call __builtin_sqrt.
	* sysdeps/aarch64/fpu/e_sqrtf.c (__ieee754_sqrtf): Call __builtin_sqrtf.
	* sysdeps/aarch64/fpu/math_private.h (__ieee754_sqrt): Remove.
	(__ieee754_sqrtf): Remove.
	* sysdeps/alpha/fpu/math_private.h (__ieee754_sqrt): Remove.
	(__ieee754_sqrtf): Remove.
	* sysdeps/generic/math-type-macros.h (M_SQRT): Use __builtin_sqrt(f). 	
	* sysdeps/ieee754/dbl-64/e_acosh.c (__ieee754_acosh): Use sqrt.
	* sysdeps/ieee754/dbl-64/e_gamma_r.c (gamma_positive): Likewise.
	* sysdeps/ieee754/dbl-64/e_hypot.c (__ieee754_hypot): Likewise.
	* sysdeps/ieee754/dbl-64/e_j0.c (__ieee754_j0): Likewise.
	* sysdeps/ieee754/dbl-64/e_j1.c (__ieee754_j1): Likewise.
	* sysdeps/ieee754/dbl-64/e_jn.c (__ieee754_jn): Likewise.
	* sysdeps/ieee754/dbl-64/halfulp.c (__halfulp): Likewise.
	* sysdeps/ieee754/dbl-64/s_asinh.c (__asinh): Likewise.
	* sysdeps/ieee754/dbl-64/wordsize-64/e_acosh.c (__ieee754_acosh): Likewise.
	* sysdeps/ieee754/flt-32/e_acosf.c (__ieee754_acosf): Likewise.
	* sysdeps/ieee754/flt-32/e_acoshf.c (__ieee754_acoshf): Likewise.
	* sysdeps/ieee754/flt-32/e_asinf.c (__ieee754_asinf): Likewise.
	* sysdeps/ieee754/flt-32/e_gammaf_r.c (gammaf_positive): Likewise.
	* sysdeps/ieee754/flt-32/e_hypotf.c (__ieee754_hypotf): Likewise.
	* sysdeps/ieee754/flt-32/e_j0f.c (__ieee754_j0f): Likewise.
	* sysdeps/ieee754/flt-32/e_j1f.c (__ieee754_j1f): Likewise.
	* sysdeps/ieee754/flt-32/s_asinhf.c (__asinhf): Likewise.
	* sysdeps/powerpc/fpu/e_hypot.c (__ieee754_hypot): Likewise.
	* sysdeps/powerpc/fpu/e_hypotf.c (__ieee754_hypotf): Likewise.
	* sysdeps/powerpc/fpu/math_private.h (__ieee754_sqrt): Remove.
	(__ieee754_sqrtf): Remove.
	* sysdeps/s390/fpu/bits/mathinline.h (__ieee754_sqrt): Remove.
	(__ieee754_sqrtf): Remove.
	* sysdeps/sparc/fpu/bits/mathinline.h (__ieee754_sqrt): Remove.
	(__ieee754_sqrtf): Remove.	
	* sysdeps/x86_64/fpu/math_private.h (__ieee754_sqrt): Remove.
	(__ieee754_sqrtf): Remove.

--

diff --git a/Makeconfig b/Makeconfig
index b51904b7973e508e49ab87a8678c8fd3f9cba9a5..1fb93ae1e3fcec7c17ea99cce6a17bc15b11eafb 100644
--- a/Makeconfig
+++ b/Makeconfig
@@ -812,7 +812,7 @@ endif
 # We have to assume that glibc functions are called in any rounding
 # mode and also change the rounding mode in a few functions. So,
 # disable any optimization that assume default rounding mode.
-+math-flags = -frounding-math
++math-flags = -frounding-math -fno-math-errno
 
 # We might want to compile with some stack-protection flag.
 ifneq ($(stack-protector),)
diff --git a/math/w_sqrt_compat.c b/math/w_sqrt_compat.c
index aeead2e49c2bad906017412e2a4538c8e8f51f34..f131e6ba0381fa7a0a252b431c4e0bcaf1f8f762 100644
--- a/math/w_sqrt_compat.c
+++ b/math/w_sqrt_compat.c
@@ -29,7 +29,7 @@ __sqrt (double x)
   if (__builtin_expect (isless (x, 0.0), 0) && _LIB_VERSION != _IEEE_)
     return __kernel_standard (x, x, 26); /* sqrt(negative) */
 
-  return __ieee754_sqrt (x);
+  return __builtin_sqrt (x);
 }
 weak_alias (__sqrt, sqrt)
 # ifdef NO_LONG_DOUBLE
diff --git a/math/w_sqrtf_compat.c b/math/w_sqrtf_compat.c
index bd3d5048fc45a6b1a1189b0f2ef9fd5bc84e2776..b88a53877716bfa44c17c6dd0caeee0eb50bda6b 100644
--- a/math/w_sqrtf_compat.c
+++ b/math/w_sqrtf_compat.c
@@ -29,7 +29,7 @@ __sqrtf (float x)
   if (__builtin_expect (isless (x, 0.0f), 0) && _LIB_VERSION != _IEEE_)
     return __kernel_standard_f (x, x, 126); /* sqrt(negative) */
 
-  return __ieee754_sqrtf (x);
+  return __builtin_sqrtf (x);
 }
 weak_alias (__sqrtf, sqrtf)
 #endif
diff --git a/sysdeps/aarch64/fpu/e_sqrt.c b/sysdeps/aarch64/fpu/e_sqrt.c
index f984d877b64eea72e23b8c099c41923bf60911f7..b80ac272611ebf8d4959f366ea7e5e8207ddbe22 100644
--- a/sysdeps/aarch64/fpu/e_sqrt.c
+++ b/sysdeps/aarch64/fpu/e_sqrt.c
@@ -21,8 +21,6 @@
 double
 __ieee754_sqrt (double d)
 {
-  double res;
-  asm ("fsqrt   %d0, %d1" : "=w" (res) : "w" (d));
-  return res;
+  return __builtin_sqrt (d);
 }
 strong_alias (__ieee754_sqrt, __sqrt_finite)
diff --git a/sysdeps/aarch64/fpu/e_sqrtf.c b/sysdeps/aarch64/fpu/e_sqrtf.c
index 67707ef83374fdb2b330d0c9b0cfb8770036a98b..73804542c812f28de96ce68a6fe64f44284856eb 100644
--- a/sysdeps/aarch64/fpu/e_sqrtf.c
+++ b/sysdeps/aarch64/fpu/e_sqrtf.c
@@ -21,8 +21,6 @@
 float
 __ieee754_sqrtf (float s)
 {
-  float res;
-  asm ("fsqrt   %s0, %s1" : "=w" (res) : "w" (s));
-  return res;
+  return __builtin_sqrtf (s);
 }
 strong_alias (__ieee754_sqrtf, __sqrtf_finite)
diff --git a/sysdeps/aarch64/fpu/math_private.h b/sysdeps/aarch64/fpu/math_private.h
index 775237ffb2040772f97fdef2a6fbe9041f27ad6e..1282418fffdc89bf15f7eb5b9051f3f9fc76f5cd 100644
--- a/sysdeps/aarch64/fpu/math_private.h
+++ b/sysdeps/aarch64/fpu/math_private.h
@@ -27,22 +27,6 @@
 #define math_force_eval(x) \
 ({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "w" (__x)); })
 
-extern __always_inline double
-__ieee754_sqrt (double d)
-{
-  double res;
-  asm __volatile__ ("fsqrt   %d0, %d1" : "=w" (res) : "w" (d));
-  return res;
-}
-
-extern __always_inline float
-__ieee754_sqrtf (float s)
-{
-  float res;
-  asm __volatile__ ("fsqrt   %s0, %s1" : "=w" (res) : "w" (s));
-  return res;
-}
-
 static __always_inline void
 libc_feholdexcept_aarch64 (fenv_t *envp)
 {
diff --git a/sysdeps/alpha/fpu/math_private.h b/sysdeps/alpha/fpu/math_private.h
index 1e97c867c38767a2237df21f84e9c55a53ad13bb..95dc32c969c1133add0d5a1c42e1f1bd0c9d0e2d 100644
--- a/sysdeps/alpha/fpu/math_private.h
+++ b/sysdeps/alpha/fpu/math_private.h
@@ -21,30 +21,4 @@
 
 #include_next <math_private.h>
 
-#ifdef __alpha_fix__
-extern __always_inline double
-__ieee754_sqrt (double d)
-{
-  double ret;
-# ifdef _IEEE_FP_INEXACT
-  asm ("sqrtt/suid %1,%0" : "=&f"(ret) : "f"(d));
-# else
-  asm ("sqrtt/sud %1,%0" : "=&f"(ret) : "f"(d));
-# endif
-  return ret;
-}
-
-extern __always_inline float
-__ieee754_sqrtf (float d)
-{
-  float ret;
-# ifdef _IEEE_FP_INEXACT
-  asm ("sqrts/suid %1,%0" : "=&f"(ret) : "f"(d));
-# else
-  asm ("sqrts/sud %1,%0" : "=&f"(ret) : "f"(d));
-# endif
-  return ret;
-}
-#endif /* FIX */
-
 #endif /* ALPHA_MATH_PRIVATE_H */
diff --git a/sysdeps/generic/math-type-macros.h b/sysdeps/generic/math-type-macros.h
index 6aaa2335b9b165ba5ec9384e730f10382e3b969c..da86421ddd26c2c91af2814afe4d8c5d6c3b8c28 100644
--- a/sysdeps/generic/math-type-macros.h
+++ b/sysdeps/generic/math-type-macros.h
@@ -84,7 +84,8 @@
 #define M_HYPOT M_SUF (__ieee754_hypot)
 #define M_LOG M_SUF (__ieee754_log)
 #define M_SINH M_SUF (__ieee754_sinh)
-#define M_SQRT M_SUF (__ieee754_sqrt)
+#define M_SQRT M_SUF (__builtin_sqrt)
+#define __builtin_sqrtl __ieee754_sqrtl
 
 /* Needed to evaluate M_MANT_DIG below.  */
 #include <float.h>
diff --git a/sysdeps/ieee754/dbl-64/e_acosh.c b/sysdeps/ieee754/dbl-64/e_acosh.c
index 51916f221aa3a4babecef05f93d7bc158e9f78cd..fe0c375f00428db93abd5db02be3b3be6940bda5 100644
--- a/sysdeps/ieee754/dbl-64/e_acosh.c
+++ b/sysdeps/ieee754/dbl-64/e_acosh.c
@@ -58,12 +58,12 @@ __ieee754_acosh (double x)
   else if (hx > 0x40000000)             /* 2**28 > x > 2 */
     {
       t = x * x;
-      return __ieee754_log (2.0 * x - one / (x + __ieee754_sqrt (t - one)));
+      return __ieee754_log (2.0 * x - one / (x + sqrt (t - one)));
     }
   else                                  /* 1<x<2 */
     {
       t = x - one;
-      return __log1p (t + __ieee754_sqrt (2.0 * t + t * t));
+      return __log1p (t + sqrt (2.0 * t + t * t));
     }
 }
 strong_alias (__ieee754_acosh, __acosh_finite)
diff --git a/sysdeps/ieee754/dbl-64/e_gamma_r.c b/sysdeps/ieee754/dbl-64/e_gamma_r.c
index e6988a678adc79066f0c0e1e1677c1455ce40d8d..3cda2e1960371a8c3b034a9f836d0f136d18382d 100644
--- a/sysdeps/ieee754/dbl-64/e_gamma_r.c
+++ b/sysdeps/ieee754/dbl-64/e_gamma_r.c
@@ -98,7 +98,7 @@ gamma_positive (double x, int *exp2_adj)
       double ret = (__ieee754_pow (x_adj_mant, x_adj)
 		    * __ieee754_exp2 (x_adj_log2 * x_adj_frac)
 		    * __ieee754_exp (-x_adj)
-		    * __ieee754_sqrt (2 * M_PI / x_adj)
+		    * sqrt (2 * M_PI / x_adj)
 		    / prod);
       exp_adj += x_eps * __ieee754_log (x_adj);
       double bsum = gamma_coeff[NCOEFF - 1];
diff --git a/sysdeps/ieee754/dbl-64/e_hypot.c b/sysdeps/ieee754/dbl-64/e_hypot.c
index 698f004f62a3656209c333f81255d209b03aef92..a89075169b4f16522d11aa7d900f02368add447b 100644
--- a/sysdeps/ieee754/dbl-64/e_hypot.c
+++ b/sysdeps/ieee754/dbl-64/e_hypot.c
@@ -132,7 +132,7 @@ __ieee754_hypot (double x, double y)
       t1 = 0;
       SET_HIGH_WORD (t1, ha);
       t2 = a - t1;
-      w = __ieee754_sqrt (t1 * t1 - (b * (-b) - t2 * (a + t1)));
+      w = sqrt (t1 * t1 - (b * (-b) - t2 * (a + t1)));
     }
   else
     {
@@ -143,7 +143,7 @@ __ieee754_hypot (double x, double y)
       t1 = 0;
       SET_HIGH_WORD (t1, ha + 0x00100000);
       t2 = a - t1;
-      w = __ieee754_sqrt (t1 * y1 - (w * (-w) - (t1 * y2 + t2 * b)));
+      w = sqrt (t1 * y1 - (w * (-w) - (t1 * y2 + t2 * b)));
     }
   if (k != 0)
     {
diff --git a/sysdeps/ieee754/dbl-64/e_j0.c b/sysdeps/ieee754/dbl-64/e_j0.c
index 4b440cf0d045b94778ba889b9309c46d2c5e7a4a..3794fd48fc612d3d4dc2021fbdf2be71bac961e7 100644
--- a/sysdeps/ieee754/dbl-64/e_j0.c
+++ b/sysdeps/ieee754/dbl-64/e_j0.c
@@ -109,11 +109,11 @@ __ieee754_j0 (double x)
        * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
        */
       if (ix > 0x48000000)
-	z = (invsqrtpi * cc) / __ieee754_sqrt (x);
+	z = (invsqrtpi * cc) / sqrt (x);
       else
 	{
 	  u = pzero (x); v = qzero (x);
-	  z = invsqrtpi * (u * cc - v * ss) / __ieee754_sqrt (x);
+	  z = invsqrtpi * (u * cc - v * ss) / sqrt (x);
 	}
       return z;
     }
@@ -200,11 +200,11 @@ __ieee754_y0 (double x)
 	    ss = z / cc;
 	}
       if (ix > 0x48000000)
-	z = (invsqrtpi * ss) / __ieee754_sqrt (x);
+	z = (invsqrtpi * ss) / sqrt (x);
       else
 	{
 	  u = pzero (x); v = qzero (x);
-	  z = invsqrtpi * (u * ss + v * cc) / __ieee754_sqrt (x);
+	  z = invsqrtpi * (u * ss + v * cc) / sqrt (x);
 	}
       return z;
     }
diff --git a/sysdeps/ieee754/dbl-64/e_j1.c b/sysdeps/ieee754/dbl-64/e_j1.c
index eb446fd102d91f9ab98f9ed979569e7ede7e876e..b528998b32cde8eacb86c2c79ef5150579450f85 100644
--- a/sysdeps/ieee754/dbl-64/e_j1.c
+++ b/sysdeps/ieee754/dbl-64/e_j1.c
@@ -112,11 +112,11 @@ __ieee754_j1 (double x)
        * y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
        */
       if (ix > 0x48000000)
-	z = (invsqrtpi * cc) / __ieee754_sqrt (y);
+	z = (invsqrtpi * cc) / sqrt (y);
       else
 	{
 	  u = pone (y); v = qone (y);
-	  z = invsqrtpi * (u * cc - v * ss) / __ieee754_sqrt (y);
+	  z = invsqrtpi * (u * cc - v * ss) / sqrt (y);
 	}
       if (hx < 0)
 	return -z;
@@ -203,11 +203,11 @@ __ieee754_y1 (double x)
        * to compute the worse one.
        */
       if (ix > 0x48000000)
-	z = (invsqrtpi * ss) / __ieee754_sqrt (x);
+	z = (invsqrtpi * ss) / sqrt (x);
       else
 	{
 	  u = pone (x); v = qone (x);
-	  z = invsqrtpi * (u * ss + v * cc) / __ieee754_sqrt (x);
+	  z = invsqrtpi * (u * ss + v * cc) / sqrt (x);
 	}
       return z;
     }
diff --git a/sysdeps/ieee754/dbl-64/e_jn.c b/sysdeps/ieee754/dbl-64/e_jn.c
index 3ac91df2b0795855c1f7f38d8812fdd6fb00c561..d829ff7c102812cd7cb78772ff947c7504bb261c 100644
--- a/sysdeps/ieee754/dbl-64/e_jn.c
+++ b/sysdeps/ieee754/dbl-64/e_jn.c
@@ -107,7 +107,7 @@ __ieee754_jn (int n, double x)
 	      case 2: temp = -c - s; break;
 	      case 3: temp = c - s; break;
 	      }
-	    b = invsqrtpi * temp / __ieee754_sqrt (x);
+	    b = invsqrtpi * temp / sqrt (x);
 	  }
 	else
 	  {
@@ -314,7 +314,7 @@ __ieee754_yn (int n, double x)
 	  case 2: temp = -s + c; break;
 	  case 3: temp = s + c; break;
 	  }
-	b = invsqrtpi * temp / __ieee754_sqrt (x);
+	b = invsqrtpi * temp / sqrt (x);
       }
     else
       {
diff --git a/sysdeps/ieee754/dbl-64/halfulp.c b/sysdeps/ieee754/dbl-64/halfulp.c
index d5f8a010e280bbaf383f80a2b50f04184f58c08f..64ba3474ae5ca42e09fbd71e156b0a03bfdd7a2e 100644
--- a/sysdeps/ieee754/dbl-64/halfulp.c
+++ b/sysdeps/ieee754/dbl-64/halfulp.c
@@ -113,7 +113,7 @@ __halfulp (double x, double y)
   /*   now treat x        */
   while (k > 0)
     {
-      z = __ieee754_sqrt (x);
+      z = sqrt (x);
       EMULV (z, z, u, uu, j1, j2, j3, j4, j5);
       if (((u - x) + uu) != 0)
 	break;
diff --git a/sysdeps/ieee754/dbl-64/s_asinh.c b/sysdeps/ieee754/dbl-64/s_asinh.c
index 9193301b5ebbcdd283ab814e9ff930f2a3d35f2f..593f588bee7717a14fab725a70ef311894d7cc92 100644
--- a/sysdeps/ieee754/dbl-64/s_asinh.c
+++ b/sysdeps/ieee754/dbl-64/s_asinh.c
@@ -54,13 +54,13 @@ __asinh (double x)
       double xa = fabs (x);
       if (ix > 0x40000000)              /* 2**28 > |x| > 2.0 */
 	{
-	  w = __ieee754_log (2.0 * xa + one / (__ieee754_sqrt (xa * xa + one) +
+	  w = __ieee754_log (2.0 * xa + one / (sqrt (xa * xa + one) +
               xa));
 	}
       else                      /* 2.0 > |x| > 2**-28 */
 	{
 	  double t = xa * xa;
-	  w = __log1p (xa + t / (one + __ieee754_sqrt (one + t)));
+	  w = __log1p (xa + t / (one + sqrt (one + t)));
 	}
     }
   return __copysign (w, x);
diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/e_acosh.c b/sysdeps/ieee754/dbl-64/wordsize-64/e_acosh.c
index ccccdaf106d0bf5dc90c32fa5c5f873e3893bf42..0af05a02221f8a86cc91265b1edda70b59b7bb30 100644
--- a/sysdeps/ieee754/dbl-64/wordsize-64/e_acosh.c
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/e_acosh.c
@@ -51,13 +51,13 @@ __ieee754_acosh (double x)
 
       /* 2**28 > x > 2 */
       double t = x * x;
-      return __ieee754_log (2.0 * x - one / (x + __ieee754_sqrt (t - one)));
+      return __ieee754_log (2.0 * x - one / (x + sqrt (t - one)));
     }
   else if (__glibc_likely (hx > INT64_C (0x3ff0000000000000)))
     {
       /* 1<x<2 */
       double t = x - one;
-      return __log1p (t + __ieee754_sqrt (2.0 * t + t * t));
+      return __log1p (t + sqrt (2.0 * t + t * t));
     }
   else if (__glibc_likely (hx == INT64_C (0x3ff0000000000000)))
     return 0.0;				/* acosh(1) = 0 */
diff --git a/sysdeps/ieee754/flt-32/e_acosf.c b/sysdeps/ieee754/flt-32/e_acosf.c
index 6f792f6604cdab3fca51a5546e086d2e40df1f15..3b2e4f1bde9906e483d31081ba794790f0eed05b 100644
--- a/sysdeps/ieee754/flt-32/e_acosf.c
+++ b/sysdeps/ieee754/flt-32/e_acosf.c
@@ -56,14 +56,14 @@ __ieee754_acosf(float x)
 	    z = (one+x)*(float)0.5;
 	    p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
 	    q = one+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
-	    s = __ieee754_sqrtf(z);
+	    s = sqrtf(z);
 	    r = p/q;
 	    w = r*s-pio2_lo;
 	    return pi - (float)2.0*(s+w);
 	} else {			/* x > 0.5 */
 	    int32_t idf;
 	    z = (one-x)*(float)0.5;
-	    s = __ieee754_sqrtf(z);
+	    s = sqrtf(z);
 	    df = s;
 	    GET_FLOAT_WORD(idf,df);
 	    SET_FLOAT_WORD(df,idf&0xfffff000);
diff --git a/sysdeps/ieee754/flt-32/e_acoshf.c b/sysdeps/ieee754/flt-32/e_acoshf.c
index aabfb85df7d490ac1a8951b99246cd54e9460d27..49e64f3c43f0e5e37232dbcf8a38e7ab0c357453 100644
--- a/sysdeps/ieee754/flt-32/e_acoshf.c
+++ b/sysdeps/ieee754/flt-32/e_acoshf.c
@@ -40,10 +40,10 @@ float __ieee754_acoshf(float x)
 	    return 0.0;			/* acosh(1) = 0 */
 	} else if (hx > 0x40000000) {	/* 2**28 > x > 2 */
 	    t=x*x;
-	    return __ieee754_logf((float)2.0*x-one/(x+__ieee754_sqrtf(t-one)));
+	    return __ieee754_logf((float)2.0*x-one/(x+sqrtf(t-one)));
 	} else {			/* 1<x<2 */
 	    t = x-one;
-	    return __log1pf(t+__ieee754_sqrtf((float)2.0*t+t*t));
+	    return __log1pf(t+sqrtf((float)2.0*t+t*t));
 	}
 }
 strong_alias (__ieee754_acoshf, __acoshf_finite)
diff --git a/sysdeps/ieee754/flt-32/e_asinf.c b/sysdeps/ieee754/flt-32/e_asinf.c
index 2ca2dbcb2839fc5ea3b6485086b8a2239dbcdfcb..55eb1449c57161446374d649ae5cf95e41e86af4 100644
--- a/sysdeps/ieee754/flt-32/e_asinf.c
+++ b/sysdeps/ieee754/flt-32/e_asinf.c
@@ -85,7 +85,7 @@ float __ieee754_asinf(float x)
 	w = one-fabsf(x);
 	t = w*0.5f;
 	p = t * (p0 + t * (p1 + t * (p2 + t * (p3 + t * p4))));
-	s = __ieee754_sqrtf(t);
+	s = sqrtf(t);
 	if(ix>=0x3F79999A) {	/* if |x| > 0.975 */
 	    t = pio2_hi-(2.0f*(s+s*p)-pio2_lo);
 	} else {
diff --git a/sysdeps/ieee754/flt-32/e_gammaf_r.c b/sysdeps/ieee754/flt-32/e_gammaf_r.c
index 83f1a1bf03b9962e34bb0840ada92f4175052240..673d64a2a1999505e5c281b3b7e3d8ede097b056 100644
--- a/sysdeps/ieee754/flt-32/e_gammaf_r.c
+++ b/sysdeps/ieee754/flt-32/e_gammaf_r.c
@@ -91,7 +91,7 @@ gammaf_positive (float x, int *exp2_adj)
       float ret = (__ieee754_powf (x_adj_mant, x_adj)
 		   * __ieee754_exp2f (x_adj_log2 * x_adj_frac)
 		   * __ieee754_expf (-x_adj)
-		   * __ieee754_sqrtf (2 * (float) M_PI / x_adj)
+		   * sqrtf (2 * (float) M_PI / x_adj)
 		   / prod);
       exp_adj += x_eps * __ieee754_logf (x_adj);
       float bsum = gamma_coeff[NCOEFF - 1];
diff --git a/sysdeps/ieee754/flt-32/e_hypotf.c b/sysdeps/ieee754/flt-32/e_hypotf.c
index fda2651a846815e70631c1f9fe66be1a7561e540..5336876cf4cbb8d684dcd3510b7e8585aef7524a 100644
--- a/sysdeps/ieee754/flt-32/e_hypotf.c
+++ b/sysdeps/ieee754/flt-32/e_hypotf.c
@@ -40,6 +40,6 @@ __ieee754_hypotf(float x, float y)
 	d_x = (double) x;
 	d_y = (double) y;
 
-	return (float) __ieee754_sqrt(d_x * d_x + d_y * d_y);
+	return (float) sqrt(d_x * d_x + d_y * d_y);
 }
 strong_alias (__ieee754_hypotf, __hypotf_finite)
diff --git a/sysdeps/ieee754/flt-32/e_j0f.c b/sysdeps/ieee754/flt-32/e_j0f.c
index b783dd069dded429fa41ce8c4a93df6544797ab6..105965d06cf46ff7de0de45254537cb727595fe6 100644
--- a/sysdeps/ieee754/flt-32/e_j0f.c
+++ b/sysdeps/ieee754/flt-32/e_j0f.c
@@ -58,10 +58,10 @@ __ieee754_j0f(float x)
 	 * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
 	 * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
 	 */
-		if(ix>0x48000000) z = (invsqrtpi*cc)/__ieee754_sqrtf(x);
+		if(ix>0x48000000) z = (invsqrtpi*cc)/sqrtf(x);
 		else {
 		    u = pzerof(x); v = qzerof(x);
-		    z = invsqrtpi*(u*cc-v*ss)/__ieee754_sqrtf(x);
+		    z = invsqrtpi*(u*cc-v*ss)/sqrtf(x);
 		}
 		return z;
 	}
@@ -131,10 +131,10 @@ __ieee754_y0f(float x)
 		    if ((s*c)<zero) cc = z/ss;
 		    else            ss = z/cc;
 		}
-		if(ix>0x48000000) z = (invsqrtpi*ss)/__ieee754_sqrtf(x);
+		if(ix>0x48000000) z = (invsqrtpi*ss)/sqrtf(x);
 		else {
 		    u = pzerof(x); v = qzerof(x);
-		    z = invsqrtpi*(u*ss+v*cc)/__ieee754_sqrtf(x);
+		    z = invsqrtpi*(u*ss+v*cc)/sqrtf(x);
 		}
 		return z;
 	}
diff --git a/sysdeps/ieee754/flt-32/e_j1f.c b/sysdeps/ieee754/flt-32/e_j1f.c
index 805a87d85b732b9d30d3b79464e039df2b8e832b..95f03d15c5a6e558ab7f3e6f1b1869342c5c0b0e 100644
--- a/sysdeps/ieee754/flt-32/e_j1f.c
+++ b/sysdeps/ieee754/flt-32/e_j1f.c
@@ -61,10 +61,10 @@ __ieee754_j1f(float x)
 	 * j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
 	 * y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
 	 */
-		if(ix>0x48000000) z = (invsqrtpi*cc)/__ieee754_sqrtf(y);
+		if(ix>0x48000000) z = (invsqrtpi*cc)/sqrtf(y);
 		else {
 		    u = ponef(y); v = qonef(y);
-		    z = invsqrtpi*(u*cc-v*ss)/__ieee754_sqrtf(y);
+		    z = invsqrtpi*(u*cc-v*ss)/sqrtf(y);
 		}
 		if(hx<0) return -z;
 		else	 return  z;
@@ -135,10 +135,10 @@ __ieee754_y1f(float x)
 	 *              sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
 	 * to compute the worse one.
 	 */
-		if(ix>0x48000000) z = (invsqrtpi*ss)/__ieee754_sqrtf(x);
+		if(ix>0x48000000) z = (invsqrtpi*ss)/sqrtf(x);
 		else {
 		    u = ponef(x); v = qonef(x);
-		    z = invsqrtpi*(u*ss+v*cc)/__ieee754_sqrtf(x);
+		    z = invsqrtpi*(u*ss+v*cc)/sqrtf(x);
 		}
 		return z;
 	}
diff --git a/sysdeps/ieee754/flt-32/s_asinhf.c b/sysdeps/ieee754/flt-32/s_asinhf.c
index da9cafb600b3039084d987db6804b7fd5a7be046..914093a281c2e2c142c1a018d6a10c15b7e2979e 100644
--- a/sysdeps/ieee754/flt-32/s_asinhf.c
+++ b/sysdeps/ieee754/flt-32/s_asinhf.c
@@ -39,10 +39,10 @@ __asinhf(float x)
 	} else {
 	    float xa = fabsf(x);
 	    if (ix>0x40000000) {	/* 2**14 > |x| > 2.0 */
-		w = __ieee754_logf(2.0f*xa+one/(__ieee754_sqrtf(xa*xa+one)+xa));
+		w = __ieee754_logf(2.0f*xa+one/(sqrtf(xa*xa+one)+xa));
 	    } else {		/* 2.0 > |x| > 2**-14 */
 		float t = xa*xa;
-		w =__log1pf(xa+t/(one+__ieee754_sqrtf(one+t)));
+		w =__log1pf(xa+t/(one+sqrtf(one+t)));
 	    }
 	}
 	return __copysignf(w, x);
diff --git a/sysdeps/powerpc/fpu/e_hypot.c b/sysdeps/powerpc/fpu/e_hypot.c
index 2685ca6ba06441f1ab48aeb78183442686841a97..98319f841ecddb185506f40cddb5d980f1bdab7c 100644
--- a/sysdeps/powerpc/fpu/e_hypot.c
+++ b/sysdeps/powerpc/fpu/e_hypot.c
@@ -110,7 +110,7 @@ __ieee754_hypot (double x, double y)
     {
       x *= twoM600;
       y *= twoM600;
-      return __ieee754_sqrt (x * x + y * y) / twoM600;
+      return sqrt (x * x + y * y) / twoM600;
     }
   if (y < twoM500)
     {
@@ -118,7 +118,7 @@ __ieee754_hypot (double x, double y)
 	{
 	  x *= two1022;
 	  y *= two1022;
-	  double ret = __ieee754_sqrt (x * x + y * y) / two1022;
+	  double ret = sqrt (x * x + y * y) / two1022;
 	  math_check_force_underflow_nonneg (ret);
 	  return ret;
 	}
@@ -126,9 +126,9 @@ __ieee754_hypot (double x, double y)
 	{
 	  x *= two600;
 	  y *= two600;
-	  return __ieee754_sqrt (x * x + y * y) / two600;
+	  return sqrt (x * x + y * y) / two600;
 	}
     }
-  return __ieee754_sqrt (x * x + y * y);
+  return sqrt (x * x + y * y);
 }
 strong_alias (__ieee754_hypot, __hypot_finite)
diff --git a/sysdeps/powerpc/fpu/e_hypotf.c b/sysdeps/powerpc/fpu/e_hypotf.c
index 8502ca962a307d94dea6e693d50b1eadee11e76c..f09c42a03aab50492ab9effc53968a5fd2472d55 100644
--- a/sysdeps/powerpc/fpu/e_hypotf.c
+++ b/sysdeps/powerpc/fpu/e_hypotf.c
@@ -71,6 +71,6 @@ __ieee754_hypotf (float x, float y)
 {
   TEST_INF_NAN (x, y);
 
-  return __ieee754_sqrt ((double) x * x + (double) y * y);
+  return sqrt ((double) x * x + (double) y * y);
 }
 strong_alias (__ieee754_hypotf, __hypotf_finite)
diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
index 396fd0562eda2f64bc5858a8e90b94cf5028f28b..f73b6efaa9d1c3f5dec0427965cdf25b1b61b843 100644
--- a/sysdeps/powerpc/fpu/math_private.h
+++ b/sysdeps/powerpc/fpu/math_private.h
@@ -35,36 +35,6 @@ __ieee754_sqrtf128 (_Float128 __x)
 }
 #endif
 
-extern double __slow_ieee754_sqrt (double);
-extern __always_inline double
-__ieee754_sqrt (double __x)
-{
-  double __z;
-
-#ifdef _ARCH_PPCSQ
-   asm ("fsqrt	%0,%1" : "=f" (__z) : "f" (__x));
-#else
-   __z = __slow_ieee754_sqrt(__x);
-#endif
-
-  return __z;
-}
-
-extern float __slow_ieee754_sqrtf (float);
-extern __always_inline float
-__ieee754_sqrtf (float __x)
-{
-  float __z;
-
-#ifdef _ARCH_PPCSQ
-  asm ("fsqrts	%0,%1" : "=f" (__z) : "f" (__x));
-#else
-   __z = __slow_ieee754_sqrtf(__x);
-#endif
-
-  return __z;
-}
-
 #if defined _ARCH_PWR5X
 
 # ifndef __round
diff --git a/sysdeps/s390/fpu/bits/mathinline.h b/sysdeps/s390/fpu/bits/mathinline.h
index 52e21db98d51254cee442989190923eef25b1b44..3ccfe44877473afe9eca13741db0f5b7e0a619ed 100644
--- a/sysdeps/s390/fpu/bits/mathinline.h
+++ b/sysdeps/s390/fpu/bits/mathinline.h
@@ -65,25 +65,6 @@ __NTH (__signbitl (long double __x))
 
 /* This code is used internally in the GNU libc.  */
 #ifdef __LIBC_INTERNAL_MATH_INLINES
-
-__MATH_INLINE double
-__NTH (__ieee754_sqrt (double x))
-{
-  double res;
-
-  __asm__ ( "sqdbr %0,%1" : "=f" (res) : "f" (x) );
-  return res;
-}
-
-__MATH_INLINE float
-__NTH (__ieee754_sqrtf (float x))
-{
-  float res;
-
-  __asm__ ( "sqebr %0,%1" : "=f" (res) : "f" (x) );
-  return res;
-}
-
 # if !defined __NO_LONG_DOUBLE_MATH
 __MATH_INLINE long double
 __NTH (sqrtl (long double __x))
diff --git a/sysdeps/sparc/fpu/bits/mathinline.h b/sysdeps/sparc/fpu/bits/mathinline.h
index 60a2028f2ce7484883791c683be1a28586de0c13..83e0d3c882d13d2a755c3e38c8bdcfbcb1cd0c6a 100644
--- a/sysdeps/sparc/fpu/bits/mathinline.h
+++ b/sysdeps/sparc/fpu/bits/mathinline.h
@@ -230,22 +230,6 @@ sqrtl (long double __x) __THROW
 
 /* This code is used internally in the GNU libc.  */
 #  ifdef __LIBC_INTERNAL_MATH_INLINES
-__MATH_INLINE double
-__ieee754_sqrt (double __x)
-{
-  register double __r;
-  __asm ("fsqrtd %1,%0" : "=f" (__r) : "f" (__x));
-  return __r;
-}
-
-__MATH_INLINE float
-__ieee754_sqrtf (float __x)
-{
-  register float __r;
-  __asm ("fsqrts %1,%0" : "=f" (__r) : "f" (__x));
-  return __r;
-}
-
 #   if __WORDSIZE == 64
 __MATH_INLINE long double
 __ieee754_sqrtl (long double __x)
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 027a6a3a4d0a68948f19ebaa129bf442a9f4f3e6..aa3577046c2b242b3f406118dba9933ec7da189e 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -48,30 +48,6 @@
 #include <sysdeps/i386/fpu/fenv_private.h>
 #include_next <math_private.h>
 
-extern __always_inline double
-__ieee754_sqrt (double d)
-{
-  double res;
-#if defined __AVX__ || defined SSE2AVX
-  asm ("vsqrtsd %1, %0, %0" : "=x" (res) : "xm" (d));
-#else
-  asm ("sqrtsd %1, %0" : "=x" (res) : "xm" (d));
-#endif
-  return res;
-}
-
-extern __always_inline float
-__ieee754_sqrtf (float d)
-{
-  float res;
-#if defined __AVX__ || defined SSE2AVX
-  asm ("vsqrtss %1, %0, %0" : "=x" (res) : "xm" (d));
-#else
-  asm ("sqrtss %1, %0" : "=x" (res) : "xm" (d));
-#endif
-  return res;
-}
-
 extern __always_inline long double
 __ieee754_sqrtl (long double d)
 {
Follow-Ups:
- Re: [PATCH] Cleanup __ieee754_sqrt(f)
  - From: Joseph Myers
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]