This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 23/36] PowerPC: sqrt/sqrtf multilib for PowerPC32


2013-08-19  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>

	* sysdeps/powerpc/fpu/w_sqrt.c: Removed unneeded file.
	* sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S: Move to ...
	* sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt-power4.S: ... here.
	(__sqrt): Rename symbol to __sqrt_power4 and remove strong_alias,
	weak_alias and compat_symbol definitions.
	* sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S: Move to ...
	* sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt-power5.S: ... here.
	(__sqrt): Rename symbol to __sqrt_power5 and remove strong_alias,
	weak_alias and compat_symbol definitions.
	* sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S: Move to ...
	* sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf-power4.S: ... here.
	(__sqrtf): Rename symbol to __sqrtf_power4 and remove weak_alias
	definition.
	* sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S: Move to ...
	* sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf-power5.S: ... here.
	(__sqrtf): Rename symbol to __sqrtf_power5 and remove weak_alias
	definition.
	* sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt-ppc32.c: New file:
	default sqrt implementation for PPC32.
	* sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf-ppc32.c: New file:
	default sqrtf implementation for PPC32.
	* sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt.c: New file:
	multiarch sqrt for PPC32.
	* sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf.c: New file:
	multiarch sqrtf for PPC32.
	* sysdeps/powerpc/powerpc32/fpu/multiarch/Makefile: Add sqrt and
	sqrtf objects.

--

diff --git a/sysdeps/powerpc/fpu/w_sqrt.c b/sysdeps/powerpc/fpu/w_sqrt.c
deleted file mode 100644
index 70f28dd..0000000
--- a/sysdeps/powerpc/fpu/w_sqrt.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* Double-precision floating point square root wrapper.
-   Copyright (C) 2004-2013 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <math.h>
-#include <math_private.h>
-#include <fenv_libc.h>
-#include <math_ldbl_opt.h>
-
-double
-__sqrt (double x)		/* wrapper sqrt */
-{
-#ifdef _IEEE_LIBM
-  return __ieee754_sqrt (x);
-#else
-  double z;
-  z = __ieee754_sqrt (x);
-  if (_LIB_VERSION == _IEEE_ || (x != x))
-    return z;
-
-  if (x < 0.0)
-    return __kernel_standard (x, x, 26);	/* sqrt(negative) */
-  else
-    return z;
-#endif
-}
-
-weak_alias (__sqrt, sqrt)
-#ifdef NO_LONG_DOUBLE
-  strong_alias (__sqrt, __sqrtl) weak_alias (__sqrt, sqrtl)
-#endif
-#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
-compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0);
-#endif
diff --git a/sysdeps/powerpc/powerpc32/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc32/fpu/multiarch/Makefile
index 7c50805..22e07ce 100644
--- a/sysdeps/powerpc/powerpc32/fpu/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc32/fpu/multiarch/Makefile
@@ -2,7 +2,9 @@ ifeq ($(subdir),math)
 libm-sysdep_routines += s_llrintf-power6 s_llrintf-power4 s_llrintf-ppc32 \
 			s_llrint-power6 s_llrint-power4 s_llrint-ppc32 \
 			s_llround-power6 s_llround-power5+ s_llround-power4 \
-			s_llround-ppc32
+			s_llround-ppc32 w_sqrt-power5 w_sqrt-power4 \
+			w_sqrt-ppc32 w_sqrtf-power5 w_sqrtf-power4 \
+			w_sqrtf-ppc32
 
 CFLAGS-s_llround.c = -fno-builtin-llroundf
 endif
diff --git a/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt-power4.S b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt-power4.S
new file mode 100644
index 0000000..35e638f
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt-power4.S
@@ -0,0 +1,98 @@
+/* sqrt function.  PowerPC32/POWER4 version.
+   Copyright (C) 2007-2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* double [fp1] sqrt (double x [fp1])
+   Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
+   The fsqrt instruction generates the correct value for all inputs and
+   sets the appropriate floating point exceptions.  Extended checking is
+   only needed to set errno (via __kernel_standard) if the input value
+   is negative.
+
+   The fsqrt will set FPCC and FU (Floating Point Unordered or NaN)
+   to indicate that the input value was negative or NaN.  Use Move to
+   Condition Register from FPSCR to copy the FPCC field to cr1.  The
+   branch on summary overflow transfers control to .Lw_sqrt to process
+   any error conditions.  Otherwise we can return the result directly.
+
+   This part of the function is a leaf routine, so no need to stack a
+   frame or execute prologue/epilogue code.  This means it is safe to
+   transfer directly to .Lw_sqrt as long as the input value (f1) is
+   preserved.  Putting the sqrt result into f2 (double parameter 2)
+   allows passing both the input value and sqrt result into the extended
+   wrapper so there is no need to recompute.
+
+   This tactic avoids the overhead of stacking a frame for the normal
+   (non-error) case.  Until gcc supports prologue shrink-wrapping
+   this is the best we can do.  */
+
+	.section	".text"
+	.machine power4
+EALIGN (__sqrt_power4, 5, 0)
+	fsqrt	fp2,fp1			/* fp2 = sqrt (x); x is kept in fp1.  */
+	mcrfs	cr1,4			/* cr1 = FPSCR field 4 (FPCC).  */
+	bso-	cr1,.Lw_sqrt		/* FU set: x was negative or NaN.  */
+	fmr	fp1,fp2			/* Fast path: return the result.  */
+	blr
+	.align	4
+.Lw_sqrt:
+	mflr	r0
+	stwu	r1,-16(r1)		/* Allocate a 16-byte frame.  */
+	cfi_adjust_cfa_offset(16)
+	fmr	fp12,fp2		/* fp12 = sqrt result, held for return.  */
+	stw	r0,20(r1)		/* Save LR.  */
+	stw	r30,8(r1)		/* Save r30 (GOT base when SHARED).  */
+	cfi_offset(lr,20-16)
+	cfi_offset(r30,8-16)
+#ifdef SHARED
+	SETUP_GOT_ACCESS(r30,got_label)
+	addis	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha
+	addi	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l
+	lwz	r9,_LIB_VERSION@got(30)	/* r9 = &_LIB_VERSION via the GOT.  */
+	lwz	r0,0(r9)		/* r0 = _LIB_VERSION.  */
+#else
+	lis	r9,_LIB_VERSION@ha
+	lwz	r0,_LIB_VERSION@l(r9)	/* r0 = _LIB_VERSION.  */
+#endif
+/*  if (_LIB_VERSION == _IEEE_) return z;  (compared against -1)  */
+	cmpwi	cr7,r0,-1
+	beq-	cr7,.L4
+/*  if (x != x) return z;  i.e. x is a NaN, so no errno handling.  */
+	fcmpu	cr7,fp1,fp1
+	bne-	cr7,.L4
+/*  if  (x < 0.0)
+    return __kernel_standard (x, x, 26) */
+	fmr	fp2,fp1			/* Second argument = x.  */
+	fabs	fp0,fp1			/* fp0 = |x|.  */
+	li	r3,26			/* Error type 26: sqrt(negative).  */
+	fcmpu	cr7,fp1,fp0		/* x != |x| only for negative x here.  */
+	bne- 	cr7,.L11
+.L4:
+	lwz	r0,20(r1)		/* Reload saved LR.  */
+	fmr	fp1,fp12		/* Return value.  */
+	lwz	r30,8(r1)
+	addi	r1,r1,16		/* Pop the frame.  */
+	mtlr 	r0
+	blr
+.L11:
+	bl	__kernel_standard@plt
+	fmr	fp12,fp1		/* Return what __kernel_standard gave.  */
+	b	.L4
+END(__sqrt_power4)
diff --git a/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt-power5.S b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt-power5.S
new file mode 100644
index 0000000..fcb3043
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt-power5.S
@@ -0,0 +1,96 @@
+/* sqrt function.  PowerPC32/POWER5 version.
+   Copyright (C) 2007-2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* double [fp1] sqrt (double x [fp1])
+   Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
+   The fsqrt instruction generates the correct value for all inputs and
+   sets the appropriate floating point exceptions.  Extended checking is
+   only needed to set errno (via __kernel_standard) if the input value
+   is negative.
+
+   So compare the input value against the absolute value of itself.
+   This will compare equal unless the value is negative (EDOM) or a NaN,
+   in which case we branch to the extended wrapper.  If equal we can
+   return the result directly.
+
+   This part of the function looks like a leaf routine, so no need to
+   stack a frame or execute prologue/epilogue code.  It is safe to
+   branch directly to .Lw_sqrt as long as the input value (f1) is
+   preserved.  Putting the sqrt result into f2 (double parameter 2)
+   allows passing both the input value and sqrt result into the extended
+   wrapper so there is no need to recompute.
+
+   This tactic avoids the overhead of stacking a frame for the normal
+   (non-error) case.  Until gcc supports prologue shrink-wrapping
+   this is the best we can do.  */
+
+	.section	".text"
+	.machine power5
+EALIGN (__sqrt_power5, 5, 0)
+	fabs	fp0,fp1			/* fp0 = |x|.  */
+	fsqrt	fp2,fp1			/* fp2 = sqrt (x); x is kept in fp1.  */
+	fcmpu	cr1,fp0,fp1		/* |x| == x unless x is negative or NaN.  */
+	bne-	cr1,.Lw_sqrt		/* Unlikely: take the errno wrapper.  */
+	fmr	fp1,fp2			/* Fast path: return the result.  */
+	blr
+	.align	4
+.Lw_sqrt:
+	mflr	r0
+	stwu	r1,-16(r1)		/* Allocate a 16-byte frame.  */
+	cfi_adjust_cfa_offset(16)
+	fmr	fp12,fp2		/* fp12 = sqrt result, held for return.  */
+	stw	r0,20(r1)		/* Save LR.  */
+	stw	r30,8(r1)		/* Save r30 (GOT base when SHARED).  */
+	cfi_offset(lr,20-16)
+	cfi_offset(r30,8-16)
+#ifdef SHARED
+	SETUP_GOT_ACCESS(r30,got_label)
+	addis	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha
+	addi	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l
+	lwz	r9,_LIB_VERSION@got(30)	/* r9 = &_LIB_VERSION via the GOT.  */
+	lwz	r0,0(r9)		/* r0 = _LIB_VERSION.  */
+#else
+	lis	r9,_LIB_VERSION@ha
+	lwz	r0,_LIB_VERSION@l(r9)	/* r0 = _LIB_VERSION.  */
+#endif
+/*  if (_LIB_VERSION == _IEEE_) return z;  (compared against -1)  */
+	cmpwi	cr7,r0,-1
+	beq-	cr7,.L4
+/*  if (x != x) return z;  i.e. x is a NaN, so no errno handling.  */
+	fcmpu	cr7,fp1,fp1
+	bne-	cr7,.L4
+/*  if  (x < 0.0)
+    return __kernel_standard (x, x, 26) */
+	fmr	fp2,fp1			/* Second argument = x.  */
+	li	r3,26			/* Error type 26: sqrt(negative).  */
+	bne- 	cr1,.L11		/* Tests cr1 as set by the entry fcmpu.  */
+.L4:
+	lwz	r0,20(r1)		/* Reload saved LR.  */
+	fmr	fp1,fp12		/* Return value.  */
+	lwz	r30,8(r1)
+	addi	r1,r1,16		/* Pop the frame.  */
+	mtlr 	r0
+	blr
+.L11:
+	bl	__kernel_standard@plt
+	fmr	fp12,fp1		/* Return what __kernel_standard gave.  */
+	b	.L4
+END (__sqrt_power5)
diff --git a/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt-ppc32.c b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt-ppc32.c
new file mode 100644
index 0000000..5d07a7b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt-ppc32.c
@@ -0,0 +1,30 @@
+/* PowerPC32 default implementation for sqrt.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+
+/* NOTE(review): builds the float wrapper __sqrtf_ppc32 from math/w_sqrtf.c
+   despite the file name w_sqrt-ppc32.c; looks swapped with w_sqrtf-ppc32.c.  */
+
+#define __sqrtf __sqrtf_ppc32
+#undef weak_alias
+#define weak_alias(a, b)
+#undef strong_alias
+#define strong_alias(a, b)
+
+#include <math/w_sqrtf.c>
diff --git a/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt.c b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt.c
new file mode 100644
index 0000000..f77eeb2
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrt.c
@@ -0,0 +1,43 @@
+/* Multiple versions of sqrt.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+#include <math_ldbl_opt.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+extern __typeof (__sqrt) __sqrt_ppc32 attribute_hidden;	/* Generic fallback.  */
+extern __typeof (__sqrt) __sqrt_power4 attribute_hidden;	/* w_sqrt-power4.S  */
+extern __typeof (__sqrt) __sqrt_power5 attribute_hidden;	/* w_sqrt-power5.S  */
+
+libc_ifunc (__sqrt,	/* Prefer POWER5, then POWER4, else the fallback.  */
+	    (hwcap & PPC_FEATURE_POWER5)
+	    ? __sqrt_power5 :
+	      (hwcap & PPC_FEATURE_POWER4)
+	      ? __sqrt_power4
+	    : __sqrt_ppc32);
+
+weak_alias (__sqrt, sqrt)	/* Public name.  */
+
+#ifdef NO_LONG_DOUBLE	/* Presumably long double == double; confirm.  */
+strong_alias (__sqrt, __sqrtl)
+weak_alias (__sqrt, sqrtl)
+#endif
+#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
+compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0);	/* sqrtl at version GLIBC_2_0.  */
+#endif
diff --git a/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf-power4.S b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf-power4.S
new file mode 100644
index 0000000..f3524a2
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf-power4.S
@@ -0,0 +1,98 @@
+/* sqrtf function.  PowerPC32/POWER4 version.
+   Copyright (C) 2007-2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* float [fp1] sqrtf (float x [fp1])
+   Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
+   The fsqrts instruction generates the correct value for all inputs and
+   sets the appropriate floating point exceptions.  Extended checking is
+   only needed to set errno (via __kernel_standard) if the input value
+   is negative.
+
+   The fsqrts will set FPCC and FU (Floating Point Unordered or NaN)
+   to indicate that the input value was negative or NaN.  Use Move to
+   Condition Register from FPSCR to copy the FPCC field to cr1.  The
+   branch on summary overflow transfers control to .Lw_sqrtf to process
+   any error conditions.  Otherwise we can return the result directly.
+
+   This part of the function is a leaf routine, so no need to stack a
+   frame or execute prologue/epilogue code.  This means it is safe to
+   transfer directly to .Lw_sqrtf as long as the input value (f1) is
+   preserved.  Putting the sqrt result into f2 (float parameter 2)
+   allows passing both the input value and sqrt result into the extended
+   wrapper so there is no need to recompute.
+
+   This tactic avoids the overhead of stacking a frame for the normal
+   (non-error) case.  Until gcc supports prologue shrink-wrapping
+   this is the best we can do.  */
+
+	.section	".text"
+	.machine power4
+EALIGN (__sqrtf_power4, 5, 0)
+	fsqrts	fp2,fp1			/* fp2 = sqrtf (x); x is kept in fp1.  */
+	mcrfs	cr1,4			/* cr1 = FPSCR field 4 (FPCC).  */
+	bso-	cr1,.Lw_sqrtf		/* FU set: x was negative or NaN.  */
+	fmr	fp1,fp2			/* Fast path: return the result.  */
+	blr
+	.align	4
+.Lw_sqrtf:
+	mflr	r0
+	stwu	r1,-16(r1)		/* Allocate a 16-byte frame.  */
+	cfi_adjust_cfa_offset(16)
+	fmr	fp12,fp2		/* fp12 = sqrt result, held for return.  */
+	stw	r0,20(r1)		/* Save LR.  */
+	stw	r30,8(r1)		/* Save r30 (GOT base when SHARED).  */
+	cfi_offset(lr,20-16)
+	cfi_offset(r30,8-16)
+#ifdef SHARED
+	SETUP_GOT_ACCESS(r30,got_label)
+	addis	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha
+	addi	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l
+	lwz	r9,_LIB_VERSION@got(30)	/* r9 = &_LIB_VERSION via the GOT.  */
+	lwz	r0,0(r9)		/* r0 = _LIB_VERSION.  */
+#else
+	lis	r9,_LIB_VERSION@ha
+	lwz	r0,_LIB_VERSION@l(r9)	/* r0 = _LIB_VERSION.  */
+#endif
+/*  if (_LIB_VERSION == _IEEE_) return z;  (compared against -1)  */
+	cmpwi	cr7,r0,-1
+	beq-	cr7,.L4
+/*  if (x != x) return z;  i.e. x is a NaN, so no errno handling.  */
+	fcmpu	cr7,fp1,fp1
+	bne-	cr7,.L4
+/*  if  (x < 0.0)
+    return __kernel_standard (x, x, 126) */
+	fmr	fp2,fp1			/* Second argument = x.  */
+	fabs	fp0,fp1			/* fp0 = |x|.  */
+	li	r3,126			/* Error type passed to __kernel_standard.  */
+	fcmpu	cr7,fp1,fp0		/* x != |x| only for negative x here.  */
+	bne- 	cr7,.L11
+.L4:
+	lwz	r0,20(r1)		/* Reload saved LR.  */
+	fmr	fp1,fp12		/* Return value.  */
+	lwz	r30,8(r1)
+	addi	r1,r1,16		/* Pop the frame.  */
+	mtlr 	r0
+	blr
+.L11:
+	bl	__kernel_standard@plt
+	fmr	fp12,fp1		/* Return what __kernel_standard gave.  */
+	b	.L4
+END (__sqrtf_power4)
diff --git a/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf-power5.S b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf-power5.S
new file mode 100644
index 0000000..aae9e91
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf-power5.S
@@ -0,0 +1,96 @@
+/* sqrtf function.  PowerPC32/POWER5 version.
+   Copyright (C) 2007-2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <math_ldbl_opt.h>
+
+/* float [fp1] sqrtf (float x [fp1])
+   Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
+   The fsqrts instruction generates the correct value for all inputs and
+   sets the appropriate floating point exceptions.  Extended checking is
+   only needed to set errno (via __kernel_standard) if the input value
+   is negative.
+
+   So compare the input value against the absolute value of itself.
+   This will compare equal unless the value is negative (EDOM) or a NaN,
+   in which case we branch to the extended wrapper.  If equal we can
+   return the result directly.
+
+   This part of the function looks like a leaf routine, so no need to
+   stack a frame or execute prologue/epilogue code.  It is safe to
+   branch directly to .Lw_sqrtf as long as the input value (f1) is
+   preserved.  Putting the sqrt result into f2 (float parameter 2)
+   allows passing both the input value and sqrt result into the extended
+   wrapper so there is no need to recompute.
+
+   This tactic avoids the overhead of stacking a frame for the normal
+   (non-error) case.  Until gcc supports prologue shrink-wrapping
+   this is the best we can do.  */
+
+	.section	".text"
+	.machine power5
+EALIGN (__sqrtf_power5, 5, 0)
+	fabs	fp0,fp1			/* fp0 = |x|.  */
+	fsqrts	fp2,fp1			/* fp2 = sqrtf (x); x is kept in fp1.  */
+	fcmpu	cr1,fp0,fp1		/* |x| == x unless x is negative or NaN.  */
+	bne-	cr1,.Lw_sqrtf		/* Unlikely: take the errno wrapper.  */
+	fmr	fp1,fp2			/* Fast path: return the result.  */
+	blr
+        .align 4
+.Lw_sqrtf:
+	mflr	r0
+	stwu	r1,-16(r1)		/* Allocate a 16-byte frame.  */
+	cfi_adjust_cfa_offset(16)
+	fmr	fp12,fp2		/* fp12 = sqrt result, held for return.  */
+	stw	r0,20(r1)		/* Save LR.  */
+	stw	r30,8(r1)		/* Save r30 (GOT base when SHARED).  */
+	cfi_offset(lr,20-16)
+	cfi_offset(r30,8-16)
+#ifdef SHARED
+	SETUP_GOT_ACCESS(r30,got_label)
+	addis	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha
+	addi	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l
+	lwz	r9,_LIB_VERSION@got(30)	/* r9 = &_LIB_VERSION via the GOT.  */
+	lwz	r0,0(r9)		/* r0 = _LIB_VERSION.  */
+#else
+	lis	r9,_LIB_VERSION@ha
+	lwz	r0,_LIB_VERSION@l(r9)	/* r0 = _LIB_VERSION.  */
+#endif
+/*  if (_LIB_VERSION == _IEEE_) return z;  (compared against -1)  */
+	cmpwi	cr7,r0,-1
+	beq-	cr7,.L4
+/*  if (x != x) return z;  i.e. x is a NaN, so no errno handling.  */
+	fcmpu	cr7,fp1,fp1
+	bne-	cr7,.L4
+/*  if  (x < 0.0)
+    return __kernel_standard (x, x, 126) */
+	fmr	fp2,fp1			/* Second argument = x.  */
+	li	r3,126			/* Error type passed to __kernel_standard.  */
+	bne- 	cr1,.L11		/* Tests cr1 as set by the entry fcmpu.  */
+.L4:
+	lwz	r0,20(r1)		/* Reload saved LR.  */
+	fmr	fp1,fp12		/* Return value.  */
+	lwz	r30,8(r1)
+	addi	r1,r1,16		/* Pop the frame.  */
+	mtlr 	r0
+	blr
+.L11:
+	bl	__kernel_standard@plt
+	fmr	fp12,fp1		/* Return what __kernel_standard gave.  */
+	b	.L4
+END (__sqrtf_power5)
diff --git a/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf-ppc32.c b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf-ppc32.c
new file mode 100644
index 0000000..3702e44
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf-ppc32.c
@@ -0,0 +1,30 @@
+/* PowerPC32 default implementation for sqrtf.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+
+/* NOTE(review): builds the double wrapper __sqrt_ppc32 from math/w_sqrt.c
+   despite the file name w_sqrtf-ppc32.c; looks swapped with w_sqrt-ppc32.c.  */
+
+#define __sqrt __sqrt_ppc32
+#undef weak_alias
+#define weak_alias(a, b)
+#undef strong_alias
+#define strong_alias(a, b)
+
+#include <math/w_sqrt.c>
diff --git a/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf.c b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf.c
new file mode 100644
index 0000000..8af86c4
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/fpu/multiarch/w_sqrtf.c
@@ -0,0 +1,35 @@
+/* Multiple versions of sqrtf.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+#include <math_ldbl_opt.h>
+#include <shlib-compat.h>
+#include "init-arch.h"
+
+extern __typeof (__sqrtf) __sqrtf_ppc32 attribute_hidden;	/* Generic fallback.  */
+extern __typeof (__sqrtf) __sqrtf_power4 attribute_hidden;	/* w_sqrtf-power4.S  */
+extern __typeof (__sqrtf) __sqrtf_power5 attribute_hidden;	/* w_sqrtf-power5.S  */
+
+libc_ifunc (__sqrtf,	/* Prefer POWER5, then POWER4, else the fallback.  */
+	    (hwcap & PPC_FEATURE_POWER5)
+	    ? __sqrtf_power5 :
+	      (hwcap & PPC_FEATURE_POWER4)
+	      ? __sqrtf_power4
+	    : __sqrtf_ppc32);
+
+weak_alias (__sqrtf, sqrtf)	/* Public name.  */
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S
deleted file mode 100644
index 3648e4a..0000000
--- a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrt.S
+++ /dev/null
@@ -1,109 +0,0 @@
-/* sqrt function.  PowerPC32 version.
-   Copyright (C) 2007-2013 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-
-/* double [fp1] sqrt (double x [fp1])
-   Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
-   The fsqrt instruction generates the correct value for all inputs and
-   sets the appropriate floating point exceptions.  Extended checking is
-   only needed to set errno (via __kernel_standard) if the input value
-   is negative.
-
-   The fsqrt will set FPCC and FU (Floating Point Unordered or NaN
-   to indicated that the input value was negative or NaN. Use Move to
-   Condition Register from FPSCR to copy the FPCC field to cr1.  The
-   branch on summary overflow transfers control to w_sqrt to process
-   any error conditions. Otherwise we can return the result directly.
-
-   This part of the function is a leaf routine,  so no need to stack a
-   frame or execute prologue/epilogue code. This means it is safe to
-   transfer directly to w_sqrt as long as the input value (f1) is
-   preserved. Putting the sqrt result into f2 (double parameter 2)
-   allows passing both the input value and sqrt result into the extended
-   wrapper so there is no need to recompute.
-
-   This tactic avoids the overhead of stacking a frame for the normal
-   (non-error) case.  Until gcc supports prologue shrink-wrapping
-   this is the best we can do.  */
-
-	.section	".text"
-	.machine power4
-EALIGN (__sqrt, 5, 0)
-	fsqrt	fp2,fp1
-	mcrfs	cr1,4
-	bso-	cr1,.Lw_sqrt
-	fmr	fp1,fp2
-	blr
-	.align	4
-.Lw_sqrt:
-	mflr	r0
-	stwu	r1,-16(r1)
-	cfi_adjust_cfa_offset(16)
-	fmr	fp12,fp2
-	stw	r0,20(r1)
-	stw	r30,8(r1)
-	cfi_offset(lr,20-16)
-	cfi_offset(r30,8-16)
-#ifdef SHARED
-	SETUP_GOT_ACCESS(r30,got_label)
-	addis	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha
-	addi	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l
-	lwz	r9,_LIB_VERSION@got(30)
-	lwz	r0,0(r9)
-#else
-	lis	r9,_LIB_VERSION@ha
-	lwz	r0,_LIB_VERSION@l(r9)
-#endif
-/*  if (_LIB_VERSION == _IEEE_) return z; */
-	cmpwi	cr7,r0,-1
-	beq-	cr7,.L4
-/*  if (x != x) return z; !isnan*/
-	fcmpu	cr7,fp1,fp1
-	bne-	cr7,.L4
-/*  if  (x < 0.0)
-    return __kernel_standard (x, x, 26) */
-	fmr	fp2,fp1
-	fabs	fp0,fp1
-	li	r3,26
-	fcmpu	cr7,fp1,fp0
-	bne- 	cr7,.L11
-.L4:
-	lwz	r0,20(r1)
-	fmr	fp1,fp12
-	lwz	r30,8(r1)
-	addi	r1,r1,16
-	mtlr 	r0
-	blr
-.L11:
-	bl	__kernel_standard@plt
-	fmr	fp12,fp1
-	b	.L4
-	END	(__sqrt)
-
-weak_alias (__sqrt, sqrt)
-
-#ifdef NO_LONG_DOUBLE
-weak_alias (__sqrt, sqrtl)
-strong_alias (__sqrt, __sqrtl)
-#endif
-#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
-compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0)
-#endif
-
diff --git a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S b/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S
deleted file mode 100644
index 153843c..0000000
--- a/sysdeps/powerpc/powerpc32/power4/fpu/w_sqrtf.S
+++ /dev/null
@@ -1,101 +0,0 @@
-/* sqrtf function.  PowerPC32 version.
-   Copyright (C) 2007-2013 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-
-/* float [fp1] sqrts (float x [fp1])
-   Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
-   The fsqrts instruction generates the correct value for all inputs and
-   sets the appropriate floating point exceptions.  Extended checking is
-   only needed to set errno (via __kernel_standard) if the input value
-   is negative.
-
-   The fsqrts will set FPCC and FU (Floating Point Unordered or NaN
-   to indicated that the input value was negative or NaN. Use Move to
-   Condition Register from FPSCR to copy the FPCC field to cr1.  The
-   branch on summary overflow transfers control to w_sqrt to process
-   any error conditions. Otherwise we can return the result directly.
-
-   This part of the function is a leaf routine,  so no need to stack a
-   frame or execute prologue/epilogue code. This means it is safe to
-   transfer directly to w_sqrt as long as the input value (f1) is
-   preserved. Putting the sqrt result into f2 (float parameter 2)
-   allows passing both the input value and sqrt result into the extended
-   wrapper so there is no need to recompute.
-
-   This tactic avoids the overhead of stacking a frame for the normal
-   (non-error) case.  Until gcc supports prologue shrink-wrapping
-   this is the best we can do.  */
-
-	.section	".text"
-	.machine power4
-EALIGN (__sqrtf, 5, 0)
-	fsqrts	fp2,fp1
-	mcrfs	cr1,4
-	bso-	cr1,.Lw_sqrtf
-	fmr	fp1,fp2
-	blr
-        .align 4
-.Lw_sqrtf:
-	mflr	r0
-	stwu	r1,-16(r1)
-	cfi_adjust_cfa_offset(16)
-	fmr	fp12,fp2
-	stw	r0,20(r1)
-	stw	r30,8(r1)
-	cfi_offset(lr,20-16)
-	cfi_offset(r30,8-16)
-#ifdef SHARED
-	SETUP_GOT_ACCESS(r30,got_label)
-	addis	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha
-	addi	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l
-	lwz	r9,_LIB_VERSION@got(30)
-	lwz	r0,0(r9)
-#else
-	lis	r9,_LIB_VERSION@ha
-	lwz	r0,_LIB_VERSION@l(r9)
-#endif
-/*  if (_LIB_VERSION == _IEEE_) return z; */
-	cmpwi	cr7,r0,-1
-	beq-	cr7,.L4
-/*  if (x != x, 0) return z; !isnan */
-	fcmpu	cr7,fp1,fp1
-	bne-	cr7,.L4
-/*  if  (x < 0.0)
-    return __kernel_standard (x, x, 126) */
-	fmr	fp2,fp1
-	fabs	fp0,fp1
-	li	r3,126
-	fcmpu	cr7,1,0
-	bne- 	cr7,.L11
-.L4:
-	lwz	r0,20(r1)
-	fmr	fp1,fp12
-	lwz	r30,8(r1)
-	addi	r1,r1,16
-	mtlr 	r0
-	blr
-.L11:
-	bl	__kernel_standard@plt
-	fmr	fp12,fp1
-	b	.L4
-	END	(__sqrtf)
-
-weak_alias (__sqrtf, sqrtf)
-
diff --git a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S
deleted file mode 100644
index ed11d5a..0000000
--- a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrt.S
+++ /dev/null
@@ -1,107 +0,0 @@
-/* sqrt function.  PowerPC32 version.
-   Copyright (C) 2007-2013 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-
-/* double [fp1] sqrt (double x [fp1])
-   Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
-   The fsqrt instruction generates the correct value for all inputs and
-   sets the appropriate floating point exceptions.  Extended checking is
-   only needed to set errno (via __kernel_standard) if the input value
-   is negative.
-
-   So compare the input value against the absolute value of itself.
-   This will compare equal unless the value is negative (EDOM) or a NAN,
-   in which case we branch to the extended wrapper.  If equal we can return
-   the result directly.
-
-   This part of the function looks like a leaf routine,  so no need to
-   stack a frame or execute prologue/epilogue code. It is safe to
-   branch directly to w_sqrt as long as the input value (f1) is
-   preserved. Putting the sqrt result into f2 (float parameter 2)
-   allows passing both the input value and sqrt result into the extended
-   wrapper so there is no need to recompute.
-
-   This tactic avoids the overhead of stacking a frame for the normal
-   (non-error) case.  Until gcc supports prologue shrink-wrapping
-   this is the best we can do.  */
-
-	.section	".text"
-	.machine power4
-EALIGN (__sqrt, 5, 0)
-	fabs	fp0,fp1
-	fsqrt	fp2,fp1
-	fcmpu	cr1,fp0,fp1
-	bne-	cr1,.Lw_sqrt
-	fmr	fp1,fp2
-	blr
-	.align	4
-.Lw_sqrt:
-	mflr	r0
-	stwu	r1,-16(r1)
-	cfi_adjust_cfa_offset(16)
-	fmr	fp12,fp2
-	stw	r0,20(r1)
-	stw	r30,8(r1)
-	cfi_offset(lr,20-16)
-	cfi_offset(r30,8-16)
-#ifdef SHARED
-	SETUP_GOT_ACCESS(r30,got_label)
-	addis	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha
-	addi	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l
-	lwz	r9,_LIB_VERSION@got(30)
-	lwz	r0,0(r9)
-#else
-	lis	r9,_LIB_VERSION@ha
-	lwz	r0,_LIB_VERSION@l(r9)
-#endif
-/*  if (_LIB_VERSION == _IEEE_) return z; */
-	cmpwi	cr7,r0,-1
-	beq-	cr7,.L4
-/*  if (x != x) return z;  (x is a NaN) */
-	fcmpu	cr7,fp1,fp1
-	bne-	cr7,.L4
-/*  if  (x < 0.0)
-    return __kernel_standard (x, x, 26) */
-	fmr	fp2,fp1
-	li	r3,26
-	bne- 	cr1,.L11
-.L4:
-	lwz	r0,20(r1)
-	fmr	fp1,fp12
-	lwz	r30,8(r1)
-	addi	r1,r1,16
-	mtlr 	r0
-	blr
-.L11:
-	bl	__kernel_standard@plt
-	fmr	fp12,fp1
-	b	.L4
-	END	(__sqrt)
-
-weak_alias (__sqrt, sqrt)
-
-#ifdef NO_LONG_DOUBLE
-weak_alias (__sqrt, sqrtl)
-strong_alias (__sqrt, __sqrtl)
-#endif
-#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_0)
-compat_symbol (libm, __sqrt, sqrtl, GLIBC_2_0)
-#endif
-
diff --git a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S b/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S
deleted file mode 100644
index 2049172..0000000
--- a/sysdeps/powerpc/powerpc32/power5/fpu/w_sqrtf.S
+++ /dev/null
@@ -1,99 +0,0 @@
-/* sqrtf function.  PowerPC32 version.
-   Copyright (C) 2007-2013 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-
-/* float [fp1] sqrtf (float x [fp1])
-   Power4 (ISA V2.0) and above implement sqrt in hardware (not optional).
-   The fsqrts instruction generates the correct value for all inputs and
-   sets the appropriate floating point exceptions.  Extended checking is
-   only needed to set errno (via __kernel_standard) if the input value
-   is negative.
-
-   So compare the input value against the absolute value of itself.
-   This will compare equal unless the value is negative (EDOM) or a NAN,
-   in which case we branch to the extended wrapper.  If equal we can return
-   the result directly.
-
-   This part of the function looks like a leaf routine,  so no need to
-   stack a frame or execute prologue/epilogue code. It is safe to
-   branch directly to w_sqrtf as long as the input value (f1) is
-   preserved. Putting the sqrt result into f2 (float parameter 2)
-   allows passing both the input value and sqrt result into the extended
-   wrapper so there is no need to recompute.
-
-   This tactic avoids the overhead of stacking a frame for the normal
-   (non-error) case.  Until gcc supports prologue shrink-wrapping
-   this is the best we can do.  */
-
-	.section	".text"
-	.machine power4
-EALIGN (__sqrtf, 5, 0)
-	fabs	fp0,fp1
-	fsqrts	fp2,fp1
-	fcmpu	cr1,fp0,fp1
-	bne-	cr1,.Lw_sqrtf
-	fmr	fp1,fp2
-	blr
-        .align 4
-.Lw_sqrtf:
-	mflr	r0
-	stwu	r1,-16(r1)
-	cfi_adjust_cfa_offset(16)
-	fmr	fp12,fp2
-	stw	r0,20(r1)
-	stw	r30,8(r1)
-	cfi_offset(lr,20-16)
-	cfi_offset(r30,8-16)
-#ifdef SHARED
-	SETUP_GOT_ACCESS(r30,got_label)
-	addis	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha
-	addi	r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l
-	lwz	r9,_LIB_VERSION@got(30)
-	lwz	r0,0(r9)
-#else
-	lis	r9,_LIB_VERSION@ha
-	lwz	r0,_LIB_VERSION@l(r9)
-#endif
-/*  if (_LIB_VERSION == _IEEE_) return z; */
-	cmpwi	cr7,r0,-1
-	beq-	cr7,.L4
-/*  if (x != x) return z;  (x is a NaN) */
-	fcmpu	cr7,fp1,fp1
-	bne-	cr7,.L4
-/*  if  (x < 0.0)
-    return __kernel_standard (x, x, 126) */
-	fmr	fp2,fp1
-	li	r3,126
-	bne- 	cr1,.L11
-.L4:
-	lwz	r0,20(r1)
-	fmr	fp1,fp12
-	lwz	r30,8(r1)
-	addi	r1,r1,16
-	mtlr 	r0
-	blr
-.L11:
-	bl	__kernel_standard@plt
-	fmr	fp12,fp1
-	b	.L4
-	END	(__sqrtf)
-
-weak_alias (__sqrtf, sqrtf)
-
-- 
1.7.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]