This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] PowerPC: removed branch prediction from rint implementation


This patch removes the branch prediction instructions for PowerPC rint
implementation, replacing them for normal instructions. These instructions
are actually hurting performance, it is safer and generally better to let
the hardware handle it.

I added a rint benchmark test on benchtest framework. The results on a POWER7
64 bits are:

MASTER: rint: ITERS:1e+09: TOTAL:12.0901s, MAX:48.908ns, MIN:5.312ns, 8.27124e+07 iter/s
PATCH:  rint: ITERS:1e+09: TOTAL:5.29302s, MAX:37.826ns, MIN:3.922ns, 1.88928e+08 iter/s

Any tips, comments, advices?

---

2013-03-31  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>

	* sysdeps/powerpc/powerpc32/fpu/s_rint.S: Remove branch prediction
	instructions.
	* sysdeps/powerpc/powerpc32/fpu/s_rintf.S: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/s_rint.S: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/s_rintf.S: Likewise.
	* benchtests/Makefile: Add rint benchtest.
	* benchtests/rint-inputs: Input for rint benchtest.

--

diff --git a/benchtests/Makefile b/benchtests/Makefile
index 74938b9..cc54b81 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -43,7 +43,7 @@
 #   See pow-inputs for an example.
 
 subdir := benchtests
-bench := exp pow
+bench := exp pow rint
 
 exp-ITER = 100000
 exp-ARGLIST = double
@@ -55,5 +55,10 @@ pow-ARGLIST = double:double
 pow-RET = double
 LDFLAGS-bench-pow = -lm
 
+rint-ITER = 250000000
+rint-ARGLIST = double
+rint-RET = double
+LDFLAGS-bench-rint = -lm
+
 include ../Makeconfig
 include ../Rules
diff --git a/benchtests/rint-inputs b/benchtests/rint-inputs
new file mode 100644
index 0000000..a5f83dc
--- /dev/null
+++ b/benchtests/rint-inputs
@@ -0,0 +1,4 @@
+78.5
+-78.5
+4503599627370497.0
+-4503599627370497.0
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rint.S b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
index f3cd036..f04055f 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
@@ -45,14 +45,14 @@ ENTRY (__rint)
 	fsub	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO52)  */
 	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	bnllr-	cr7
-	bng-	cr6,.L4
+	bnllr	cr7
+	bng	cr6,.L4
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fabs	fp1,fp1		/* if (x == 0.0)  */
 	blr			/* x = 0.0; */
 .L4:
-	bnllr-	cr6		/* if (x < 0.0)  */
+	bnllr	cr6		/* if (x < 0.0)  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fnabs	fp1,fp1		/* if (x == 0.0)  */
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
index 247dd4a..e0301af 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
@@ -41,14 +41,14 @@ ENTRY (__rintf)
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO23)  */
 	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	bnllr-	cr7
-	bng-	cr6,.L4
+	bnllr	cr7
+	bng	cr6,.L4
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fabs	fp1,fp1		/* if (x == 0.0)  */
 	blr			/* x = 0.0; */
 .L4:
-	bnllr-	cr6		/* if (x < 0.0)  */
+	bnllr	cr6		/* if (x < 0.0)  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fnabs	fp1,fp1		/* if (x == 0.0)  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rint.S b/sysdeps/powerpc/powerpc64/fpu/s_rint.S
index f333972..57e3759 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_rint.S
@@ -34,14 +34,14 @@ EALIGN (__rint, 4, 0)
 	fsub	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO52)  */
 	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	bnllr-	cr7
-	bng-	cr6,.L4
+	bnllr	cr7
+	bng	cr6,.L4
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fabs	fp1,fp1		/* if (x == 0.0)  */
 	blr			/* x = 0.0; */
 .L4:
-	bnllr-	cr6		/* if (x < 0.0)  */
+	bnllr	cr6		/* if (x < 0.0)  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fnabs	fp1,fp1		/* if (x == 0.0)  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
index 26b0872..cb28ec7 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
@@ -30,14 +30,14 @@ EALIGN (__rintf, 4, 0)
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO23)  */
 	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
-	bnllr-	cr7
-	bng-	cr6,.L4
+	bnllr	cr7
+	bng	cr6,.L4
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fabs	fp1,fp1		/* if (x == 0.0)  */
 	blr			/* x = 0.0; */
 .L4:
-	bnllr-	cr6		/* if (x < 0.0)  */
+	bnllr	cr6		/* if (x < 0.0)  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fnabs	fp1,fp1		/* if (x == 0.0)  */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]