This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
[PATCH] Minor performance improvement for AMD64 assembly routines
- From: Nicholas Miell <nmiell at comcast dot net>
- To: libc-alpha at sources dot redhat dot com
- Date: Sun, 05 Dec 2004 20:56:31 -0800
- Subject: [PATCH] Minor performance improvement for AMD64 assembly routines
AMD x86-64 processors cannot predict single-byte near RET instructions
when they are either the target of a branch or immediately preceded by a
conditional branch (see section 6.2 of the Athlon 64/Opteron
optimization guide).
AMD's recommended solution is to either reorder instructions so that
this situation does not occur, or use the REP prefix on the RET
instruction.
Recent versions of GCC will do this automatically, but glibc contains
many hand-written assembly routines.
The attached patch changes all relevant RET instructions (all that I
found, anyway) to REP RET.
--
Nicholas Miell <nmiell@comcast.net>
--- libc/sysdeps/x86_64/fpu/s_fmin.S.~1~ 2002-07-06 08:07:10.000000000 -0700
+++ libc/sysdeps/x86_64/fpu/s_fmin.S 2004-12-05 19:39:11.189901937 -0800
@@ -31,6 +31,6 @@
jp 2f // then return xmm0
movsd %xmm1, %xmm0 // otherwise return xmm1
-2: ret
+2: rep ; ret
END(__fmin)
weak_alias (__fmin, fmin)
--- libc/sysdeps/x86_64/fpu/s_expm1l.S.~1~ 2002-09-09 18:19:47.000000000 -0700
+++ libc/sysdeps/x86_64/fpu/s_expm1l.S 2004-12-05 19:36:22.586487310 -0800
@@ -78,7 +78,7 @@
jz 3f // If positive, jump.
fstp %st
fldl MO(minus1) // Set result to -1.0.
-3: ret
+3: rep ; ret
END(__expm1l)
libm_hidden_def (__expm1l)
weak_alias (__expm1l, expm1l)
--- libc/sysdeps/x86_64/fpu/e_exp2l.S.~1~ 2001-09-19 03:24:08.000000000 -0700
+++ libc/sysdeps/x86_64/fpu/e_exp2l.S 2004-12-05 19:31:23.664212416 -0800
@@ -34,5 +34,5 @@
jz 2f /* If positive, jump. */
fstp %st
fldz /* Set result to 0. */
-2: ret
+2: rep ; ret
END (__ieee754_exp2l)
--- libc/sysdeps/x86_64/fpu/s_fmaxf.S.~1~ 2002-07-06 08:07:10.000000000 -0700
+++ libc/sysdeps/x86_64/fpu/s_fmaxf.S 2004-12-05 19:37:42.437792442 -0800
@@ -31,6 +31,6 @@
jp 2f // then return xmm0
movss %xmm1, %xmm0 // otherwise return xmm1
-2: ret
+2: rep ; ret
END(__fmaxf)
weak_alias (__fmaxf, fmaxf)
--- libc/sysdeps/x86_64/fpu/s_cosl.S.~1~ 2001-09-19 03:24:08.000000000 -0700
+++ libc/sysdeps/x86_64/fpu/s_cosl.S 2004-12-05 19:36:03.613166933 -0800
@@ -16,7 +16,7 @@
fnstsw %ax
testl $0x400,%eax
jnz 1f
- ret
+ rep ; ret
.align ALIGNARG(4)
1: fldpi
fadd %st(0)
--- libc/sysdeps/x86_64/fpu/s_fmax.S.~1~ 2002-07-06 08:07:10.000000000 -0700
+++ libc/sysdeps/x86_64/fpu/s_fmax.S 2004-12-05 19:38:31.746630572 -0800
@@ -31,6 +31,6 @@
jp 2f // then return xmm0
movsd %xmm1, %xmm0 // otherwise return xmm1
-2: ret
+2: rep ; ret
END(__fmax)
weak_alias (__fmax, fmax)
--- libc/sysdeps/x86_64/fpu/s_fminf.S.~1~ 2002-07-06 08:07:10.000000000 -0700
+++ libc/sysdeps/x86_64/fpu/s_fminf.S 2004-12-05 19:38:44.226552423 -0800
@@ -31,6 +31,6 @@
jp 2f // then return xmm0
movss %xmm1, %xmm0 // otherwise return xmm1
-2: ret
+2: rep ; ret
END(__fminf)
weak_alias (__fminf, fminf)
--- libc/sysdeps/x86_64/strchr.S.~1~ 2003-04-29 15:47:18.000000000 -0700
+++ libc/sysdeps/x86_64/strchr.S 2004-12-05 19:49:02.562126710 -0800
@@ -283,8 +283,7 @@
incq %rax
6:
- nop
- retq
+ rep ; retq
END (BP_SYM (strchr))
weak_alias (BP_SYM (strchr), BP_SYM (index))