This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch master updated. glibc-2.26.9000-631-g5313581
- From: hjl at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 22 Oct 2017 15:12:56 -0000
- Subject: GNU C Library master sources branch master updated. glibc-2.26.9000-631-g5313581
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master, has been updated
via 5313581cb52fd5d3d2cf222ddb6f8f86f090974f (commit)
from 6089a3ee24cede17e9443aef0aa72fa1a0ba1548 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5313581cb52fd5d3d2cf222ddb6f8f86f090974f
commit 5313581cb52fd5d3d2cf222ddb6f8f86f090974f
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Oct 22 08:11:15 2017 -0700
i386: Replace assembly versions of e_powf with generic e_powf.c
This patch replaces i386 assembly versions of e_powf with generic
e_powf.c. For workload-spec2017.wrf, on Nehalem, it improves
performance by:
Before After Improvement
reciprocal-throughput 230.855 78.3358 194%
latency 231.685 94.1259 146%
On Skylake, it improves performance by:
Before After Improvement
reciprocal-throughput 239.858 47.4713 405%
latency 247.57 93.8798 163%
On IvyBridge with --disable-multi-arch, it improves performance by:
Before After Improvement
reciprocal-throughput 269.078 63.3758 324%
latency 271.473 102.091 165%
* sysdeps/i386/fpu/e_powf.S: Removed.
* sysdeps/i386/fpu/e_powf_log2_data.c: Likewise.
* sysdeps/i386/fpu/w_powf.c: Likewise.
* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_powf.c.
* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
Add e_powf-sse2.
(CFLAGS-e_powf-sse2.c): New.
* sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c: New file.
* sysdeps/i386/i686/fpu/multiarch/e_powf.c: Likewise.
diff --git a/ChangeLog b/ChangeLog
index 78910c5..5d45da1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,18 @@
2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
+ * sysdeps/i386/fpu/e_powf.S: Removed.
+ * sysdeps/i386/fpu/e_powf_log2_data.c: Likewise.
+ * sysdeps/i386/fpu/w_powf.c: Likewise.
+ * sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_powf.c.
+ * sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
+ * sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
+ Add e_powf-sse2.
+ (CFLAGS-e_powf-sse2.c): New.
+ * sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c: New file.
+ * sysdeps/i386/i686/fpu/multiarch/e_powf.c: Likewise.
+
+2017-10-22 H.J. Lu <hongjiu.lu@intel.com>
+
* sysdeps/i386/fpu/e_log2f.S: Removed.
* sysdeps/i386/fpu/e_log2f_data.c: Likewise.
* sysdeps/i386/fpu/w_log2f.c: Likewise.
diff --git a/sysdeps/i386/fpu/e_powf.S b/sysdeps/i386/fpu/e_powf.S
deleted file mode 100644
index 467ef23..0000000
--- a/sysdeps/i386/fpu/e_powf.S
+++ /dev/null
@@ -1,392 +0,0 @@
-/* ix87 specific implementation of pow function.
- Copyright (C) 1996-2017 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <machine/asm.h>
-#include <i386-math-asm.h>
-
- .section .rodata.cst8,"aM",@progbits,8
-
- .p2align 3
- .type one,@object
-one: .double 1.0
- ASM_SIZE_DIRECTIVE(one)
- .type limit,@object
-limit: .double 0.29
- ASM_SIZE_DIRECTIVE(limit)
- .type p31,@object
-p31: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x41
- ASM_SIZE_DIRECTIVE(p31)
-
- .section .rodata.cst16,"aM",@progbits,16
-
- .p2align 3
- .type infinity,@object
-inf_zero:
-infinity:
- .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f
- ASM_SIZE_DIRECTIVE(infinity)
- .type zero,@object
-zero: .double 0.0
- ASM_SIZE_DIRECTIVE(zero)
- .type minf_mzero,@object
-minf_mzero:
-minfinity:
- .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff
-mzero:
- .byte 0, 0, 0, 0, 0, 0, 0, 0x80
- ASM_SIZE_DIRECTIVE(minf_mzero)
-DEFINE_FLT_MIN
-
-#ifdef PIC
-# define MO(op) op##@GOTOFF(%ecx)
-# define MOX(op,x,f) op##@GOTOFF(%ecx,x,f)
-#else
-# define MO(op) op
-# define MOX(op,x,f) op(,x,f)
-#endif
-
- .text
-ENTRY(__ieee754_powf)
- flds 8(%esp) // y
- fxam
-
-#ifdef PIC
- LOAD_PIC_REG (cx)
-#endif
-
- fnstsw
- movb %ah, %dl
- andb $0x45, %ah
- cmpb $0x40, %ah // is y == 0 ?
- je 11f
-
- cmpb $0x05, %ah // is y == ±inf ?
- je 12f
-
- cmpb $0x01, %ah // is y == NaN ?
- je 30f
-
- flds 4(%esp) // x : y
-
- subl $4, %esp
- cfi_adjust_cfa_offset (4)
-
- fxam
- fnstsw
- movb %ah, %dh
- andb $0x45, %ah
- cmpb $0x40, %ah
- je 20f // x is ±0
-
- cmpb $0x05, %ah
- je 15f // x is ±inf
-
- cmpb $0x01, %ah
- je 33f // x is NaN
-
- fxch // y : x
-
- /* fistpl raises invalid exception for |y| >= 1L<<31. */
- fld %st // y : y : x
- fabs // |y| : y : x
- fcompl MO(p31) // y : x
- fnstsw
- sahf
- jnc 2f
-
- /* First see whether `y' is a natural number. In this case we
- can use a more precise algorithm. */
- fld %st // y : y : x
- fistpl (%esp) // y : x
- fildl (%esp) // int(y) : y : x
- fucomp %st(1) // y : x
- fnstsw
- sahf
- jne 3f
-
- /* OK, we have an integer value for y. */
- popl %edx
- cfi_adjust_cfa_offset (-4)
- orl $0, %edx
- fstp %st(0) // x
- jns 4f // y >= 0, jump
- fdivrl MO(one) // 1/x (now referred to as x)
- negl %edx
-4: fldl MO(one) // 1 : x
- fxch
-
- /* If y is even, take the absolute value of x. Otherwise,
- ensure all intermediate values that might overflow have the
- sign of x. */
- testb $1, %dl
- jnz 6f
- fabs
-
-6: shrl $1, %edx
- jnc 5f
- fxch
- fabs
- fmul %st(1) // x : ST*x
- fxch
-5: fld %st // x : x : ST*x
- fabs // |x| : x : ST*x
- fmulp // |x|*x : ST*x
- testl %edx, %edx
- jnz 6b
- fstp %st(0) // ST*x
- FLT_NARROW_EVAL_UFLOW_NONNAN
- ret
-
- /* y is ±NAN */
-30: flds 4(%esp) // x : y
- fldl MO(one) // 1.0 : x : y
- fucomp %st(1) // x : y
- fnstsw
- sahf
- je 31f
- fxch // y : x
-31: fstp %st(1)
- ret
-
- cfi_adjust_cfa_offset (4)
- .align ALIGNARG(4)
-2: /* y is a large integer (so even). */
- fxch // x : y
- fabs // |x| : y
- fxch // y : x
- .align ALIGNARG(4)
-3: /* y is a real number. */
- fxch // x : y
- fldl MO(one) // 1.0 : x : y
- fldl MO(limit) // 0.29 : 1.0 : x : y
- fld %st(2) // x : 0.29 : 1.0 : x : y
- fsub %st(2) // x-1 : 0.29 : 1.0 : x : y
- fabs // |x-1| : 0.29 : 1.0 : x : y
- fucompp // 1.0 : x : y
- fnstsw
- fxch // x : 1.0 : y
- sahf
- ja 7f
- fsub %st(1) // x-1 : 1.0 : y
- fyl2xp1 // log2(x) : y
- jmp 8f
-
-7: fyl2x // log2(x) : y
-8: fmul %st(1) // y*log2(x) : y
- fst %st(1) // y*log2(x) : y*log2(x)
- frndint // int(y*log2(x)) : y*log2(x)
- fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
- fxch // fract(y*log2(x)) : int(y*log2(x))
- f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x))
- faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x))
- fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
-32: addl $4, %esp
- cfi_adjust_cfa_offset (-4)
- fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
- FLT_NARROW_EVAL_UFLOW_NONNAN
- ret
-
- /* x is NaN. */
- cfi_adjust_cfa_offset (4)
-33: addl $4, %esp
- cfi_adjust_cfa_offset (-4)
- fstp %st(1)
- ret
-
- // pow(x,±0) = 1
- .align ALIGNARG(4)
-11: fstp %st(0) // pop y
- fldl MO(one)
- ret
-
- // y == ±inf
- .align ALIGNARG(4)
-12: fstp %st(0) // pop y
- fldl MO(one) // 1
- flds 4(%esp) // x : 1
- fabs // abs(x) : 1
- fucompp // < 1, == 1, or > 1
- fnstsw
- andb $0x45, %ah
- cmpb $0x45, %ah
- je 13f // jump if x is NaN
-
- cmpb $0x40, %ah
- je 14f // jump if |x| == 1
-
- shlb $1, %ah
- xorb %ah, %dl
- andl $2, %edx
- fldl MOX(inf_zero, %edx, 4)
- ret
-
- .align ALIGNARG(4)
-14: fldl MO(one)
- ret
-
- .align ALIGNARG(4)
-13: flds 4(%esp) // load x == NaN
- ret
-
- cfi_adjust_cfa_offset (4)
- .align ALIGNARG(4)
- // x is ±inf
-15: fstp %st(0) // y
- testb $2, %dh
- jz 16f // jump if x == +inf
-
- // fistpl raises invalid exception for |y| >= 1L<<31, so test
- // that (in which case y is certainly even) before testing
- // whether y is odd.
- fld %st // y : y
- fabs // |y| : y
- fcompl MO(p31) // y
- fnstsw
- sahf
- jnc 16f
-
- // We must find out whether y is an odd integer.
- fld %st // y : y
- fistpl (%esp) // y
- fildl (%esp) // int(y) : y
- fucompp // <empty>
- fnstsw
- sahf
- jne 17f
-
- // OK, the value is an integer.
- popl %edx
- cfi_adjust_cfa_offset (-4)
- testb $1, %dl
- jz 18f // jump if not odd
- // It's an odd integer.
- shrl $31, %edx
- fldl MOX(minf_mzero, %edx, 8)
- ret
-
- cfi_adjust_cfa_offset (4)
- .align ALIGNARG(4)
-16: fcompl MO(zero)
- addl $4, %esp
- cfi_adjust_cfa_offset (-4)
- fnstsw
- shrl $5, %eax
- andl $8, %eax
- fldl MOX(inf_zero, %eax, 1)
- ret
-
- cfi_adjust_cfa_offset (4)
- .align ALIGNARG(4)
-17: shll $30, %edx // sign bit for y in right position
- addl $4, %esp
- cfi_adjust_cfa_offset (-4)
-18: shrl $31, %edx
- fldl MOX(inf_zero, %edx, 8)
- ret
-
- cfi_adjust_cfa_offset (4)
- .align ALIGNARG(4)
- // x is ±0
-20: fstp %st(0) // y
- testb $2, %dl
- jz 21f // y > 0
-
- // x is ±0 and y is < 0. We must find out whether y is an odd integer.
- testb $2, %dh
- jz 25f
-
- // fistpl raises invalid exception for |y| >= 1L<<31, so test
- // that (in which case y is certainly even) before testing
- // whether y is odd.
- fld %st // y : y
- fabs // |y| : y
- fcompl MO(p31) // y
- fnstsw
- sahf
- jnc 25f
-
- fld %st // y : y
- fistpl (%esp) // y
- fildl (%esp) // int(y) : y
- fucompp // <empty>
- fnstsw
- sahf
- jne 26f
-
- // OK, the value is an integer.
- popl %edx
- cfi_adjust_cfa_offset (-4)
- testb $1, %dl
- jz 27f // jump if not odd
- // It's an odd integer.
- // Raise divide-by-zero exception and get minus infinity value.
- fldl MO(one)
- fdivl MO(zero)
- fchs
- ret
-
- cfi_adjust_cfa_offset (4)
-25: fstp %st(0)
-26: addl $4, %esp
- cfi_adjust_cfa_offset (-4)
-27: // Raise divide-by-zero exception and get infinity value.
- fldl MO(one)
- fdivl MO(zero)
- ret
-
- cfi_adjust_cfa_offset (4)
- .align ALIGNARG(4)
- // x is ±0 and y is > 0. We must find out whether y is an odd integer.
-21: testb $2, %dh
- jz 22f
-
- // fistpl raises invalid exception for |y| >= 1L<<31, so test
- // that (in which case y is certainly even) before testing
- // whether y is odd.
- fcoml MO(p31) // y
- fnstsw
- sahf
- jnc 22f
-
- fld %st // y : y
- fistpl (%esp) // y
- fildl (%esp) // int(y) : y
- fucompp // <empty>
- fnstsw
- sahf
- jne 23f
-
- // OK, the value is an integer.
- popl %edx
- cfi_adjust_cfa_offset (-4)
- testb $1, %dl
- jz 24f // jump if not odd
- // It's an odd integer.
- fldl MO(mzero)
- ret
-
- cfi_adjust_cfa_offset (4)
-22: fstp %st(0)
-23: addl $4, %esp // Don't use pop.
- cfi_adjust_cfa_offset (-4)
-24: fldl MO(zero)
- ret
-
-END(__ieee754_powf)
-strong_alias (__ieee754_powf, __powf_finite)
diff --git a/sysdeps/i386/fpu/e_powf_log2_data.c b/sysdeps/i386/fpu/e_powf_log2_data.c
deleted file mode 100644
index 1cc8931..0000000
--- a/sysdeps/i386/fpu/e_powf_log2_data.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed. */
diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
index 64cac56..3ab3fd8 100644
--- a/sysdeps/i386/fpu/libm-test-ulps
+++ b/sysdeps/i386/fpu/libm-test-ulps
@@ -2370,24 +2370,30 @@ ldouble: 1
Function: "pow_downward":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
Function: "pow_towardzero":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
Function: "pow_upward":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
diff --git a/sysdeps/i386/fpu/w_powf.c b/sysdeps/i386/fpu/w_powf.c
deleted file mode 100644
index d133216..0000000
--- a/sysdeps/i386/fpu/w_powf.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/../math/w_powf.c>
diff --git a/sysdeps/i386/i686/fpu/multiarch/Makefile b/sysdeps/i386/i686/fpu/multiarch/Makefile
index eee3b8b..c0fa976 100644
--- a/sysdeps/i386/i686/fpu/multiarch/Makefile
+++ b/sysdeps/i386/i686/fpu/multiarch/Makefile
@@ -1,9 +1,10 @@
ifeq ($(subdir),math)
libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 e_log2f-sse2 \
- s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
+ e_powf-sse2 s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse
CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse
CFLAGS-e_log2f-sse2.c = -msse2 -mfpmath=sse
CFLAGS-e_logf-sse2.c = -msse2 -mfpmath=sse
+CFLAGS-e_powf-sse2.c = -msse2 -mfpmath=sse
endif
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c b/sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c
new file mode 100644
index 0000000..c56f6ee
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c
@@ -0,0 +1,3 @@
+#define __powf __powf_sse2
+
+#include <sysdeps/ieee754/flt-32/e_powf.c>
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_powf.c b/sysdeps/i386/i686/fpu/multiarch/e_powf.c
new file mode 100644
index 0000000..4dc4c87
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/multiarch/e_powf.c
@@ -0,0 +1,43 @@
+/* Multiple versions of powf.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define powf __redirect_powf
+#define __DECL_SIMD___redirect_powf
+#include <math.h>
+#undef powf
+
+#define SYMBOL_NAME powf
+#include "ifunc-sse2.h"
+
+libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (__powf_ia32, __GI___powf, __redirect_powf)
+ __attribute__ ((visibility ("hidden")));
+
+# include <shlib-compat.h>
+versioned_symbol (libm, __powf, powf, GLIBC_2_27);
+#else
+weak_alias (__powf, powf)
+#endif
+
+strong_alias (__powf, __ieee754_powf)
+strong_alias (__powf, __powf_finite)
+
+#define __powf __powf_ia32
+#include <sysdeps/ieee754/flt-32/e_powf.c>
diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
index b5d74df..26d90ec 100644
--- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
+++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
@@ -2370,24 +2370,30 @@ ldouble: 1
Function: "pow_downward":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
Function: "pow_towardzero":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
Function: "pow_upward":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
@@ -2577,30 +2583,30 @@ ldouble: 5
Function: "tgamma_downward":
double: 3
-float: 4
+float: 5
float128: 5
idouble: 3
-ifloat: 4
+ifloat: 5
ifloat128: 5
ildouble: 5
ldouble: 5
Function: "tgamma_towardzero":
double: 4
-float: 4
+float: 5
float128: 5
idouble: 4
-ifloat: 4
+ifloat: 5
ifloat128: 5
ildouble: 5
ldouble: 5
Function: "tgamma_upward":
double: 4
-float: 4
+float: 6
float128: 4
idouble: 4
-ifloat: 4
+ifloat: 6
ifloat128: 4
ildouble: 5
ldouble: 5
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 13 +
sysdeps/i386/fpu/e_powf.S | 392 --------------------
sysdeps/i386/fpu/e_powf_log2_data.c | 1 -
sysdeps/i386/fpu/libm-test-ulps | 6 +
sysdeps/i386/fpu/w_powf.c | 1 -
sysdeps/i386/i686/fpu/multiarch/Makefile | 3 +-
.../i686/fpu/multiarch/e_powf-sse2.c} | 2 +-
.../{x86_64 => i386/i686}/fpu/multiarch/e_powf.c | 6 +-
sysdeps/i386/i686/fpu/multiarch/libm-test-ulps | 18 +-
9 files changed, 37 insertions(+), 405 deletions(-)
delete mode 100644 sysdeps/i386/fpu/e_powf.S
delete mode 100644 sysdeps/i386/fpu/e_powf_log2_data.c
delete mode 100644 sysdeps/i386/fpu/w_powf.c
copy sysdeps/{x86_64/fpu/multiarch/e_powf-fma.c => i386/i686/fpu/multiarch/e_powf-sse2.c} (61%)
copy sysdeps/{x86_64 => i386/i686}/fpu/multiarch/e_powf.c (92%)
hooks/post-receive
--
GNU C Library master sources