This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RFC: Remove duplicated code from x86-64 libm.so


Hi,

X86-64 libm supports multi-arch for AVX and FMA4.  It compiles the same
source file 3 times: default, AVX and FMA4.  It turns out that for many
functions, the AVX and FMA4 versions are the same since they don't benefit
from FMA4.  As a result, there is a lot of duplicated code.

Here is the first patch to address this.  It compiles __mul in mpa.c for
both AVX and FMA4, since __mul uses FMA4, but compiles the other functions
only for AVX, since they don't benefit from FMA4.  Before this patch:

gnu-tools-1:pts/1[51]> size math/libm.so 
   text	   data	    bss	    dec	    hex	filename
1027977	    876	     72	1028925	  fb33d	math/libm.so

After this patch:

gnu-tools-1:pts/3[29]> size math/libm.so 
   text	   data	    bss	    dec	    hex	filename
1024585	    876	     72	1025533	  fa5fd	math/libm.so

Any comments?

Thanks.


H.J.
---
 ChangeLog.pr14654                            | 32 ++++++++++++++++++++++++++++
 sysdeps/ieee754/dbl-64/mpa.c                 |  6 ++++++
 sysdeps/x86_64/fpu/multiarch/Makefile        |  7 +++---
 sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c  |  8 +++----
 sysdeps/x86_64/fpu/multiarch/e_log-fma4.c    |  6 +++---
 sysdeps/x86_64/fpu/multiarch/mpa-avx.c       |  2 +-
 sysdeps/x86_64/fpu/multiarch/mpa-fma4.c      | 12 -----------
 sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c   |  6 +++---
 sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c  |  4 ++--
 sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c    |  6 +++---
 sysdeps/x86_64/fpu/multiarch/mplog-fma4.c    |  4 ++--
 sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c   |  4 ++--
 sysdeps/x86_64/fpu/multiarch/mptan-fma4.c    |  2 +-
 sysdeps/x86_64/fpu/multiarch/mul-avx.c       |  8 +++++++
 sysdeps/x86_64/fpu/multiarch/mul-fma4.c      |  8 +++++++
 sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c   |  6 +++---
 sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c    |  4 ++--
 sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c |  6 +++---
 sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c  |  6 +++---
 sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c  |  6 +++---
 20 files changed, 93 insertions(+), 50 deletions(-)
 create mode 100644 ChangeLog.pr14654
 delete mode 100644 sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/mul-avx.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/mul-fma4.c

diff --git a/ChangeLog.pr14654 b/ChangeLog.pr14654
new file mode 100644
index 0000000..bb5e25f
--- /dev/null
+++ b/ChangeLog.pr14654
@@ -0,0 +1,32 @@
+2012-10-04  H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #14654]
+	* sysdeps/ieee754/dbl-64/mpa.c: Don't define __mul if NO___MUL
+	is defined.  Define only __mul if the __mul macro is defined.
+	* sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines):
+	Replace mpa-fma4 with mul-fma4.  Add mul-avx.
+	(CFLAGS-mpa-fma4.c): Removed.
+	(CFLAGS-mul-fma4.c): New.
+	(CFLAGS-mul-avx.c): Likewise.
+	* sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c (__add): Defined
+	to __add_avx.
+	(__dbl_mp): Defined to __dbl_mp_avx.
+	(__dvd): Defined to __dvd_avx.
+	(__sub): Defined to __sub_avx.
+	* sysdeps/x86_64/fpu/multiarch/e_log-fma4.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/mplog-fma4.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/mptan-fma4.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c: Likewise.
+	* sysdeps/x86_64/fpu/multiarch/mpa-avx.c (__mul): Removed.
+	(NO___MUL): New.
+	* sysdeps/x86_64/fpu/multiarch/mpa-fma4.c: Removed.
+	* sysdeps/x86_64/fpu/multiarch/mul-avx.c: New file.
+	* sysdeps/x86_64/fpu/multiarch/mul-fma4.c: Likewise.
diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index 7e0ee44..4a92ada 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -51,6 +51,7 @@
 # define SECTION
 #endif
 
+#ifndef __mul
 #ifndef NO___ACR
 /* mcr() compares the sizes of the mantissas of two multiple precision  */
 /* numbers. Mantissas are compared regardless of the signs of the       */
@@ -430,8 +431,10 @@ __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
     else                      Z[0] = ZERO;
   }
 }
+#endif
 
 
+#ifndef NO___MUL
 /* Multiply two multiple precision numbers. *z is set to *x * *y. x&y      */
 /* may overlap but not x&z or y&z. In case p=1,2,3 the exact result is     */
 /* truncated to p digits. In case p>3 the error is bounded by 1.001 ulp.   */
@@ -471,8 +474,10 @@ __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
 
   Z[0] = X[0] * Y[0];
 }
+#endif
 
 
+#ifndef __mul
 /* Invert a multiple precision number. Set *y = 1 / *x.                     */
 /* Relative error bound = 1.001*r**(1-p) for p=2, 1.063*r**(1-p) for p=3,   */
 /* 2.001*r**(1-p) for p>3.                                                  */
@@ -520,3 +525,4 @@ __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) {
   if (X[0] == ZERO)    Z[0] = ZERO;
   else                {__inv(y,&w,p);   __mul(x,&w,z,p);}
 }
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index 12b0526..b3e5658 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -5,7 +5,7 @@ libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \
 ifeq ($(have-mfma4),yes)
 libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \
 			e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \
-			mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \
+			mplog-fma4 mul-fma4 slowexp-fma4 slowpow-fma4 \
 			sincos32-fma4 doasin-fma4 dosincos-fma4 \
 			halfulp-fma4 mpexp-fma4 \
 			mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4
@@ -18,13 +18,13 @@ CFLAGS-e_exp-fma4.c = -mfma4
 CFLAGS-e_log-fma4.c = -mfma4
 CFLAGS-e_pow-fma4.c = -mfma4
 CFLAGS-halfulp-fma4.c = -mfma4
-CFLAGS-mpa-fma4.c = -mfma4
 CFLAGS-mpatan-fma4.c = -mfma4
 CFLAGS-mpatan2-fma4.c = -mfma4
 CFLAGS-mpexp-fma4.c = -mfma4
 CFLAGS-mplog-fma4.c = -mfma4
 CFLAGS-mpsqrt-fma4.c = -mfma4
 CFLAGS-mptan-fma4.c = -mfma4
+CFLAGS-mul-fma4.c = -mfma4
 CFLAGS-s_atan-fma4.c = -mfma4
 CFLAGS-sincos32-fma4.c = -mfma4
 CFLAGS-slowexp-fma4.c = -mfma4
@@ -37,7 +37,7 @@ ifeq ($(config-cflags-sse2avx),yes)
 libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \
 			e_atan2-avx s_sin-avx s_tan-avx \
 			mplog-avx mpa-avx slowexp-avx \
-			mpexp-avx
+			mpexp-avx mul-avx
 
 CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
@@ -45,6 +45,7 @@ CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-mpa-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX
+CFLAGS-mul-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c
index f4e9862..f8c8eb6 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c
@@ -1,10 +1,10 @@
 #define __ieee754_atan2 __ieee754_atan2_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __dvd __dvd_fma4
+#define __add __add_avx
+#define __dbl_mp __dbl_mp_avx
+#define __dvd __dvd_avx
 #define __mpatan2 __mpatan2_fma4
 #define __mul __mul_fma4
-#define __sub __sub_fma4
+#define __sub __sub_avx
 #define SECTION __attribute__ ((section (".text.fma4")))
 
 #include <sysdeps/ieee754/dbl-64/e_atan2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c
index a2346cc..d4b1819 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c
@@ -1,8 +1,8 @@
 #define __ieee754_log __ieee754_log_fma4
 #define __mplog __mplog_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __sub __sub_fma4
+#define __add __add_avx
+#define __dbl_mp __dbl_mp_avx
+#define __sub __sub_avx
 #define SECTION __attribute__ ((section (".text.fma4")))
 
 #include <sysdeps/ieee754/dbl-64/e_log.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpa-avx.c b/sysdeps/x86_64/fpu/multiarch/mpa-avx.c
index a92dbed..90d93e8 100644
--- a/sysdeps/x86_64/fpu/multiarch/mpa-avx.c
+++ b/sysdeps/x86_64/fpu/multiarch/mpa-avx.c
@@ -1,5 +1,4 @@
 #define __add __add_avx
-#define __mul __mul_avx
 #define __sub __sub_avx
 #define __dbl_mp __dbl_mp_avx
 #define __dvd __dvd_avx
@@ -7,6 +6,7 @@
 #define NO___CPY 1
 #define NO___MP_DBL 1
 #define NO___ACR 1
+#define NO___MUL 1
 #define SECTION __attribute__ ((section (".text.avx")))
 
 #include <sysdeps/ieee754/dbl-64/mpa.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
deleted file mode 100644
index f8ed8f3..0000000
--- a/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c
+++ /dev/null
@@ -1,12 +0,0 @@
-#define __add __add_fma4
-#define __mul __mul_fma4
-#define __sub __sub_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __dvd __dvd_fma4
-
-#define NO___CPY 1
-#define NO___MP_DBL 1
-#define NO___ACR 1
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/mpa.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c
index fbd3bd4..a665e68 100644
--- a/sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c
@@ -1,9 +1,9 @@
 #define __mpatan __mpatan_fma4
-#define __add __add_fma4
-#define __dvd __dvd_fma4
+#define __add __add_avx
+#define __dvd __dvd_avx
 #define __mpsqrt __mpsqrt_fma4
 #define __mul __mul_fma4
-#define __sub __sub_fma4
+#define __sub __sub_avx
 #define AVOID_MPATAN_H 1
 #define SECTION __attribute__ ((section (".text.fma4")))
 
diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c
index e6e44d4..c41b887 100644
--- a/sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c
@@ -1,6 +1,6 @@
 #define __mpatan2 __mpatan2_fma4
-#define __add __add_fma4
-#define __dvd __dvd_fma4
+#define __add __add_avx
+#define __dvd __dvd_avx
 #define __mpatan __mpatan_fma4
 #define __mpsqrt __mpsqrt_fma4
 #define __mul __mul_fma4
diff --git a/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c
index 07ca6e9..5950aef 100644
--- a/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c
@@ -1,7 +1,7 @@
 #define __mpexp __mpexp_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __dvd __dvd_fma4
+#define __add __add_avx
+#define __dbl_mp __dbl_mp_avx
+#define __dvd __dvd_avx
 #define __mul __mul_fma4
 #define AVOID_MPEXP_H 1
 #define SECTION __attribute__ ((section (".text.fma4")))
diff --git a/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c b/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c
index b473311..dd1f46e 100644
--- a/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c
@@ -1,8 +1,8 @@
 #define __mplog __mplog_fma4
-#define __add __add_fma4
+#define __add __add_avx
 #define __mpexp __mpexp_fma4
 #define __mul __mul_fma4
-#define __sub __sub_fma4
+#define __sub __sub_avx
 #define SECTION __attribute__ ((section (".text.fma4")))
 
 #include <sysdeps/ieee754/dbl-64/mplog.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c
index f8a1ba2..85996db 100644
--- a/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c
@@ -1,7 +1,7 @@
 #define __mpsqrt __mpsqrt_fma4
-#define __dbl_mp __dbl_mp_fma4
+#define __dbl_mp __dbl_mp_avx
 #define __mul __mul_fma4
-#define __sub __sub_fma4
+#define __sub __sub_avx
 #define AVOID_MPSQRT_H 1
 #define SECTION __attribute__ ((section (".text.fma4")))
 
diff --git a/sysdeps/x86_64/fpu/multiarch/mptan-fma4.c b/sysdeps/x86_64/fpu/multiarch/mptan-fma4.c
index fb4a9d4..87e4dec 100644
--- a/sysdeps/x86_64/fpu/multiarch/mptan-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/mptan-fma4.c
@@ -1,6 +1,6 @@
 #define __mptan __mptan_fma4
 #define __c32 __c32_fma4
-#define __dvd __dvd_fma4
+#define __dvd __dvd_avx
 #define __mpranred __mpranred_fma4
 #define SECTION __attribute__ ((section (".text.fma4")))
 
diff --git a/sysdeps/x86_64/fpu/multiarch/mul-avx.c b/sysdeps/x86_64/fpu/multiarch/mul-avx.c
new file mode 100644
index 0000000..b4f0477
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mul-avx.c
@@ -0,0 +1,8 @@
+#define __mul __mul_avx
+
+#define NO___CPY 1
+#define NO___MP_DBL 1
+#define NO___ACR 1
+#define SECTION __attribute__ ((section (".text.avx")))
+
+#include <sysdeps/ieee754/dbl-64/mpa.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mul-fma4.c b/sysdeps/x86_64/fpu/multiarch/mul-fma4.c
new file mode 100644
index 0000000..230b0e5
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mul-fma4.c
@@ -0,0 +1,8 @@
+#define __mul __mul_fma4
+
+#define NO___CPY 1
+#define NO___MP_DBL 1
+#define NO___ACR 1
+#define SECTION __attribute__ ((section (".text.fma4")))
+
+#include <sysdeps/ieee754/dbl-64/mpa.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c
index 9e83e6c..2b3516a 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c
@@ -1,9 +1,9 @@
 #define atan __atan_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
+#define __add __add_avx
+#define __dbl_mp __dbl_mp_avx
 #define __mpatan __mpatan_fma4
 #define __mul __mul_fma4
-#define __sub __sub_fma4
+#define __sub __sub_avx
 #define SECTION __attribute__ ((section (".text.fma4")))
 
 #include <sysdeps/ieee754/dbl-64/s_atan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c
index a805440..39556a8 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c
@@ -1,8 +1,8 @@
 #define tan __tan_fma4
-#define __dbl_mp __dbl_mp_fma4
+#define __dbl_mp __dbl_mp_avx
 #define __mpranred __mpranred_fma4
 #define __mptan __mptan_fma4
-#define __sub __sub_fma4
+#define __sub __sub_avx
 #define SECTION __attribute__ ((section (".text.fma4")))
 
 #include <sysdeps/ieee754/dbl-64/s_tan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c b/sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c
index ebbfa18..14b9d09 100644
--- a/sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c
@@ -6,10 +6,10 @@
 #define __mpcos __mpcos_fma4
 #define __mpcos1 __mpcos1_fma4
 #define __mpranred __mpranred_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
+#define __add __add_avx
+#define __dbl_mp __dbl_mp_avx
 #define __mul __mul_fma4
-#define __sub __sub_fma4
+#define __sub __sub_avx
 #define SECTION __attribute__ ((section (".text.fma4")))
 
 #include <sysdeps/ieee754/dbl-64/sincos32.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c
index 3bcde84..df90752 100644
--- a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c
@@ -1,9 +1,9 @@
 #define __slowexp __slowexp_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
+#define __add __add_avx
+#define __dbl_mp __dbl_mp_avx
 #define __mpexp __mpexp_fma4
 #define __mul __mul_fma4
-#define __sub __sub_fma4
+#define __sub __sub_avx
 #define SECTION __attribute__ ((section (".text.fma4")))
 
 #include <sysdeps/ieee754/dbl-64/slowexp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
index 69d6982..162aaa5 100644
--- a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
@@ -1,10 +1,10 @@
 #define __slowpow __slowpow_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
+#define __add __add_avx
+#define __dbl_mp __dbl_mp_avx
 #define __mpexp __mpexp_fma4
 #define __mplog __mplog_fma4
 #define __mul __mul_fma4
-#define __sub __sub_fma4
+#define __sub __sub_avx
 #define __halfulp __halfulp_fma4
 #define SECTION __attribute__ ((section (".text.fma4")))
 
-- 
1.7.11.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]