This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


GNU C Library master sources branch master updated. glibc-2.21-453-g2a52321


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch master has been updated
       via  2a523216d5dc973d8bf91a00f00b70b7df42b91d (commit)
       via  04f496d6025753058bdd071fd711e9f56df149a7 (commit)
      from  24a2718f595bc11dc6abb31303ceb8fdcb664f2f (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list them
in full below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2a523216d5dc973d8bf91a00f00b70b7df42b91d

commit 2a523216d5dc973d8bf91a00f00b70b7df42b91d
Author: Andrew Senkevich <andrew.senkevich@intel.com>
Date:   Tue Jun 9 18:32:42 2015 +0300

    This patch adds vector cosf tests.
    
        * math/Makefile: Added CFLAGS for new tests.
        * math/test-float-vlen16.h: New file.
        * math/test-float-vlen4.h: New file.
        * math/test-float-vlen8.h: New file.
        * math/test-double-vlen2.h: Fixed 2 argument macro and comment.
        * sysdeps/x86_64/fpu/Makefile: Added new tests and variables.
        * sysdeps/x86_64/fpu/libm-test-ulps: Regenerated.
        * sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c: New file.
        * sysdeps/x86_64/fpu/test-float-vlen16.c: New file.
        * sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c: New file.
        * sysdeps/x86_64/fpu/test-float-vlen4.c: New file.
        * sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c: New file.
        * sysdeps/x86_64/fpu/test-float-vlen8-avx2.c: New file.
        * sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c: New file.
        * sysdeps/x86_64/fpu/test-float-vlen8.c: New file.
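
    For float, the three vector lengths correspond to the x86_64 register
    widths used by the wrapper files below (the mapping follows from their
    VEC_TYPE definitions and the Vector ABI symbol names):

        vlen4  -> 128-bit SSE      (__m128, _ZGVbN4v_cosf)
        vlen8  -> 256-bit AVX/AVX2 (__m256, _ZGVcN8v_cosf / _ZGVdN8v_cosf)
        vlen16 -> 512-bit AVX-512  (__m512, _ZGVeN16v_cosf)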

diff --git a/ChangeLog b/ChangeLog
index c3e52b2..353b383 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -68,6 +68,22 @@
 	* sysdeps/x86/fpu/bits/math-vector.h: Added SIMD declaration for cosf.
 	* NEWS: Mention addition of x86_64 vector cosf.
 
+	* math/Makefile: Added CFLAGS for new tests.
+	* math/test-float-vlen16.h: New file.
+	* math/test-float-vlen4.h: New file.
+	* math/test-float-vlen8.h: New file.
+	* math/test-double-vlen2.h: Fixed 2 argument macro and comment.
+	* sysdeps/x86_64/fpu/Makefile: Added new tests and variables.
+	* sysdeps/x86_64/fpu/libm-test-ulps: Regenerated.
+	* sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c: New file.
+	* sysdeps/x86_64/fpu/test-float-vlen16.c: New file.
+	* sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c: New file.
+	* sysdeps/x86_64/fpu/test-float-vlen4.c: New file.
+	* sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c: New file.
+	* sysdeps/x86_64/fpu/test-float-vlen8-avx2.c: New file.
+	* sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c: New file.
+	* sysdeps/x86_64/fpu/test-float-vlen8.c: New file.
+
 2015-06-09  Marko Myllynen  <myllynen@redhat.com>
 
 	* locale/C-ctype.c (PREDEFINED_CLASSES): Remove.
diff --git a/math/Makefile b/math/Makefile
index 8f14f25..7f6b85e 100644
--- a/math/Makefile
+++ b/math/Makefile
@@ -160,6 +160,15 @@ CFLAGS-test-double-vlen4-wrappers.c = $(double-vlen4-arch-ext-cflags)
 CFLAGS-test-double-vlen8.c = $(libm-test-vec-cflags)
 CFLAGS-test-double-vlen8-wrappers.c = $(double-vlen8-arch-ext-cflags)
 
+CFLAGS-test-float-vlen4.c = $(libm-test-vec-cflags)
+CFLAGS-test-float-vlen4-wrappers.c = $(float-vlen4-arch-ext-cflags)
+
+CFLAGS-test-float-vlen8.c = $(libm-test-vec-cflags)
+CFLAGS-test-float-vlen8-wrappers.c = $(float-vlen8-arch-ext-cflags)
+
+CFLAGS-test-float-vlen16.c = $(libm-test-vec-cflags)
+CFLAGS-test-float-vlen16-wrappers.c = $(float-vlen16-arch-ext-cflags)
+
 CFLAGS-test-float.c = -fno-inline -ffloat-store -fno-builtin
 CFLAGS-test-double.c = -fno-inline -ffloat-store -fno-builtin
 CFLAGS-test-ldouble.c = -fno-inline -ffloat-store -fno-builtin
diff --git a/math/test-double-vlen2.h b/math/test-double-vlen2.h
index 37d7060..2e8415b 100644
--- a/math/test-double-vlen2.h
+++ b/math/test-double-vlen2.h
@@ -45,7 +45,7 @@
 #define WRAPPER_DECL(function) extern FLOAT function (FLOAT);
 #define WRAPPER_DECL_ff(function) extern FLOAT function (FLOAT, FLOAT);
 
-// Wrapper from scalar to vector function with vector length 8.
+// Wrapper from scalar to vector function with vector length 2.
 #define VECTOR_WRAPPER(scalar_func, vector_func) \
 extern VEC_TYPE vector_func (VEC_TYPE);		\
 FLOAT scalar_func (FLOAT x)			\
@@ -63,7 +63,7 @@ extern VEC_TYPE vector_func (VEC_TYPE, VEC_TYPE);	\
 FLOAT scalar_func (FLOAT x, FLOAT y)		\
 {						\
   int i;					\
-  VEC_TYPE mx;					\
+  VEC_TYPE mx, my;				\
   INIT_VEC_LOOP (mx, x, 2);			\
   INIT_VEC_LOOP (my, y, 2);			\
   VEC_TYPE mr = vector_func (mx, my);		\
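
The mx, my fix above is needed because the two-argument wrapper expands
INIT_VEC_LOOP (my, y, 2) but previously declared only mx, so any test using
VECTOR_WRAPPER_ff could not compile; a sketch of the old expansion:

    VEC_TYPE mx;               /* only mx was declared */
    INIT_VEC_LOOP (mx, x, 2);
    INIT_VEC_LOOP (my, y, 2);  /* error: 'my' undeclared */
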
diff --git a/math/test-double-vlen2.h b/math/test-float-vlen16.h
similarity index 63%
copy from math/test-double-vlen2.h
copy to math/test-float-vlen16.h
index 37d7060..5c0a7a4 100644
--- a/math/test-double-vlen2.h
+++ b/math/test-float-vlen16.h
@@ -1,4 +1,4 @@
-/* Definitions for double vector tests with vector length 2.
+/* Definitions for float vector tests with vector length 16.
    Copyright (C) 2014-2015 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,56 +16,56 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#define FLOAT double
-#define FUNC(function) function
-#define TEST_MSG "testing double vector math (without inline functions)\n"
+#define FLOAT float
+#define FUNC(function) function ## f
+#define TEST_MSG "testing float vector math (without inline functions)\n"
 #define MATHCONST(x) x
-#define CHOOSE(Clongdouble,Cdouble,Cfloat,Cinlinelongdouble,Cinlinedouble,Cinlinefloat) Cdouble
+#define CHOOSE(Clongdouble,Cdouble,Cfloat,Cinlinelongdouble,Cinlinedouble,Cinlinefloat) Cfloat
 #define PRINTF_EXPR "e"
 #define PRINTF_XEXPR "a"
 #define PRINTF_NEXPR "f"
-#define TEST_DOUBLE 1
+#define TEST_FLOAT 1
 #define TEST_MATHVEC 1
 
 #ifndef __NO_MATH_INLINES
 # define __NO_MATH_INLINES
 #endif
 
-#define EXCEPTION_TESTS_double 0
-#define ROUNDING_TESTS_double(MODE) ((MODE) == FE_TONEAREST)
+#define EXCEPTION_TESTS_float 0
+#define ROUNDING_TESTS_float(MODE) ((MODE) == FE_TONEAREST)
 
 #define CNCT(x, y) x ## y
 #define CONCAT(a, b) CNCT (a, b)
 
-#define VEC_SUFF _vlen2
+#define VEC_SUFF _vlen16
 #define WRAPPER_NAME(function) CONCAT (function, VEC_SUFF)
 
-#define FUNC_TEST(function) function ## _VEC_SUFF
+#define FUNC_TEST(function) function ## f ## _VEC_SUFF
 
-#define WRAPPER_DECL(function) extern FLOAT function (FLOAT);
-#define WRAPPER_DECL_ff(function) extern FLOAT function (FLOAT, FLOAT);
+#define WRAPPER_DECL(func) extern FLOAT func (FLOAT x);
+#define WRAPPER_DECL_ff(func) extern FLOAT func (FLOAT x, FLOAT y);
 
-// Wrapper from scalar to vector function with vector length 8.
+// Wrapper from scalar to vector function with vector length 16.
 #define VECTOR_WRAPPER(scalar_func, vector_func) \
 extern VEC_TYPE vector_func (VEC_TYPE);		\
 FLOAT scalar_func (FLOAT x)			\
 {						\
   int i;					\
   VEC_TYPE mx;					\
-  INIT_VEC_LOOP (mx, x, 2);			\
+  INIT_VEC_LOOP (mx, x, 16);			\
   VEC_TYPE mr = vector_func (mx);		\
-  TEST_VEC_LOOP (2);				\
+  TEST_VEC_LOOP (16);				\
 }
 
 // Wrapper from scalar 2 argument function to vector one.
-#define VECTOR_WRAPPER_ff(scalar_func, vector_func) 	\
-extern VEC_TYPE vector_func (VEC_TYPE, VEC_TYPE);	\
+#define VECTOR_WRAPPER_ff(scalar_func, vector_func) \
+extern VEC_TYPE vector_func (VEC_TYPE, VEC_TYPE);	\
 FLOAT scalar_func (FLOAT x, FLOAT y)		\
 {						\
   int i;					\
-  VEC_TYPE mx;					\
-  INIT_VEC_LOOP (mx, x, 2);			\
-  INIT_VEC_LOOP (my, y, 2);			\
+  VEC_TYPE mx, my;				\
+  INIT_VEC_LOOP (mx, x, 16);			\
+  INIT_VEC_LOOP (my, y, 16);			\
   VEC_TYPE mr = vector_func (mx, my);		\
-  TEST_VEC_LOOP (2);				\
+  TEST_VEC_LOOP (16);				\
 }
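
Expanded for cosf at vector length 16, the one-argument wrapper above comes
out roughly as follows (a sketch assuming VEC_TYPE is __m512 as in the
wrapper file added below, with INIT_VEC_LOOP and TEST_VEC_LOOP supplied by
test-vec-loop.h):

    extern __m512 _ZGVeN16v_cosf (__m512);
    float cosf_vlen16 (float x)
    {
      int i;
      __m512 mx;
      INIT_VEC_LOOP (mx, x, 16);        /* broadcast x into all 16 lanes */
      __m512 mr = _ZGVeN16v_cosf (mx);
      TEST_VEC_LOOP (16);               /* compare every lane of mr */
    }
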
diff --git a/math/test-double-vlen2.h b/math/test-float-vlen4.h
similarity index 68%
copy from math/test-double-vlen2.h
copy to math/test-float-vlen4.h
index 37d7060..09485bc 100644
--- a/math/test-double-vlen2.h
+++ b/math/test-float-vlen4.h
@@ -1,4 +1,4 @@
-/* Definitions for double vector tests with vector length 2.
+/* Definitions for float vector tests with vector length 4.
    Copyright (C) 2014-2015 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,56 +16,56 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#define FLOAT double
-#define FUNC(function) function
-#define TEST_MSG "testing double vector math (without inline functions)\n"
+#define FLOAT float
+#define FUNC(function) function ## f
+#define TEST_MSG "testing float vector math (without inline functions)\n"
 #define MATHCONST(x) x
-#define CHOOSE(Clongdouble,Cdouble,Cfloat,Cinlinelongdouble,Cinlinedouble,Cinlinefloat) Cdouble
+#define CHOOSE(Clongdouble,Cdouble,Cfloat,Cinlinelongdouble,Cinlinedouble,Cinlinefloat) Cfloat
 #define PRINTF_EXPR "e"
 #define PRINTF_XEXPR "a"
 #define PRINTF_NEXPR "f"
-#define TEST_DOUBLE 1
+#define TEST_FLOAT 1
 #define TEST_MATHVEC 1
 
 #ifndef __NO_MATH_INLINES
 # define __NO_MATH_INLINES
 #endif
 
-#define EXCEPTION_TESTS_double 0
-#define ROUNDING_TESTS_double(MODE) ((MODE) == FE_TONEAREST)
+#define EXCEPTION_TESTS_float 0
+#define ROUNDING_TESTS_float(MODE) ((MODE) == FE_TONEAREST)
 
 #define CNCT(x, y) x ## y
 #define CONCAT(a, b) CNCT (a, b)
 
-#define VEC_SUFF _vlen2
+#define VEC_SUFF _vlen4
 #define WRAPPER_NAME(function) CONCAT (function, VEC_SUFF)
 
-#define FUNC_TEST(function) function ## _VEC_SUFF
+#define FUNC_TEST(function) function ## f ## _VEC_SUFF
 
 #define WRAPPER_DECL(function) extern FLOAT function (FLOAT);
 #define WRAPPER_DECL_ff(function) extern FLOAT function (FLOAT, FLOAT);
 
-// Wrapper from scalar to vector function with vector length 8.
+// Wrapper from scalar to vector function with vector length 4.
 #define VECTOR_WRAPPER(scalar_func, vector_func) \
 extern VEC_TYPE vector_func (VEC_TYPE);		\
 FLOAT scalar_func (FLOAT x)			\
 {						\
   int i;					\
   VEC_TYPE mx;					\
-  INIT_VEC_LOOP (mx, x, 2);			\
+  INIT_VEC_LOOP (mx, x, 4);			\
   VEC_TYPE mr = vector_func (mx);		\
-  TEST_VEC_LOOP (2);				\
+  TEST_VEC_LOOP (4);				\
 }
 
 // Wrapper from scalar 2 argument function to vector one.
-#define VECTOR_WRAPPER_ff(scalar_func, vector_func) 	\
-extern VEC_TYPE vector_func (VEC_TYPE, VEC_TYPE);	\
+#define VECTOR_WRAPPER_ff(scalar_func, vector_func) \
+extern VEC_TYPE vector_func (VEC_TYPE, VEC_TYPE);	\
 FLOAT scalar_func (FLOAT x, FLOAT y)		\
 {						\
   int i;					\
-  VEC_TYPE mx;					\
-  INIT_VEC_LOOP (mx, x, 2);			\
-  INIT_VEC_LOOP (my, y, 2);			\
+  VEC_TYPE mx, my;				\
+  INIT_VEC_LOOP (mx, x, 4);			\
+  INIT_VEC_LOOP (my, y, 4);			\
   VEC_TYPE mr = vector_func (mx, my);		\
-  TEST_VEC_LOOP (2);				\
+  TEST_VEC_LOOP (4);				\
 }
diff --git a/math/test-double-vlen2.h b/math/test-float-vlen8.h
similarity index 71%
copy from math/test-double-vlen2.h
copy to math/test-float-vlen8.h
index 37d7060..d309931 100644
--- a/math/test-double-vlen2.h
+++ b/math/test-float-vlen8.h
@@ -1,4 +1,4 @@
-/* Definitions for double vector tests with vector length 2.
+/* Definitions for float vector tests with vector length 8.
    Copyright (C) 2014-2015 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,31 +16,31 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#define FLOAT double
-#define FUNC(function) function
-#define TEST_MSG "testing double vector math (without inline functions)\n"
+#define FLOAT float
+#define FUNC(function) function ## f
+#define TEST_MSG "testing float vector math (without inline functions)\n"
 #define MATHCONST(x) x
-#define CHOOSE(Clongdouble,Cdouble,Cfloat,Cinlinelongdouble,Cinlinedouble,Cinlinefloat) Cdouble
+#define CHOOSE(Clongdouble,Cdouble,Cfloat,Cinlinelongdouble,Cinlinedouble,Cinlinefloat) Cfloat
 #define PRINTF_EXPR "e"
 #define PRINTF_XEXPR "a"
 #define PRINTF_NEXPR "f"
-#define TEST_DOUBLE 1
+#define TEST_FLOAT 1
 #define TEST_MATHVEC 1
 
 #ifndef __NO_MATH_INLINES
 # define __NO_MATH_INLINES
 #endif
 
-#define EXCEPTION_TESTS_double 0
-#define ROUNDING_TESTS_double(MODE) ((MODE) == FE_TONEAREST)
+#define EXCEPTION_TESTS_float 0
+#define ROUNDING_TESTS_float(MODE) ((MODE) == FE_TONEAREST)
 
 #define CNCT(x, y) x ## y
 #define CONCAT(a, b) CNCT (a, b)
 
-#define VEC_SUFF _vlen2
+#define VEC_SUFF _vlen8
 #define WRAPPER_NAME(function) CONCAT (function, VEC_SUFF)
 
-#define FUNC_TEST(function) function ## _VEC_SUFF
+#define FUNC_TEST(function) function ## f ## _VEC_SUFF
 
 #define WRAPPER_DECL(function) extern FLOAT function (FLOAT);
 #define WRAPPER_DECL_ff(function) extern FLOAT function (FLOAT, FLOAT);
@@ -52,20 +52,20 @@ FLOAT scalar_func (FLOAT x)			\
 {						\
   int i;					\
   VEC_TYPE mx;					\
-  INIT_VEC_LOOP (mx, x, 2);			\
+  INIT_VEC_LOOP (mx, x, 8);			\
   VEC_TYPE mr = vector_func (mx);		\
-  TEST_VEC_LOOP (2);				\
+  TEST_VEC_LOOP (8);				\
 }
 
 // Wrapper from scalar 2 argument function to vector one.
-#define VECTOR_WRAPPER_ff(scalar_func, vector_func) 	\
-extern VEC_TYPE vector_func (VEC_TYPE, VEC_TYPE);	\
+#define VECTOR_WRAPPER_ff(scalar_func, vector_func) \
+extern VEC_TYPE vector_func (VEC_TYPE, VEC_TYPE);	\
 FLOAT scalar_func (FLOAT x, FLOAT y)		\
 {						\
   int i;					\
-  VEC_TYPE mx;					\
-  INIT_VEC_LOOP (mx, x, 2);			\
-  INIT_VEC_LOOP (my, y, 2);			\
+  VEC_TYPE mx, my;				\
+  INIT_VEC_LOOP (mx, x, 8);			\
+  INIT_VEC_LOOP (my, y, 8);			\
   VEC_TYPE mr = vector_func (mx, my);		\
-  TEST_VEC_LOOP (2);				\
+  TEST_VEC_LOOP (8);				\
 }
diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile
index af0d2af..454cfba 100644
--- a/sysdeps/x86_64/fpu/Makefile
+++ b/sysdeps/x86_64/fpu/Makefile
@@ -9,10 +9,11 @@ endif
 # Variables for libmvec tests.
 ifeq ($(subdir),math)
 ifeq ($(build-mathvec),yes)
-libmvec-tests += double-vlen2 double-vlen4 double-vlen4-avx2
+libmvec-tests += double-vlen2 double-vlen4 double-vlen4-avx2 \
+		 float-vlen4 float-vlen8 float-vlen8-avx2
 
 ifeq (yes,$(config-cflags-avx512))
-libmvec-tests += double-vlen8
+libmvec-tests += double-vlen8 float-vlen16
 endif
 
 double-vlen2-arch-ext-cflags = -msse4
@@ -20,8 +21,16 @@ double-vlen4-arch-ext-cflags = -mavx
 double-vlen4-arch-ext2-cflags = -mavx2
 double-vlen8-arch-ext-cflags = -mavx512f
 
+float-vlen4-arch-ext-cflags = -msse4
+float-vlen8-arch-ext-cflags = -mavx
+float-vlen8-arch-ext2-cflags = -mavx2
+float-vlen16-arch-ext-cflags = -mavx512f
+
 CFLAGS-test-double-vlen4-avx2.c = $(libm-test-vec-cflags)
 CFLAGS-test-double-vlen4-avx2-wrappers.c = $(double-vlen4-arch-ext2-cflags)
 
+CFLAGS-test-float-vlen8-avx2.c = $(libm-test-vec-cflags)
+CFLAGS-test-float-vlen8-avx2-wrappers.c = $(float-vlen8-arch-ext2-cflags)
+
 endif
 endif
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 0d1f6e8..ed152d8 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -965,17 +965,25 @@ idouble: 1
 ildouble: 2
 ldouble: 2
 
+Function: "cos_vlen16":
+float: 1
+
 Function: "cos_vlen2":
 double: 1
 
 Function: "cos_vlen4":
 double: 1
+float: 1
 
 Function: "cos_vlen4_avx2":
 double: 1
 
 Function: "cos_vlen8":
 double: 1
+float: 1
+
+Function: "cos_vlen8_avx2":
+float: 1
 
 Function: "cosh":
 double: 1
diff --git a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
new file mode 100644
index 0000000..2bb155f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
@@ -0,0 +1,25 @@
+/* Wrapper part of tests for AVX-512 ISA versions of vector math functions.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "test-float-vlen16.h"
+#include "test-vec-loop.h"
+#include <immintrin.h>
+
+#define VEC_TYPE __m512
+
+VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVeN16v_cosf)
diff --git a/sysdeps/x86_64/fpu/test-float-vlen16.c b/sysdeps/x86_64/fpu/test-float-vlen16.c
new file mode 100644
index 0000000..a664ad9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-vlen16.c
@@ -0,0 +1,25 @@
+/* Tests for AVX-512 ISA versions of vector math functions.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "test-float-vlen16.h"
+
+#define TEST_VECTOR_cosf 1
+
+#define REQUIRE_AVX512F
+
+#include "libm-test.c"
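
A note on the REQUIRE_AVX512F define: the assumption is that libm-test.c
maps it (like REQUIRE_AVX2 in the vlen8-avx2 test below) onto a runtime CPU
check, so the test reports itself unsupported rather than faulting on hosts
without the instruction set, roughly:

    #ifdef REQUIRE_AVX512F
      if (!cpu_supports_avx512f ())   /* hypothetical helper name */
        return 77;                    /* exit status for unsupported tests */
    #endif
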
diff --git a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
new file mode 100644
index 0000000..05d6a40
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
@@ -0,0 +1,25 @@
+/* Wrapper part of tests for SSE ISA versions of vector math functions.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "test-float-vlen4.h"
+#include "test-vec-loop.h"
+#include <immintrin.h>
+
+#define VEC_TYPE __m128
+
+VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVbN4v_cosf)
diff --git a/sysdeps/x86_64/fpu/test-float-vlen4.c b/sysdeps/x86_64/fpu/test-float-vlen4.c
new file mode 100644
index 0000000..8946520
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-vlen4.c
@@ -0,0 +1,23 @@
+/* Tests for SSE ISA versions of vector math functions.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "test-float-vlen4.h"
+
+#define TEST_VECTOR_cosf 1
+
+#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
new file mode 100644
index 0000000..cff9941
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
@@ -0,0 +1,28 @@
+/* Wrapper part of tests for AVX2 ISA versions of vector math functions.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "test-float-vlen8.h"
+#include "test-vec-loop.h"
+#include <immintrin.h>
+
+#undef VEC_SUFF
+#define VEC_SUFF _vlen8_avx2
+
+#define VEC_TYPE __m256
+
+VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVdN8v_cosf)
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c
new file mode 100644
index 0000000..f0ee6f2
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c
@@ -0,0 +1,28 @@
+/* Tests for AVX2 ISA versions of vector math functions.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "test-float-vlen8.h"
+
+#undef VEC_SUFF
+#define VEC_SUFF _vlen8_avx2
+
+#define TEST_VECTOR_cosf 1
+
+#define REQUIRE_AVX2
+
+#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
new file mode 100644
index 0000000..c2305a3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
@@ -0,0 +1,25 @@
+/* Wrapper part of tests for AVX ISA versions of vector math functions.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "test-float-vlen8.h"
+#include "test-vec-loop.h"
+#include <immintrin.h>
+
+#define VEC_TYPE __m256
+
+VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVcN8v_cosf)
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8.c b/sysdeps/x86_64/fpu/test-float-vlen8.c
new file mode 100644
index 0000000..b96dec6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-vlen8.c
@@ -0,0 +1,23 @@
+/* Tests for AVX ISA versions of vector math functions.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "test-float-vlen8.h"
+
+#define TEST_VECTOR_cosf 1
+
+#include "libm-test.c"

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=04f496d6025753058bdd071fd711e9f56df149a7

commit 04f496d6025753058bdd071fd711e9f56df149a7
Author: Andrew Senkevich <andrew.senkevich@intel.com>
Date:   Tue Jun 9 18:29:47 2015 +0300

    Vector cosf for x86_64.
    
    Here is an implementation of vectorized cosf containing SSE, AVX,
    AVX2 and AVX512 versions according to the Vector ABI
    <https://groups.google.com/forum/#!topic/x86-64-abi/LmppCfN1rZ4>.
    
        * sysdeps/x86_64/fpu/Makefile (libmvec-support): Added new files.
        * sysdeps/x86_64/fpu/Versions: New versions added.
        * sysdeps/x86_64/fpu/svml_s_cosf4_core.S: New file.
        * sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S: New file.
        * sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S: New file.
        * sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S: New file.
        * sysdeps/x86_64/fpu/svml_s_cosf8_core.S: New file.
        * sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S: New file.
        * sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S: New file.
        * sysdeps/x86_64/fpu/svml_s_cosf16_core.S: New file.
        * sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S: New file.
        * sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S: New file.
        * sysdeps/x86_64/fpu/svml_s_wrapper_impl.h: New file.
        * sysdeps/x86_64/fpu/svml_s_cosf_data.S: New file.
        * sysdeps/x86_64/fpu/svml_s_cosf_data.h: New file.
        * sysdeps/x86_64/fpu/multiarch/Makefile (libmvec-sysdep_routines): Added
        build of SSE, AVX2 and AVX512 IFUNC versions.
        * sysdeps/unix/sysv/linux/x86_64/libmvec.abilist: New versions added.
        * sysdeps/x86/fpu/bits/math-vector.h: Added SIMD declaration for cosf.
        * NEWS: Mention addition of x86_64 vector cosf.

diff --git a/ChangeLog b/ChangeLog
index 47318a7..c3e52b2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -47,6 +47,27 @@
 	* sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c: New file.
 	* sysdeps/x86_64/fpu/test-double-vlen8.c: New file.
 
+	* sysdeps/x86_64/fpu/Makefile (libmvec-support): Added new files.
+	* sysdeps/x86_64/fpu/Versions: New versions added.
+	* sysdeps/x86_64/fpu/svml_s_cosf4_core.S: New file.
+	* sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S: New file.
+	* sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S: New file.
+	* sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S: New file.
+	* sysdeps/x86_64/fpu/svml_s_cosf8_core.S: New file.
+	* sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S: New file.
+	* sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S: New file.
+	* sysdeps/x86_64/fpu/svml_s_cosf16_core.S: New file.
+	* sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S: New file.
+	* sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S: New file.
+	* sysdeps/x86_64/fpu/svml_s_wrapper_impl.h: New file.
+	* sysdeps/x86_64/fpu/svml_s_cosf_data.S: New file.
+	* sysdeps/x86_64/fpu/svml_s_cosf_data.h: New file.
+	* sysdeps/x86_64/fpu/multiarch/Makefile (libmvec-sysdep_routines): Added
+	build of SSE, AVX2 and AVX512 IFUNC versions.
+	* sysdeps/unix/sysv/linux/x86_64/libmvec.abilist: New versions added.
+	* sysdeps/x86/fpu/bits/math-vector.h: Added SIMD declaration for cosf.
+	* NEWS: Mention addition of x86_64 vector cosf.
+
 2015-06-09  Marko Myllynen  <myllynen@redhat.com>
 
 	* locale/C-ctype.c (PREDEFINED_CLASSES): Remove.
diff --git a/NEWS b/NEWS
index 5e223a1..53f244d 100644
--- a/NEWS
+++ b/NEWS
@@ -52,7 +52,7 @@ Version 2.22
   condition in some applications.
 
 * Added vector math library named libmvec with the following vectorized x86_64
-  implementations: cos.
+  implementations: cos, cosf.
   The library can be disabled with --disable-mathvec. Use of the functions is
   enabled with -fopenmp -ffast-math starting from -O1 for GCC version >= 4.9.0.
   The library is linked in as needed when using -lm (no need to specify -lmvec
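
As a concrete illustration of the NEWS entry, a minimal sketch of code that
GCC >= 4.9 can vectorize into the new libmvec entry points (built with
gcc -O1 -fopenmp -ffast-math, linked with -lm):

    #include <math.h>

    void
    apply_cosf (float *out, const float *in, int n)
    {
    #pragma omp simd
      for (int i = 0; i < n; i++)
        out[i] = cosf (in[i]);   /* becomes _ZGVbN4v_cosf or wider */
    }
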
diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
index be6eaed..acabb8a 100644
--- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist
@@ -1,6 +1,10 @@
 GLIBC_2.22
  GLIBC_2.22 A
  _ZGVbN2v_cos F
+ _ZGVbN4v_cosf F
  _ZGVcN4v_cos F
+ _ZGVcN8v_cosf F
  _ZGVdN4v_cos F
+ _ZGVdN8v_cosf F
+ _ZGVeN16v_cosf F
  _ZGVeN8v_cos F
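
The new symbol names follow the Vector ABI mangling referenced in the commit
message; decoded for the widest variant:

    _ZGV e N16 v _cosf
         |  |   |  `--- scalar function the variant corresponds to
         |  |   `------ one vector ('v') parameter
         |  `---------- unmasked ('N' = notinbranch), 16 lanes
         `------------- ISA class: b = SSE, c = AVX, d = AVX2, e = AVX-512
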
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h
index 27294ce..b3ef833 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86/fpu/bits/math-vector.h
@@ -30,5 +30,7 @@
 #  define __DECL_SIMD_x86_64 _Pragma ("omp declare simd notinbranch")
 #  undef __DECL_SIMD_cos
 #  define __DECL_SIMD_cos __DECL_SIMD_x86_64
+#  undef __DECL_SIMD_cosf
+#  define __DECL_SIMD_cosf __DECL_SIMD_x86_64
 # endif
 #endif
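
With __DECL_SIMD_cosf defined this way, the net effect on <math.h> is
roughly the following declaration, which is what tells GCC that Vector ABI
variants of cosf exist:

    #pragma omp declare simd notinbranch
    extern float cosf (float);
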
diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile
index 2d5fcf8..af0d2af 100644
--- a/sysdeps/x86_64/fpu/Makefile
+++ b/sysdeps/x86_64/fpu/Makefile
@@ -1,7 +1,9 @@
 ifeq ($(subdir),mathvec)
 libmvec-support += svml_d_cos2_core svml_d_cos4_core_avx \
 		   svml_d_cos4_core svml_d_cos8_core \
-		   svml_d_cos_data init-arch
+		   svml_d_cos_data svml_s_cosf4_core svml_s_cosf8_core_avx \
+		   svml_s_cosf8_core svml_s_cosf16_core svml_s_cosf_data \
+		   init-arch
 endif
 
 # Variables for libmvec tests.
diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions
index 4290e73..f85c28b 100644
--- a/sysdeps/x86_64/fpu/Versions
+++ b/sysdeps/x86_64/fpu/Versions
@@ -1,5 +1,6 @@
 libmvec {
   GLIBC_2.22 {
     _ZGVbN2v_cos; _ZGVcN4v_cos; _ZGVdN4v_cos; _ZGVeN8v_cos;
+    _ZGVbN4v_cosf; _ZGVcN8v_cosf; _ZGVdN8v_cosf; _ZGVeN16v_cosf;
   }
 }
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index b2f3266..6b50475 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -54,5 +54,6 @@ endif
 
 ifeq ($(subdir),mathvec)
 libmvec-sysdep_routines += svml_d_cos2_core_sse4 svml_d_cos4_core_avx2 \
-			   svml_d_cos8_core_avx512
+			   svml_d_cos8_core_avx512 svml_s_cosf4_core_sse4 \
+			   svml_s_cosf8_core_avx2 svml_s_cosf16_core_avx512
 endif
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S
similarity index 50%
copy from sysdeps/x86/fpu/bits/math-vector.h
copy to sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S
index 27294ce..91564de 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S
@@ -1,4 +1,4 @@
-/* Platform-specific SIMD declarations of math functions.
+/* Multiple versions of vectorized cosf.
    Copyright (C) 2014-2015 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,19 +16,24 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#ifndef _MATH_H
-# error "Never include <bits/math-vector.h> directly;\
- include <math.h> instead."
-#endif
+#include <sysdep.h>
+#include <init-arch.h>
 
-/* Get default empty definitions for simd declarations.  */
-#include <bits/libm-simd-decl-stubs.h>
+	.text
+ENTRY (_ZGVeN16v_cosf)
+        .type   _ZGVeN16v_cosf, @gnu_indirect_function
+        cmpl    $0, KIND_OFFSET+__cpu_features(%rip)
+        jne     1f
+        call    __init_cpu_features
+1:      leaq    _ZGVeN16v_cosf_skx(%rip), %rax
+        testl   $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+        jnz     3f
+2:      leaq    _ZGVeN16v_cosf_knl(%rip), %rax
+        testl   $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+        jnz     3f
+        leaq    _ZGVeN16v_cosf_avx2_wrapper(%rip), %rax
+3:      ret
+END (_ZGVeN16v_cosf)
 
-#if defined __x86_64__ && defined __FAST_MATH__
-# if defined _OPENMP && _OPENMP >= 201307
-/* OpenMP case.  */
-#  define __DECL_SIMD_x86_64 _Pragma ("omp declare simd notinbranch")
-#  undef __DECL_SIMD_cos
-#  define __DECL_SIMD_cos __DECL_SIMD_x86_64
-# endif
-#endif
+#define _ZGVeN16v_cosf _ZGVeN16v_cosf_avx2_wrapper
+#include "../svml_s_cosf16_core.S"
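
In C terms, the indirect-function selector above behaves like this sketch
(the cpu_has_* helpers are hypothetical; the real code tests bits of
__cpu_features directly):

    static void *
    cosf16_ifunc (void)
    {
      if (cpu_has_avx512dq ())
        return _ZGVeN16v_cosf_skx;          /* AVX-512DQ: SKX variant */
      if (cpu_has_avx512f ())
        return _ZGVeN16v_cosf_knl;          /* AVX-512F only: KNL variant */
      return _ZGVeN16v_cosf_avx2_wrapper;   /* generic wrapper fallback */
    }
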
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
new file mode 100644
index 0000000..a78ae2e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
@@ -0,0 +1,460 @@
+/* Function cosf vectorized with AVX-512. KNL and SKX versions.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_s_cosf_data.h"
+#include "svml_s_wrapper_impl.h"
+
+	.text
+ENTRY (_ZGVeN16v_cosf_knl)
+#ifndef HAVE_AVX512_ASM_SUPPORT
+WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
+#else
+/*
+  ALGORITHM DESCRIPTION:
+
+  1) Range reduction to [-Pi/2; +Pi/2] interval
+     a) We remove sign using AND operation
+     b) Add Pi/2 value to argument X for Cos to Sin transformation
+     c) Getting octant Y by 1/Pi multiplication
+     d) Add "Right Shifter" value
+     e) Treat obtained value as integer for destination sign setting.
+        Shift first bit of this value to the last (sign) position
+     f) Subtract "Right Shifter"  value
+     g) Subtract 0.5 from result for octant correction
+     h) Subtract Y*PI from X argument, where PI divided to 4 parts:
+        X = X - Y*PI1 - Y*PI2 - Y*PI3 - Y*PI4;
+  2) Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval)
+     a) Calculate X^2 = X * X
+     b) Calculate polynomial:
+        R = X + X * X^2 * (A3 + x^2 * (A5 + .....
+  3) Destination sign setting
+     a) Set shifted destination sign using XOR operation:
+        R = XOR( R, S );
+ */
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq      $-64, %rsp
+        subq      $1280, %rsp
+        movq      __svml_scos_data@GOTPCREL(%rip), %rdx
+
+/*
+  h) Subtract Y*PI from X argument, where PI divided to 4 parts:
+  X = X - Y*PI1 - Y*PI2 - Y*PI3
+ */
+        vmovaps   %zmm0, %zmm6
+        movl      $-1, %eax
+
+/* b) Add Pi/2 value to argument X for Cos to Sin transformation */
+        vaddps    __sHalfPI(%rdx), %zmm0, %zmm2
+        vmovups   __sRShifter(%rdx), %zmm3
+
+/*
+  1) Range reduction to [-Pi/2; +Pi/2] interval
+  c) Getting octant Y by 1/Pi multiplication
+  d) Add "Right Shifter" (0x4B000000) value
+ */
+        vfmadd132ps __sInvPI(%rdx), %zmm3, %zmm2
+        vmovups     __sPI1_FMA(%rdx), %zmm5
+
+/* f) Subtract "Right Shifter" (0x4B000000) value */
+        vsubps    %zmm3, %zmm2, %zmm4
+        vmovups   __sA9_FMA(%rdx), %zmm9
+
+/* Check for large and special arguments */
+        vpandd    __sAbsMask(%rdx), %zmm0, %zmm1
+
+/*
+  e) Treat obtained value as integer for destination sign setting.
+  Shift first bit of this value to the last (sign) position (S << 31)
+ */
+        vpslld       $31, %zmm2, %zmm8
+        vcmpps       $22, __sRangeReductionVal(%rdx), %zmm1, %k1
+        vpbroadcastd %eax, %zmm12{%k1}{z}
+
+/* g) Subtract 0.5 from result for octant correction */
+        vsubps       __sOneHalf(%rdx), %zmm4, %zmm7
+        vptestmd     %zmm12, %zmm12, %k0
+        vfnmadd231ps %zmm7, %zmm5, %zmm6
+        kmovw        %k0, %ecx
+        vfnmadd231ps __sPI2_FMA(%rdx), %zmm7, %zmm6
+        vfnmadd132ps __sPI3_FMA(%rdx), %zmm6, %zmm7
+
+/* a) Calculate X^2 = X * X */
+        vmulps    %zmm7, %zmm7, %zmm10
+
+/*
+  3) Destination sign setting
+    a) Set shifted destination sign using XOR operation:
+  R = XOR( R, S );
+ */
+        vpxord    %zmm8, %zmm7, %zmm11
+
+/*
+  b) Calculate polynomial:
+  R = X + X * X^2 * (A3 + x^2 * (A5 + x^2 * (A7 + x^2 * (A9))));
+ */
+        vfmadd213ps __sA7_FMA(%rdx), %zmm10, %zmm9
+        vfmadd213ps __sA5_FMA(%rdx), %zmm10, %zmm9
+        vfmadd213ps __sA3(%rdx), %zmm10, %zmm9
+        vmulps      %zmm10, %zmm9, %zmm1
+        vfmadd213ps %zmm11, %zmm11, %zmm1
+        testl       %ecx, %ecx
+        jne         .LBL_1_3
+
+.LBL_1_2:
+        cfi_remember_state
+        vmovaps   %zmm1, %zmm0
+        movq      %rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+
+.LBL_1_3:
+        cfi_restore_state
+        vmovups   %zmm0, 1152(%rsp)
+        vmovups   %zmm1, 1216(%rsp)
+        je        .LBL_1_2
+
+        xorb      %dl, %dl
+        kmovw     %k4, 1048(%rsp)
+        xorl      %eax, %eax
+        kmovw     %k5, 1040(%rsp)
+        kmovw     %k6, 1032(%rsp)
+        kmovw     %k7, 1024(%rsp)
+        vmovups   %zmm16, 960(%rsp)
+        vmovups   %zmm17, 896(%rsp)
+        vmovups   %zmm18, 832(%rsp)
+        vmovups   %zmm19, 768(%rsp)
+        vmovups   %zmm20, 704(%rsp)
+        vmovups   %zmm21, 640(%rsp)
+        vmovups   %zmm22, 576(%rsp)
+        vmovups   %zmm23, 512(%rsp)
+        vmovups   %zmm24, 448(%rsp)
+        vmovups   %zmm25, 384(%rsp)
+        vmovups   %zmm26, 320(%rsp)
+        vmovups   %zmm27, 256(%rsp)
+        vmovups   %zmm28, 192(%rsp)
+        vmovups   %zmm29, 128(%rsp)
+        vmovups   %zmm30, 64(%rsp)
+        vmovups   %zmm31, (%rsp)
+        movq      %rsi, 1064(%rsp)
+        movq      %rdi, 1056(%rsp)
+        movq      %r12, 1096(%rsp)
+        cfi_offset_rel_rsp (12, 1096)
+        movb      %dl, %r12b
+        movq      %r13, 1088(%rsp)
+        cfi_offset_rel_rsp (13, 1088)
+        movl      %ecx, %r13d
+        movq      %r14, 1080(%rsp)
+        cfi_offset_rel_rsp (14, 1080)
+        movl      %eax, %r14d
+        movq      %r15, 1072(%rsp)
+        cfi_offset_rel_rsp (15, 1072)
+        cfi_remember_state
+
+.LBL_1_6:
+        btl       %r14d, %r13d
+        jc        .LBL_1_12
+
+.LBL_1_7:
+        lea       1(%r14), %esi
+        btl       %esi, %r13d
+        jc        .LBL_1_10
+
+.LBL_1_8:
+        addb      $1, %r12b
+        addl      $2, %r14d
+        cmpb      $16, %r12b
+        jb        .LBL_1_6
+
+        kmovw     1048(%rsp), %k4
+        movq      1064(%rsp), %rsi
+        kmovw     1040(%rsp), %k5
+        movq      1056(%rsp), %rdi
+        kmovw     1032(%rsp), %k6
+        movq      1096(%rsp), %r12
+        cfi_restore (%r12)
+        movq      1088(%rsp), %r13
+        cfi_restore (%r13)
+        kmovw     1024(%rsp), %k7
+        vmovups   960(%rsp), %zmm16
+        vmovups   896(%rsp), %zmm17
+        vmovups   832(%rsp), %zmm18
+        vmovups   768(%rsp), %zmm19
+        vmovups   704(%rsp), %zmm20
+        vmovups   640(%rsp), %zmm21
+        vmovups   576(%rsp), %zmm22
+        vmovups   512(%rsp), %zmm23
+        vmovups   448(%rsp), %zmm24
+        vmovups   384(%rsp), %zmm25
+        vmovups   320(%rsp), %zmm26
+        vmovups   256(%rsp), %zmm27
+        vmovups   192(%rsp), %zmm28
+        vmovups   128(%rsp), %zmm29
+        vmovups   64(%rsp), %zmm30
+        vmovups   (%rsp), %zmm31
+        movq      1080(%rsp), %r14
+        cfi_restore (%r14)
+        movq      1072(%rsp), %r15
+        cfi_restore (%r15)
+        vmovups   1216(%rsp), %zmm1
+        jmp       .LBL_1_2
+
+.LBL_1_10:
+        cfi_restore_state
+        movzbl    %r12b, %r15d
+        vmovss    1156(%rsp,%r15,8), %xmm0
+        call      cosf@PLT
+        vmovss    %xmm0, 1220(%rsp,%r15,8)
+        jmp       .LBL_1_8
+
+.LBL_1_12:
+        movzbl    %r12b, %r15d
+        vmovss    1152(%rsp,%r15,8), %xmm0
+        call      cosf@PLT
+        vmovss    %xmm0, 1216(%rsp,%r15,8)
+        jmp       .LBL_1_7
+#endif
+END (_ZGVeN16v_cosf_knl)
+
+ENTRY (_ZGVeN16v_cosf_skx)
+#ifndef HAVE_AVX512_ASM_SUPPORT
+WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
+#else
+/*
+  ALGORITHM DESCRIPTION:
+
+  1) Range reduction to [-Pi/2; +Pi/2] interval
+     a) We remove sign using AND operation
+     b) Add Pi/2 value to argument X for Cos to Sin transformation
+     c) Getting octant Y by 1/Pi multiplication
+     d) Add "Right Shifter" value
+     e) Treat obtained value as integer for destination sign setting.
+        Shift first bit of this value to the last (sign) position
+     f) Subtract "Right Shifter"  value
+     g) Subtract 0.5 from result for octant correction
+     h) Subtract Y*PI from X argument, where PI divided to 4 parts:
+        X = X - Y*PI1 - Y*PI2 - Y*PI3 - Y*PI4;
+  2) Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval)
+     a) Calculate X^2 = X * X
+     b) Calculate polynomial:
+        R = X + X * X^2 * (A3 + x^2 * (A5 + .....
+  3) Destination sign setting
+     a) Set shifted destination sign using XOR operation:
+        R = XOR( R, S );
+ */
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq      $-64, %rsp
+        subq      $1280, %rsp
+        movq      __svml_scos_data@GOTPCREL(%rip), %rax
+
+/*
+  h) Subtract Y*PI from X argument, where PI divided to 4 parts:
+  X = X - Y*PI1 - Y*PI2 - Y*PI3
+ */
+        vmovaps   %zmm0, %zmm6
+        vmovups   .L_2il0floatpacket.13(%rip), %zmm12
+        vmovups __sRShifter(%rax), %zmm3
+        vmovups __sPI1_FMA(%rax), %zmm5
+        vmovups __sA9_FMA(%rax), %zmm9
+
+/* b) Add Pi/2 value to argument X for Cos to Sin transformation */
+        vaddps __sHalfPI(%rax), %zmm0, %zmm2
+
+/* Check for large and special arguments */
+        vandps __sAbsMask(%rax), %zmm0, %zmm1
+
+/*
+  1) Range reduction to [-Pi/2; +Pi/2] interval
+  c) Getting octant Y by 1/Pi multiplication
+  d) Add "Right Shifter" (0x4B000000) value
+ */
+        vfmadd132ps __sInvPI(%rax), %zmm3, %zmm2
+        vcmpps    $18, __sRangeReductionVal(%rax), %zmm1, %k1
+
+/*
+  e) Treat obtained value as integer for destination sign setting.
+  Shift first bit of this value to the last (sign) position (S << 31)
+ */
+        vpslld    $31, %zmm2, %zmm8
+
+/* f) Subtract "Right Shifter" (0x4B000000) value */
+        vsubps    %zmm3, %zmm2, %zmm4
+
+/* g) Subtract 0.5 from result for octant correction */
+        vsubps __sOneHalf(%rax), %zmm4, %zmm7
+        vfnmadd231ps %zmm7, %zmm5, %zmm6
+        vfnmadd231ps __sPI2_FMA(%rax), %zmm7, %zmm6
+        vfnmadd132ps __sPI3_FMA(%rax), %zmm6, %zmm7
+
+/* a) Calculate X^2 = X * X */
+        vmulps    %zmm7, %zmm7, %zmm10
+
+/*
+  3) Destination sign setting
+  a) Set shifted destination sign using XOR operation:
+  R = XOR( R, S );
+ */
+        vxorps    %zmm8, %zmm7, %zmm11
+
+/*
+  b) Calculate polynomial:
+  R = X + X * X^2 * (A3 + x^2 * (A5 + x^2 * (A7 + x^2 * (A9))));
+ */
+        vfmadd213ps __sA7_FMA(%rax), %zmm10, %zmm9
+        vfmadd213ps __sA5_FMA(%rax), %zmm10, %zmm9
+        vfmadd213ps __sA3(%rax), %zmm10, %zmm9
+        vpandnd   %zmm1, %zmm1, %zmm12{%k1}
+        vmulps    %zmm10, %zmm9, %zmm1
+        vptestmd  %zmm12, %zmm12, %k0
+        vfmadd213ps %zmm11, %zmm11, %zmm1
+        kmovw     %k0, %ecx
+        testl     %ecx, %ecx
+        jne       .LBL_2_3
+.LBL_2_2:
+        cfi_remember_state
+        vmovaps   %zmm1, %zmm0
+        movq      %rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+
+.LBL_2_3:
+        cfi_restore_state
+        vmovups   %zmm0, 1152(%rsp)
+        vmovups   %zmm1, 1216(%rsp)
+        je        .LBL_2_2
+
+        xorb      %dl, %dl
+        xorl      %eax, %eax
+        kmovw     %k4, 1048(%rsp)
+        kmovw     %k5, 1040(%rsp)
+        kmovw     %k6, 1032(%rsp)
+        kmovw     %k7, 1024(%rsp)
+        vmovups   %zmm16, 960(%rsp)
+        vmovups   %zmm17, 896(%rsp)
+        vmovups   %zmm18, 832(%rsp)
+        vmovups   %zmm19, 768(%rsp)
+        vmovups   %zmm20, 704(%rsp)
+        vmovups   %zmm21, 640(%rsp)
+        vmovups   %zmm22, 576(%rsp)
+        vmovups   %zmm23, 512(%rsp)
+        vmovups   %zmm24, 448(%rsp)
+        vmovups   %zmm25, 384(%rsp)
+        vmovups   %zmm26, 320(%rsp)
+        vmovups   %zmm27, 256(%rsp)
+        vmovups   %zmm28, 192(%rsp)
+        vmovups   %zmm29, 128(%rsp)
+        vmovups   %zmm30, 64(%rsp)
+        vmovups   %zmm31, (%rsp)
+        movq      %rsi, 1064(%rsp)
+        movq      %rdi, 1056(%rsp)
+        movq      %r12, 1096(%rsp)
+        cfi_offset_rel_rsp (12, 1096)
+        movb      %dl, %r12b
+        movq      %r13, 1088(%rsp)
+        cfi_offset_rel_rsp (13, 1088)
+        movl      %ecx, %r13d
+        movq      %r14, 1080(%rsp)
+        cfi_offset_rel_rsp (14, 1080)
+        movl      %eax, %r14d
+        movq      %r15, 1072(%rsp)
+        cfi_offset_rel_rsp (15, 1072)
+        cfi_remember_state
+
+.LBL_2_6:
+        btl       %r14d, %r13d
+        jc        .LBL_2_12
+.LBL_2_7:
+        lea       1(%r14), %esi
+        btl       %esi, %r13d
+        jc        .LBL_2_10
+.LBL_2_8:
+        incb      %r12b
+        addl      $2, %r14d
+        cmpb      $16, %r12b
+        jb        .LBL_2_6
+        kmovw     1048(%rsp), %k4
+        kmovw     1040(%rsp), %k5
+        kmovw     1032(%rsp), %k6
+        kmovw     1024(%rsp), %k7
+        vmovups   960(%rsp), %zmm16
+        vmovups   896(%rsp), %zmm17
+        vmovups   832(%rsp), %zmm18
+        vmovups   768(%rsp), %zmm19
+        vmovups   704(%rsp), %zmm20
+        vmovups   640(%rsp), %zmm21
+        vmovups   576(%rsp), %zmm22
+        vmovups   512(%rsp), %zmm23
+        vmovups   448(%rsp), %zmm24
+        vmovups   384(%rsp), %zmm25
+        vmovups   320(%rsp), %zmm26
+        vmovups   256(%rsp), %zmm27
+        vmovups   192(%rsp), %zmm28
+        vmovups   128(%rsp), %zmm29
+        vmovups   64(%rsp), %zmm30
+        vmovups   (%rsp), %zmm31
+        vmovups   1216(%rsp), %zmm1
+        movq      1064(%rsp), %rsi
+        movq      1056(%rsp), %rdi
+        movq      1096(%rsp), %r12
+        cfi_restore (%r12)
+        movq      1088(%rsp), %r13
+        cfi_restore (%r13)
+        movq      1080(%rsp), %r14
+        cfi_restore (%r14)
+        movq      1072(%rsp), %r15
+        cfi_restore (%r15)
+        jmp       .LBL_2_2
+
+.LBL_2_10:
+        cfi_restore_state
+        movzbl    %r12b, %r15d
+        vmovss    1156(%rsp,%r15,8), %xmm0
+        vzeroupper
+        vmovss    1156(%rsp,%r15,8), %xmm0
+        call      cosf@PLT
+        vmovss    %xmm0, 1220(%rsp,%r15,8)
+        jmp       .LBL_2_8
+.LBL_2_12:
+        movzbl    %r12b, %r15d
+        vmovss    1152(%rsp,%r15,8), %xmm0
+        vzeroupper
+        vmovss    1152(%rsp,%r15,8), %xmm0
+        call      cosf@PLT
+        vmovss    %xmm0, 1216(%rsp,%r15,8)
+        jmp       .LBL_2_7
+#endif
+END (_ZGVeN16v_cosf_skx)
+
+	.section .rodata, "a"
+.L_2il0floatpacket.13:
+	.long	0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
+	.type	.L_2il0floatpacket.13,@object
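
The ALGORITHM DESCRIPTION comments in the two entry points above reduce to
the following scalar sketch (illustrative Taylor-style coefficients; the
committed kernels use minimax constants and a PI split into three or four
parts from svml_s_cosf_data.S, plus the per-lane slow path for large
arguments):

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    static float
    cosf_sketch (float x)
    {
      const float shifter = 0x1.0p23f;            /* "Right Shifter" */
      /* b, c, d) cos->sin shift, octant by 1/Pi, round via the shifter.  */
      float y = (x + (float) M_PI_2) * (float) M_1_PI + shifter;
      uint32_t n;
      memcpy (&n, &y, sizeof n);
      uint32_t sign = n << 31;                    /* e) octant parity -> sign */
      y -= shifter;                               /* f) */
      y -= 0.5f;                                  /* g) octant correction */
      float r = x - y * (float) M_PI;             /* h) reduction */
      float r2 = r * r;                           /* 2a) */
      /* 2b) odd polynomial approximating sin on [-Pi/2; +Pi/2].  */
      float p = r + r * r2 * (-0x1.555556p-3f
                    + r2 * (0x1.111112p-7f
                    + r2 * (-0x1.a01a02p-13f
                    + r2 * 0x1.71de3ap-19f)));
      uint32_t pb;                                /* 3) set sign via XOR */
      memcpy (&pb, &p, sizeof pb);
      pb ^= sign;
      memcpy (&p, &pb, sizeof p);
      return p;
    }
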
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S
similarity index 55%
copy from sysdeps/x86/fpu/bits/math-vector.h
copy to sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S
index 27294ce..fa2363b 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S
@@ -1,4 +1,4 @@
-/* Platform-specific SIMD declarations of math functions.
+/* Multiple versions of vectorized cosf, vector length is 4.
    Copyright (C) 2014-2015 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,19 +16,23 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#ifndef _MATH_H
-# error "Never include <bits/math-vector.h> directly;\
- include <math.h> instead."
-#endif
+#include <sysdep.h>
+#include <init-arch.h>
 
-/* Get default empty definitions for simd declarations.  */
-#include <bits/libm-simd-decl-stubs.h>
+	.text
+ENTRY (_ZGVbN4v_cosf)
+        .type   _ZGVbN4v_cosf, @gnu_indirect_function
+        cmpl    $0, KIND_OFFSET+__cpu_features(%rip)
+        jne     1f
+        call    __init_cpu_features
+1:      leaq    _ZGVbN4v_cosf_sse4(%rip), %rax
+        testl   $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+        jz      2f
+        ret
+2:      leaq    _ZGVbN4v_cosf_sse2(%rip), %rax
+        ret
+END (_ZGVbN4v_cosf)
+libmvec_hidden_def (_ZGVbN4v_cosf)
 
-#if defined __x86_64__ && defined __FAST_MATH__
-# if defined _OPENMP && _OPENMP >= 201307
-/* OpenMP case.  */
-#  define __DECL_SIMD_x86_64 _Pragma ("omp declare simd notinbranch")
-#  undef __DECL_SIMD_cos
-#  define __DECL_SIMD_cos __DECL_SIMD_x86_64
-# endif
-#endif
+#define _ZGVbN4v_cosf _ZGVbN4v_cosf_sse2
+#include "../svml_s_cosf4_core.S"
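
The closing define-and-include pair above is the usual multiarch trick: it
compiles the generic kernel a second time under an ISA-suffixed name,
roughly

    #define _ZGVbN4v_cosf _ZGVbN4v_cosf_sse2   /* rename the entry point */
    #include "../svml_s_cosf4_core.S"          /* now builds as ..._sse2 */

so the IFUNC selector can return either _ZGVbN4v_cosf_sse4 or the renamed
baseline implementation.
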
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S
new file mode 100644
index 0000000..f231ba2
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S
@@ -0,0 +1,227 @@
+/* Function cosf vectorized with SSE4.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_s_cosf_data.h"
+
+	.text
+ENTRY (_ZGVbN4v_cosf_sse4)
+/*
+  ALGORITHM DESCRIPTION:
+
+   1) Range reduction to [-Pi/2; +Pi/2] interval
+      a) We remove sign using AND operation
+      b) Add Pi/2 value to argument X for Cos to Sin transformation
+      c) Getting octant Y by 1/Pi multiplication
+      d) Add "Right Shifter" value
+      e) Treat obtained value as integer for destination sign setting.
+         Shift first bit of this value to the last (sign) position
+      f) Subtract "Right Shifter"  value
+      g) Subtract 0.5 from result for octant correction
+      h) Subtract Y*PI from X argument, where PI divided to 4 parts:
+         X = X - Y*PI1 - Y*PI2 - Y*PI3 - Y*PI4;
+   2) Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval)
+      a) Calculate X^2 = X * X
+      b) Calculate polynomial:
+         R = X + X * X^2 * (A3 + x^2 * (A5 + .....
+   3) Destination sign setting
+      a) Set shifted destination sign using XOR operation:
+         R = XOR( R, S );
+ */
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq      $-64, %rsp
+        subq      $320, %rsp
+        movaps    %xmm0, %xmm4
+        movq      __svml_scos_data@GOTPCREL(%rip), %rax
+        movups __sHalfPI(%rax), %xmm1
+        movups __sRShifter(%rax), %xmm5
+
+/* b) Add Pi/2 value to argument X for Cos to Sin transformation */
+        addps     %xmm4, %xmm1
+
+/*
+  1) Range reduction to [-Pi/2; +Pi/2] interval
+  c) Getting octant Y by 1/Pi multiplication
+  d) Add "Right Shifter" (0x4B000000) value
+ */
+        mulps __sInvPI(%rax), %xmm1
+        movups __sPI1(%rax), %xmm6
+        addps     %xmm5, %xmm1
+
+/*
+  e) Treat obtained value as integer for destination sign setting.
+  Shift first bit of this value to the last (sign) position (S << 31)
+ */
+        movaps    %xmm1, %xmm2
+
+/* f) Subtract "Right Shifter" (0x4B000000) value */
+        subps     %xmm5, %xmm1
+        movups __sPI2(%rax), %xmm7
+        pslld     $31, %xmm2
+        movups __sPI3(%rax), %xmm5
+        movups __sAbsMask(%rax), %xmm3
+
+/* Check for large and special arguments */
+        andps     %xmm4, %xmm3
+
+/* g) Subtract 0.5 from result for octant correction */
+        subps __sOneHalf(%rax), %xmm1
+        cmpnleps __sRangeReductionVal(%rax), %xmm3
+
+/*
+  h) Subtract Y*PI from X argument, where PI divided to 4 parts:
+  X = X - Y*PI1 - Y*PI2 - Y*PI3 - Y*PI4;
+ */
+        mulps     %xmm1, %xmm6
+        mulps     %xmm1, %xmm7
+        mulps     %xmm1, %xmm5
+        subps     %xmm6, %xmm0
+        movmskps  %xmm3, %ecx
+        movups __sPI4(%rax), %xmm6
+        subps     %xmm7, %xmm0
+        mulps     %xmm6, %xmm1
+        subps     %xmm5, %xmm0
+        subps     %xmm1, %xmm0
+
+/* a) Calculate X^2 = X * X */
+        movaps    %xmm0, %xmm1
+        mulps     %xmm0, %xmm1
+
+/*
+  3) Destination sign setting
+  a) Set shifted destination sign using XOR operation:
+  R = XOR( R, S );
+ */
+        xorps     %xmm2, %xmm0
+        movups __sA9(%rax), %xmm2
+
+/*
+  b) Calculate polynomial:
+  R = X + X * X^2 * (A3 + x^2 * (A5 + x^2 * (A7 + x^2 * (A9))));
+ */
+        mulps     %xmm1, %xmm2
+        addps __sA7(%rax), %xmm2
+        mulps     %xmm1, %xmm2
+        addps __sA5(%rax), %xmm2
+        mulps     %xmm1, %xmm2
+        addps __sA3(%rax), %xmm2
+        mulps     %xmm2, %xmm1
+        mulps     %xmm0, %xmm1
+        addps     %xmm1, %xmm0
+        testl     %ecx, %ecx
+        jne       .LBL_1_3
+
+.LBL_1_2:
+        cfi_remember_state
+        movq      %rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+
+.LBL_1_3:
+        cfi_restore_state
+        movups    %xmm4, 192(%rsp)
+        movups    %xmm0, 256(%rsp)
+        je        .LBL_1_2
+
+        xorb      %dl, %dl
+        xorl      %eax, %eax
+        movups    %xmm8, 112(%rsp)
+        movups    %xmm9, 96(%rsp)
+        movups    %xmm10, 80(%rsp)
+        movups    %xmm11, 64(%rsp)
+        movups    %xmm12, 48(%rsp)
+        movups    %xmm13, 32(%rsp)
+        movups    %xmm14, 16(%rsp)
+        movups    %xmm15, (%rsp)
+        movq      %rsi, 136(%rsp)
+        movq      %rdi, 128(%rsp)
+        movq      %r12, 168(%rsp)
+        cfi_offset_rel_rsp (12, 168)
+        movb      %dl, %r12b
+        movq      %r13, 160(%rsp)
+        cfi_offset_rel_rsp (13, 160)
+        movl      %ecx, %r13d
+        movq      %r14, 152(%rsp)
+        cfi_offset_rel_rsp (14, 152)
+        movl      %eax, %r14d
+        movq      %r15, 144(%rsp)
+        cfi_offset_rel_rsp (15, 144)
+        cfi_remember_state
+
+.LBL_1_6:
+        btl       %r14d, %r13d
+        jc        .LBL_1_12
+
+.LBL_1_7:
+        lea       1(%r14), %esi
+        btl       %esi, %r13d
+        jc        .LBL_1_10
+
+.LBL_1_8:
+        incb      %r12b
+        addl      $2, %r14d
+        cmpb      $16, %r12b
+        jb        .LBL_1_6
+
+        movups    112(%rsp), %xmm8
+        movups    96(%rsp), %xmm9
+        movups    80(%rsp), %xmm10
+        movups    64(%rsp), %xmm11
+        movups    48(%rsp), %xmm12
+        movups    32(%rsp), %xmm13
+        movups    16(%rsp), %xmm14
+        movups    (%rsp), %xmm15
+        movq      136(%rsp), %rsi
+        movq      128(%rsp), %rdi
+        movq      168(%rsp), %r12
+        cfi_restore (%r12)
+        movq      160(%rsp), %r13
+        cfi_restore (%r13)
+        movq      152(%rsp), %r14
+        cfi_restore (%r14)
+        movq      144(%rsp), %r15
+        cfi_restore (%r15)
+        movups    256(%rsp), %xmm0
+        jmp       .LBL_1_2
+
+.LBL_1_10:
+        cfi_restore_state
+        movzbl    %r12b, %r15d
+        movss     196(%rsp,%r15,8), %xmm0
+
+        call      cosf@PLT
+
+        movss     %xmm0, 260(%rsp,%r15,8)
+        jmp       .LBL_1_8
+
+.LBL_1_12:
+        movzbl    %r12b, %r15d
+        movss     192(%rsp,%r15,8), %xmm0
+
+        call      cosf@PLT
+
+        movss     %xmm0, 256(%rsp,%r15,8)
+        jmp       .LBL_1_7
+END (_ZGVbN4v_cosf_sse4)
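For readers who do not want to trace the SSE4 register flow above, here is a
scalar C model of the main path (the large-argument branch that falls back to
scalar cosf is omitted).  The bit patterns are taken verbatim from the
__svml_scos_data table added later in this patch; the function and helper
names are illustrative only, not glibc API.

    #include <stdint.h>
    #include <string.h>

    /* Reinterpret a 32-bit pattern from the data table as a float.  */
    static float
    asfloat (uint32_t u)
    {
      float f;
      memcpy (&f, &u, sizeof f);
      return f;
    }

    /* One lane of _ZGVbN4v_cosf_sse4, small-argument path only.  */
    static float
    cosf_lane (float x)
    {
      /* b) cos(x) = sin(x + Pi/2).  */
      float y = x + asfloat (0x3fc90fdb);              /* __sHalfPI */
      /* c), d) Octant as integer-in-float via 1/Pi and the shifter.  */
      y = y * asfloat (0x3ea2f983)                     /* __sInvPI */
          + asfloat (0x4b400000);                      /* __sRShifter */
      /* e) The octant's low bit, moved to the sign position.  */
      uint32_t s;
      memcpy (&s, &y, sizeof s);
      s <<= 31;
      /* f), g) Remove the shifter, apply the 0.5 octant correction.  */
      y = y - asfloat (0x4b400000) - asfloat (0x3f000000); /* __sOneHalf */
      /* h) r = x - y*PI1 - y*PI2 - y*PI3 - y*PI4.  */
      float r = x - y * asfloat (0x40490000);          /* __sPI1 */
      r -= y * asfloat (0x3a7da000);                   /* __sPI2 */
      r -= y * asfloat (0x34222000);                   /* __sPI3 */
      r -= y * asfloat (0x2cb4611a);                   /* __sPI4 */
      /* 2a) X^2, then 3a) fold the sign into r with XOR.  */
      float r2 = r * r;
      uint32_t rb;
      memcpy (&rb, &r, sizeof rb);
      rb ^= s;
      memcpy (&r, &rb, sizeof r);
      /* 2b) Odd minimax polynomial in Horner form.  */
      float p = asfloat (0x362edef8);                  /* __sA9 */
      p = p * r2 + asfloat (0xb94fb7ff);               /* __sA7 */
      p = p * r2 + asfloat (0x3c08876a);               /* __sA5 */
      p = p * r2 + asfloat (0xbe2aaaa6);               /* __sA3 */
      return r + r * r2 * p;
    }

The right-shifter trick works because adding 1.5*2^23 forces the octant
integer into the low mantissa bits, so the shift can read it without an
explicit float-to-int conversion.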
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S
similarity index 50%
copy from sysdeps/x86/fpu/bits/math-vector.h
copy to sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S
index 27294ce..e14bba4 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S
@@ -1,4 +1,4 @@
-/* Platform-specific SIMD declarations of math functions.
+/* Multiple versions of vectorized cosf, vector length is 8.
    Copyright (C) 2014-2015 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -10,25 +10,29 @@
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
+    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#ifndef _MATH_H
-# error "Never include <bits/math-vector.h> directly;\
- include <math.h> instead."
-#endif
+#include <sysdep.h>
+#include <init-arch.h>
 
-/* Get default empty definitions for simd declarations.  */
-#include <bits/libm-simd-decl-stubs.h>
+	.text
+ENTRY (_ZGVdN8v_cosf)
+        .type   _ZGVdN8v_cosf, @gnu_indirect_function
+        cmpl    $0, KIND_OFFSET+__cpu_features(%rip)
+        jne     1f
+        call    __init_cpu_features
+1:      leaq    _ZGVdN8v_cosf_avx2(%rip), %rax
+        testl   $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+        jz      2f
+        ret
+2:      leaq    _ZGVdN8v_cosf_sse_wrapper(%rip), %rax
+        ret
+END (_ZGVdN8v_cosf)
+libmvec_hidden_def (_ZGVdN8v_cosf)
 
-#if defined __x86_64__ && defined __FAST_MATH__
-# if defined _OPENMP && _OPENMP >= 201307
-/* OpenMP case.  */
-#  define __DECL_SIMD_x86_64 _Pragma ("omp declare simd notinbranch")
-#  undef __DECL_SIMD_cos
-#  define __DECL_SIMD_cos __DECL_SIMD_x86_64
-# endif
-#endif
+#define _ZGVdN8v_cosf _ZGVdN8v_cosf_sse_wrapper
+#include "../svml_s_cosf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S
new file mode 100644
index 0000000..6c25e14
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S
@@ -0,0 +1,215 @@
+/* Function cosf vectorized with AVX2.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+#include <sysdep.h>
+#include "svml_s_cosf_data.h"
+
+	.text
+ENTRY (_ZGVdN8v_cosf_avx2)
+/*
+  ALGORITHM DESCRIPTION:
+
+  1) Range reduction to [-Pi/2; +Pi/2] interval
+    a) We remove sign using AND operation
+    b) Add Pi/2 value to argument X for Cos to Sin transformation
+    c) Get octant Y by multiplying by 1/Pi
+    d) Add "Right Shifter" value
+    e) Treat the obtained value as an integer for destination sign setting.
+       Shift the first bit of this value to the last (sign) position
+    f) Subtract "Right Shifter" value
+    g) Subtract 0.5 from the result for octant correction
+    h) Subtract Y*PI from the X argument, where PI is divided into 4 parts:
+         X = X - Y*PI1 - Y*PI2 - Y*PI3 - Y*PI4;
+  2) Polynomial (minimax for sin within the [-Pi/2; +Pi/2] interval)
+    a) Calculate X^2 = X * X
+    b) Calculate the polynomial:
+         R = X + X * X^2 * (A3 + X^2 * (A5 + .....
+  3) Destination sign setting
+    a) Set the shifted destination sign using an XOR operation:
+         R = XOR( R, S );
+ */
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq      $-64, %rsp
+        subq      $448, %rsp
+        movq      __svml_scos_data@GOTPCREL(%rip), %rax
+        vmovaps   %ymm0, %ymm2
+        vmovups __sRShifter(%rax), %ymm5
+        vmovups __sPI1_FMA(%rax), %ymm7
+
+/* b) Add Pi/2 value to argument X for Cos to Sin transformation */
+        vaddps __sHalfPI(%rax), %ymm2, %ymm4
+
+/*
+  1) Range reduction to [-Pi/2; +Pi/2] interval
+  c) Get octant Y by multiplying by 1/Pi
+  d) Add "Right Shifter" (0x4B400000) value
+ */
+        vfmadd132ps __sInvPI(%rax), %ymm5, %ymm4
+
+/* f) Subtract "Right Shifter" (0x4B400000) value */
+        vsubps    %ymm5, %ymm4, %ymm6
+
+/*
+  e) Treat the obtained value as an integer for destination sign setting.
+  Shift the first bit of this value to the last (sign) position (S << 31)
+ */
+        vpslld    $31, %ymm4, %ymm0
+
+/* g) Subtract 0.5 from the result for octant correction */
+        vsubps __sOneHalf(%rax), %ymm6, %ymm4
+
+/* Check for large and special arguments */
+        vandps __sAbsMask(%rax), %ymm2, %ymm3
+        vcmpnle_uqps __sRangeReductionVal(%rax), %ymm3, %ymm1
+
+/*
+  h) Subtract Y*PI from the X argument; with FMA, PI is divided into
+  only 3 parts:
+  X = X - Y*PI1 - Y*PI2 - Y*PI3
+ */
+        vmovaps   %ymm2, %ymm3
+        vfnmadd231ps %ymm4, %ymm7, %ymm3
+        vfnmadd231ps __sPI2_FMA(%rax), %ymm4, %ymm3
+        vfnmadd132ps __sPI3_FMA(%rax), %ymm3, %ymm4
+
+/* a) Calculate X^2 = X * X */
+        vmulps    %ymm4, %ymm4, %ymm5
+
+/*
+  3) Destination sign setting
+  a) Set shifted destination sign using XOR operation:
+  R = XOR( R, S );
+ */
+        vxorps    %ymm0, %ymm4, %ymm6
+        vmovups __sA9_FMA(%rax), %ymm0
+
+/*
+  b) Calculate polynomial:
+  R = X + X * X^2 * (A3 + X^2 * (A5 + X^2 * (A7 + X^2 * (A9))))
+ */
+        vfmadd213ps __sA7_FMA(%rax), %ymm5, %ymm0
+        vfmadd213ps __sA5_FMA(%rax), %ymm5, %ymm0
+        vfmadd213ps __sA3(%rax), %ymm5, %ymm0
+        vmulps    %ymm5, %ymm0, %ymm0
+        vmovmskps %ymm1, %ecx
+        vfmadd213ps %ymm6, %ymm6, %ymm0
+        testl     %ecx, %ecx
+        jne       .LBL_1_3
+
+.LBL_1_2:
+        cfi_remember_state
+        movq      %rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+
+.LBL_1_3:
+        cfi_restore_state
+        vmovups   %ymm2, 320(%rsp)
+        vmovups   %ymm0, 384(%rsp)
+        je        .LBL_1_2
+
+        xorb      %dl, %dl
+        xorl      %eax, %eax
+        vmovups   %ymm8, 224(%rsp)
+        vmovups   %ymm9, 192(%rsp)
+        vmovups   %ymm10, 160(%rsp)
+        vmovups   %ymm11, 128(%rsp)
+        vmovups   %ymm12, 96(%rsp)
+        vmovups   %ymm13, 64(%rsp)
+        vmovups   %ymm14, 32(%rsp)
+        vmovups   %ymm15, (%rsp)
+        movq      %rsi, 264(%rsp)
+        movq      %rdi, 256(%rsp)
+        movq      %r12, 296(%rsp)
+        cfi_offset_rel_rsp (12, 296)
+        movb      %dl, %r12b
+        movq      %r13, 288(%rsp)
+        cfi_offset_rel_rsp (13, 288)
+        movl      %ecx, %r13d
+        movq      %r14, 280(%rsp)
+        cfi_offset_rel_rsp (14, 280)
+        movl      %eax, %r14d
+        movq      %r15, 272(%rsp)
+        cfi_offset_rel_rsp (15, 272)
+        cfi_remember_state
+
+.LBL_1_6:
+        btl       %r14d, %r13d
+        jc        .LBL_1_12
+
+.LBL_1_7:
+        lea       1(%r14), %esi
+        btl       %esi, %r13d
+        jc        .LBL_1_10
+
+.LBL_1_8:
+        incb      %r12b
+        addl      $2, %r14d
+        cmpb      $16, %r12b
+        jb        .LBL_1_6
+
+        vmovups   224(%rsp), %ymm8
+        vmovups   192(%rsp), %ymm9
+        vmovups   160(%rsp), %ymm10
+        vmovups   128(%rsp), %ymm11
+        vmovups   96(%rsp), %ymm12
+        vmovups   64(%rsp), %ymm13
+        vmovups   32(%rsp), %ymm14
+        vmovups   (%rsp), %ymm15
+        vmovups   384(%rsp), %ymm0
+        movq      264(%rsp), %rsi
+        movq      256(%rsp), %rdi
+        movq      296(%rsp), %r12
+        cfi_restore (%r12)
+        movq      288(%rsp), %r13
+        cfi_restore (%r13)
+        movq      280(%rsp), %r14
+        cfi_restore (%r14)
+        movq      272(%rsp), %r15
+        cfi_restore (%r15)
+        jmp       .LBL_1_2
+
+.LBL_1_10:
+        cfi_restore_state
+        movzbl    %r12b, %r15d
+        vmovss    324(%rsp,%r15,8), %xmm0
+        vzeroupper
+
+        call      cosf@PLT
+
+        vmovss    %xmm0, 388(%rsp,%r15,8)
+        jmp       .LBL_1_8
+
+.LBL_1_12:
+        movzbl    %r12b, %r15d
+        vmovss    320(%rsp,%r15,8), %xmm0
+        vzeroupper
+
+        call      cosf@PLT
+
+        vmovss    %xmm0, 384(%rsp,%r15,8)
+        jmp       .LBL_1_7
+
+END (_ZGVdN8v_cosf_avx2)
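The main difference from the SSE4 body is step h): each vfnmadd folds a Y*PIn
product into the running subtraction with a single rounding, so three PI
pieces (__sPI1_FMA..__sPI3_FMA) reach the accuracy the non-FMA path needs
four terms for.  A scalar sketch of those three steps, using the table's bit
patterns (the helper and function names are illustrative):

    #include <math.h>     /* fmaf */
    #include <stdint.h>
    #include <string.h>

    static float
    asfloat (uint32_t u)
    {
      float f;
      memcpy (&f, &u, sizeof f);
      return f;
    }

    /* r = x - y*PI1_FMA - y*PI2_FMA - y*PI3_FMA, one rounding per step,
       mirroring the vfnmadd231ps/vfnmadd132ps sequence above.  */
    static float
    reduce_fma (float x, float y)
    {
      float r = fmaf (-y, asfloat (0x40490fdb), x);  /* __sPI1_FMA */
      r = fmaf (-y, asfloat (0xb3bbbd2e), r);        /* __sPI2_FMA */
      return fmaf (-y, asfloat (0xa7772ced), r);     /* __sPI3_FMA */
    }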
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86_64/fpu/svml_s_cosf16_core.S
similarity index 59%
copy from sysdeps/x86/fpu/bits/math-vector.h
copy to sysdeps/x86_64/fpu/svml_s_cosf16_core.S
index 27294ce..e623df5 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86_64/fpu/svml_s_cosf16_core.S
@@ -1,4 +1,4 @@
-/* Platform-specific SIMD declarations of math functions.
+/* Function cosf vectorized with AVX-512. Wrapper to AVX2 version.
    Copyright (C) 2014-2015 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,19 +16,10 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#ifndef _MATH_H
-# error "Never include <bits/math-vector.h> directly;\
- include <math.h> instead."
-#endif
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
 
-/* Get default empty definitions for simd declarations.  */
-#include <bits/libm-simd-decl-stubs.h>
-
-#if defined __x86_64__ && defined __FAST_MATH__
-# if defined _OPENMP && _OPENMP >= 201307
-/* OpenMP case.  */
-#  define __DECL_SIMD_x86_64 _Pragma ("omp declare simd notinbranch")
-#  undef __DECL_SIMD_cos
-#  define __DECL_SIMD_cos __DECL_SIMD_x86_64
-# endif
-#endif
+	.text
+ENTRY (_ZGVeN16v_cosf)
+WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
+END (_ZGVeN16v_cosf)
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86_64/fpu/svml_s_cosf4_core.S
similarity index 60%
copy from sysdeps/x86/fpu/bits/math-vector.h
copy to sysdeps/x86_64/fpu/svml_s_cosf4_core.S
index 27294ce..9875cd7 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86_64/fpu/svml_s_cosf4_core.S
@@ -1,4 +1,4 @@
-/* Platform-specific SIMD declarations of math functions.
+/* Function cosf vectorized with SSE2, wrapper version.
    Copyright (C) 2014-2015 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,19 +16,14 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#ifndef _MATH_H
-# error "Never include <bits/math-vector.h> directly;\
- include <math.h> instead."
-#endif
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
 
-/* Get default empty definitions for simd declarations.  */
-#include <bits/libm-simd-decl-stubs.h>
+	.text
+ENTRY (_ZGVbN4v_cosf)
+WRAPPER_IMPL_SSE2 cosf
+END (_ZGVbN4v_cosf)
 
-#if defined __x86_64__ && defined __FAST_MATH__
-# if defined _OPENMP && _OPENMP >= 201307
-/* OpenMP case.  */
-#  define __DECL_SIMD_x86_64 _Pragma ("omp declare simd notinbranch")
-#  undef __DECL_SIMD_cos
-#  define __DECL_SIMD_cos __DECL_SIMD_x86_64
-# endif
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVbN4v_cosf)
 #endif
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86_64/fpu/svml_s_cosf8_core.S
similarity index 60%
copy from sysdeps/x86/fpu/bits/math-vector.h
copy to sysdeps/x86_64/fpu/svml_s_cosf8_core.S
index 27294ce..376ee35 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86_64/fpu/svml_s_cosf8_core.S
@@ -1,4 +1,4 @@
-/* Platform-specific SIMD declarations of math functions.
+/* Function cosf vectorized with AVX2, wrapper version.
    Copyright (C) 2014-2015 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,19 +16,14 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#ifndef _MATH_H
-# error "Never include <bits/math-vector.h> directly;\
- include <math.h> instead."
-#endif
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
 
-/* Get default empty definitions for simd declarations.  */
-#include <bits/libm-simd-decl-stubs.h>
+	.text
+ENTRY (_ZGVdN8v_cosf)
+WRAPPER_IMPL_AVX _ZGVbN4v_cosf
+END (_ZGVdN8v_cosf)
 
-#if defined __x86_64__ && defined __FAST_MATH__
-# if defined _OPENMP && _OPENMP >= 201307
-/* OpenMP case.  */
-#  define __DECL_SIMD_x86_64 _Pragma ("omp declare simd notinbranch")
-#  undef __DECL_SIMD_cos
-#  define __DECL_SIMD_cos __DECL_SIMD_x86_64
-# endif
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVdN8v_cosf)
 #endif
diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S
similarity index 59%
copy from sysdeps/x86/fpu/bits/math-vector.h
copy to sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S
index 27294ce..a443fd2 100644
--- a/sysdeps/x86/fpu/bits/math-vector.h
+++ b/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S
@@ -1,4 +1,4 @@
-/* Platform-specific SIMD declarations of math functions.
+/* Function cosf vectorized in AVX ISA as wrapper to SSE4 ISA version.
    Copyright (C) 2014-2015 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,19 +16,10 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#ifndef _MATH_H
-# error "Never include <bits/math-vector.h> directly;\
- include <math.h> instead."
-#endif
+#include <sysdep.h>
+#include "svml_s_wrapper_impl.h"
 
-/* Get default empty definitions for simd declarations.  */
-#include <bits/libm-simd-decl-stubs.h>
-
-#if defined __x86_64__ && defined __FAST_MATH__
-# if defined _OPENMP && _OPENMP >= 201307
-/* OpenMP case.  */
-#  define __DECL_SIMD_x86_64 _Pragma ("omp declare simd notinbranch")
-#  undef __DECL_SIMD_cos
-#  define __DECL_SIMD_cos __DECL_SIMD_x86_64
-# endif
-#endif
+        .text
+ENTRY (_ZGVcN8v_cosf)
+WRAPPER_IMPL_AVX _ZGVbN4v_cosf
+END (_ZGVcN8v_cosf)
diff --git a/sysdeps/x86_64/fpu/svml_s_cosf_data.S b/sysdeps/x86_64/fpu/svml_s_cosf_data.S
new file mode 100644
index 0000000..2f7303c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_s_cosf_data.S
@@ -0,0 +1,1130 @@
+/* Data for function cosf.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "svml_s_cosf_data.h"
+
+	.section .rodata, "a"
+	.align 64
+
+/* Data table for vector implementations of function cosf.
+   The table may contain polynomial, reduction, and lookup coefficients,
+   as well as other constants obtained through different methods
+   of research and experimental work.  */
+
+	.globl __svml_scos_data
+__svml_scos_data:
+
+/* Lookup table for high accuracy version (CHL,SHi,SLo,Sigma).  */
+.if .-__svml_scos_data != __dT
+.err
+.endif
+	.long	0x00000000
+	.long	0x3f800000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x3bdbd541
+	.long	0x3f7fec43
+	.long	0x3084cd0d
+	.long	0xbd000000
+	.long	0x3c5c1342
+	.long	0x3f7fb10f
+	.long	0x31de5b5f
+	.long	0xbd800000
+	.long	0xbc354825
+	.long	0x3f7f4e6d
+	.long	0x32d01884
+	.long	0xbd800000
+	.long	0x3cdd0b28
+	.long	0x3f7ec46d
+	.long	0x31f44949
+	.long	0xbe000000
+	.long	0x3b29b1a9
+	.long	0x3f7e1324
+	.long	0xb2f1e603
+	.long	0xbe000000
+	.long	0xbcb2041c
+	.long	0x3f7d3aac
+	.long	0xb0f75ae9
+	.long	0xbe000000
+	.long	0xbd3c4289
+	.long	0x3f7c3b28
+	.long	0xb231d68b
+	.long	0xbe000000
+	.long	0x3d60e8f8
+	.long	0x3f7b14be
+	.long	0x32ff75cb
+	.long	0xbe800000
+	.long	0x3cfd1f65
+	.long	0x3f79c79d
+	.long	0x32c64e59
+	.long	0xbe800000
+	.long	0x3be60685
+	.long	0x3f7853f8
+	.long	0xb20db9e5
+	.long	0xbe800000
+	.long	0xbc88e931
+	.long	0x3f76ba07
+	.long	0x326d092c
+	.long	0xbe800000
+	.long	0xbd25018c
+	.long	0x3f74fa0b
+	.long	0xb2939d22
+	.long	0xbe800000
+	.long	0xbd826b93
+	.long	0x3f731447
+	.long	0x32c48e11
+	.long	0xbe800000
+	.long	0xbdb1f34f
+	.long	0x3f710908
+	.long	0x321ed0dd
+	.long	0xbe800000
+	.long	0x3e0f77ad
+	.long	0x3f6ed89e
+	.long	0xb29333dc
+	.long	0xbf000000
+	.long	0x3df043ab
+	.long	0x3f6c835e
+	.long	0x32f328d4
+	.long	0xbf000000
+	.long	0x3dc210d8
+	.long	0x3f6a09a7
+	.long	0xb2eb236c
+	.long	0xbf000000
+	.long	0x3d945dff
+	.long	0x3f676bd8
+	.long	0xb2bc3389
+	.long	0xbf000000
+	.long	0x3d4e645a
+	.long	0x3f64aa59
+	.long	0x311a08fa
+	.long	0xbf000000
+	.long	0x3cea5164
+	.long	0x3f61c598
+	.long	0xb2e7f425
+	.long	0xbf000000
+	.long	0x3be8b648
+	.long	0x3f5ebe05
+	.long	0x32c6f953
+	.long	0xbf000000
+	.long	0xbc670f32
+	.long	0x3f5b941a
+	.long	0x32232dc8
+	.long	0xbf000000
+	.long	0xbd0f59aa
+	.long	0x3f584853
+	.long	0xb27d5fc0
+	.long	0xbf000000
+	.long	0xbd639d9d
+	.long	0x3f54db31
+	.long	0x3290ea1a
+	.long	0xbf000000
+	.long	0xbd9b4153
+	.long	0x3f514d3d
+	.long	0x300c4f04
+	.long	0xbf000000
+	.long	0xbdc3fdff
+	.long	0x3f4d9f02
+	.long	0x327e70e8
+	.long	0xbf000000
+	.long	0xbdebfe8a
+	.long	0x3f49d112
+	.long	0x32992640
+	.long	0xbf000000
+	.long	0xbe099e65
+	.long	0x3f45e403
+	.long	0x32b15174
+	.long	0xbf000000
+	.long	0xbe1cd957
+	.long	0x3f41d870
+	.long	0x32bff977
+	.long	0xbf000000
+	.long	0xbe2fad27
+	.long	0x3f3daef9
+	.long	0x319aabec
+	.long	0xbf000000
+	.long	0xbe4216eb
+	.long	0x3f396842
+	.long	0xb2810007
+	.long	0xbf000000
+	.long	0x3e95f61a
+	.long	0x3f3504f3
+	.long	0x324fe77a
+	.long	0xbf800000
+	.long	0x3e8d2f7d
+	.long	0x3f3085bb
+	.long	0xb2ae2d32
+	.long	0xbf800000
+	.long	0x3e84a20e
+	.long	0x3f2beb4a
+	.long	0xb2b73136
+	.long	0xbf800000
+	.long	0x3e789e3f
+	.long	0x3f273656
+	.long	0xb2038343
+	.long	0xbf800000
+	.long	0x3e686ff3
+	.long	0x3f226799
+	.long	0x322123bb
+	.long	0xbf800000
+	.long	0x3e58bbb7
+	.long	0x3f1d7fd1
+	.long	0x3292050c
+	.long	0xbf800000
+	.long	0x3e4983f7
+	.long	0x3f187fc0
+	.long	0xb1c7a3f3
+	.long	0xbf800000
+	.long	0x3e3acb0c
+	.long	0x3f13682a
+	.long	0x32cdd12e
+	.long	0xbf800000
+	.long	0x3e2c933b
+	.long	0x3f0e39da
+	.long	0xb24a32e7
+	.long	0xbf800000
+	.long	0x3e1edeb5
+	.long	0x3f08f59b
+	.long	0xb2be4b4e
+	.long	0xbf800000
+	.long	0x3e11af97
+	.long	0x3f039c3d
+	.long	0xb25ba002
+	.long	0xbf800000
+	.long	0x3e0507ea
+	.long	0x3efc5d27
+	.long	0xb180eca9
+	.long	0xbf800000
+	.long	0x3df1d344
+	.long	0x3ef15aea
+	.long	0xb1ff2139
+	.long	0xbf800000
+	.long	0x3ddaad38
+	.long	0x3ee63375
+	.long	0xb1d9c774
+	.long	0xbf800000
+	.long	0x3dc4a143
+	.long	0x3edae880
+	.long	0x321e15cc
+	.long	0xbf800000
+	.long	0x3dafb2cc
+	.long	0x3ecf7bca
+	.long	0x316a3b63
+	.long	0xbf800000
+	.long	0x3d9be50c
+	.long	0x3ec3ef15
+	.long	0x31d5d52c
+	.long	0xbf800000
+	.long	0x3d893b12
+	.long	0x3eb8442a
+	.long	0xb2705ba6
+	.long	0xbf800000
+	.long	0x3d6f6f7e
+	.long	0x3eac7cd4
+	.long	0xb2254e02
+	.long	0xbf800000
+	.long	0x3d4ebb8a
+	.long	0x3ea09ae5
+	.long	0xb23e89a0
+	.long	0xbf800000
+	.long	0x3d305f55
+	.long	0x3e94a031
+	.long	0x326d59f0
+	.long	0xbf800000
+	.long	0x3d145f8c
+	.long	0x3e888e93
+	.long	0x312c7d9e
+	.long	0xbf800000
+	.long	0x3cf58104
+	.long	0x3e78cfcc
+	.long	0xb11bd41d
+	.long	0xbf800000
+	.long	0x3cc70c54
+	.long	0x3e605c13
+	.long	0x31a7e4f6
+	.long	0xbf800000
+	.long	0x3c9d6830
+	.long	0x3e47c5c2
+	.long	0xb0e5967d
+	.long	0xbf800000
+	.long	0x3c71360b
+	.long	0x3e2f10a2
+	.long	0x311167f9
+	.long	0xbf800000
+	.long	0x3c315502
+	.long	0x3e164083
+	.long	0x31e8e614
+	.long	0xbf800000
+	.long	0x3bf66e3c
+	.long	0x3dfab273
+	.long	0xb11568cf
+	.long	0xbf800000
+	.long	0x3b9dc971
+	.long	0x3dc8bd36
+	.long	0xb07592f5
+	.long	0xbf800000
+	.long	0x3b319298
+	.long	0x3d96a905
+	.long	0xb1531e61
+	.long	0xbf800000
+	.long	0x3a9de1c8
+	.long	0x3d48fb30
+	.long	0xb0ef227f
+	.long	0xbf800000
+	.long	0x399de7df
+	.long	0x3cc90ab0
+	.long	0xb005c998
+	.long	0xbf800000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0xbf800000
+	.long	0x399de7df
+	.long	0xbcc90ab0
+	.long	0x3005c998
+	.long	0xbf800000
+	.long	0x3a9de1c8
+	.long	0xbd48fb30
+	.long	0x30ef227f
+	.long	0xbf800000
+	.long	0x3b319298
+	.long	0xbd96a905
+	.long	0x31531e61
+	.long	0xbf800000
+	.long	0x3b9dc971
+	.long	0xbdc8bd36
+	.long	0x307592f5
+	.long	0xbf800000
+	.long	0x3bf66e3c
+	.long	0xbdfab273
+	.long	0x311568cf
+	.long	0xbf800000
+	.long	0x3c315502
+	.long	0xbe164083
+	.long	0xb1e8e614
+	.long	0xbf800000
+	.long	0x3c71360b
+	.long	0xbe2f10a2
+	.long	0xb11167f9
+	.long	0xbf800000
+	.long	0x3c9d6830
+	.long	0xbe47c5c2
+	.long	0x30e5967d
+	.long	0xbf800000
+	.long	0x3cc70c54
+	.long	0xbe605c13
+	.long	0xb1a7e4f6
+	.long	0xbf800000
+	.long	0x3cf58104
+	.long	0xbe78cfcc
+	.long	0x311bd41d
+	.long	0xbf800000
+	.long	0x3d145f8c
+	.long	0xbe888e93
+	.long	0xb12c7d9e
+	.long	0xbf800000
+	.long	0x3d305f55
+	.long	0xbe94a031
+	.long	0xb26d59f0
+	.long	0xbf800000
+	.long	0x3d4ebb8a
+	.long	0xbea09ae5
+	.long	0x323e89a0
+	.long	0xbf800000
+	.long	0x3d6f6f7e
+	.long	0xbeac7cd4
+	.long	0x32254e02
+	.long	0xbf800000
+	.long	0x3d893b12
+	.long	0xbeb8442a
+	.long	0x32705ba6
+	.long	0xbf800000
+	.long	0x3d9be50c
+	.long	0xbec3ef15
+	.long	0xb1d5d52c
+	.long	0xbf800000
+	.long	0x3dafb2cc
+	.long	0xbecf7bca
+	.long	0xb16a3b63
+	.long	0xbf800000
+	.long	0x3dc4a143
+	.long	0xbedae880
+	.long	0xb21e15cc
+	.long	0xbf800000
+	.long	0x3ddaad38
+	.long	0xbee63375
+	.long	0x31d9c774
+	.long	0xbf800000
+	.long	0x3df1d344
+	.long	0xbef15aea
+	.long	0x31ff2139
+	.long	0xbf800000
+	.long	0x3e0507ea
+	.long	0xbefc5d27
+	.long	0x3180eca9
+	.long	0xbf800000
+	.long	0x3e11af97
+	.long	0xbf039c3d
+	.long	0x325ba002
+	.long	0xbf800000
+	.long	0x3e1edeb5
+	.long	0xbf08f59b
+	.long	0x32be4b4e
+	.long	0xbf800000
+	.long	0x3e2c933b
+	.long	0xbf0e39da
+	.long	0x324a32e7
+	.long	0xbf800000
+	.long	0x3e3acb0c
+	.long	0xbf13682a
+	.long	0xb2cdd12e
+	.long	0xbf800000
+	.long	0x3e4983f7
+	.long	0xbf187fc0
+	.long	0x31c7a3f3
+	.long	0xbf800000
+	.long	0x3e58bbb7
+	.long	0xbf1d7fd1
+	.long	0xb292050c
+	.long	0xbf800000
+	.long	0x3e686ff3
+	.long	0xbf226799
+	.long	0xb22123bb
+	.long	0xbf800000
+	.long	0x3e789e3f
+	.long	0xbf273656
+	.long	0x32038343
+	.long	0xbf800000
+	.long	0x3e84a20e
+	.long	0xbf2beb4a
+	.long	0x32b73136
+	.long	0xbf800000
+	.long	0x3e8d2f7d
+	.long	0xbf3085bb
+	.long	0x32ae2d32
+	.long	0xbf800000
+	.long	0x3e95f61a
+	.long	0xbf3504f3
+	.long	0xb24fe77a
+	.long	0xbf800000
+	.long	0xbe4216eb
+	.long	0xbf396842
+	.long	0x32810007
+	.long	0xbf000000
+	.long	0xbe2fad27
+	.long	0xbf3daef9
+	.long	0xb19aabec
+	.long	0xbf000000
+	.long	0xbe1cd957
+	.long	0xbf41d870
+	.long	0xb2bff977
+	.long	0xbf000000
+	.long	0xbe099e65
+	.long	0xbf45e403
+	.long	0xb2b15174
+	.long	0xbf000000
+	.long	0xbdebfe8a
+	.long	0xbf49d112
+	.long	0xb2992640
+	.long	0xbf000000
+	.long	0xbdc3fdff
+	.long	0xbf4d9f02
+	.long	0xb27e70e8
+	.long	0xbf000000
+	.long	0xbd9b4153
+	.long	0xbf514d3d
+	.long	0xb00c4f04
+	.long	0xbf000000
+	.long	0xbd639d9d
+	.long	0xbf54db31
+	.long	0xb290ea1a
+	.long	0xbf000000
+	.long	0xbd0f59aa
+	.long	0xbf584853
+	.long	0x327d5fc0
+	.long	0xbf000000
+	.long	0xbc670f32
+	.long	0xbf5b941a
+	.long	0xb2232dc8
+	.long	0xbf000000
+	.long	0x3be8b648
+	.long	0xbf5ebe05
+	.long	0xb2c6f953
+	.long	0xbf000000
+	.long	0x3cea5164
+	.long	0xbf61c598
+	.long	0x32e7f425
+	.long	0xbf000000
+	.long	0x3d4e645a
+	.long	0xbf64aa59
+	.long	0xb11a08fa
+	.long	0xbf000000
+	.long	0x3d945dff
+	.long	0xbf676bd8
+	.long	0x32bc3389
+	.long	0xbf000000
+	.long	0x3dc210d8
+	.long	0xbf6a09a7
+	.long	0x32eb236c
+	.long	0xbf000000
+	.long	0x3df043ab
+	.long	0xbf6c835e
+	.long	0xb2f328d4
+	.long	0xbf000000
+	.long	0x3e0f77ad
+	.long	0xbf6ed89e
+	.long	0x329333dc
+	.long	0xbf000000
+	.long	0xbdb1f34f
+	.long	0xbf710908
+	.long	0xb21ed0dd
+	.long	0xbe800000
+	.long	0xbd826b93
+	.long	0xbf731447
+	.long	0xb2c48e11
+	.long	0xbe800000
+	.long	0xbd25018c
+	.long	0xbf74fa0b
+	.long	0x32939d22
+	.long	0xbe800000
+	.long	0xbc88e931
+	.long	0xbf76ba07
+	.long	0xb26d092c
+	.long	0xbe800000
+	.long	0x3be60685
+	.long	0xbf7853f8
+	.long	0x320db9e5
+	.long	0xbe800000
+	.long	0x3cfd1f65
+	.long	0xbf79c79d
+	.long	0xb2c64e59
+	.long	0xbe800000
+	.long	0x3d60e8f8
+	.long	0xbf7b14be
+	.long	0xb2ff75cb
+	.long	0xbe800000
+	.long	0xbd3c4289
+	.long	0xbf7c3b28
+	.long	0x3231d68b
+	.long	0xbe000000
+	.long	0xbcb2041c
+	.long	0xbf7d3aac
+	.long	0x30f75ae9
+	.long	0xbe000000
+	.long	0x3b29b1a9
+	.long	0xbf7e1324
+	.long	0x32f1e603
+	.long	0xbe000000
+	.long	0x3cdd0b28
+	.long	0xbf7ec46d
+	.long	0xb1f44949
+	.long	0xbe000000
+	.long	0xbc354825
+	.long	0xbf7f4e6d
+	.long	0xb2d01884
+	.long	0xbd800000
+	.long	0x3c5c1342
+	.long	0xbf7fb10f
+	.long	0xb1de5b5f
+	.long	0xbd800000
+	.long	0x3bdbd541
+	.long	0xbf7fec43
+	.long	0xb084cd0d
+	.long	0xbd000000
+	.long	0x00000000
+	.long	0xbf800000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0xbbdbd541
+	.long	0xbf7fec43
+	.long	0xb084cd0d
+	.long	0x3d000000
+	.long	0xbc5c1342
+	.long	0xbf7fb10f
+	.long	0xb1de5b5f
+	.long	0x3d800000
+	.long	0x3c354825
+	.long	0xbf7f4e6d
+	.long	0xb2d01884
+	.long	0x3d800000
+	.long	0xbcdd0b28
+	.long	0xbf7ec46d
+	.long	0xb1f44949
+	.long	0x3e000000
+	.long	0xbb29b1a9
+	.long	0xbf7e1324
+	.long	0x32f1e603
+	.long	0x3e000000
+	.long	0x3cb2041c
+	.long	0xbf7d3aac
+	.long	0x30f75ae9
+	.long	0x3e000000
+	.long	0x3d3c4289
+	.long	0xbf7c3b28
+	.long	0x3231d68b
+	.long	0x3e000000
+	.long	0xbd60e8f8
+	.long	0xbf7b14be
+	.long	0xb2ff75cb
+	.long	0x3e800000
+	.long	0xbcfd1f65
+	.long	0xbf79c79d
+	.long	0xb2c64e59
+	.long	0x3e800000
+	.long	0xbbe60685
+	.long	0xbf7853f8
+	.long	0x320db9e5
+	.long	0x3e800000
+	.long	0x3c88e931
+	.long	0xbf76ba07
+	.long	0xb26d092c
+	.long	0x3e800000
+	.long	0x3d25018c
+	.long	0xbf74fa0b
+	.long	0x32939d22
+	.long	0x3e800000
+	.long	0x3d826b93
+	.long	0xbf731447
+	.long	0xb2c48e11
+	.long	0x3e800000
+	.long	0x3db1f34f
+	.long	0xbf710908
+	.long	0xb21ed0dd
+	.long	0x3e800000
+	.long	0xbe0f77ad
+	.long	0xbf6ed89e
+	.long	0x329333dc
+	.long	0x3f000000
+	.long	0xbdf043ab
+	.long	0xbf6c835e
+	.long	0xb2f328d4
+	.long	0x3f000000
+	.long	0xbdc210d8
+	.long	0xbf6a09a7
+	.long	0x32eb236c
+	.long	0x3f000000
+	.long	0xbd945dff
+	.long	0xbf676bd8
+	.long	0x32bc3389
+	.long	0x3f000000
+	.long	0xbd4e645a
+	.long	0xbf64aa59
+	.long	0xb11a08fa
+	.long	0x3f000000
+	.long	0xbcea5164
+	.long	0xbf61c598
+	.long	0x32e7f425
+	.long	0x3f000000
+	.long	0xbbe8b648
+	.long	0xbf5ebe05
+	.long	0xb2c6f953
+	.long	0x3f000000
+	.long	0x3c670f32
+	.long	0xbf5b941a
+	.long	0xb2232dc8
+	.long	0x3f000000
+	.long	0x3d0f59aa
+	.long	0xbf584853
+	.long	0x327d5fc0
+	.long	0x3f000000
+	.long	0x3d639d9d
+	.long	0xbf54db31
+	.long	0xb290ea1a
+	.long	0x3f000000
+	.long	0x3d9b4153
+	.long	0xbf514d3d
+	.long	0xb00c4f04
+	.long	0x3f000000
+	.long	0x3dc3fdff
+	.long	0xbf4d9f02
+	.long	0xb27e70e8
+	.long	0x3f000000
+	.long	0x3debfe8a
+	.long	0xbf49d112
+	.long	0xb2992640
+	.long	0x3f000000
+	.long	0x3e099e65
+	.long	0xbf45e403
+	.long	0xb2b15174
+	.long	0x3f000000
+	.long	0x3e1cd957
+	.long	0xbf41d870
+	.long	0xb2bff977
+	.long	0x3f000000
+	.long	0x3e2fad27
+	.long	0xbf3daef9
+	.long	0xb19aabec
+	.long	0x3f000000
+	.long	0x3e4216eb
+	.long	0xbf396842
+	.long	0x32810007
+	.long	0x3f000000
+	.long	0xbe95f61a
+	.long	0xbf3504f3
+	.long	0xb24fe77a
+	.long	0x3f800000
+	.long	0xbe8d2f7d
+	.long	0xbf3085bb
+	.long	0x32ae2d32
+	.long	0x3f800000
+	.long	0xbe84a20e
+	.long	0xbf2beb4a
+	.long	0x32b73136
+	.long	0x3f800000
+	.long	0xbe789e3f
+	.long	0xbf273656
+	.long	0x32038343
+	.long	0x3f800000
+	.long	0xbe686ff3
+	.long	0xbf226799
+	.long	0xb22123bb
+	.long	0x3f800000
+	.long	0xbe58bbb7
+	.long	0xbf1d7fd1
+	.long	0xb292050c
+	.long	0x3f800000
+	.long	0xbe4983f7
+	.long	0xbf187fc0
+	.long	0x31c7a3f3
+	.long	0x3f800000
+	.long	0xbe3acb0c
+	.long	0xbf13682a
+	.long	0xb2cdd12e
+	.long	0x3f800000
+	.long	0xbe2c933b
+	.long	0xbf0e39da
+	.long	0x324a32e7
+	.long	0x3f800000
+	.long	0xbe1edeb5
+	.long	0xbf08f59b
+	.long	0x32be4b4e
+	.long	0x3f800000
+	.long	0xbe11af97
+	.long	0xbf039c3d
+	.long	0x325ba002
+	.long	0x3f800000
+	.long	0xbe0507ea
+	.long	0xbefc5d27
+	.long	0x3180eca9
+	.long	0x3f800000
+	.long	0xbdf1d344
+	.long	0xbef15aea
+	.long	0x31ff2139
+	.long	0x3f800000
+	.long	0xbddaad38
+	.long	0xbee63375
+	.long	0x31d9c774
+	.long	0x3f800000
+	.long	0xbdc4a143
+	.long	0xbedae880
+	.long	0xb21e15cc
+	.long	0x3f800000
+	.long	0xbdafb2cc
+	.long	0xbecf7bca
+	.long	0xb16a3b63
+	.long	0x3f800000
+	.long	0xbd9be50c
+	.long	0xbec3ef15
+	.long	0xb1d5d52c
+	.long	0x3f800000
+	.long	0xbd893b12
+	.long	0xbeb8442a
+	.long	0x32705ba6
+	.long	0x3f800000
+	.long	0xbd6f6f7e
+	.long	0xbeac7cd4
+	.long	0x32254e02
+	.long	0x3f800000
+	.long	0xbd4ebb8a
+	.long	0xbea09ae5
+	.long	0x323e89a0
+	.long	0x3f800000
+	.long	0xbd305f55
+	.long	0xbe94a031
+	.long	0xb26d59f0
+	.long	0x3f800000
+	.long	0xbd145f8c
+	.long	0xbe888e93
+	.long	0xb12c7d9e
+	.long	0x3f800000
+	.long	0xbcf58104
+	.long	0xbe78cfcc
+	.long	0x311bd41d
+	.long	0x3f800000
+	.long	0xbcc70c54
+	.long	0xbe605c13
+	.long	0xb1a7e4f6
+	.long	0x3f800000
+	.long	0xbc9d6830
+	.long	0xbe47c5c2
+	.long	0x30e5967d
+	.long	0x3f800000
+	.long	0xbc71360b
+	.long	0xbe2f10a2
+	.long	0xb11167f9
+	.long	0x3f800000
+	.long	0xbc315502
+	.long	0xbe164083
+	.long	0xb1e8e614
+	.long	0x3f800000
+	.long	0xbbf66e3c
+	.long	0xbdfab273
+	.long	0x311568cf
+	.long	0x3f800000
+	.long	0xbb9dc971
+	.long	0xbdc8bd36
+	.long	0x307592f5
+	.long	0x3f800000
+	.long	0xbb319298
+	.long	0xbd96a905
+	.long	0x31531e61
+	.long	0x3f800000
+	.long	0xba9de1c8
+	.long	0xbd48fb30
+	.long	0x30ef227f
+	.long	0x3f800000
+	.long	0xb99de7df
+	.long	0xbcc90ab0
+	.long	0x3005c998
+	.long	0x3f800000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x00000000
+	.long	0x3f800000
+	.long	0xb99de7df
+	.long	0x3cc90ab0
+	.long	0xb005c998
+	.long	0x3f800000
+	.long	0xba9de1c8
+	.long	0x3d48fb30
+	.long	0xb0ef227f
+	.long	0x3f800000
+	.long	0xbb319298
+	.long	0x3d96a905
+	.long	0xb1531e61
+	.long	0x3f800000
+	.long	0xbb9dc971
+	.long	0x3dc8bd36
+	.long	0xb07592f5
+	.long	0x3f800000
+	.long	0xbbf66e3c
+	.long	0x3dfab273
+	.long	0xb11568cf
+	.long	0x3f800000
+	.long	0xbc315502
+	.long	0x3e164083
+	.long	0x31e8e614
+	.long	0x3f800000
+	.long	0xbc71360b
+	.long	0x3e2f10a2
+	.long	0x311167f9
+	.long	0x3f800000
+	.long	0xbc9d6830
+	.long	0x3e47c5c2
+	.long	0xb0e5967d
+	.long	0x3f800000
+	.long	0xbcc70c54
+	.long	0x3e605c13
+	.long	0x31a7e4f6
+	.long	0x3f800000
+	.long	0xbcf58104
+	.long	0x3e78cfcc
+	.long	0xb11bd41d
+	.long	0x3f800000
+	.long	0xbd145f8c
+	.long	0x3e888e93
+	.long	0x312c7d9e
+	.long	0x3f800000
+	.long	0xbd305f55
+	.long	0x3e94a031
+	.long	0x326d59f0
+	.long	0x3f800000
+	.long	0xbd4ebb8a
+	.long	0x3ea09ae5
+	.long	0xb23e89a0
+	.long	0x3f800000
+	.long	0xbd6f6f7e
+	.long	0x3eac7cd4
+	.long	0xb2254e02
+	.long	0x3f800000
+	.long	0xbd893b12
+	.long	0x3eb8442a
+	.long	0xb2705ba6
+	.long	0x3f800000
+	.long	0xbd9be50c
+	.long	0x3ec3ef15
+	.long	0x31d5d52c
+	.long	0x3f800000
+	.long	0xbdafb2cc
+	.long	0x3ecf7bca
+	.long	0x316a3b63
+	.long	0x3f800000
+	.long	0xbdc4a143
+	.long	0x3edae880
+	.long	0x321e15cc
+	.long	0x3f800000
+	.long	0xbddaad38
+	.long	0x3ee63375
+	.long	0xb1d9c774
+	.long	0x3f800000
+	.long	0xbdf1d344
+	.long	0x3ef15aea
+	.long	0xb1ff2139
+	.long	0x3f800000
+	.long	0xbe0507ea
+	.long	0x3efc5d27
+	.long	0xb180eca9
+	.long	0x3f800000
+	.long	0xbe11af97
+	.long	0x3f039c3d
+	.long	0xb25ba002
+	.long	0x3f800000
+	.long	0xbe1edeb5
+	.long	0x3f08f59b
+	.long	0xb2be4b4e
+	.long	0x3f800000
+	.long	0xbe2c933b
+	.long	0x3f0e39da
+	.long	0xb24a32e7
+	.long	0x3f800000
+	.long	0xbe3acb0c
+	.long	0x3f13682a
+	.long	0x32cdd12e
+	.long	0x3f800000
+	.long	0xbe4983f7
+	.long	0x3f187fc0
+	.long	0xb1c7a3f3
+	.long	0x3f800000
+	.long	0xbe58bbb7
+	.long	0x3f1d7fd1
+	.long	0x3292050c
+	.long	0x3f800000
+	.long	0xbe686ff3
+	.long	0x3f226799
+	.long	0x322123bb
+	.long	0x3f800000
+	.long	0xbe789e3f
+	.long	0x3f273656
+	.long	0xb2038343
+	.long	0x3f800000
+	.long	0xbe84a20e
+	.long	0x3f2beb4a
+	.long	0xb2b73136
+	.long	0x3f800000
+	.long	0xbe8d2f7d
+	.long	0x3f3085bb
+	.long	0xb2ae2d32
+	.long	0x3f800000
+	.long	0xbe95f61a
+	.long	0x3f3504f3
+	.long	0x324fe77a
+	.long	0x3f800000
+	.long	0x3e4216eb
+	.long	0x3f396842
+	.long	0xb2810007
+	.long	0x3f000000
+	.long	0x3e2fad27
+	.long	0x3f3daef9
+	.long	0x319aabec
+	.long	0x3f000000
+	.long	0x3e1cd957
+	.long	0x3f41d870
+	.long	0x32bff977
+	.long	0x3f000000
+	.long	0x3e099e65
+	.long	0x3f45e403
+	.long	0x32b15174
+	.long	0x3f000000
+	.long	0x3debfe8a
+	.long	0x3f49d112
+	.long	0x32992640
+	.long	0x3f000000
+	.long	0x3dc3fdff
+	.long	0x3f4d9f02
+	.long	0x327e70e8
+	.long	0x3f000000
+	.long	0x3d9b4153
+	.long	0x3f514d3d
+	.long	0x300c4f04
+	.long	0x3f000000
+	.long	0x3d639d9d
+	.long	0x3f54db31
+	.long	0x3290ea1a
+	.long	0x3f000000
+	.long	0x3d0f59aa
+	.long	0x3f584853
+	.long	0xb27d5fc0
+	.long	0x3f000000
+	.long	0x3c670f32
+	.long	0x3f5b941a
+	.long	0x32232dc8
+	.long	0x3f000000
+	.long	0xbbe8b648
+	.long	0x3f5ebe05
+	.long	0x32c6f953
+	.long	0x3f000000
+	.long	0xbcea5164
+	.long	0x3f61c598
+	.long	0xb2e7f425
+	.long	0x3f000000
+	.long	0xbd4e645a
+	.long	0x3f64aa59
+	.long	0x311a08fa
+	.long	0x3f000000
+	.long	0xbd945dff
+	.long	0x3f676bd8
+	.long	0xb2bc3389
+	.long	0x3f000000
+	.long	0xbdc210d8
+	.long	0x3f6a09a7
+	.long	0xb2eb236c
+	.long	0x3f000000
+	.long	0xbdf043ab
+	.long	0x3f6c835e
+	.long	0x32f328d4
+	.long	0x3f000000
+	.long	0xbe0f77ad
+	.long	0x3f6ed89e
+	.long	0xb29333dc
+	.long	0x3f000000
+	.long	0x3db1f34f
+	.long	0x3f710908
+	.long	0x321ed0dd
+	.long	0x3e800000
+	.long	0x3d826b93
+	.long	0x3f731447
+	.long	0x32c48e11
+	.long	0x3e800000
+	.long	0x3d25018c
+	.long	0x3f74fa0b
+	.long	0xb2939d22
+	.long	0x3e800000
+	.long	0x3c88e931
+	.long	0x3f76ba07
+	.long	0x326d092c
+	.long	0x3e800000
+	.long	0xbbe60685
+	.long	0x3f7853f8
+	.long	0xb20db9e5
+	.long	0x3e800000
+	.long	0xbcfd1f65
+	.long	0x3f79c79d
+	.long	0x32c64e59
+	.long	0x3e800000
+	.long	0xbd60e8f8
+	.long	0x3f7b14be
+	.long	0x32ff75cb
+	.long	0x3e800000
+	.long	0x3d3c4289
+	.long	0x3f7c3b28
+	.long	0xb231d68b
+	.long	0x3e000000
+	.long	0x3cb2041c
+	.long	0x3f7d3aac
+	.long	0xb0f75ae9
+	.long	0x3e000000
+	.long	0xbb29b1a9
+	.long	0x3f7e1324
+	.long	0xb2f1e603
+	.long	0x3e000000
+	.long	0xbcdd0b28
+	.long	0x3f7ec46d
+	.long	0x31f44949
+	.long	0x3e000000
+	.long	0x3c354825
+	.long	0x3f7f4e6d
+	.long	0x32d01884
+	.long	0x3d800000
+	.long	0xbc5c1342
+	.long	0x3f7fb10f
+	.long	0x31de5b5f
+	.long	0x3d800000
+	.long	0xbbdbd541
+	.long	0x3f7fec43
+	.long	0x3084cd0d
+	.long	0x3d000000
+
+/* General purpose constants:
+   absolute value mask */
+float_vector __sAbsMask 0x7fffffff
+
+/* threshold for out-of-range values */
+float_vector __sRangeReductionVal 0x461c4000
+
+/* +INF */
+float_vector __sRangeVal 0x7f800000
+
+/* High Accuracy version polynomial coefficients:
+   S1 = -1.66666666664728165763e-01 */
+float_vector __sS1 0xbe2aaaab
+
+/* S2 = 8.33329173045453069014e-03 */
+float_vector __sS2 0x3c08885c
+
+/* C1 = -5.00000000000000000000e-01 */
+float_vector __sC1 0xbf000000
+
+/* C2 = 4.16638942914469202550e-02 */
+float_vector __sC2 0x3d2aaa7c
+
+/* Range reduction PI-based constants:
+   PI high part  */
+float_vector __sPI1 0x40490000
+
+/* PI mid part 1 */
+float_vector __sPI2 0x3a7da000
+
+/* PI mid part 2 */
+float_vector __sPI3 0x34222000
+
+/* PI low part */
+float_vector __sPI4 0x2cb4611a
+
+/* PI1, PI2, and PI3 when FMA is available
+   PI high part (when FMA available) */
+float_vector __sPI1_FMA 0x40490fdb
+
+/* PI mid part  (when FMA available) */
+float_vector __sPI2_FMA 0xb3bbbd2e
+
+/* PI low part  (when FMA available) */
+float_vector __sPI3_FMA 0xa7772ced
+
+/* Polynomial constants for use without FMA, relative error ~ 2^(-26.625) */
+float_vector __sA3 0xbe2aaaa6
+float_vector __sA5 0x3c08876a
+float_vector __sA7 0xb94fb7ff
+float_vector __sA9 0x362edef8
+
+/* Polynomial constants for use with FMA, relative error ~ 2^(-26.417) */
+float_vector __sA5_FMA 0x3c088768
+float_vector __sA7_FMA 0xb94fb6cf
+float_vector __sA9_FMA 0x362ec335
+
+/* 1/PI */
+float_vector __sInvPI 0x3ea2f983
+
+/* right-shifter constant */
+float_vector __sRShifter 0x4b400000
+
+/* PI/2 */
+float_vector __sHalfPI 0x3fc90fdb
+
+/* 1/2 */
+float_vector __sOneHalf 0x3f000000
+	.type	__svml_scos_data,@object
+	.size __svml_scos_data,.-__svml_scos_data
diff --git a/sysdeps/x86_64/fpu/svml_s_cosf_data.h b/sysdeps/x86_64/fpu/svml_s_cosf_data.h
new file mode 100644
index 0000000..1e25c5a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_s_cosf_data.h
@@ -0,0 +1,58 @@
+/* Offsets for data table for vectorized cosf.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef S_COSF_DATA_H
+#define S_COSF_DATA_H
+
+.macro float_vector offset value
+.if .-__svml_scos_data != \offset
+.err
+.endif
+.rept 16
+.long \value
+.endr
+.endm
+
+#define __dT                            0
+#define __sAbsMask                      4096
+#define __sRangeReductionVal            4160
+#define __sRangeVal                     4224
+#define __sS1                           4288
+#define __sS2                           4352
+#define __sC1                           4416
+#define __sC2                           4480
+#define __sPI1                          4544
+#define __sPI2                          4608
+#define __sPI3                          4672
+#define __sPI4                          4736
+#define __sPI1_FMA                      4800
+#define __sPI2_FMA                      4864
+#define __sPI3_FMA                      4928
+#define __sA3                           4992
+#define __sA5                           5056
+#define __sA7                           5120
+#define __sA9                           5184
+#define __sA5_FMA                       5248
+#define __sA7_FMA                       5312
+#define __sA9_FMA                       5376
+#define __sInvPI                        5440
+#define __sRShifter                     5504
+#define __sHalfPI                       5568
+#define __sOneHalf                      5632
+
+#endif
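The float_vector macro defined above does two jobs at once: the .if/.err pair
refuses to assemble if the current position in __svml_scos_data does not
match the #define, and .rept replicates the 32-bit constant 16 times so one
symbol serves 128-, 256-, and 512-bit loads alike.  A compile-time C mirror
of the resulting layout (the struct is illustrative, not part of the
sources):

    #include <stddef.h>
    #include <stdint.h>

    struct svml_scos_layout
    {
      uint32_t dT[4096 / 4];   /* lookup rows of (CHL, SHi, SLo, Sigma) */
      uint32_t sAbsMask[16];   /* each scalar broadcast to 64 bytes     */
      uint32_t sRangeReductionVal[16], sRangeVal[16];
      uint32_t sS1[16], sS2[16], sC1[16], sC2[16];
      uint32_t sPI1[16], sPI2[16], sPI3[16], sPI4[16];
      uint32_t sPI1_FMA[16], sPI2_FMA[16], sPI3_FMA[16];
      uint32_t sA3[16], sA5[16], sA7[16], sA9[16];
      uint32_t sA5_FMA[16], sA7_FMA[16], sA9_FMA[16];
      uint32_t sInvPI[16], sRShifter[16], sHalfPI[16], sOneHalf[16];
    };

    /* These mirror the .if checks the assembler performs.  */
    _Static_assert (offsetof (struct svml_scos_layout, sAbsMask) == 4096,
                    "__sAbsMask offset");
    _Static_assert (offsetof (struct svml_scos_layout, sPI1_FMA) == 4800,
                    "__sPI1_FMA offset");
    _Static_assert (offsetof (struct svml_scos_layout, sOneHalf) == 5632,
                    "__sOneHalf offset");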
diff --git a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
new file mode 100644
index 0000000..d5b62ee
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
@@ -0,0 +1,111 @@
+/* Wrapper implementations of vector math functions.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* SSE2 ISA version as wrapper to scalar.  */
+.macro WRAPPER_IMPL_SSE2 callee
+        subq      $40, %rsp
+        cfi_adjust_cfa_offset(40)
+        movaps    %xmm0, (%rsp)
+        call      \callee@PLT
+        movss     %xmm0, 16(%rsp)
+        movss     4(%rsp), %xmm0
+        call      \callee@PLT
+        movss     %xmm0, 20(%rsp)
+        movss     8(%rsp), %xmm0
+        call      \callee@PLT
+        movss     %xmm0, 24(%rsp)
+        movss     12(%rsp), %xmm0
+        call      \callee@PLT
+        movss     16(%rsp), %xmm3
+        movss     20(%rsp), %xmm2
+        movss     24(%rsp), %xmm1
+        movss     %xmm0, 28(%rsp)
+        unpcklps  %xmm1, %xmm3
+        unpcklps  %xmm0, %xmm2
+        unpcklps  %xmm2, %xmm3
+        movaps    %xmm3, %xmm0
+        addq      $40, %rsp
+        cfi_adjust_cfa_offset(-40)
+        ret
+.endm
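WRAPPER_IMPL_SSE2 is the lowest rung of the fallback ladder: spill the 4-lane
vector, call the scalar routine once per lane, and repack the results into
one register.  A C model of the same idea, assuming an array round-trip is an
acceptable stand-in for the macro's stack slots and unpcklps shuffles:

    #include <math.h>
    #include <xmmintrin.h>

    static __m128
    cosf4_via_scalar (__m128 x)
    {
      float lane[4] __attribute__ ((aligned (16)));
      _mm_store_ps (lane, x);          /* movaps %xmm0, (%rsp) */
      for (int i = 0; i < 4; i++)
        lane[i] = cosf (lane[i]);      /* four cosf@PLT calls  */
      return _mm_load_ps (lane);       /* repack the results   */
    }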
+
+/* AVX/AVX2 ISA version as wrapper to SSE ISA version.  */
+.macro WRAPPER_IMPL_AVX callee
+        pushq     	%rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      	%rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq      	$-32, %rsp
+        subq      	$32, %rsp
+        vextractf128 	$1, %ymm0, (%rsp)
+        vzeroupper
+        call      	HIDDEN_JUMPTARGET(\callee)
+        vmovaps   	%xmm0, 16(%rsp)
+        vmovaps   	(%rsp), %xmm0
+        call      	HIDDEN_JUMPTARGET(\callee)
+        vmovaps   	%xmm0, %xmm1
+        vmovaps   	16(%rsp), %xmm0
+        vinsertf128 	$1, %xmm1, %ymm0, %ymm0
+        movq      	%rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq      	%rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+.endm
+
+/* AVX512 ISA version as wrapper to AVX2 ISA version.  */
+.macro WRAPPER_IMPL_AVX512 callee
+        pushq	%rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq	%rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq	$-64, %rsp
+        subq	$64, %rsp
+/* Below is encoding for vmovaps %zmm0, (%rsp).  */
+        .byte	0x62
+        .byte	0xf1
+        .byte	0x7c
+        .byte	0x48
+        .byte	0x29
+        .byte	0x04
+        .byte	0x24
+/* Below is encoding for vmovaps (%rsp), %ymm0.  */
+        .byte	0xc5
+        .byte	0xfc
+        .byte	0x28
+        .byte	0x04
+        .byte	0x24
+        call	HIDDEN_JUMPTARGET(\callee)
+/* Below is encoding for vmovaps 32(%rsp), %ymm0.  */
+        .byte	0xc5
+        .byte	0xfc
+        .byte	0x28
+        .byte	0x44
+        .byte	0x24
+        .byte	0x20
+        call	HIDDEN_JUMPTARGET(\callee)
+        movq	%rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq	%rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+.endm
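WRAPPER_IMPL_AVX and WRAPPER_IMPL_AVX512 apply the same halving pattern one
level up: store the wide vector, run the half-width kernel on each half, and
reassemble (the AVX512 variant spells its vmovaps instructions out as raw
bytes, apparently so the file still assembles with binutils that predate the
AVX-512 mnemonics).  An intrinsics sketch of the 8-to-4 case, with
cosf4_kernel standing in for _ZGVbN4v_cosf:

    #include <immintrin.h>

    extern __m128 cosf4_kernel (__m128);   /* stand-in for _ZGVbN4v_cosf */

    static __m256
    cosf8_via_cosf4 (__m256 x)
    {
      __m128 hi = _mm256_extractf128_ps (x, 1);  /* vextractf128 to stack */
      __m128 lo = _mm256_castps256_ps128 (x);
      lo = cosf4_kernel (lo);   /* the macro issues vzeroupper first */
      hi = cosf4_kernel (hi);
      return _mm256_insertf128_ps (_mm256_castps128_ps256 (lo), hi, 1);
    }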

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                          |   37 +
 NEWS                                               |    2 +-
 math/Makefile                                      |    9 +
 math/test-double-vlen2.h                           |    4 +-
 math/test-float-vlen16.h                           |   71 ++
 math/test-float-vlen4.h                            |   71 ++
 math/test-float-vlen8.h                            |   71 ++
 sysdeps/unix/sysv/linux/x86_64/libmvec.abilist     |    4 +
 sysdeps/x86/fpu/bits/math-vector.h                 |    2 +
 sysdeps/x86_64/fpu/Makefile                        |   17 +-
 sysdeps/x86_64/fpu/Versions                        |    1 +
 sysdeps/x86_64/fpu/libm-test-ulps                  |    8 +
 sysdeps/x86_64/fpu/multiarch/Makefile              |    3 +-
 sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S  |   39 +
 .../fpu/multiarch/svml_s_cosf16_core_avx512.S      |  460 ++++++++
 sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S   |   38 +
 .../x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S  |  227 ++++
 sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S   |   38 +
 .../x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S  |  215 ++++
 sysdeps/x86_64/fpu/svml_s_cosf16_core.S            |   25 +
 sysdeps/x86_64/fpu/svml_s_cosf4_core.S             |   29 +
 sysdeps/x86_64/fpu/svml_s_cosf8_core.S             |   29 +
 sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S         |   25 +
 sysdeps/x86_64/fpu/svml_s_cosf_data.S              | 1130 ++++++++++++++++++++
 sysdeps/x86_64/fpu/svml_s_cosf_data.h              |   58 +
 sysdeps/x86_64/fpu/svml_s_wrapper_impl.h           |  111 ++
 sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c    |   25 +
 sysdeps/x86_64/fpu/test-float-vlen16.c             |   25 +
 sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c     |   25 +
 sysdeps/x86_64/fpu/test-float-vlen4.c              |   23 +
 .../x86_64/fpu/test-float-vlen8-avx2-wrappers.c    |   28 +
 sysdeps/x86_64/fpu/test-float-vlen8-avx2.c         |   28 +
 sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c     |   25 +
 sysdeps/x86_64/fpu/test-float-vlen8.c              |   23 +
 34 files changed, 2919 insertions(+), 7 deletions(-)
 create mode 100644 math/test-float-vlen16.h
 create mode 100644 math/test-float-vlen4.h
 create mode 100644 math/test-float-vlen8.h
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S
 create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S
 create mode 100644 sysdeps/x86_64/fpu/svml_s_cosf16_core.S
 create mode 100644 sysdeps/x86_64/fpu/svml_s_cosf4_core.S
 create mode 100644 sysdeps/x86_64/fpu/svml_s_cosf8_core.S
 create mode 100644 sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S
 create mode 100644 sysdeps/x86_64/fpu/svml_s_cosf_data.S
 create mode 100644 sysdeps/x86_64/fpu/svml_s_cosf_data.h
 create mode 100644 sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
 create mode 100644 sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
 create mode 100644 sysdeps/x86_64/fpu/test-float-vlen16.c
 create mode 100644 sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
 create mode 100644 sysdeps/x86_64/fpu/test-float-vlen4.c
 create mode 100644 sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
 create mode 100644 sysdeps/x86_64/fpu/test-float-vlen8-avx2.c
 create mode 100644 sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
 create mode 100644 sysdeps/x86_64/fpu/test-float-vlen8.c


hooks/post-receive
-- 
GNU C Library master sources

