From 6af25acc7b6313fd8934c3b2f0eb3da5a1c6eb6b Mon Sep 17 00:00:00 2001 From: Andrew Senkevich Date: Wed, 17 Jun 2015 15:38:29 +0300 Subject: [PATCH] Vector log for x86_64 and tests. Here is implementation of vectorized log containing SSE, AVX, AVX2 and AVX512 versions according to Vector ABI . * bits/libm-simd-decl-stubs.h: Added stubs for log. * math/bits/mathcalls.h: Added log declaration with __MATHCALL_VEC. * sysdeps/unix/sysv/linux/x86_64/libmvec.abilist: New versions added. * sysdeps/x86/fpu/bits/math-vector.h: Added SIMD declaration and asm redirections for log. * sysdeps/x86_64/fpu/Makefile (libmvec-support): Added new files. * sysdeps/x86_64/fpu/Versions: New versions added. * sysdeps/x86_64/fpu/libm-test-ulps: Regenerated. * sysdeps/x86_64/fpu/multiarch/Makefile (libmvec-sysdep_routines): Added build of SSE, AVX2 and AVX512 IFUNC versions. * sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S: New file. * sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S: New file. * sysdeps/x86_64/fpu/svml_d_log2_core.S: New file. * sysdeps/x86_64/fpu/svml_d_log4_core.S: New file. * sysdeps/x86_64/fpu/svml_d_log4_core_avx.S: New file. * sysdeps/x86_64/fpu/svml_d_log8_core.S: New file. * sysdeps/x86_64/fpu/svml_d_log_data.S: New file. * sysdeps/x86_64/fpu/svml_d_log_data.h: New file. * sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c: Added vector log test. * sysdeps/x86_64/fpu/test-double-vlen2.c: Likewise. * sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c: Likewise. * sysdeps/x86_64/fpu/test-double-vlen4-avx2.c: Likewise. * sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c: Likewise. * sysdeps/x86_64/fpu/test-double-vlen4.c: Likewise. * sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c: Likewise. 
* sysdeps/x86_64/fpu/test-double-vlen8.c: Likewise. * NEWS: Mention addition of x86_64 vector log. --- ChangeLog | 34 + NEWS | 2 +- bits/libm-simd-decl-stubs.h | 4 + math/bits/mathcalls.h | 2 +- .../unix/sysv/linux/x86_64/libmvec.abilist | 4 + sysdeps/x86/fpu/bits/math-vector.h | 12 + sysdeps/x86_64/fpu/Makefile | 2 + sysdeps/x86_64/fpu/Versions | 1 + sysdeps/x86_64/fpu/libm-test-ulps | 12 + sysdeps/x86_64/fpu/multiarch/Makefile | 2 + .../x86_64/fpu/multiarch/svml_d_log2_core.S | 38 + .../fpu/multiarch/svml_d_log2_core_sse4.S | 229 +++ .../x86_64/fpu/multiarch/svml_d_log4_core.S | 38 + .../fpu/multiarch/svml_d_log4_core_avx2.S | 210 +++ .../x86_64/fpu/multiarch/svml_d_log8_core.S | 39 + .../fpu/multiarch/svml_d_log8_core_avx512.S | 468 +++++ sysdeps/x86_64/fpu/svml_d_log2_core.S | 29 + sysdeps/x86_64/fpu/svml_d_log4_core.S | 29 + sysdeps/x86_64/fpu/svml_d_log4_core_avx.S | 25 + sysdeps/x86_64/fpu/svml_d_log8_core.S | 25 + sysdeps/x86_64/fpu/svml_d_log_data.S | 1662 +++++++++++++++++ sysdeps/x86_64/fpu/svml_d_log_data.h | 54 + .../x86_64/fpu/test-double-vlen2-wrappers.c | 1 + sysdeps/x86_64/fpu/test-double-vlen2.c | 1 + .../fpu/test-double-vlen4-avx2-wrappers.c | 1 + sysdeps/x86_64/fpu/test-double-vlen4-avx2.c | 1 + .../x86_64/fpu/test-double-vlen4-wrappers.c | 1 + sysdeps/x86_64/fpu/test-double-vlen4.c | 1 + .../x86_64/fpu/test-double-vlen8-wrappers.c | 1 + sysdeps/x86_64/fpu/test-double-vlen8.c | 1 + 30 files changed, 2927 insertions(+), 2 deletions(-) create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S create mode 100644 sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S create mode 100644 sysdeps/x86_64/fpu/svml_d_log2_core.S create mode 100644 
sysdeps/x86_64/fpu/svml_d_log4_core.S create mode 100644 sysdeps/x86_64/fpu/svml_d_log4_core_avx.S create mode 100644 sysdeps/x86_64/fpu/svml_d_log8_core.S create mode 100644 sysdeps/x86_64/fpu/svml_d_log_data.S create mode 100644 sysdeps/x86_64/fpu/svml_d_log_data.h diff --git a/ChangeLog b/ChangeLog index aedec5a5a5..bad022e036 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,37 @@ +2015-06-17 Andrew Senkevich + + * bits/libm-simd-decl-stubs.h: Added stubs for log. + * math/bits/mathcalls.h: Added log declaration with __MATHCALL_VEC. + * sysdeps/unix/sysv/linux/x86_64/libmvec.abilist: New versions added. + * sysdeps/x86/fpu/bits/math-vector.h: Added SIMD declaration and asm + redirections for log. + * sysdeps/x86_64/fpu/Makefile (libmvec-support): Added new files. + * sysdeps/x86_64/fpu/Versions: New versions added. + * sysdeps/x86_64/fpu/libm-test-ulps: Regenerated. + * sysdeps/x86_64/fpu/multiarch/Makefile (libmvec-sysdep_routines): Added + build of SSE, AVX2 and AVX512 IFUNC versions. + * sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S: New file. + * sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S: New file. + * sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S: New file. + * sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S: New file. + * sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S: New file. + * sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S: New file. + * sysdeps/x86_64/fpu/svml_d_log2_core.S: New file. + * sysdeps/x86_64/fpu/svml_d_log4_core.S: New file. + * sysdeps/x86_64/fpu/svml_d_log4_core_avx.S: New file. + * sysdeps/x86_64/fpu/svml_d_log8_core.S: New file. + * sysdeps/x86_64/fpu/svml_d_log_data.S: New file. + * sysdeps/x86_64/fpu/svml_d_log_data.h: New file. + * sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c: Added vector log test. + * sysdeps/x86_64/fpu/test-double-vlen2.c: Likewise. + * sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c: Likewise. + * sysdeps/x86_64/fpu/test-double-vlen4-avx2.c: Likewise. 
+ * sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c: Likewise. + * sysdeps/x86_64/fpu/test-double-vlen4.c: Likewise. + * sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c: Likewise. + * sysdeps/x86_64/fpu/test-double-vlen8.c: Likewise. + * NEWS: Mention addition of x86_64 vector log. + 2015-06-17 Szabolcs Nagy * sysdeps/aarch64/dl-tlsdesc.S (_dl_tlsdesc_return_lazy): Fix diff --git a/NEWS b/NEWS index b215276b07..4c666878dd 100644 --- a/NEWS +++ b/NEWS @@ -53,7 +53,7 @@ Version 2.22 condition in some applications. * Added vector math library named libmvec with the following vectorized x86_64 - implementations: cos, cosf, sin, sinf. + implementations: cos, cosf, sin, sinf, log. The library can be disabled with --disable-mathvec. Use of the functions is enabled with -fopenmp -ffast-math starting from -O1 for GCC version >= 4.9.0. The library is linked in as needed when using -lm (no need to specify -lmvec diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h index 50310d643a..6367b775a3 100644 --- a/bits/libm-simd-decl-stubs.h +++ b/bits/libm-simd-decl-stubs.h @@ -41,4 +41,8 @@ #define __DECL_SIMD_sinf #define __DECL_SIMD_sinl +#define __DECL_SIMD_log +#define __DECL_SIMD_logf +#define __DECL_SIMD_logl + #endif diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h index fbe7a3a6af..c41946fa0e 100644 --- a/math/bits/mathcalls.h +++ b/math/bits/mathcalls.h @@ -106,7 +106,7 @@ __MATHCALL (frexp,, (_Mdouble_ __x, int *__exponent)); __MATHCALL (ldexp,, (_Mdouble_ __x, int __exponent)); /* Natural logarithm of X. */ -__MATHCALL (log,, (_Mdouble_ __x)); +__MATHCALL_VEC (log,, (_Mdouble_ __x)); /* Base-ten logarithm of X. 
*/ __MATHCALL (log10,, (_Mdouble_ __x)); diff --git a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist index dcf9c7d148..3357957d3d 100644 --- a/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/x86_64/libmvec.abilist @@ -1,18 +1,22 @@ GLIBC_2.22 GLIBC_2.22 A _ZGVbN2v_cos F + _ZGVbN2v_log F _ZGVbN2v_sin F _ZGVbN4v_cosf F _ZGVbN4v_sinf F _ZGVcN4v_cos F + _ZGVcN4v_log F _ZGVcN4v_sin F _ZGVcN8v_cosf F _ZGVcN8v_sinf F _ZGVdN4v_cos F + _ZGVdN4v_log F _ZGVdN4v_sin F _ZGVdN8v_cosf F _ZGVdN8v_sinf F _ZGVeN16v_cosf F _ZGVeN16v_sinf F _ZGVeN8v_cos F + _ZGVeN8v_log F _ZGVeN8v_sin F diff --git a/sysdeps/x86/fpu/bits/math-vector.h b/sysdeps/x86/fpu/bits/math-vector.h index 2b739c5444..ed85622560 100644 --- a/sysdeps/x86/fpu/bits/math-vector.h +++ b/sysdeps/x86/fpu/bits/math-vector.h @@ -36,5 +36,17 @@ # define __DECL_SIMD_sin __DECL_SIMD_x86_64 # undef __DECL_SIMD_sinf # define __DECL_SIMD_sinf __DECL_SIMD_x86_64 +# undef __DECL_SIMD_log +# define __DECL_SIMD_log __DECL_SIMD_x86_64 + +/* Workaround to exclude unnecessary symbol aliases in libmvec + while GCC creates the vector names based on scalar asm name. + Corresponding discussion started at + . 
*/ +__asm__ ("_ZGVbN2v___log_finite = _ZGVbN2v_log"); +__asm__ ("_ZGVcN4v___log_finite = _ZGVcN4v_log"); +__asm__ ("_ZGVdN4v___log_finite = _ZGVdN4v_log"); +__asm__ ("_ZGVeN8v___log_finite = _ZGVeN8v_log"); + # endif #endif diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile index b6ecbc3cee..a5097468ee 100644 --- a/sysdeps/x86_64/fpu/Makefile +++ b/sysdeps/x86_64/fpu/Makefile @@ -7,6 +7,8 @@ libmvec-support += svml_d_cos2_core svml_d_cos4_core_avx \ svml_s_cosf8_core svml_s_cosf16_core svml_s_cosf_data \ svml_s_sinf4_core svml_s_sinf8_core_avx \ svml_s_sinf8_core svml_s_sinf16_core svml_s_sinf_data \ + svml_d_log2_core svml_d_log4_core_avx svml_d_log4_core \ + svml_d_log8_core svml_d_log_data \ init-arch endif diff --git a/sysdeps/x86_64/fpu/Versions b/sysdeps/x86_64/fpu/Versions index 3f3b22812b..7bda47f4af 100644 --- a/sysdeps/x86_64/fpu/Versions +++ b/sysdeps/x86_64/fpu/Versions @@ -2,6 +2,7 @@ libmvec { GLIBC_2.22 { _ZGVbN2v_cos; _ZGVcN4v_cos; _ZGVdN4v_cos; _ZGVeN8v_cos; _ZGVbN2v_sin; _ZGVcN4v_sin; _ZGVdN4v_sin; _ZGVeN8v_sin; + _ZGVbN2v_log; _ZGVcN4v_log; _ZGVdN4v_log; _ZGVeN8v_log; _ZGVbN4v_cosf; _ZGVcN8v_cosf; _ZGVdN8v_cosf; _ZGVeN16v_cosf; _ZGVbN4v_sinf; _ZGVcN8v_sinf; _ZGVdN8v_sinf; _ZGVeN16v_sinf; } diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index c2b6c4dfcb..949a099090 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -1847,6 +1847,18 @@ ifloat: 2 ildouble: 1 ldouble: 1 +Function: "log_vlen2": +double: 1 + +Function: "log_vlen4": +double: 1 + +Function: "log_vlen4_avx2": +double: 1 + +Function: "log_vlen8": +double: 1 + Function: "pow": float: 3 ifloat: 3 diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index 61759b8d0b..16d93caf57 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -56,6 +56,8 @@ ifeq ($(subdir),mathvec) libmvec-sysdep_routines += svml_d_cos2_core_sse4 
svml_d_cos4_core_avx2 \ svml_d_cos8_core_avx512 svml_d_sin2_core_sse4 \ svml_d_sin4_core_avx2 svml_d_sin8_core_avx512 \ + svml_d_log2_core_sse4 svml_d_log4_core_avx2 \ + svml_d_log8_core_avx512 \ svml_s_cosf4_core_sse4 svml_s_cosf8_core_avx2 \ svml_s_cosf16_core_avx512 svml_s_sinf4_core_sse4 \ svml_s_sinf8_core_avx2 svml_s_sinf16_core_avx512 diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S new file mode 100644 index 0000000000..38d369fc3c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S @@ -0,0 +1,38 @@ +/* Multiple versions of vectorized log. + Copyright (C) 2014-2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+        .text
+ENTRY (_ZGVbN2v_log)   /* IFUNC resolver: hands back the chosen implementation address in %rax.  */
+        .type   _ZGVbN2v_log, @gnu_indirect_function
+        cmpl    $0, KIND_OFFSET+__cpu_features(%rip)    /* field still zero: call __init_cpu_features first */
+        jne     1f
+        call    __init_cpu_features
+1:      leaq    _ZGVbN2v_log_sse4(%rip), %rax           /* tentatively pick the SSE4 kernel */
+        testl   $bit_SSE4_1, __cpu_features+CPUID_OFFSET+index_SSE4_1(%rip)
+        jz      2f                                      /* SSE4.1 bit clear: use the _sse2 build instead */
+        ret
+2:      leaq    _ZGVbN2v_log_sse2(%rip), %rax
+        ret
+END (_ZGVbN2v_log)
+libmvec_hidden_def (_ZGVbN2v_log)
+
+#define _ZGVbN2v_log _ZGVbN2v_log_sse2
+#include "../svml_d_log2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S
new file mode 100644
index 0000000000..82f3d8215d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S
@@ -0,0 +1,229 @@
+/* Function log vectorized with SSE4.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   .
*/
+
+#include <sysdep.h>
+#include "svml_d_log_data.h"
+
+        .text
+ENTRY (_ZGVbN2v_log_sse4)
+/*
+   ALGORITHM DESCRIPTION:
+
+   log(x) = -log(Rcp) + log(Rcp*x),
+     where Rcp ~ 1/x (accuracy ~9 bits, obtained by rounding
+     HW approximation to 1+9 mantissa bits)
+
+   Reduced argument R=Rcp*x-1 is used to approximate log(1+R) as polynomial
+
+   log(Rcp) = exponent_Rcp*log(2) + log(mantissa_Rcp)
+     -log(mantissa_Rcp) is obtained from a lookup table,
+     accessed by a 9-bit index
+ */
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq      $-64, %rsp
+        subq      $320, %rsp
+        movaps    %xmm0, %xmm6          /* %xmm6 holds the unmodified input; it is spilled at .LBL_1_3 */
+        movq      __svml_dlog_data@GOTPCREL(%rip), %r8
+        movaps    %xmm6, %xmm3
+        movaps    %xmm6, %xmm2
+
+/* isolate exponent bits */
+        movaps    %xmm6, %xmm1
+        psrlq     $20, %xmm1
+        movups    _ExpMask(%r8), %xmm5
+
+/* preserve mantissa, set input exponent to 2^(-10) */
+        andps     %xmm6, %xmm5
+        orps      _Two10(%r8), %xmm5
+
+/* reciprocal approximation good to at least 11 bits */
+        cvtpd2ps  %xmm5, %xmm7
+        cmpltpd   _MinNorm(%r8), %xmm3
+        cmpnlepd  _MaxNorm(%r8), %xmm2
+        movlhps   %xmm7, %xmm7
+
+/* combine and get argument value range mask */
+        orps      %xmm2, %xmm3
+        rcpps     %xmm7, %xmm0
+        movmskpd  %xmm3, %eax           /* one bit per lane that is < _MinNorm or not <= _MaxNorm */
+        movups    _HalfMask(%r8), %xmm2
+
+/* argument reduction started:  R = Mantissa*Rcp - 1 */
+        andps     %xmm5, %xmm2
+        cvtps2pd  %xmm0, %xmm4
+        subpd     %xmm2, %xmm5
+
+/* round reciprocal to nearest integer, will have 1+9 mantissa bits */
+        roundpd   $0, %xmm4, %xmm4
+        mulpd     %xmm4, %xmm2
+        mulpd     %xmm4, %xmm5
+        subpd     _One(%r8), %xmm2
+        addpd     %xmm2, %xmm5
+        movups    _Threshold(%r8), %xmm2
+
+/* calculate index for table lookup */
+        movaps    %xmm4, %xmm3
+        cmpltpd   %xmm4, %xmm2
+        pshufd    $221, %xmm1, %xmm7
+        psrlq     $40, %xmm3
+
+/* convert biased exponent to DP format */
+        cvtdq2pd  %xmm7, %xmm0
+        movd      %xmm3, %edx
+        movups    _poly_coeff_1(%r8), %xmm4
+
+/* polynomial computation */
+        mulpd     %xmm5, %xmm4
+        andps     _Bias(%r8), %xmm2
+        orps      _Bias1(%r8), %xmm2
+
+/*
+   Table stores -log(0.5*mantissa) for larger mantissas,
+   adjust exponent accordingly
+ */
+        subpd     %xmm2, %xmm0
+        addpd     _poly_coeff_2(%r8), %xmm4
+
+/* exponent*log(2.0) */
+        mulpd     _L2(%r8), %xmm0
+        movaps    %xmm5, %xmm2
+        mulpd     %xmm5, %xmm2
+        movups    _poly_coeff_3(%r8), %xmm7
+        mulpd     %xmm5, %xmm7
+        mulpd     %xmm2, %xmm4
+        addpd     _poly_coeff_4(%r8), %xmm7
+        addpd     %xmm4, %xmm7
+        mulpd     %xmm7, %xmm2
+        movslq    %edx, %rdx
+        pextrd    $2, %xmm3, %ecx
+
+/*
+   reconstruction:
+   (exponent*log(2)) + (LogRcp + (R+poly))
+ */
+        addpd     %xmm2, %xmm5
+        movslq    %ecx, %rcx
+        movsd     _LogRcp_lookup(%r8,%rdx), %xmm1
+        movhpd    _LogRcp_lookup(%r8,%rcx), %xmm1
+        addpd     %xmm5, %xmm1
+        addpd     %xmm1, %xmm0
+        testl     %eax, %eax
+        jne       .LBL_1_3              /* some lane was flagged: redo it with scalar log */
+
+.LBL_1_2:
+        cfi_remember_state
+        movq      %rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+
+.LBL_1_3:
+        cfi_restore_state
+        movups    %xmm6, 192(%rsp)      /* input vector goes to 192(%rsp) */
+        movups    %xmm0, 256(%rsp)      /* vector result goes to 256(%rsp) */
+        je        .LBL_1_2              /* not taken on this path: only the jne above leads here and the stores leave flags alone */
+
+        xorb      %cl, %cl
+        xorl      %edx, %edx
+        movups    %xmm8, 112(%rsp)
+        movups    %xmm9, 96(%rsp)
+        movups    %xmm10, 80(%rsp)
+        movups    %xmm11, 64(%rsp)
+        movups    %xmm12, 48(%rsp)
+        movups    %xmm13, 32(%rsp)
+        movups    %xmm14, 16(%rsp)
+        movups    %xmm15, (%rsp)
+        movq      %rsi, 136(%rsp)
+        movq      %rdi, 128(%rsp)
+        movq      %r12, 168(%rsp)
+        cfi_offset_rel_rsp (12, 168)
+        movb      %cl, %r12b            /* %r12b: pair counter, starts at 0 */
+        movq      %r13, 160(%rsp)
+        cfi_offset_rel_rsp (13, 160)
+        movl      %eax, %r13d           /* %r13d: range mask */
+        movq      %r14, 152(%rsp)
+        cfi_offset_rel_rsp (14, 152)
+        movl      %edx, %r14d           /* %r14d: mask bit index, starts at 0 */
+        movq      %r15, 144(%rsp)
+        cfi_offset_rel_rsp (15, 144)
+        cfi_remember_state
+
+.LBL_1_6:
+        btl       %r14d, %r13d          /* first mask bit of the current pair */
+        jc        .LBL_1_12
+
+.LBL_1_7:
+        lea       1(%r14), %esi
+        btl       %esi, %r13d           /* second mask bit of the current pair */
+        jc        .LBL_1_10
+
+.LBL_1_8:
+        incb      %r12b
+        addl      $2, %r14d
+        cmpb      $16, %r12b
+        jb        .LBL_1_6
+
+        movups    112(%rsp), %xmm8
+        movups    96(%rsp), %xmm9
+        movups    80(%rsp), %xmm10
+        movups    64(%rsp), %xmm11
+        movups    48(%rsp), %xmm12
+        movups    32(%rsp), %xmm13
+        movups    16(%rsp), %xmm14
+        movups    (%rsp), %xmm15
+        movq      136(%rsp), %rsi
+        movq      128(%rsp), %rdi
+        movq      168(%rsp), %r12
+        cfi_restore (%r12)
+        movq      160(%rsp), %r13
+        cfi_restore (%r13)
+        movq      152(%rsp), %r14
+        cfi_restore (%r14)
+        movq      144(%rsp), %r15
+        cfi_restore (%r15)
+        movups    256(%rsp), %xmm0      /* reload the patched result vector */
+        jmp       .LBL_1_2
+
+.LBL_1_10:
+        cfi_restore_state
+        movzbl    %r12b, %r15d
+        shlq      $4, %r15              /* 16 bytes per pair */
+        movsd     200(%rsp,%r15), %xmm0 /* second double of the pair from the spilled input */
+
+        call      log@PLT
+
+        movsd     %xmm0, 264(%rsp,%r15) /* overwrite the matching slot of the spilled result */
+        jmp       .LBL_1_8
+
+.LBL_1_12:
+        movzbl    %r12b, %r15d
+        shlq      $4, %r15              /* 16 bytes per pair */
+        movsd     192(%rsp,%r15), %xmm0 /* first double of the pair from the spilled input */
+
+        call      log@PLT
+
+        movsd     %xmm0, 256(%rsp,%r15) /* overwrite the matching slot of the spilled result */
+        jmp       .LBL_1_7
+
+END (_ZGVbN2v_log_sse4)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S
new file mode 100644
index 0000000000..ddb6105405
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S
@@ -0,0 +1,38 @@
+/* Multiple versions of vectorized log.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   .
*/
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+        .text
+ENTRY (_ZGVdN4v_log)   /* IFUNC resolver: hands back the chosen implementation address in %rax.  */
+        .type   _ZGVdN4v_log, @gnu_indirect_function
+        cmpl    $0, KIND_OFFSET+__cpu_features(%rip)    /* field still zero: call __init_cpu_features first */
+        jne     1f
+        call    __init_cpu_features
+1:      leaq    _ZGVdN4v_log_avx2(%rip), %rax           /* tentatively pick the AVX2 kernel */
+        testl   $bit_AVX2_Usable, __cpu_features+FEATURE_OFFSET+index_AVX2_Usable(%rip)
+        jz      2f                                      /* AVX2_Usable bit clear: use the _sse_wrapper build instead */
+        ret
+2:      leaq    _ZGVdN4v_log_sse_wrapper(%rip), %rax
+        ret
+END (_ZGVdN4v_log)
+libmvec_hidden_def (_ZGVdN4v_log)
+
+#define _ZGVdN4v_log _ZGVdN4v_log_sse_wrapper
+#include "../svml_d_log4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S
new file mode 100644
index 0000000000..816aede395
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S
@@ -0,0 +1,210 @@
+/* Function log vectorized with AVX2.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   .
*/
+
+#include <sysdep.h>
+#include "svml_d_log_data.h"
+
+        .text
+ENTRY (_ZGVdN4v_log_avx2)
+/* ALGORITHM DESCRIPTION:
+
+    log(x) = -log(Rcp) + log(Rcp*x),
+    where Rcp ~ 1/x (accuracy ~9 bits, obtained by rounding
+    HW approximation to 1+9 mantissa bits)
+
+    Reduced argument R=Rcp*x-1 is used to approximate log(1+R) as polynomial
+
+    log(Rcp) = exponent_Rcp*log(2) + log(mantissa_Rcp)
+      -log(mantissa_Rcp) is obtained from a lookup table,
+      accessed by a 9-bit index
+ */
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq      $-64, %rsp
+        subq      $448, %rsp
+        movq      __svml_dlog_data@GOTPCREL(%rip), %rax
+        vmovdqa   %ymm0, %ymm5          /* %ymm5 holds the unmodified input; it is spilled at .LBL_1_3 */
+
+/* isolate exponent bits */
+        vpsrlq    $20, %ymm5, %ymm0
+
+/* preserve mantissa, set input exponent to 2^(-10) */
+        vandpd    _ExpMask(%rax), %ymm5, %ymm6
+        vorpd     _Two10(%rax), %ymm6, %ymm4
+
+/* reciprocal approximation good to at least 11 bits */
+        vcvtpd2ps %ymm4, %xmm7
+        vrcpps    %xmm7, %xmm1
+        vcmplt_oqpd _MinNorm(%rax), %ymm5, %ymm7
+        vcvtps2pd %xmm1, %ymm3
+        vcmpnle_uqpd _MaxNorm(%rax), %ymm5, %ymm1
+        vextracti128 $1, %ymm0, %xmm2
+        vshufps   $221, %xmm2, %xmm0, %xmm6
+
+/* round reciprocal to nearest integer, will have 1+9 mantissa bits */
+        vroundpd  $0, %ymm3, %ymm2
+
+/* convert biased exponent to DP format */
+        vcvtdq2pd %xmm6, %ymm0
+
+/* combine and get argument value range mask */
+        vorpd     %ymm1, %ymm7, %ymm3
+        vmovupd   _One(%rax), %ymm1
+        vmovmskpd %ymm3, %ecx           /* one bit per lane that is < _MinNorm or not <= _MaxNorm */
+
+/* calculate index for table lookup */
+        vpsrlq    $40, %ymm2, %ymm3
+
+/* argument reduction started:  R = Mantissa*Rcp - 1 */
+        vfmsub213pd %ymm1, %ymm2, %ymm4
+        vcmpgt_oqpd _Threshold(%rax), %ymm2, %ymm2
+        vpcmpeqd  %ymm6, %ymm6, %ymm6   /* all-ones gather mask */
+        vxorpd    %ymm1, %ymm1, %ymm1
+        vgatherqpd %ymm6, _LogRcp_lookup(%rax,%ymm3), %ymm1
+
+/* exponent*log(2.0) */
+        vmovupd   _poly_coeff_1(%rax), %ymm6
+        vmulpd    %ymm4, %ymm4, %ymm3
+
+/* polynomial computation */
+        vfmadd213pd _poly_coeff_2(%rax), %ymm4, %ymm6
+        vandpd    _Bias(%rax), %ymm2, %ymm7
+        vorpd     _Bias1(%rax), %ymm7, %ymm2
+
+/*
+   Table stores -log(0.5*mantissa) for larger mantissas,
+   adjust exponent accordingly
+ */
+        vsubpd    %ymm2, %ymm0, %ymm0
+        vmovupd   _poly_coeff_3(%rax), %ymm2
+        vfmadd213pd _poly_coeff_4(%rax), %ymm4, %ymm2
+        vfmadd213pd %ymm2, %ymm3, %ymm6
+
+/*
+   reconstruction:
+   (exponent*log(2)) + (LogRcp + (R+poly))
+ */
+        vfmadd213pd %ymm4, %ymm3, %ymm6
+        vaddpd    %ymm1, %ymm6, %ymm4
+        vfmadd132pd _L2(%rax), %ymm4, %ymm0
+        testl     %ecx, %ecx
+        jne       .LBL_1_3              /* some lane was flagged: redo it with scalar log */
+
+.LBL_1_2:
+        cfi_remember_state
+        movq      %rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+
+.LBL_1_3:
+        cfi_restore_state
+        vmovupd   %ymm5, 320(%rsp)      /* input vector goes to 320(%rsp) */
+        vmovupd   %ymm0, 384(%rsp)      /* vector result goes to 384(%rsp) */
+        je        .LBL_1_2              /* not taken on this path: only the jne above leads here and the stores leave flags alone */
+
+        xorb      %dl, %dl
+        xorl      %eax, %eax
+        vmovups   %ymm8, 224(%rsp)
+        vmovups   %ymm9, 192(%rsp)
+        vmovups   %ymm10, 160(%rsp)
+        vmovups   %ymm11, 128(%rsp)
+        vmovups   %ymm12, 96(%rsp)
+        vmovups   %ymm13, 64(%rsp)
+        vmovups   %ymm14, 32(%rsp)
+        vmovups   %ymm15, (%rsp)
+        movq      %rsi, 264(%rsp)
+        movq      %rdi, 256(%rsp)
+        movq      %r12, 296(%rsp)
+        cfi_offset_rel_rsp (12, 296)
+        movb      %dl, %r12b            /* %r12b: pair counter, starts at 0 */
+        movq      %r13, 288(%rsp)
+        cfi_offset_rel_rsp (13, 288)
+        movl      %ecx, %r13d           /* %r13d: range mask */
+        movq      %r14, 280(%rsp)
+        cfi_offset_rel_rsp (14, 280)
+        movl      %eax, %r14d           /* %r14d: mask bit index, starts at 0 */
+        movq      %r15, 272(%rsp)
+        cfi_offset_rel_rsp (15, 272)
+        cfi_remember_state
+
+.LBL_1_6:
+        btl       %r14d, %r13d          /* first mask bit of the current pair */
+        jc        .LBL_1_12
+
+.LBL_1_7:
+        lea       1(%r14), %esi
+        btl       %esi, %r13d           /* second mask bit of the current pair */
+        jc        .LBL_1_10
+
+.LBL_1_8:
+        incb      %r12b
+        addl      $2, %r14d
+        cmpb      $16, %r12b
+        jb        .LBL_1_6
+
+        vmovups   224(%rsp), %ymm8
+        vmovups   192(%rsp), %ymm9
+        vmovups   160(%rsp), %ymm10
+        vmovups   128(%rsp), %ymm11
+        vmovups   96(%rsp), %ymm12
+        vmovups   64(%rsp), %ymm13
+        vmovups   32(%rsp), %ymm14
+        vmovups   (%rsp), %ymm15
+        vmovupd   384(%rsp), %ymm0      /* reload the patched result vector */
+        movq      264(%rsp), %rsi
+        movq      256(%rsp), %rdi
+        movq      296(%rsp), %r12
+        cfi_restore (%r12)
+        movq      288(%rsp), %r13
+        cfi_restore (%r13)
+        movq      280(%rsp), %r14
+        cfi_restore (%r14)
+        movq      272(%rsp), %r15
+        cfi_restore (%r15)
+        jmp       .LBL_1_2
+
+.LBL_1_10:
+        cfi_restore_state
+        movzbl    %r12b, %r15d
+        shlq      $4, %r15              /* 16 bytes per pair */
+        vmovsd    328(%rsp,%r15), %xmm0 /* second double of the pair from the spilled input */
+        vzeroupper
+
+        call      log@PLT
+
+        vmovsd    %xmm0, 392(%rsp,%r15) /* overwrite the matching slot of the spilled result */
+        jmp       .LBL_1_8
+
+.LBL_1_12:
+        movzbl    %r12b, %r15d
+        shlq      $4, %r15              /* 16 bytes per pair */
+        vmovsd    320(%rsp,%r15), %xmm0 /* first double of the pair from the spilled input */
+        vzeroupper
+
+        call      log@PLT
+
+        vmovsd    %xmm0, 384(%rsp,%r15) /* overwrite the matching slot of the spilled result */
+        jmp       .LBL_1_7
+
+END (_ZGVdN4v_log_avx2)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S
new file mode 100644
index 0000000000..2f9e9d8892
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S
@@ -0,0 +1,39 @@
+/* Multiple versions of vectorized log.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   .
*/
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+        .text
+ENTRY (_ZGVeN8v_log)   /* IFUNC resolver: hands back the chosen implementation address in %rax.  */
+        .type   _ZGVeN8v_log, @gnu_indirect_function
+        cmpl    $0, KIND_OFFSET+__cpu_features(%rip)    /* field still zero: call __init_cpu_features first */
+        jne     1f                                      /* "1f" = local label 1 below; a bare "1" is an absolute address */
+        call    __init_cpu_features
+1:      leaq    _ZGVeN8v_log_skx(%rip), %rax            /* first choice: SKX kernel, needs AVX512DQ_Usable */
+        testl   $bit_AVX512DQ_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512DQ_Usable(%rip)
+        jnz     3f
+2:      leaq    _ZGVeN8v_log_knl(%rip), %rax            /* second choice: KNL kernel, needs AVX512F_Usable */
+        testl   $bit_AVX512F_Usable, __cpu_features+FEATURE_OFFSET+index_AVX512F_Usable(%rip)
+        jnz     3f
+        leaq    _ZGVeN8v_log_avx2_wrapper(%rip), %rax   /* neither bit set: fall back to the _avx2_wrapper build */
+3:      ret
+END (_ZGVeN8v_log)
+
+#define _ZGVeN8v_log _ZGVeN8v_log_avx2_wrapper
+#include "../svml_d_log8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S
new file mode 100644
index 0000000000..b0f3dd580c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S
@@ -0,0 +1,468 @@
+/* Function log vectorized with AVX-512. KNL and SKX versions.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   .
*/ + +#include +#include "svml_d_log_data.h" +#include "svml_d_wrapper_impl.h" + + .text +ENTRY (_ZGVeN8v_log_knl) +#ifndef HAVE_AVX512_ASM_SUPPORT +WRAPPER_IMPL_AVX512 _ZGVdN4v_log +#else +/* + ALGORITHM DESCRIPTION: + + log(x) = -log(Rcp) + log(Rcp*x), + where Rcp ~ 1/x (accuracy ~9 bits, obtained by + rounding HW approximation to 1+9 mantissa bits) + + Reduced argument R=Rcp*x-1 is used to approximate log(1+R) as polynomial + + log(Rcp) = exponent_Rcp*log(2) + log(mantissa_Rcp) + -log(mantissa_Rcp) is obtained from a lookup table, + accessed by a 9-bit index + */ + pushq %rbp + cfi_adjust_cfa_offset (8) + cfi_rel_offset (%rbp, 0) + movq %rsp, %rbp + cfi_def_cfa_register (%rbp) + andq $-64, %rsp + subq $1280, %rsp + movq __svml_dlog_data@GOTPCREL(%rip), %rdx + movq $-1, %rax + +/* isolate exponent bits */ + vpsrlq $20, %zmm0, %zmm2 + vpsrlq $32, %zmm2, %zmm3 + vpxord %zmm2, %zmm2, %zmm2 + kxnorw %k3, %k3, %k3 + vmovups _Two10(%rdx), %zmm1 + vmovups _One(%rdx), %zmm9 + vpmovqd %zmm3, %ymm4 + +/* convert biased exponent to DP format */ + vcvtdq2pd %ymm4, %zmm13 + +/* preserve mantissa, set input exponent to 2^(-10) */ + vpternlogq $248, _ExpMask(%rdx), %zmm0, %zmm1 + vcmppd $17, _MinNorm(%rdx), %zmm0, %k1 + +/* reciprocal approximation good to at least 11 bits */ + vrcp28pd %zmm1, %zmm5 + vpbroadcastq %rax, %zmm6{%k1}{z} + vmovups _poly_coeff_3(%rdx), %zmm15 + vcmppd $22, _MaxNorm(%rdx), %zmm0, %k2 + vmovups _Bias1(%rdx), %zmm14 + +/* round reciprocal to nearest integer, will have 1+9 mantissa bits */ + vrndscalepd $8, %zmm5, %zmm11 + vpbroadcastq %rax, %zmm7{%k2}{z} + +/* argument reduction started: R = Mantissa*Rcp - 1 */ + vfmsub213pd %zmm9, %zmm11, %zmm1 + +/* calculate index for table lookup */ + vpsrlq $40, %zmm11, %zmm10 + vgatherqpd _LogRcp_lookup(%rdx,%zmm10), %zmm2{%k3} + vcmppd $30, _Threshold(%rdx), %zmm11, %k1 + +/* combine and get argument value range mask */ + vporq %zmm7, %zmm6, %zmm8 + +/* exponent*log(2.0) */ + vmovups _poly_coeff_1(%rdx), %zmm11 
+ vmulpd %zmm1, %zmm1, %zmm10 + vptestmq %zmm8, %zmm8, %k0 + vfmadd213pd _poly_coeff_4(%rdx), %zmm1, %zmm15 + kmovw %k0, %ecx + +/* polynomial computation */ + vfmadd213pd _poly_coeff_2(%rdx), %zmm1, %zmm11 + movzbl %cl, %ecx + vpbroadcastq %rax, %zmm12{%k1}{z} + vfmadd213pd %zmm15, %zmm10, %zmm11 + vpternlogq $248, _Bias(%rdx), %zmm12, %zmm14 + +/* + Table stores -log(0.5*mantissa) for larger mantissas, + adjust exponent accordingly + */ + vsubpd %zmm14, %zmm13, %zmm3 + +/* + reconstruction: + (exponent*log(2)) + (LogRcp + (R+poly)) + */ + vfmadd213pd %zmm1, %zmm10, %zmm11 + vaddpd %zmm2, %zmm11, %zmm1 + vfmadd132pd _L2(%rdx), %zmm1, %zmm3 + testl %ecx, %ecx + jne .LBL_1_3 + +.LBL_1_2: + cfi_remember_state + vmovaps %zmm3, %zmm0 + movq %rbp, %rsp + cfi_def_cfa_register (%rsp) + popq %rbp + cfi_adjust_cfa_offset (-8) + cfi_restore (%rbp) + ret + +.LBL_1_3: + cfi_restore_state + vmovups %zmm0, 1152(%rsp) + vmovups %zmm3, 1216(%rsp) + je .LBL_1_2 + + xorb %dl, %dl + kmovw %k4, 1048(%rsp) + xorl %eax, %eax + kmovw %k5, 1040(%rsp) + kmovw %k6, 1032(%rsp) + kmovw %k7, 1024(%rsp) + vmovups %zmm16, 960(%rsp) + vmovups %zmm17, 896(%rsp) + vmovups %zmm18, 832(%rsp) + vmovups %zmm19, 768(%rsp) + vmovups %zmm20, 704(%rsp) + vmovups %zmm21, 640(%rsp) + vmovups %zmm22, 576(%rsp) + vmovups %zmm23, 512(%rsp) + vmovups %zmm24, 448(%rsp) + vmovups %zmm25, 384(%rsp) + vmovups %zmm26, 320(%rsp) + vmovups %zmm27, 256(%rsp) + vmovups %zmm28, 192(%rsp) + vmovups %zmm29, 128(%rsp) + vmovups %zmm30, 64(%rsp) + vmovups %zmm31, (%rsp) + movq %rsi, 1064(%rsp) + movq %rdi, 1056(%rsp) + movq %r12, 1096(%rsp) + cfi_offset_rel_rsp (12, 1096) + movb %dl, %r12b + movq %r13, 1088(%rsp) + cfi_offset_rel_rsp (13, 1088) + movl %ecx, %r13d + movq %r14, 1080(%rsp) + cfi_offset_rel_rsp (14, 1080) + movl %eax, %r14d + movq %r15, 1072(%rsp) + cfi_offset_rel_rsp (15, 1072) + cfi_remember_state + +.LBL_1_6: + btl %r14d, %r13d + jc .LBL_1_12 + +.LBL_1_7: + lea 1(%r14), %esi + btl %esi, %r13d + jc .LBL_1_10 + 
+.LBL_1_8:
+        addb $1, %r12b
+        addl $2, %r14d
+        cmpb $16, %r12b
+        jb .LBL_1_6
+
+        kmovw 1048(%rsp), %k4
+        movq 1064(%rsp), %rsi
+        kmovw 1040(%rsp), %k5
+        movq 1056(%rsp), %rdi
+        kmovw 1032(%rsp), %k6
+        movq 1096(%rsp), %r12
+        cfi_restore (%r12)
+        movq 1088(%rsp), %r13
+        cfi_restore (%r13)
+        kmovw 1024(%rsp), %k7
+        vmovups 960(%rsp), %zmm16
+        vmovups 896(%rsp), %zmm17
+        vmovups 832(%rsp), %zmm18
+        vmovups 768(%rsp), %zmm19
+        vmovups 704(%rsp), %zmm20
+        vmovups 640(%rsp), %zmm21
+        vmovups 576(%rsp), %zmm22
+        vmovups 512(%rsp), %zmm23
+        vmovups 448(%rsp), %zmm24
+        vmovups 384(%rsp), %zmm25
+        vmovups 320(%rsp), %zmm26
+        vmovups 256(%rsp), %zmm27
+        vmovups 192(%rsp), %zmm28
+        vmovups 128(%rsp), %zmm29
+        vmovups 64(%rsp), %zmm30
+        vmovups (%rsp), %zmm31
+        movq 1080(%rsp), %r14
+        cfi_restore (%r14)
+        movq 1072(%rsp), %r15
+        cfi_restore (%r15)
+        vmovups 1216(%rsp), %zmm3
+        jmp .LBL_1_2
+
+.LBL_1_10:
+        cfi_restore_state
+        movzbl %r12b, %r15d
+        shlq $4, %r15
+        vmovsd 1160(%rsp,%r15), %xmm0
+        call log@PLT
+        vmovsd %xmm0, 1224(%rsp,%r15)
+        jmp .LBL_1_8
+
+.LBL_1_12:
+        movzbl %r12b, %r15d
+        shlq $4, %r15
+        vmovsd 1152(%rsp,%r15), %xmm0
+        call log@PLT
+        vmovsd %xmm0, 1216(%rsp,%r15)
+        jmp .LBL_1_7
+#endif
+END (_ZGVeN8v_log_knl)
+
+ENTRY (_ZGVeN8v_log_skx) /* 8 x double log: input in zmm0, result returned in zmm0 */
+#ifndef HAVE_AVX512_ASM_SUPPORT /* without assembler support, use the wrapper macro instead of the code below */
+WRAPPER_IMPL_AVX512 _ZGVdN4v_log
+#else
+/*
+   ALGORITHM DESCRIPTION:
+
+     log(x) = -log(Rcp) + log(Rcp*x),
+       where Rcp ~ 1/x (accuracy ~9 bits,
+       obtained by rounding HW approximation to 1+9 mantissa bits)
+
+     Reduced argument R=Rcp*x-1 is used to approximate log(1+R) as polynomial
+
+     log(Rcp) = exponent_Rcp*log(2) + log(mantissa_Rcp)
+       -log(mantissa_Rcp) is obtained from a lookup table,
+       accessed by a 9-bit index
+ */
+        pushq %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq %rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq $-64, %rsp /* align the frame to 64 bytes */
+        subq $1280, %rsp /* frame use below: 0..1023 zmm16-31, 1024..1048 k4-k7, 1056..1096 GPRs, 1152 input, 1216 result */
+        movq __svml_dlog_data@GOTPCREL(%rip), %rax /* rax = address of the constant table, loaded through the GOT */
+        vmovaps %zmm0, %zmm3 /* keep the input x in zmm3; zmm0 is overwritten further down */
+        kxnorw %k3, %k3, %k3 /* k3 = all ones, used as the mask of the gather below */
+        vmovups _Two10(%rax), %zmm2
+        vmovups _Threshold(%rax), %zmm14
+        vmovups _One(%rax), %zmm11
+        vcmppd $21, _MinNorm(%rax), %zmm3, %k1 /* predicate 21 (NLT_UQ): k1 = lanes where x is not less than _MinNorm */
+        vcmppd $18, _MaxNorm(%rax), %zmm3, %k2 /* predicate 18 (LE_OQ): k2 = lanes where x <= _MaxNorm */
+
+/* isolate exponent bits */
+        vpsrlq $20, %zmm3, %zmm4
+
+/* preserve mantissa, set input exponent to 2^(-10) */
+        vpternlogq $248, _ExpMask(%rax), %zmm3, %zmm2 /* imm 248 (0xF8): zmm2 = zmm2 | (zmm3 & _ExpMask) */
+        vpbroadcastq .L_2il0floatpacket.12(%rip), %zmm1 /* zmm1 = all-ones in every lane */
+        vpsrlq $32, %zmm4, %zmm6
+
+/* reciprocal approximation good to at least 11 bits */
+        vrcp14pd %zmm2, %zmm5
+
+/* exponent*log(2.0) */
+        vmovups _poly_coeff_1(%rax), %zmm4
+        vpmovqd %zmm6, %ymm7
+
+/* round reciprocal to nearest integer, will have 1+9 mantissa bits */
+        vrndscalepd $8, %zmm5, %zmm0
+
+/* calculate index for table lookup */
+        vpsrlq $40, %zmm0, %zmm12
+
+/* argument reduction started:  R = Mantissa*Rcp - 1 */
+        vfmsub213pd %zmm11, %zmm0, %zmm2
+        vpmovqd %zmm12, %ymm13
+
+/* polynomial computation */
+        vfmadd213pd _poly_coeff_2(%rax), %zmm2, %zmm4
+        vmovaps %zmm1, %zmm8
+        vmovaps %zmm1, %zmm9
+        vpxord %zmm5, %zmm5, %zmm5
+        vgatherdpd _LogRcp_lookup(%rax,%ymm13), %zmm5{%k3} /* zmm5 = table entries selected by the indices in ymm13 */
+        vmovups _Bias1(%rax), %zmm13
+        vpandnq %zmm3, %zmm3, %zmm8{%k1} /* x & ~x = 0: clears zmm8 in k1 lanes, other lanes stay all-ones */
+        vcmppd $21, %zmm0, %zmm14, %k1
+        vpandnq %zmm14, %zmm14, %zmm1{%k1}
+        vmulpd %zmm2, %zmm2, %zmm14
+        vpternlogq $248, _Bias(%rax), %zmm1, %zmm13
+        vmovups _poly_coeff_3(%rax), %zmm1
+        vfmadd213pd _poly_coeff_4(%rax), %zmm2, %zmm1
+        vfmadd213pd %zmm1, %zmm14, %zmm4
+
+/*
+   reconstruction:
+   (exponent*log(2)) + (LogRcp + (R+poly))
+ */
+        vfmadd213pd %zmm2, %zmm14, %zmm4
+        vaddpd %zmm5, %zmm4, %zmm2
+        vpandnq %zmm3, %zmm3, %zmm9{%k2} /* clears zmm9 in k2 lanes, other lanes stay all-ones */
+
+/* combine and get argument value range mask */
+        vorpd %zmm9, %zmm8, %zmm10
+        vcmppd $3, %zmm10, %zmm10, %k0 /* predicate 3 (UNORD_Q): the all-ones pattern is a NaN, so k0 marks lanes left set above */
+        kmovw %k0, %ecx
+
+/* convert biased exponent to DP format */
+        vcvtdq2pd %ymm7, %zmm15
+
+/*
+  Table stores -log(0.5*mantissa) for larger mantissas,
+  adjust exponent accordingly
+ */
+        vsubpd %zmm13, %zmm15, %zmm0
+        vfmadd132pd _L2(%rax), %zmm2, %zmm0
+        testl %ecx, %ecx
+        jne .LBL_2_3 /* at least one lane flagged by the range mask: go to the per-element path */
+
+.LBL_2_2:
+        cfi_remember_state
+        movq %rbp, %rsp /* common exit: drop the frame, result is in zmm0 */
+        cfi_def_cfa_register (%rsp)
+        popq %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+
+.LBL_2_3:
+        cfi_restore_state
+        vmovups %zmm3, 1152(%rsp) /* spill the 8 inputs */
+        vmovups %zmm0, 1216(%rsp) /* spill the 8 vector results */
+        je .LBL_2_2 /* NOTE(review): only reached through the jne above and the two stores do not write flags, so this looks never taken */
+
+        xorb %dl, %dl
+        xorl %eax, %eax
+        kmovw %k4, 1048(%rsp)
+        kmovw %k5, 1040(%rsp)
+        kmovw %k6, 1032(%rsp)
+        kmovw %k7, 1024(%rsp)
+        vmovups %zmm16, 960(%rsp)
+        vmovups %zmm17, 896(%rsp)
+        vmovups %zmm18, 832(%rsp)
+        vmovups %zmm19, 768(%rsp)
+        vmovups %zmm20, 704(%rsp)
+        vmovups %zmm21, 640(%rsp)
+        vmovups %zmm22, 576(%rsp)
+        vmovups %zmm23, 512(%rsp)
+        vmovups %zmm24, 448(%rsp)
+        vmovups %zmm25, 384(%rsp)
+        vmovups %zmm26, 320(%rsp)
+        vmovups %zmm27, 256(%rsp)
+        vmovups %zmm28, 192(%rsp)
+        vmovups %zmm29, 128(%rsp)
+        vmovups %zmm30, 64(%rsp)
+        vmovups %zmm31, (%rsp)
+        movq %rsi, 1064(%rsp)
+        movq %rdi, 1056(%rsp)
+        movq %r12, 1096(%rsp)
+        cfi_offset_rel_rsp (12, 1096)
+        movb %dl, %r12b /* r12b = pair counter, starts at 0 */
+        movq %r13, 1088(%rsp)
+        cfi_offset_rel_rsp (13, 1088)
+        movl %ecx, %r13d /* r13d = lane mask taken from k0 */
+        movq %r14, 1080(%rsp)
+        cfi_offset_rel_rsp (14, 1080)
+        movl %eax, %r14d /* r14d = index of the even bit of the current pair, starts at 0 */
+        movq %r15, 1072(%rsp)
+        cfi_offset_rel_rsp (15, 1072)
+        cfi_remember_state
+
+.LBL_2_6:
+        btl %r14d, %r13d /* even element of the pair flagged? */
+        jc .LBL_2_12
+
+.LBL_2_7:
+        lea 1(%r14), %esi
+        btl %esi, %r13d /* odd element of the pair flagged? */
+        jc .LBL_2_10
+
+.LBL_2_8:
+        incb %r12b
+        addl $2, %r14d
+        cmpb $16, %r12b /* the loop visits pair counters 0..15, i.e. mask bits 0..31 */
+        jb .LBL_2_6
+
+        kmovw 1048(%rsp), %k4 /* loop done: reload everything saved above */
+        kmovw 1040(%rsp), %k5
+        kmovw 1032(%rsp), %k6
+        kmovw 1024(%rsp), %k7
+        vmovups 960(%rsp), %zmm16
+        vmovups 896(%rsp), %zmm17
+        vmovups 832(%rsp), %zmm18
+        vmovups 768(%rsp), %zmm19
+        vmovups 704(%rsp), %zmm20
+        vmovups 640(%rsp), %zmm21
+        vmovups 576(%rsp), %zmm22
+        vmovups 512(%rsp), %zmm23
+        vmovups 448(%rsp), %zmm24
+        vmovups 384(%rsp), %zmm25
+        vmovups 320(%rsp), %zmm26
+        vmovups 256(%rsp), %zmm27
+        vmovups 192(%rsp), %zmm28
+        vmovups 128(%rsp), %zmm29
+        vmovups 64(%rsp), %zmm30
+        vmovups (%rsp), %zmm31
+        vmovups 1216(%rsp), %zmm0 /* reload the result vector, including slots rewritten by the scalar calls */
+        movq 1064(%rsp), %rsi
+        movq 1056(%rsp), %rdi
+        movq 1096(%rsp), %r12
+        cfi_restore (%r12)
+        movq 1088(%rsp), %r13
+        cfi_restore (%r13)
+        movq 1080(%rsp), %r14
+        cfi_restore (%r14)
+        movq 1072(%rsp), %r15
+        cfi_restore (%r15)
+        jmp .LBL_2_2
+
+.LBL_2_10:
+        cfi_restore_state
+        movzbl %r12b, %r15d
+        shlq $4, %r15 /* r15 = 16 * pair counter: byte offset of the pair in the spill areas */
+        vmovsd 1160(%rsp,%r15), %xmm0 /* odd element of the pair from the spilled input */
+        vzeroupper
+        vmovsd 1160(%rsp,%r15), %xmm0 /* same load as two lines above, issued again after vzeroupper */
+
+        call log@PLT
+
+        vmovsd %xmm0, 1224(%rsp,%r15) /* overwrite the matching slot of the spilled result */
+        jmp .LBL_2_8
+
+.LBL_2_12:
+        movzbl %r12b, %r15d
+        shlq $4, %r15 /* r15 = 16 * pair counter */
+        vmovsd 1152(%rsp,%r15), %xmm0 /* even element of the pair from the spilled input */
+        vzeroupper
+        vmovsd 1152(%rsp,%r15), %xmm0 /* same load as two lines above, issued again after vzeroupper */
+
+        call log@PLT
+
+        vmovsd %xmm0, 1216(%rsp,%r15) /* overwrite the matching slot of the spilled result */
+        jmp .LBL_2_7
+#endif
+END (_ZGVeN8v_log_skx)
+
+        .section .rodata, "a"
+.L_2il0floatpacket.12:
+        .long 0xffffffff,0xffffffff
+        .type .L_2il0floatpacket.12,@object
diff --git a/sysdeps/x86_64/fpu/svml_d_log2_core.S b/sysdeps/x86_64/fpu/svml_d_log2_core.S
new file mode 100644
index 0000000000..daa63b583f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_log2_core.S
@@ -0,0 +1,29 @@
+/* Function log vectorized with SSE2.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.
*/
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+        .text
+ENTRY (_ZGVbN2v_log)
+WRAPPER_IMPL_SSE2 log /* body is generated by the macro; presumably from svml_d_wrapper_impl.h, with scalar log as the callee - confirm there */
+END (_ZGVbN2v_log)
+
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVbN2v_log)
+#endif
diff --git a/sysdeps/x86_64/fpu/svml_d_log4_core.S b/sysdeps/x86_64/fpu/svml_d_log4_core.S
new file mode 100644
index 0000000000..009c93c837
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_log4_core.S
@@ -0,0 +1,29 @@
+/* Function log vectorized with AVX2, wrapper version.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+        .text
+ENTRY (_ZGVdN4v_log)
+WRAPPER_IMPL_AVX _ZGVbN2v_log /* 4-lane entry built by the wrapper macro around the 2-lane _ZGVbN2v_log */
+END (_ZGVdN4v_log)
+
+#ifndef USE_MULTIARCH
+ libmvec_hidden_def (_ZGVdN4v_log)
+#endif
diff --git a/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S
new file mode 100644
index 0000000000..554fc45712
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S
@@ -0,0 +1,25 @@
+/* Function log vectorized in AVX ISA as wrapper to SSE4 ISA version.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+        .text
+ENTRY (_ZGVcN4v_log)
+WRAPPER_IMPL_AVX _ZGVbN2v_log /* AVX (no AVX2) 4-lane entry built by the wrapper macro around the 2-lane _ZGVbN2v_log */
+END (_ZGVcN4v_log)
diff --git a/sysdeps/x86_64/fpu/svml_d_log8_core.S b/sysdeps/x86_64/fpu/svml_d_log8_core.S
new file mode 100644
index 0000000000..9728305f17
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_log8_core.S
@@ -0,0 +1,25 @@
+/* Function log vectorized with AVX-512. Wrapper to AVX2 version.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.
*/
+
+#include <sysdep.h>
+#include "svml_d_wrapper_impl.h"
+
+        .text
+ENTRY (_ZGVeN8v_log)
+WRAPPER_IMPL_AVX512 _ZGVdN4v_log /* 8-lane entry built by the wrapper macro around the 4-lane _ZGVdN4v_log */
+END (_ZGVeN8v_log)
diff --git a/sysdeps/x86_64/fpu/svml_d_log_data.S b/sysdeps/x86_64/fpu/svml_d_log_data.S
new file mode 100644
index 0000000000..1ce78e2c8b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/svml_d_log_data.S
@@ -0,0 +1,1662 @@
+/* Data for function log.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "svml_d_log_data.h"
+
+        .section .rodata, "a"
+        .align 64
+
+/* Data table for vector implementations of function log.
+   The table may contain polynomial, reduction, lookup coefficients
+   and other constants obtained through different methods
+   of research and experimental work.
*/ + .globl __svml_dlog_data +__svml_dlog_data: + +/* Lookup table in high+low parts and 9-bit index for + -log(mRcp), where mRcp is mantissa of 1/x 9-bit accurate reciprocal: */ +.if .-__svml_dlog_data != _Log_HA_table +.err +.endif + .quad 0xc086232bdd7a8300 + .quad 0xbe1ce91eef3fb100 + .quad 0xc086232fdc7ad828 + .quad 0xbe1cefcffda73b6a + .quad 0xc0862333d97d2ba0 + .quad 0xbe1cef406748f1ff + .quad 0xc0862337d48378e0 + .quad 0xbe1cef2a9429925a + .quad 0xc086233bcd8fb878 + .quad 0xbe1cf138d17ebecb + .quad 0xc086233fc4a3e018 + .quad 0xbe1ceff2dbbbb29e + .quad 0xc0862343b9c1e270 + .quad 0xbe1cf1a42aae437b + .quad 0xc0862347acebaf68 + .quad 0xbe1cef3b152048af + .quad 0xc086234b9e2333f0 + .quad 0xbe1cef20e127805e + .quad 0xc086234f8d6a5a30 + .quad 0xbe1cf00ad6052cf4 + .quad 0xc08623537ac30980 + .quad 0xbe1cefc4642ee597 + .quad 0xc0862357662f2660 + .quad 0xbe1cf1f277d36e16 + .quad 0xc086235b4fb092a0 + .quad 0xbe1ceed009e8d8e6 + .quad 0xc086235f37492d28 + .quad 0xbe1cf1e4038cb362 + .quad 0xc08623631cfad250 + .quad 0xbe1cf0b0873b8557 + .quad 0xc086236700c75b98 + .quad 0xbe1cf15bb3227c0b + .quad 0xc086236ae2b09fe0 + .quad 0xbe1cf151ef8ca9ed + .quad 0xc086236ec2b87358 + .quad 0xbe1cefe1dc2cd2ed + .quad 0xc0862372a0e0a780 + .quad 0xbe1cf0d1eec5454f + .quad 0xc08623767d2b0b48 + .quad 0xbe1ceeefd570bbce + .quad 0xc086237a57996af0 + .quad 0xbe1cee99ae91b3a7 + .quad 0xc086237e302d9028 + .quad 0xbe1cf0412830fbd1 + .quad 0xc086238206e94218 + .quad 0xbe1ceee898588610 + .quad 0xc0862385dbce4548 + .quad 0xbe1cee9a1fbcaaea + .quad 0xc0862389aede5bc0 + .quad 0xbe1ceed8e7cc1ad6 + .quad 0xc086238d801b4500 + .quad 0xbe1cf10c8d059da6 + .quad 0xc08623914f86be18 + .quad 0xbe1ceee6c63a8165 + .quad 0xc08623951d228180 + .quad 0xbe1cf0c3592d2ff1 + .quad 0xc0862398e8f04758 + .quad 0xbe1cf0026cc4cb1b + .quad 0xc086239cb2f1c538 + .quad 0xbe1cf15d48d8e670 + .quad 0xc08623a07b28ae60 + .quad 0xbe1cef359363787c + .quad 0xc08623a44196b390 + .quad 0xbe1cefdf1ab2e82c + .quad 0xc08623a8063d8338 + .quad 
0xbe1cefe43c02aa84 + .quad 0xc08623abc91ec960 + .quad 0xbe1cf044f5ae35b7 + .quad 0xc08623af8a3c2fb8 + .quad 0xbe1cf0b0b4001e1b + .quad 0xc08623b349975d98 + .quad 0xbe1cf1bae76dfbcf + .quad 0xc08623b70731f810 + .quad 0xbe1cef0a72e13a62 + .quad 0xc08623bac30da1c8 + .quad 0xbe1cf184007d2b6b + .quad 0xc08623be7d2bfb40 + .quad 0xbe1cf16f4b239e98 + .quad 0xc08623c2358ea2a0 + .quad 0xbe1cf0976acada87 + .quad 0xc08623c5ec3733d0 + .quad 0xbe1cf066318a16ff + .quad 0xc08623c9a1274880 + .quad 0xbe1ceffaa7148798 + .quad 0xc08623cd54607820 + .quad 0xbe1cf23ab02e9b6e + .quad 0xc08623d105e45800 + .quad 0xbe1cefdfef7d4fde + .quad 0xc08623d4b5b47b20 + .quad 0xbe1cf17fece44f2b + .quad 0xc08623d863d27270 + .quad 0xbe1cf18f907d0d7c + .quad 0xc08623dc103fccb0 + .quad 0xbe1cee61fe072c98 + .quad 0xc08623dfbafe1668 + .quad 0xbe1cf022dd891e2f + .quad 0xc08623e3640eda20 + .quad 0xbe1ceecc1daf4358 + .quad 0xc08623e70b73a028 + .quad 0xbe1cf0173c4fa380 + .quad 0xc08623eab12deec8 + .quad 0xbe1cf16a2150c2f4 + .quad 0xc08623ee553f4a30 + .quad 0xbe1cf1bf980b1f4b + .quad 0xc08623f1f7a93480 + .quad 0xbe1cef8b731663c2 + .quad 0xc08623f5986d2dc0 + .quad 0xbe1cee9a664d7ef4 + .quad 0xc08623f9378cb3f0 + .quad 0xbe1cf1eda2af6400 + .quad 0xc08623fcd5094320 + .quad 0xbe1cf1923f9d68d7 + .quad 0xc086240070e45548 + .quad 0xbe1cf0747cd3e03a + .quad 0xc08624040b1f6260 + .quad 0xbe1cf22ee855bd6d + .quad 0xc0862407a3bbe078 + .quad 0xbe1cf0d57360c00b + .quad 0xc086240b3abb4398 + .quad 0xbe1ceebc815cd575 + .quad 0xc086240ed01efdd0 + .quad 0xbe1cf03bfb970951 + .quad 0xc086241263e87f50 + .quad 0xbe1cf16e74768529 + .quad 0xc0862415f6193658 + .quad 0xbe1cefec64b8becb + .quad 0xc086241986b28f30 + .quad 0xbe1cf0838d210baa + .quad 0xc086241d15b5f448 + .quad 0xbe1cf0ea86e75b11 + .quad 0xc0862420a324ce28 + .quad 0xbe1cf1708d11d805 + .quad 0xc08624242f008380 + .quad 0xbe1ceea988c5a417 + .quad 0xc0862427b94a7910 + .quad 0xbe1cef166a7bbca5 + .quad 0xc086242b420411d0 + .quad 0xbe1cf0c9d9e86a38 + .quad 0xc086242ec92eaee8 + .quad 
0xbe1cef0946455411 + .quad 0xc08624324ecbaf98 + .quad 0xbe1cefea60907739 + .quad 0xc0862435d2dc7160 + .quad 0xbe1cf1ed0934ce42 + .quad 0xc086243955624ff8 + .quad 0xbe1cf191ba746c7d + .quad 0xc086243cd65ea548 + .quad 0xbe1ceeec78cf2a7e + .quad 0xc086244055d2c968 + .quad 0xbe1cef345284c119 + .quad 0xc0862443d3c012b8 + .quad 0xbe1cf24f77355219 + .quad 0xc08624475027d5e8 + .quad 0xbe1cf05bf087e114 + .quad 0xc086244acb0b65d0 + .quad 0xbe1cef3504a32189 + .quad 0xc086244e446c1398 + .quad 0xbe1ceff54b2a406f + .quad 0xc0862451bc4b2eb8 + .quad 0xbe1cf0757d54ed4f + .quad 0xc086245532aa04f0 + .quad 0xbe1cf0c8099fdfd5 + .quad 0xc0862458a789e250 + .quad 0xbe1cf0b173796a31 + .quad 0xc086245c1aec1138 + .quad 0xbe1cf11d8734540d + .quad 0xc086245f8cd1da60 + .quad 0xbe1cf1916a723ceb + .quad 0xc0862462fd3c84d8 + .quad 0xbe1cf19a911e1da7 + .quad 0xc08624666c2d5608 + .quad 0xbe1cf23a9ef72e4f + .quad 0xc0862469d9a591c0 + .quad 0xbe1cef503d947663 + .quad 0xc086246d45a67a18 + .quad 0xbe1cf0fceeb1a0b2 + .quad 0xc0862470b0314fa8 + .quad 0xbe1cf107e27e4fbc + .quad 0xc086247419475160 + .quad 0xbe1cf03dd9922331 + .quad 0xc086247780e9bc98 + .quad 0xbe1cefce1a10e129 + .quad 0xc086247ae719cd18 + .quad 0xbe1ceea47f73c4f6 + .quad 0xc086247e4bd8bd10 + .quad 0xbe1ceec0ac56d100 + .quad 0xc0862481af27c528 + .quad 0xbe1cee8a6593278a + .quad 0xc086248511081c70 + .quad 0xbe1cf2231dd9dec7 + .quad 0xc0862488717af888 + .quad 0xbe1cf0b4b8ed7da8 + .quad 0xc086248bd0818d68 + .quad 0xbe1cf1bd8d835002 + .quad 0xc086248f2e1d0d98 + .quad 0xbe1cf259acc107f4 + .quad 0xc08624928a4eaa20 + .quad 0xbe1cee897636b00c + .quad 0xc0862495e5179270 + .quad 0xbe1cee757f20c326 + .quad 0xc08624993e78f490 + .quad 0xbe1cefafd3aa54a4 + .quad 0xc086249c9673fd10 + .quad 0xbe1cee7298d38b97 + .quad 0xc086249fed09d6f8 + .quad 0xbe1ceedc158d4ceb + .quad 0xc08624a3423babe0 + .quad 0xbe1cf2282987cb2e + .quad 0xc08624a6960aa400 + .quad 0xbe1cefe7381ecc4b + .quad 0xc08624a9e877e600 + .quad 0xbe1cef328dbbce80 + .quad 0xc08624ad39849728 + .quad 
0xbe1cefde45f3cc71 + .quad 0xc08624b08931db58 + .quad 0xbe1cefa8b89433b9 + .quad 0xc08624b3d780d500 + .quad 0xbe1cef6773c0b139 + .quad 0xc08624b72472a528 + .quad 0xbe1cf031c931c11f + .quad 0xc08624ba70086b78 + .quad 0xbe1cf088f49275e7 + .quad 0xc08624bdba434630 + .quad 0xbe1cf17de0eaa86d + .quad 0xc08624c103245238 + .quad 0xbe1cefd492f1ba75 + .quad 0xc08624c44aacab08 + .quad 0xbe1cf1253e154466 + .quad 0xc08624c790dd6ad0 + .quad 0xbe1cf0fb09ee6d55 + .quad 0xc08624cad5b7aa58 + .quad 0xbe1cf1f08dd048fe + .quad 0xc08624ce193c8120 + .quad 0xbe1ceeca0809697f + .quad 0xc08624d15b6d0538 + .quad 0xbe1cef8d5662d968 + .quad 0xc08624d49c4a4b78 + .quad 0xbe1cee97b556ed78 + .quad 0xc08624d7dbd56750 + .quad 0xbe1cf1b14b6acb75 + .quad 0xc08624db1a0f6b00 + .quad 0xbe1cef1e860623f2 + .quad 0xc08624de56f96758 + .quad 0xbe1ceeaf4d156f3d + .quad 0xc08624e192946bf0 + .quad 0xbe1ceecc12b400ed + .quad 0xc08624e4cce18710 + .quad 0xbe1cf180c40c794f + .quad 0xc08624e805e1c5c8 + .quad 0xbe1cf185a08f7f65 + .quad 0xc08624eb3d9633d8 + .quad 0xbe1cef45fc924078 + .quad 0xc08624ee73ffdbb0 + .quad 0xbe1cf1e4f457f32a + .quad 0xc08624f1a91fc6a0 + .quad 0xbe1cf040147b8a5a + .quad 0xc08624f4dcf6fc98 + .quad 0xbe1cf1effca0dfb2 + .quad 0xc08624f80f868468 + .quad 0xbe1cf0470146e5bc + .quad 0xc08624fb40cf6390 + .quad 0xbe1cef4dd186e501 + .quad 0xc08624fe70d29e60 + .quad 0xbe1ceebe257f66c7 + .quad 0xc08625019f9137f0 + .quad 0xbe1ceefb7a1c395c + .quad 0xc0862504cd0c3220 + .quad 0xbe1cf209dedfed8c + .quad 0xc0862507f9448db0 + .quad 0xbe1cf082da464994 + .quad 0xc086250b243b4a18 + .quad 0xbe1cee88694a73cf + .quad 0xc086250e4df165a0 + .quad 0xbe1cf0b61e8f0531 + .quad 0xc08625117667dd78 + .quad 0xbe1cf1106599c962 + .quad 0xc08625149d9fad98 + .quad 0xbe1ceff1ee88af1f + .quad 0xc0862517c399d0c8 + .quad 0xbe1cf0f746994ef6 + .quad 0xc086251ae85740b8 + .quad 0xbe1cefe8a1d077e4 + .quad 0xc086251e0bd8f5e0 + .quad 0xbe1cf1a1da036092 + .quad 0xc08625212e1fe7a8 + .quad 0xbe1cf0f8a7786fcd + .quad 0xc08625244f2d0c48 + .quad 
0xbe1cefa1174a07a7 + .quad 0xc08625276f0158d8 + .quad 0xbe1cef1043aa5b25 + .quad 0xc086252a8d9dc150 + .quad 0xbe1cf15d521c169d + .quad 0xc086252dab033898 + .quad 0xbe1cf220bba8861f + .quad 0xc0862530c732b078 + .quad 0xbe1cef51e310eae2 + .quad 0xc0862533e22d1988 + .quad 0xbe1cf222fcedd8ae + .quad 0xc0862536fbf36370 + .quad 0xbe1cefdb4da4bda8 + .quad 0xc086253a14867ca0 + .quad 0xbe1ceeafc1112171 + .quad 0xc086253d2be75280 + .quad 0xbe1cee99dfb4b408 + .quad 0xc08625404216d160 + .quad 0xbe1cf22d2536f06b + .quad 0xc08625435715e498 + .quad 0xbe1cef6abbf2e268 + .quad 0xc08625466ae57648 + .quad 0xbe1cf093a14789f5 + .quad 0xc08625497d866fa0 + .quad 0xbe1cf0f93655603c + .quad 0xc086254c8ef9b8b8 + .quad 0xbe1cf1cc40c9aafc + .quad 0xc086254f9f4038a8 + .quad 0xbe1ceeea5f4e9157 + .quad 0xc0862552ae5ad568 + .quad 0xbe1cefa9f52d4997 + .quad 0xc0862555bc4a7400 + .quad 0xbe1cefa490a638ff + .quad 0xc0862558c90ff868 + .quad 0xbe1cef7fcf797d6f + .quad 0xc086255bd4ac4590 + .quad 0xbe1cf1b4c51113c9 + .quad 0xc086255edf203d78 + .quad 0xbe1cef55e5b4a55d + .quad 0xc0862561e86cc100 + .quad 0xbe1cf0d37a25f9dc + .quad 0xc0862564f092b028 + .quad 0xbe1ceebe9efc19d9 + .quad 0xc0862567f792e9d8 + .quad 0xbe1cee8ad30a57b5 + .quad 0xc086256afd6e4c08 + .quad 0xbe1cef4e1817b90b + .quad 0xc086256e0225b3b8 + .quad 0xbe1cee7fa9229996 + .quad 0xc086257105b9fce0 + .quad 0xbe1cf0b54963d945 + .quad 0xc0862574082c0298 + .quad 0xbe1cee5f2f3c7995 + .quad 0xc0862577097c9ee0 + .quad 0xbe1cf0828e303a2c + .quad 0xc086257a09acaae0 + .quad 0xbe1cf172c3078947 + .quad 0xc086257d08bcfec0 + .quad 0xbe1cf189252afa22 + .quad 0xc086258006ae71b8 + .quad 0xbe1cefdb80426923 + .quad 0xc08625830381da08 + .quad 0xbe1ceef1391a0372 + .quad 0xc0862585ff380d00 + .quad 0xbe1cf17720c78d13 + .quad 0xc0862588f9d1df18 + .quad 0xbe1ceef1f9027d83 + .quad 0xc086258bf35023b8 + .quad 0xbe1cf06fac99dec9 + .quad 0xc086258eebb3ad78 + .quad 0xbe1cf1373eeb45c0 + .quad 0xc0862591e2fd4e00 + .quad 0xbe1cef777536bb81 + .quad 0xc0862594d92dd600 + .quad 
0xbe1cf0f43ca40766 + .quad 0xc0862597ce461558 + .quad 0xbe1cefb2cfc6766b + .quad 0xc086259ac246daf0 + .quad 0xbe1ceea49e64ffa2 + .quad 0xc086259db530f4c8 + .quad 0xbe1cf250fa457dec + .quad 0xc08625a0a7053018 + .quad 0xbe1cf17d8bb2a44e + .quad 0xc08625a397c45918 + .quad 0xbe1cf1d5906d54b7 + .quad 0xc08625a6876f3b30 + .quad 0xbe1cf08fe7b31780 + .quad 0xc08625a97606a0e0 + .quad 0xbe1cef13edfc9d11 + .quad 0xc08625ac638b53c8 + .quad 0xbe1cef9d2b107219 + .quad 0xc08625af4ffe1cb0 + .quad 0xbe1cf1ddd4ff6160 + .quad 0xc08625b23b5fc390 + .quad 0xbe1cefa02a996495 + .quad 0xc08625b525b10f68 + .quad 0xbe1cf166a7e37ee5 + .quad 0xc08625b80ef2c680 + .quad 0xbe1cef0b171068a5 + .quad 0xc08625baf725ae28 + .quad 0xbe1cf05c80779283 + .quad 0xc08625bdde4a8af0 + .quad 0xbe1cf1bbfbffb889 + .quad 0xc08625c0c4622090 + .quad 0xbe1cf0b8666c0124 + .quad 0xc08625c3a96d31e0 + .quad 0xbe1cf0a8fcf47a86 + .quad 0xc08625c68d6c80f0 + .quad 0xbe1cef46e18cb092 + .quad 0xc08625c97060cef0 + .quad 0xbe1cf1458a350efb + .quad 0xc08625cc524adc58 + .quad 0xbe1ceeea1dadce12 + .quad 0xc08625cf332b68b0 + .quad 0xbe1cf0a1bfdc44c7 + .quad 0xc08625d2130332d0 + .quad 0xbe1cef96d02da73e + .quad 0xc08625d4f1d2f8a8 + .quad 0xbe1cf2451c3c7701 + .quad 0xc08625d7cf9b7778 + .quad 0xbe1cf10d08f83812 + .quad 0xc08625daac5d6ba0 + .quad 0xbe1ceec5b4895c5e + .quad 0xc08625dd881990b0 + .quad 0xbe1cf14e1325c5e4 + .quad 0xc08625e062d0a188 + .quad 0xbe1cf21d0904be12 + .quad 0xc08625e33c835838 + .quad 0xbe1ceed0839bcf21 + .quad 0xc08625e615326df0 + .quad 0xbe1cf1bb944889d2 + .quad 0xc08625e8ecde9b48 + .quad 0xbe1cee738e85eece + .quad 0xc08625ebc38897e0 + .quad 0xbe1cf25c2bc6ef12 + .quad 0xc08625ee99311ac8 + .quad 0xbe1cf132b70a41ad + .quad 0xc08625f16dd8da28 + .quad 0xbe1cf1984236a6e3 + .quad 0xc08625f441808b78 + .quad 0xbe1cf19ae74998f9 + .quad 0xc08625f71428e370 + .quad 0xbe1cef3e175d61a1 + .quad 0xc08625f9e5d295f8 + .quad 0xbe1cf101f9868fd9 + .quad 0xc08625fcb67e5658 + .quad 0xbe1cee69db83dcd2 + .quad 0xc08625ff862cd6f8 + .quad 
0xbe1cf081b636af51 + .quad 0xc086260254dec9a8 + .quad 0xbe1cee62c7d59b3e + .quad 0xc08626052294df58 + .quad 0xbe1cf1b745c57716 + .quad 0xc0862607ef4fc868 + .quad 0xbe1cef3d2800ea23 + .quad 0xc086260abb103458 + .quad 0xbe1cef480ff1acd2 + .quad 0xc086260d85d6d200 + .quad 0xbe1cf2424c9a17ef + .quad 0xc08626104fa44f90 + .quad 0xbe1cf12cfde90fd5 + .quad 0xc086261318795a68 + .quad 0xbe1cf21f590dd5b6 + .quad 0xc0862615e0569f48 + .quad 0xbe1cf0c50f9cd28a + .quad 0xc0862618a73cca30 + .quad 0xbe1ceedbdb520545 + .quad 0xc086261b6d2c8668 + .quad 0xbe1cf0b030396011 + .quad 0xc086261e32267e98 + .quad 0xbe1cf19917010e96 + .quad 0xc0862620f62b5cb0 + .quad 0xbe1cf07331355985 + .quad 0xc0862623b93bc9e8 + .quad 0xbe1cf01ae921a1c3 + .quad 0xc08626267b586ed0 + .quad 0xbe1cefe5cf0dbf0c + .quad 0xc08626293c81f348 + .quad 0xbe1cf01b258aeb50 + .quad 0xc086262bfcb8fe88 + .quad 0xbe1cee6b9e7f4c68 + .quad 0xc086262ebbfe3710 + .quad 0xbe1cee684a9b21c9 + .quad 0xc08626317a5242b8 + .quad 0xbe1cf1f8bcde9a8b + .quad 0xc086263437b5c6c0 + .quad 0xbe1cf1d063d36238 + .quad 0xc0862636f42967a8 + .quad 0xbe1cf1e31a19075e + .quad 0xc0862639afadc950 + .quad 0xbe1cf1d8efdf7e7d + .quad 0xc086263c6a438ef0 + .quad 0xbe1cf1812ee72dba + .quad 0xc086263f23eb5b18 + .quad 0xbe1cf1449a9a2279 + .quad 0xc0862641dca5cfb8 + .quad 0xbe1cee96edce5085 + .quad 0xc086264494738e08 + .quad 0xbe1cf06797bd03b2 + .quad 0xc08626474b5536b8 + .quad 0xbe1cef91b9b7ffc1 + .quad 0xc086264a014b69c0 + .quad 0xbe1cef4b6721278f + .quad 0xc086264cb656c678 + .quad 0xbe1cf1942925eb4a + .quad 0xc086264f6a77eba8 + .quad 0xbe1cefa2c7bc2e39 + .quad 0xc08626521daf7758 + .quad 0xbe1cf252595aceb3 + .quad 0xc0862654cffe0718 + .quad 0xbe1cee8e9ae47ec2 + .quad 0xc0862657816437a8 + .quad 0xbe1cf1bf913828fa + .quad 0xc086265a31e2a558 + .quad 0xbe1cf23475d6b366 + .quad 0xc086265ce179ebc8 + .quad 0xbe1cef8df00a922b + .quad 0xc086265f902aa5f0 + .quad 0xbe1cef279bfa43e0 + .quad 0xc08626623df56e38 + .quad 0xbe1cf080e10b8365 + .quad 0xc0862664eadade70 + .quad 
0xbe1cf1a518f9b544 + .quad 0xc086266796db8fd0 + .quad 0xbe1cef9308fed9e9 + .quad 0xc086266a41f81ae8 + .quad 0xbe1ceea3ae6b19c9 + .quad 0xc086266cec3117b8 + .quad 0xbe1ceef06003d4c2 + .quad 0xc086266f95871da8 + .quad 0xbe1cf0b8457ffb0c + .quad 0xc08626723dfac390 + .quad 0xbe1cf0c526745ad6 + .quad 0xc0862674e58c9fa8 + .quad 0xbe1cf0cf91ff7b5d + .quad 0xc08626778c3d4798 + .quad 0xbe1cefe260819380 + .quad 0xc086267a320d5070 + .quad 0xbe1ceebd90aa27a3 + .quad 0xc086267cd6fd4ea8 + .quad 0xbe1cf0388121dffa + .quad 0xc086267f7b0dd630 + .quad 0xbe1cf1a3881435f1 + .quad 0xc08626821e3f7a68 + .quad 0xbe1cef28e9d9ac52 + .quad 0xc0862684c092ce08 + .quad 0xbe1cf02d300062dd + .quad 0xc086268762086350 + .quad 0xbe1cefaee1edfa35 + .quad 0xc086268a02a0cbe0 + .quad 0xbe1cf0a5a052e936 + .quad 0xc086268ca25c98d8 + .quad 0xbe1cee60a4a497ed + .quad 0xc086268f413c5ab0 + .quad 0xbe1cf0e4a5d0cf49 + .quad 0xc0862691df40a170 + .quad 0xbe1cf149235a4e6e + .quad 0xc08626947c69fc80 + .quad 0xbe1cf215180b9fcc + .quad 0xc086269718b8fac8 + .quad 0xbe1cef9b156a9840 + .quad 0xc0862699b42e2a90 + .quad 0xbe1cf054c91441be + .quad 0xc086269c4eca19a8 + .quad 0xbe1cf13ded26512c + .quad 0xc086269ee88d5550 + .quad 0xbe1cf22ea4d8ac06 + .quad 0xc08626a181786a40 + .quad 0xbe1cf2354666ee2e + .quad 0xc08626a4198be4a8 + .quad 0xbe1cefef936752b3 + .quad 0xc08626a6b0c85020 + .quad 0xbe1cf1e360a9db68 + .quad 0xc08626a9472e37d8 + .quad 0xbe1ceed6aeb812c5 + .quad 0xc08626abdcbe2650 + .quad 0xbe1cf227340b4986 + .quad 0xc08626ae7178a5b0 + .quad 0xbe1cf0215a0cbe0d + .quad 0xc08626b1055e3f70 + .quad 0xbe1cf256adf0ae26 + .quad 0xc08626b3986f7ca8 + .quad 0xbe1ceff3c67aed06 + .quad 0xc08626b62aace5c8 + .quad 0xbe1cf2159fb93652 + .quad 0xc08626b8bc1702e0 + .quad 0xbe1cf01e6dbd1c7f + .quad 0xc08626bb4cae5b60 + .quad 0xbe1cf009e75d1c0c + .quad 0xc08626bddc737648 + .quad 0xbe1ceec10a020e73 + .quad 0xc08626c06b66da08 + .quad 0xbe1cf06d5783eee7 + .quad 0xc08626c2f9890ca0 + .quad 0xbe1cf0cb8f169ffe + .quad 0xc08626c586da9388 + .quad 
0xbe1cef7de2452430 + .quad 0xc08626c8135bf3b0 + .quad 0xbe1cf05da6f783ae + .quad 0xc08626ca9f0db198 + .quad 0xbe1cefcc877d681d + .quad 0xc08626cd29f05138 + .quad 0xbe1cef0531954ab3 + .quad 0xc08626cfb4045608 + .quad 0xbe1cf06b8565ea3d + .quad 0xc08626d23d4a4310 + .quad 0xbe1cefdc455d9d7e + .quad 0xc08626d4c5c29ad0 + .quad 0xbe1ceefc47e8fa64 + .quad 0xc08626d74d6ddf48 + .quad 0xbe1cf1872bf033f2 + .quad 0xc08626d9d44c9210 + .quad 0xbe1cf19d91087f9d + .quad 0xc08626dc5a5f3438 + .quad 0xbe1cf012d444c6ab + .quad 0xc08626dedfa64650 + .quad 0xbe1cf0ba528ee153 + .quad 0xc08626e164224880 + .quad 0xbe1ceeb431709788 + .quad 0xc08626e3e7d3ba60 + .quad 0xbe1cf0b9af31a6a5 + .quad 0xc08626e66abb1b28 + .quad 0xbe1cf168fb2e135b + .quad 0xc08626e8ecd8e990 + .quad 0xbe1cef9097461c93 + .quad 0xc08626eb6e2da3d0 + .quad 0xbe1cee7a434735d8 + .quad 0xc08626edeeb9c7a8 + .quad 0xbe1cf235732b86f2 + .quad 0xc08626f06e7dd280 + .quad 0xbe1cefe1510b89e6 + .quad 0xc08626f2ed7a4120 + .quad 0xbe1cf1f64b9b80ef + .quad 0xc08626f56baf9000 + .quad 0xbe1cf08f320ca339 + .quad 0xc08626f7e91e3b08 + .quad 0xbe1cf1b1de2808a1 + .quad 0xc08626fa65c6bdc0 + .quad 0xbe1cf1976d778b28 + .quad 0xc08626fce1a99338 + .quad 0xbe1ceef40a4f076f + .quad 0xc08626ff5cc73600 + .quad 0xbe1cef3e45869ce3 + .quad 0xc0862701d7202048 + .quad 0xbe1ceef601b4c9d6 + .quad 0xc086270450b4cbc0 + .quad 0xbe1cf1eaf0b57fd6 + .quad 0xc0862706c985b1c0 + .quad 0xbe1cef82a44990f3 + .quad 0xc086270941934b10 + .quad 0xbe1ceefe32981f2c + .quad 0xc086270bb8de1018 + .quad 0xbe1cefbf6f5a0445 + .quad 0xc086270e2f6678d0 + .quad 0xbe1cf18dba75792c + .quad 0xc0862710a52cfcc8 + .quad 0xbe1cf0da64ce995f + .quad 0xc08627131a321318 + .quad 0xbe1cef04ac0fb802 + .quad 0xc08627158e763268 + .quad 0xbe1cee9d4e2ad9bd + .quad 0xc086271801f9d0f8 + .quad 0xbe1cefa9b55407b5 + .quad 0xc086271a74bd64a0 + .quad 0xbe1cefe6bd329570 + .quad 0xc086271ce6c162c8 + .quad 0xbe1cef0b1205dc85 + .quad 0xc086271f58064068 + .quad 0xbe1cef092a785e3f + .quad 0xc0862721c88c7210 + .quad 
0xbe1cf050dcdaac30 + .quad 0xc086272438546be8 + .quad 0xbe1cf210907ded8b + .quad 0xc0862726a75ea1b8 + .quad 0xbe1cee760be44f99 + .quad 0xc086272915ab86c0 + .quad 0xbe1ceeeee07c2bcc + .quad 0xc086272b833b8df0 + .quad 0xbe1cf06874992df5 + .quad 0xc086272df00f29d0 + .quad 0xbe1cef8fac5d4899 + .quad 0xc08627305c26cc70 + .quad 0xbe1cf1103241cc99 + .quad 0xc0862732c782e788 + .quad 0xbe1cf1d35fef83fe + .quad 0xc08627353223ec68 + .quad 0xbe1cef3ec8133e1d + .quad 0xc08627379c0a4be8 + .quad 0xbe1cef7261daccd8 + .quad 0xc086273a05367688 + .quad 0xbe1cf18656c50806 + .quad 0xc086273c6da8dc68 + .quad 0xbe1cf1c8736e049a + .quad 0xc086273ed561ed38 + .quad 0xbe1cf1f93bff4911 + .quad 0xc08627413c621848 + .quad 0xbe1cf188a4ea680c + .quad 0xc0862743a2a9cc80 + .quad 0xbe1cf1d270930c80 + .quad 0xc086274608397868 + .quad 0xbe1cf25a328c28e2 + .quad 0xc08627486d118a28 + .quad 0xbe1cf106f90aa3b8 + .quad 0xc086274ad1326f80 + .quad 0xbe1cee5e9d2e885a + .quad 0xc086274d349c95c0 + .quad 0xbe1cf1c0bac27228 + .quad 0xc086274f975069f8 + .quad 0xbe1cf1a1500f9b1c + .quad 0xc0862751f94e58c0 + .quad 0xbe1cefc30663ac44 + .quad 0xc08627545a96ce48 + .quad 0xbe1cf17123e427a2 + .quad 0xc0862756bb2a3678 + .quad 0xbe1cefb92749fea4 + .quad 0xc08627591b08fcc0 + .quad 0xbe1cefa40e1ea74a + .quad 0xc086275b7a338c40 + .quad 0xbe1cee6f4612c3e9 + .quad 0xc086275dd8aa4fa8 + .quad 0xbe1cf1c54a053627 + .quad 0xc0862760366db168 + .quad 0xbe1ceff5eb503d9e + .quad 0xc0862762937e1b70 + .quad 0xbe1cf02e47f10cee + .quad 0xc0862764efdbf768 + .quad 0xbe1ceeb06e1d0dad + .quad 0xc08627674b87ae88 + .quad 0xbe1cf10aadd6dba5 + .quad 0xc0862769a681a9c0 + .quad 0xbe1cf24e9913d30f + .quad 0xc086276c00ca51a0 + .quad 0xbe1cef47b301e312 + .quad 0xc086276e5a620e48 + .quad 0xbe1ceeb1cefc2e85 + .quad 0xc0862770b3494788 + .quad 0xbe1cf16f1fbbe011 + .quad 0xc08627730b8064e8 + .quad 0xbe1ceebdf75174c7 + .quad 0xc08627756307cd70 + .quad 0xbe1cf06e3871a0da + .quad 0xc0862777b9dfe7f0 + .quad 0xbe1cef16799fd554 + .quad 0xc086277a10091ac0 + .quad 
0xbe1cf248dabf5377 + .quad 0xc086277c6583cc00 + .quad 0xbe1cf0c78d92a2cd + .quad 0xc086277eba506158 + .quad 0xbe1cf0b911b029f0 + .quad 0xc08627810e6f4028 + .quad 0xbe1cefdc24719766 + .quad 0xc086278361e0cd70 + .quad 0xbe1cefbb6562b7e7 + .quad 0xc0862785b4a56dd8 + .quad 0xbe1cf1e0afb349ec + .quad 0xc086278806bd85c0 + .quad 0xbe1cf008292e52fc + .quad 0xc086278a58297918 + .quad 0xbe1cf053073872bf + .quad 0xc086278ca8e9ab88 + .quad 0xbe1cf17a0a55a947 + .quad 0xc086278ef8fe8068 + .quad 0xbe1ceeffb0b60234 + .quad 0xc086279148685aa0 + .quad 0xbe1cf162204794a8 + .quad 0xc086279397279ce0 + .quad 0xbe1cf24cc8cb48ac + .quad 0xc0862795e53ca978 + .quad 0xbe1cf0c9be68d5c3 + .quad 0xc086279832a7e258 + .quad 0xbe1cf172cd3d7388 + .quad 0xc086279a7f69a930 + .quad 0xbe1ceea2465fbce5 + .quad 0xc086279ccb825f40 + .quad 0xbe1cf0a386d2500f + .quad 0xc086279f16f26590 + .quad 0xbe1cf1e338ddc18a + .quad 0xc08627a161ba1cd0 + .quad 0xbe1cef1f5049867f + .quad 0xc08627a3abd9e548 + .quad 0xbe1cef96c1ea8b1f + .quad 0xc08627a5f5521f00 + .quad 0xbe1cf138f6fd3c26 + .quad 0xc08627a83e2329b0 + .quad 0xbe1cf0d4fcbfdf3a + .quad 0xc08627aa864d64b0 + .quad 0xbe1cf24870c12c81 + .quad 0xc08627accdd12f18 + .quad 0xbe1cf0ae2a56348d + .quad 0xc08627af14aee7a0 + .quad 0xbe1cee8ca1a9b893 + .quad 0xc08627b15ae6eca8 + .quad 0xbe1cf20414d637b0 + .quad 0xc08627b3a0799c60 + .quad 0xbe1cf0fc6b7b12d8 + .quad 0xc08627b5e5675488 + .quad 0xbe1cf152d93c4a00 + .quad 0xc08627b829b072a0 + .quad 0xbe1cf1073f9b77c2 + .quad 0xc08627ba6d5553d8 + .quad 0xbe1cee694f97d5a4 + .quad 0xc08627bcb0565500 + .quad 0xbe1cf0456b8239d7 + .quad 0xc08627bef2b3d2b0 + .quad 0xbe1cf211497127e3 + .quad 0xc08627c1346e2930 + .quad 0xbe1cf01856c0384d + .quad 0xc08627c37585b468 + .quad 0xbe1cefa7dd05479e + .quad 0xc08627c5b5fad000 + .quad 0xbe1cef3ae8e50b93 + .quad 0xc08627c7f5cdd750 + .quad 0xbe1ceea5f32fdd3a + .quad 0xc08627ca34ff2560 + .quad 0xbe1cef424caeb8d9 + .quad 0xc08627cc738f14f0 + .quad 0xbe1cf0194d07a81f + .quad 0xc08627ceb17e0070 + .quad 
0xbe1cf20f452000c1 + .quad 0xc08627d0eecc4210 + .quad 0xbe1cf00e356218e4 + .quad 0xc08627d32b7a33a0 + .quad 0xbe1cef30484b4bcb + .quad 0xc08627d567882eb0 + .quad 0xbe1ceeea11a6641b + .quad 0xc08627d7a2f68c80 + .quad 0xbe1cf13492d5bd7b + .quad 0xc08627d9ddc5a618 + .quad 0xbe1ceeb7048fad96 + .quad 0xc08627dc17f5d418 + .quad 0xbe1ceef0666f0477 + .quad 0xc08627de51876ee8 + .quad 0xbe1cf060d4b8b5c2 + .quad 0xc08627e08a7acea8 + .quad 0xbe1cf0b2a4b6ff8c + .quad 0xc08627e2c2d04b28 + .quad 0xbe1cf0e34809a875 + .quad 0xc08627e4fa883bf0 + .quad 0xbe1cf16bf74a3522 + .quad 0xc08627e731a2f848 + .quad 0xbe1cee6a24623d57 + .quad 0xc08627e96820d718 + .quad 0xbe1cefc7b4f1528e + .quad 0xc08627eb9e022f18 + .quad 0xbe1cf163051f3548 + .quad 0xc08627edd34756b8 + .quad 0xbe1cef36b3366305 + .quad 0xc08627f007f0a408 + .quad 0xbe1cf18134625550 + .quad 0xc08627f23bfe6cf0 + .quad 0xbe1cf0ec32ec1a11 + .quad 0xc08627f46f710700 + .quad 0xbe1ceeb3b64f3edc + .quad 0xc08627f6a248c778 + .quad 0xbe1cf0cd15805bc8 + .quad 0xc08627f8d4860368 + .quad 0xbe1cf20db3bddebe + .quad 0xc08627fb06290f90 + .quad 0xbe1cf25188430e25 + .quad 0xc08627fd37324070 + .quad 0xbe1ceea1713490f9 + .quad 0xc08627ff67a1ea28 + .quad 0xbe1cf159521d234c + .quad 0xc0862801977860b8 + .quad 0xbe1cf24dfe50783b + .quad 0xc0862803c6b5f7d0 + .quad 0xbe1ceef2ef89a60b + .quad 0xc0862805f55b02c8 + .quad 0xbe1cee7fc919d62c + .quad 0xc08628082367d4c0 + .quad 0xbe1cf215a7fb513a + .quad 0xc086280a50dcc0a8 + .quad 0xbe1cf0e4401c5ed4 + .quad 0xc086280c7dba1910 + .quad 0xbe1cf04ec734d256 + .quad 0xc086280eaa003050 + .quad 0xbe1cf010ad787fea + .quad 0xc0862810d5af5880 + .quad 0xbe1cee622478393d + .quad 0xc086281300c7e368 + .quad 0xbe1cf01c7482564f + .quad 0xc08628152b4a22a0 + .quad 0xbe1cf0de20d33536 + .quad 0xc086281755366778 + .quad 0xbe1cef2edae5837d + .quad 0xc08628197e8d02f0 + .quad 0xbe1cf0a345318cc9 + .quad 0xc086281ba74e45d8 + .quad 0xbe1cf20085aa34b8 + .quad 0xc086281dcf7a80c0 + .quad 0xbe1cef5fa845ad83 + .quad 0xc086281ff71203e0 + .quad 
0xbe1cf050d1df69c4 + .quad 0xc08628221e151f48 + .quad 0xbe1ceffe43c035b9 + .quad 0xc0862824448422b8 + .quad 0xbe1cf14f3018d3c2 + .quad 0xc08628266a5f5dc0 + .quad 0xbe1cef0a5fbae83d + .quad 0xc08628288fa71f98 + .quad 0xbe1ceff8a95b72a1 + .quad 0xc086282ab45bb750 + .quad 0xbe1cef073aa9849b + .quad 0xc086282cd87d73a8 + .quad 0xbe1cef69b3835c02 + .quad 0xc086282efc0ca328 + .quad 0xbe1cf0bc139379a9 + .quad 0xc08628311f099420 + .quad 0xbe1cef247a9ec596 + .quad 0xc086283341749490 + .quad 0xbe1cef74bbcc488a + .quad 0xc0862835634df248 + .quad 0xbe1cef4bc42e7b8e + .quad 0xc08628378495fad0 + .quad 0xbe1cf136d4d5a810 + .quad 0xc0862839a54cfb80 + .quad 0xbe1cf0d290b24dd8 + .quad 0xc086283bc5734168 + .quad 0xbe1ceeebde8e0065 + .quad 0xc086283de5091950 + .quad 0xbe1cf1a09f60aa1e + .quad 0xc0862840040ecfe0 + .quad 0xbe1cf0803947a234 + .quad 0xc08628422284b168 + .quad 0xbe1cf0abf7638127 + .quad 0xc0862844406b0a08 + .quad 0xbe1cf0f73ee12058 + .quad 0xc08628465dc225a0 + .quad 0xbe1cf2079971b26c + .quad 0xc08628487a8a4fe0 + .quad 0xbe1cee74957564b1 + .quad 0xc086284a96c3d420 + .quad 0xbe1ceee77c1b7d43 + .quad 0xc086284cb26efd90 + .quad 0xbe1cf23addba6e09 + .quad 0xc086284ecd8c1730 + .quad 0xbe1cf199f4a1da60 + .quad 0xc0862850e81b6bb0 + .quad 0xbe1cf09fdea81393 + .quad 0xc0862853021d4588 + .quad 0xbe1cf176adb417f7 + .quad 0xc08628551b91ef00 + .quad 0xbe1cf0f64f84a8da + .quad 0xc08628573479b220 + .quad 0xbe1ceec34cf49523 + .quad 0xc08628594cd4d8a8 + .quad 0xbe1cf16d60fbe0bb + .quad 0xc086285b64a3ac40 + .quad 0xbe1cee8de7acfc7b + .quad 0xc086285d7be67630 + .quad 0xbe1ceee6256cce8d + .quad 0xc086285f929d7fa0 + .quad 0xbe1cee7d66a3d8a5 + .quad 0xc0862861a8c91170 + .quad 0xbe1cf0bef8265792 + .quad 0xc0862863be697458 + .quad 0xbe1cf097f890c6f8 + .quad 0xc0862865d37ef0c8 + .quad 0xbe1cf09502d5c3fc + .quad 0xc0862867e809cf00 + .quad 0xbe1ceeffb239dac7 + .quad 0xc0862869fc0a56f8 + .quad 0xbe1cf1fbfff95c98 + .quad 0xc086286c0f80d090 + .quad 0xbe1cefa57ad3eef7 + .quad 0xc086286e226d8348 + .quad 
0xbe1cf22c58b9183d + .quad 0xc086287034d0b690 + .quad 0xbe1ceff262d0a248 + .quad 0xc086287246aab180 + .quad 0xbe1cefa7bc194186 + .quad 0xc086287457fbbb08 + .quad 0xbe1cf06782d784d9 + .quad 0xc086287668c419e0 + .quad 0xbe1cf1d44d0eaa07 + .quad 0xc086287879041490 + .quad 0xbe1cf034803c8a48 + .quad 0xc086287a88bbf158 + .quad 0xbe1cf08e84916b6f + .quad 0xc086287c97ebf650 + .quad 0xbe1cf0c4d3dc1bc7 + .quad 0xc086287ea6946958 + .quad 0xbe1cefb1e4625943 + .quad 0xc0862880b4b59010 + .quad 0xbe1cf143efdd1fd0 + .quad 0xc0862882c24faff8 + .quad 0xbe1cee9896d016da + .quad 0xc0862884cf630e38 + .quad 0xbe1cf2186072f2cc + .quad 0xc0862886dbefeff0 + .quad 0xbe1cef9217633d34 + .quad 0xc0862888e7f699e0 + .quad 0xbe1cf05603549486 + .quad 0xc086288af37750b0 + .quad 0xbe1cef50fff513d3 + .quad 0xc086288cfe7258c0 + .quad 0xbe1cf127713b32d0 + .quad 0xc086288f08e7f650 + .quad 0xbe1cf05015520f3d + .quad 0xc086289112d86d58 + .quad 0xbe1cf12eb458b26f + .quad 0xc08628931c4401a8 + .quad 0xbe1cf22eae2887ed + .quad 0xc0862895252af6e0 + .quad 0xbe1cefdd6656dd2d + .quad 0xc08628972d8d9058 + .quad 0xbe1cf1048ea4e646 + .quad 0xc0862899356c1150 + .quad 0xbe1ceec4501167e9 + .quad 0xc086289b3cc6bcb8 + .quad 0xbe1cf0ad52becc3f + .quad 0xc086289d439dd568 + .quad 0xbe1cf0daa4e00e35 + .quad 0xc086289f49f19df8 + .quad 0xbe1cf00b80de8d6a + .quad 0xc08628a14fc258c8 + .quad 0xbe1cf1bcf2ea8464 + .quad 0xc08628a355104818 + .quad 0xbe1cf0435e2782b0 + .quad 0xc08628a559dbade0 + .quad 0xbe1cf0e3e1a5f56c + .quad 0xc08628a75e24cbf8 + .quad 0xbe1cefed9d5a721d + .quad 0xc08628a961ebe3f8 + .quad 0xbe1cf0d2d74321e2 + .quad 0xc08628ab65313750 + .quad 0xbe1cf24200eb55e9 + .quad 0xc08628ad67f50740 + .quad 0xbe1cf23e9d7cf979 + .quad 0xc08628af6a3794d0 + .quad 0xbe1cf23a088f421c + .quad 0xc08628b16bf920e0 + .quad 0xbe1cef2c1de1ab32 + .quad 0xc08628b36d39ec08 + .quad 0xbe1cf1abc231f7b2 + .quad 0xc08628b56dfa36d0 + .quad 0xbe1cf2074d5ba303 + .quad 0xc08628b76e3a4180 + .quad 0xbe1cf05cd5eed880 + .rept 48 + .byte 0 + .endr + +/* 
Lookup table with 9-bit index for + -log(mRcp), where mRcp is mantissa of 1/x 9-bit accurate reciprocal: + */ +.if .-__svml_dlog_data != _Log_LA_table +.err +.endif + .quad 0x8000000000000000 + .quad 0xbf5ff802a9ab10e6 + .quad 0xbf6ff00aa2b10bc0 + .quad 0xbf77ee11ebd82e94 + .quad 0xbf7fe02a6b106789 + .quad 0xbf83e7295d25a7d9 + .quad 0xbf87dc475f810a77 + .quad 0xbf8bcf712c74384c + .quad 0xbf8fc0a8b0fc03e4 + .quad 0xbf91d7f7eb9eebe7 + .quad 0xbf93cea44346a575 + .quad 0xbf95c45a51b8d389 + .quad 0xbf97b91b07d5b11b + .quad 0xbf99ace7551cc514 + .quad 0xbf9b9fc027af9198 + .quad 0xbf9d91a66c543cc4 + .quad 0xbf9f829b0e783300 + .quad 0xbfa0b94f7c196176 + .quad 0xbfa1b0d98923d980 + .quad 0xbfa2a7ec2214e873 + .quad 0xbfa39e87b9febd60 + .quad 0xbfa494acc34d911c + .quad 0xbfa58a5bafc8e4d5 + .quad 0xbfa67f94f094bd98 + .quad 0xbfa77458f632dcfc + .quad 0xbfa868a83083f6cf + .quad 0xbfa95c830ec8e3eb + .quad 0xbfaa4fe9ffa3d235 + .quad 0xbfab42dd711971bf + .quad 0xbfac355dd0921f2d + .quad 0xbfad276b8adb0b52 + .quad 0xbfae19070c276016 + .quad 0xbfaf0a30c01162a6 + .quad 0xbfaffae9119b9303 + .quad 0xbfb075983598e471 + .quad 0xbfb0ed839b5526fe + .quad 0xbfb16536eea37ae1 + .quad 0xbfb1dcb263db1944 + .quad 0xbfb253f62f0a1417 + .quad 0xbfb2cb0283f5de1f + .quad 0xbfb341d7961bd1d1 + .quad 0xbfb3b87598b1b6ee + .quad 0xbfb42edcbea646f0 + .quad 0xbfb4a50d3aa1b040 + .quad 0xbfb51b073f06183f + .quad 0xbfb590cafdf01c28 + .quad 0xbfb60658a93750c4 + .quad 0xbfb67bb0726ec0fc + .quad 0xbfb6f0d28ae56b4c + .quad 0xbfb765bf23a6be13 + .quad 0xbfb7da766d7b12cd + .quad 0xbfb84ef898e8282a + .quad 0xbfb8c345d6319b21 + .quad 0xbfb9375e55595ede + .quad 0xbfb9ab42462033ad + .quad 0xbfba1ef1d8061cd4 + .quad 0xbfba926d3a4ad563 + .quad 0xbfbb05b49bee43fe + .quad 0xbfbb78c82bb0eda1 + .quad 0xbfbbeba818146765 + .quad 0xbfbc5e548f5bc743 + .quad 0xbfbcd0cdbf8c13e1 + .quad 0xbfbd4313d66cb35d + .quad 0xbfbdb5270187d927 + .quad 0xbfbe27076e2af2e6 + .quad 0xbfbe98b549671467 + .quad 0xbfbf0a30c01162a6 + .quad 
0xbfbf7b79fec37ddf + .quad 0xbfbfec9131dbeabb + .quad 0xbfc02ebb42bf3d4b + .quad 0xbfc0671512ca596e + .quad 0xbfc09f561ee719c3 + .quad 0xbfc0d77e7cd08e59 + .quad 0xbfc10f8e422539b1 + .quad 0xbfc14785846742ac + .quad 0xbfc17f6458fca611 + .quad 0xbfc1b72ad52f67a0 + .quad 0xbfc1eed90e2dc2c3 + .quad 0xbfc2266f190a5acb + .quad 0xbfc25ded0abc6ad2 + .quad 0xbfc29552f81ff523 + .quad 0xbfc2cca0f5f5f251 + .quad 0xbfc303d718e47fd3 + .quad 0xbfc33af575770e4f + .quad 0xbfc371fc201e8f74 + .quad 0xbfc3a8eb2d31a376 + .quad 0xbfc3dfc2b0ecc62a + .quad 0xbfc41682bf727bc0 + .quad 0xbfc44d2b6ccb7d1e + .quad 0xbfc483bccce6e3dd + .quad 0xbfc4ba36f39a55e5 + .quad 0xbfc4f099f4a230b2 + .quad 0xbfc526e5e3a1b438 + .quad 0xbfc55d1ad4232d6f + .quad 0xbfc59338d9982086 + .quad 0xbfc5c940075972b9 + .quad 0xbfc5ff3070a793d4 + .quad 0xbfc6350a28aaa758 + .quad 0xbfc66acd4272ad51 + .quad 0xbfc6a079d0f7aad2 + .quad 0xbfc6d60fe719d21d + .quad 0xbfc70b8f97a1aa75 + .quad 0xbfc740f8f54037a5 + .quad 0xbfc7764c128f2127 + .quad 0xbfc7ab890210d909 + .quad 0xbfc7e0afd630c274 + .quad 0xbfc815c0a14357eb + .quad 0xbfc84abb75865139 + .quad 0xbfc87fa06520c911 + .quad 0xbfc8b46f8223625b + .quad 0xbfc8e928de886d41 + .quad 0xbfc91dcc8c340bde + .quad 0xbfc9525a9cf456b4 + .quad 0xbfc986d3228180ca + .quad 0xbfc9bb362e7dfb83 + .quad 0xbfc9ef83d2769a34 + .quad 0xbfca23bc1fe2b563 + .quad 0xbfca57df28244dcd + .quad 0xbfca8becfc882f19 + .quad 0xbfcabfe5ae46124c + .quad 0xbfcaf3c94e80bff3 + .quad 0xbfcb2797ee46320c + .quad 0xbfcb5b519e8fb5a4 + .quad 0xbfcb8ef670420c3b + .quad 0xbfcbc286742d8cd6 + .quad 0xbfcbf601bb0e44e2 + .quad 0xbfcc2968558c18c1 + .quad 0xbfcc5cba543ae425 + .quad 0xbfcc8ff7c79a9a22 + .quad 0xbfccc320c0176502 + .quad 0xbfccf6354e09c5dc + .quad 0xbfcd293581b6b3e7 + .quad 0xbfcd5c216b4fbb91 + .quad 0xbfcd8ef91af31d5e + .quad 0xbfcdc1bca0abec7d + .quad 0xbfcdf46c0c722d2f + .quad 0xbfce27076e2af2e6 + .quad 0xbfce598ed5a87e2f + .quad 0xbfce8c0252aa5a60 + .quad 0xbfcebe61f4dd7b0b + .quad 0xbfcef0adcbdc5936 + .quad 
0xbfcf22e5e72f105d + .quad 0xbfcf550a564b7b37 + .quad 0xbfcf871b28955045 + .quad 0xbfcfb9186d5e3e2b + .quad 0xbfcfeb0233e607cc + .quad 0xbfd00e6c45ad501d + .quad 0xbfd0274dc16c232f + .quad 0xbfd0402594b4d041 + .quad 0xbfd058f3c703ebc6 + .quad 0xbfd071b85fcd590d + .quad 0xbfd08a73667c57af + .quad 0xbfd0a324e27390e3 + .quad 0xbfd0bbccdb0d24bd + .quad 0xbfd0d46b579ab74b + .quad 0xbfd0ed005f657da4 + .quad 0xbfd1058bf9ae4ad5 + .quad 0xbfd11e0e2dad9cb7 + .quad 0xbfd136870293a8b0 + .quad 0xbfd14ef67f88685a + .quad 0xbfd1675cababa60e + .quad 0xbfd17fb98e15095d + .quad 0xbfd1980d2dd4236f + .quad 0xbfd1b05791f07b49 + .quad 0xbfd1c898c16999fb + .quad 0xbfd1e0d0c33716be + .quad 0xbfd1f8ff9e48a2f3 + .quad 0xbfd211255986160c + .quad 0xbfd22941fbcf7966 + .quad 0xbfd241558bfd1404 + .quad 0xbfd2596010df763a + .quad 0xbfd27161913f853d + .quad 0xbfd2895a13de86a3 + .quad 0xbfd2a1499f762bc9 + .quad 0xbfd2b9303ab89d25 + .quad 0xbfd2d10dec508583 + .quad 0xbfd2e8e2bae11d31 + .quad 0xbfd300aead06350c + .quad 0xbfd31871c9544185 + .quad 0xbfd3302c16586588 + .quad 0xbfd347dd9a987d55 + .quad 0xbfd35f865c93293e + .quad 0xbfd3772662bfd85b + .quad 0xbfd38ebdb38ed321 + .quad 0xbfd3a64c556945ea + .quad 0xbfd3bdd24eb14b6a + .quad 0xbfd3d54fa5c1f710 + .quad 0xbfd3ecc460ef5f50 + .quad 0xbfd404308686a7e4 + .quad 0xbfd41b941cce0bee + .quad 0xbfd432ef2a04e814 + .quad 0xbfd44a41b463c47c + .quad 0xbfd4618bc21c5ec2 + .quad 0xbfd478cd5959b3d9 + .quad 0xbfd49006804009d1 + .quad 0xbfd4a7373cecf997 + .quad 0xbfd4be5f957778a1 + .quad 0xbfd4d57f8fefe27f + .quad 0xbfd4ec973260026a + .quad 0xbfd503a682cb1cb3 + .quad 0xbfd51aad872df82d + .quad 0xbfd531ac457ee77e + .quad 0xbfd548a2c3add263 + .quad 0xbfd55f9107a43ee2 + .quad 0xbfd5767717455a6c + .quad 0xbfd58d54f86e02f2 + .quad 0xbfd5a42ab0f4cfe2 + .quad 0xbfd5baf846aa1b19 + .quad 0xbfd5d1bdbf5809ca + .quad 0xbfd5e87b20c2954a + .quad 0xbfd5ff3070a793d4 + .quad 0xbfd615ddb4bec13c + .quad 0xbfd62c82f2b9c795 + .quad 0x3fd61965cdb02c1f + .quad 0x3fd602d08af091ec + .quad 
0x3fd5ec433d5c35ae + .quad 0x3fd5d5bddf595f30 + .quad 0x3fd5bf406b543db2 + .quad 0x3fd5a8cadbbedfa1 + .quad 0x3fd5925d2b112a59 + .quad 0x3fd57bf753c8d1fb + .quad 0x3fd565995069514c + .quad 0x3fd54f431b7be1a9 + .quad 0x3fd538f4af8f72fe + .quad 0x3fd522ae0738a3d8 + .quad 0x3fd50c6f1d11b97c + .quad 0x3fd4f637ebba9810 + .quad 0x3fd4e0086dd8baca + .quad 0x3fd4c9e09e172c3c + .quad 0x3fd4b3c077267e9a + .quad 0x3fd49da7f3bcc41f + .quad 0x3fd487970e958770 + .quad 0x3fd4718dc271c41b + .quad 0x3fd45b8c0a17df13 + .quad 0x3fd44591e0539f49 + .quad 0x3fd42f9f3ff62642 + .quad 0x3fd419b423d5e8c7 + .quad 0x3fd403d086cea79c + .quad 0x3fd3edf463c1683e + .quad 0x3fd3d81fb5946dba + .quad 0x3fd3c25277333184 + .quad 0x3fd3ac8ca38e5c5f + .quad 0x3fd396ce359bbf54 + .quad 0x3fd3811728564cb2 + .quad 0x3fd36b6776be1117 + .quad 0x3fd355bf1bd82c8b + .quad 0x3fd3401e12aecba1 + .quad 0x3fd32a84565120a8 + .quad 0x3fd314f1e1d35ce4 + .quad 0x3fd2ff66b04ea9d4 + .quad 0x3fd2e9e2bce12286 + .quad 0x3fd2d46602adccee + .quad 0x3fd2bef07cdc9354 + .quad 0x3fd2a982269a3dbf + .quad 0x3fd2941afb186b7c + .quad 0x3fd27ebaf58d8c9d + .quad 0x3fd269621134db92 + .quad 0x3fd25410494e56c7 + .quad 0x3fd23ec5991eba49 + .quad 0x3fd22981fbef797b + .quad 0x3fd214456d0eb8d4 + .quad 0x3fd1ff0fe7cf47a7 + .quad 0x3fd1e9e1678899f4 + .quad 0x3fd1d4b9e796c245 + .quad 0x3fd1bf99635a6b95 + .quad 0x3fd1aa7fd638d33f + .quad 0x3fd1956d3b9bc2fa + .quad 0x3fd180618ef18adf + .quad 0x3fd16b5ccbacfb73 + .quad 0x3fd1565eed455fc3 + .quad 0x3fd14167ef367783 + .quad 0x3fd12c77cd00713b + .quad 0x3fd1178e8227e47c + .quad 0x3fd102ac0a35cc1c + .quad 0x3fd0edd060b78081 + .quad 0x3fd0d8fb813eb1ef + .quad 0x3fd0c42d676162e3 + .quad 0x3fd0af660eb9e279 + .quad 0x3fd09aa572e6c6d4 + .quad 0x3fd085eb8f8ae797 + .quad 0x3fd07138604d5862 + .quad 0x3fd05c8be0d9635a + .quad 0x3fd047e60cde83b8 + .quad 0x3fd03346e0106062 + .quad 0x3fd01eae5626c691 + .quad 0x3fd00a1c6adda473 + .quad 0x3fcfeb2233ea07cd + .quad 0x3fcfc218be620a5e + .quad 0x3fcf991c6cb3b379 + .quad 
0x3fcf702d36777df0 + .quad 0x3fcf474b134df229 + .quad 0x3fcf1e75fadf9bde + .quad 0x3fcef5ade4dcffe6 + .quad 0x3fceccf2c8fe920a + .quad 0x3fcea4449f04aaf5 + .quad 0x3fce7ba35eb77e2a + .quad 0x3fce530effe71012 + .quad 0x3fce2a877a6b2c12 + .quad 0x3fce020cc6235ab5 + .quad 0x3fcdd99edaf6d7e9 + .quad 0x3fcdb13db0d48940 + .quad 0x3fcd88e93fb2f450 + .quad 0x3fcd60a17f903515 + .quad 0x3fcd38666871f465 + .quad 0x3fcd1037f2655e7b + .quad 0x3fcce816157f1988 + .quad 0x3fccc000c9db3c52 + .quad 0x3fcc97f8079d44ec + .quad 0x3fcc6ffbc6f00f71 + .quad 0x3fcc480c0005ccd1 + .quad 0x3fcc2028ab17f9b4 + .quad 0x3fcbf851c067555f + .quad 0x3fcbd087383bd8ad + .quad 0x3fcba8c90ae4ad19 + .quad 0x3fcb811730b823d2 + .quad 0x3fcb5971a213acdb + .quad 0x3fcb31d8575bce3d + .quad 0x3fcb0a4b48fc1b46 + .quad 0x3fcae2ca6f672bd4 + .quad 0x3fcabb55c31693ad + .quad 0x3fca93ed3c8ad9e3 + .quad 0x3fca6c90d44b704e + .quad 0x3fca454082e6ab05 + .quad 0x3fca1dfc40f1b7f1 + .quad 0x3fc9f6c407089664 + .quad 0x3fc9cf97cdce0ec3 + .quad 0x3fc9a8778debaa38 + .quad 0x3fc981634011aa75 + .quad 0x3fc95a5adcf7017f + .quad 0x3fc9335e5d594989 + .quad 0x3fc90c6db9fcbcd9 + .quad 0x3fc8e588ebac2dbf + .quad 0x3fc8beafeb38fe8c + .quad 0x3fc897e2b17b19a5 + .quad 0x3fc871213750e994 + .quad 0x3fc84a6b759f512f + .quad 0x3fc823c16551a3c2 + .quad 0x3fc7fd22ff599d4f + .quad 0x3fc7d6903caf5ad0 + .quad 0x3fc7b0091651528c + .quad 0x3fc7898d85444c73 + .quad 0x3fc7631d82935a86 + .quad 0x3fc73cb9074fd14d + .quad 0x3fc716600c914054 + .quad 0x3fc6f0128b756abc + .quad 0x3fc6c9d07d203fc7 + .quad 0x3fc6a399dabbd383 + .quad 0x3fc67d6e9d785771 + .quad 0x3fc6574ebe8c133a + .quad 0x3fc6313a37335d76 + .quad 0x3fc60b3100b09476 + .quad 0x3fc5e533144c1719 + .quad 0x3fc5bf406b543db2 + .quad 0x3fc59958ff1d52f1 + .quad 0x3fc5737cc9018cdd + .quad 0x3fc54dabc26105d2 + .quad 0x3fc527e5e4a1b58d + .quad 0x3fc5022b292f6a45 + .quad 0x3fc4dc7b897bc1c8 + .quad 0x3fc4b6d6fefe22a4 + .quad 0x3fc4913d8333b561 + .quad 0x3fc46baf0f9f5db7 + .quad 0x3fc4462b9dc9b3dc + .quad 
0x3fc420b32740fdd4 + .quad 0x3fc3fb45a59928cc + .quad 0x3fc3d5e3126bc27f + .quad 0x3fc3b08b6757f2a9 + .quad 0x3fc38b3e9e027479 + .quad 0x3fc365fcb0159016 + .quad 0x3fc340c59741142e + .quad 0x3fc31b994d3a4f85 + .quad 0x3fc2f677cbbc0a96 + .quad 0x3fc2d1610c86813a + .quad 0x3fc2ac55095f5c59 + .quad 0x3fc28753bc11aba5 + .quad 0x3fc2625d1e6ddf57 + .quad 0x3fc23d712a49c202 + .quad 0x3fc2188fd9807263 + .quad 0x3fc1f3b925f25d41 + .quad 0x3fc1ceed09853752 + .quad 0x3fc1aa2b7e23f72a + .quad 0x3fc185747dbecf34 + .quad 0x3fc160c8024b27b1 + .quad 0x3fc13c2605c398c3 + .quad 0x3fc1178e8227e47c + .quad 0x3fc0f301717cf0fb + .quad 0x3fc0ce7ecdccc28d + .quad 0x3fc0aa06912675d5 + .quad 0x3fc08598b59e3a07 + .quad 0x3fc06135354d4b18 + .quad 0x3fc03cdc0a51ec0d + .quad 0x3fc0188d2ecf6140 + .quad 0x3fbfe89139dbd566 + .quad 0x3fbfa01c9db57ce2 + .quad 0x3fbf57bc7d9005db + .quad 0x3fbf0f70cdd992e3 + .quad 0x3fbec739830a1120 + .quad 0x3fbe7f1691a32d3e + .quad 0x3fbe3707ee30487b + .quad 0x3fbdef0d8d466db9 + .quad 0x3fbda727638446a2 + .quad 0x3fbd5f55659210e2 + .quad 0x3fbd179788219364 + .quad 0x3fbccfedbfee13a8 + .quad 0x3fbc885801bc4b23 + .quad 0x3fbc40d6425a5cb1 + .quad 0x3fbbf968769fca11 + .quad 0x3fbbb20e936d6974 + .quad 0x3fbb6ac88dad5b1c + .quad 0x3fbb23965a52ff00 + .quad 0x3fbadc77ee5aea8c + .quad 0x3fba956d3ecade63 + .quad 0x3fba4e7640b1bc38 + .quad 0x3fba0792e9277cac + .quad 0x3fb9c0c32d4d2548 + .quad 0x3fb97a07024cbe74 + .quad 0x3fb9335e5d594989 + .quad 0x3fb8ecc933aeb6e8 + .quad 0x3fb8a6477a91dc29 + .quad 0x3fb85fd927506a48 + .quad 0x3fb8197e2f40e3f0 + .quad 0x3fb7d33687c293c9 + .quad 0x3fb78d02263d82d3 + .quad 0x3fb746e100226ed9 + .quad 0x3fb700d30aeac0e1 + .quad 0x3fb6bad83c1883b6 + .quad 0x3fb674f089365a7a + .quad 0x3fb62f1be7d77743 + .quad 0x3fb5e95a4d9791cb + .quad 0x3fb5a3abb01ade25 + .quad 0x3fb55e10050e0384 + .quad 0x3fb518874226130a + .quad 0x3fb4d3115d207eac + .quad 0x3fb48dae4bc31018 + .quad 0x3fb4485e03dbdfad + .quad 0x3fb403207b414b7f + .quad 0x3fb3bdf5a7d1ee64 + .quad 
0x3fb378dd7f749714 + .quad 0x3fb333d7f8183f4b + .quad 0x3fb2eee507b40301 + .quad 0x3fb2aa04a44717a5 + .quad 0x3fb26536c3d8c369 + .quad 0x3fb2207b5c78549e + .quad 0x3fb1dbd2643d190b + .quad 0x3fb1973bd1465567 + .quad 0x3fb152b799bb3cc9 + .quad 0x3fb10e45b3cae831 + .quad 0x3fb0c9e615ac4e17 + .quad 0x3fb08598b59e3a07 + .quad 0x3fb0415d89e74444 + .quad 0x3faffa6911ab9301 + .quad 0x3faf723b517fc523 + .quad 0x3faeea31c006b87c + .quad 0x3fae624c4a0b5e1b + .quad 0x3fadda8adc67ee4e + .quad 0x3fad52ed6405d86f + .quad 0x3faccb73cdddb2cc + .quad 0x3fac441e06f72a9e + .quad 0x3fabbcebfc68f420 + .quad 0x3fab35dd9b58baad + .quad 0x3faaaef2d0fb10fc + .quad 0x3faa282b8a936171 + .quad 0x3fa9a187b573de7c + .quad 0x3fa91b073efd7314 + .quad 0x3fa894aa149fb343 + .quad 0x3fa80e7023d8ccc4 + .quad 0x3fa788595a3577ba + .quad 0x3fa70265a550e777 + .quad 0x3fa67c94f2d4bb58 + .quad 0x3fa5f6e73078efb8 + .quad 0x3fa5715c4c03ceef + .quad 0x3fa4ebf43349e26f + .quad 0x3fa466aed42de3ea + .quad 0x3fa3e18c1ca0ae92 + .quad 0x3fa35c8bfaa1306b + .quad 0x3fa2d7ae5c3c5bae + .quad 0x3fa252f32f8d183f + .quad 0x3fa1ce5a62bc353a + .quad 0x3fa149e3e4005a8d + .quad 0x3fa0c58fa19dfaaa + .quad 0x3fa0415d89e74444 + .quad 0x3f9f7a9b16782856 + .quad 0x3f9e72bf2813ce51 + .quad 0x3f9d6b2725979802 + .quad 0x3f9c63d2ec14aaf2 + .quad 0x3f9b5cc258b718e6 + .quad 0x3f9a55f548c5c43f + .quad 0x3f994f6b99a24475 + .quad 0x3f98492528c8cabf + .quad 0x3f974321d3d006d3 + .quad 0x3f963d6178690bd6 + .quad 0x3f9537e3f45f3565 + .quad 0x3f9432a925980cc1 + .quad 0x3f932db0ea132e22 + .quad 0x3f9228fb1fea2e28 + .quad 0x3f912487a5507f70 + .quad 0x3f90205658935847 + .quad 0x3f8e38ce3033310c + .quad 0x3f8c317384c75f06 + .quad 0x3f8a2a9c6c170462 + .quad 0x3f882448a388a2aa + .quad 0x3f861e77e8b53fc6 + .quad 0x3f841929f96832f0 + .quad 0x3f82145e939ef1e9 + .quad 0x3f8010157588de71 + .quad 0x3f7c189cbb0e27fb + .quad 0x3f78121214586b54 + .quad 0x3f740c8a747878e2 + .quad 0x3f70080559588b35 + .quad 0x3f680904828985c0 + .quad 0x3f60040155d5889e + .quad 
0x3f50020055655889 + .quad 0x0000000000000000 + .rept 56 + .byte 0 + .endr + +/* Polynomial coefficients: */ +double_vector _poly_coeff_1 0x3fc9999cacdb4d0a +double_vector _poly_coeff_2 0xbfd0000148058ee1 +double_vector _poly_coeff_3 0x3fd55555555543c5 +double_vector _poly_coeff_4 0xbfdffffffffff81f + +/* Exponent mask (sign and exponent bits clear, 52 mantissa bits set) */ +double_vector _ExpMask 0x000fffffffffffff + +/* 2^-10 */ +double_vector _Two10 0x3f50000000000000 + +/* Minimum normal number */ +double_vector _MinNorm 0x0010000000000000 + +/* Maximum normal number */ +double_vector _MaxNorm 0x7fefffffffffffff + +/* Half of mantissa mask */ +double_vector _HalfMask 0xfffffffffc000000 + +/* 1.0 */ +double_vector _One 0x3ff0000000000000 + +/* log(2) high part */ +double_vector _L2H 0x3fe62e42fefa0000 + +/* log(2) low part */ +double_vector _L2L 0x3d7cf79abc9e0000 + +/* Work range threshold = 724 */ +double_vector _Threshold 0x4086a00000000000 + +/* Bias */ +double_vector _Bias 0x408ff80000000000 + +/* Bias (-1 bit) */ +double_vector _Bias1 0x408ff00000000000 + +/* log(2) */ +double_vector _L2 0x3fe62e42fefa39ef + +/* General purpose constants: + DP infinities, +/- */ +.if .-__svml_dlog_data != _dInfs +.err +.endif + .quad 0x7ff0000000000000 + .quad 0xfff0000000000000 + .rept 48 + .byte 0 + .endr + +/* DP 1.0, +/- */ +.if .-__svml_dlog_data != _dOnes +.err +.endif + .quad 0x3ff0000000000000 + .quad 0xbff0000000000000 + .rept 48 + .byte 0 + .endr + +/* DP 0.0, +/- */ +.if .-__svml_dlog_data != _dZeros +.err +.endif + .quad 0x0000000000000000 + .quad 0x8000000000000000 + .rept 48 + .byte 0 + .endr + .type __svml_dlog_data,@object + .size __svml_dlog_data,.-__svml_dlog_data diff --git a/sysdeps/x86_64/fpu/svml_d_log_data.h b/sysdeps/x86_64/fpu/svml_d_log_data.h new file mode 100644 index 0000000000..8ca55a8010 --- /dev/null +++ b/sysdeps/x86_64/fpu/svml_d_log_data.h @@ -0,0 +1,54 @@ +/* Offsets for data table for function log. + Copyright (C) 2014-2015 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef D_LOG_DATA_H +#define D_LOG_DATA_H + +#define _LogRcp_lookup -4218816 +#define _Log_HA_table 0 +#define _Log_LA_table 8256 +#define _poly_coeff_1 12416 +#define _poly_coeff_2 12480 +#define _poly_coeff_3 12544 +#define _poly_coeff_4 12608 +#define _ExpMask 12672 +#define _Two10 12736 +#define _MinNorm 12800 +#define _MaxNorm 12864 +#define _HalfMask 12928 +#define _One 12992 +#define _L2H 13056 +#define _L2L 13120 +#define _Threshold 13184 +#define _Bias 13248 +#define _Bias1 13312 +#define _L2 13376 +#define _dInfs 13440 +#define _dOnes 13504 +#define _dZeros 13568 + +.macro double_vector offset value +.if .-__svml_dlog_data != \offset +.err +.endif +.rept 8 +.quad \value +.endr +.endm + +#endif diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c index 347aab5e8a..dfbc3d3708 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c @@ -24,3 +24,4 @@ VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVbN2v_cos) VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVbN2v_sin) +VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVbN2v_log) diff --git a/sysdeps/x86_64/fpu/test-double-vlen2.c b/sysdeps/x86_64/fpu/test-double-vlen2.c index 353b680039..a119bfc33a 100644 --- 
a/sysdeps/x86_64/fpu/test-double-vlen2.c +++ b/sysdeps/x86_64/fpu/test-double-vlen2.c @@ -20,5 +20,6 @@ #define TEST_VECTOR_cos 1 #define TEST_VECTOR_sin 1 +#define TEST_VECTOR_log 1 #include "libm-test.c" diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c index 006c795d8b..6e01a8945f 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c @@ -27,3 +27,4 @@ VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVdN4v_cos) VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVdN4v_sin) +VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVdN4v_log) diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c index 51247b7a48..ef6e1c2a42 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c @@ -23,6 +23,7 @@ #define TEST_VECTOR_cos 1 #define TEST_VECTOR_sin 1 +#define TEST_VECTOR_log 1 #define REQUIRE_AVX2 diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c index b87454e864..e9f890573f 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c @@ -24,3 +24,4 @@ VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVcN4v_cos) VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVcN4v_sin) +VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVcN4v_log) diff --git a/sysdeps/x86_64/fpu/test-double-vlen4.c b/sysdeps/x86_64/fpu/test-double-vlen4.c index 4c1aefa979..71ea85cc0e 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen4.c +++ b/sysdeps/x86_64/fpu/test-double-vlen4.c @@ -20,5 +20,6 @@ #define TEST_VECTOR_cos 1 #define TEST_VECTOR_sin 1 +#define TEST_VECTOR_log 1 #include "libm-test.c" diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c index b789f5e18d..290d59c74c 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c +++ 
b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c @@ -24,3 +24,4 @@ VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVeN8v_cos) VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVeN8v_sin) +VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVeN8v_log) diff --git a/sysdeps/x86_64/fpu/test-double-vlen8.c b/sysdeps/x86_64/fpu/test-double-vlen8.c index 99982800ee..e2f2cfef2e 100644 --- a/sysdeps/x86_64/fpu/test-double-vlen8.c +++ b/sysdeps/x86_64/fpu/test-double-vlen8.c @@ -20,6 +20,7 @@ #define TEST_VECTOR_cos 1 #define TEST_VECTOR_sin 1 +#define TEST_VECTOR_log 1 #define REQUIRE_AVX512F -- 2.43.5