This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
On Tue, Dec 23, 2014 at 08:15:00PM +0100, Ondřej Bílka wrote: > On Mon, Dec 22, 2014 at 02:56:10PM +0300, Andrew Senkevich wrote: > > Hi, > > > > it is ping for patch which was discussed at the beginning of august, > > end of discussion is > > https://sourceware.org/ml/libc-alpha/2014-08/msg00078.html > > > > Is it Ok for trunk? > > > It looked OK for me performancewise. I planned to check that for bugs > which is still in my TODO list. If somebody else checked that it would > be ok to commit. I checked it in with a couple fixes: 1. Add __bcopy_sse2_unaligned. 2. Check HAS_SSE2 in ifunc-impl-list.c. 3. Replace !defined NOT_IN_libc with IS_IN (libc). 4. Check bit_SSE2 before using __xxx_sse2_unaligned. 5. Replace CPUID_OFFSET with FEATURE_OFFSET in bit_Fast_Unaligned_Load check. Andrew, please double check my checkin. Thanks. H.J. -- diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/bcopy.S index a0fca88..4041eed 100644 --- a/sysdeps/i386/i686/multiarch/bcopy.S +++ b/sysdeps/i386/i686/multiarch/bcopy.S @@ -35,6 +35,11 @@ ENTRY(bcopy) jne 1f call __init_cpu_features 1: leal __bcopy_ia32@GOTOFF(%ebx), %eax + testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __bcopy_sse2_unaligned@GOTOFF(%ebx), %eax + testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx) + jnz 2f testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) jz 2f leal __bcopy_ssse3@GOTOFF(%ebx), %eax diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c index 8a6c4a8..4efa9c5 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c +++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c @@ -41,6 +41,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3_rep) IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3) + IFUNC_IMPL_ADD (array, i, bcopy, 
HAS_SSE2, + __bcopy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32)) /* Support sysdeps/i386/i686/multiarch/bzero.S. */ @@ -69,7 +71,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memmove_chk_ssse3_rep) IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3, __memmove_chk_ssse3) - IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSE2, __memmove_chk_sse2_unaligned) IFUNC_IMPL_ADD (array, i, __memmove_chk, 1, __memmove_chk_ia32)) @@ -80,7 +82,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memmove_ssse3_rep) IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3, __memmove_ssse3) - IFUNC_IMPL_ADD (array, i, memmove, 1, + IFUNC_IMPL_ADD (array, i, memmove, HAS_SSE2, __memmove_sse2_unaligned) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32)) @@ -272,7 +274,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memcpy_chk_ssse3_rep) IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3, __memcpy_chk_ssse3) - IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1, + IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSE2, __memcpy_chk_sse2_unaligned) IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1, __memcpy_chk_ia32)) @@ -282,7 +284,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3_rep) IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3) - IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned) + IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSE2, + __memcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32)) /* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S. 
*/ @@ -291,7 +294,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __mempcpy_chk_ssse3_rep) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3, __mempcpy_chk_ssse3) - IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3, + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSE2, __mempcpy_chk_sse2_unaligned) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1, __mempcpy_chk_ia32)) @@ -302,7 +305,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __mempcpy_ssse3_rep) IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3, __mempcpy_ssse3) - IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2_unaligned) + IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSE2, + __mempcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32)) /* Support sysdeps/i386/i686/multiarch/strlen.S. */ diff --git a/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S index fc85c18..ff89de2 100644 --- a/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S +++ b/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S @@ -1,5 +1,5 @@ -/* memcpy optimized with sse2 unaligned memory access instructions. - Copyright (C) 2010-2014 Free Software Foundation, Inc. +/* memcpy optimized with SSE2 unaligned memory access instructions. + Copyright (C) 2014 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,7 +16,7 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. 
*/ -#if !defined NOT_IN_libc \ +#if IS_IN (libc) \ && (defined SHARED \ || defined USE_AS_MEMMOVE \ || !defined USE_MULTIARCH) @@ -29,9 +29,15 @@ # define MEMCPY_CHK __memcpy_chk_sse2_unaligned # endif -# define DEST PARMS -# define SRC DEST+4 -# define LEN SRC+4 +# ifdef USE_AS_BCOPY +# define SRC PARMS +# define DEST SRC+4 +# define LEN DEST+4 +# else +# define DEST PARMS +# define SRC DEST+4 +# define LEN SRC+4 +# endif # define CFI_PUSH(REG) \ cfi_adjust_cfa_offset (4); \ @@ -665,7 +671,7 @@ L(len_5_8_bytes): L(return): movl %edx, %eax -# ifdef USE_AS_MEMPCPY +# if !defined USE_AS_BCOPY && defined USE_AS_MEMPCPY movl LEN(%esp), %ecx add %ecx, %eax # endif diff --git a/sysdeps/i386/i686/multiarch/memcpy.S b/sysdeps/i386/i686/multiarch/memcpy.S index 6e70730..845492c 100644 --- a/sysdeps/i386/i686/multiarch/memcpy.S +++ b/sysdeps/i386/i686/multiarch/memcpy.S @@ -35,10 +35,12 @@ ENTRY(memcpy) cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) jne 1f call __init_cpu_features -1: leal __memcpy_sse2_unaligned@GOTOFF(%ebx), %eax - testl $bit_Fast_Unaligned_Load, CPUID_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx) +1: leal __memcpy_ia32@GOTOFF(%ebx), %eax + testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __memcpy_sse2_unaligned@GOTOFF(%ebx), %eax + testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx) jnz 2f - leal __memcpy_ia32@GOTOFF(%ebx), %eax testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) jz 2f leal __memcpy_ssse3@GOTOFF(%ebx), %eax diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.S b/sysdeps/i386/i686/multiarch/memcpy_chk.S index 142ba14..415d910 100644 --- a/sysdeps/i386/i686/multiarch/memcpy_chk.S +++ b/sysdeps/i386/i686/multiarch/memcpy_chk.S @@ -36,10 +36,12 @@ ENTRY(__memcpy_chk) cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) jne 1f call __init_cpu_features -1: leal __memcpy_chk_sse2_unaligned@GOTOFF(%ebx), %eax - testl 
$bit_Fast_Unaligned_Load, CPUID_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx) +1: leal __memcpy_chk_ia32@GOTOFF(%ebx), %eax + testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __memcpy_chk_sse2_unaligned@GOTOFF(%ebx), %eax + testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx) jnz 2f - leal __memcpy_chk_ia32@GOTOFF(%ebx), %eax testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) jz 2f leal __memcpy_chk_ssse3@GOTOFF(%ebx), %eax diff --git a/sysdeps/i386/i686/multiarch/memmove.S b/sysdeps/i386/i686/multiarch/memmove.S index 3975545..29644dd 100644 --- a/sysdeps/i386/i686/multiarch/memmove.S +++ b/sysdeps/i386/i686/multiarch/memmove.S @@ -34,10 +34,12 @@ ENTRY(memmove) cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) jne 1f call __init_cpu_features -1: leal __memmove_sse2_unaligned@GOTOFF(%ebx), %eax - testl $bit_Fast_Unaligned_Load, CPUID_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx) +1: leal __memmove_ia32@GOTOFF(%ebx), %eax + testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __memmove_sse2_unaligned@GOTOFF(%ebx), %eax + testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx) jnz 2f - leal __memmove_ia32@GOTOFF(%ebx), %eax testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) jz 2f leal __memmove_ssse3@GOTOFF(%ebx), %eax @@ -65,10 +67,12 @@ ENTRY(memmove) cmpl $0, KIND_OFFSET+__cpu_features jne 1f call __init_cpu_features -1: leal __memmove_sse2_unaligned, %eax - testl $bit_Fast_Unaligned_Load, CPUID_OFFSET+index_Fast_Unaligned_Load+__cpu_features +1: leal __memmove_ia32, %eax + testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features + jz 2f + leal __memmove_sse2_unaligned, %eax + testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features jnz 2f - leal __memmove_ia32, %eax testl $bit_SSSE3, 
CPUID_OFFSET+index_SSSE3+__cpu_features jz 2f leal __memmove_ssse3, %eax diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.S b/sysdeps/i386/i686/multiarch/memmove_chk.S index 0b560f9..fea9b54 100644 --- a/sysdeps/i386/i686/multiarch/memmove_chk.S +++ b/sysdeps/i386/i686/multiarch/memmove_chk.S @@ -34,10 +34,12 @@ ENTRY(__memmove_chk) cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) jne 1f call __init_cpu_features -1: leal __memmove_chk_sse2_unaligned@GOTOFF(%ebx), %eax - testl $bit_Fast_Unaligned_Load, CPUID_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx) +1: leal __memmove_chk_ia32@GOTOFF(%ebx), %eax + testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __memmove_chk_sse2_unaligned@GOTOFF(%ebx), %eax + testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx) jnz 2f - leal __memmove_chk_ia32@GOTOFF(%ebx), %eax testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) jz 2f leal __memmove_chk_ssse3@GOTOFF(%ebx), %eax @@ -56,10 +58,12 @@ ENTRY(__memmove_chk) cmpl $0, KIND_OFFSET+__cpu_features jne 1f call __init_cpu_features -1: leal __memmove_chk_sse2_unaligned, %eax - testl $bit_Fast_Unaligned_Load, CPUID_OFFSET+index_Fast_Unaligned_Load+__cpu_features +1: leal __memmove_chk_ia32, %eax + testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features + jz 2f + leal __memmove_chk_sse2_unaligned, %eax + testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features jnz 2f - leal __memmove_chk_ia32, %eax testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features jz 2f leal __memmove_chk_ssse3, %eax diff --git a/sysdeps/i386/i686/multiarch/mempcpy.S b/sysdeps/i386/i686/multiarch/mempcpy.S index dab21e0..fd8b82c 100644 --- a/sysdeps/i386/i686/multiarch/mempcpy.S +++ b/sysdeps/i386/i686/multiarch/mempcpy.S @@ -35,10 +35,12 @@ ENTRY(__mempcpy) cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) jne 1f call __init_cpu_features -1: leal 
__mempcpy_sse2_unaligned@GOTOFF(%ebx), %eax - testl $bit_Fast_Unaligned_Load, CPUID_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx) +1: leal __mempcpy_ia32@GOTOFF(%ebx), %eax + testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __mempcpy_sse2_unaligned@GOTOFF(%ebx), %eax + testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx) jnz 2f - leal __mempcpy_ia32@GOTOFF(%ebx), %eax testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) jz 2f leal __mempcpy_ssse3@GOTOFF(%ebx), %eax diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/sysdeps/i386/i686/multiarch/mempcpy_chk.S index 6a408ab..ed23b1b 100644 --- a/sysdeps/i386/i686/multiarch/mempcpy_chk.S +++ b/sysdeps/i386/i686/multiarch/mempcpy_chk.S @@ -36,10 +36,12 @@ ENTRY(__mempcpy_chk) cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx) jne 1f call __init_cpu_features -1: leal __mempcpy_chk_sse2_unaligned@GOTOFF(%ebx), %eax - testl $bit_Fast_Unaligned_Load, CPUID_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx) - jnz 2f leal __mempcpy_chk_ia32@GOTOFF(%ebx), %eax +1: testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx) + jz 2f + leal __mempcpy_chk_sse2_unaligned@GOTOFF(%ebx), %eax + testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx) + jnz 2f testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx) jz 2f leal __mempcpy_chk_ssse3@GOTOFF(%ebx), %eax
Attachment:
0001-i386-memcpy-functions-with-SSE2-unaligned-load-store.patch
Description: Text document
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |