This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH v2] Fix rawmemchr regression on bulldozer
- From: Ondřej Bílka <neleai at seznam dot cz>
- To: Andreas Jaeger <aj at suse dot com>
- Cc: Liubov Dmitrieva <liubov dot dmitrieva at gmail dot com>, GNU C Library <libc-alpha at sourceware dot org>
- Date: Thu, 29 Aug 2013 09:32:57 +0200
- Subject: [PATCH v2] Fix rawmemchr regression on bulldozer
- Authentication-results: sourceware.org; auth=none
- References: <20130805122624 dot GA4682 at domone dot kolej dot mff dot cuni dot cz> <521CDBF1 dot 7020005 at suse dot com> <CAHjhQ91k+j-rfCbJ9vx-B0ZmXaVUAT3X_xbGCgFcvdtP1dLkGA at mail dot gmail dot com> <521CDFC7 dot 3000102 at suse dot com>
On Tue, Aug 27, 2013 at 07:20:07PM +0200, Andreas Jaeger wrote:
> On 08/27/2013 07:10 PM, Liubov Dmitrieva wrote:
> > This patch is ok. We don't use SSE42 version for any Intel Processor,
> > so there is no changes here impacting Intel.
> > This is a good clean up.
>
> Thanks for the confirmation. Ondrey, could you update
> ./sysdeps/x86_64/multiarch/ifunc-impl-list.c for your change and resend
> both patches?
>
Here is v2 of the rawmemchr patch:
* sysdeps/x86_64/multiarch/rawmemchr.S: Delete.
* sysdeps/x86_64/multiarch/ifunc-impl-list.c: Remove rawmemchr ifunc.
---
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 6 --
sysdeps/x86_64/multiarch/rawmemchr.S | 103 -----------------------------
2 files changed, 109 deletions(-)
delete mode 100644 sysdeps/x86_64/multiarch/rawmemchr.S
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 28d3579..d0992e1 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -61,12 +61,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__memmove_ssse3)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
- /* Support sysdeps/x86_64/multiarch/rawmemchr.S. */
- IFUNC_IMPL (i, name, rawmemchr,
- IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_SSE4_2,
- __rawmemchr_sse42)
- IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2))
-
/* Support sysdeps/x86_64/multiarch/stpncpy.S. */
IFUNC_IMPL (i, name, stpncpy,
IFUNC_IMPL_ADD (array, i, stpncpy, HAS_SSSE3,
diff --git a/sysdeps/x86_64/multiarch/rawmemchr.S b/sysdeps/x86_64/multiarch/rawmemchr.S
deleted file mode 100644
index 50de38f..0000000
--- a/sysdeps/x86_64/multiarch/rawmemchr.S
+++ /dev/null
@@ -1,103 +0,0 @@
-/* Multiple versions of rawmemchr
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2013 Free Software Foundation, Inc.
- Contributed by Ulrich Drepper <drepper@redhat.com>.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-
-/* Define multiple versions only for the definition in lib. */
-#ifndef NOT_IN_libc
- .text
-ENTRY(rawmemchr)
- .type rawmemchr, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: testl $bit_Prefer_PMINUB_for_stringop, __cpu_features+FEATURE_OFFSET+index_Prefer_PMINUB_for_stringop(%rip)
- jnz 2f
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
- jz 2f
- leaq __rawmemchr_sse42(%rip), %rax
- ret
-2: leaq __rawmemchr_sse2(%rip), %rax
- ret
-
-END(rawmemchr)
-strong_alias (rawmemchr, __rawmemchr)
-
-
- .section .text.sse4.2,"ax",@progbits
- .align 16
- .type __rawmemchr_sse42, @function
- .globl __rawmemchr_sse42
- .hidden __rawmemchr_sse42
-__rawmemchr_sse42:
- cfi_startproc
- CALL_MCOUNT
- movd %esi, %xmm1
- movq %rdi, %rcx
- pxor %xmm2, %xmm2
- andq $~15, %rdi
- orl $0xffffffff, %esi
- pshufb %xmm2, %xmm1
- movdqa (%rdi), %xmm0
- subq %rdi, %rcx
- pcmpeqb %xmm1, %xmm0
- shl %cl, %esi
- pmovmskb %xmm0, %ecx
- movl $16, %eax
- movl $16, %edx
- andl %esi, %ecx
- jnz 1f
-
-2: pcmpestri $0x08, 16(%rdi), %xmm1
- leaq 16(%rdi), %rdi
- jnc 2b
-
- leaq (%rdi,%rcx), %rax
- ret
-
-1: bsfl %ecx, %eax
- addq %rdi, %rax
- ret
- cfi_endproc
- .size __rawmemchr_sse42, .-__rawmemchr_sse42
-
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __rawmemchr_sse2, @function; \
- .align 16; \
- .globl __rawmemchr_sse2; \
- .hidden __rawmemchr_sse2; \
- __rawmemchr_sse2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __rawmemchr_sse2, .-__rawmemchr_sse2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal rawmemchr calls through a PLT.
- The speedup we get from using SSE4.2 instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI___rawmemchr; __GI___rawmemchr = __rawmemchr_sse2
-#endif
-
-#include "../rawmemchr.S"
--
1.8.3.2