This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] faster strcat
- From: Dmitrieva Liubov <liubov dot dmitrieva at gmail dot com>
- To: Ondřej Bílka <neleai at seznam dot cz>
- Cc: libc-alpha at sourceware dot org
- Date: Tue, 9 Oct 2012 16:37:34 +0400
- Subject: Re: [PATCH] faster strcat
- References: <20121008111514.GA18330@domone.kolej.mff.cuni.cz>
Why do you think this is faster?
Your version iterates over the whole src array twice instead of once as
in the current version (the first pass is strnlen and the second is
strcpy; the current assembler version uses something like strncpy
instead, but without strncpy's zero filling).
It's likely true that the current version inlines a slow strlen, but that
code can be switched to use another strlen version.
+ size_t dest_len = strlen (dest);
+ size_t src_len = strnlen (src , n);
- if (c != '\0')
- *++s1 = '\0';
+ if (src_len == n)
+ {
+ memcpy (dest + dest_len, src, n);
+ dest[dest_len + n] = '\0';
+ }
+ else
+ strcpy (dest + dest_len, src);
- return s;
+ return dest;
}
--
Liubov Dmitrieva
Intel Corporation
2012/10/8 Ondřej Bílka <neleai@seznam.cz>
>
> This is the next version of my patch
> http://sourceware.org/ml/libc-alpha/2012-06/msg00489.html
>
> I investigated strcat a bit further, and the speed degradation
> was caused by improper usage of indirect functions.
>
> The strcat ifunc first tests bit_Fast_Unaligned_Load, which is
> false on core2 and AMD processors. Then it checks ssse3 and
> calls the ssse3 version.
> But strcat_ssse3 inlines strlen_sse2_no_bsf, which on core2 and phenomII
> is the slowest strlen variant unless the strings are larger than 2000,
> where strlen_sse2 takes the lead.
>
> Then I deleted strcat variants that are no longer needed.
>
> The files ports/sysdeps/ia64/strcat.c and sysdeps/powerpc/strcat.c became
> duplicates of string/strcat.c.
>
>
> * string/strcat.c: Reduce algorithm selection
> to strlen,strcpy
> * string/strncat.c: Likewise
> * sysdeps/powerpc/strcat.c: Duplicated string/strcat.c
> * ports/sysdeps/ia64/strcat.c: Likewise
>
> * sysdeps/i386/i686/multiarch/Makefile: Updated
> * sysdeps/x86_64/multiarch/Makefile: Updated
>
> * sysdeps/i386/i486/strcat.S: No longer needed
> * sysdeps/i386/i686/multiarch/strcat-sse2.S:Likewise
> * sysdeps/i386/i686/multiarch/strcat-ssse3.S:Likewise
> * sysdeps/i386/i686/multiarch/strcat.S:Likewise
> * sysdeps/i386/i686/multiarch/strncat-c.c:Likewise
> * sysdeps/i386/i686/multiarch/strncat-sse2.S:Likewise
> * sysdeps/i386/i686/multiarch/strncat-ssse3.S:Likewise
> * sysdeps/i386/i686/multiarch/strncat.S:Likewise
> * sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S:Likewise
> * sysdeps/x86_64/multiarch/strcat-ssse3.S:Likewise
> * sysdeps/x86_64/multiarch/strcat.S:Likewise
> * sysdeps/x86_64/multiarch/strncat-c.c:Likewise
> * sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S:Likewise
> * sysdeps/x86_64/multiarch/strncat-ssse3.S:Likewise
> * sysdeps/x86_64/multiarch/strncat.S:Likewise
> * sysdeps/x86_64/strcat.S:Likewise
>
>
> ---
> ports/sysdeps/ia64/strcat.c | 26 -
> string/strcat.c | 29 +-
> string/strncat.c | 62 +-
> sysdeps/i386/i486/strcat.S | 273 -----
> sysdeps/i386/i686/multiarch/Makefile | 3 +-
> sysdeps/i386/i686/multiarch/strcat-sse2.S | 1243
> ---------------------
> sysdeps/i386/i686/multiarch/strcat-ssse3.S | 572 ----------
> sysdeps/i386/i686/multiarch/strcat.S | 119 --
> sysdeps/i386/i686/multiarch/strncat-c.c | 8 -
> sysdeps/i386/i686/multiarch/strncat-sse2.S | 4 -
> sysdeps/i386/i686/multiarch/strncat-ssse3.S | 4 -
> sysdeps/i386/i686/multiarch/strncat.S | 3 -
> sysdeps/powerpc/strcat.c | 30 -
> sysdeps/x86_64/multiarch/Makefile | 5 +-
> sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S | 53 -
> sysdeps/x86_64/multiarch/strcat-ssse3.S | 557 ---------
> sysdeps/x86_64/multiarch/strcat.S | 84 --
> sysdeps/x86_64/multiarch/strncat-c.c | 8 -
> sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S | 3 -
> sysdeps/x86_64/multiarch/strncat-ssse3.S | 3 -
> sysdeps/x86_64/multiarch/strncat.S | 3 -
> sysdeps/x86_64/strcat.S | 259 -----
> 24 files changed, 15 insertions(+), 4280 deletions(-)
> delete mode 100644 ports/sysdeps/ia64/strcat.c
> delete mode 100644 sysdeps/i386/i486/strcat.S
> delete mode 100644 sysdeps/i386/i686/multiarch/strcat-sse2.S
> delete mode 100644 sysdeps/i386/i686/multiarch/strcat-ssse3.S
> delete mode 100644 sysdeps/i386/i686/multiarch/strcat.S
> delete mode 100644 sysdeps/i386/i686/multiarch/strncat-c.c
> delete mode 100644 sysdeps/i386/i686/multiarch/strncat-sse2.S
> delete mode 100644 sysdeps/i386/i686/multiarch/strncat-ssse3.S
> delete mode 100644 sysdeps/i386/i686/multiarch/strncat.S
> delete mode 100644 sysdeps/powerpc/strcat.c
> delete mode 100644 sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
> delete mode 100644 sysdeps/x86_64/multiarch/strcat-ssse3.S
> delete mode 100644 sysdeps/x86_64/multiarch/strcat.S
> delete mode 100644 sysdeps/x86_64/multiarch/strncat-c.c
> delete mode 100644 sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S
> delete mode 100644 sysdeps/x86_64/multiarch/strncat-ssse3.S
> delete mode 100644 sysdeps/x86_64/multiarch/strncat.S
> delete mode 100644 sysdeps/x86_64/strcat.S
>
> diff --git a/ports/sysdeps/ia64/strcat.c b/ports/sysdeps/ia64/strcat.c
> deleted file mode 100644
> index 53cd4d1..0000000
> --- a/ports/sysdeps/ia64/strcat.c
> +++ /dev/null
> @@ -1,26 +0,0 @@
> -/* Copyright (C) 2004 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -#include <string.h>
> -
> -char *
> -strcat (char *dest, const char *src)
> -{
> - strcpy (dest + strlen (dest), src);
> - return dest;
> -}
> -libc_hidden_builtin_def (strcat)
> diff --git a/string/strcat.c b/string/strcat.c
> index f9e4bc6..28575d0 100644
> --- a/string/strcat.c
> +++ b/string/strcat.c
> @@ -1,4 +1,5 @@
> -/* Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
> +/* strcat version that uses fast strcpy/strlen.
> + Copyright (C) 1997, 2003 Free Software Foundation, Inc.
> This file is part of the GNU C Library.
>
> The GNU C Library is free software; you can redistribute it and/or
> @@ -16,36 +17,14 @@
> <http://www.gnu.org/licenses/>. */
>
> #include <string.h>
> -#include <memcopy.h>
>
> #undef strcat
>
> /* Append SRC on the end of DEST. */
> char *
> -strcat (dest, src)
> - char *dest;
> - const char *src;
> +strcat (char *dest, const char *src)
> {
> - char *s1 = dest;
> - const char *s2 = src;
> - char c;
> -
> - /* Find the end of the string. */
> - do
> - c = *s1++;
> - while (c != '\0');
> -
> - /* Make S1 point before the next character, so we can increment
> - it while memory is read (wins on pipelined cpus). */
> - s1 -= 2;
> -
> - do
> - {
> - c = *s2++;
> - *++s1 = c;
> - }
> - while (c != '\0');
> -
> + strcpy (dest + strlen (dest), src);
> return dest;
> }
> libc_hidden_builtin_def (strcat)
> diff --git a/string/strncat.c b/string/strncat.c
> index dcfb04d..17b4c9a 100644
> --- a/string/strncat.c
> +++ b/string/strncat.c
> @@ -1,4 +1,4 @@
> -/* Copyright (C) 1991,1997,2011 Free Software Foundation, Inc.
> +/* Copyright (C) 1991-2012 Free Software Foundation, Inc.
> This file is part of the GNU C Library.
>
> The GNU C Library is free software; you can redistribute it and/or
> @@ -17,66 +17,20 @@
>
> #include <string.h>
>
> -#ifdef _LIBC
> -# include <memcopy.h>
> -#endif
> -
> #ifndef STRNCAT
> # undef strncat
> # define STRNCAT strncat
> #endif
>
> char *
> -STRNCAT (char *s1, const char *s2, size_t n)
> +STRNCAT (char *dest, const char *src, size_t n)
> {
> - char c;
> - char *s = s1;
> -
> - /* Find the end of S1. */
> - do
> - c = *s1++;
> - while (c != '\0');
> -
> - /* Make S1 point before next character, so we can increment
> - it while memory is read (wins on pipelined cpus). */
> - s1 -= 2;
> -
> - if (n >= 4)
> - {
> - size_t n4 = n >> 2;
> - do
> - {
> - c = *s2++;
> - *++s1 = c;
> - if (c == '\0')
> - return s;
> - c = *s2++;
> - *++s1 = c;
> - if (c == '\0')
> - return s;
> - c = *s2++;
> - *++s1 = c;
> - if (c == '\0')
> - return s;
> - c = *s2++;
> - *++s1 = c;
> - if (c == '\0')
> - return s;
> - } while (--n4 > 0);
> - n &= 3;
> - }
> -
> - while (n > 0)
> - {
> - c = *s2++;
> - *++s1 = c;
> - if (c == '\0')
> - return s;
> - n--;
> - }
> + size_t dest_len = strlen (dest);
> + size_t src_len = strnlen (src , n);
>
> - if (c != '\0')
> - *++s1 = '\0';
> + if (src_len == n)
> + {
> + memcpy (dest + dest_len, src, n);
> + dest[dest_len + n] = '\0';
> + }
> + else
> + strcpy (dest + dest_len, src);
>
> - return s;
> + return dest;
> }
> diff --git a/sysdeps/i386/i486/strcat.S b/sysdeps/i386/i486/strcat.S
> deleted file mode 100644
> index 7596a0d..0000000
> --- a/sysdeps/i386/i486/strcat.S
> +++ /dev/null
> @@ -1,273 +0,0 @@
> -/* strcat(dest, src) -- Append SRC on the end of DEST.
> - For Intel 80x86, x>=4.
> - Copyright (C) 1994-1997,2000,2003,2005 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> - Contributed by Ulrich Drepper <drepper@ipd.info.uni-karlsruhe.de>.
> - Optimised a little by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -#include <sysdep.h>
> -#include "asm-syntax.h"
> -#include "bp-sym.h"
> -#include "bp-asm.h"
> -
> -#define PARMS LINKAGE+4 /* space for 1 saved reg */
> -#define RTN PARMS
> -#define DEST RTN+RTN_SIZE
> -#define SRC DEST+PTR_SIZE
> -
> - .text
> -ENTRY (BP_SYM (strcat))
> - ENTER
> -
> - pushl %edi /* Save callee-safe register. */
> - cfi_adjust_cfa_offset (4)
> -
> - movl DEST(%esp), %edx
> - movl SRC(%esp), %ecx
> - CHECK_BOUNDS_LOW (%edx, DEST(%esp))
> - CHECK_BOUNDS_LOW (%ecx, SRC(%esp))
> -
> - testb $0xff, (%ecx) /* Is source string empty? */
> - jz L(8) /* yes => return */
> -
> - /* Test the first bytes separately until destination is aligned.
> */
> - testl $3, %edx /* destination pointer aligned? */
> - jz L(1) /* yes => begin scan loop */
> - testb $0xff, (%edx) /* is end of string? */
> - jz L(2) /* yes => start appending */
> - incl %edx /* increment source pointer */
> -
> - testl $3, %edx /* destination pointer aligned? */
> - jz L(1) /* yes => begin scan loop */
> - testb $0xff, (%edx) /* is end of string? */
> - jz L(2) /* yes => start appending */
> - incl %edx /* increment source pointer */
> -
> - testl $3, %edx /* destination pointer aligned? */
> - jz L(1) /* yes => begin scan loop */
> - testb $0xff, (%edx) /* is end of string? */
> - jz L(2) /* yes => start appending */
> - incl %edx /* increment source pointer */
> -
> - /* Now we are aligned. Begin scan loop. */
> - jmp L(1)
> -
> - cfi_rel_offset (edi, 0)
> - ALIGN(4)
> -
> -L(4): addl $16,%edx /* increment destination pointer for round
> */
> -
> -L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
> - movl $0xfefefeff, %edi /* magic value */
> -
> - /* If you compare this with the algorithm in memchr.S you will
> - notice that here is an `xorl' statement missing. But you must
> - not forget that we are looking for C == 0 and `xorl $0, %eax'
> - is a no-op. */
> -
> - addl %eax, %edi /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> -
> - /* According to the algorithm we had to reverse the effect of the
> - XOR first and then test the overflow bits. But because the
> - following XOR would destroy the carry flag and it would (in a
> - representation with more than 32 bits) not alter then last
> - overflow, we can now test this condition. If no carry is
> signaled
> - no overflow must have occurred in the last byte => it was 0. */
> - jnc L(3)
> -
> - /* We are only interested in carry bits that change due to the
> - previous add, so remove original bits */
> - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask)
> */
> -
> - /* Now test for the other three overflow bits. */
> - orl $0xfefefeff, %edi /* set all non-carry bits */
> - incl %edi /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> -
> - /* If at least one byte of the word is C we don't get 0 in %ecx.
> */
> - jnz L(3)
> -
> - movl 4(%edx), %eax /* get word from source */
> - movl $0xfefefeff, %edi /* magic value */
> - addl %eax, %edi /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc L(5) /* highest byte is C => stop copying */
> - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask)
> */
> - orl $0xfefefeff, %edi /* set all non-carry bits */
> - incl %edi /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> - jnz L(5) /* one byte is NUL => stop copying */
> -
> - movl 8(%edx), %eax /* get word from source */
> - movl $0xfefefeff, %edi /* magic value */
> - addl %eax, %edi /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc L(6) /* highest byte is C => stop copying */
> - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask)
> */
> - orl $0xfefefeff, %edi /* set all non-carry bits */
> - incl %edi /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> - jnz L(6) /* one byte is NUL => stop copying */
> -
> - movl 12(%edx), %eax /* get word from source */
> - movl $0xfefefeff, %edi /* magic value */
> - addl %eax, %edi /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc L(7) /* highest byte is C => stop copying */
> - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask)
> */
> - orl $0xfefefeff, %edi /* set all non-carry bits */
> - incl %edi /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> - jz L(4) /* no byte is NUL => carry on copying */
> -
> -L(7): addl $4, %edx /* adjust source pointer */
> -L(6): addl $4, %edx
> -L(5): addl $4, %edx
> -
> -L(3): testb %al, %al /* is first byte NUL? */
> - jz L(2) /* yes => start copying */
> - incl %edx /* increment source pointer */
> -
> - testb %ah, %ah /* is second byte NUL? */
> - jz L(2) /* yes => start copying */
> - incl %edx /* increment source pointer */
> -
> - testl $0xff0000, %eax /* is third byte NUL? */
> - jz L(2) /* yes => start copying */
> - incl %edx /* increment source pointer */
> -
> -L(2): subl %ecx, %edx /* reduce number of loop variants */
> -
> - /* Now we have to align the source pointer. */
> - testl $3, %ecx /* pointer correctly aligned? */
> - jz L(29) /* yes => start copy loop */
> - movb (%ecx), %al /* get first byte */
> - movb %al, (%ecx,%edx) /* and store it */
> - andb %al, %al /* is byte NUL? */
> - jz L(8) /* yes => return */
> - incl %ecx /* increment pointer */
> -
> - testl $3, %ecx /* pointer correctly aligned? */
> - jz L(29) /* yes => start copy loop */
> - movb (%ecx), %al /* get first byte */
> - movb %al, (%ecx,%edx) /* and store it */
> - andb %al, %al /* is byte NUL? */
> - jz L(8) /* yes => return */
> - incl %ecx /* increment pointer */
> -
> - testl $3, %ecx /* pointer correctly aligned? */
> - jz L(29) /* yes => start copy loop */
> - movb (%ecx), %al /* get first byte */
> - movb %al, (%ecx,%edx) /* and store it */
> - andb %al, %al /* is byte NUL? */
> - jz L(8) /* yes => return */
> - incl %ecx /* increment pointer */
> -
> - /* Now we are aligned. */
> - jmp L(29) /* start copy loop */
> -
> - ALIGN(4)
> -
> -L(28): movl %eax, 12(%ecx,%edx)/* store word at destination */
> - addl $16, %ecx /* adjust pointer for full round */
> -
> -L(29): movl (%ecx), %eax /* get word from source */
> - movl $0xfefefeff, %edi /* magic value */
> - addl %eax, %edi /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc L(9) /* highest byte is C => stop copying */
> - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask)
> */
> - orl $0xfefefeff, %edi /* set all non-carry bits */
> - incl %edi /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> - jnz L(9) /* one byte is NUL => stop copying */
> - movl %eax, (%ecx,%edx) /* store word to destination */
> -
> - movl 4(%ecx), %eax /* get word from source */
> - movl $0xfefefeff, %edi /* magic value */
> - addl %eax, %edi /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc L(91) /* highest byte is C => stop copying */
> - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask)
> */
> - orl $0xfefefeff, %edi /* set all non-carry bits */
> - incl %edi /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> - jnz L(91) /* one byte is NUL => stop copying */
> - movl %eax, 4(%ecx,%edx) /* store word to destination */
> -
> - movl 8(%ecx), %eax /* get word from source */
> - movl $0xfefefeff, %edi /* magic value */
> - addl %eax, %edi /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc L(92) /* highest byte is C => stop copying */
> - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask)
> */
> - orl $0xfefefeff, %edi /* set all non-carry bits */
> - incl %edi /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> - jnz L(92) /* one byte is NUL => stop copying */
> - movl %eax, 8(%ecx,%edx) /* store word to destination */
> -
> - movl 12(%ecx), %eax /* get word from source */
> - movl $0xfefefeff, %edi /* magic value */
> - addl %eax, %edi /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc L(93) /* highest byte is C => stop copying */
> - xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask)
> */
> - orl $0xfefefeff, %edi /* set all non-carry bits */
> - incl %edi /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> - jz L(28) /* no is NUL => carry on copying */
> -
> -L(93): addl $4, %ecx /* adjust pointer */
> -L(92): addl $4, %ecx
> -L(91): addl $4, %ecx
> -
> -L(9): movb %al, (%ecx,%edx) /* store first byte of last word */
> - orb %al, %al /* is it NUL? */
> - jz L(8) /* yes => return */
> -
> - movb %ah, 1(%ecx,%edx) /* store second byte of last word */
> - orb %ah, %ah /* is it NUL? */
> - jz L(8) /* yes => return */
> -
> - shrl $16, %eax /* make upper bytes accessible */
> - movb %al, 2(%ecx,%edx) /* store third byte of last word */
> - orb %al, %al /* is it NUL? */
> - jz L(8) /* yes => return */
> -
> - movb %ah, 3(%ecx,%edx) /* store fourth byte of last word */
> -
> -L(8): /* GKM FIXME: check high bounds */
> - movl DEST(%esp), %eax /* start address of destination is result
> */
> - RETURN_BOUNDED_POINTER (DEST(%esp))
> - popl %edi /* restore saved register */
> - cfi_adjust_cfa_offset (-4)
> - cfi_restore (edi)
> -
> - LEAVE
> - RET_PTR
> -END (BP_SYM (strcat))
> -libc_hidden_builtin_def (strcat)
> diff --git a/sysdeps/i386/i686/multiarch/Makefile
> b/sysdeps/i386/i686/multiarch/Makefile
> index 8946bfa..92a2b8f 100644
> --- a/sysdeps/i386/i686/multiarch/Makefile
> +++ b/sysdeps/i386/i686/multiarch/Makefile
> @@ -14,8 +14,7 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3
> mempcpy-ssse3 \
> memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \
> strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \
> strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \
> - strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \
> - strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c \
> + strncpy-sse2 stpcpy-sse2 stpncpy-sse2 \
> strchr-sse2 strrchr-sse2 strchr-sse2-bsf
> strrchr-sse2-bsf \
> memchr-sse2 memchr-sse2-bsf \
> memrchr-sse2 memrchr-sse2-bsf memrchr-c \
> diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S
> b/sysdeps/i386/i686/multiarch/strcat-sse2.S
> deleted file mode 100644
> index e75f92c..0000000
> --- a/sysdeps/i386/i686/multiarch/strcat-sse2.S
> +++ /dev/null
> @@ -1,1243 +0,0 @@
> -/* strcat with SSE2
> - Copyright (C) 2011-2012 Free Software Foundation, Inc.
> - Contributed by Intel Corporation.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -
> -#ifndef NOT_IN_libc
> -
> -# include <sysdep.h>
> -
> -
> -# define CFI_PUSH(REG) \
> - cfi_adjust_cfa_offset (4); \
> - cfi_rel_offset (REG, 0)
> -
> -# define CFI_POP(REG) \
> - cfi_adjust_cfa_offset (-4); \
> - cfi_restore (REG)
> -
> -# define PUSH(REG) pushl REG; CFI_PUSH (REG)
> -# define POP(REG) popl REG; CFI_POP (REG)
> -
> -# ifdef SHARED
> -# define JMPTBL(I, B) I - B
> -
> -/* Load an entry in a jump table into ECX and branch to it. TABLE is a
> - jump table with relative offsets. INDEX is a register contains
> the
> - index into the jump table. SCALE is the scale of INDEX. */
> -
> -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
> - /* We first load PC into ECX. */ \
> - SETUP_PIC_REG(cx); \
> - /* Get the address of the jump table. */ \
> - addl $(TABLE - .), %ecx; \
> - /* Get the entry and convert the relative offset to the \
> - absolute address. */ \
> - addl (%ecx,INDEX,SCALE), %ecx; \
> - /* We loaded the jump table and adjuested ECX. Go. */ \
> - jmp *%ecx
> -# else
> -# define JMPTBL(I, B) I
> -
> -/* Branch to an entry in a jump table. TABLE is a jump table with
> - absolute offsets. INDEX is a register contains the index into the
> - jump table. SCALE is the scale of INDEX. */
> -
> -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
> - jmp *TABLE(,INDEX,SCALE)
> -# endif
> -
> -# ifndef STRCAT
> -# define STRCAT __strcat_sse2
> -# endif
> -
> -# define PARMS 4
> -# define STR1 PARMS+4
> -# define STR2 STR1+4
> -
> -# ifdef USE_AS_STRNCAT
> -# define LEN STR2+8
> -# define STR3 STR1+4
> -# else
> -# define STR3 STR1
> -# endif
> -
> -# define USE_AS_STRCAT
> -# ifdef USE_AS_STRNCAT
> -# define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx);
> CFI_PUSH(%esi);
> -# else
> -# define RETURN POP(%esi); ret; CFI_PUSH(%esi);
> -# endif
> -
> -.text
> -ENTRY (STRCAT)
> - PUSH (%esi)
> - mov STR1(%esp), %eax
> - mov STR2(%esp), %esi
> -# ifdef USE_AS_STRNCAT
> - PUSH (%ebx)
> - movl LEN(%esp), %ebx
> - test %ebx, %ebx
> - jz L(ExitZero)
> -# endif
> - cmpb $0, (%esi)
> - mov %esi, %ecx
> - mov %eax, %edx
> - jz L(ExitZero)
> -
> - and $63, %ecx
> - and $63, %edx
> - cmp $32, %ecx
> - ja L(StrlenCore7_1)
> - cmp $48, %edx
> - ja L(alignment_prolog)
> -
> - pxor %xmm0, %xmm0
> - pxor %xmm4, %xmm4
> - pxor %xmm7, %xmm7
> - movdqu (%eax), %xmm1
> - movdqu (%esi), %xmm5
> - pcmpeqb %xmm1, %xmm0
> - movdqu 16(%esi), %xmm6
> - pmovmskb %xmm0, %ecx
> - pcmpeqb %xmm5, %xmm4
> - pcmpeqb %xmm6, %xmm7
> - test %ecx, %ecx
> - jnz L(exit_less16_)
> - mov %eax, %ecx
> - and $-16, %eax
> - jmp L(loop_prolog)
> -
> -L(alignment_prolog):
> - pxor %xmm0, %xmm0
> - pxor %xmm4, %xmm4
> - mov %edx, %ecx
> - pxor %xmm7, %xmm7
> - and $15, %ecx
> - and $-16, %eax
> - pcmpeqb (%eax), %xmm0
> - movdqu (%esi), %xmm5
> - movdqu 16(%esi), %xmm6
> - pmovmskb %xmm0, %edx
> - pcmpeqb %xmm5, %xmm4
> - shr %cl, %edx
> - pcmpeqb %xmm6, %xmm7
> - test %edx, %edx
> - jnz L(exit_less16)
> - add %eax, %ecx
> -
> - pxor %xmm0, %xmm0
> -L(loop_prolog):
> - pxor %xmm1, %xmm1
> - pxor %xmm2, %xmm2
> - pxor %xmm3, %xmm3
> - .p2align 4
> -L(align16_loop):
> - pcmpeqb 16(%eax), %xmm0
> - pmovmskb %xmm0, %edx
> - test %edx, %edx
> - jnz L(exit16)
> -
> - pcmpeqb 32(%eax), %xmm1
> - pmovmskb %xmm1, %edx
> - test %edx, %edx
> - jnz L(exit32)
> -
> - pcmpeqb 48(%eax), %xmm2
> - pmovmskb %xmm2, %edx
> - test %edx, %edx
> - jnz L(exit48)
> -
> - pcmpeqb 64(%eax), %xmm3
> - pmovmskb %xmm3, %edx
> - lea 64(%eax), %eax
> - test %edx, %edx
> - jz L(align16_loop)
> - bsf %edx, %edx
> - add %edx, %eax
> - jmp L(StartStrcpyPart)
> -
> - .p2align 4
> -L(exit16):
> - bsf %edx, %edx
> - lea 16(%eax, %edx), %eax
> - jmp L(StartStrcpyPart)
> -
> - .p2align 4
> -L(exit32):
> - bsf %edx, %edx
> - lea 32(%eax, %edx), %eax
> - jmp L(StartStrcpyPart)
> -
> - .p2align 4
> -L(exit48):
> - bsf %edx, %edx
> - lea 48(%eax, %edx), %eax
> - jmp L(StartStrcpyPart)
> -
> - .p2align 4
> -L(exit_less16):
> - bsf %edx, %edx
> - add %ecx, %eax
> - add %edx, %eax
> - jmp L(StartStrcpyPart)
> -
> - .p2align 4
> -L(exit_less16_):
> - bsf %ecx, %ecx
> - add %ecx, %eax
> -
> - .p2align 4
> -L(StartStrcpyPart):
> - pmovmskb %xmm4, %edx
> -# ifdef USE_AS_STRNCAT
> - cmp $16, %ebx
> - jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
> -# endif
> - test %edx, %edx
> - jnz L(CopyFrom1To16BytesTail1)
> -
> - movdqu %xmm5, (%eax)
> - pmovmskb %xmm7, %edx
> -# ifdef USE_AS_STRNCAT
> - cmp $32, %ebx
> - jbe L(CopyFrom1To32Bytes1Case2OrCase3)
> -# endif
> - test %edx, %edx
> - jnz L(CopyFrom1To32Bytes1)
> -
> - mov %esi, %ecx
> - and $-16, %esi
> - and $15, %ecx
> - pxor %xmm0, %xmm0
> -# ifdef USE_AS_STRNCAT
> - add %ecx, %ebx
> -# endif
> - sub %ecx, %eax
> - jmp L(Unalign16Both)
> -
> -L(StrlenCore7_1):
> - mov %eax, %ecx
> - pxor %xmm0, %xmm0
> - and $15, %ecx
> - and $-16, %eax
> - pcmpeqb (%eax), %xmm0
> - pmovmskb %xmm0, %edx
> - shr %cl, %edx
> - test %edx, %edx
> - jnz L(exit_less16_1)
> - add %eax, %ecx
> -
> - pxor %xmm0, %xmm0
> - pxor %xmm1, %xmm1
> - pxor %xmm2, %xmm2
> - pxor %xmm3, %xmm3
> -
> - .p2align 4
> -L(align16_loop_1):
> - pcmpeqb 16(%eax), %xmm0
> - pmovmskb %xmm0, %edx
> - test %edx, %edx
> - jnz L(exit16_1)
> -
> - pcmpeqb 32(%eax), %xmm1
> - pmovmskb %xmm1, %edx
> - test %edx, %edx
> - jnz L(exit32_1)
> -
> - pcmpeqb 48(%eax), %xmm2
> - pmovmskb %xmm2, %edx
> - test %edx, %edx
> - jnz L(exit48_1)
> -
> - pcmpeqb 64(%eax), %xmm3
> - pmovmskb %xmm3, %edx
> - lea 64(%eax), %eax
> - test %edx, %edx
> - jz L(align16_loop_1)
> - bsf %edx, %edx
> - add %edx, %eax
> - jmp L(StartStrcpyPart_1)
> -
> - .p2align 4
> -L(exit16_1):
> - bsf %edx, %edx
> - lea 16(%eax, %edx), %eax
> - jmp L(StartStrcpyPart_1)
> -
> - .p2align 4
> -L(exit32_1):
> - bsf %edx, %edx
> - lea 32(%eax, %edx), %eax
> - jmp L(StartStrcpyPart_1)
> -
> - .p2align 4
> -L(exit48_1):
> - bsf %edx, %edx
> - lea 48(%eax, %edx), %eax
> - jmp L(StartStrcpyPart_1)
> -
> - .p2align 4
> -L(exit_less16_1):
> - bsf %edx, %edx
> - add %ecx, %eax
> - add %edx, %eax
> -
> - .p2align 4
> -L(StartStrcpyPart_1):
> - mov %esi, %ecx
> - and $15, %ecx
> - and $-16, %esi
> - pxor %xmm0, %xmm0
> - pxor %xmm1, %xmm1
> -
> -# ifdef USE_AS_STRNCAT
> - cmp $48, %ebx
> - ja L(BigN)
> -# endif
> - pcmpeqb (%esi), %xmm1
> -# ifdef USE_AS_STRNCAT
> - add %ecx, %ebx
> -# endif
> - pmovmskb %xmm1, %edx
> - shr %cl, %edx
> -# ifdef USE_AS_STRNCAT
> - cmp $16, %ebx
> - jbe L(CopyFrom1To16BytesTailCase2OrCase3)
> -# endif
> - test %edx, %edx
> - jnz L(CopyFrom1To16BytesTail)
> -
> - pcmpeqb 16(%esi), %xmm0
> - pmovmskb %xmm0, %edx
> -# ifdef USE_AS_STRNCAT
> - cmp $32, %ebx
> - jbe L(CopyFrom1To32BytesCase2OrCase3)
> -# endif
> - test %edx, %edx
> - jnz L(CopyFrom1To32Bytes)
> -
> - movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
> - movdqu %xmm1, (%eax)
> - sub %ecx, %eax
> -
> - .p2align 4
> -L(Unalign16Both):
> - mov $16, %ecx
> - movdqa (%esi, %ecx), %xmm1
> - movaps 16(%esi, %ecx), %xmm2
> - movdqu %xmm1, (%eax, %ecx)
> - pcmpeqb %xmm2, %xmm0
> - pmovmskb %xmm0, %edx
> - add $16, %ecx
> -# ifdef USE_AS_STRNCAT
> - sub $48, %ebx
> - jbe L(CopyFrom1To16BytesCase2OrCase3)
> -# endif
> - test %edx, %edx
> - jnz L(CopyFrom1To16Bytes)
> -L(Unalign16BothBigN):
> - movaps 16(%esi, %ecx), %xmm3
> - movdqu %xmm2, (%eax, %ecx)
> - pcmpeqb %xmm3, %xmm0
> - pmovmskb %xmm0, %edx
> - add $16, %ecx
> -# ifdef USE_AS_STRNCAT
> - sub $16, %ebx
> - jbe L(CopyFrom1To16BytesCase2OrCase3)
> -# endif
> - test %edx, %edx
> - jnz L(CopyFrom1To16Bytes)
> -
> - movaps 16(%esi, %ecx), %xmm4
> - movdqu %xmm3, (%eax, %ecx)
> - pcmpeqb %xmm4, %xmm0
> - pmovmskb %xmm0, %edx
> - add $16, %ecx
> -# ifdef USE_AS_STRNCAT
> - sub $16, %ebx
> - jbe L(CopyFrom1To16BytesCase2OrCase3)
> -# endif
> - test %edx, %edx
> - jnz L(CopyFrom1To16Bytes)
> -
> - movaps 16(%esi, %ecx), %xmm1
> - movdqu %xmm4, (%eax, %ecx)
> - pcmpeqb %xmm1, %xmm0
> - pmovmskb %xmm0, %edx
> - add $16, %ecx
> -# ifdef USE_AS_STRNCAT
> - sub $16, %ebx
> - jbe L(CopyFrom1To16BytesCase2OrCase3)
> -# endif
> - test %edx, %edx
> - jnz L(CopyFrom1To16Bytes)
> -
> - movaps 16(%esi, %ecx), %xmm2
> - movdqu %xmm1, (%eax, %ecx)
> - pcmpeqb %xmm2, %xmm0
> - pmovmskb %xmm0, %edx
> - add $16, %ecx
> -# ifdef USE_AS_STRNCAT
> - sub $16, %ebx
> - jbe L(CopyFrom1To16BytesCase2OrCase3)
> -# endif
> - test %edx, %edx
> - jnz L(CopyFrom1To16Bytes)
> -
> - movaps 16(%esi, %ecx), %xmm3
> - movdqu %xmm2, (%eax, %ecx)
> - pcmpeqb %xmm3, %xmm0
> - pmovmskb %xmm0, %edx
> - add $16, %ecx
> -# ifdef USE_AS_STRNCAT
> - sub $16, %ebx
> - jbe L(CopyFrom1To16BytesCase2OrCase3)
> -# endif
> - test %edx, %edx
> - jnz L(CopyFrom1To16Bytes)
> -
> - movdqu %xmm3, (%eax, %ecx)
> - mov %esi, %edx
> - lea 16(%esi, %ecx), %esi
> - and $-0x40, %esi
> - sub %esi, %edx
> - sub %edx, %eax
> -# ifdef USE_AS_STRNCAT
> - lea 128(%ebx, %edx), %ebx
> -# endif
> - movaps (%esi), %xmm2
> - movaps %xmm2, %xmm4
> - movaps 16(%esi), %xmm5
> - movaps 32(%esi), %xmm3
> - movaps %xmm3, %xmm6
> - movaps 48(%esi), %xmm7
> - pminub %xmm5, %xmm2
> - pminub %xmm7, %xmm3
> - pminub %xmm2, %xmm3
> - pcmpeqb %xmm0, %xmm3
> - pmovmskb %xmm3, %edx
> -# ifdef USE_AS_STRNCAT
> - sub $64, %ebx
> - jbe L(UnalignedLeaveCase2OrCase3)
> -# endif
> - test %edx, %edx
> - jnz L(Unaligned64Leave)
> -
> - .p2align 4
> -L(Unaligned64Loop_start):
> - add $64, %eax
> - add $64, %esi
> - movdqu %xmm4, -64(%eax)
> - movaps (%esi), %xmm2
> - movdqa %xmm2, %xmm4
> - movdqu %xmm5, -48(%eax)
> - movaps 16(%esi), %xmm5
> - pminub %xmm5, %xmm2
> - movaps 32(%esi), %xmm3
> - movdqu %xmm6, -32(%eax)
> - movaps %xmm3, %xmm6
> - movdqu %xmm7, -16(%eax)
> - movaps 48(%esi), %xmm7
> - pminub %xmm7, %xmm3
> - pminub %xmm2, %xmm3
> - pcmpeqb %xmm0, %xmm3
> - pmovmskb %xmm3, %edx
> -# ifdef USE_AS_STRNCAT
> - sub $64, %ebx
> - jbe L(UnalignedLeaveCase2OrCase3)
> -# endif
> - test %edx, %edx
> - jz L(Unaligned64Loop_start)
> -
> -L(Unaligned64Leave):
> - pxor %xmm1, %xmm1
> -
> - pcmpeqb %xmm4, %xmm0
> - pcmpeqb %xmm5, %xmm1
> - pmovmskb %xmm0, %edx
> - pmovmskb %xmm1, %ecx
> - test %edx, %edx
> - jnz L(CopyFrom1To16BytesUnaligned_0)
> - test %ecx, %ecx
> - jnz L(CopyFrom1To16BytesUnaligned_16)
> -
> - pcmpeqb %xmm6, %xmm0
> - pcmpeqb %xmm7, %xmm1
> - pmovmskb %xmm0, %edx
> - pmovmskb %xmm1, %ecx
> - test %edx, %edx
> - jnz L(CopyFrom1To16BytesUnaligned_32)
> -
> - bsf %ecx, %edx
> - movdqu %xmm4, (%eax)
> - movdqu %xmm5, 16(%eax)
> - movdqu %xmm6, 32(%eax)
> - add $48, %esi
> - add $48, %eax
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
> -
> -# ifdef USE_AS_STRNCAT
> - .p2align 4
> -L(BigN):
> - pcmpeqb (%esi), %xmm1
> - pmovmskb %xmm1, %edx
> - shr %cl, %edx
> - test %edx, %edx
> - jnz L(CopyFrom1To16BytesTail)
> -
> - pcmpeqb 16(%esi), %xmm0
> - pmovmskb %xmm0, %edx
> - test %edx, %edx
> - jnz L(CopyFrom1To32Bytes)
> -
> - movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
> - movdqu %xmm1, (%eax)
> - sub %ecx, %eax
> - sub $48, %ebx
> - add %ecx, %ebx
> -
> - mov $16, %ecx
> - movdqa (%esi, %ecx), %xmm1
> - movaps 16(%esi, %ecx), %xmm2
> - movdqu %xmm1, (%eax, %ecx)
> - pcmpeqb %xmm2, %xmm0
> - pmovmskb %xmm0, %edx
> - add $16, %ecx
> - test %edx, %edx
> - jnz L(CopyFrom1To16Bytes)
> - jmp L(Unalign16BothBigN)
> -# endif
> -
> -/*------------end of main part-------------------------------*/
> -
> -/* Case1 */
> - .p2align 4
> -L(CopyFrom1To16Bytes):
> - add %ecx, %eax
> - add %ecx, %esi
> - bsf %edx, %edx
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
> -
> - .p2align 4
> -L(CopyFrom1To16BytesTail):
> - add %ecx, %esi
> - bsf %edx, %edx
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
> -
> - .p2align 4
> -L(CopyFrom1To32Bytes1):
> - add $16, %esi
> - add $16, %eax
> -L(CopyFrom1To16BytesTail1):
> - bsf %edx, %edx
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
> -
> - .p2align 4
> -L(CopyFrom1To32Bytes):
> - bsf %edx, %edx
> - add %ecx, %esi
> - add $16, %edx
> - sub %ecx, %edx
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
> -
> - .p2align 4
> -L(CopyFrom1To16BytesUnaligned_0):
> - bsf %edx, %edx
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
> -
> - .p2align 4
> -L(CopyFrom1To16BytesUnaligned_16):
> - bsf %ecx, %edx
> - movdqu %xmm4, (%eax)
> - add $16, %esi
> - add $16, %eax
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
> -
> - .p2align 4
> -L(CopyFrom1To16BytesUnaligned_32):
> - bsf %edx, %edx
> - movdqu %xmm4, (%eax)
> - movdqu %xmm5, 16(%eax)
> - add $32, %esi
> - add $32, %eax
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
> -
> -# ifdef USE_AS_STRNCAT
> -
> - .p2align 4
> -L(CopyFrom1To16BytesExit):
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
> -
> -/* Case2 */
> -
> - .p2align 4
> -L(CopyFrom1To16BytesCase2):
> - add $16, %ebx
> - add %ecx, %eax
> - add %ecx, %esi
> - bsf %edx, %edx
> - cmp %ebx, %edx
> - jb L(CopyFrom1To16BytesExit)
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
> -
> - .p2align 4
> -L(CopyFrom1To32BytesCase2):
> - sub %ecx, %ebx
> - add %ecx, %esi
> - bsf %edx, %edx
> - add $16, %edx
> - sub %ecx, %edx
> - cmp %ebx, %edx
> - jb L(CopyFrom1To16BytesExit)
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
> -
> -L(CopyFrom1To16BytesTailCase2):
> - sub %ecx, %ebx
> - add %ecx, %esi
> - bsf %edx, %edx
> - cmp %ebx, %edx
> - jb L(CopyFrom1To16BytesExit)
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
> -
> -L(CopyFrom1To16BytesTail1Case2):
> - bsf %edx, %edx
> - cmp %ebx, %edx
> - jb L(CopyFrom1To16BytesExit)
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
> -
> -/* Case2 or Case3, Case3 */
> -
> - .p2align 4
> -L(CopyFrom1To16BytesCase2OrCase3):
> - test %edx, %edx
> - jnz L(CopyFrom1To16BytesCase2)
> -L(CopyFrom1To16BytesCase3):
> - add $16, %ebx
> - add %ecx, %eax
> - add %ecx, %esi
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
> -
> - .p2align 4
> -L(CopyFrom1To32BytesCase2OrCase3):
> - test %edx, %edx
> - jnz L(CopyFrom1To32BytesCase2)
> - sub %ecx, %ebx
> - add %ecx, %esi
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
> -
> - .p2align 4
> -L(CopyFrom1To16BytesTailCase2OrCase3):
> - test %edx, %edx
> - jnz L(CopyFrom1To16BytesTailCase2)
> - sub %ecx, %ebx
> - add %ecx, %esi
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
> -
> - .p2align 4
> -L(CopyFrom1To32Bytes1Case2OrCase3):
> - add $16, %eax
> - add $16, %esi
> - sub $16, %ebx
> -L(CopyFrom1To16BytesTail1Case2OrCase3):
> - test %edx, %edx
> - jnz L(CopyFrom1To16BytesTail1Case2)
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
> -
> -# endif
> -
> -# ifdef USE_AS_STRNCAT
> - .p2align 4
> -L(StrncatExit0):
> - movb %bh, (%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -# endif
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit1):
> - movb %bh, 1(%eax)
> -# endif
> -L(Exit1):
> -# ifdef USE_AS_STRNCAT
> - movb (%esi), %dh
> -# endif
> - movb %dh, (%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit2):
> - movb %bh, 2(%eax)
> -# endif
> -L(Exit2):
> - movw (%esi), %dx
> - movw %dx, (%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit3):
> - movb %bh, 3(%eax)
> -# endif
> -L(Exit3):
> - movw (%esi), %cx
> - movw %cx, (%eax)
> -# ifdef USE_AS_STRNCAT
> - movb 2(%esi), %dh
> -# endif
> - movb %dh, 2(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit4):
> - movb %bh, 4(%eax)
> -# endif
> -L(Exit4):
> - movl (%esi), %edx
> - movl %edx, (%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit5):
> - movb %bh, 5(%eax)
> -# endif
> -L(Exit5):
> - movl (%esi), %ecx
> -# ifdef USE_AS_STRNCAT
> - movb 4(%esi), %dh
> -# endif
> - movb %dh, 4(%eax)
> - movl %ecx, (%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit6):
> - movb %bh, 6(%eax)
> -# endif
> -L(Exit6):
> - movl (%esi), %ecx
> - movw 4(%esi), %dx
> - movl %ecx, (%eax)
> - movw %dx, 4(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit7):
> - movb %bh, 7(%eax)
> -# endif
> -L(Exit7):
> - movl (%esi), %ecx
> - movl 3(%esi), %edx
> - movl %ecx, (%eax)
> - movl %edx, 3(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit8):
> - movb %bh, 8(%eax)
> -# endif
> -L(Exit8):
> - movlpd (%esi), %xmm0
> - movlpd %xmm0, (%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit9):
> - movb %bh, 9(%eax)
> -# endif
> -L(Exit9):
> - movlpd (%esi), %xmm0
> -# ifdef USE_AS_STRNCAT
> - movb 8(%esi), %dh
> -# endif
> - movb %dh, 8(%eax)
> - movlpd %xmm0, (%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit10):
> - movb %bh, 10(%eax)
> -# endif
> -L(Exit10):
> - movlpd (%esi), %xmm0
> - movw 8(%esi), %dx
> - movlpd %xmm0, (%eax)
> - movw %dx, 8(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit11):
> - movb %bh, 11(%eax)
> -# endif
> -L(Exit11):
> - movlpd (%esi), %xmm0
> - movl 7(%esi), %edx
> - movlpd %xmm0, (%eax)
> - movl %edx, 7(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit12):
> - movb %bh, 12(%eax)
> -# endif
> -L(Exit12):
> - movlpd (%esi), %xmm0
> - movl 8(%esi), %edx
> - movlpd %xmm0, (%eax)
> - movl %edx, 8(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit13):
> - movb %bh, 13(%eax)
> -# endif
> -L(Exit13):
> - movlpd (%esi), %xmm0
> - movlpd 5(%esi), %xmm1
> - movlpd %xmm0, (%eax)
> - movlpd %xmm1, 5(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit14):
> - movb %bh, 14(%eax)
> -# endif
> -L(Exit14):
> - movlpd (%esi), %xmm0
> - movlpd 6(%esi), %xmm1
> - movlpd %xmm0, (%eax)
> - movlpd %xmm1, 6(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit15):
> - movb %bh, 15(%eax)
> -# endif
> -L(Exit15):
> - movlpd (%esi), %xmm0
> - movlpd 7(%esi), %xmm1
> - movlpd %xmm0, (%eax)
> - movlpd %xmm1, 7(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit16):
> - movb %bh, 16(%eax)
> -# endif
> -L(Exit16):
> - movdqu (%esi), %xmm0
> - movdqu %xmm0, (%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit17):
> - movb %bh, 17(%eax)
> -# endif
> -L(Exit17):
> - movdqu (%esi), %xmm0
> -# ifdef USE_AS_STRNCAT
> - movb 16(%esi), %dh
> -# endif
> - movdqu %xmm0, (%eax)
> - movb %dh, 16(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit18):
> - movb %bh, 18(%eax)
> -# endif
> -L(Exit18):
> - movdqu (%esi), %xmm0
> - movw 16(%esi), %cx
> - movdqu %xmm0, (%eax)
> - movw %cx, 16(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit19):
> - movb %bh, 19(%eax)
> -# endif
> -L(Exit19):
> - movdqu (%esi), %xmm0
> - movl 15(%esi), %ecx
> - movdqu %xmm0, (%eax)
> - movl %ecx, 15(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit20):
> - movb %bh, 20(%eax)
> -# endif
> -L(Exit20):
> - movdqu (%esi), %xmm0
> - movl 16(%esi), %ecx
> - movdqu %xmm0, (%eax)
> - movl %ecx, 16(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit21):
> - movb %bh, 21(%eax)
> -# endif
> -L(Exit21):
> - movdqu (%esi), %xmm0
> - movl 16(%esi), %ecx
> -# ifdef USE_AS_STRNCAT
> - movb 20(%esi), %dh
> -# endif
> - movdqu %xmm0, (%eax)
> - movl %ecx, 16(%eax)
> - movb %dh, 20(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit22):
> - movb %bh, 22(%eax)
> -# endif
> -L(Exit22):
> - movdqu (%esi), %xmm0
> - movlpd 14(%esi), %xmm3
> - movdqu %xmm0, (%eax)
> - movlpd %xmm3, 14(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit23):
> - movb %bh, 23(%eax)
> -# endif
> -L(Exit23):
> - movdqu (%esi), %xmm0
> - movlpd 15(%esi), %xmm3
> - movdqu %xmm0, (%eax)
> - movlpd %xmm3, 15(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit24):
> - movb %bh, 24(%eax)
> -# endif
> -L(Exit24):
> - movdqu (%esi), %xmm0
> - movlpd 16(%esi), %xmm2
> - movdqu %xmm0, (%eax)
> - movlpd %xmm2, 16(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit25):
> - movb %bh, 25(%eax)
> -# endif
> -L(Exit25):
> - movdqu (%esi), %xmm0
> - movlpd 16(%esi), %xmm2
> -# ifdef USE_AS_STRNCAT
> - movb 24(%esi), %dh
> -# endif
> - movdqu %xmm0, (%eax)
> - movlpd %xmm2, 16(%eax)
> - movb %dh, 24(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit26):
> - movb %bh, 26(%eax)
> -# endif
> -L(Exit26):
> - movdqu (%esi), %xmm0
> - movlpd 16(%esi), %xmm2
> - movw 24(%esi), %cx
> - movdqu %xmm0, (%eax)
> - movlpd %xmm2, 16(%eax)
> - movw %cx, 24(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit27):
> - movb %bh, 27(%eax)
> -# endif
> -L(Exit27):
> - movdqu (%esi), %xmm0
> - movlpd 16(%esi), %xmm2
> - movl 23(%esi), %ecx
> - movdqu %xmm0, (%eax)
> - movlpd %xmm2, 16(%eax)
> - movl %ecx, 23(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit28):
> - movb %bh, 28(%eax)
> -# endif
> -L(Exit28):
> - movdqu (%esi), %xmm0
> - movlpd 16(%esi), %xmm2
> - movl 24(%esi), %ecx
> - movdqu %xmm0, (%eax)
> - movlpd %xmm2, 16(%eax)
> - movl %ecx, 24(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit29):
> - movb %bh, 29(%eax)
> -# endif
> -L(Exit29):
> - movdqu (%esi), %xmm0
> - movdqu 13(%esi), %xmm2
> - movdqu %xmm0, (%eax)
> - movdqu %xmm2, 13(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit30):
> - movb %bh, 30(%eax)
> -# endif
> -L(Exit30):
> - movdqu (%esi), %xmm0
> - movdqu 14(%esi), %xmm2
> - movdqu %xmm0, (%eax)
> - movdqu %xmm2, 14(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit31):
> - movb %bh, 31(%eax)
> -# endif
> -L(Exit31):
> - movdqu (%esi), %xmm0
> - movdqu 15(%esi), %xmm2
> - movdqu %xmm0, (%eax)
> - movdqu %xmm2, 15(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -# ifdef USE_AS_STRNCAT
> -L(StrncatExit32):
> - movb %bh, 32(%eax)
> -# endif
> -L(Exit32):
> - movdqu (%esi), %xmm0
> - movdqu 16(%esi), %xmm2
> - movdqu %xmm0, (%eax)
> - movdqu %xmm2, 16(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> -# ifdef USE_AS_STRNCAT
> -
> - .p2align 4
> -L(UnalignedLeaveCase2OrCase3):
> - test %edx, %edx
> - jnz L(Unaligned64LeaveCase2)
> -L(Unaligned64LeaveCase3):
> - lea 64(%ebx), %ecx
> - and $-16, %ecx
> - add $48, %ebx
> - jl L(CopyFrom1To16BytesCase3)
> - movdqu %xmm4, (%eax)
> - sub $16, %ebx
> - jb L(CopyFrom1To16BytesCase3)
> - movdqu %xmm5, 16(%eax)
> - sub $16, %ebx
> - jb L(CopyFrom1To16BytesCase3)
> - movdqu %xmm6, 32(%eax)
> - sub $16, %ebx
> - jb L(CopyFrom1To16BytesCase3)
> - movdqu %xmm7, 48(%eax)
> - xor %bh, %bh
> - movb %bh, 64(%eax)
> - mov STR3(%esp), %eax
> - RETURN
> -
> - .p2align 4
> -L(Unaligned64LeaveCase2):
> - xor %ecx, %ecx
> - pcmpeqb %xmm4, %xmm0
> - pmovmskb %xmm0, %edx
> - add $48, %ebx
> - jle L(CopyFrom1To16BytesCase2OrCase3)
> - test %edx, %edx
> - jnz L(CopyFrom1To16Bytes)
> -
> - pcmpeqb %xmm5, %xmm0
> - pmovmskb %xmm0, %edx
> - movdqu %xmm4, (%eax)
> - add $16, %ecx
> - sub $16, %ebx
> - jbe L(CopyFrom1To16BytesCase2OrCase3)
> - test %edx, %edx
> - jnz L(CopyFrom1To16Bytes)
> -
> - pcmpeqb %xmm6, %xmm0
> - pmovmskb %xmm0, %edx
> - movdqu %xmm5, 16(%eax)
> - add $16, %ecx
> - sub $16, %ebx
> - jbe L(CopyFrom1To16BytesCase2OrCase3)
> - test %edx, %edx
> - jnz L(CopyFrom1To16Bytes)
> -
> - pcmpeqb %xmm7, %xmm0
> - pmovmskb %xmm0, %edx
> - movdqu %xmm6, 32(%eax)
> - lea 16(%eax, %ecx), %eax
> - lea 16(%esi, %ecx), %esi
> - bsf %edx, %edx
> - cmp %ebx, %edx
> - jb L(CopyFrom1To16BytesExit)
> - BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
> -# endif
> - .p2align 4
> -L(ExitZero):
> - RETURN
> -
> -END (STRCAT)
> -
> - .p2align 4
> - .section .rodata
> -L(ExitTable):
> - .int JMPTBL(L(Exit1), L(ExitTable))
> - .int JMPTBL(L(Exit2), L(ExitTable))
> - .int JMPTBL(L(Exit3), L(ExitTable))
> - .int JMPTBL(L(Exit4), L(ExitTable))
> - .int JMPTBL(L(Exit5), L(ExitTable))
> - .int JMPTBL(L(Exit6), L(ExitTable))
> - .int JMPTBL(L(Exit7), L(ExitTable))
> - .int JMPTBL(L(Exit8), L(ExitTable))
> - .int JMPTBL(L(Exit9), L(ExitTable))
> - .int JMPTBL(L(Exit10), L(ExitTable))
> - .int JMPTBL(L(Exit11), L(ExitTable))
> - .int JMPTBL(L(Exit12), L(ExitTable))
> - .int JMPTBL(L(Exit13), L(ExitTable))
> - .int JMPTBL(L(Exit14), L(ExitTable))
> - .int JMPTBL(L(Exit15), L(ExitTable))
> - .int JMPTBL(L(Exit16), L(ExitTable))
> - .int JMPTBL(L(Exit17), L(ExitTable))
> - .int JMPTBL(L(Exit18), L(ExitTable))
> - .int JMPTBL(L(Exit19), L(ExitTable))
> - .int JMPTBL(L(Exit20), L(ExitTable))
> - .int JMPTBL(L(Exit21), L(ExitTable))
> - .int JMPTBL(L(Exit22), L(ExitTable))
> - .int JMPTBL(L(Exit23), L(ExitTable))
> - .int JMPTBL(L(Exit24), L(ExitTable))
> - .int JMPTBL(L(Exit25), L(ExitTable))
> - .int JMPTBL(L(Exit26), L(ExitTable))
> - .int JMPTBL(L(Exit27), L(ExitTable))
> - .int JMPTBL(L(Exit28), L(ExitTable))
> - .int JMPTBL(L(Exit29), L(ExitTable))
> - .int JMPTBL(L(Exit30), L(ExitTable))
> - .int JMPTBL(L(Exit31), L(ExitTable))
> - .int JMPTBL(L(Exit32), L(ExitTable))
> -# ifdef USE_AS_STRNCAT
> -L(ExitStrncatTable):
> - .int JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
> - .int JMPTBL(L(StrncatExit32), L(ExitStrncatTable))
> -# endif
> -#endif
> diff --git a/sysdeps/i386/i686/multiarch/strcat-ssse3.S b/sysdeps/i386/i686/multiarch/strcat-ssse3.S
> deleted file mode 100644
> index 72bc49c..0000000
> --- a/sysdeps/i386/i686/multiarch/strcat-ssse3.S
> +++ /dev/null
> @@ -1,572 +0,0 @@
> -/* strcat with SSSE3
> - Copyright (C) 2011 Free Software Foundation, Inc.
> - Contributed by Intel Corporation.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -
> -#ifndef NOT_IN_libc
> -
> -# include <sysdep.h>
> -
> -# define CFI_PUSH(REG) \
> - cfi_adjust_cfa_offset (4); \
> - cfi_rel_offset (REG, 0)
> -
> -# define CFI_POP(REG) \
> - cfi_adjust_cfa_offset (-4); \
> - cfi_restore (REG)
> -
> -# define PUSH(REG) pushl REG; CFI_PUSH (REG)
> -# define POP(REG) popl REG; CFI_POP (REG)
> -
> -# ifndef STRCAT
> -# define STRCAT __strcat_ssse3
> -# endif
> -
> -# define PARMS 4
> -# define STR1 PARMS+4
> -# define STR2 STR1+4
> -
> -# ifdef USE_AS_STRNCAT
> -# define LEN STR2+8
> -# endif
> -
> -# define USE_AS_STRCAT
> -
> -.text
> -ENTRY (STRCAT)
> - PUSH (%edi)
> - mov STR1(%esp), %edi
> - mov %edi, %edx
> -
> -# define RETURN jmp L(StartStrcpyPart)
> -# include "strlen-sse2.S"
> -
> -L(StartStrcpyPart):
> - mov STR2(%esp), %ecx
> - lea (%edi, %eax), %edx
> -# ifdef USE_AS_STRNCAT
> - PUSH (%ebx)
> - mov LEN(%esp), %ebx
> - test %ebx, %ebx
> - jz L(StrncatExit0)
> - cmp $8, %ebx
> - jbe L(StrncatExit8Bytes)
> -# endif
> - cmpb $0, (%ecx)
> - jz L(Exit1)
> - cmpb $0, 1(%ecx)
> - jz L(Exit2)
> - cmpb $0, 2(%ecx)
> - jz L(Exit3)
> - cmpb $0, 3(%ecx)
> - jz L(Exit4)
> - cmpb $0, 4(%ecx)
> - jz L(Exit5)
> - cmpb $0, 5(%ecx)
> - jz L(Exit6)
> - cmpb $0, 6(%ecx)
> - jz L(Exit7)
> - cmpb $0, 7(%ecx)
> - jz L(Exit8)
> - cmpb $0, 8(%ecx)
> - jz L(Exit9)
> -# ifdef USE_AS_STRNCAT
> - cmp $16, %ebx
> - jb L(StrncatExit15Bytes)
> -# endif
> - cmpb $0, 9(%ecx)
> - jz L(Exit10)
> - cmpb $0, 10(%ecx)
> - jz L(Exit11)
> - cmpb $0, 11(%ecx)
> - jz L(Exit12)
> - cmpb $0, 12(%ecx)
> - jz L(Exit13)
> - cmpb $0, 13(%ecx)
> - jz L(Exit14)
> - cmpb $0, 14(%ecx)
> - jz L(Exit15)
> - cmpb $0, 15(%ecx)
> - jz L(Exit16)
> -# ifdef USE_AS_STRNCAT
> - cmp $16, %ebx
> - je L(StrncatExit16)
> -
> -# define RETURN1 \
> - POP (%ebx); \
> - POP (%edi); \
> - ret; \
> - CFI_PUSH (%ebx); \
> - CFI_PUSH (%edi)
> -# define USE_AS_STRNCPY
> -# else
> -# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
> -# endif
> -# include "strcpy-ssse3.S"
> - .p2align 4
> -L(CopyFrom1To16Bytes):
> - add %esi, %edx
> - add %esi, %ecx
> -
> - POP (%esi)
> - test %al, %al
> - jz L(ExitHigh)
> - test $0x01, %al
> - jnz L(Exit1)
> - test $0x02, %al
> - jnz L(Exit2)
> - test $0x04, %al
> - jnz L(Exit3)
> - test $0x08, %al
> - jnz L(Exit4)
> - test $0x10, %al
> - jnz L(Exit5)
> - test $0x20, %al
> - jnz L(Exit6)
> - test $0x40, %al
> - jnz L(Exit7)
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(ExitHigh):
> - test $0x01, %ah
> - jnz L(Exit9)
> - test $0x02, %ah
> - jnz L(Exit10)
> - test $0x04, %ah
> - jnz L(Exit11)
> - test $0x08, %ah
> - jnz L(Exit12)
> - test $0x10, %ah
> - jnz L(Exit13)
> - test $0x20, %ah
> - jnz L(Exit14)
> - test $0x40, %ah
> - jnz L(Exit15)
> - movlpd (%ecx), %xmm0
> - movlpd 8(%ecx), %xmm1
> - movlpd %xmm0, (%edx)
> - movlpd %xmm1, 8(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit1):
> - movb %bh, 1(%edx)
> -L(Exit1):
> - movb (%ecx), %al
> - movb %al, (%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit2):
> - movb %bh, 2(%edx)
> -L(Exit2):
> - movw (%ecx), %ax
> - movw %ax, (%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit3):
> - movb %bh, 3(%edx)
> -L(Exit3):
> - movw (%ecx), %ax
> - movw %ax, (%edx)
> - movb 2(%ecx), %al
> - movb %al, 2(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit4):
> - movb %bh, 4(%edx)
> -L(Exit4):
> - movl (%ecx), %eax
> - movl %eax, (%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit5):
> - movb %bh, 5(%edx)
> -L(Exit5):
> - movl (%ecx), %eax
> - movl %eax, (%edx)
> - movb 4(%ecx), %al
> - movb %al, 4(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit6):
> - movb %bh, 6(%edx)
> -L(Exit6):
> - movl (%ecx), %eax
> - movl %eax, (%edx)
> - movw 4(%ecx), %ax
> - movw %ax, 4(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit7):
> - movb %bh, 7(%edx)
> -L(Exit7):
> - movl (%ecx), %eax
> - movl %eax, (%edx)
> - movl 3(%ecx), %eax
> - movl %eax, 3(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit8):
> - movb %bh, 8(%edx)
> -L(Exit8):
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit9):
> - movb %bh, 9(%edx)
> -L(Exit9):
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - movb 8(%ecx), %al
> - movb %al, 8(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit10):
> - movb %bh, 10(%edx)
> -L(Exit10):
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - movw 8(%ecx), %ax
> - movw %ax, 8(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit11):
> - movb %bh, 11(%edx)
> -L(Exit11):
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - movl 7(%ecx), %eax
> - movl %eax, 7(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit12):
> - movb %bh, 12(%edx)
> -L(Exit12):
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - movl 8(%ecx), %eax
> - movl %eax, 8(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit13):
> - movb %bh, 13(%edx)
> -L(Exit13):
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - movlpd 5(%ecx), %xmm0
> - movlpd %xmm0, 5(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit14):
> - movb %bh, 14(%edx)
> -L(Exit14):
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - movlpd 6(%ecx), %xmm0
> - movlpd %xmm0, 6(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit15):
> - movb %bh, 15(%edx)
> -L(Exit15):
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - movlpd 7(%ecx), %xmm0
> - movlpd %xmm0, 7(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit16):
> - movb %bh, 16(%edx)
> -L(Exit16):
> - movlpd (%ecx), %xmm0
> - movlpd 8(%ecx), %xmm1
> - movlpd %xmm0, (%edx)
> - movlpd %xmm1, 8(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> -# ifdef USE_AS_STRNCPY
> -
> - CFI_PUSH(%esi)
> -
> - .p2align 4
> -L(CopyFrom1To16BytesCase2):
> - add $16, %ebx
> - add %esi, %ecx
> - lea (%esi, %edx), %esi
> - lea -9(%ebx), %edx
> - and $1<<7, %dh
> - or %al, %dh
> - test %dh, %dh
> - lea (%esi), %edx
> - POP (%esi)
> - jz L(ExitHighCase2)
> -
> - test $0x01, %al
> - jnz L(Exit1)
> - cmp $1, %ebx
> - je L(StrncatExit1)
> - test $0x02, %al
> - jnz L(Exit2)
> - cmp $2, %ebx
> - je L(StrncatExit2)
> - test $0x04, %al
> - jnz L(Exit3)
> - cmp $3, %ebx
> - je L(StrncatExit3)
> - test $0x08, %al
> - jnz L(Exit4)
> - cmp $4, %ebx
> - je L(StrncatExit4)
> - test $0x10, %al
> - jnz L(Exit5)
> - cmp $5, %ebx
> - je L(StrncatExit5)
> - test $0x20, %al
> - jnz L(Exit6)
> - cmp $6, %ebx
> - je L(StrncatExit6)
> - test $0x40, %al
> - jnz L(Exit7)
> - cmp $7, %ebx
> - je L(StrncatExit7)
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - lea 7(%edx), %eax
> - cmpb $1, (%eax)
> - sbb $-1, %eax
> - xor %cl, %cl
> - movb %cl, (%eax)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(ExitHighCase2):
> - test $0x01, %ah
> - jnz L(Exit9)
> - cmp $9, %ebx
> - je L(StrncatExit9)
> - test $0x02, %ah
> - jnz L(Exit10)
> - cmp $10, %ebx
> - je L(StrncatExit10)
> - test $0x04, %ah
> - jnz L(Exit11)
> - cmp $11, %ebx
> - je L(StrncatExit11)
> - test $0x8, %ah
> - jnz L(Exit12)
> - cmp $12, %ebx
> - je L(StrncatExit12)
> - test $0x10, %ah
> - jnz L(Exit13)
> - cmp $13, %ebx
> - je L(StrncatExit13)
> - test $0x20, %ah
> - jnz L(Exit14)
> - cmp $14, %ebx
> - je L(StrncatExit14)
> - test $0x40, %ah
> - jnz L(Exit15)
> - cmp $15, %ebx
> - je L(StrncatExit15)
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - movlpd 8(%ecx), %xmm1
> - movlpd %xmm1, 8(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - CFI_PUSH(%esi)
> -
> -L(CopyFrom1To16BytesCase2OrCase3):
> - test %eax, %eax
> - jnz L(CopyFrom1To16BytesCase2)
> -
> - .p2align 4
> -L(CopyFrom1To16BytesCase3):
> - add $16, %ebx
> - add %esi, %edx
> - add %esi, %ecx
> -
> - POP (%esi)
> -
> - cmp $8, %ebx
> - ja L(ExitHighCase3)
> - cmp $1, %ebx
> - je L(StrncatExit1)
> - cmp $2, %ebx
> - je L(StrncatExit2)
> - cmp $3, %ebx
> - je L(StrncatExit3)
> - cmp $4, %ebx
> - je L(StrncatExit4)
> - cmp $5, %ebx
> - je L(StrncatExit5)
> - cmp $6, %ebx
> - je L(StrncatExit6)
> - cmp $7, %ebx
> - je L(StrncatExit7)
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - movb %bh, 8(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(ExitHighCase3):
> - cmp $9, %ebx
> - je L(StrncatExit9)
> - cmp $10, %ebx
> - je L(StrncatExit10)
> - cmp $11, %ebx
> - je L(StrncatExit11)
> - cmp $12, %ebx
> - je L(StrncatExit12)
> - cmp $13, %ebx
> - je L(StrncatExit13)
> - cmp $14, %ebx
> - je L(StrncatExit14)
> - cmp $15, %ebx
> - je L(StrncatExit15)
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - movlpd 8(%ecx), %xmm1
> - movlpd %xmm1, 8(%edx)
> - movb %bh, 16(%edx)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit0):
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit15Bytes):
> - cmp $9, %ebx
> - je L(StrncatExit9)
> - cmpb $0, 9(%ecx)
> - jz L(Exit10)
> - cmp $10, %ebx
> - je L(StrncatExit10)
> - cmpb $0, 10(%ecx)
> - jz L(Exit11)
> - cmp $11, %ebx
> - je L(StrncatExit11)
> - cmpb $0, 11(%ecx)
> - jz L(Exit12)
> - cmp $12, %ebx
> - je L(StrncatExit12)
> - cmpb $0, 12(%ecx)
> - jz L(Exit13)
> - cmp $13, %ebx
> - je L(StrncatExit13)
> - cmpb $0, 13(%ecx)
> - jz L(Exit14)
> - cmp $14, %ebx
> - je L(StrncatExit14)
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - movlpd 7(%ecx), %xmm0
> - movlpd %xmm0, 7(%edx)
> - lea 14(%edx), %eax
> - cmpb $1, (%eax)
> - sbb $-1, %eax
> - movb %bh, (%eax)
> - movl %edi, %eax
> - RETURN1
> -
> - .p2align 4
> -L(StrncatExit8Bytes):
> - cmpb $0, (%ecx)
> - jz L(Exit1)
> - cmp $1, %ebx
> - je L(StrncatExit1)
> - cmpb $0, 1(%ecx)
> - jz L(Exit2)
> - cmp $2, %ebx
> - je L(StrncatExit2)
> - cmpb $0, 2(%ecx)
> - jz L(Exit3)
> - cmp $3, %ebx
> - je L(StrncatExit3)
> - cmpb $0, 3(%ecx)
> - jz L(Exit4)
> - cmp $4, %ebx
> - je L(StrncatExit4)
> - cmpb $0, 4(%ecx)
> - jz L(Exit5)
> - cmp $5, %ebx
> - je L(StrncatExit5)
> - cmpb $0, 5(%ecx)
> - jz L(Exit6)
> - cmp $6, %ebx
> - je L(StrncatExit6)
> - cmpb $0, 6(%ecx)
> - jz L(Exit7)
> - cmp $7, %ebx
> - je L(StrncatExit7)
> - movlpd (%ecx), %xmm0
> - movlpd %xmm0, (%edx)
> - lea 7(%edx), %eax
> - cmpb $1, (%eax)
> - sbb $-1, %eax
> - movb %bh, (%eax)
> - movl %edi, %eax
> - RETURN1
> -
> -# endif
> -END (STRCAT)
> -#endif
> diff --git a/sysdeps/i386/i686/multiarch/strcat.S b/sysdeps/i386/i686/multiarch/strcat.S
> deleted file mode 100644
> index e68feca..0000000
> --- a/sysdeps/i386/i686/multiarch/strcat.S
> +++ /dev/null
> @@ -1,119 +0,0 @@
> -/* Multiple versions of strcat
> - Copyright (C) 2011-2012 Free Software Foundation, Inc.
> - Contributed by Intel Corporation.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -#include <sysdep.h>
> -#include <init-arch.h>
> -
> -#ifndef USE_AS_STRNCAT
> -# ifndef STRCAT
> -# define STRCAT strcat
> -# endif
> -#endif
> -
> -#ifdef USE_AS_STRNCAT
> -# define STRCAT_SSSE3 __strncat_ssse3
> -# define STRCAT_SSE2 __strncat_sse2
> -# define STRCAT_IA32 __strncat_ia32
> -# define __GI_STRCAT __GI_strncat
> -#else
> -# define STRCAT_SSSE3 __strcat_ssse3
> -# define STRCAT_SSE2 __strcat_sse2
> -# define STRCAT_IA32 __strcat_ia32
> -# define __GI_STRCAT __GI_strcat
> -#endif
> -
> -
> -/* Define multiple versions only for the definition in libc. Don't
> - define multiple versions for strncat in static library since we
> - need strncat before the initialization happened. */
> -#ifndef NOT_IN_libc
> -
> -# ifdef SHARED
> - .text
> -ENTRY(STRCAT)
> - .type STRCAT, @gnu_indirect_function
> - pushl %ebx
> - cfi_adjust_cfa_offset (4)
> - cfi_rel_offset (ebx, 0)
> - LOAD_PIC_REG(bx)
> - cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
> - jne 1f
> - call __init_cpu_features
> -1: leal STRCAT_IA32@GOTOFF(%ebx), %eax
> - testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
> - jz 2f
> - leal STRCAT_SSE2@GOTOFF(%ebx), %eax
> - testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
> - jnz 2f
> - testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
> - jz 2f
> - leal STRCAT_SSSE3@GOTOFF(%ebx), %eax
> -2: popl %ebx
> - cfi_adjust_cfa_offset (-4)
> - cfi_restore (ebx)
> - ret
> -END(STRCAT)
> -# else
> -
> -ENTRY(STRCAT)
> - .type STRCAT, @gnu_indirect_function
> - cmpl $0, KIND_OFFSET+__cpu_features
> - jne 1f
> - call __init_cpu_features
> -1: leal STRCAT_IA32, %eax
> - testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
> - jz 2f
> - leal STRCAT_SSE2, %eax
> - testl $bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
> - jnz 2f
> - testl $bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
> - jz 2f
> - leal STRCAT_SSSE3, %eax
> -2: ret
> -END(STRCAT)
> -
> -# endif
> -
> -# undef ENTRY
> -# define ENTRY(name) \
> - .type STRCAT_IA32, @function; \
> - .align 16; \
> - STRCAT_IA32: cfi_startproc; \
> - CALL_MCOUNT
> -# undef END
> -# define END(name) \
> - cfi_endproc; .size STRCAT_IA32, .-STRCAT_IA32
> -
> -# ifdef SHARED
> -# undef libc_hidden_builtin_def
> -/* It doesn't make sense to send libc-internal strcat calls through a PLT.
> - The speedup we get from using SSSE3 instruction is likely eaten away
> - by the indirect call in the PLT. */
> -# define libc_hidden_builtin_def(name) \
> - .globl __GI_STRCAT; __GI_STRCAT = STRCAT_IA32
> -# undef libc_hidden_def
> -# define libc_hidden_def(name) \
> - .globl __GI___STRCAT; __GI___STRCAT = STRCAT_IA32
> -
> -# endif
> -#endif
> -
> -#ifndef USE_AS_STRNCAT
> -# include "../../i486/strcat.S"
> -#endif
> diff --git a/sysdeps/i386/i686/multiarch/strncat-c.c
> b/sysdeps/i386/i686/multiarch/strncat-c.c
> deleted file mode 100644
> index 132a000..0000000
> --- a/sysdeps/i386/i686/multiarch/strncat-c.c
> +++ /dev/null
> @@ -1,8 +0,0 @@
> -#define STRNCAT __strncat_ia32
> -#ifdef SHARED
> -#undef libc_hidden_def
> -#define libc_hidden_def(name) \
> - __hidden_ver1 (__strncat_ia32, __GI___strncat, __strncat_ia32);
> -#endif
> -
> -#include "string/strncat.c"
> diff --git a/sysdeps/i386/i686/multiarch/strncat-sse2.S
> b/sysdeps/i386/i686/multiarch/strncat-sse2.S
> deleted file mode 100644
> index f1045b7..0000000
> --- a/sysdeps/i386/i686/multiarch/strncat-sse2.S
> +++ /dev/null
> @@ -1,4 +0,0 @@
> -#define STRCAT __strncat_sse2
> -#define USE_AS_STRNCAT
> -
> -#include "strcat-sse2.S"
> diff --git a/sysdeps/i386/i686/multiarch/strncat-ssse3.S
> b/sysdeps/i386/i686/multiarch/strncat-ssse3.S
> deleted file mode 100644
> index 625b90a..0000000
> --- a/sysdeps/i386/i686/multiarch/strncat-ssse3.S
> +++ /dev/null
> @@ -1,4 +0,0 @@
> -#define STRCAT __strncat_ssse3
> -#define USE_AS_STRNCAT
> -
> -#include "strcat-ssse3.S"
> diff --git a/sysdeps/i386/i686/multiarch/strncat.S
> b/sysdeps/i386/i686/multiarch/strncat.S
> deleted file mode 100644
> index fd569c2..0000000
> --- a/sysdeps/i386/i686/multiarch/strncat.S
> +++ /dev/null
> @@ -1,3 +0,0 @@
> -#define STRCAT strncat
> -#define USE_AS_STRNCAT
> -#include "strcat.S"
> diff --git a/sysdeps/powerpc/strcat.c b/sysdeps/powerpc/strcat.c
> deleted file mode 100644
> index 28575d0..0000000
> --- a/sysdeps/powerpc/strcat.c
> +++ /dev/null
> @@ -1,30 +0,0 @@
> -/* strcat version that uses fast strcpy/strlen.
> - Copyright (C) 1997, 2003 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -#include <string.h>
> -
> -#undef strcat
> -
> -/* Append SRC on the end of DEST. */
> -char *
> -strcat (char *dest, const char *src)
> -{
> - strcpy (dest + strlen (dest), src);
> - return dest;
> -}
> -libc_hidden_builtin_def (strcat)
> diff --git a/sysdeps/x86_64/multiarch/Makefile
> b/sysdeps/x86_64/multiarch/Makefile
> index 22f1435..ae94366 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -6,7 +6,7 @@ endif
>
> ifeq ($(subdir),string)
>
> -sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
> +sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
> strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
> memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
> memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
> @@ -14,8 +14,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
> strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
> strcpy-sse2-unaligned strncpy-sse2-unaligned \
> stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
> - strcat-sse2-unaligned strncat-sse2-unaligned \
> - strcat-ssse3 strncat-ssse3 strlen_atom strlen_avx \
> + strlen_atom strlen_avx \
> strnlen-sse2-no-bsf strrchr-sse2-no-bsf strchr-sse2-no-bsf \
> memcmp-ssse3
> ifeq (yes,$(config-cflags-sse4))
> diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
> b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
> deleted file mode 100644
> index 7811ab5..0000000
> --- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
> +++ /dev/null
> @@ -1,53 +0,0 @@
> -/* strcat with SSE2
> - Copyright (C) 2011 Free Software Foundation, Inc.
> - Contributed by Intel Corporation.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -#ifndef NOT_IN_libc
> -
> -# include <sysdep.h>
> -
> -# ifndef STRCAT
> -# define STRCAT __strcat_sse2_unaligned
> -# endif
> -
> -# define USE_AS_STRCAT
> -
> -.text
> -ENTRY (STRCAT)
> - mov %rdi, %r9
> -# ifdef USE_AS_STRNCAT
> - mov %rdx, %r8
> -# endif
> -
> -# define RETURN jmp L(StartStrcpyPart)
> -# include "strlen-sse2-pminub.S"
> -# undef RETURN
> -
> -L(StartStrcpyPart):
> - lea (%r9, %rax), %rdi
> - mov %rsi, %rcx
> - mov %r9, %rax /* save result */
> -
> -# ifdef USE_AS_STRNCAT
> - test %r8, %r8
> - jz L(ExitZero)
> -# define USE_AS_STRNCPY
> -# endif
> -
> -# include "strcpy-sse2-unaligned.S"
> -#endif
> diff --git a/sysdeps/x86_64/multiarch/strcat-ssse3.S
> b/sysdeps/x86_64/multiarch/strcat-ssse3.S
> deleted file mode 100644
> index abd2c0c..0000000
> --- a/sysdeps/x86_64/multiarch/strcat-ssse3.S
> +++ /dev/null
> @@ -1,557 +0,0 @@
> -/* strcat with SSSE3
> - Copyright (C) 2011 Free Software Foundation, Inc.
> - Contributed by Intel Corporation.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -#ifndef NOT_IN_libc
> -
> -# include <sysdep.h>
> -
> -# ifndef STRCAT
> -# define STRCAT __strcat_ssse3
> -# endif
> -
> -# define USE_AS_STRCAT
> -
> -.text
> -ENTRY (STRCAT)
> -# ifdef USE_AS_STRNCAT
> - mov %rdx, %r8
> -# endif
> -
> -# define RETURN jmp L(StartStrcpyPart)
> -# include "strlen-sse2-no-bsf.S"
> -
> -# undef RETURN
> -
> -L(StartStrcpyPart):
> - mov %rsi, %rcx
> - lea (%rdi, %rax), %rdx
> -# ifdef USE_AS_STRNCAT
> - test %r8, %r8
> - jz L(StrncatExit0)
> - cmp $8, %r8
> - jbe L(StrncatExit8Bytes)
> -# endif
> - cmpb $0, (%rcx)
> - jz L(Exit1)
> - cmpb $0, 1(%rcx)
> - jz L(Exit2)
> - cmpb $0, 2(%rcx)
> - jz L(Exit3)
> - cmpb $0, 3(%rcx)
> - jz L(Exit4)
> - cmpb $0, 4(%rcx)
> - jz L(Exit5)
> - cmpb $0, 5(%rcx)
> - jz L(Exit6)
> - cmpb $0, 6(%rcx)
> - jz L(Exit7)
> - cmpb $0, 7(%rcx)
> - jz L(Exit8)
> - cmpb $0, 8(%rcx)
> - jz L(Exit9)
> -# ifdef USE_AS_STRNCAT
> - cmp $16, %r8
> - jb L(StrncatExit15Bytes)
> -# endif
> - cmpb $0, 9(%rcx)
> - jz L(Exit10)
> - cmpb $0, 10(%rcx)
> - jz L(Exit11)
> - cmpb $0, 11(%rcx)
> - jz L(Exit12)
> - cmpb $0, 12(%rcx)
> - jz L(Exit13)
> - cmpb $0, 13(%rcx)
> - jz L(Exit14)
> - cmpb $0, 14(%rcx)
> - jz L(Exit15)
> - cmpb $0, 15(%rcx)
> - jz L(Exit16)
> -# ifdef USE_AS_STRNCAT
> - cmp $16, %r8
> - je L(StrncatExit16)
> -# define USE_AS_STRNCPY
> -# endif
> -
> -# include "strcpy-ssse3.S"
> -
> - .p2align 4
> -L(CopyFrom1To16Bytes):
> - add %rsi, %rdx
> - add %rsi, %rcx
> -
> - test %al, %al
> - jz L(ExitHigh)
> - test $0x01, %al
> - jnz L(Exit1)
> - test $0x02, %al
> - jnz L(Exit2)
> - test $0x04, %al
> - jnz L(Exit3)
> - test $0x08, %al
> - jnz L(Exit4)
> - test $0x10, %al
> - jnz L(Exit5)
> - test $0x20, %al
> - jnz L(Exit6)
> - test $0x40, %al
> - jnz L(Exit7)
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(ExitHigh):
> - test $0x01, %ah
> - jnz L(Exit9)
> - test $0x02, %ah
> - jnz L(Exit10)
> - test $0x04, %ah
> - jnz L(Exit11)
> - test $0x08, %ah
> - jnz L(Exit12)
> - test $0x10, %ah
> - jnz L(Exit13)
> - test $0x20, %ah
> - jnz L(Exit14)
> - test $0x40, %ah
> - jnz L(Exit15)
> - movlpd (%rcx), %xmm0
> - movlpd 8(%rcx), %xmm1
> - movlpd %xmm0, (%rdx)
> - movlpd %xmm1, 8(%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit1):
> - xor %ah, %ah
> - movb %ah, 1(%rdx)
> -L(Exit1):
> - movb (%rcx), %al
> - movb %al, (%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit2):
> - xor %ah, %ah
> - movb %ah, 2(%rdx)
> -L(Exit2):
> - movw (%rcx), %ax
> - movw %ax, (%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit3):
> - xor %ah, %ah
> - movb %ah, 3(%rdx)
> -L(Exit3):
> - movw (%rcx), %ax
> - movw %ax, (%rdx)
> - movb 2(%rcx), %al
> - movb %al, 2(%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit4):
> - xor %ah, %ah
> - movb %ah, 4(%rdx)
> -L(Exit4):
> - mov (%rcx), %eax
> - mov %eax, (%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit5):
> - xor %ah, %ah
> - movb %ah, 5(%rdx)
> -L(Exit5):
> - mov (%rcx), %eax
> - mov %eax, (%rdx)
> - movb 4(%rcx), %al
> - movb %al, 4(%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit6):
> - xor %ah, %ah
> - movb %ah, 6(%rdx)
> -L(Exit6):
> - mov (%rcx), %eax
> - mov %eax, (%rdx)
> - movw 4(%rcx), %ax
> - movw %ax, 4(%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit7):
> - xor %ah, %ah
> - movb %ah, 7(%rdx)
> -L(Exit7):
> - mov (%rcx), %eax
> - mov %eax, (%rdx)
> - mov 3(%rcx), %eax
> - mov %eax, 3(%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit8):
> - xor %ah, %ah
> - movb %ah, 8(%rdx)
> -L(Exit8):
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit9):
> - xor %ah, %ah
> - movb %ah, 9(%rdx)
> -L(Exit9):
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - movb 8(%rcx), %al
> - movb %al, 8(%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit10):
> - xor %ah, %ah
> - movb %ah, 10(%rdx)
> -L(Exit10):
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - movw 8(%rcx), %ax
> - movw %ax, 8(%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit11):
> - xor %ah, %ah
> - movb %ah, 11(%rdx)
> -L(Exit11):
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - mov 7(%rcx), %eax
> - mov %eax, 7(%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit12):
> - xor %ah, %ah
> - movb %ah, 12(%rdx)
> -L(Exit12):
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - mov 8(%rcx), %eax
> - mov %eax, 8(%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit13):
> - xor %ah, %ah
> - movb %ah, 13(%rdx)
> -L(Exit13):
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - movlpd 5(%rcx), %xmm1
> - movlpd %xmm1, 5(%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit14):
> - xor %ah, %ah
> - movb %ah, 14(%rdx)
> -L(Exit14):
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - movlpd 6(%rcx), %xmm1
> - movlpd %xmm1, 6(%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit15):
> - xor %ah, %ah
> - movb %ah, 15(%rdx)
> -L(Exit15):
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - movlpd 7(%rcx), %xmm1
> - movlpd %xmm1, 7(%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit16):
> - xor %ah, %ah
> - movb %ah, 16(%rdx)
> -L(Exit16):
> - movlpd (%rcx), %xmm0
> - movlpd 8(%rcx), %xmm1
> - movlpd %xmm0, (%rdx)
> - movlpd %xmm1, 8(%rdx)
> - mov %rdi, %rax
> - ret
> -
> -# ifdef USE_AS_STRNCPY
> -
> - .p2align 4
> -L(CopyFrom1To16BytesCase2):
> - add $16, %r8
> - add %rsi, %rcx
> - lea (%rsi, %rdx), %rsi
> - lea -9(%r8), %rdx
> - and $1<<7, %dh
> - or %al, %dh
> - test %dh, %dh
> - lea (%rsi), %rdx
> - jz L(ExitHighCase2)
> -
> - test $0x01, %al
> - jnz L(Exit1)
> - cmp $1, %r8
> - je L(StrncatExit1)
> - test $0x02, %al
> - jnz L(Exit2)
> - cmp $2, %r8
> - je L(StrncatExit2)
> - test $0x04, %al
> - jnz L(Exit3)
> - cmp $3, %r8
> - je L(StrncatExit3)
> - test $0x08, %al
> - jnz L(Exit4)
> - cmp $4, %r8
> - je L(StrncatExit4)
> - test $0x10, %al
> - jnz L(Exit5)
> - cmp $5, %r8
> - je L(StrncatExit5)
> - test $0x20, %al
> - jnz L(Exit6)
> - cmp $6, %r8
> - je L(StrncatExit6)
> - test $0x40, %al
> - jnz L(Exit7)
> - cmp $7, %r8
> - je L(StrncatExit7)
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - lea 7(%rdx), %rax
> - cmpb $1, (%rax)
> - sbb $-1, %rax
> - xor %cl, %cl
> - movb %cl, (%rax)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(ExitHighCase2):
> - test $0x01, %ah
> - jnz L(Exit9)
> - cmp $9, %r8
> - je L(StrncatExit9)
> - test $0x02, %ah
> - jnz L(Exit10)
> - cmp $10, %r8
> - je L(StrncatExit10)
> - test $0x04, %ah
> - jnz L(Exit11)
> - cmp $11, %r8
> - je L(StrncatExit11)
> - test $0x8, %ah
> - jnz L(Exit12)
> - cmp $12, %r8
> - je L(StrncatExit12)
> - test $0x10, %ah
> - jnz L(Exit13)
> - cmp $13, %r8
> - je L(StrncatExit13)
> - test $0x20, %ah
> - jnz L(Exit14)
> - cmp $14, %r8
> - je L(StrncatExit14)
> - test $0x40, %ah
> - jnz L(Exit15)
> - cmp $15, %r8
> - je L(StrncatExit15)
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - movlpd 8(%rcx), %xmm1
> - movlpd %xmm1, 8(%rdx)
> - mov %rdi, %rax
> - ret
> -
> -L(CopyFrom1To16BytesCase2OrCase3):
> - test %rax, %rax
> - jnz L(CopyFrom1To16BytesCase2)
> -
> - .p2align 4
> -L(CopyFrom1To16BytesCase3):
> - add $16, %r8
> - add %rsi, %rdx
> - add %rsi, %rcx
> -
> - cmp $8, %r8
> - ja L(ExitHighCase3)
> - cmp $1, %r8
> - je L(StrncatExit1)
> - cmp $2, %r8
> - je L(StrncatExit2)
> - cmp $3, %r8
> - je L(StrncatExit3)
> - cmp $4, %r8
> - je L(StrncatExit4)
> - cmp $5, %r8
> - je L(StrncatExit5)
> - cmp $6, %r8
> - je L(StrncatExit6)
> - cmp $7, %r8
> - je L(StrncatExit7)
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - xor %ah, %ah
> - movb %ah, 8(%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(ExitHighCase3):
> - cmp $9, %r8
> - je L(StrncatExit9)
> - cmp $10, %r8
> - je L(StrncatExit10)
> - cmp $11, %r8
> - je L(StrncatExit11)
> - cmp $12, %r8
> - je L(StrncatExit12)
> - cmp $13, %r8
> - je L(StrncatExit13)
> - cmp $14, %r8
> - je L(StrncatExit14)
> - cmp $15, %r8
> - je L(StrncatExit15)
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - movlpd 8(%rcx), %xmm1
> - movlpd %xmm1, 8(%rdx)
> - xor %ah, %ah
> - movb %ah, 16(%rdx)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit0):
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit15Bytes):
> - cmp $9, %r8
> - je L(StrncatExit9)
> - cmpb $0, 9(%rcx)
> - jz L(Exit10)
> - cmp $10, %r8
> - je L(StrncatExit10)
> - cmpb $0, 10(%rcx)
> - jz L(Exit11)
> - cmp $11, %r8
> - je L(StrncatExit11)
> - cmpb $0, 11(%rcx)
> - jz L(Exit12)
> - cmp $12, %r8
> - je L(StrncatExit12)
> - cmpb $0, 12(%rcx)
> - jz L(Exit13)
> - cmp $13, %r8
> - je L(StrncatExit13)
> - cmpb $0, 13(%rcx)
> - jz L(Exit14)
> - cmp $14, %r8
> - je L(StrncatExit14)
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - movlpd 7(%rcx), %xmm1
> - movlpd %xmm1, 7(%rdx)
> - lea 14(%rdx), %rax
> - cmpb $1, (%rax)
> - sbb $-1, %rax
> - xor %cl, %cl
> - movb %cl, (%rax)
> - mov %rdi, %rax
> - ret
> -
> - .p2align 4
> -L(StrncatExit8Bytes):
> - cmpb $0, (%rcx)
> - jz L(Exit1)
> - cmp $1, %r8
> - je L(StrncatExit1)
> - cmpb $0, 1(%rcx)
> - jz L(Exit2)
> - cmp $2, %r8
> - je L(StrncatExit2)
> - cmpb $0, 2(%rcx)
> - jz L(Exit3)
> - cmp $3, %r8
> - je L(StrncatExit3)
> - cmpb $0, 3(%rcx)
> - jz L(Exit4)
> - cmp $4, %r8
> - je L(StrncatExit4)
> - cmpb $0, 4(%rcx)
> - jz L(Exit5)
> - cmp $5, %r8
> - je L(StrncatExit5)
> - cmpb $0, 5(%rcx)
> - jz L(Exit6)
> - cmp $6, %r8
> - je L(StrncatExit6)
> - cmpb $0, 6(%rcx)
> - jz L(Exit7)
> - cmp $7, %r8
> - je L(StrncatExit7)
> - movlpd (%rcx), %xmm0
> - movlpd %xmm0, (%rdx)
> - lea 7(%rdx), %rax
> - cmpb $1, (%rax)
> - sbb $-1, %rax
> - xor %cl, %cl
> - movb %cl, (%rax)
> - mov %rdi, %rax
> - ret
> -
> -# endif
> -END (STRCAT)
> -#endif
> diff --git a/sysdeps/x86_64/multiarch/strcat.S
> b/sysdeps/x86_64/multiarch/strcat.S
> deleted file mode 100644
> index 0c256de..0000000
> --- a/sysdeps/x86_64/multiarch/strcat.S
> +++ /dev/null
> @@ -1,84 +0,0 @@
> -/* Multiple versions of strcat
> - Copyright (C) 2009, 2011 Free Software Foundation, Inc.
> - Contributed by Intel Corporation.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -#include <sysdep.h>
> -#include <init-arch.h>
> -
> -#ifndef USE_AS_STRNCAT
> -# ifndef STRCAT
> -# define STRCAT strcat
> -# endif
> -#endif
> -
> -#ifdef USE_AS_STRNCAT
> -# define STRCAT_SSSE3 __strncat_ssse3
> -# define STRCAT_SSE2 __strncat_sse2
> -# define STRCAT_SSE2_UNALIGNED __strncat_sse2_unaligned
> -# define __GI_STRCAT __GI_strncat
> -# define __GI___STRCAT __GI___strncat
> -#else
> -# define STRCAT_SSSE3 __strcat_ssse3
> -# define STRCAT_SSE2 __strcat_sse2
> -# define STRCAT_SSE2_UNALIGNED __strcat_sse2_unaligned
> -# define __GI_STRCAT __GI_strcat
> -# define __GI___STRCAT __GI___strcat
> -#endif
> -
> -
> -/* Define multiple versions only for the definition in libc. */
> -#ifndef NOT_IN_libc
> - .text
> -ENTRY(STRCAT)
> - .type STRCAT, @gnu_indirect_function
> - cmpl $0, __cpu_features+KIND_OFFSET(%rip)
> - jne 1f
> - call __init_cpu_features
> -1: leaq STRCAT_SSE2_UNALIGNED(%rip), %rax
> - testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
> - jnz 2f
> - leaq STRCAT_SSE2(%rip), %rax
> - testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
> - jz 2f
> - leaq STRCAT_SSSE3(%rip), %rax
> -2: ret
> -END(STRCAT)
> -
> -# undef ENTRY
> -# define ENTRY(name) \
> - .type STRCAT_SSE2, @function; \
> - .align 16; \
> - STRCAT_SSE2: cfi_startproc; \
> - CALL_MCOUNT
> -# undef END
> -# define END(name) \
> - cfi_endproc; .size STRCAT_SSE2, .-STRCAT_SSE2
> -# undef libc_hidden_builtin_def
> -/* It doesn't make sense to send libc-internal strcat calls through a PLT.
> - The speedup we get from using SSSE3 instruction is likely eaten away
> - by the indirect call in the PLT. */
> -# define libc_hidden_builtin_def(name) \
> - .globl __GI_STRCAT; __GI_STRCAT = STRCAT_SSE2
> -# undef libc_hidden_def
> -# define libc_hidden_def(name) \
> - .globl __GI___STRCAT; __GI___STRCAT = STRCAT_SSE2
> -#endif
> -
> -#ifndef USE_AS_STRNCAT
> -# include "../strcat.S"
> -#endif
> diff --git a/sysdeps/x86_64/multiarch/strncat-c.c
> b/sysdeps/x86_64/multiarch/strncat-c.c
> deleted file mode 100644
> index a3cdbff..0000000
> --- a/sysdeps/x86_64/multiarch/strncat-c.c
> +++ /dev/null
> @@ -1,8 +0,0 @@
> -#define STRNCAT __strncat_sse2
> -#ifdef SHARED
> -#undef libc_hidden_def
> -#define libc_hidden_def(name) \
> - __hidden_ver1 (__strncat_sse2, __GI___strncat, __strncat_sse2);
> -#endif
> -
> -#include "string/strncat.c"
> diff --git a/sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S
> b/sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S
> deleted file mode 100644
> index 133e1d2..0000000
> --- a/sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S
> +++ /dev/null
> @@ -1,3 +0,0 @@
> -#define USE_AS_STRNCAT
> -#define STRCAT __strncat_sse2_unaligned
> -#include "strcat-sse2-unaligned.S"
> diff --git a/sysdeps/x86_64/multiarch/strncat-ssse3.S
> b/sysdeps/x86_64/multiarch/strncat-ssse3.S
> deleted file mode 100644
> index 6c45ff3..0000000
> --- a/sysdeps/x86_64/multiarch/strncat-ssse3.S
> +++ /dev/null
> @@ -1,3 +0,0 @@
> -#define USE_AS_STRNCAT
> -#define STRCAT __strncat_ssse3
> -#include "strcat-ssse3.S"
> diff --git a/sysdeps/x86_64/multiarch/strncat.S
> b/sysdeps/x86_64/multiarch/strncat.S
> deleted file mode 100644
> index fd569c2..0000000
> --- a/sysdeps/x86_64/multiarch/strncat.S
> +++ /dev/null
> @@ -1,3 +0,0 @@
> -#define STRCAT strncat
> -#define USE_AS_STRNCAT
> -#include "strcat.S"
> diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S
> deleted file mode 100644
> index 535a18d..0000000
> --- a/sysdeps/x86_64/strcat.S
> +++ /dev/null
> @@ -1,259 +0,0 @@
> -/* strcat(dest, src) -- Append SRC on the end of DEST.
> - Optimized for x86-64.
> - Copyright (C) 2002 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> - Contributed by Andreas Jaeger <aj@suse.de>, 2002.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -#include <sysdep.h>
> -#include "asm-syntax.h"
> -#include "bp-sym.h"
> -#include "bp-asm.h"
> -
> -
> - .text
> -ENTRY (BP_SYM (strcat))
> - movq %rdi, %rcx /* Dest. register. */
> - andl $7, %ecx /* mask alignment bits */
> - movq %rdi, %rax /* Duplicate destination pointer. */
> - movq $0xfefefefefefefeff,%r8
> -
> - /* First step: Find end of destination. */
> - jz 4f /* aligned => start loop */
> -
> - neg %ecx /* We need to align to 8 bytes. */
> - addl $8,%ecx
> - /* Search the first bytes directly. */
> -0: cmpb $0x0,(%rax) /* is byte NUL? */
> - je 2f /* yes => start copy */
> - incq %rax /* increment pointer */
> - decl %ecx
> - jnz 0b
> -
> -
> -
> - /* Now the source is aligned. Scan for NUL byte. */
> - .p2align 4
> -4:
> - /* First unroll. */
> - movq (%rax), %rcx /* get double word (= 8 bytes) in question
> */
> - addq $8,%rax /* adjust pointer for next word */
> - movq %r8, %rdx /* magic value */
> - addq %rcx, %rdx /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc 3f /* highest byte is NUL => return pointer
> */
> - xorq %rcx, %rdx /* (word+magic)^word */
> - orq %r8, %rdx /* set all non-carry bits */
> - incq %rdx /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> - jnz 3f /* found NUL => return pointer */
> -
> - /* Second unroll. */
> - movq (%rax), %rcx /* get double word (= 8 bytes) in question
> */
> - addq $8,%rax /* adjust pointer for next word */
> - movq %r8, %rdx /* magic value */
> - addq %rcx, %rdx /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc 3f /* highest byte is NUL => return pointer
> */
> - xorq %rcx, %rdx /* (word+magic)^word */
> - orq %r8, %rdx /* set all non-carry bits */
> - incq %rdx /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> - jnz 3f /* found NUL => return pointer */
> -
> - /* Third unroll. */
> - movq (%rax), %rcx /* get double word (= 8 bytes) in question
> */
> - addq $8,%rax /* adjust pointer for next word */
> - movq %r8, %rdx /* magic value */
> - addq %rcx, %rdx /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc 3f /* highest byte is NUL => return pointer
> */
> - xorq %rcx, %rdx /* (word+magic)^word */
> - orq %r8, %rdx /* set all non-carry bits */
> - incq %rdx /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> - jnz 3f /* found NUL => return pointer */
> -
> - /* Fourth unroll. */
> - movq (%rax), %rcx /* get double word (= 8 bytes) in question
> */
> - addq $8,%rax /* adjust pointer for next word */
> - movq %r8, %rdx /* magic value */
> - addq %rcx, %rdx /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc 3f /* highest byte is NUL => return pointer
> */
> - xorq %rcx, %rdx /* (word+magic)^word */
> - orq %r8, %rdx /* set all non-carry bits */
> - incq %rdx /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> - jz 4b /* no NUL found => continue loop */
> -
> - .p2align 4 /* Align, it's a jump target. */
> -3: subq $8,%rax /* correct pointer increment. */
> -
> - testb %cl, %cl /* is first byte NUL? */
> - jz 2f /* yes => return */
> - incq %rax /* increment pointer */
> -
> - testb %ch, %ch /* is second byte NUL? */
> - jz 2f /* yes => return */
> - incq %rax /* increment pointer */
> -
> - testl $0x00ff0000, %ecx /* is third byte NUL? */
> - jz 2f /* yes => return pointer */
> - incq %rax /* increment pointer */
> -
> - testl $0xff000000, %ecx /* is fourth byte NUL? */
> - jz 2f /* yes => return pointer */
> - incq %rax /* increment pointer */
> -
> - shrq $32, %rcx /* look at other half. */
> -
> - testb %cl, %cl /* is first byte NUL? */
> - jz 2f /* yes => return */
> - incq %rax /* increment pointer */
> -
> - testb %ch, %ch /* is second byte NUL? */
> - jz 2f /* yes => return */
> - incq %rax /* increment pointer */
> -
> - testl $0xff0000, %ecx /* is third byte NUL? */
> - jz 2f /* yes => return pointer */
> - incq %rax /* increment pointer */
> -
> -2:
> - /* Second step: Copy source to destination. */
> -
> - movq %rsi, %rcx /* duplicate */
> - andl $7,%ecx /* mask alignment bits */
> - movq %rax, %rdx /* move around */
> - jz 22f /* aligned => start loop */
> -
> - neg %ecx /* align to 8 bytes. */
> - addl $8, %ecx
> - /* Align the source pointer. */
> -21:
> - movb (%rsi), %al /* Fetch a byte */
> - testb %al, %al /* Is it NUL? */
> - movb %al, (%rdx) /* Store it */
> - jz 24f /* If it was NUL, done! */
> - incq %rsi
> - incq %rdx
> - decl %ecx
> - jnz 21b
> -
> - /* Now the sources is aligned. Unfortunatly we cannot force
> - to have both source and destination aligned, so ignore the
> - alignment of the destination. */
> - .p2align 4
> -22:
> - /* 1st unroll. */
> - movq (%rsi), %rax /* Read double word (8 bytes). */
> - addq $8, %rsi /* Adjust pointer for next word. */
> - movq %rax, %r9 /* Save a copy for NUL finding. */
> - addq %r8, %r9 /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc 23f /* highest byte is NUL => return pointer
> */
> - xorq %rax, %r9 /* (word+magic)^word */
> - orq %r8, %r9 /* set all non-carry bits */
> - incq %r9 /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> -
> - jnz 23f /* found NUL => return pointer */
> -
> - movq %rax, (%rdx) /* Write value to destination. */
> - addq $8, %rdx /* Adjust pointer. */
> -
> - /* 2nd unroll. */
> - movq (%rsi), %rax /* Read double word (8 bytes). */
> - addq $8, %rsi /* Adjust pointer for next word. */
> - movq %rax, %r9 /* Save a copy for NUL finding. */
> - addq %r8, %r9 /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc 23f /* highest byte is NUL => return pointer
> */
> - xorq %rax, %r9 /* (word+magic)^word */
> - orq %r8, %r9 /* set all non-carry bits */
> - incq %r9 /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> -
> - jnz 23f /* found NUL => return pointer */
> -
> - movq %rax, (%rdx) /* Write value to destination. */
> - addq $8, %rdx /* Adjust pointer. */
> -
> - /* 3rd unroll. */
> - movq (%rsi), %rax /* Read double word (8 bytes). */
> - addq $8, %rsi /* Adjust pointer for next word. */
> - movq %rax, %r9 /* Save a copy for NUL finding. */
> - addq %r8, %r9 /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc 23f /* highest byte is NUL => return pointer
> */
> - xorq %rax, %r9 /* (word+magic)^word */
> - orq %r8, %r9 /* set all non-carry bits */
> - incq %r9 /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> -
> - jnz 23f /* found NUL => return pointer */
> -
> - movq %rax, (%rdx) /* Write value to destination. */
> - addq $8, %rdx /* Adjust pointer. */
> -
> - /* 4th unroll. */
> - movq (%rsi), %rax /* Read double word (8 bytes). */
> - addq $8, %rsi /* Adjust pointer for next word. */
> - movq %rax, %r9 /* Save a copy for NUL finding. */
> - addq %r8, %r9 /* add the magic value to the word. We
> get
> - carry bits reported for each byte which
> - is *not* 0 */
> - jnc 23f /* highest byte is NUL => return pointer
> */
> - xorq %rax, %r9 /* (word+magic)^word */
> - orq %r8, %r9 /* set all non-carry bits */
> - incq %r9 /* add 1: if one carry bit was *not* set
> - the addition will not result in 0. */
> -
> - jnz 23f /* found NUL => return pointer */
> -
> - movq %rax, (%rdx) /* Write value to destination. */
> - addq $8, %rdx /* Adjust pointer. */
> - jmp 22b /* Next iteration. */
> -
> - /* Do the last few bytes. %rax contains the value to write.
> - The loop is unrolled twice. */
> - .p2align 4
> -23:
> - movb %al, (%rdx) /* 1st byte. */
> - testb %al, %al /* Is it NUL. */
> - jz 24f /* yes, finish. */
> - incq %rdx /* Increment destination. */
> - movb %ah, (%rdx) /* 2nd byte. */
> - testb %ah, %ah /* Is it NUL?. */
> - jz 24f /* yes, finish. */
> - incq %rdx /* Increment destination. */
> - shrq $16, %rax /* Shift... */
> - jmp 23b /* and look at next two bytes in %rax. */
> -
> -
> -24:
> - movq %rdi, %rax /* Source is return value. */
> - retq
> -END (BP_SYM (strcat))
> -libc_hidden_builtin_def (strcat)
> --
> 1.7.4.4
>
>
>