This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch, master, updated. glibc-2.14-115-g8002999
- From: drepper at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 19 Jul 2011 21:27:18 -0000
- Subject: GNU C Library master sources branch, master, updated. glibc-2.14-115-g8002999
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master has been updated
via 80029994814f0718aef9568c90f04b3d9a31802c (commit)
via 99710781cc47002612e609c7dc5f34692b64e9b3 (commit)
from 7dc6bd90c569c49807462b0740b18e32fab4d8b7 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=80029994814f0718aef9568c90f04b3d9a31802c
commit 80029994814f0718aef9568c90f04b3d9a31802c
Author: Ulrich Drepper <drepper@gmail.com>
Date: Tue Jul 19 17:27:09 2011 -0400
Fix whitespaces
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
index 1150281..c02c6f0 100644
--- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
@@ -52,4 +52,3 @@ L(StartStrcpyPart):
# include "strcpy-sse2-unaligned.S"
#endif
-
diff --git a/sysdeps/x86_64/multiarch/strcat-ssse3.S b/sysdeps/x86_64/multiarch/strcat-ssse3.S
index 66736a7..34b61b8 100644
--- a/sysdeps/x86_64/multiarch/strcat-ssse3.S
+++ b/sysdeps/x86_64/multiarch/strcat-ssse3.S
@@ -556,4 +556,3 @@ L(StrncatExit8Bytes):
# endif
END (STRCAT)
#endif
-
diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
index 6de8c47..e73778a 100644
--- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
@@ -1742,7 +1742,7 @@ L(Unaligned64LeaveCase2):
# ifndef USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm4)
# else
- jnz L(CopyFrom1To16Bytes)
+ jnz L(CopyFrom1To16Bytes)
# endif
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %rdx
@@ -1754,7 +1754,7 @@ L(Unaligned64LeaveCase2):
# ifndef USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm5)
# else
- jnz L(CopyFrom1To16Bytes)
+ jnz L(CopyFrom1To16Bytes)
# endif
pcmpeqb %xmm6, %xmm0
@@ -1767,7 +1767,7 @@ L(Unaligned64LeaveCase2):
# ifndef USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm6)
# else
- jnz L(CopyFrom1To16Bytes)
+ jnz L(CopyFrom1To16Bytes)
# endif
pcmpeqb %xmm7, %xmm0
@@ -1888,4 +1888,3 @@ L(FillTable):
# endif
# endif
#endif
-
diff --git a/sysdeps/x86_64/multiarch/strlen-sse2-pminub.S b/sysdeps/x86_64/multiarch/strlen-sse2-pminub.S
index 57778cf..6048072 100644
--- a/sysdeps/x86_64/multiarch/strlen-sse2-pminub.S
+++ b/sysdeps/x86_64/multiarch/strlen-sse2-pminub.S
@@ -141,7 +141,7 @@ L(align16_start):
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
-
+
test $0x3f, %rax
jz L(align64_loop)
@@ -183,10 +183,10 @@ L(align16_start):
.p2align 4
L(align64_loop):
movaps (%rax), %xmm4
- pminub 16(%rax), %xmm4
- movaps 32(%rax), %xmm5
- pminub 48(%rax), %xmm5
- add $64, %rax
+ pminub 16(%rax), %xmm4
+ movaps 32(%rax), %xmm5
+ pminub 48(%rax), %xmm5
+ add $64, %rax
pminub %xmm4, %xmm5
pcmpeqb %xmm0, %xmm5
pmovmskb %xmm5, %edx
@@ -195,7 +195,7 @@ L(align16_start):
pcmpeqb -64(%rax), %xmm0
- sub $80, %rax
+ sub $80, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=99710781cc47002612e609c7dc5f34692b64e9b3
commit 99710781cc47002612e609c7dc5f34692b64e9b3
Author: Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
Date: Tue Jul 19 17:11:54 2011 -0400
Improve 64 bit strcat functions with SSE2/SSSE3
diff --git a/ChangeLog b/ChangeLog
index 0932ae5..e3dc2ee 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,32 @@
+2011-07-15 Liubov Dmitrieva <liubov.dmitrieva@intel.com>
+
+ * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
+ strcat-ssse3 strcat-sse2-unaligned strncat-ssse3
+ strncat-sse2-unaligned strncat-c strlen-sse2-pminub
+ * sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S: New file.
+ * sysdeps/x86_64/multiarch/strcat.S: New file.
+ * sysdeps/x86_64/multiarch/strncat.S: New file.
+ * sysdeps/x86_64/multiarch/strncat-c.c: New file.
+ * sysdeps/x86_64/multiarch/strcat-ssse3.S: New file.
+ * sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S: New file.
+ * sysdeps/x86_64/multiarch/strncat-ssse3.S: New file.
+ * sysdeps/x86_64/multiarch/strcpy-ssse3.S
+ (USE_AS_STRCAT): Define.
+ Add strcat and strncat support.
+ * sysdeps/x86_64/multiarch/strlen-no-bsf.S: Likewise.
+ * sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S: Likewise.
+ * sysdeps/x86_64/multiarch/strlen-sse2-pminub.S: New file.
+ * string/strncat.c: Update.
+ (STRNCAT): Define.
+ * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
+ Turn on bit_Prefer_PMINUB_for_stringop for Intel Core i3, i5
+ and i7.
+ * sysdeps/x86_64/multiarch/init-arch.h
+ (bit_Prefer_PMINUB_for_stringop): New.
+ (index_Prefer_PMINUB_for_stringop): Likewise.
+ * sysdeps/x86_64/multiarch/strlen.S (strlen): Check
+ bit_Prefer_PMINUB_for_stringop.
+
2011-07-19 Ulrich Drepper <drepper@gmail.com>
* crypt/sha512.h (struct sha512_ctx): Move buffer into union and add
diff --git a/NEWS b/NEWS
index f3cead3..fb2c15e 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,4 @@
-GNU C Library NEWS -- history of user-visible changes. 2011-7-6
+GNU C Library NEWS -- history of user-visible changes. 2011-7-19
Copyright (C) 1992-2009, 2010, 2011 Free Software Foundation, Inc.
See the end for copying conditions.
@@ -23,6 +23,9 @@ Version 2.15
* Improved strcpy, strncpy, stpcpy, stpncpy for SSE2 and SSSE3 on x86-64.
Contributed by HJ Lu.
+
+* Improved strcat and strncat on x86-64.
+ Contributed by Liubov Dmitrieva.
Version 2.14
diff --git a/string/strncat.c b/string/strncat.c
index 2e2de11..72d9d69 100644
--- a/string/strncat.c
+++ b/string/strncat.c
@@ -24,10 +24,12 @@
typedef char reg_char;
#endif
-#undef strncat
+#ifndef STRNCAT
+# define STRNCAT strncat
+#endif
char *
-strncat (s1, s2, n)
+STRNCAT (s1, s2, n)
char *s1;
const char *s2;
size_t n;
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 88410b3..c959dd1 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -5,14 +5,16 @@ endif
ifeq ($(subdir),string)
-sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
+sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
strncase_l-ssse3 strlen-sse4 strlen-no-bsf memset-x86-64 \
strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
strcpy-sse2-unaligned strncpy-sse2-unaligned \
- stpcpy-sse2-unaligned stpncpy-sse2-unaligned
+ stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
+ strcat-sse2-unaligned strncat-sse2-unaligned \
+ strcat-ssse3 strncat-ssse3 strlen-sse2-pminub
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
CFLAGS-varshift.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 81b2378..0a145ca 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -97,18 +97,22 @@ __init_cpu_features (void)
case 0x2c:
case 0x2e:
case 0x2f:
- /* Rep string instructions, copy backward and unaligned loads
- are fast on Intel Core i3, i5 and i7. */
+ /* Rep string instructions, copy backward, unaligned loads
+ and pminub are fast on Intel Core i3, i5 and i7. */
#if index_Fast_Rep_String != index_Fast_Copy_Backward
# error index_Fast_Rep_String != index_Fast_Copy_Backward
#endif
#if index_Fast_Rep_String != index_Fast_Unaligned_Load
# error index_Fast_Rep_String != index_Fast_Unaligned_Load
#endif
+#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
+# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
+#endif
__cpu_features.feature[index_Fast_Rep_String]
|= (bit_Fast_Rep_String
| bit_Fast_Copy_Backward
- | bit_Fast_Unaligned_Load);
+ | bit_Fast_Unaligned_Load
+ | bit_Prefer_PMINUB_for_stringop);
break;
}
}
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index addf5f3..6cfdbdd 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -21,6 +21,7 @@
#define bit_Slow_BSF (1 << 2)
#define bit_Prefer_SSE_for_memop (1 << 3)
#define bit_Fast_Unaligned_Load (1 << 4)
+#define bit_Prefer_PMINUB_for_stringop (1 << 5)
#ifdef __ASSEMBLER__
@@ -41,6 +42,7 @@
# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
#else /* __ASSEMBLER__ */
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
new file mode 100644
index 0000000..1150281
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
@@ -0,0 +1,55 @@
+/* strcat with SSE2
+ Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef NOT_IN_libc
+
+# include <sysdep.h>
+
+# ifndef STRCAT
+# define STRCAT __strcat_sse2_unaligned
+# endif
+
+# define USE_AS_STRCAT
+
+.text
+ENTRY (STRCAT)
+ mov %rdi, %r9
+# ifdef USE_AS_STRNCAT
+ mov %rdx, %r8
+# endif
+
+# define RETURN jmp L(StartStrcpyPart)
+# include "strlen-sse2-pminub.S"
+# undef RETURN
+
+L(StartStrcpyPart):
+ lea (%r9, %rax), %rdi
+ mov %rsi, %rcx
+ mov %r9, %rax /* save result */
+
+# ifdef USE_AS_STRNCAT
+ test %r8, %r8
+ jz L(ExitZero)
+# define USE_AS_STRNCPY
+# endif
+
+# include "strcpy-sse2-unaligned.S"
+#endif
+
diff --git a/sysdeps/x86_64/multiarch/strcat-ssse3.S b/sysdeps/x86_64/multiarch/strcat-ssse3.S
new file mode 100644
index 0000000..66736a7
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcat-ssse3.S
@@ -0,0 +1,559 @@
+/* strcat with SSSE3
+ Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef NOT_IN_libc
+
+# include <sysdep.h>
+
+# ifndef STRCAT
+# define STRCAT __strcat_ssse3
+# endif
+
+# define USE_AS_STRCAT
+
+.text
+ENTRY (STRCAT)
+# ifdef USE_AS_STRNCAT
+ mov %rdx, %r8
+# endif
+
+# define RETURN jmp L(StartStrcpyPart)
+# include "strlen-no-bsf.S"
+
+# undef RETURN
+
+L(StartStrcpyPart):
+ mov %rsi, %rcx
+ lea (%rdi, %rax), %rdx
+# ifdef USE_AS_STRNCAT
+ test %r8, %r8
+ jz L(StrncatExit0)
+ cmp $8, %r8
+ jbe L(StrncatExit8Bytes)
+# endif
+ cmpb $0, (%rcx)
+ jz L(Exit1)
+ cmpb $0, 1(%rcx)
+ jz L(Exit2)
+ cmpb $0, 2(%rcx)
+ jz L(Exit3)
+ cmpb $0, 3(%rcx)
+ jz L(Exit4)
+ cmpb $0, 4(%rcx)
+ jz L(Exit5)
+ cmpb $0, 5(%rcx)
+ jz L(Exit6)
+ cmpb $0, 6(%rcx)
+ jz L(Exit7)
+ cmpb $0, 7(%rcx)
+ jz L(Exit8)
+ cmpb $0, 8(%rcx)
+ jz L(Exit9)
+# ifdef USE_AS_STRNCAT
+ cmp $16, %r8
+ jb L(StrncatExit15Bytes)
+# endif
+ cmpb $0, 9(%rcx)
+ jz L(Exit10)
+ cmpb $0, 10(%rcx)
+ jz L(Exit11)
+ cmpb $0, 11(%rcx)
+ jz L(Exit12)
+ cmpb $0, 12(%rcx)
+ jz L(Exit13)
+ cmpb $0, 13(%rcx)
+ jz L(Exit14)
+ cmpb $0, 14(%rcx)
+ jz L(Exit15)
+ cmpb $0, 15(%rcx)
+ jz L(Exit16)
+# ifdef USE_AS_STRNCAT
+ cmp $16, %r8
+ je L(StrncatExit16)
+# define USE_AS_STRNCPY
+# endif
+
+# include "strcpy-ssse3.S"
+
+ .p2align 4
+L(CopyFrom1To16Bytes):
+ add %rsi, %rdx
+ add %rsi, %rcx
+
+ test %al, %al
+ jz L(ExitHigh)
+ test $0x01, %al
+ jnz L(Exit1)
+ test $0x02, %al
+ jnz L(Exit2)
+ test $0x04, %al
+ jnz L(Exit3)
+ test $0x08, %al
+ jnz L(Exit4)
+ test $0x10, %al
+ jnz L(Exit5)
+ test $0x20, %al
+ jnz L(Exit6)
+ test $0x40, %al
+ jnz L(Exit7)
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(ExitHigh):
+ test $0x01, %ah
+ jnz L(Exit9)
+ test $0x02, %ah
+ jnz L(Exit10)
+ test $0x04, %ah
+ jnz L(Exit11)
+ test $0x08, %ah
+ jnz L(Exit12)
+ test $0x10, %ah
+ jnz L(Exit13)
+ test $0x20, %ah
+ jnz L(Exit14)
+ test $0x40, %ah
+ jnz L(Exit15)
+ movlpd (%rcx), %xmm0
+ movlpd 8(%rcx), %xmm1
+ movlpd %xmm0, (%rdx)
+ movlpd %xmm1, 8(%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit1):
+ xor %ah, %ah
+ movb %ah, 1(%rdx)
+L(Exit1):
+ movb (%rcx), %al
+ movb %al, (%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit2):
+ xor %ah, %ah
+ movb %ah, 2(%rdx)
+L(Exit2):
+ movw (%rcx), %ax
+ movw %ax, (%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit3):
+ xor %ah, %ah
+ movb %ah, 3(%rdx)
+L(Exit3):
+ movw (%rcx), %ax
+ movw %ax, (%rdx)
+ movb 2(%rcx), %al
+ movb %al, 2(%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit4):
+ xor %ah, %ah
+ movb %ah, 4(%rdx)
+L(Exit4):
+ mov (%rcx), %eax
+ mov %eax, (%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit5):
+ xor %ah, %ah
+ movb %ah, 5(%rdx)
+L(Exit5):
+ mov (%rcx), %eax
+ mov %eax, (%rdx)
+ movb 4(%rcx), %al
+ movb %al, 4(%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit6):
+ xor %ah, %ah
+ movb %ah, 6(%rdx)
+L(Exit6):
+ mov (%rcx), %eax
+ mov %eax, (%rdx)
+ movw 4(%rcx), %ax
+ movw %ax, 4(%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit7):
+ xor %ah, %ah
+ movb %ah, 7(%rdx)
+L(Exit7):
+ mov (%rcx), %eax
+ mov %eax, (%rdx)
+ mov 3(%rcx), %eax
+ mov %eax, 3(%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit8):
+ xor %ah, %ah
+ movb %ah, 8(%rdx)
+L(Exit8):
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit9):
+ xor %ah, %ah
+ movb %ah, 9(%rdx)
+L(Exit9):
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ movb 8(%rcx), %al
+ movb %al, 8(%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit10):
+ xor %ah, %ah
+ movb %ah, 10(%rdx)
+L(Exit10):
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ movw 8(%rcx), %ax
+ movw %ax, 8(%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit11):
+ xor %ah, %ah
+ movb %ah, 11(%rdx)
+L(Exit11):
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ mov 7(%rcx), %eax
+ mov %eax, 7(%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit12):
+ xor %ah, %ah
+ movb %ah, 12(%rdx)
+L(Exit12):
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ mov 8(%rcx), %eax
+ mov %eax, 8(%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit13):
+ xor %ah, %ah
+ movb %ah, 13(%rdx)
+L(Exit13):
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ movlpd 5(%rcx), %xmm1
+ movlpd %xmm1, 5(%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit14):
+ xor %ah, %ah
+ movb %ah, 14(%rdx)
+L(Exit14):
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ movlpd 6(%rcx), %xmm1
+ movlpd %xmm1, 6(%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit15):
+ xor %ah, %ah
+ movb %ah, 15(%rdx)
+L(Exit15):
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ movlpd 7(%rcx), %xmm1
+ movlpd %xmm1, 7(%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit16):
+ xor %ah, %ah
+ movb %ah, 16(%rdx)
+L(Exit16):
+ movlpd (%rcx), %xmm0
+ movlpd 8(%rcx), %xmm1
+ movlpd %xmm0, (%rdx)
+ movlpd %xmm1, 8(%rdx)
+ mov %rdi, %rax
+ ret
+
+# ifdef USE_AS_STRNCPY
+
+ .p2align 4
+L(CopyFrom1To16BytesCase2):
+ add $16, %r8
+ add %rsi, %rcx
+ lea (%rsi, %rdx), %rsi
+ lea -9(%r8), %rdx
+ and $1<<7, %dh
+ or %al, %dh
+ test %dh, %dh
+ lea (%rsi), %rdx
+ jz L(ExitHighCase2)
+
+ test $0x01, %al
+ jnz L(Exit1)
+ cmp $1, %r8
+ je L(StrncatExit1)
+ test $0x02, %al
+ jnz L(Exit2)
+ cmp $2, %r8
+ je L(StrncatExit2)
+ test $0x04, %al
+ jnz L(Exit3)
+ cmp $3, %r8
+ je L(StrncatExit3)
+ test $0x08, %al
+ jnz L(Exit4)
+ cmp $4, %r8
+ je L(StrncatExit4)
+ test $0x10, %al
+ jnz L(Exit5)
+ cmp $5, %r8
+ je L(StrncatExit5)
+ test $0x20, %al
+ jnz L(Exit6)
+ cmp $6, %r8
+ je L(StrncatExit6)
+ test $0x40, %al
+ jnz L(Exit7)
+ cmp $7, %r8
+ je L(StrncatExit7)
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ lea 7(%rdx), %rax
+ cmpb $1, (%rax)
+ sbb $-1, %rax
+ xor %cl, %cl
+ movb %cl, (%rax)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(ExitHighCase2):
+ test $0x01, %ah
+ jnz L(Exit9)
+ cmp $9, %r8
+ je L(StrncatExit9)
+ test $0x02, %ah
+ jnz L(Exit10)
+ cmp $10, %r8
+ je L(StrncatExit10)
+ test $0x04, %ah
+ jnz L(Exit11)
+ cmp $11, %r8
+ je L(StrncatExit11)
+ test $0x8, %ah
+ jnz L(Exit12)
+ cmp $12, %r8
+ je L(StrncatExit12)
+ test $0x10, %ah
+ jnz L(Exit13)
+ cmp $13, %r8
+ je L(StrncatExit13)
+ test $0x20, %ah
+ jnz L(Exit14)
+ cmp $14, %r8
+ je L(StrncatExit14)
+ test $0x40, %ah
+ jnz L(Exit15)
+ cmp $15, %r8
+ je L(StrncatExit15)
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ movlpd 8(%rcx), %xmm1
+ movlpd %xmm1, 8(%rdx)
+ mov %rdi, %rax
+ ret
+
+L(CopyFrom1To16BytesCase2OrCase3):
+ test %rax, %rax
+ jnz L(CopyFrom1To16BytesCase2)
+
+ .p2align 4
+L(CopyFrom1To16BytesCase3):
+ add $16, %r8
+ add %rsi, %rdx
+ add %rsi, %rcx
+
+ cmp $8, %r8
+ ja L(ExitHighCase3)
+ cmp $1, %r8
+ je L(StrncatExit1)
+ cmp $2, %r8
+ je L(StrncatExit2)
+ cmp $3, %r8
+ je L(StrncatExit3)
+ cmp $4, %r8
+ je L(StrncatExit4)
+ cmp $5, %r8
+ je L(StrncatExit5)
+ cmp $6, %r8
+ je L(StrncatExit6)
+ cmp $7, %r8
+ je L(StrncatExit7)
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ xor %ah, %ah
+ movb %ah, 8(%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(ExitHighCase3):
+ cmp $9, %r8
+ je L(StrncatExit9)
+ cmp $10, %r8
+ je L(StrncatExit10)
+ cmp $11, %r8
+ je L(StrncatExit11)
+ cmp $12, %r8
+ je L(StrncatExit12)
+ cmp $13, %r8
+ je L(StrncatExit13)
+ cmp $14, %r8
+ je L(StrncatExit14)
+ cmp $15, %r8
+ je L(StrncatExit15)
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ movlpd 8(%rcx), %xmm1
+ movlpd %xmm1, 8(%rdx)
+ xor %ah, %ah
+ movb %ah, 16(%rdx)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit0):
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit15Bytes):
+ cmp $9, %r8
+ je L(StrncatExit9)
+ cmpb $0, 9(%rcx)
+ jz L(Exit10)
+ cmp $10, %r8
+ je L(StrncatExit10)
+ cmpb $0, 10(%rcx)
+ jz L(Exit11)
+ cmp $11, %r8
+ je L(StrncatExit11)
+ cmpb $0, 11(%rcx)
+ jz L(Exit12)
+ cmp $12, %r8
+ je L(StrncatExit12)
+ cmpb $0, 12(%rcx)
+ jz L(Exit13)
+ cmp $13, %r8
+ je L(StrncatExit13)
+ cmpb $0, 13(%rcx)
+ jz L(Exit14)
+ cmp $14, %r8
+ je L(StrncatExit14)
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ movlpd 7(%rcx), %xmm1
+ movlpd %xmm1, 7(%rdx)
+ lea 14(%rdx), %rax
+ cmpb $1, (%rax)
+ sbb $-1, %rax
+ xor %cl, %cl
+ movb %cl, (%rax)
+ mov %rdi, %rax
+ ret
+
+ .p2align 4
+L(StrncatExit8Bytes):
+ cmpb $0, (%rcx)
+ jz L(Exit1)
+ cmp $1, %r8
+ je L(StrncatExit1)
+ cmpb $0, 1(%rcx)
+ jz L(Exit2)
+ cmp $2, %r8
+ je L(StrncatExit2)
+ cmpb $0, 2(%rcx)
+ jz L(Exit3)
+ cmp $3, %r8
+ je L(StrncatExit3)
+ cmpb $0, 3(%rcx)
+ jz L(Exit4)
+ cmp $4, %r8
+ je L(StrncatExit4)
+ cmpb $0, 4(%rcx)
+ jz L(Exit5)
+ cmp $5, %r8
+ je L(StrncatExit5)
+ cmpb $0, 5(%rcx)
+ jz L(Exit6)
+ cmp $6, %r8
+ je L(StrncatExit6)
+ cmpb $0, 6(%rcx)
+ jz L(Exit7)
+ cmp $7, %r8
+ je L(StrncatExit7)
+ movlpd (%rcx), %xmm0
+ movlpd %xmm0, (%rdx)
+ lea 7(%rdx), %rax
+ cmpb $1, (%rax)
+ sbb $-1, %rax
+ xor %cl, %cl
+ movb %cl, (%rax)
+ mov %rdi, %rax
+ ret
+
+# endif
+END (STRCAT)
+#endif
+
diff --git a/sysdeps/x86_64/multiarch/strcat.S b/sysdeps/x86_64/multiarch/strcat.S
new file mode 100644
index 0000000..f3ccc8e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcat.S
@@ -0,0 +1,85 @@
+/* Multiple versions of strcat
+ Copyright (C) 2009, 2011 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifndef USE_AS_STRNCAT
+# ifndef STRCAT
+# define STRCAT strcat
+# endif
+#endif
+
+#ifdef USE_AS_STRNCAT
+# define STRCAT_SSSE3 __strncat_ssse3
+# define STRCAT_SSE2 __strncat_sse2
+# define STRCAT_SSE2_UNALIGNED __strncat_sse2_unaligned
+# define __GI_STRCAT __GI_strncat
+# define __GI___STRCAT __GI___strncat
+#else
+# define STRCAT_SSSE3 __strcat_ssse3
+# define STRCAT_SSE2 __strcat_sse2
+# define STRCAT_SSE2_UNALIGNED __strcat_sse2_unaligned
+# define __GI_STRCAT __GI_strcat
+# define __GI___STRCAT __GI___strcat
+#endif
+
+
+/* Define multiple versions only for the definition in libc. */
+#ifndef NOT_IN_libc
+ .text
+ENTRY(STRCAT)
+ .type STRCAT, @gnu_indirect_function
+ cmpl $0, __cpu_features+KIND_OFFSET(%rip)
+ jne 1f
+ call __init_cpu_features
+1: leaq STRCAT_SSE2_UNALIGNED(%rip), %rax
+ testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
+ jnz 2f
+ leaq STRCAT_SSE2(%rip), %rax
+ testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
+ jz 2f
+ leaq STRCAT_SSSE3(%rip), %rax
+2: ret
+END(STRCAT)
+
+# undef ENTRY
+# define ENTRY(name) \
+ .type STRCAT_SSE2, @function; \
+ .align 16; \
+ STRCAT_SSE2: cfi_startproc; \
+ CALL_MCOUNT
+# undef END
+# define END(name) \
+ cfi_endproc; .size STRCAT_SSE2, .-STRCAT_SSE2
+# undef libc_hidden_builtin_def
+/* It doesn't make sense to send libc-internal strcat calls through a PLT.
+ The speedup we get from using SSSE3 instructions is likely eaten away
+ by the indirect call in the PLT. */
+# define libc_hidden_builtin_def(name) \
+ .globl __GI_STRCAT; __GI_STRCAT = STRCAT_SSE2
+# undef libc_hidden_def
+# define libc_hidden_def(name) \
+ .globl __GI___STRCAT; __GI___STRCAT = STRCAT_SSE2
+#endif
+
+#ifndef USE_AS_STRNCAT
+# include "../strcat.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
index 9a8d186..6de8c47 100644
--- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
@@ -20,10 +20,13 @@
#ifndef NOT_IN_libc
-# include <sysdep.h>
+# ifndef USE_AS_STRCAT
+# include <sysdep.h>
+
+# ifndef STRCPY
+# define STRCPY __strcpy_sse2_unaligned
+# endif
-# ifndef STRCPY
-# define STRCPY __strcpy_sse2_unaligned
# endif
# define JMPTBL(I, B) I - B
@@ -33,16 +36,20 @@
lea (%r11, %rcx), %rcx; \
jmp *%rcx
- .text
+# ifndef USE_AS_STRCAT
+
+.text
ENTRY (STRCPY)
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
mov %rdx, %r8
test %r8, %r8
jz L(ExitZero)
-# endif
+# endif
mov %rsi, %rcx
-# ifndef USE_AS_STPCPY
+# ifndef USE_AS_STPCPY
mov %rdi, %rax /* save result */
+# endif
+
# endif
and $15, %rcx
@@ -59,7 +66,7 @@ ENTRY (STRCPY)
pmovmskb %xmm1, %rdx
shr %cl, %rdx
# ifdef USE_AS_STRNCPY
-# if defined USE_AS_STPCPY
+# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
cmp $16, %r8
# else
cmp $17, %r8
@@ -72,7 +79,7 @@ ENTRY (STRCPY)
pcmpeqb 16(%rsi), %xmm0
pmovmskb %xmm0, %rdx
# ifdef USE_AS_STRNCPY
-# if defined USE_AS_STPCPY
+# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
cmp $32, %r8
# else
cmp $33, %r8
@@ -102,7 +109,7 @@ L(Unalign16Both):
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rdx, %rdx
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm2)
# else
jnz L(CopyFrom1To16Bytes)
@@ -118,7 +125,7 @@ L(Unalign16Both):
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rdx, %rdx
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm3)
# else
jnz L(CopyFrom1To16Bytes)
@@ -134,7 +141,7 @@ L(Unalign16Both):
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rdx, %rdx
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm4)
# else
jnz L(CopyFrom1To16Bytes)
@@ -150,7 +157,7 @@ L(Unalign16Both):
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rdx, %rdx
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm1)
# else
jnz L(CopyFrom1To16Bytes)
@@ -166,7 +173,7 @@ L(Unalign16Both):
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rdx, %rdx
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm2)
# else
jnz L(CopyFrom1To16Bytes)
@@ -182,7 +189,7 @@ L(Unalign16Both):
jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif
test %rdx, %rdx
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm3)
# else
jnz L(CopyFrom1To16Bytes)
@@ -264,10 +271,10 @@ L(Unaligned64Leave):
movdqu %xmm4, (%rdi)
movdqu %xmm5, 16(%rdi)
movdqu %xmm6, 32(%rdi)
-# if defined USE_AS_STRNCPY
-# ifdef USE_AS_STPCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
+# ifdef USE_AS_STPCPY
lea 48(%rdi, %rdx), %rax
-# endif
+# endif
movdqu %xmm7, 48(%rdi)
add $15, %r8
sub %rdx, %r8
@@ -288,7 +295,7 @@ L(SourceStringAlignmentZero):
pmovmskb %xmm0, %rdx
# ifdef USE_AS_STRNCPY
-# if defined USE_AS_STPCPY
+# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
cmp $16, %r8
# else
cmp $17, %r8
@@ -303,7 +310,7 @@ L(SourceStringAlignmentZero):
pmovmskb %xmm0, %rdx
# ifdef USE_AS_STRNCPY
-# if defined USE_AS_STPCPY
+# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
cmp $32, %r8
# else
cmp $33, %r8
@@ -314,11 +321,11 @@ L(SourceStringAlignmentZero):
jnz L(CopyFrom1To32Bytes1)
jmp L(Unalign16Both)
-/* ------End of main part with loops--------------------- */
+/*------End of main part with loops---------------------*/
/* Case1 */
-# if (!defined USE_AS_STRNCPY)
+# if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
.p2align 4
L(CopyFrom1To16Bytes):
add %rcx, %rdi
@@ -328,7 +335,7 @@ L(CopyFrom1To16Bytes):
# endif
.p2align 4
L(CopyFrom1To16BytesTail):
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub %rcx, %r8
# endif
add %rcx, %rsi
@@ -339,7 +346,7 @@ L(CopyFrom1To16BytesTail):
L(CopyFrom1To32Bytes1):
add $16, %rsi
add $16, %rdi
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $16, %r8
# endif
L(CopyFrom1To16BytesTail1):
@@ -348,7 +355,7 @@ L(CopyFrom1To16BytesTail1):
.p2align 4
L(CopyFrom1To32Bytes):
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub %rcx, %r8
# endif
bsf %rdx, %rdx
@@ -360,10 +367,10 @@ L(CopyFrom1To32Bytes):
.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
bsf %rdx, %rdx
-# if defined USE_AS_STRNCPY
-# ifdef USE_AS_STPCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
+# ifdef USE_AS_STPCPY
lea (%rdi, %rdx), %rax
-# endif
+# endif
movdqu %xmm4, (%rdi)
add $63, %r8
sub %rdx, %r8
@@ -377,10 +384,10 @@ L(CopyFrom1To16BytesUnaligned_0):
L(CopyFrom1To16BytesUnaligned_16):
bsf %rcx, %rdx
movdqu %xmm4, (%rdi)
-# if defined USE_AS_STRNCPY
-# ifdef USE_AS_STPCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
+# ifdef USE_AS_STPCPY
lea 16(%rdi, %rdx), %rax
-# endif
+# endif
movdqu %xmm5, 16(%rdi)
add $47, %r8
sub %rdx, %r8
@@ -397,10 +404,10 @@ L(CopyFrom1To16BytesUnaligned_32):
bsf %rdx, %rdx
movdqu %xmm4, (%rdi)
movdqu %xmm5, 16(%rdi)
-# if defined USE_AS_STRNCPY
-# ifdef USE_AS_STPCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
+# ifdef USE_AS_STPCPY
lea 32(%rdi, %rdx), %rax
-# endif
+# endif
movdqu %xmm6, 32(%rdi)
add $31, %r8
sub %rdx, %r8
@@ -413,6 +420,7 @@ L(CopyFrom1To16BytesUnaligned_32):
# endif
# ifdef USE_AS_STRNCPY
+# ifndef USE_AS_STRCAT
.p2align 4
L(CopyFrom1To16BytesUnalignedXmm6):
movdqu %xmm6, (%rdi, %rcx)
@@ -437,6 +445,7 @@ L(CopyFrom1To16BytesUnalignedXmm3):
L(CopyFrom1To16BytesUnalignedXmm1):
movdqu %xmm1, (%rdi, %rcx)
jmp L(CopyFrom1To16BytesXmmExit)
+# endif
.p2align 4
L(CopyFrom1To16BytesExit):
@@ -519,7 +528,7 @@ L(CopyFrom1To16BytesTail1Case2OrCase3):
# endif
-/* ----End labels regarding with copying 1-16 bytes--and 1-32 bytes---- */
+/*------------End of labels for copying 1-16 bytes and 1-32 bytes------------*/
.p2align 4
L(Exit1):
@@ -527,7 +536,7 @@ L(Exit1):
# ifdef USE_AS_STPCPY
lea (%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $1, %r8
lea 1(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -541,7 +550,7 @@ L(Exit2):
# ifdef USE_AS_STPCPY
lea 1(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $2, %r8
lea 2(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -556,7 +565,7 @@ L(Exit3):
# ifdef USE_AS_STPCPY
lea 2(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $3, %r8
lea 3(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -570,7 +579,7 @@ L(Exit4):
# ifdef USE_AS_STPCPY
lea 3(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $4, %r8
lea 4(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -585,7 +594,7 @@ L(Exit5):
# ifdef USE_AS_STPCPY
lea 4(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $5, %r8
lea 5(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -601,7 +610,7 @@ L(Exit6):
# ifdef USE_AS_STPCPY
lea 5(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $6, %r8
lea 6(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -617,7 +626,7 @@ L(Exit7):
# ifdef USE_AS_STPCPY
lea 6(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $7, %r8
lea 7(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -631,7 +640,7 @@ L(Exit8):
# ifdef USE_AS_STPCPY
lea 7(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $8, %r8
lea 8(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -646,7 +655,7 @@ L(Exit9):
# ifdef USE_AS_STPCPY
lea 8(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $9, %r8
lea 9(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -662,7 +671,7 @@ L(Exit10):
# ifdef USE_AS_STPCPY
lea 9(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $10, %r8
lea 10(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -678,7 +687,7 @@ L(Exit11):
# ifdef USE_AS_STPCPY
lea 10(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $11, %r8
lea 11(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -694,7 +703,7 @@ L(Exit12):
# ifdef USE_AS_STPCPY
lea 11(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $12, %r8
lea 12(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -710,7 +719,7 @@ L(Exit13):
# ifdef USE_AS_STPCPY
lea 12(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $13, %r8
lea 13(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -726,7 +735,7 @@ L(Exit14):
# ifdef USE_AS_STPCPY
lea 13(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $14, %r8
lea 14(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -742,7 +751,7 @@ L(Exit15):
# ifdef USE_AS_STPCPY
lea 14(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $15, %r8
lea 15(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -756,7 +765,7 @@ L(Exit16):
# ifdef USE_AS_STPCPY
lea 15(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $16, %r8
lea 16(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -771,7 +780,7 @@ L(Exit17):
# ifdef USE_AS_STPCPY
lea 16(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $17, %r8
lea 17(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -787,7 +796,7 @@ L(Exit18):
# ifdef USE_AS_STPCPY
lea 17(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $18, %r8
lea 18(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -803,7 +812,7 @@ L(Exit19):
# ifdef USE_AS_STPCPY
lea 18(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $19, %r8
lea 19(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -819,7 +828,7 @@ L(Exit20):
# ifdef USE_AS_STPCPY
lea 19(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $20, %r8
lea 20(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -836,7 +845,7 @@ L(Exit21):
# ifdef USE_AS_STPCPY
lea 20(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $21, %r8
lea 21(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -852,7 +861,7 @@ L(Exit22):
# ifdef USE_AS_STPCPY
lea 21(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $22, %r8
lea 22(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -868,7 +877,7 @@ L(Exit23):
# ifdef USE_AS_STPCPY
lea 22(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $23, %r8
lea 23(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -884,7 +893,7 @@ L(Exit24):
# ifdef USE_AS_STPCPY
lea 23(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $24, %r8
lea 24(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -901,7 +910,7 @@ L(Exit25):
# ifdef USE_AS_STPCPY
lea 24(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $25, %r8
lea 25(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -919,7 +928,7 @@ L(Exit26):
# ifdef USE_AS_STPCPY
lea 25(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $26, %r8
lea 26(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -937,7 +946,7 @@ L(Exit27):
# ifdef USE_AS_STPCPY
lea 26(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $27, %r8
lea 27(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -955,7 +964,7 @@ L(Exit28):
# ifdef USE_AS_STPCPY
lea 27(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $28, %r8
lea 28(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -971,7 +980,7 @@ L(Exit29):
# ifdef USE_AS_STPCPY
lea 28(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $29, %r8
lea 29(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -987,7 +996,7 @@ L(Exit30):
# ifdef USE_AS_STPCPY
lea 29(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $30, %r8
lea 30(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -1003,7 +1012,7 @@ L(Exit31):
# ifdef USE_AS_STPCPY
lea 30(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $31, %r8
lea 31(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -1019,7 +1028,7 @@ L(Exit32):
# ifdef USE_AS_STPCPY
lea 31(%rdi), %rax
# endif
-# if defined USE_AS_STRNCPY
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $32, %r8
lea 32(%rdi), %rdi
jnz L(StrncpyFillTailWithZero)
@@ -1030,27 +1039,39 @@ L(Exit32):
.p2align 4
L(StrncpyExit0):
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
mov %rdi, %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, (%rdi)
+# endif
ret
.p2align 4
L(StrncpyExit1):
mov (%rsi), %dl
mov %dl, (%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 1(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 1(%rdi)
+# endif
ret
.p2align 4
L(StrncpyExit2):
mov (%rsi), %dx
mov %dx, (%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 2(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 2(%rdi)
+# endif
ret
.p2align 4
@@ -1059,18 +1080,26 @@ L(StrncpyExit3):
mov 2(%rsi), %dl
mov %cx, (%rdi)
mov %dl, 2(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 3(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 3(%rdi)
+# endif
ret
.p2align 4
L(StrncpyExit4):
mov (%rsi), %edx
mov %edx, (%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 4(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 4(%rdi)
+# endif
ret
.p2align 4
@@ -1079,9 +1108,13 @@ L(StrncpyExit5):
mov 4(%rsi), %dl
mov %ecx, (%rdi)
mov %dl, 4(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 5(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 5(%rdi)
+# endif
ret
.p2align 4
@@ -1090,9 +1123,13 @@ L(StrncpyExit6):
mov 4(%rsi), %dx
mov %ecx, (%rdi)
mov %dx, 4(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 6(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 6(%rdi)
+# endif
ret
.p2align 4
@@ -1101,18 +1138,26 @@ L(StrncpyExit7):
mov 3(%rsi), %edx
mov %ecx, (%rdi)
mov %edx, 3(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 7(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 7(%rdi)
+# endif
ret
.p2align 4
L(StrncpyExit8):
mov (%rsi), %rdx
mov %rdx, (%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 8(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 8(%rdi)
+# endif
ret
.p2align 4
@@ -1121,9 +1166,13 @@ L(StrncpyExit9):
mov 8(%rsi), %dl
mov %rcx, (%rdi)
mov %dl, 8(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 9(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 9(%rdi)
+# endif
ret
.p2align 4
@@ -1132,9 +1181,13 @@ L(StrncpyExit10):
mov 8(%rsi), %dx
mov %rcx, (%rdi)
mov %dx, 8(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 10(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 10(%rdi)
+# endif
ret
.p2align 4
@@ -1143,9 +1196,13 @@ L(StrncpyExit11):
mov 7(%rsi), %edx
mov %rcx, (%rdi)
mov %edx, 7(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 11(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 11(%rdi)
+# endif
ret
.p2align 4
@@ -1154,9 +1211,13 @@ L(StrncpyExit12):
mov 8(%rsi), %edx
mov %rcx, (%rdi)
mov %edx, 8(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 12(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 12(%rdi)
+# endif
ret
.p2align 4
@@ -1165,9 +1226,13 @@ L(StrncpyExit13):
mov 5(%rsi), %rdx
mov %rcx, (%rdi)
mov %rdx, 5(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 13(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 13(%rdi)
+# endif
ret
.p2align 4
@@ -1176,9 +1241,13 @@ L(StrncpyExit14):
mov 6(%rsi), %rdx
mov %rcx, (%rdi)
mov %rdx, 6(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 14(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 14(%rdi)
+# endif
ret
.p2align 4
@@ -1187,18 +1256,26 @@ L(StrncpyExit15):
mov 7(%rsi), %rdx
mov %rcx, (%rdi)
mov %rdx, 7(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 15(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 15(%rdi)
+# endif
ret
.p2align 4
L(StrncpyExit16):
movdqu (%rsi), %xmm0
movdqu %xmm0, (%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 16(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 16(%rdi)
+# endif
ret
.p2align 4
@@ -1207,9 +1284,13 @@ L(StrncpyExit17):
mov 16(%rsi), %cl
movdqu %xmm0, (%rdi)
mov %cl, 16(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 17(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 17(%rdi)
+# endif
ret
.p2align 4
@@ -1218,9 +1299,13 @@ L(StrncpyExit18):
mov 16(%rsi), %cx
movdqu %xmm0, (%rdi)
mov %cx, 16(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 18(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 18(%rdi)
+# endif
ret
.p2align 4
@@ -1229,9 +1314,13 @@ L(StrncpyExit19):
mov 15(%rsi), %ecx
movdqu %xmm0, (%rdi)
mov %ecx, 15(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 19(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 19(%rdi)
+# endif
ret
.p2align 4
@@ -1240,9 +1329,13 @@ L(StrncpyExit20):
mov 16(%rsi), %ecx
movdqu %xmm0, (%rdi)
mov %ecx, 16(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 20(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 20(%rdi)
+# endif
ret
.p2align 4
@@ -1253,9 +1346,13 @@ L(StrncpyExit21):
movdqu %xmm0, (%rdi)
mov %ecx, 16(%rdi)
mov %dl, 20(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 21(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 21(%rdi)
+# endif
ret
.p2align 4
@@ -1264,9 +1361,13 @@ L(StrncpyExit22):
mov 14(%rsi), %rcx
movdqu %xmm0, (%rdi)
mov %rcx, 14(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 22(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 22(%rdi)
+# endif
ret
.p2align 4
@@ -1275,9 +1376,13 @@ L(StrncpyExit23):
mov 15(%rsi), %rcx
movdqu %xmm0, (%rdi)
mov %rcx, 15(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 23(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 23(%rdi)
+# endif
ret
.p2align 4
@@ -1286,9 +1391,13 @@ L(StrncpyExit24):
mov 16(%rsi), %rcx
movdqu %xmm0, (%rdi)
mov %rcx, 16(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 24(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 24(%rdi)
+# endif
ret
.p2align 4
@@ -1299,9 +1408,13 @@ L(StrncpyExit25):
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %cl, 24(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 25(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 25(%rdi)
+# endif
ret
.p2align 4
@@ -1312,9 +1425,13 @@ L(StrncpyExit26):
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %cx, 24(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 26(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 26(%rdi)
+# endif
ret
.p2align 4
@@ -1325,9 +1442,13 @@ L(StrncpyExit27):
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %ecx, 23(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 27(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 27(%rdi)
+# endif
ret
.p2align 4
@@ -1338,9 +1459,13 @@ L(StrncpyExit28):
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %ecx, 24(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 28(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 28(%rdi)
+# endif
ret
.p2align 4
@@ -1349,9 +1474,13 @@ L(StrncpyExit29):
movdqu 13(%rsi), %xmm2
movdqu %xmm0, (%rdi)
movdqu %xmm2, 13(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 29(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 29(%rdi)
+# endif
ret
.p2align 4
@@ -1360,9 +1489,13 @@ L(StrncpyExit30):
movdqu 14(%rsi), %xmm2
movdqu %xmm0, (%rdi)
movdqu %xmm2, 14(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 30(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 30(%rdi)
+# endif
ret
.p2align 4
@@ -1371,9 +1504,13 @@ L(StrncpyExit31):
movdqu 15(%rsi), %xmm2
movdqu %xmm0, (%rdi)
movdqu %xmm2, 15(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 31(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 31(%rdi)
+# endif
ret
.p2align 4
@@ -1382,9 +1519,13 @@ L(StrncpyExit32):
movdqu 16(%rsi), %xmm2
movdqu %xmm0, (%rdi)
movdqu %xmm2, 16(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 32(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 32(%rdi)
+# endif
ret
.p2align 4
@@ -1395,8 +1536,14 @@ L(StrncpyExit33):
movdqu %xmm0, (%rdi)
movdqu %xmm2, 16(%rdi)
mov %cl, 32(%rdi)
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 33(%rdi)
+# endif
ret
+# ifndef USE_AS_STRCAT
+
.p2align 4
L(Fill0):
ret
@@ -1498,9 +1645,9 @@ L(CopyFrom1To16BytesXmmExit):
bsf %rdx, %rdx
add $15, %r8
add %rcx, %rdi
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea (%rdi, %rdx), %rax
-# endif
+# endif
sub %rdx, %r8
lea 1(%rdi, %rdx), %rdi
@@ -1553,6 +1700,9 @@ L(StrncpyFillExit):
add $16, %r8
BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
+/* end of ifndef USE_AS_STRCAT */
+# endif
+
.p2align 4
L(UnalignedLeaveCase2OrCase3):
test %rdx, %rdx
@@ -1572,9 +1722,13 @@ L(Unaligned64LeaveCase3):
sub $16, %r8
jb L(CopyFrom1To16BytesCase3)
movdqu %xmm7, 48(%rdi)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 64(%rdi), %rax
-# endif
+# endif
+# ifdef USE_AS_STRCAT
+ xor %ch, %ch
+ movb %ch, 64(%rdi)
+# endif
ret
.p2align 4
@@ -1585,8 +1739,11 @@ L(Unaligned64LeaveCase2):
add $48, %r8
jle L(CopyFrom1To16BytesCase2OrCase3)
test %rdx, %rdx
+# ifndef USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm4)
-
+# else
+ jnz L(CopyFrom1To16Bytes)
+# endif
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %rdx
movdqu %xmm4, (%rdi)
@@ -1594,7 +1751,11 @@ L(Unaligned64LeaveCase2):
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
test %rdx, %rdx
+# ifndef USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm5)
+# else
+ jnz L(CopyFrom1To16Bytes)
+# endif
pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %rdx
@@ -1603,7 +1764,11 @@ L(Unaligned64LeaveCase2):
sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3)
test %rdx, %rdx
+# ifndef USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm6)
+# else
+ jnz L(CopyFrom1To16Bytes)
+# endif
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %rdx
@@ -1617,13 +1782,18 @@ L(Unaligned64LeaveCase2):
.p2align 4
L(ExitZero):
+# ifndef USE_AS_STRCAT
mov %rdi, %rax
+# endif
ret
# endif
+# ifndef USE_AS_STRCAT
END (STRCPY)
-
+# else
+END (STRCAT)
+# endif
.p2align 4
.section .rodata
L(ExitTable):
@@ -1695,6 +1865,7 @@ L(ExitStrncpyTable):
.int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
.int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
.int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
+# ifndef USE_AS_STRCAT
.p2align 4
L(FillTable):
.int JMPTBL(L(Fill0), L(FillTable))
@@ -1714,5 +1885,7 @@ L(FillTable):
.int JMPTBL(L(Fill14), L(FillTable))
.int JMPTBL(L(Fill15), L(FillTable))
.int JMPTBL(L(Fill16), L(FillTable))
+# endif
# endif
#endif
+
diff --git a/sysdeps/x86_64/multiarch/strcpy-ssse3.S b/sysdeps/x86_64/multiarch/strcpy-ssse3.S
index efbd3bf..05faf0d 100644
--- a/sysdeps/x86_64/multiarch/strcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/strcpy-ssse3.S
@@ -20,25 +20,26 @@
#ifndef NOT_IN_libc
-# include <sysdep.h>
+# ifndef USE_AS_STRCAT
+# include <sysdep.h>
-# ifndef STRCPY
-# define STRCPY __strcpy_ssse3
-# endif
+# ifndef STRCPY
+# define STRCPY __strcpy_ssse3
+# endif
.section .text.ssse3,"ax",@progbits
ENTRY (STRCPY)
mov %rsi, %rcx
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
mov %rdx, %r8
-# endif
+# endif
mov %rdi, %rdx
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
test %r8, %r8
jz L(Exit0)
cmp $8, %r8
jbe L(StrncpyExit8Bytes)
-# endif
+# endif
cmpb $0, (%rcx)
jz L(Exit1)
cmpb $0, 1(%rcx)
@@ -55,10 +56,10 @@ ENTRY (STRCPY)
jz L(Exit7)
cmpb $0, 7(%rcx)
jz L(Exit8)
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
cmp $16, %r8
jb L(StrncpyExit15Bytes)
-# endif
+# endif
cmpb $0, 8(%rcx)
jz L(Exit9)
cmpb $0, 9(%rcx)
@@ -73,12 +74,13 @@ ENTRY (STRCPY)
jz L(Exit14)
cmpb $0, 14(%rcx)
jz L(Exit15)
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
cmp $16, %r8
je L(Exit16)
-# endif
+# endif
cmpb $0, 15(%rcx)
jz L(Exit16)
+# endif
# ifdef USE_AS_STRNCPY
mov %rcx, %rsi
@@ -2180,12 +2182,12 @@ L(Shl15LoopExit):
jmp L(CopyFrom1To16Bytes)
# endif
-
+# ifndef USE_AS_STRCAT
.p2align 4
L(CopyFrom1To16Bytes):
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
add $16, %r8
-# endif
+# endif
add %rsi, %rdx
add %rsi, %rcx
@@ -2210,20 +2212,20 @@ L(CopyFrom1To16Bytes):
L(Exit8):
mov (%rcx), %rax
mov %rax, (%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 7(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $8, %r8
lea 8(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
.p2align 4
@@ -2249,23 +2251,23 @@ L(Exit16):
mov %rax, (%rdx)
mov 8(%rcx), %rax
mov %rax, 8(%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 15(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $16, %r8
lea 16(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
.p2align 4
L(CopyFrom1To16BytesCase2):
@@ -2381,46 +2383,46 @@ L(Less12Case3): /* but more than 8 */
jl L(Exit9)
je L(Exit10)
jg L(Exit11)
-# endif
+# endif
.p2align 4
L(Exit1):
movb (%rcx), %al
movb %al, (%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea (%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $1, %r8
lea 1(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
.p2align 4
L(Exit2):
movw (%rcx), %ax
movw %ax, (%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 1(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $2, %r8
lea 2(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
.p2align 4
@@ -2429,40 +2431,40 @@ L(Exit3):
movw %ax, (%rdx)
movb 2(%rcx), %al
movb %al, 2(%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 2(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $3, %r8
lea 3(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
.p2align 4
L(Exit4):
movl (%rcx), %eax
movl %eax, (%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 3(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $4, %r8
lea 4(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
.p2align 4
@@ -2471,20 +2473,20 @@ L(Exit5):
movl %eax, (%rdx)
movb 4(%rcx), %al
movb %al, 4(%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 4(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $5, %r8
lea 5(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
.p2align 4
@@ -2493,20 +2495,20 @@ L(Exit6):
movl %eax, (%rdx)
movw 4(%rcx), %ax
movw %ax, 4(%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 5(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $6, %r8
lea 6(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
.p2align 4
@@ -2515,20 +2517,20 @@ L(Exit7):
movl %eax, (%rdx)
movl 3(%rcx), %eax
movl %eax, 3(%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 6(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $7, %r8
lea 7(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
.p2align 4
@@ -2537,20 +2539,20 @@ L(Exit9):
mov %rax, (%rdx)
mov 5(%rcx), %eax
mov %eax, 5(%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 8(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $9, %r8
lea 9(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
.p2align 4
@@ -2559,20 +2561,20 @@ L(Exit10):
mov %rax, (%rdx)
mov 6(%rcx), %eax
mov %eax, 6(%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 9(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $10, %r8
lea 10(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
.p2align 4
@@ -2581,20 +2583,20 @@ L(Exit11):
mov %rax, (%rdx)
mov 7(%rcx), %eax
mov %eax, 7(%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 10(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $11, %r8
lea 11(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
.p2align 4
@@ -2603,20 +2605,20 @@ L(Exit12):
mov %rax, (%rdx)
mov 8(%rcx), %eax
mov %eax, 8(%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 11(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $12, %r8
lea 12(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
.p2align 4
@@ -2625,20 +2627,20 @@ L(Exit13):
mov %rax, (%rdx)
mov 5(%rcx), %rax
mov %rax, 5(%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 12(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $13, %r8
lea 13(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
.p2align 4
@@ -2647,20 +2649,20 @@ L(Exit14):
mov %rax, (%rdx)
mov 6(%rcx), %rax
mov %rax, 6(%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 13(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $14, %r8
lea 14(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
.p2align 4
@@ -2669,23 +2671,23 @@ L(Exit15):
mov %rax, (%rdx)
mov 7(%rcx), %rax
mov %rax, 7(%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 14(%rdx), %rax
-# else
+# else
mov %rdi, %rax
-# endif
-# ifdef USE_AS_STRNCPY
+# endif
+# ifdef USE_AS_STRNCPY
sub $15, %r8
lea 15(%rdx), %rcx
jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%rax)
sbb $-1, %rax
-# endif
-# endif
+# endif
+# endif
ret
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
.p2align 4
L(Fill0):
ret
@@ -2902,13 +2904,13 @@ L(StrncpyExit15Bytes):
mov %rax, (%rdx)
mov 7(%rcx), %rax
mov %rax, 7(%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 14(%rdx), %rax
cmpb $1, (%rax)
sbb $-1, %rax
-# else
+# else
mov %rdi, %rax
-# endif
+# endif
ret
.p2align 4
@@ -2943,15 +2945,17 @@ L(StrncpyExit8Bytes):
jz L(Exit7)
mov (%rcx), %rax
mov %rax, (%rdx)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
lea 7(%rdx), %rax
cmpb $1, (%rax)
sbb $-1, %rax
-# else
+# else
mov %rdi, %rax
-# endif
+# endif
ret
+# endif
+
# endif
# ifdef USE_AS_STRNCPY
@@ -3715,7 +3719,7 @@ L(StrncpyExit15):
lea 1(%rsi), %rsi
jmp L(CopyFrom1To16BytesCase3)
# endif
-
+# ifndef USE_AS_STRCAT
END (STRCPY)
-
+# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/strlen-no-bsf.S b/sysdeps/x86_64/multiarch/strlen-no-bsf.S
index 3e52f81..c730e0a 100644
--- a/sysdeps/x86_64/multiarch/strlen-no-bsf.S
+++ b/sysdeps/x86_64/multiarch/strlen-no-bsf.S
@@ -1,5 +1,5 @@
-/* strlen without BSF
- Copyright (C) 2010 Free Software Foundation, Inc.
+/* strlen SSE2 without bsf
+ Copyright (C) 2010, 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -18,12 +18,17 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
-#if defined SHARED && !defined NOT_IN_libc
+#if (defined SHARED || defined USE_AS_STRCAT) && !defined NOT_IN_libc
-#include <sysdep.h>
+# ifndef USE_AS_STRCAT
- .section .text.slow,"ax",@progbits
+# include <sysdep.h>
+
+# define RETURN ret
+
+ .section .text.sse2,"ax",@progbits
ENTRY (__strlen_no_bsf)
+# endif
xor %eax, %eax
cmpb $0, (%rdi)
jz L(exit_tail0)
@@ -165,39 +170,37 @@ ENTRY (__strlen_no_bsf)
jnz L(exit)
and $-0x40, %rax
- xor %r8d, %r8d
L(aligned_64):
pcmpeqb (%rax), %xmm0
pcmpeqb 16(%rax), %xmm1
pcmpeqb 32(%rax), %xmm2
pcmpeqb 48(%rax), %xmm3
pmovmskb %xmm0, %edx
- pmovmskb %xmm1, %esi
- pmovmskb %xmm2, %edi
+ pmovmskb %xmm1, %r11d
+ pmovmskb %xmm2, %r10d
pmovmskb %xmm3, %r9d
- or %edx, %r8d
- or %esi, %r8d
- or %edi, %r8d
- or %r9d, %r8d
+ or %edx, %r9d
+ or %r11d, %r9d
+ or %r10d, %r9d
lea 64(%rax), %rax
jz L(aligned_64)
test %edx, %edx
jnz L(aligned_64_exit_16)
- test %esi, %esi
+ test %r11d, %r11d
jnz L(aligned_64_exit_32)
- test %edi, %edi
+ test %r10d, %r10d
jnz L(aligned_64_exit_48)
L(aligned_64_exit_64):
- mov %r9d, %edx
+ pmovmskb %xmm3, %edx
jmp L(aligned_64_exit)
L(aligned_64_exit_48):
lea -16(%rax), %rax
- mov %edi, %edx
+ mov %r10d, %edx
jmp L(aligned_64_exit)
L(aligned_64_exit_32):
lea -32(%rax), %rax
- mov %esi, %edx
+ mov %r11d, %edx
jmp L(aligned_64_exit)
L(aligned_64_exit_16):
lea -48(%rax), %rax
@@ -228,7 +231,7 @@ L(exit):
jnz L(exit_tail6)
add $7, %eax
L(exit_tail0):
- ret
+ RETURN
L(exit_high):
add $8, %eax
@@ -253,57 +256,58 @@ L(exit_high):
test $0x40, %dh
jnz L(exit_tail6)
add $7, %eax
- ret
+ RETURN
.p2align 4
L(exit_tail1):
add $1, %eax
- ret
+ RETURN
L(exit_tail2):
add $2, %eax
- ret
+ RETURN
L(exit_tail3):
add $3, %eax
- ret
+ RETURN
L(exit_tail4):
add $4, %eax
- ret
+ RETURN
L(exit_tail5):
add $5, %eax
- ret
+ RETURN
L(exit_tail6):
add $6, %eax
- ret
+ RETURN
L(exit_tail7):
add $7, %eax
- ret
+ RETURN
L(exit_tail8):
add $8, %eax
- ret
+ RETURN
L(exit_tail9):
add $9, %eax
- ret
+ RETURN
L(exit_tail10):
add $10, %eax
- ret
+ RETURN
L(exit_tail11):
add $11, %eax
- ret
+ RETURN
L(exit_tail12):
add $12, %eax
- ret
+ RETURN
L(exit_tail13):
add $13, %eax
- ret
+ RETURN
L(exit_tail14):
add $14, %eax
- ret
+ RETURN
L(exit_tail15):
add $15, %eax
- ret
+# ifndef USE_AS_STRCAT
+ RETURN
END (__strlen_no_bsf)
-
+# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/strlen-sse2-pminub.S b/sysdeps/x86_64/multiarch/strlen-sse2-pminub.S
new file mode 100644
index 0000000..57778cf
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strlen-sse2-pminub.S
@@ -0,0 +1,260 @@
+/* strlen SSE2
+ Copyright (C) 2011 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#if !defined NOT_IN_libc && (defined SHARED || defined USE_AS_STRCAT)
+
+# ifndef USE_AS_STRCAT
+
+# include <sysdep.h>
+
+# define RETURN ret
+
+ .section .text.sse2,"ax",@progbits
+ENTRY (__strlen_sse2_pminub)
+
+# endif
+ xor %rax, %rax
+ mov %edi, %ecx
+ and $0x3f, %ecx
+ pxor %xmm0, %xmm0
+ cmp $0x30, %ecx
+ ja L(next)
+ movdqu (%rdi), %xmm1
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(exit_less16)
+ mov %rdi, %rax
+ and $-16, %rax
+ jmp L(align16_start)
+L(next):
+ mov %rdi, %rax
+ and $-16, %rax
+ pcmpeqb (%rax), %xmm0
+ mov $-1, %r10d
+ sub %rax, %rcx
+ shl %cl, %r10d
+ pmovmskb %xmm0, %edx
+ and %r10d, %edx
+ jnz L(exit)
+L(align16_start):
+ pxor %xmm0, %xmm0
+ pxor %xmm1, %xmm1
+ pxor %xmm2, %xmm2
+ pxor %xmm3, %xmm3
+ pcmpeqb 16(%rax), %xmm0
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(exit16)
+
+ pcmpeqb 32(%rax), %xmm1
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ jnz L(exit32)
+
+ pcmpeqb 48(%rax), %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ jnz L(exit48)
+
+ pcmpeqb 64(%rax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ jnz L(exit64)
+
+ pcmpeqb 80(%rax), %xmm0
+ add $64, %rax
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(exit16)
+
+ pcmpeqb 32(%rax), %xmm1
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ jnz L(exit32)
+
+ pcmpeqb 48(%rax), %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ jnz L(exit48)
+
+ pcmpeqb 64(%rax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ jnz L(exit64)
+
+ pcmpeqb 80(%rax), %xmm0
+ add $64, %rax
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(exit16)
+
+ pcmpeqb 32(%rax), %xmm1
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ jnz L(exit32)
+
+ pcmpeqb 48(%rax), %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ jnz L(exit48)
+
+ pcmpeqb 64(%rax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ jnz L(exit64)
+
+ pcmpeqb 80(%rax), %xmm0
+ add $64, %rax
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(exit16)
+
+ pcmpeqb 32(%rax), %xmm1
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ jnz L(exit32)
+
+ pcmpeqb 48(%rax), %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ jnz L(exit48)
+
+ pcmpeqb 64(%rax), %xmm3
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ jnz L(exit64)
+
+
+ test $0x3f, %rax
+ jz L(align64_loop)
+
+ pcmpeqb 80(%rax), %xmm0
+ add $80, %rax
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(exit)
+
+ test $0x3f, %rax
+ jz L(align64_loop)
+
+ pcmpeqb 16(%rax), %xmm1
+ add $16, %rax
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ jnz L(exit)
+
+ test $0x3f, %rax
+ jz L(align64_loop)
+
+ pcmpeqb 16(%rax), %xmm2
+ add $16, %rax
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ jnz L(exit)
+
+ test $0x3f, %rax
+ jz L(align64_loop)
+
+ pcmpeqb 16(%rax), %xmm3
+ add $16, %rax
+ pmovmskb %xmm3, %edx
+ test %edx, %edx
+ jnz L(exit)
+
+ add $16, %rax
+ .p2align 4
+ L(align64_loop):
+ movaps (%rax), %xmm4
+ pminub 16(%rax), %xmm4
+ movaps 32(%rax), %xmm5
+ pminub 48(%rax), %xmm5
+ add $64, %rax
+ pminub %xmm4, %xmm5
+ pcmpeqb %xmm0, %xmm5
+ pmovmskb %xmm5, %edx
+ test %edx, %edx
+ jz L(align64_loop)
+
+
+ pcmpeqb -64(%rax), %xmm0
+ sub $80, %rax
+ pmovmskb %xmm0, %edx
+ test %edx, %edx
+ jnz L(exit16)
+
+ pcmpeqb 32(%rax), %xmm1
+ pmovmskb %xmm1, %edx
+ test %edx, %edx
+ jnz L(exit32)
+
+ pcmpeqb 48(%rax), %xmm2
+ pmovmskb %xmm2, %edx
+ test %edx, %edx
+ jnz L(exit48)
+
+ pcmpeqb 64(%rax), %xmm3
+ pmovmskb %xmm3, %edx
+ sub %rdi, %rax
+ bsf %rdx, %rdx
+ add %rdx, %rax
+ add $64, %rax
+ RETURN
+
+ .p2align 4
+L(exit):
+ sub %rdi, %rax
+L(exit_less16):
+ bsf %rdx, %rdx
+ add %rdx, %rax
+ RETURN
+ .p2align 4
+L(exit16):
+ sub %rdi, %rax
+ bsf %rdx, %rdx
+ add %rdx, %rax
+ add $16, %rax
+ RETURN
+ .p2align 4
+L(exit32):
+ sub %rdi, %rax
+ bsf %rdx, %rdx
+ add %rdx, %rax
+ add $32, %rax
+ RETURN
+ .p2align 4
+L(exit48):
+ sub %rdi, %rax
+ bsf %rdx, %rdx
+ add %rdx, %rax
+ add $48, %rax
+ RETURN
+ .p2align 4
+L(exit64):
+ sub %rdi, %rax
+ bsf %rdx, %rdx
+ add %rdx, %rax
+ add $64, %rax
+# ifndef USE_AS_STRCAT
+ RETURN
+
+END (__strlen_sse2_pminub)
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strlen.S b/sysdeps/x86_64/multiarch/strlen.S
index 83a88ec..d789707 100644
--- a/sysdeps/x86_64/multiarch/strlen.S
+++ b/sysdeps/x86_64/multiarch/strlen.S
@@ -32,7 +32,10 @@ ENTRY(strlen)
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
-1: leaq __strlen_sse2(%rip), %rax
+1: leaq __strlen_sse2_pminub(%rip), %rax
+ testl $bit_Prefer_PMINUB_for_stringop, __cpu_features+FEATURE_OFFSET+index_Prefer_PMINUB_for_stringop(%rip)
+ jnz 2f
+ leaq __strlen_sse2(%rip), %rax
testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
jz 2f
leaq __strlen_sse42(%rip), %rax
diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
new file mode 100644
index 0000000..a3cdbff
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncat-c.c
@@ -0,0 +1,8 @@
+#define STRNCAT __strncat_sse2 /* Presumably the name string/strncat.c gives its function; confirm there.  */
+#ifdef SHARED
+#undef libc_hidden_def /* Drop the existing definition so it can be replaced below.  */
+#define libc_hidden_def(name) \
+ __hidden_ver1 (__strncat_sse2, __GI___strncat, __strncat_sse2); /* The name argument is unused; the symbols are fixed.  */
+#endif /* SHARED */
+
+#include "string/strncat.c" /* Compiled with the macros defined above in effect.  */
diff --git a/sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S
new file mode 100644
index 0000000..133e1d2
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S
@@ -0,0 +1,3 @@
+#define USE_AS_STRNCAT /* Presumably selects the strncat paths of the included file; confirm there.  */
+#define STRCAT __strncat_sse2_unaligned /* Presumably the entry-point name used by the included file; confirm there.  */
+#include "strcat-sse2-unaligned.S" /* Shared implementation, assembled with the two macros above defined.  */
diff --git a/sysdeps/x86_64/multiarch/strncat-ssse3.S b/sysdeps/x86_64/multiarch/strncat-ssse3.S
new file mode 100644
index 0000000..6c45ff3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncat-ssse3.S
@@ -0,0 +1,3 @@
+#define USE_AS_STRNCAT /* Presumably selects the strncat paths of the included file; confirm there.  */
+#define STRCAT __strncat_ssse3 /* strcat-ssse3.S closes its function with END (STRCAT).  */
+#include "strcat-ssse3.S" /* Shared implementation, assembled with the two macros above defined.  */
diff --git a/sysdeps/x86_64/multiarch/strncat.S b/sysdeps/x86_64/multiarch/strncat.S
new file mode 100644
index 0000000..fd569c2
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncat.S
@@ -0,0 +1,3 @@
+#define STRCAT strncat /* Presumably the symbol name strcat.S defines; confirm there.  */
+#define USE_AS_STRNCAT /* Presumably switches strcat.S to its strncat variants; confirm there.  */
+#include "strcat.S" /* Assembled with the two macros above defined.  */
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 29 ++
NEWS | 5 +-
string/strncat.c | 6 +-
sysdeps/x86_64/multiarch/Makefile | 6 +-
sysdeps/x86_64/multiarch/init-arch.c | 10 +-
sysdeps/x86_64/multiarch/init-arch.h | 2 +
sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S | 54 ++
sysdeps/x86_64/multiarch/strcat-ssse3.S | 558 +++++++++++++++++++++
sysdeps/x86_64/multiarch/strcat.S | 85 ++++
sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S | 450 ++++++++++++-----
sysdeps/x86_64/multiarch/strcpy-ssse3.S | 280 ++++++-----
sysdeps/x86_64/multiarch/strlen-no-bsf.S | 74 ++--
sysdeps/x86_64/multiarch/strlen-sse2-pminub.S | 260 ++++++++++
sysdeps/x86_64/multiarch/strlen.S | 5 +-
sysdeps/x86_64/multiarch/strncat-c.c | 8 +
sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S | 3 +
sysdeps/x86_64/multiarch/strncat-ssse3.S | 3 +
sysdeps/x86_64/multiarch/strncat.S | 3 +
18 files changed, 1520 insertions(+), 321 deletions(-)
create mode 100644 sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
create mode 100644 sysdeps/x86_64/multiarch/strcat-ssse3.S
create mode 100644 sysdeps/x86_64/multiarch/strcat.S
create mode 100644 sysdeps/x86_64/multiarch/strlen-sse2-pminub.S
create mode 100644 sysdeps/x86_64/multiarch/strncat-c.c
create mode 100644 sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S
create mode 100644 sysdeps/x86_64/multiarch/strncat-ssse3.S
create mode 100644 sysdeps/x86_64/multiarch/strncat.S
hooks/post-receive
--
GNU C Library master sources