This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch, master, updated. glibc-2.14-447-g618280a
- From: drepper at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 25 Oct 2011 18:50:57 -0000
- Subject: GNU C Library master sources branch, master, updated. glibc-2.14-447-g618280a
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master, has been updated
via 618280a192aed70b47d6b2deb2a81c6359b9a92b (commit)
from 32d2a6ec31c5b9a02c339530df4a1b6728514868 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=618280a192aed70b47d6b2deb2a81c6359b9a92b
commit 618280a192aed70b47d6b2deb2a81c6359b9a92b
Author: Ulrich Drepper <drepper@gmail.com>
Date: Tue Oct 25 14:50:31 2011 -0400
Optimize x86-64 SSE4.2+ strcmp a bit more
diff --git a/ChangeLog b/ChangeLog
index ef63939..b4f22bd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2011-10-25 Ulrich Drepper <drepper@gmail.com>
+
+ * sysdeps/x86_64/multiarch/strcmp-sse42.S: Move common code to earlier
+ place. Use VEX encoding when compiling for AVX.
+
2011-10-25 Andreas Schwab <schwab@redhat.com>
* wcsmbs/wcscmp.c (WCSCMP): Compare as wchar_t, not wint_t.
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
index c9e03b9..b93eda1 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
@@ -72,6 +72,23 @@ END (GLABEL(__strncasecmp))
/* FALLTHROUGH to strncasecmp_l. */
#endif
+
+#ifdef USE_AVX
+# define movdqa vmovdqa
+# define movdqu vmovdqu
+# define pmovmskb vpmovmskb
+# define pcmpistri vpcmpistri
+# define psubb vpsubb
+# define pcmpeqb vpcmpeqb
+# define psrldq vpsrldq
+# define pslldq vpslldq
+# define palignr vpalignr
+# define pxor vpxor
+# define D(arg) arg, arg
+#else
+# define D(arg) arg
+#endif
+
STRCMP_SSE42:
cfi_startproc
CALL_MCOUNT
@@ -179,10 +196,10 @@ LABEL(touppermask):
#else
# define TOLOWER(reg1, reg2)
#endif
- pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
- pcmpeqb %xmm1, %xmm0 /* Any null chars? */
- pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ pxor %xmm0, D(%xmm0) /* clear %xmm0 for null char checks */
+ pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */
+ pcmpeqb %xmm2, D(%xmm1) /* compare first 16 bytes for equality */
+ psubb %xmm0, D(%xmm1) /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
jnz LABEL(less16bytes)/* If not, find different value or null char */
@@ -206,6 +223,7 @@ LABEL(crosscache):
xor %r8d, %r8d
and $0xf, %ecx /* offset of rsi */
and $0xf, %eax /* offset of rdi */
+ pxor %xmm0, D(%xmm0) /* clear %xmm0 for null char check */
cmp %eax, %ecx
je LABEL(ashr_0) /* rsi and rdi relative offset same */
ja LABEL(bigger)
@@ -213,10 +231,13 @@ LABEL(crosscache):
xchg %ecx, %eax
xchg %rsi, %rdi
LABEL(bigger):
+ movdqa (%rdi), %xmm2
+ movdqa (%rsi), %xmm1
lea 15(%rax), %r9
sub %rcx, %r9
lea LABEL(unaligned_table)(%rip), %r10
movslq (%r10, %r9,4), %r9
+ pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */
lea (%r10, %r9), %r10
jmp *%r10 /* jump to corresponding case */
@@ -229,16 +250,15 @@ LABEL(bigger):
LABEL(ashr_0):
movdqa (%rsi), %xmm1
- pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
- pcmpeqb %xmm1, %xmm0 /* Any null chars? */
+ pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
- pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
+ pcmpeqb (%rdi), D(%xmm1) /* compare 16 bytes for equality */
#else
movdqa (%rdi), %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
+ pcmpeqb %xmm2, D(%xmm1) /* compare 16 bytes for equality */
#endif
- psubb %xmm0, %xmm1 /* packed sub of comparison results*/
+ psubb %xmm0, D(%xmm1) /* packed sub of comparison results*/
pmovmskb %xmm1, %r9d
shr %cl, %edx /* adjust 0xffff for offset */
shr %cl, %r9d /* adjust for 16-byte offset */
@@ -251,7 +271,6 @@ LABEL(ashr_0):
UPDATE_STRNCMP_COUNTER
mov $16, %rcx
mov $16, %r9
- pxor %xmm0, %xmm0 /* clear xmm0, may have changed above */
/*
* Now both strings are aligned at 16-byte boundary. Loop over strings
@@ -319,14 +338,10 @@ LABEL(ashr_0_exit_use):
*/
.p2align 4
LABEL(ashr_1):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0 /* Any null chars? */
- pslldq $15, %xmm2 /* shift first string to align with second */
+ pslldq $15, D(%xmm2) /* shift first string to align with second */
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
- psubb %xmm0, %xmm2 /* packed sub of comparison results*/
+ pcmpeqb %xmm1, D(%xmm2) /* compare 16 bytes for equality */
+ psubb %xmm0, D(%xmm2) /* packed sub of comparison results*/
pmovmskb %xmm2, %r9d
shr %cl, %edx /* adjust 0xffff for offset */
shr %cl, %r9d /* adjust for 16-byte offset */
@@ -335,7 +350,6 @@ LABEL(ashr_1):
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads*/
mov $1, %r9d /* byte position left over from less32bytes case */
/*
@@ -355,7 +369,7 @@ LABEL(loop_ashr_1_use):
LABEL(nibble_ashr_1_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $1, -16(%rdi, %rdx), %xmm0
+ palignr $1, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
@@ -374,7 +388,7 @@ LABEL(nibble_ashr_1_restart_use):
jg LABEL(nibble_ashr_1_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $1, -16(%rdi, %rdx), %xmm0
+ palignr $1, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
@@ -394,7 +408,7 @@ LABEL(nibble_ashr_1_restart_use):
LABEL(nibble_ashr_1_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $1, %xmm0
+ psrldq $1, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -412,14 +426,10 @@ LABEL(nibble_ashr_1_use):
*/
.p2align 4
LABEL(ashr_2):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $14, %xmm2
+ pslldq $14, D(%xmm2)
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
+ pcmpeqb %xmm1, D(%xmm2)
+ psubb %xmm0, D(%xmm2)
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
@@ -428,7 +438,6 @@ LABEL(ashr_2):
movdqa (%rdi), %xmm3
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $2, %r9d /* byte position left over from less32bytes case */
/*
@@ -448,7 +457,7 @@ LABEL(loop_ashr_2_use):
LABEL(nibble_ashr_2_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $2, -16(%rdi, %rdx), %xmm0
+ palignr $2, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
@@ -467,7 +476,7 @@ LABEL(nibble_ashr_2_restart_use):
jg LABEL(nibble_ashr_2_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $2, -16(%rdi, %rdx), %xmm0
+ palignr $2, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
@@ -487,7 +496,7 @@ LABEL(nibble_ashr_2_restart_use):
LABEL(nibble_ashr_2_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $2, %xmm0
+ psrldq $2, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -505,14 +514,10 @@ LABEL(nibble_ashr_2_use):
*/
.p2align 4
LABEL(ashr_3):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $13, %xmm2
+ pslldq $13, D(%xmm2)
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
+ pcmpeqb %xmm1, D(%xmm2)
+ psubb %xmm0, D(%xmm2)
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
@@ -522,7 +527,6 @@ LABEL(ashr_3):
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $3, %r9d /* byte position left over from less32bytes case */
/*
@@ -541,7 +545,7 @@ LABEL(loop_ashr_3_use):
LABEL(nibble_ashr_3_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $3, -16(%rdi, %rdx), %xmm0
+ palignr $3, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
@@ -560,7 +564,7 @@ LABEL(nibble_ashr_3_restart_use):
jg LABEL(nibble_ashr_3_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $3, -16(%rdi, %rdx), %xmm0
+ palignr $3, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
@@ -580,7 +584,7 @@ LABEL(nibble_ashr_3_restart_use):
LABEL(nibble_ashr_3_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $3, %xmm0
+ psrldq $3, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -598,14 +602,10 @@ LABEL(nibble_ashr_3_use):
*/
.p2align 4
LABEL(ashr_4):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $12, %xmm2
+ pslldq $12, D(%xmm2)
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
+ pcmpeqb %xmm1, D(%xmm2)
+ psubb %xmm0, D(%xmm2)
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
@@ -615,7 +615,6 @@ LABEL(ashr_4):
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $4, %r9d /* byte position left over from less32bytes case */
/*
@@ -635,7 +634,7 @@ LABEL(loop_ashr_4_use):
LABEL(nibble_ashr_4_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $4, -16(%rdi, %rdx), %xmm0
+ palignr $4, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
@@ -654,7 +653,7 @@ LABEL(nibble_ashr_4_restart_use):
jg LABEL(nibble_ashr_4_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $4, -16(%rdi, %rdx), %xmm0
+ palignr $4, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
@@ -674,7 +673,7 @@ LABEL(nibble_ashr_4_restart_use):
LABEL(nibble_ashr_4_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $4, %xmm0
+ psrldq $4, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -692,14 +691,10 @@ LABEL(nibble_ashr_4_use):
*/
.p2align 4
LABEL(ashr_5):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $11, %xmm2
+ pslldq $11, D(%xmm2)
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
+ pcmpeqb %xmm1, D(%xmm2)
+ psubb %xmm0, D(%xmm2)
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
@@ -709,7 +704,6 @@ LABEL(ashr_5):
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $5, %r9d /* byte position left over from less32bytes case */
/*
@@ -729,7 +723,7 @@ LABEL(loop_ashr_5_use):
LABEL(nibble_ashr_5_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $5, -16(%rdi, %rdx), %xmm0
+ palignr $5, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
@@ -749,7 +743,7 @@ LABEL(nibble_ashr_5_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $5, -16(%rdi, %rdx), %xmm0
+ palignr $5, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
@@ -769,7 +763,7 @@ LABEL(nibble_ashr_5_restart_use):
LABEL(nibble_ashr_5_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $5, %xmm0
+ psrldq $5, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -787,14 +781,10 @@ LABEL(nibble_ashr_5_use):
*/
.p2align 4
LABEL(ashr_6):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $10, %xmm2
+ pslldq $10, D(%xmm2)
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
+ pcmpeqb %xmm1, D(%xmm2)
+ psubb %xmm0, D(%xmm2)
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
@@ -804,7 +794,6 @@ LABEL(ashr_6):
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $6, %r9d /* byte position left over from less32bytes case */
/*
@@ -824,7 +813,7 @@ LABEL(loop_ashr_6_use):
LABEL(nibble_ashr_6_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $6, -16(%rdi, %rdx), %xmm0
+ palignr $6, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
@@ -843,7 +832,7 @@ LABEL(nibble_ashr_6_restart_use):
jg LABEL(nibble_ashr_6_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $6, -16(%rdi, %rdx), %xmm0
+ palignr $6, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
@@ -863,7 +852,7 @@ LABEL(nibble_ashr_6_restart_use):
LABEL(nibble_ashr_6_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $6, %xmm0
+ psrldq $6, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -881,14 +870,10 @@ LABEL(nibble_ashr_6_use):
*/
.p2align 4
LABEL(ashr_7):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $9, %xmm2
+ pslldq $9, D(%xmm2)
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
+ pcmpeqb %xmm1, D(%xmm2)
+ psubb %xmm0, D(%xmm2)
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
@@ -898,7 +883,6 @@ LABEL(ashr_7):
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $7, %r9d /* byte position left over from less32bytes case */
/*
@@ -918,7 +902,7 @@ LABEL(loop_ashr_7_use):
LABEL(nibble_ashr_7_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $7, -16(%rdi, %rdx), %xmm0
+ palignr $7, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
@@ -937,7 +921,7 @@ LABEL(nibble_ashr_7_restart_use):
jg LABEL(nibble_ashr_7_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $7, -16(%rdi, %rdx), %xmm0
+ palignr $7, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
@@ -957,7 +941,7 @@ LABEL(nibble_ashr_7_restart_use):
LABEL(nibble_ashr_7_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $7, %xmm0
+ psrldq $7, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -975,14 +959,10 @@ LABEL(nibble_ashr_7_use):
*/
.p2align 4
LABEL(ashr_8):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $8, %xmm2
+ pslldq $8, D(%xmm2)
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
+ pcmpeqb %xmm1, D(%xmm2)
+ psubb %xmm0, D(%xmm2)
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
@@ -992,7 +972,6 @@ LABEL(ashr_8):
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $8, %r9d /* byte position left over from less32bytes case */
/*
@@ -1012,7 +991,7 @@ LABEL(loop_ashr_8_use):
LABEL(nibble_ashr_8_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $8, -16(%rdi, %rdx), %xmm0
+ palignr $8, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1031,7 +1010,7 @@ LABEL(nibble_ashr_8_restart_use):
jg LABEL(nibble_ashr_8_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $8, -16(%rdi, %rdx), %xmm0
+ palignr $8, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1051,7 +1030,7 @@ LABEL(nibble_ashr_8_restart_use):
LABEL(nibble_ashr_8_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $8, %xmm0
+ psrldq $8, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -1069,14 +1048,10 @@ LABEL(nibble_ashr_8_use):
*/
.p2align 4
LABEL(ashr_9):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $7, %xmm2
+ pslldq $7, D(%xmm2)
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
+ pcmpeqb %xmm1, D(%xmm2)
+ psubb %xmm0, D(%xmm2)
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
@@ -1086,7 +1061,6 @@ LABEL(ashr_9):
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $9, %r9d /* byte position left over from less32bytes case */
/*
@@ -1107,7 +1081,7 @@ LABEL(loop_ashr_9_use):
LABEL(nibble_ashr_9_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $9, -16(%rdi, %rdx), %xmm0
+ palignr $9, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1126,7 +1100,7 @@ LABEL(nibble_ashr_9_restart_use):
jg LABEL(nibble_ashr_9_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $9, -16(%rdi, %rdx), %xmm0
+ palignr $9, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1146,7 +1120,7 @@ LABEL(nibble_ashr_9_restart_use):
LABEL(nibble_ashr_9_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $9, %xmm0
+ psrldq $9, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -1164,14 +1138,10 @@ LABEL(nibble_ashr_9_use):
*/
.p2align 4
LABEL(ashr_10):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $6, %xmm2
+ pslldq $6, D(%xmm2)
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
+ pcmpeqb %xmm1, D(%xmm2)
+ psubb %xmm0, D(%xmm2)
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
@@ -1181,7 +1151,6 @@ LABEL(ashr_10):
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $10, %r9d /* byte position left over from less32bytes case */
/*
@@ -1201,7 +1170,7 @@ LABEL(loop_ashr_10_use):
LABEL(nibble_ashr_10_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $10, -16(%rdi, %rdx), %xmm0
+ palignr $10, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1220,7 +1189,7 @@ LABEL(nibble_ashr_10_restart_use):
jg LABEL(nibble_ashr_10_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $10, -16(%rdi, %rdx), %xmm0
+ palignr $10, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1240,7 +1209,7 @@ LABEL(nibble_ashr_10_restart_use):
LABEL(nibble_ashr_10_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $10, %xmm0
+ psrldq $10, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -1258,14 +1227,10 @@ LABEL(nibble_ashr_10_use):
*/
.p2align 4
LABEL(ashr_11):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $5, %xmm2
+ pslldq $5, D(%xmm2)
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
+ pcmpeqb %xmm1, D(%xmm2)
+ psubb %xmm0, D(%xmm2)
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
@@ -1275,7 +1240,6 @@ LABEL(ashr_11):
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $11, %r9d /* byte position left over from less32bytes case */
/*
@@ -1295,7 +1259,7 @@ LABEL(loop_ashr_11_use):
LABEL(nibble_ashr_11_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $11, -16(%rdi, %rdx), %xmm0
+ palignr $11, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1314,7 +1278,7 @@ LABEL(nibble_ashr_11_restart_use):
jg LABEL(nibble_ashr_11_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $11, -16(%rdi, %rdx), %xmm0
+ palignr $11, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1334,7 +1298,7 @@ LABEL(nibble_ashr_11_restart_use):
LABEL(nibble_ashr_11_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $11, %xmm0
+ psrldq $11, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -1352,14 +1316,10 @@ LABEL(nibble_ashr_11_use):
*/
.p2align 4
LABEL(ashr_12):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $4, %xmm2
+ pslldq $4, D(%xmm2)
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
+ pcmpeqb %xmm1, D(%xmm2)
+ psubb %xmm0, D(%xmm2)
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
@@ -1369,7 +1329,6 @@ LABEL(ashr_12):
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $12, %r9d /* byte position left over from less32bytes case */
/*
@@ -1389,7 +1348,7 @@ LABEL(loop_ashr_12_use):
LABEL(nibble_ashr_12_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $12, -16(%rdi, %rdx), %xmm0
+ palignr $12, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1408,7 +1367,7 @@ LABEL(nibble_ashr_12_restart_use):
jg LABEL(nibble_ashr_12_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $12, -16(%rdi, %rdx), %xmm0
+ palignr $12, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1428,7 +1387,7 @@ LABEL(nibble_ashr_12_restart_use):
LABEL(nibble_ashr_12_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $12, %xmm0
+ psrldq $12, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -1446,14 +1405,10 @@ LABEL(nibble_ashr_12_use):
*/
.p2align 4
LABEL(ashr_13):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $3, %xmm2
+ pslldq $3, D(%xmm2)
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
+ pcmpeqb %xmm1, D(%xmm2)
+ psubb %xmm0, D(%xmm2)
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
@@ -1463,7 +1418,6 @@ LABEL(ashr_13):
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $13, %r9d /* byte position left over from less32bytes case */
/*
@@ -1484,7 +1438,7 @@ LABEL(loop_ashr_13_use):
LABEL(nibble_ashr_13_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $13, -16(%rdi, %rdx), %xmm0
+ palignr $13, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1503,7 +1457,7 @@ LABEL(nibble_ashr_13_restart_use):
jg LABEL(nibble_ashr_13_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $13, -16(%rdi, %rdx), %xmm0
+ palignr $13, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1523,7 +1477,7 @@ LABEL(nibble_ashr_13_restart_use):
LABEL(nibble_ashr_13_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $13, %xmm0
+ psrldq $13, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -1541,14 +1495,10 @@ LABEL(nibble_ashr_13_use):
*/
.p2align 4
LABEL(ashr_14):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $2, %xmm2
+ pslldq $2, D(%xmm2)
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
+ pcmpeqb %xmm1, D(%xmm2)
+ psubb %xmm0, D(%xmm2)
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
@@ -1558,7 +1508,6 @@ LABEL(ashr_14):
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $14, %r9d /* byte position left over from less32bytes case */
/*
@@ -1579,7 +1528,7 @@ LABEL(loop_ashr_14_use):
LABEL(nibble_ashr_14_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $14, -16(%rdi, %rdx), %xmm0
+ palignr $14, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1598,7 +1547,7 @@ LABEL(nibble_ashr_14_restart_use):
jg LABEL(nibble_ashr_14_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $14, -16(%rdi, %rdx), %xmm0
+ palignr $14, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1618,7 +1567,7 @@ LABEL(nibble_ashr_14_restart_use):
LABEL(nibble_ashr_14_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $14, %xmm0
+ psrldq $14, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -1636,14 +1585,10 @@ LABEL(nibble_ashr_14_use):
*/
.p2align 4
LABEL(ashr_15):
- pxor %xmm0, %xmm0
- movdqa (%rdi), %xmm2
- movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pslldq $1, %xmm2
+ pslldq $1, D(%xmm2)
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, %xmm2
- psubb %xmm0, %xmm2
+ pcmpeqb %xmm1, D(%xmm2)
+ psubb %xmm0, D(%xmm2)
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
@@ -1654,7 +1599,6 @@ LABEL(ashr_15):
UPDATE_STRNCMP_COUNTER
- pxor %xmm0, %xmm0
mov $16, %rcx /* index for loads */
mov $15, %r9d /* byte position left over from less32bytes case */
/*
@@ -1676,7 +1620,7 @@ LABEL(loop_ashr_15_use):
LABEL(nibble_ashr_15_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $15, -16(%rdi, %rdx), %xmm0
+ palignr $15, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1695,7 +1639,7 @@ LABEL(nibble_ashr_15_restart_use):
jg LABEL(nibble_ashr_15_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $15, -16(%rdi, %rdx), %xmm0
+ palignr $15, -16(%rdi, %rdx), D(%xmm0)
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
@@ -1715,7 +1659,7 @@ LABEL(nibble_ashr_15_restart_use):
LABEL(nibble_ashr_15_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $15, %xmm0
+ psrldq $15, D(%xmm0)
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
@@ -1834,3 +1778,14 @@ LABEL(unaligned_table):
#undef LABEL
#undef GLABEL
#undef SECTION
+#undef movdqa
+#undef movdqu
+#undef pmovmskb
+#undef pcmpistri
+#undef psubb
+#undef pcmpeqb
+#undef psrldq
+#undef pslldq
+#undef palignr
+#undef pxor
+#undef D
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 5 +
sysdeps/x86_64/multiarch/strcmp-sse42.S | 305 +++++++++++++------------------
2 files changed, 135 insertions(+), 175 deletions(-)
hooks/post-receive
--
GNU C Library master sources