This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.14-447-g618280a


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master, has been updated
       via  618280a192aed70b47d6b2deb2a81c6359b9a92b (commit)
      from  32d2a6ec31c5b9a02c339530df4a1b6728514868 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=618280a192aed70b47d6b2deb2a81c6359b9a92b

commit 618280a192aed70b47d6b2deb2a81c6359b9a92b
Author: Ulrich Drepper <drepper@gmail.com>
Date:   Tue Oct 25 14:50:31 2011 -0400

    Optimize x86-64 SSE4.2+ strcmp a bit more

diff --git a/ChangeLog b/ChangeLog
index ef63939..b4f22bd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2011-10-25  Ulrich Drepper  <drepper@gmail.com>
+
+	* sysdeps/x86_64/multiarch/strcmp-sse42.S: Move common code to earlier
+	place.  Use VEX encoding when compiling for AVX.
+
 2011-10-25  Andreas Schwab  <schwab@redhat.com>
 
 	* wcsmbs/wcscmp.c (WCSCMP): Compare as wchar_t, not wint_t.
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
index c9e03b9..b93eda1 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
@@ -72,6 +72,23 @@ END (GLABEL(__strncasecmp))
 	/* FALLTHROUGH to strncasecmp_l.  */
 #endif
 
+
+#ifdef USE_AVX
+# define movdqa vmovdqa
+# define movdqu vmovdqu
+# define pmovmskb vpmovmskb
+# define pcmpistri vpcmpistri
+# define psubb vpsubb
+# define pcmpeqb vpcmpeqb
+# define psrldq vpsrldq
+# define pslldq vpslldq
+# define palignr vpalignr
+# define pxor vpxor
+# define D(arg) arg, arg
+#else
+# define D(arg) arg
+#endif
+
 STRCMP_SSE42:
 	cfi_startproc
 	CALL_MCOUNT
@@ -179,10 +196,10 @@ LABEL(touppermask):
 #else
 # define TOLOWER(reg1, reg2)
 #endif
-	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
-	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
-	pcmpeqb	%xmm2, %xmm1		/* compare first 16 bytes for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	pxor	%xmm0, D(%xmm0)		/* clear %xmm0 for null char checks */
+	pcmpeqb	%xmm1, D(%xmm0)		/* Any null chars? */
+	pcmpeqb	%xmm2, D(%xmm1)		/* compare first 16 bytes for equality */
+	psubb	%xmm0, D(%xmm1)		/* packed sub of comparison results*/
 	pmovmskb %xmm1, %edx
 	sub	$0xffff, %edx		/* if first 16 bytes are same, edx == 0xffff */
 	jnz	LABEL(less16bytes)/* If not, find different value or null char */
@@ -206,6 +223,7 @@ LABEL(crosscache):
 	xor	%r8d, %r8d
 	and	$0xf, %ecx		/* offset of rsi */
 	and	$0xf, %eax		/* offset of rdi */
+	pxor	%xmm0, D(%xmm0)		/* clear %xmm0 for null char check */
 	cmp	%eax, %ecx
 	je	LABEL(ashr_0)		/* rsi and rdi relative offset same */
 	ja	LABEL(bigger)
@@ -213,10 +231,13 @@ LABEL(crosscache):
 	xchg	%ecx, %eax
 	xchg	%rsi, %rdi
 LABEL(bigger):
+	movdqa	(%rdi), %xmm2
+	movdqa	(%rsi), %xmm1
 	lea	15(%rax), %r9
 	sub	%rcx, %r9
 	lea	LABEL(unaligned_table)(%rip), %r10
 	movslq	(%r10, %r9,4), %r9
+	pcmpeqb	%xmm1, D(%xmm0)		/* Any null chars? */
 	lea	(%r10, %r9), %r10
 	jmp	*%r10			/* jump to corresponding case */
 
@@ -229,16 +250,15 @@ LABEL(bigger):
 LABEL(ashr_0):
 
 	movdqa	(%rsi), %xmm1
-	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char check */
-	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
+	pcmpeqb	%xmm1, D(%xmm0)		/* Any null chars? */
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
-	pcmpeqb	(%rdi), %xmm1		/* compare 16 bytes for equality */
+	pcmpeqb	(%rdi), D(%xmm1)	/* compare 16 bytes for equality */
 #else
 	movdqa	(%rdi), %xmm2
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm2, %xmm1		/* compare 16 bytes for equality */
+	pcmpeqb	%xmm2, D(%xmm1)		/* compare 16 bytes for equality */
 #endif
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
+	psubb	%xmm0, D(%xmm1)		/* packed sub of comparison results*/
 	pmovmskb %xmm1, %r9d
 	shr	%cl, %edx		/* adjust 0xffff for offset */
 	shr	%cl, %r9d		/* adjust for 16-byte offset */
@@ -251,7 +271,6 @@ LABEL(ashr_0):
 	UPDATE_STRNCMP_COUNTER
 	mov	$16, %rcx
 	mov	$16, %r9
-	pxor	%xmm0, %xmm0		/* clear xmm0, may have changed above */
 
 	/*
 	 * Now both strings are aligned at 16-byte boundary. Loop over strings
@@ -319,14 +338,10 @@ LABEL(ashr_0_exit_use):
  */
 	.p2align 4
 LABEL(ashr_1):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
-	pslldq	$15, %xmm2		/* shift first string to align with second */
+	pslldq	$15, D(%xmm2)		/* shift first string to align with second */
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2		/* compare 16 bytes for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
+	pcmpeqb	%xmm1, D(%xmm2)		/* compare 16 bytes for equality */
+	psubb	%xmm0, D(%xmm2)		/* packed sub of comparison results*/
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx		/* adjust 0xffff for offset */
 	shr	%cl, %r9d		/* adjust for 16-byte offset */
@@ -335,7 +350,6 @@ LABEL(ashr_1):
 	movdqa	(%rdi), %xmm3
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx		/* index for loads*/
 	mov	$1, %r9d		/* byte position left over from less32bytes case */
 	/*
@@ -355,7 +369,7 @@ LABEL(loop_ashr_1_use):
 
 LABEL(nibble_ashr_1_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $1, -16(%rdi, %rdx), %xmm0
+	palignr $1, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
 #else
@@ -374,7 +388,7 @@ LABEL(nibble_ashr_1_restart_use):
 	jg	LABEL(nibble_ashr_1_use)
 
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $1, -16(%rdi, %rdx), %xmm0
+	palignr $1, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
 #else
@@ -394,7 +408,7 @@ LABEL(nibble_ashr_1_restart_use):
 LABEL(nibble_ashr_1_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$1, %xmm0
+	psrldq	$1, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -412,14 +426,10 @@ LABEL(nibble_ashr_1_use):
  */
 	.p2align 4
 LABEL(ashr_2):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pslldq	$14, %xmm2
+	pslldq	$14, D(%xmm2)
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2
-	psubb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, D(%xmm2)
+	psubb	%xmm0, D(%xmm2)
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx
 	shr	%cl, %r9d
@@ -428,7 +438,6 @@ LABEL(ashr_2):
 	movdqa	(%rdi), %xmm3
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx	/* index for loads */
 	mov	$2, %r9d	/* byte position left over from less32bytes case */
 	/*
@@ -448,7 +457,7 @@ LABEL(loop_ashr_2_use):
 
 LABEL(nibble_ashr_2_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $2, -16(%rdi, %rdx), %xmm0
+	palignr $2, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
 #else
@@ -467,7 +476,7 @@ LABEL(nibble_ashr_2_restart_use):
 	jg	LABEL(nibble_ashr_2_use)
 
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $2, -16(%rdi, %rdx), %xmm0
+	palignr $2, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
 #else
@@ -487,7 +496,7 @@ LABEL(nibble_ashr_2_restart_use):
 LABEL(nibble_ashr_2_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$2, %xmm0
+	psrldq	$2, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -505,14 +514,10 @@ LABEL(nibble_ashr_2_use):
  */
 	.p2align 4
 LABEL(ashr_3):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pslldq	$13, %xmm2
+	pslldq	$13, D(%xmm2)
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2
-	psubb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, D(%xmm2)
+	psubb	%xmm0, D(%xmm2)
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx
 	shr	%cl, %r9d
@@ -522,7 +527,6 @@ LABEL(ashr_3):
 
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx	/* index for loads */
 	mov	$3, %r9d	/* byte position left over from less32bytes case */
 	/*
@@ -541,7 +545,7 @@ LABEL(loop_ashr_3_use):
 
 LABEL(nibble_ashr_3_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $3, -16(%rdi, %rdx), %xmm0
+	palignr $3, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
 #else
@@ -560,7 +564,7 @@ LABEL(nibble_ashr_3_restart_use):
 	jg	LABEL(nibble_ashr_3_use)
 
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $3, -16(%rdi, %rdx), %xmm0
+	palignr $3, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
 #else
@@ -580,7 +584,7 @@ LABEL(nibble_ashr_3_restart_use):
 LABEL(nibble_ashr_3_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$3, %xmm0
+	psrldq	$3, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -598,14 +602,10 @@ LABEL(nibble_ashr_3_use):
  */
 	.p2align 4
 LABEL(ashr_4):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pslldq	$12, %xmm2
+	pslldq	$12, D(%xmm2)
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2
-	psubb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, D(%xmm2)
+	psubb	%xmm0, D(%xmm2)
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx
 	shr	%cl, %r9d
@@ -615,7 +615,6 @@ LABEL(ashr_4):
 
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx	/* index for loads */
 	mov	$4, %r9d	/* byte position left over from less32bytes case */
 	/*
@@ -635,7 +634,7 @@ LABEL(loop_ashr_4_use):
 
 LABEL(nibble_ashr_4_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $4, -16(%rdi, %rdx), %xmm0
+	palignr $4, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
 #else
@@ -654,7 +653,7 @@ LABEL(nibble_ashr_4_restart_use):
 	jg	LABEL(nibble_ashr_4_use)
 
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $4, -16(%rdi, %rdx), %xmm0
+	palignr $4, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
 #else
@@ -674,7 +673,7 @@ LABEL(nibble_ashr_4_restart_use):
 LABEL(nibble_ashr_4_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$4, %xmm0
+	psrldq	$4, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -692,14 +691,10 @@ LABEL(nibble_ashr_4_use):
  */
 	.p2align 4
 LABEL(ashr_5):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pslldq	$11, %xmm2
+	pslldq	$11, D(%xmm2)
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2
-	psubb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, D(%xmm2)
+	psubb	%xmm0, D(%xmm2)
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx
 	shr	%cl, %r9d
@@ -709,7 +704,6 @@ LABEL(ashr_5):
 
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx	/* index for loads */
 	mov	$5, %r9d	/* byte position left over from less32bytes case */
 	/*
@@ -729,7 +723,7 @@ LABEL(loop_ashr_5_use):
 
 LABEL(nibble_ashr_5_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $5, -16(%rdi, %rdx), %xmm0
+	palignr $5, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
 #else
@@ -749,7 +743,7 @@ LABEL(nibble_ashr_5_restart_use):
 
 	movdqa	(%rdi, %rdx), %xmm0
 
-	palignr $5, -16(%rdi, %rdx), %xmm0
+	palignr $5, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
 #else
@@ -769,7 +763,7 @@ LABEL(nibble_ashr_5_restart_use):
 LABEL(nibble_ashr_5_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$5, %xmm0
+	psrldq	$5, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -787,14 +781,10 @@ LABEL(nibble_ashr_5_use):
  */
 	.p2align 4
 LABEL(ashr_6):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pslldq	$10, %xmm2
+	pslldq	$10, D(%xmm2)
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2
-	psubb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, D(%xmm2)
+	psubb	%xmm0, D(%xmm2)
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx
 	shr	%cl, %r9d
@@ -804,7 +794,6 @@ LABEL(ashr_6):
 
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx	/* index for loads */
 	mov	$6, %r9d	/* byte position left over from less32bytes case */
 	/*
@@ -824,7 +813,7 @@ LABEL(loop_ashr_6_use):
 
 LABEL(nibble_ashr_6_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $6, -16(%rdi, %rdx), %xmm0
+	palignr $6, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a,(%rsi,%rdx), %xmm0
 #else
@@ -843,7 +832,7 @@ LABEL(nibble_ashr_6_restart_use):
 	jg	LABEL(nibble_ashr_6_use)
 
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $6, -16(%rdi, %rdx), %xmm0
+	palignr $6, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a,(%rsi,%rdx), %xmm0
 #else
@@ -863,7 +852,7 @@ LABEL(nibble_ashr_6_restart_use):
 LABEL(nibble_ashr_6_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$6, %xmm0
+	psrldq	$6, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -881,14 +870,10 @@ LABEL(nibble_ashr_6_use):
  */
 	.p2align 4
 LABEL(ashr_7):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pslldq	$9, %xmm2
+	pslldq	$9, D(%xmm2)
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2
-	psubb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, D(%xmm2)
+	psubb	%xmm0, D(%xmm2)
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx
 	shr	%cl, %r9d
@@ -898,7 +883,6 @@ LABEL(ashr_7):
 
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx	/* index for loads */
 	mov	$7, %r9d	/* byte position left over from less32bytes case */
 	/*
@@ -918,7 +902,7 @@ LABEL(loop_ashr_7_use):
 
 LABEL(nibble_ashr_7_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $7, -16(%rdi, %rdx), %xmm0
+	palignr $7, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
 #else
@@ -937,7 +921,7 @@ LABEL(nibble_ashr_7_restart_use):
 	jg	LABEL(nibble_ashr_7_use)
 
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $7, -16(%rdi, %rdx), %xmm0
+	palignr $7, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
 #else
@@ -957,7 +941,7 @@ LABEL(nibble_ashr_7_restart_use):
 LABEL(nibble_ashr_7_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$7, %xmm0
+	psrldq	$7, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -975,14 +959,10 @@ LABEL(nibble_ashr_7_use):
  */
 	.p2align 4
 LABEL(ashr_8):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pslldq	$8, %xmm2
+	pslldq	$8, D(%xmm2)
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2
-	psubb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, D(%xmm2)
+	psubb	%xmm0, D(%xmm2)
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx
 	shr	%cl, %r9d
@@ -992,7 +972,6 @@ LABEL(ashr_8):
 
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx	/* index for loads */
 	mov	$8, %r9d	/* byte position left over from less32bytes case */
 	/*
@@ -1012,7 +991,7 @@ LABEL(loop_ashr_8_use):
 
 LABEL(nibble_ashr_8_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $8, -16(%rdi, %rdx), %xmm0
+	palignr $8, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1031,7 +1010,7 @@ LABEL(nibble_ashr_8_restart_use):
 	jg	LABEL(nibble_ashr_8_use)
 
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $8, -16(%rdi, %rdx), %xmm0
+	palignr $8, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1051,7 +1030,7 @@ LABEL(nibble_ashr_8_restart_use):
 LABEL(nibble_ashr_8_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$8, %xmm0
+	psrldq	$8, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -1069,14 +1048,10 @@ LABEL(nibble_ashr_8_use):
  */
 	.p2align 4
 LABEL(ashr_9):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pslldq	$7, %xmm2
+	pslldq	$7, D(%xmm2)
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2
-	psubb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, D(%xmm2)
+	psubb	%xmm0, D(%xmm2)
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx
 	shr	%cl, %r9d
@@ -1086,7 +1061,6 @@ LABEL(ashr_9):
 
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx	/* index for loads */
 	mov	$9, %r9d	/* byte position left over from less32bytes case */
 	/*
@@ -1107,7 +1081,7 @@ LABEL(loop_ashr_9_use):
 LABEL(nibble_ashr_9_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
 
-	palignr $9, -16(%rdi, %rdx), %xmm0
+	palignr $9, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1126,7 +1100,7 @@ LABEL(nibble_ashr_9_restart_use):
 	jg	LABEL(nibble_ashr_9_use)
 
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $9, -16(%rdi, %rdx), %xmm0
+	palignr $9, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1146,7 +1120,7 @@ LABEL(nibble_ashr_9_restart_use):
 LABEL(nibble_ashr_9_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$9, %xmm0
+	psrldq	$9, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -1164,14 +1138,10 @@ LABEL(nibble_ashr_9_use):
  */
 	.p2align 4
 LABEL(ashr_10):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pslldq	$6, %xmm2
+	pslldq	$6, D(%xmm2)
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2
-	psubb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, D(%xmm2)
+	psubb	%xmm0, D(%xmm2)
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx
 	shr	%cl, %r9d
@@ -1181,7 +1151,6 @@ LABEL(ashr_10):
 
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx	/* index for loads */
 	mov	$10, %r9d	/* byte position left over from less32bytes case */
 	/*
@@ -1201,7 +1170,7 @@ LABEL(loop_ashr_10_use):
 
 LABEL(nibble_ashr_10_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $10, -16(%rdi, %rdx), %xmm0
+	palignr $10, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1220,7 +1189,7 @@ LABEL(nibble_ashr_10_restart_use):
 	jg	LABEL(nibble_ashr_10_use)
 
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $10, -16(%rdi, %rdx), %xmm0
+	palignr $10, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1240,7 +1209,7 @@ LABEL(nibble_ashr_10_restart_use):
 LABEL(nibble_ashr_10_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$10, %xmm0
+	psrldq	$10, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -1258,14 +1227,10 @@ LABEL(nibble_ashr_10_use):
  */
 	.p2align 4
 LABEL(ashr_11):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pslldq	$5, %xmm2
+	pslldq	$5, D(%xmm2)
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2
-	psubb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, D(%xmm2)
+	psubb	%xmm0, D(%xmm2)
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx
 	shr	%cl, %r9d
@@ -1275,7 +1240,6 @@ LABEL(ashr_11):
 
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx	/* index for loads */
 	mov	$11, %r9d	/* byte position left over from less32bytes case */
 	/*
@@ -1295,7 +1259,7 @@ LABEL(loop_ashr_11_use):
 
 LABEL(nibble_ashr_11_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $11, -16(%rdi, %rdx), %xmm0
+	palignr $11, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1314,7 +1278,7 @@ LABEL(nibble_ashr_11_restart_use):
 	jg	LABEL(nibble_ashr_11_use)
 
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $11, -16(%rdi, %rdx), %xmm0
+	palignr $11, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1334,7 +1298,7 @@ LABEL(nibble_ashr_11_restart_use):
 LABEL(nibble_ashr_11_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$11, %xmm0
+	psrldq	$11, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -1352,14 +1316,10 @@ LABEL(nibble_ashr_11_use):
  */
 	.p2align 4
 LABEL(ashr_12):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pslldq	$4, %xmm2
+	pslldq	$4, D(%xmm2)
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2
-	psubb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, D(%xmm2)
+	psubb	%xmm0, D(%xmm2)
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx
 	shr	%cl, %r9d
@@ -1369,7 +1329,6 @@ LABEL(ashr_12):
 
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx	/* index for loads */
 	mov	$12, %r9d	/* byte position left over from less32bytes case */
 	/*
@@ -1389,7 +1348,7 @@ LABEL(loop_ashr_12_use):
 
 LABEL(nibble_ashr_12_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $12, -16(%rdi, %rdx), %xmm0
+	palignr $12, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1408,7 +1367,7 @@ LABEL(nibble_ashr_12_restart_use):
 	jg	LABEL(nibble_ashr_12_use)
 
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $12, -16(%rdi, %rdx), %xmm0
+	palignr $12, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1428,7 +1387,7 @@ LABEL(nibble_ashr_12_restart_use):
 LABEL(nibble_ashr_12_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$12, %xmm0
+	psrldq	$12, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -1446,14 +1405,10 @@ LABEL(nibble_ashr_12_use):
  */
 	.p2align 4
 LABEL(ashr_13):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pslldq	$3, %xmm2
+	pslldq	$3, D(%xmm2)
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2
-	psubb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, D(%xmm2)
+	psubb	%xmm0, D(%xmm2)
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx
 	shr	%cl, %r9d
@@ -1463,7 +1418,6 @@ LABEL(ashr_13):
 
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx	/* index for loads */
 	mov	$13, %r9d	/* byte position left over from less32bytes case */
 	/*
@@ -1484,7 +1438,7 @@ LABEL(loop_ashr_13_use):
 
 LABEL(nibble_ashr_13_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $13, -16(%rdi, %rdx), %xmm0
+	palignr $13, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1503,7 +1457,7 @@ LABEL(nibble_ashr_13_restart_use):
 	jg	LABEL(nibble_ashr_13_use)
 
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $13, -16(%rdi, %rdx), %xmm0
+	palignr $13, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1523,7 +1477,7 @@ LABEL(nibble_ashr_13_restart_use):
 LABEL(nibble_ashr_13_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$13, %xmm0
+	psrldq	$13, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -1541,14 +1495,10 @@ LABEL(nibble_ashr_13_use):
  */
 	.p2align 4
 LABEL(ashr_14):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pslldq  $2, %xmm2
+	pslldq  $2, D(%xmm2)
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2
-	psubb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, D(%xmm2)
+	psubb	%xmm0, D(%xmm2)
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx
 	shr	%cl, %r9d
@@ -1558,7 +1508,6 @@ LABEL(ashr_14):
 
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx	/* index for loads */
 	mov	$14, %r9d	/* byte position left over from less32bytes case */
 	/*
@@ -1579,7 +1528,7 @@ LABEL(loop_ashr_14_use):
 
 LABEL(nibble_ashr_14_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $14, -16(%rdi, %rdx), %xmm0
+	palignr $14, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1598,7 +1547,7 @@ LABEL(nibble_ashr_14_restart_use):
 	jg	LABEL(nibble_ashr_14_use)
 
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $14, -16(%rdi, %rdx), %xmm0
+	palignr $14, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1618,7 +1567,7 @@ LABEL(nibble_ashr_14_restart_use):
 LABEL(nibble_ashr_14_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$14, %xmm0
+	psrldq	$14, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -1636,14 +1585,10 @@ LABEL(nibble_ashr_14_use):
  */
 	.p2align 4
 LABEL(ashr_15):
-	pxor	%xmm0, %xmm0
-	movdqa	(%rdi), %xmm2
-	movdqa	(%rsi), %xmm1
-	pcmpeqb	%xmm1, %xmm0
-	pslldq	$1, %xmm2
+	pslldq	$1, D(%xmm2)
 	TOLOWER (%xmm1, %xmm2)
-	pcmpeqb	%xmm1, %xmm2
-	psubb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, D(%xmm2)
+	psubb	%xmm0, D(%xmm2)
 	pmovmskb %xmm2, %r9d
 	shr	%cl, %edx
 	shr	%cl, %r9d
@@ -1654,7 +1599,6 @@ LABEL(ashr_15):
 
 	UPDATE_STRNCMP_COUNTER
 
-	pxor	%xmm0, %xmm0
 	mov	$16, %rcx	/* index for loads */
 	mov	$15, %r9d	/* byte position left over from less32bytes case */
 	/*
@@ -1676,7 +1620,7 @@ LABEL(loop_ashr_15_use):
 
 LABEL(nibble_ashr_15_restart_use):
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $15, -16(%rdi, %rdx), %xmm0
+	palignr $15, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1695,7 +1639,7 @@ LABEL(nibble_ashr_15_restart_use):
 	jg	LABEL(nibble_ashr_15_use)
 
 	movdqa	(%rdi, %rdx), %xmm0
-	palignr $15, -16(%rdi, %rdx), %xmm0
+	palignr $15, -16(%rdi, %rdx), D(%xmm0)
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
 #else
@@ -1715,7 +1659,7 @@ LABEL(nibble_ashr_15_restart_use):
 LABEL(nibble_ashr_15_use):
 	sub	$0x1000, %r10
 	movdqa	-16(%rdi, %rdx), %xmm0
-	psrldq	$15, %xmm0
+	psrldq	$15, D(%xmm0)
 	pcmpistri      $0x3a,%xmm0, %xmm0
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
 	cmp	%r11, %rcx
@@ -1834,3 +1778,14 @@ LABEL(unaligned_table):
 #undef LABEL
 #undef GLABEL
 #undef SECTION
+#undef movdqa
+#undef movdqu
+#undef pmovmskb
+#undef pcmpistri
+#undef psubb
+#undef pcmpeqb
+#undef psrldq
+#undef pslldq
+#undef palignr
+#undef pxor
+#undef D

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                               |    5 +
 sysdeps/x86_64/multiarch/strcmp-sse42.S |  305 +++++++++++++------------------
 2 files changed, 135 insertions(+), 175 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]