This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Re: [PATCH] x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]

From: Florian Weimer <fweimer at redhat dot com>
To: libc-alpha at sourceware dot org
Date: Fri, 23 Jun 2017 15:51:16 +0200
Subject: Re: [PATCH] x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]
Authentication-results: sourceware.org; auth=none
Authentication-results: ext-mx07.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com
Authentication-results: ext-mx07.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=fweimer at redhat dot com
Dkim-filter: OpenDKIM Filter v2.11.0 mx1.redhat.com 522D8C04F4A5
Dmarc-filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 522D8C04F4A5
References: <20170623132026.82F2D4017D45E@oldenburg.str.redhat.com>

I think this revised patch is an improvement because it avoids adding a
branch.

Thanks,
Florian

x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]

This code:

L(between_2_3):
	/* Load as big endian with overlapping loads and bswap to avoid
	   branches.  */
	movzwl	-2(%rdi, %rdx), %eax
	movzwl	-2(%rsi, %rdx), %ecx
	shll	$16, %eax
	shll	$16, %ecx
	movzwl	(%rdi), %edi
	movzwl	(%rsi), %esi
	orl	%edi, %eax
	orl	%esi, %ecx
	bswap	%eax
	bswap	%ecx
	subl	%ecx, %eax
	ret

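needs a saturating subtract because the full 32-bit register is used:
the difference of two unsigned 32-bit values may not fit in the signed
int result, so the sign of the return value can be wrong.
With this commit, only the lower 24 bits of the register are used,
so a regular subtraction suffices.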

The test case change adds coverage for these kinds of bugs.

2017-06-23  Florian Weimer  <fweimer@redhat.com>

	[BZ #21662]
	* sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S (between_2_3):
	Perform saturating subtraction.
	* string/test-memcmp.c (check1): Check with different lengths.
	(check_result, do_random_tests): Write error messages to standard
	output.

diff --git a/string/test-memcmp.c b/string/test-memcmp.c
index a7969ed..93bc972 100644
--- a/string/test-memcmp.c
+++ b/string/test-memcmp.c
@@ -85,8 +85,8 @@ check_result (impl_t *impl, const CHAR *s1, const CHAR *s2, size_t len,
       || (exp_result < 0 && result >= 0)
       || (exp_result > 0 && result <= 0))
     {
-      error (0, 0, "Wrong result in function %s %d %d", impl->name,
-	     result, exp_result);
+      printf ("error: wrong result in function %s %d %d", impl->name,
+	      result, exp_result);
       ret = 1;
       return -1;
     }
@@ -192,8 +192,10 @@ do_random_tests (void)
 	      || (r < 0 && result >= 0)
 	      || (r > 0 && result <= 0))
 	    {
-	      error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p",
-		     n, impl->name, align1 * CHARBYTES & 63,  align2 * CHARBYTES & 63, len, pos, r, result, p1, p2);
+	      printf ("error: Iteration %zd - wrong result in function %s"
+		      " (%zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p",
+		      n, impl->name, align1 * CHARBYTES & 63,
+		      align2 * CHARBYTES & 63, len, pos, r, result, p1, p2);
 	      ret = 1;
 	    }
 	}
@@ -441,11 +443,12 @@ check1 (void)
 
   n = 116;
   for (size_t i = 0; i < n; i++)
-    {
-      exp_result = SIMPLE_MEMCMP (s1 + i, s2 + i, n - i);
-      FOR_EACH_IMPL (impl, 0)
-	check_result (impl, s1 + i, s2 + i, n - i, exp_result);
-    }
+    for (size_t len = 0; len <= n - i; ++len)
+      {
+	exp_result = SIMPLE_MEMCMP (s1 + i, s2 + i, len);
+	FOR_EACH_IMPL (impl, 0)
+	  check_result (impl, s1 + i, s2 + i, len, exp_result);
+      }
 }
 
 /* This test checks that memcmp doesn't overrun buffers.  */
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
index 47630dd..4cc9386 100644
--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
@@ -139,16 +139,17 @@ L(exit):
 L(between_2_3):
 	/* Load as big endian with overlapping loads and bswap to avoid
 	   branches.  */
-	movzwl	-2(%rdi, %rdx), %eax
-	movzwl	-2(%rsi, %rdx), %ecx
-	shll	$16, %eax
-	shll	$16, %ecx
-	movzwl	(%rdi), %edi
-	movzwl	(%rsi), %esi
-	orl	%edi, %eax
-	orl	%esi, %ecx
+	movzwl	(%rdi), %eax
+	movzwl	(%rsi), %ecx
+	shll	$8, %eax
+	shll	$8, %ecx
 	bswap	%eax
 	bswap	%ecx
+	movzbl	-1(%rdi, %rdx), %edi
+	movzbl	-1(%rsi, %rdx), %esi
+	orl	%edi, %eax
+	orl	%esi, %ecx
+	/* Subtraction is okay because the upper 8 bits are zero.  */
 	subl	%ecx, %eax
 	ret

Follow-Ups:
- Re: [PATCH] x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]
  - From: H.J. Lu
- Re: [PATCH] x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]
  - From: Carlos O'Donell
- Re: [PATCH] x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]
  - From: Richard Henderson

References:
- [PATCH] x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]
  - From: Florian Weimer

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]