This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]
- From: Florian Weimer <fweimer at redhat dot com>
- To: libc-alpha at sourceware dot org
- Date: Fri, 23 Jun 2017 15:51:16 +0200
- Subject: Re: [PATCH] x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]
- Authentication-results: sourceware.org; auth=none
- Authentication-results: ext-mx07.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com
- Authentication-results: ext-mx07.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=fweimer at redhat dot com
- Dkim-filter: OpenDKIM Filter v2.11.0 mx1.redhat.com 522D8C04F4A5
- Dmarc-filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 522D8C04F4A5
- References: <20170623132026.82F2D4017D45E@oldenburg.str.redhat.com>
I think this revised patch is an improvement because it avoids adding a
branch.
Thanks,
Florian
x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]
This code:
L(between_2_3):
/* Load as big endian with overlapping loads and bswap to avoid
branches. */
movzwl -2(%rdi, %rdx), %eax
movzwl -2(%rsi, %rdx), %ecx
shll $16, %eax
shll $16, %ecx
movzwl (%rdi), %edi
movzwl (%rsi), %esi
orl %edi, %eax
orl %esi, %ecx
bswap %eax
bswap %ecx
subl %ecx, %eax
ret
needs a saturating subtract because the full register is used.
With this commit, only the lower 24 bits of the register are used,
so a regular subtraction suffices.
The test case change adds coverage for these kinds of bugs.
2017-06-23 Florian Weimer <fweimer@redhat.com>
[BZ #21662]
* sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S (between_2_3):
Peform saturating subtraction.
* string/test-memcmp.c (check1): Check with different lengths.
(check_result, do_random_tests): Write error messages to standard
output.
diff --git a/string/test-memcmp.c b/string/test-memcmp.c
index a7969ed..93bc972 100644
--- a/string/test-memcmp.c
+++ b/string/test-memcmp.c
@@ -85,8 +85,8 @@ check_result (impl_t *impl, const CHAR *s1, const CHAR *s2, size_t len,
|| (exp_result < 0 && result >= 0)
|| (exp_result > 0 && result <= 0))
{
- error (0, 0, "Wrong result in function %s %d %d", impl->name,
- result, exp_result);
+ printf ("error: wrong result in function %s %d %d", impl->name,
+ result, exp_result);
ret = 1;
return -1;
}
@@ -192,8 +192,10 @@ do_random_tests (void)
|| (r < 0 && result >= 0)
|| (r > 0 && result <= 0))
{
- error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p",
- n, impl->name, align1 * CHARBYTES & 63, align2 * CHARBYTES & 63, len, pos, r, result, p1, p2);
+ printf ("error: Iteration %zd - wrong result in function %s"
+ " (%zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p",
+ n, impl->name, align1 * CHARBYTES & 63,
+ align2 * CHARBYTES & 63, len, pos, r, result, p1, p2);
ret = 1;
}
}
@@ -441,11 +443,12 @@ check1 (void)
n = 116;
for (size_t i = 0; i < n; i++)
- {
- exp_result = SIMPLE_MEMCMP (s1 + i, s2 + i, n - i);
- FOR_EACH_IMPL (impl, 0)
- check_result (impl, s1 + i, s2 + i, n - i, exp_result);
- }
+ for (size_t len = 0; len <= n - i; ++len)
+ {
+ exp_result = SIMPLE_MEMCMP (s1 + i, s2 + i, len);
+ FOR_EACH_IMPL (impl, 0)
+ check_result (impl, s1 + i, s2 + i, len, exp_result);
+ }
}
/* This test checks that memcmp doesn't overrun buffers. */
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
index 47630dd..4cc9386 100644
--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
@@ -139,16 +139,17 @@ L(exit):
L(between_2_3):
/* Load as big endian with overlapping loads and bswap to avoid
branches. */
- movzwl -2(%rdi, %rdx), %eax
- movzwl -2(%rsi, %rdx), %ecx
- shll $16, %eax
- shll $16, %ecx
- movzwl (%rdi), %edi
- movzwl (%rsi), %esi
- orl %edi, %eax
- orl %esi, %ecx
+ movzwl (%rdi), %eax
+ movzwl (%rsi), %ecx
+ shll $8, %eax
+ shll $8, %ecx
bswap %eax
bswap %ecx
+ movzbl -1(%rdi, %rdx), %edi
+ movzbl -1(%rsi, %rdx), %esi
+ orl %edi, %eax
+ orl %esi, %ecx
+ /* Subtraction is okay because the upper 8 bits a zero. */
subl %ecx, %eax
ret