This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
PATCH: Improve x86-64 bcopy/bzero
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: GNU C Library <libc-alpha at sourceware dot org>
- Date: Sat, 6 Oct 2012 15:38:13 -0700
- Subject: PATCH: Improve x86-64 bcopy/bzero
Hi,
This patch improves x86-64 bcopy/bzero by having them jump to the internal
IFUNC memmove/memset, respectively. It also adds tests for bcopy/bzero, since
the bcopy/bzero calls in the existing string/tester.c are inlined by GCC
4.7 and therefore do not exercise the library versions. Tested on x86-64 and i686. OK to install?
Thanks.
H.J.
---
2012-10-06 H.J. Lu <hongjiu.lu@intel.com>
* string/Makefile (strop-tests): Add bcopy and bzero.
* string/test-bcopy-ifunc.c: New file.
* string/test-bcopy.c: Likewise.
* string/test-bzero-ifunc.c: Likewise.
* string/test-bzero.c: Likewise.
* sysdeps/x86_64/multiarch/bcopy.S (bcopy): Jump to
__libc_memmove.
* sysdeps/x86_64/multiarch/bzero.S (__bzero): Jump to
__libc_memset.
* sysdeps/x86_64/multiarch/memset.S (__libc_memset): New alias
of memset.
diff --git a/string/Makefile b/string/Makefile
index cf53009..e22ea32 100644
--- a/string/Makefile
+++ b/string/Makefile
@@ -49,7 +49,7 @@ strop-tests := memchr memcmp memcpy memmove mempcpy memset memccpy \
stpcpy stpncpy strcat strchr strcmp strcpy strcspn \
strlen strncmp strncpy strpbrk strrchr strspn memmem \
strstr strcasestr strnlen strcasecmp strncasecmp \
- strncat rawmemchr strchrnul
+ strncat rawmemchr strchrnul bcopy bzero
tests := tester inl-tester noinl-tester testcopy test-ffs \
tst-strlen stratcliff tst-svc tst-inlcall \
bug-strncat1 bug-strspn1 bug-strpbrk1 tst-bswap \
diff --git a/string/test-bcopy-ifunc.c b/string/test-bcopy-ifunc.c
new file mode 100644
index 0000000..70655ad
--- /dev/null
+++ b/string/test-bcopy-ifunc.c
@@ -0,0 +1,2 @@
+#define TEST_IFUNC 1
+#include "test-bcopy.c"
diff --git a/string/test-bcopy.c b/string/test-bcopy.c
new file mode 100644
index 0000000..0b0b8c6
--- /dev/null
+++ b/string/test-bcopy.c
@@ -0,0 +1,262 @@
+/* Test and measure bcopy functions.
+ Copyright (C) 2012 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define TEST_MAIN
+#define TEST_NAME "bcopy"
+#include "test-string.h"
+
+typedef void (*proto_t) (const char *, char *, size_t);
+void simple_bcopy (const char *, char *, size_t);
+
+IMPL (simple_bcopy, 0)
+IMPL (bcopy, 1)
+
+void
+simple_bcopy (const char *src, char *dst, size_t n)
+{
+ if (src < dst)
+ {
+ dst += n;
+ src += n;
+ while (n--)
+ *--dst = *--src;
+ }
+ else
+ while (n--)
+ *dst++ = *src++;
+}
+
+static void
+do_one_test (impl_t *impl, char *dst, char *src, const char *orig_src,
+ size_t len)
+{
+ memcpy (src, orig_src, len);
+ CALL (impl, src, dst, len);
+
+ if (memcmp (dst, orig_src, len) != 0)
+ {
+ error (0, 0, "Wrong result in function %s dst \"%s\" src \"%s\"",
+ impl->name, dst, src);
+ ret = 1;
+ return;
+ }
+
+ if (HP_TIMING_AVAIL)
+ {
+ hp_timing_t start __attribute ((unused));
+ hp_timing_t stop __attribute ((unused));
+ hp_timing_t best_time = ~ (hp_timing_t) 0;
+ size_t i;
+
+ for (i = 0; i < 32; ++i)
+ {
+ HP_TIMING_NOW (start);
+ CALL (impl, src, dst, len);
+ HP_TIMING_NOW (stop);
+ HP_TIMING_BEST (best_time, start, stop);
+ }
+
+ printf ("\t%zd", (size_t) best_time);
+ }
+}
+
+static void
+do_test (size_t align1, size_t align2, size_t len)
+{
+ size_t i, j;
+ char *s1, *s2;
+
+ align1 &= 63;
+ if (align1 + len >= page_size)
+ return;
+
+ align2 &= 63;
+ if (align2 + len >= page_size)
+ return;
+
+ s1 = (char *) (buf1 + align1);
+ s2 = (char *) (buf2 + align2);
+
+ for (i = 0, j = 1; i < len; i++, j += 23)
+ s1[i] = j;
+
+ if (HP_TIMING_AVAIL)
+ printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
+
+ FOR_EACH_IMPL (impl, 0)
+ do_one_test (impl, s2, (char *) (buf2 + align1), s1, len);
+
+ if (HP_TIMING_AVAIL)
+ putchar ('\n');
+}
+
+static void
+do_random_tests (void)
+{
+ size_t i, n, align1, align2, len, size;
+ size_t srcstart, srcend, dststart, dstend;
+ int c;
+ unsigned char *p1, *p2;
+
+ for (n = 0; n < ITERATIONS; n++)
+ {
+ if ((random () & 255) == 0)
+ size = 65536;
+ else
+ size = 512;
+ if (size > page_size)
+ size = page_size;
+ if ((random () & 3) == 0)
+ {
+ len = random () & (size - 1);
+ align1 = size - len - (random () & 31);
+ align2 = size - len - (random () & 31);
+ if (align1 > size)
+ align1 = 0;
+ if (align2 > size)
+ align2 = 0;
+ }
+ else
+ {
+ align1 = random () & (size / 2 - 1);
+ align2 = random () & (size / 2 - 1);
+ len = random () & (size - 1);
+ if (align1 + len > size)
+ align1 = size - len;
+ if (align2 + len > size)
+ align2 = size - len;
+ }
+
+ p1 = buf1 + page_size - size;
+ p2 = buf2 + page_size - size;
+ c = random () & 255;
+ srcend = align1 + len + 256;
+ if (srcend > size)
+ srcend = size;
+ if (align1 > 256)
+ srcstart = align1 - 256;
+ else
+ srcstart = 0;
+ for (i = srcstart; i < srcend; ++i)
+ p1[i] = random () & 255;
+ dstend = align2 + len + 256;
+ if (dstend > size)
+ dstend = size;
+ if (align2 > 256)
+ dststart = align2 - 256;
+ else
+ dststart = 0;
+
+ FOR_EACH_IMPL (impl, 1)
+ {
+ memset (p2 + dststart, c, dstend - dststart);
+ memcpy (p2 + srcstart, p1 + srcstart, srcend - srcstart);
+ CALL (impl, (char *) (p2 + align1), (char *) (p2 + align2), len);
+ if (memcmp (p1 + align1, p2 + align2, len))
+ {
+ error (0, 0, "Iteration %zd - different strings, %s (%zd, %zd, %zd)",
+ n, impl->name, align1, align2, len);
+ ret = 1;
+ }
+ for (i = dststart; i < dstend; ++i)
+ {
+ if (i >= align2 && i < align2 + len)
+ {
+ i = align2 + len - 1;
+ continue;
+ }
+ if (i >= srcstart && i < srcend)
+ {
+ i = srcend - 1;
+ continue;
+ }
+ if (p2[i] != c)
+ {
+ error (0, 0, "Iteration %zd - garbage in memset area, %s (%zd, %zd, %zd)",
+ n, impl->name, align1, align2, len);
+ ret = 1;
+ break;
+ }
+ }
+
+ if (srcstart < align2
+ && memcmp (p2 + srcstart, p1 + srcstart,
+ (srcend > align2 ? align2 : srcend) - srcstart))
+ {
+ error (0, 0, "Iteration %zd - garbage before dst, %s (%zd, %zd, %zd)",
+ n, impl->name, align1, align2, len);
+ ret = 1;
+ break;
+ }
+
+ i = srcstart > align2 + len ? srcstart : align2 + len;
+ if (srcend > align2 + len
+ && memcmp (p2 + i, p1 + i, srcend - i))
+ {
+ error (0, 0, "Iteration %zd - garbage after dst, %s (%zd, %zd, %zd)",
+ n, impl->name, align1, align2, len);
+ ret = 1;
+ break;
+ }
+ }
+ }
+}
+
+int
+test_main (void)
+{
+ size_t i;
+
+ test_init ();
+
+ printf ("%23s", "");
+ FOR_EACH_IMPL (impl, 0)
+ printf ("\t%s", impl->name);
+ putchar ('\n');
+
+ for (i = 0; i < 14; ++i)
+ {
+ do_test (0, 32, 1 << i);
+ do_test (32, 0, 1 << i);
+ do_test (0, i, 1 << i);
+ do_test (i, 0, 1 << i);
+ }
+
+ for (i = 0; i < 32; ++i)
+ {
+ do_test (0, 32, i);
+ do_test (32, 0, i);
+ do_test (0, i, i);
+ do_test (i, 0, i);
+ }
+
+ for (i = 3; i < 32; ++i)
+ {
+ if ((i & (i - 1)) == 0)
+ continue;
+ do_test (0, 32, 16 * i);
+ do_test (32, 0, 16 * i);
+ do_test (0, i, 16 * i);
+ do_test (i, 0, 16 * i);
+ }
+
+ do_random_tests ();
+ return ret;
+}
+
+#include "../test-skeleton.c"
diff --git a/string/test-bzero-ifunc.c b/string/test-bzero-ifunc.c
new file mode 100644
index 0000000..2fddf36
--- /dev/null
+++ b/string/test-bzero-ifunc.c
@@ -0,0 +1,2 @@
+#define TEST_IFUNC 1
+#include "test-bzero.c"
diff --git a/string/test-bzero.c b/string/test-bzero.c
new file mode 100644
index 0000000..28158f4
--- /dev/null
+++ b/string/test-bzero.c
@@ -0,0 +1,124 @@
+/* Test and measure bzero functions.
+ Copyright (C) 2012 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define TEST_MAIN
+#define TEST_NAME "bzero"
+#define MIN_PAGE_SIZE 131072
+#include "test-string.h"
+
+typedef void (*proto_t) (char *, size_t);
+void simple_bzero (char *, size_t);
+void builtin_bzero (char *, size_t);
+
+IMPL (simple_bzero, 0)
+IMPL (builtin_bzero, 0)
+IMPL (bzero, 1)
+
+void
+simple_bzero (char *s, size_t n)
+{
+ char *r = s, *end = s + n;
+ while (r < end)
+ *r++ = 0;
+}
+
+void
+builtin_bzero (char *s, size_t n)
+{
+ __builtin_bzero (s, n);
+}
+
+static void
+do_one_test (impl_t *impl, char *s, size_t n)
+{
+ char tstbuf[n];
+ simple_bzero (tstbuf, n);
+ CALL (impl, s, n);
+ if (memcmp (s, tstbuf, n) != 0)
+ {
+ error (0, 0, "Wrong result in function %s", impl->name);
+ ret = 1;
+ return;
+ }
+
+ if (HP_TIMING_AVAIL)
+ {
+ hp_timing_t start __attribute ((unused));
+ hp_timing_t stop __attribute ((unused));
+ hp_timing_t best_time = ~ (hp_timing_t) 0;
+ size_t i;
+
+ for (i = 0; i < 32; ++i)
+ {
+ HP_TIMING_NOW (start);
+ CALL (impl, s, n);
+ HP_TIMING_NOW (stop);
+ HP_TIMING_BEST (best_time, start, stop);
+ }
+
+ printf ("\t%zd", (size_t) best_time);
+ }
+}
+
+static void
+do_test (size_t align, size_t len)
+{
+ align &= 7; /* Only the low three bits of the requested alignment are used. */
+ if (align + len > page_size)
+ return;
+
+ if (HP_TIMING_AVAIL)
+ printf ("Length %4zd, alignment %2zd:", len, align); /* bzero has no fill byte to report. */
+
+ FOR_EACH_IMPL (impl, 0)
+ do_one_test (impl, (char *) buf1 + align, len);
+
+ if (HP_TIMING_AVAIL)
+ putchar ('\n');
+}
+
+int
+test_main (void)
+{
+ size_t i;
+
+ test_init ();
+
+ printf ("%24s", "");
+ FOR_EACH_IMPL (impl, 0)
+ printf ("\t%s", impl->name);
+ putchar ('\n');
+
+ for (i = 0; i < 18; ++i)
+ do_test (0, 1 << i);
+ for (i = 1; i < 32; ++i)
+ {
+ do_test (i, i);
+ if (i & (i - 1))
+ do_test (0, i);
+ }
+
+ do_test (1, 14);
+ do_test (2, 25);
+ do_test (3, 1024);
+ do_test (4, 64);
+
+ return ret;
+}
+
+#include "../test-skeleton.c"
diff --git a/sysdeps/x86_64/multiarch/bcopy.S b/sysdeps/x86_64/multiarch/bcopy.S
index 11e250f..bfc73fe 100644
--- a/sysdeps/x86_64/multiarch/bcopy.S
+++ b/sysdeps/x86_64/multiarch/bcopy.S
@@ -3,5 +3,5 @@
.text
ENTRY(bcopy)
xchg %rdi, %rsi
- jmp HIDDEN_BUILTIN_JUMPTARGET(memmove)
+ jmp __libc_memmove
END(bcopy)
diff --git a/sysdeps/x86_64/multiarch/bzero.S b/sysdeps/x86_64/multiarch/bzero.S
index 92e9fcf..309a717 100644
--- a/sysdeps/x86_64/multiarch/bzero.S
+++ b/sysdeps/x86_64/multiarch/bzero.S
@@ -1,5 +1,5 @@
/* Multiple versions of bzero
- Copyright (C) 2010 Free Software Foundation, Inc.
+ Copyright (C) 2010-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -21,35 +21,8 @@
.text
ENTRY(__bzero)
- .type __bzero, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __bzero_x86_64(%rip), %rax
- testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
- jz 2f
- leaq __bzero_sse2(%rip), %rax
-2: ret
-END(__bzero)
-
- .type __bzero_sse2, @function
-__bzero_sse2:
- cfi_startproc
- CALL_MCOUNT
mov %rsi,%rdx /* Adjust parameter. */
xorl %esi,%esi /* Fill with 0s. */
- jmp __memset_sse2
- cfi_endproc
- .size __bzero_sse2, .-__bzero_sse2
-
- .type __bzero_x86_64, @function
-__bzero_x86_64:
- cfi_startproc
- CALL_MCOUNT
- mov %rsi,%rdx /* Adjust parameter. */
- xorl %esi,%esi /* Fill with 0s. */
- jmp __memset_x86_64
- cfi_endproc
- .size __bzero_x86_64, .-__bzero_x86_64
-
+ jmp __libc_memset
+END(__bzero)
weak_alias (__bzero, bzero)
diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S
index 1a7fa2d..abf4191 100644
--- a/sysdeps/x86_64/multiarch/memset.S
+++ b/sysdeps/x86_64/multiarch/memset.S
@@ -1,5 +1,5 @@
/* Multiple versions of memset
- Copyright (C) 2010 Free Software Foundation, Inc.
+ Copyright (C) 2010-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -32,6 +32,10 @@ ENTRY(memset)
leaq __memset_sse2(%rip), %rax
2: ret
END(memset)
+/* Define __libc_memset for bzero. */
+ .globl __libc_memset
+ .hidden __libc_memset
+ __libc_memset = memset
# define USE_SSE2 1