This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH 06/20] libcpu-rt-c/x86-64: Add memcpy, memmove and mempcpy
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: libc-alpha at sourceware dot org
- Date: Tue, 12 Jun 2018 15:19:25 -0700
- Subject: [PATCH 06/20] libcpu-rt-c/x86-64: Add memcpy, memmove and mempcpy
- References: <20180612221939.19545-1-hjl.tools@gmail.com>
* sysdeps/x86_64/Makefile (cpu-rt-c-routines): Add memcpy,
memmove and mempcpy.
* sysdeps/x86_64/memmove.S: Support libcpu-rt-c.
* sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: Likewise.
* sysdeps/x86_64/multiarch/memcpy-ssse3.S: Likewise.
* sysdeps/x86_64/multiarch/memcpy.c: Likewise.
* sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S:
Likewise.
* sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S:
Likewise.
* sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S:
Likewise.
* sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S:
Likewise.
* sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S: Likewise.
* sysdeps/x86_64/multiarch/memmove.c: Likewise.
* sysdeps/x86_64/multiarch/mempcpy.c: Likewise.
* sysdeps/x86_64/multiarch/Makefile
(libcpu-rt-c-sysdep_routines): Add memcpy-ssse3, memcpy-ssse3-back,
memmove-sse2-unaligned-erms, memmove-ssse3, memmove-ssse3-back,
memmove-avx-unaligned-erms, memmove-avx512-unaligned-erms and
memmove-avx512-no-vzeroupper.
---
sysdeps/x86_64/Makefile | 2 +-
sysdeps/x86_64/memmove.S | 12 +++++--
sysdeps/x86_64/multiarch/Makefile | 8 ++++-
sysdeps/x86_64/multiarch/memcpy-ssse3-back.S | 6 ++--
sysdeps/x86_64/multiarch/memcpy-ssse3.S | 6 ++--
sysdeps/x86_64/multiarch/memcpy.c | 14 +++++---
.../multiarch/memmove-avx-unaligned-erms.S | 2 +-
.../multiarch/memmove-avx512-no-vzeroupper.S | 8 +++--
.../multiarch/memmove-avx512-unaligned-erms.S | 2 +-
.../multiarch/memmove-sse2-unaligned-erms.S | 2 +-
.../multiarch/memmove-vec-unaligned-erms.S | 33 ++++++++++++-------
sysdeps/x86_64/multiarch/memmove.c | 10 ++++--
sysdeps/x86_64/multiarch/mempcpy.c | 10 ++++--
13 files changed, 82 insertions(+), 33 deletions(-)
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 1eb13d01da..978cff6cba 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -147,7 +147,7 @@ endif
endif
ifeq ($(subdir),cpu-rt-c)
-cpu-rt-c-routines += memchr memcmp
+cpu-rt-c-routines += memchr memcmp memcpy memmove mempcpy
# For the CPU run-time tests.
vpath %.c $(..)string
diff --git a/sysdeps/x86_64/memmove.S b/sysdeps/x86_64/memmove.S
index 9cc92ff9a9..ec2b624966 100644
--- a/sysdeps/x86_64/memmove.S
+++ b/sysdeps/x86_64/memmove.S
@@ -29,7 +29,7 @@
#define SECTION(p) p
#ifdef USE_MULTIARCH
-# if !IS_IN (libc)
+# if !(IS_IN (libc) || IS_IN (libcpu_rt_c))
# define MEMCPY_SYMBOL(p,s) memcpy
# endif
#else
@@ -39,8 +39,12 @@
# define MEMCPY_SYMBOL(p,s) memcpy
# endif
#endif
-#if !defined USE_MULTIARCH || !IS_IN (libc)
-# define MEMPCPY_SYMBOL(p,s) __mempcpy
+#if !defined USE_MULTIARCH || !(IS_IN (libc) || IS_IN (libcpu_rt_c))
+# if IS_IN (libcpu_rt_c)
+# define MEMPCPY_SYMBOL(p,s) mempcpy
+# else
+# define MEMPCPY_SYMBOL(p,s) __mempcpy
+# endif
#endif
#ifndef MEMMOVE_SYMBOL
# define MEMMOVE_CHK_SYMBOL(p,s) p
@@ -55,9 +59,11 @@ libc_hidden_builtin_def (memmove)
strong_alias (memmove, __memcpy)
libc_hidden_ver (memmove, memcpy)
# endif
+# if !IS_IN (libcpu_rt_c)
libc_hidden_def (__mempcpy)
weak_alias (__mempcpy, mempcpy)
libc_hidden_builtin_def (mempcpy)
+# endif
# if defined SHARED && IS_IN (libc)
# undef memcpy
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 9bb6c8c3cd..8e86e44e33 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -69,5 +69,11 @@ endif
ifeq ($(subdir),cpu-rt-c)
libcpu-rt-c-sysdep_routines += memchr-sse2 memchr-avx2 \
memcmp-sse2 memcmp-ssse3 memcmp-sse4 \
- memcmp-avx2-movbe
+ memcmp-avx2-movbe memcpy-ssse3 \
+ memcpy-ssse3-back \
+ memmove-sse2-unaligned-erms \
+ memmove-ssse3 memmove-ssse3-back \
+ memmove-avx-unaligned-erms \
+ memmove-avx512-unaligned-erms \
+ memmove-avx512-no-vzeroupper
endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index 7e37035487..9bcd5957cf 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -19,7 +19,7 @@
#include <sysdep.h>
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
#include "asm-syntax.h"
@@ -44,10 +44,12 @@
.section .text.ssse3,"ax",@progbits
#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
+# if IS_IN (libc)
ENTRY (MEMPCPY_CHK)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMPCPY_CHK)
+# endif
ENTRY (MEMPCPY)
movq %rdi, %rax
@@ -56,7 +58,7 @@ ENTRY (MEMPCPY)
END (MEMPCPY)
#endif
-#if !defined USE_AS_BCOPY
+#if !defined USE_AS_BCOPY && IS_IN (libc)
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 5dd209034b..988ce0fc83 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -19,7 +19,7 @@
#include <sysdep.h>
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
#include "asm-syntax.h"
@@ -44,10 +44,12 @@
.section .text.ssse3,"ax",@progbits
#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
+# if IS_IN (libc)
ENTRY (MEMPCPY_CHK)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMPCPY_CHK)
+# endif
ENTRY (MEMPCPY)
movq %rdi, %rax
@@ -56,7 +58,7 @@ ENTRY (MEMPCPY)
END (MEMPCPY)
#endif
-#if !defined USE_AS_BCOPY
+#if !defined USE_AS_BCOPY && IS_IN (libc)
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c
index 419f76aefc..ee3bb2706c 100644
--- a/sysdeps/x86_64/multiarch/memcpy.c
+++ b/sysdeps/x86_64/multiarch/memcpy.c
@@ -18,7 +18,7 @@
<http://www.gnu.org/licenses/>. */
/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
# define memcpy __redirect_memcpy
# include <string.h>
# undef memcpy
@@ -26,14 +26,20 @@
# define SYMBOL_NAME memcpy
# include "ifunc-memmove.h"
+# if IS_IN (libcpu_rt_c)
+# define __new_memcpy memcpy
+# endif
+
libc_ifunc_redirected (__redirect_memcpy, __new_memcpy,
IFUNC_SELECTOR ());
-# ifdef SHARED
+# if !IS_IN (libcpu_rt_c)
+# ifdef SHARED
__hidden_ver1 (__new_memcpy, __GI_memcpy, __redirect_memcpy)
__attribute__ ((visibility ("hidden")));
-# endif
+# endif
-# include <shlib-compat.h>
+# include <shlib-compat.h>
versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
+# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
index e195e93f15..e996ace136 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
@@ -1,4 +1,4 @@
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
# define VEC_SIZE 32
# define VEC(i) ymm##i
# define VMOVNT vmovntdq
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
index effc3ac2de..6cd1accfc5 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
@@ -18,15 +18,18 @@
#include <sysdep.h>
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
# include "asm-syntax.h"
.section .text.avx512,"ax",@progbits
+# if IS_IN (libc)
ENTRY (__mempcpy_chk_avx512_no_vzeroupper)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
END (__mempcpy_chk_avx512_no_vzeroupper)
+strong_alias (__memmove_chk_avx512_no_vzeroupper, __memcpy_chk_avx512_no_vzeroupper)
+# endif
ENTRY (__mempcpy_avx512_no_vzeroupper)
movq %rdi, %rax
@@ -34,10 +37,12 @@ ENTRY (__mempcpy_avx512_no_vzeroupper)
jmp L(start)
END (__mempcpy_avx512_no_vzeroupper)
+# if IS_IN (libc)
ENTRY (__memmove_chk_avx512_no_vzeroupper)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
END (__memmove_chk_avx512_no_vzeroupper)
+# endif
ENTRY (__memmove_avx512_no_vzeroupper)
mov %rdi, %rax
@@ -411,5 +416,4 @@ L(gobble_256bytes_nt_loop_bkw):
END (__memmove_avx512_no_vzeroupper)
strong_alias (__memmove_avx512_no_vzeroupper, __memcpy_avx512_no_vzeroupper)
-strong_alias (__memmove_chk_avx512_no_vzeroupper, __memcpy_chk_avx512_no_vzeroupper)
#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
index aac1515cf6..95381d458e 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
@@ -1,4 +1,4 @@
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
# define VEC_SIZE 64
# define VEC(i) zmm##i
# define VMOVNT vmovntdq
diff --git a/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
index 7c6163ddcb..eae941e58e 100644
--- a/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
@@ -16,7 +16,7 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
# define MEMMOVE_SYMBOL(p,s) p##_sse2_##s
#else
weak_alias (__mempcpy, mempcpy)
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index e2ede45e9f..ada93ea6ad 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -131,7 +131,7 @@ L(start):
jb L(less_vec)
cmpq $(VEC_SIZE * 2), %rdx
ja L(more_2x_vec)
-#if !defined USE_MULTIARCH || !IS_IN (libc)
+#if !defined USE_MULTIARCH || !(IS_IN (libc) || IS_IN (libcpu_rt_c))
L(last_2x_vec):
#endif
/* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
@@ -140,18 +140,20 @@ L(last_2x_vec):
VMOVU %VEC(0), (%rdi)
VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx)
VZEROUPPER
-#if !defined USE_MULTIARCH || !IS_IN (libc)
+#if !defined USE_MULTIARCH || !(IS_IN (libc) || IS_IN (libcpu_rt_c))
L(nop):
#endif
ret
-#if defined USE_MULTIARCH && IS_IN (libc)
+#if defined USE_MULTIARCH && (IS_IN (libc) || IS_IN (libcpu_rt_c))
END (MEMMOVE_SYMBOL (__memmove, unaligned))
# if VEC_SIZE == 16
+# if IS_IN (libc)
ENTRY (__mempcpy_chk_erms)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
END (__mempcpy_chk_erms)
+# endif
/* Only used to measure performance of REP MOVSB. */
ENTRY (__mempcpy_erms)
@@ -163,10 +165,12 @@ ENTRY (__mempcpy_erms)
jmp L(start_movsb)
END (__mempcpy_erms)
+# if IS_IN (libc)
ENTRY (__memmove_chk_erms)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
END (__memmove_chk_erms)
+# endif
ENTRY (__memmove_erms)
movq %rdi, %rax
@@ -195,10 +199,12 @@ L(movsb_backward):
ret
END (__memmove_erms)
strong_alias (__memmove_erms, __memcpy_erms)
+# if IS_IN (libc)
strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
+# endif
# endif
-# ifdef SHARED
+# if defined SHARED && IS_IN (libc)
ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
@@ -212,10 +218,12 @@ ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
# ifdef SHARED
+# if IS_IN (libc)
ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
+# endif
# endif
ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
@@ -321,7 +329,7 @@ L(between_2_3):
movw %si, (%rdi)
ret
-#if defined USE_MULTIARCH && IS_IN (libc)
+#if defined USE_MULTIARCH && (IS_IN (libc) || IS_IN (libcpu_rt_c))
L(movsb_more_2x_vec):
cmpq $REP_MOVSB_THRESHOLD, %rdx
ja L(movsb)
@@ -392,7 +400,8 @@ L(more_8x_vec):
subq %r8, %rdi
/* Adjust length. */
addq %r8, %rdx
-#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+#if ((defined USE_MULTIARCH || VEC_SIZE == 16) \
+ && (IS_IN (libc) || IS_IN (libcpu_rt_c)))
/* Check non-temporal store threshold. */
cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
ja L(large_forward)
@@ -444,7 +453,8 @@ L(more_8x_vec_backward):
subq %r8, %r9
/* Adjust length. */
subq %r8, %rdx
-#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+#if ((defined USE_MULTIARCH || VEC_SIZE == 16) \
+ && (IS_IN (libc) || IS_IN (libcpu_rt_c)))
/* Check non-temporal store threshold. */
cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
ja L(large_backward)
@@ -474,7 +484,8 @@ L(loop_4x_vec_backward):
VZEROUPPER
ret
-#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+#if ((defined USE_MULTIARCH || VEC_SIZE == 16) \
+ && (IS_IN (libc) || IS_IN (libcpu_rt_c)))
L(large_forward):
/* Don't use non-temporal store if there is overlap between
destination and source since destination may be in cache
@@ -547,16 +558,16 @@ L(loop_large_backward):
#endif
END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
# ifdef USE_MULTIARCH
strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_erms),
MEMMOVE_SYMBOL (__memcpy, unaligned_erms))
-# ifdef SHARED
+# if defined SHARED && IS_IN (libc)
strong_alias (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms),
MEMMOVE_SYMBOL (__memcpy_chk, unaligned_erms))
# endif
# endif
-# ifdef SHARED
+# if defined SHARED && IS_IN (libc)
strong_alias (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned),
MEMMOVE_CHK_SYMBOL (__memcpy_chk, unaligned))
# endif
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index d512228eae..a4fad5ae60 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -18,7 +18,7 @@
<http://www.gnu.org/licenses/>. */
/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
# define memmove __redirect_memmove
# include <string.h>
# undef memmove
@@ -26,12 +26,18 @@
# define SYMBOL_NAME memmove
# include "ifunc-memmove.h"
+# if IS_IN (libcpu_rt_c)
+# define __libc_memmove memmove
+# endif
+
libc_ifunc_redirected (__redirect_memmove, __libc_memmove,
IFUNC_SELECTOR ());
+# if !IS_IN (libcpu_rt_c)
strong_alias (__libc_memmove, memmove);
-# ifdef SHARED
+# ifdef SHARED
__hidden_ver1 (__libc_memmove, __GI_memmove, __redirect_memmove)
__attribute__ ((visibility ("hidden")));
+# endif
# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy.c b/sysdeps/x86_64/multiarch/mempcpy.c
index 9fe41dda82..fe3ece0774 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.c
+++ b/sysdeps/x86_64/multiarch/mempcpy.c
@@ -18,7 +18,7 @@
<http://www.gnu.org/licenses/>. */
/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
# define mempcpy __redirect_mempcpy
# define __mempcpy __redirect___mempcpy
# define NO_MEMPCPY_STPCPY_REDIRECT
@@ -30,13 +30,19 @@
# define SYMBOL_NAME mempcpy
# include "ifunc-memmove.h"
+# if IS_IN (libcpu_rt_c)
+# define __mempcpy mempcpy
+# endif
+
libc_ifunc_redirected (__redirect_mempcpy, __mempcpy, IFUNC_SELECTOR ());
+# if !IS_IN (libcpu_rt_c)
weak_alias (__mempcpy, mempcpy)
-# ifdef SHARED
+# ifdef SHARED
__hidden_ver1 (__mempcpy, __GI___mempcpy, __redirect___mempcpy)
__attribute__ ((visibility ("hidden")));
__hidden_ver1 (mempcpy, __GI_mempcpy, __redirect_mempcpy)
__attribute__ ((visibility ("hidden")));
+# endif
# endif
#endif
--
2.17.1