[PATCH] riscv: Fix alignment-ignorant memcpy implementation

Adhemerval Zanella adhemerval.zanella@linaro.org
Mon Mar 4 17:59:02 GMT 2024


The memcpy optimization (commit 587a1290a1af7bee6db) has a series
of mistakes:

  - The implementation is wrong: the chunk size calculation is wrong
    leading to invalid memory access.

  - It adds ifunc supports as default, so --disable-multi-arch does
    not work as expected for riscv.

  - It mixes Linux files (memcpy ifunc selection which requires the
    vDSO/syscall mechanism)  with generic support (the memcpy
    optimization itself).

  - There is no __libc_ifunc_impl_list, which makes testing only
    check the selected implementation instead of all supported
    by the system.

Also, there is no reason why the implementation can't be coded in C,
since it uses only normal registers and the compiler is able to
generate code as good as the assembly implementation.  I have not
checked the performance, but the C implementation uses the same
strategies, the generate code with gcc 13 seems straightforward, and
the tail code also avoid byte-operations.

This patch also simplifies the required bits to enable ifunc: there
is no need to memcopy.h; nor to add Linux-specific files.

Checked on riscv64 and riscv32 by explicitly enabling the function
on __libc_ifunc_impl_list on qemu-system.
---
 sysdeps/riscv/memcpy_noalignment.S            | 136 ------------------
 .../multiarch}/memcpy-generic.c               |   8 +-
 sysdeps/riscv/multiarch/memcpy_noalignment.c  | 100 +++++++++++++
 sysdeps/unix/sysv/linux/riscv/Makefile        |   9 --
 sysdeps/unix/sysv/linux/riscv/hwprobe.c       |   1 +
 .../sysv/linux/riscv/include/sys/hwprobe.h    |   8 ++
 .../unix/sysv/linux/riscv/multiarch/Makefile  |   7 +
 .../linux/riscv/multiarch/ifunc-impl-list.c}  |  33 +++--
 .../sysv/linux/riscv/multiarch}/memcpy.c      |  16 +--
 9 files changed, 151 insertions(+), 167 deletions(-)
 delete mode 100644 sysdeps/riscv/memcpy_noalignment.S
 rename sysdeps/{unix/sysv/linux/riscv => riscv/multiarch}/memcpy-generic.c (87%)
 create mode 100644 sysdeps/riscv/multiarch/memcpy_noalignment.c
 create mode 100644 sysdeps/unix/sysv/linux/riscv/include/sys/hwprobe.h
 create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
 rename sysdeps/{riscv/memcopy.h => unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c} (51%)
 rename sysdeps/{riscv => unix/sysv/linux/riscv/multiarch}/memcpy.c (90%)

diff --git a/sysdeps/riscv/memcpy_noalignment.S b/sysdeps/riscv/memcpy_noalignment.S
deleted file mode 100644
index 621f8d028f..0000000000
--- a/sysdeps/riscv/memcpy_noalignment.S
+++ /dev/null
@@ -1,136 +0,0 @@
-/* memcpy for RISC-V, ignoring buffer alignment
-   Copyright (C) 2024 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <sys/asm.h>
-
-/* void *memcpy(void *, const void *, size_t) */
-ENTRY (__memcpy_noalignment)
-	move t6, a0  /* Preserve return value */
-
-	/* Bail if 0 */
-	beqz a2, 7f
-
-	/* Jump to byte copy if size < SZREG */
-	li a4, SZREG
-	bltu a2, a4, 5f
-
-	/* Round down to the nearest "page" size */
-	andi a4, a2, ~((16*SZREG)-1)
-	beqz a4, 2f
-	add a3, a1, a4
-
-	/* Copy the first word to get dest word aligned */
-	andi a5, t6, SZREG-1
-	beqz a5, 1f
-	REG_L a6, (a1)
-	REG_S a6, (t6)
-
-	/* Align dst up to a word, move src and size as well. */
-	addi t6, t6, SZREG-1
-	andi t6, t6, ~(SZREG-1)
-	sub a5, t6, a0
-	add a1, a1, a5
-	sub a2, a2, a5
-
-	/* Recompute page count */
-	andi a4, a2, ~((16*SZREG)-1)
-	beqz a4, 2f
-
-1:
-	/* Copy "pages" (chunks of 16 registers) */
-	REG_L a4,       0(a1)
-	REG_L a5,   SZREG(a1)
-	REG_L a6, 2*SZREG(a1)
-	REG_L a7, 3*SZREG(a1)
-	REG_L t0, 4*SZREG(a1)
-	REG_L t1, 5*SZREG(a1)
-	REG_L t2, 6*SZREG(a1)
-	REG_L t3, 7*SZREG(a1)
-	REG_L t4, 8*SZREG(a1)
-	REG_L t5, 9*SZREG(a1)
-	REG_S a4,       0(t6)
-	REG_S a5,   SZREG(t6)
-	REG_S a6, 2*SZREG(t6)
-	REG_S a7, 3*SZREG(t6)
-	REG_S t0, 4*SZREG(t6)
-	REG_S t1, 5*SZREG(t6)
-	REG_S t2, 6*SZREG(t6)
-	REG_S t3, 7*SZREG(t6)
-	REG_S t4, 8*SZREG(t6)
-	REG_S t5, 9*SZREG(t6)
-	REG_L a4, 10*SZREG(a1)
-	REG_L a5, 11*SZREG(a1)
-	REG_L a6, 12*SZREG(a1)
-	REG_L a7, 13*SZREG(a1)
-	REG_L t0, 14*SZREG(a1)
-	REG_L t1, 15*SZREG(a1)
-	addi a1, a1, 16*SZREG
-	REG_S a4, 10*SZREG(t6)
-	REG_S a5, 11*SZREG(t6)
-	REG_S a6, 12*SZREG(t6)
-	REG_S a7, 13*SZREG(t6)
-	REG_S t0, 14*SZREG(t6)
-	REG_S t1, 15*SZREG(t6)
-	addi t6, t6, 16*SZREG
-	bltu a1, a3, 1b
-	andi a2, a2, (16*SZREG)-1  /* Update count */
-
-2:
-	/* Remainder is smaller than a page, compute native word count */
-	beqz a2, 7f
-	andi a5, a2, ~(SZREG-1)
-	andi a2, a2, (SZREG-1)
-	add a3, a1, a5
-	/* Jump directly to last word if no words. */
-	beqz a5, 4f
-
-3:
-	/* Use single native register copy */
-	REG_L a4, 0(a1)
-	addi a1, a1, SZREG
-	REG_S a4, 0(t6)
-	addi t6, t6, SZREG
-	bltu a1, a3, 3b
-
-	/* Jump directly out if no more bytes */
-	beqz a2, 7f
-
-4:
-	/* Copy the last word unaligned */
-	add a3, a1, a2
-	add a4, t6, a2
-	REG_L a5, -SZREG(a3)
-	REG_S a5, -SZREG(a4)
-	ret
-
-5:
-	/* Copy bytes when the total copy is <SZREG */
-	add a3, a1, a2
-
-6:
-	lb a4, 0(a1)
-	addi a1, a1, 1
-	sb a4, 0(t6)
-	addi t6, t6, 1
-	bltu a1, a3, 6b
-
-7:
-	ret
-
-END (__memcpy_noalignment)
diff --git a/sysdeps/unix/sysv/linux/riscv/memcpy-generic.c b/sysdeps/riscv/multiarch/memcpy-generic.c
similarity index 87%
rename from sysdeps/unix/sysv/linux/riscv/memcpy-generic.c
rename to sysdeps/riscv/multiarch/memcpy-generic.c
index f06f4bda15..4235d33859 100644
--- a/sysdeps/unix/sysv/linux/riscv/memcpy-generic.c
+++ b/sysdeps/riscv/multiarch/memcpy-generic.c
@@ -18,7 +18,9 @@
 
 #include <string.h>
 
-extern __typeof (memcpy) __memcpy_generic;
-hidden_proto (__memcpy_generic)
-
+#if IS_IN(libc)
+# define MEMCPY __memcpy_generic
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(x)
+#endif
 #include <string/memcpy.c>
diff --git a/sysdeps/riscv/multiarch/memcpy_noalignment.c b/sysdeps/riscv/multiarch/memcpy_noalignment.c
new file mode 100644
index 0000000000..9552ae45fc
--- /dev/null
+++ b/sysdeps/riscv/multiarch/memcpy_noalignment.c
@@ -0,0 +1,100 @@
+/* Copy memory to memory until the specified number of bytes.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+#include <stdint.h>
+#include <string-optype.h>
+
+/* memcpy optimization for chips with fast unaligned support
+   (RISCV_HWPROBE_MISALIGNED_FAST).  */
+
+#define OPSIZ  (sizeof (op_t))
+
+typedef uint32_t __attribute__ ((__may_alias__)) op32_t;
+typedef uint16_t __attribute__ ((__may_alias__)) op16_t;
+
+void *
+__memcpy_noalignment (void *dest, const void *src, size_t n)
+{
+  unsigned char *d = dest;
+  const unsigned char *s = src;
+
+  if (__glibc_unlikely (n == 0))
+    return dest;
+
+  if (n < OPSIZ)
+    goto tail;
+
+#define COPY_WORD(__d, __s, __i) \
+  *(op_t *)(__d + (__i * OPSIZ)) = *(op_t *)(__s + (__i * OPSIZ))
+
+  /* Copy the first op_t to get dest word aligned.  */
+  COPY_WORD (d, s, 0);
+  size_t off = OPSIZ - (uintptr_t)d % OPSIZ;
+  d += off;
+  s += off;
+  n -= off;
+
+  /* Copy chunks of 16 registers.  */
+  enum { block_size = 16 * OPSIZ };
+
+  for (; n >= block_size; s += block_size, d += block_size, n -= block_size)
+    {
+
+      COPY_WORD (d, s, 0);
+      COPY_WORD (d, s, 1);
+      COPY_WORD (d, s, 2);
+      COPY_WORD (d, s, 3);
+      COPY_WORD (d, s, 4);
+      COPY_WORD (d, s, 5);
+      COPY_WORD (d, s, 6);
+      COPY_WORD (d, s, 7);
+      COPY_WORD (d, s, 8);
+      COPY_WORD (d, s, 9);
+      COPY_WORD (d, s, 10);
+      COPY_WORD (d, s, 11);
+      COPY_WORD (d, s, 12);
+      COPY_WORD (d, s, 13);
+      COPY_WORD (d, s, 14);
+      COPY_WORD (d, s, 15);
+    }
+
+  /* Remainder of chunks, issue a op_t copy until n < OPSIZ.  */
+  for (; n >= OPSIZ; s += OPSIZ, d += OPSIZ, n -= OPSIZ)
+    COPY_WORD (d, s, 0);
+
+tail:
+  if (n & 4)
+    {
+      *(op32_t *)(d) = *(op32_t *)s;
+      s += sizeof (op32_t);
+      d += sizeof (op32_t);
+    }
+
+  if (n & 2)
+    {
+      *(op16_t *)(d) = *(op16_t *)s;
+      s += sizeof (op16_t);
+      d += sizeof (op16_t);
+    }
+
+  if (n & 1)
+    *d = *s;
+
+  return dest;
+}
diff --git a/sysdeps/unix/sysv/linux/riscv/Makefile b/sysdeps/unix/sysv/linux/riscv/Makefile
index 398ff7418b..04abf226ad 100644
--- a/sysdeps/unix/sysv/linux/riscv/Makefile
+++ b/sysdeps/unix/sysv/linux/riscv/Makefile
@@ -15,15 +15,6 @@ ifeq ($(subdir),stdlib)
 gen-as-const-headers += ucontext_i.sym
 endif
 
-ifeq ($(subdir),string)
-sysdep_routines += \
-  memcpy \
-  memcpy-generic \
-  memcpy_noalignment \
-  # sysdep_routines
-
-endif
-
 abi-variants := ilp32 ilp32d lp64 lp64d
 
 ifeq (,$(filter $(default-abi),$(abi-variants)))
diff --git a/sysdeps/unix/sysv/linux/riscv/hwprobe.c b/sysdeps/unix/sysv/linux/riscv/hwprobe.c
index e64c159eb3..9159045478 100644
--- a/sysdeps/unix/sysv/linux/riscv/hwprobe.c
+++ b/sysdeps/unix/sysv/linux/riscv/hwprobe.c
@@ -34,3 +34,4 @@ int __riscv_hwprobe (struct riscv_hwprobe *pairs, size_t pair_count,
   /* Negate negative errno values to match pthreads API. */
   return -r;
 }
+libc_hidden_def (__riscv_hwprobe)
diff --git a/sysdeps/unix/sysv/linux/riscv/include/sys/hwprobe.h b/sysdeps/unix/sysv/linux/riscv/include/sys/hwprobe.h
new file mode 100644
index 0000000000..cce91c1b53
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/riscv/include/sys/hwprobe.h
@@ -0,0 +1,8 @@
+#ifndef _SYS_HWPROBE_H
+# include_next <sys/hwprobe.h>
+
+#ifndef _ISOMAC
+libc_hidden_proto (__riscv_hwprobe)
+#endif
+
+#endif
diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
new file mode 100644
index 0000000000..7e567cb95c
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
@@ -0,0 +1,7 @@
+ifeq ($(subdir),string)
+sysdep_routines += \
+  memcpy \
+  memcpy-generic \
+  memcpy_noalignment \
+  # sysdep_routines
+endif
diff --git a/sysdeps/riscv/memcopy.h b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
similarity index 51%
rename from sysdeps/riscv/memcopy.h
rename to sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
index 27675964b0..9f806d7a9e 100644
--- a/sysdeps/riscv/memcopy.h
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
@@ -1,4 +1,4 @@
-/* memcopy.h -- definitions for memory copy functions. RISC-V version.
+/* Enumerate available IFUNC implementations of a function.  RISCV version.
    Copyright (C) 2024 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -16,11 +16,28 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <sysdeps/generic/memcopy.h>
+#include <ifunc-impl-list.h>
+#include <string.h>
+#include <sys/hwprobe.h>
 
-/* Redefine the generic memcpy implementation to __memcpy_generic, so
-   the memcpy ifunc can select between generic and special versions.
-   In rtld, don't bother with all the ifunciness. */
-#if IS_IN (libc)
-#define MEMCPY __memcpy_generic
-#endif
+size_t
+__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+			size_t max)
+{
+  size_t i = max;
+
+  bool fast_unaligned = false;
+
+  struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
+  if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
+      && (pair.value & RISCV_HWPROBE_MISALIGNED_MASK)
+          == RISCV_HWPROBE_MISALIGNED_FAST)
+    fast_unaligned = true;
+
+  IFUNC_IMPL (i, name, memcpy,
+	      IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
+			      __memcpy_noalignment)
+	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
+
+  return 0;
+}
diff --git a/sysdeps/riscv/memcpy.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
similarity index 90%
rename from sysdeps/riscv/memcpy.c
rename to sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
index 20f9548c44..51d8ace858 100644
--- a/sysdeps/riscv/memcpy.c
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
@@ -28,8 +28,6 @@
 # include <riscv-ifunc.h>
 # include <sys/hwprobe.h>
 
-# define INIT_ARCH()
-
 extern __typeof (__redirect_memcpy) __libc_memcpy;
 
 extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
@@ -38,14 +36,9 @@ extern __typeof (__redirect_memcpy) __memcpy_noalignment attribute_hidden;
 static inline __typeof (__redirect_memcpy) *
 select_memcpy_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
 {
-  unsigned long long int value;
-
-  INIT_ARCH ();
-
-  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_CPUPERF_0, &value) != 0)
-    return __memcpy_generic;
-
-  if ((value & RISCV_HWPROBE_MISALIGNED_MASK) == RISCV_HWPROBE_MISALIGNED_FAST)
+  unsigned long long int v;
+  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_CPUPERF_0, &v) == 0
+      && (v & RISCV_HWPROBE_MISALIGNED_MASK) == RISCV_HWPROBE_MISALIGNED_FAST)
     return __memcpy_noalignment;
 
   return __memcpy_generic;
@@ -59,5 +52,6 @@ strong_alias (__libc_memcpy, memcpy);
 __hidden_ver1 (memcpy, __GI_memcpy, __redirect_memcpy)
   __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memcpy);
 # endif
-
+#else
+# include <string/memcpy.c>
 #endif
-- 
2.34.1



More information about the Libc-alpha mailing list