This is the mail archive of the libc-ports@sources.redhat.com mailing list for the libc-ports project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH roland/arm-memcpy] ARM: Make multiarch memcpy always use NEON when compiler does


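When the compiler is emitting NEON instructions anyway (i.e. __ARM_NEON__
is defined), the resulting libc already requires NEON hardware to run, so
there is no point in selecting memcpy via IFUNC at run time: we can just
use the NEON memcpy unconditionally.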

Tested on armv7l-linux-gnueabihf with CC='gcc -mfpu=neon':
no check-abi failures and no regressions in 'make check subdirs=string'.


Thanks,
Roland


ports/ChangeLog.arm
2013-05-13  Roland McGrath  <roland@hack.frob.com>

	* sysdeps/arm/armv7/multiarch/memcpy.S [__ARM_NEON__]: Don't define
	memcpy here, just __memcpy_arm and __aeabi_memcpy*.
	* sysdeps/arm/armv7/multiarch/memcpy_neon.S [__ARM_NEON__]:
	Define memcpy here, not __memcpy_neon.
	* sysdeps/arm/armv7/multiarch/memcpy_vfp.S [__ARM_NEON__]:
	Define nothing here.
	* sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
	(__libc_ifunc_impl_list) [__ARM_NEON__]: Don't list __memcpy_vfp;
	use memcpy name for NEON implementation.

--- a/ports/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
+++ b/ports/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
@@ -35,9 +35,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   IFUNC_IMPL (i, name, memcpy,
 	      IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_ARM_NEON,
-			      __memcpy_neon)
+#ifdef __ARM_NEON__
+                              memcpy
+#else
+			      __memcpy_neon
+#endif
+                              )
+#ifndef __ARM_NEON__
 	      IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_ARM_VFP,
 			      __memcpy_vfp)
+#endif
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_arm));
 
   return i;
--- a/ports/sysdeps/arm/armv7/multiarch/memcpy.S
+++ b/ports/sysdeps/arm/armv7/multiarch/memcpy.S
@@ -22,27 +22,29 @@
 #include <sysdep.h>
 #include <rtld-global-offsets.h>
 
-#if !defined NOT_IN_libc
+#ifndef NOT_IN_libc
+/* Under __ARM_NEON__, memcpy_neon.S defines the name memcpy.  */
+# ifndef __ARM_NEON__
 	.text
 ENTRY(memcpy)
 	.type	memcpy, %gnu_indirect_function
-#ifdef __SOFTFP__
+# ifdef __SOFTFP__
 	ldr	r1, .Lmemcpy_arm
 	tst	r0, #HWCAP_ARM_VFP
 	ldrne	r1, .Lmemcpy_vfp
-#else
+# else
 	ldr	r1, .Lmemcpy_vfp
-#endif
+# endif
 	tst	r0, #HWCAP_ARM_NEON
 	ldrne	r1, .Lmemcpy_neon
 1:
 	add	r0, r1, pc
 	DO_RET(lr)
 
-#ifdef __SOFTFP__
+# ifdef __SOFTFP__
 .Lmemcpy_arm:
 	.long	C_SYMBOL_NAME(__memcpy_arm) - 1b - PC_OFS
-#endif
+# endif
 .Lmemcpy_neon:
 	.long	C_SYMBOL_NAME(__memcpy_neon) - 1b - PC_OFS
 .Lmemcpy_vfp:
@@ -51,6 +53,7 @@ ENTRY(memcpy)
 END(memcpy)
 
 libc_hidden_builtin_def (memcpy)
+#endif  /* Not __ARM_NEON__.  */
 
 /* These versions of memcpy are defined not to clobber any VFP or NEON
    registers so they must always call the ARM variant of the memcpy code.  */
--- a/ports/sysdeps/arm/armv7/multiarch/memcpy_neon.S
+++ b/ports/sysdeps/arm/armv7/multiarch/memcpy_neon.S
@@ -1,3 +1,9 @@
+#ifdef __ARM_NEON__
+/* Under __ARM_NEON__, this file defines memcpy directly.  */
+libc_hidden_builtin_def (memcpy)
+#else
+# define memcpy __memcpy_neon
+#endif
+
 #define MEMCPY_NEON
-#define memcpy __memcpy_neon
 #include "memcpy_impl.S"
--- a/ports/sysdeps/arm/armv7/multiarch/memcpy_vfp.S
+++ b/ports/sysdeps/arm/armv7/multiarch/memcpy_vfp.S
@@ -1,3 +1,7 @@
-#define MEMCPY_VFP
-#define memcpy __memcpy_vfp
-#include "memcpy_impl.S"
+/* Under __ARM_NEON__, memcpy_neon.S defines memcpy directly
+   and the __memcpy_vfp code will never be used.  */
+#ifndef __ARM_NEON__
+# define MEMCPY_VFP
+# define memcpy __memcpy_vfp
+# include "memcpy_impl.S"
+#endif


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]