This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: RFC: Rewrite x86-64 IFUNC selector in C


Here's a patch for memcmp.

I haven't investigated that.  I don't think I understand the implications.

On Wed, Jun 7, 2017 at 3:21 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Jun 7, 2017 at 1:39 PM, Erich Elsen <eriche@google.com> wrote:
>> Here's a patch that also moves memset.S to memset.c.
>
> I integrated your patch into hjl/ifunc/c branch.  Thanks.
>
> BTW, have you investigated using IFUNC internally in libc,so?
>
>> On Tue, May 30, 2017 at 1:32 PM, Adhemerval Zanella
>> <adhemerval.zanella@linaro.org> wrote:
>>>
>>>
>>> On 30/05/2017 17:13, H.J. Lu wrote:
>>>> On Tue, May 30, 2017 at 12:50 PM, Adhemerval Zanella
>>>> <adhemerval.zanella@linaro.org> wrote:
>>>>>
>>>>>
>>>>> On 30/05/2017 13:24, H.J. Lu wrote:
>>>>>> On Mon, May 29, 2017 at 1:34 PM, Adhemerval Zanella
>>>>>> <adhemerval.zanella@linaro.org> wrote:
>>>>>>>
>>>>>>> I think we can simplify it further and use the already existent ifunc macros on
>>>>>>> libc-symbols.h.  Also, for memmove I think we can organize the code better (at
>>>>>>> least for ifunc) and build a extra object with a more meaningful name.  I used
>>>>>>> your logic for the ifunc selection and extended for memmove as well.
>>>>>>>
>>>>>> Here is the combined patch.
>>>>>
>>>>>
>>>>>> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
>>>>>> index 3736f54..8a6ff00 100644
>>>>>> --- a/sysdeps/x86_64/multiarch/Makefile
>>>>>> +++ b/sysdeps/x86_64/multiarch/Makefile
>>>>>> @@ -19,6 +19,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
>>>>>>                  strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \
>>>>>>                  strcspn-c strpbrk-c strspn-c varshift \
>>>>>>                  memset-avx512-no-vzeroupper \
>>>>>> +                memmove-sse2-unaligned-erms \
>>>>>
>>>>> I still think this is a misleading name file since it build not only memmove but
>>>>> also memcpy and mempcpy implementations.  I would prefer to rename to a more
>>>>> general name as in my suggestion.
>>>>
>>>> I'd like to keep it consistent with memmove-avx-unaligned-erms.S and
>>>> memmove-avx512-unaligned-erms.S.   As for making memcpy an alias
>>>> of memmove, and embedding mempcpy, __memcpy_chk,
>>>> __mempcpy_chk, __memmove_chk is an implementation detail.
>>>>
>>>
>>> Alright, at least I think it should add some comments that the object
>>> is building multiple symbols (it wasn't clear when I started to work
>>> on the patch).
>
>
>
> --
> H.J.
From 8210930c1451abad9af8bd97a2fdd6c83ab46651 Mon Sep 17 00:00:00 2001
From: Erich Elsen <eriche@google.com>
Date: Wed, 7 Jun 2017 16:57:25 -0700
Subject: [PATCH 1/1] convert memcmp.S to memcmp.c

---
 sysdeps/x86_64/memcmp.S                            |  8 +++
 sysdeps/x86_64/multiarch/Makefile                  |  1 +
 .../x86_64/multiarch/{memcmp.S => memcmp-sse2.S}   | 36 +------------
 sysdeps/x86_64/multiarch/memcmp.c                  | 60 ++++++++++++++++++++++
 4 files changed, 70 insertions(+), 35 deletions(-)
 rename sysdeps/x86_64/multiarch/{memcmp.S => memcmp-sse2.S} (70%)
 create mode 100644 sysdeps/x86_64/multiarch/memcmp.c

diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S
index 0828a22534..891881dc5f 100644
--- a/sysdeps/x86_64/memcmp.S
+++ b/sysdeps/x86_64/memcmp.S
@@ -20,7 +20,11 @@
 #include <sysdep.h>
 
 	.text
+#if IS_IN(libc)
+ENTRY (__memcmp_sse2)
+#else
 ENTRY (memcmp)
+#endif
 	test	%rdx, %rdx
 	jz	L(finz)
 	cmpq	$1, %rdx
@@ -351,7 +355,11 @@ L(ATR32res):
 	jmp	  L(small)
 	/* Align to 16byte to improve instruction fetch.  */
 	.p2align 4,, 4
+#if IS_IN(libc)
+END(__memcmp_sse2)
+#else
 END(memcmp)
+#endif
 
 #undef bcmp
 weak_alias (memcmp, bcmp)
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 9d2583ad6f..d7f697cebd 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -6,6 +6,7 @@ ifeq ($(subdir),string)
 
 sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
 		   strcmp-sse2-unaligned strncmp-ssse3 \
+		   memcmp-sse2 \
 		   memcmp-avx2-movbe \
 		   memcmp-sse4 memcpy-ssse3 \
 		   memmove-ssse3 \
diff --git a/sysdeps/x86_64/multiarch/memcmp.S b/sysdeps/x86_64/multiarch/memcmp-sse2.S
similarity index 70%
rename from sysdeps/x86_64/multiarch/memcmp.S
rename to sysdeps/x86_64/multiarch/memcmp-sse2.S
index 0c9804b7e9..d7eb99f48c 100644
--- a/sysdeps/x86_64/multiarch/memcmp.S
+++ b/sysdeps/x86_64/multiarch/memcmp-sse2.S
@@ -1,4 +1,4 @@
-/* Multiple versions of memcmp
+/* memcmp with SSE2.
    All versions must be listed in ifunc-impl-list.c.
    Copyright (C) 2010-2017 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
@@ -18,41 +18,7 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc. */
 #if IS_IN (libc)
-	.text
-ENTRY(memcmp)
-	.type	memcmp, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
-	jnz	1f
-	HAS_ARCH_FEATURE (AVX2_Usable)
-	jz	1f
-	HAS_CPU_FEATURE (MOVBE)
-	jz	1f
-	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz	1f
-	leaq	__memcmp_avx2_movbe(%rip), %rax
-	ret
-
-1:	HAS_CPU_FEATURE (SSSE3)
-	jnz	2f
-	leaq	__memcmp_sse2(%rip), %rax
-	ret
-
-2:	HAS_CPU_FEATURE (SSE4_1)
-	jz	3f
-	leaq	__memcmp_sse4_1(%rip), %rax
-	ret
-
-3:	leaq	__memcmp_ssse3(%rip), %rax
-	ret
-
-END(memcmp)
-
 # undef ENTRY
 # define ENTRY(name) \
 	.type __memcmp_sse2, @function; \
diff --git a/sysdeps/x86_64/multiarch/memcmp.c b/sysdeps/x86_64/multiarch/memcmp.c
new file mode 100644
index 0000000000..a0623b6745
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcmp.c
@@ -0,0 +1,60 @@
+/* Multiple versions of memcmp.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in lib and for
+   DSO.  */
+#if IS_IN (libc)
+# define memcmp __redirect_memcmp
+# include <string.h>
+# undef memcmp
+
+# include <init-arch.h>
+# define SYMBOL_NAME memcmp
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) ||
+      !CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) ||
+      !CPU_FEATURES_CPU_P (cpu_features, MOVBE) ||
+      !CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+    {
+      if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3))
+	return OPTIMIZE (sse2);
+
+      if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1))
+	return OPTIMIZE (sse4_1);
+
+      return OPTIMIZE (ssse3);
+    }
+
+  return OPTIMIZE (avx2_movbe);
+}
+
+extern __typeof (__redirect_memcmp) __libc_memcmp;
+
+libc_ifunc(__libc_memcmp, IFUNC_SELECTOR())
+strong_alias (__libc_memcmp, memcmp);
+#endif
-- 
2.13.0.506.g27d5fe0cd-goog


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]