This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch hjl/erms/master created. glibc-2.23-108-g13b76b0


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, hjl/erms/master has been created
        at  13b76b08d4ebbd9e0bad848dea0e9d8f65a5a4ca (commit)

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=13b76b08d4ebbd9e0bad848dea0e9d8f65a5a4ca

commit 13b76b08d4ebbd9e0bad848dea0e9d8f65a5a4ca
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Apr 11 08:51:16 2014 -0700

    Test 32-bit ERMS memcpy/memset
    
    	* sysdeps/i386/i686/multiarch/ifunc-impl-list.c
    	(__libc_ifunc_impl_list): Add __bcopy_erms, __bzero_erms,
    	__memmove_chk_erms, __memmove_erms, __memset_chk_erms,
    	__memset_erms, __memcpy_chk_erms, __memcpy_erms,
    	__mempcpy_chk_erms and __mempcpy_erms.

diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
index ef30a95..f3cbca0 100644
--- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
+++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
@@ -44,6 +44,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __bcopy_ssse3)
 	      IFUNC_IMPL_ADD (array, i, bcopy, HAS_CPU_FEATURE (SSE2),
 			      __bcopy_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_erms)
 	      IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/bzero.S.  */
@@ -52,6 +53,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __bzero_sse2_rep)
 	      IFUNC_IMPL_ADD (array, i, bzero, HAS_CPU_FEATURE (SSE2),
 			      __bzero_sse2)
+	      IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_erms)
 	      IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memchr.S.  */
@@ -82,6 +84,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      HAS_CPU_FEATURE (SSE2),
 			      __memmove_chk_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
+			      __memmove_chk_erms)
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
 			      __memmove_chk_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memmove.S.  */
@@ -92,6 +96,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memmove_ssse3)
 	      IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSE2),
 			      __memmove_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_erms)
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memrchr.S.  */
@@ -111,6 +116,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      HAS_CPU_FEATURE (SSE2),
 			      __memset_chk_sse2)
 	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
+			      __memset_chk_erms)
+	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
 			      __memset_chk_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memset.S.  */
@@ -119,6 +126,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memset_sse2_rep)
 	      IFUNC_IMPL_ADD (array, i, memset, HAS_CPU_FEATURE (SSE2),
 			      __memset_sse2)
+	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_erms)
 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/rawmemchr.S.  */
@@ -319,6 +327,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      HAS_CPU_FEATURE (SSE2),
 			      __memcpy_chk_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
+			      __memcpy_chk_erms)
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
 			      __memcpy_chk_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memcpy.S.  */
@@ -329,6 +339,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memcpy_ssse3)
 	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSE2),
 			      __memcpy_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_erms)
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S.  */
@@ -343,6 +354,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      HAS_CPU_FEATURE (SSE2),
 			      __mempcpy_chk_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
+			      __mempcpy_chk_erms)
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
 			      __mempcpy_chk_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/mempcpy.S.  */
@@ -353,6 +366,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __mempcpy_ssse3)
 	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSE2),
 			      __mempcpy_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_erms)
 	      IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strlen.S.  */

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=e0bac95bc67df50941d3b9ccd9cdbd174ed24c7e

commit e0bac95bc67df50941d3b9ccd9cdbd174ed24c7e
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Apr 11 08:25:17 2014 -0700

    Test 64-bit ERMS memcpy/memset
    
    	* sysdeps/x86_64/multiarch/ifunc-impl-list.c
    	(__libc_ifunc_impl_list): Add __memmove_chk_erms,
    	__memmove_erms, __memset_erms, __memset_chk_erms,
    	__memcpy_chk_erms, __memcpy_erms, __mempcpy_chk_erms and
    	__mempcpy_erms.

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 188b6d3..b0d300d 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -63,6 +63,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      HAS_CPU_FEATURE (SSSE3),
 			      __memmove_chk_ssse3)
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
+			      __memmove_chk_erms)
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
 			      __memmove_chk_sse2))
 
   /* Support sysdeps/x86_64/multiarch/memmove.S.  */
@@ -79,12 +81,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memmove_ssse3_back)
 	      IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
 			      __memmove_ssse3)
+	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_erms)
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
 
   /* Support sysdeps/x86_64/multiarch/memset_chk.S.  */
   IFUNC_IMPL (i, name, __memset_chk,
 	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
 			      __memset_chk_sse2)
+	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
+			      __memset_chk_erms)
 	      IFUNC_IMPL_ADD (array, i, __memset_chk,
 			      HAS_ARCH_FEATURE (AVX2_Usable),
 			      __memset_chk_avx2)
@@ -98,6 +103,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   /* Support sysdeps/x86_64/multiarch/memset.S.  */
   IFUNC_IMPL (i, name, memset,
 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2)
+	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_erms)
 	      IFUNC_IMPL_ADD (array, i, memset,
 			      HAS_ARCH_FEATURE (AVX2_Usable),
 			      __memset_avx2)
@@ -278,6 +284,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      HAS_CPU_FEATURE (SSSE3),
 			      __memcpy_chk_ssse3)
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
+			      __memcpy_chk_erms)
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
 			      __memcpy_chk_sse2))
 
   /* Support sysdeps/x86_64/multiarch/memcpy.S.  */
@@ -295,6 +303,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memcpy_avx512_no_vzeroupper)
 #endif
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_erms)
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2))
 
   /* Support sysdeps/x86_64/multiarch/mempcpy_chk.S.  */
@@ -314,6 +323,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      HAS_CPU_FEATURE (SSSE3),
 			      __mempcpy_chk_ssse3)
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
+			      __mempcpy_chk_erms)
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
 			      __mempcpy_chk_sse2))
 
   /* Support sysdeps/x86_64/multiarch/mempcpy.S.  */
@@ -330,6 +341,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __mempcpy_ssse3_back)
 	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
 			      __mempcpy_ssse3)
+	      IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_erms)
 	      IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2))
 
   /* Support sysdeps/x86_64/multiarch/strncmp.S.  */

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=efae27774340f8988cca33076fc4c3c2e944b995

commit efae27774340f8988cca33076fc4c3c2e944b995
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Wed Sep 21 15:21:28 2011 -0700

    Add 32-bit ERMS memcpy/memset
    
    	* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
    	bcopy-erms, memcpy-erms, memmove-erms, mempcpy-erms, bzero-erms
    	and memset-erms.
    	* sysdeps/i386/i686/multiarch/bcopy-erms.S: New file.
    	* sysdeps/i386/i686/multiarch/bzero-erms.S: Likewise.
    	* sysdeps/i386/i686/multiarch/memcpy-erms.S: Likewise.
    	* sysdeps/i386/i686/multiarch/memmove-erms.S: Likewise.
    	* sysdeps/i386/i686/multiarch/mempcpy-erms.S: Likewise.
    	* sysdeps/i386/i686/multiarch/memset-erms.S: Likewise.
    	* sysdeps/i386/i686/multiarch/ifunc-defines.sym: Add
    	COMMON_CPUID_INDEX_7.
    	* sysdeps/i386/i686/multiarch/bcopy.S: Enable ERMS optimization
    	for Fast_ERMS.
    	* sysdeps/i386/i686/multiarch/bzero.S: Likewise.
    	* sysdeps/i386/i686/multiarch/memcpy.S: Likewise.
    	* sysdeps/i386/i686/multiarch/memcpy_chk.S: Likewise.
    	* sysdeps/i386/i686/multiarch/memmove.S: Likewise.
    	* sysdeps/i386/i686/multiarch/memmove_chk.S: Likewise.
    	* sysdeps/i386/i686/multiarch/mempcpy.S: Likewise.
    	* sysdeps/i386/i686/multiarch/mempcpy_chk.S: Likewise.
    	* sysdeps/i386/i686/multiarch/memset.S: Likewise.
    	* sysdeps/i386/i686/multiarch/memset_chk.S: Likewise.

diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 700010d..6bcef4c 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -25,7 +25,9 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
 		   strcasecmp_l-sse4 strncase_l-sse4 \
 		   bcopy-sse2-unaligned memcpy-sse2-unaligned \
 		   mempcpy-sse2-unaligned memmove-sse2-unaligned \
-		   strcspn-c strpbrk-c strspn-c
+		   strcspn-c strpbrk-c strspn-c \
+		   bcopy-erms memcpy-erms memmove-erms mempcpy-erms \
+		   bzero-erms memset-erms
 CFLAGS-varshift.c += -msse4
 CFLAGS-strcspn-c.c += -msse4
 CFLAGS-strpbrk-c.c += -msse4
diff --git a/sysdeps/i386/i686/multiarch/bcopy-erms.S b/sysdeps/i386/i686/multiarch/bcopy-erms.S
new file mode 100644
index 0000000..da9e160
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bcopy-erms.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define USE_AS_BCOPY
+#define MEMCPY		__bcopy_erms
+#include "memcpy-erms.S"
diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/bcopy.S
index ce6661b..04f5a3a 100644
--- a/sysdeps/i386/i686/multiarch/bcopy.S
+++ b/sysdeps/i386/i686/multiarch/bcopy.S
@@ -27,6 +27,9 @@
 ENTRY(bcopy)
 	.type	bcopy, @gnu_indirect_function
 	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__bcopy_erms)
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 	LOAD_FUNC_GOT_EAX (__bcopy_ia32)
 	HAS_CPU_FEATURE (SSE2)
 	jz	2f
diff --git a/sysdeps/i386/i686/multiarch/bzero-erms.S b/sysdeps/i386/i686/multiarch/bzero-erms.S
new file mode 100644
index 0000000..2c3bed6
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bzero-erms.S
@@ -0,0 +1,3 @@
+#define USE_AS_BZERO
+#define __memset_erms __bzero_erms
+#include "memset-erms.S"
diff --git a/sysdeps/i386/i686/multiarch/bzero.S b/sysdeps/i386/i686/multiarch/bzero.S
index 738ca69..a61b5d2 100644
--- a/sysdeps/i386/i686/multiarch/bzero.S
+++ b/sysdeps/i386/i686/multiarch/bzero.S
@@ -27,6 +27,9 @@
 ENTRY(__bzero)
 	.type	__bzero, @gnu_indirect_function
 	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__bzero_erms)
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 	LOAD_FUNC_GOT_EAX (__bzero_ia32)
 	HAS_CPU_FEATURE (SSE2)
 	jz	2f
diff --git a/sysdeps/i386/i686/multiarch/ifunc-defines.sym b/sysdeps/i386/i686/multiarch/ifunc-defines.sym
index 96e9cfa..3df946f 100644
--- a/sysdeps/i386/i686/multiarch/ifunc-defines.sym
+++ b/sysdeps/i386/i686/multiarch/ifunc-defines.sym
@@ -16,4 +16,5 @@ FEATURE_OFFSET		offsetof (struct cpu_features, feature)
 FEATURE_SIZE		sizeof (unsigned int)
 
 COMMON_CPUID_INDEX_1
+COMMON_CPUID_INDEX_7
 FEATURE_INDEX_1
diff --git a/sysdeps/i386/i686/multiarch/memcpy-erms.S b/sysdeps/i386/i686/multiarch/memcpy-erms.S
new file mode 100644
index 0000000..df0c801
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memcpy-erms.S
@@ -0,0 +1,100 @@
+/* memcpy with Enhanced REP MOVSB/STOSB
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+#if !defined NOT_IN_libc \
+    && (defined SHARED \
+	|| defined USE_AS_MEMMOVE \
+	|| !defined USE_MULTIARCH)
+
+#include "asm-syntax.h"
+
+#ifndef MEMCPY
+# define MEMCPY		__memcpy_erms
+# define MEMCPY_CHK	__memcpy_chk_erms
+#endif
+
+#ifdef USE_AS_BCOPY
+# define STR2		12
+# define STR1		STR2+4
+# define N     		STR1+4
+#else
+# define STR1		12
+# define STR2		STR1+4
+# define N     		STR2+4
+#endif
+
+#define CFI_PUSH(REG)						\
+  cfi_adjust_cfa_offset (4);					\
+  cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)						\
+  cfi_adjust_cfa_offset (-4);					\
+  cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+#define POP(REG)	popl REG; CFI_POP (REG)
+
+	.section .text.erms,"ax",@progbits
+#if !defined USE_AS_BCOPY
+ENTRY (MEMCPY_CHK)
+	movl	12(%esp), %eax
+	cmpl	%eax, 16(%esp)
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMCPY_CHK)
+#endif
+ENTRY (MEMCPY)
+	PUSH	(%esi)
+	PUSH	(%edi)
+	movl	N(%esp), %ecx
+	movl	STR1(%esp), %edi
+	movl	STR2(%esp), %esi
+	mov	%edi, %eax
+#ifdef USE_AS_MEMPCPY
+	add	%ecx, %eax
+#endif
+
+#ifdef USE_AS_MEMMOVE
+	cmp	%esi, %edi
+	ja	L(copy_backward)
+	je	L(bwd_write_0bytes)
+#endif
+
+	rep	movsb
+	POP	(%edi)
+	POP	(%esi)
+	ret
+
+#ifdef USE_AS_MEMMOVE
+L(copy_backward):
+	lea	-1(%edi,%ecx), %edi
+	lea	-1(%esi,%ecx), %esi
+	std
+	rep	movsb
+	cld
+L(bwd_write_0bytes):
+	POP	(%edi)
+	POP	(%esi)
+	ret
+#endif
+
+END (MEMCPY)
+
+#endif
diff --git a/sysdeps/i386/i686/multiarch/memcpy.S b/sysdeps/i386/i686/multiarch/memcpy.S
index 652b5a2..79ae41f 100644
--- a/sysdeps/i386/i686/multiarch/memcpy.S
+++ b/sysdeps/i386/i686/multiarch/memcpy.S
@@ -29,6 +29,9 @@
 ENTRY(memcpy)
 	.type	memcpy, @gnu_indirect_function
 	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__memcpy_erms)
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 	LOAD_FUNC_GOT_EAX (__memcpy_ia32)
 	HAS_CPU_FEATURE (SSE2)
 	jz	2f
diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.S b/sysdeps/i386/i686/multiarch/memcpy_chk.S
index 0eee32c..dd1d38a 100644
--- a/sysdeps/i386/i686/multiarch/memcpy_chk.S
+++ b/sysdeps/i386/i686/multiarch/memcpy_chk.S
@@ -30,6 +30,9 @@
 ENTRY(__memcpy_chk)
 	.type	__memcpy_chk, @gnu_indirect_function
 	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__memcpy_chk_erms)
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 	LOAD_FUNC_GOT_EAX (__memcpy_chk_ia32)
 	HAS_CPU_FEATURE (SSE2)
 	jz	2f
diff --git a/sysdeps/i386/i686/multiarch/memmove-erms.S b/sysdeps/i386/i686/multiarch/memmove-erms.S
new file mode 100644
index 0000000..357289a
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memmove-erms.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define MEMCPY		__memmove_erms
+#define MEMCPY_CHK	__memmove_chk_erms
+#include "memcpy-erms.S"
diff --git a/sysdeps/i386/i686/multiarch/memmove.S b/sysdeps/i386/i686/multiarch/memmove.S
index 725a421..13223b3 100644
--- a/sysdeps/i386/i686/multiarch/memmove.S
+++ b/sysdeps/i386/i686/multiarch/memmove.S
@@ -27,6 +27,9 @@
 ENTRY(memmove)
 	.type	memmove, @gnu_indirect_function
 	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__memmove_erms)
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 	LOAD_FUNC_GOT_EAX (__memmove_ia32)
 	HAS_CPU_FEATURE (SSE2)
 	jz	2f
diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.S b/sysdeps/i386/i686/multiarch/memmove_chk.S
index a29bbc9..ed000ee 100644
--- a/sysdeps/i386/i686/multiarch/memmove_chk.S
+++ b/sysdeps/i386/i686/multiarch/memmove_chk.S
@@ -27,6 +27,9 @@
 ENTRY(__memmove_chk)
 	.type	__memmove_chk, @gnu_indirect_function
 	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__memmove_chk_erms)
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 	LOAD_FUNC_GOT_EAX (__memmove_chk_ia32)
 	HAS_CPU_FEATURE (SSE2)
 	jz	2f
@@ -90,5 +93,17 @@ __memmove_chk_ia32:
 	jmp	__memmove_ia32
 	cfi_endproc
 	.size __memmove_chk_ia32, .-__memmove_chk_ia32
+
+	.type __memmove_chk_erms, @function
+	.p2align 4;
+__memmove_chk_erms:
+	cfi_startproc
+	CALL_MCOUNT
+	movl	12(%esp), %eax
+	cmpl	%eax, 16(%esp)
+	jb	__chk_fail
+	jmp	__memmove_erms
+	cfi_endproc
+	.size __memmove_chk_erms, .-__memmove_chk_erms
 # endif
 #endif
diff --git a/sysdeps/i386/i686/multiarch/mempcpy-erms.S b/sysdeps/i386/i686/multiarch/mempcpy-erms.S
new file mode 100644
index 0000000..01d3bf8
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mempcpy-erms.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMPCPY
+#define MEMCPY		__mempcpy_erms
+#define MEMCPY_CHK	__mempcpy_chk_erms
+#include "memcpy-erms.S"
diff --git a/sysdeps/i386/i686/multiarch/mempcpy.S b/sysdeps/i386/i686/multiarch/mempcpy.S
index b46f3fc..cceae9b 100644
--- a/sysdeps/i386/i686/multiarch/mempcpy.S
+++ b/sysdeps/i386/i686/multiarch/mempcpy.S
@@ -29,6 +29,9 @@
 ENTRY(__mempcpy)
 	.type	__mempcpy, @gnu_indirect_function
 	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__mempcpy_erms)
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 	LOAD_FUNC_GOT_EAX (__mempcpy_ia32)
 	HAS_CPU_FEATURE (SSE2)
 	jz	2f
diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/sysdeps/i386/i686/multiarch/mempcpy_chk.S
index 30f3629..97d5179 100644
--- a/sysdeps/i386/i686/multiarch/mempcpy_chk.S
+++ b/sysdeps/i386/i686/multiarch/mempcpy_chk.S
@@ -30,6 +30,9 @@
 ENTRY(__mempcpy_chk)
 	.type	__mempcpy_chk, @gnu_indirect_function
 	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__mempcpy_chk_erms)
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 	LOAD_FUNC_GOT_EAX (__mempcpy_chk_ia32)
 	HAS_CPU_FEATURE (SSE2)
 	jz	2f
diff --git a/sysdeps/i386/i686/multiarch/memset-erms.S b/sysdeps/i386/i686/multiarch/memset-erms.S
new file mode 100644
index 0000000..807a6e4
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memset-erms.S
@@ -0,0 +1,69 @@
+/* memset with Enhanced REP MOVSB/STOSB
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef NOT_IN_libc
+
+#include <sysdep.h>
+
+#define CFI_PUSH(REG)						\
+  cfi_adjust_cfa_offset (4);					\
+  cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)						\
+  cfi_adjust_cfa_offset (-4);					\
+  cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+#define POP(REG)	popl REG; CFI_POP (REG)
+
+#define STR1  8
+#ifdef USE_AS_BZERO
+#define N     STR1+4
+#else
+#define STR2  STR1+4
+#define N     STR2+4
+#endif
+
+	.section .text.erms,"ax",@progbits
+#if defined SHARED && !defined NOT_IN_libc && !defined USE_AS_BZERO
+ENTRY (__memset_chk_erms)
+	movl	12(%esp), %eax
+	cmpl	%eax, 16(%esp)
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (__memset_chk_erms)
+#endif
+ENTRY (__memset_erms)
+	PUSH    (%edi)
+	movl	N(%esp), %ecx
+	movl	STR1(%esp), %edi
+#ifdef USE_AS_BZERO
+	xor	%eax, %eax
+#else
+	movzbl	STR2(%esp), %eax
+	mov	%edi, %edx
+#endif
+	rep	stosb
+#ifndef USE_AS_BZERO
+	mov	%edx, %eax
+#endif
+	POP     (%edi)
+	ret
+END (__memset_erms)
+
+#endif
diff --git a/sysdeps/i386/i686/multiarch/memset.S b/sysdeps/i386/i686/multiarch/memset.S
index 14180e4..3c11b91 100644
--- a/sysdeps/i386/i686/multiarch/memset.S
+++ b/sysdeps/i386/i686/multiarch/memset.S
@@ -27,6 +27,9 @@
 ENTRY(memset)
 	.type	memset, @gnu_indirect_function
 	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__memset_erms)
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 	LOAD_FUNC_GOT_EAX (__memset_ia32)
 	HAS_CPU_FEATURE (SSE2)
 	jz	2f
diff --git a/sysdeps/i386/i686/multiarch/memset_chk.S b/sysdeps/i386/i686/multiarch/memset_chk.S
index d73f202..fa1c5fb 100644
--- a/sysdeps/i386/i686/multiarch/memset_chk.S
+++ b/sysdeps/i386/i686/multiarch/memset_chk.S
@@ -27,6 +27,9 @@
 ENTRY(__memset_chk)
 	.type	__memset_chk, @gnu_indirect_function
 	LOAD_GOT_AND_RTLD_GLOBAL_RO
+	LOAD_FUNC_GOT_EAX (__memset_chk_erms)
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 	LOAD_FUNC_GOT_EAX (__memset_chk_ia32)
 	HAS_CPU_FEATURE (SSE2)
 	jz	2f
@@ -78,5 +81,17 @@ __memset_chk_ia32:
 	jmp	__memset_ia32
 	cfi_endproc
 	.size __memset_chk_ia32, .-__memset_chk_ia32
+
+	.type __memset_chk_erms, @function
+	.p2align 4;
+__memset_chk_erms:
+	cfi_startproc
+	CALL_MCOUNT
+	movl	12(%esp), %eax
+	cmpl	%eax, 16(%esp)
+	jb	__chk_fail
+	jmp	__memset_erms
+	cfi_endproc
+	.size __memset_chk_erms, .-__memset_chk_erms
 # endif
 #endif

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=4a6249c693579ec7bdc9b3ce05b23fd464728eca

commit 4a6249c693579ec7bdc9b3ce05b23fd464728eca
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Thu Sep 15 16:16:10 2011 -0700

    Add 64-bit ERMS memcpy and memset
    
    	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
    	memcpy-erms, mempcpy-erms, memmove-erms and memset-erms.
    	* sysdeps/x86_64/multiarch/memcpy-erms.S: New.
    	* sysdeps/x86_64/multiarch/memmove-erms.S: Likewise.
    	* sysdeps/x86_64/multiarch/mempcpy-erms.S: Likewise.
    	* sysdeps/x86_64/multiarch/memset-erms.S: Likewise.
    	* sysdeps/x86_64/multiarch/memcpy.S: Enable ERMS optimization
    	for Fast_ERMS.
    	* sysdeps/x86_64/multiarch/memcpy_chk.S: Likewise.
    	* sysdeps/x86_64/multiarch/memmove.c: Likewise.
    	* sysdeps/x86_64/multiarch/memmove_chk.c: Likewise.
    	* sysdeps/x86_64/multiarch/mempcpy.S: Likewise.
    	* sysdeps/x86_64/multiarch/mempcpy_chk.S: Likewise.
    	* sysdeps/x86_64/multiarch/memset.S: Likewise.
    	* sysdeps/x86_64/multiarch/memset_chk.S: Likewise.

diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index d234f4a..2f29a2a 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -20,7 +20,9 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
 		   strcat-sse2-unaligned strncat-sse2-unaligned \
 		   strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \
 		   strcspn-c strpbrk-c strspn-c varshift memset-avx2 \
-		   memset-avx512-no-vzeroupper
+		   memset-avx512-no-vzeroupper \
+		   memcpy-erms mempcpy-erms memmove-erms \
+		   memset-erms
 CFLAGS-varshift.c += -msse4
 CFLAGS-strcspn-c.c += -msse4
 CFLAGS-strpbrk-c.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/memcpy-erms.S b/sysdeps/x86_64/multiarch/memcpy-erms.S
new file mode 100644
index 0000000..f0595d6
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy-erms.S
@@ -0,0 +1,71 @@
+/* memcpy with Enhanced REP MOVSB/STOSB
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+#if !defined NOT_IN_libc \
+    && (defined SHARED \
+        || defined USE_AS_MEMMOVE \
+	|| !defined USE_MULTIARCH)
+
+#  include "asm-syntax.h"
+
+#  ifndef MEMCPY
+#   define MEMCPY	__memcpy_erms
+#   define MEMCPY_CHK	__memcpy_chk_erms
+#  endif
+
+	.section .text.erms,"ax",@progbits
+# if !defined USE_AS_BCOPY
+ENTRY (MEMCPY_CHK)
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMCPY_CHK)
+# endif
+
+ENTRY (MEMCPY)
+	mov	%rdi, %rax
+	mov	%rdx, %rcx
+# ifdef USE_AS_MEMPCPY
+	add	%rdx, %rax
+# endif
+
+# ifdef USE_AS_MEMMOVE
+	cmp	%rsi, %rdi
+	ja	L(copy_backward)
+	je	L(bwd_write_0bytes)
+# endif
+
+	rep movsb
+	ret
+
+# ifdef USE_AS_MEMMOVE
+L(copy_backward):
+	lea	-1(%rdi,%rdx), %rdi
+	lea	-1(%rsi,%rdx), %rsi
+	std
+	rep movsb
+	cld
+L(bwd_write_0bytes):
+	ret
+# endif
+
+END (MEMCPY)
+
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 8882590..58d9223 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -30,6 +30,9 @@
 ENTRY(__new_memcpy)
 	.type	__new_memcpy, @gnu_indirect_function
 	LOAD_RTLD_GLOBAL_RO_RDX
+	lea    __memcpy_erms(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 #ifdef HAVE_AVX512_ASM_SUPPORT
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz	1f
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index 648217e..0e21c09 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -30,6 +30,9 @@
 ENTRY(__memcpy_chk)
 	.type	__memcpy_chk, @gnu_indirect_function
 	LOAD_RTLD_GLOBAL_RO_RDX
+	leaq	__memcpy_chk_erms(%rip), %rax
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 #ifdef HAVE_AVX512_ASM_SUPPORT
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz      1f
diff --git a/sysdeps/x86_64/multiarch/memmove-erms.S b/sysdeps/x86_64/multiarch/memmove-erms.S
new file mode 100644
index 0000000..357289a
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-erms.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define MEMCPY		__memmove_erms
+#define MEMCPY_CHK	__memmove_chk_erms
+#include "memcpy-erms.S"
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index 8da5640..3777bea 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -35,6 +35,7 @@
 extern __typeof (__redirect_memmove) __memmove_sse2 attribute_hidden;
 extern __typeof (__redirect_memmove) __memmove_ssse3 attribute_hidden;
 extern __typeof (__redirect_memmove) __memmove_ssse3_back attribute_hidden;
+extern __typeof (__redirect_memmove) __memmove_erms attribute_hidden;
 extern __typeof (__redirect_memmove) __memmove_avx_unaligned attribute_hidden;
 # ifdef HAVE_AVX512_ASM_SUPPORT
   extern __typeof (__redirect_memmove) __memmove_avx512_no_vzeroupper attribute_hidden;
@@ -52,6 +53,9 @@ extern __typeof (__redirect_memmove) __memmove_avx_unaligned attribute_hidden;
    ifunc symbol properly.  */
 extern __typeof (__redirect_memmove) __libc_memmove;
 libc_ifunc (__libc_memmove,
+	    HAS_ARCH_FEATURE (Fast_ERMS)
+	    ? __memmove_erms
+	    : (
 #ifdef HAVE_AVX512_ASM_SUPPORT
 	    HAS_ARCH_FEATURE (AVX512F_Usable)
 	      && HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
@@ -63,7 +67,7 @@ libc_ifunc (__libc_memmove,
 	    : (HAS_CPU_FEATURE (SSSE3)
 	       ? (HAS_ARCH_FEATURE (Fast_Copy_Backward)
 	          ? __memmove_ssse3_back : __memmove_ssse3)
-	       : __memmove_sse2)));
+	       : __memmove_sse2))));
 
 strong_alias (__libc_memmove, memmove)
 
diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c
index f64da63..4cd360a 100644
--- a/sysdeps/x86_64/multiarch/memmove_chk.c
+++ b/sysdeps/x86_64/multiarch/memmove_chk.c
@@ -25,6 +25,7 @@
 extern __typeof (__memmove_chk) __memmove_chk_sse2 attribute_hidden;
 extern __typeof (__memmove_chk) __memmove_chk_ssse3 attribute_hidden;
 extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden;
+extern __typeof (__memmove_chk) __memmove_chk_erms attribute_hidden;
 extern __typeof (__memmove_chk) __memmove_chk_avx_unaligned attribute_hidden;
 # ifdef HAVE_AVX512_ASM_SUPPORT
   extern __typeof (__memmove_chk) __memmove_chk_avx512_no_vzeroupper attribute_hidden;
@@ -33,6 +34,9 @@ extern __typeof (__memmove_chk) __memmove_chk_avx_unaligned attribute_hidden;
 #include "debug/memmove_chk.c"
 
 libc_ifunc (__memmove_chk,
+	    HAS_ARCH_FEATURE (Fast_ERMS)
+	    ? __memmove_chk_erms
+	    : (
 #ifdef HAVE_AVX512_ASM_SUPPORT
 	    HAS_ARCH_FEATURE (AVX512F_Usable)
 	      && HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
@@ -43,4 +47,4 @@ libc_ifunc (__memmove_chk,
 	    (HAS_CPU_FEATURE (SSSE3)
 	    ? (HAS_ARCH_FEATURE (Fast_Copy_Backward)
 	       ? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
-	    : __memmove_chk_sse2));
+	    : __memmove_chk_sse2)));
diff --git a/sysdeps/x86_64/multiarch/mempcpy-erms.S b/sysdeps/x86_64/multiarch/mempcpy-erms.S
new file mode 100644
index 0000000..01d3bf8
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy-erms.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMPCPY
+#define MEMCPY		__mempcpy_erms
+#define MEMCPY_CHK	__mempcpy_chk_erms
+#include "memcpy-erms.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index ed78623..b85cf27 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -28,6 +28,9 @@
 ENTRY(__mempcpy)
 	.type	__mempcpy, @gnu_indirect_function
 	LOAD_RTLD_GLOBAL_RO_RDX
+	leaq	__mempcpy_erms(%rip), %rax
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 #ifdef HAVE_AVX512_ASM_SUPPORT
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz	1f
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
index 6e8a89d..de888f3 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S
@@ -30,6 +30,9 @@
 ENTRY(__mempcpy_chk)
 	.type	__mempcpy_chk, @gnu_indirect_function
 	LOAD_RTLD_GLOBAL_RO_RDX
+	leaq	__mempcpy_chk_erms(%rip), %rax
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 #ifdef HAVE_AVX512_ASM_SUPPORT
 	HAS_ARCH_FEATURE (AVX512F_Usable)
 	jz	1f
diff --git a/sysdeps/x86_64/multiarch/memset-erms.S b/sysdeps/x86_64/multiarch/memset-erms.S
new file mode 100644
index 0000000..af9f80b
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset-erms.S
@@ -0,0 +1,40 @@
+/* memset with Enhanced REP MOVSB/STOSB
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+#ifndef NOT_IN_libc
+
+	.text
+# ifdef SHARED
+ENTRY (__memset_chk_erms)	/* Checked entry point: validates %rcx against %rdx, then continues into the fill code below.  */
+	cmpq	%rdx, %rcx	/* Sets flags from %rcx - %rdx.  NOTE(review): presumably %rdx = length, %rcx = destination size -- confirm against the __memset_chk prototype.  */
+	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* Unsigned %rcx < %rdx: jump to __chk_fail.  */
+END (__memset_chk_erms)	/* No ret here: when the check passes, execution runs on into the next code in this file, __memset_erms.  */
+# endif
+
+ENTRY (__memset_erms)	/* Fills memory with one byte using rep stosb; returns the incoming %rdi value in %rax.  */
+	mov	%rdx, %rcx	/* rep takes its repeat count from %rcx, so move the count there.  */
+	movzbl	%sil, %eax	/* Low byte of %rsi, zero-extended, into %eax; stosb stores %al.  */
+	mov	%rdi, %rdx	/* Keep the starting pointer: rep stosb modifies %rdi.  */
+	rep stosb	/* Store %al at (%rdi), repeated %rcx times.  */
+	mov %rdx, %rax	/* Return value: the starting pointer saved above.  */
+	ret
+END (__memset_erms)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S
index 8e3b9b9..dda8185 100644
--- a/sysdeps/x86_64/multiarch/memset.S
+++ b/sysdeps/x86_64/multiarch/memset.S
@@ -26,6 +26,9 @@
 ENTRY(memset)
 	.type	memset, @gnu_indirect_function
 	LOAD_RTLD_GLOBAL_RO_RDX
+	leaq	__memset_erms(%rip), %rax
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 	leaq	__memset_sse2(%rip), %rax
 	HAS_ARCH_FEATURE (AVX2_Usable)
 	jz	2f
diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S
index 9a7b270..b8c9940 100644
--- a/sysdeps/x86_64/multiarch/memset_chk.S
+++ b/sysdeps/x86_64/multiarch/memset_chk.S
@@ -26,6 +26,9 @@
 ENTRY(__memset_chk)
 	.type	__memset_chk, @gnu_indirect_function
 	LOAD_RTLD_GLOBAL_RO_RDX
+	leaq	__memset_chk_erms(%rip), %rax
+	HAS_ARCH_FEATURE (Fast_ERMS)
+	jnz	2f
 	leaq	__memset_chk_sse2(%rip), %rax
 	HAS_ARCH_FEATURE (AVX2_Usable)
 	jz	2f

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=63d945f4ae4c0588b66551bb6acffd1816a971f3

commit 63d945f4ae4c0588b66551bb6acffd1816a971f3
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Thu Sep 15 15:47:01 2011 -0700

    Initial ERMS support
    
    	* sysdeps/x86/cpu-features.h (bit_arch_Fast_ERMS): New.
    	(bit_cpu_ERMS): Likewise.
    	(index_cpu_ERMS): Likewise.
    	(index_arch_Fast_ERMS): Likewise.
    	(reg_ERMS): Likewise.

diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 0624a92..e099e30 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -35,6 +35,7 @@
 #define bit_arch_I686				(1 << 15)
 #define bit_arch_Prefer_MAP_32BIT_EXEC		(1 << 16)
 #define bit_arch_Prefer_No_VZEROUPPER		(1 << 17)
+#define bit_arch_Fast_ERMS			(1 << 18)
 
 /* CPUID Feature flags.  */
 
@@ -52,6 +53,7 @@
 #define bit_cpu_FMA4		(1 << 16)
 
 /* COMMON_CPUID_INDEX_7.  */
+#define bit_cpu_ERMS		(1 << 9)
 #define bit_cpu_RTM		(1 << 11)
 #define bit_cpu_AVX2		(1 << 5)
 #define bit_cpu_AVX512F		(1 << 16)
@@ -83,6 +85,7 @@
 # define index_cpu_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
 # define index_cpu_AVX	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
 # define index_cpu_AVX2	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
+# define index_cpu_ERMS	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
 
 # define index_arch_Fast_Rep_String	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
@@ -101,6 +104,7 @@
 # define index_arch_I686		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE
+# define index_arch_Fast_ERMS		FEATURE_INDEX_1*FEATURE_SIZE
 
 
 # if defined (_LIBC) && !IS_IN (nonlib)
@@ -220,6 +224,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_cpu_AVX2		COMMON_CPUID_INDEX_7
 # define index_cpu_AVX512F	COMMON_CPUID_INDEX_7
 # define index_cpu_AVX512DQ	COMMON_CPUID_INDEX_7
+# define index_cpu_ERMS		COMMON_CPUID_INDEX_7
 # define index_cpu_RTM		COMMON_CPUID_INDEX_7
 # define index_cpu_FMA		COMMON_CPUID_INDEX_1
 # define index_cpu_FMA4		COMMON_CPUID_INDEX_80000001
@@ -236,6 +241,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define reg_AVX2		ebx
 # define reg_AVX512F		ebx
 # define reg_AVX512DQ		ebx
+# define reg_ERMS		ebx
 # define reg_RTM		ebx
 # define reg_FMA		ecx
 # define reg_FMA4		ecx
@@ -259,6 +265,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_arch_I686		FEATURE_INDEX_1
 # define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1
 # define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1
+# define index_arch_Fast_ERMS		FEATURE_INDEX_1
 
 #endif	/* !__ASSEMBLER__ */
 

-----------------------------------------------------------------------


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]