This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch hjl/erms created. glibc-2.14-318-g90a10df


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, hjl/erms, has been created
        at  90a10dfe3bd53ead5aa40b3d58e3757bcb32796e (commit)

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=90a10dfe3bd53ead5aa40b3d58e3757bcb32796e

commit 90a10dfe3bd53ead5aa40b3d58e3757bcb32796e
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Wed Sep 21 15:21:28 2011 -0700

    Add ERMS optimized 32-bit memcpy/memset.

diff --git a/ChangeLog.erms b/ChangeLog.erms
index 175d69f..b7ee25d 100644
--- a/ChangeLog.erms
+++ b/ChangeLog.erms
@@ -1,5 +1,30 @@
 2011-09-21  H.J. Lu  <hongjiu.lu@intel.com>
 
+	* sysdeps/i386/i686/multiarch/bzero-erms.S: New.
+	* sysdeps/i386/i686/multiarch/memcpy-erms.S: Likewise.
+	* sysdeps/i386/i686/multiarch/memmove-erms.S: Likewise.
+	* sysdeps/i386/i686/multiarch/mempcpy-erms.S: Likewise.
+	* sysdeps/i386/i686/multiarch/memset-erms.S: Likewise.
+
+	* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
+	bzero-erms, memcpy-erms, memmove-erms, mempcpy-erms and
+	memset-erms.
+
+	* sysdeps/i386/i686/multiarch/bzero.S: Optimize for ERMS.
+	* sysdeps/i386/i686/multiarch/memcpy.S: Likewise.
+	* sysdeps/i386/i686/multiarch/memcpy_chk.S: Likewise.
+	* sysdeps/i386/i686/multiarch/memmove.S: Likewise.
+	* sysdeps/i386/i686/multiarch/memmove_chk.S: Likewise.
+	* sysdeps/i386/i686/multiarch/mempcpy.S: Likewise.
+	* sysdeps/i386/i686/multiarch/mempcpy_chk.S: Likewise.
+	* sysdeps/i386/i686/multiarch/memset.S: Likewise.
+	* sysdeps/i386/i686/multiarch/memset_chk.S: Likewise.
+
+	* sysdeps/i386/i686/multiarch/ifunc-defines.sym: Add
+	COMMON_CPUID_INDEX_7 and FEATURE_INDEX_7.
+
+2011-09-21  H.J. Lu  <hongjiu.lu@intel.com>
+
 	* sysdeps/x86_64/multiarch/memset-erms.S: Check USE_AS_BZERO
 	instead of USE_AS_BZERO_P.
 
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index c89ae92..d2f6a73 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -15,7 +15,8 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
 		   strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \
 		   strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c \
 		   strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \
-		   wcscmp-sse2 wcscmp-c
+		   wcscmp-sse2 wcscmp-c bzero-erms memcpy-erms \
+		   memmove-erms mempcpy-erms memset-erms
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
 CFLAGS-varshift.c += -msse4
diff --git a/sysdeps/i386/i686/multiarch/bzero-erms.S b/sysdeps/i386/i686/multiarch/bzero-erms.S
new file mode 100644
index 0000000..2c3bed6
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bzero-erms.S
@@ -0,0 +1,3 @@
+#define USE_AS_BZERO
+#define __memset_erms __bzero_erms
+#include "memset-erms.S"
diff --git a/sysdeps/i386/i686/multiarch/bzero.S b/sysdeps/i386/i686/multiarch/bzero.S
index 8c740a4..6aa0266 100644
--- a/sysdeps/i386/i686/multiarch/bzero.S
+++ b/sysdeps/i386/i686/multiarch/bzero.S
@@ -44,7 +44,10 @@ ENTRY(__bzero)
 	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
 	jne	1f
 	call	__init_cpu_features
-1:	leal	__bzero_ia32@GOTOFF(%ebx), %eax
+1:	leal	__bzero_erms@GOTOFF(%ebx), %eax
+	testl	$bit_ERMS, CPUID_OFFSET+index_ERMS+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
+	leal	__bzero_ia32@GOTOFF(%ebx), %eax
 	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__bzero_sse2@GOTOFF(%ebx), %eax
@@ -63,7 +66,10 @@ ENTRY(__bzero)
 	cmpl	$0, KIND_OFFSET+__cpu_features
 	jne	1f
 	call	__init_cpu_features
-1:	leal	__bzero_ia32, %eax
+1:	leal	__bzero_erms, %eax
+	testl	$bit_ERMS, CPUID_OFFSET+index_ERMS+__cpu_features
+	jnz	2f
+	leal	__bzero_ia32, %eax
 	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
 	jz	2f
 	leal	__bzero_sse2, %eax
diff --git a/sysdeps/i386/i686/multiarch/ifunc-defines.sym b/sysdeps/i386/i686/multiarch/ifunc-defines.sym
index eb1538a..448b8c4 100644
--- a/sysdeps/i386/i686/multiarch/ifunc-defines.sym
+++ b/sysdeps/i386/i686/multiarch/ifunc-defines.sym
@@ -17,4 +17,6 @@ FEATURE_OFFSET		offsetof (struct cpu_features, feature)
 FEATURE_SIZE		sizeof (unsigned int)
 
 COMMON_CPUID_INDEX_1
+COMMON_CPUID_INDEX_7
 FEATURE_INDEX_1
+FEATURE_INDEX_7
diff --git a/sysdeps/i386/i686/multiarch/memcpy-erms.S b/sysdeps/i386/i686/multiarch/memcpy-erms.S
new file mode 100644
index 0000000..ff1aab7
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memcpy-erms.S
@@ -0,0 +1,105 @@
+/* memcpy with Enhanced REP MOVSB/STOSB
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+#if !defined NOT_IN_libc \
+    && (defined SHARED \
+	|| defined USE_AS_MEMMOVE \
+	|| !defined USE_MULTIARCH)
+
+#include "asm-syntax.h"
+
+#ifndef MEMCPY
+# define MEMCPY		__memcpy_erms
+# define MEMCPY_CHK	__memcpy_chk_erms
+#endif
+
+#ifdef USE_AS_BCOPY
+# define SRC		PARMS
+# define DEST		SRC+4
+# define LEN		DEST+4
+#else
+# define DEST		PARMS
+# define SRC		DEST+4
+# define LEN		SRC+4
+#endif
+
+#define CFI_PUSH(REG)						\
+  cfi_adjust_cfa_offset (4);					\
+  cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)						\
+  cfi_adjust_cfa_offset (-4);					\
+  cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+#define POP(REG)	popl REG; CFI_POP (REG)
+
+#define STR1  12
+#define STR2  STR1+4
+#define N     STR2+4
+
+	.section .text.erms,"ax",@progbits
+#if !defined USE_AS_BCOPY
+ENTRY (MEMCPY_CHK)
+	movl	12(%esp), %eax
+	cmpl	%eax, 16(%esp)
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMCPY_CHK)
+#endif
+ENTRY (MEMCPY)
+	PUSH	(%esi)
+	PUSH	(%edi)
+	movl	N(%esp), %ecx
+	movl	STR1(%esp), %edi
+	movl	STR2(%esp), %esi
+	mov	%edi, %eax
+#ifdef USE_AS_MEMPCPY
+	add	%ecx, %eax
+#endif
+
+#ifdef USE_AS_MEMMOVE
+	cmp	%esi, %edi
+	ja	L(copy_backward)
+	je	L(bwd_write_0bytes)
+#endif
+
+	rep	movsb
+	POP	(%edi)
+	POP	(%esi)
+	ret
+
+#ifdef USE_AS_MEMMOVE
+L(copy_backward):
+	lea	-1(%edi,%ecx), %edi
+	lea	-1(%esi,%ecx), %esi
+	std
+	rep	movsb
+	cld
+L(bwd_write_0bytes):
+	POP	(%edi)
+	POP	(%esi)
+	ret
+#endif
+
+END (MEMCPY)
+
+#endif
diff --git a/sysdeps/i386/i686/multiarch/memcpy.S b/sysdeps/i386/i686/multiarch/memcpy.S
index bf1c7cc..68e9ffd 100644
--- a/sysdeps/i386/i686/multiarch/memcpy.S
+++ b/sysdeps/i386/i686/multiarch/memcpy.S
@@ -45,7 +45,10 @@ ENTRY(memcpy)
 	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
 	jne	1f
 	call	__init_cpu_features
-1:	leal	__memcpy_ia32@GOTOFF(%ebx), %eax
+1:	leal	__memcpy_erms@GOTOFF(%ebx), %eax
+	testl	$bit_ERMS, CPUID_OFFSET+index_ERMS+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
+	leal	__memcpy_ia32@GOTOFF(%ebx), %eax
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__memcpy_ssse3@GOTOFF(%ebx), %eax
diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.S b/sysdeps/i386/i686/multiarch/memcpy_chk.S
index 171ac8a..41d552f 100644
--- a/sysdeps/i386/i686/multiarch/memcpy_chk.S
+++ b/sysdeps/i386/i686/multiarch/memcpy_chk.S
@@ -46,7 +46,10 @@ ENTRY(__memcpy_chk)
 	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
 	jne	1f
 	call	__init_cpu_features
-1:	leal	__memcpy_chk_ia32@GOTOFF(%ebx), %eax
+1:	leal	__memcpy_chk_erms@GOTOFF(%ebx), %eax
+	testl	$bit_ERMS, CPUID_OFFSET+index_ERMS+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
+	leal	__memcpy_chk_ia32@GOTOFF(%ebx), %eax
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__memcpy_chk_ssse3@GOTOFF(%ebx), %eax
diff --git a/sysdeps/i386/i686/multiarch/memmove-erms.S b/sysdeps/i386/i686/multiarch/memmove-erms.S
new file mode 100644
index 0000000..357289a
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memmove-erms.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define MEMCPY		__memmove_erms
+#define MEMCPY_CHK	__memmove_chk_erms
+#include "memcpy-erms.S"
diff --git a/sysdeps/i386/i686/multiarch/memmove.S b/sysdeps/i386/i686/multiarch/memmove.S
index e0529c0..004aea3 100644
--- a/sysdeps/i386/i686/multiarch/memmove.S
+++ b/sysdeps/i386/i686/multiarch/memmove.S
@@ -44,7 +44,10 @@ ENTRY(memmove)
 	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
 	jne	1f
 	call	__init_cpu_features
-1:	leal	__memmove_ia32@GOTOFF(%ebx), %eax
+1:	leal	__memmove_erms@GOTOFF(%ebx), %eax
+	testl	$bit_ERMS, CPUID_OFFSET+index_ERMS+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
+	leal	__memmove_ia32@GOTOFF(%ebx), %eax
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__memmove_ssse3@GOTOFF(%ebx), %eax
@@ -70,7 +73,10 @@ ENTRY(memmove)
 	cmpl	$0, KIND_OFFSET+__cpu_features
 	jne	1f
 	call	__init_cpu_features
-1:	leal	__memmove_ia32, %eax
+1:	leal	__memmove_erms, %eax
+	testl	$bit_ERMS, CPUID_OFFSET+index_ERMS+__cpu_features
+	jnz	2f
+	leal	__memmove_ia32, %eax
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
 	jz	2f
 	leal	__memmove_ssse3, %eax
diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.S b/sysdeps/i386/i686/multiarch/memmove_chk.S
index e33f2a3..fa44725 100644
--- a/sysdeps/i386/i686/multiarch/memmove_chk.S
+++ b/sysdeps/i386/i686/multiarch/memmove_chk.S
@@ -44,7 +44,10 @@ ENTRY(__memmove_chk)
 	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
 	jne	1f
 	call	__init_cpu_features
-1:	leal	__memmove_chk_ia32@GOTOFF(%ebx), %eax
+1:	leal	__memmove_chk_erms@GOTOFF(%ebx), %eax
+	testl	$bit_ERMS, CPUID_OFFSET+index_ERMS+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
+	leal	__memmove_chk_ia32@GOTOFF(%ebx), %eax
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__memmove_chk_ssse3@GOTOFF(%ebx), %eax
@@ -63,7 +66,10 @@ ENTRY(__memmove_chk)
 	cmpl	$0, KIND_OFFSET+__cpu_features
 	jne	1f
 	call	__init_cpu_features
-1:	leal	__memmove_chk_ia32, %eax
+1:	leal	__memmove_chk_erms, %eax
+	testl	$bit_ERMS, CPUID_OFFSET+index_ERMS+__cpu_features
+	jnz	2f
+	leal	__memmove_chk_ia32, %eax
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
 	jz	2f
 	leal	__memmove_chk_ssse3, %eax
@@ -108,5 +114,17 @@ __memmove_chk_ia32:
 	jmp	__memmove_ia32
 	cfi_endproc
 	.size __memmove_chk_ia32, .-__memmove_chk_ia32
+
+	.type __memmove_chk_erms, @function
+	.p2align 4;
+__memmove_chk_erms:
+	cfi_startproc
+	CALL_MCOUNT
+	movl	12(%esp), %eax
+	cmpl	%eax, 16(%esp)
+	jb	__chk_fail
+	jmp	__memmove_erms
+	cfi_endproc
+	.size __memmove_chk_erms, .-__memmove_chk_erms
 # endif
 #endif
diff --git a/sysdeps/i386/i686/multiarch/mempcpy-erms.S b/sysdeps/i386/i686/multiarch/mempcpy-erms.S
new file mode 100644
index 0000000..01d3bf8
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mempcpy-erms.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMPCPY
+#define MEMCPY		__mempcpy_erms
+#define MEMCPY_CHK	__mempcpy_chk_erms
+#include "memcpy-erms.S"
diff --git a/sysdeps/i386/i686/multiarch/mempcpy.S b/sysdeps/i386/i686/multiarch/mempcpy.S
index df830d2..84a6c6b 100644
--- a/sysdeps/i386/i686/multiarch/mempcpy.S
+++ b/sysdeps/i386/i686/multiarch/mempcpy.S
@@ -45,7 +45,10 @@ ENTRY(__mempcpy)
 	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
 	jne	1f
 	call	__init_cpu_features
-1:	leal	__mempcpy_ia32@GOTOFF(%ebx), %eax
+1:	leal	__mempcpy_erms@GOTOFF(%ebx), %eax
+	testl	$bit_ERMS, CPUID_OFFSET+index_ERMS+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
+	leal	__mempcpy_ia32@GOTOFF(%ebx), %eax
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__mempcpy_ssse3@GOTOFF(%ebx), %eax
diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/sysdeps/i386/i686/multiarch/mempcpy_chk.S
index 828fb5e..17de738 100644
--- a/sysdeps/i386/i686/multiarch/mempcpy_chk.S
+++ b/sysdeps/i386/i686/multiarch/mempcpy_chk.S
@@ -46,7 +46,10 @@ ENTRY(__mempcpy_chk)
 	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
 	jne	1f
 	call	__init_cpu_features
-1:	leal	__mempcpy_chk_ia32@GOTOFF(%ebx), %eax
+1:	leal	__mempcpy_chk_erms@GOTOFF(%ebx), %eax
+	testl	$bit_ERMS, CPUID_OFFSET+index_ERMS+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
+	leal	__mempcpy_chk_ia32@GOTOFF(%ebx), %eax
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__mempcpy_chk_ssse3@GOTOFF(%ebx), %eax
diff --git a/sysdeps/i386/i686/multiarch/memset-erms.S b/sysdeps/i386/i686/multiarch/memset-erms.S
new file mode 100644
index 0000000..7857619
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memset-erms.S
@@ -0,0 +1,70 @@
+/* memset with Enhanced REP MOVSB/STOSB
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef NOT_IN_libc
+
+#include <sysdep.h>
+
+#define CFI_PUSH(REG)						\
+  cfi_adjust_cfa_offset (4);					\
+  cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG)						\
+  cfi_adjust_cfa_offset (-4);					\
+  cfi_restore (REG)
+
+#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+#define POP(REG)	popl REG; CFI_POP (REG)
+
+#define STR1  8
+#ifdef USE_AS_BZERO
+#define N     STR1+4
+#else
+#define STR2  STR1+4
+#define N     STR2+4
+#endif
+
+	.section .text.erms,"ax",@progbits
+#if defined SHARED && !defined NOT_IN_libc && !defined USE_AS_BZERO
+ENTRY (__memset_chk_erms)
+	movl	12(%esp), %eax
+	cmpl	%eax, 16(%esp)
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (__memset_chk_erms)
+#endif
+ENTRY (__memset_erms)
+	PUSH    (%edi)
+	movl	N(%esp), %ecx
+	movl	STR1(%esp), %edi
+#ifdef USE_AS_BZERO
+	xor	%eax, %eax
+#else
+	movzbl	STR2(%esp), %eax
+	mov	%edi, %edx
+#endif
+	rep	stosb
+#ifndef USE_AS_BZERO
+	mov	%edx, %eax
+#endif
+	POP     (%edi)
+	ret
+END (__memset_erms)
+
+#endif
diff --git a/sysdeps/i386/i686/multiarch/memset.S b/sysdeps/i386/i686/multiarch/memset.S
index 34dddce..06bb950 100644
--- a/sysdeps/i386/i686/multiarch/memset.S
+++ b/sysdeps/i386/i686/multiarch/memset.S
@@ -44,7 +44,10 @@ ENTRY(memset)
 	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
 	jne	1f
 	call	__init_cpu_features
-1:	leal	__memset_ia32@GOTOFF(%ebx), %eax
+1:	leal	__memset_erms@GOTOFF(%ebx), %eax
+	testl	$bit_ERMS, CPUID_OFFSET+index_ERMS+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
+	leal	__memset_ia32@GOTOFF(%ebx), %eax
 	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__memset_sse2@GOTOFF(%ebx), %eax
@@ -63,7 +66,10 @@ ENTRY(memset)
 	cmpl	$0, KIND_OFFSET+__cpu_features
 	jne	1f
 	call	__init_cpu_features
-1:	leal	__memset_ia32, %eax
+1:	leal	__memset_erms, %eax
+	testl	$bit_ERMS, CPUID_OFFSET+index_ERMS+__cpu_features
+	jnz	2f
+	leal	__memset_ia32, %eax
 	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
 	jz	2f
 	leal	__memset_sse2, %eax
diff --git a/sysdeps/i386/i686/multiarch/memset_chk.S b/sysdeps/i386/i686/multiarch/memset_chk.S
index d659c7e..6bf10aa 100644
--- a/sysdeps/i386/i686/multiarch/memset_chk.S
+++ b/sysdeps/i386/i686/multiarch/memset_chk.S
@@ -44,7 +44,10 @@ ENTRY(__memset_chk)
 	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
 	jne	1f
 	call	__init_cpu_features
-1:	leal	__memset_chk_ia32@GOTOFF(%ebx), %eax
+1:	leal	__memset_chk_erms@GOTOFF(%ebx), %eax
+	testl	$bit_ERMS, CPUID_OFFSET+index_ERMS+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
+	leal	__memset_chk_ia32@GOTOFF(%ebx), %eax
 	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__memset_chk_sse2@GOTOFF(%ebx), %eax
@@ -67,7 +70,10 @@ ENTRY(__memset_chk)
 	cmpl	$0, KIND_OFFSET+__cpu_features
 	jne	1f
 	call	__init_cpu_features
-1:	leal	__memset_chk_ia32, %eax
+1:	leal	__memset_chk_erms, %eax
+	testl	$bit_ERMS, CPUID_OFFSET+index_ERMS+__cpu_features
+	jnz	2f
+	leal	__memset_chk_ia32, %eax
 	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
 	jz	2f
 	leal	__memset_chk_sse2, %eax
@@ -112,5 +118,17 @@ __memset_chk_ia32:
 	jmp	__memset_ia32
 	cfi_endproc
 	.size __memset_chk_ia32, .-__memset_chk_ia32
+
+	.type __memset_chk_erms, @function
+	.p2align 4;
+__memset_chk_erms:
+	cfi_startproc
+	CALL_MCOUNT
+	movl	12(%esp), %eax
+	cmpl	%eax, 16(%esp)
+	jb	__chk_fail
+	jmp	__memset_erms
+	cfi_endproc
+	.size __memset_chk_erms, .-__memset_chk_erms
 # endif
 #endif

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=dd33df9abdd14e079e692ed27f4ad57011f7ede5

commit dd33df9abdd14e079e692ed27f4ad57011f7ede5
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Wed Sep 21 13:55:51 2011 -0700

    Check USE_AS_BZERO instead of USE_AS_BZERO_P.

diff --git a/ChangeLog.erms b/ChangeLog.erms
index 267c451..175d69f 100644
--- a/ChangeLog.erms
+++ b/ChangeLog.erms
@@ -1,3 +1,8 @@
+2011-09-21  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/x86_64/multiarch/memset-erms.S: Check USE_AS_BZERO
+	instead of USE_AS_BZERO_P.
+
 2011-09-15  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* sysdeps/x86_64/multiarch/init-arch.c (get_common_indeces):
diff --git a/sysdeps/x86_64/multiarch/memset-erms.S b/sysdeps/x86_64/multiarch/memset-erms.S
index 1923a14..721e915 100644
--- a/sysdeps/x86_64/multiarch/memset-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-erms.S
@@ -31,7 +31,7 @@ END (__memset_chk_erms)
 # endif
 
 ENTRY (__memset_erms)
-# ifdef USE_AS_BZERO_P
+# ifdef USE_AS_BZERO
 	mov	%rsi, %rcx
 	xor	%eax, %eax
 # else
@@ -40,7 +40,7 @@ ENTRY (__memset_erms)
 	mov	%rdi, %rdx
 # endif
 	rep stosb
-# ifndef USE_AS_BZERO_P
+# ifndef USE_AS_BZERO
 	mov %rdx, %rax
 # endif
 	ret

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=97f0bbf7e7f2e9b05685f97d236401bb9a513842

commit 97f0bbf7e7f2e9b05685f97d236401bb9a513842
Merge: 758ec29 88738eb
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Sep 16 11:23:30 2011 -0700

    Merge remote-tracking branch 'origin/master' into hjl/erms


http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=758ec295e2007e78b7e22a25fb674e64e53a58ee

commit 758ec295e2007e78b7e22a25fb674e64e53a58ee
Merge: 110a84b 2840865
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Thu Sep 15 18:08:35 2011 -0700

    Merge remote-tracking branch 'origin/master' into hjl/erms


http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=110a84b296f512efff526c1141216d9394a89dc0

commit 110a84b296f512efff526c1141216d9394a89dc0
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Thu Sep 15 18:04:31 2011 -0700

    Check max_cpuid and use __cpuid_count.

diff --git a/ChangeLog.erms b/ChangeLog.erms
index d2a776a..267c451 100644
--- a/ChangeLog.erms
+++ b/ChangeLog.erms
@@ -1,5 +1,10 @@
 2011-09-15  H.J. Lu  <hongjiu.lu@intel.com>
 
+	* sysdeps/x86_64/multiarch/init-arch.c (get_common_indeces):
+	Check max_cpuid and use __cpuid_count.
+
+2011-09-15  H.J. Lu  <hongjiu.lu@intel.com>
+
 	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
 	memset-erms.
 
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index c6b42d4..bc12fa4 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -33,10 +33,11 @@ get_common_indeces (unsigned int *family, unsigned int *model)
 	   __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx,
 	   __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx,
 	   __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx);
-  __cpuid (7, __cpu_features.cpuid[COMMON_CPUID_INDEX_7].eax,
-	   __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx,
-	   __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx,
-	   __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx);
+  if (__cpu_features.max_cpuid >= 7)
+    __cpuid_count (7, 0, __cpu_features.cpuid[COMMON_CPUID_INDEX_7].eax,
+		   __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx,
+		   __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx,
+		   __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx);
 
   unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
   *family = (eax >> 8) & 0x0f;

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0411160a33c7642d4c6788ac0e1dec339b822cb1

commit 0411160a33c7642d4c6788ac0e1dec339b822cb1
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Thu Sep 15 16:36:05 2011 -0700

    Add ERMS optimized 64-bit memset.

diff --git a/ChangeLog.erms b/ChangeLog.erms
index bb303d4..d2a776a 100644
--- a/ChangeLog.erms
+++ b/ChangeLog.erms
@@ -1,6 +1,17 @@
 2011-09-15  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
+	memset-erms.
+
+	* sysdeps/x86_64/multiarch/bzero.S: Optimize for ERMS.
+	* sysdeps/x86_64/multiarch/memset.S: Likewise.
+	* sysdeps/x86_64/multiarch/memset_chk.S: Likewise.
+
+	* sysdeps/x86_64/multiarch/memset-erms.S: New.
+
+2011-09-15  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
 	memcpy-erms, mempcpy-erms and memmove-erms.
 
 	* sysdeps/x86_64/multiarch/memcpy.S: Optimize for ERMS.
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 9b4ea17..4c9f87a 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -16,7 +16,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
 		   strcat-sse2-unaligned strncat-sse2-unaligned \
 		   strcat-ssse3 strncat-ssse3 strlen-sse2-pminub \
 		   strrchr-sse2-no-bsf strchr-sse2-no-bsf \
-		   memcpy-erms mempcpy-erms memmove-erms 
+		   memcpy-erms mempcpy-erms memmove-erms memset-erms
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
 CFLAGS-varshift.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/bzero.S b/sysdeps/x86_64/multiarch/bzero.S
index 9c9eebd..6b7908b 100644
--- a/sysdeps/x86_64/multiarch/bzero.S
+++ b/sysdeps/x86_64/multiarch/bzero.S
@@ -26,13 +26,26 @@ ENTRY(__bzero)
 	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
 	jne	1f
 	call	__init_cpu_features
-1:	leaq	__bzero_x86_64(%rip), %rax
+1:	leaq	__bzero_erms(%rip), %rax
+	testl	$bit_ERMS, __cpu_features+CPUID_OFFSET+index_ERMS(%rip)
+	jnz	2f
+	leaq	__bzero_x86_64(%rip), %rax
 	testl	$bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
 	jz	2f
 	leaq	__bzero_sse2(%rip), %rax
 2:	ret
 END(__bzero)
 
+	.type	__bzero_erms, @function
+__bzero_erms:
+	cfi_startproc
+	CALL_MCOUNT
+	mov	%rsi,%rdx	/* Adjust parameter.  */
+	xorl	%esi,%esi	/* Fill with 0s.  */
+	jmp	__memset_erms
+	cfi_endproc
+	.size __bzero_erms, .-__bzero_erms
+
 	.type	__bzero_sse2, @function
 __bzero_sse2:
 	cfi_startproc
diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset-erms.S
similarity index 52%
copy from sysdeps/x86_64/multiarch/memset_chk.S
copy to sysdeps/x86_64/multiarch/memset-erms.S
index 16afe60..1923a14 100644
--- a/sysdeps/x86_64/multiarch/memset_chk.S
+++ b/sysdeps/x86_64/multiarch/memset-erms.S
@@ -1,5 +1,6 @@
-/* Multiple versions of __memset_chk
-   Copyright (C) 2010 Free Software Foundation, Inc.
+/* memset with Enhanced REP MOVSB/STOSB
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,27 +19,31 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
-#include <init-arch.h>
 
-/* Define multiple versions only for the definition in lib.  */
 #ifndef NOT_IN_libc
+
+	.text
 # ifdef SHARED
-ENTRY(__memset_chk)
-	.type	__memset_chk, @gnu_indirect_function
-	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
-	jne	1f
-	call	__init_cpu_features
-1:	leaq	__memset_chk_x86_64(%rip), %rax
-	testl	$bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
-	jz	2f
-	leaq	__memset_chk_sse2(%rip), %rax
-2:	ret
-END(__memset_chk)
-
-strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
-	.section .gnu.warning.__memset_zero_constant_len_parameter
-	.string "memset used with constant zero length parameter; this could be due to transposed parameters"
+ENTRY (__memset_chk_erms)
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (__memset_chk_erms)
+# endif
+
+ENTRY (__memset_erms)
+# ifdef USE_AS_BZERO_P
+	mov	%rsi, %rcx
+	xor	%eax, %eax
 # else
-#  include "../memset_chk.S"
+	mov	%rdx, %rcx
+	movzbl	%sil, %eax
+	mov	%rdi, %rdx
 # endif
+	rep stosb
+# ifndef USE_AS_BZERO_P
+	mov %rdx, %rax
+# endif
+	ret
+END (__memset_erms)
+
 #endif
diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S
index a8d0e9e..fafd474 100644
--- a/sysdeps/x86_64/multiarch/memset.S
+++ b/sysdeps/x86_64/multiarch/memset.S
@@ -27,7 +27,10 @@ ENTRY(memset)
 	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
 	jne	1f
 	call	__init_cpu_features
-1:	leaq	__memset_x86_64(%rip), %rax
+1:	leaq	__memset_erms(%rip), %rax
+	testl	$bit_ERMS, __cpu_features+CPUID_OFFSET+index_ERMS(%rip)
+	jnz	2f
+	leaq	__memset_x86_64(%rip), %rax
 	testl	$bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
 	jz	2f
 	leaq	__memset_sse2(%rip), %rax
diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S
index 16afe60..bbf3419 100644
--- a/sysdeps/x86_64/multiarch/memset_chk.S
+++ b/sysdeps/x86_64/multiarch/memset_chk.S
@@ -28,7 +28,10 @@ ENTRY(__memset_chk)
 	cmpl	$0, __cpu_features+KIND_OFFSET(%rip)
 	jne	1f
 	call	__init_cpu_features
-1:	leaq	__memset_chk_x86_64(%rip), %rax
+1:	leaq	__memset_chk_erms(%rip), %rax
+	testl	$bit_ERMS, __cpu_features+CPUID_OFFSET+index_ERMS(%rip)
+	jnz	2f
+	leaq	__memset_chk_x86_64(%rip), %rax
 	testl	$bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
 	jz	2f
 	leaq	__memset_chk_sse2(%rip), %rax

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=e290866f605262caf721182df9fe433321c029c7

commit e290866f605262caf721182df9fe433321c029c7
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Thu Sep 15 16:16:10 2011 -0700

    Add ERMS optimized memcpy.

diff --git a/ChangeLog.erms b/ChangeLog.erms
index 42e9dde..bb303d4 100644
--- a/ChangeLog.erms
+++ b/ChangeLog.erms
@@ -1,5 +1,21 @@
 2011-09-15  H.J. Lu  <hongjiu.lu@intel.com>
 
+	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
+	memcpy-erms, mempcpy-erms and memmove-erms.
+
+	* sysdeps/x86_64/multiarch/memcpy.S: Optimize for ERMS.
+	* sysdeps/x86_64/multiarch/memcpy_chk.S: Likewise.
+	* sysdeps/x86_64/multiarch/memmove.c: Likewise.
+	* sysdeps/x86_64/multiarch/memmove_chk.c: Likewise.
+	* sysdeps/x86_64/multiarch/mempcpy.S: Likewise.
+	* sysdeps/x86_64/multiarch/mempcpy_chk.S: Likewise.
+
+	* sysdeps/x86_64/multiarch/memcpy-erms.S: New.
+	* sysdeps/x86_64/multiarch/memmove-erms.S: Likewise.
+	* sysdeps/x86_64/multiarch/mempcpy-erms.S: Likewise.
+
+2011-09-15  H.J. Lu  <hongjiu.lu@intel.com>
+
 	* sysdeps/x86_64/multiarch/ifunc-defines.sym: Add
 	COMMON_CPUID_INDEX_7 and FEATURE_INDEX_7.
 
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index a5254dc..9b4ea17 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -15,7 +15,8 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
 		   stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
 		   strcat-sse2-unaligned strncat-sse2-unaligned \
 		   strcat-ssse3 strncat-ssse3 strlen-sse2-pminub \
-		   strrchr-sse2-no-bsf strchr-sse2-no-bsf
+		   strrchr-sse2-no-bsf strchr-sse2-no-bsf \
+		   memcpy-erms mempcpy-erms memmove-erms 
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
 CFLAGS-varshift.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy-erms.S
similarity index 50%
copy from sysdeps/x86_64/multiarch/mempcpy_chk.S
copy to sysdeps/x86_64/multiarch/memcpy-erms.S
index 024c775..4702619 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy-erms.S
@@ -1,5 +1,5 @@
-/* Multiple versions of __mempcpy_chk
-   Copyright (C) 2010 Free Software Foundation, Inc.
+/* memcpy with Enhanced REP MOVSB/STOSB
+   Copyright (C) 2011 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -19,29 +19,54 @@
    02111-1307 USA.  */
 
 #include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
-   DSO.  There are no multiarch mempcpy functions for static binaries.
- */
-#ifndef NOT_IN_libc
-# ifdef SHARED
-	.text
-ENTRY(__mempcpy_chk)
-	.type	__mempcpy_chk, @gnu_indirect_function
-	cmpl	$0, KIND_OFFSET+__cpu_features(%rip)
-	jne	1f
-	call	__init_cpu_features
-1:	leaq	__mempcpy_chk_sse2(%rip), %rax
-	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
-	jz	2f
-	leaq	__mempcpy_chk_ssse3(%rip), %rax
-	testl	$bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
-	jz	2f
-	leaq	__mempcpy_chk_ssse3_back(%rip), %rax
-2:	ret
-END(__mempcpy_chk)
-# else
-#  include "../mempcpy_chk.S"
+
+#if !defined NOT_IN_libc \
+    && (defined SHARED \
+        || defined USE_AS_MEMMOVE \
+	|| !defined USE_MULTIARCH)
+
+#  include "asm-syntax.h"
+
+#  ifndef MEMCPY
+#   define MEMCPY	__memcpy_erms
+#   define MEMCPY_CHK	__memcpy_chk_erms
+#  endif
+
+	.section .text.erms,"ax",@progbits
+# if !defined USE_AS_BCOPY
+ENTRY (MEMCPY_CHK)
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMCPY_CHK)
 # endif
+
+ENTRY (MEMCPY)
+	mov	%rdi, %rax
+	mov	%rdx, %rcx
+# ifdef USE_AS_MEMPCPY
+	add	%rdx, %rax
+# endif
+
+# ifdef USE_AS_MEMMOVE
+	cmp	%rsi, %rdi
+	ja	L(copy_backward)
+	je	L(bwd_write_0bytes)
+# endif
+
+	rep movsb
+	ret
+
+# ifdef USE_AS_MEMMOVE
+L(copy_backward):
+	lea	-1(%rdi,%rdx), %rdi
+	lea	-1(%rsi,%rdx), %rsi
+	std
+	rep movsb
+	cld
+L(bwd_write_0bytes):
+	ret
+# endif
+
+END (MEMCPY)
+
 #endif
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 9863014..124ac85 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -33,7 +33,10 @@ ENTRY(__new_memcpy)
 	cmpl	$0, KIND_OFFSET+__cpu_features(%rip)
 	jne	1f
 	call	__init_cpu_features
-1:	leaq	__memcpy_sse2(%rip), %rax
+1:	leaq	__memcpy_erms(%rip), %rax
+	testl	$bit_ERMS, __cpu_features+CPUID_OFFSET+index_ERMS(%rip)
+	jnz	2f
+	leaq	__memcpy_sse2(%rip), %rax
 	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
 	jz	2f
 	leaq	__memcpy_ssse3(%rip), %rax
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index 948f61c..6b93847 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -32,7 +32,10 @@ ENTRY(__memcpy_chk)
 	cmpl	$0, KIND_OFFSET+__cpu_features(%rip)
 	jne	1f
 	call	__init_cpu_features
-1:	leaq	__memcpy_chk_sse2(%rip), %rax
+1:	leaq	__memcpy_chk_erms(%rip), %rax
+	testl	$bit_ERMS, __cpu_features+CPUID_OFFSET+index_ERMS(%rip)
+	jnz	2f
+	leaq	__memcpy_chk_sse2(%rip), %rax
 	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
 	jz	2f
 	leaq	__memcpy_chk_ssse3(%rip), %rax
diff --git a/sysdeps/x86_64/multiarch/memmove-erms.S b/sysdeps/x86_64/multiarch/memmove-erms.S
new file mode 100644
index 0000000..357289a
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-erms.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define MEMCPY		__memmove_erms
+#define MEMCPY_CHK	__memmove_chk_erms
+#include "memcpy-erms.S"
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index 04de891..dc04f8c 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -35,15 +35,18 @@
 extern __typeof (memmove) __memmove_sse2 attribute_hidden;
 extern __typeof (memmove) __memmove_ssse3 attribute_hidden;
 extern __typeof (memmove) __memmove_ssse3_back attribute_hidden;
+extern __typeof (memmove) __memmove_erms attribute_hidden;
 
 #include "string/memmove.c"
 
 #ifndef NOT_IN_libc
 libc_ifunc (memmove,
-	    HAS_SSSE3
-	    ? (HAS_FAST_COPY_BACKWARD
-	       ? __memmove_ssse3_back : __memmove_ssse3)
-	    : __memmove_sse2);
+	    HAS_ERMS
+	    ? __memmove_erms
+	    : (HAS_SSSE3
+	       ? (HAS_FAST_COPY_BACKWARD
+		  ? __memmove_ssse3_back : __memmove_ssse3)
+	       : __memmove_sse2));
 
 #if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
 compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c
index 962501d..1439006 100644
--- a/sysdeps/x86_64/multiarch/memmove_chk.c
+++ b/sysdeps/x86_64/multiarch/memmove_chk.c
@@ -25,11 +25,14 @@
 extern __typeof (__memmove_chk) __memmove_chk_sse2 attribute_hidden;
 extern __typeof (__memmove_chk) __memmove_chk_ssse3 attribute_hidden;
 extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden;
+extern __typeof (__memmove_chk) __memmove_chk_erms attribute_hidden;
 
 #include "debug/memmove_chk.c"
 
 libc_ifunc (__memmove_chk,
-	    HAS_SSSE3
-	    ? (HAS_FAST_COPY_BACKWARD
-	       ? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
-	    : __memmove_chk_sse2);
+	    HAS_ERMS
+	    ? __memmove_chk_erms
+	    : (HAS_SSSE3
+	       ? (HAS_FAST_COPY_BACKWARD
+		  ? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
+	       : __memmove_chk_sse2));
diff --git a/sysdeps/x86_64/multiarch/mempcpy-erms.S b/sysdeps/x86_64/multiarch/mempcpy-erms.S
new file mode 100644
index 0000000..01d3bf8
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy-erms.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMPCPY
+#define MEMCPY		__mempcpy_erms
+#define MEMCPY_CHK	__mempcpy_chk_erms
+#include "memcpy-erms.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index e8152d6..44962e9 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -31,6 +31,9 @@ ENTRY(__mempcpy)
 	jne	1f
 	call	__init_cpu_features
-1:	leaq	__mempcpy_sse2(%rip), %rax
+1:	leaq	__mempcpy_erms(%rip), %rax
+	testl	$bit_ERMS, __cpu_features+CPUID_OFFSET+index_ERMS(%rip)
+	jnz	2f
+	leaq	__mempcpy_sse2(%rip), %rax
 	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
 	jz	2f
 	leaq	__mempcpy_ssse3(%rip), %rax
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
index 024c775..716df2b 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S
@@ -32,7 +32,10 @@ ENTRY(__mempcpy_chk)
 	cmpl	$0, KIND_OFFSET+__cpu_features(%rip)
 	jne	1f
 	call	__init_cpu_features
-1:	leaq	__mempcpy_chk_sse2(%rip), %rax
+1:	leaq	__mempcpy_chk_erms(%rip), %rax
+	testl	$bit_ERMS, __cpu_features+CPUID_OFFSET+index_ERMS(%rip)
+	jnz	2f
+	leaq	__mempcpy_chk_sse2(%rip), %rax
 	testl	$bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
 	jz	2f
 	leaq	__mempcpy_chk_ssse3(%rip), %rax

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=09b65da29393b52a452cc5d868db9af8afd2396e

commit 09b65da29393b52a452cc5d868db9af8afd2396e
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Thu Sep 15 15:49:45 2011 -0700

    Add COMMON_CPUID_INDEX_7 and FEATURE_INDEX_7.

diff --git a/ChangeLog.erms b/ChangeLog.erms
index b386fe6..42e9dde 100644
--- a/ChangeLog.erms
+++ b/ChangeLog.erms
@@ -1,5 +1,10 @@
 2011-09-15  H.J. Lu  <hongjiu.lu@intel.com>
 
+	* sysdeps/x86_64/multiarch/ifunc-defines.sym: Add
+	COMMON_CPUID_INDEX_7 and FEATURE_INDEX_7.
+
+2011-09-15  H.J. Lu  <hongjiu.lu@intel.com>
+
 	* sysdeps/x86_64/multiarch/init-arch.c (get_common_indeces):
 	Initialize COMMON_CPUID_INDEX_7.
 
diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym
index eb1538a..448b8c4 100644
--- a/sysdeps/x86_64/multiarch/ifunc-defines.sym
+++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym
@@ -17,4 +17,6 @@ FEATURE_OFFSET		offsetof (struct cpu_features, feature)
 FEATURE_SIZE		sizeof (unsigned int)
 
 COMMON_CPUID_INDEX_1
+COMMON_CPUID_INDEX_7
 FEATURE_INDEX_1
+FEATURE_INDEX_7

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=eedc48396520937344e3f50ed4c68602f14b68b2

commit eedc48396520937344e3f50ed4c68602f14b68b2
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Thu Sep 15 15:47:01 2011 -0700

    Initial ERMS support.

diff --git a/ChangeLog.erms b/ChangeLog.erms
new file mode 100644
index 0000000..b386fe6
--- /dev/null
+++ b/ChangeLog.erms
@@ -0,0 +1,10 @@
+2011-09-15  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/x86_64/multiarch/init-arch.c (get_common_indeces):
+	Initialize COMMON_CPUID_INDEX_7.
+
+	* sysdeps/x86_64/multiarch/init-arch.h (bit_ERMS): New.
+	(index_ERMS): Likewise.
+	(COMMON_CPUID_INDEX_7): Likewise.
+	(FEATURE_INDEX_7): Likewise.
+	(HAS_ERMS): Likewise.
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 0a145ca..c6b42d4 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -33,6 +33,10 @@ get_common_indeces (unsigned int *family, unsigned int *model)
 	   __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx,
 	   __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx,
 	   __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx);
+  __cpuid_count (7, 0, __cpu_features.cpuid[COMMON_CPUID_INDEX_7].eax,
+		 __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx,
+		 __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx,
+		 __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx);
 
   unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
   *family = (eax >> 8) & 0x0f;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 6cfdbdd..c3364d1 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -31,11 +31,13 @@
 # define bit_SSSE3	(1 << 9)
 # define bit_SSE4_1	(1 << 19)
 # define bit_SSE4_2	(1 << 20)
+# define bit_ERMS	(1 << 9)
 
 # define index_SSE2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
 # define index_SSSE3	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
 # define index_SSE4_1	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
 # define index_SSE4_2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
+# define index_ERMS	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
@@ -51,6 +53,7 @@
 enum
   {
     COMMON_CPUID_INDEX_1 = 0,
+    COMMON_CPUID_INDEX_7,
     /* Keep the following line at the end.  */
     COMMON_CPUID_INDEX_MAX
   };
@@ -58,6 +61,7 @@ enum
 enum
   {
     FEATURE_INDEX_1 = 0,
+    FEATURE_INDEX_7,
     /* Keep the following line at the end.  */
     FEATURE_INDEX_MAX
   };
@@ -111,6 +115,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define HAS_SSE4_1	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 19)
 # define HAS_SSE4_2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 20)
 # define HAS_FMA	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12)
+# define HAS_ERMS	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, 9)
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1

-----------------------------------------------------------------------


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]