This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch hjl/pr17711 created. glibc-2.20-448-g16b4600


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, hjl/pr17711 has been created
        at  16b46002c46f8b07dac9d7880ba7b7fc6353c599 (commit)

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=16b46002c46f8b07dac9d7880ba7b7fc6353c599

commit 16b46002c46f8b07dac9d7880ba7b7fc6353c599
Author: Andrew Senkevich <andrew.senkevich@intel.com>
Date:   Mon Dec 29 14:39:46 2014 +0300

    i386: memcpy functions with SSE2 unaligned load/store
    
    These new memcpy functions are the 32-bit version of x86_64 SSE2 unaligned
    memcpy.  Memcpy average performance benefit is 18% on Silvermont; other
    platforms also improved by about 35%.  Benchmarked on Silvermont, Haswell,
    Ivy Bridge, Sandy Bridge and Westmere; performance results are attached in
    
    https://sourceware.org/ml/libc-alpha/2014-07/msg00157.html
    
    	* sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S: New file.
    	* sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S: Likewise.
    	* sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S: Likewise.
    	* sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S: Likewise.
    	* sysdeps/i386/i686/multiarch/bcopy.S: Select the sse2_unaligned
    	version if bit_Fast_Unaligned_Load is set.
    	* sysdeps/i386/i686/multiarch/memcpy.S: Likewise.
    	* sysdeps/i386/i686/multiarch/memcpy_chk.S: Likewise.
    	* sysdeps/i386/i686/multiarch/memmove.S: Likewise.
    	* sysdeps/i386/i686/multiarch/memmove_chk.S: Likewise.
    	* sysdeps/i386/i686/multiarch/mempcpy.S: Likewise.
    	* sysdeps/i386/i686/multiarch/mempcpy_chk.S: Likewise.
    	* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
    	bcopy-sse2-unaligned, memcpy-sse2-unaligned,
    	memmove-sse2-unaligned and mempcpy-sse2-unaligned.
    	* sysdeps/i386/i686/multiarch/ifunc-impl-list.c (MAX_IFUNC): Set
    	to 4.
    	(__libc_ifunc_impl_list): Test __bcopy_sse2_unaligned,
    	__memmove_chk_sse2_unaligned, __memmove_sse2_unaligned,
    	__memcpy_chk_sse2_unaligned, __memcpy_sse2_unaligned,
    	__mempcpy_chk_sse2_unaligned, and __mempcpy_sse2_unaligned.

diff --git a/ChangeLog b/ChangeLog
index 147fdf8..772a5a6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+2014-12-30  Andrew Senkevich  <andrew.senkevich@intel.com>
+	    H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S: New file.
+	* sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S: Likewise.
+	* sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S: Likewise.
+	* sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S: Likewise.
+	* sysdeps/i386/i686/multiarch/bcopy.S: Select the sse2_unaligned
+	version if bit_Fast_Unaligned_Load is set.
+	* sysdeps/i386/i686/multiarch/memcpy.S: Likewise.
+	* sysdeps/i386/i686/multiarch/memcpy_chk.S: Likewise.
+	* sysdeps/i386/i686/multiarch/memmove.S: Likewise.
+	* sysdeps/i386/i686/multiarch/memmove_chk.S: Likewise.
+	* sysdeps/i386/i686/multiarch/mempcpy.S: Likewise.
+	* sysdeps/i386/i686/multiarch/mempcpy_chk.S: Likewise.
+	* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
+	bcopy-sse2-unaligned, memcpy-sse2-unaligned,
+	memmove-sse2-unaligned and mempcpy-sse2-unaligned.
+	* sysdeps/i386/i686/multiarch/ifunc-impl-list.c (MAX_IFUNC): Set
+	to 4.
+	(__libc_ifunc_impl_list): Test __bcopy_sse2_unaligned,
+	__memmove_chk_sse2_unaligned, __memmove_sse2_unaligned,
+	__memcpy_chk_sse2_unaligned, __memcpy_sse2_unaligned,
+	__mempcpy_chk_sse2_unaligned, and __mempcpy_sse2_unaligned.
+
 2014-12-29  Chris Metcalf  <cmetcalf@ezchip.com>
 
 	* sysdeps/unix/sysv/linux/tst-setgetname.c (do_test): Use #ifndef
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 55778cb..11ce4ba 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -23,7 +23,9 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
 		   strnlen-sse2 strnlen-c \
 		   strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \
 		   strncase_l-c strncase-c strncase_l-ssse3 \
-		   strcasecmp_l-sse4 strncase_l-sse4
+		   strcasecmp_l-sse4 strncase_l-sse4 \
+		   bcopy-sse2-unaligned memcpy-sse2-unaligned \
+		   mempcpy-sse2-unaligned memmove-sse2-unaligned
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c
 CFLAGS-varshift.c += -msse4
diff --git a/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S
new file mode 100644
index 0000000..efef2a1
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bcopy-sse2-unaligned.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define USE_AS_BCOPY
+#define MEMCPY		__bcopy_sse2_unaligned
+#include "memcpy-sse2-unaligned.S"
diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/bcopy.S
index a0fca88..4041eed 100644
--- a/sysdeps/i386/i686/multiarch/bcopy.S
+++ b/sysdeps/i386/i686/multiarch/bcopy.S
@@ -35,6 +35,11 @@ ENTRY(bcopy)
 	jne	1f
 	call	__init_cpu_features
 1:	leal	__bcopy_ia32@GOTOFF(%ebx), %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	__bcopy_sse2_unaligned@GOTOFF(%ebx), %eax
+	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__bcopy_ssse3@GOTOFF(%ebx), %eax
diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
index e475776..4efa9c5 100644
--- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
+++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
@@ -23,7 +23,7 @@
 #include "init-arch.h"
 
 /* Maximum number of IFUNC implementations.  */
-#define MAX_IFUNC	3
+#define MAX_IFUNC	4
 
 /* Fill ARRAY of MAX elements with IFUNC implementations for function
    NAME and return the number of valid entries.  */
@@ -41,6 +41,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3,
 			      __bcopy_ssse3_rep)
 	      IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3)
+	      IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSE2,
+			      __bcopy_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/bzero.S.  */
@@ -69,6 +71,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memmove_chk_ssse3_rep)
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
 			      __memmove_chk_ssse3)
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSE2,
+			      __memmove_chk_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
 			      __memmove_chk_ia32))
 
@@ -78,6 +82,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memmove_ssse3_rep)
 	      IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
 			      __memmove_ssse3)
+	      IFUNC_IMPL_ADD (array, i, memmove, HAS_SSE2,
+			      __memmove_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/memrchr.S.  */
@@ -268,6 +274,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __memcpy_chk_ssse3_rep)
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
 			      __memcpy_chk_ssse3)
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSE2,
+			      __memcpy_chk_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
 			      __memcpy_chk_ia32))
 
@@ -276,6 +284,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3,
 			      __memcpy_ssse3_rep)
 	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3)
+	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSE2,
+			      __memcpy_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S.  */
@@ -284,6 +294,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __mempcpy_chk_ssse3_rep)
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
 			      __mempcpy_chk_ssse3)
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSE2,
+			      __mempcpy_chk_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
 			      __mempcpy_chk_ia32))
 
@@ -293,6 +305,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __mempcpy_ssse3_rep)
 	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
 			      __mempcpy_ssse3)
+	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSE2,
+			      __mempcpy_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
 
   /* Support sysdeps/i386/i686/multiarch/strlen.S.  */
diff --git a/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S
new file mode 100644
index 0000000..ff89de2
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memcpy-sse2-unaligned.S
@@ -0,0 +1,681 @@
+/* memcpy optimized with SSE2 unaligned memory access instructions.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc) \
+    && (defined SHARED \
+	|| defined USE_AS_MEMMOVE \
+	|| !defined USE_MULTIARCH)
+
+# include <sysdep.h>
+# include "asm-syntax.h"
+
+# ifndef MEMCPY
+#  define MEMCPY	__memcpy_sse2_unaligned
+#  define MEMCPY_CHK	__memcpy_chk_sse2_unaligned
+# endif
+
+# ifdef USE_AS_BCOPY
+#  define SRC		PARMS
+#  define DEST		SRC+4
+#  define LEN		DEST+4
+# else
+#  define DEST		PARMS
+#  define SRC		DEST+4
+#  define LEN		SRC+4
+# endif
+
+# define CFI_PUSH(REG)		\
+  cfi_adjust_cfa_offset (4);		\
+  cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG)		\
+  cfi_adjust_cfa_offset (-4);		\
+  cfi_restore (REG)
+
+# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
+# define POP(REG)	popl REG; CFI_POP (REG)
+
+# define PARMS		8		/* Preserve EBX.  */
+# define ENTRANCE	PUSH (%ebx);
+# define RETURN_END	POP (%ebx); ret
+# define RETURN	RETURN_END; CFI_PUSH (%ebx)
+
+	.section .text.sse2,"ax",@progbits
+# if !defined USE_AS_BCOPY
+ENTRY (MEMCPY_CHK)
+	movl	12(%esp), %eax
+	cmpl	%eax, 16(%esp)
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMCPY_CHK)
+# endif
+
+ENTRY (MEMCPY)
+	ENTRANCE
+	movl	LEN(%esp), %ecx
+	movl	SRC(%esp), %eax
+	movl	DEST(%esp), %edx
+	cmp	%edx, %eax
+
+# ifdef USE_AS_MEMMOVE
+	jg	L(check_forward)
+
+L(mm_len_0_or_more_backward):
+/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128]
+	separately.  */
+	cmp	$16, %ecx
+	jbe	L(mm_len_0_16_bytes_backward)
+
+	cmpl	$32, %ecx
+	jg	L(mm_len_32_or_more_backward)
+
+/* Copy [0..32] and return.  */
+	movdqu	(%eax), %xmm0
+	movdqu	-16(%eax, %ecx), %xmm1
+	movdqu	%xmm0, (%edx)
+	movdqu	%xmm1, -16(%edx, %ecx)
+	jmp	L(return)
+
+L(mm_len_32_or_more_backward):
+	cmpl	$64, %ecx
+	jg	L(mm_len_64_or_more_backward)
+
+/* Copy [0..64] and return.  */
+	movdqu	(%eax), %xmm0
+	movdqu	16(%eax), %xmm1
+	movdqu	-16(%eax, %ecx), %xmm2
+	movdqu	-32(%eax, %ecx), %xmm3
+	movdqu	%xmm0, (%edx)
+	movdqu	%xmm1, 16(%edx)
+	movdqu	%xmm2, -16(%edx, %ecx)
+	movdqu	%xmm3, -32(%edx, %ecx)
+	jmp	L(return)
+
+L(mm_len_64_or_more_backward):
+	cmpl	$128, %ecx
+	jg	L(mm_len_128_or_more_backward)
+
+/* Copy [0..128] and return.  */
+	movdqu	(%eax), %xmm0
+	movdqu	16(%eax), %xmm1
+	movdqu	32(%eax), %xmm2
+	movdqu	48(%eax), %xmm3
+	movdqu	-64(%eax, %ecx), %xmm4
+	movdqu	-48(%eax, %ecx), %xmm5
+	movdqu	-32(%eax, %ecx), %xmm6
+	movdqu	-16(%eax, %ecx), %xmm7
+	movdqu	%xmm0, (%edx)
+	movdqu	%xmm1, 16(%edx)
+	movdqu	%xmm2, 32(%edx)
+	movdqu	%xmm3, 48(%edx)
+	movdqu	%xmm4, -64(%edx, %ecx)
+	movdqu	%xmm5, -48(%edx, %ecx)
+	movdqu	%xmm6, -32(%edx, %ecx)
+	movdqu	%xmm7, -16(%edx, %ecx)
+	jmp	L(return)
+
+L(mm_len_128_or_more_backward):
+	add	%ecx, %eax
+	cmp	%edx, %eax
+	movl	SRC(%esp), %eax
+	jle	L(forward)
+	PUSH (%esi)
+	PUSH (%edi)
+	PUSH (%ebx)
+
+/* Aligning the address of destination. */
+	movdqu	(%eax), %xmm4
+	movdqu	16(%eax), %xmm5
+	movdqu	32(%eax), %xmm6
+	movdqu	48(%eax), %xmm7
+	leal	(%edx, %ecx), %esi
+	movdqu	-16(%eax, %ecx), %xmm0
+	subl	$16, %esp
+	movdqu	%xmm0, (%esp)
+	mov	%ecx, %edi
+	movl	%esi, %ecx
+	andl	$-16, %ecx
+	leal	(%ecx), %ebx
+	subl	%edx, %ebx
+	leal	(%eax, %ebx), %eax
+	shrl	$6, %ebx
+
+# ifdef SHARED_CACHE_SIZE_HALF
+	cmp	$SHARED_CACHE_SIZE_HALF, %edi
+# else
+#  ifdef SHARED
+	PUSH (%ebx)
+	SETUP_PIC_REG (bx)
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %edi
+	POP (%ebx)
+#  else
+	cmp	__x86_shared_cache_size_half, %edi
+#  endif
+# endif
+	jae	L(mm_large_page_loop_backward)
+
+	.p2align 4
+L(mm_main_loop_backward):
+
+	prefetcht0 -128(%eax)
+
+	movdqu	-64(%eax), %xmm0
+	movdqu	-48(%eax), %xmm1
+	movdqu	-32(%eax), %xmm2
+	movdqu	-16(%eax), %xmm3
+	movaps	%xmm0, -64(%ecx)
+	subl	$64, %eax
+	movaps	%xmm1, -48(%ecx)
+	movaps	%xmm2, -32(%ecx)
+	movaps	%xmm3, -16(%ecx)
+	subl	$64, %ecx
+	sub	$1, %ebx
+	jnz	L(mm_main_loop_backward)
+	movdqu	(%esp), %xmm0
+	addl	$16, %esp
+	movdqu	%xmm0, -16(%esi)
+	movdqu	%xmm4, (%edx)
+	movdqu	%xmm5, 16(%edx)
+	movdqu	%xmm6, 32(%edx)
+	movdqu	%xmm7, 48(%edx)
+	POP (%ebx)
+	jmp	L(mm_return_pop_all)
+
+/* Copy [0..16] and return.  */
+L(mm_len_0_16_bytes_backward):
+	testb	$24, %cl
+	jnz	L(mm_len_9_16_bytes_backward)
+	testb	$4, %cl
+	.p2align 4,,5
+	jnz	L(mm_len_5_8_bytes_backward)
+	testl	%ecx, %ecx
+	.p2align 4,,2
+	je	L(return)
+	testb	$2, %cl
+	.p2align 4,,1
+	jne	L(mm_len_3_4_bytes_backward)
+	movzbl	-1(%eax,%ecx), %ebx
+	movzbl	(%eax), %eax
+	movb	%bl, -1(%edx,%ecx)
+	movb	%al, (%edx)
+	jmp	L(return)
+
+L(mm_len_3_4_bytes_backward):
+	movzwl	-2(%eax,%ecx), %ebx
+	movzwl	(%eax), %eax
+	movw	%bx, -2(%edx,%ecx)
+	movw	%ax, (%edx)
+	jmp	L(return)
+
+L(mm_len_9_16_bytes_backward):
+	PUSH (%esi)
+	movl	-4(%eax,%ecx), %ebx
+	movl	-8(%eax,%ecx), %esi
+	movl	%ebx, -4(%edx,%ecx)
+	movl	%esi, -8(%edx,%ecx)
+	subl	$8, %ecx
+	POP (%esi)
+	jmp	L(mm_len_0_16_bytes_backward)
+
+L(mm_len_5_8_bytes_backward):
+	movl	(%eax), %ebx
+	movl	-4(%eax,%ecx), %eax
+	movl	%ebx, (%edx)
+	movl	%eax, -4(%edx,%ecx)
+	jmp	L(return)
+
+/* Big length copy backward part.  */
+	.p2align 4
+L(mm_large_page_loop_backward):
+	movdqu	-64(%eax), %xmm0
+	movdqu	-48(%eax), %xmm1
+	movdqu	-32(%eax), %xmm2
+	movdqu	-16(%eax), %xmm3
+	movntdq	%xmm0, -64(%ecx)
+	subl	$64, %eax
+	movntdq	%xmm1, -48(%ecx)
+	movntdq	%xmm2, -32(%ecx)
+	movntdq	%xmm3, -16(%ecx)
+	subl	$64, %ecx
+	sub	$1, %ebx
+	jnz	L(mm_large_page_loop_backward)
+	sfence
+	movdqu	(%esp), %xmm0
+	addl	$16, %esp
+	movdqu	%xmm0, -16(%esi)
+	movdqu	%xmm4, (%edx)
+	movdqu	%xmm5, 16(%edx)
+	movdqu	%xmm6, 32(%edx)
+	movdqu	%xmm7, 48(%edx)
+	POP (%ebx)
+	jmp	L(mm_return_pop_all)
+
+L(check_forward):
+	add	%edx, %ecx
+	cmp	%eax, %ecx
+	movl	LEN(%esp), %ecx
+	jle	L(forward)
+
+/* Now do checks for lengths. We do [0..16], [16..32], [32..64], [64..128]
+	separately.  */
+	cmp	$16, %ecx
+	jbe	L(mm_len_0_16_bytes_forward)
+
+	cmpl	$32, %ecx
+	ja	L(mm_len_32_or_more_forward)
+
+/* Copy [0..32] and return.  */
+	movdqu	(%eax), %xmm0
+	movdqu	-16(%eax, %ecx), %xmm1
+	movdqu	%xmm0, (%edx)
+	movdqu	%xmm1, -16(%edx, %ecx)
+	jmp	L(return)
+
+L(mm_len_32_or_more_forward):
+	cmpl	$64, %ecx
+	ja	L(mm_len_64_or_more_forward)
+
+/* Copy [0..64] and return.  */
+	movdqu	(%eax), %xmm0
+	movdqu	16(%eax), %xmm1
+	movdqu	-16(%eax, %ecx), %xmm2
+	movdqu	-32(%eax, %ecx), %xmm3
+	movdqu	%xmm0, (%edx)
+	movdqu	%xmm1, 16(%edx)
+	movdqu	%xmm2, -16(%edx, %ecx)
+	movdqu	%xmm3, -32(%edx, %ecx)
+	jmp	L(return)
+
+L(mm_len_64_or_more_forward):
+	cmpl	$128, %ecx
+	ja	L(mm_len_128_or_more_forward)
+
+/* Copy [0..128] and return.  */
+	movdqu	(%eax), %xmm0
+	movdqu	16(%eax), %xmm1
+	movdqu	32(%eax), %xmm2
+	movdqu	48(%eax), %xmm3
+	movdqu	-64(%eax, %ecx), %xmm4
+	movdqu	-48(%eax, %ecx), %xmm5
+	movdqu	-32(%eax, %ecx), %xmm6
+	movdqu	-16(%eax, %ecx), %xmm7
+	movdqu	%xmm0, (%edx)
+	movdqu	%xmm1, 16(%edx)
+	movdqu	%xmm2, 32(%edx)
+	movdqu	%xmm3, 48(%edx)
+	movdqu	%xmm4, -64(%edx, %ecx)
+	movdqu	%xmm5, -48(%edx, %ecx)
+	movdqu	%xmm6, -32(%edx, %ecx)
+	movdqu	%xmm7, -16(%edx, %ecx)
+	jmp	L(return)
+
+L(mm_len_128_or_more_forward):
+	PUSH (%esi)
+	PUSH (%edi)
+	PUSH (%ebx)
+
+/* Aligning the address of destination. */
+	movdqu	-16(%eax, %ecx), %xmm4
+	movdqu	-32(%eax, %ecx), %xmm5
+	movdqu	-48(%eax, %ecx), %xmm6
+	movdqu	-64(%eax, %ecx), %xmm7
+	leal	(%edx, %ecx), %esi
+	movdqu	(%eax), %xmm0
+	subl	$16, %esp
+	movdqu	%xmm0, (%esp)
+	mov	%ecx, %edi
+	leal	16(%edx), %ecx
+	andl	$-16, %ecx
+	movl	%ecx, %ebx
+	subl	%edx, %ebx
+	addl	%ebx, %eax
+	movl	%esi, %ebx
+	subl	%ecx, %ebx
+	shrl	$6, %ebx
+
+# ifdef SHARED_CACHE_SIZE_HALF
+	cmp	$SHARED_CACHE_SIZE_HALF, %edi
+# else
+#  ifdef SHARED
+	PUSH (%ebx)
+	SETUP_PIC_REG(bx)
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %edi
+	POP (%ebx)
+#  else
+	cmp	__x86_shared_cache_size_half, %edi
+#  endif
+# endif
+	jae	L(mm_large_page_loop_forward)
+
+	.p2align 4
+L(mm_main_loop_forward):
+
+	prefetcht0 128(%eax)
+
+	movdqu	(%eax), %xmm0
+	movdqu	16(%eax), %xmm1
+	movdqu	32(%eax), %xmm2
+	movdqu	48(%eax), %xmm3
+	movdqa	%xmm0, (%ecx)
+	addl	$64, %eax
+	movaps	%xmm1, 16(%ecx)
+	movaps	%xmm2, 32(%ecx)
+	movaps	%xmm3, 48(%ecx)
+	addl	$64, %ecx
+	sub	$1, %ebx
+	jnz	L(mm_main_loop_forward)
+	movdqu	(%esp), %xmm0
+	addl	$16, %esp
+	movdqu	%xmm0, (%edx)
+	movdqu	%xmm4, -16(%esi)
+	movdqu	%xmm5, -32(%esi)
+	movdqu	%xmm6, -48(%esi)
+	movdqu	%xmm7, -64(%esi)
+	POP (%ebx)
+	jmp	L(mm_return_pop_all)
+
+L(mm_len_0_16_bytes_forward):
+	testb	$24, %cl
+	jne	L(mm_len_9_16_bytes_forward)
+	testb	$4, %cl
+	.p2align 4,,5
+	jne	L(mm_len_5_8_bytes_forward)
+	testl	%ecx, %ecx
+	.p2align 4,,2
+	je	L(return)
+	testb	$2, %cl
+	.p2align 4,,1
+	jne	L(mm_len_2_4_bytes_forward)
+	movzbl	-1(%eax,%ecx), %ebx
+	movzbl	(%eax), %eax
+	movb	%bl, -1(%edx,%ecx)
+	movb	%al, (%edx)
+	jmp	L(return)
+
+L(mm_len_2_4_bytes_forward):
+	movzwl	-2(%eax,%ecx), %ebx
+	movzwl	(%eax), %eax
+	movw	%bx, -2(%edx,%ecx)
+	movw	%ax, (%edx)
+	jmp	L(return)
+
+L(mm_len_5_8_bytes_forward):
+	movl	(%eax), %ebx
+	movl	-4(%eax,%ecx), %eax
+	movl	%ebx, (%edx)
+	movl	%eax, -4(%edx,%ecx)
+	jmp	L(return)
+
+L(mm_len_9_16_bytes_forward):
+	movq	(%eax), %xmm0
+	movq	-8(%eax, %ecx), %xmm1
+	movq	%xmm0, (%edx)
+	movq	%xmm1, -8(%edx, %ecx)
+	jmp	L(return)
+
+L(mm_return_pop_all):
+	movl	%edx, %eax
+	POP (%edi)
+	POP (%esi)
+	RETURN
+
+/* Big length copy forward part.  */
+	.p2align 4
+L(mm_large_page_loop_forward):
+	movdqu	(%eax), %xmm0
+	movdqu	16(%eax), %xmm1
+	movdqu	32(%eax), %xmm2
+	movdqu	48(%eax), %xmm3
+	movntdq	%xmm0, (%ecx)
+	addl	$64, %eax
+	movntdq	%xmm1, 16(%ecx)
+	movntdq	%xmm2, 32(%ecx)
+	movntdq	%xmm3, 48(%ecx)
+	addl	$64, %ecx
+	sub	$1, %ebx
+	jnz	L(mm_large_page_loop_forward)
+	sfence
+	movdqu	(%esp), %xmm0
+	addl	$16, %esp
+	movdqu	%xmm0, (%edx)
+	movdqu	%xmm4, -16(%esi)
+	movdqu	%xmm5, -32(%esi)
+	movdqu	%xmm6, -48(%esi)
+	movdqu	%xmm7, -64(%esi)
+	POP (%ebx)
+	jmp	L(mm_return_pop_all)
+# endif
+
+L(forward):
+	cmp	$16, %ecx
+	jbe	L(len_0_16_bytes)
+
+# ifdef SHARED_CACHE_SIZE_HALF
+	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
+# else
+#  ifdef SHARED
+	SETUP_PIC_REG(bx)
+	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
+#  else
+	cmp	__x86_shared_cache_size_half, %ecx
+#  endif
+# endif
+	jae     L(large_page)
+
+	movdqu	(%eax), %xmm0
+	movdqu	-16(%eax, %ecx), %xmm1
+	cmpl    $32, %ecx
+	movdqu	%xmm0, (%edx)
+	movdqu	%xmm1, -16(%edx, %ecx)
+	jbe	L(return)
+
+	movdqu	16(%eax), %xmm0
+	movdqu	-32(%eax, %ecx), %xmm1
+	cmpl    $64, %ecx
+	movdqu	%xmm0, 16(%edx)
+	movdqu	%xmm1, -32(%edx, %ecx)
+	jbe	L(return)
+
+	movdqu	32(%eax), %xmm0
+	movdqu	48(%eax), %xmm1
+	movdqu	-48(%eax, %ecx), %xmm2
+	movdqu	-64(%eax, %ecx), %xmm3
+	cmpl    $128, %ecx
+	movdqu	%xmm0, 32(%edx)
+	movdqu	%xmm1, 48(%edx)
+	movdqu	%xmm2, -48(%edx, %ecx)
+	movdqu	%xmm3, -64(%edx, %ecx)
+	jbe	L(return)
+
+/* Now the main loop: we align the address of the destination.  */
+	leal	64(%edx), %ebx
+	andl	$-64, %ebx
+
+	addl	%edx, %ecx
+	andl	$-64, %ecx
+
+	subl	%edx, %eax
+
+/* We should stop two iterations before the termination
+	(in order not to misprefetch).  */
+	subl	$64, %ecx
+	cmpl	%ebx, %ecx
+	je	L(main_loop_just_one_iteration)
+
+	subl	$64, %ecx
+	cmpl	%ebx, %ecx
+	je	L(main_loop_last_two_iterations)
+
+	.p2align 4
+L(main_loop_cache):
+
+	prefetcht0 128(%ebx, %eax)
+
+	movdqu	(%ebx, %eax), %xmm0
+	movdqu	16(%ebx, %eax), %xmm1
+	movdqu	32(%ebx, %eax), %xmm2
+	movdqu	48(%ebx, %eax), %xmm3
+	movdqa	%xmm0, (%ebx)
+	movaps	%xmm1, 16(%ebx)
+	movaps	%xmm2, 32(%ebx)
+	movaps	%xmm3, 48(%ebx)
+	lea	64(%ebx), %ebx
+	cmpl	%ebx, %ecx
+	jne	L(main_loop_cache)
+
+L(main_loop_last_two_iterations):
+	movdqu	(%ebx, %eax), %xmm0
+	movdqu	16(%ebx, %eax), %xmm1
+	movdqu	32(%ebx, %eax), %xmm2
+	movdqu	48(%ebx, %eax), %xmm3
+	movdqu	64(%ebx, %eax), %xmm4
+	movdqu	80(%ebx, %eax), %xmm5
+	movdqu	96(%ebx, %eax), %xmm6
+	movdqu	112(%ebx, %eax), %xmm7
+	movdqa	%xmm0, (%ebx)
+	movaps	%xmm1, 16(%ebx)
+	movaps	%xmm2, 32(%ebx)
+	movaps	%xmm3, 48(%ebx)
+	movaps	%xmm4, 64(%ebx)
+	movaps	%xmm5, 80(%ebx)
+	movaps	%xmm6, 96(%ebx)
+	movaps	%xmm7, 112(%ebx)
+	jmp	L(return)
+
+L(main_loop_just_one_iteration):
+	movdqu	(%ebx, %eax), %xmm0
+	movdqu	16(%ebx, %eax), %xmm1
+	movdqu	32(%ebx, %eax), %xmm2
+	movdqu	48(%ebx, %eax), %xmm3
+	movdqa	%xmm0, (%ebx)
+	movaps	%xmm1, 16(%ebx)
+	movaps	%xmm2, 32(%ebx)
+	movaps	%xmm3, 48(%ebx)
+	jmp	L(return)
+
+L(large_page):
+	movdqu	(%eax), %xmm0
+	movdqu	16(%eax), %xmm1
+	movdqu	32(%eax), %xmm2
+	movdqu	48(%eax), %xmm3
+	movdqu	-64(%eax, %ecx), %xmm4
+	movdqu	-48(%eax, %ecx), %xmm5
+	movdqu	-32(%eax, %ecx), %xmm6
+	movdqu	-16(%eax, %ecx), %xmm7
+	movdqu	%xmm0, (%edx)
+	movdqu	%xmm1, 16(%edx)
+	movdqu	%xmm2, 32(%edx)
+	movdqu	%xmm3, 48(%edx)
+	movdqu	%xmm4, -64(%edx, %ecx)
+	movdqu	%xmm5, -48(%edx, %ecx)
+	movdqu	%xmm6, -32(%edx, %ecx)
+	movdqu	%xmm7, -16(%edx, %ecx)
+
+	movdqu	64(%eax), %xmm0
+	movdqu	80(%eax), %xmm1
+	movdqu	96(%eax), %xmm2
+	movdqu	112(%eax), %xmm3
+	movdqu	-128(%eax, %ecx), %xmm4
+	movdqu	-112(%eax, %ecx), %xmm5
+	movdqu	-96(%eax, %ecx), %xmm6
+	movdqu	-80(%eax, %ecx), %xmm7
+	movdqu	%xmm0, 64(%edx)
+	movdqu	%xmm1, 80(%edx)
+	movdqu	%xmm2, 96(%edx)
+	movdqu	%xmm3, 112(%edx)
+	movdqu	%xmm4, -128(%edx, %ecx)
+	movdqu	%xmm5, -112(%edx, %ecx)
+	movdqu	%xmm6, -96(%edx, %ecx)
+	movdqu	%xmm7, -80(%edx, %ecx)
+
+/* Now the main loop with non temporal stores. We align
+	the address of the destination.  */
+	leal	128(%edx), %ebx
+	andl	$-128, %ebx
+
+	addl	%edx, %ecx
+	andl	$-128, %ecx
+
+	subl	%edx, %eax
+
+	.p2align 4
+L(main_loop_large_page):
+	movdqu	(%ebx, %eax), %xmm0
+	movdqu	16(%ebx, %eax), %xmm1
+	movdqu	32(%ebx, %eax), %xmm2
+	movdqu	48(%ebx, %eax), %xmm3
+	movdqu	64(%ebx, %eax), %xmm4
+	movdqu	80(%ebx, %eax), %xmm5
+	movdqu	96(%ebx, %eax), %xmm6
+	movdqu	112(%ebx, %eax), %xmm7
+	movntdq	%xmm0, (%ebx)
+	movntdq	%xmm1, 16(%ebx)
+	movntdq	%xmm2, 32(%ebx)
+	movntdq	%xmm3, 48(%ebx)
+	movntdq	%xmm4, 64(%ebx)
+	movntdq	%xmm5, 80(%ebx)
+	movntdq	%xmm6, 96(%ebx)
+	movntdq	%xmm7, 112(%ebx)
+	lea	128(%ebx), %ebx
+	cmpl	%ebx, %ecx
+	jne	L(main_loop_large_page)
+	sfence
+	jmp	L(return)
+
+L(len_0_16_bytes):
+	testb	$24, %cl
+	jne	L(len_9_16_bytes)
+	testb	$4, %cl
+	.p2align 4,,5
+	jne	L(len_5_8_bytes)
+	testl	%ecx, %ecx
+	.p2align 4,,2
+	je	L(return)
+	movzbl	(%eax), %ebx
+	testb	$2, %cl
+	movb	%bl, (%edx)
+	je	L(return)
+	movzwl	-2(%eax,%ecx), %ebx
+	movw	%bx, -2(%edx,%ecx)
+	jmp	L(return)
+
+L(len_9_16_bytes):
+	movq	(%eax), %xmm0
+	movq	-8(%eax, %ecx), %xmm1
+	movq	%xmm0, (%edx)
+	movq	%xmm1, -8(%edx, %ecx)
+	jmp	L(return)
+
+L(len_5_8_bytes):
+	movl	(%eax), %ebx
+	movl	%ebx, (%edx)
+	movl	-4(%eax,%ecx), %ebx
+	movl	%ebx, -4(%edx,%ecx)
+
+L(return):
+	movl	%edx, %eax
+# if !defined USE_AS_BCOPY && defined USE_AS_MEMPCPY
+	movl	LEN(%esp), %ecx
+	add	%ecx, %eax
+# endif
+	RETURN
+
+END (MEMCPY)
+#endif
diff --git a/sysdeps/i386/i686/multiarch/memcpy.S b/sysdeps/i386/i686/multiarch/memcpy.S
index 76195a5..845492c 100644
--- a/sysdeps/i386/i686/multiarch/memcpy.S
+++ b/sysdeps/i386/i686/multiarch/memcpy.S
@@ -36,6 +36,11 @@ ENTRY(memcpy)
 	jne	1f
 	call	__init_cpu_features
 1:	leal	__memcpy_ia32@GOTOFF(%ebx), %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	__memcpy_sse2_unaligned@GOTOFF(%ebx), %eax
+	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__memcpy_ssse3@GOTOFF(%ebx), %eax
diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.S b/sysdeps/i386/i686/multiarch/memcpy_chk.S
index c67968e..415d910 100644
--- a/sysdeps/i386/i686/multiarch/memcpy_chk.S
+++ b/sysdeps/i386/i686/multiarch/memcpy_chk.S
@@ -37,6 +37,11 @@ ENTRY(__memcpy_chk)
 	jne	1f
 	call	__init_cpu_features
 1:	leal	__memcpy_chk_ia32@GOTOFF(%ebx), %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	__memcpy_chk_sse2_unaligned@GOTOFF(%ebx), %eax
+	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__memcpy_chk_ssse3@GOTOFF(%ebx), %eax
diff --git a/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S
new file mode 100644
index 0000000..3873594
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memmove-sse2-unaligned.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE
+#define MEMCPY		__memmove_sse2_unaligned
+#define MEMCPY_CHK	__memmove_chk_sse2_unaligned
+#include "memcpy-sse2-unaligned.S"
diff --git a/sysdeps/i386/i686/multiarch/memmove.S b/sysdeps/i386/i686/multiarch/memmove.S
index d8de7c6..29644dd 100644
--- a/sysdeps/i386/i686/multiarch/memmove.S
+++ b/sysdeps/i386/i686/multiarch/memmove.S
@@ -35,6 +35,11 @@ ENTRY(memmove)
 	jne	1f
 	call	__init_cpu_features
 1:	leal	__memmove_ia32@GOTOFF(%ebx), %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	__memmove_sse2_unaligned@GOTOFF(%ebx), %eax
+	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__memmove_ssse3@GOTOFF(%ebx), %eax
@@ -63,6 +68,11 @@ ENTRY(memmove)
 	jne	1f
 	call	__init_cpu_features
 1:	leal	__memmove_ia32, %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
+	jz	2f
+	leal	__memmove_sse2_unaligned, %eax
+	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
+	jnz	2f
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
 	jz	2f
 	leal	__memmove_ssse3, %eax
diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.S b/sysdeps/i386/i686/multiarch/memmove_chk.S
index 3442ce1..fea9b54 100644
--- a/sysdeps/i386/i686/multiarch/memmove_chk.S
+++ b/sysdeps/i386/i686/multiarch/memmove_chk.S
@@ -35,6 +35,11 @@ ENTRY(__memmove_chk)
 	jne	1f
 	call	__init_cpu_features
 1:	leal	__memmove_chk_ia32@GOTOFF(%ebx), %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	__memmove_chk_sse2_unaligned@GOTOFF(%ebx), %eax
+	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__memmove_chk_ssse3@GOTOFF(%ebx), %eax
@@ -54,6 +59,11 @@ ENTRY(__memmove_chk)
 	jne	1f
 	call	__init_cpu_features
 1:	leal	__memmove_chk_ia32, %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features
+	jz	2f
+	leal	__memmove_chk_sse2_unaligned, %eax
+	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features
+	jnz	2f
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features
 	jz	2f
 	leal	__memmove_chk_ssse3, %eax
@@ -63,6 +73,18 @@ ENTRY(__memmove_chk)
 2:	ret
 END(__memmove_chk)
 
+	.type __memmove_chk_sse2_unaligned, @function
+	.p2align 4;
+__memmove_chk_sse2_unaligned:
+	cfi_startproc
+	CALL_MCOUNT
+	movl	12(%esp), %eax
+	cmpl	%eax, 16(%esp)
+	jb	__chk_fail
+	jmp	__memmove_sse2_unaligned
+	cfi_endproc
+	.size __memmove_chk_sse2_unaligned, .-__memmove_chk_sse2_unaligned
+
 	.type __memmove_chk_ssse3, @function
 	.p2align 4;
 __memmove_chk_ssse3:
diff --git a/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S b/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S
new file mode 100644
index 0000000..a1cea50
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mempcpy-sse2-unaligned.S
@@ -0,0 +1,4 @@
+#define USE_AS_MEMPCPY
+#define MEMCPY		__mempcpy_sse2_unaligned
+#define MEMCPY_CHK	__mempcpy_chk_sse2_unaligned
+#include "memcpy-sse2-unaligned.S"
diff --git a/sysdeps/i386/i686/multiarch/mempcpy.S b/sysdeps/i386/i686/multiarch/mempcpy.S
index 3aab63d..fd8b82c 100644
--- a/sysdeps/i386/i686/multiarch/mempcpy.S
+++ b/sysdeps/i386/i686/multiarch/mempcpy.S
@@ -36,6 +36,11 @@ ENTRY(__mempcpy)
 	jne	1f
 	call	__init_cpu_features
 1:	leal	__mempcpy_ia32@GOTOFF(%ebx), %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	__mempcpy_sse2_unaligned@GOTOFF(%ebx), %eax
+	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__mempcpy_ssse3@GOTOFF(%ebx), %eax
diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/sysdeps/i386/i686/multiarch/mempcpy_chk.S
index 5d81f4e..ed23b1b 100644
--- a/sysdeps/i386/i686/multiarch/mempcpy_chk.S
+++ b/sysdeps/i386/i686/multiarch/mempcpy_chk.S
@@ -36,7 +36,13 @@ ENTRY(__mempcpy_chk)
 	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
 	jne	1f
 	call	__init_cpu_features
+	/* Default to ia32; label 1 stays here so "jne 1f" also loads it.  */
 1:	leal	__mempcpy_chk_ia32@GOTOFF(%ebx), %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	__mempcpy_chk_sse2_unaligned@GOTOFF(%ebx), %eax
+	testl	$bit_Fast_Unaligned_Load, FEATURE_OFFSET+index_Fast_Unaligned_Load+__cpu_features@GOTOFF(%ebx)
+	jnz	2f
 	testl	$bit_SSSE3, CPUID_OFFSET+index_SSSE3+__cpu_features@GOTOFF(%ebx)
 	jz	2f
 	leal	__mempcpy_chk_ssse3@GOTOFF(%ebx), %eax

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=ad42cb249a9f2c005536993c5e2eb7a783650d0a

commit ad42cb249a9f2c005536993c5e2eb7a783650d0a
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Dec 19 14:05:35 2014 -0800

    Replace %ld with %jd and cast to intmax_t

diff --git a/ChangeLog b/ChangeLog
index 5657017..147fdf8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -275,6 +275,7 @@
 	* sysdeps/pthread/tst-timer.c (main): Likewise.
 	* time/clocktest.c (main): Likewise.
 	* time/tst-posixtz.c (do_test): Likewise.
+	* timezone/tst-timezone.c (main): Likewise.
 
 2014-12-19  H.J. Lu  <hongjiu.lu@intel.com>
 
diff --git a/timezone/tst-timezone.c b/timezone/tst-timezone.c
index b5edfff..135a72f 100644
--- a/timezone/tst-timezone.c
+++ b/timezone/tst-timezone.c
@@ -126,8 +126,8 @@ main (int argc, char ** argv)
     strcpy (envstring, "TZ=Europe/London");
     putenv (envstring);
     t = mktime (&tmBuf);
-    snprintf (buf, sizeof (buf), "TZ=%s %ld %d %d %d %d %d %d %d %d %d",
-	      getenv ("TZ"), t,
+    snprintf (buf, sizeof (buf), "TZ=%s %jd %d %d %d %d %d %d %d %d %d",
+	      getenv ("TZ"), (intmax_t) t,
 	      tmBuf.tm_sec, tmBuf.tm_min, tmBuf.tm_hour,
 	      tmBuf.tm_mday, tmBuf.tm_mon, tmBuf.tm_year,
 	      tmBuf.tm_wday, tmBuf.tm_yday, tmBuf.tm_isdst);
@@ -149,8 +149,8 @@ main (int argc, char ** argv)
     strcpy (envstring, "TZ=GMT");
     /* No putenv call needed!  */
     t = mktime (&tmBuf);
-    snprintf (buf, sizeof (buf), "TZ=%s %ld %d %d %d %d %d %d %d %d %d",
-	      getenv ("TZ"), t,
+    snprintf (buf, sizeof (buf), "TZ=%s %jd %d %d %d %d %d %d %d %d %d",
+	      getenv ("TZ"), (intmax_t) t,
 	      tmBuf.tm_sec, tmBuf.tm_min, tmBuf.tm_hour,
 	      tmBuf.tm_mday, tmBuf.tm_mon, tmBuf.tm_year,
 	      tmBuf.tm_wday, tmBuf.tm_yday, tmBuf.tm_isdst);

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=42c44514d1e85f4c6454f3c31d04f12abc4d2cab

commit 42c44514d1e85f4c6454f3c31d04f12abc4d2cab
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Dec 19 14:04:35 2014 -0800

    Replace %ld with %jd and cast to intmax_t

diff --git a/ChangeLog b/ChangeLog
index 5e61007..5657017 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -273,6 +273,8 @@
 	* stdio-common/tst-fmemopen2.c (do_test): Replace %ld with %jd
 	and cast to intmax_t.
 	* sysdeps/pthread/tst-timer.c (main): Likewise.
+	* time/clocktest.c (main): Likewise.
+	* time/tst-posixtz.c (do_test): Likewise.
 
 2014-12-19  H.J. Lu  <hongjiu.lu@intel.com>
 
diff --git a/time/clocktest.c b/time/clocktest.c
index f2b3ea7..2e6457d 100644
--- a/time/clocktest.c
+++ b/time/clocktest.c
@@ -28,8 +28,8 @@ main (int argc, char ** argv)
   while (!gotit);
   stop = clock ();
 
-  printf ("%ld clock ticks per second (start=%ld,stop=%ld)\n",
-	  stop - start, start, stop);
+  printf ("%jd clock ticks per second (start=%jd,stop=%jd)\n",
+	  (intmax_t) (stop - start), (intmax_t) start, (intmax_t) stop);
   printf ("CLOCKS_PER_SEC=%ld, sysconf(_SC_CLK_TCK)=%ld\n",
 	  CLOCKS_PER_SEC, sysconf(_SC_CLK_TCK));
   return 0;
diff --git a/time/tst-posixtz.c b/time/tst-posixtz.c
index c1ea267..16aa19d 100644
--- a/time/tst-posixtz.c
+++ b/time/tst-posixtz.c
@@ -39,7 +39,8 @@ do_test (void)
       char buf[100];
       struct tm *tmp;
 
-      printf ("TZ = \"%s\", time = %ld => ", tests[cnt].tz, tests[cnt].when);
+      printf ("TZ = \"%s\", time = %jd => ", tests[cnt].tz,
+	      (intmax_t) tests[cnt].when);
       fflush (stdout);
 
       setenv ("TZ", tests[cnt].tz, 1);

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=9da05423f7a8d21af0175140f23e93cf12fb00fa

commit 9da05423f7a8d21af0175140f23e93cf12fb00fa
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Dec 19 14:02:58 2014 -0800

    Replace %ld with %jd and cast to intmax_t

diff --git a/ChangeLog b/ChangeLog
index 9ca4aaa..5e61007 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -272,6 +272,7 @@
 	(do_test): Likewise.
 	* stdio-common/tst-fmemopen2.c (do_test): Replace %ld with %jd
 	and cast to intmax_t.
+	* sysdeps/pthread/tst-timer.c (main): Likewise.
 
 2014-12-19  H.J. Lu  <hongjiu.lu@intel.com>
 
diff --git a/sysdeps/pthread/tst-timer.c b/sysdeps/pthread/tst-timer.c
index 16adf3c..f310f6e 100644
--- a/sysdeps/pthread/tst-timer.c
+++ b/sysdeps/pthread/tst-timer.c
@@ -90,13 +90,13 @@ main (void)
 
   setvbuf (stdout, 0, _IOLBF, 0);
 
-  printf ("clock_gettime returned %d, timespec = { %ld, %ld }\n",
-	  retval, ts.tv_sec, ts.tv_nsec);
+  printf ("clock_gettime returned %d, timespec = { %jd, %jd }\n",
+	  retval, (intmax_t) ts.tv_sec, (intmax_t) ts.tv_nsec);
 
   retval = clock_getres (CLOCK_REALTIME, &ts);
 
-  printf ("clock_getres returned %d, timespec = { %ld, %ld }\n",
-	  retval, ts.tv_sec, ts.tv_nsec);
+  printf ("clock_getres returned %d, timespec = { %jd, %jd }\n",
+	  retval, (intmax_t) ts.tv_sec, (intmax_t) ts.tv_nsec);
 
   if (timer_create (CLOCK_REALTIME, &sigev1, &timer_sig) != 0)
     {

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=894e321ef3bb8ab2777b2b6a6bac3a6a8f0b0839

commit 894e321ef3bb8ab2777b2b6a6bac3a6a8f0b0839
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Dec 19 14:01:40 2014 -0800

    Replace %ld with %jd and cast to intmax_t

diff --git a/ChangeLog b/ChangeLog
index bdf409a..9ca4aaa 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -270,6 +270,8 @@
 	* rt/tst-timer4.c: Include <stdint.h>.
 	(check_ts): Replace %ld with %jd and cast to intmax_t.
 	(do_test): Likewise.
+	* stdio-common/tst-fmemopen2.c (do_test): Replace %ld with %jd
+	and cast to intmax_t.
 
 2014-12-19  H.J. Lu  <hongjiu.lu@intel.com>
 
diff --git a/stdio-common/tst-fmemopen2.c b/stdio-common/tst-fmemopen2.c
index c2a4baa..e9d8b63 100644
--- a/stdio-common/tst-fmemopen2.c
+++ b/stdio-common/tst-fmemopen2.c
@@ -21,14 +21,15 @@ do_test (void)
   off_t o = ftello (fp);
   if (o != nstr)
     {
-      printf ("first ftello returned %ld, expected %zu\n", o, nstr);
+      printf ("first ftello returned %jd, expected %zu\n",
+	      (intmax_t) o, nstr);
       result = 1;
     }
   rewind (fp);
   o = ftello (fp);
   if (o != 0)
     {
-      printf ("second ftello returned %ld, expected 0\n", o);
+      printf ("second ftello returned %jd, expected 0\n", (intmax_t) o);
       result = 1;
     }
   if (fseeko (fp, 0, SEEK_END) != 0)
@@ -39,7 +40,8 @@ do_test (void)
   o = ftello (fp);
   if (o != nstr)
     {
-      printf ("third ftello returned %ld, expected %zu\n", o, nstr);
+      printf ("third ftello returned %jd, expected %zu\n",
+	      (intmax_t) o, nstr);
       result = 1;
     }
   rewind (fp);
@@ -50,7 +52,8 @@ do_test (void)
   o = ftello (fp);
   if (o != nstr2)
     {
-      printf ("fourth ftello returned %ld, expected %zu\n", o, nstr2);
+      printf ("fourth ftello returned %jd, expected %zu\n",
+	      (intmax_t) o, nstr2);
       result = 1;
     }
   fclose (fp);

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=7945b1ba325ed972f0c2283ed88a29088de6b3d0

commit 7945b1ba325ed972f0c2283ed88a29088de6b3d0
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Dec 19 14:00:59 2014 -0800

    Replace %ld/%lu with %jd/%ju and cast to intmax_t/uintmax_t

diff --git a/ChangeLog b/ChangeLog
index f355ce6..bdf409a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -256,6 +256,20 @@
 	(do_test): Replace %ld with %jd and cast to intmax_t.
 	* posix/tst-regex.c (run_test): Likewise.
 	(run_test_backwards): Likewise.
+	* rt/tst-clock.c: Include <stdint.h>.
+	(clock_test): Replace %ld with %jd and cast to intmax_t.
+	* rt/tst-cpuclock1.c: Include <stdint.h>.
+	(do_test): Replace %lu with %ju and cast to uintmax_t.
+	* rt/tst-cpuclock2.c: Include <stdint.h>.
+	(do_test): Replace %lu with %ju and cast to uintmax_t.
+	* rt/tst-mqueue1.c: Include <stdint.h>.
+	(check_attrs): Replace %ld with %jd and cast to intmax_t.
+	* rt/tst-mqueue3.c (fct): Replace %ld with %jd and cast to
+	intmax_t.
+	* rt/tst-mqueue4.c (do_test): Likewise.
+	* rt/tst-timer4.c: Include <stdint.h>.
+	(check_ts): Replace %ld with %jd and cast to intmax_t.
+	(do_test): Likewise.
 
 2014-12-19  H.J. Lu  <hongjiu.lu@intel.com>
 
diff --git a/rt/tst-clock.c b/rt/tst-clock.c
index f6133f5..1cfdcfe 100644
--- a/rt/tst-clock.c
+++ b/rt/tst-clock.c
@@ -20,6 +20,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <time.h>
+#include <stdint.h>
 
 
 /* We want to see output immediately.  */
@@ -51,8 +52,8 @@ clock_test (clockid_t cl)
 	  result = 1;
 	}
       else
-	printf ("clock %d: resolution = %ld.%09ld secs\n",
-		cl, ts.tv_sec, ts.tv_nsec);
+	printf ("clock %d: resolution = %jd.%09jd secs\n",
+		cl, (intmax_t) ts.tv_sec, (intmax_t) ts.tv_nsec);
     }
   else
     {
@@ -76,8 +77,8 @@ clock_test (clockid_t cl)
 	    }
 	  else
 	    {
-	      printf ("clock %d: time = %ld.%09ld secs\n",
-		      cl, ts.tv_sec, ts.tv_nsec);
+	      printf ("clock %d: time = %jd.%09jd secs\n",
+		      cl, (intmax_t) ts.tv_sec, (intmax_t) ts.tv_nsec);
 
 	      if (memcmp (&ts, &old_ts, sizeof ts) == 0)
 		{
diff --git a/rt/tst-cpuclock1.c b/rt/tst-cpuclock1.c
index f503bc2..ef140fd 100644
--- a/rt/tst-cpuclock1.c
+++ b/rt/tst-cpuclock1.c
@@ -24,6 +24,7 @@
 #include <string.h>
 #include <errno.h>
 #include <signal.h>
+#include <stdint.h>
 #include <sys/wait.h>
 
 /* This function is intended to rack up both user and system time.  */
@@ -119,8 +120,9 @@ do_test (void)
       result = 1;
       goto done;
     }
-  printf ("live PID %d clock %lx resolution %lu.%.9lu\n",
-	  child, (unsigned long int) child_clock, res.tv_sec, res.tv_nsec);
+  printf ("live PID %d clock %lx resolution %ju.%.9ju\n",
+	  child, (unsigned long int) child_clock,
+	  (uintmax_t) res.tv_sec, (uintmax_t) res.tv_nsec);
 
   struct timespec before, after;
   if (clock_gettime (child_clock, &before) < 0)
@@ -131,8 +133,8 @@ do_test (void)
       goto done;
     }
   /* Should be close to 0.0.  */
-  printf ("live PID %d before sleep => %lu.%.9lu\n",
-	  child, before.tv_sec, before.tv_nsec);
+  printf ("live PID %d before sleep => %ju.%.9ju\n",
+	  child, (uintmax_t) before.tv_sec, (uintmax_t) before.tv_nsec);
 
   struct timespec sleeptime = { .tv_nsec = 500000000 };
   if (nanosleep (&sleeptime, NULL) != 0)
@@ -150,8 +152,8 @@ do_test (void)
       goto done;
     }
   /* Should be close to 0.5.  */
-  printf ("live PID %d after sleep => %lu.%.9lu\n",
-	  child, after.tv_sec, after.tv_nsec);
+  printf ("live PID %d after sleep => %ju.%.9ju\n",
+	  child, (uintmax_t) after.tv_sec, (uintmax_t) after.tv_nsec);
 
   struct timespec diff = { .tv_sec = after.tv_sec - before.tv_sec,
 			   .tv_nsec = after.tv_nsec - before.tv_nsec };
@@ -164,8 +166,8 @@ do_test (void)
       || diff.tv_nsec > 600000000
       || diff.tv_nsec < 100000000)
     {
-      printf ("before - after %lu.%.9lu outside reasonable range\n",
-	      diff.tv_sec, diff.tv_nsec);
+      printf ("before - after %ju.%.9ju outside reasonable range\n",
+	      (uintmax_t) diff.tv_sec, (uintmax_t) diff.tv_nsec);
       result = 1;
     }
 
@@ -203,8 +205,8 @@ do_test (void)
 	      || d.tv_nsec < sleeptime.tv_nsec
 	      || d.tv_nsec > sleeptime.tv_nsec * 2)
 	    {
-	      printf ("nanosleep time %lu.%.9lu outside reasonable range\n",
-		      d.tv_sec, d.tv_nsec);
+	      printf ("nanosleep time %ju.%.9ju outside reasonable range\n",
+		      (uintmax_t) d.tv_sec, (uintmax_t) d.tv_nsec);
 	      result = 1;
 	    }
 	}
@@ -236,8 +238,8 @@ do_test (void)
       goto done;
     }
   /* Should be close to 0.6.  */
-  printf ("dead PID %d => %lu.%.9lu\n",
-	  child, dead.tv_sec, dead.tv_nsec);
+  printf ("dead PID %d => %ju.%.9ju\n",
+	  child, (uintmax_t) dead.tv_sec, (uintmax_t) dead.tv_nsec);
 
   diff.tv_sec = dead.tv_sec - after.tv_sec;
   diff.tv_nsec = dead.tv_nsec - after.tv_nsec;
@@ -248,8 +250,8 @@ do_test (void)
     }
   if (diff.tv_sec != 0 || diff.tv_nsec > 200000000)
     {
-      printf ("dead - after %lu.%.9lu outside reasonable range\n",
-	      diff.tv_sec, diff.tv_nsec);
+      printf ("dead - after %ju.%.9ju outside reasonable range\n",
+	      (uintmax_t) diff.tv_sec, (uintmax_t) diff.tv_nsec);
       result = 1;
     }
 
@@ -265,9 +267,9 @@ do_test (void)
 
   if (clock_gettime (child_clock, &dead) == 0)
     {
-      printf ("clock_gettime on reaped PID %d clock %lx => %lu%.9lu\n",
+      printf ("clock_gettime on reaped PID %d clock %lx => %ju%.9ju\n",
 	      child, (unsigned long int) child_clock,
-	      dead.tv_sec, dead.tv_nsec);
+	      (uintmax_t) dead.tv_sec, (uintmax_t) dead.tv_nsec);
       result = 1;
     }
   else
@@ -280,9 +282,9 @@ do_test (void)
 
   if (clock_getres (child_clock, &dead) == 0)
     {
-      printf ("clock_getres on reaped PID %d clock %lx => %lu%.9lu\n",
+      printf ("clock_getres on reaped PID %d clock %lx => %ju%.9ju\n",
 	      child, (unsigned long int) child_clock,
-	      dead.tv_sec, dead.tv_nsec);
+	      (uintmax_t) dead.tv_sec, (uintmax_t) dead.tv_nsec);
       result = 1;
     }
   else
diff --git a/rt/tst-cpuclock2.c b/rt/tst-cpuclock2.c
index 8a447e1..caaa94f 100644
--- a/rt/tst-cpuclock2.c
+++ b/rt/tst-cpuclock2.c
@@ -17,6 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <unistd.h>
+#include <stdint.h>
 
 #if (_POSIX_THREADS - 0) <= 0
 
@@ -221,8 +222,9 @@ do_test (void)
       result = 1;
       return 1;
     }
-  printf ("live thread clock %lx resolution %lu.%.9lu\n",
-	  (unsigned long int) th_clock, res.tv_sec, res.tv_nsec);
+  printf ("live thread clock %lx resolution %ju.%.9ju\n",
+	  (unsigned long int) th_clock,
+	  (uintmax_t) res.tv_sec, (uintmax_t) res.tv_nsec);
 
   struct timespec process_before, process_after;
   if (clock_gettime (process_clock, &process_before) < 0)
@@ -239,8 +241,8 @@ do_test (void)
 	      (unsigned long int) th_clock, strerror (errno));
       return 1;
     }
-  printf ("live thread before sleep => %lu.%.9lu\n",
-	  before.tv_sec, before.tv_nsec);
+  printf ("live thread before sleep => %ju.%.9ju\n",
+	  (uintmax_t) before.tv_sec, (uintmax_t) before.tv_nsec);
 
   struct timespec me_before, me_after;
   if (clock_gettime (my_thread_clock, &me_before) < 0)
@@ -249,8 +251,8 @@ do_test (void)
 	      (unsigned long int) my_thread_clock, strerror (errno));
       return 1;
     }
-  printf ("self thread before sleep => %lu.%.9lu\n",
-	  me_before.tv_sec, me_before.tv_nsec);
+  printf ("self thread before sleep => %ju.%.9ju\n",
+	  (uintmax_t) me_before.tv_sec, (uintmax_t) me_before.tv_nsec);
 
   struct timespec sleeptime = { .tv_nsec = 500000000 };
   if (nanosleep (&sleeptime, NULL) != 0)
@@ -265,8 +267,8 @@ do_test (void)
 	      (unsigned long int) th_clock, strerror (errno));
       return 1;
     }
-  printf ("live thread after sleep => %lu.%.9lu\n",
-	  after.tv_sec, after.tv_nsec);
+  printf ("live thread after sleep => %ju.%.9ju\n",
+	  (uintmax_t) after.tv_sec, (uintmax_t) after.tv_nsec);
 
   if (clock_gettime (process_clock, &process_after) < 0)
     {
@@ -281,8 +283,8 @@ do_test (void)
 	      (unsigned long int) my_thread_clock, strerror (errno));
       return 1;
     }
-  printf ("self thread after sleep => %lu.%.9lu\n",
-	  me_after.tv_sec, me_after.tv_nsec);
+  printf ("self thread after sleep => %ju.%.9ju\n",
+	  (uintmax_t) me_after.tv_sec, (uintmax_t) me_after.tv_nsec);
 
   unsigned long long int th_diff = tsdiff (&before, &after);
   unsigned long long int pdiff = tsdiff (&process_before, &process_after);
diff --git a/rt/tst-mqueue1.c b/rt/tst-mqueue1.c
index c242c37..e310fb3 100644
--- a/rt/tst-mqueue1.c
+++ b/rt/tst-mqueue1.c
@@ -26,6 +26,7 @@
 #include <sys/wait.h>
 #include <time.h>
 #include <unistd.h>
+#include <stdint.h>
 #include "tst-mqueue.h"
 
 static int
@@ -46,20 +47,21 @@ check_attrs (struct mq_attr *attr, int nonblock, long cnt)
   if (attr->mq_maxmsg != 10 || attr->mq_msgsize != 1)
     {
       printf ("attributes don't match those passed to mq_open\n"
-	      "mq_maxmsg %ld, mq_msgsize %ld\n",
-	      attr->mq_maxmsg, attr->mq_msgsize);
+	      "mq_maxmsg %jd, mq_msgsize %jd\n",
+	      (intmax_t) attr->mq_maxmsg, (intmax_t) attr->mq_msgsize);
       result = 1;
     }
 
   if ((attr->mq_flags & O_NONBLOCK) != nonblock)
     {
-      printf ("mq_flags %lx != %x\n", (attr->mq_flags & O_NONBLOCK), nonblock);
+      printf ("mq_flags %jx != %x\n",
+	      (intmax_t) (attr->mq_flags & O_NONBLOCK), nonblock);
       result = 1;
     }
 
   if (attr->mq_curmsgs != cnt)
     {
-      printf ("mq_curmsgs %ld != %ld\n", attr->mq_curmsgs, cnt);
+      printf ("mq_curmsgs %jd != %ld\n", (intmax_t) attr->mq_curmsgs, cnt);
       result = 1;
     }
 
diff --git a/rt/tst-mqueue3.c b/rt/tst-mqueue3.c
index 4607e1b..c7270a1 100644
--- a/rt/tst-mqueue3.c
+++ b/rt/tst-mqueue3.c
@@ -61,14 +61,14 @@ fct (union sigval s)
   /* Check the values.  */
   if (attr.mq_maxmsg != MAXMSG)
     {
-      printf ("%s: mq_maxmsg wrong: is %ld, expecte %d\n",
-	      __FUNCTION__, attr.mq_maxmsg, MAXMSG);
+      printf ("%s: mq_maxmsg wrong: is %jd, expecte %d\n",
+	      __FUNCTION__, (intmax_t) attr.mq_maxmsg, MAXMSG);
       exit (1);
     }
   if (attr.mq_msgsize != MAXMSG)
     {
-      printf ("%s: mq_msgsize wrong: is %ld, expecte %d\n",
-	      __FUNCTION__, attr.mq_msgsize, MSGSIZE);
+      printf ("%s: mq_msgsize wrong: is %jd, expecte %d\n",
+	      __FUNCTION__, (intmax_t) attr.mq_msgsize, MSGSIZE);
       exit (1);
     }
 
diff --git a/rt/tst-mqueue4.c b/rt/tst-mqueue4.c
index 60603be..d6733c2 100644
--- a/rt/tst-mqueue4.c
+++ b/rt/tst-mqueue4.c
@@ -139,9 +139,10 @@ do_test (void)
       || attr.mq_msgsize != 2
       || attr.mq_curmsgs != 2)
     {
-      printf ("mq_getattr returned unexpected { .mq_flags = %ld,\n"
-	      ".mq_maxmsg = %ld, .mq_msgsize = %ld, .mq_curmsgs = %ld }\n",
-	      attr.mq_flags, attr.mq_maxmsg, attr.mq_msgsize, attr.mq_curmsgs);
+      printf ("mq_getattr returned unexpected { .mq_flags = %jd,\n"
+	      ".mq_maxmsg = %jd, .mq_msgsize = %jd, .mq_curmsgs = %jd }\n",
+	      (intmax_t) attr.mq_flags, (intmax_t) attr.mq_maxmsg,
+	      (intmax_t) attr.mq_msgsize, (intmax_t) attr.mq_curmsgs);
       result = 1;
     }
 
@@ -215,9 +216,10 @@ do_test (void)
       || attr.mq_msgsize != 2
       || attr.mq_curmsgs != 1)
     {
-      printf ("mq_getattr returned unexpected { .mq_flags = %ld,\n"
-	      ".mq_maxmsg = %ld, .mq_msgsize = %ld, .mq_curmsgs = %ld }\n",
-	      attr.mq_flags, attr.mq_maxmsg, attr.mq_msgsize, attr.mq_curmsgs);
+      printf ("mq_getattr returned unexpected { .mq_flags = %jd,\n"
+	      ".mq_maxmsg = %jd, .mq_msgsize = %jd, .mq_curmsgs = %jd }\n",
+	      (intmax_t) attr.mq_flags, (intmax_t) attr.mq_maxmsg,
+	      (intmax_t) attr.mq_msgsize, (intmax_t) attr.mq_curmsgs);
       result = 1;
     }
 
diff --git a/rt/tst-timer4.c b/rt/tst-timer4.c
index f64fadd..a397663 100644
--- a/rt/tst-timer4.c
+++ b/rt/tst-timer4.c
@@ -23,6 +23,7 @@
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
+#include <stdint.h>
 #if _POSIX_THREADS
 # include <pthread.h>
 
@@ -168,9 +169,9 @@ check_ts (const char *name, const struct timespec *start,
       || (end->tv_sec == ts.tv_sec && end->tv_nsec < ts.tv_nsec))
     {
       printf ("\
-*** timer %s invoked too soon: %ld.%09ld instead of expected %ld.%09ld\n",
-	      name, (long) end->tv_sec, end->tv_nsec,
-	      (long) ts.tv_sec, ts.tv_nsec);
+*** timer %s invoked too soon: %ld.%09jd instead of expected %ld.%09jd\n",
+	      name, (long) end->tv_sec, (intmax_t) end->tv_nsec,
+	      (long) ts.tv_sec, (intmax_t) ts.tv_nsec);
       return 1;
     }
   else
@@ -200,8 +201,8 @@ do_test (void)
       result = 1;
     }
   else
-    printf ("clock_gettime returned timespec = { %ld, %ld }\n",
-	    (long) ts.tv_sec, ts.tv_nsec);
+    printf ("clock_gettime returned timespec = { %ld, %jd }\n",
+	    (long) ts.tv_sec, (intmax_t) ts.tv_nsec);
 
   if (clock_getres (TEST_CLOCK, &ts) != 0)
     {
@@ -209,8 +210,8 @@ do_test (void)
       result = 1;
     }
   else
-    printf ("clock_getres returned timespec = { %ld, %ld }\n",
-	    (long) ts.tv_sec, ts.tv_nsec);
+    printf ("clock_getres returned timespec = { %ld, %jd }\n",
+	    (long) ts.tv_sec, (intmax_t) ts.tv_nsec);
 
   struct sigevent ev;
   memset (&ev, 0x11, sizeof (ev));
@@ -488,9 +489,9 @@ do_test (void)
 	   || it.it_interval.tv_sec || it.it_interval.tv_nsec)
     {
       printf ("\
-*** timer_gettime timer_none returned { %ld.%09ld, %ld.%09ld }\n",
-	      (long) it.it_value.tv_sec, it.it_value.tv_nsec,
-	      (long) it.it_interval.tv_sec, it.it_interval.tv_nsec);
+*** timer_gettime timer_none returned { %ld.%09jd, %ld.%09jd }\n",
+	      (long) it.it_value.tv_sec, (intmax_t) it.it_value.tv_nsec,
+	      (long) it.it_interval.tv_sec, (intmax_t) it.it_interval.tv_nsec);
       result = 1;
     }
 
@@ -603,8 +604,8 @@ do_test (void)
   else if (it.it_interval.tv_sec || it.it_interval.tv_nsec != 100000000)
     {
       printf ("\
-!!! second timer_gettime timer_none returned it_interval %ld.%09ld\n",
-	      (long) it.it_interval.tv_sec, it.it_interval.tv_nsec);
+!!! second timer_gettime timer_none returned it_interval %ld.%09jd\n",
+	      (long) it.it_interval.tv_sec, (intmax_t) it.it_interval.tv_nsec);
       /* FIXME: For now disabled.
       result = 1; */
     }

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0a253b22546a24b9f286626a7f5d0810cc631134

commit 0a253b22546a24b9f286626a7f5d0810cc631134
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Dec 19 13:52:08 2014 -0800

    Replace %ld with %jd and cast to intmax_t

diff --git a/ChangeLog b/ChangeLog
index e6959f9..f355ce6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -254,6 +254,8 @@
 	* libio/tst-ftell-active-handler.c (do_append_test): Likewise.
 	* nptl/tst-mutex5.c: Include <stdint.h>.
 	(do_test): Replace %ld with %jd and cast to intmax_t.
+	* posix/tst-regex.c (run_test): Likewise.
+	(run_test_backwards): Likewise.
 
 2014-12-19  H.J. Lu  <hongjiu.lu@intel.com>
 
diff --git a/posix/tst-regex.c b/posix/tst-regex.c
index 7df0bd1..0e086ab 100644
--- a/posix/tst-regex.c
+++ b/posix/tst-regex.c
@@ -274,8 +274,8 @@ run_test (const char *expr, const char *mem, size_t memlen, int icase,
 	      finish.tv_sec -= start.tv_sec;
 	    }
 
-	  printf ("elapsed time: %ld.%09ld sec\n",
-		  finish.tv_sec, finish.tv_nsec);
+	  printf ("elapsed time: %jd.%09jd sec\n",
+		  (intmax_t) finish.tv_sec, (intmax_t) finish.tv_nsec);
 	}
     }
 
@@ -327,8 +327,8 @@ run_test (const char *expr, const char *mem, size_t memlen, int icase,
 		mintime = finish;
 	    }
 	}
-      printf ("elapsed time: %ld.%09ld sec\n",
-	      mintime.tv_sec, mintime.tv_nsec);
+      printf ("elapsed time: %jd.%09jd sec\n",
+	      (intmax_t) mintime.tv_sec, (intmax_t) mintime.tv_nsec);
     }
 #endif
 
@@ -420,8 +420,8 @@ run_test_backwards (const char *expr, const char *mem, size_t memlen,
 	      finish.tv_sec -= start.tv_sec;
 	    }
 
-	  printf ("elapsed time: %ld.%09ld sec\n",
-		  finish.tv_sec, finish.tv_nsec);
+	  printf ("elapsed time: %jd.%09jd sec\n",
+		  (intmax_t) finish.tv_sec, (intmax_t) finish.tv_nsec);
 	}
     }
 
@@ -489,8 +489,8 @@ run_test_backwards (const char *expr, const char *mem, size_t memlen,
 		mintime = finish;
 	    }
 	}
-      printf ("elapsed time: %ld.%09ld sec\n",
-	      mintime.tv_sec, mintime.tv_nsec);
+      printf ("elapsed time: %jd.%09jd sec\n",
+	      (intmax_t) mintime.tv_sec, (intmax_t) mintime.tv_nsec);
     }
 #endif
 

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0d50b0de969c1eac165a5eb1e1cbd96488edc38e

commit 0d50b0de969c1eac165a5eb1e1cbd96488edc38e
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Dec 19 13:51:13 2014 -0800

    Replace %ld with %jd and cast to intmax_t

diff --git a/ChangeLog b/ChangeLog
index 845cc5f..e6959f9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -252,6 +252,8 @@
 	* io/test-utime.c (main): Replace %ld with %jd and cast to
 	intmax_t.
 	* libio/tst-ftell-active-handler.c (do_append_test): Likewise.
+	* nptl/tst-mutex5.c: Include <stdint.h>.
+	(do_test): Replace %ld with %jd and cast to intmax_t.
 
 2014-12-19  H.J. Lu  <hongjiu.lu@intel.com>
 
diff --git a/nptl/tst-mutex5.c b/nptl/tst-mutex5.c
index 6988840..48e1ea8 100644
--- a/nptl/tst-mutex5.c
+++ b/nptl/tst-mutex5.c
@@ -22,6 +22,7 @@
 #include <time.h>
 #include <unistd.h>
 #include <sys/time.h>
+#include <stdint.h>
 #include <config.h>
 
 
@@ -132,8 +133,8 @@ do_test (void)
 
       if (tv2.tv_sec < 2)
 	{
-	  printf ("premature timeout: %ld.%06ld difference\n",
-		  tv2.tv_sec, tv2.tv_usec);
+	  printf ("premature timeout: %jd.%06jd difference\n",
+		  (intmax_t) tv2.tv_sec, (intmax_t) tv2.tv_usec);
 	  return 1;
 	}
     }

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=153a21b224ea507aa2a1995e704ff7eac8dda57d

commit 153a21b224ea507aa2a1995e704ff7eac8dda57d
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Dec 19 13:49:58 2014 -0800

    Replace %ld with %jd and cast to intmax_t

diff --git a/ChangeLog b/ChangeLog
index a338a4c..845cc5f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -251,6 +251,7 @@
 	[BZ #17732]
 	* io/test-utime.c (main): Replace %ld with %jd and cast to
 	intmax_t.
+	* libio/tst-ftell-active-handler.c (do_append_test): Likewise.
 
 2014-12-19  H.J. Lu  <hongjiu.lu@intel.com>
 
diff --git a/libio/tst-ftell-active-handler.c b/libio/tst-ftell-active-handler.c
index 7be75bc..186aec6 100644
--- a/libio/tst-ftell-active-handler.c
+++ b/libio/tst-ftell-active-handler.c
@@ -574,23 +574,23 @@ do_append_test (const char *filename)
 
   if (seek_ret != new_seek_ret)
     {
-      printf ("incorrectly modified file offset to %ld, should be %ld",
-	      new_seek_ret, seek_ret);
+      printf ("incorrectly modified file offset to %jd, should be %jd",
+	      (intmax_t)  new_seek_ret, (intmax_t) seek_ret);
       ret |= 1;
     }
   else
-    printf ("retained current file offset %ld", seek_ret);
+    printf ("retained current file offset %jd", (intmax_t) seek_ret);
 
   new_seek_ret = ftello (fp);
 
   if (seek_ret != new_seek_ret)
     {
-      printf (", ftello reported incorrect offset %ld, should be %ld\n",
-	      new_seek_ret, seek_ret);
+      printf (", ftello reported incorrect offset %jd, should be %jd\n",
+	      (intmax_t) new_seek_ret, (intmax_t) seek_ret);
       ret |= 1;
     }
   else
-    printf (", ftello reported correct offset %ld\n", seek_ret);
+    printf (", ftello reported correct offset %jd\n", (intmax_t) seek_ret);
 
   fclose (fp);
 

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=75e20e33689b13ca0290802f6797ec8ed5277dee

commit 75e20e33689b13ca0290802f6797ec8ed5277dee
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Dec 19 13:48:40 2014 -0800

    Replace %ld with %jd and cast to intmax_t

diff --git a/ChangeLog b/ChangeLog
index d5aeddc..a338a4c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -248,6 +248,12 @@
 
 2014-12-19  H.J. Lu  <hongjiu.lu@intel.com>
 
+	[BZ #17732]
+	* io/test-utime.c (main): Replace %ld with %jd and cast to
+	intmax_t.
+
+2014-12-19  H.J. Lu  <hongjiu.lu@intel.com>
+
 	* sysdeps/x86_64/x32/Makefile (CFLAGS-s_llround.c): Replace
 	-Wno-error with -fno-builtin-lround.
 
diff --git a/io/test-utime.c b/io/test-utime.c
index 26a5464..afb8a29 100644
--- a/io/test-utime.c
+++ b/io/test-utime.c
@@ -109,25 +109,29 @@ main (int argc, char *argv[])
 
   if (st.st_mtime != ut.modtime)
     {
-      printf ("modtime %ld != %ld\n", st.st_mtime, ut.modtime);
+      printf ("modtime %jd != %jd\n",
+	      (intmax_t) st.st_mtime, (intmax_t) ut.modtime);
       return 1;
     }
 
   if (st.st_atime != ut.actime)
     {
-      printf ("actime %ld != %ld\n", st.st_atime, ut.actime);
+      printf ("actime %jd != %jd\n",
+	      (intmax_t) st.st_atime, (intmax_t) ut.actime);
       return 1;
     }
 
   if (stnow.st_mtime < now1 || stnow.st_mtime > now2)
     {
-      printf ("modtime %ld <%ld >%ld\n", stnow.st_mtime, now1, now2);
+      printf ("modtime %jd <%jd >%jd\n",
+	      (intmax_t) stnow.st_mtime, (intmax_t) now1, (intmax_t) now2);
       return 1;
     }
 
   if (stnow.st_atime < now1 || stnow.st_atime > now2)
     {
-      printf ("actime %ld <%ld >%ld\n", stnow.st_atime, now1, now2);
+      printf ("actime %jd <%jd >%jd\n",
+	      (intmax_t) stnow.st_atime, (intmax_t) now1, (intmax_t) now2);
       return 1;
     }
 

http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=307629b06e290654cbd797b4976df5432670591a

commit 307629b06e290654cbd797b4976df5432670591a
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Dec 19 06:30:31 2014 -0800

    Compile vismain with -fPIC and link with -pie

diff --git a/elf/Makefile b/elf/Makefile
index 9e07073..4563421 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -130,7 +130,7 @@ endif
 tests += $(tests-static)
 ifeq (yes,$(build-shared))
 tests += loadtest restest1 preloadtest loadfail multiload origtest resolvfail \
-	 constload1 order vismain noload filter unload \
+	 constload1 order noload filter unload \
 	 reldep reldep2 reldep3 reldep4 nodelete nodelete2 \
 	 nodlopen nodlopen2 neededtest neededtest2 \
 	 neededtest3 neededtest4 unload2 lateglobal initfirst global \
@@ -218,6 +218,9 @@ ifeq (yesyes,$(have-fpie)$(build-shared))
 modules-names += tst-piemod1
 tests += tst-pie1 tst-pie2
 tests-pie += tst-pie1 tst-pie2
+tests += vismain
+tests-pie += vismain
+CFLAGS-vismain.c = $(pic-ccflag)
 endif
 modules-execstack-yes = tst-execstack-mod
 extra-test-objs += $(addsuffix .os,$(strip $(modules-names)))

-----------------------------------------------------------------------


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]