This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
GNU C Library master sources branch master updated. glibc-2.18-272-ge7044ea

From: neleai at sourceware dot org
To: glibc-cvs at sourceware dot org
Date: 8 Oct 2013 13:47:22 -0000
Subject: GNU C Library master sources branch master updated. glibc-2.18-272-ge7044ea
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  e7044ea76bd95f8adc0eab0b2bdcab7f51055b48 (commit)
      from  41500766f71fd072b6b6a9e4603fb7f85bddcfe2 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=e7044ea76bd95f8adc0eab0b2bdcab7f51055b48

commit e7044ea76bd95f8adc0eab0b2bdcab7f51055b48
Author: Ondřej Bílka <neleai@seznam.cz>
Date:   Tue Oct 8 15:46:48 2013 +0200

    Use p2align instead of ALIGN

diff --git a/ChangeLog b/ChangeLog
index 297ff42..8a39723 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2013-10-08  Ondřej Bílka  <neleai@seznam.cz>
+
+	* sysdeps/x86_64/memset.S (ALIGN): Macro removed.
+	Use .p2align directive instead, throughout.
+	* sysdeps/x86_64/multiarch/memcmp-sse4.S: Likewise.
+	* sysdeps/x86_64/multiarch/memcmp-ssse3.S: Likewise.
+	* sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S: Likewise.
+	* sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: Likewise.
+	* sysdeps/x86_64/multiarch/memcpy-ssse3.S: Likewise.
+	* sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S: Likewise.
+	* sysdeps/x86_64/strchr.S: Likewise.
+	* sysdeps/x86_64/strrchr.S: Likewise.
+
 2013-10-08  Siddhesh Poyarekar  <siddhesh@redhat.com>
 
 	* sysdeps/ieee754/dbl-64/e_pow.c: Fix code formatting.
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index 6c69f4b..9b1de89 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -19,10 +19,6 @@
 
 #include <sysdep.h>
 
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
-#endif
-
 	.text
 #if !defined NOT_IN_libc
 ENTRY(__bzero)
@@ -71,12 +67,12 @@ L(entry_from_bzero):
 L(return):
 	rep
 	ret
-	ALIGN (4)
+	.p2align 4
 L(between_32_64_bytes):
 	movdqu	%xmm8, 16(%rdi)
 	movdqu	%xmm8, -32(%rdi,%rdx)
 	ret
-	ALIGN (4)
+	.p2align 4
 L(loop_start):
 	leaq	64(%rdi), %rcx
 	movdqu	%xmm8, (%rdi)
@@ -92,7 +88,7 @@ L(loop_start):
 	andq	$-64, %rdx
 	cmpq	%rdx, %rcx
 	je	L(return)
-	ALIGN (4)
+	.p2align 4
 L(loop):
 	movdqa	%xmm8, (%rcx)
 	movdqa	%xmm8, 16(%rcx)
diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S
index 1ed4200..d7b147e 100644
--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S
@@ -25,10 +25,6 @@
 #  define MEMCMP	__memcmp_sse4_1
 # endif
 
-# ifndef ALIGN
-#  define ALIGN(n)	.p2align n
-# endif
-
 # define JMPTBL(I, B)	(I - B)
 
 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
@@ -60,7 +56,7 @@ ENTRY (MEMCMP)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
 
 # ifndef USE_AS_WMEMCMP
-	ALIGN (4)
+	.p2align 4
 L(firstbyte):
 	movzbl	(%rdi), %eax
 	movzbl	(%rsi), %ecx
@@ -68,7 +64,7 @@ L(firstbyte):
 	ret
 # endif
 
-	ALIGN (4)
+	.p2align 4
 L(79bytesormore):
 	movdqu	(%rsi), %xmm1
 	movdqu	(%rdi), %xmm2
@@ -316,7 +312,7 @@ L(less32bytesin256):
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(512bytesormore):
 # ifdef DATA_CACHE_SIZE_HALF
 	mov	$DATA_CACHE_SIZE_HALF, %R8_LP
@@ -329,7 +325,7 @@ L(512bytesormore):
 	cmp	%r8, %rdx
 	ja	L(L2_L3_cache_unaglined)
 	sub	$64, %rdx
-	ALIGN (4)
+	.p2align 4
 L(64bytesormore_loop):
 	movdqu	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
@@ -361,7 +357,7 @@ L(64bytesormore_loop):
 
 L(L2_L3_cache_unaglined):
 	sub	$64, %rdx
-	ALIGN (4)
+	.p2align 4
 L(L2_L3_unaligned_128bytes_loop):
 	prefetchnta 0x1c0(%rdi)
 	prefetchnta 0x1c0(%rsi)
@@ -396,7 +392,7 @@ L(L2_L3_unaligned_128bytes_loop):
 /*
  * This case is for machines which are sensitive for unaligned instructions.
  */
-	ALIGN (4)
+	.p2align 4
 L(2aligned):
 	cmp	$128, %rdx
 	ja	L(128bytesormorein2aligned)
@@ -444,7 +440,7 @@ L(less32bytesin64in2alinged):
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(128bytesormorein2aligned):
 	cmp	$512, %rdx
 	ja	L(512bytesormorein2aligned)
@@ -519,7 +515,7 @@ L(less32bytesin128in2aligned):
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(256bytesormorein2aligned):
 
 	sub	$256, %rdx
@@ -632,7 +628,7 @@ L(less32bytesin256in2alinged):
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(512bytesormorein2aligned):
 # ifdef DATA_CACHE_SIZE_HALF
 	mov	$DATA_CACHE_SIZE_HALF, %R8_LP
@@ -646,7 +642,7 @@ L(512bytesormorein2aligned):
 	ja	L(L2_L3_cache_aglined)
 
 	sub	$64, %rdx
-	ALIGN (4)
+	.p2align 4
 L(64bytesormore_loopin2aligned):
 	movdqa	(%rdi), %xmm2
 	pxor	(%rsi), %xmm2
@@ -678,7 +674,7 @@ L(64bytesormore_loopin2aligned):
 L(L2_L3_cache_aglined):
 	sub	$64, %rdx
 
-	ALIGN (4)
+	.p2align 4
 L(L2_L3_aligned_128bytes_loop):
 	prefetchnta 0x1c0(%rdi)
 	prefetchnta 0x1c0(%rsi)
@@ -711,7 +707,7 @@ L(L2_L3_aligned_128bytes_loop):
 	BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
 
 
-	ALIGN (4)
+	.p2align 4
 L(64bytesormore_loop_end):
 	add	$16, %rdi
 	add	$16, %rsi
@@ -806,7 +802,7 @@ L(8bytes):
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(12bytes):
 	mov	-12(%rdi), %rax
 	mov	-12(%rsi), %rcx
@@ -827,7 +823,7 @@ L(0bytes):
 
 # ifndef USE_AS_WMEMCMP
 /* unreal case for wmemcmp */
-	ALIGN (4)
+	.p2align 4
 L(65bytes):
 	movdqu	-65(%rdi), %xmm1
 	movdqu	-65(%rsi), %xmm2
@@ -864,7 +860,7 @@ L(9bytes):
 	sub	%edx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(13bytes):
 	mov	-13(%rdi), %rax
 	mov	-13(%rsi), %rcx
@@ -877,7 +873,7 @@ L(13bytes):
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(5bytes):
 	mov	-5(%rdi), %eax
 	mov	-5(%rsi), %ecx
@@ -888,7 +884,7 @@ L(5bytes):
 	sub	%edx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(66bytes):
 	movdqu	-66(%rdi), %xmm1
 	movdqu	-66(%rsi), %xmm2
@@ -929,7 +925,7 @@ L(10bytes):
 	sub	%ecx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(14bytes):
 	mov	-14(%rdi), %rax
 	mov	-14(%rsi), %rcx
@@ -942,7 +938,7 @@ L(14bytes):
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(6bytes):
 	mov	-6(%rdi), %eax
 	mov	-6(%rsi), %ecx
@@ -958,7 +954,7 @@ L(2bytes):
 	sub	%ecx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(67bytes):
 	movdqu	-67(%rdi), %xmm2
 	movdqu	-67(%rsi), %xmm1
@@ -997,7 +993,7 @@ L(11bytes):
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(15bytes):
 	mov	-15(%rdi), %rax
 	mov	-15(%rsi), %rcx
@@ -1010,7 +1006,7 @@ L(15bytes):
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(7bytes):
 	mov	-7(%rdi), %eax
 	mov	-7(%rsi), %ecx
@@ -1023,7 +1019,7 @@ L(7bytes):
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(3bytes):
 	movzwl	-3(%rdi), %eax
 	movzwl	-3(%rsi), %ecx
@@ -1036,7 +1032,7 @@ L(1bytes):
 	ret
 # endif
 
-	ALIGN (4)
+	.p2align 4
 L(68bytes):
 	movdqu	-68(%rdi), %xmm2
 	movdqu	-68(%rsi), %xmm1
@@ -1079,7 +1075,7 @@ L(20bytes):
 
 # ifndef USE_AS_WMEMCMP
 /* unreal cases for wmemcmp */
-	ALIGN (4)
+	.p2align 4
 L(69bytes):
 	movdqu	-69(%rsi), %xmm1
 	movdqu	-69(%rdi), %xmm2
@@ -1115,7 +1111,7 @@ L(21bytes):
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(70bytes):
 	movdqu	-70(%rsi), %xmm1
 	movdqu	-70(%rdi), %xmm2
@@ -1151,7 +1147,7 @@ L(22bytes):
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(71bytes):
 	movdqu	-71(%rsi), %xmm1
 	movdqu	-71(%rdi), %xmm2
@@ -1188,7 +1184,7 @@ L(23bytes):
 	ret
 # endif
 
-	ALIGN (4)
+	.p2align 4
 L(72bytes):
 	movdqu	-72(%rsi), %xmm1
 	movdqu	-72(%rdi), %xmm2
@@ -1227,7 +1223,7 @@ L(24bytes):
 
 # ifndef USE_AS_WMEMCMP
 /* unreal cases for wmemcmp */
-	ALIGN (4)
+	.p2align 4
 L(73bytes):
 	movdqu	-73(%rsi), %xmm1
 	movdqu	-73(%rdi), %xmm2
@@ -1265,7 +1261,7 @@ L(25bytes):
 	sub	%ecx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(74bytes):
 	movdqu	-74(%rsi), %xmm1
 	movdqu	-74(%rdi), %xmm2
@@ -1302,7 +1298,7 @@ L(26bytes):
 	movzwl	-2(%rsi), %ecx
 	jmp	L(diffin2bytes)
 
-	ALIGN (4)
+	.p2align 4
 L(75bytes):
 	movdqu	-75(%rsi), %xmm1
 	movdqu	-75(%rdi), %xmm2
@@ -1342,7 +1338,7 @@ L(27bytes):
 	xor	%eax, %eax
 	ret
 # endif
-	ALIGN (4)
+	.p2align 4
 L(76bytes):
 	movdqu	-76(%rsi), %xmm1
 	movdqu	-76(%rdi), %xmm2
@@ -1388,7 +1384,7 @@ L(28bytes):
 
 # ifndef USE_AS_WMEMCMP
 /* unreal cases for wmemcmp */
-	ALIGN (4)
+	.p2align 4
 L(77bytes):
 	movdqu	-77(%rsi), %xmm1
 	movdqu	-77(%rdi), %xmm2
@@ -1430,7 +1426,7 @@ L(29bytes):
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(78bytes):
 	movdqu	-78(%rsi), %xmm1
 	movdqu	-78(%rdi), %xmm2
@@ -1470,7 +1466,7 @@ L(30bytes):
 	xor	%eax, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(79bytes):
 	movdqu	-79(%rsi), %xmm1
 	movdqu	-79(%rdi), %xmm2
@@ -1510,7 +1506,7 @@ L(31bytes):
 	xor	%eax, %eax
 	ret
 # endif
-	ALIGN (4)
+	.p2align 4
 L(64bytes):
 	movdqu	-64(%rdi), %xmm2
 	movdqu	-64(%rsi), %xmm1
@@ -1548,7 +1544,7 @@ L(32bytes):
 /*
  * Aligned 8 bytes to avoid 2 branch "taken" in one 16 alinged code block.
  */
-	ALIGN (3)
+	.p2align 3
 L(less16bytes):
 	movsbq	%dl, %rdx
 	mov	(%rsi, %rdx), %rcx
@@ -1585,7 +1581,7 @@ L(diffin2bytes):
 	sub	%ecx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(end):
 	and	$0xff, %eax
 	and	$0xff, %ecx
@@ -1599,7 +1595,7 @@ L(end):
 	neg	%eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(nequal_bigger):
 	ret
 
@@ -1611,7 +1607,7 @@ L(unreal_case):
 END (MEMCMP)
 
 	.section .rodata.sse4.1,"a",@progbits
-	ALIGN (3)
+	.p2align 3
 # ifndef USE_AS_WMEMCMP
 L(table_64bytes):
 	.int	JMPTBL (L(0bytes), L(table_64bytes))
diff --git a/sysdeps/x86_64/multiarch/memcmp-ssse3.S b/sysdeps/x86_64/multiarch/memcmp-ssse3.S
index e319df9..e04f918 100644
--- a/sysdeps/x86_64/multiarch/memcmp-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcmp-ssse3.S
@@ -25,10 +25,6 @@
 #  define MEMCMP	__memcmp_ssse3
 # endif
 
-# ifndef ALIGN
-#  define ALIGN(n)	.p2align n
-# endif
-
 /* Warning!
 	   wmemcmp has to use SIGNED comparison for elements.
 	   memcmp has to use UNSIGNED comparison for elemnts.
@@ -50,7 +46,7 @@ ENTRY (MEMCMP)
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 /* ECX >= 32.  */
 L(48bytesormore):
 	movdqu	(%rdi), %xmm3
@@ -90,7 +86,7 @@ L(48bytesormore):
 	je	L(shr_6)
 	jmp	L(shr_7)
 
-	ALIGN	(2)
+	.p2align 2
 L(next_unaligned_table):
 	cmp	$8, %edx
 	je	L(shr_8)
@@ -117,7 +113,7 @@ L(next_unaligned_table):
 	jmp	L(shr_12)
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_0):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -137,7 +133,7 @@ L(shr_0):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_0_gobble):
 	movdqa	(%rsi), %xmm0
 	xor	%eax, %eax
@@ -180,7 +176,7 @@ L(next):
 
 # ifndef USE_AS_WMEMCMP
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_1):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -207,7 +203,7 @@ L(shr_1):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_1_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -258,7 +254,7 @@ L(shr_1_gobble_next):
 	jmp	L(less48bytes)
 
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_2):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -285,7 +281,7 @@ L(shr_2):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_2_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -335,7 +331,7 @@ L(shr_2_gobble_next):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_3):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -362,7 +358,7 @@ L(shr_3):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_3_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -414,7 +410,7 @@ L(shr_3_gobble_next):
 
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_4):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -441,7 +437,7 @@ L(shr_4):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_4_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -493,7 +489,7 @@ L(shr_4_gobble_next):
 
 # ifndef USE_AS_WMEMCMP
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_5):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -520,7 +516,7 @@ L(shr_5):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_5_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -570,7 +566,7 @@ L(shr_5_gobble_next):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_6):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -597,7 +593,7 @@ L(shr_6):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_6_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -647,7 +643,7 @@ L(shr_6_gobble_next):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_7):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -674,7 +670,7 @@ L(shr_7):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_7_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -726,7 +722,7 @@ L(shr_7_gobble_next):
 
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_8):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -753,7 +749,7 @@ L(shr_8):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_8_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -805,7 +801,7 @@ L(shr_8_gobble_next):
 
 # ifndef USE_AS_WMEMCMP
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_9):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -832,7 +828,7 @@ L(shr_9):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_9_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -882,7 +878,7 @@ L(shr_9_gobble_next):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_10):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -909,7 +905,7 @@ L(shr_10):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_10_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -959,7 +955,7 @@ L(shr_10_gobble_next):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_11):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -986,7 +982,7 @@ L(shr_11):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_11_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -1038,7 +1034,7 @@ L(shr_11_gobble_next):
 
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_12):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -1065,7 +1061,7 @@ L(shr_12):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_12_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -1117,7 +1113,7 @@ L(shr_12_gobble_next):
 
 # ifndef USE_AS_WMEMCMP
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_13):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -1144,7 +1140,7 @@ L(shr_13):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_13_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -1194,7 +1190,7 @@ L(shr_13_gobble_next):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_14):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -1221,7 +1217,7 @@ L(shr_14):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_14_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -1271,7 +1267,7 @@ L(shr_14_gobble_next):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_15):
 	cmp	$80, %rcx
 	lea	-48(%rcx), %rcx
@@ -1298,7 +1294,7 @@ L(shr_15):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(shr_15_gobble):
 	sub	$32, %rcx
 	movdqa	16(%rsi), %xmm0
@@ -1348,7 +1344,7 @@ L(shr_15_gobble_next):
 	add	%rcx, %rdi
 	jmp	L(less48bytes)
 # endif
-	ALIGN	(4)
+	.p2align 4
 L(exit):
 	pmovmskb %xmm1, %r8d
 	sub	$0xffff, %r8d
@@ -1389,56 +1385,56 @@ L(less16bytes):
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(Byte16):
 	movzbl	-16(%rdi), %eax
 	movzbl	-16(%rsi), %edx
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(Byte17):
 	movzbl	-15(%rdi), %eax
 	movzbl	-15(%rsi), %edx
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(Byte18):
 	movzbl	-14(%rdi), %eax
 	movzbl	-14(%rsi), %edx
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(Byte19):
 	movzbl	-13(%rdi), %eax
 	movzbl	-13(%rsi), %edx
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(Byte20):
 	movzbl	-12(%rdi), %eax
 	movzbl	-12(%rsi), %edx
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(Byte21):
 	movzbl	-11(%rdi), %eax
 	movzbl	-11(%rsi), %edx
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(Byte22):
 	movzbl	-10(%rdi), %eax
 	movzbl	-10(%rsi), %edx
 	sub	%edx, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(next_24_bytes):
 	lea	8(%rdi), %rdi
 	lea	8(%rsi), %rsi
@@ -1479,14 +1475,14 @@ L(next_24_bytes):
 	jne	L(find_diff)
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(second_double_word):
 	mov	-12(%rdi), %eax
 	cmp	-12(%rsi), %eax
 	jne	L(find_diff)
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(next_two_double_words):
 	and	$15, %dh
 	jz	L(fourth_double_word)
@@ -1495,7 +1491,7 @@ L(next_two_double_words):
 	jne	L(find_diff)
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(fourth_double_word):
 	mov	-4(%rdi), %eax
 	cmp	-4(%rsi), %eax
@@ -1503,7 +1499,7 @@ L(fourth_double_word):
 	ret
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(less48bytes):
 	cmp	$8, %ecx
 	jae	L(more8bytes)
@@ -1527,7 +1523,7 @@ L(less48bytes):
 	jmp	L(4bytes)
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(more8bytes):
 	cmp	$16, %ecx
 	jae	L(more16bytes)
@@ -1551,7 +1547,7 @@ L(more8bytes):
 	jmp	L(12bytes)
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(more16bytes):
 	cmp	$24, %ecx
 	jae	L(more24bytes)
@@ -1575,7 +1571,7 @@ L(more16bytes):
 	jmp	L(20bytes)
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(more24bytes):
 	cmp	$32, %ecx
 	jae	L(more32bytes)
@@ -1599,7 +1595,7 @@ L(more24bytes):
 	jmp	L(28bytes)
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(more32bytes):
 	cmp	$40, %ecx
 	jae	L(more40bytes)
@@ -1623,7 +1619,7 @@ L(more32bytes):
 	jmp	L(36bytes)
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(more40bytes):
 	cmp	$40, %ecx
 	je	L(40bytes)
@@ -1642,7 +1638,7 @@ L(more40bytes):
 	je	L(46bytes)
 	jmp	L(47bytes)
 
-	ALIGN	(4)
+	.p2align 4
 L(44bytes):
 	movl	-44(%rdi), %eax
 	movl	-44(%rsi), %ecx
@@ -1702,7 +1698,7 @@ L(0bytes):
 	xor	%eax, %eax
 	ret
 # else
-	ALIGN	(4)
+	.p2align 4
 L(44bytes):
 	movl	-44(%rdi), %eax
 	cmp	-44(%rsi), %eax
@@ -1753,7 +1749,7 @@ L(0bytes):
 # endif
 
 # ifndef USE_AS_WMEMCMP
-	ALIGN	(4)
+	.p2align 4
 L(45bytes):
 	movl	-45(%rdi), %eax
 	movl	-45(%rsi), %ecx
@@ -1816,7 +1812,7 @@ L(1bytes):
 	xor	%eax, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(46bytes):
 	movl	-46(%rdi), %eax
 	movl	-46(%rsi), %ecx
@@ -1882,7 +1878,7 @@ L(2bytes):
 	xor	%eax, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(47bytes):
 	movl	-47(%rdi), %eax
 	movl	-47(%rsi), %ecx
@@ -1951,7 +1947,7 @@ L(3bytes):
 	xor	%eax, %eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(find_diff):
 	cmpb	%cl, %al
 	jne	L(set)
@@ -1973,19 +1969,19 @@ L(set):
 # else
 
 /* for wmemcmp */
-	ALIGN	(4)
+	.p2align 4
 L(find_diff):
 	mov	$1, %eax
 	jg	L(find_diff_bigger)
 	neg	%eax
 	ret
 
-	ALIGN	(4)
+	.p2align 4
 L(find_diff_bigger):
 	ret
 # endif
 
-	ALIGN	(4)
+	.p2align 4
 L(equal):
 	xor	%eax, %eax
 	ret
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
index efdfea2..df6578e 100644
--- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
@@ -20,10 +20,6 @@
 
 #include "asm-syntax.h"
 
-#ifndef ALIGN
-# define ALIGN(n)	.p2align n
-#endif
-
 
 ENTRY(__memcpy_sse2_unaligned)
 	movq	%rsi, %rax
@@ -44,7 +40,7 @@ L(return):
 	movq	%rdi, %rax
 	ret
 	.p2align 4,,10
-	ALIGN(4)
+	.p2align 4
 .L31:
 	movdqu	16(%rsi), %xmm8
 	cmpq	$64, %rdx
@@ -77,7 +73,7 @@ L(return):
 	leaq	32(%r10), %r8
 	leaq	48(%r10), %rax
 	.p2align 4,,10
-	ALIGN(4)
+	.p2align 4
 L(loop):
 	movdqu	(%rcx,%r10), %xmm8
 	movdqa	%xmm8, (%rcx)
@@ -151,7 +147,7 @@ L(less_16):
 .L3:
 	leaq	-1(%rdx), %rax
 	.p2align 4,,10
-	ALIGN(4)
+	.p2align 4
 .L11:
 	movzbl	(%rsi,%rax), %edx
 	movb	%dl, (%rdi,%rax)
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index fc9fcef..0eb7d9b 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -31,10 +31,6 @@
 # define MEMCPY_CHK	__memcpy_chk_ssse3_back
 #endif
 
-#ifndef ALIGN
-# define ALIGN(n)	.p2align n
-#endif
-
 #define JMPTBL(I, B)	I - B
 
 /* Branch to an entry in a jump table.  TABLE is a jump table with
@@ -87,7 +83,7 @@ L(bk_write):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 #endif
 
-	ALIGN (4)
+	.p2align 4
 L(144bytesormore):
 
 #ifndef USE_AS_MEMMOVE
@@ -119,7 +115,7 @@ L(144bytesormore):
 	jmp	*%r9
 	ud2
 
-	ALIGN (4)
+	.p2align 4
 L(copy_backward):
 #ifdef DATA_CACHE_SIZE
 	mov	$DATA_CACHE_SIZE, %RCX_LP
@@ -149,7 +145,7 @@ L(copy_backward):
 	jmp	*%r9
 	ud2
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0):
 
 	mov	%rdx, %r9
@@ -162,7 +158,7 @@ L(shl_0):
 #endif
 	jae	L(gobble_mem_fwd)
 	sub	$0x80, %rdx
-	ALIGN (4)
+	.p2align 4
 L(shl_0_loop):
 	movdqa	(%rsi), %xmm1
 	movdqa	%xmm1, (%rdi)
@@ -190,7 +186,7 @@ L(shl_0_loop):
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0_bwd):
 	sub	$0x80, %rdx
 L(copy_backward_loop):
@@ -221,7 +217,7 @@ L(copy_backward_loop):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_1):
 	sub	$0x80, %rdx
 	movaps	-0x01(%rsi), %xmm1
@@ -258,7 +254,7 @@ L(shl_1):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_1_bwd):
 	movaps	-0x01(%rsi), %xmm1
 
@@ -304,7 +300,7 @@ L(shl_1_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_2):
 	sub	$0x80, %rdx
 	movaps	-0x02(%rsi), %xmm1
@@ -341,7 +337,7 @@ L(shl_2):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_2_bwd):
 	movaps	-0x02(%rsi), %xmm1
 
@@ -387,7 +383,7 @@ L(shl_2_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_3):
 	sub	$0x80, %rdx
 	movaps -0x03(%rsi), %xmm1
@@ -424,7 +420,7 @@ L(shl_3):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_3_bwd):
 	movaps	-0x03(%rsi), %xmm1
 
@@ -470,7 +466,7 @@ L(shl_3_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_4):
 	sub	$0x80, %rdx
 	movaps	-0x04(%rsi), %xmm1
@@ -507,7 +503,7 @@ L(shl_4):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_4_bwd):
 	movaps	-0x04(%rsi), %xmm1
 
@@ -553,7 +549,7 @@ L(shl_4_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_5):
 	sub	$0x80, %rdx
 	movaps	-0x05(%rsi), %xmm1
@@ -590,7 +586,7 @@ L(shl_5):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_5_bwd):
 	movaps	-0x05(%rsi), %xmm1
 
@@ -636,7 +632,7 @@ L(shl_5_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_6):
 	sub	$0x80, %rdx
 	movaps	-0x06(%rsi), %xmm1
@@ -673,7 +669,7 @@ L(shl_6):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_6_bwd):
 	movaps	-0x06(%rsi), %xmm1
 
@@ -719,7 +715,7 @@ L(shl_6_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_7):
 	sub	$0x80, %rdx
 	movaps	-0x07(%rsi), %xmm1
@@ -756,7 +752,7 @@ L(shl_7):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_7_bwd):
 	movaps	-0x07(%rsi), %xmm1
 
@@ -802,7 +798,7 @@ L(shl_7_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_8):
 	sub	$0x80, %rdx
 	movaps	-0x08(%rsi), %xmm1
@@ -839,7 +835,7 @@ L(shl_8):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_8_bwd):
 	movaps	-0x08(%rsi), %xmm1
 
@@ -886,7 +882,7 @@ L(shl_8_end_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_9):
 	sub	$0x80, %rdx
 	movaps	-0x09(%rsi), %xmm1
@@ -923,7 +919,7 @@ L(shl_9):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_9_bwd):
 	movaps	-0x09(%rsi), %xmm1
 
@@ -969,7 +965,7 @@ L(shl_9_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_10):
 	sub	$0x80, %rdx
 	movaps	-0x0a(%rsi), %xmm1
@@ -1006,7 +1002,7 @@ L(shl_10):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_10_bwd):
 	movaps	-0x0a(%rsi), %xmm1
 
@@ -1052,7 +1048,7 @@ L(shl_10_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_11):
 	sub	$0x80, %rdx
 	movaps	-0x0b(%rsi), %xmm1
@@ -1089,7 +1085,7 @@ L(shl_11):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_11_bwd):
 	movaps	-0x0b(%rsi), %xmm1
 
@@ -1135,7 +1131,7 @@ L(shl_11_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_12):
 	sub	$0x80, %rdx
 	movdqa	-0x0c(%rsi), %xmm1
@@ -1173,7 +1169,7 @@ L(shl_12):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_12_bwd):
 	movaps	-0x0c(%rsi), %xmm1
 
@@ -1219,7 +1215,7 @@ L(shl_12_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_13):
 	sub	$0x80, %rdx
 	movaps	-0x0d(%rsi), %xmm1
@@ -1256,7 +1252,7 @@ L(shl_13):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_13_bwd):
 	movaps	-0x0d(%rsi), %xmm1
 
@@ -1302,7 +1298,7 @@ L(shl_13_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_14):
 	sub	$0x80, %rdx
 	movaps	-0x0e(%rsi), %xmm1
@@ -1339,7 +1335,7 @@ L(shl_14):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_14_bwd):
 	movaps	-0x0e(%rsi), %xmm1
 
@@ -1385,7 +1381,7 @@ L(shl_14_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_15):
 	sub	$0x80, %rdx
 	movaps	-0x0f(%rsi), %xmm1
@@ -1422,7 +1418,7 @@ L(shl_15):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_15_bwd):
 	movaps	-0x0f(%rsi), %xmm1
 
@@ -1468,7 +1464,7 @@ L(shl_15_bwd):
 	sub	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(gobble_mem_fwd):
 	movdqu	(%rsi), %xmm1
 	movdqu	%xmm0, (%r8)
@@ -1570,7 +1566,7 @@ L(gobble_mem_fwd_end):
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(gobble_mem_bwd):
 	add	%rdx, %rsi
 	add	%rdx, %rdi
@@ -2833,7 +2829,7 @@ L(bwd_write_1bytes):
 END (MEMCPY)
 
 	.section .rodata.ssse3,"a",@progbits
-	ALIGN (3)
+	.p2align 3
 L(table_144_bytes_bwd):
 	.int	JMPTBL (L(bwd_write_0bytes), L(table_144_bytes_bwd))
 	.int	JMPTBL (L(bwd_write_1bytes), L(table_144_bytes_bwd))
@@ -2980,7 +2976,7 @@ L(table_144_bytes_bwd):
 	.int	JMPTBL (L(bwd_write_142bytes), L(table_144_bytes_bwd))
 	.int	JMPTBL (L(bwd_write_143bytes), L(table_144_bytes_bwd))
 
-	ALIGN (3)
+	.p2align 3
 L(table_144_bytes_fwd):
 	.int	JMPTBL (L(fwd_write_0bytes), L(table_144_bytes_fwd))
 	.int	JMPTBL (L(fwd_write_1bytes), L(table_144_bytes_fwd))
@@ -3127,7 +3123,7 @@ L(table_144_bytes_fwd):
 	.int	JMPTBL (L(fwd_write_142bytes), L(table_144_bytes_fwd))
 	.int	JMPTBL (L(fwd_write_143bytes), L(table_144_bytes_fwd))
 
-	ALIGN (3)
+	.p2align 3
 L(shl_table_fwd):
 	.int	JMPTBL (L(shl_0), L(shl_table_fwd))
 	.int	JMPTBL (L(shl_1), L(shl_table_fwd))
@@ -3146,7 +3142,7 @@ L(shl_table_fwd):
 	.int	JMPTBL (L(shl_14), L(shl_table_fwd))
 	.int	JMPTBL (L(shl_15), L(shl_table_fwd))
 
-	ALIGN (3)
+	.p2align 3
 L(shl_table_bwd):
 	.int	JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
 	.int	JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 9642cee..0cedab2 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -31,10 +31,6 @@
 # define MEMCPY_CHK	__memcpy_chk_ssse3
 #endif
 
-#ifndef ALIGN
-# define ALIGN(n)	.p2align n
-#endif
-
 #define JMPTBL(I, B)	I - B
 
 /* Branch to an entry in a jump table.  TABLE is a jump table with
@@ -80,7 +76,7 @@ L(copy_forward):
 	jmp	*%r9
 	ud2
 
-	ALIGN (4)
+	.p2align 4
 L(80bytesormore):
 #ifndef USE_AS_MEMMOVE
 	cmp	%dil, %sil
@@ -113,7 +109,7 @@ L(80bytesormore):
 #endif
 	BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %r9, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(copy_backward):
 	movdqu	-16(%rsi, %rdx), %xmm0
 	add	%rdx, %rsi
@@ -144,7 +140,7 @@ L(copy_backward):
 #endif
 	BRANCH_TO_JMPTBL_ENTRY (L(shl_table_bwd), %r9, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0):
 	sub	$16, %rdx
 	movdqa	(%rsi), %xmm1
@@ -172,7 +168,7 @@ L(shl_0_less_64bytes):
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0_gobble):
 #ifdef DATA_CACHE_SIZE_HALF
 	cmp	$DATA_CACHE_SIZE_HALF, %RDX_LP
@@ -228,7 +224,7 @@ L(shl_0_cache_less_64bytes):
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0_gobble_mem_loop):
 	prefetcht0 0x1c0(%rsi)
 	prefetcht0 0x280(%rsi)
@@ -287,7 +283,7 @@ L(shl_0_mem_less_32bytes):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0_bwd):
 	sub	$16, %rdx
 	movdqa	-0x10(%rsi), %xmm1
@@ -313,7 +309,7 @@ L(shl_0_bwd):
 L(shl_0_less_64bytes_bwd):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0_gobble_bwd):
 #ifdef DATA_CACHE_SIZE_HALF
 	cmp	$DATA_CACHE_SIZE_HALF, %RDX_LP
@@ -367,7 +363,7 @@ L(shl_0_gobble_bwd_loop):
 L(shl_0_gobble_bwd_less_64bytes):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_0_gobble_mem_bwd_loop):
 	prefetcht0 -0x1c0(%rsi)
 	prefetcht0 -0x280(%rsi)
@@ -423,7 +419,7 @@ L(shl_0_mem_bwd_less_64bytes):
 L(shl_0_mem_bwd_less_32bytes):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_1):
 	lea	(L(shl_1_loop_L1)-L(shl_1))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -466,7 +462,7 @@ L(shl_1_end):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_1_bwd):
 	lea	(L(shl_1_bwd_loop_L1)-L(shl_1_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -508,7 +504,7 @@ L(shl_1_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_2):
 	lea	(L(shl_2_loop_L1)-L(shl_2))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -551,7 +547,7 @@ L(shl_2_end):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_2_bwd):
 	lea	(L(shl_2_bwd_loop_L1)-L(shl_2_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -593,7 +589,7 @@ L(shl_2_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_3):
 	lea	(L(shl_3_loop_L1)-L(shl_3))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -636,7 +632,7 @@ L(shl_3_end):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_3_bwd):
 	lea	(L(shl_3_bwd_loop_L1)-L(shl_3_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -678,7 +674,7 @@ L(shl_3_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_4):
 	lea	(L(shl_4_loop_L1)-L(shl_4))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -721,7 +717,7 @@ L(shl_4_end):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_4_bwd):
 	lea	(L(shl_4_bwd_loop_L1)-L(shl_4_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -763,7 +759,7 @@ L(shl_4_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_5):
 	lea	(L(shl_5_loop_L1)-L(shl_5))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -806,7 +802,7 @@ L(shl_5_end):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_5_bwd):
 	lea	(L(shl_5_bwd_loop_L1)-L(shl_5_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -848,7 +844,7 @@ L(shl_5_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_6):
 	lea	(L(shl_6_loop_L1)-L(shl_6))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -891,7 +887,7 @@ L(shl_6_end):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_6_bwd):
 	lea	(L(shl_6_bwd_loop_L1)-L(shl_6_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -933,7 +929,7 @@ L(shl_6_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_7):
 	lea	(L(shl_7_loop_L1)-L(shl_7))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -976,7 +972,7 @@ L(shl_7_end):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_7_bwd):
 	lea	(L(shl_7_bwd_loop_L1)-L(shl_7_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1018,7 +1014,7 @@ L(shl_7_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_8):
 	lea	(L(shl_8_loop_L1)-L(shl_8))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1051,7 +1047,7 @@ L(shl_8_loop_L1):
 	movaps	%xmm5, -0x10(%rdi)
 	jmp	*%r9
 	ud2
-	ALIGN (4)
+	.p2align 4
 L(shl_8_end):
 	lea	64(%rdx), %rdx
 	movaps	%xmm4, -0x20(%rdi)
@@ -1061,7 +1057,7 @@ L(shl_8_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_8_bwd):
 	lea	(L(shl_8_bwd_loop_L1)-L(shl_8_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1103,7 +1099,7 @@ L(shl_8_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_9):
 	lea	(L(shl_9_loop_L1)-L(shl_9))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1146,7 +1142,7 @@ L(shl_9_end):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_9_bwd):
 	lea	(L(shl_9_bwd_loop_L1)-L(shl_9_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1188,7 +1184,7 @@ L(shl_9_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_10):
 	lea	(L(shl_10_loop_L1)-L(shl_10))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1231,7 +1227,7 @@ L(shl_10_end):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_10_bwd):
 	lea	(L(shl_10_bwd_loop_L1)-L(shl_10_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1273,7 +1269,7 @@ L(shl_10_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_11):
 	lea	(L(shl_11_loop_L1)-L(shl_11))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1316,7 +1312,7 @@ L(shl_11_end):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_11_bwd):
 	lea	(L(shl_11_bwd_loop_L1)-L(shl_11_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1358,7 +1354,7 @@ L(shl_11_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_12):
 	lea	(L(shl_12_loop_L1)-L(shl_12))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1401,7 +1397,7 @@ L(shl_12_end):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_12_bwd):
 	lea	(L(shl_12_bwd_loop_L1)-L(shl_12_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1443,7 +1439,7 @@ L(shl_12_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_13):
 	lea	(L(shl_13_loop_L1)-L(shl_13))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1486,7 +1482,7 @@ L(shl_13_end):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_13_bwd):
 	lea	(L(shl_13_bwd_loop_L1)-L(shl_13_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1528,7 +1524,7 @@ L(shl_13_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_14):
 	lea	(L(shl_14_loop_L1)-L(shl_14))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1571,7 +1567,7 @@ L(shl_14_end):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_14_bwd):
 	lea	(L(shl_14_bwd_loop_L1)-L(shl_14_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1613,7 +1609,7 @@ L(shl_14_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_15):
 	lea	(L(shl_15_loop_L1)-L(shl_15))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1656,7 +1652,7 @@ L(shl_15_end):
 	add	%rdx, %rsi
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(shl_15_bwd):
 	lea	(L(shl_15_bwd_loop_L1)-L(shl_15_bwd))(%r9), %r9
 	cmp	%rcx, %rdx
@@ -1698,7 +1694,7 @@ L(shl_15_bwd_end):
 	movdqu	%xmm0, (%r8)
 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
 
-	ALIGN (4)
+	.p2align 4
 L(write_72bytes):
 	movdqu	-72(%rsi), %xmm0
 	movdqu	-56(%rsi), %xmm1
@@ -1716,7 +1712,7 @@ L(write_72bytes):
 	mov	 %rcx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_64bytes):
 	movdqu	-64(%rsi), %xmm0
 	mov	-48(%rsi), %rcx
@@ -1734,7 +1730,7 @@ L(write_64bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_56bytes):
 	movdqu	-56(%rsi), %xmm0
 	mov	-40(%rsi), %r8
@@ -1750,7 +1746,7 @@ L(write_56bytes):
 	mov	 %rcx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_48bytes):
 	mov	-48(%rsi), %rcx
 	mov	-40(%rsi), %r8
@@ -1766,7 +1762,7 @@ L(write_48bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_40bytes):
 	mov	-40(%rsi), %r8
 	mov	-32(%rsi), %r9
@@ -1780,7 +1776,7 @@ L(write_40bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_32bytes):
 	mov	-32(%rsi), %r9
 	mov	-24(%rsi), %r10
@@ -1792,7 +1788,7 @@ L(write_32bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_24bytes):
 	mov	-24(%rsi), %r10
 	mov	-16(%rsi), %r11
@@ -1802,7 +1798,7 @@ L(write_24bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_16bytes):
 	mov	-16(%rsi), %r11
 	mov	-8(%rsi), %rdx
@@ -1810,14 +1806,14 @@ L(write_16bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_8bytes):
 	mov	-8(%rsi), %rdx
 	mov	 %rdx, -8(%rdi)
 L(write_0bytes):
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_73bytes):
 	movdqu	-73(%rsi), %xmm0
 	movdqu	-57(%rsi), %xmm1
@@ -1837,7 +1833,7 @@ L(write_73bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_65bytes):
 	movdqu	-65(%rsi), %xmm0
 	movdqu	-49(%rsi), %xmm1
@@ -1855,7 +1851,7 @@ L(write_65bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_57bytes):
 	movdqu	-57(%rsi), %xmm0
 	mov	-41(%rsi), %r8
@@ -1873,7 +1869,7 @@ L(write_57bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_49bytes):
 	movdqu	-49(%rsi), %xmm0
 	mov	-33(%rsi), %r9
@@ -1889,7 +1885,7 @@ L(write_49bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_41bytes):
 	mov	-41(%rsi), %r8
 	mov	-33(%rsi), %r9
@@ -1905,7 +1901,7 @@ L(write_41bytes):
 	mov	 %dl, -1(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_33bytes):
 	mov	-33(%rsi), %r9
 	mov	-25(%rsi), %r10
@@ -1919,7 +1915,7 @@ L(write_33bytes):
 	mov	 %dl, -1(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_25bytes):
 	mov	-25(%rsi), %r10
 	mov	-17(%rsi), %r11
@@ -1931,7 +1927,7 @@ L(write_25bytes):
 	mov	 %dl, -1(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_17bytes):
 	mov	-17(%rsi), %r11
 	mov	-9(%rsi), %rcx
@@ -1941,7 +1937,7 @@ L(write_17bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_9bytes):
 	mov	-9(%rsi), %rcx
 	mov	-4(%rsi), %edx
@@ -1949,13 +1945,13 @@ L(write_9bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_1bytes):
 	mov	-1(%rsi), %dl
 	mov	 %dl, -1(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_74bytes):
 	movdqu	-74(%rsi), %xmm0
 	movdqu	-58(%rsi), %xmm1
@@ -1975,7 +1971,7 @@ L(write_74bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_66bytes):
 	movdqu	-66(%rsi), %xmm0
 	movdqu	-50(%rsi), %xmm1
@@ -1995,7 +1991,7 @@ L(write_66bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_58bytes):
 	movdqu	-58(%rsi), %xmm1
 	mov	-42(%rsi), %r8
@@ -2013,7 +2009,7 @@ L(write_58bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_50bytes):
 	movdqu	-50(%rsi), %xmm0
 	mov	-34(%rsi), %r9
@@ -2029,7 +2025,7 @@ L(write_50bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_42bytes):
 	mov	-42(%rsi), %r8
 	mov	-34(%rsi), %r9
@@ -2045,7 +2041,7 @@ L(write_42bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_34bytes):
 	mov	-34(%rsi), %r9
 	mov	-26(%rsi), %r10
@@ -2059,7 +2055,7 @@ L(write_34bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_26bytes):
 	mov	-26(%rsi), %r10
 	mov	-18(%rsi), %r11
@@ -2071,7 +2067,7 @@ L(write_26bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_18bytes):
 	mov	-18(%rsi), %r11
 	mov	-10(%rsi), %rcx
@@ -2081,7 +2077,7 @@ L(write_18bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_10bytes):
 	mov	-10(%rsi), %rcx
 	mov	-4(%rsi), %edx
@@ -2089,13 +2085,13 @@ L(write_10bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_2bytes):
 	mov	-2(%rsi), %dx
 	mov	 %dx, -2(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_75bytes):
 	movdqu	-75(%rsi), %xmm0
 	movdqu	-59(%rsi), %xmm1
@@ -2115,7 +2111,7 @@ L(write_75bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_67bytes):
 	movdqu	-67(%rsi), %xmm0
 	movdqu	-59(%rsi), %xmm1
@@ -2135,7 +2131,7 @@ L(write_67bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_59bytes):
 	movdqu	-59(%rsi), %xmm0
 	mov	-43(%rsi), %r8
@@ -2153,7 +2149,7 @@ L(write_59bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_51bytes):
 	movdqu	-51(%rsi), %xmm0
 	mov	-35(%rsi), %r9
@@ -2169,7 +2165,7 @@ L(write_51bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_43bytes):
 	mov	-43(%rsi), %r8
 	mov	-35(%rsi), %r9
@@ -2185,7 +2181,7 @@ L(write_43bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_35bytes):
 	mov	-35(%rsi), %r9
 	mov	-27(%rsi), %r10
@@ -2199,7 +2195,7 @@ L(write_35bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_27bytes):
 	mov	-27(%rsi), %r10
 	mov	-19(%rsi), %r11
@@ -2211,7 +2207,7 @@ L(write_27bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_19bytes):
 	mov	-19(%rsi), %r11
 	mov	-11(%rsi), %rcx
@@ -2221,7 +2217,7 @@ L(write_19bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_11bytes):
 	mov	-11(%rsi), %rcx
 	mov	-4(%rsi), %edx
@@ -2229,7 +2225,7 @@ L(write_11bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_3bytes):
 	mov	-3(%rsi), %dx
 	mov	-2(%rsi), %cx
@@ -2237,7 +2233,7 @@ L(write_3bytes):
 	mov	 %cx, -2(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_76bytes):
 	movdqu	-76(%rsi), %xmm0
 	movdqu	-60(%rsi), %xmm1
@@ -2257,7 +2253,7 @@ L(write_76bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_68bytes):
 	movdqu	-68(%rsi), %xmm0
 	movdqu	-52(%rsi), %xmm1
@@ -2275,7 +2271,7 @@ L(write_68bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_60bytes):
 	movdqu	-60(%rsi), %xmm0
 	mov	-44(%rsi), %r8
@@ -2293,7 +2289,7 @@ L(write_60bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_52bytes):
 	movdqu	-52(%rsi), %xmm0
 	mov	-36(%rsi), %r9
@@ -2309,7 +2305,7 @@ L(write_52bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_44bytes):
 	mov	-44(%rsi), %r8
 	mov	-36(%rsi), %r9
@@ -2325,7 +2321,7 @@ L(write_44bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_36bytes):
 	mov	-36(%rsi), %r9
 	mov	-28(%rsi), %r10
@@ -2339,7 +2335,7 @@ L(write_36bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_28bytes):
 	mov	-28(%rsi), %r10
 	mov	-20(%rsi), %r11
@@ -2351,7 +2347,7 @@ L(write_28bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_20bytes):
 	mov	-20(%rsi), %r11
 	mov	-12(%rsi), %rcx
@@ -2361,7 +2357,7 @@ L(write_20bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_12bytes):
 	mov	-12(%rsi), %rcx
 	mov	-4(%rsi), %edx
@@ -2369,13 +2365,13 @@ L(write_12bytes):
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_4bytes):
 	mov	-4(%rsi), %edx
 	mov	 %edx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_77bytes):
 	movdqu	-77(%rsi), %xmm0
 	movdqu	-61(%rsi), %xmm1
@@ -2395,7 +2391,7 @@ L(write_77bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_69bytes):
 	movdqu	-69(%rsi), %xmm0
 	movdqu	-53(%rsi), %xmm1
@@ -2413,7 +2409,7 @@ L(write_69bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_61bytes):
 	movdqu	-61(%rsi), %xmm0
 	mov	-45(%rsi), %r8
@@ -2431,7 +2427,7 @@ L(write_61bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_53bytes):
 	movdqu	-53(%rsi), %xmm0
 	mov	-45(%rsi), %r8
@@ -2448,7 +2444,7 @@ L(write_53bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_45bytes):
 	mov	-45(%rsi), %r8
 	mov	-37(%rsi), %r9
@@ -2464,7 +2460,7 @@ L(write_45bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_37bytes):
 	mov	-37(%rsi), %r9
 	mov	-29(%rsi), %r10
@@ -2478,7 +2474,7 @@ L(write_37bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_29bytes):
 	mov	-29(%rsi), %r10
 	mov	-21(%rsi), %r11
@@ -2490,7 +2486,7 @@ L(write_29bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_21bytes):
 	mov	-21(%rsi), %r11
 	mov	-13(%rsi), %rcx
@@ -2500,7 +2496,7 @@ L(write_21bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_13bytes):
 	mov	-13(%rsi), %rcx
 	mov	-8(%rsi), %rdx
@@ -2508,7 +2504,7 @@ L(write_13bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_5bytes):
 	mov	-5(%rsi), %edx
 	mov	-4(%rsi), %ecx
@@ -2516,7 +2512,7 @@ L(write_5bytes):
 	mov	 %ecx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_78bytes):
 	movdqu	-78(%rsi), %xmm0
 	movdqu	-62(%rsi), %xmm1
@@ -2536,7 +2532,7 @@ L(write_78bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_70bytes):
 	movdqu	-70(%rsi), %xmm0
 	movdqu	-54(%rsi), %xmm1
@@ -2554,7 +2550,7 @@ L(write_70bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_62bytes):
 	movdqu	-62(%rsi), %xmm0
 	mov	-46(%rsi), %r8
@@ -2572,7 +2568,7 @@ L(write_62bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_54bytes):
 	movdqu	-54(%rsi), %xmm0
 	mov	-38(%rsi), %r9
@@ -2588,7 +2584,7 @@ L(write_54bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_46bytes):
 	mov	-46(%rsi), %r8
 	mov	-38(%rsi), %r9
@@ -2604,7 +2600,7 @@ L(write_46bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_38bytes):
 	mov	-38(%rsi), %r9
 	mov	-30(%rsi), %r10
@@ -2618,7 +2614,7 @@ L(write_38bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_30bytes):
 	mov	-30(%rsi), %r10
 	mov	-22(%rsi), %r11
@@ -2630,7 +2626,7 @@ L(write_30bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_22bytes):
 	mov	-22(%rsi), %r11
 	mov	-14(%rsi), %rcx
@@ -2640,7 +2636,7 @@ L(write_22bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_14bytes):
 	mov	-14(%rsi), %rcx
 	mov	-8(%rsi), %rdx
@@ -2648,7 +2644,7 @@ L(write_14bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_6bytes):
 	mov	-6(%rsi), %edx
 	mov	-4(%rsi), %ecx
@@ -2656,7 +2652,7 @@ L(write_6bytes):
 	mov	 %ecx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_79bytes):
 	movdqu	-79(%rsi), %xmm0
 	movdqu	-63(%rsi), %xmm1
@@ -2676,7 +2672,7 @@ L(write_79bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_71bytes):
 	movdqu	-71(%rsi), %xmm0
 	movdqu	-55(%rsi), %xmm1
@@ -2694,7 +2690,7 @@ L(write_71bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_63bytes):
 	movdqu	-63(%rsi), %xmm0
 	mov	-47(%rsi), %r8
@@ -2712,7 +2708,7 @@ L(write_63bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_55bytes):
 	movdqu	-55(%rsi), %xmm0
 	mov	-39(%rsi), %r9
@@ -2728,7 +2724,7 @@ L(write_55bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_47bytes):
 	mov	-47(%rsi), %r8
 	mov	-39(%rsi), %r9
@@ -2744,7 +2740,7 @@ L(write_47bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_39bytes):
 	mov	-39(%rsi), %r9
 	mov	-31(%rsi), %r10
@@ -2758,7 +2754,7 @@ L(write_39bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_31bytes):
 	mov	-31(%rsi), %r10
 	mov	-23(%rsi), %r11
@@ -2770,7 +2766,7 @@ L(write_31bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_23bytes):
 	mov	-23(%rsi), %r11
 	mov	-15(%rsi), %rcx
@@ -2780,7 +2776,7 @@ L(write_23bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_15bytes):
 	mov	-15(%rsi), %rcx
 	mov	-8(%rsi), %rdx
@@ -2788,7 +2784,7 @@ L(write_15bytes):
 	mov	 %rdx, -8(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(write_7bytes):
 	mov	-7(%rsi), %edx
 	mov	-4(%rsi), %ecx
@@ -2796,7 +2792,7 @@ L(write_7bytes):
 	mov	 %ecx, -4(%rdi)
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(large_page_fwd):
 	movdqu	(%rsi), %xmm1
 	lea	16(%rsi), %rsi
@@ -2859,7 +2855,7 @@ L(large_page_less_64bytes):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
 #ifdef USE_AS_MEMMOVE
-	ALIGN (4)
+	.p2align 4
 L(ll_cache_copy_fwd_start):
 	prefetcht0 0x1c0(%rsi)
 	prefetcht0 0x200(%rsi)
@@ -2906,7 +2902,7 @@ L(large_page_ll_less_fwd_64bytes):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
 #endif
-	ALIGN (4)
+	.p2align 4
 L(large_page_bwd):
 	movdqu	-0x10(%rsi), %xmm1
 	lea	-16(%rsi), %rsi
@@ -2966,7 +2962,7 @@ L(large_page_less_bwd_64bytes):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
 #ifdef USE_AS_MEMMOVE
-	ALIGN (4)
+	.p2align 4
 L(ll_cache_copy_bwd_start):
 	prefetcht0 -0x1c0(%rsi)
 	prefetcht0 -0x200(%rsi)
@@ -3014,7 +3010,7 @@ L(large_page_ll_less_bwd_64bytes):
 END (MEMCPY)
 
 	.section .rodata.ssse3,"a",@progbits
-	ALIGN (3)
+	.p2align 3
 L(table_less_80bytes):
 	.int	JMPTBL (L(write_0bytes), L(table_less_80bytes))
 	.int	JMPTBL (L(write_1bytes), L(table_less_80bytes))
@@ -3097,7 +3093,7 @@ L(table_less_80bytes):
 	.int	JMPTBL (L(write_78bytes), L(table_less_80bytes))
 	.int	JMPTBL (L(write_79bytes), L(table_less_80bytes))
 
-	ALIGN (3)
+	.p2align 3
 L(shl_table):
 	.int	JMPTBL (L(shl_0), L(shl_table))
 	.int	JMPTBL (L(shl_1), L(shl_table))
@@ -3116,7 +3112,7 @@ L(shl_table):
 	.int	JMPTBL (L(shl_14), L(shl_table))
 	.int	JMPTBL (L(shl_15), L(shl_table))
 
-	ALIGN (3)
+	.p2align 3
 L(shl_table_bwd):
 	.int	JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
 	.int	JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
index eed8432..4a8e57a 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
@@ -17,7 +17,6 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include "sysdep.h"
-#define ALIGN(x)	.p2align x
 
 ENTRY ( __strcmp_sse2_unaligned)
 	movl	%edi, %eax
@@ -43,7 +42,7 @@ L(return):
 	subl	%edx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(next_48_bytes):
 	movdqu	16(%rdi), %xmm6
 	movdqu	16(%rsi), %xmm3
@@ -85,7 +84,7 @@ L(main_loop_header):
 	movq	%rcx, %rsi
 	jmp	L(loop_start)
 
-	ALIGN	(4)
+	.p2align 4
 L(loop):
 	addq	$64, %rax
 	addq	$64, %rdx
@@ -141,7 +140,7 @@ L(back_to_loop):
 	subl	%edx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(loop_cross_page):
 	xor	%r10, %r10
 	movq	%rdx, %r9
@@ -191,7 +190,7 @@ L(loop_cross_page):
 	subl	%edx, %eax
 	ret
 
-	ALIGN (4)
+	.p2align 4
 L(cross_page_loop):
 	cmpb	%cl, %al
 	jne	L(different)
diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S
index 1900b37..7440500 100644
--- a/sysdeps/x86_64/strchr.S
+++ b/sysdeps/x86_64/strchr.S
@@ -19,11 +19,6 @@
 
 #include <sysdep.h>
 
-# ifndef ALIGN
-#  define ALIGN(n)	.p2align n
-# endif
-
-
 	.text
 ENTRY (strchr)
 	movd	%esi, %xmm1
@@ -54,7 +49,7 @@ ENTRY (strchr)
 #endif
 	ret
 
-	ALIGN(3)
+	.p2align 3
 	L(next_48_bytes):
 	movdqu	16(%rdi), %xmm0
 	movdqa	%xmm0, %xmm4
@@ -83,10 +78,10 @@ ENTRY (strchr)
 L(loop_start):
 	/* We use this alignment to force loop be aligned to 8 but not
 	   16 bytes.  This gives better sheduling on AMD processors.  */
-	ALIGN(4)
+	.p2align 4
 	pxor	%xmm6, %xmm6
 	andq	$-64, %rdi
-	ALIGN(3)
+	.p2align 3
 L(loop64):
 	addq	$64, %rdi
 	movdqa	(%rdi), %xmm5
@@ -129,7 +124,7 @@ L(loop64):
 	orq	%rcx, %rax
 	salq	$48, %rdx
 	orq	%rdx, %rax
-	ALIGN(3)
+	.p2align 3
 L(return):
 	bsfq	%rax, %rax
 #ifdef AS_STRCHRNUL
@@ -141,7 +136,7 @@ L(return):
 	cmovne	%rdx, %rax
 #endif
 	ret
-	ALIGN(4)
+	.p2align 4
 
 L(cross_page):
 	movq	%rdi, %rdx
diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S
index 514765b..2a07ff7 100644
--- a/sysdeps/x86_64/strrchr.S
+++ b/sysdeps/x86_64/strrchr.S
@@ -19,11 +19,6 @@
 
 #include <sysdep.h>
 
-# ifndef ALIGN
-#  define ALIGN(n)	.p2align n
-# endif
-
-
 	.text
 ENTRY (strrchr)
 	movd	%esi, %xmm1
@@ -51,7 +46,7 @@ ENTRY (strrchr)
 	addq	%rdi, %rax
 	ret
 
-	ALIGN(4)
+	.p2align 4
 L(next_48_bytes):
 	movdqu	16(%rdi), %xmm4
 	movdqa	%xmm4, %xmm5
@@ -91,7 +86,7 @@ L(next_48_bytes):
 	leaq	(%rdi,%rsi), %rax
 	ret
 
-	ALIGN(4)
+	.p2align 4
 L(loop_header2):
 	testq	%rsi, %rsi
 	movq	%rdi, %rcx
@@ -102,7 +97,7 @@ L(loop_header):
 	andq	$-64, %rdi
 	jmp	L(loop_entry)
 
-	ALIGN(4)
+	.p2align 4
 L(loop64):
 	testq	%rdx, %rdx
 	cmovne	%rdx, %rsi
@@ -163,18 +158,18 @@ L(loop_entry):
 	leaq	(%rcx,%rsi), %rax
 	ret
 
-	ALIGN(4)
+	.p2align 4
 L(no_c_found):
 	movl	$1, %esi
 	xorl	%ecx, %ecx
 	jmp	L(loop_header)
 
-	ALIGN(4)
+	.p2align 4
 L(exit):
 	xorl	%eax, %eax
 	ret
 
-	ALIGN(4)
+	.p2align 4
 L(cross_page):
 	movq	%rdi, %rax
 	pxor	%xmm0, %xmm0

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                        |   13 +
 sysdeps/x86_64/memset.S                          |   10 +-
 sysdeps/x86_64/multiarch/memcmp-sse4.S           |   84 ++++----
 sysdeps/x86_64/multiarch/memcmp-ssse3.S          |  126 +++++------
 sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S |   10 +-
 sysdeps/x86_64/multiarch/memcpy-ssse3-back.S     |   86 ++++----
 sysdeps/x86_64/multiarch/memcpy-ssse3.S          |  254 +++++++++++-----------
 sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S |    9 +-
 sysdeps/x86_64/strchr.S                          |   15 +-
 sysdeps/x86_64/strrchr.S                         |   17 +-
 10 files changed, 301 insertions(+), 323 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]