This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch hjl/pr18858/master created. glibc-2.23-68-gd315d48
- From: hjl at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 6 Mar 2016 23:20:19 -0000
- Subject: GNU C Library master sources branch hjl/pr18858/master created. glibc-2.23-68-gd315d48
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, hjl/pr18858/master, has been created
at d315d48783ac57ed5fe6aa5d928d3d7653dd8fb8 (commit)
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=d315d48783ac57ed5fe6aa5d928d3d7653dd8fb8
commit d315d48783ac57ed5fe6aa5d928d3d7653dd8fb8
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Mar 6 15:18:04 2016 -0800
Test unaligned_1 mempcpy functions
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index d234f4a..c3949d3 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -20,7 +20,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
strcat-sse2-unaligned strncat-sse2-unaligned \
strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \
strcspn-c strpbrk-c strspn-c varshift memset-avx2 \
- memset-avx512-no-vzeroupper
+ memset-avx512-no-vzeroupper memcpy-sse2-unaligned-1
CFLAGS-varshift.c += -msse4
CFLAGS-strcspn-c.c += -msse4
CFLAGS-strpbrk-c.c += -msse4
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 188b6d3..148e380 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -278,6 +278,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
HAS_CPU_FEATURE (SSSE3),
__memcpy_chk_ssse3)
IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
+ __memcpy_chk_sse2_unaligned_1)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
__memcpy_chk_sse2))
/* Support sysdeps/x86_64/multiarch/memcpy.S. */
@@ -295,6 +297,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__memcpy_avx512_no_vzeroupper)
#endif
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned_1)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2))
/* Support sysdeps/x86_64/multiarch/mempcpy_chk.S. */
@@ -308,11 +311,22 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
HAS_ARCH_FEATURE (AVX_Usable),
__mempcpy_chk_avx_unaligned)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ __mempcpy_chk_avx_unaligned_1)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
HAS_CPU_FEATURE (SSSE3),
__mempcpy_chk_ssse3_back)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
HAS_CPU_FEATURE (SSSE3),
+ __mempcpy_chk_ssse3_back_1)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_CPU_FEATURE (SSSE3),
__mempcpy_chk_ssse3)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_CPU_FEATURE (SSSE3),
+ __mempcpy_chk_ssse3_1)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
+ __mempcpy_chk_sse2_unaligned_1)
IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
__mempcpy_chk_sse2))
@@ -322,14 +336,24 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, mempcpy,
HAS_ARCH_FEATURE (AVX512F_Usable),
__mempcpy_avx512_no_vzeroupper)
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __mempcpy_avx512_no_vzeroupper_1)
#endif
IFUNC_IMPL_ADD (array, i, mempcpy,
HAS_ARCH_FEATURE (AVX_Usable),
__mempcpy_avx_unaligned)
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ __mempcpy_avx_unaligned_1)
IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
__mempcpy_ssse3_back)
IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
+ __mempcpy_ssse3_back_1)
+ IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
__mempcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
+ __mempcpy_ssse3_1)
IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2))
/* Support sysdeps/x86_64/multiarch/strncmp.S. */
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=cced3fc7767ac06c753e2492f43f0f9034657045
commit cced3fc7767ac06c753e2492f43f0f9034657045
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Mar 6 15:07:54 2016 -0800
Remove L(overlapping)
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S
index 05ed749..ef21c50 100644
--- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S
+++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S
@@ -45,12 +45,8 @@ END (__memcpy_chk_sse2_unaligned_1)
ENTRY(__memcpy_sse2_unaligned_1)
movq %rdi, %rax
L(start):
- movq %rsi, %r11
- leaq (%rdx,%rdx), %rcx
- subq %rdi, %r11
- subq %rdx, %r11
- cmpq %rcx, %r11
- jb L(overlapping)
+ testq %rdx, %rdx
+ je L(return)
cmpq $16, %rdx
jbe L(less_16)
movdqu (%rsi), %xmm8
@@ -109,45 +105,6 @@ L(loop):
cmpq %rcx, %rdx
jne L(loop)
ret
-L(overlapping):
- testq %rdx, %rdx
- .p2align 4,,5
- je L(return)
- movq %rdx, %r9
- leaq 16(%rsi), %rcx
- leaq 16(%rdi), %r8
- shrq $4, %r9
- movq %r9, %r11
- salq $4, %r11
- cmpq %rcx, %rdi
- setae %cl
- cmpq %r8, %rsi
- setae %r8b
- orl %r8d, %ecx
- cmpq $15, %rdx
- seta %r8b
- testb %r8b, %cl
- je .L21
- testq %r11, %r11
- je .L21
- xorl %ecx, %ecx
- xorl %r8d, %r8d
-.L7:
- movdqu (%rsi,%rcx), %xmm8
- addq $1, %r8
- movdqu %xmm8, (%rdi,%rcx)
- addq $16, %rcx
- cmpq %r8, %r9
- ja .L7
- cmpq %r11, %rdx
- je L(return)
-.L21:
- movzbl (%rsi,%r11), %ecx
- movb %cl, (%rdi,%r11)
- addq $1, %r11
- cmpq %r11, %rdx
- ja .L21
- ret
L(less_16):
testb $24, %dl
jne L(between_9_16)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=25c8f055ab723ca457c3fa4bcd8db3bbfed62158
commit 25c8f055ab723ca457c3fa4bcd8db3bbfed62158
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Mar 6 14:37:05 2016 -0800
Add __mempcpy_chk_sse2_unaligned_1
Add __mempcpy_chk_sse2_unaligned_1, __mempcpy_sse2_unaligned_1 and
__memcpy_chk_sse2_unaligned_1.
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S
index 5f514e7..05ed749 100644
--- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S
+++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S
@@ -22,9 +22,29 @@
#include "asm-syntax.h"
+ENTRY (__mempcpy_chk_sse2_unaligned_1)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__mempcpy_chk_sse2_unaligned_1)
+
+ENTRY (__mempcpy_sse2_unaligned_1)
+#if 0
+ lea (%rdi, %rdx), %rax
+#else
+ mov %rdi, %rax
+ add %rdx, %rax
+#endif
+ jmp L(start)
+END (__mempcpy_sse2_unaligned_1)
+
+ENTRY (__memcpy_chk_sse2_unaligned_1)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__memcpy_chk_sse2_unaligned_1)
ENTRY(__memcpy_sse2_unaligned_1)
movq %rdi, %rax
+L(start):
movq %rsi, %r11
leaq (%rdx,%rdx), %rcx
subq %rdi, %r11
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=da5979306cac0f1955962f6459934db4667e0eb4
commit da5979306cac0f1955962f6459934db4667e0eb4
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Mar 6 13:46:54 2016 -0800
Don't use rax as temp
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S
index 029216d..5f514e7 100644
--- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S
+++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S
@@ -24,11 +24,12 @@
ENTRY(__memcpy_sse2_unaligned_1)
- movq %rsi, %rax
+ movq %rdi, %rax
+ movq %rsi, %r11
leaq (%rdx,%rdx), %rcx
- subq %rdi, %rax
- subq %rdx, %rax
- cmpq %rcx, %rax
+ subq %rdi, %r11
+ subq %rdx, %r11
+ cmpq %rcx, %r11
jb L(overlapping)
cmpq $16, %rdx
jbe L(less_16)
@@ -39,7 +40,6 @@ ENTRY(__memcpy_sse2_unaligned_1)
movdqu %xmm8, -16(%rdi,%rdx)
ja .L31
L(return):
- movq %rdi, %rax
ret
.p2align 4,,10
.p2align 4
@@ -64,16 +64,16 @@ L(return):
addq %rdi, %rdx
andq $-64, %rdx
andq $-64, %rcx
- movq %rcx, %rax
- subq %rdi, %rax
- addq %rax, %rsi
+ movq %rcx, %r11
+ subq %rdi, %r11
+ addq %r11, %rsi
cmpq %rdx, %rcx
je L(return)
movq %rsi, %r10
subq %rcx, %r10
leaq 16(%r10), %r9
leaq 32(%r10), %r8
- leaq 48(%r10), %rax
+ leaq 48(%r10), %r11
.p2align 4,,10
.p2align 4
L(loop):
@@ -83,12 +83,12 @@ L(loop):
movdqa %xmm8, 16(%rcx)
movdqu (%rcx,%r8), %xmm8
movdqa %xmm8, 32(%rcx)
- movdqu (%rcx,%rax), %xmm8
+ movdqu (%rcx,%r11), %xmm8
movdqa %xmm8, 48(%rcx)
addq $64, %rcx
cmpq %rcx, %rdx
jne L(loop)
- jmp L(return)
+ ret
L(overlapping):
testq %rdx, %rdx
.p2align 4,,5
@@ -97,8 +97,8 @@ L(overlapping):
leaq 16(%rsi), %rcx
leaq 16(%rdi), %r8
shrq $4, %r9
- movq %r9, %rax
- salq $4, %rax
+ movq %r9, %r11
+ salq $4, %r11
cmpq %rcx, %rdi
setae %cl
cmpq %r8, %rsi
@@ -107,9 +107,9 @@ L(overlapping):
cmpq $15, %rdx
seta %r8b
testb %r8b, %cl
- je .L16
- testq %rax, %rax
- je .L16
+ je .L21
+ testq %r11, %r11
+ je .L21
xorl %ecx, %ecx
xorl %r8d, %r8d
.L7:
@@ -119,15 +119,15 @@ L(overlapping):
addq $16, %rcx
cmpq %r8, %r9
ja .L7
- cmpq %rax, %rdx
+ cmpq %r11, %rdx
je L(return)
.L21:
- movzbl (%rsi,%rax), %ecx
- movb %cl, (%rdi,%rax)
- addq $1, %rax
- cmpq %rax, %rdx
+ movzbl (%rsi,%r11), %ecx
+ movb %cl, (%rdi,%r11)
+ addq $1, %r11
+ cmpq %r11, %rdx
ja .L21
- jmp L(return)
+ ret
L(less_16):
testb $24, %dl
jne L(between_9_16)
@@ -137,28 +137,25 @@ L(less_16):
testq %rdx, %rdx
.p2align 4,,2
je L(return)
- movzbl (%rsi), %eax
+ movzbl (%rsi), %ecx
testb $2, %dl
- movb %al, (%rdi)
+ movb %cl, (%rdi)
je L(return)
- movzwl -2(%rsi,%rdx), %eax
- movw %ax, -2(%rdi,%rdx)
- jmp L(return)
+ movzwl -2(%rsi,%rdx), %ecx
+ movw %cx, -2(%rdi,%rdx)
+ ret
L(between_9_16):
- movq (%rsi), %rax
- movq %rax, (%rdi)
- movq -8(%rsi,%rdx), %rax
- movq %rax, -8(%rdi,%rdx)
- jmp L(return)
-.L16:
- xorl %eax, %eax
- jmp .L21
+ movq (%rsi), %rcx
+ movq %rcx, (%rdi)
+ movq -8(%rsi,%rdx), %rcx
+ movq %rcx, -8(%rdi,%rdx)
+ ret
L(between_5_8):
- movl (%rsi), %eax
- movl %eax, (%rdi)
- movl -4(%rsi,%rdx), %eax
- movl %eax, -4(%rdi,%rdx)
- jmp L(return)
+ movl (%rsi), %ecx
+ movl %ecx, (%rdi)
+ movl -4(%rsi,%rdx), %ecx
+ movl %ecx, -4(%rdi,%rdx)
+ ret
END(__memcpy_sse2_unaligned_1)
#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0741d6a44fab52168473a0e92b1b75cc573dd3aa
commit 0741d6a44fab52168473a0e92b1b75cc573dd3aa
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Mar 6 14:16:32 2016 -0800
Remove .L3
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S
index fdb4c30..029216d 100644
--- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S
+++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S
@@ -90,8 +90,6 @@ L(loop):
jne L(loop)
jmp L(return)
L(overlapping):
- cmpq %rsi, %rdi
- jae .L3
testq %rdx, %rdx
.p2align 4,,5
je L(return)
@@ -146,15 +144,6 @@ L(less_16):
movzwl -2(%rsi,%rdx), %eax
movw %ax, -2(%rdi,%rdx)
jmp L(return)
-.L3:
- leaq -1(%rdx), %rax
- .p2align 4,,10
- .p2align 4
-.L11:
- movzbl (%rsi,%rax), %edx
- movb %dl, (%rdi,%rax)
- subq $1, %rax
- jmp .L11
L(between_9_16):
movq (%rsi), %rax
movq %rax, (%rdi)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=bcd6945ef33fd2ad4e143d8f56e263bbbffc06b3
commit bcd6945ef33fd2ad4e143d8f56e263bbbffc06b3
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Mar 6 13:43:25 2016 -0800
Add memcpy-sse2-unaligned-1.S
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S
new file mode 100644
index 0000000..fdb4c30
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned-1.S
@@ -0,0 +1,175 @@
+/* memcpy with unaligned loads
+ Copyright (C) 2013-2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+#include <sysdep.h>
+
+#include "asm-syntax.h"
+
+
+ENTRY(__memcpy_sse2_unaligned_1)
+ movq %rsi, %rax
+ leaq (%rdx,%rdx), %rcx
+ subq %rdi, %rax
+ subq %rdx, %rax
+ cmpq %rcx, %rax
+ jb L(overlapping)
+ cmpq $16, %rdx
+ jbe L(less_16)
+ movdqu (%rsi), %xmm8
+ cmpq $32, %rdx
+ movdqu %xmm8, (%rdi)
+ movdqu -16(%rsi,%rdx), %xmm8
+ movdqu %xmm8, -16(%rdi,%rdx)
+ ja .L31
+L(return):
+ movq %rdi, %rax
+ ret
+ .p2align 4,,10
+ .p2align 4
+.L31:
+ movdqu 16(%rsi), %xmm8
+ cmpq $64, %rdx
+ movdqu %xmm8, 16(%rdi)
+ movdqu -32(%rsi,%rdx), %xmm8
+ movdqu %xmm8, -32(%rdi,%rdx)
+ jbe L(return)
+ movdqu 32(%rsi), %xmm8
+ cmpq $128, %rdx
+ movdqu %xmm8, 32(%rdi)
+ movdqu -48(%rsi,%rdx), %xmm8
+ movdqu %xmm8, -48(%rdi,%rdx)
+ movdqu 48(%rsi), %xmm8
+ movdqu %xmm8, 48(%rdi)
+ movdqu -64(%rsi,%rdx), %xmm8
+ movdqu %xmm8, -64(%rdi,%rdx)
+ jbe L(return)
+ leaq 64(%rdi), %rcx
+ addq %rdi, %rdx
+ andq $-64, %rdx
+ andq $-64, %rcx
+ movq %rcx, %rax
+ subq %rdi, %rax
+ addq %rax, %rsi
+ cmpq %rdx, %rcx
+ je L(return)
+ movq %rsi, %r10
+ subq %rcx, %r10
+ leaq 16(%r10), %r9
+ leaq 32(%r10), %r8
+ leaq 48(%r10), %rax
+ .p2align 4,,10
+ .p2align 4
+L(loop):
+ movdqu (%rcx,%r10), %xmm8
+ movdqa %xmm8, (%rcx)
+ movdqu (%rcx,%r9), %xmm8
+ movdqa %xmm8, 16(%rcx)
+ movdqu (%rcx,%r8), %xmm8
+ movdqa %xmm8, 32(%rcx)
+ movdqu (%rcx,%rax), %xmm8
+ movdqa %xmm8, 48(%rcx)
+ addq $64, %rcx
+ cmpq %rcx, %rdx
+ jne L(loop)
+ jmp L(return)
+L(overlapping):
+ cmpq %rsi, %rdi
+ jae .L3
+ testq %rdx, %rdx
+ .p2align 4,,5
+ je L(return)
+ movq %rdx, %r9
+ leaq 16(%rsi), %rcx
+ leaq 16(%rdi), %r8
+ shrq $4, %r9
+ movq %r9, %rax
+ salq $4, %rax
+ cmpq %rcx, %rdi
+ setae %cl
+ cmpq %r8, %rsi
+ setae %r8b
+ orl %r8d, %ecx
+ cmpq $15, %rdx
+ seta %r8b
+ testb %r8b, %cl
+ je .L16
+ testq %rax, %rax
+ je .L16
+ xorl %ecx, %ecx
+ xorl %r8d, %r8d
+.L7:
+ movdqu (%rsi,%rcx), %xmm8
+ addq $1, %r8
+ movdqu %xmm8, (%rdi,%rcx)
+ addq $16, %rcx
+ cmpq %r8, %r9
+ ja .L7
+ cmpq %rax, %rdx
+ je L(return)
+.L21:
+ movzbl (%rsi,%rax), %ecx
+ movb %cl, (%rdi,%rax)
+ addq $1, %rax
+ cmpq %rax, %rdx
+ ja .L21
+ jmp L(return)
+L(less_16):
+ testb $24, %dl
+ jne L(between_9_16)
+ testb $4, %dl
+ .p2align 4,,5
+ jne L(between_5_8)
+ testq %rdx, %rdx
+ .p2align 4,,2
+ je L(return)
+ movzbl (%rsi), %eax
+ testb $2, %dl
+ movb %al, (%rdi)
+ je L(return)
+ movzwl -2(%rsi,%rdx), %eax
+ movw %ax, -2(%rdi,%rdx)
+ jmp L(return)
+.L3:
+ leaq -1(%rdx), %rax
+ .p2align 4,,10
+ .p2align 4
+.L11:
+ movzbl (%rsi,%rax), %edx
+ movb %dl, (%rdi,%rax)
+ subq $1, %rax
+ jmp .L11
+L(between_9_16):
+ movq (%rsi), %rax
+ movq %rax, (%rdi)
+ movq -8(%rsi,%rdx), %rax
+ movq %rax, -8(%rdi,%rdx)
+ jmp L(return)
+.L16:
+ xorl %eax, %eax
+ jmp .L21
+L(between_5_8):
+ movl (%rsi), %eax
+ movl %eax, (%rdi)
+ movl -4(%rsi,%rdx), %eax
+ movl %eax, -4(%rdi,%rdx)
+ jmp L(return)
+END(__memcpy_sse2_unaligned_1)
+
+#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=a23b58ee49f557d49449f90657c0de9119b0da28
commit a23b58ee49f557d49449f90657c0de9119b0da28
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Mar 6 13:37:31 2016 -0800
Merge memcpy with mempcpy
diff --git a/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S
index 74fed18..4f4bf45 100644
--- a/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S
+++ b/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S
@@ -25,11 +25,30 @@
#include "asm-syntax.h"
#ifndef MEMCPY
-# define MEMCPY __memcpy_avx_unaligned
+# define MEMCPY __memcpy_avx_unaligned
# define MEMCPY_CHK __memcpy_chk_avx_unaligned
+# define MEMPCPY __mempcpy_avx_unaligned_1
+# define MEMPCPY_CHK __mempcpy_chk_avx_unaligned_1
#endif
.section .text.avx,"ax",@progbits
+#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
+ENTRY (MEMPCPY_CHK)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMPCPY_CHK)
+
+ENTRY (MEMPCPY)
+#if 0
+ lea (%rdi, %rdx), %rax
+#else
+ mov %rdi, %rax
+ add %rdx, %rax
+#endif
+ jmp L(start)
+END (MEMPCPY)
+#endif
+
#if !defined USE_AS_BCOPY
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
@@ -42,6 +61,7 @@ ENTRY (MEMCPY)
#ifdef USE_AS_MEMPCPY
add %rdx, %rax
#endif
+L(start):
cmp $256, %rdx
jae L(256bytesormore)
cmp $16, %dl
diff --git a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
index 3d567fc..3a57b73 100644
--- a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
@@ -27,9 +27,28 @@
#ifndef MEMCPY
# define MEMCPY __memcpy_avx512_no_vzeroupper
# define MEMCPY_CHK __memcpy_chk_avx512_no_vzeroupper
+# define MEMPCPY __mempcpy_avx512_no_vzeroupper_1
+# define MEMPCPY_CHK __mempcpy_chk_avx512_no_vzeroupper_1
#endif
.section .text.avx512,"ax",@progbits
+#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
+ENTRY (MEMPCPY_CHK)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMPCPY_CHK)
+
+ENTRY (MEMPCPY)
+#if 0
+ lea (%rdi, %rdx), %rax
+#else
+ mov %rdi, %rax
+ add %rdx, %rax
+#endif
+ jmp L(start)
+END (MEMPCPY)
+#endif
+
#if !defined USE_AS_BCOPY
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
@@ -42,6 +61,7 @@ ENTRY (MEMCPY)
#ifdef USE_AS_MEMPCPY
add %rdx, %rax
#endif
+L(start):
lea (%rsi, %rdx), %rcx
lea (%rdi, %rdx), %r9
cmp $512, %rdx
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index 08b41e9..6184e4e 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -29,6 +29,8 @@
#ifndef MEMCPY
# define MEMCPY __memcpy_ssse3_back
# define MEMCPY_CHK __memcpy_chk_ssse3_back
+# define MEMPCPY __mempcpy_ssse3_back_1
+# define MEMPCPY_CHK __mempcpy_chk_ssse3_back_1
#endif
#define JMPTBL(I, B) I - B
@@ -44,6 +46,23 @@
ud2
.section .text.ssse3,"ax",@progbits
+#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
+ENTRY (MEMPCPY_CHK)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMPCPY_CHK)
+
+ENTRY (MEMPCPY)
+#if 0
+ lea (%rdi, %rdx), %rax
+#else
+ mov %rdi, %rax
+ add %rdx, %rax
+#endif
+ jmp L(start)
+END (MEMPCPY)
+#endif
+
#if !defined USE_AS_BCOPY
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
@@ -66,6 +85,7 @@ ENTRY (MEMCPY)
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
L(copy_forward):
#endif
+L(start):
cmp $144, %rdx
jae L(144bytesormore)
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 95de969..127afaa 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -29,6 +29,8 @@
#ifndef MEMCPY
# define MEMCPY __memcpy_ssse3
# define MEMCPY_CHK __memcpy_chk_ssse3
+# define MEMPCPY __mempcpy_ssse3_1
+# define MEMPCPY_CHK __mempcpy_chk_ssse3_1
#endif
#define JMPTBL(I, B) I - B
@@ -44,6 +46,23 @@
ud2
.section .text.ssse3,"ax",@progbits
+#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
+ENTRY (MEMPCPY_CHK)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMPCPY_CHK)
+
+ENTRY (MEMPCPY)
+#if 0
+ lea (%rdi, %rdx), %rax
+#else
+ mov %rdi, %rax
+ add %rdx, %rax
+#endif
+ jmp L(start)
+END (MEMPCPY)
+#endif
+
#if !defined USE_AS_BCOPY
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
@@ -66,6 +85,7 @@ ENTRY (MEMCPY)
jmp L(copy_backward)
L(copy_forward):
#endif
+L(start):
cmp $79, %rdx
lea L(table_less_80bytes)(%rip), %r11
ja L(80bytesormore)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=8f263a30a3fb1f14cd9fa243546a3e208dec30db
commit 8f263a30a3fb1f14cd9fa243546a3e208dec30db
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Mar 6 10:44:14 2016 -0800
Group AVX512 functions in .text.avx512 section
* sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S:
Replace .text with .text.avx512.
* sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S:
Likewise.
diff --git a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
index 1bb12e8..3d567fc 100644
--- a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
@@ -29,7 +29,7 @@
# define MEMCPY_CHK __memcpy_chk_avx512_no_vzeroupper
#endif
- .section .text,"ax",@progbits
+ .section .text.avx512,"ax",@progbits
#if !defined USE_AS_BCOPY
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
index 1e638d7..eab8c5a 100644
--- a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
@@ -26,7 +26,7 @@
# define MEMSET_CHK __memset_chk_avx512_no_vzeroupper
#endif
- .section .text,"ax",@progbits
+ .section .text.avx512,"ax",@progbits
#if defined PIC
ENTRY (MEMSET_CHK)
cmpq %rdx, %rcx
-----------------------------------------------------------------------
hooks/post-receive
--
GNU C Library master sources