(-)a/sysdeps/x86_64/multiarch/memcpy-ssse3.S (-24 / +24 lines)
Lines 61-78 ENTRY (MEMCPY)
 #ifdef USE_AS_MEMPCPY
 	add	%rdx, %rax
 #endif
-
-#ifdef USE_AS_MEMMOVE
-	cmp	%rsi, %rdi
-	jb	L(copy_forward)
-	je	L(write_0bytes)
-	cmp	$79, %rdx
-	jbe	L(copy_forward)
-	jmp	L(copy_backward)
-L(copy_forward):
-#endif
-	cmp	$79, %rdx
-	lea     L(table_less_80bytes)(%rip), %r11
+	/*
+	 * The small cases we can do without checking for any
+	 * overlap at all, since we do them as all loads followed
+	 * by all stores.
+	 *
+	 * So just jump through the less-than-80bytes table.
+	 */
+	cmp	$79,%rdx
+	lea	L(table_less_80bytes)(%rip), %r11
 	ja	L(80bytesormore)
 	movslq	(%r11, %rdx, 4), %r9
 	add	%rdx, %rsi
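The comment added above states the invariant that makes this safe: every sub-80-byte table entry performs all of its loads before any of its stores, so overlapping buffers still copy correctly without a direction check. A minimal C sketch of that pattern (my illustration, not code from the patch):

#include <stdint.h>
#include <string.h>

/* All loads first, then all stores: even if dst and src overlap,
   the loads capture the original bytes before any store can clobber
   them, so no overlap test is needed for fixed small sizes.  */
static void copy16_loads_then_stores (void *dst, const void *src)
{
	uint64_t lo, hi;
	memcpy (&lo, src, 8);			/* all loads ...  */
	memcpy (&hi, (const char *) src + 8, 8);
	memcpy (dst, &lo, 8);			/* ... then all stores  */
	memcpy ((char *) dst + 8, &hi, 8);
}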
Lines 81-93 L(copy_forward):
 	jmp	*%r9
 	ud2
 
-	ALIGN (4)
+	/*
+	 * For the 80+ byte cases we need to check overlap
+	 */
 L(80bytesormore):
-#ifndef USE_AS_MEMMOVE
+	lea	(%rsi,%rdx),%r9
+	lea	(%rdi,%rdx),%r11
+	cmp	%rdi,%r9		/* dest start >= source end */
+	jbe	L(nonoverlap)		/*  -> nonoverlapping */
+	cmp	%rsi,%r11		/* source start >= destination end */
+	jbe	L(nonoverlap)		/*  -> nonoverlapping */
+	cmp	%rsi, %rdi		/* overlap: */
+	jb	L(copy_forward)		/* dest < source: forward copy */
+	je	L(write_0bytes)		/* source == dest: no copy */
+	jmp	L(copy_backward)	/* dest > source: backward copy */
+L(nonoverlap):
 	cmp	%dil, %sil
 	jle	L(copy_backward)
-#endif
-
+L(copy_forward):
 	movdqu	(%rsi), %xmm0
 	mov	%rdi, %rcx
 	and	$-16, %rdi
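Transcribed into C, the new prologue classifies the two buffers before dispatching (a sketch of the logic only, with pointers flattened to integers the way the unsigned cmp/jbe pairs treat them; the enum and function names are mine):

#include <stddef.h>
#include <stdint.h>

enum copy_dir { NONOVERLAP, COPY_FORWARD, WRITE_0BYTES, COPY_BACKWARD };

/* Mirror of the lea/cmp/jbe dispatch above.  */
static enum copy_dir classify (uintptr_t dst, uintptr_t src, size_t len)
{
	if (dst >= src + len)		/* dest start >= source end  */
		return NONOVERLAP;
	if (src >= dst + len)		/* source start >= dest end  */
		return NONOVERLAP;
	if (dst < src)			/* overlap, dest below source  */
		return COPY_FORWARD;
	if (dst == src)			/* same buffer: nothing to copy  */
		return WRITE_0BYTES;
	return COPY_BACKWARD;		/* overlap, dest above source  */
}

In the nonoverlap case either direction is correct, so the code falls through to the old cmp %dil, %sil heuristic, which picks forward or backward from the low address bytes purely for performance.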
Lines 2805-2811 L(large_page_fwd):
 	movntdq	%xmm1, (%rdi)
 	lea	16(%rdi), %rdi
 	lea	-0x90(%rdx), %rdx
-#ifdef USE_AS_MEMMOVE
 	mov	%rsi, %r9
 	sub	%rdi, %r9
 	cmp	%rdx, %r9
Lines 2814-2820 L(large_page_fwd):
 	cmp	%rcx, %rdx
 	jb	L(ll_cache_copy_fwd_start)
 L(memmove_is_memcpy_fwd):
-#endif
 L(large_page_loop):
 	movdqu	(%rsi), %xmm0
 	movdqu	0x10(%rsi), %xmm1
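These two hunks make the distance check in the non-temporal (movntdq) forward path unconditional instead of memmove-only. The gist, sketched in C under assumed names (the branch between the two visible cmp instructions is elided from this interdiff, and the further cmp %rcx, %rdx appears to apply a cache-size-derived threshold set up outside it):

#include <stddef.h>
#include <stdint.h>

enum fwd_path { MEMMOVE_IS_MEMCPY_FWD, LL_CACHE_COPY_FWD };

/* If the source sits at least `remaining` bytes above the destination,
   the streaming loop can never overwrite bytes it still has to read,
   so the plain memcpy-style loop is safe; a close overlap falls back
   to the last-level-cache copy loop instead of streaming stores.  */
static enum fwd_path pick_fwd_path (uintptr_t dst, uintptr_t src,
				    size_t remaining)
{
	size_t dist = src - dst;	/* mov %rsi, %r9; sub %rdi, %r9  */
	if (dist >= remaining)		/* cmp %rdx, %r9  */
		return MEMMOVE_IS_MEMCPY_FWD;
	return LL_CACHE_COPY_FWD;
}

The backward hunks below apply the mirrored test (dist = dst - src) before L(large_page_bwd_loop).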
Lines 2859-2865 L(large_page_less_64bytes):
 	sfence
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-#ifdef USE_AS_MEMMOVE
 	ALIGN (4)
 L(ll_cache_copy_fwd_start):
 	prefetcht0 0x1c0(%rsi)
Lines 2906-2912 L(large_page_ll_less_fwd_64bytes):
 	add	%rdx, %rdi
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-#endif
 	ALIGN (4)
 L(large_page_bwd):
 	movdqu	-0x10(%rsi), %xmm1
Lines 2915-2921 L(large_page_bwd):
 	movdqa	%xmm1, -0x10(%rdi)
 	lea	-16(%rdi), %rdi
 	lea	-0x90(%rdx), %rdx
-#ifdef USE_AS_MEMMOVE
 	mov	%rdi, %r9
 	sub	%rsi, %r9
 	cmp	%rdx, %r9
Lines 2923-2929 L(large_page_bwd):
 	cmp	%rcx, %r9
 	jb	L(ll_cache_copy_bwd_start)
 L(memmove_is_memcpy_bwd):
-#endif
 L(large_page_bwd_loop):
 	movdqu	-0x10(%rsi), %xmm0
 	movdqu	-0x20(%rsi), %xmm1
Lines 2966-2972 L(large_page_less_bwd_64bytes):
 	sfence
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
 
-#ifdef USE_AS_MEMMOVE
 	ALIGN (4)
 L(ll_cache_copy_bwd_start):
 	prefetcht0 -0x1c0(%rsi)
Lines 3010-3016 L(ll_cache_copy_bwd_start):
 	sub	$0x40, %rdx
 L(large_page_ll_less_bwd_64bytes):
 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
-#endif
 
 END (MEMCPY)
 
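The net effect in memcpy-ssse3.S is that every USE_AS_MEMMOVE guard is gone: the overlap handling is compiled in unconditionally, so the one body is safe as memcpy, mempcpy, and memmove. A small self-check of the behavior bug 12518 complains about (my test, not part of the patch; it compares an overlapping memcpy against the defined memmove result):

#include <stdio.h>
#include <string.h>

int main (void)
{
	char a[256], b[256];
	for (int i = 0; i < 256; i++)
		a[i] = b[i] = (char) i;
	memmove (b + 50, b, 100);	/* defined: the reference result  */
	memcpy (a + 50, a, 100);	/* overlapping: undefined, and the
					   80+ byte path really could copy
					   in the wrong direction  */
	puts (memcmp (a, b, 256) == 0 ? "match" : "CORRUPT");
	return 0;
}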
(-)a/sysdeps/x86_64/multiarch/memmove-ssse3.S (-4 / +4 lines)
Lines 1-4
-#define USE_AS_MEMMOVE
-#define MEMCPY		__memmove_ssse3
-#define MEMCPY_CHK	__memmove_chk_ssse3
-#include "memcpy-ssse3.S"
+#include <sysdep.h>
+
+strong_alias(__memmove_ssse3, __memcpy_ssse3);
+strong_alias(__memmove_chk_ssse3, __memcpy_chk_ssse3);
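memmove-ssse3.S no longer rebuilds memcpy-ssse3.S with different defines; it only declares aliases. glibc's strong_alias (name, aliasname) macro from include/libc-symbols.h amounts, in its C form, to roughly the following for the first line (an illustration of the mechanism; in a .S file the macro emits the equivalent assembler-level symbol assignment):

extern __typeof (__memmove_ssse3) __memcpy_ssse3
     __attribute__ ((alias ("__memmove_ssse3")));

So __memcpy_ssse3 and __memcpy_chk_ssse3 become second entry points of the overlap-safe routines rather than separately assembled, overlap-unsafe copies.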
