Lines 61-78
ENTRY (MEMCPY)
Link Here
|
61 |
#ifdef USE_AS_MEMPCPY |
61 |
#ifdef USE_AS_MEMPCPY |
62 |
add %rdx, %rax |
62 |
add %rdx, %rax |
63 |
#endif |
63 |
#endif |
64 |
|
64 |
/* |
65 |
#ifdef USE_AS_MEMMOVE |
65 |
* The small cases we can do without checking for any |
66 |
cmp %rsi, %rdi |
66 |
* overlap at all, since we do them as all loads followed |
67 |
jb L(copy_forward) |
67 |
* by all stores. |
68 |
je L(write_0bytes) |
68 |
* |
69 |
cmp $79, %rdx |
69 |
* So just jump through the less-than-80bytes table. |
70 |
jbe L(copy_forward) |
70 |
*/ |
71 |
jmp L(copy_backward) |
71 |
cmp $79,%rdx |
72 |
L(copy_forward): |
72 |
lea L(table_less_80bytes)(%rip), %r11 |
73 |
#endif |
|
|
74 |
cmp $79, %rdx |
75 |
lea L(table_less_80bytes)(%rip), %r11 |
76 |
ja L(80bytesormore) |
73 |
ja L(80bytesormore) |
77 |
movslq (%r11, %rdx, 4), %r9 |
74 |
movslq (%r11, %rdx, 4), %r9 |
78 |
add %rdx, %rsi |
75 |
add %rdx, %rsi |
Lines 81-93
L(copy_forward):
Link Here
|
81 |
jmp *%r9 |
78 |
jmp *%r9 |
82 |
ud2 |
79 |
ud2 |
83 |
|
80 |
|
84 |
ALIGN (4) |
81 |
/* |
|
|
82 |
* For the 80+ byte cases we need to check overlap |
83 |
*/ |
85 |
L(80bytesormore): |
84 |
L(80bytesormore): |
86 |
#ifndef USE_AS_MEMMOVE |
85 |
lea (%rsi,%rdx),%r9 |
|
|
86 |
lea (%rdi,%rdx),%r11 |
87 |
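cmp %rdi,%r9 /* NOTE(review): with the jae below this tests source end >= dest start; the intended "dest start >= source end" would need cmp %r9,%rdi */ |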
88 |
jae L(nonoverlap) /* -> nonoverlapping */ |
89 |
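cmp %rsi,%r11 /* NOTE(review): with the jae below this tests destination end >= source start; the intended "source start >= destination end" would need cmp %r11,%rsi */ |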
90 |
jae L(nonoverlap) /* -> nonoverlapping */ |
91 |
cmp %rsi, %rdi /* overlap: */ |
92 |
jb L(copy_forward) /* dest < source: forward copy */ |
93 |
je L(write_0bytes) /* source == dest: no copy */ |
94 |
jmp L(copy_backward) /* dest > source: backward copy */ |
95 |
L(nonoverlap): |
87 |
cmp %dil, %sil |
96 |
cmp %dil, %sil |
88 |
jle L(copy_backward) |
97 |
jle L(copy_backward) |
89 |
#endif |
98 |
L(copy_forward): |
90 |
|
|
|
91 |
movdqu (%rsi), %xmm0 |
99 |
movdqu (%rsi), %xmm0 |
92 |
mov %rdi, %rcx |
100 |
mov %rdi, %rcx |
93 |
and $-16, %rdi |
101 |
and $-16, %rdi |
Lines 2805-2811
L(large_page_fwd):
Link Here
|
2805 |
movntdq %xmm1, (%rdi) |
2813 |
movntdq %xmm1, (%rdi) |
2806 |
lea 16(%rdi), %rdi |
2814 |
lea 16(%rdi), %rdi |
2807 |
lea -0x90(%rdx), %rdx |
2815 |
lea -0x90(%rdx), %rdx |
2808 |
#ifdef USE_AS_MEMMOVE |
|
|
2809 |
mov %rsi, %r9 |
2816 |
mov %rsi, %r9 |
2810 |
sub %rdi, %r9 |
2817 |
sub %rdi, %r9 |
2811 |
cmp %rdx, %r9 |
2818 |
cmp %rdx, %r9 |
Lines 2814-2820
L(large_page_fwd):
Link Here
|
2814 |
cmp %rcx, %rdx |
2821 |
cmp %rcx, %rdx |
2815 |
jb L(ll_cache_copy_fwd_start) |
2822 |
jb L(ll_cache_copy_fwd_start) |
2816 |
L(memmove_is_memcpy_fwd): |
2823 |
L(memmove_is_memcpy_fwd): |
2817 |
#endif |
|
|
2818 |
L(large_page_loop): |
2824 |
L(large_page_loop): |
2819 |
movdqu (%rsi), %xmm0 |
2825 |
movdqu (%rsi), %xmm0 |
2820 |
movdqu 0x10(%rsi), %xmm1 |
2826 |
movdqu 0x10(%rsi), %xmm1 |
Lines 2859-2865
L(large_page_less_64bytes):
Link Here
|
2859 |
sfence |
2865 |
sfence |
2860 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
2866 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
2861 |
|
2867 |
|
2862 |
#ifdef USE_AS_MEMMOVE |
|
|
2863 |
ALIGN (4) |
2868 |
ALIGN (4) |
2864 |
L(ll_cache_copy_fwd_start): |
2869 |
L(ll_cache_copy_fwd_start): |
2865 |
prefetcht0 0x1c0(%rsi) |
2870 |
prefetcht0 0x1c0(%rsi) |
Lines 2906-2912
L(large_page_ll_less_fwd_64bytes):
Link Here
|
2906 |
add %rdx, %rdi |
2911 |
add %rdx, %rdi |
2907 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
2912 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
2908 |
|
2913 |
|
2909 |
#endif |
|
|
2910 |
ALIGN (4) |
2914 |
ALIGN (4) |
2911 |
L(large_page_bwd): |
2915 |
L(large_page_bwd): |
2912 |
movdqu -0x10(%rsi), %xmm1 |
2916 |
movdqu -0x10(%rsi), %xmm1 |
Lines 2915-2921
L(large_page_bwd):
Link Here
|
2915 |
movdqa %xmm1, -0x10(%rdi) |
2919 |
movdqa %xmm1, -0x10(%rdi) |
2916 |
lea -16(%rdi), %rdi |
2920 |
lea -16(%rdi), %rdi |
2917 |
lea -0x90(%rdx), %rdx |
2921 |
lea -0x90(%rdx), %rdx |
2918 |
#ifdef USE_AS_MEMMOVE |
|
|
2919 |
mov %rdi, %r9 |
2922 |
mov %rdi, %r9 |
2920 |
sub %rsi, %r9 |
2923 |
sub %rsi, %r9 |
2921 |
cmp %rdx, %r9 |
2924 |
cmp %rdx, %r9 |
Lines 2923-2929
L(large_page_bwd):
Link Here
|
2923 |
cmp %rcx, %r9 |
2926 |
cmp %rcx, %r9 |
2924 |
jb L(ll_cache_copy_bwd_start) |
2927 |
jb L(ll_cache_copy_bwd_start) |
2925 |
L(memmove_is_memcpy_bwd): |
2928 |
L(memmove_is_memcpy_bwd): |
2926 |
#endif |
|
|
2927 |
L(large_page_bwd_loop): |
2929 |
L(large_page_bwd_loop): |
2928 |
movdqu -0x10(%rsi), %xmm0 |
2930 |
movdqu -0x10(%rsi), %xmm0 |
2929 |
movdqu -0x20(%rsi), %xmm1 |
2931 |
movdqu -0x20(%rsi), %xmm1 |
Lines 2966-2972
L(large_page_less_bwd_64bytes):
Link Here
|
2966 |
sfence |
2968 |
sfence |
2967 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
2969 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
2968 |
|
2970 |
|
2969 |
#ifdef USE_AS_MEMMOVE |
|
|
2970 |
ALIGN (4) |
2971 |
ALIGN (4) |
2971 |
L(ll_cache_copy_bwd_start): |
2972 |
L(ll_cache_copy_bwd_start): |
2972 |
prefetcht0 -0x1c0(%rsi) |
2973 |
prefetcht0 -0x1c0(%rsi) |
Lines 3010-3016
L(ll_cache_copy_bwd_start):
Link Here
|
3010 |
sub $0x40, %rdx |
3011 |
sub $0x40, %rdx |
3011 |
L(large_page_ll_less_bwd_64bytes): |
3012 |
L(large_page_ll_less_bwd_64bytes): |
3012 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
3013 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
3013 |
#endif |
|
|
3014 |
|
3014 |
|
3015 |
END (MEMCPY) |
3015 |
END (MEMCPY) |
3016 |
|
3016 |
|