|
|
| 61 |
#ifdef USE_AS_MEMPCPY |
61 |
#ifdef USE_AS_MEMPCPY |
| 62 |
add %rdx, %rax |
62 |
add %rdx, %rax |
| 63 |
#endif |
63 |
#endif |
| 64 |
|
64 |
/* |
| 65 |
#ifdef USE_AS_MEMMOVE |
65 |
* The small cases we can do without checking for any |
| 66 |
cmp %rsi, %rdi |
66 |
* overlap at all, since we do them as all loads followed |
| 67 |
jb L(copy_forward) |
67 |
* by all stores. |
| 68 |
je L(write_0bytes) |
68 |
* |
| 69 |
cmp $79, %rdx |
69 |
* So just jump through the less-than-80bytes table. |
| 70 |
jbe L(copy_forward) |
70 |
*/ |
| 71 |
jmp L(copy_backward) |
71 |
cmp $79,%rdx |
| 72 |
L(copy_forward): |
72 |
lea L(table_less_80bytes)(%rip), %r11 |
| 73 |
#endif |
|
|
| 74 |
cmp $79, %rdx |
| 75 |
lea L(table_less_80bytes)(%rip), %r11 |
| 76 |
ja L(80bytesormore) |
73 |
ja L(80bytesormore) |
| 77 |
movslq (%r11, %rdx, 4), %r9 |
74 |
movslq (%r11, %rdx, 4), %r9 |
| 78 |
add %rdx, %rsi |
75 |
add %rdx, %rsi |
|
|
| 81 |
jmp *%r9 |
78 |
jmp *%r9 |
| 82 |
ud2 |
79 |
ud2 |
| 83 |
|
80 |
|
| 84 |
ALIGN (4) |
81 |
/* |
|
|
82 |
* For the 80+ byte cases we need to check overlap |
| 83 |
*/ |
| 85 |
L(80bytesormore): |
84 |
L(80bytesormore): |
| 86 |
#ifndef USE_AS_MEMMOVE |
85 |
lea (%rsi,%rdx),%r9 |
|
|
86 |
lea (%rdi,%rdx),%r11 |
| 87 |
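cmp %rdi,%r9 /* intended: dest start >= source end. NOTE(review): as written, the following jae is taken when source end (%r9) >= dest start (%rdi) - operands look swapped, confirm */ |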
| 88 |
jae L(nonoverlap) /* -> nonoverlapping */ |
| 89 |
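cmp %rsi,%r11 /* intended: source start >= destination end. NOTE(review): as written, the following jae is taken when destination end (%r11) >= source start (%rsi) - operands look swapped, confirm */ |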
| 90 |
jae L(nonoverlap) /* -> nonoverlapping */ |
| 91 |
cmp %rsi, %rdi /* overlap: */ |
| 92 |
jb L(copy_forward) /* dest < source: forward copy */ |
| 93 |
je L(write_0bytes) /* source == dest: no copy */ |
| 94 |
jmp L(copy_backward) /* dest > source: backward copy */ |
| 95 |
L(nonoverlap): |
| 87 |
cmp %dil, %sil |
96 |
cmp %dil, %sil |
| 88 |
jle L(copy_backward) |
97 |
jle L(copy_backward) |
| 89 |
#endif |
98 |
L(copy_forward): |
| 90 |
|
|
|
| 91 |
movdqu (%rsi), %xmm0 |
99 |
movdqu (%rsi), %xmm0 |
| 92 |
mov %rdi, %rcx |
100 |
mov %rdi, %rcx |
| 93 |
and $-16, %rdi |
101 |
and $-16, %rdi |
|
|
| 2805 |
movntdq %xmm1, (%rdi) |
2813 |
movntdq %xmm1, (%rdi) |
| 2806 |
lea 16(%rdi), %rdi |
2814 |
lea 16(%rdi), %rdi |
| 2807 |
lea -0x90(%rdx), %rdx |
2815 |
lea -0x90(%rdx), %rdx |
| 2808 |
#ifdef USE_AS_MEMMOVE |
|
|
| 2809 |
mov %rsi, %r9 |
2816 |
mov %rsi, %r9 |
| 2810 |
sub %rdi, %r9 |
2817 |
sub %rdi, %r9 |
| 2811 |
cmp %rdx, %r9 |
2818 |
cmp %rdx, %r9 |
|
|
| 2814 |
cmp %rcx, %rdx |
2821 |
cmp %rcx, %rdx |
| 2815 |
jb L(ll_cache_copy_fwd_start) |
2822 |
jb L(ll_cache_copy_fwd_start) |
| 2816 |
L(memmove_is_memcpy_fwd): |
2823 |
L(memmove_is_memcpy_fwd): |
| 2817 |
#endif |
|
|
| 2818 |
L(large_page_loop): |
2824 |
L(large_page_loop): |
| 2819 |
movdqu (%rsi), %xmm0 |
2825 |
movdqu (%rsi), %xmm0 |
| 2820 |
movdqu 0x10(%rsi), %xmm1 |
2826 |
movdqu 0x10(%rsi), %xmm1 |
|
Lines 2859-2865
L(large_page_less_64bytes):
|
Link Here
|
|---|
|
| 2859 |
sfence |
2865 |
sfence |
| 2860 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
2866 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
| 2861 |
|
2867 |
|
| 2862 |
#ifdef USE_AS_MEMMOVE |
|
|
| 2863 |
ALIGN (4) |
2868 |
ALIGN (4) |
| 2864 |
L(ll_cache_copy_fwd_start): |
2869 |
L(ll_cache_copy_fwd_start): |
| 2865 |
prefetcht0 0x1c0(%rsi) |
2870 |
prefetcht0 0x1c0(%rsi) |
|
Lines 2906-2912
L(large_page_ll_less_fwd_64bytes):
|
Link Here
|
|---|
|
| 2906 |
add %rdx, %rdi |
2911 |
add %rdx, %rdi |
| 2907 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
2912 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
| 2908 |
|
2913 |
|
| 2909 |
#endif |
|
|
| 2910 |
ALIGN (4) |
2914 |
ALIGN (4) |
| 2911 |
L(large_page_bwd): |
2915 |
L(large_page_bwd): |
| 2912 |
movdqu -0x10(%rsi), %xmm1 |
2916 |
movdqu -0x10(%rsi), %xmm1 |
|
|
| 2915 |
movdqa %xmm1, -0x10(%rdi) |
2919 |
movdqa %xmm1, -0x10(%rdi) |
| 2916 |
lea -16(%rdi), %rdi |
2920 |
lea -16(%rdi), %rdi |
| 2917 |
lea -0x90(%rdx), %rdx |
2921 |
lea -0x90(%rdx), %rdx |
| 2918 |
#ifdef USE_AS_MEMMOVE |
|
|
| 2919 |
mov %rdi, %r9 |
2922 |
mov %rdi, %r9 |
| 2920 |
sub %rsi, %r9 |
2923 |
sub %rsi, %r9 |
| 2921 |
cmp %rdx, %r9 |
2924 |
cmp %rdx, %r9 |
|
|
| 2923 |
cmp %rcx, %r9 |
2926 |
cmp %rcx, %r9 |
| 2924 |
jb L(ll_cache_copy_bwd_start) |
2927 |
jb L(ll_cache_copy_bwd_start) |
| 2925 |
L(memmove_is_memcpy_bwd): |
2928 |
L(memmove_is_memcpy_bwd): |
| 2926 |
#endif |
|
|
| 2927 |
L(large_page_bwd_loop): |
2929 |
L(large_page_bwd_loop): |
| 2928 |
movdqu -0x10(%rsi), %xmm0 |
2930 |
movdqu -0x10(%rsi), %xmm0 |
| 2929 |
movdqu -0x20(%rsi), %xmm1 |
2931 |
movdqu -0x20(%rsi), %xmm1 |
|
Lines 2966-2972
L(large_page_less_bwd_64bytes):
|
Link Here
|
|---|
|
| 2966 |
sfence |
2968 |
sfence |
| 2967 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
2969 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
| 2968 |
|
2970 |
|
| 2969 |
#ifdef USE_AS_MEMMOVE |
|
|
| 2970 |
ALIGN (4) |
2971 |
ALIGN (4) |
| 2971 |
L(ll_cache_copy_bwd_start): |
2972 |
L(ll_cache_copy_bwd_start): |
| 2972 |
prefetcht0 -0x1c0(%rsi) |
2973 |
prefetcht0 -0x1c0(%rsi) |
|
Lines 3010-3016
L(ll_cache_copy_bwd_start):
|
Link Here
|
|---|
|
| 3010 |
sub $0x40, %rdx |
3011 |
sub $0x40, %rdx |
| 3011 |
L(large_page_ll_less_bwd_64bytes): |
3012 |
L(large_page_ll_less_bwd_64bytes): |
| 3012 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
3013 |
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4) |
| 3013 |
#endif |
|
|
| 3014 |
|
3014 |
|
| 3015 |
END (MEMCPY) |
3015 |
END (MEMCPY) |
| 3016 |
|
3016 |
|