GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/lib/memmove_64.S

/*
 * Normally the compiler's builtins are used, but sometimes the compiler
 * emits calls to out-of-line code. Based on asm-i386/string.h.
 *
 * This assembly file is rewritten from the memmove_64.c file.
 * - Copyright 2011 Fenghua Yu <[email protected]>
 */
#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>

#undef memmove

/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest
 */
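
/*
 * As a rough reading aid, the control flow below corresponds to the
 * following informal C-style sketch (thresholds and the exact entry
 * conditions live in the code itself):
 *
 *	if (count < 32)
 *		goto tail;			// label 1
 *	if (src >= dest || src + count <= dest)
 *		copy forward;			// .Lmemmove_begin_forward
 *	else
 *		copy backward;			// label 2
 *	tail:	move the remaining 0..31 bytes;
 */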
ENTRY(memmove)
	CFI_STARTPROC

	/* Moves of 32 bytes or more are handled by the bulk loops below;
	 * anything shorter goes straight to the tail code at 1. */
	mov %rdi, %rax
	cmp $0x20, %rdx
	jb 1f

	/* Decide between forward and backward copy mode. */
	cmp %rdi, %rsi
	jge .Lmemmove_begin_forward
	mov %rsi, %r8
	add %rdx, %r8
	cmp %rdi, %r8
	jg 2f
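
	/*
	 * Forward copy is safe when src >= dest, and also when the two
	 * regions do not overlap at all (src + count <= dest). Only when
	 * dest lands inside the source region (src < dest < src + count)
	 * must we copy backward, from high addresses to low, so that no
	 * source byte is overwritten before it has been read.
	 */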

.Lmemmove_begin_forward:
	/*
	 * The movsq instruction has a high startup latency, so we handle
	 * small sizes with general-purpose registers instead.
	 */
	cmp $680, %rdx
	jb 3f
	/*
	 * The movsq instruction is only a win for the equally aligned case.
	 */

	cmpb %dil, %sil
	je 4f
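
	/*
	 * 680 bytes is apparently the tuned crossover point between the
	 * register loop and rep movsq. The cmpb test compares the low
	 * bytes of dest and src: if they are equal, the two pointers share
	 * the same alignment (in particular mod 8), which is the case that
	 * rep movsq handles well.
	 */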
3:
	sub $0x20, %rdx
	/*
	 * We gobble 32 bytes forward in each loop iteration.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae 5b
	addq $0x20, %rdx
	jmp 1f
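
	/*
	 * Loop accounting for the block above: rdx is pre-decremented by
	 * 0x20 at 3, so each pass copies 32 bytes and jae repeats while
	 * the sub at the top of the loop has not borrowed. The final addq
	 * undoes the pre-decrement, leaving the 0..31 outstanding bytes in
	 * rdx for the tail code at 1.
	 */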
	/*
	 * Handle data forward by movsq.
	 */
	.p2align 4
4:
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11
	lea -8(%rdi, %rdx), %r10
	shrq $3, %rcx
	rep movsq
	movq %r11, (%r10)
	jmp 13f
.Lmemmove_end_forward:
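
	/*
	 * The trick above: shrq drops the low three bits of the count, so
	 * rep movsq copies count & ~7 bytes. The last quadword of the
	 * source was loaded into r11 before the copy started; storing it
	 * at dest + count - 8 afterwards covers the 1..7 remainder bytes
	 * with an overlapping 8-byte store.
	 */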

	/*
	 * Handle data backward by movsq.
	 */
	.p2align 4
7:
	movq %rdx, %rcx
	movq (%rsi), %r11
	movq %rdi, %r10
	leaq -8(%rsi, %rdx), %rsi
	leaq -8(%rdi, %rdx), %rdi
	shrq $3, %rcx
	std
	rep movsq
	cld
	movq %r11, (%r10)
	jmp 13f
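
	/*
	 * Here std makes rep movsq run downwards, starting from the last
	 * quadword of each buffer, and cld restores the normal direction
	 * afterwards (the kernel expects DF clear). The first quadword of
	 * the source is saved in r11 up front and stored to the head of
	 * dest last, covering the low remainder bytes just as in the
	 * forward case.
	 */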

	/*
	 * Start to prepare for backward copy.
	 */
	.p2align 4
2:
	cmp $680, %rdx
	jb 6f
	cmp %dil, %sil
	je 7b
6:
	/*
	 * Point rsi and rdi at the tails of the two buffers.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * We gobble 32 bytes backward in each loop iteration.
	 */
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae 8b
	/*
	 * Step rsi and rdi back to the heads of the buffers for the
	 * tail copy.
	 */
	addq $0x20, %rdx
	subq %rdx, %rsi
	subq %rdx, %rdi
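
	/*
	 * Tail handling for the remaining 0..31 bytes, and also the entry
	 * point for moves shorter than 32 bytes. Every case below loads
	 * all of its data into registers before storing any of it, pairing
	 * a load from the head of the region with a load from its tail, so
	 * the stores may overlap each other and the source arbitrarily.
	 */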
1:
	cmpq $16, %rdx
	jb 9f
	/*
	 * Move data from 16 bytes to 31 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Move data from 8 bytes to 15 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/*
	 * Move data from 4 bytes to 7 bytes.
	 */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/*
	 * Move data from 2 bytes to 3 bytes.
	 */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	cmp $1, %rdx
	jb 13f
	/*
	 * Move data for 1 byte.
	 */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	retq
	CFI_ENDPROC
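
/*
 * Alternative-instruction patching: on CPUs with Enhanced REP MOVSB
 * (X86_FEATURE_ERMS), the forward-copy body between
 * .Lmemmove_begin_forward and .Lmemmove_end_forward is patched at boot
 * with the rep movsb replacement below. Each .altinstructions record
 * gives the original location, the replacement, the feature bit, and
 * the lengths of the two sequences.
 */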
	.section .altinstr_replacement,"ax"
.Lmemmove_begin_forward_efs:
	/* Forward moving data. */
	movq %rdx, %rcx
	rep movsb
	retq
.Lmemmove_end_forward_efs:
	.previous

	.section .altinstructions,"a"
	.align 8
	.quad .Lmemmove_begin_forward
	.quad .Lmemmove_begin_forward_efs
	.word X86_FEATURE_ERMS
	.byte .Lmemmove_end_forward-.Lmemmove_begin_forward
	.byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
	.previous
ENDPROC(memmove)