Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/lib/memmove_64.S
26451 views
1
/* SPDX-License-Identifier: GPL-2.0 */
2
/*
3
* Normally compiler builtins are used, but sometimes the compiler calls out
4
* of line code. Based on asm-i386/string.h.
5
*
6
* This assembly file is re-written from memmove_64.c file.
7
* - Copyright 2011 Fenghua Yu <[email protected]>
8
*/
9
#include <linux/export.h>
10
#include <linux/linkage.h>
11
#include <linux/cfi_types.h>
12
#include <asm/cpufeatures.h>
13
#include <asm/alternative.h>
14
15
/* Drop any macro definition of memmove so the symbol below is the real one. */
#undef memmove

/*
 * Placed in .noinstr.text ("ax" = allocatable + executable): this section
 * is kept free of instrumentation hooks; presumably so memmove stays safe
 * to call from non-instrumentable (noinstr) contexts — confirm against
 * the kernel's noinstr documentation.
 */
.section .noinstr.text, "ax"
/*
 * void *__memmove(void *dest, const void *src, size_t count)
 *
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest (memmove() returns the destination pointer)
 */
SYM_TYPED_FUNC_START(__memmove)

	mov %rdi, %rax			/* set up return value = dest */

	/*
	 * Decide forward/backward copy mode.
	 *
	 * src >= dest: a forward copy can never overwrite unread source
	 * bytes.  src < dest: forward is still safe when the regions do
	 * not actually overlap (src + count <= dest); otherwise take the
	 * backward-copy path at 2: below.
	 */
	cmp %rdi, %rsi
	jge .Lmemmove_begin_forward
	mov %rsi, %r8
	add %rdx, %r8
	cmp %rdi, %r8
	jg 2f

/* CHECK_LEN: route lengths < 32 bytes to the small-size tail code at 1:. */
#define CHECK_LEN	cmp $0x20, %rdx; jb 1f
/* MEMMOVE_BYTES: copy the whole thing with one rep movsb, then return. */
#define MEMMOVE_BYTES	movq %rdx, %rcx; rep movsb; RET
.Lmemmove_begin_forward:
	/*
	 * Runtime-patched dispatch (see <asm/alternative.h>):
	 *   baseline:           length check only, fall through to copy loops
	 *   X86_FEATURE_ERMS:   rep movsb for sizes >= 32 bytes
	 *   X86_FEATURE_FSRM:   rep movsb unconditionally (fast short rep movs)
	 */
	ALTERNATIVE_2 __stringify(CHECK_LEN), \
		      __stringify(CHECK_LEN; MEMMOVE_BYTES), X86_FEATURE_ERMS, \
		      __stringify(MEMMOVE_BYTES), X86_FEATURE_FSRM

	/*
	 * movsq instruction have many startup latency
	 * so we handle small size by general register.
	 */
	cmp  $680, %rdx
	jb	3f
	/*
	 * movsq instruction is only good for aligned case.
	 * Equal low address bytes imply src and dest share the same
	 * alignment (mod 8), so rep movsq stays qword-aligned.
	 */

	cmpb %dil, %sil
	je 4f
3:
	sub $0x20, %rdx		/* pre-decrement; borrow (CF) ends the loop */
	/*
	 * We gobble 32 bytes forward in each loop.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi	/* mov/lea preserve flags from the sub above */

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae 5b			/* loop while sub did not borrow (CF clear) */
	addq $0x20, %rdx	/* undo extra decrement; rdx = 0..31 byte tail */
	jmp 1f
	/*
	 * Handle data forward by movsq.
	 */
	.p2align 4
4:
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11	/* save last qword before movsq runs */
	lea -8(%rdi, %rdx), %r10
	shrq $3, %rcx			/* rcx = whole qwords to copy */
	rep movsq
	movq %r11, (%r10)		/* store (possibly overlapping) tail */
	jmp 13f
.Lmemmove_end_forward:

	/*
	 * Handle data backward by movsq.
	 */
	.p2align 4
7:
	movq %rdx, %rcx
	movq (%rsi), %r11		/* save first qword before movsq runs */
	movq %rdi, %r10
	leaq -8(%rsi, %rdx), %rsi	/* point at the last qword of each */
	leaq -8(%rdi, %rdx), %rdi
	shrq $3, %rcx
	std				/* direction flag set: copy downward */
	rep movsq
	cld				/* restore DF=0 as callers expect */
	movq %r11, (%r10)		/* store (possibly overlapping) head */
	jmp 13f

	/*
	 * Start to prepare for backward copy.
	 */
	.p2align 4
2:
	cmp $0x20, %rdx
	jb 1f			/* < 32 bytes: tail code at 1: is overlap-safe */
	cmp $680, %rdx
	jb 6f
	cmp %dil, %sil		/* same alignment => movsq backward path at 7: */
	je 7b
6:
	/*
	 * Calculate copy position to tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx	/* pre-decrement; borrow (CF) ends the loop */
	/*
	 * We gobble 32 bytes backward in each loop.
	 */
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae 8b			/* loop while subq did not borrow (CF clear) */
	/*
	 * Calculate copy position to head.
	 */
	addq $0x20, %rdx	/* rdx = 0..31 bytes left at the front */
	subq %rdx, %rsi		/* rewind to the start of the remainder */
	subq %rdx, %rdi
1:
	/*
	 * Small-size / tail handling: 0..31 bytes remain.  Each case loads
	 * both ends into registers before any store, so it works for
	 * overlapping regions in either direction.
	 */
	cmpq $16, %rdx
	jb 9f
	/*
	 * Move data from 16 bytes to 31 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Move data from 8 bytes to 15 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/*
	 * Move data from 4 bytes to 7 bytes.
	 */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/*
	 * Move data from 2 bytes to 3 bytes.
	 */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	cmp $1, %rdx
	jb 13f
	/*
	 * Move data for 1 byte.
	 */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	RET
SYM_FUNC_END(__memmove)
214
/* Export both the double-underscore name and the plain alias to modules. */
EXPORT_SYMBOL(__memmove)

/* memmove is the same code under its standard name (alias, no extra copy). */
SYM_FUNC_ALIAS_MEMFUNC(memmove, __memmove)
EXPORT_SYMBOL(memmove)
218
219