GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/lib/copy_user_uncached_64.S
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2023 Linus Torvalds <[email protected]>
 */

#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/objtool.h>
#include <asm/asm.h>

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 *
 * This copies from user space into kernel space, but the kernel
 * space accesses can take a machine check exception, so they too
 * need exception handling.
 *
 * Note: only 32-bit and 64-bit stores have non-temporal versions,
 * and we only use aligned versions. Any unaligned parts at the
 * start or end of the copy will be done using normal cached stores.
 *
 * Input:
 * rdi destination
 * rsi source
 * edx count
 *
 * Output:
 * rax uncopied bytes or 0 if successful.
 */
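/*
 * For reference, the C-level view of the register contract above is
 * roughly the following (illustrative sketch only; see the x86 uaccess
 * headers for the actual declaration):
 *
 *	unsigned long __copy_user_nocache(void *dst, const void __user *src,
 *					  unsigned size);
 *
 * A zero return means everything was copied; a non-zero return is the
 * number of bytes left uncopied.
 */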
SYM_FUNC_START(__copy_user_nocache)
	ANNOTATE_NOENDBR
	/* If the destination is not 8-byte aligned, we'll have to align it */
	testb $7,%dil
	jne .Lalign

.Lis_aligned:
	cmp $64,%edx
	jb .Lquadwords

	.p2align 4,0x90
.Lunrolled:
10:	movq (%rsi),%r8
11:	movq 8(%rsi),%r9
12:	movq 16(%rsi),%r10
13:	movq 24(%rsi),%r11
20:	movnti %r8,(%rdi)
21:	movnti %r9,8(%rdi)
22:	movnti %r10,16(%rdi)
23:	movnti %r11,24(%rdi)
30:	movq 32(%rsi),%r8
31:	movq 40(%rsi),%r9
32:	movq 48(%rsi),%r10
33:	movq 56(%rsi),%r11
40:	movnti %r8,32(%rdi)
41:	movnti %r9,40(%rdi)
42:	movnti %r10,48(%rdi)
43:	movnti %r11,56(%rdi)

	addq $64,%rsi
	addq $64,%rdi
	sub $64,%edx
	cmp $64,%edx
	jae .Lunrolled

	/*
	 * The first set of user mode loads has been done
	 * without any stores, so if they fail, we can
	 * just try the non-unrolled loop.
	 */
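	/*
	 * The _ASM_EXTABLE_UA(insn, fixup) entries below emit no code of
	 * their own: each one records an exception-table entry saying that
	 * a user-access fault on the numbered instruction should resume at
	 * the given fixup label.
	 */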
	_ASM_EXTABLE_UA(10b, .Lquadwords)
	_ASM_EXTABLE_UA(11b, .Lquadwords)
	_ASM_EXTABLE_UA(12b, .Lquadwords)
	_ASM_EXTABLE_UA(13b, .Lquadwords)

	/*
	 * The second set of user mode loads has been
	 * done with 32 bytes stored to the destination,
	 * so we need to take that into account before
	 * falling back to the non-unrolled loop.
	 */
	_ASM_EXTABLE_UA(30b, .Lfixup32)
	_ASM_EXTABLE_UA(31b, .Lfixup32)
	_ASM_EXTABLE_UA(32b, .Lfixup32)
	_ASM_EXTABLE_UA(33b, .Lfixup32)

	/*
	 * An exception on a write means that we're
	 * done, but we need to update the count
	 * depending on where in the unrolled loop
	 * we were.
	 */
	_ASM_EXTABLE_UA(20b, .Ldone0)
	_ASM_EXTABLE_UA(21b, .Ldone8)
	_ASM_EXTABLE_UA(22b, .Ldone16)
	_ASM_EXTABLE_UA(23b, .Ldone24)
	_ASM_EXTABLE_UA(40b, .Ldone32)
	_ASM_EXTABLE_UA(41b, .Ldone40)
	_ASM_EXTABLE_UA(42b, .Ldone48)
	_ASM_EXTABLE_UA(43b, .Ldone56)

.Lquadwords:
	cmp $8,%edx
	jb .Llong
50:	movq (%rsi),%rax
51:	movnti %rax,(%rdi)
	addq $8,%rsi
	addq $8,%rdi
	sub $8,%edx
	jmp .Lquadwords

	/*
	 * If we fail on the last full quadword, we will
	 * not try to do any byte-wise cached accesses.
	 * We will try to do one more 4-byte uncached
	 * one, though.
	 */
	_ASM_EXTABLE_UA(50b, .Llast4)
	_ASM_EXTABLE_UA(51b, .Ldone0)

.Llong:
	test $4,%dl
	je .Lword
60:	movl (%rsi),%eax
61:	movnti %eax,(%rdi)
	addq $4,%rsi
	addq $4,%rdi
	sub $4,%edx
.Lword:
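	/*
	 * All movnti stores on the normal path have been issued by now;
	 * sfence makes those weakly-ordered stores globally visible before
	 * we return.  The tail handled below uses ordinary cached stores.
	 * (.Llast4 has its own sfence for the same reason.)
	 */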
	sfence
	test $2,%dl
	je .Lbyte
70:	movw (%rsi),%ax
71:	movw %ax,(%rdi)
	addq $2,%rsi
	addq $2,%rdi
	sub $2,%edx
.Lbyte:
	test $1,%dl
	je .Ldone
80:	movb (%rsi),%al
81:	movb %al,(%rdi)
	dec %edx
.Ldone:
	mov %edx,%eax
	RET

	/*
	 * If we fail on the last four bytes, we won't
	 * bother with any fixups. It's dead, Jim. Note
	 * that there's no need for 'sfence' for any
	 * of this, since the exception will have been
	 * serializing.
	 */
	_ASM_EXTABLE_UA(60b, .Ldone)
	_ASM_EXTABLE_UA(61b, .Ldone)
	_ASM_EXTABLE_UA(70b, .Ldone)
	_ASM_EXTABLE_UA(71b, .Ldone)
	_ASM_EXTABLE_UA(80b, .Ldone)
	_ASM_EXTABLE_UA(81b, .Ldone)

	/*
	 * This is the "head needs aligning" case when
	 * the destination isn't 8-byte aligned. The
	 * 4-byte case can be done uncached, but any
	 * smaller alignment is done with regular stores.
	 */
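	/*
	 * Worked example: for a destination address that is 1 mod 8 (and a
	 * large enough count), this does one cached byte store, one cached
	 * word store and one uncached dword store (1 + 2 + 4 = 7 bytes)
	 * before rejoining .Lis_aligned with an 8-byte aligned destination.
	 */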
.Lalign:
	test $1,%dil
	je .Lalign_word
	test %edx,%edx
	je .Ldone
90:	movb (%rsi),%al
91:	movb %al,(%rdi)
	inc %rsi
	inc %rdi
	dec %edx
.Lalign_word:
	test $2,%dil
	je .Lalign_long
	cmp $2,%edx
	jb .Lbyte
92:	movw (%rsi),%ax
93:	movw %ax,(%rdi)
	addq $2,%rsi
	addq $2,%rdi
	sub $2,%edx
.Lalign_long:
	test $4,%dil
	je .Lis_aligned
	cmp $4,%edx
	jb .Lword
94:	movl (%rsi),%eax
95:	movnti %eax,(%rdi)
	addq $4,%rsi
	addq $4,%rdi
	sub $4,%edx
	jmp .Lis_aligned

	/*
	 * If we fail on the initial alignment accesses,
	 * we're all done. Again, no point in trying to
	 * do byte-by-byte probing if the 4-byte load
	 * fails - we're not doing any uncached accesses
	 * any more.
	 */
	_ASM_EXTABLE_UA(90b, .Ldone)
	_ASM_EXTABLE_UA(91b, .Ldone)
	_ASM_EXTABLE_UA(92b, .Ldone)
	_ASM_EXTABLE_UA(93b, .Ldone)
	_ASM_EXTABLE_UA(94b, .Ldone)
	_ASM_EXTABLE_UA(95b, .Ldone)

	/*
	 * Exception table fixups for faults in the middle
	 */
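	/*
	 * Worked example: a fault on the store at label 41: (offset 40 in
	 * the unrolled block) is routed to .Ldone40; falling through five
	 * 'sub $8,%edx' steps accounts for the 40 bytes already written,
	 * so rax reports only the genuinely uncopied bytes.
	 */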
.Ldone56: sub $8,%edx
.Ldone48: sub $8,%edx
.Ldone40: sub $8,%edx
.Ldone32: sub $8,%edx
.Ldone24: sub $8,%edx
.Ldone16: sub $8,%edx
.Ldone8: sub $8,%edx
.Ldone0:
	mov %edx,%eax
	RET

.Lfixup32:
	addq $32,%rsi
	addq $32,%rdi
	sub $32,%edx
	jmp .Lquadwords

.Llast4:
52:	movl (%rsi),%eax
53:	movnti %eax,(%rdi)
	sfence
	sub $4,%edx
	mov %edx,%eax
	RET
	_ASM_EXTABLE_UA(52b, .Ldone0)
	_ASM_EXTABLE_UA(53b, .Ldone0)

SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
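
/*
 * Illustrative caller sketch (the wrapper name is hypothetical; only
 * __copy_user_nocache itself is real):
 *
 *	static int fill_buffer_nocache(void *dst, const void __user *src,
 *				       unsigned int len)
 *	{
 *		unsigned long left = __copy_user_nocache(dst, src, len);
 *
 *		return left ? -EFAULT : 0;
 *	}
 */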