GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/lib/csum-copy_64.S
/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 *  rdi     source
 *  rsi     destination
 *  edx     len (32bit)
 *  ecx     sum (32bit)
 *  r8      src_err_ptr (int)
 *  r9      dst_err_ptr (int)
 *
 * Output
 *  eax     32bit folded sum, zero-extended in rax; undefined in case of
 *          exception.
 *
 * Wrappers need to take care of providing a valid exception sum and of
 * zeroing the destination. They should also align source or destination
 * to 8 bytes.
 */
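
/*
 * For reference, the C-side declaration this entry point is written
 * against looks roughly like the following sketch (the exact prototype
 * lives in arch/x86/include/asm/checksum_64.h and may differ between
 * kernel versions):
 *
 *  extern __wsum
 *  csum_partial_copy_generic(const void *src, const void *dst, int len,
 *                            __wsum sum, int *src_err_ptr, int *dst_err_ptr);
 */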

        .macro source
10:
        .section __ex_table, "a"
        .align 8
        .quad 10b, .Lbad_source
        .previous
        .endm

        .macro dest
20:
        .section __ex_table, "a"
        .align 8
        .quad 20b, .Lbad_dest
        .previous
        .endm

        .macro ignore L=.Lignore
30:
        .section __ex_table, "a"
        .align 8
        .quad 30b, \L
        .previous
        .endm
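
/*
 * Each use of the source/dest/ignore macros above plants a local label
 * (10:, 20:, 30:) in front of the next instruction and records a
 * (faulting address, fixup address) pair in the __ex_table section. If
 * the tagged instruction faults, the kernel's exception-table fixup
 * transfers control to .Lbad_source, .Lbad_dest, or the \L argument
 * instead of generating an oops.
 */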

ENTRY(csum_partial_copy_generic)
        CFI_STARTPROC
        cmpl    $3*64, %edx
        jle     .Lignore
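
/*
 * Note: the branch above is dead; its target is the very next label, so
 * execution falls through either way. It appears to be left over from
 * since-removed prefetch setup.
 */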
.Lignore:
        subq    $7*8, %rsp
        CFI_ADJUST_CFA_OFFSET 7*8
        movq    %rbx, 2*8(%rsp)
        CFI_REL_OFFSET rbx, 2*8
        movq    %r12, 3*8(%rsp)
        CFI_REL_OFFSET r12, 3*8
        movq    %r14, 4*8(%rsp)
        CFI_REL_OFFSET r14, 4*8
        movq    %r13, 5*8(%rsp)
        CFI_REL_OFFSET r13, 5*8
        movq    %rbp, 6*8(%rsp)
        CFI_REL_OFFSET rbp, 6*8

        movq    %r8, (%rsp)
        movq    %r9, 1*8(%rsp)

        movl    %ecx, %eax
        movl    %edx, %ecx

        xorl    %r9d, %r9d
        movq    %rcx, %r12

        shrq    $6, %r12
        jz      .Lhandle_tail   /* < 64 */

        clc
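
/*
 * At this point eax holds the caller's 32-bit sum, rcx the byte count,
 * and r12 the number of 64-byte blocks. r9 is cleared once and serves as
 * a zero register: "adc %r9, reg" folds a pending carry flag into the
 * running sum. clc clears CF before the first adc of the chain below.
 */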

        /* main loop. clear in 64 byte blocks */
        /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
        /* r11: temp3, rdx: temp4, r12 loopcnt */
        /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */
        .p2align 4
.Lloop:
        source
        movq    (%rdi), %rbx
        source
        movq    8(%rdi), %r8
        source
        movq    16(%rdi), %r11
        source
        movq    24(%rdi), %rdx

        source
        movq    32(%rdi), %r10
        source
        movq    40(%rdi), %rbp
        source
        movq    48(%rdi), %r14
        source
        movq    56(%rdi), %r13

        ignore 2f
        prefetcht0 5*64(%rdi)
2:
        adcq    %rbx, %rax
        adcq    %r8, %rax
        adcq    %r11, %rax
        adcq    %rdx, %rax
        adcq    %r10, %rax
        adcq    %rbp, %rax
        adcq    %r14, %rax
        adcq    %r13, %rax

        decl    %r12d

        dest
        movq    %rbx, (%rsi)
        dest
        movq    %r8, 8(%rsi)
        dest
        movq    %r11, 16(%rsi)
        dest
        movq    %rdx, 24(%rsi)

        dest
        movq    %r10, 32(%rsi)
        dest
        movq    %rbp, 40(%rsi)
        dest
        movq    %r14, 48(%rsi)
        dest
        movq    %r13, 56(%rsi)

3:

        leaq    64(%rdi), %rdi
        leaq    64(%rsi), %rsi

        jnz     .Lloop

        adcq    %r9, %rax
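
/*
 * The adcq chain above performs end-around-carry accumulation: each adcq
 * adds one quadword plus the carry out of the previous addition. A hedged
 * C sketch of one 64-byte iteration (not the kernel's actual code):
 *
 *  u64 q[8];   // the eight quadwords loaded above
 *  for (int i = 0; i < 8; i++) {
 *          unsigned __int128 t = (unsigned __int128)sum + q[i] + carry;
 *          sum   = (u64)t;         // adcq: add with incoming CF
 *          carry = (u64)(t >> 64); // CF out, carried into the next adcq
 *  }
 *  // the trailing "adcq %r9, %rax" (r9 == 0) folds the final carry in
 */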

        /* do last up to 56 bytes */
.Lhandle_tail:
        /* ecx: count */
        movl    %ecx, %r10d
        andl    $63, %ecx
        shrl    $3, %ecx
        jz      .Lfold
        clc
        .p2align 4
.Lloop_8:
        source
        movq    (%rdi), %rbx
        adcq    %rbx, %rax
        decl    %ecx
        dest
        movq    %rbx, (%rsi)
        leaq    8(%rsi), %rsi   /* preserve carry */
        leaq    8(%rdi), %rdi
        jnz     .Lloop_8
        adcq    %r9, %rax       /* add in carry */
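
/*
 * The tail loop keeps the carry chain alive across iterations: leaq
 * updates the pointers without touching any flags, and decl writes ZF
 * (consumed by jnz) while leaving CF intact for the next adcq.
 */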
.Lfold:
        /* reduce checksum to 32bits */
        movl    %eax, %ebx
        shrq    $32, %rax
        addl    %ebx, %eax
        adcl    %r9d, %eax
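
/*
 * Fold sketch: eax = (sum & 0xffffffff) + (sum >> 32), then the adcl with
 * r9d == 0 adds the end-around carry, leaving a 32-bit partial checksum.
 */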

        /* do last up to 6 bytes */
.Lhandle_7:
        movl    %r10d, %ecx
        andl    $7, %ecx
        shrl    $1, %ecx
        jz      .Lhandle_1
        movl    $2, %edx
        xorl    %ebx, %ebx
        clc
        .p2align 4
.Lloop_1:
        source
        movw    (%rdi), %bx
        adcl    %ebx, %eax
        decl    %ecx
        dest
        movw    %bx, (%rsi)
        leaq    2(%rdi), %rdi
        leaq    2(%rsi), %rsi
        jnz     .Lloop_1
        adcl    %r9d, %eax      /* add in carry */

        /* handle last odd byte */
.Lhandle_1:
        testl   $1, %r10d
        jz      .Lende
        xorl    %ebx, %ebx
        source
        movb    (%rdi), %bl
        dest
        movb    %bl, (%rsi)
        addl    %ebx, %eax
        adcl    %r9d, %eax      /* carry */

        CFI_REMEMBER_STATE
.Lende:
        movq    2*8(%rsp), %rbx
        CFI_RESTORE rbx
        movq    3*8(%rsp), %r12
        CFI_RESTORE r12
        movq    4*8(%rsp), %r14
        CFI_RESTORE r14
        movq    5*8(%rsp), %r13
        CFI_RESTORE r13
        movq    6*8(%rsp), %rbp
        CFI_RESTORE rbp
        addq    $7*8, %rsp
        CFI_ADJUST_CFA_OFFSET -7*8
        ret
        CFI_RESTORE_STATE
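
/*
 * CFI_REMEMBER_STATE snapshots the unwind annotations from before the
 * epilogue so that CFI_RESTORE_STATE can reinstate them for the
 * out-of-line exception handlers below, which run with the stack frame
 * still in place.
 */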

        /* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
        movq    (%rsp), %rax
        testq   %rax, %rax
        jz      .Lende
        movl    $-EFAULT, (%rax)
        jmp     .Lende

.Lbad_dest:
        movq    8(%rsp), %rax
        testq   %rax, %rax
        jz      .Lende
        movl    $-EFAULT, (%rax)
        jmp     .Lende
        CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
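
/*
 * For context, a hedged sketch of how the C wrappers in
 * arch/x86/lib/csum-wrappers_64.c call this routine (details differ
 * between kernel versions):
 *
 *  int err = 0;
 *  isum = csum_partial_copy_generic(src, dst, len, isum, &err, NULL);
 *  if (unlikely(err)) {
 *          // zero the destination and substitute a valid checksum,
 *          // as the header comment at the top of this file requires
 *  }
 */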