GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/lib/csum-copy_64.S
/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On a fault the function returns 0 (see .Lfault below); the wrappers
 * are responsible for turning that into a proper error result.
 *
 * Input
 *   rdi  source
 *   rsi  destination
 *   edx  len (32bit)
 *
 * Output
 *   eax  32-bit checksum, or 0 if an exception was hit.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */

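/*
 * For reference, the C-level prototype used by the wrappers is roughly
 * (an assumption based on the register usage documented above):
 *
 *   __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
 *
 * i.e. rdi = src, rsi = dst, edx = len, result in eax.
 */
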
        .macro source
10:
        _ASM_EXTABLE_UA(10b, .Lfault)
        .endm

        .macro dest
20:
        _ASM_EXTABLE_UA(20b, .Lfault)
        .endm

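/*
 * Each use of the source/dest macros above places a local label in front
 * of the following memory access and adds an exception-table entry for
 * it, so a fault in that load or store is redirected to .Lfault.
 */
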
SYM_FUNC_START(csum_partial_copy_generic)
        subq $5*8, %rsp
        movq %rbx, 0*8(%rsp)
        movq %r12, 1*8(%rsp)
        movq %r14, 2*8(%rsp)
        movq %r13, 3*8(%rsp)
        movq %r15, 4*8(%rsp)

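        /*
         * Seed the sum with all-ones and keep a zero in r9 for folding
         * carries back in.  Starting from ~0 means a successful checksum
         * can never come out as 0, so the 0 returned from .Lfault stays
         * distinguishable from a valid result.
         */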
        movl $-1, %eax
        xorl %r9d, %r9d
        movl %edx, %ecx
        cmpl $8, %ecx
        jb .Lshort

        testb $7, %sil
        jne .Lunaligned
.Laligned:
        movl %ecx, %r12d

        shrq $6, %r12
        jz .Lhandle_tail        /* < 64 */

        clc

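        /*
         * The 64-byte blocks are summed through a single carry chain:
         * clc clears CF here, every adcq in the loop folds the previous
         * carry back in, and the adcq %r9 (r9 == 0) after the loop
         * absorbs the final carry.
         */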
        /* main loop. clear in 64 byte blocks */
        /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
        /* r11: temp3, rdx: temp4, r12 loopcnt */
        /* r10: temp5, r15: temp6, r14 temp7, r13 temp8 */
        .p2align 4
.Lloop:
        source
        movq (%rdi), %rbx
        source
        movq 8(%rdi), %r8
        source
        movq 16(%rdi), %r11
        source
        movq 24(%rdi), %rdx

        source
        movq 32(%rdi), %r10
        source
        movq 40(%rdi), %r15
        source
        movq 48(%rdi), %r14
        source
        movq 56(%rdi), %r13

30:
        /*
         * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
         * potentially unmapped kernel address.
         */
        _ASM_EXTABLE(30b, 2f)
        prefetcht0 5*64(%rdi)
2:
        adcq %rbx, %rax
        adcq %r8, %rax
        adcq %r11, %rax
        adcq %rdx, %rax
        adcq %r10, %rax
        adcq %r15, %rax
        adcq %r14, %rax
        adcq %r13, %rax

        decl %r12d

        dest
        movq %rbx, (%rsi)
        dest
        movq %r8, 8(%rsi)
        dest
        movq %r11, 16(%rsi)
        dest
        movq %rdx, 24(%rsi)

        dest
        movq %r10, 32(%rsi)
        dest
        movq %r15, 40(%rsi)
        dest
        movq %r14, 48(%rsi)
        dest
        movq %r13, 56(%rsi)

        leaq 64(%rdi), %rdi
        leaq 64(%rsi), %rsi

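        /*
         * decl %r12d above set ZF for the branch below; mov and lea do
         * not modify the flags, so both ZF and the carry from the adcq
         * chain survive to the top of the next iteration.
         */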
        jnz .Lloop

        adcq %r9, %rax

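        /*
         * Tail: up to 56 remaining bytes are summed 8 at a time, the sum
         * is folded to 32 bits, then 2-byte and 1-byte leftovers are
         * added.  rcx was not touched by the main loop; bit 63 of it
         * (set in .Lodd) is carried through r10 so .Lende knows whether
         * the result needs to be rotated by 8 at the end.
         */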
        /* do last up to 56 bytes */
.Lhandle_tail:
        /* ecx: count, rcx.63: the end result needs to be rol8 */
        movq %rcx, %r10
        andl $63, %ecx
        shrl $3, %ecx
        jz .Lfold
        clc
        .p2align 4
.Lloop_8:
        source
        movq (%rdi), %rbx
        adcq %rbx, %rax
        decl %ecx
        dest
        movq %rbx, (%rsi)
        leaq 8(%rsi), %rsi      /* preserve carry */
        leaq 8(%rdi), %rdi
        jnz .Lloop_8
        adcq %r9, %rax          /* add in carry */

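        /*
         * Fold the 64-bit accumulator: adding the high and low 32-bit
         * halves and feeding the carry back in (r9 is still zero)
         * preserves the one's-complement sum while narrowing it to
         * 32 bits.
         */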
.Lfold:
        /* reduce checksum to 32bits */
        movl %eax, %ebx
        shrq $32, %rax
        addl %ebx, %eax
        adcl %r9d, %eax

        /* do last up to 6 bytes */
.Lhandle_7:
        movl %r10d, %ecx
        andl $7, %ecx
.L1:                            /* .Lshort rejoins the common path here */
        shrl $1, %ecx
        jz .Lhandle_1
        movl $2, %edx
        xorl %ebx, %ebx
        clc
        .p2align 4
.Lloop_1:
        source
        movw (%rdi), %bx
        adcl %ebx, %eax
        decl %ecx
        dest
        movw %bx, (%rsi)
        leaq 2(%rdi), %rdi
        leaq 2(%rsi), %rsi
        jnz .Lloop_1
        adcl %r9d, %eax         /* add in carry */

        /* handle last odd byte */
.Lhandle_1:
        testb $1, %r10b
        jz .Lende
        xorl %ebx, %ebx
        source
        movb (%rdi), %bl
        dest
        movb %bl, (%rsi)
        addl %ebx, %eax
        adcl %r9d, %eax         /* carry */

.Lende:
        testq %r10, %r10
        js .Lwas_odd
.Lout:
        movq 0*8(%rsp), %rbx
        movq 1*8(%rsp), %r12
        movq 2*8(%rsp), %r14
        movq 3*8(%rsp), %r13
        movq 4*8(%rsp), %r15
        addq $5*8, %rsp
        RET
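        /*
         * Less than 8 bytes total: no 8-byte copies at all, so just stash
         * the count in r10 and join the 2-byte/1-byte tail code at .L1.
         */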
.Lshort:
        movl %ecx, %r10d
        jmp .L1
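        /*
         * Destination not 8-byte aligned: copy (and sum) a 1-, 2- and/or
         * 4-byte prefix until %rsi is aligned, reducing %rcx as we go,
         * then fall back into the aligned path.
         */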
.Lunaligned:
        xorl %ebx, %ebx
        testb $1, %sil
        jne .Lodd
1:      testb $2, %sil
        je 2f
        source
        movw (%rdi), %bx
        dest
        movw %bx, (%rsi)
        leaq 2(%rdi), %rdi
        subq $2, %rcx
        leaq 2(%rsi), %rsi
        addq %rbx, %rax
2:      testb $4, %sil
        je .Laligned
        source
        movl (%rdi), %ebx
        dest
        movl %ebx, (%rsi)
        leaq 4(%rdi), %rdi
        subq $4, %rcx
        leaq 4(%rsi), %rsi
        addq %rbx, %rax
        jmp .Laligned

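        /*
         * The destination starts at an odd address.  Consuming one byte
         * first shifts every subsequent 16-bit word boundary in the data
         * by one, which byte-swaps the resulting sum; compensate by
         * adding the leading byte shifted left by 8 and rotating the
         * final result by 8 in .Lwas_odd.  The leaq/rorq pair also
         * decrements the count and records the situation in bit 63
         * of %rcx.
         */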
.Lodd:
        source
        movb (%rdi), %bl
        dest
        movb %bl, (%rsi)
        leaq 1(%rdi), %rdi
        leaq 1(%rsi), %rsi
        /* decrement, set MSB */
        leaq -1(%rcx, %rcx), %rcx
        rorq $1, %rcx
        shll $8, %ebx
        addq %rbx, %rax
        jmp 1b

.Lwas_odd:
        roll $8, %eax
        jmp .Lout

        /* Exception: just return 0 */
.Lfault:
        xorl %eax, %eax
        jmp .Lout
SYM_FUNC_END(csum_partial_copy_generic)