GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/lib/csum-copy_64.S
/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On a fault the function returns 0 (see .Lfault below); the wrappers
 * are responsible for turning that into a proper error result.
 *
 * Input
 *   rdi  source
 *   rsi  destination
 *   edx  len (32bit)
 *
 * Output
 *   eax  32-bit checksum, or 0 if an exception was hit.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */

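/*
 * For reference, the C-level prototype used by the wrappers is roughly
 * (an assumption based on the register usage documented above):
 *
 *   __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
 *
 * i.e. rdi = src, rsi = dst, edx = len, result in eax.
 */
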
        .macro source
10:
        _ASM_EXTABLE_UA(10b, .Lfault)
        .endm

        .macro dest
20:
        _ASM_EXTABLE_UA(20b, .Lfault)
        .endm

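/*
 * Each use of the source/dest macros above places a local label in front
 * of the following memory access and adds an exception-table entry for
 * it, so a fault in that load or store is redirected to .Lfault.
 */
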
SYM_FUNC_START(csum_partial_copy_generic)
        subq $5*8, %rsp
        movq %rbx, 0*8(%rsp)
        movq %r12, 1*8(%rsp)
        movq %r14, 2*8(%rsp)
        movq %r13, 3*8(%rsp)
        movq %r15, 4*8(%rsp)

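        /*
         * Seed the sum with all-ones and keep a zero in r9 for folding
         * carries back in.  Starting from ~0 means a successful checksum
         * can never come out as 0, so the 0 returned from .Lfault stays
         * distinguishable from a valid result.
         */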
        movl $-1, %eax
        xorl %r9d, %r9d
        movl %edx, %ecx
        cmpl $8, %ecx
        jb .Lshort

        testb $7, %sil
        jne .Lunaligned
.Laligned:
        movl %ecx, %r12d

        shrq $6, %r12
        jz .Lhandle_tail        /* < 64 */

        clc

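        /*
         * The 64-byte blocks are summed through a single carry chain:
         * clc clears CF here, every adcq in the loop folds the previous
         * carry back in, and the adcq %r9 (r9 == 0) after the loop
         * absorbs the final carry.
         */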
        /* main loop. clear in 64 byte blocks */
        /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
        /* r11: temp3, rdx: temp4, r12 loopcnt */
        /* r10: temp5, r15: temp6, r14 temp7, r13 temp8 */
        .p2align 4
.Lloop:
        source
        movq (%rdi), %rbx
        source
        movq 8(%rdi), %r8
        source
        movq 16(%rdi), %r11
        source
        movq 24(%rdi), %rdx

        source
        movq 32(%rdi), %r10
        source
        movq 40(%rdi), %r15
        source
        movq 48(%rdi), %r14
        source
        movq 56(%rdi), %r13

30:
        /*
         * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
         * potentially unmapped kernel address.
         */
        _ASM_EXTABLE(30b, 2f)
        prefetcht0 5*64(%rdi)
2:
        adcq %rbx, %rax
        adcq %r8, %rax
        adcq %r11, %rax
        adcq %rdx, %rax
        adcq %r10, %rax
        adcq %r15, %rax
        adcq %r14, %rax
        adcq %r13, %rax

        decl %r12d

        dest
        movq %rbx, (%rsi)
        dest
        movq %r8, 8(%rsi)
        dest
        movq %r11, 16(%rsi)
        dest
        movq %rdx, 24(%rsi)

        dest
        movq %r10, 32(%rsi)
        dest
        movq %r15, 40(%rsi)
        dest
        movq %r14, 48(%rsi)
        dest
        movq %r13, 56(%rsi)

        leaq 64(%rdi), %rdi
        leaq 64(%rsi), %rsi

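        /*
         * decl %r12d above set ZF for the branch below; mov and lea do
         * not modify the flags, so both ZF and the carry from the adcq
         * chain survive to the top of the next iteration.
         */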
        jnz .Lloop

        adcq %r9, %rax

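        /*
         * Tail: up to 56 remaining bytes are summed 8 at a time, the sum
         * is folded to 32 bits, then 2-byte and 1-byte leftovers are
         * added.  rcx was not touched by the main loop; bit 63 of it
         * (set in .Lodd) is carried through r10 so .Lende knows whether
         * the result needs to be rotated by 8 at the end.
         */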
        /* do last up to 56 bytes */
.Lhandle_tail:
        /* ecx: count, rcx.63: the end result needs to be rol8 */
        movq %rcx, %r10
        andl $63, %ecx
        shrl $3, %ecx
        jz .Lfold
        clc
        .p2align 4
.Lloop_8:
        source
        movq (%rdi), %rbx
        adcq %rbx, %rax
        decl %ecx
        dest
        movq %rbx, (%rsi)
        leaq 8(%rsi), %rsi      /* preserve carry */
        leaq 8(%rdi), %rdi
        jnz .Lloop_8
        adcq %r9, %rax          /* add in carry */

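        /*
         * Fold the 64-bit accumulator: adding the high and low 32-bit
         * halves and feeding the carry back in (r9 is still zero)
         * preserves the one's-complement sum while narrowing it to
         * 32 bits.
         */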
.Lfold:
        /* reduce checksum to 32bits */
        movl %eax, %ebx
        shrq $32, %rax
        addl %ebx, %eax
        adcl %r9d, %eax

        /* do last up to 6 bytes */
.Lhandle_7:
        movl %r10d, %ecx
        andl $7, %ecx
.L1:                            /* .Lshort rejoins the common path here */
        shrl $1, %ecx
        jz .Lhandle_1
        movl $2, %edx
        xorl %ebx, %ebx
        clc
        .p2align 4
.Lloop_1:
        source
        movw (%rdi), %bx
        adcl %ebx, %eax
        decl %ecx
        dest
        movw %bx, (%rsi)
        leaq 2(%rdi), %rdi
        leaq 2(%rsi), %rsi
        jnz .Lloop_1
        adcl %r9d, %eax         /* add in carry */

        /* handle last odd byte */
.Lhandle_1:
        testb $1, %r10b
        jz .Lende
        xorl %ebx, %ebx
        source
        movb (%rdi), %bl
        dest
        movb %bl, (%rsi)
        addl %ebx, %eax
        adcl %r9d, %eax         /* carry */

.Lende:
        testq %r10, %r10
        js .Lwas_odd
.Lout:
        movq 0*8(%rsp), %rbx
        movq 1*8(%rsp), %r12
        movq 2*8(%rsp), %r14
        movq 3*8(%rsp), %r13
        movq 4*8(%rsp), %r15
        addq $5*8, %rsp
        RET
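        /*
         * Less than 8 bytes total: no 8-byte copies at all, so just stash
         * the count in r10 and join the 2-byte/1-byte tail code at .L1.
         */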
.Lshort:
        movl %ecx, %r10d
        jmp .L1
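        /*
         * Destination not 8-byte aligned: copy (and sum) a 1-, 2- and/or
         * 4-byte prefix until %rsi is aligned, reducing %rcx as we go,
         * then fall back into the aligned path.
         */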
.Lunaligned:
        xorl %ebx, %ebx
        testb $1, %sil
        jne .Lodd
1:      testb $2, %sil
        je 2f
        source
        movw (%rdi), %bx
        dest
        movw %bx, (%rsi)
        leaq 2(%rdi), %rdi
        subq $2, %rcx
        leaq 2(%rsi), %rsi
        addq %rbx, %rax
2:      testb $4, %sil
        je .Laligned
        source
        movl (%rdi), %ebx
        dest
        movl %ebx, (%rsi)
        leaq 4(%rdi), %rdi
        subq $4, %rcx
        leaq 4(%rsi), %rsi
        addq %rbx, %rax
        jmp .Laligned

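        /*
         * The destination starts at an odd address.  Consuming one byte
         * first shifts every subsequent 16-bit word boundary in the data
         * by one, which byte-swaps the resulting sum; compensate by
         * adding the leading byte shifted left by 8 and rotating the
         * final result by 8 in .Lwas_odd.  The leaq/rorq pair also
         * decrements the count and records the situation in bit 63
         * of %rcx.
         */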
.Lodd:
        source
        movb (%rdi), %bl
        dest
        movb %bl, (%rsi)
        leaq 1(%rdi), %rdi
        leaq 1(%rsi), %rsi
        /* decrement, set MSB */
        leaq -1(%rcx, %rcx), %rcx
        rorq $1, %rcx
        shll $8, %ebx
        addq %rbx, %rax
        jmp 1b

.Lwas_odd:
        roll $8, %eax
        jmp .Lout

        /* Exception: just return 0 */
.Lfault:
        xorl %eax, %eax
        jmp .Lout
SYM_FUNC_END(csum_partial_copy_generic)