GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/lib/checksum_32.S
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system. INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              IP/TCP/UDP checksumming routines
 *
 * Authors:     Jorge Cwik, <[email protected]>
 *              Arnt Gulbrandsen, <[email protected]>
 *              Tom May, <[email protected]>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <[email protected]>
 *              Finn Arne Gangstad <[email protected]>
 *              Lots of code moved from tcp.c and ip.c; see those files
 *              for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *              handling.
 *              Andi Kleen, add zeroing on error
 *              converted to pure assembler
 */

#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/nospec-branch.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */
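
/*
 * Added for clarity (not part of the original sources): a rough C sketch
 * of what csum_partial() computes, assuming a 2-byte aligned buffer and
 * ignoring the odd-address fixups below.  The running sum is a 32-bit
 * one's-complement accumulator that the caller later folds to 16 bits;
 * the function name in the sketch is invented.
 *
 *	unsigned int sketch_csum_partial(const unsigned char *buff, int len,
 *					 unsigned int sum)
 *	{
 *		while (len > 1) {
 *			unsigned int w = buff[0] | ((unsigned int)buff[1] << 8);
 *			sum += w;
 *			if (sum < w)
 *				sum++;
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len)
 *			sum += buff[0];
 *		return sum;
 *	}
 *
 * The assembly gets the same folded result by adding 32-bit words with
 * adcl, letting the carry flag provide the end-around carry.
 */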

        .text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

/*
 * Experiments with Ethernet and SLIP connections show that buff
 * is aligned on either a 2-byte or 4-byte boundary.  We get at
 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 * alignment for the unrolled loop.
 */
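
/*
 * Added note (not in the original sources): the fixup code below makes
 * the main loop's pointer 4-byte aligned.  If buff starts at an odd
 * address, one byte is summed first and the accumulator is rotated by
 * 8 bits (roll $8) so later words land in the right byte lanes; the sum
 * is rotated again at the end when bit 0 of the original pointer is set
 * (testb $1, 12(%esp)).  A remaining 2-byte misalignment is fixed by
 * summing one 16-bit word before the unrolled 32-byte loop.
 */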
SYM_FUNC_START(csum_partial)
        pushl %esi
        pushl %ebx
        movl 20(%esp),%eax      # Function arg: unsigned int sum
        movl 16(%esp),%ecx      # Function arg: int len
        movl 12(%esp),%esi      # Function arg: unsigned char *buff
        testl $3, %esi          # Check alignment.
        jz 2f                   # Jump if alignment is ok.
        testl $1, %esi          # Check alignment.
        jz 10f                  # Jump if alignment is boundary of 2 bytes.

        # buf is odd
        dec %ecx
        jl 8f
        movzbl (%esi), %ebx
        adcl %ebx, %eax
        roll $8, %eax
        inc %esi
        testl $2, %esi
        jz 2f
10:
        subl $2, %ecx           # Alignment uses up two bytes.
        jae 1f                  # Jump if we had at least two bytes.
        addl $2, %ecx           # ecx was < 2. Deal with it.
        jmp 4f
1:      movw (%esi), %bx
        addl $2, %esi
        addw %bx, %ax
        adcl $0, %eax
2:
        movl %ecx, %edx
        shrl $5, %ecx
        jz 2f
        testl %esi, %esi
1:      movl (%esi), %ebx
        adcl %ebx, %eax
        movl 4(%esi), %ebx
        adcl %ebx, %eax
        movl 8(%esi), %ebx
        adcl %ebx, %eax
        movl 12(%esi), %ebx
        adcl %ebx, %eax
        movl 16(%esi), %ebx
        adcl %ebx, %eax
        movl 20(%esi), %ebx
        adcl %ebx, %eax
        movl 24(%esi), %ebx
        adcl %ebx, %eax
        movl 28(%esi), %ebx
        adcl %ebx, %eax
        lea 32(%esi), %esi
        dec %ecx
        jne 1b
        adcl $0, %eax
2:      movl %edx, %ecx
        andl $0x1c, %edx
        je 4f
        shrl $2, %edx           # This clears CF
3:      adcl (%esi), %eax
        lea 4(%esi), %esi
        dec %edx
        jne 3b
        adcl $0, %eax
4:      andl $3, %ecx
        jz 7f
        cmpl $2, %ecx
        jb 5f
        movw (%esi),%cx
        leal 2(%esi),%esi
        je 6f
        shll $16,%ecx
5:      movb (%esi),%cl
6:      addl %ecx,%eax
        adcl $0, %eax
7:
        testb $1, 12(%esp)
        jz 8f
        roll $8, %eax
8:
        popl %ebx
        popl %esi
        RET
SYM_FUNC_END(csum_partial)

#else

/* Version for PentiumII/PPro */

SYM_FUNC_START(csum_partial)
        pushl %esi
        pushl %ebx
        movl 20(%esp),%eax      # Function arg: unsigned int sum
        movl 16(%esp),%ecx      # Function arg: int len
        movl 12(%esp),%esi      # Function arg: const unsigned char *buf

        testl $3, %esi
        jnz 25f
10:
        movl %ecx, %edx
        movl %ecx, %ebx
        andl $0x7c, %ebx
        shrl $7, %ecx
        addl %ebx,%esi
        shrl $2, %ebx
        negl %ebx
        lea 45f(%ebx,%ebx,2), %ebx
        testl %esi, %esi
        JMP_NOSPEC ebx
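
/*
 * Added note (not in the original sources): this is a computed jump into
 * the unrolled ladder at 40:/45: below.  At this point %ebx holds minus
 * the number of leftover dwords (len mod 128, in 4-byte units), and each
 * "adcl off(%esi), %eax" with an 8-bit displacement assembles to 3 bytes,
 * so "lea 45f(%ebx,%ebx,2), %ebx" points exactly that many instructions
 * before label 45.  %esi was already advanced past those leftover bytes,
 * so the negative displacements read them correctly, and the preceding
 * testl clears CF so the first adcl entered behaves like a plain addl.
 */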

        # Handle 2-byte-aligned regions
20:     addw (%esi), %ax
        lea 2(%esi), %esi
        adcl $0, %eax
        jmp 10b
25:
        testl $1, %esi
        jz 30f
        # buf is odd
        dec %ecx
        jl 90f
        movzbl (%esi), %ebx
        addl %ebx, %eax
        adcl $0, %eax
        roll $8, %eax
        inc %esi
        testl $2, %esi
        jz 10b

30:     subl $2, %ecx
        ja 20b
        je 32f
        addl $2, %ecx
        jz 80f
        movzbl (%esi),%ebx      # csumming 1 byte, 2-aligned
        addl %ebx, %eax
        adcl $0, %eax
        jmp 80f
32:
        addw (%esi), %ax        # csumming 2 bytes, 2-aligned
        adcl $0, %eax
        jmp 80f

40:
        addl -128(%esi), %eax
        adcl -124(%esi), %eax
        adcl -120(%esi), %eax
        adcl -116(%esi), %eax
        adcl -112(%esi), %eax
        adcl -108(%esi), %eax
        adcl -104(%esi), %eax
        adcl -100(%esi), %eax
        adcl -96(%esi), %eax
        adcl -92(%esi), %eax
        adcl -88(%esi), %eax
        adcl -84(%esi), %eax
        adcl -80(%esi), %eax
        adcl -76(%esi), %eax
        adcl -72(%esi), %eax
        adcl -68(%esi), %eax
        adcl -64(%esi), %eax
        adcl -60(%esi), %eax
        adcl -56(%esi), %eax
        adcl -52(%esi), %eax
        adcl -48(%esi), %eax
        adcl -44(%esi), %eax
        adcl -40(%esi), %eax
        adcl -36(%esi), %eax
        adcl -32(%esi), %eax
        adcl -28(%esi), %eax
        adcl -24(%esi), %eax
        adcl -20(%esi), %eax
        adcl -16(%esi), %eax
        adcl -12(%esi), %eax
        adcl -8(%esi), %eax
        adcl -4(%esi), %eax
45:
        lea 128(%esi), %esi
        adcl $0, %eax
        dec %ecx
        jge 40b
        movl %edx, %ecx
50:     andl $3, %ecx
        jz 80f

        # Handle the last 1-3 bytes without jumping
        notl %ecx               # 1->2, 2->1, 3->0, higher bits are masked
        movl $0xffffff,%ebx     # by the shll and shrl instructions
        shll $3,%ecx
        shrl %cl,%ebx
        andl -128(%esi),%ebx    # esi is 4-aligned so should be ok
        addl %ebx,%eax
        adcl $0,%eax
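
/*
 * Added worked example (not in the original sources): with the remaining
 * byte count 1..3 in %ecx, notl followed by shll $3 leaves (~count & 3) * 8
 * in %cl (shrl only uses the low 5 bits of the count), so 0xffffff is
 * shifted right by 16, 8 or 0 bits:
 *
 *	count = 1  ->  mask 0x0000ff   (keep 1 byte)
 *	count = 2  ->  mask 0x00ffff   (keep 2 bytes)
 *	count = 3  ->  mask 0xffffff   (keep 3 bytes)
 *
 * The masked dword at -128(%esi) then adds exactly the trailing bytes to
 * the sum without any branch.
 */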
80:
        testb $1, 12(%esp)
        jz 90f
        roll $8, %eax
90:
        popl %ebx
        popl %esi
        RET
SYM_FUNC_END(csum_partial)

#endif
EXPORT_SYMBOL(csum_partial)

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
                                        int len)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 */
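
/*
 * Added for clarity (not part of the original sources): ignoring the
 * fault handling, the routine below is a memcpy() fused with the
 * checksum, seeded with ~0 so that the error value 0 can never be a
 * successful result.  A rough C sketch, with invented names and an even
 * len for brevity:
 *
 *	unsigned int sketch_csum_and_copy(const unsigned char *src,
 *					  unsigned char *dst, int len)
 *	{
 *		unsigned int sum = ~0U;
 *		int i;
 *
 *		for (i = 0; i < len; i += 2) {
 *			unsigned int w = src[i] | ((unsigned int)src[i + 1] << 8);
 *			dst[i] = src[i];
 *			dst[i + 1] = src[i + 1];
 *			sum += w;
 *			if (sum < w)
 *				sum++;
 *		}
 *		return sum;
 *	}
 */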

#define EXC(y...)                                                       \
        9999: y;                                                        \
        _ASM_EXTABLE_TYPE(9999b, 7f, EX_TYPE_UACCESS | EX_FLAG_CLEAR_AX)
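
/*
 * Added note (not in the original sources): EXC() wraps an instruction
 * that may fault on user memory.  The 9999 local label plus the
 * _ASM_EXTABLE_TYPE entry make the fixup machinery clear %eax
 * (EX_FLAG_CLEAR_AX) and resume at local label 7 near the end of the
 * function, so the routine returns 0 instead of oopsing.
 */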

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP 12

SYM_FUNC_START(csum_partial_copy_generic)
        subl $4,%esp
        pushl %edi
        pushl %esi
        pushl %ebx
        movl ARGBASE+12(%esp),%ecx      # len
        movl ARGBASE+4(%esp),%esi       # src
        movl ARGBASE+8(%esp),%edi       # dst

        movl $-1, %eax                  # sum
        testl $2, %edi                  # Check alignment.
        jz 2f                           # Jump if alignment is ok.
        subl $2, %ecx                   # Alignment uses up two bytes.
        jae 1f                          # Jump if we had at least two bytes.
        addl $2, %ecx                   # ecx was < 2. Deal with it.
        jmp 4f
EXC(1:  movw (%esi), %bx        )
        addl $2, %esi
EXC(    movw %bx, (%edi)        )
        addl $2, %edi
        addw %bx, %ax
        adcl $0, %eax
2:
        movl %ecx, FP(%esp)
        shrl $5, %ecx
        jz 2f
        testl %esi, %esi                # what's wrong with clc?
EXC(1:  movl (%esi), %ebx       )
EXC(    movl 4(%esi), %edx      )
        adcl %ebx, %eax
EXC(    movl %ebx, (%edi)       )
        adcl %edx, %eax
EXC(    movl %edx, 4(%edi)      )

EXC(    movl 8(%esi), %ebx      )
EXC(    movl 12(%esi), %edx     )
        adcl %ebx, %eax
EXC(    movl %ebx, 8(%edi)      )
        adcl %edx, %eax
EXC(    movl %edx, 12(%edi)     )

EXC(    movl 16(%esi), %ebx     )
EXC(    movl 20(%esi), %edx     )
        adcl %ebx, %eax
EXC(    movl %ebx, 16(%edi)     )
        adcl %edx, %eax
EXC(    movl %edx, 20(%edi)     )

EXC(    movl 24(%esi), %ebx     )
EXC(    movl 28(%esi), %edx     )
        adcl %ebx, %eax
EXC(    movl %ebx, 24(%edi)     )
        adcl %edx, %eax
EXC(    movl %edx, 28(%edi)     )

        lea 32(%esi), %esi
        lea 32(%edi), %edi
        dec %ecx
        jne 1b
        adcl $0, %eax
2:      movl FP(%esp), %edx
        movl %edx, %ecx
        andl $0x1c, %edx
        je 4f
        shrl $2, %edx                   # This clears CF
EXC(3:  movl (%esi), %ebx       )
        adcl %ebx, %eax
EXC(    movl %ebx, (%edi)       )
        lea 4(%esi), %esi
        lea 4(%edi), %edi
        dec %edx
        jne 3b
        adcl $0, %eax
4:      andl $3, %ecx
        jz 7f
        cmpl $2, %ecx
        jb 5f
EXC(    movw (%esi), %cx        )
        leal 2(%esi), %esi
EXC(    movw %cx, (%edi)        )
        leal 2(%edi), %edi
        je 6f
        shll $16,%ecx
EXC(5:  movb (%esi), %cl        )
EXC(    movb %cl, (%edi)        )
6:      addl %ecx, %eax
        adcl $0, %eax
7:

        popl %ebx
        popl %esi
        popl %edi
        popl %ecx                       # equivalent to addl $4,%esp
        RET
SYM_FUNC_END(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

#define ROUND1(x) \
        EXC(movl x(%esi), %ebx ) ;      \
        addl %ebx, %eax ;               \
        EXC(movl %ebx, x(%edi) ) ;

#define ROUND(x) \
        EXC(movl x(%esi), %ebx ) ;      \
        adcl %ebx, %eax ;               \
        EXC(movl %ebx, x(%edi) ) ;

#define ARGBASE 12

SYM_FUNC_START(csum_partial_copy_generic)
        pushl %ebx
        pushl %edi
        pushl %esi
        movl ARGBASE+4(%esp),%esi       # src
        movl ARGBASE+8(%esp),%edi       # dst
        movl ARGBASE+12(%esp),%ecx      # len
        movl $-1, %eax                  # sum
#       movl %ecx, %edx
        movl %ecx, %ebx
        movl %esi, %edx
        shrl $6, %ecx
        andl $0x3c, %ebx
        negl %ebx
        subl %ebx, %esi
        subl %ebx, %edi
        lea -1(%esi),%edx
        andl $-32,%edx
        lea 3f(%ebx,%ebx), %ebx
        testl %esi, %esi
        JMP_NOSPEC ebx
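
/*
 * Added note (not in the original sources): as in the PPro csum_partial
 * above, this is a computed jump into the unrolled ROUND ladder.  %ebx
 * holds -(len & 0x3c) and each ROUND()/ROUND1() expansion is 8 bytes of
 * code, so "lea 3f(%ebx,%ebx), %ebx" (3f + 2*%ebx) lands the right
 * number of ROUNDs before label 3, while %esi/%edi were biased by the
 * same byte count so the negative displacements still hit the right
 * data.  The two EXC(movb ...) loads from %edx at the top of the loop
 * appear to be a software prefetch of upcoming source cache lines; the
 * byte they load into %bl is immediately overwritten.
 */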
1:      addl $64,%esi
        addl $64,%edi
        EXC(movb -32(%edx),%bl) ; EXC(movb (%edx),%bl)
        ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
        ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
        ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
        ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:      adcl $0,%eax
        addl $64, %edx
        dec %ecx
        jge 1b
4:      movl ARGBASE+12(%esp),%edx      # len
        andl $3, %edx
        jz 7f
        cmpl $2, %edx
        jb 5f
EXC(    movw (%esi), %dx        )
        leal 2(%esi), %esi
EXC(    movw %dx, (%edi)        )
        leal 2(%edi), %edi
        je 6f
        shll $16,%edx
5:
EXC(    movb (%esi), %dl        )
EXC(    movb %dl, (%edi)        )
6:      addl %edx, %eax
        adcl $0, %eax
7:

        popl %esi
        popl %edi
        popl %ebx
        RET
SYM_FUNC_END(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif
EXPORT_SYMBOL(csum_partial_copy_generic)