GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/lib/checksum_32.S
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              IP/TCP/UDP checksumming routines
 *
 * Authors:     Jorge Cwik, <[email protected]>
 *              Arnt Gulbrandsen, <[email protected]>
 *              Tom May, <[email protected]>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <[email protected]>
 *              Finn Arne Gangstad <[email protected]>
 *              Lots of code moved from tcp.c and ip.c; see those files
 *              for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *              handling.
 *              Andi Kleen, add zeroing on error
 *              converted to pure assembler
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */
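
/*
 * Reference sketch (illustration only, not used by the build): a
 * portable C version of what csum_partial computes.  The accumulator
 * sums 16-bit words with end-around carry (RFC 1071); it matches the
 * assembly below up to the final 16-bit fold (csum_fold).  Names are
 * local to this sketch.
 */
#if 0
#include <stdint.h>

static uint32_t ref_csum_partial(const uint8_t *buff, int len, uint32_t sum)
{
        uint64_t acc = sum;

        while (len > 1) {               /* 16-bit little-endian words */
                acc += (uint32_t)buff[0] | ((uint32_t)buff[1] << 8);
                buff += 2;
                len -= 2;
        }
        if (len == 1)                   /* trailing odd byte, low half */
                acc += buff[0];

        while (acc >> 32)               /* fold carries back in */
                acc = (acc & 0xffffffffULL) + (acc >> 32);
        return (uint32_t)acc;
}
#endif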

.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

/*
 * Experiments with Ethernet and SLIP connections show that buff
 * is aligned on either a 2-byte or 4-byte boundary.  We get at
 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 * alignment for the unrolled loop.
 */
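
/*
 * Note on the odd-address path below (annotation, not from the original
 * authors): ones'-complement addition commutes with byte swapping
 * (RFC 1071, section 2(B)).  When buff starts at an odd address, the
 * code folds in the first byte, then "roll $8, %eax" rotates the
 * accumulator so the remaining even-aligned stream is summed in
 * byte-swapped order; after the final 16-bit fold, a rotation by 8 is
 * exactly a byte swap, so the closing "roll $8, %eax" (taken when bit 0
 * of buff is set) restores the correct byte order.
 */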
ENTRY(csum_partial)
        CFI_STARTPROC
        pushl_cfi %esi
        CFI_REL_OFFSET esi, 0
        pushl_cfi %ebx
        CFI_REL_OFFSET ebx, 0
        movl 20(%esp),%eax      # Function arg: unsigned int sum
        movl 16(%esp),%ecx      # Function arg: int len
        movl 12(%esp),%esi      # Function arg: unsigned char *buff
        testl $3, %esi          # Check alignment.
        jz 2f                   # Jump if alignment is ok.
        testl $1, %esi          # Check alignment.
        jz 10f                  # Jump if aligned on a 2-byte boundary.

        # buf is odd
        dec %ecx
        jl 8f
        movzbl (%esi), %ebx
        adcl %ebx, %eax
        roll $8, %eax
        inc %esi
        testl $2, %esi
        jz 2f
10:
        subl $2, %ecx           # Alignment uses up two bytes.
        jae 1f                  # Jump if we had at least two bytes.
        addl $2, %ecx           # ecx was < 2.  Deal with it.
        jmp 4f
1:      movw (%esi), %bx
        addl $2, %esi
        addw %bx, %ax
        adcl $0, %eax
2:
        movl %ecx, %edx
        shrl $5, %ecx           # 32 bytes per iteration
        jz 2f
        testl %esi, %esi        # clears CF before the adcl loop
1:      movl (%esi), %ebx
        adcl %ebx, %eax
        movl 4(%esi), %ebx
        adcl %ebx, %eax
        movl 8(%esi), %ebx
        adcl %ebx, %eax
        movl 12(%esi), %ebx
        adcl %ebx, %eax
        movl 16(%esi), %ebx
        adcl %ebx, %eax
        movl 20(%esi), %ebx
        adcl %ebx, %eax
        movl 24(%esi), %ebx
        adcl %ebx, %eax
        movl 28(%esi), %ebx
        adcl %ebx, %eax
        lea 32(%esi), %esi
        dec %ecx
        jne 1b
        adcl $0, %eax
2:      movl %edx, %ecx
        andl $0x1c, %edx
        je 4f
        shrl $2, %edx           # This clears CF
3:      adcl (%esi), %eax
        lea 4(%esi), %esi
        dec %edx
        jne 3b
        adcl $0, %eax
4:      andl $3, %ecx
        jz 7f
        cmpl $2, %ecx
        jb 5f
        movw (%esi),%cx
        leal 2(%esi),%esi
        je 6f
        shll $16,%ecx
5:      movb (%esi),%cl
6:      addl %ecx,%eax
        adcl $0, %eax
7:
        testl $1, 12(%esp)      # was buff odd-aligned?
        jz 8f
        roll $8, %eax           # swap the result back (see note above)
8:
        popl_cfi %ebx
        CFI_RESTORE ebx
        popl_cfi %esi
        CFI_RESTORE esi
        ret
        CFI_ENDPROC
ENDPROC(csum_partial)

#else

/* Version for PentiumII/PPro */

ENTRY(csum_partial)
        CFI_STARTPROC
        pushl_cfi %esi
        CFI_REL_OFFSET esi, 0
        pushl_cfi %ebx
        CFI_REL_OFFSET ebx, 0
        movl 20(%esp),%eax      # Function arg: unsigned int sum
        movl 16(%esp),%ecx      # Function arg: int len
        movl 12(%esp),%esi      # Function arg: const unsigned char *buf

        testl $3, %esi
        jnz 25f
10:
        movl %ecx, %edx
        movl %ecx, %ebx
        andl $0x7c, %ebx        # leftover bytes (whole words only)
        shrl $7, %ecx           # number of full 128-byte blocks
        addl %ebx,%esi          # advance past the leftover bytes
        shrl $2, %ebx           # leftover words
        negl %ebx
        lea 45f(%ebx,%ebx,2), %ebx      # 45f - 3*(leftover words); see note below
        testl %esi, %esi        # clears CF
        jmp *%ebx
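
/*
 * Annotation (not from the original authors): at this point %ebx holds
 * -(number of leftover 4-byte words), and each "adcl off(%esi), %eax"
 * in the unrolled block below assembles to 3 bytes (opcode, modrm,
 * 8-bit displacement), so "lea 45f(%ebx,%ebx,2), %ebx" computes
 * 45f - 3*words: a computed goto into the tail of the unrolled loop.
 * Because %esi was already advanced past the leftover bytes, the first
 * (partial) pass sums exactly those leading words; "testl %esi, %esi"
 * exists only to clear CF before the first adcl.
 */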

# Handle 2-byte-aligned regions
20:     addw (%esi), %ax
        lea 2(%esi), %esi
        adcl $0, %eax
        jmp 10b
25:
        testl $1, %esi
        jz 30f
        # buf is odd
        dec %ecx
        jl 90f
        movzbl (%esi), %ebx
        addl %ebx, %eax
        adcl $0, %eax
        roll $8, %eax
        inc %esi
        testl $2, %esi
        jz 10b

30:     subl $2, %ecx
        ja 20b
        je 32f
        addl $2, %ecx
        jz 80f
        movzbl (%esi),%ebx      # csumming 1 byte, 2-aligned
        addl %ebx, %eax
        adcl $0, %eax
        jmp 80f
32:
        addw (%esi), %ax        # csumming 2 bytes, 2-aligned
        adcl $0, %eax
        jmp 80f

40:
        addl -128(%esi), %eax
        adcl -124(%esi), %eax
        adcl -120(%esi), %eax
        adcl -116(%esi), %eax
        adcl -112(%esi), %eax
        adcl -108(%esi), %eax
        adcl -104(%esi), %eax
        adcl -100(%esi), %eax
        adcl -96(%esi), %eax
        adcl -92(%esi), %eax
        adcl -88(%esi), %eax
        adcl -84(%esi), %eax
        adcl -80(%esi), %eax
        adcl -76(%esi), %eax
        adcl -72(%esi), %eax
        adcl -68(%esi), %eax
        adcl -64(%esi), %eax
        adcl -60(%esi), %eax
        adcl -56(%esi), %eax
        adcl -52(%esi), %eax
        adcl -48(%esi), %eax
        adcl -44(%esi), %eax
        adcl -40(%esi), %eax
        adcl -36(%esi), %eax
        adcl -32(%esi), %eax
        adcl -28(%esi), %eax
        adcl -24(%esi), %eax
        adcl -20(%esi), %eax
        adcl -16(%esi), %eax
        adcl -12(%esi), %eax
        adcl -8(%esi), %eax
        adcl -4(%esi), %eax
45:
        lea 128(%esi), %esi
        adcl $0, %eax
        dec %ecx
        jge 40b
        movl %edx, %ecx
50:     andl $3, %ecx
        jz 80f

        # Handle the last 1-3 bytes without jumping
        notl %ecx               # 1->2, 2->1, 3->0, higher bits are masked
        movl $0xffffff,%ebx     # by the shll and shrl instructions
        shll $3,%ecx
        shrl %cl,%ebx
        andl -128(%esi),%ebx    # esi is 4-aligned so should be ok
        addl %ebx,%eax
        adcl $0,%eax
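
/*
 * Worked example (annotation, not from the original authors): for a
 * tail of ecx = 1, 2 or 3 bytes, "notl %ecx; shll $3, %ecx" leaves a
 * value in %cl whose low 5 bits are all that shrl uses:
 *   ecx = 1: cl = 0xf0 -> shift 16 -> mask 0x0000ff (keep 1 byte)
 *   ecx = 2: cl = 0xe8 -> shift  8 -> mask 0x00ffff (keep 2 bytes)
 *   ecx = 3: cl = 0xe0 -> shift  0 -> mask 0xffffff (keep 3 bytes)
 * The word is read 4-byte-aligned at -128(%esi), so the access cannot
 * cross a page boundary even though it reads past the buffer's end,
 * and the mask zeroes the bytes that are not part of the buffer.
 */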
80:
        testl $1, 12(%esp)      # was buf odd-aligned?
        jz 90f
        roll $8, %eax
90:
        popl_cfi %ebx
        CFI_RESTORE ebx
        popl_cfi %esi
        CFI_RESTORE esi
        ret
        CFI_ENDPROC
ENDPROC(csum_partial)

#endif

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
                                  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */
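
/*
 * Contract sketch (illustration only, not used by the build): the C
 * shape of what the two implementations below provide.  On a faulting
 * source read, -EFAULT is stored through src_err_ptr and the whole
 * destination is zeroed; on a faulting destination write, -EFAULT is
 * stored through dst_err_ptr.  ref_csum_partial is the sketch from
 * earlier in this file; plain C cannot express the fault handling, so
 * only the success path is shown.
 */
#if 0
#include <string.h>

static unsigned int ref_csum_partial_copy(const char *src, char *dst,
                                          int len, int sum,
                                          int *src_err_ptr, int *dst_err_ptr)
{
        (void)src_err_ptr;              /* written only on a fault */
        (void)dst_err_ptr;
        memcpy(dst, src, len);          /* copy ... */
        return ref_csum_partial((const uint8_t *)dst, len, sum);
                                        /* ... while checksumming */
}
#endif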

/*
 * Copy from ds while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so we can call a custom exception handler for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *        DST definitions? It's damn hard to trigger all cases.  I hope I got
 *        them all but there's no guarantee.
 */

#define SRC(y...)                       \
        9999: y;                        \
        .section __ex_table, "a";       \
        .long 9999b, 6001f;             \
        .previous

#define DST(y...)                       \
        9999: y;                        \
        .section __ex_table, "a";       \
        .long 9999b, 6002f;             \
        .previous
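
/*
 * Annotation (not from the original authors): each SRC()/DST() use
 * plants one entry in the __ex_table section, pairing the address of
 * the instruction that may fault with the fixup code to resume at.
 * Conceptually, the ".long 9999b, 6001f" pairs lay out:
 */
#if 0
struct exception_table_entry {
        unsigned long insn;     /* address of the faulting instruction */
        unsigned long fixup;    /* address to continue execution at */
};
/* On a fault, the page-fault handler searches this table for the
 * faulting address and, if found, jumps to the fixup code. */
#endif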

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP      12

ENTRY(csum_partial_copy_generic)
        CFI_STARTPROC
        subl $4,%esp
        CFI_ADJUST_CFA_OFFSET 4
        pushl_cfi %edi
        CFI_REL_OFFSET edi, 0
        pushl_cfi %esi
        CFI_REL_OFFSET esi, 0
        pushl_cfi %ebx
        CFI_REL_OFFSET ebx, 0
        movl ARGBASE+16(%esp),%eax      # sum
        movl ARGBASE+12(%esp),%ecx      # len
        movl ARGBASE+4(%esp),%esi       # src
        movl ARGBASE+8(%esp),%edi       # dst

        testl $2, %edi          # Check alignment.
        jz 2f                   # Jump if alignment is ok.
        subl $2, %ecx           # Alignment uses up two bytes.
        jae 1f                  # Jump if we had at least two bytes.
        addl $2, %ecx           # ecx was < 2.  Deal with it.
        jmp 4f
SRC(1:  movw (%esi), %bx        )
        addl $2, %esi
DST(    movw %bx, (%edi)        )
        addl $2, %edi
        addw %bx, %ax
        adcl $0, %eax
2:
        movl %ecx, FP(%esp)
        shrl $5, %ecx
        jz 2f
        testl %esi, %esi        # clears CF before the adcl loop
SRC(1:  movl (%esi), %ebx       )
SRC(    movl 4(%esi), %edx      )
        adcl %ebx, %eax
DST(    movl %ebx, (%edi)       )
        adcl %edx, %eax
DST(    movl %edx, 4(%edi)      )

SRC(    movl 8(%esi), %ebx      )
SRC(    movl 12(%esi), %edx     )
        adcl %ebx, %eax
DST(    movl %ebx, 8(%edi)      )
        adcl %edx, %eax
DST(    movl %edx, 12(%edi)     )

SRC(    movl 16(%esi), %ebx     )
SRC(    movl 20(%esi), %edx     )
        adcl %ebx, %eax
DST(    movl %ebx, 16(%edi)     )
        adcl %edx, %eax
DST(    movl %edx, 20(%edi)     )

SRC(    movl 24(%esi), %ebx     )
SRC(    movl 28(%esi), %edx     )
        adcl %ebx, %eax
DST(    movl %ebx, 24(%edi)     )
        adcl %edx, %eax
DST(    movl %edx, 28(%edi)     )

        lea 32(%esi), %esi
        lea 32(%edi), %edi
        dec %ecx
        jne 1b
        adcl $0, %eax
2:      movl FP(%esp), %edx
        movl %edx, %ecx
        andl $0x1c, %edx
        je 4f
        shrl $2, %edx           # This clears CF
SRC(3:  movl (%esi), %ebx       )
        adcl %ebx, %eax
DST(    movl %ebx, (%edi)       )
        lea 4(%esi), %esi
        lea 4(%edi), %edi
        dec %edx
        jne 3b
        adcl $0, %eax
4:      andl $3, %ecx
        jz 7f
        cmpl $2, %ecx
        jb 5f
SRC(    movw (%esi), %cx        )
        leal 2(%esi), %esi
DST(    movw %cx, (%edi)        )
        leal 2(%edi), %edi
        je 6f
        shll $16,%ecx
SRC(5:  movb (%esi), %cl        )
DST(    movb %cl, (%edi)        )
6:      addl %ecx, %eax
        adcl $0, %eax
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
        movl ARGBASE+20(%esp), %ebx     # src_err_ptr
        movl $-EFAULT, (%ebx)

        # zero the complete destination - computing the rest
        # is too much work
        movl ARGBASE+8(%esp), %edi      # dst
        movl ARGBASE+12(%esp), %ecx     # len
        xorl %eax,%eax
        rep ; stosb

        jmp 5000b

6002:
        movl ARGBASE+24(%esp), %ebx     # dst_err_ptr
        movl $-EFAULT,(%ebx)
        jmp 5000b

.previous

        popl_cfi %ebx
        CFI_RESTORE ebx
        popl_cfi %esi
        CFI_RESTORE esi
        popl_cfi %edi
        CFI_RESTORE edi
        popl_cfi %ecx           # equivalent to addl $4,%esp
        ret
        CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

#define ROUND1(x) \
        SRC(movl x(%esi), %ebx ) ;     \
        addl %ebx, %eax ;              \
        DST(movl %ebx, x(%edi) ) ;

#define ROUND(x) \
        SRC(movl x(%esi), %ebx ) ;     \
        adcl %ebx, %eax ;              \
        DST(movl %ebx, x(%edi) ) ;

#define ARGBASE 12

ENTRY(csum_partial_copy_generic)
        CFI_STARTPROC
        pushl_cfi %ebx
        CFI_REL_OFFSET ebx, 0
        pushl_cfi %edi
        CFI_REL_OFFSET edi, 0
        pushl_cfi %esi
        CFI_REL_OFFSET esi, 0
        movl ARGBASE+4(%esp),%esi       #src
        movl ARGBASE+8(%esp),%edi       #dst
        movl ARGBASE+12(%esp),%ecx      #len
        movl ARGBASE+16(%esp),%eax      #sum
#       movl %ecx, %edx
        movl %ecx, %ebx
        movl %esi, %edx
        shrl $6, %ecx           # number of full 64-byte blocks
        andl $0x3c, %ebx        # leftover bytes (whole words only)
        negl %ebx
        subl %ebx, %esi         # advance src past the leftover bytes
        subl %ebx, %edi         # advance dst likewise
        lea  -1(%esi),%edx
        andl $-32,%edx          # 32-byte-aligned pointer for the reads at 1:
        lea 3f(%ebx,%ebx), %ebx # 3f - 8*(leftover words); see note below
        testl %esi, %esi        # clears CF
        jmp *%ebx
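
/*
 * Annotation (not from the original authors): %ebx above holds
 * -(len & 0x3c), and each ROUND below expands to 8 bytes of code
 * (3-byte load, 2-byte adcl, 3-byte store), so "lea 3f(%ebx,%ebx), %ebx"
 * computes 3f - 2*(len & 0x3c) = 3f - 8*(leftover words): a computed
 * goto that copies and sums exactly the leftover words on the first,
 * partial pass.  %esi and %edi were pre-advanced past those bytes
 * above, and %edx is the pointer used by the cache-warming byte reads
 * at 1:.  "testl %esi, %esi" only clears CF before the first adcl.
 */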
1:      addl $64,%esi
        addl $64,%edi
        SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)  # warm the source cache lines
        ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
        ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
        ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
        ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:      adcl $0,%eax
        addl $64, %edx          # advance the read-ahead pointer
        dec %ecx
        jge 1b
4:      movl ARGBASE+12(%esp),%edx      #len
        andl $3, %edx
        jz 7f
        cmpl $2, %edx
        jb 5f
SRC(    movw (%esi), %dx        )
        leal 2(%esi), %esi
DST(    movw %dx, (%edi)        )
        leal 2(%edi), %edi
        je 6f
        shll $16,%edx
5:
SRC(    movb (%esi), %dl        )
DST(    movb %dl, (%edi)        )
6:      addl %edx, %eax
        adcl $0, %eax
7:
.section .fixup, "ax"
6001:   movl ARGBASE+20(%esp), %ebx     # src_err_ptr
        movl $-EFAULT, (%ebx)
        # zero the complete destination (computing the rest is too much work)
        movl ARGBASE+8(%esp),%edi       # dst
        movl ARGBASE+12(%esp),%ecx      # len
        xorl %eax,%eax
        rep; stosb
        jmp 7b
6002:   movl ARGBASE+24(%esp), %ebx     # dst_err_ptr
        movl $-EFAULT, (%ebx)
        jmp 7b
.previous

        popl_cfi %esi
        CFI_RESTORE esi
        popl_cfi %edi
        CFI_RESTORE edi
        popl_cfi %ebx
        CFI_RESTORE ebx
        ret
        CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif