GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/um/sys-i386/checksum.S
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <[email protected]>
 *		Arnt Gulbrandsen, <[email protected]>
 *		Tom May, <[email protected]>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <[email protected]>
 *		Finn Arne Gangstad <[email protected]>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */
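/*
 * Note: the incoming sum and the return value are 32-bit partial sums;
 * carries out of bit 31 are folded back in with adcl as we go, and the
 * caller later folds the 32-bit result down to the final 16-bit
 * one's-complement checksum.
 */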

	.text
	.align 4
.globl csum_partial

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

/*
 * Experiments with Ethernet and SLIP connections show that buff
 * is aligned on either a 2-byte or 4-byte boundary.  We get at
 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 * alignment for the unrolled loop.
 */
csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $2, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi
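	# testl sets CF=0, so the first adcl in the unrolled loop below
	# starts without a stale carry (the shrl above may have set CF).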
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f
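	# Neither movw nor leal touches the flags, so this je still tests
	# the cmpl $2 above: it is taken when exactly two bytes remained,
	# skipping the shll and the single-byte load.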
	shll $16,%ecx
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	popl %ebx
	popl %esi
	ret

#else

/* Version for PentiumII/PPro */

csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: const unsigned char *buf

	testl $2, %esi
	jnz 30f
10:
	movl %ecx, %edx
	movl %ecx, %ebx
	andl $0x7c, %ebx
	shrl $7, %ecx
	addl %ebx,%esi
	shrl $2, %ebx
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx
	testl %esi, %esi
	jmp *%ebx
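	# Computed jump into the unrolled loop: %ebx holds minus the number
	# of dwords in the partial tail (len & 0x7c, in dwords), and each
	# "adcl disp8(%esi), %eax" below assembles to 3 bytes, so
	# 45f + 3*%ebx enters exactly that many adcl's before the 45: label.
	# %esi was pre-advanced past the tail above, which is why the loop
	# reads at negative offsets.  The testl clears CF for the first adcl.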

	# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b

30:	subl $2, %ecx
	ja 20b
	je 32f
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

	# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
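	# Worked example: len & 3 == 2 leaves %ecx = 2; notl gives
	# 0xfffffffd, shll $3 gives 0xffffffe8, and shrl uses only the low
	# five bits of %cl (8), so %ebx = 0xffffff >> 8 = 0xffff, keeping
	# exactly the two valid low-order bytes of the loaded dword.
	# %esi has ended up 128 bytes past the remaining bytes, hence the
	# -128(%esi) offset.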
80:
	popl %ebx
	popl %esi
	ret

#endif

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction;
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

#define SRC(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous
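/*
 * Each SRC()/DST() use tags its instruction with a local 9999: label and
 * emits an __ex_table entry pairing that address with a fixup label:
 * 6001f for faults on source accesses, 6002f for faults on destination
 * accesses.  A fault in a tagged access thus resumes at the matching
 * handler in the .fixup section further down.
 */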

	.align 4

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP	12

csum_partial_copy_generic_i386:
	subl  $4,%esp
	pushl %edi
	pushl %esi
	pushl %ebx
	movl ARGBASE+16(%esp),%eax	# sum
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
SRC(1:	movw (%esi), %bx	)
	addl $2, %esi
DST(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi
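	# As in csum_partial above: testl clears CF before the adcl chain.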
SRC(1:	movl (%esi), %ebx	)
SRC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 4(%edi)	)

SRC(	movl 8(%esi), %ebx	)
SRC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 12(%edi)	)

SRC(	movl 16(%esi), %ebx	)
SRC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 20(%edi)	)

SRC(	movl 24(%esi), %ebx	)
SRC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx			# This clears CF
SRC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
SRC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
DST(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%ecx
SRC(5:	movb (%esi), %cl	)
DST(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)

	# zero the complete destination - computing the rest
	# is too much work
	movl ARGBASE+8(%esp), %edi	# dst
	movl ARGBASE+12(%esp), %ecx	# len
	xorl %eax,%eax
	rep ; stosb

	jmp 5000b

6002:
	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT,(%ebx)
	jmp 5000b

.previous

	popl %ebx
	popl %esi
	popl %edi
	popl %ecx			# equivalent to addl $4,%esp
	ret

#else

/* Version for PentiumII/PPro */

#define ROUND1(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

#define ARGBASE 12

csum_partial_copy_generic_i386:
	pushl %ebx
	pushl %edi
	pushl %esi
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl ARGBASE+16(%esp),%eax	#sum
#	movl %ecx, %edx
	movl %ecx, %ebx
	movl %esi, %edx
	shrl $6, %ecx
	andl $0x3c, %ebx
	negl %ebx
	subl %ebx, %esi
	subl %ebx, %edi
	lea  -1(%esi),%edx
	andl $-32,%edx
	lea 3f(%ebx,%ebx), %ebx
	testl %esi, %esi
	jmp *%ebx
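	# Computed jump, as in csum_partial above: each ROUND/ROUND1 expands
	# to 8 bytes of code and copies one dword, and %ebx holds minus
	# (len & 0x3c), so 3f + 2*%ebx backs into the unrolled loop by
	# 8 bytes per leftover dword.  The testl clears CF for the first adcl.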
1:	addl $64,%esi
	addl $64,%edi
	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)
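	# The two byte loads through %edx (a 32-byte-aligned cursor into the
	# source, set up above) touch the chunk's cache lines before the
	# copy, presumably as a cheap prefetch; they also make an unreadable
	# source page fault early.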
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx
	jz 7f
	cmpl $2, %edx
	jb 5f
SRC(	movw (%esi), %dx	)
	leal 2(%esi), %esi
DST(	movw %dx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%edx
5:
SRC(	movb (%esi), %dl	)
DST(	movb %dl, (%edi)	)
6:	addl %edx, %eax
	adcl $0, %eax
7:
.section .fixup, "ax"
6001:	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)
	# zero the complete destination (computing the rest is too much work)
	movl ARGBASE+8(%esp),%edi	# dst
	movl ARGBASE+12(%esp),%ecx	# len
	xorl %eax,%eax
	rep; stosb
	jmp 7b
6002:	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT, (%ebx)
	jmp 7b
.previous

	popl %esi
	popl %edi
	popl %ebx
	ret

#undef ROUND
#undef ROUND1

#endif