/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <[email protected]>
 *		Arnt Gulbrandsen, <[email protected]>
 *		Tom May, <[email protected]>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <[email protected]>
 *		Finn Arne Gangstad <[email protected]>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *		Hirokazu Takata, Hiroyuki Kondo: rewrite for the m32r architecture.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
 */
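/*
 * For orientation, a rough C model of the value the routines below
 * compute; this is an illustrative sketch only (the helper name and the
 * big-endian word order are assumptions), not the kernel's generic C
 * implementation.  The bit pattern it returns may differ from the
 * assembler's by an equivalent ones'-complement fold, which callers
 * that fold the result to 16 bits will never notice.
 *
 *	static unsigned int csum_partial_model(const unsigned char *buff,
 *					       int len, unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len > 1) {		// whole 16-bit words
 *			acc += (unsigned int)(buff[0] << 8 | buff[1]);
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len)			// odd trailing byte
 *			acc += (unsigned int)(buff[0] << 8);
 *		while (acc >> 32)		// end-around carry
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */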

#ifdef CONFIG_ISA_DUAL_ISSUE

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

	.text
ENTRY(csum_partial)
	; Function args
	;  r0: unsigned char *buff
	;  r1: int len
	;  r2: unsigned int sum

	push	r2		    ||	ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0		    ||	addi	r0, #1
	; clear c-bit || Alignment uses up bytes.
	cmp	r0, r0		    ||	addi	r1, #-1
	ldi	r3, #0		    ||	addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	; clear c-bit || Alignment uses up two bytes.
	cmp	r0, r0		    ||	addi	r1, #-2
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	bra	4f		    ||	addi	r1, #2
	.fillinsn			; len(r1) was < 2.  Deal with it.
1:
	; 2-byte aligned
	lduh	r4, @r0		    ||	ldi	r3, #0
	addx	r2, r4		    ||	addi	r0, #2
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5
	beqz	r6, 2f
	.fillinsn

1:	ld	r3, @r0+
	ld	r4, @r0+				; +4
	ld	r5, @r0+				; +8
	ld	r3, @r0+	    ||	addx	r2, r3	; +12
	ld	r4, @r0+	    ||	addx	r2, r4	; +16
	ld	r5, @r0+	    ||	addx	r2, r5	; +20
	ld	r3, @r0+	    ||	addx	r2, r3	; +24
	ld	r4, @r0+	    ||	addx	r2, r4	; +28
	addx	r2, r5		    ||	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b

	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn
2:	and3	r6, r1, #0x1c		; withdraw len
	beqz	r6, 4f
	srli	r6, #2
	.fillinsn

3:	ld	r4, @r0+	    ||	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b

	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn
4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2 goto 5f (1 byte)
	lduh	r4, @r0		    ||	addi	r0, #2
	addi	r1, #-2		    ||	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0		    ||	ldi	r1, #0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	addx	r2, r1
	.fillinsn
7:
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f			; swap the upper byte for the lower
	and3	r2, r0, #0xff
	srl3	r0, r0, #8
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	pop	r2		    ||	cmp	r0, r0
	addx	r0, r2		    ||	ldi	r2, #0
	addx	r0, r2
	jmp	r14

#else /* not CONFIG_ISA_DUAL_ISSUE */

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

	.text
ENTRY(csum_partial)
	; Function args
	;  r0: unsigned char *buff
	;  r1: int len
	;  r2: unsigned int sum

	push	r2
	ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0
	addi	r0, #1
	addi	r1, #-1			; Alignment uses up bytes.
	cmp	r0, r0			; clear c-bit
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	addi	r1, #-2			; Alignment uses up two bytes.
	cmp	r0, r0			; clear c-bit
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	addi	r1, #2			; len(r1) was < 2.  Deal with it.
	bra	4f
	.fillinsn
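	; From here the flow mirrors the dual-issue variant above, one
	; instruction at a time: sum 32-byte blocks in an unrolled loop
	; (len >> 5 iterations), then the remaining whole words
	; (len & 0x1c bytes), then the tail bytes (len & 3), and finally
	; fold the 32-bit accumulator to 16 bits and re-add the caller's
	; sum argument.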
1:
	; 2-byte aligned
	lduh	r4, @r0
	addi	r0, #2
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5
	beqz	r6, 2f
	.fillinsn

1:	ld	r3, @r0+
	ld	r4, @r0+		; +4
	ld	r5, @r0+		; +8
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +12
	ld	r4, @r0+		; +16
	ld	r5, @r0+		; +20
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +24
	ld	r4, @r0+		; +28
	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b
	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn

2:	and3	r6, r1, #0x1c		; withdraw len
	beqz	r6, 4f
	srli	r6, #2
	.fillinsn

3:	ld	r4, @r0+
	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b
	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn

4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2 goto 5f (1 byte)

	lduh	r4, @r0
	addi	r0, #2
	addi	r1, #-2
	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	ldi	r5, #0
	addx	r2, r5
	.fillinsn
7:
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f
	mv	r2, r0
	srl3	r0, r2, #8
	and3	r2, r2, #0xff
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	pop	r2
	cmp	r0, r0
	addx	r0, r2
	ldi	r2, #0
	addx	r0, r2
	jmp	r14

#endif /* not CONFIG_ISA_DUAL_ISSUE */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from src while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction;
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

ENTRY(csum_partial_copy_generic)
	nop
	nop
	nop
	nop
	jmp	r14
	nop
	nop
	nop

	.end
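/*
 * For reference, a hedged C sketch of what csum_partial_copy_generic is
 * meant to do once implemented: copy len bytes from src to dst and
 * checksum the data as it goes by, like csum_partial.  The helper name
 * below is illustrative only, and the sketch deliberately omits what the
 * real routine exists for: performing the accesses through
 * exception-table-protected loads and stores and reporting a faulting
 * access through *src_err_ptr / *dst_err_ptr (typically as -EFAULT).
 *
 *	static unsigned int csum_copy_model(const char *src, char *dst,
 *					    int len, unsigned int sum)
 *	{
 *		int i;
 *
 *		for (i = 0; i < len; i++)	// no fault handling here
 *			dst[i] = src[i];
 *		return csum_partial((const unsigned char *)dst, len, sum);
 *	}
 */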