GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/m32r/lib/checksum.S
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <[email protected]>
 *		Arnt Gulbrandsen, <[email protected]>
 *		Tom May, <[email protected]>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <[email protected]>
 *		Finn Arne Gangstad <[email protected]>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *		Hirokazu Takata, Hiroyuki Kondo: rewrite for the m32r architecture.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
*/

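/*
 * For reference, a rough C sketch of what this routine computes: a 32-bit
 * ones' complement accumulation of 16-bit words.  Illustrative only --
 * csum_partial_ref is a hypothetical name, the buffer is assumed 2-byte
 * aligned, and the alignment/endianness fixups the assembly below
 * performs are omitted:

static unsigned int csum_partial_ref(const unsigned char *buff, int len,
				     unsigned int sum)
{
	unsigned long long acc = sum;

	while (len >= 2) {			// add 16-bit words
		acc += *(const unsigned short *)buff;
		buff += 2;
		len -= 2;
	}
	if (len > 0)				// trailing odd byte; this is its
		acc += *buff;			// little-endian placement

	while (acc >> 32)			// fold carries back into 32 bits
		acc = (acc & 0xffffffffULL) + (acc >> 32);
	return (unsigned int)acc;
}
*/
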
#ifdef CONFIG_ISA_DUAL_ISSUE

/*
 * Experiments with Ethernet and SLIP connections show that buff
 * is aligned on either a 2-byte or 4-byte boundary.  We get at
 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 * alignment for the unrolled loop.
 */
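/*
 * Roughly, the alignment prologue below does the following (a C-like
 * sketch under the same assumptions as the comment above; not part of
 * this file):
 *
 *	if ((long)buff & 1) {		// odd address: fold in one byte;
 *		sum += *buff++;		// r7 remembers this so the result
 *		len--;			// can be byte-swapped at the end
 *	}
 *	if ((long)buff & 2) {		// halfword aligned: fold in 16 bits
 *		sum += *(unsigned short *)buff;
 *		buff += 2;
 *		len -= 2;
 *	}
 *	// the main loop then runs over 4-byte words, 32 bytes per pass
 */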

	.text
ENTRY(csum_partial)
	; Function args
	;  r0: unsigned char *buff
	;  r1: int len
	;  r2: unsigned int sum

	push	r2		    ||	ldi	r2, #0
	and3	r7, r0, #1	    ; Check alignment.
	beqz	r7, 1f		    ; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0		    ||	addi	r0, #1
	; clear c-bit || Alignment uses up bytes.
	cmp	r0, r0		    ||	addi	r1, #-1
	ldi	r3, #0		    ||	addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2	    ; Check alignment.
	beqz	r4, 2f		    ; Jump if alignment is ok.
	; clear c-bit || Alignment uses up two bytes.
	cmp	r0, r0		    ||	addi	r1, #-2
	bgtz	r1, 1f		    ; Jump if we had at least two bytes.
	bra	4f		    ||	addi	r1, #2
	.fillinsn		    ; len(r1) was < 2.  Deal with it.
1:
	; 2-byte aligned
	lduh	r4, @r0		    ||	ldi	r3, #0
	addx	r2, r4		    ||	addi	r0, #2
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0		    ; clear c-bit
	srl3	r6, r1, #5
	beqz	r6, 2f
	.fillinsn

1:	ld	r3, @r0+
	ld	r4, @r0+			; +4
	ld	r5, @r0+			; +8
	ld	r3, @r0+	||	addx	r2, r3	; +12
	ld	r4, @r0+	||	addx	r2, r4	; +16
	ld	r5, @r0+	||	addx	r2, r5	; +20
	ld	r3, @r0+	||	addx	r2, r3	; +24
	ld	r4, @r0+	||	addx	r2, r4	; +28
	addx	r2, r5		||	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b

	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn
2:	and3	r6, r1, #0x1c		; remaining len (word-sized part)
	beqz	r6, 4f
	srli	r6, #2

	.fillinsn
3:	ld	r4, @r0+	||	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b

	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn
4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2 goto 5f (1 byte)
	lduh	r4, @r0		||	addi	r0, #2
	addi	r1, #-2		||	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0		||	ldi	r1, #0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	addx	r2, r1
	.fillinsn
7:
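	; Fold the 32-bit sum in r2 down to 16 bits in r0.  In C terms:
	;	sum = (sum & 0xffff) + (sum >> 16);
	;	if (sum >> 16)
	;		sum = (sum + 1) & 0xffff;
	; (the carry out of the first add can only ever be 1 here)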
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f			; swap the upper byte for the lower
	and3	r2, r0, #0xff
	srl3	r0, r0, #8
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	pop	r2		||	cmp	r0, r0
	addx	r0, r2		||	ldi	r2, #0
	addx	r0, r2
	jmp	r14

#else /* not CONFIG_ISA_DUAL_ISSUE */

/*
 * Experiments with Ethernet and SLIP connections show that buff
 * is aligned on either a 2-byte or 4-byte boundary.  We get at
 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 * alignment for the unrolled loop.
 */

	.text
ENTRY(csum_partial)
	; Function args
	;  r0: unsigned char *buff
	;  r1: int len
	;  r2: unsigned int sum

	push	r2
	ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0
	addi	r0, #1
	addi	r1, #-1			; Alignment uses up bytes.
	cmp	r0, r0			; clear c-bit
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	addi	r1, #-2			; Alignment uses up two bytes.
	cmp	r0, r0			; clear c-bit
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	addi	r1, #2			; len(r1) was < 2.  Deal with it.
	bra	4f
	.fillinsn
1:
	; 2-byte aligned
	lduh	r4, @r0
	addi	r0, #2
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5
	beqz	r6, 2f
	.fillinsn

1:	ld	r3, @r0+
	ld	r4, @r0+		; +4
	ld	r5, @r0+		; +8
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +12
	ld	r4, @r0+		; +16
	ld	r5, @r0+		; +20
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +24
	ld	r4, @r0+		; +28
	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b
	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn

2:	and3	r6, r1, #0x1c		; remaining len (word-sized part)
	beqz	r6, 4f
	srli	r6, #2
	.fillinsn

3:	ld	r4, @r0+
	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b
	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn

4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2 goto 5f (1 byte)

	lduh	r4, @r0
	addi	r0, #2
	addi	r1, #-2
	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	ldi	r5, #0
	addx	r2, r5
	.fillinsn
7:
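	; Fold the 32-bit sum in r2 down to 16 bits in r0, exactly as in
	; the dual-issue variant above (see the C sketch given there).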
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f			; swap the upper byte for the lower
	mv	r2, r0
	srl3	r0, r2, #8
	and3	r2, r2, #0xff
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	pop	r2
	cmp	r0, r0
	addx	r0, r2
	ldi	r2, #0
	addx	r0, r2
	jmp	r14

#endif /* not CONFIG_ISA_DUAL_ISSUE */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				int len, int sum, int *src_err_ptr, int *dst_err_ptr)
*/

/*
 * Copy from src while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

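/*
 * The m32r entry below appears to be a placeholder: it executes a few
 * nops and returns without copying or checksumming anything.  For
 * reference, a rough C sketch of the intended semantics (illustrative
 * only; csum_copy_ref is a hypothetical name, it reuses the
 * csum_partial_ref sketch from above, and the exception handling via
 * the *_err_ptr arguments is omitted):

static unsigned int csum_copy_ref(const char *src, char *dst, int len,
				  unsigned int sum)
{
	int i;

	for (i = 0; i < len; i++)	// copy first ...
		dst[i] = src[i];
	// ... then checksum the copy (a real implementation would
	// interleave the two and fault-check each access)
	return csum_partial_ref((const unsigned char *)dst, len, sum);
}
*/
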
ENTRY(csum_partial_copy_generic)
	nop
	nop
	nop
	nop
	jmp	r14
	nop
	nop
	nop

	.end