GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/mips/lib/csum_partial.S
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 * Copyright (C) 2007 Maciej W. Rozycki
 */
#include <linux/errno.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we share the code base with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the registers from the
 * n64 ABI naming to the o32 ABI naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0 $8
#define t1 $9
#define t2 $10
#define t3 $11
#define t4 $12
#define t5 $13
#define t6 $14
#define t7 $15

#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD ld
#define LOAD32 lwu
#define ADD daddu
#define NBYTES 8

#else

#define LOAD lw
#define LOAD32 lw
#define ADD addu
#define NBYTES 4

#endif /* USE_DOUBLE */

#define UNIT(unit) ((unit)*NBYTES)

#define ADDC(sum,reg) \
        ADD sum, reg; \
        sltu v1, sum, reg; \
        ADD sum, v1; \

#define ADDC32(sum,reg) \
        addu sum, reg; \
        sltu v1, sum, reg; \
        addu sum, v1; \

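/*
 * ADDC/ADDC32 accumulate with an end-around carry: any carry out of the
 * top bit is folded straight back into the sum, which keeps the running
 * value a valid ones'-complement (Internet checksum) partial sum.  As a
 * rough C sketch of one 32-bit step (illustration only, not part of the
 * original source):
 *
 *	static inline unsigned int addc32(unsigned int sum, unsigned int v)
 *	{
 *		sum += v;
 *		if (sum < v)		// unsigned overflow => carry out
 *			sum += 1;	// fold the carry back in
 *		return sum;
 *	}
 *
 * Note that v1 is used as scratch for the carry, so these macros must not
 * be used while v1 holds live data.
 */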
#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
        LOAD _t0, (offset + UNIT(0))(src); \
        LOAD _t1, (offset + UNIT(1))(src); \
        LOAD _t2, (offset + UNIT(2))(src); \
        LOAD _t3, (offset + UNIT(3))(src); \
        ADDC(sum, _t0); \
        ADDC(sum, _t1); \
        ADDC(sum, _t2); \
        ADDC(sum, _t3)

#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
        CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
        CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3); \
        CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */

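/*
 * At the C level this entry point corresponds (roughly) to
 *
 *	__wsum csum_partial(const void *buff, int len, __wsum sum);
 *
 * i.e. a0/a1/a2 carry buff, len and the 32-bit partial sum to fold in at
 * the end; the 32-bit result is returned in v0.
 */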
#define src a0
#define sum v0

        .text
        .set noreorder
        .align 5
LEAF(csum_partial)
        move sum, zero
        move t7, zero

        sltiu t8, a1, 0x8
        bnez t8, .Lsmall_csumcpy /* < 8 bytes to copy */
        move t2, a1

        andi t7, src, 0x1 /* odd buffer? */

.Lhword_align:
        beqz t7, .Lword_align
        andi t8, src, 0x2

        lbu t0, (src)
        LONG_SUBU a1, a1, 0x1
#ifdef __MIPSEL__
        sll t0, t0, 8
#endif
        ADDC(sum, t0)
        PTR_ADDU src, src, 0x1
        andi t8, src, 0x2

.Lword_align:
        beqz t8, .Ldword_align
        sltiu t8, a1, 56

        lhu t0, (src)
        LONG_SUBU a1, a1, 0x2
        ADDC(sum, t0)
        sltiu t8, a1, 56
        PTR_ADDU src, src, 0x2

.Ldword_align:
        bnez t8, .Ldo_end_words
        move t8, a1

        andi t8, src, 0x4
        beqz t8, .Lqword_align
        andi t8, src, 0x8

        LOAD32 t0, 0x00(src)
        LONG_SUBU a1, a1, 0x4
        ADDC(sum, t0)
        PTR_ADDU src, src, 0x4
        andi t8, src, 0x8

.Lqword_align:
        beqz t8, .Loword_align
        andi t8, src, 0x10

#ifdef USE_DOUBLE
        ld t0, 0x00(src)
        LONG_SUBU a1, a1, 0x8
        ADDC(sum, t0)
#else
        lw t0, 0x00(src)
        lw t1, 0x04(src)
        LONG_SUBU a1, a1, 0x8
        ADDC(sum, t0)
        ADDC(sum, t1)
#endif
        PTR_ADDU src, src, 0x8
        andi t8, src, 0x10

.Loword_align:
        beqz t8, .Lbegin_movement
        LONG_SRL t8, a1, 0x7

#ifdef USE_DOUBLE
        ld t0, 0x00(src)
        ld t1, 0x08(src)
        ADDC(sum, t0)
        ADDC(sum, t1)
#else
        CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
        LONG_SUBU a1, a1, 0x10
        PTR_ADDU src, src, 0x10
        LONG_SRL t8, a1, 0x7

.Lbegin_movement:
        beqz t8, 1f
        andi t2, a1, 0x40

.Lmove_128bytes:
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
        LONG_SUBU t8, t8, 0x01
        .set reorder /* DADDI_WAR */
        PTR_ADDU src, src, 0x80
        bnez t8, .Lmove_128bytes
        .set noreorder

1:
        beqz t2, 1f
        andi t2, a1, 0x20

.Lmove_64bytes:
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
        PTR_ADDU src, src, 0x40

1:
        beqz t2, .Ldo_end_words
        andi t8, a1, 0x1c

.Lmove_32bytes:
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        andi t8, a1, 0x1c
        PTR_ADDU src, src, 0x20

.Ldo_end_words:
        beqz t8, .Lsmall_csumcpy
        andi t2, a1, 0x3
        LONG_SRL t8, t8, 0x2

.Lend_words:
        LOAD32 t0, (src)
        LONG_SUBU t8, t8, 0x1
        ADDC(sum, t0)
        .set reorder /* DADDI_WAR */
        PTR_ADDU src, src, 0x4
        bnez t8, .Lend_words
        .set noreorder

        /* unknown src alignment and < 8 bytes to go */
.Lsmall_csumcpy:
        move a1, t2

        andi t0, a1, 4
        beqz t0, 1f
        andi t0, a1, 2

        /* Still a full word to go */
        ulw t1, (src)
        PTR_ADDIU src, 4
#ifdef USE_DOUBLE
        dsll t1, t1, 32 /* clear lower 32bit */
#endif
        ADDC(sum, t1)

1:      move t1, zero
        beqz t0, 1f
        andi t0, a1, 1

        /* Still a halfword to go */
        ulhu t1, (src)
        PTR_ADDIU src, 2

1:      beqz t0, 1f
        sll t1, t1, 16

        lbu t2, (src)
        nop

#ifdef __MIPSEB__
        sll t2, t2, 8
#endif
        or t1, t2

1:      ADDC(sum, t1)

        /* fold checksum */
#ifdef USE_DOUBLE
        dsll32 v1, sum, 0
        daddu sum, v1
        sltu v1, sum, v1
        dsra32 sum, sum, 0
        addu sum, v1
#endif

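/*
 * The fold above reduces the 64-bit accumulator to a 32-bit partial sum
 * by adding the two halves with end-around carry.  Roughly, in C
 * (illustration, USE_DOUBLE build):
 *
 *	unsigned int hi = sum >> 32, lo = (unsigned int)sum;
 *	unsigned int folded = hi + lo;
 *	if (folded < lo)		// carry out of the 32-bit add
 *		folded += 1;
 *	sum = folded;
 *
 * The final fold down to 16 bits is left to the caller (csum_fold).
 */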
        /* odd buffer alignment? */
#ifdef CPU_MIPSR2
        wsbh v1, sum
        movn sum, v1, t7
#else
        beqz t7, 1f /* odd buffer alignment? */
        lui v1, 0x00ff
        addu v1, 0x00ff
        and t0, sum, v1
        sll t0, t0, 8
        srl sum, sum, 8
        and sum, sum, v1
        or sum, sum, t0
1:
#endif
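/*
 * If the buffer started on an odd address, every byte was accumulated in
 * the "wrong" half of its 16-bit word, so the two byte lanes of the
 * result have to be swapped.  On R2 cores wsbh does the swap and movn
 * applies it only when t7 (the odd flag) is non-zero; the generic path
 * above does the same thing with an explicit mask and shift, roughly
 * (C for illustration):
 *
 *	sum = ((sum & 0x00ff00ff) << 8) | ((sum >> 8) & 0x00ff00ff);
 */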
        .set reorder
        /* Add the passed partial csum. */
        ADDC32(sum, a2)
        jr ra
        .set noreorder
        END(csum_partial)


/*
 * checksum and copy routines based on memcpy.S
 *
 *	csum_partial_copy_nocheck(src, dst, len, sum)
 *	__csum_partial_copy_user(src, dst, len, sum, errp)
 *
 * See "Spec" in memcpy.S for details.  Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 */

#define src a0
#define dst a1
#define len a2
#define psum a3
#define sum v0
#define odd t8
#define errptr t9

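/*
 * The C-level declarations (in asm/checksum.h) correspond roughly to
 *
 *	__wsum csum_partial_copy_nocheck(const void *src, void *dst,
 *					 int len, __wsum sum);
 *	__wsum __csum_partial_copy_user(const void *src, void *dst,
 *					int len, __wsum sum, int *err_ptr);
 *
 * so a0..a3 carry src, dst, len and the incoming partial sum, and the
 * error pointer for the _user variant arrives in a4 on 64-bit builds or
 * at 16(sp) otherwise, exactly as read below.
 */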
/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by __csum_partial_copy_from_user and maintained by
 *	not writing AT in __csum_partial_copy
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores store -EFAULT to errptr and return.
 * These handlers do not need to overwrite any data.
 */

#define EXC(inst_reg,addr,handler) \
9:      inst_reg, addr; \
        .section __ex_table,"a"; \
        PTR 9b, handler; \
        .previous

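/*
 * EXC() tags a single load or store with a fixup: the labelled
 * instruction's address and its handler are recorded in the __ex_table
 * section, and if the access faults, the kernel's fault code looks the
 * faulting address up in that table and resumes at the handler
 * (.Ll_exc*, .Ls_exc below) instead of oopsing.
 */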
#ifdef USE_DOUBLE

#define LOAD ld
#define LOADL ldl
#define LOADR ldr
#define STOREL sdl
#define STORER sdr
#define STORE sd
#define ADD daddu
#define SUB dsubu
#define SRL dsrl
#define SLL dsll
#define SLLV dsllv
#define SRLV dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

#else

#define LOAD lw
#define LOADL lwl
#define LOADR lwr
#define STOREL swl
#define STORER swr
#define STORE sw
#define ADD addu
#define SUB subu
#define SRL srl
#define SLL sll
#define SLLV sllv
#define SRLV srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST LOADL
#define STFIRST STORER
#define STREST STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#else
#define LDFIRST LOADL
#define LDREST LOADR
#define STFIRST STOREL
#define STREST STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
#endif

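/*
 * LDFIRST/LDREST (and STFIRST/STREST) pair the lwl/lwr style instructions
 * so that an unaligned word can be assembled from, or stored as, two
 * partial accesses.  Which of the left/right forms touches the low-order
 * bytes depends on endianness, hence the swapped definitions above; the
 * SHIFT_DISCARD pair likewise selects the shift direction that throws
 * away the bytes that were not actually transferred.
 */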
#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit) (FIRST(unit)+NBYTES-1)

#define ADDRMASK (NBYTES-1)

#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
        .set noat
#else
        .set at=v1
#endif

LEAF(__csum_partial_copy_user)
        PTR_ADDU AT, src, len /* See (1) above. */
#ifdef CONFIG_64BIT
        move errptr, a4
#else
        lw errptr, 16(sp)
#endif
FEXPORT(csum_partial_copy_nocheck)
        move sum, zero
        move odd, zero
        /*
         * Note: dst & src may be unaligned, len may be 0
         * Temps
         */
        /*
         * The "issue break"s below are very approximate.
         * Issue delays for dcache fills will perturb the schedule, as will
         * load queue full replay traps, etc.
         *
         * If len < NBYTES use byte operations.
         */
        sltu t2, len, NBYTES
        and t1, dst, ADDRMASK
        bnez t2, .Lcopy_bytes_checklen
        and t0, src, ADDRMASK
        andi odd, dst, 0x1 /* odd buffer? */
        bnez t1, .Ldst_unaligned
        nop
        bnez t0, .Lsrc_unaligned_dst_aligned
        /*
         * use delay slot for fall-through
         * src and dst are aligned; need to compute rem
         */
.Lboth_aligned:
        SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter
        beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES
        nop
        SUB len, 8*NBYTES # subtract here for bgez loop
        .align 4
1:
EXC( LOAD t0, UNIT(0)(src), .Ll_exc)
EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy)
EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy)
EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy)
EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy)
EXC( LOAD t5, UNIT(5)(src), .Ll_exc_copy)
EXC( LOAD t6, UNIT(6)(src), .Ll_exc_copy)
EXC( LOAD t7, UNIT(7)(src), .Ll_exc_copy)
        SUB len, len, 8*NBYTES
        ADD src, src, 8*NBYTES
EXC( STORE t0, UNIT(0)(dst), .Ls_exc)
        ADDC(sum, t0)
EXC( STORE t1, UNIT(1)(dst), .Ls_exc)
        ADDC(sum, t1)
EXC( STORE t2, UNIT(2)(dst), .Ls_exc)
        ADDC(sum, t2)
EXC( STORE t3, UNIT(3)(dst), .Ls_exc)
        ADDC(sum, t3)
EXC( STORE t4, UNIT(4)(dst), .Ls_exc)
        ADDC(sum, t4)
EXC( STORE t5, UNIT(5)(dst), .Ls_exc)
        ADDC(sum, t5)
EXC( STORE t6, UNIT(6)(dst), .Ls_exc)
        ADDC(sum, t6)
EXC( STORE t7, UNIT(7)(dst), .Ls_exc)
        ADDC(sum, t7)
        .set reorder /* DADDI_WAR */
        ADD dst, dst, 8*NBYTES
        bgez len, 1b
        .set noreorder
        ADD len, 8*NBYTES # revert len (see above)

        /*
         * len == the number of bytes left to copy < 8*NBYTES
         */
.Lcleanup_both_aligned:
#define rem t7
        beqz len, .Ldone
        sltu t0, len, 4*NBYTES
        bnez t0, .Lless_than_4units
        and rem, len, (NBYTES-1) # rem = len % NBYTES
        /*
         * len >= 4*NBYTES
         */
EXC( LOAD t0, UNIT(0)(src), .Ll_exc)
EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy)
EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy)
EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy)
        SUB len, len, 4*NBYTES
        ADD src, src, 4*NBYTES
EXC( STORE t0, UNIT(0)(dst), .Ls_exc)
        ADDC(sum, t0)
EXC( STORE t1, UNIT(1)(dst), .Ls_exc)
        ADDC(sum, t1)
EXC( STORE t2, UNIT(2)(dst), .Ls_exc)
        ADDC(sum, t2)
EXC( STORE t3, UNIT(3)(dst), .Ls_exc)
        ADDC(sum, t3)
        .set reorder /* DADDI_WAR */
        ADD dst, dst, 4*NBYTES
        beqz len, .Ldone
        .set noreorder
.Lless_than_4units:
        /*
         * rem = len % NBYTES
         */
        beq rem, len, .Lcopy_bytes
        nop
1:
EXC( LOAD t0, 0(src), .Ll_exc)
        ADD src, src, NBYTES
        SUB len, len, NBYTES
EXC( STORE t0, 0(dst), .Ls_exc)
        ADDC(sum, t0)
        .set reorder /* DADDI_WAR */
        ADD dst, dst, NBYTES
        bne rem, len, 1b
        .set noreorder

        /*
         * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
         * A loop would do only a byte at a time with possible branch
         * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
         * because can't assume read-access to dst.  Instead, use
         * STREST dst, which doesn't require read access to dst.
         *
         * This code should perform better than a simple loop on modern,
         * wide-issue mips processors because the code has fewer branches and
         * more instruction-level parallelism.
         */
#define bits t2
        beqz len, .Ldone
        ADD t1, dst, len # t1 is just past last byte of dst
        li bits, 8*NBYTES
        SLL rem, len, 3 # rem = number of bits to keep
EXC( LOAD t0, 0(src), .Ll_exc)
        SUB bits, bits, rem # bits = number of bits to discard
        SHIFT_DISCARD t0, t0, bits
EXC( STREST t0, -1(t1), .Ls_exc)
        SHIFT_DISCARD_REVERT t0, t0, bits
        .set reorder
        ADDC(sum, t0)
        b .Ldone
        .set noreorder
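/*
 * For the checksum, only the bytes actually stored may contribute, so
 * the loaded word is shifted so the unwanted bytes fall off one end and
 * then shifted back, leaving zeroes in their place.  Roughly, on a
 * little-endian build (illustration only):
 *
 *	t0 = (t0 << bits) >> bits;	then ADDC(sum, t0)
 *
 * A big-endian build shifts the other way round (SRLV then SLLV).
 */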
.Ldst_unaligned:
        /*
         * dst is unaligned
         * t0 = src & ADDRMASK
         * t1 = dst & ADDRMASK; t1 > 0
         * len >= NBYTES
         *
         * Copy enough bytes to align dst
         * Set match = (src and dst have same alignment)
         */
#define match rem
EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc)
        ADD t2, zero, NBYTES
EXC( LDREST t3, REST(0)(src), .Ll_exc_copy)
        SUB t2, t2, t1 # t2 = number of bytes copied
        xor match, t0, t1
EXC( STFIRST t3, FIRST(0)(dst), .Ls_exc)
        SLL t4, t1, 3 # t4 = number of bits to discard
        SHIFT_DISCARD t3, t3, t4
        /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
        ADDC(sum, t3)
        beq len, t2, .Ldone
        SUB len, len, t2
        ADD dst, dst, t2
        beqz match, .Lboth_aligned
        ADD src, src, t2

.Lsrc_unaligned_dst_aligned:
        SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter
        beqz t0, .Lcleanup_src_unaligned
        and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES
1:
        /*
         * Avoid consecutive LD*'s to the same register since some mips
         * implementations can't issue them in the same cycle.
         * It's OK to load FIRST(N+1) before REST(N) because the two addresses
         * are to the same unit (unless src is aligned, but it's not).
         */
EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc)
EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy)
        SUB len, len, 4*NBYTES
EXC( LDREST t0, REST(0)(src), .Ll_exc_copy)
EXC( LDREST t1, REST(1)(src), .Ll_exc_copy)
EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy)
EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy)
EXC( LDREST t2, REST(2)(src), .Ll_exc_copy)
EXC( LDREST t3, REST(3)(src), .Ll_exc_copy)
        ADD src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
        nop # improves slotting
#endif
EXC( STORE t0, UNIT(0)(dst), .Ls_exc)
        ADDC(sum, t0)
EXC( STORE t1, UNIT(1)(dst), .Ls_exc)
        ADDC(sum, t1)
EXC( STORE t2, UNIT(2)(dst), .Ls_exc)
        ADDC(sum, t2)
EXC( STORE t3, UNIT(3)(dst), .Ls_exc)
        ADDC(sum, t3)
        .set reorder /* DADDI_WAR */
        ADD dst, dst, 4*NBYTES
        bne len, rem, 1b
        .set noreorder

.Lcleanup_src_unaligned:
        beqz len, .Ldone
        and rem, len, NBYTES-1 # rem = len % NBYTES
        beq rem, len, .Lcopy_bytes
        nop
1:
EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc)
EXC( LDREST t0, REST(0)(src), .Ll_exc_copy)
        ADD src, src, NBYTES
        SUB len, len, NBYTES
EXC( STORE t0, 0(dst), .Ls_exc)
        ADDC(sum, t0)
        .set reorder /* DADDI_WAR */
        ADD dst, dst, NBYTES
        bne len, rem, 1b
        .set noreorder

.Lcopy_bytes_checklen:
        beqz len, .Ldone
        nop
.Lcopy_bytes:
        /* 0 < len < NBYTES */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
        move t2, zero # partial word
        li t3, SHIFT_START # shift
        /* use .Ll_exc_copy here to return correct sum on fault */
#define COPY_BYTE(N) \
EXC( lbu t0, N(src), .Ll_exc_copy); \
        SUB len, len, 1; \
EXC( sb t0, N(dst), .Ls_exc); \
        SLLV t0, t0, t3; \
        addu t3, SHIFT_INC; \
        beqz len, .Lcopy_bytes_done; \
        or t2, t0

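/*
 * Each COPY_BYTE() step copies one byte and also positions it inside the
 * partial word t2 at the offset it would have had in a whole-word load:
 * little-endian builds start at shift 0 and step up by 8, big-endian
 * builds start at the top byte and step down, so that ADDC(sum, t2) at
 * .Lcopy_bytes_done sees the bytes exactly as a word load would have.
 */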
        COPY_BYTE(0)
        COPY_BYTE(1)
#ifdef USE_DOUBLE
        COPY_BYTE(2)
        COPY_BYTE(3)
        COPY_BYTE(4)
        COPY_BYTE(5)
#endif
EXC( lbu t0, NBYTES-2(src), .Ll_exc_copy)
        SUB len, len, 1
EXC( sb t0, NBYTES-2(dst), .Ls_exc)
        SLLV t0, t0, t3
        or t2, t0
.Lcopy_bytes_done:
        ADDC(sum, t2)
.Ldone:
        /* fold checksum */
#ifdef USE_DOUBLE
        dsll32 v1, sum, 0
        daddu sum, v1
        sltu v1, sum, v1
        dsra32 sum, sum, 0
        addu sum, v1
#endif

#ifdef CPU_MIPSR2
        wsbh v1, sum
        movn sum, v1, odd
#else
        beqz odd, 1f /* odd buffer alignment? */
        lui v1, 0x00ff
        addu v1, 0x00ff
        and t0, sum, v1
        sll t0, t0, 8
        srl sum, sum, 8
        and sum, sum, v1
        or sum, sum, t0
1:
#endif
        .set reorder
        ADDC32(sum, psum)
        jr ra
        .set noreorder

.Ll_exc_copy:
        /*
         * Copy bytes from src until faulting load address (or until a
         * lb faults)
         *
         * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
         * may be more than a byte beyond the last address.
         * Hence, the lb below may get an exception.
         *
         * Assumes src < THREAD_BUADDR($28)
         */
        LOAD t0, TI_TASK($28)
        li t2, SHIFT_START
        LOAD t0, THREAD_BUADDR(t0)
1:
EXC( lbu t1, 0(src), .Ll_exc)
        ADD src, src, 1
        sb t1, 0(dst) # can't fault -- we're copy_from_user
        SLLV t1, t1, t2
        addu t2, SHIFT_INC
        ADDC(sum, t1)
        .set reorder /* DADDI_WAR */
        ADD dst, dst, 1
        bne src, t0, 1b
        .set noreorder
.Ll_exc:
        LOAD t0, TI_TASK($28)
        nop
        LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address
        nop
        SUB len, AT, t0 # len number of uncopied bytes
        /*
         * Here's where we rely on src and dst being incremented in tandem,
         * See (3) above.
         * dst += (fault addr - src) to put dst at first byte to clear
         */
        ADD dst, t0 # compute start address in a1
        SUB dst, src
        /*
         * Clear len bytes starting at dst.  Can't call __bzero because it
         * might modify len.  An inefficient loop for these rare times...
         */
        .set reorder /* DADDI_WAR */
        SUB src, len, 1
        beqz len, .Ldone
        .set noreorder
1:      sb zero, 0(dst)
        ADD dst, dst, 1
        .set push
        .set noat
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
        bnez src, 1b
        SUB src, src, 1
#else
        li v1, 1
        bnez src, 1b
        SUB src, src, v1
#endif
        li v1, -EFAULT
        b .Ldone
        sw v1, (errptr)

.Ls_exc:
        li v0, -1 /* invalid checksum */
        li v1, -EFAULT
        jr ra
        sw v1, (errptr)
        .set pop
        END(__csum_partial_copy_user)