GitHub Repository: torvalds/linux
Path: blob/master/arch/mips/lib/csum_partial.S
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 * Copyright (C) 2007 Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 */
#include <linux/errno.h>
#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we share the code base with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register definitions from
 * the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0 $8
#define t1 $9
#define t2 $10
#define t3 $11
#define t4 $12
#define t5 $13
#define t6 $14
#define t7 $15

#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD ld
#define LOAD32 lwu
#define ADD daddu
#define NBYTES 8

#else

#define LOAD lw
#define LOAD32 lw
#define ADD addu
#define NBYTES 4

#endif /* USE_DOUBLE */

#define UNIT(unit) ((unit)*NBYTES)

#define ADDC(sum,reg) \
.set push; \
.set noat; \
ADD sum, reg; \
sltu v1, sum, reg; \
ADD sum, v1; \
.set pop

#define ADDC32(sum,reg) \
.set push; \
.set noat; \
addu sum, reg; \
sltu v1, sum, reg; \
addu sum, v1; \
.set pop

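/*
 * ADDC/ADDC32 perform the end-around-carry addition used by the
 * ones'-complement Internet checksum: after the add, sltu captures the
 * carry-out, which is then folded back into the running sum.  A minimal C
 * sketch of the same idea for 32-bit words (illustrative only; the helper
 * name is hypothetical):
 *
 *    u32 addc32(u32 sum, u32 word)
 *    {
 *        sum += word;
 *        if (sum < word)   // unsigned overflow => carry out of bit 31
 *            sum++;        // fold it back in
 *        return sum;
 *    }
 */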
#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
LOAD _t0, (offset + UNIT(0))(src); \
LOAD _t1, (offset + UNIT(1))(src); \
LOAD _t2, (offset + UNIT(2))(src); \
LOAD _t3, (offset + UNIT(3))(src); \
ADDC(_t0, _t1); \
ADDC(_t2, _t3); \
ADDC(sum, _t0); \
ADDC(sum, _t2)

#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3); \
CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */

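/*
 * At the C level this is the kernel's csum_partial(); roughly, as an
 * illustrative sketch of the prototype:
 *
 *    __wsum csum_partial(const void *buff, int len, __wsum sum);
 *
 * The 32-bit folded result is returned in v0 (aliased to "sum" below).
 */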
#define src a0
#define sum v0

.text
.set noreorder
.align 5
LEAF(csum_partial)
EXPORT_SYMBOL(csum_partial)
move sum, zero
move t7, zero

sltiu t8, a1, 0x8
bnez t8, .Lsmall_csumcpy /* < 8 bytes to copy */
move t2, a1

andi t7, src, 0x1 /* odd buffer? */

.Lhword_align:
beqz t7, .Lword_align
andi t8, src, 0x2

lbu t0, (src)
LONG_SUBU a1, a1, 0x1
#ifdef __MIPSEL__
sll t0, t0, 8
#endif
ADDC(sum, t0)
PTR_ADDU src, src, 0x1
andi t8, src, 0x2

.Lword_align:
beqz t8, .Ldword_align
sltiu t8, a1, 56

lhu t0, (src)
LONG_SUBU a1, a1, 0x2
ADDC(sum, t0)
sltiu t8, a1, 56
PTR_ADDU src, src, 0x2

.Ldword_align:
bnez t8, .Ldo_end_words
move t8, a1

andi t8, src, 0x4
beqz t8, .Lqword_align
andi t8, src, 0x8

LOAD32 t0, 0x00(src)
LONG_SUBU a1, a1, 0x4
ADDC(sum, t0)
PTR_ADDU src, src, 0x4
andi t8, src, 0x8

.Lqword_align:
beqz t8, .Loword_align
andi t8, src, 0x10

#ifdef USE_DOUBLE
ld t0, 0x00(src)
LONG_SUBU a1, a1, 0x8
ADDC(sum, t0)
#else
lw t0, 0x00(src)
lw t1, 0x04(src)
LONG_SUBU a1, a1, 0x8
ADDC(sum, t0)
ADDC(sum, t1)
#endif
PTR_ADDU src, src, 0x8
andi t8, src, 0x10

.Loword_align:
beqz t8, .Lbegin_movement
LONG_SRL t8, a1, 0x7

#ifdef USE_DOUBLE
ld t0, 0x00(src)
ld t1, 0x08(src)
ADDC(sum, t0)
ADDC(sum, t1)
#else
CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
LONG_SUBU a1, a1, 0x10
PTR_ADDU src, src, 0x10
LONG_SRL t8, a1, 0x7

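/*
 * At this point src has been aligned step by step (byte, halfword, word,
 * doubleword, quadword) up to a 32-byte boundary, and t8 holds the number
 * of remaining 128-byte blocks (a1 >> 7) for the unrolled loops below.
 */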
.Lbegin_movement:
beqz t8, 1f
andi t2, a1, 0x40

.Lmove_128bytes:
CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
LONG_SUBU t8, t8, 0x01
.set reorder /* DADDI_WAR */
PTR_ADDU src, src, 0x80
bnez t8, .Lmove_128bytes
.set noreorder

1:
beqz t2, 1f
andi t2, a1, 0x20

.Lmove_64bytes:
CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
PTR_ADDU src, src, 0x40

1:
beqz t2, .Ldo_end_words
andi t8, a1, 0x1c

.Lmove_32bytes:
CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
andi t8, a1, 0x1c
PTR_ADDU src, src, 0x20

.Ldo_end_words:
beqz t8, .Lsmall_csumcpy
andi t2, a1, 0x3
LONG_SRL t8, t8, 0x2

.Lend_words:
LOAD32 t0, (src)
LONG_SUBU t8, t8, 0x1
ADDC(sum, t0)
.set reorder /* DADDI_WAR */
PTR_ADDU src, src, 0x4
bnez t8, .Lend_words
.set noreorder

/* unknown src alignment and < 8 bytes to go */
.Lsmall_csumcpy:
move a1, t2

andi t0, a1, 4
beqz t0, 1f
andi t0, a1, 2

/* Still a full word to go */
ulw t1, (src)
PTR_ADDIU src, 4
#ifdef USE_DOUBLE
dsll t1, t1, 32 /* clear lower 32bit */
#endif
ADDC(sum, t1)

1: move t1, zero
beqz t0, 1f
andi t0, a1, 1

/* Still a halfword to go */
ulhu t1, (src)
PTR_ADDIU src, 2

1: beqz t0, 1f
sll t1, t1, 16

lbu t2, (src)
nop

#ifdef __MIPSEB__
sll t2, t2, 8
#endif
or t1, t2

1: ADDC(sum, t1)

/* fold checksum */
#ifdef USE_DOUBLE
dsll32 v1, sum, 0
daddu sum, v1
sltu v1, sum, v1
dsra32 sum, sum, 0
addu sum, v1
#endif

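/*
 * The fold above adds the high and low 32-bit halves of the 64-bit
 * accumulator with end-around carry, leaving a 32-bit sum.  Roughly, in C
 * (illustrative sketch only):
 *
 *    u32 fold64(u64 sum)
 *    {
 *        sum = (sum & 0xffffffffULL) + (sum >> 32);
 *        sum += sum >> 32;   // fold the carry, if any
 *        return (u32)sum;
 *    }
 */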
/* odd buffer alignment? */
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
defined(CONFIG_CPU_LOONGSON64)
.set push
.set arch=mips32r2
wsbh v1, sum
movn sum, v1, t7
.set pop
#else
beqz t7, 1f /* odd buffer alignment? */
lui v1, 0x00ff
addu v1, 0x00ff
and t0, sum, v1
sll t0, t0, 8
srl sum, sum, 8
and sum, sum, v1
or sum, sum, t0
1:
#endif
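/*
 * If the buffer started on an odd address, each byte was accumulated in
 * the opposite byte lane of its 16-bit word.  Since the ones'-complement
 * sum commutes with byte swapping, swapping the byte lanes of the folded
 * sum (wsbh on R2+, the shift/mask sequence otherwise) corrects this.
 */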
.set reorder
/* Add the passed partial csum. */
ADDC32(sum, a2)
jr ra
.set noreorder
END(csum_partial)


/*
 * checksum and copy routines based on memcpy.S
 *
 * csum_partial_copy_nocheck(src, dst, len)
 * __csum_partial_copy_kernel(src, dst, len)
 *
 * See "Spec" in memcpy.S for details. Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 */

#define src a0
#define dst a1
#define len a2
#define sum v0
#define odd t8

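/*
 * The copy-and-checksum entry points built below take (src, dst, len) in
 * a0/a1/a2 and return the 32-bit partial checksum in v0, or 0 if a
 * load/store faults.  Roughly, as an illustrative sketch of the nocheck
 * variant's prototype:
 *
 *    __wsum __csum_partial_copy_nocheck(const void *src, void *dst, int len);
 */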
/*
 * All exception handlers simply return 0.
 */

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
#define LEGACY_MODE 1
#define EVA_MODE 2
#define USEROP 1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 * insn : Load/store instruction
 * type : Instruction type
 * reg : Register
 * addr : Address
 * handler : Exception handler
 */
#define EXC(insn, type, reg, addr) \
.if \mode == LEGACY_MODE; \
9: insn reg, addr; \
.section __ex_table,"a"; \
PTR_WD 9b, .L_exc; \
.previous; \
/* This is enabled in EVA mode */ \
.else; \
/* If loading from user or storing to user */ \
.if ((\from == USEROP) && (type == LD_INSN)) || \
((\to == USEROP) && (type == ST_INSN)); \
9: __BUILD_EVA_INSN(insn##e, reg, addr); \
.section __ex_table,"a"; \
PTR_WD 9b, .L_exc; \
.previous; \
.else; \
/* EVA without exception */ \
insn reg, addr; \
.endif; \
.endif

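/*
 * Each potentially-faulting access emitted by EXC() gets an __ex_table
 * entry pairing the instruction address (local label 9) with the .L_exc
 * fixup, so a fault during the copy is redirected to .L_exc and the
 * routine returns 0.
 */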
#undef LOAD

#ifdef USE_DOUBLE

#define LOADK ld /* No exception */
#define LOAD(reg, addr) EXC(ld, LD_INSN, reg, addr)
#define LOADBU(reg, addr) EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr) EXC(ldl, LD_INSN, reg, addr)
#define LOADR(reg, addr) EXC(ldr, LD_INSN, reg, addr)
#define STOREB(reg, addr) EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr) EXC(sdl, ST_INSN, reg, addr)
#define STORER(reg, addr) EXC(sdr, ST_INSN, reg, addr)
#define STORE(reg, addr) EXC(sd, ST_INSN, reg, addr)
#define ADD daddu
#define SUB dsubu
#define SRL dsrl
#define SLL dsll
#define SLLV dsllv
#define SRLV dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

#else

#define LOADK lw /* No exception */
#define LOAD(reg, addr) EXC(lw, LD_INSN, reg, addr)
#define LOADBU(reg, addr) EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr) EXC(lwl, LD_INSN, reg, addr)
#define LOADR(reg, addr) EXC(lwr, LD_INSN, reg, addr)
#define STOREB(reg, addr) EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr) EXC(swl, ST_INSN, reg, addr)
#define STORER(reg, addr) EXC(swr, ST_INSN, reg, addr)
#define STORE(reg, addr) EXC(sw, ST_INSN, reg, addr)
#define ADD addu
#define SUB subu
#define SRL srl
#define SLL sll
#define SLLV sllv
#define SRLV srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST LOADL
#define STFIRST STORER
#define STREST STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#else
#define LDFIRST LOADL
#define LDREST LOADR
#define STFIRST STOREL
#define STREST STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit) (FIRST(unit)+NBYTES-1)

#define ADDRMASK (NBYTES-1)

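/*
 * LDFIRST/LDREST pair the left/right partial-word loads (lwl/lwr, or
 * ldl/ldr with USE_DOUBLE) so an unaligned NBYTES-sized word can be read
 * in two accesses; which of the pair touches the low-address end depends
 * on endianness, hence the CONFIG_CPU_LITTLE_ENDIAN selection above.
 * FIRST(n)/REST(n) give the two addresses for unit n.
 */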
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
.set noat
#else
.set at=v1
#endif

.macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to

li sum, -1
move odd, zero
/*
 * Note: dst & src may be unaligned, len may be 0
 * Temps
 */
/*
 * The "issue break"s below are very approximate.
 * Issue delays for dcache fills will perturb the schedule, as will
 * load queue full replay traps, etc.
 *
 * If len < NBYTES use byte operations.
 */
sltu t2, len, NBYTES
and t1, dst, ADDRMASK
bnez t2, .Lcopy_bytes_checklen\@
and t0, src, ADDRMASK
andi odd, dst, 0x1 /* odd buffer? */
bnez t1, .Ldst_unaligned\@
nop
bnez t0, .Lsrc_unaligned_dst_aligned\@
/*
 * use delay slot for fall-through
 * src and dst are aligned; need to compute rem
 */
.Lboth_aligned\@:
SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter
beqz t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
nop
SUB len, 8*NBYTES # subtract here for bgez loop
.align 4
1:
LOAD(t0, UNIT(0)(src))
LOAD(t1, UNIT(1)(src))
LOAD(t2, UNIT(2)(src))
LOAD(t3, UNIT(3)(src))
LOAD(t4, UNIT(4)(src))
LOAD(t5, UNIT(5)(src))
LOAD(t6, UNIT(6)(src))
LOAD(t7, UNIT(7)(src))
SUB len, len, 8*NBYTES
ADD src, src, 8*NBYTES
STORE(t0, UNIT(0)(dst))
ADDC(t0, t1)
STORE(t1, UNIT(1)(dst))
ADDC(sum, t0)
STORE(t2, UNIT(2)(dst))
ADDC(t2, t3)
STORE(t3, UNIT(3)(dst))
ADDC(sum, t2)
STORE(t4, UNIT(4)(dst))
ADDC(t4, t5)
STORE(t5, UNIT(5)(dst))
ADDC(sum, t4)
STORE(t6, UNIT(6)(dst))
ADDC(t6, t7)
STORE(t7, UNIT(7)(dst))
ADDC(sum, t6)
.set reorder /* DADDI_WAR */
ADD dst, dst, 8*NBYTES
bgez len, 1b
.set noreorder
ADD len, 8*NBYTES # revert len (see above)

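/*
 * In the unrolled loop above each STORE is interleaved with the ADDC of a
 * value loaded earlier, so the checksum arithmetic overlaps the store
 * latency and the sum covers exactly the bytes that were copied.
 */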
/*
 * len == the number of bytes left to copy < 8*NBYTES
 */
.Lcleanup_both_aligned\@:
#define rem t7
beqz len, .Ldone\@
sltu t0, len, 4*NBYTES
bnez t0, .Lless_than_4units\@
and rem, len, (NBYTES-1) # rem = len % NBYTES
/*
 * len >= 4*NBYTES
 */
LOAD(t0, UNIT(0)(src))
LOAD(t1, UNIT(1)(src))
LOAD(t2, UNIT(2)(src))
LOAD(t3, UNIT(3)(src))
SUB len, len, 4*NBYTES
ADD src, src, 4*NBYTES
STORE(t0, UNIT(0)(dst))
ADDC(t0, t1)
STORE(t1, UNIT(1)(dst))
ADDC(sum, t0)
STORE(t2, UNIT(2)(dst))
ADDC(t2, t3)
STORE(t3, UNIT(3)(dst))
ADDC(sum, t2)
.set reorder /* DADDI_WAR */
ADD dst, dst, 4*NBYTES
beqz len, .Ldone\@
.set noreorder
.Lless_than_4units\@:
/*
 * rem = len % NBYTES
 */
beq rem, len, .Lcopy_bytes\@
nop
1:
LOAD(t0, 0(src))
ADD src, src, NBYTES
SUB len, len, NBYTES
STORE(t0, 0(dst))
ADDC(sum, t0)
.set reorder /* DADDI_WAR */
ADD dst, dst, NBYTES
bne rem, len, 1b
.set noreorder

/*
 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
 * A loop would do only a byte at a time with possible branch
 * mispredicts. Can't do an explicit LOAD dst,mask,or,STORE
 * because can't assume read-access to dst. Instead, use
 * STREST dst, which doesn't require read access to dst.
 *
 * This code should perform better than a simple loop on modern,
 * wide-issue mips processors because the code has fewer branches and
 * more instruction-level parallelism.
 */
#define bits t2
beqz len, .Ldone\@
ADD t1, dst, len # t1 is just past last byte of dst
li bits, 8*NBYTES
SLL rem, len, 3 # rem = number of bits to keep
LOAD(t0, 0(src))
SUB bits, bits, rem # bits = number of bits to discard
SHIFT_DISCARD t0, t0, bits
STREST(t0, -1(t1))
SHIFT_DISCARD_REVERT t0, t0, bits
.set reorder
ADDC(sum, t0)
b .Ldone\@
.set noreorder
.Ldst_unaligned\@:
/*
 * dst is unaligned
 * t0 = src & ADDRMASK
 * t1 = dst & ADDRMASK; t1 > 0
 * len >= NBYTES
 *
 * Copy enough bytes to align dst
 * Set match = (src and dst have same alignment)
 */
#define match rem
LDFIRST(t3, FIRST(0)(src))
ADD t2, zero, NBYTES
LDREST(t3, REST(0)(src))
SUB t2, t2, t1 # t2 = number of bytes copied
xor match, t0, t1
STFIRST(t3, FIRST(0)(dst))
SLL t4, t1, 3 # t4 = number of bits to discard
SHIFT_DISCARD t3, t3, t4
/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
ADDC(sum, t3)
beq len, t2, .Ldone\@
SUB len, len, t2
ADD dst, dst, t2
beqz match, .Lboth_aligned\@
ADD src, src, t2

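/*
 * After .Ldst_unaligned the destination is NBYTES-aligned; if src and dst
 * shared the same misalignment (match == 0) the copy rejoins
 * .Lboth_aligned, otherwise it falls through to the src-unaligned loop
 * below.
 */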
.Lsrc_unaligned_dst_aligned\@:
SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter
beqz t0, .Lcleanup_src_unaligned\@
and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
LDFIRST(t0, FIRST(0)(src))
LDFIRST(t1, FIRST(1)(src))
SUB len, len, 4*NBYTES
LDREST(t0, REST(0)(src))
LDREST(t1, REST(1)(src))
LDFIRST(t2, FIRST(2)(src))
LDFIRST(t3, FIRST(3)(src))
LDREST(t2, REST(2)(src))
LDREST(t3, REST(3)(src))
ADD src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
nop # improves slotting
#endif
STORE(t0, UNIT(0)(dst))
ADDC(t0, t1)
STORE(t1, UNIT(1)(dst))
ADDC(sum, t0)
STORE(t2, UNIT(2)(dst))
ADDC(t2, t3)
STORE(t3, UNIT(3)(dst))
ADDC(sum, t2)
.set reorder /* DADDI_WAR */
ADD dst, dst, 4*NBYTES
bne len, rem, 1b
.set noreorder

.Lcleanup_src_unaligned\@:
beqz len, .Ldone\@
and rem, len, NBYTES-1 # rem = len % NBYTES
beq rem, len, .Lcopy_bytes\@
nop
1:
LDFIRST(t0, FIRST(0)(src))
LDREST(t0, REST(0)(src))
ADD src, src, NBYTES
SUB len, len, NBYTES
STORE(t0, 0(dst))
ADDC(sum, t0)
.set reorder /* DADDI_WAR */
ADD dst, dst, NBYTES
bne len, rem, 1b
.set noreorder

.Lcopy_bytes_checklen\@:
beqz len, .Ldone\@
nop
.Lcopy_bytes\@:
/* 0 < len < NBYTES */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
move t2, zero # partial word
li t3, SHIFT_START # shift
#define COPY_BYTE(N) \
LOADBU(t0, N(src)); \
SUB len, len, 1; \
STOREB(t0, N(dst)); \
SLLV t0, t0, t3; \
addu t3, SHIFT_INC; \
beqz len, .Lcopy_bytes_done\@; \
or t2, t0

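/*
 * The trailing bytes are copied one at a time and packed into t2 at the
 * byte position each would occupy in a full word, so a single ADDC of t2
 * accounts for all of them.  Roughly, in C (illustrative sketch only,
 * ignoring the 64-bit accumulator case):
 *
 *    u32 word = 0;
 *    int shift = SHIFT_START;
 *    for (i = 0; i < len; i++, shift += SHIFT_INC)
 *        word |= (u32)src[i] << shift;
 *    sum = addc32(sum, word);   // addc32 as sketched earlier
 */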
COPY_BYTE(0)
COPY_BYTE(1)
#ifdef USE_DOUBLE
COPY_BYTE(2)
COPY_BYTE(3)
COPY_BYTE(4)
COPY_BYTE(5)
#endif
LOADBU(t0, NBYTES-2(src))
SUB len, len, 1
STOREB(t0, NBYTES-2(dst))
SLLV t0, t0, t3
or t2, t0
.Lcopy_bytes_done\@:
ADDC(sum, t2)
.Ldone\@:
/* fold checksum */
.set push
.set noat
#ifdef USE_DOUBLE
dsll32 v1, sum, 0
daddu sum, v1
sltu v1, sum, v1
dsra32 sum, sum, 0
addu sum, v1
#endif

#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
defined(CONFIG_CPU_LOONGSON64)
.set push
.set arch=mips32r2
wsbh v1, sum
movn sum, v1, odd
.set pop
#else
beqz odd, 1f /* odd buffer alignment? */
lui v1, 0x00ff
addu v1, 0x00ff
and t0, sum, v1
sll t0, t0, 8
srl sum, sum, 8
and sum, sum, v1
or sum, sum, t0
1:
#endif
.set pop
.set reorder
jr ra
.set noreorder
.endm

.set noreorder
.L_exc:
jr ra
li v0, 0

FEXPORT(__csum_partial_copy_nocheck)
EXPORT_SYMBOL(__csum_partial_copy_nocheck)
#ifndef CONFIG_EVA
FEXPORT(__csum_partial_copy_to_user)
EXPORT_SYMBOL(__csum_partial_copy_to_user)
FEXPORT(__csum_partial_copy_from_user)
EXPORT_SYMBOL(__csum_partial_copy_from_user)
#endif
__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP

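/*
 * With EVA the user-space variants are built separately below so that
 * EXC() emits the EVA load/store forms for the user-side accesses;
 * without EVA the single LEGACY_MODE instantiation above backs all three
 * entry points.
 */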
#ifdef CONFIG_EVA
LEAF(__csum_partial_copy_to_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP
END(__csum_partial_copy_to_user)

LEAF(__csum_partial_copy_from_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP
END(__csum_partial_copy_from_user)
#endif