Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/score/lib/checksum.S
10817 views
1
/*
2
* arch/score/lib/checksum.S
3
*
4
* Score Processor version.
5
*
6
* Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
7
* Lennox Wu <[email protected]>
8
* Chen Liqin <[email protected]>
9
*
10
* This program is free software; you can redistribute it and/or modify
11
* it under the terms of the GNU General Public License as published by
12
* the Free Software Foundation; either version 2 of the License, or
13
* (at your option) any later version.
14
*
15
* This program is distributed in the hope that it will be useful,
16
* but WITHOUT ANY WARRANTY; without even the implied warranty of
17
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
* GNU General Public License for more details.
19
*
20
* You should have received a copy of the GNU General Public License
21
* along with this program; if not, see the file COPYING, or write
22
* to the Free Software Foundation, Inc.,
23
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24
*/
25
#include <linux/linkage.h>
26
27
/*
 * ADDC(sum, reg): add reg into sum, then add one more if the add
 * produced a carry (end-around carry).
 *
 * After the add, reg is compared with the new sum. The bleu skips to
 * local label 9 when reg <= sum; otherwise (reg > sum, which for
 * unsigned values means the add wrapped) sum is incremented by one.
 * The macro defines numeric local label 9 on every expansion.
 *
 * NOTE(review): the bare numeric lines between the macro lines look
 * like line numbers left behind by a web export. As written they
 * interrupt the backslash continuations - confirm and strip them.
 */
#define ADDC(sum,reg) \
28
add sum, sum, reg; \
29
cmp.c reg, sum; \
30
bleu 9f; \
31
addi sum, 0x1; \
32
9:
33
34
/*
 * CSUM_BIGCHUNK(src, offset, sum): accumulate the 32 bytes at
 * [src + offset + 0x00 .. src + offset + 0x1f] into sum.
 *
 * The data is read as eight 32-bit words with lw, four at a time into
 * r8-r11, and each word is added with ADDC. r8-r11 are overwritten;
 * src itself is not advanced, so the caller bumps it afterwards.
 *
 * NOTE(review): the last macro line ends in a backslash, so the
 * definition continues onto whatever line follows it. The bare numeric
 * lines look like line numbers left behind by a web export - confirm
 * and strip them.
 */
#define CSUM_BIGCHUNK(src, offset, sum) \
35
lw r8, [src, offset + 0x00]; \
36
lw r9, [src, offset + 0x04]; \
37
lw r10, [src, offset + 0x08]; \
38
lw r11, [src, offset + 0x0c]; \
39
ADDC(sum, r8); \
40
ADDC(sum, r9); \
41
ADDC(sum, r10); \
42
ADDC(sum, r11); \
43
lw r8, [src, offset + 0x10]; \
44
lw r9, [src, offset + 0x14]; \
45
lw r10, [src, offset + 0x18]; \
46
lw r11, [src, offset + 0x1c]; \
47
ADDC(sum, r8); \
48
ADDC(sum, r9); \
49
ADDC(sum, r10); \
50
ADDC(sum, r11); \
51
52
/* Symbolic register names used by the checksum code below. */
/* src: r4, address of the next data to read; also receives the result. */
#define src r4
53
/*
 * NOTE(review): `dest` is not referenced anywhere in the code visible
 * here; r5 is used directly, and only as the remaining byte count.
 */
#define dest r5
54
/* sum: r27, the running 32-bit checksum accumulator. */
#define sum r27
55
56
.text
57
/*
 * small_csumcpy: sum the last few bytes at src, fold the checksum to
 * 16 bits, add the caller's partial checksum and return.
 *
 * Reached from csum_partial either directly (length below 8) or through
 * small_memcpy once the bulk of the buffer has been summed.
 *   r10 - bytes still to sum (copied into r5 on entry)
 *   r25 - 1 if an odd start address was already handled by csum_partial
 *   sum - running 32-bit checksum
 *   r6  - partial checksum added after the fold
 * The result is moved to r4 and the routine leaves through `br r3`.
 * r8, r9 and r26 are used as scratch.
 */
/* unknown src alignment and < 8 bytes to go */
58
small_csumcpy:
59
/* r5 = remaining length, handed over in r10 by both entry paths */
mv r5, r10
60
ldi r9, 0x0
61
cmpi.c r25, 0x1
62
beq pass_small_set_t7 /*already set, jump to pass_small_set_t7*/
63
/* r25 = src & 1: remember whether the buffer starts on an odd address */
andri.c r25,r4 , 0x1 /*Is src 2 bytes aligned?*/
64
65
pass_small_set_t7:
66
/*
 * Uses the flags of whichever .c instruction ran last: taken when r25
 * was already 1 (cmpi.c) or when src is even (andri.c).
 */
beq aligned
67
/* odd start address: nothing to do if the length is zero */
cmpi.c r5, 0x0
68
beq fold
69
/* consume the leading odd byte, placed in bits 8-15 of the word */
lbu r9, [src]
70
slli r9,r9, 0x8 /*Little endian*/
71
ADDC(sum, r9)
72
addi src, 0x1
73
subi.c r5, 0x1
74
75
/*len still a full word */
76
aligned:
77
/* tests bit 2 of the length; the length is below 8 on this path */
andri.c r8, r5, 0x4 /*Len >= 4?*/
78
beq len_less_4bytes
79
80
/* Still a full word (4byte) to go,and the src is word aligned.*/
81
andri.c r8, src, 0x3 /*src is 4bytes aligned, so use LW!!*/
82
beq four_byte_aligned
83
/* src is not 4-byte aligned: read the word as two halfwords */
lhu r9, [src]
84
addi src, 2
85
ADDC(sum, r9)
86
lhu r9, [src]
87
addi src, 2
88
ADDC(sum, r9)
89
b len_less_4bytes
90
91
four_byte_aligned: /* Len >=4 and four byte aligned */
92
lw r9, [src]
93
addi src, 4
94
ADDC(sum, r9)
95
96
len_less_4bytes: /* 2 byte aligned aligned and length<4B */
97
/* bit 1 of the length: one halfword left? */
andri.c r8, r5, 0x2
98
beq len_less_2bytes
99
lhu r9, [src]
100
addi src, 0x2 /* src+=2 */
101
ADDC(sum, r9)
102
103
len_less_2bytes: /* len = 1 */
104
/* bit 0 of the length: one trailing byte left? */
andri.c r8, r5, 0x1
105
beq fold /* less than 2 and not equal 1--> len=0 -> fold */
106
/* trailing byte is added unshifted (low byte of the word) */
lbu r9, [src]
107
108
fold_ADDC:
109
ADDC(sum, r9)
110
fold:
111
/* fold checksum */
/*
 * Adds the low 16 bits of sum to its high 16 bits: r26 = sum << 16,
 * sum += r26, then the high half is shifted down and 1 is added when
 * that addition wrapped (r26 > sum).
 */
112
slli r26, sum, 16
113
add sum, sum, r26
114
cmp.c r26, sum
115
/* the bleu below uses the flags from cmp.c, so this srli must leave them alone */
srli sum, sum, 16
116
bleu 1f /* if r26<=sum */
117
addi sum, 0x1 /* r26>sum */
118
1:
119
/* odd buffer alignment? r25 was set in csum_partial */
/* (r25 may also have been set by the andri.c at the top of small_csumcpy) */
120
cmpi.c r25, 0x0
121
beq 1f
122
/* odd start address: swap the two bytes of the 16-bit result */
slli r26, sum, 8
123
srli sum, sum, 8
124
or sum, sum, r26
125
andi sum, 0xffff
126
1:
127
.set optimize
128
/* Add the passed partial csum. */
129
ADDC(sum, r6)
130
/* result goes back in r4 */
mv r4, sum
131
br r3
132
.set volatile
133
134
/*
 * csum_partial: checksum a buffer and combine it with a partial sum.
 *
 * Registers as used by the code below:
 *   r4 (src)  - address of the data; advanced as data is consumed
 *   r5        - byte count
 *   r6        - partial checksum, added in the common tail
 *   r25       - src & 1, i.e. whether the buffer starts on an odd address
 *   r27 (sum) - running 32-bit checksum, starts at 0
 *   r8-r11, r26 - scratch
 * The result is produced by the tail in small_csumcpy, which returns it
 * in r4 via `br r3`.
 *
 * Outline: lengths below 8 go straight to small_csumcpy. Otherwise the
 * source is aligned step by step (1, 2, 4, 8, 16 bytes), then summed in
 * 128-, 64- and 32-byte chunks with CSUM_BIGCHUNK, then word by word,
 * and the last 0-3 bytes are handed to small_csumcpy.
 */
.align 5
135
ENTRY(csum_partial)
136
ldi sum, 0
137
ldi r25, 0
138
/* small_csumcpy takes the remaining length in r10 */
mv r10, r5
139
cmpi.c r5, 0x8
140
blt small_csumcpy /* < 8 (signed) bytes to sum */
141
/*
 * NOTE(review): a length of 0 is below 8 and has already branched to
 * small_csumcpy, so this zero check and the `out` label it targets
 * look unreachable. `out` would also return without adding r6.
 */
cmpi.c r5, 0x0
142
beq out
143
andri.c r25, src, 0x1 /* odd buffer? */
144
145
beq word_align
146
hword_align: /* 1 byte */
147
/* consume the leading odd byte, placed in bits 8-15 of the word */
lbu r8, [src]
148
subi r5, 0x1
149
slli r8, r8, 8
150
ADDC(sum, r8)
151
addi src, 0x1
152
153
word_align: /* 2 bytes */
154
andri.c r8, src, 0x2 /* 4bytes(dword)_aligned? */
155
beq dword_align /* not, maybe dword_align */
156
lhu r8, [src]
157
subi r5, 0x2
158
ADDC(sum, r8)
159
addi src, 0x2
160
161
dword_align: /* 4bytes */
162
/* r26 = remaining length; do_end_words turns it into a word count */
mv r26, r5 /* maybe useless when len >=56 */
163
ldi r8, 56
164
cmp.c r8, r5
165
bgtu do_end_words /* if r5 (len) < r8 (56), unsigned */
166
andri.c r26, src, 0x4
167
beq qword_align
168
lw r8, [src]
169
subi r5, 0x4
170
ADDC(sum, r8)
171
addi src, 0x4
172
173
qword_align: /* 8 bytes */
174
andri.c r26, src, 0x8
175
beq oword_align
176
lw r8, [src, 0x0]
177
lw r9, [src, 0x4]
178
subi r5, 0x8 /* len-=0x8 */
179
ADDC(sum, r8)
180
ADDC(sum, r9)
181
addi src, 0x8
182
183
oword_align: /* 16bytes */
184
andri.c r26, src, 0x10
185
beq begin_movement
186
lw r10, [src, 0x08]
187
lw r11, [src, 0x0c]
188
lw r8, [src, 0x00]
189
lw r9, [src, 0x04]
190
ADDC(sum, r10)
191
ADDC(sum, r11)
192
ADDC(sum, r8)
193
ADDC(sum, r9)
194
subi r5, 0x10
195
addi src, 0x10
196
197
begin_movement:
198
/*
 * r26 = len >> 7, the number of 128-byte chunks. From here on r5 is
 * no longer decremented; the code tests individual bits of it instead
 * (0x40, 0x20, 0x1c, 0x3).
 */
srli.c r26, r5, 0x7 /* len>=128? */
199
beq 1f /* len<128 */
200
201
/* r26 = len / 128, computed by the srli.c in begin_movement */
202
move_128bytes:
203
CSUM_BIGCHUNK(src, 0x00, sum)
204
CSUM_BIGCHUNK(src, 0x20, sum)
205
CSUM_BIGCHUNK(src, 0x40, sum)
206
CSUM_BIGCHUNK(src, 0x60, sum)
207
subi.c r26, 0x01 /* r26 equals len/128 */
208
/* the bne below uses the flags from subi.c, so this addi must leave them alone */
addi src, 0x80
209
bne move_128bytes
210
211
1: /* len<128,we process 64byte here */
212
/* bit 6 of the length: one 64-byte chunk left? */
andri.c r10, r5, 0x40
213
beq 1f
214
215
move_64bytes:
216
CSUM_BIGCHUNK(src, 0x00, sum)
217
CSUM_BIGCHUNK(src, 0x20, sum)
218
addi src, 0x40
219
220
1: /* len<64 */
221
/* r26 = bytes of whole words left below 32 (len & 0x1c), used by do_end_words */
andri r26, r5, 0x1c /* 0x1c=28 */
222
/* bit 5 of the length: one 32-byte chunk left? */
andri.c r10, r5, 0x20
223
beq do_end_words /* decided by andri */
224
225
move_32bytes:
226
CSUM_BIGCHUNK(src, 0x00, sum)
227
andri r26, r5, 0x1c
228
addri src, src, 0x20
229
230
do_end_words: /* len<32 */
231
/* r26 was set already in dword_align */
/* (len < 56 path: r26 = len; otherwise r26 = len & 0x1c from the andri above) */
232
cmpi.c r26, 0x0
233
beq maybe_end_cruft /* len<28 or len<56 */
234
/* convert the byte count in r26 into a count of 4-byte words */
srli r26, r26, 0x2
235
236
end_words:
237
lw r8, [src]
238
subi.c r26, 0x1 /* unit is 4 byte */
239
ADDC(sum, r8)
240
addi src, 0x4
241
cmpi.c r26, 0x0
242
bne end_words /* r26!=0 */
243
244
maybe_end_cruft: /* len<4 */
245
/* r10 = len & 3: the 0-3 trailing bytes left for small_csumcpy */
andri r10, r5, 0x3
246
247
small_memcpy:
248
mv r5, r10
249
/* the common tail sums the leftover bytes, folds, adds r6 and returns */
j small_csumcpy
250
251
out:
252
/* returns sum as is; see the NOTE(review) at the zero-length check above */
mv r4, sum
253
br r3
254
255
END(csum_partial)
256
257