Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/arm/lib/csumpartialcopygeneric.S
10817 views
1
/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
10
11
/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */
19
20
/*
 * Symbolic names for the four argument registers, matching the
 * calling convention stated above (r0 = src, r1 = dst, r2 = len,
 * r3 = sum).  All code below refers to the arguments by these names.
 */
src	.req	r0			@ source pointer
dst	.req	r1			@ destination pointer
len	.req	r2			@ byte count
sum	.req	r3			@ running checksum accumulator
24
25
/*
 * Zero-length exit, reached from .Lless8 when len == 0: nothing is
 * copied and the incoming sum is handed back unchanged in r0.
 *
 * NOTE(review): load_regs is not defined in this file; it is
 * presumably the restore-and-return counterpart of save_regs -
 * confirm against the file that provides these macros.
 */
.Lzero:		mov	r0, sum
		load_regs
27
28
/*
 * Align an unaligned destination pointer.  We know that
 * we have >= 8 bytes here, so we don't need to check
 * the length.  Note that the source pointer hasn't been
 * aligned yet.
 *
 * Entered with "blne" from the function entry path when
 * (dst & 3) != 0 and returns through lr.  One byte is copied if
 * dst is odd; two further bytes are copied if dst is then (or was
 * already) only 16-bit aligned.  len is reduced by the number of
 * bytes copied.
 *
 * The checksum is accumulated with an adcs chain.  The instructions
 * between the adcs (sub without S, strb, tst) do not disturb the
 * carry flag, so on return C still holds the carry out of the last
 * addition; the caller is expected to consume it.
 *
 * Scratch registers written here: ip, r8.
 *
 * NOTE(review): load1b/load2b and put_byte_0/put_byte_1 are not
 * defined in this file.  Presumably load1b/load2b fetch one/two
 * bytes from src (advancing it) and put_byte_N is a shifter operand
 * that places the byte in lane N of the sum - confirm against the
 * macro definitions.
 */
.Ldst_unaligned:
		tst	dst, #1
		beq	.Ldst_16bit		@ dst even, so only the 16-bit step is needed

		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	dst, #2
		moveq	pc, lr			@ dst is now 32bit aligned

.Ldst_16bit:	load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
		mov	pc, lr			@ dst is now 32bit aligned
52
53
/*
 * Handle 0 to 7 bytes, with any alignment of source and
 * destination pointers.  Note that when we get here, C = 0
 * (this path is entered with "blo" straight after "cmp len, #8",
 * and blo is only taken when the carry flag is clear).
 *
 * Order of work:
 *   - len == 0: leave through .Lzero.
 *   - dst odd:  copy a single byte first so dst becomes even.
 *   - while len has bit 1 or bit 2 set: copy a pair of bytes.
 *   - len odd:  copy the final byte.
 * All exits with data copied go through .Ldone.
 *
 * Only the load1b/load2b macros are used to read the source here.
 * As elsewhere, the adcs chain relies on sub/strb/tst/teq leaving
 * the carry flag untouched.  Scratch registers written: ip, r8.
 *
 * NOTE(review): load1b/load2b and put_byte_0/put_byte_1 are defined
 * outside this file; their exact expansion should be confirmed
 * there.
 */
.Lless8:	teq	len, #0			@ check for zero count
		beq	.Lzero

		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		beq	.Lless8_aligned

		/* Align dst */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	len, #6			@ any byte pairs left?
		beq	.Lless8_byteonly

		/* Copy two bytes per iteration while (len & 6) != 0 */
1:		load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
.Lless8_aligned:
		tst	len, #6
		bne	1b
.Lless8_byteonly:
		tst	len, #1			@ one odd byte remaining?
		beq	.Ldone
		load1b	r8
		adcs	sum, sum, r8, put_byte_0	@ update checksum
		strb	r8, [dst], #1
		b	.Ldone
88
89
/*
 * Function entry point, the path for word-aligned src and dst, and
 * the shared exits .Lexit / .Ldone.
 *
 * In:  src (r0), dst (r1), len (r2), sum (r3).
 * Out: r0 = checksum (see .Ldone).
 *
 * Flow:
 *   - len < 8 is handed to .Lless8.
 *   - Otherwise C is cleared, dst is brought to 32-bit alignment by
 *     .Ldst_unaligned if needed, and an unaligned src is handed to
 *     .Lsrc_not_aligned.
 *   - With both pointers aligned, data is moved 16 bytes at a time,
 *     then 8, then 4, then the last 1-3 bytes.
 *
 * The checksum is one long adcs chain.  Loop control deliberately
 * uses "sub" + "teq" (not "subs"), and the flag-setting bics/ands/tst
 * in between are relied on to leave C intact, so the carry from one
 * adcs reaches the next.
 *
 * Registers written besides the arguments: r4-r7, ip.
 *
 * NOTE(review): FN_ENTRY, save_regs, load_regs, load1l/load2l/load4l,
 * push, get_byte_N and put_byte_N are not defined in this file.
 * Presumably loadNl fetches N words from src (advancing it) and
 * push/get_byte_N/put_byte_N are endian-dependent shifter operands -
 * confirm against their definitions.
 */
FN_ENTRY
		save_regs

		cmp	len, #8			@ Ensure that we have at least
		blo	.Lless8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.Ldst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

		tst	src, #3			@ Test source alignment
		bne	.Lsrc_not_aligned

		/* Routine for src & dst aligned */

		bics	ip, len, #15		@ ip = bytes in whole 16-byte chunks
		beq	2f

1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16		@ no S: keep C for the next adcs
		teq	ip, #0
		bne	1b

2:		ands	ip, len, #12		@ remaining whole words (0-3)
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	4f

3:		load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

		/*
		 * 1-3 trailing bytes: one more word is fetched into r4 and
		 * its bytes are stored individually.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		load1l	r4
		tst	len, #2
		mov	r5, r4, get_byte_0
		beq	.Lexit			@ single byte only: r5 holds it
		adcs	sum, sum, r4, push #16	@ checksum the two bytes stored next
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2	@ candidate third byte for .Lexit

		/*
		 * Shared tail: if len is odd, store the byte held in r5
		 * and add it to the checksum; otherwise do nothing.
		 */
.Lexit:		tst	len, #1
		strneb	r5, [dst], #1
		andne	r5, r5, #255
		adcnes	sum, sum, r5, put_byte_0

		/*
		 * If the dst pointer was not 16-bit aligned, we
		 * need to rotate the checksum here to get around
		 * the inefficient byte manipulations in the
		 * architecture independent code.
		 *
		 * The final carry is folded in with adc, then the word at
		 * [sp, #0] is tested for oddness.
		 * NOTE(review): this assumes save_regs left the original
		 * dst at the top of the stack - confirm against save_regs.
		 */
.Ldone:		adc	r0, sum, #0
		ldr	sum, [sp, #0]		@ dst
		tst	sum, #1
		movne	r0, r0, ror #8
		load_regs
165
166
/*
 * dst is 32-bit aligned but src is not.  The pending carry is folded
 * into sum, src is rounded down to a word boundary and the first
 * word is fetched into r5.  ip = src & 3 (1, 2 or 3 here) selects
 * the variant: 2 -> .Lsrc2_aligned, 3 -> .Lsrc3_aligned, and 1 falls
 * through into the code below.
 *
 * Offset-1 variant: r4 always carries the not-yet-stored remainder
 * of the previous source word (shifted with "pull #8"); each output
 * word is that remainder OR-ed with the start of the next source
 * word (shifted with "push #24").  Data is moved 16 bytes at a time,
 * then 8, then 4.  The final 1-3 bytes are taken from r4 without a
 * further load, and the routine finishes through .Lexit / .Ldone.
 *
 * Carry handling: "cmp ip, #2" with ip == 1 leaves C clear and the
 * following mov does not set flags, hence the "C = 0" remark.  Loop
 * control uses sub + teq so the adcs chain keeps its carry.
 *
 * Registers written besides the arguments: r4-r8, ip.
 *
 * NOTE(review): load1l/load2l/load4l, push, pull and get_byte_N are
 * not defined in this file; presumably pull/push are the
 * endian-dependent shift directions used to splice adjacent words -
 * confirm against their definitions.
 */
.Lsrc_not_aligned:
		adc	sum, sum, #0		@ include C from dst alignment
		and	ip, src, #3		@ ip = byte offset within word
		bic	src, src, #3		@ round src down to a word
		load1l	r5
		cmp	ip, #2
		beq	.Lsrc2_aligned
		bhi	.Lsrc3_aligned
		mov	r4, r5, pull #8		@ C = 0
		bics	ip, len, #15		@ ip = bytes in whole 16-byte chunks
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		mov	r6, r6, pull #8
		orr	r6, r6, r7, push #24
		mov	r7, r7, pull #8
		orr	r7, r7, r8, push #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #8		@ remainder for the next word
		sub	ip, ip, #16		@ no S: keep C for the next adcs
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12		@ remaining whole words (0-3)
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #8
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #8
4:		ands	len, len, #3		@ 1-3 trailing bytes, taken from r4
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit			@ single byte only: r5 holds it
		adcs	sum, sum, r4, push #16	@ checksum the two bytes stored next
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2	@ candidate third byte for .Lexit
		b	.Lexit
224
225
/*
 * Variant for (src & 3) == 2, entered from .Lsrc_not_aligned with
 * the first source word in r5 and src already rounded down.
 *
 * r4 carries the not-yet-stored remainder of the previous source
 * word ("pull #16"); each output word is that remainder OR-ed with
 * the start of the next source word ("push #16").  Data is moved
 * 16 bytes at a time, then 8, then 4.
 *
 * Tail: if two or more bytes remain they are taken from r4, which
 * is added to the checksum as a whole.  If an odd byte remains
 * after that, it is fetched with load1b and handed to .Lexit in r5.
 *
 * Carry handling: we arrive via "beq" after "cmp ip, #2" with
 * ip == 2, which leaves C set, so "adds sum, sum, #0" is used to
 * clear it before the adcs chain starts.  Loop control uses
 * sub + teq so the chain keeps its carry.
 *
 * Registers written besides the arguments: r4-r8, ip.
 *
 * NOTE(review): load1b, load1l/load2l/load4l, push, pull and
 * get_byte_N are not defined in this file - confirm their expansion
 * against their definitions.
 */
.Lsrc2_aligned:	mov	r4, r5, pull #16
		adds	sum, sum, #0		@ C = 0
		bics	ip, len, #15		@ ip = bytes in whole 16-byte chunks
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		mov	r6, r6, pull #16
		orr	r6, r6, r7, push #16
		mov	r7, r7, pull #16
		orr	r7, r7, r8, push #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #16	@ remainder for the next word
		sub	ip, ip, #16		@ no S: keep C for the next adcs
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12		@ remaining whole words (0-3)
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #16
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #16
4:		ands	len, len, #3		@ 1-3 trailing bytes
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit			@ single byte only: r5 holds it
		adcs	sum, sum, r4		@ checksum the two bytes stored next
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		tst	len, #1
		beq	.Ldone
		load1b	r5			@ third byte is not in r4: fetch it
		b	.Lexit
278
279
/*
 * Variant for (src & 3) == 3, entered from .Lsrc_not_aligned with
 * the first source word in r5 and src already rounded down.
 *
 * r4 carries the not-yet-stored remainder of the previous source
 * word ("pull #24"); each output word is that remainder OR-ed with
 * the start of the next source word ("push #8").  Data is moved
 * 16 bytes at a time, then 8, then 4.
 *
 * Tail: the first trailing byte comes from r4.  If two or more
 * bytes remain, a further word is loaded to supply the second byte
 * (and the candidate third byte passed to .Lexit in r5).
 *
 * Carry handling: we arrive via "bhi" after "cmp ip, #2" with
 * ip == 3, which leaves C set, so "adds sum, sum, #0" is used to
 * clear it before the adcs chain starts.  Loop control uses
 * sub + teq so the chain keeps its carry.
 *
 * Registers written besides the arguments: r4-r8, ip.
 *
 * NOTE(review): load1l/load2l/load4l, push, pull, get_byte_N and
 * FN_EXIT are not defined in this file - confirm their expansion
 * against their definitions.  FN_EXIT presumably closes the function
 * opened by FN_ENTRY.
 */
.Lsrc3_aligned:	mov	r4, r5, pull #24
		adds	sum, sum, #0		@ C = 0
		bics	ip, len, #15		@ ip = bytes in whole 16-byte chunks
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		mov	r6, r6, pull #24
		orr	r6, r6, r7, push #8
		mov	r7, r7, pull #24
		orr	r7, r7, r8, push #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #24	@ remainder for the next word
		sub	ip, ip, #16		@ no S: keep C for the next adcs
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12		@ remaining whole words (0-3)
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #24
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #24
4:		ands	len, len, #3		@ 1-3 trailing bytes
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit			@ single byte only: r5 holds it
		strb	r5, [dst], #1
		adcs	sum, sum, r4		@ checksum the byte just stored
		load1l	r4			@ next word supplies the rest
		mov	r5, r4, get_byte_0
		strb	r5, [dst], #1
		adcs	sum, sum, r4, push #24	@ checksum the second byte
		mov	r5, r4, get_byte_1	@ candidate third byte for .Lexit
		b	.Lexit
FN_EXIT
333
334