/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 */
#include <asm/assembler.h>

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum)
 * r0 = src, r1 = dst, r2 = len, r3 = sum
 * Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */
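
/*
 * Rough C sketch of the semantics, for orientation only (the name below
 * is made up; the kernel's portable implementation is csum_partial() in
 * lib/checksum.c).  The routine copies len bytes from src to dst while
 * accumulating an Internet-checksum partial sum; once folded down to
 * 16 bits with csum_fold(), the value computed here agrees with the
 * value this routine returns.  The sketch ignores the word-at-a-time
 * fast paths and the ror #8 fix-up applied when dst starts on an odd
 * byte.
 *
 *	u32 csum_partial_copy_sketch(const u8 *src, u8 *dst, int len)
 *	{
 *		u64 sum = 0xffffffff;	// seeded like "mov sum, #-1" below
 *		int i;
 *
 *		for (i = 0; i + 1 < len; i += 2) {
 *			dst[i]     = src[i];
 *			dst[i + 1] = src[i + 1];
 *			sum += src[i] | (src[i + 1] << 8);  // LE 16-bit lanes
 *		}
 *		if (len & 1) {
 *			dst[len - 1] = src[len - 1];
 *			sum += src[len - 1];
 *		}
 *		while (sum >> 32)	// end-around carry fold
 *			sum = (sum & 0xffffffff) + (sum >> 32);
 *		return (u32)sum;
 *	}
 */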

src     .req    r0
dst     .req    r1
len     .req    r2
sum     .req    r3
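
/*
 * save_regs/load_regs, FN_ENTRY/FN_EXIT and the load1b/load2b/load1l/
 * load2l/load4l helpers are deliberately not defined in this file: they
 * are macros provided by the wrapper that #includes it, so the same body
 * serves both the plain copy and the copy-from-user variant.
 */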

.Lzero:         mov     r0, sum                 @ len == 0: return the sum as-is
                load_regs

/*
 * Align an unaligned destination pointer.  We know that
 * we have >= 8 bytes here, so we don't need to check
 * the length.  Note that the source pointer hasn't been
 * aligned yet.
 */
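
/*
 * put_byte_0/put_byte_1 (and the get_byte_* helpers used further down)
 * are operand-shift macros from <asm/assembler.h>: they move a byte into
 * the endianness-appropriate lane of a 32-bit word so that adcs folds it
 * into the running sum at the correct position.
 */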
.Ldst_unaligned:
                tst     dst, #1
                beq     .Ldst_16bit

                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     dst, #2
                reteq   lr                              @ dst is now 32bit aligned

.Ldst_16bit:    load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
                ret     lr                              @ dst is now 32bit aligned

/*
 * Handle 0 to 7 bytes, with any alignment of source and
 * destination pointers.  Note that when we get here, C = 0
 */
.Lless8:        teq     len, #0                         @ check for zero count
                beq     .Lzero

                /* we must have at least one byte. */
                tst     dst, #1                         @ dst 16-bit aligned
                beq     .Lless8_aligned

                /* Align dst */
                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     len, #6
                beq     .Lless8_byteonly

1:              load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
.Lless8_aligned:
                tst     len, #6
                bne     1b
.Lless8_byteonly:
                tst     len, #1
                beq     .Ldone
                load1b  r8
                adcs    sum, sum, r8, put_byte_0        @ update checksum
                strb    r8, [dst], #1
                b       .Ldone

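/*
 * Entry point.  The running sum is seeded with all ones so that the
 * value returned at .Ldone can never be zero, which lets the
 * copy-from-user wrapper use a zero return to signal a fault.  Lengths
 * below 8 bytes take the byte-at-a-time path above.
 */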
FN_ENTRY
                save_regs
                mov     sum, #-1

                cmp     len, #8                         @ Ensure that we have at least
                blo     .Lless8                         @ 8 bytes to copy.

                adds    sum, sum, #0                    @ C = 0
                tst     dst, #3                         @ Test destination alignment
                blne    .Ldst_unaligned                 @ align destination, return here

/*
 * Ok, the dst pointer is now 32bit aligned, and we know
 * that we must have more than 4 bytes to copy.  Note
 * that C contains the carry from the dst alignment above.
 */

                tst     src, #3                         @ Test source alignment
                bne     .Lsrc_not_aligned

                /* Routine for src & dst aligned */

                bics    ip, len, #15
                beq     2f

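                /*
                 * Main copy loop: 16 bytes per iteration.  The counter is
                 * decremented with a plain sub and tested with teq so that
                 * the carry flag feeding the adcs chain is never clobbered.
                 */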
1:              load4l  r4, r5, r6, r7
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b

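                /*
                 * Tail: copy any remaining whole words (4, 8 or 12 bytes),
                 * then store the last one to three bytes individually and
                 * fold them into the sum via the .Lexit path.
                 */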
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r4, r5
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                tst     ip, #4
                beq     4f

3:              load1l  r4
                str     r4, [dst], #4
                adcs    sum, sum, r4

4:              ands    len, len, #3
                beq     .Ldone
                load1l  r4
                tst     len, #2
                mov     r5, r4, get_byte_0
                beq     .Lexit
                adcs    sum, sum, r4, lspush #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2
.Lexit:         tst     len, #1
                strbne  r5, [dst], #1
                andne   r5, r5, #255
                adcsne  sum, sum, r5, put_byte_0

/*
 * If the dst pointer was not 16-bit aligned, we
 * need to rotate the checksum here to get around
 * the inefficient byte manipulations in the
 * architecture independent code.
 */
.Ldone:         adc     r0, sum, #0                     @ fold in the final carry
                ldr     sum, [sp, #0]                   @ dst
                tst     sum, #1
                movne   r0, r0, ror #8                  @ odd dst: swap the byte lanes back
                load_regs

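/*
 * Misaligned source.  The three variants below handle a source pointer
 * that is 1, 2 or 3 bytes past a word boundary: whole words are fetched
 * from the rounded-down address and spliced together with lspull/lspush
 * (endian-corrected lsr/lsl from <asm/assembler.h>) before being stored
 * and summed exactly as in the aligned routine.
 */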
.Lsrc_not_aligned:
                adc     sum, sum, #0                    @ include C from dst alignment
                and     ip, src, #3
                bic     src, src, #3
                load1l  r5
                cmp     ip, #2
                beq     .Lsrc2_aligned
                bhi     .Lsrc3_aligned
                mov     r4, r5, lspull #8               @ C = 0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #24
                mov     r5, r5, lspull #8
                orr     r5, r5, r6, lspush #24
                mov     r6, r6, lspull #8
                orr     r6, r6, r7, lspush #24
                mov     r7, r7, lspull #8
                orr     r7, r7, r8, lspush #24
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #8
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, lspush #24
                mov     r5, r5, lspull #8
                orr     r5, r5, r6, lspush #24
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #8
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, lspush #24
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #8
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                adcs    sum, sum, r4, lspush #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2
                b       .Lexit

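/*
 * Source two bytes past a word boundary: each output word is spliced
 * from 16-bit halves of two consecutive loads.
 */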
.Lsrc2_aligned: mov     r4, r5, lspull #16
                adds    sum, sum, #0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #16
                mov     r5, r5, lspull #16
                orr     r5, r5, r6, lspush #16
                mov     r6, r6, lspull #16
                orr     r6, r6, r7, lspush #16
                mov     r7, r7, lspull #16
                orr     r7, r7, r8, lspush #16
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #16
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, lspush #16
                mov     r5, r5, lspull #16
                orr     r5, r5, r6, lspush #16
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #16
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, lspush #16
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #16
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                adcs    sum, sum, r4
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                tst     len, #1
                beq     .Ldone
                load1b  r5
                b       .Lexit

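/*
 * Source three bytes past a word boundary: one byte comes from the
 * current word and three from the next (lspull #24 / lspush #8).
 */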
.Lsrc3_aligned: mov     r4, r5, lspull #24
                adds    sum, sum, #0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #8
                mov     r5, r5, lspull #24
                orr     r5, r5, r6, lspush #8
                mov     r6, r6, lspull #24
                orr     r6, r6, r7, lspush #8
                mov     r7, r7, lspull #24
                orr     r7, r7, r8, lspush #8
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #24
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, lspush #8
                mov     r5, r5, lspull #24
                orr     r5, r5, r6, lspush #8
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #24
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, lspush #8
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #24
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                strb    r5, [dst], #1
                adcs    sum, sum, r4
                load1l  r4
                mov     r5, r4, get_byte_0
                strb    r5, [dst], #1
                adcs    sum, sum, r4, lspush #24
                mov     r5, r4, get_byte_1
                b       .Lexit
FN_EXIT