CoCalc -- csumpartialcopygeneric.S

GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/arm/lib/csumpartialcopygeneric.S
¹⁰⁸¹⁷ views
/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */

/*
 * Symbolic names for the four argument registers, following the
 * register assignment given in the function header comment:
 * r0 = src, r1 = dst, r2 = len, r3 = sum.  The rest of the file refers
 * to the arguments by these names only (r0 is written directly just
 * where the return value is produced).
 */
src	.req	r0
dst	.req	r1
len	.req	r2
sum	.req	r3

/*
 * Zero-length exit, reached from .Lless8 when len == 0: nothing is
 * copied and the incoming sum is handed back unchanged in r0.
 *
 * load_regs is not defined in this file; presumably the including file
 * supplies it to undo save_regs and return to the caller -- confirm
 * there.
 */
.Lzero:		mov	r0, sum
		load_regs

		/*
		 * Align an unaligned destination pointer.  We know that
		 * we have >= 8 bytes here, so we don't need to check
		 * the length.  Note that the source pointer hasn't been
		 * aligned yet.
		 *
		 * This is a subroutine: the function entry reaches it with
		 * 'blne' when (dst & 3) != 0, and it returns by moving lr
		 * into pc.  It copies one byte if dst is odd and then, if
		 * dst is still not word aligned, two more bytes, adjusting
		 * len and sum as it goes.
		 *
		 * The checksum is accumulated with adcs.  The instructions
		 * in between are sub without an 's' suffix, strb and tst,
		 * none of which changes C, so the carry chains from one
		 * adcs to the next and is still pending on return.
		 *
		 * load1b, load2b, put_byte_0 and put_byte_1 are not defined
		 * in this file; presumably the including file provides them
		 * (byte loads from src, and shifts that place a byte in the
		 * proper checksum lane) -- confirm there.
		 */
.Ldst_unaligned:
		tst	dst, #1
		beq	.Ldst_16bit

		/* dst is odd: move a single byte to make it 16-bit aligned */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	dst, #2
		moveq	pc, lr			@ dst is now 32bit aligned

		/* dst is 16-bit but not 32-bit aligned: move two bytes */
.Ldst_16bit:	load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
		mov	pc, lr			@ dst is now 32bit aligned

		/*
		 * Handle 0 to 7 bytes, with any alignment of source and
		 * destination pointers.  Note that when we get here, C = 0
		 *
		 * C is clear because this code is entered through the
		 * 'blo' that follows 'cmp len, #8' at the function entry,
		 * and 'blo' is only taken with C clear.
		 *
		 * The copy is done a byte at a time: an optional leading
		 * byte to make dst 16-bit aligned, then byte pairs for as
		 * long as (len & 6) != 0, then an optional trailing byte
		 * when (len & 1) != 0.  Since len < 8 here, (len & 6) != 0
		 * is the same as "at least two bytes left".
		 *
		 * load1b, load2b, put_byte_0 and put_byte_1 are not defined
		 * in this file; presumably the including file provides
		 * them -- confirm there.
		 */
.Lless8:	teq	len, #0			@ check for zero count
		beq	.Lzero

		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		beq	.Lless8_aligned

		/* Align dst */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, put_byte_1	@ update checksum
		strb	ip, [dst], #1
		tst	len, #6
		beq	.Lless8_byteonly

		/* two bytes per iteration; the loop test is at .Lless8_aligned */
1:		load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, put_byte_0
		strb	r8, [dst], #1
		adcs	sum, sum, ip, put_byte_1
		strb	ip, [dst], #1
.Lless8_aligned:
		tst	len, #6
		bne	1b
.Lless8_byteonly:
		tst	len, #1
		beq	.Ldone
		load1b	r8
		adcs	sum, sum, r8, put_byte_0	@ update checksum
		strb	r8, [dst], #1
		b	.Ldone

/*
 * Function entry and the path for a word-aligned source.
 *
 * FN_ENTRY, save_regs and the load4l/load2l/load1l, push, get_byte_*
 * and put_byte_* helpers used below are not defined in this file;
 * presumably the file that includes this one supplies them -- confirm
 * there.
 *
 * Flow:
 *   - fewer than 8 bytes: everything is done by .Lless8;
 *   - otherwise C is cleared, dst is brought to a word boundary by
 *     .Ldst_unaligned if necessary, and an unaligned src is handed to
 *     .Lsrc_not_aligned;
 *   - with both pointers word aligned the data is moved in 16-byte
 *     chunks, then an 8-byte and a 4-byte step selected by bits 3 and 2
 *     of len, then 1 to 3 trailing bytes.
 *
 * The checksum is a single add-with-carry chain, so between two adcs
 * instructions only operations that leave C alone are used.
 */
FN_ENTRY
		save_regs

		cmp	len, #8			@ Ensure that we have at least
		blo	.Lless8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.Ldst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

		tst	src, #3			@ Test source alignment
		bne	.Lsrc_not_aligned

		/* Routine for src & dst aligned */

		/* ip = len rounded down to a multiple of 16 */
		bics	ip, len, #15
		beq	2f

		/*
		 * 16 bytes per iteration.  The counter is stepped with
		 * sub followed by teq instead of subs, so that the carry
		 * left by the last adcs survives into the next iteration
		 * (teq preserves C, sub without 's' sets no flags).
		 */
1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b

		/* ip = len & 12: bit 3 selects an 8-byte step, bit 2 a 4-byte step */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	4f

3:		load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

		/*
		 * 1 to 3 trailing bytes.  A whole word is loaded into r4
		 * and the wanted bytes are picked out of it.  The mov
		 * between 'tst len, #2' and the beq sets no flags, so the
		 * beq still acts on the result of the tst.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		load1l	r4
		tst	len, #2
		mov	r5, r4, get_byte_0
		beq	.Lexit
		adcs	sum, sum, r4, push #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2

		/*
		 * Shared tail, also branched to from the unaligned-source
		 * paths.  If len is odd, the byte in the low 8 bits of r5
		 * is stored and added to the checksum; if len is even all
		 * three conditional instructions are skipped.  Execution
		 * then continues with the code that follows.
		 */
.Lexit:		tst	len, #1
		strneb	r5, [dst], #1
		andne	r5, r5, #255
		adcnes	sum, sum, r5, put_byte_0

		/*
		 * If the dst pointer was not 16-bit aligned, we
		 * need to rotate the checksum here to get around
		 * the inefficient byte manipulations in the
		 * architecture independent code.
		 *
		 * Common exit.  The carry still pending from the last
		 * adcs is folded into the result with adc.  sum (r3) is
		 * then reused as scratch to fetch the original dst from
		 * [sp, #0]; this relies on save_regs having stored dst at
		 * the bottom of the frame (save_regs is not defined in
		 * this file -- confirm in the including file).  If that
		 * original dst was odd, the result is rotated by 8 bits.
		 *
		 * load_regs is likewise defined elsewhere; presumably it
		 * undoes save_regs and returns to the caller.
		 */
.Ldone:		adc	r0, sum, #0
		ldr	sum, [sp, #0]		@ dst
		tst	sum, #1
		movne	r0, r0, ror #8
		load_regs

/*
 * Destination is word aligned, source is not.
 *
 * The carry pending from the dst alignment is folded into sum first.
 * src is then rounded down to a word boundary and read a word at a
 * time; every output word is put together from the bytes left over
 * from the previous source word (kept in r4) and the leading bytes of
 * the next one.
 *
 * This block dispatches on (src & 3): 2 goes to .Lsrc2_aligned, 3 goes
 * to .Lsrc3_aligned, and the remaining case, 1, is handled right here
 * (0 never gets here, the entry code only branches here when
 * (src & 3) != 0).
 *
 * push, pull, get_byte_*, load1l, load2l and load4l are not defined in
 * this file; presumably the including file provides them (push/pull
 * look like endian-dependent shift directions) -- confirm there.
 */
.Lsrc_not_aligned:
		adc	sum, sum, #0		@ include C from dst alignment
		and	ip, src, #3
		bic	src, src, #3
		load1l	r5
		cmp	ip, #2
		beq	.Lsrc2_aligned
		bhi	.Lsrc3_aligned
		/*
		 * ip is 1 on this path, so the cmp above has left C clear;
		 * the mov sets no flags, hence the "C = 0" remark.
		 */
		mov	r4, r5, pull #8		@ C = 0
		bics	ip, len, #15
		beq	2f
		/*
		 * 16 bytes per iteration; r4 carries the left-over bytes
		 * from one source word to the next.  sub + teq is used
		 * for the counter so that C from the last adcs is kept.
		 */
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		mov	r6, r6, pull #8
		orr	r6, r6, r7, push #24
		mov	r7, r7, pull #8
		orr	r7, r7, r8, push #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #8
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
		/* 8-byte and 4-byte steps selected by bits 3 and 2 of len */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #8
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #8
		/*
		 * 1 to 3 trailing bytes, all taken from r4 without any
		 * further load; the last one (if len is odd) is left in
		 * r5 for .Lexit.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4, push #16
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_2
		b	.Lexit

/*
 * Unaligned source, (src & 3) == 2.  Entered from .Lsrc_not_aligned
 * with src already rounded down to a word boundary and the first
 * source word in r5.
 *
 * The 'cmp ip, #2' that led here compared equal and therefore left C
 * set, so C is cleared again with 'adds sum, sum, #0' before the adcs
 * chain starts.
 *
 * Same scheme as the offset-1 case, with shift amounts 16/16: r4
 * carries the left-over part of the previous source word.
 *
 * push, pull, get_byte_*, load1b, load1l, load2l and load4l are not
 * defined in this file; presumably the including file provides them --
 * confirm there.
 */
.Lsrc2_aligned:	mov	r4, r5, pull #16
		adds	sum, sum, #0
		bics	ip, len, #15
		beq	2f
		/*
		 * 16 bytes per iteration; sub + teq keeps C from the last
		 * adcs alive across the loop branch.
		 */
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		mov	r6, r6, pull #16
		orr	r6, r6, r7, push #16
		mov	r7, r7, pull #16
		orr	r7, r7, r8, push #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #16
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
		/* 8-byte and 4-byte steps selected by bits 3 and 2 of len */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #16
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #16
		/*
		 * 1 to 3 trailing bytes.  The first two come out of r4
		 * (added to the checksum in one go); a third one, if len
		 * is odd, is fetched with load1b and handed to .Lexit in
		 * r5.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		adcs	sum, sum, r4
		strb	r5, [dst], #1
		mov	r5, r4, get_byte_1
		strb	r5, [dst], #1
		tst	len, #1
		beq	.Ldone
		load1b	r5
		b	.Lexit

/*
 * Unaligned source, (src & 3) == 3.  Entered from .Lsrc_not_aligned
 * with src already rounded down to a word boundary and the first
 * source word in r5.
 *
 * The 'cmp ip, #2' that led here was "higher" and therefore left C
 * set, so C is cleared again with 'adds sum, sum, #0' before the adcs
 * chain starts.
 *
 * Same scheme as the other unaligned cases, with shift amounts 24/8:
 * r4 carries the left-over part of the previous source word.
 *
 * push, pull, get_byte_*, load1l, load2l, load4l and FN_EXIT are not
 * defined in this file; presumably the including file provides them --
 * confirm there.
 */
.Lsrc3_aligned:	mov	r4, r5, pull #24
		adds	sum, sum, #0
		bics	ip, len, #15
		beq	2f
		/*
		 * 16 bytes per iteration; sub + teq keeps C from the last
		 * adcs alive across the loop branch.
		 */
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		mov	r6, r6, pull #24
		orr	r6, r6, r7, push #8
		mov	r7, r7, pull #24
		orr	r7, r7, r8, push #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #24
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
		/* 8-byte and 4-byte steps selected by bits 3 and 2 of len */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #24
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #24
		/*
		 * 1 to 3 trailing bytes.  The first comes out of r4; if
		 * two or more are needed a fresh word is loaded, its first
		 * byte is stored and added, and its second byte is left in
		 * r5 for .Lexit to use when len is odd.
		 */
4:		ands	len, len, #3
		beq	.Ldone
		mov	r5, r4, get_byte_0
		tst	len, #2
		beq	.Lexit
		strb	r5, [dst], #1
		adcs	sum, sum, r4
		load1l	r4
		mov	r5, r4, get_byte_0
		strb	r5, [dst], #1
		adcs	sum, sum, r4, push #24
		mov	r5, r4, get_byte_1
		b	.Lexit
FN_EXIT

Product

Resources

Company