Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/arm/string/memcpy.S
39491 views
1
/* $NetBSD: memcpy_xscale.S,v 1.1 2003/10/14 07:51:45 scw Exp $ */
2
3
/*
4
* Copyright 2003 Wasabi Systems, Inc.
5
* All rights reserved.
6
*
7
* Written by Steve C. Woodford for Wasabi Systems, Inc.
8
*
9
* Redistribution and use in source and binary forms, with or without
10
* modification, are permitted provided that the following conditions
11
* are met:
12
* 1. Redistributions of source code must retain the above copyright
13
* notice, this list of conditions and the following disclaimer.
14
* 2. Redistributions in binary form must reproduce the above copyright
15
* notice, this list of conditions and the following disclaimer in the
16
* documentation and/or other materials provided with the distribution.
17
* 3. All advertising materials mentioning features or use of this software
18
* must display the following acknowledgement:
19
* This product includes software developed for the NetBSD Project by
20
* Wasabi Systems, Inc.
21
* 4. The name of Wasabi Systems, Inc. may not be used to endorse
22
* or promote products derived from this software without specific prior
23
* written permission.
24
*
25
* THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35
* POSSIBILITY OF SUCH DAMAGE.
36
*/
37
38
#include <machine/asm.h>
39
.syntax unified
40
41
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
42
/*
 * memcpy entry point. Arguments follow the LINTSTUB prototype above:
 * r0 = dst, r1 = src, r2 = len. Copies of 12 bytes or fewer are handed
 * to .Lmemcpy_short; longer copies first advance the destination to a
 * word boundary one byte at a time. r3 is the working destination
 * pointer so that r0 is left holding dst.
 */
ENTRY(memcpy)
43
/* Preload hint for the start of the source buffer */
pld [r1]
44
/*
 * NOTE(review): ble is a signed comparison, so a len with bit 31 set
 * also takes this branch -- confirm callers never pass such lengths.
 */
cmp r2, #0x0c
45
ble .Lmemcpy_short /* <= 12 bytes */
46
mov r3, r0 /* We must not clobber r0 */
47
48
/* Word-align the destination buffer */
49
ands ip, r3, #0x03 /* Already word aligned? */
50
beq .Lmemcpy_wordaligned /* Yup */
51
/*
 * ip = dst & 3 (1, 2 or 3 here). One byte is always copied; the LE
 * group copies a second byte when ip <= 2 and the LT group a third
 * when ip == 1, i.e. 4 - ip bytes in total. The cmp below is the only
 * flag-setting instruction in this run, so both conditional groups
 * test its result. ip is reused as the data register once compared.
 */
cmp ip, #0x02
52
/* Unconditional first byte */
ldrb ip, [r1], #0x01
53
sub r2, r2, #0x01
54
strb ip, [r3], #0x01
55
/* Second byte: only when dst & 3 was 1 or 2 */
ldrble ip, [r1], #0x01
56
suble r2, r2, #0x01
57
strble ip, [r3], #0x01
58
/* Third byte: only when dst & 3 was 1 */
ldrblt ip, [r1], #0x01
59
sublt r2, r2, #0x01
60
strblt ip, [r3], #0x01
61
62
/* Destination buffer is now word aligned */
63
.Lmemcpy_wordaligned:
64
ands ip, r1, #0x03 /* Is src also word-aligned? */
65
bne .Lmemcpy_bad_align /* Nope. Things just got bad */
66
67
/* Quad-align the destination buffer */
68
tst r3, #0x07 /* Already quad aligned? */
69
ldrne ip, [r1], #0x04
70
stmfd sp!, {r4-r9} /* Free up some registers */
71
subne r2, r2, #0x04
72
strne ip, [r3], #0x04
73
74
/* Destination buffer quad aligned, source is at least word aligned */
75
subs r2, r2, #0x80
76
blt .Lmemcpy_w_lessthan128
77
78
/* Copy 128 bytes at a time */
79
.Lmemcpy_w_loop128:
80
ldr r4, [r1], #0x04 /* LD:00-03 */
81
ldr r5, [r1], #0x04 /* LD:04-07 */
82
pld [r1, #0x18] /* Prefetch 0x20 */
83
ldr r6, [r1], #0x04 /* LD:08-0b */
84
ldr r7, [r1], #0x04 /* LD:0c-0f */
85
ldr r8, [r1], #0x04 /* LD:10-13 */
86
ldr r9, [r1], #0x04 /* LD:14-17 */
87
strd r4, [r3], #0x08 /* ST:00-07 */
88
ldr r4, [r1], #0x04 /* LD:18-1b */
89
ldr r5, [r1], #0x04 /* LD:1c-1f */
90
strd r6, [r3], #0x08 /* ST:08-0f */
91
ldr r6, [r1], #0x04 /* LD:20-23 */
92
ldr r7, [r1], #0x04 /* LD:24-27 */
93
pld [r1, #0x18] /* Prefetch 0x40 */
94
strd r8, [r3], #0x08 /* ST:10-17 */
95
ldr r8, [r1], #0x04 /* LD:28-2b */
96
ldr r9, [r1], #0x04 /* LD:2c-2f */
97
strd r4, [r3], #0x08 /* ST:18-1f */
98
ldr r4, [r1], #0x04 /* LD:30-33 */
99
ldr r5, [r1], #0x04 /* LD:34-37 */
100
strd r6, [r3], #0x08 /* ST:20-27 */
101
ldr r6, [r1], #0x04 /* LD:38-3b */
102
ldr r7, [r1], #0x04 /* LD:3c-3f */
103
strd r8, [r3], #0x08 /* ST:28-2f */
104
ldr r8, [r1], #0x04 /* LD:40-43 */
105
ldr r9, [r1], #0x04 /* LD:44-47 */
106
pld [r1, #0x18] /* Prefetch 0x60 */
107
strd r4, [r3], #0x08 /* ST:30-37 */
108
ldr r4, [r1], #0x04 /* LD:48-4b */
109
ldr r5, [r1], #0x04 /* LD:4c-4f */
110
strd r6, [r3], #0x08 /* ST:38-3f */
111
ldr r6, [r1], #0x04 /* LD:50-53 */
112
ldr r7, [r1], #0x04 /* LD:54-57 */
113
strd r8, [r3], #0x08 /* ST:40-47 */
114
ldr r8, [r1], #0x04 /* LD:58-5b */
115
ldr r9, [r1], #0x04 /* LD:5c-5f */
116
strd r4, [r3], #0x08 /* ST:48-4f */
117
ldr r4, [r1], #0x04 /* LD:60-63 */
118
ldr r5, [r1], #0x04 /* LD:64-67 */
119
pld [r1, #0x18] /* Prefetch 0x80 */
120
strd r6, [r3], #0x08 /* ST:50-57 */
121
ldr r6, [r1], #0x04 /* LD:68-6b */
122
ldr r7, [r1], #0x04 /* LD:6c-6f */
123
strd r8, [r3], #0x08 /* ST:58-5f */
124
ldr r8, [r1], #0x04 /* LD:70-73 */
125
ldr r9, [r1], #0x04 /* LD:74-77 */
126
strd r4, [r3], #0x08 /* ST:60-67 */
127
ldr r4, [r1], #0x04 /* LD:78-7b */
128
ldr r5, [r1], #0x04 /* LD:7c-7f */
129
strd r6, [r3], #0x08 /* ST:68-6f */
130
strd r8, [r3], #0x08 /* ST:70-77 */
131
subs r2, r2, #0x80
132
strd r4, [r3], #0x08 /* ST:78-7f */
133
bge .Lmemcpy_w_loop128
134
135
.Lmemcpy_w_lessthan128:
136
adds r2, r2, #0x80 /* Adjust for extra sub */
137
ldmfdeq sp!, {r4-r9}
138
bxeq lr /* Return now if done */
139
subs r2, r2, #0x20
140
blt .Lmemcpy_w_lessthan32
141
142
/* Copy 32 bytes at a time */
143
.Lmemcpy_w_loop32:
144
ldr r4, [r1], #0x04
145
ldr r5, [r1], #0x04
146
pld [r1, #0x18]
147
ldr r6, [r1], #0x04
148
ldr r7, [r1], #0x04
149
ldr r8, [r1], #0x04
150
ldr r9, [r1], #0x04
151
strd r4, [r3], #0x08
152
ldr r4, [r1], #0x04
153
ldr r5, [r1], #0x04
154
strd r6, [r3], #0x08
155
strd r8, [r3], #0x08
156
subs r2, r2, #0x20
157
strd r4, [r3], #0x08
158
bge .Lmemcpy_w_loop32
159
160
/*
 * Tail of the word-aligned copy: fewer than 32 bytes remain.
 * On entry r2 holds (bytes remaining - 0x20), r1/r3 are the running
 * source/destination pointers and r4-r9 are still saved on the stack.
 */
.Lmemcpy_w_lessthan32:
161
adds r2, r2, #0x20 /* Adjust for extra sub */
162
ldmfdeq sp!, {r4-r9}
163
bxeq lr /* Return now if done */
164
165
/*
 * Computed jump into the three 8-byte copy blocks below.
 * r4 = 0x18 - (r2 & 0x18) is the number of bytes of those blocks that
 * must be skipped (0, 8, 16 or 24). Each block is four instructions
 * (16 bytes of code) per 8 bytes copied, hence the "lsl #1". pc reads
 * as the address of the addne plus 8, which is the first block after
 * the nop. When r4 == 0 the add is not executed and control falls
 * through the nop into the first block. Do not change the length of
 * any block without revisiting this arithmetic.
 */
and r4, r2, #0x18
166
rsbs r4, r4, #0x18
167
addne pc, pc, r4, lsl #1
168
nop
169
170
/* At least 24 bytes remaining */
171
ldr r4, [r1], #0x04
172
ldr r5, [r1], #0x04
173
sub r2, r2, #0x08
174
strd r4, [r3], #0x08
175
176
/* At least 16 bytes remaining */
177
ldr r4, [r1], #0x04
178
ldr r5, [r1], #0x04
179
sub r2, r2, #0x08
180
strd r4, [r3], #0x08
181
182
/* At least 8 bytes remaining */
183
ldr r4, [r1], #0x04
184
ldr r5, [r1], #0x04
185
/* Flag-setting form: Z tells the bxeq below whether anything is left */
subs r2, r2, #0x08
186
strd r4, [r3], #0x08
187
188
/* Less than 8 bytes remaining */
189
/*
 * The bxeq after this restore tests the flags from the subs above, or
 * from the rsbs when the computed jump landed here directly (in which
 * case r4 was non-zero, so the branch is not taken).
 */
ldmfd sp!, {r4-r9}
190
bxeq lr /* Return now if done */
191
/* Copy one more word if at least 4 bytes remain */
subs r2, r2, #0x04
192
ldrge ip, [r1], #0x04
193
strge ip, [r3], #0x04
194
bxeq lr /* Return now if done */
195
/* Fewer than 4 were left: undo the subtraction so r2 is 1..3 again */
addlt r2, r2, #0x04
196
/*
 * Final 1..3 bytes. The first byte is always copied; the GE pair
 * handles a second byte (r2 >= 2, with r2 itself reused as the data
 * register) and the GT pair a third (r2 == 3).
 */
ldrb ip, [r1], #0x01
197
cmp r2, #0x02
198
ldrbge r2, [r1], #0x01
199
strb ip, [r3], #0x01
200
ldrbgt ip, [r1]
201
strbge r2, [r3], #0x01
202
strbgt ip, [r3]
203
bx lr
204
205
206
/*
207
* At this point, it has not been possible to word align both buffers.
208
* The destination buffer is word aligned, but the source buffer is not.
209
*/
210
/*
 * Entry for a word-aligned destination with a misaligned source.
 * ip holds src & 3 (non-zero) from the ands at .Lmemcpy_wordaligned.
 * The source pointer is rounded down to a word boundary, the first
 * aligned word is preloaded into ip, and control is sent to the loop
 * matching the byte offset (1, 2 or 3).
 */
.Lmemcpy_bad_align:
211
/* Scratch registers for the loops; restored on each exit path */
stmfd sp!, {r4-r7}
212
bic r1, r1, #0x03
213
/* Compare before ip is overwritten by the load; ldr leaves the flags alone */
cmp ip, #2
214
ldr ip, [r1], #0x04
215
bgt .Lmemcpy_bad3
216
beq .Lmemcpy_bad2
217
b .Lmemcpy_bad1
218
219
.Lmemcpy_bad1_loop16:
220
mov r4, ip, lsr #8
221
ldr r5, [r1], #0x04
222
pld [r1, #0x018]
223
ldr r6, [r1], #0x04
224
ldr r7, [r1], #0x04
225
ldr ip, [r1], #0x04
226
orr r4, r4, r5, lsl #24
227
mov r5, r5, lsr #8
228
orr r5, r5, r6, lsl #24
229
mov r6, r6, lsr #8
230
orr r6, r6, r7, lsl #24
231
mov r7, r7, lsr #8
232
orr r7, r7, ip, lsl #24
233
str r4, [r3], #0x04
234
str r5, [r3], #0x04
235
str r6, [r3], #0x04
236
str r7, [r3], #0x04
237
.Lmemcpy_bad1:
238
subs r2, r2, #0x10
239
bge .Lmemcpy_bad1_loop16
240
241
adds r2, r2, #0x10
242
ldmfdeq sp!, {r4-r7}
243
bxeq lr /* Return now if done */
244
subs r2, r2, #0x04
245
sublt r1, r1, #0x03
246
blt .Lmemcpy_bad_done
247
248
.Lmemcpy_bad1_loop4:
249
mov r4, ip, lsr #8
250
ldr ip, [r1], #0x04
251
subs r2, r2, #0x04
252
orr r4, r4, ip, lsl #24
253
str r4, [r3], #0x04
254
bge .Lmemcpy_bad1_loop4
255
sub r1, r1, #0x03
256
b .Lmemcpy_bad_done
257
258
.Lmemcpy_bad2_loop16:
259
mov r4, ip, lsr #16
260
ldr r5, [r1], #0x04
261
pld [r1, #0x018]
262
ldr r6, [r1], #0x04
263
ldr r7, [r1], #0x04
264
ldr ip, [r1], #0x04
265
orr r4, r4, r5, lsl #16
266
mov r5, r5, lsr #16
267
orr r5, r5, r6, lsl #16
268
mov r6, r6, lsr #16
269
orr r6, r6, r7, lsl #16
270
mov r7, r7, lsr #16
271
orr r7, r7, ip, lsl #16
272
str r4, [r3], #0x04
273
str r5, [r3], #0x04
274
str r6, [r3], #0x04
275
str r7, [r3], #0x04
276
.Lmemcpy_bad2:
277
subs r2, r2, #0x10
278
bge .Lmemcpy_bad2_loop16
279
280
adds r2, r2, #0x10
281
ldmfdeq sp!, {r4-r7}
282
bxeq lr /* Return now if done */
283
subs r2, r2, #0x04
284
sublt r1, r1, #0x02
285
blt .Lmemcpy_bad_done
286
287
.Lmemcpy_bad2_loop4:
288
mov r4, ip, lsr #16
289
ldr ip, [r1], #0x04
290
subs r2, r2, #0x04
291
orr r4, r4, ip, lsl #16
292
str r4, [r3], #0x04
293
bge .Lmemcpy_bad2_loop4
294
sub r1, r1, #0x02
295
b .Lmemcpy_bad_done
296
297
.Lmemcpy_bad3_loop16:
298
mov r4, ip, lsr #24
299
ldr r5, [r1], #0x04
300
pld [r1, #0x018]
301
ldr r6, [r1], #0x04
302
ldr r7, [r1], #0x04
303
ldr ip, [r1], #0x04
304
orr r4, r4, r5, lsl #8
305
mov r5, r5, lsr #24
306
orr r5, r5, r6, lsl #8
307
mov r6, r6, lsr #24
308
orr r6, r6, r7, lsl #8
309
mov r7, r7, lsr #24
310
orr r7, r7, ip, lsl #8
311
str r4, [r3], #0x04
312
str r5, [r3], #0x04
313
str r6, [r3], #0x04
314
str r7, [r3], #0x04
315
.Lmemcpy_bad3:
316
subs r2, r2, #0x10
317
bge .Lmemcpy_bad3_loop16
318
319
adds r2, r2, #0x10
320
ldmfdeq sp!, {r4-r7}
321
bxeq lr /* Return now if done */
322
subs r2, r2, #0x04
323
sublt r1, r1, #0x01
324
blt .Lmemcpy_bad_done
325
326
.Lmemcpy_bad3_loop4:
327
mov r4, ip, lsr #24
328
ldr ip, [r1], #0x04
329
subs r2, r2, #0x04
330
orr r4, r4, ip, lsl #8
331
str r4, [r3], #0x04
332
bge .Lmemcpy_bad3_loop4
333
sub r1, r1, #0x01
334
335
.Lmemcpy_bad_done:
336
ldmfd sp!, {r4-r7}
337
adds r2, r2, #0x04
338
bxeq lr
339
ldrb ip, [r1], #0x01
340
cmp r2, #0x02
341
ldrbge r2, [r1], #0x01
342
strb ip, [r3], #0x01
343
ldrbgt ip, [r1]
344
strbge r2, [r3], #0x01
345
strbgt ip, [r3]
346
bx lr
347
348
349
/*
350
* Handle short copies (12 bytes or fewer), possibly misaligned.
351
* Some of these are *very* common, thanks to the network stack,
352
* and so are handled specially.
353
*/
354
.Lmemcpy_short:
	/*
	 * Short-copy dispatcher, reached from the memcpy entry with
	 * r0 = dst, r1 = src and r2 = len, where len <= 12.
	 */
#ifndef _STANDALONE
	/*
	 * Branch table indexed by len. pc reads as the address of the add
	 * plus 8, i.e. the slot following the nop, and every slot is a
	 * single 4-byte instruction, so the add lands on slot number len.
	 * Nothing may be inserted between the add and the table.
	 */
	add	pc, pc, r2, lsl #2
	nop
	bx	lr			/* 0x00 */
	b	.Lmemcpy_bytewise	/* 0x01 */
	b	.Lmemcpy_bytewise	/* 0x02 */
	b	.Lmemcpy_bytewise	/* 0x03 */
	b	.Lmemcpy_4		/* 0x04 */
	b	.Lmemcpy_bytewise	/* 0x05 */
	b	.Lmemcpy_6		/* 0x06 */
	b	.Lmemcpy_bytewise	/* 0x07 */
	b	.Lmemcpy_8		/* 0x08 */
	b	.Lmemcpy_bytewise	/* 0x09 */
	b	.Lmemcpy_bytewise	/* 0x0a */
	b	.Lmemcpy_bytewise	/* 0x0b */
	b	.Lmemcpy_c		/* 0x0c */
#else
	/*
	 * No branch table in standalone builds, so a zero length would
	 * fall straight into the loop below. That loop loads and stores
	 * one byte before it tests the counter, and the subs would wrap
	 * 0 to 0xffffffff. Return early instead.
	 */
	cmp	r2, #0x00
	bxeq	lr			/* Nothing to copy */
#endif

	/*
	 * Byte-at-a-time copy of r2 (>= 1) bytes from r1 to r0.
	 * The next byte is loaded only while the counter is non-zero, so
	 * exactly r2 bytes are read and written.
	 */
.Lmemcpy_bytewise:
	mov	r3, r0			/* We must not clobber r0 */
	ldrb	ip, [r1], #0x01
1:	subs	r2, r2, #0x01
	strb	ip, [r3], #0x01
	ldrbne	ip, [r1], #0x01
	bne	1b
	bx	lr
380
381
#ifndef _STANDALONE
382
/******************************************************************************
383
* Special case for 4 byte copies
384
*/
385
/*
 * 4-byte copy, specialised on the low two address bits of dst and src.
 * Each of the 16 alignment cases is padded to a
 * (1 << LMEMCPY_4_LOG2)-byte slot so that the dispatch below can reach
 * case N at .Lmemcpy_4 + (N << LMEMCPY_4_LOG2).
 */
#define LMEMCPY_4_LOG2 6 /* 64 bytes */
386
#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
387
LMEMCPY_4_PAD
388
.Lmemcpy_4:
389
/* r2 = ((dst & 3) << 2) | (src & 3): the slot index, 0..15 */
and r2, r1, #0x03
390
orr r2, r2, r0, lsl #2
391
/* Flag-setting form so the addne below can skip the jump for index 0 */
ands r2, r2, #0x0f
392
/*
 * pc reads as this instruction's address + 8. The sub sits 0x0c bytes
 * after the label, so pc - 0x14 is .Lmemcpy_4 itself. Inserting or
 * removing an instruction above requires adjusting the 0x14.
 */
sub r3, pc, #0x14
393
/* Index 0 falls through into the first case; otherwise jump to slot r2 */
addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
394
395
/*
396
* 0000: dst is 32-bit aligned, src is 32-bit aligned
397
*/
398
ldr r2, [r1]
399
str r2, [r0]
400
bx lr
401
LMEMCPY_4_PAD
402
403
/*
404
* 0001: dst is 32-bit aligned, src is 8-bit aligned
405
*/
406
ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
407
ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
408
mov r3, r3, lsr #8 /* r3 = .210 */
409
orr r3, r3, r2, lsl #24 /* r3 = 3210 */
410
str r3, [r0]
411
bx lr
412
LMEMCPY_4_PAD
413
414
/*
415
* 0010: dst is 32-bit aligned, src is 16-bit aligned
416
*/
417
ldrh r3, [r1, #0x02]
418
ldrh r2, [r1]
419
orr r3, r2, r3, lsl #16
420
str r3, [r0]
421
bx lr
422
LMEMCPY_4_PAD
423
424
/*
425
* 0011: dst is 32-bit aligned, src is 8-bit aligned
426
*/
427
ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
428
ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
429
mov r3, r3, lsr #24 /* r3 = ...0 */
430
orr r3, r3, r2, lsl #8 /* r3 = 3210 */
431
str r3, [r0]
432
bx lr
433
LMEMCPY_4_PAD
434
435
/*
436
* 0100: dst is 8-bit aligned, src is 32-bit aligned
437
*/
438
ldr r2, [r1]
439
strb r2, [r0]
440
mov r3, r2, lsr #8
441
mov r1, r2, lsr #24
442
strb r1, [r0, #0x03]
443
strh r3, [r0, #0x01]
444
bx lr
445
LMEMCPY_4_PAD
446
447
/*
448
* 0101: dst is 8-bit aligned, src is 8-bit aligned
449
*/
450
ldrb r2, [r1]
451
ldrh r3, [r1, #0x01]
452
ldrb r1, [r1, #0x03]
453
strb r2, [r0]
454
strh r3, [r0, #0x01]
455
strb r1, [r0, #0x03]
456
bx lr
457
LMEMCPY_4_PAD
458
459
/*
460
* 0110: dst is 8-bit aligned, src is 16-bit aligned
461
*/
462
/*
 * Two halfword loads from the 16-bit aligned source, then a byte,
 * halfword, byte store sequence so that each store to the
 * odd-addressed destination is naturally aligned.
 */
ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
463
ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
464
strb r2, [r0]
465
mov r2, r2, lsr #8 /* r2 = ...1 */
466
orr r2, r2, r3, lsl #8 /* r2 = .321 */
467
mov r3, r3, lsr #8 /* r3 = ...3 */
468
strh r2, [r0, #0x01]
469
strb r3, [r0, #0x03]
470
bx lr
471
LMEMCPY_4_PAD
472
473
/*
474
* 0111: dst is 8-bit aligned, src is 8-bit aligned
475
*/
476
ldrb r2, [r1]
477
ldrh r3, [r1, #0x01]
478
ldrb r1, [r1, #0x03]
479
strb r2, [r0]
480
strh r3, [r0, #0x01]
481
strb r1, [r0, #0x03]
482
bx lr
483
LMEMCPY_4_PAD
484
485
/*
486
* 1000: dst is 16-bit aligned, src is 32-bit aligned
487
*/
488
ldr r2, [r1]
489
strh r2, [r0]
490
mov r3, r2, lsr #16
491
strh r3, [r0, #0x02]
492
bx lr
493
LMEMCPY_4_PAD
494
495
/*
496
* 1001: dst is 16-bit aligned, src is 8-bit aligned
497
*/
498
ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
499
ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
500
mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
501
strh r1, [r0]
502
mov r2, r2, lsr #24 /* r2 = ...2 */
503
orr r2, r2, r3, lsl #8 /* r2 = xx32 */
504
strh r2, [r0, #0x02]
505
bx lr
506
LMEMCPY_4_PAD
507
508
/*
509
* 1010: dst is 16-bit aligned, src is 16-bit aligned
510
*/
511
ldrh r2, [r1]
512
ldrh r3, [r1, #0x02]
513
strh r2, [r0]
514
strh r3, [r0, #0x02]
515
bx lr
516
LMEMCPY_4_PAD
517
518
/*
519
* 1011: dst is 16-bit aligned, src is 8-bit aligned
520
*/
521
ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
522
ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
523
mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
524
strh r1, [r0, #0x02]
525
mov r3, r3, lsl #8 /* r3 = 321. */
526
orr r3, r3, r2, lsr #24 /* r3 = 3210 */
527
strh r3, [r0]
528
bx lr
529
LMEMCPY_4_PAD
530
531
/*
532
* 1100: dst is 8-bit aligned, src is 32-bit aligned
533
*/
534
ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
535
strb r2, [r0]
536
mov r3, r2, lsr #8
537
mov r1, r2, lsr #24
538
strh r3, [r0, #0x01]
539
strb r1, [r0, #0x03]
540
bx lr
541
LMEMCPY_4_PAD
542
543
/*
544
* 1101: dst is 8-bit aligned, src is 8-bit aligned
545
*/
546
ldrb r2, [r1]
547
ldrh r3, [r1, #0x01]
548
ldrb r1, [r1, #0x03]
549
strb r2, [r0]
550
strh r3, [r0, #0x01]
551
strb r1, [r0, #0x03]
552
bx lr
553
LMEMCPY_4_PAD
554
555
/*
556
* 1110: dst is 8-bit aligned, src is 16-bit aligned
557
*/
558
ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
559
ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
560
strb r2, [r0]
561
mov r2, r2, lsr #8 /* r2 = ...1 */
562
orr r2, r2, r3, lsl #8 /* r2 = .321 */
563
strh r2, [r0, #0x01]
564
mov r3, r3, lsr #8 /* r3 = ...3 */
565
strb r3, [r0, #0x03]
566
bx lr
567
LMEMCPY_4_PAD
568
569
/*
570
* 1111: dst is 8-bit aligned, src is 8-bit aligned
571
*/
572
ldrb r2, [r1]
573
ldrh r3, [r1, #0x01]
574
ldrb r1, [r1, #0x03]
575
strb r2, [r0]
576
strh r3, [r0, #0x01]
577
strb r1, [r0, #0x03]
578
bx lr
579
LMEMCPY_4_PAD
580
581
582
/******************************************************************************
583
* Special case for 6 byte copies
584
*/
585
#define LMEMCPY_6_LOG2 6 /* 64 bytes */
586
#define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
587
LMEMCPY_6_PAD
588
.Lmemcpy_6:
589
and r2, r1, #0x03
590
orr r2, r2, r0, lsl #2
591
ands r2, r2, #0x0f
592
sub r3, pc, #0x14
593
addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
594
595
/*
596
* 0000: dst is 32-bit aligned, src is 32-bit aligned
597
*/
598
ldr r2, [r1]
599
ldrh r3, [r1, #0x04]
600
str r2, [r0]
601
strh r3, [r0, #0x04]
602
bx lr
603
LMEMCPY_6_PAD
604
605
/*
606
* 0001: dst is 32-bit aligned, src is 8-bit aligned
607
*/
608
ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
609
ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
610
mov r2, r2, lsr #8 /* r2 = .210 */
611
orr r2, r2, r3, lsl #24 /* r2 = 3210 */
612
mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
613
str r2, [r0]
614
strh r3, [r0, #0x04]
615
bx lr
616
LMEMCPY_6_PAD
617
618
/*
619
* 0010: dst is 32-bit aligned, src is 16-bit aligned
620
*/
621
ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
622
ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
623
mov r1, r3, lsr #16 /* r1 = ..54 */
624
orr r2, r2, r3, lsl #16 /* r2 = 3210 */
625
str r2, [r0]
626
strh r1, [r0, #0x04]
627
bx lr
628
LMEMCPY_6_PAD
629
630
/*
631
* 0011: dst is 32-bit aligned, src is 8-bit aligned
632
*/
633
/*
 * src & 3 == 3, so r1 - 3, r1 + 1 and r1 + 5 are the three aligned
 * words that contain the six source bytes. They are loaded whole and
 * the wanted bytes are shifted together into one word and one halfword.
 */
ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
634
ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
635
ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */
636
mov r2, r2, lsr #24 /* r2 = ...0 */
637
orr r2, r2, r3, lsl #8 /* r2 = 3210 */
638
mov r1, r1, lsl #8 /* r1 = xx5. */
639
orr r1, r1, r3, lsr #24 /* r1 = xx54 */
640
str r2, [r0]
641
strh r1, [r0, #0x04]
642
bx lr
643
LMEMCPY_6_PAD
644
645
/*
646
* 0100: dst is 8-bit aligned, src is 32-bit aligned
647
*/
648
ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
649
ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
650
mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
651
strh r1, [r0, #0x01]
652
strb r3, [r0]
653
mov r3, r3, lsr #24 /* r3 = ...3 */
654
orr r3, r3, r2, lsl #8 /* r3 = .543 */
655
mov r2, r2, lsr #8 /* r2 = ...5 */
656
strh r3, [r0, #0x03]
657
strb r2, [r0, #0x05]
658
bx lr
659
LMEMCPY_6_PAD
660
661
/*
662
* 0101: dst is 8-bit aligned, src is 8-bit aligned
663
*/
664
ldrb r2, [r1]
665
ldrh r3, [r1, #0x01]
666
ldrh ip, [r1, #0x03]
667
ldrb r1, [r1, #0x05]
668
strb r2, [r0]
669
strh r3, [r0, #0x01]
670
strh ip, [r0, #0x03]
671
strb r1, [r0, #0x05]
672
bx lr
673
LMEMCPY_6_PAD
674
675
/*
676
* 0110: dst is 8-bit aligned, src is 16-bit aligned
677
*/
678
ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
679
ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
680
strb r2, [r0]
681
mov r3, r1, lsr #24
682
strb r3, [r0, #0x05]
683
mov r3, r1, lsr #8 /* r3 = .543 */
684
strh r3, [r0, #0x03]
685
mov r3, r2, lsr #8 /* r3 = ...1 */
686
orr r3, r3, r1, lsl #8 /* r3 = 4321 */
687
strh r3, [r0, #0x01]
688
bx lr
689
LMEMCPY_6_PAD
690
691
/*
692
* 0111: dst is 8-bit aligned, src is 8-bit aligned
693
*/
694
ldrb r2, [r1]
695
ldrh r3, [r1, #0x01]
696
ldrh ip, [r1, #0x03]
697
ldrb r1, [r1, #0x05]
698
strb r2, [r0]
699
strh r3, [r0, #0x01]
700
strh ip, [r0, #0x03]
701
strb r1, [r0, #0x05]
702
bx lr
703
LMEMCPY_6_PAD
704
705
/*
706
* 1000: dst is 16-bit aligned, src is 32-bit aligned
707
*/
708
ldrh r2, [r1, #0x04] /* r2 = ..54 */
709
ldr r3, [r1] /* r3 = 3210 */
710
mov r2, r2, lsl #16 /* r2 = 54.. */
711
orr r2, r2, r3, lsr #16 /* r2 = 5432 */
712
strh r3, [r0]
713
str r2, [r0, #0x02]
714
bx lr
715
LMEMCPY_6_PAD
716
717
/*
718
* 1001: dst is 16-bit aligned, src is 8-bit aligned
719
*/
720
ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
721
ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
722
mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
723
mov r2, r2, lsl #8 /* r2 = 543. */
724
orr r2, r2, r3, lsr #24 /* r2 = 5432 */
725
strh r1, [r0]
726
str r2, [r0, #0x02]
727
bx lr
728
LMEMCPY_6_PAD
729
730
/*
731
* 1010: dst is 16-bit aligned, src is 16-bit aligned
732
*/
733
ldrh r2, [r1]
734
ldr r3, [r1, #0x02]
735
strh r2, [r0]
736
str r3, [r0, #0x02]
737
bx lr
738
LMEMCPY_6_PAD
739
740
/*
741
* 1011: dst is 16-bit aligned, src is 8-bit aligned
742
*/
743
ldrb r3, [r1] /* r3 = ...0 */
744
ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
745
ldrb r1, [r1, #0x05] /* r1 = ...5 */
746
orr r3, r3, r2, lsl #8 /* r3 = 3210 */
747
mov r1, r1, lsl #24 /* r1 = 5... */
748
orr r1, r1, r2, lsr #8 /* r1 = 5432 */
749
strh r3, [r0]
750
str r1, [r0, #0x02]
751
bx lr
752
LMEMCPY_6_PAD
753
754
/*
755
* 1100: dst is 8-bit aligned, src is 32-bit aligned
756
*/
757
ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
758
ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
759
strb r2, [r0]
760
mov r2, r2, lsr #8 /* r2 = .321 */
761
orr r2, r2, r1, lsl #24 /* r2 = 4321 */
762
mov r1, r1, lsr #8 /* r1 = ...5 */
763
str r2, [r0, #0x01]
764
strb r1, [r0, #0x05]
765
bx lr
766
LMEMCPY_6_PAD
767
768
/*
769
* 1101: dst is 8-bit aligned, src is 8-bit aligned
770
*/
771
ldrb r2, [r1]
772
ldrh r3, [r1, #0x01]
773
ldrh ip, [r1, #0x03]
774
ldrb r1, [r1, #0x05]
775
strb r2, [r0]
776
strh r3, [r0, #0x01]
777
strh ip, [r0, #0x03]
778
strb r1, [r0, #0x05]
779
bx lr
780
LMEMCPY_6_PAD
781
782
/*
783
* 1110: dst is 8-bit aligned, src is 16-bit aligned
784
*/
785
ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
786
ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
787
strb r2, [r0]
788
mov r2, r2, lsr #8 /* r2 = ...1 */
789
orr r2, r2, r1, lsl #8 /* r2 = 4321 */
790
mov r1, r1, lsr #24 /* r1 = ...5 */
791
str r2, [r0, #0x01]
792
strb r1, [r0, #0x05]
793
bx lr
794
LMEMCPY_6_PAD
795
796
/*
797
* 1111: dst is 8-bit aligned, src is 8-bit aligned
798
*/
799
ldrb r2, [r1]
800
ldr r3, [r1, #0x01]
801
ldrb r1, [r1, #0x05]
802
strb r2, [r0]
803
str r3, [r0, #0x01]
804
strb r1, [r0, #0x05]
805
bx lr
806
LMEMCPY_6_PAD
807
808
809
/******************************************************************************
810
* Special case for 8 byte copies
811
*/
812
#define LMEMCPY_8_LOG2 6 /* 64 bytes */
813
#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
814
LMEMCPY_8_PAD
815
.Lmemcpy_8:
816
and r2, r1, #0x03
817
orr r2, r2, r0, lsl #2
818
ands r2, r2, #0x0f
819
sub r3, pc, #0x14
820
addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
821
822
/*
823
* 0000: dst is 32-bit aligned, src is 32-bit aligned
824
*/
825
ldr r2, [r1]
826
ldr r3, [r1, #0x04]
827
str r2, [r0]
828
str r3, [r0, #0x04]
829
bx lr
830
LMEMCPY_8_PAD
831
832
/*
833
* 0001: dst is 32-bit aligned, src is 8-bit aligned
834
*/
835
ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
836
ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
837
ldrb r1, [r1, #0x07] /* r1 = ...7 */
838
mov r3, r3, lsr #8 /* r3 = .210 */
839
orr r3, r3, r2, lsl #24 /* r3 = 3210 */
840
mov r1, r1, lsl #24 /* r1 = 7... */
841
orr r2, r1, r2, lsr #8 /* r2 = 7654 */
842
str r3, [r0]
843
str r2, [r0, #0x04]
844
bx lr
845
LMEMCPY_8_PAD
846
847
/*
848
* 0010: dst is 32-bit aligned, src is 16-bit aligned
849
*/
850
ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
851
ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
852
ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
853
orr r2, r2, r3, lsl #16 /* r2 = 3210 */
854
mov r3, r3, lsr #16 /* r3 = ..54 */
855
orr r3, r3, r1, lsl #16 /* r3 = 7654 */
856
str r2, [r0]
857
str r3, [r0, #0x04]
858
bx lr
859
LMEMCPY_8_PAD
860
861
/*
862
* 0011: dst is 32-bit aligned, src is 8-bit aligned
863
*/
864
ldrb r3, [r1] /* r3 = ...0 */
865
ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
866
ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
867
orr r3, r3, r2, lsl #8 /* r3 = 3210 */
868
mov r2, r2, lsr #24 /* r2 = ...4 */
869
orr r2, r2, r1, lsl #8 /* r2 = 7654 */
870
str r3, [r0]
871
str r2, [r0, #0x04]
872
bx lr
873
LMEMCPY_8_PAD
874
875
/*
876
* 0100: dst is 8-bit aligned, src is 32-bit aligned
877
*/
878
ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
879
ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
880
strb r3, [r0]
881
mov r1, r2, lsr #24 /* r1 = ...7 */
882
strb r1, [r0, #0x07]
883
mov r1, r3, lsr #8 /* r1 = .321 */
884
mov r3, r3, lsr #24 /* r3 = ...3 */
885
orr r3, r3, r2, lsl #8 /* r3 = 6543 */
886
strh r1, [r0, #0x01]
887
str r3, [r0, #0x03]
888
bx lr
889
LMEMCPY_8_PAD
890
891
/*
892
* 0101: dst is 8-bit aligned, src is 8-bit aligned
893
*/
894
ldrb r2, [r1]
895
ldrh r3, [r1, #0x01]
896
ldr ip, [r1, #0x03]
897
ldrb r1, [r1, #0x07]
898
strb r2, [r0]
899
strh r3, [r0, #0x01]
900
str ip, [r0, #0x03]
901
strb r1, [r0, #0x07]
902
bx lr
903
LMEMCPY_8_PAD
904
905
/*
906
* 0110: dst is 8-bit aligned, src is 16-bit aligned
907
*/
908
ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
909
ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
910
ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
911
strb r2, [r0] /* 0 */
912
mov ip, r1, lsr #8 /* ip = ...7 */
913
strb ip, [r0, #0x07] /* 7 */
914
mov ip, r2, lsr #8 /* ip = ...1 */
915
orr ip, ip, r3, lsl #8 /* ip = 4321 */
916
mov r3, r3, lsr #8 /* r3 = .543 */
917
orr r3, r3, r1, lsl #24 /* r3 = 6543 */
918
strh ip, [r0, #0x01]
919
str r3, [r0, #0x03]
920
bx lr
921
LMEMCPY_8_PAD
922
923
/*
924
* 0111: dst is 8-bit aligned, src is 8-bit aligned
925
*/
926
ldrb r3, [r1] /* r3 = ...0 */
927
ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
928
ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
929
ldrb r1, [r1, #0x07] /* r1 = ...7 */
930
strb r3, [r0]
931
mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
932
strh ip, [r0, #0x01]
933
orr r2, r3, r2, lsl #16 /* r2 = 6543 */
934
str r2, [r0, #0x03]
935
strb r1, [r0, #0x07]
936
bx lr
937
LMEMCPY_8_PAD
938
939
/*
940
* 1000: dst is 16-bit aligned, src is 32-bit aligned
941
*/
942
ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
943
ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
944
mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
945
strh r2, [r0]
946
orr r2, r1, r3, lsl #16 /* r2 = 5432 */
947
mov r3, r3, lsr #16 /* r3 = ..76 */
948
str r2, [r0, #0x02]
949
strh r3, [r0, #0x06]
950
bx lr
951
LMEMCPY_8_PAD
952
953
/*
954
* 1001: dst is 16-bit aligned, src is 8-bit aligned
955
*/
956
ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
957
ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
958
ldrb ip, [r1, #0x07] /* ip = ...7 */
959
mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
960
strh r1, [r0]
961
mov r1, r2, lsr #24 /* r1 = ...2 */
962
orr r1, r1, r3, lsl #8 /* r1 = 5432 */
963
mov r3, r3, lsr #24 /* r3 = ...6 */
964
orr r3, r3, ip, lsl #8 /* r3 = ..76 */
965
str r1, [r0, #0x02]
966
strh r3, [r0, #0x06]
967
bx lr
968
LMEMCPY_8_PAD
969
970
/*
971
* 1010: dst is 16-bit aligned, src is 16-bit aligned
972
*/
973
ldrh r2, [r1]
974
ldr ip, [r1, #0x02]
975
ldrh r3, [r1, #0x06]
976
strh r2, [r0]
977
str ip, [r0, #0x02]
978
strh r3, [r0, #0x06]
979
bx lr
980
LMEMCPY_8_PAD
981
982
/*
983
* 1011: dst is 16-bit aligned, src is 8-bit aligned
984
*/
985
ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
986
ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
987
ldrb ip, [r1] /* ip = ...0 */
988
mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
989
strh r1, [r0, #0x06]
990
mov r3, r3, lsl #24 /* r3 = 5... */
991
orr r3, r3, r2, lsr #8 /* r3 = 5432 */
992
orr r2, ip, r2, lsl #8 /* r2 = 3210 */
993
str r3, [r0, #0x02]
994
strh r2, [r0]
995
bx lr
996
LMEMCPY_8_PAD
997
998
/*
999
* 1100: dst is 8-bit aligned, src is 32-bit aligned
1000
*/
1001
ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1002
ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1003
mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
1004
strh r1, [r0, #0x05]
1005
strb r2, [r0]
1006
mov r1, r3, lsr #24 /* r1 = ...7 */
1007
strb r1, [r0, #0x07]
1008
mov r2, r2, lsr #8 /* r2 = .321 */
1009
orr r2, r2, r3, lsl #24 /* r2 = 4321 */
1010
str r2, [r0, #0x01]
1011
bx lr
1012
LMEMCPY_8_PAD
1013
1014
/*
1015
* 1101: dst is 8-bit aligned, src is 8-bit aligned
1016
*/
1017
ldrb r3, [r1] /* r3 = ...0 */
1018
ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
1019
ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
1020
ldrb r1, [r1, #0x07] /* r1 = ...7 */
1021
strb r3, [r0]
1022
mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
1023
strh r3, [r0, #0x05]
1024
orr r2, r2, ip, lsl #16 /* r2 = 4321 */
1025
str r2, [r0, #0x01]
1026
strb r1, [r0, #0x07]
1027
bx lr
1028
LMEMCPY_8_PAD
1029
1030
/*
1031
* 1110: dst is 8-bit aligned, src is 16-bit aligned
1032
*/
1033
ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1034
ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1035
ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
1036
strb r2, [r0]
1037
mov ip, r2, lsr #8 /* ip = ...1 */
1038
orr ip, ip, r3, lsl #8 /* ip = 4321 */
1039
mov r2, r1, lsr #8 /* r2 = ...7 */
1040
strb r2, [r0, #0x07]
1041
mov r1, r1, lsl #8 /* r1 = .76. */
1042
orr r1, r1, r3, lsr #24 /* r1 = .765 */
1043
str ip, [r0, #0x01]
1044
strh r1, [r0, #0x05]
1045
bx lr
1046
LMEMCPY_8_PAD
1047
1048
/*
1049
* 1111: dst is 8-bit aligned, src is 8-bit aligned
1050
*/
1051
ldrb r2, [r1]
1052
ldr ip, [r1, #0x01]
1053
ldrh r3, [r1, #0x05]
1054
ldrb r1, [r1, #0x07]
1055
strb r2, [r0]
1056
str ip, [r0, #0x01]
1057
strh r3, [r0, #0x05]
1058
strb r1, [r0, #0x07]
1059
bx lr
1060
LMEMCPY_8_PAD
1061
1062
/******************************************************************************
1063
* Special case for 12 byte copies
*
* Dispatches on the low two bits of dst (r0) and src (r1) into a table
* of 16 straight-line copy routines. Each routine is padded to
* 1 << LMEMCPY_C_LOG2 bytes by LMEMCPY_C_PAD, so routine N starts at
* .Lmemcpy_c + (N << LMEMCPY_C_LOG2). Index 0000 is not jumped to: it
* is the code that immediately follows the dispatch sequence.
*
* Index = ((dst & 3) << 2) | (src & 3).
* In the register pictures below, hex digits 0-B name source bytes,
* the leftmost digit is the most significant byte of the register,
* '.' is a zero byte and 'x' is a byte outside the 12-byte source.
1064
*/
1065
#define LMEMCPY_C_LOG2 7 /* 128 bytes */
1066
#define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
1067
LMEMCPY_C_PAD
1068
.Lmemcpy_c:
1069
and r2, r1, #0x03 /* r2 = src & 3 */
1070
orr r2, r2, r0, lsl #2 /* r2 |= dst << 2 (high bits masked next) */
1071
ands r2, r2, #0x0f /* r2 = table index; Z set when index == 0 */
1072
sub r3, pc, #0x14 /* r3 = .Lmemcpy_c: in ARM state pc reads as this insn + 8 = .Lmemcpy_c + 0x14 */
1073
addne pc, r3, r2, lsl #LMEMCPY_C_LOG2 /* index != 0: jump to .Lmemcpy_c + (index << LMEMCPY_C_LOG2) */
1074
1075
/*
1076
* 0000: dst is 32-bit aligned, src is 32-bit aligned
* Reached by falling through from the dispatch sequence at .Lmemcpy_c
* when the computed index is zero. Copies three whole words.
1077
*/
1078
ldr r2, [r1] /* bytes 0-3 */
1079
ldr r3, [r1, #0x04] /* bytes 4-7 */
1080
ldr r1, [r1, #0x08] /* bytes 8-B (src pointer no longer needed) */
1081
str r2, [r0] /* bytes 0-3 -> dst+0 */
1082
str r3, [r0, #0x04] /* bytes 4-7 -> dst+4 */
1083
str r1, [r0, #0x08] /* bytes 8-B -> dst+8 */
1084
bx lr /* return; r0 was not modified */
1085
LMEMCPY_C_PAD
1086
1087
/*
1088
* 0001: dst is 32-bit aligned, src is 8-bit aligned (byte 1)
* The word load at src-1 also fetches the byte just below the source
* ('x'); it is shifted out before the store. The data is then
* re-packed into three words for dst+0/+4/+8.
1089
*/
1090
ldrb r2, [r1, #0xb] /* r2 = ...B */
1091
ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
1092
ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1093
ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
1094
mov r2, r2, lsl #24 /* r2 = B... */
1095
orr r2, r2, ip, lsr #8 /* r2 = BA98 */
1096
str r2, [r0, #0x08] /* bytes 8-B -> dst+8 */
1097
mov r2, ip, lsl #24 /* r2 = 7... */
1098
orr r2, r2, r3, lsr #8 /* r2 = 7654 */
1099
mov r1, r1, lsr #8 /* r1 = .210 */
1100
orr r1, r1, r3, lsl #24 /* r1 = 3210 */
1101
str r2, [r0, #0x04] /* bytes 4-7 -> dst+4 */
1102
str r1, [r0] /* bytes 0-3 -> dst+0 */
1103
bx lr /* return; r0 was not modified */
1104
LMEMCPY_C_PAD
1105
1106
/*
1107
* 0010: dst is 32-bit aligned, src is 16-bit aligned
* Loads halfword + word + word + halfword from src+0/+2/+6/+10 and
* re-packs them into three words for dst+0/+4/+8.
1108
*/
1109
ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1110
ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1111
ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
1112
ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
1113
orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1114
str r2, [r0] /* bytes 0-3 -> dst+0 */
1115
mov r3, r3, lsr #16 /* r3 = ..54 */
1116
orr r3, r3, ip, lsl #16 /* r3 = 7654 */
1117
mov r1, r1, lsl #16 /* r1 = BA.. */
1118
orr r1, r1, ip, lsr #16 /* r1 = BA98 */
1119
str r3, [r0, #0x04] /* bytes 4-7 -> dst+4 */
1120
str r1, [r0, #0x08] /* bytes 8-B -> dst+8 */
1121
bx lr /* return; r0 was not modified */
1122
LMEMCPY_C_PAD
1123
1124
/*
1125
* 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3)
* The word load at src+9 also fetches one byte past the 12-byte
* source ('x'); it is shifted out before the store. The data is then
* re-packed into three words for dst+0/+4/+8.
1126
*/
1127
ldrb r2, [r1] /* r2 = ...0 */
1128
ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
1129
ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
1130
ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
1131
orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1132
str r2, [r0] /* bytes 0-3 -> dst+0 */
1133
mov r3, r3, lsr #24 /* r3 = ...4 */
1134
orr r3, r3, ip, lsl #8 /* r3 = 7654 */
1135
mov r1, r1, lsl #8 /* r1 = BA9. */
1136
orr r1, r1, ip, lsr #24 /* r1 = BA98 */
1137
str r3, [r0, #0x04] /* bytes 4-7 -> dst+4 */
1138
str r1, [r0, #0x08] /* bytes 8-B -> dst+8 */
1139
bx lr /* return; r0 was not modified */
1140
LMEMCPY_C_PAD
1141
1142
/*
1143
* 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
* Loads three whole words, then stores byte + halfword + word + word
* + byte to dst+0/+1/+3/+7/+11.
1144
*/
1145
ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1146
ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1147
ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
1148
mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1149
strh r1, [r0, #0x01] /* low halfword of r1: bytes 1-2 -> dst+1 */
1150
strb r2, [r0] /* low byte of r2: byte 0 -> dst+0 */
1151
mov r1, r2, lsr #24 /* r1 = ...3 */
1152
orr r2, r1, r3, lsl #8 /* r2 = 6543 */
1153
mov r1, r3, lsr #24 /* r1 = ...7 */
1154
orr r1, r1, ip, lsl #8 /* r1 = A987 */
1155
mov ip, ip, lsr #24 /* ip = ...B */
1156
str r2, [r0, #0x03] /* bytes 3-6 -> dst+3 */
1157
str r1, [r0, #0x07] /* bytes 7-A -> dst+7 */
1158
strb ip, [r0, #0x0b] /* byte B -> dst+11 */
1159
bx lr /* return; r0 was not modified */
1160
LMEMCPY_C_PAD
1161
1162
/*
1163
* 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
* Both pointers use the same access offsets, so no shifting or merging
* is needed: copy byte + halfword + word + word + byte at
* +0/+1/+3/+7/+11.
1164
*/
1165
ldrb r2, [r1] /* byte 0 */
1166
ldrh r3, [r1, #0x01] /* bytes 1-2 */
1167
ldr ip, [r1, #0x03] /* bytes 3-6 */
1168
strb r2, [r0] /* byte 0 -> dst+0; frees r2 for the next load */
1169
ldr r2, [r1, #0x07] /* bytes 7-A */
1170
ldrb r1, [r1, #0x0b] /* byte B (src pointer no longer needed) */
1171
strh r3, [r0, #0x01] /* bytes 1-2 -> dst+1 */
1172
str ip, [r0, #0x03] /* bytes 3-6 -> dst+3 */
1173
str r2, [r0, #0x07] /* bytes 7-A -> dst+7 */
1174
strb r1, [r0, #0x0b] /* byte B -> dst+11 */
1175
bx lr /* return; r0 was not modified */
1176
LMEMCPY_C_PAD
1177
1178
/*
1179
* 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
* Loads halfword + word + word + halfword from src+0/+2/+6/+10, then
* stores byte + halfword + word + word + byte to dst+0/+1/+3/+7/+11.
1180
*/
1181
ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1182
ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1183
ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
1184
ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
1185
strb r2, [r0] /* low byte of r2: byte 0 -> dst+0 */
1186
mov r2, r2, lsr #8 /* r2 = ...1 */
1187
orr r2, r2, r3, lsl #8 /* r2 = 4321 */
1188
strh r2, [r0, #0x01] /* low halfword of r2: bytes 1-2 -> dst+1 */
1189
mov r2, r3, lsr #8 /* r2 = .543 */
1190
orr r3, r2, ip, lsl #24 /* r3 = 6543 */
1191
mov r2, ip, lsr #8 /* r2 = .987 */
1192
orr r2, r2, r1, lsl #24 /* r2 = A987 */
1193
mov r1, r1, lsr #8 /* r1 = ...B */
1194
str r3, [r0, #0x03] /* bytes 3-6 -> dst+3 */
1195
str r2, [r0, #0x07] /* bytes 7-A -> dst+7 */
1196
strb r1, [r0, #0x0b] /* byte B -> dst+11 */
1197
bx lr /* return; r0 was not modified */
1198
LMEMCPY_C_PAD
1199
1200
/*
1201
* 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
* The word load at src+9 also fetches one byte past the 12-byte
* source ('x'); only the low byte of the shifted result is stored, so
* 'x' never reaches dst.
1202
*/
1203
ldrb r2, [r1] /* r2 = ...0 */
1204
ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
1205
ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
1206
ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
1207
strb r2, [r0] /* byte 0 -> dst+0 */
1208
strh r3, [r0, #0x01] /* low halfword of r3: bytes 1-2 -> dst+1 */
1209
mov r3, r3, lsr #16 /* r3 = ..43 */
1210
orr r3, r3, ip, lsl #16 /* r3 = 6543 */
1211
mov ip, ip, lsr #16 /* ip = ..87 */
1212
orr ip, ip, r1, lsl #16 /* ip = A987 */
1213
mov r1, r1, lsr #16 /* r1 = ..xB */
1214
str r3, [r0, #0x03] /* bytes 3-6 -> dst+3 */
1215
str ip, [r0, #0x07] /* bytes 7-A -> dst+7 */
1216
strb r1, [r0, #0x0b] /* low byte of r1: byte B -> dst+11 */
1217
bx lr /* return; r0 was not modified */
1218
LMEMCPY_C_PAD
1219
1220
/*
1221
* 1000: dst is 16-bit aligned, src is 32-bit aligned
* Loads three whole words, then stores halfword + word + word +
* halfword to dst+0/+2/+6/+10.
1222
*/
1223
ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
1224
ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
1225
ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
1226
mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
1227
strh ip, [r0] /* low halfword of ip: bytes 0-1 -> dst+0 */
1228
orr r1, r1, r3, lsl #16 /* r1 = 5432 */
1229
mov r3, r3, lsr #16 /* r3 = ..76 */
1230
orr r3, r3, r2, lsl #16 /* r3 = 9876 */
1231
mov r2, r2, lsr #16 /* r2 = ..BA */
1232
str r1, [r0, #0x02] /* bytes 2-5 -> dst+2 */
1233
str r3, [r0, #0x06] /* bytes 6-9 -> dst+6 */
1234
strh r2, [r0, #0x0a] /* bytes A-B -> dst+10 */
1235
bx lr /* return; r0 was not modified */
1236
LMEMCPY_C_PAD
1237
1238
/*
1239
* 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
* The word load at src-1 also fetches the byte just below the source
* ('x'); it is shifted out before anything is stored.
1240
*/
1241
ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1242
ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
1243
mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
1244
strh ip, [r0] /* low halfword of ip: bytes 0-1 -> dst+0 */
1245
ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
1246
ldrb r1, [r1, #0x0b] /* r1 = ...B */
1247
mov r2, r2, lsr #24 /* r2 = ...2 */
1248
orr r2, r2, r3, lsl #8 /* r2 = 5432 */
1249
mov r3, r3, lsr #24 /* r3 = ...6 */
1250
orr r3, r3, ip, lsl #8 /* r3 = 9876 */
1251
mov r1, r1, lsl #8 /* r1 = ..B. */
1252
orr r1, r1, ip, lsr #24 /* r1 = ..BA */
1253
str r2, [r0, #0x02] /* bytes 2-5 -> dst+2 */
1254
str r3, [r0, #0x06] /* bytes 6-9 -> dst+6 */
1255
strh r1, [r0, #0x0a] /* bytes A-B -> dst+10 */
1256
bx lr /* return; r0 was not modified */
1257
LMEMCPY_C_PAD
1258
1259
/*
1260
* 1010: dst is 16-bit aligned, src is 16-bit aligned
* Both pointers use the same access offsets, so no shifting or merging
* is needed: copy halfword + word + word + halfword at +0/+2/+6/+10.
1261
*/
1262
ldrh r2, [r1] /* bytes 0-1 */
1263
ldr r3, [r1, #0x02] /* bytes 2-5 */
1264
ldr ip, [r1, #0x06] /* bytes 6-9 */
1265
ldrh r1, [r1, #0x0a] /* bytes A-B (src pointer no longer needed) */
1266
strh r2, [r0] /* bytes 0-1 -> dst+0 */
1267
str r3, [r0, #0x02] /* bytes 2-5 -> dst+2 */
1268
str ip, [r0, #0x06] /* bytes 6-9 -> dst+6 */
1269
strh r1, [r0, #0x0a] /* bytes A-B -> dst+10 */
1270
bx lr /* return; r0 was not modified */
1271
LMEMCPY_C_PAD
1272
1273
/*
1274
* 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
* The word load at src+9 also fetches one byte past the 12-byte
* source ('x'); it never lands in a stored halfword or word. This
* case works from the end of the buffer towards the start.
1275
*/
1276
ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
1277
ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
1278
mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
1279
strh ip, [r0, #0x0a] /* low halfword of ip: bytes A-B -> dst+10 */
1280
ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
1281
ldrb r1, [r1] /* r1 = ...0 */
1282
mov r2, r2, lsl #24 /* r2 = 9... */
1283
orr r2, r2, r3, lsr #8 /* r2 = 9876 */
1284
mov r3, r3, lsl #24 /* r3 = 5... */
1285
orr r3, r3, ip, lsr #8 /* r3 = 5432 */
1286
orr r1, r1, ip, lsl #8 /* r1 = 3210 */
1287
str r2, [r0, #0x06] /* bytes 6-9 -> dst+6 */
1288
str r3, [r0, #0x02] /* bytes 2-5 -> dst+2 */
1289
strh r1, [r0] /* low halfword of r1: bytes 0-1 -> dst+0 */
1290
bx lr /* return; r0 was not modified */
1291
LMEMCPY_C_PAD
1292
1293
/*
1294
* 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
* Loads three whole words, then stores byte + word + word + halfword
* + byte to dst+0/+1/+5/+9/+11.
1295
*/
1296
ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1297
ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
1298
ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
1299
strb r2, [r0] /* low byte of r2: byte 0 -> dst+0 */
1300
mov r3, r2, lsr #8 /* r3 = .321 */
1301
orr r3, r3, ip, lsl #24 /* r3 = 4321 */
1302
str r3, [r0, #0x01] /* bytes 1-4 -> dst+1 */
1303
mov r3, ip, lsr #8 /* r3 = .765 */
1304
orr r3, r3, r1, lsl #24 /* r3 = 8765 */
1305
str r3, [r0, #0x05] /* bytes 5-8 -> dst+5 */
1306
mov r1, r1, lsr #8 /* r1 = .BA9 */
1307
strh r1, [r0, #0x09] /* low halfword of r1: bytes 9-A -> dst+9 */
1308
mov r1, r1, lsr #16 /* r1 = ...B */
1309
strb r1, [r0, #0x0b] /* byte B -> dst+11 */
1310
bx lr /* return; r0 was not modified */
1311
LMEMCPY_C_PAD
1312
1313
/*
1314
* 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
* The word load at src-1 also fetches the byte just below the source
* ('x'); it is shifted out before anything is stored. This case
* works from the end of the buffer towards the start.
1315
*/
1316
ldrb r2, [r1, #0x0b] /* r2 = ...B */
1317
ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
1318
ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
1319
ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
1320
strb r2, [r0, #0x0b] /* byte B -> dst+11 */
1321
mov r2, r3, lsr #16 /* r2 = ..A9 */
1322
strh r2, [r0, #0x09] /* bytes 9-A -> dst+9 */
1323
mov r3, r3, lsl #16 /* r3 = 87.. */
1324
orr r3, r3, ip, lsr #16 /* r3 = 8765 */
1325
mov ip, ip, lsl #16 /* ip = 43.. */
1326
orr ip, ip, r1, lsr #16 /* ip = 4321 */
1327
mov r1, r1, lsr #8 /* r1 = .210 */
1328
str r3, [r0, #0x05] /* bytes 5-8 -> dst+5 */
1329
str ip, [r0, #0x01] /* bytes 1-4 -> dst+1 */
1330
strb r1, [r0] /* low byte of r1: byte 0 -> dst+0 */
1331
bx lr /* return; r0 was not modified */
1332
LMEMCPY_C_PAD
1333
1334
/*
1335
* 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
* Loads halfword + word + word + halfword from src+0/+2/+6/+10, then
* stores byte + word + word + halfword + byte to dst+0/+1/+5/+9/+11.
1336
*/
1337
ldrh r2, [r1] /* r2 = ..10 */
1338
ldr r3, [r1, #0x02] /* r3 = 5432 */
1339
ldr ip, [r1, #0x06] /* ip = 9876 */
1340
ldrh r1, [r1, #0x0a] /* r1 = ..BA */
1341
strb r2, [r0] /* low byte of r2: byte 0 -> dst+0 */
1342
mov r2, r2, lsr #8 /* r2 = ...1 */
1343
orr r2, r2, r3, lsl #8 /* r2 = 4321 */
1344
mov r3, r3, lsr #24 /* r3 = ...5 */
1345
orr r3, r3, ip, lsl #8 /* r3 = 8765 */
1346
mov ip, ip, lsr #24 /* ip = ...9 */
1347
orr ip, ip, r1, lsl #8 /* ip = .BA9 */
1348
mov r1, r1, lsr #8 /* r1 = ...B */
1349
str r2, [r0, #0x01] /* bytes 1-4 -> dst+1 */
1350
str r3, [r0, #0x05] /* bytes 5-8 -> dst+5 */
1351
strh ip, [r0, #0x09] /* low halfword of ip: bytes 9-A -> dst+9 */
1352
strb r1, [r0, #0x0b] /* byte B -> dst+11 */
1353
bx lr /* return; r0 was not modified */
1354
LMEMCPY_C_PAD
1355
1356
/*
1357
* 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
* Both pointers use the same access offsets, so no shifting or merging
* is needed: copy byte + word + word + halfword + byte at
* +0/+1/+5/+9/+11. This is the last table entry, so it is not
* followed by padding.
1358
*/
1359
ldrb r2, [r1] /* byte 0 */
1360
ldr r3, [r1, #0x01] /* bytes 1-4 */
1361
ldr ip, [r1, #0x05] /* bytes 5-8 */
1362
strb r2, [r0] /* byte 0 -> dst+0; frees r2 for the next load */
1363
ldrh r2, [r1, #0x09] /* bytes 9-A */
1364
ldrb r1, [r1, #0x0b] /* byte B (src pointer no longer needed) */
1365
str r3, [r0, #0x01] /* bytes 1-4 -> dst+1 */
1366
str ip, [r0, #0x05] /* bytes 5-8 -> dst+5 */
1367
strh r2, [r0, #0x09] /* bytes 9-A -> dst+9 */
1368
strb r1, [r0, #0x0b] /* byte B -> dst+11 */
1369
bx lr /* return; r0 was not modified */
1370
#endif /* !_STANDALONE */
1371
END(memcpy)
1372
1373
.section .note.GNU-stack,"",%progbits
1374
1375