/* Source provenance: FreeBSD libc, lib/libc/arm/string/memmove.S
 * (github.com/freebsd/freebsd-src, branch main) */
/* $NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $ */

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
.syntax unified

/*
 * void *memmove(void *dst, const void *src, size_t len)
 *
 * Register usage throughout (per the AAPCS argument registers):
 *	r0  = dst (also the return value for memmove)
 *	r1  = src
 *	r2  = len (bytes remaining)
 *	r3, r12, lr = scratch; r4/r5 borrowed (saved/restored) in bulk loops
 *
 * Strategy: if the buffers do not overlap, tail-call memcpy.  Otherwise
 * copy forwards when dst < src and backwards when dst > src, handling
 * unaligned destination and source addresses with byte fix-ups and
 * shift-and-merge word loops.
 */
#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers (three-XOR swap) */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	it	eq
	RETeq			/* Bail now if src/dst are the same */
	ite	cc
	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)

	/* Determine copy direction */
	cmp	r1, r0
	it	cc
	bcc	.Lmemmove_backwards	/* src < dst: must copy backwards */

	itt	eq
	moveq	r0, #0			/* Quick abort for len=0 */
	RETeq

	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ittt	ge
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ittt	ge
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	itt	lt
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ittt	ge
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	it	eq
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_fsrcul:
	bic	r1, r1, #3		/* word-align src; r12 holds the skew */
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
	/* merge words shifted by 1 byte: dst word = src>>8 | next<<24 */
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* rewind src to true byte position */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
	/* merge words shifted by 2 bytes: dst word = src>>16 | next<<16 */
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* rewind src to true byte position */
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
	/* merge words shifted by 3 bytes: dst word = src>>24 | next<<8 */
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* rewind src to true byte position */
	b	.Lmemmove_fl4

.Lmemmove_backwards:
	/* copy from the high end down so overlapping bytes survive */
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ittt	ge
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ittt	ge
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	itt	lt
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ittt	ge
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	it	eq
	RETeq			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	bic	r1, r1, #3		/* word-align src; r12 holds the skew */
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
	/* backwards merge, skew 3: dst word = cur<<8 | prev>>24 */
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* restore src to true byte position */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
	/* backwards merge, skew 2: dst word = cur<<16 | prev>>16 */
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* restore src to true byte position */
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul1loop32:
	/* backwards merge, skew 1: dst word = cur<<24 | prev>>8 */
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* restore src to true byte position */
	b	.Lmemmove_bl4
#ifndef _BCOPY
END(memmove)
#else
END(bcopy)
#endif

	.section .note.GNU-stack,"",%progbits