GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/powerpc64/string/bcopy.S

/*-
 * Copyright (c) 2018 Instituto de Pesquisas Eldorado
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of its contributors may
 *    be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>

#define BLOCK_SIZE_BITS 6
#define BLOCK_SIZE (1 << BLOCK_SIZE_BITS)
#define BLOCK_SIZE_MASK (BLOCK_SIZE - 1)

/* Minimum 8 byte alignment, to avoid cache-inhibited alignment faults. */
#ifndef ALIGN_MASK
#define ALIGN_MASK 0x7
#endif

#define MULTI_PHASE_THRESHOLD 512
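
/*
 * Copies of MULTI_PHASE_THRESHOLD (512) bytes or more go through the
 * three-phase copy below; shorter copies take the single-phase path.
 * BLOCK_SIZE (1 << 6 = 64 bytes) is the amount moved per iteration of
 * the phase 2 loop.
 */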

#ifndef FN_NAME
#ifdef MEMMOVE
#define FN_NAME __memmove
WEAK_REFERENCE(__memmove, memmove);
#else
#define FN_NAME __bcopy
WEAK_REFERENCE(__bcopy, bcopy);
#endif
#endif
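
/*
 * The same source builds both memmove() and bcopy(): with MEMMOVE
 * defined the entry point is __memmove with memmove as a weak alias,
 * otherwise __bcopy with bcopy as a weak alias.  A wrapper may also
 * predefine FN_NAME (and FN_PHASE2 below) to reuse this code.
 */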

/*
 * r3: dst
 * r4: src
 * r5: len
 */

ENTRY(FN_NAME)
	cmpld	%r3, %r4	/* src == dst? nothing to do */
	beqlr-
	cmpdi	%r5, 0		/* len == 0? nothing to do */
	beqlr-

#ifdef MEMMOVE
	std	%r3, -8(%r1)	/* save dst */
#else	/* bcopy: swap src/dst */
	mr	%r0, %r3
	mr	%r3, %r4
	mr	%r4, %r0
#endif
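
/*
 * memmove() must return the original dst, so it is saved in the red
 * zone at -8(%r1) and restored at .Ldone.  bcopy(src, dst, len) takes
 * its arguments in the opposite order of memmove(dst, src, len), so
 * the pointers are swapped and the rest of the code uses the memmove
 * convention: r3 = dst, r4 = src.
 */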

	/* First check for relative alignment; if src and dst are not mutually aligned, copy one byte at a time */
	andi.	%r8, %r3, ALIGN_MASK
	andi.	%r7, %r4, ALIGN_MASK
	cmpd	%r7, %r8
	bne	.Lunaligned

	cmpldi	%r5, MULTI_PHASE_THRESHOLD
	bge	.Lmulti_phase
	b	.Lfast_copy
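
/*
 * src and dst are not mutually aligned: fall back to a byte-at-a-time
 * copy, going forward when dst precedes src and backward otherwise, so
 * overlapping regions are still copied correctly.
 */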
.Lunaligned:
	/* forward or backward copy? */
	cmpd	%r4, %r3
	blt	.Lbackward_unaligned

	/* Just need to set up the increment and jump to the copy loop */
	li	%r0, 1
	mtctr	%r5
	b	.Lsingle_1_loop

.Lbackward_unaligned:
	/* advance src and dst to the last byte, set the decrement and jump to the copy loop */
	add	%r3, %r3, %r5
	addi	%r3, %r3, -1
	add	%r4, %r4, %r5
	addi	%r4, %r4, -1
	li	%r0, -1
	mtctr	%r5
	b	.Lsingle_1_loop
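
/*
 * Fast path for copies shorter than MULTI_PHASE_THRESHOLD: advance src
 * to a 16-byte boundary one byte at a time, then let the single-phase
 * loops move 16 bytes per iteration and finish the tail byte by byte.
 */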
.Lfast_copy:
	/* align src */
	cmpd	%r4, %r3	/* forward or backward copy? */
	blt	.Lbackward_align

	.align 5
.Lalign:
	andi.	%r0, %r4, 15
	beq	.Lsingle_copy
	lbz	%r0, 0(%r4)
	addi	%r4, %r4, 1
	stb	%r0, 0(%r3)
	addi	%r3, %r3, 1
	addi	%r5, %r5, -1
	cmpdi	%r5, 0
	beq-	.Ldone
	b	.Lalign

.Lbackward_align:
	/* advance src and dst to end (past last byte) */
	add	%r3, %r3, %r5
	add	%r4, %r4, %r5
	.align 5
.Lbackward_align_loop:
	andi.	%r0, %r4, 15
	beq	.Lbackward_single_copy
	lbzu	%r0, -1(%r4)
	addi	%r5, %r5, -1
	stbu	%r0, -1(%r3)
	cmpdi	%r5, 0
	beq-	.Ldone
	b	.Lbackward_align_loop
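
/*
 * Set up the parameters used by .Lsingle_phase: r0 is the per-byte
 * increment (+1 or -1), r8 the 16-byte stride (+16 or -16), and r9 a
 * pre/post adjustment (0 or -15) that repositions the pointers so the
 * 0(%rX)/8(%rX) loads and stores of the 16-byte loop stay inside the
 * buffer when copying backward.
 */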
.Lsingle_copy:
	/* forward copy */
	li	%r0, 1
	li	%r8, 16
	li	%r9, 0
	b	.Lsingle_phase

.Lbackward_single_copy:
	/* backward copy */
	li	%r0, -1
	li	%r8, -16
	li	%r9, -15
	/* point src and dst to last byte */
	addi	%r3, %r3, -1
	addi	%r4, %r4, -1
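
/*
 * Single-phase copy: move len / 16 chunks of 16 bytes in
 * .Lsingle_16_loop, then the remaining len % 16 bytes in
 * .Lsingle_1_loop.  The multi-phase code also branches here for
 * phase 3, with r5, r8 and r9 reloaded from the red zone.
 */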
.Lsingle_phase:
	srdi.	%r6, %r5, 4	/* number of 16-byte chunks */
	beq	.Lsingle_1

	/* pre-adjustment */
	add	%r3, %r3, %r9
	add	%r4, %r4, %r9

	mtctr	%r6
	.align 5
.Lsingle_16_loop:
	ld	%r6, 0(%r4)
	ld	%r7, 8(%r4)
	add	%r4, %r4, %r8
	std	%r6, 0(%r3)
	std	%r7, 8(%r3)
	add	%r3, %r3, %r8
	bdnz	.Lsingle_16_loop

	/* post-adjustment */
	sub	%r3, %r3, %r9
	sub	%r4, %r4, %r9

.Lsingle_1:
	andi.	%r6, %r5, 0x0f	/* number of trailing bytes */
	beq	.Ldone		/* none left? done */

	mtctr	%r6
	.align 5
.Lsingle_1_loop:
	lbz	%r6, 0(%r4)
	add	%r4, %r4, %r0	/* increment */
	stb	%r6, 0(%r3)
	add	%r3, %r3, %r0	/* increment */
	bdnz	.Lsingle_1_loop

.Ldone:
#ifdef MEMMOVE
	ld	%r3, -8(%r1)	/* restore dst */
#endif
	blr
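
/*
 * Multi-phase copy, used for MULTI_PHASE_THRESHOLD bytes or more:
 * phase 1 copies the unaligned head (forward) or tail (backward) of
 * the buffer one byte at a time, phase 2 copies aligned BLOCK_SIZE
 * (64-byte) blocks with 8-byte loads and stores, and phase 3 branches
 * into the single-phase code for the remaining bytes.  The phase byte
 * counts and the phase-3 stride/adjustment are staged in the red zone
 * below %r1 so the same loop bodies serve both copy directions.
 */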
.Lmulti_phase:
	/* set up multi-phase copy parameters */

	/* r7 = bytes before the aligned section of the buffer */
	andi.	%r6, %r4, 15
	subfic	%r7, %r6, 16
	/* r8 = bytes in and after the aligned section of the buffer */
	sub	%r8, %r5, %r7
	/* r9 = bytes after the aligned section of the buffer */
	andi.	%r9, %r8, BLOCK_SIZE_MASK
	/* r10 = BLOCKS in the aligned section of the buffer */
	srdi	%r10, %r8, BLOCK_SIZE_BITS

	/* forward or backward copy? */
	cmpd	%r4, %r3
	blt	.Lbackward_multi_copy

	/* set up forward copy parameters */
	std	%r7, -32(%r1)	/* bytes to copy in phase 1 */
	std	%r10, -40(%r1)	/* BLOCKS to copy in phase 2 */
	std	%r9, -48(%r1)	/* bytes to copy in phase 3 */

	li	%r0, 1		/* increment for phases 1 and 3 */
	li	%r5, BLOCK_SIZE	/* increment for phase 2 */

	/* op offsets for phase 2 */
	li	%r7, 0
	li	%r8, 16
	li	%r9, 32
	li	%r10, 48

	std	%r8, -16(%r1)	/* 16-byte increment (16) */
	std	%r7, -24(%r1)	/* 16-byte pre/post adjustment (0) */

	b	.Lphase1
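
/*
 * In the backward case src and dst will point at the last byte to be
 * copied, so the phase 2 offsets -15/-31/-47/-63 address the start of
 * each 16-byte chunk of a 64-byte block, mirroring 0/16/32/48 above.
 */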
.Lbackward_multi_copy:
	/* set up backward copy parameters */
	std	%r9, -32(%r1)	/* bytes to copy in phase 1 */
	std	%r10, -40(%r1)	/* BLOCKS to copy in phase 2 */
	std	%r7, -48(%r1)	/* bytes to copy in phase 3 */

	li	%r0, -1		/* increment for phases 1 and 3 */
	add	%r6, %r5, %r0	/* r6 = len - 1 */
	li	%r5, -BLOCK_SIZE	/* increment for phase 2 */
	/* advance src and dst to the last position */
	add	%r3, %r3, %r6
	add	%r4, %r4, %r6

	/* op offsets for phase 2 */
	li	%r7, -15
	li	%r8, -31
	li	%r9, -47
	li	%r10, -63

	add	%r6, %r7, %r0	/* r6 = -16 */
	std	%r6, -16(%r1)	/* 16-byte increment (-16) */
	std	%r7, -24(%r1)	/* 16-byte pre/post adjustment (-15) */

.Lphase1:
	ld	%r6, -32(%r1)	/* bytes to copy in phase 1 */
	cmpldi	%r6, 0		/* r6 == 0? skip phase 1 */
	beq+	.Lphase2

	mtctr	%r6
	.align 5
.Lphase1_loop:
	lbz	%r6, 0(%r4)
	add	%r4, %r4, %r0	/* phase 1 increment */
	stb	%r6, 0(%r3)
	add	%r3, %r3, %r0	/* phase 1 increment */
	bdnz	.Lphase1_loop
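
/*
 * Phase 2: copy BLOCK_SIZE (64-byte) blocks.  FN_PHASE2 allows an
 * alternative block-copy body to be plugged in (e.g. one using VSX
 * loads and stores); the default below processes each block as four
 * 16-byte chunks using the non-volatile registers r14-r21, which are
 * saved to and restored from the red zone.
 */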
.Lphase2:
	ld	%r6, -40(%r1)	/* BLOCKS to copy in phase 2 */
	cmpldi	%r6, 0		/* r6 == 0? skip phase 2 */
	beq	.Lphase3

#ifdef FN_PHASE2
	FN_PHASE2
#else
	/* save registers */
	std	%r14, -56(%r1)
	std	%r15, -64(%r1)
	std	%r16, -72(%r1)
	std	%r17, -80(%r1)
	std	%r18, -88(%r1)
	std	%r19, -96(%r1)
	std	%r20, -104(%r1)
	std	%r21, -112(%r1)

	addi	%r18, %r7, 8
	addi	%r19, %r8, 8
	addi	%r20, %r9, 8
	addi	%r21, %r10, 8

	mtctr	%r6
	.align 5
.Lphase2_loop:
	ldx	%r14, %r7, %r4
	ldx	%r15, %r18, %r4
	ldx	%r16, %r8, %r4
	ldx	%r17, %r19, %r4
	stdx	%r14, %r7, %r3
	stdx	%r15, %r18, %r3
	stdx	%r16, %r8, %r3
	stdx	%r17, %r19, %r3

	ldx	%r14, %r9, %r4
	ldx	%r15, %r20, %r4
	ldx	%r16, %r10, %r4
	ldx	%r17, %r21, %r4
	stdx	%r14, %r9, %r3
	stdx	%r15, %r20, %r3
	stdx	%r16, %r10, %r3
	stdx	%r17, %r21, %r3

	add	%r4, %r4, %r5	/* phase 2 increment */
	add	%r3, %r3, %r5	/* phase 2 increment */

	bdnz	.Lphase2_loop

	/* restore registers */
	ld	%r14, -56(%r1)
	ld	%r15, -64(%r1)
	ld	%r16, -72(%r1)
	ld	%r17, -80(%r1)
	ld	%r18, -88(%r1)
	ld	%r19, -96(%r1)
	ld	%r20, -104(%r1)
	ld	%r21, -112(%r1)
#endif

.Lphase3:
	/* load registers for transitioning into the single-phase logic */
	ld	%r5, -48(%r1)	/* bytes to copy in phase 3 */
	ld	%r8, -16(%r1)	/* 16-byte increment */
	ld	%r9, -24(%r1)	/* 16-byte pre/post adjustment */
	b	.Lsingle_phase

END(FN_NAME)

	.section	.note.GNU-stack,"",%progbits