GitHub Repository: torvalds/linux
Path: arch/powerpc/lib/copy_32.S
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Memory copy functions for 32-bit PowerPC.
 *
 * Copyright (C) 1996-2005 Paul Mackerras.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/code-patching-asm.h>
#include <asm/kasan.h>

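/*
 * COPY_16_BYTES copies one 16-byte chunk (four words) from the source
 * pointer in r4 to the destination pointer in r6, using r7-r10 as
 * scratch.  The callers pre-bias both pointers by -4, so the update
 * forms (lwzu/stwu) advance r4 and r6 by 16 per chunk.
 */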
#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

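/*
 * COPY_16_BYTES_WITHEX(n) is the same 16-byte copy, but each load and
 * store carries a numeric label (8n0 .. 8n7) so that the user-space
 * copy can attach exception-table fixups to every instruction that
 * may fault.
 */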
#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

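/*
 * COPY_16_BYTES_EXCODE(n) generates the fixup code for
 * COPY_16_BYTES_WITHEX(n): a fault in one of its loads lands at 9n0,
 * a fault in one of its stores at 9n1.  Both back the residual count
 * in r5 down by the 16 * n bytes of the current cache line that were
 * already copied, then branch to the common read (104f) or write
 * (105f) fault handler.  The EX_TABLE entries bind each faulting
 * instruction to its fixup.
 */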
#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
	EX_TABLE(8 ## n ## 0b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 1b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 2b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 3b,9 ## n ## 0b);	\
	EX_TABLE(8 ## n ## 4b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 5b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 6b,9 ## n ## 1b);	\
	EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)

	.text

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

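/*
 * memset16: fill a region with a 16-bit pattern.
 * r3 = destination, r4 = 16-bit value, r5 = count in halfwords.
 * The value is replicated into a full word and stored a word at a
 * time, with a trailing sth for an odd count.  Returns r3.
 */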
#ifndef CONFIG_KASAN
_GLOBAL(memset16)
	rlwinm.	r0, r5, 31, 1, 31
	addi	r6, r3, -4
	beq-	2f
	rlwimi	r4, r4, 16, 0, 15
	mtctr	r0
1:	stwu	r4, 4(r6)
	bdnz	1b
2:	andi.	r0, r5, 1
	beqlr
	sth	r4, 4(r6)
	blr
EXPORT_SYMBOL(memset16)
#endif

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero. This requires that the destination
 * area is cacheable. -- paulus
 *
 * During early init, cache might not be active yet, so dcbz cannot be used.
 * We therefore skip the optimised block that uses dcbz. This jump is
 * replaced by a nop once cache is active. This is done in machine_init()
 */
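/*
 * memset: r3 = destination, r4 = fill byte, r5 = length in bytes.
 * The fill byte is replicated into a full word for the word and
 * cache-line loops below.  Returns the destination in r3.
 */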
_GLOBAL_KASAN(memset)
	cmplwi	0,r5,4
	blt	7f

	rlwimi	r4,r4,8,16,23
	rlwimi	r4,r4,16,0,15

	stw	r4,0(r3)
	beqlr
	andi.	r0,r3,3
	add	r5,r0,r5
	subf	r6,r0,r3
	cmplwi	0,r4,0
	/*
	 * Skip optimised block until cache is enabled. Will be replaced
	 * by 'bne' during boot to use normal procedure if r4 is not zero
	 */
5:	b	2f
	patch_site	5b, patch__memset_nocache

	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
10:	dcbz	r7,r6
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
	addi	r5,r5,4

2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r3,-1
9:	stbu	r4,1(r6)
	bdnz	9b
	blr
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL_KASAN(memset)

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic. This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * During early init, cache might not be active yet, so dcbz cannot be used.
 * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
 * replaced by a nop once cache is active. This is done in machine_init()
 */
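/*
 * memcpy/memmove: r3 = destination, r4 = source, r5 = length in bytes.
 * memmove falls through to memcpy when the destination does not lie
 * above the source, and branches to backwards_memcpy when it does, so
 * overlapping moves are handled correctly.  Both return the
 * destination in r3.
 */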
_GLOBAL_KASAN(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

_GLOBAL_KASAN(memcpy)
1:	b	generic_memcpy
	patch_site	1b, patch__memcpy_nocache

	add	r7,r3,r5	/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4		/* cr0.lt &= cr1.lt */
	blt	generic_memcpy	/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0		/* is this more than total to do? */
	blt	63f		/* if not much to do */
	andi.	r8,r0,3		/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)	/* do some bytes */
	addi	r4,r4,1
	addi	r6,r6,1
	stb	r9,3(r6)
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)	/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
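/*
 * Copy whole cache lines: dcbz establishes each destination line in
 * the cache and zeroes it without reading it from memory, then the
 * COPY_16_BYTES blocks fill it.  This is why the destination must be
 * cacheable when this path is used.
 */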
53:
	dcbz	r11,r6
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
	addi	r4,r4,3
	addi	r6,r6,3
40:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	40b
65:	blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
EXPORT_SYMBOL_KASAN(memcpy)
EXPORT_SYMBOL_KASAN(memmove)

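/*
 * generic_memcpy: plain forward copy with no dcbz, used before the
 * cache is enabled and when the regions overlap.  Arguments are the
 * same as for memcpy (r3 = destination, r4 = source, r5 = length);
 * once the destination is word-aligned it moves two words per
 * iteration, then finishes with word and byte tails.
 */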
generic_memcpy:
	srwi.	r7,r5,3
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f		/* if less than 8 bytes to do */
	andi.	r0,r6,3		/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

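/*
 * backwards_memcpy: copy r5 bytes from r4 to r3 starting at the end
 * of both buffers and working downwards.  Used by memmove when the
 * destination lies above the source, so an overlapping move does not
 * overwrite bytes before they are read.
 */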
_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
	add	r6,r3,r5
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

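/*
 * __copy_tofrom_user: copy r5 bytes between user and kernel space.
 * r3 = destination, r4 = source, r5 = length in bytes.  Returns 0 in
 * r3 on success, or the number of bytes that could not be copied if a
 * fault occurs; the EX_TABLE entries route faults to the fixup code at
 * the end of the file.
 */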
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0		/* is this more than total to do? */
	blt	63f		/* if not much to do */
	andi.	r8,r0,3		/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)	/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)	/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	EX_TABLE(70b,100f)
	EX_TABLE(71b,101f)
	EX_TABLE(72b,102f)
	EX_TABLE(73b,103f)

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead. For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4
54:	dcbz	r11,r6
	EX_TABLE(54b,105f)
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b

63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0
	blr

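/*
 * Fault fixups for __copy_tofrom_user.  Each handler records whether
 * the fault was on a read (r9 = 0) or a write (r9 = 1) and how much
 * work remains, then joins the common code at 99:/106:, which computes
 * the number of bytes not copied, retries a read fault one byte at a
 * time, and returns the residual count in r3.
 */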
/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	mfctr	r8
	add	r0,r0,r8
	b	106f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
106:	slw	r3,r0,r3
	add.	r3,r3,r5
	beq	120f		/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then return the number of bytes still not copied (ctr is 0 if the retry completed) */
132:	mfctr	r3
120:	blr

	EX_TABLE(30b,108b)
	EX_TABLE(31b,109b)
	EX_TABLE(40b,110b)
	EX_TABLE(41b,111b)
	EX_TABLE(130b,132b)
	EX_TABLE(131b,120b)

EXPORT_SYMBOL(__copy_tofrom_user)