GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/lib/copyuser_64.S
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>

#ifndef SELFTEST_CASE
/* 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE 0
#endif
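
/*
 * Note: SELFTEST_CASE and the test_feature assignments below are presumably
 * consumed by the userspace copy-loop selftests, which build this file once
 * per case and pick one feature-section alternative; in a normal kernel
 * build the feature sections are patched at boot from the CPU feature bits.
 */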

#ifdef __BIG_ENDIAN__
#define sLd sld /* Shift towards low-numbered address. */
#define sHd srd /* Shift towards high-numbered address. */
#else
#define sLd srd /* Shift towards low-numbered address. */
#define sHd sld /* Shift towards high-numbered address. */
#endif
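
/*
 * Illustration (with placeholder register names): the unaligned-source code
 * below rebuilds each aligned destination doubleword from two adjacent
 * source doublewords with a shift-and-merge, roughly:
 *
 *   sLd  rA,rPREV,r10   # drop the bytes of rPREV already used
 *   sHd  rB,rNEXT,r11   # pull in the leading bytes of rNEXT
 *   or   rDEST,rB,rA
 *
 * where r10 = 8 * (source address & 7) and r11 = 64 - r10, as set up at
 * .Lsrc_unaligned.
 */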

/*
 * These macros are used to generate exception table entries.
 * The exception handlers below use the original arguments
 * (stored on the stack) and the point where we're up to in
 * the destination buffer, i.e. the address of the first
 * unmodified byte. Generally r3 points into the destination
 * buffer, but the first unmodified byte is at a variable
 * offset from r3. In the code below, the symbol r3_offset
 * is set to indicate the current offset at each point in
 * the code. This offset is then used as a negative offset
 * from the exception handler code, and those instructions
 * before the exception handlers are addi instructions that
 * adjust r3 to point to the correct place.
 */
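
/*
 * For example: if a load faults at a point where r3_offset = 16, the
 * first unmodified destination byte is at r3 + 16.  The exception entry
 * then points at .Lld_exc - 16, which is two addi r3,r3,8 / nop pairs
 * (16 bytes of instructions) before .Lld_exc, so by the time .Lld_exc is
 * reached r3 has been advanced by 16 and again points at the first
 * unmodified byte.
 */
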
.macro lex /* exception handler for load */
100: EX_TABLE(100b, .Lld_exc - r3_offset)
.endm

.macro stex /* exception handler for store */
100: EX_TABLE(100b, .Lst_exc - r3_offset)
.endm

.align 7
_GLOBAL_TOC(__copy_tofrom_user)
#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_FTR_SECTION
nop
FTR_SECTION_ELSE
b __copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#endif
_GLOBAL(__copy_tofrom_user_base)
/* first check for a 4kB copy on a 4kB boundary */
cmpldi cr1,r5,16
cmpdi cr6,r5,4096
or r0,r3,r4
neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */
andi. r0,r0,4095
std r3,-24(r1)
crand cr0*4+2,cr0*4+2,cr6*4+2
std r4,-16(r1)
std r5,-8(r1)
dcbt 0,r4
beq .Lcopy_page_4K
andi. r6,r6,7
PPC_MTOCRF(0x01,r5)
blt cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
nop
FTR_SECTION_ELSE
bne .Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
addi r3,r3,-16
r3_offset = 16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
andi. r0,r4,7
bne .Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
blt cr1,.Ldo_tail /* if < 16 bytes to copy */
srdi r0,r5,5
cmpdi cr1,r0,0
lex; ld r7,0(r4)
lex; ld r6,8(r4)
addi r4,r4,16
mtctr r0
andi. r0,r5,0x10
beq 22f
addi r3,r3,16
r3_offset = 0
addi r4,r4,-16
mr r9,r7
mr r8,r6
beq cr1,72f
21:
lex; ld r7,16(r4)
lex; ld r6,24(r4)
addi r4,r4,32
stex; std r9,0(r3)
r3_offset = 8
stex; std r8,8(r3)
r3_offset = 16
22:
lex; ld r9,0(r4)
lex; ld r8,8(r4)
stex; std r7,16(r3)
r3_offset = 24
stex; std r6,24(r3)
addi r3,r3,32
r3_offset = 0
bdnz 21b
72:
stex; std r9,0(r3)
r3_offset = 8
stex; std r8,8(r3)
r3_offset = 16
andi. r5,r5,0xf
beq+ 3f
addi r4,r4,16
.Ldo_tail:
addi r3,r3,16
r3_offset = 0
bf cr7*4+0,246f
lex; ld r9,0(r4)
addi r4,r4,8
stex; std r9,0(r3)
addi r3,r3,8
246: bf cr7*4+1,1f
lex; lwz r9,0(r4)
addi r4,r4,4
stex; stw r9,0(r3)
addi r3,r3,4
1: bf cr7*4+2,2f
lex; lhz r9,0(r4)
addi r4,r4,2
stex; sth r9,0(r3)
addi r3,r3,2
2: bf cr7*4+3,3f
lex; lbz r9,0(r4)
stex; stb r9,0(r3)
3: li r3,0
blr

.Lsrc_unaligned:
r3_offset = 16
srdi r6,r5,3
addi r5,r5,-16
subf r4,r0,r4
srdi r7,r5,4
sldi r10,r0,3
cmpldi cr6,r6,3
andi. r5,r5,7
mtctr r7
subfic r11,r10,64
add r5,r5,r0
bt cr7*4+0,28f

lex; ld r9,0(r4) /* 3+2n loads, 2+2n stores */
lex; ld r0,8(r4)
sLd r6,r9,r10
lex; ldu r9,16(r4)
sHd r7,r0,r11
sLd r8,r0,r10
or r7,r7,r6
blt cr6,79f
lex; ld r0,8(r4)
b 2f

28:
lex; ld r0,0(r4) /* 4+2n loads, 3+2n stores */
lex; ldu r9,8(r4)
sLd r8,r0,r10
addi r3,r3,-8
r3_offset = 24
blt cr6,5f
lex; ld r0,8(r4)
sHd r12,r9,r11
sLd r6,r9,r10
lex; ldu r9,16(r4)
or r12,r8,r12
sHd r7,r0,r11
sLd r8,r0,r10
addi r3,r3,16
r3_offset = 8
beq cr6,78f

1: or r7,r7,r6
lex; ld r0,8(r4)
stex; std r12,8(r3)
r3_offset = 16
2: sHd r12,r9,r11
sLd r6,r9,r10
lex; ldu r9,16(r4)
or r12,r8,r12
stex; stdu r7,16(r3)
r3_offset = 8
sHd r7,r0,r11
sLd r8,r0,r10
bdnz 1b

78:
stex; std r12,8(r3)
r3_offset = 16
or r7,r7,r6
79:
stex; std r7,16(r3)
r3_offset = 24
5: sHd r12,r9,r11
or r12,r8,r12
stex; std r12,24(r3)
r3_offset = 32
bne 6f
li r3,0
blr
6: cmpwi cr1,r5,8
addi r3,r3,32
r3_offset = 0
sLd r9,r9,r10
ble cr1,7f
lex; ld r0,8(r4)
sHd r7,r0,r11
or r9,r7,r9
7:
bf cr7*4+1,1f
#ifdef __BIG_ENDIAN__
rotldi r9,r9,32
#endif
stex; stw r9,0(r3)
#ifdef __LITTLE_ENDIAN__
rotrdi r9,r9,32
#endif
addi r3,r3,4
1: bf cr7*4+2,2f
#ifdef __BIG_ENDIAN__
rotldi r9,r9,16
#endif
stex; sth r9,0(r3)
#ifdef __LITTLE_ENDIAN__
rotrdi r9,r9,16
#endif
addi r3,r3,2
2: bf cr7*4+3,3f
#ifdef __BIG_ENDIAN__
rotldi r9,r9,8
#endif
stex; stb r9,0(r3)
#ifdef __LITTLE_ENDIAN__
rotrdi r9,r9,8
#endif
3: li r3,0
blr

.Ldst_unaligned:
r3_offset = 0
PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */
subf r5,r6,r5
li r7,0
cmpldi cr1,r5,16
bf cr7*4+3,1f
100: EX_TABLE(100b, .Lld_exc_r7)
lbz r0,0(r4)
100: EX_TABLE(100b, .Lst_exc_r7)
stb r0,0(r3)
addi r7,r7,1
1: bf cr7*4+2,2f
100: EX_TABLE(100b, .Lld_exc_r7)
lhzx r0,r7,r4
100: EX_TABLE(100b, .Lst_exc_r7)
sthx r0,r7,r3
addi r7,r7,2
2: bf cr7*4+1,3f
100: EX_TABLE(100b, .Lld_exc_r7)
lwzx r0,r7,r4
100: EX_TABLE(100b, .Lst_exc_r7)
stwx r0,r7,r3
3: PPC_MTOCRF(0x01,r5)
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned

.Lshort_copy:
r3_offset = 0
bf cr7*4+0,1f
lex; lwz r0,0(r4)
lex; lwz r9,4(r4)
addi r4,r4,8
stex; stw r0,0(r3)
stex; stw r9,4(r3)
addi r3,r3,8
1: bf cr7*4+1,2f
lex; lwz r0,0(r4)
addi r4,r4,4
stex; stw r0,0(r3)
addi r3,r3,4
2: bf cr7*4+2,3f
lex; lhz r0,0(r4)
addi r4,r4,2
stex; sth r0,0(r3)
addi r3,r3,2
3: bf cr7*4+3,4f
lex; lbz r0,0(r4)
stex; stb r0,0(r3)
4: li r3,0
blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lld_exc - r3_offset as the handler address.
 */
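
/*
 * For example: for a 128-byte request where the first byte that cannot be
 * copied is at offset 40 into the destination, the handlers below aim to
 * return 128 - 40 = 88 in r3, the number of bytes not copied (fewer if a
 * few more bytes can still be salvaged before giving up).
 */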

.Lld_exc_r7:
add r3,r3,r7
b .Lld_exc

/* adjust by 24 */
addi r3,r3,8
nop
/* adjust by 16 */
addi r3,r3,8
nop
/* adjust by 8 */
addi r3,r3,8
nop

/*
 * Here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination. We use the original arguments
 * and r3 to work out how much wasn't copied. Since we load some
 * distance ahead of the stores, we continue copying byte-by-byte until
 * we hit the load fault again in order to copy as much as possible.
 */
.Lld_exc:
ld r6,-24(r1)
ld r4,-16(r1)
ld r5,-8(r1)
subf r6,r6,r3
add r4,r4,r6
subf r5,r6,r5 /* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
mtctr r5
r3_offset = 0
100: EX_TABLE(100b, .Ldone)
43: lbz r0,0(r4)
addi r4,r4,1
stex; stb r0,0(r3)
addi r3,r3,1
bdnz 43b
li r3,0 /* huh? all copied successfully this time? */
blr

/*
 * here we have trapped again, amount remaining is in ctr.
 */
.Ldone:
mfctr r3
blr

/*
 * exception handlers for stores: we need to work out how many bytes
 * weren't copied, and we may need to copy some more.
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lst_exc - r3_offset as the handler address.
 */
.Lst_exc_r7:
add r3,r3,r7
b .Lst_exc

/* adjust by 24 */
addi r3,r3,8
nop
/* adjust by 16 */
addi r3,r3,8
nop
/* adjust by 8 */
addi r3,r3,4
/* adjust by 4 */
addi r3,r3,4
.Lst_exc:
ld r6,-24(r1) /* original destination pointer */
ld r4,-16(r1) /* original source pointer */
ld r5,-8(r1) /* original number of bytes */
add r7,r6,r5
/*
 * If the destination pointer isn't 8-byte aligned,
 * we may have got the exception as a result of a
 * store that overlapped a page boundary, so we may be
 * able to copy a few more bytes.
 */
17: andi. r0,r3,7
beq 19f
subf r8,r6,r3 /* #bytes copied */
100: EX_TABLE(100b,19f)
lbzx r0,r8,r4
100: EX_TABLE(100b,19f)
stb r0,0(r3)
addi r3,r3,1
cmpld r3,r7
blt 17b
19: subf r3,r3,r7 /* #bytes not copied in r3 */
blr

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
.macro exc
100: EX_TABLE(100b, .Labort)
.endm
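
/*
 * The 4K copy below is unrolled and software-pipelined: loads are issued
 * well ahead of the matching stores, and successive loads touch the source
 * at 128-byte strides (offsets 0, 128, 256, 384, 512, 640), presumably so
 * that several hardware prefetch streams stay busy on POWER4-class CPUs.
 */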
.Lcopy_page_4K:
std r31,-32(1)
std r30,-40(1)
std r29,-48(1)
std r28,-56(1)
std r27,-64(1)
std r26,-72(1)
std r25,-80(1)
std r24,-88(1)
std r23,-96(1)
std r22,-104(1)
std r21,-112(1)
std r20,-120(1)
li r5,4096/32 - 1
addi r3,r3,-8
li r0,5
0: addi r5,r5,-24
mtctr r0
exc; ld r22,640(4)
exc; ld r21,512(4)
exc; ld r20,384(4)
exc; ld r11,256(4)
exc; ld r9,128(4)
exc; ld r7,0(4)
exc; ld r25,648(4)
exc; ld r24,520(4)
exc; ld r23,392(4)
exc; ld r10,264(4)
exc; ld r8,136(4)
exc; ldu r6,8(4)
cmpwi r5,24
1:
exc; std r22,648(3)
exc; std r21,520(3)
exc; std r20,392(3)
exc; std r11,264(3)
exc; std r9,136(3)
exc; std r7,8(3)
exc; ld r28,648(4)
exc; ld r27,520(4)
exc; ld r26,392(4)
exc; ld r31,264(4)
exc; ld r30,136(4)
exc; ld r29,8(4)
exc; std r25,656(3)
exc; std r24,528(3)
exc; std r23,400(3)
exc; std r10,272(3)
exc; std r8,144(3)
exc; std r6,16(3)
exc; ld r22,656(4)
exc; ld r21,528(4)
exc; ld r20,400(4)
exc; ld r11,272(4)
exc; ld r9,144(4)
exc; ld r7,16(4)
exc; std r28,664(3)
exc; std r27,536(3)
exc; std r26,408(3)
exc; std r31,280(3)
exc; std r30,152(3)
exc; stdu r29,24(3)
exc; ld r25,664(4)
exc; ld r24,536(4)
exc; ld r23,408(4)
exc; ld r10,280(4)
exc; ld r8,152(4)
exc; ldu r6,24(4)
bdnz 1b
exc; std r22,648(3)
exc; std r21,520(3)
exc; std r20,392(3)
exc; std r11,264(3)
exc; std r9,136(3)
exc; std r7,8(3)
addi r4,r4,640
addi r3,r3,648
bge 0b
mtctr r5
exc; ld r7,0(4)
exc; ld r8,8(4)
exc; ldu r9,16(4)
3:
exc; ld r10,8(4)
exc; std r7,8(3)
exc; ld r7,16(4)
exc; std r8,16(3)
exc; ld r8,24(4)
exc; std r9,24(3)
exc; ldu r9,32(4)
exc; stdu r10,32(3)
bdnz 3b
4:
exc; ld r10,8(4)
exc; std r7,8(3)
exc; std r8,16(3)
exc; std r9,24(3)
exc; std r10,32(3)
9: ld r20,-120(1)
ld r21,-112(1)
ld r22,-104(1)
ld r23,-96(1)
ld r24,-88(1)
ld r25,-80(1)
ld r26,-72(1)
ld r27,-64(1)
ld r28,-56(1)
ld r29,-48(1)
ld r30,-40(1)
ld r31,-32(1)
li r3,0
blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
.Labort:
ld r20,-120(1)
ld r21,-112(1)
ld r22,-104(1)
ld r23,-96(1)
ld r24,-88(1)
ld r25,-80(1)
ld r26,-72(1)
ld r27,-64(1)
ld r28,-56(1)
ld r29,-48(1)
ld r30,-40(1)
ld r31,-32(1)
ld r3,-24(r1)
ld r4,-16(r1)
li r5,4096
b .Ldst_aligned
EXPORT_SYMBOL(__copy_tofrom_user)