/* arch/powerpc/lib/copyuser_64.S — __copy_tofrom_user for 64-bit powerpc */
/*
2
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
3
*
4
* This program is free software; you can redistribute it and/or
5
* modify it under the terms of the GNU General Public License
6
* as published by the Free Software Foundation; either version
7
* 2 of the License, or (at your option) any later version.
8
*/
9
#include <asm/processor.h>
10
#include <asm/ppc_asm.h>
11
12
.align 7
13
_GLOBAL(__copy_tofrom_user)
14
/* first check for a whole page copy on a page boundary */
15
cmpldi cr1,r5,16
16
cmpdi cr6,r5,4096
17
or r0,r3,r4
18
neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */
19
andi. r0,r0,4095
20
std r3,-24(r1)
21
crand cr0*4+2,cr0*4+2,cr6*4+2
22
std r4,-16(r1)
23
std r5,-8(r1)
24
dcbt 0,r4
25
beq .Lcopy_page_4K
26
andi. r6,r6,7
27
PPC_MTOCRF 0x01,r5
28
blt cr1,.Lshort_copy
29
/* Below we want to nop out the bne if we're on a CPU that has the
30
* CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
31
* cleared.
32
* At the time of writing the only CPU that has this combination of bits
33
* set is Power6.
34
*/
35
BEGIN_FTR_SECTION
36
nop
37
FTR_SECTION_ELSE
38
bne .Ldst_unaligned
39
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
40
CPU_FTR_UNALIGNED_LD_STD)
41
.Ldst_aligned:
42
addi r3,r3,-16
43
BEGIN_FTR_SECTION
44
andi. r0,r4,7
45
bne .Lsrc_unaligned
46
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
47
blt cr1,.Ldo_tail /* if < 16 bytes to copy */
48
srdi r0,r5,5
49
cmpdi cr1,r0,0
50
20: ld r7,0(r4)
51
220: ld r6,8(r4)
52
addi r4,r4,16
53
mtctr r0
54
andi. r0,r5,0x10
55
beq 22f
56
addi r3,r3,16
57
addi r4,r4,-16
58
mr r9,r7
59
mr r8,r6
60
beq cr1,72f
61
21: ld r7,16(r4)
62
221: ld r6,24(r4)
63
addi r4,r4,32
64
70: std r9,0(r3)
65
270: std r8,8(r3)
66
22: ld r9,0(r4)
67
222: ld r8,8(r4)
68
71: std r7,16(r3)
69
271: std r6,24(r3)
70
addi r3,r3,32
71
bdnz 21b
72
72: std r9,0(r3)
73
272: std r8,8(r3)
74
andi. r5,r5,0xf
75
beq+ 3f
76
addi r4,r4,16
77
.Ldo_tail:
78
addi r3,r3,16
79
bf cr7*4+0,246f
80
244: ld r9,0(r4)
81
addi r4,r4,8
82
245: std r9,0(r3)
83
addi r3,r3,8
84
246: bf cr7*4+1,1f
85
23: lwz r9,0(r4)
86
addi r4,r4,4
87
73: stw r9,0(r3)
88
addi r3,r3,4
89
1: bf cr7*4+2,2f
90
44: lhz r9,0(r4)
91
addi r4,r4,2
92
74: sth r9,0(r3)
93
addi r3,r3,2
94
2: bf cr7*4+3,3f
95
45: lbz r9,0(r4)
96
75: stb r9,0(r3)
97
3: li r3,0
98
blr
99
100
.Lsrc_unaligned:
101
srdi r6,r5,3
102
addi r5,r5,-16
103
subf r4,r0,r4
104
srdi r7,r5,4
105
sldi r10,r0,3
106
cmpldi cr6,r6,3
107
andi. r5,r5,7
108
mtctr r7
109
subfic r11,r10,64
110
add r5,r5,r0
111
bt cr7*4+0,28f
112
113
24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */
114
25: ld r0,8(r4)
115
sld r6,r9,r10
116
26: ldu r9,16(r4)
117
srd r7,r0,r11
118
sld r8,r0,r10
119
or r7,r7,r6
120
blt cr6,79f
121
27: ld r0,8(r4)
122
b 2f
123
124
28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */
125
29: ldu r9,8(r4)
126
sld r8,r0,r10
127
addi r3,r3,-8
128
blt cr6,5f
129
30: ld r0,8(r4)
130
srd r12,r9,r11
131
sld r6,r9,r10
132
31: ldu r9,16(r4)
133
or r12,r8,r12
134
srd r7,r0,r11
135
sld r8,r0,r10
136
addi r3,r3,16
137
beq cr6,78f
138
139
1: or r7,r7,r6
140
32: ld r0,8(r4)
141
76: std r12,8(r3)
142
2: srd r12,r9,r11
143
sld r6,r9,r10
144
33: ldu r9,16(r4)
145
or r12,r8,r12
146
77: stdu r7,16(r3)
147
srd r7,r0,r11
148
sld r8,r0,r10
149
bdnz 1b
150
151
78: std r12,8(r3)
152
or r7,r7,r6
153
79: std r7,16(r3)
154
5: srd r12,r9,r11
155
or r12,r8,r12
156
80: std r12,24(r3)
157
bne 6f
158
li r3,0
159
blr
160
6: cmpwi cr1,r5,8
161
addi r3,r3,32
162
sld r9,r9,r10
163
ble cr1,7f
164
34: ld r0,8(r4)
165
srd r7,r0,r11
166
or r9,r7,r9
167
7:
168
bf cr7*4+1,1f
169
rotldi r9,r9,32
170
94: stw r9,0(r3)
171
addi r3,r3,4
172
1: bf cr7*4+2,2f
173
rotldi r9,r9,16
174
95: sth r9,0(r3)
175
addi r3,r3,2
176
2: bf cr7*4+3,3f
177
rotldi r9,r9,8
178
96: stb r9,0(r3)
179
3: li r3,0
180
blr
181
182
.Ldst_unaligned:
183
PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */
184
subf r5,r6,r5
185
li r7,0
186
cmpldi cr1,r5,16
187
bf cr7*4+3,1f
188
35: lbz r0,0(r4)
189
81: stb r0,0(r3)
190
addi r7,r7,1
191
1: bf cr7*4+2,2f
192
36: lhzx r0,r7,r4
193
82: sthx r0,r7,r3
194
addi r7,r7,2
195
2: bf cr7*4+1,3f
196
37: lwzx r0,r7,r4
197
83: stwx r0,r7,r3
198
3: PPC_MTOCRF 0x01,r5
199
add r4,r6,r4
200
add r3,r6,r3
201
b .Ldst_aligned
202
203
.Lshort_copy:
204
bf cr7*4+0,1f
205
38: lwz r0,0(r4)
206
39: lwz r9,4(r4)
207
addi r4,r4,8
208
84: stw r0,0(r3)
209
85: stw r9,4(r3)
210
addi r3,r3,8
211
1: bf cr7*4+1,2f
212
40: lwz r0,0(r4)
213
addi r4,r4,4
214
86: stw r0,0(r3)
215
addi r3,r3,4
216
2: bf cr7*4+2,3f
217
41: lhz r0,0(r4)
218
addi r4,r4,2
219
87: sth r0,0(r3)
220
addi r3,r3,2
221
3: bf cr7*4+3,4f
222
42: lbz r0,0(r4)
223
88: stb r0,0(r3)
224
4: li r3,0
225
blr
226
227
/*
228
* exception handlers follow
229
* we have to return the number of bytes not copied
230
* for an exception on a load, we set the rest of the destination to 0
231
*/
232
233
136:
234
137:
235
add r3,r3,r7
236
b 1f
237
130:
238
131:
239
addi r3,r3,8
240
120:
241
320:
242
122:
243
322:
244
124:
245
125:
246
126:
247
127:
248
128:
249
129:
250
133:
251
addi r3,r3,8
252
132:
253
addi r3,r3,8
254
121:
255
321:
256
344:
257
134:
258
135:
259
138:
260
139:
261
140:
262
141:
263
142:
264
123:
265
144:
266
145:
267
268
/*
269
* here we have had a fault on a load and r3 points to the first
270
* unmodified byte of the destination
271
*/
272
1: ld r6,-24(r1)
273
ld r4,-16(r1)
274
ld r5,-8(r1)
275
subf r6,r6,r3
276
add r4,r4,r6
277
subf r5,r6,r5 /* #bytes left to go */
278
279
/*
280
* first see if we can copy any more bytes before hitting another exception
281
*/
282
mtctr r5
283
43: lbz r0,0(r4)
284
addi r4,r4,1
285
89: stb r0,0(r3)
286
addi r3,r3,1
287
bdnz 43b
288
li r3,0 /* huh? all copied successfully this time? */
289
blr
290
291
/*
292
* here we have trapped again, need to clear ctr bytes starting at r3
293
*/
294
143: mfctr r5
295
li r0,0
296
mr r4,r3
297
mr r3,r5 /* return the number of bytes not copied */
298
1: andi. r9,r4,7
299
beq 3f
300
90: stb r0,0(r4)
301
addic. r5,r5,-1
302
addi r4,r4,1
303
bne 1b
304
blr
305
3: cmpldi cr1,r5,8
306
srdi r9,r5,3
307
andi. r5,r5,7
308
blt cr1,93f
309
mtctr r9
310
91: std r0,0(r4)
311
addi r4,r4,8
312
bdnz 91b
313
93: beqlr
314
mtctr r5
315
92: stb r0,0(r4)
316
addi r4,r4,1
317
bdnz 92b
318
blr
319
320
/*
321
* exception handlers for stores: we just need to work
322
* out how many bytes weren't copied
323
*/
324
182:
325
183:
326
add r3,r3,r7
327
b 1f
328
371:
329
180:
330
addi r3,r3,8
331
171:
332
177:
333
addi r3,r3,8
334
370:
335
372:
336
176:
337
178:
338
addi r3,r3,4
339
185:
340
addi r3,r3,4
341
170:
342
172:
343
345:
344
173:
345
174:
346
175:
347
179:
348
181:
349
184:
350
186:
351
187:
352
188:
353
189:
354
194:
355
195:
356
196:
357
1:
358
ld r6,-24(r1)
359
ld r5,-8(r1)
360
add r6,r6,r5
361
subf r3,r3,r6 /* #bytes not copied */
362
190:
363
191:
364
192:
365
blr /* #bytes not copied in r3 */
366
367
.section __ex_table,"a"
368
.align 3
369
.llong 20b,120b
370
.llong 220b,320b
371
.llong 21b,121b
372
.llong 221b,321b
373
.llong 70b,170b
374
.llong 270b,370b
375
.llong 22b,122b
376
.llong 222b,322b
377
.llong 71b,171b
378
.llong 271b,371b
379
.llong 72b,172b
380
.llong 272b,372b
381
.llong 244b,344b
382
.llong 245b,345b
383
.llong 23b,123b
384
.llong 73b,173b
385
.llong 44b,144b
386
.llong 74b,174b
387
.llong 45b,145b
388
.llong 75b,175b
389
.llong 24b,124b
390
.llong 25b,125b
391
.llong 26b,126b
392
.llong 27b,127b
393
.llong 28b,128b
394
.llong 29b,129b
395
.llong 30b,130b
396
.llong 31b,131b
397
.llong 32b,132b
398
.llong 76b,176b
399
.llong 33b,133b
400
.llong 77b,177b
401
.llong 78b,178b
402
.llong 79b,179b
403
.llong 80b,180b
404
.llong 34b,134b
405
.llong 94b,194b
406
.llong 95b,195b
407
.llong 96b,196b
408
.llong 35b,135b
409
.llong 81b,181b
410
.llong 36b,136b
411
.llong 82b,182b
412
.llong 37b,137b
413
.llong 83b,183b
414
.llong 38b,138b
415
.llong 39b,139b
416
.llong 84b,184b
417
.llong 85b,185b
418
.llong 40b,140b
419
.llong 86b,186b
420
.llong 41b,141b
421
.llong 87b,187b
422
.llong 42b,142b
423
.llong 88b,188b
424
.llong 43b,143b
425
.llong 89b,189b
426
.llong 90b,190b
427
.llong 91b,191b
428
.llong 92b,192b
429
430
.text
431
432
/*
433
* Routine to copy a whole page of data, optimized for POWER4.
434
* On POWER4 it is more than 50% faster than the simple loop
435
* above (following the .Ldst_aligned label) but it runs slightly
436
* slower on POWER3.
437
*/
438
.Lcopy_page_4K:
439
std r31,-32(1)
440
std r30,-40(1)
441
std r29,-48(1)
442
std r28,-56(1)
443
std r27,-64(1)
444
std r26,-72(1)
445
std r25,-80(1)
446
std r24,-88(1)
447
std r23,-96(1)
448
std r22,-104(1)
449
std r21,-112(1)
450
std r20,-120(1)
451
li r5,4096/32 - 1
452
addi r3,r3,-8
453
li r0,5
454
0: addi r5,r5,-24
455
mtctr r0
456
20: ld r22,640(4)
457
21: ld r21,512(4)
458
22: ld r20,384(4)
459
23: ld r11,256(4)
460
24: ld r9,128(4)
461
25: ld r7,0(4)
462
26: ld r25,648(4)
463
27: ld r24,520(4)
464
28: ld r23,392(4)
465
29: ld r10,264(4)
466
30: ld r8,136(4)
467
31: ldu r6,8(4)
468
cmpwi r5,24
469
1:
470
32: std r22,648(3)
471
33: std r21,520(3)
472
34: std r20,392(3)
473
35: std r11,264(3)
474
36: std r9,136(3)
475
37: std r7,8(3)
476
38: ld r28,648(4)
477
39: ld r27,520(4)
478
40: ld r26,392(4)
479
41: ld r31,264(4)
480
42: ld r30,136(4)
481
43: ld r29,8(4)
482
44: std r25,656(3)
483
45: std r24,528(3)
484
46: std r23,400(3)
485
47: std r10,272(3)
486
48: std r8,144(3)
487
49: std r6,16(3)
488
50: ld r22,656(4)
489
51: ld r21,528(4)
490
52: ld r20,400(4)
491
53: ld r11,272(4)
492
54: ld r9,144(4)
493
55: ld r7,16(4)
494
56: std r28,664(3)
495
57: std r27,536(3)
496
58: std r26,408(3)
497
59: std r31,280(3)
498
60: std r30,152(3)
499
61: stdu r29,24(3)
500
62: ld r25,664(4)
501
63: ld r24,536(4)
502
64: ld r23,408(4)
503
65: ld r10,280(4)
504
66: ld r8,152(4)
505
67: ldu r6,24(4)
506
bdnz 1b
507
68: std r22,648(3)
508
69: std r21,520(3)
509
70: std r20,392(3)
510
71: std r11,264(3)
511
72: std r9,136(3)
512
73: std r7,8(3)
513
74: addi r4,r4,640
514
75: addi r3,r3,648
515
bge 0b
516
mtctr r5
517
76: ld r7,0(4)
518
77: ld r8,8(4)
519
78: ldu r9,16(4)
520
3:
521
79: ld r10,8(4)
522
80: std r7,8(3)
523
81: ld r7,16(4)
524
82: std r8,16(3)
525
83: ld r8,24(4)
526
84: std r9,24(3)
527
85: ldu r9,32(4)
528
86: stdu r10,32(3)
529
bdnz 3b
530
4:
531
87: ld r10,8(4)
532
88: std r7,8(3)
533
89: std r8,16(3)
534
90: std r9,24(3)
535
91: std r10,32(3)
536
9: ld r20,-120(1)
537
ld r21,-112(1)
538
ld r22,-104(1)
539
ld r23,-96(1)
540
ld r24,-88(1)
541
ld r25,-80(1)
542
ld r26,-72(1)
543
ld r27,-64(1)
544
ld r28,-56(1)
545
ld r29,-48(1)
546
ld r30,-40(1)
547
ld r31,-32(1)
548
li r3,0
549
blr
550
551
/*
552
* on an exception, reset to the beginning and jump back into the
553
* standard __copy_tofrom_user
554
*/
555
100: ld r20,-120(1)
556
ld r21,-112(1)
557
ld r22,-104(1)
558
ld r23,-96(1)
559
ld r24,-88(1)
560
ld r25,-80(1)
561
ld r26,-72(1)
562
ld r27,-64(1)
563
ld r28,-56(1)
564
ld r29,-48(1)
565
ld r30,-40(1)
566
ld r31,-32(1)
567
ld r3,-24(r1)
568
ld r4,-16(r1)
569
li r5,4096
570
b .Ldst_aligned
571
572
.section __ex_table,"a"
573
.align 3
574
.llong 20b,100b
575
.llong 21b,100b
576
.llong 22b,100b
577
.llong 23b,100b
578
.llong 24b,100b
579
.llong 25b,100b
580
.llong 26b,100b
581
.llong 27b,100b
582
.llong 28b,100b
583
.llong 29b,100b
584
.llong 30b,100b
585
.llong 31b,100b
586
.llong 32b,100b
587
.llong 33b,100b
588
.llong 34b,100b
589
.llong 35b,100b
590
.llong 36b,100b
591
.llong 37b,100b
592
.llong 38b,100b
593
.llong 39b,100b
594
.llong 40b,100b
595
.llong 41b,100b
596
.llong 42b,100b
597
.llong 43b,100b
598
.llong 44b,100b
599
.llong 45b,100b
600
.llong 46b,100b
601
.llong 47b,100b
602
.llong 48b,100b
603
.llong 49b,100b
604
.llong 50b,100b
605
.llong 51b,100b
606
.llong 52b,100b
607
.llong 53b,100b
608
.llong 54b,100b
609
.llong 55b,100b
610
.llong 56b,100b
611
.llong 57b,100b
612
.llong 58b,100b
613
.llong 59b,100b
614
.llong 60b,100b
615
.llong 61b,100b
616
.llong 62b,100b
617
.llong 63b,100b
618
.llong 64b,100b
619
.llong 65b,100b
620
.llong 66b,100b
621
.llong 67b,100b
622
.llong 68b,100b
623
.llong 69b,100b
624
.llong 70b,100b
625
.llong 71b,100b
626
.llong 72b,100b
627
.llong 73b,100b
628
.llong 74b,100b
629
.llong 75b,100b
630
.llong 76b,100b
631
.llong 77b,100b
632
.llong 78b,100b
633
.llong 79b,100b
634
.llong 80b,100b
635
.llong 81b,100b
636
.llong 82b,100b
637
.llong 83b,100b
638
.llong 84b,100b
639
.llong 85b,100b
640
.llong 86b,100b
641
.llong 87b,100b
642
.llong 88b,100b
643
.llong 89b,100b
644
.llong 90b,100b
645
.llong 91b,100b
646
647