GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/mm/book3s64/radix_tlb.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>
#include <linux/debugfs.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
#include <asm/plpar_wrappers.h>

#include "internal.h"

/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
                                        unsigned int pid,
                                        unsigned int ric, unsigned int prs)
{
        unsigned long rb;
        unsigned long rs;

        rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
        rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

        asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
                     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
                     : "memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
        unsigned int set;

        asm volatile("ptesync": : :"memory");

        /*
         * Flush the first set of the TLB, and the entire Page Walk Cache
         * and partition table entries. Then flush the remaining sets of the
         * TLB.
         */

        if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
                /* MSR[HV] should flush partition scope translations first. */
                tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);

                if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
                        for (set = 1; set < num_sets; set++)
                                tlbiel_radix_set_isa300(set, is, 0,
                                                        RIC_FLUSH_TLB, 0);
                }
        }

        /* Flush process scoped entries. */
        tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);

        if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
                for (set = 1; set < num_sets; set++)
                        tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
        }

        ppc_after_tlbiel_barrier();
}

void radix__tlbiel_all(unsigned int action)
{
        unsigned int is;

        switch (action) {
        case TLB_INVAL_SCOPE_GLOBAL:
                is = 3;
                break;
        case TLB_INVAL_SCOPE_LPID:
                is = 2;
                break;
        default:
                BUG();
        }

        if (early_cpu_has_feature(CPU_FTR_ARCH_300))
                tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
        else
                WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

        asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

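/*
 * Low-level invalidation helpers. tlbiel only invalidates translations
 * cached by the CPU that executes it, while tlbie is broadcast over the
 * interconnect to every CPU (and the nest MMU). In both cases RB/RS encode
 * the target (TLB set or effective address, plus PID/LPID), "ric" selects
 * whether the TLB, the Page Walk Cache or both are invalidated
 * (RIC_FLUSH_TLB/PWC/ALL), "prs" selects process- vs partition-scoped
 * translations, and r = 1 selects the radix format.
 */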
static __always_inline void __tlbiel_pid(unsigned long pid, int set,
                                unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = PPC_BIT(53); /* IS = 1 */
        rb |= set << PPC_BITLSHIFT(51);
        rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = PPC_BIT(53); /* IS = 1 */
        rs = pid << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = PPC_BIT(52); /* IS = 2 */
        rs = lpid;
        prs = 0; /* partition scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = PPC_BIT(52); /* IS = 2 */
        rs = lpid;
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
                                        unsigned long ap, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = va & ~(PPC_BITMASK(52, 63));
        rb |= ap << PPC_BITLSHIFT(58);
        rs = pid << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
                                       unsigned long ap, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = va & ~(PPC_BITMASK(52, 63));
        rb |= ap << PPC_BITLSHIFT(58);
        rs = pid << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
                                            unsigned long ap, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = va & ~(PPC_BITMASK(52, 63));
        rb |= ap << PPC_BITLSHIFT(58);
        rs = lpid;
        prs = 0; /* partition scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

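/*
 * The fixup_tlbie_*() helpers work around POWER9 tlbie errata: depending on
 * the CPU feature bits they issue an extra invalidation for PID/LPID 0
 * (CPU_FTR_P9_TLBIE_ERAT_BUG) and/or repeat the previous tlbie
 * (CPU_FTR_P9_TLBIE_STQ_BUG) before the caller's final tlbsync sequence.
 */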
static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
                                  unsigned long ap)
{
        if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
        }

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
        }
}

static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
                                        unsigned long ap)
{
        if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_pid(0, RIC_FLUSH_TLB);
        }

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
        }
}

static inline void fixup_tlbie_pid(unsigned long pid)
{
        /*
         * We can use any address for the invalidation, pick one which is
         * probably unused as an optimisation.
         */
        unsigned long va = ((1UL << 52) - 1);

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_pid(0, RIC_FLUSH_TLB);
        }

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
        }
}

static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
                                       unsigned long ap)
{
        if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
        }

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
        }
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
        /*
         * We can use any address for the invalidation, pick one which is
         * probably unused as an optimisation.
         */
        unsigned long va = ((1UL << 52) - 1);

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_lpid(0, RIC_FLUSH_TLB);
        }

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                asm volatile("ptesync": : :"memory");
                __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
        }
}

/*
 * We use 128 sets in radix mode and 256 sets in hpt mode.
 */
static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
        int set;

        asm volatile("ptesync": : :"memory");

        switch (ric) {
        case RIC_FLUSH_PWC:

                /* For PWC, only one flush is needed */
                __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
                ppc_after_tlbiel_barrier();
                return;
        case RIC_FLUSH_TLB:
                __tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
                break;
        case RIC_FLUSH_ALL:
        default:
                /*
                 * Flush the first set of the TLB, and if
                 * we're doing a RIC_FLUSH_ALL, also flush
                 * the entire Page Walk Cache.
                 */
                __tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
        }

        if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
                /* For the remaining sets, just flush the TLB */
                for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
                        __tlbiel_pid(pid, set, RIC_FLUSH_TLB);
        }

        ppc_after_tlbiel_barrier();
        asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
        asm volatile("ptesync": : :"memory");

        /*
         * Workaround the fact that the "ric" argument to __tlbie_pid
         * must be a compile-time constant to match the "i" constraint
         * in the asm statement.
         */
        switch (ric) {
        case RIC_FLUSH_TLB:
                __tlbie_pid(pid, RIC_FLUSH_TLB);
                fixup_tlbie_pid(pid);
                break;
        case RIC_FLUSH_PWC:
                __tlbie_pid(pid, RIC_FLUSH_PWC);
                break;
        case RIC_FLUSH_ALL:
        default:
                __tlbie_pid(pid, RIC_FLUSH_ALL);
                fixup_tlbie_pid(pid);
        }
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

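/*
 * When broadcast tlbie is not used (see cputlb_use_tlbie()), global flushes
 * are performed by sending an IPI to every CPU in mm_cpumask() and running
 * tlbiel locally on each. Coprocessors are invalidated through the nest MMU,
 * which still requires a tlbie, so the *_multicast() variants follow up with
 * one when the context has coprocessors attached.
 */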
struct tlbiel_pid {
        unsigned long pid;
        unsigned long ric;
};

static void do_tlbiel_pid(void *info)
{
        struct tlbiel_pid *t = info;

        if (t->ric == RIC_FLUSH_TLB)
                _tlbiel_pid(t->pid, RIC_FLUSH_TLB);
        else if (t->ric == RIC_FLUSH_PWC)
                _tlbiel_pid(t->pid, RIC_FLUSH_PWC);
        else
                _tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
                                unsigned long pid, unsigned long ric)
{
        struct cpumask *cpus = mm_cpumask(mm);
        struct tlbiel_pid t = { .pid = pid, .ric = ric };

        on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
        /*
         * Always want the CPU translations to be invalidated with tlbiel in
         * these paths, so while coprocessors must use tlbie, we can not
         * optimise away the tlbiel component.
         */
        if (atomic_read(&mm->context.copros) > 0)
                _tlbie_pid(pid, RIC_FLUSH_ALL);
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
        asm volatile("ptesync": : :"memory");

        /*
         * Workaround the fact that the "ric" argument to __tlbie_lpid
         * must be a compile-time constant to match the "i" constraint
         * in the asm statement.
         */
        switch (ric) {
        case RIC_FLUSH_TLB:
                __tlbie_lpid(lpid, RIC_FLUSH_TLB);
                fixup_tlbie_lpid(lpid);
                break;
        case RIC_FLUSH_PWC:
                __tlbie_lpid(lpid, RIC_FLUSH_PWC);
                break;
        case RIC_FLUSH_ALL:
        default:
                __tlbie_lpid(lpid, RIC_FLUSH_ALL);
                fixup_tlbie_lpid(lpid);
        }
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
        /*
         * Workaround the fact that the "ric" argument to __tlbie_lpid_guest
         * must be a compile-time constant to match the "i" constraint
         * in the asm statement.
         */
        switch (ric) {
        case RIC_FLUSH_TLB:
                __tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
                break;
        case RIC_FLUSH_PWC:
                __tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
                break;
        case RIC_FLUSH_ALL:
        default:
                __tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
        }
        fixup_tlbie_lpid(lpid);
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
                                     unsigned long pid, unsigned long page_size,
                                     unsigned long psize)
{
        unsigned long addr;
        unsigned long ap = mmu_get_ap(psize);

        for (addr = start; addr < end; addr += page_size)
                __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
                                       unsigned long psize, unsigned long ric)
{
        unsigned long ap = mmu_get_ap(psize);

        asm volatile("ptesync": : :"memory");
        __tlbiel_va(va, pid, ap, ric);
        ppc_after_tlbiel_barrier();
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
                                    unsigned long pid, unsigned long page_size,
                                    unsigned long psize, bool also_pwc)
{
        asm volatile("ptesync": : :"memory");
        if (also_pwc)
                __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
        __tlbiel_va_range(start, end, pid, page_size, psize);
        ppc_after_tlbiel_barrier();
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
                                    unsigned long pid, unsigned long page_size,
                                    unsigned long psize)
{
        unsigned long addr;
        unsigned long ap = mmu_get_ap(psize);

        for (addr = start; addr < end; addr += page_size)
                __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

        fixup_tlbie_va_range(addr - page_size, pid, ap);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
                                      unsigned long psize, unsigned long ric)
{
        unsigned long ap = mmu_get_ap(psize);

        asm volatile("ptesync": : :"memory");
        __tlbie_va(va, pid, ap, ric);
        fixup_tlbie_va(va, pid, ap);
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_va {
        unsigned long pid;
        unsigned long va;
        unsigned long psize;
        unsigned long ric;
};

static void do_tlbiel_va(void *info)
{
        struct tlbiel_va *t = info;

        if (t->ric == RIC_FLUSH_TLB)
                _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
        else if (t->ric == RIC_FLUSH_PWC)
                _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
        else
                _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

static inline void _tlbiel_va_multicast(struct mm_struct *mm,
                                        unsigned long va, unsigned long pid,
                                        unsigned long psize, unsigned long ric)
{
        struct cpumask *cpus = mm_cpumask(mm);
        struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
        on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
        if (atomic_read(&mm->context.copros) > 0)
                _tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

struct tlbiel_va_range {
        unsigned long pid;
        unsigned long start;
        unsigned long end;
        unsigned long page_size;
        unsigned long psize;
        bool also_pwc;
};

static void do_tlbiel_va_range(void *info)
{
        struct tlbiel_va_range *t = info;

        _tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
                         t->psize, t->also_pwc);
}

static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
                                           unsigned long psize, unsigned long ric)
{
        unsigned long ap = mmu_get_ap(psize);

        asm volatile("ptesync": : :"memory");
        __tlbie_lpid_va(va, lpid, ap, ric);
        fixup_tlbie_lpid_va(va, lpid, ap);
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
                                   unsigned long pid, unsigned long page_size,
                                   unsigned long psize, bool also_pwc)
{
        asm volatile("ptesync": : :"memory");
        if (also_pwc)
                __tlbie_pid(pid, RIC_FLUSH_PWC);
        __tlbie_va_range(start, end, pid, page_size, psize);
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
                                unsigned long start, unsigned long end,
                                unsigned long pid, unsigned long page_size,
                                unsigned long psize, bool also_pwc)
{
        struct cpumask *cpus = mm_cpumask(mm);
        struct tlbiel_va_range t = { .start = start, .end = end,
                                .pid = pid, .page_size = page_size,
                                .psize = psize, .also_pwc = also_pwc };

        on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
        if (atomic_read(&mm->context.copros) > 0)
                _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
        unsigned long pid = mm->context.id;

        if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        _tlbiel_pid(pid, RIC_FLUSH_TLB);
        preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
        unsigned long pid = mm->context.id;

        if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        _tlbiel_pid(pid, RIC_FLUSH_ALL);
        preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
        radix__local_flush_all_mm(mm);
}
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
                                       int psize)
{
        unsigned long pid = mm->context.id;

        if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
        preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
        /* need the return fix for nohash.c */
        if (is_vm_hugetlb_page(vma))
                return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
        radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
        /*
         * The P9 nest MMU has issues with the page walk cache caching PTEs
         * and not flushing them when RIC = 0 for a PID/LPID invalidate.
         *
         * This may have been fixed in shipping firmware (by disabling PWC
         * or preventing it from caching PTEs), but until that is confirmed,
         * this workaround is required - escalate all RIC=0 IS=1/2/3 flushes
         * to RIC=2.
         *
         * POWER10 (and P9P) does not have this problem.
         */
        if (cpu_has_feature(CPU_FTR_ARCH_31))
                return false;
        if (atomic_read(&mm->context.copros) > 0)
                return true;
        return false;
}

/*
 * If always_flush is true, then flush even if this CPU can't be removed
 * from mm_cpumask.
 */
void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
{
        unsigned long pid = mm->context.id;
        int cpu = smp_processor_id();

        /*
         * A kthread could have done a mmget_not_zero() after the flushing CPU
         * checked mm_cpumask, and be in the process of kthread_use_mm when
         * interrupted here. In that case, current->mm will be set to mm,
         * because kthread_use_mm() setting ->mm and switching to the mm is
         * done with interrupts off.
         */
        if (current->mm == mm)
                goto out;

        if (current->active_mm == mm) {
                unsigned long flags;

                WARN_ON_ONCE(current->mm != NULL);
                /*
                 * It is a kernel thread and is using mm as the lazy tlb, so
                 * switch it to init_mm. This is not always called from IPI
                 * (e.g., flush_type_needed), so must disable irqs.
                 */
                local_irq_save(flags);
                mmgrab_lazy_tlb(&init_mm);
                current->active_mm = &init_mm;
                switch_mm_irqs_off(mm, &init_mm, current);
                mmdrop_lazy_tlb(mm);
                local_irq_restore(flags);
        }

        /*
         * This IPI may be initiated from any source including those not
         * running the mm, so there may be a racing IPI that comes after
         * this one which finds the cpumask already clear. Check and avoid
         * underflowing the active_cpus count in that case. The race should
         * not otherwise be a problem, but the TLB must be flushed because
         * that's what the caller expects.
         */
        if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
                dec_mm_active_cpus(mm);
                cpumask_clear_cpu(cpu, mm_cpumask(mm));
                always_flush = true;
        }

out:
        if (always_flush)
                _tlbiel_pid(pid, RIC_FLUSH_ALL);
}

#ifdef CONFIG_SMP
static void do_exit_flush_lazy_tlb(void *arg)
{
        struct mm_struct *mm = arg;
        exit_lazy_flush_tlb(mm, true);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
        /*
         * Would be nice if this was async so it could be run in
         * parallel with our local flush, but generic code does not
         * give a good API for it. Could extend the generic code or
         * make a special powerpc IPI for flushing TLBs.
         * For now it's not too performance critical.
         */
        smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
                               (void *)mm, 1);
}

#else /* CONFIG_SMP */
static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
#endif /* CONFIG_SMP */

static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);

/*
 * Interval between flushes at which we send out IPIs to check whether the
 * mm_cpumask can be trimmed for the case where it's not a single-threaded
 * process flushing its own mm. The intent is to reduce the cost of later
 * flushes. Don't want this to be so low that it adds noticeable cost to TLB
 * flushing, or so high that it doesn't help reduce global TLBIEs.
 */
static unsigned long tlb_mm_cpumask_trim_timer = 1073;

static bool tick_and_test_trim_clock(void)
{
        if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
                        tlb_mm_cpumask_trim_timer) {
                __this_cpu_write(mm_cpumask_trim_clock, 0);
                return true;
        }
        return false;
}

enum tlb_flush_type {
        FLUSH_TYPE_NONE,
        FLUSH_TYPE_LOCAL,
        FLUSH_TYPE_GLOBAL,
};

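/*
 * Decide how a flush of @mm must be performed: FLUSH_TYPE_NONE if no CPU has
 * the mm active, FLUSH_TYPE_LOCAL if only the current CPU has used it, and
 * FLUSH_TYPE_GLOBAL otherwise. As a side effect this opportunistically trims
 * stale CPUs out of mm_cpumask so that later flushes can stay local.
 */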
static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
{
        int active_cpus = atomic_read(&mm->context.active_cpus);
        int cpu = smp_processor_id();

        if (active_cpus == 0)
                return FLUSH_TYPE_NONE;
        if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
                if (current->mm != mm) {
                        /*
                         * Asynchronous flush sources may trim down to nothing
                         * if the process is not running, so occasionally try
                         * to trim.
                         */
                        if (tick_and_test_trim_clock()) {
                                exit_lazy_flush_tlb(mm, true);
                                return FLUSH_TYPE_NONE;
                        }
                }
                return FLUSH_TYPE_LOCAL;
        }

        /* Coprocessors require TLBIE to invalidate nMMU. */
        if (atomic_read(&mm->context.copros) > 0)
                return FLUSH_TYPE_GLOBAL;

        /*
         * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
         * because the mm is being taken down anyway, and a TLBIE tends to
         * be faster than an IPI+TLBIEL.
         */
        if (fullmm)
                return FLUSH_TYPE_GLOBAL;

        /*
         * If we are running the only thread of a single-threaded process,
         * then we should almost always be able to trim off the rest of the
         * CPU mask (except in the case of use_mm() races), so always try
         * trimming the mask.
         */
        if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
                exit_flush_lazy_tlbs(mm);
                /*
                 * use_mm() race could prevent IPIs from being able to clear
                 * the cpumask here, however those users are established
                 * after our first check (and so after the PTEs are removed),
                 * and the TLB still gets flushed by the IPI, so this CPU
                 * will only require a local flush.
                 */
                return FLUSH_TYPE_LOCAL;
        }

        /*
         * Occasionally try to trim down the cpumask. It's possible this can
         * bring the mask to zero, which results in no flush.
         */
        if (tick_and_test_trim_clock()) {
                exit_flush_lazy_tlbs(mm);
                if (current->mm == mm)
                        return FLUSH_TYPE_LOCAL;
                if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
                        exit_lazy_flush_tlb(mm, true);
                return FLUSH_TYPE_NONE;
        }

        return FLUSH_TYPE_GLOBAL;
}

#ifdef CONFIG_SMP
void radix__flush_tlb_mm(struct mm_struct *mm)
{
        unsigned long pid;
        enum tlb_flush_type type;

        pid = mm->context.id;
        if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        /*
         * Order loads of mm_cpumask (in flush_type_needed) vs previous
         * stores to clear ptes before the invalidate. See barrier in
         * switch_mm_irqs_off
         */
        smp_mb();
        type = flush_type_needed(mm, false);
        if (type == FLUSH_TYPE_LOCAL) {
                _tlbiel_pid(pid, RIC_FLUSH_TLB);
        } else if (type == FLUSH_TYPE_GLOBAL) {
                if (!mmu_has_feature(MMU_FTR_GTSE)) {
                        unsigned long tgt = H_RPTI_TARGET_CMMU;

                        if (atomic_read(&mm->context.copros) > 0)
                                tgt |= H_RPTI_TARGET_NMMU;
                        pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
                                               H_RPTI_PAGE_ALL, 0, -1UL);
                } else if (cputlb_use_tlbie()) {
                        if (mm_needs_flush_escalation(mm))
                                _tlbie_pid(pid, RIC_FLUSH_ALL);
                        else
                                _tlbie_pid(pid, RIC_FLUSH_TLB);
                } else {
                        _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
                }
        }
        preempt_enable();
        mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
        unsigned long pid;
        enum tlb_flush_type type;

        pid = mm->context.id;
        if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        smp_mb(); /* see radix__flush_tlb_mm */
        type = flush_type_needed(mm, fullmm);
        if (type == FLUSH_TYPE_LOCAL) {
                _tlbiel_pid(pid, RIC_FLUSH_ALL);
        } else if (type == FLUSH_TYPE_GLOBAL) {
                if (!mmu_has_feature(MMU_FTR_GTSE)) {
                        unsigned long tgt = H_RPTI_TARGET_CMMU;
                        unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
                                             H_RPTI_TYPE_PRT;

                        if (atomic_read(&mm->context.copros) > 0)
                                tgt |= H_RPTI_TARGET_NMMU;
                        pseries_rpt_invalidate(pid, tgt, type,
                                               H_RPTI_PAGE_ALL, 0, -1UL);
                } else if (cputlb_use_tlbie())
                        _tlbie_pid(pid, RIC_FLUSH_ALL);
                else
                        _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
        }
        preempt_enable();
        mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}

void radix__flush_all_mm(struct mm_struct *mm)
{
        __flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
                                 int psize)
{
        unsigned long pid;
        enum tlb_flush_type type;

        pid = mm->context.id;
        if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
                return;

        preempt_disable();
        smp_mb(); /* see radix__flush_tlb_mm */
        type = flush_type_needed(mm, false);
        if (type == FLUSH_TYPE_LOCAL) {
                _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
        } else if (type == FLUSH_TYPE_GLOBAL) {
                if (!mmu_has_feature(MMU_FTR_GTSE)) {
                        unsigned long tgt, pg_sizes, size;

                        tgt = H_RPTI_TARGET_CMMU;
                        pg_sizes = psize_to_rpti_pgsize(psize);
                        size = 1UL << mmu_psize_to_shift(psize);

                        if (atomic_read(&mm->context.copros) > 0)
                                tgt |= H_RPTI_TARGET_NMMU;
                        pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
                                               pg_sizes, vmaddr,
                                               vmaddr + size);
                } else if (cputlb_use_tlbie())
                        _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
                else
                        _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
        }
        preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
        if (is_vm_hugetlb_page(vma))
                return radix__flush_hugetlb_page(vma, vmaddr);
#endif
        radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#endif /* CONFIG_SMP */

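/*
 * Kernel mappings are flushed under PID 0 and are not tracked by any
 * mm_cpumask, so when broadcast tlbie is not used the flush is IPI'd to
 * every online CPU. Coherent accelerators still need a tlbie, handled below.
 */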
static void do_tlbiel_kernel(void *info)
{
        _tlbiel_pid(0, RIC_FLUSH_ALL);
}

static inline void _tlbiel_kernel_broadcast(void)
{
        on_each_cpu(do_tlbiel_kernel, NULL, 1);
        if (tlbie_capable) {
                /*
                 * Coherent accelerators don't refcount kernel memory mappings,
                 * so have to always issue a tlbie for them. This is quite a
                 * slow path anyway.
                 */
                _tlbie_pid(0, RIC_FLUSH_ALL);
        }
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
        if (!mmu_has_feature(MMU_FTR_GTSE)) {
                unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
                unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
                                     H_RPTI_TYPE_PRT;

                pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
                                       start, end);
        } else if (cputlb_use_tlbie())
                _tlbie_pid(0, RIC_FLUSH_ALL);
        else
                _tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

/*
 * Doesn't appear to be used anywhere. Remove.
 */
#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static u32 tlb_single_page_flush_ceiling __read_mostly = 33;
static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;

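/*
 * Flush a user address range: either invalidate the whole PID or flush page
 * by page, depending on the ceilings above. When THP is enabled, the
 * PMD-aligned part of the range also gets a second pass with the 2M page
 * size, since huge mappings may be cached under that size.
 */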
static inline void __radix__flush_tlb_range(struct mm_struct *mm,
                                            unsigned long start, unsigned long end)
{
        unsigned long pid;
        unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
        unsigned long page_size = 1UL << page_shift;
        unsigned long nr_pages = (end - start) >> page_shift;
        bool flush_pid, flush_pwc = false;
        enum tlb_flush_type type;

        pid = mm->context.id;
        if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
                return;

        WARN_ON_ONCE(end == TLB_FLUSH_ALL);

        preempt_disable();
        smp_mb(); /* see radix__flush_tlb_mm */
        type = flush_type_needed(mm, false);
        if (type == FLUSH_TYPE_NONE)
                goto out;

        if (type == FLUSH_TYPE_GLOBAL)
                flush_pid = nr_pages > tlb_single_page_flush_ceiling;
        else
                flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
        /*
         * A full PID flush already does the PWC flush. If it is not a full
         * PID flush and the range covers more than a PMD, force a PWC flush
         * as well; mremap() depends on this behaviour.
         */
        if (!flush_pid && (end - start) >= PMD_SIZE)
                flush_pwc = true;

        if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
                unsigned long type = H_RPTI_TYPE_TLB;
                unsigned long tgt = H_RPTI_TARGET_CMMU;
                unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

                if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
                        pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
                if (atomic_read(&mm->context.copros) > 0)
                        tgt |= H_RPTI_TARGET_NMMU;
                if (flush_pwc)
                        type |= H_RPTI_TYPE_PWC;
                pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
        } else if (flush_pid) {
                /*
                 * We are now flushing a range larger than PMD size, so force
                 * a RIC_FLUSH_ALL.
                 */
                if (type == FLUSH_TYPE_LOCAL) {
                        _tlbiel_pid(pid, RIC_FLUSH_ALL);
                } else {
                        if (cputlb_use_tlbie()) {
                                _tlbie_pid(pid, RIC_FLUSH_ALL);
                        } else {
                                _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
                        }
                }
        } else {
                bool hflush;
                unsigned long hstart, hend;

                hstart = (start + PMD_SIZE - 1) & PMD_MASK;
                hend = end & PMD_MASK;
                hflush = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hstart < hend;

                if (type == FLUSH_TYPE_LOCAL) {
                        asm volatile("ptesync": : :"memory");
                        if (flush_pwc)
                                /* For PWC, only one flush is needed */
                                __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
                        __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
                        if (hflush)
                                __tlbiel_va_range(hstart, hend, pid,
                                                  PMD_SIZE, MMU_PAGE_2M);
                        ppc_after_tlbiel_barrier();
                } else if (cputlb_use_tlbie()) {
                        asm volatile("ptesync": : :"memory");
                        if (flush_pwc)
                                __tlbie_pid(pid, RIC_FLUSH_PWC);
                        __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
                        if (hflush)
                                __tlbie_va_range(hstart, hend, pid,
                                                 PMD_SIZE, MMU_PAGE_2M);
                        asm volatile("eieio; tlbsync; ptesync": : :"memory");
                } else {
                        _tlbiel_va_range_multicast(mm,
                                        start, end, pid, page_size, mmu_virtual_psize, flush_pwc);
                        if (hflush)
                                _tlbiel_va_range_multicast(mm,
                                        hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc);
                }
        }
out:
        preempt_enable();
        mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                            unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
        if (is_vm_hugetlb_page(vma))
                return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

        __radix__flush_tlb_range(vma->vm_mm, start, end);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
        int psize;

        if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
                psize = mmu_virtual_psize;
        else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
                psize = MMU_PAGE_2M;
        else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
                psize = MMU_PAGE_1G;
        else
                return -1;
        return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
                                unsigned long addr,
                                unsigned long page_size)
{
        int psize = radix_get_mmu_psize(page_size);

        _tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
        _tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid(unsigned int lpid)
{
        _tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
        _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

1184
void radix__tlb_flush(struct mmu_gather *tlb)
1185
{
1186
int psize = 0;
1187
struct mm_struct *mm = tlb->mm;
1188
int page_size = tlb->page_size;
1189
unsigned long start = tlb->start;
1190
unsigned long end = tlb->end;
1191
1192
/*
1193
* if page size is not something we understand, do a full mm flush
1194
*
1195
* A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
1196
* that flushes the process table entry cache upon process teardown.
1197
* See the comment for radix in arch_exit_mmap().
1198
*/
1199
if (tlb->fullmm) {
1200
if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
1201
/*
1202
* Shootdown based lazy tlb mm refcounting means we
1203
* have to IPI everyone in the mm_cpumask anyway soon
1204
* when the mm goes away, so might as well do it as
1205
* part of the final flush now.
1206
*
1207
* If lazy shootdown was improved to reduce IPIs (e.g.,
1208
* by batching), then it may end up being better to use
1209
* tlbies here instead.
1210
*/
1211
preempt_disable();
1212
1213
smp_mb(); /* see radix__flush_tlb_mm */
1214
exit_flush_lazy_tlbs(mm);
1215
__flush_all_mm(mm, true);
1216
1217
preempt_enable();
1218
} else {
1219
__flush_all_mm(mm, true);
1220
}
1221
1222
} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
1223
if (!tlb->freed_tables)
1224
radix__flush_tlb_mm(mm);
1225
else
1226
radix__flush_all_mm(mm);
1227
} else {
1228
if (!tlb->freed_tables)
1229
radix__flush_tlb_range_psize(mm, start, end, psize);
1230
else
1231
radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
1232
}
1233
}
1234
1235
static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
1236
unsigned long start, unsigned long end,
1237
int psize, bool also_pwc)
1238
{
1239
unsigned long pid;
1240
unsigned int page_shift = mmu_psize_defs[psize].shift;
1241
unsigned long page_size = 1UL << page_shift;
1242
unsigned long nr_pages = (end - start) >> page_shift;
1243
bool flush_pid;
1244
enum tlb_flush_type type;
1245
1246
pid = mm->context.id;
1247
if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
1248
return;
1249
1250
WARN_ON_ONCE(end == TLB_FLUSH_ALL);
1251
1252
preempt_disable();
1253
smp_mb(); /* see radix__flush_tlb_mm */
1254
type = flush_type_needed(mm, false);
1255
if (type == FLUSH_TYPE_NONE)
1256
goto out;
1257
1258
if (type == FLUSH_TYPE_GLOBAL)
1259
flush_pid = nr_pages > tlb_single_page_flush_ceiling;
1260
else
1261
flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
1262
1263
if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
1264
unsigned long tgt = H_RPTI_TARGET_CMMU;
1265
unsigned long type = H_RPTI_TYPE_TLB;
1266
unsigned long pg_sizes = psize_to_rpti_pgsize(psize);
1267
1268
if (also_pwc)
1269
type |= H_RPTI_TYPE_PWC;
1270
if (atomic_read(&mm->context.copros) > 0)
1271
tgt |= H_RPTI_TARGET_NMMU;
1272
pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
1273
} else if (flush_pid) {
1274
if (type == FLUSH_TYPE_LOCAL) {
1275
_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1276
} else {
1277
if (cputlb_use_tlbie()) {
1278
if (mm_needs_flush_escalation(mm))
1279
also_pwc = true;
1280
1281
_tlbie_pid(pid,
1282
also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1283
} else {
1284
_tlbiel_pid_multicast(mm, pid,
1285
also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1286
}
1287
1288
}
1289
} else {
1290
if (type == FLUSH_TYPE_LOCAL)
1291
_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
1292
else if (cputlb_use_tlbie())
1293
_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
1294
else
1295
_tlbiel_va_range_multicast(mm,
1296
start, end, pid, page_size, psize, also_pwc);
1297
}
1298
out:
1299
preempt_enable();
1300
mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
1301
}
1302
1303
void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
1304
unsigned long end, int psize)
1305
{
1306
return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
1307
}
1308
1309
void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
1310
unsigned long end, int psize)
1311
{
1312
__radix__flush_tlb_range_psize(mm, start, end, psize, true);
1313
}
1314
1315
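/*
 * radix__flush_tlb_collapsed_pmd() below runs after a range of small pages is
 * collapsed into a huge PMD (e.g. by khugepaged): the old small-page
 * translations and the page walk cache entries for the removed page table
 * must be invalidated, hence the range flush with also_pwc = true.
 */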
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
        unsigned long pid, end;
        enum tlb_flush_type type;

        pid = mm->context.id;
        if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
                return;

        /* 4k page size, just blow the world */
        if (PAGE_SIZE == 0x1000) {
                radix__flush_all_mm(mm);
                return;
        }

        end = addr + HPAGE_PMD_SIZE;

        /* Otherwise first do the PWC, then iterate the pages. */
        preempt_disable();
        smp_mb(); /* see radix__flush_tlb_mm */
        type = flush_type_needed(mm, false);
        if (type == FLUSH_TYPE_LOCAL) {
                _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
        } else if (type == FLUSH_TYPE_GLOBAL) {
                if (!mmu_has_feature(MMU_FTR_GTSE)) {
                        unsigned long tgt, type, pg_sizes;

                        tgt = H_RPTI_TARGET_CMMU;
                        type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
                               H_RPTI_TYPE_PRT;
                        pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

                        if (atomic_read(&mm->context.copros) > 0)
                                tgt |= H_RPTI_TARGET_NMMU;
                        pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
                                               addr, end);
                } else if (cputlb_use_tlbie())
                        _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
                else
                        _tlbiel_va_range_multicast(mm,
                                        addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
        }

        preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
                                unsigned long start, unsigned long end)
{
        radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

void radix__flush_pud_tlb_range(struct vm_area_struct *vma,
                                unsigned long start, unsigned long end)
{
        radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_1G);
}
EXPORT_SYMBOL(radix__flush_pud_tlb_range);

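/*
 * Flush absolutely everything from the TLB on all CPUs: one broadcast tlbie
 * with IS = 3 covering guest translations (PRS = 1, LPID != 0) and one
 * covering host translations (PRS = 0, LPID = 0), as set up below.
 */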
void radix__flush_tlb_all(void)
{
        unsigned long rb, prs, r, rs;
        unsigned long ric = RIC_FLUSH_ALL;

        rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
        prs = 0; /* partition scoped */
        r = 1;   /* radix format */
        rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

        asm volatile("ptesync": : :"memory");
        /*
         * now flush guest entries by passing PRS = 1 and LPID != 0
         */
        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
        /*
         * now flush host entries by passing PRS = 0 and LPID == 0
         */
        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

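/*
 * The helpers below are used when KVM handles the H_RPT_INVALIDATE hcall for
 * a guest: they perform process-scoped invalidations with the guest's PID
 * and LPID encoded together in the RS operand.
 */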
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
static __always_inline void __tlbie_pid_lpid(unsigned long pid,
                                             unsigned long lpid,
                                             unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = PPC_BIT(53); /* IS = 1 */
        rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
                                            unsigned long lpid,
                                            unsigned long ap, unsigned long ric)
{
        unsigned long rb, rs, prs, r;

        rb = va & ~(PPC_BITMASK(52, 63));
        rb |= ap << PPC_BITLSHIFT(58);
        rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
        prs = 1; /* process scoped */
        r = 1;   /* radix format */

        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
{
        /*
         * We can use any address for the invalidation, pick one which is
         * probably unused as an optimisation.
         */
        unsigned long va = ((1UL << 52) - 1);

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                asm volatile("ptesync" : : : "memory");
                __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
        }

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                asm volatile("ptesync" : : : "memory");
                __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
                                RIC_FLUSH_TLB);
        }
}

static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
                                   unsigned long ric)
{
        asm volatile("ptesync" : : : "memory");

        /*
         * Workaround the fact that the "ric" argument to __tlbie_pid_lpid
         * must be a compile-time constant to match the "i" constraint
         * in the asm statement.
         */
        switch (ric) {
        case RIC_FLUSH_TLB:
                __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
                fixup_tlbie_pid_lpid(pid, lpid);
                break;
        case RIC_FLUSH_PWC:
                __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
                break;
        case RIC_FLUSH_ALL:
        default:
                __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
                fixup_tlbie_pid_lpid(pid, lpid);
        }
        asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

static inline void fixup_tlbie_va_range_lpid(unsigned long va,
                                             unsigned long pid,
                                             unsigned long lpid,
                                             unsigned long ap)
{
        if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                asm volatile("ptesync" : : : "memory");
                __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
        }

        if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                asm volatile("ptesync" : : : "memory");
                __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
        }
}

static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
                                         unsigned long pid, unsigned long lpid,
                                         unsigned long page_size,
                                         unsigned long psize)
{
        unsigned long addr;
        unsigned long ap = mmu_get_ap(psize);

        for (addr = start; addr < end; addr += page_size)
                __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);

        fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
}

static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
                                        unsigned long pid, unsigned long lpid,
                                        unsigned long page_size,
                                        unsigned long psize, bool also_pwc)
{
        asm volatile("ptesync" : : : "memory");
        if (also_pwc)
                __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
        __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
        asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

/*
 * Performs process-scoped invalidations for a given LPID
 * as part of H_RPT_INVALIDATE hcall.
 */
void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
                             unsigned long type, unsigned long pg_sizes,
                             unsigned long start, unsigned long end)
{
        unsigned long psize, nr_pages;
        struct mmu_psize_def *def;
        bool flush_pid;

        /*
         * A H_RPTI_TYPE_ALL request implies RIC=3, hence
         * do a single IS=1 based flush.
         */
        if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
                _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
                return;
        }

        if (type & H_RPTI_TYPE_PWC)
                _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);

        /* Full PID flush */
        if (start == 0 && end == -1)
                return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);

        /* Do range invalidation for all the valid page sizes */
        for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
                def = &mmu_psize_defs[psize];
                if (!(pg_sizes & def->h_rpt_pgsize))
                        continue;

                nr_pages = (end - start) >> def->shift;
                flush_pid = nr_pages > tlb_single_page_flush_ceiling;

                /*
                 * If the number of pages spanning the range is above
                 * the ceiling, convert the request into a full PID flush.
                 * And since PID flush takes out all the page sizes, there
                 * is no need to consider remaining page sizes.
                 */
                if (flush_pid) {
                        _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
                        return;
                }
                _tlbie_va_range_lpid(start, end, pid, lpid,
                                     (1UL << def->shift), psize, false);
        }
}
EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);

#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

static int __init create_tlb_single_page_flush_ceiling(void)
{
        debugfs_create_u32("tlb_single_page_flush_ceiling", 0600,
                           arch_debugfs_dir, &tlb_single_page_flush_ceiling);
        debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600,
                           arch_debugfs_dir, &tlb_local_single_page_flush_ceiling);
        return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);