Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/kvm/book3s_hv_rm_mmu.c
26424 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
*
4
* Copyright 2010-2011 Paul Mackerras, IBM Corp. <[email protected]>
5
*/
6
7
#include <linux/types.h>
8
#include <linux/string.h>
9
#include <linux/kvm.h>
10
#include <linux/kvm_host.h>
11
#include <linux/hugetlb.h>
12
#include <linux/module.h>
13
#include <linux/log2.h>
14
#include <linux/sizes.h>
15
16
#include <asm/trace.h>
17
#include <asm/kvm_ppc.h>
18
#include <asm/kvm_book3s.h>
19
#include <asm/book3s/64/mmu-hash.h>
20
#include <asm/hvcall.h>
21
#include <asm/synch.h>
22
#include <asm/ppc-opcode.h>
23
#include <asm/pte-walk.h>
24
25
/* Translate address of a vmalloc'd thing to a linear map address */
26
/*
 * Convert a pointer into vmalloc space into the equivalent linear-map
 * pointer: the backing physical address is looked up with
 * ppc_find_vmap_phys() and then mapped back through __va().
 */
static void *real_vmalloc_addr(void *addr)
{
	unsigned long vaddr = (unsigned long)addr;

	return __va(ppc_find_vmap_phys(vaddr));
}
30
31
/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
32
static int global_invalidates(struct kvm *kvm)
33
{
34
int global;
35
int cpu;
36
37
/*
38
* If there is only one vcore, and it's currently running,
39
* as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
40
* we can use tlbiel as long as we mark all other physical
41
* cores as potentially having stale TLB entries for this lpid.
42
* Otherwise, don't use tlbiel.
43
*/
44
if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
45
global = 0;
46
else
47
global = 1;
48
49
/* LPID has been switched to host if in virt mode so can't do local */
50
if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
51
global = 1;
52
53
if (!global) {
54
/* any other core might now have stale TLB entries... */
55
smp_wmb();
56
cpumask_setall(&kvm->arch.need_tlb_flush);
57
cpu = local_paca->kvm_hstate.kvm_vcore->pcpu;
58
cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
59
}
60
61
return global;
62
}
63
64
/*
65
* Add this HPTE into the chain for the real page.
66
* Must be called with the chain locked; it unlocks the chain.
67
*/
68
void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
69
unsigned long *rmap, long pte_index, int realmode)
70
{
71
struct revmap_entry *head, *tail;
72
unsigned long i;
73
74
if (*rmap & KVMPPC_RMAP_PRESENT) {
75
i = *rmap & KVMPPC_RMAP_INDEX;
76
head = &kvm->arch.hpt.rev[i];
77
if (realmode)
78
head = real_vmalloc_addr(head);
79
tail = &kvm->arch.hpt.rev[head->back];
80
if (realmode)
81
tail = real_vmalloc_addr(tail);
82
rev->forw = i;
83
rev->back = head->back;
84
tail->forw = pte_index;
85
head->back = pte_index;
86
} else {
87
rev->forw = rev->back = pte_index;
88
*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
89
pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT;
90
}
91
unlock_rmap(rmap);
92
}
93
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
94
95
/* Update the dirty bitmap of a memslot */
96
void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
97
unsigned long gfn, unsigned long psize)
98
{
99
unsigned long npages;
100
101
if (!psize || !memslot->dirty_bitmap)
102
return;
103
npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE;
104
gfn -= memslot->base_gfn;
105
set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages);
106
}
107
EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map);
108
109
static void kvmppc_set_dirty_from_hpte(struct kvm *kvm,
110
unsigned long hpte_v, unsigned long hpte_gr)
111
{
112
struct kvm_memory_slot *memslot;
113
unsigned long gfn;
114
unsigned long psize;
115
116
psize = kvmppc_actual_pgsz(hpte_v, hpte_gr);
117
gfn = hpte_rpn(hpte_gr, psize);
118
memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
119
if (memslot && memslot->dirty_bitmap)
120
kvmppc_update_dirty_map(memslot, gfn, psize);
121
}
122
123
/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
124
static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
125
unsigned long hpte_gr,
126
struct kvm_memory_slot **memslotp,
127
unsigned long *gfnp)
128
{
129
struct kvm_memory_slot *memslot;
130
unsigned long *rmap;
131
unsigned long gfn;
132
133
gfn = hpte_rpn(hpte_gr, kvmppc_actual_pgsz(hpte_v, hpte_gr));
134
memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
135
if (memslotp)
136
*memslotp = memslot;
137
if (gfnp)
138
*gfnp = gfn;
139
if (!memslot)
140
return NULL;
141
142
rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
143
return rmap;
144
}
145
146
/* Remove this HPTE from the chain for a real page */
147
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
148
struct revmap_entry *rev,
149
unsigned long hpte_v, unsigned long hpte_r)
150
{
151
struct revmap_entry *next, *prev;
152
unsigned long ptel, head;
153
unsigned long *rmap;
154
unsigned long rcbits;
155
struct kvm_memory_slot *memslot;
156
unsigned long gfn;
157
158
rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
159
ptel = rev->guest_rpte |= rcbits;
160
rmap = revmap_for_hpte(kvm, hpte_v, ptel, &memslot, &gfn);
161
if (!rmap)
162
return;
163
lock_rmap(rmap);
164
165
head = *rmap & KVMPPC_RMAP_INDEX;
166
next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]);
167
prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]);
168
next->back = rev->back;
169
prev->forw = rev->forw;
170
if (head == pte_index) {
171
head = rev->forw;
172
if (head == pte_index)
173
*rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
174
else
175
*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
176
}
177
*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
178
if (rcbits & HPTE_R_C)
179
kvmppc_update_dirty_map(memslot, gfn,
180
kvmppc_actual_pgsz(hpte_v, hpte_r));
181
unlock_rmap(rmap);
182
}
183
184
long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
185
long pte_index, unsigned long pteh, unsigned long ptel,
186
pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
187
{
188
unsigned long i, pa, gpa, gfn, psize;
189
unsigned long slot_fn, hva;
190
__be64 *hpte;
191
struct revmap_entry *rev;
192
unsigned long g_ptel;
193
struct kvm_memory_slot *memslot;
194
unsigned hpage_shift;
195
bool is_ci;
196
unsigned long *rmap;
197
pte_t *ptep;
198
unsigned int writing;
199
unsigned long mmu_seq;
200
unsigned long rcbits;
201
202
if (kvm_is_radix(kvm))
203
return H_FUNCTION;
204
/*
205
* The HPTE gets used by compute_tlbie_rb() to set TLBIE bits, so
206
* these functions should work together -- must ensure a guest can not
207
* cause problems with the TLBIE that KVM executes.
208
*/
209
if ((pteh >> HPTE_V_SSIZE_SHIFT) & 0x2) {
210
/* B=0b1x is a reserved value, disallow it. */
211
return H_PARAMETER;
212
}
213
psize = kvmppc_actual_pgsz(pteh, ptel);
214
if (!psize)
215
return H_PARAMETER;
216
writing = hpte_is_writable(ptel);
217
pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
218
ptel &= ~HPTE_GR_RESERVED;
219
g_ptel = ptel;
220
221
/* used later to detect if we might have been invalidated */
222
mmu_seq = kvm->mmu_invalidate_seq;
223
smp_rmb();
224
225
/* Find the memslot (if any) for this address */
226
gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
227
gfn = gpa >> PAGE_SHIFT;
228
memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
229
pa = 0;
230
is_ci = false;
231
rmap = NULL;
232
if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
233
/* Emulated MMIO - mark this with key=31 */
234
pteh |= HPTE_V_ABSENT;
235
ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
236
goto do_insert;
237
}
238
239
/* Check if the requested page fits entirely in the memslot. */
240
if (!slot_is_aligned(memslot, psize))
241
return H_PARAMETER;
242
slot_fn = gfn - memslot->base_gfn;
243
rmap = &memslot->arch.rmap[slot_fn];
244
245
/* Translate to host virtual address */
246
hva = __gfn_to_hva_memslot(memslot, gfn);
247
248
arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
249
ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift);
250
if (ptep) {
251
pte_t pte;
252
unsigned int host_pte_size;
253
254
if (hpage_shift)
255
host_pte_size = 1ul << hpage_shift;
256
else
257
host_pte_size = PAGE_SIZE;
258
/*
259
* We should always find the guest page size
260
* to <= host page size, if host is using hugepage
261
*/
262
if (host_pte_size < psize) {
263
arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
264
return H_PARAMETER;
265
}
266
pte = kvmppc_read_update_linux_pte(ptep, writing);
267
if (pte_present(pte) && !pte_protnone(pte)) {
268
if (writing && !pte_write(pte))
269
/* make the actual HPTE be read-only */
270
ptel = hpte_make_readonly(ptel);
271
is_ci = pte_ci(pte);
272
pa = pte_pfn(pte) << PAGE_SHIFT;
273
pa |= hva & (host_pte_size - 1);
274
pa |= gpa & ~PAGE_MASK;
275
}
276
}
277
arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
278
279
ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1);
280
ptel |= pa;
281
282
if (pa)
283
pteh |= HPTE_V_VALID;
284
else {
285
pteh |= HPTE_V_ABSENT;
286
ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
287
}
288
289
/*If we had host pte mapping then Check WIMG */
290
if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
291
if (is_ci)
292
return H_PARAMETER;
293
/*
294
* Allow guest to map emulated device memory as
295
* uncacheable, but actually make it cacheable.
296
*/
297
ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
298
ptel |= HPTE_R_M;
299
}
300
301
/* Find and lock the HPTEG slot to use */
302
do_insert:
303
if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
304
return H_PARAMETER;
305
if (likely((flags & H_EXACT) == 0)) {
306
pte_index &= ~7UL;
307
hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
308
for (i = 0; i < 8; ++i) {
309
if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
310
try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
311
HPTE_V_ABSENT))
312
break;
313
hpte += 2;
314
}
315
if (i == 8) {
316
/*
317
* Since try_lock_hpte doesn't retry (not even stdcx.
318
* failures), it could be that there is a free slot
319
* but we transiently failed to lock it. Try again,
320
* actually locking each slot and checking it.
321
*/
322
hpte -= 16;
323
for (i = 0; i < 8; ++i) {
324
u64 pte;
325
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
326
cpu_relax();
327
pte = be64_to_cpu(hpte[0]);
328
if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
329
break;
330
__unlock_hpte(hpte, pte);
331
hpte += 2;
332
}
333
if (i == 8)
334
return H_PTEG_FULL;
335
}
336
pte_index += i;
337
} else {
338
hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
339
if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
340
HPTE_V_ABSENT)) {
341
/* Lock the slot and check again */
342
u64 pte;
343
344
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
345
cpu_relax();
346
pte = be64_to_cpu(hpte[0]);
347
if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
348
__unlock_hpte(hpte, pte);
349
return H_PTEG_FULL;
350
}
351
}
352
}
353
354
/* Save away the guest's idea of the second HPTE dword */
355
rev = &kvm->arch.hpt.rev[pte_index];
356
if (realmode)
357
rev = real_vmalloc_addr(rev);
358
if (rev) {
359
rev->guest_rpte = g_ptel;
360
note_hpte_modification(kvm, rev);
361
}
362
363
/* Link HPTE into reverse-map chain */
364
if (pteh & HPTE_V_VALID) {
365
if (realmode)
366
rmap = real_vmalloc_addr(rmap);
367
lock_rmap(rmap);
368
/* Check for pending invalidations under the rmap chain lock */
369
if (mmu_invalidate_retry(kvm, mmu_seq)) {
370
/* inval in progress, write a non-present HPTE */
371
pteh |= HPTE_V_ABSENT;
372
pteh &= ~HPTE_V_VALID;
373
ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
374
unlock_rmap(rmap);
375
} else {
376
kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
377
realmode);
378
/* Only set R/C in real HPTE if already set in *rmap */
379
rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
380
ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
381
}
382
}
383
384
/* Convert to new format on P9 */
385
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
386
ptel = hpte_old_to_new_r(pteh, ptel);
387
pteh = hpte_old_to_new_v(pteh);
388
}
389
hpte[1] = cpu_to_be64(ptel);
390
391
/* Write the first HPTE dword, unlocking the HPTE and making it valid */
392
eieio();
393
__unlock_hpte(hpte, pteh);
394
asm volatile("ptesync" : : : "memory");
395
396
*pte_idx_ret = pte_index;
397
return H_SUCCESS;
398
}
399
EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
400
401
long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
402
long pte_index, unsigned long pteh, unsigned long ptel)
403
{
404
return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
405
vcpu->arch.pgdir, true,
406
&vcpu->arch.regs.gpr[4]);
407
}
408
EXPORT_SYMBOL_GPL(kvmppc_h_enter);
409
410
/*
 * LOCK_TOKEN reads a 32-bit value from the current paca; which field the
 * read starts at depends on the byte order of the build.
 */
#if defined(__BIG_ENDIAN__)
#define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
#else
#define LOCK_TOKEN	(*(u32 *)(&get_paca()->paca_index))
#endif
415
416
static inline int is_mmio_hpte(unsigned long v, unsigned long r)
417
{
418
return ((v & HPTE_V_ABSENT) &&
419
(r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
420
(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
421
}
422
423
static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
424
{
425
426
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
427
/* Radix flush for a hash guest */
428
429
unsigned long rb,rs,prs,r,ric;
430
431
rb = PPC_BIT(52); /* IS = 2 */
432
rs = 0; /* lpid = 0 */
433
prs = 0; /* partition scoped */
434
r = 1; /* radix format */
435
ric = 0; /* RIC_FLSUH_TLB */
436
437
/*
438
* Need the extra ptesync to make sure we don't
439
* re-order the tlbie
440
*/
441
asm volatile("ptesync": : :"memory");
442
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
443
: : "r"(rb), "i"(r), "i"(prs),
444
"i"(ric), "r"(rs) : "memory");
445
}
446
447
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
448
asm volatile("ptesync": : :"memory");
449
asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
450
"r" (rb_value), "r" (lpid));
451
}
452
}
453
454
/*
 * Invalidate @npages TLB entries described by the RB values in @rbvalues.
 *
 * @global:    non-zero to use tlbie (followed by the P9 fixups and
 *             eieio; tlbsync; ptesync), zero to use tlbiel followed by
 *             ptesync.
 * @need_sync: issue a ptesync before the first invalidation.
 *
 * We use the POWER9 5-operand versions of tlbie and tlbiel here.
 * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores
 * the RS field, this is backwards-compatible with P7 and P8.
 */
static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
		      long npages, int global, bool need_sync)
{
	long pg;

	if (need_sync)
		asm volatile("ptesync" : : : "memory");

	if (!global) {
		for (pg = 0; pg < npages; ++pg) {
			asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
				     "r" (rbvalues[pg]), "r" (0));
		}
		asm volatile("ptesync" : : : "memory");
		return;
	}

	for (pg = 0; pg < npages; ++pg) {
		asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
			     "r" (rbvalues[pg]), "r" (kvm->arch.lpid));
	}

	/* The fixup re-uses the RB value of the last page invalidated. */
	fixup_tlbie_lpid(rbvalues[pg - 1], kvm->arch.lpid);
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
484
485
long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
486
unsigned long pte_index, unsigned long avpn,
487
unsigned long *hpret)
488
{
489
__be64 *hpte;
490
unsigned long v, r, rb;
491
struct revmap_entry *rev;
492
u64 pte, orig_pte, pte_r;
493
494
if (kvm_is_radix(kvm))
495
return H_FUNCTION;
496
if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
497
return H_PARAMETER;
498
hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
499
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
500
cpu_relax();
501
pte = orig_pte = be64_to_cpu(hpte[0]);
502
pte_r = be64_to_cpu(hpte[1]);
503
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
504
pte = hpte_new_to_old_v(pte, pte_r);
505
pte_r = hpte_new_to_old_r(pte_r);
506
}
507
if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
508
((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
509
((flags & H_ANDCOND) && (pte & avpn) != 0)) {
510
__unlock_hpte(hpte, orig_pte);
511
return H_NOT_FOUND;
512
}
513
514
rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
515
v = pte & ~HPTE_V_HVLOCK;
516
if (v & HPTE_V_VALID) {
517
hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
518
rb = compute_tlbie_rb(v, pte_r, pte_index);
519
do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
520
/*
521
* The reference (R) and change (C) bits in a HPT
522
* entry can be set by hardware at any time up until
523
* the HPTE is invalidated and the TLB invalidation
524
* sequence has completed. This means that when
525
* removing a HPTE, we need to re-read the HPTE after
526
* the invalidation sequence has completed in order to
527
* obtain reliable values of R and C.
528
*/
529
remove_revmap_chain(kvm, pte_index, rev, v,
530
be64_to_cpu(hpte[1]));
531
}
532
r = rev->guest_rpte & ~HPTE_GR_RESERVED;
533
note_hpte_modification(kvm, rev);
534
unlock_hpte(hpte, 0);
535
536
if (is_mmio_hpte(v, pte_r))
537
atomic64_inc(&kvm->arch.mmio_update);
538
539
if (v & HPTE_V_ABSENT)
540
v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
541
hpret[0] = v;
542
hpret[1] = r;
543
return H_SUCCESS;
544
}
545
EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);
546
547
long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
548
unsigned long pte_index, unsigned long avpn)
549
{
550
return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
551
&vcpu->arch.regs.gpr[4]);
552
}
553
EXPORT_SYMBOL_GPL(kvmppc_h_remove);
554
555
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
556
{
557
struct kvm *kvm = vcpu->kvm;
558
unsigned long *args = &vcpu->arch.regs.gpr[4];
559
__be64 *hp, *hptes[4];
560
unsigned long tlbrb[4];
561
long int i, j, k, n, found, indexes[4];
562
unsigned long flags, req, pte_index, rcbits;
563
int global;
564
long int ret = H_SUCCESS;
565
struct revmap_entry *rev, *revs[4];
566
u64 hp0, hp1;
567
568
if (kvm_is_radix(kvm))
569
return H_FUNCTION;
570
global = global_invalidates(kvm);
571
for (i = 0; i < 4 && ret == H_SUCCESS; ) {
572
n = 0;
573
for (; i < 4; ++i) {
574
j = i * 2;
575
pte_index = args[j];
576
flags = pte_index >> 56;
577
pte_index &= ((1ul << 56) - 1);
578
req = flags >> 6;
579
flags &= 3;
580
if (req == 3) { /* no more requests */
581
i = 4;
582
break;
583
}
584
if (req != 1 || flags == 3 ||
585
pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
586
/* parameter error */
587
args[j] = ((0xa0 | flags) << 56) + pte_index;
588
ret = H_PARAMETER;
589
break;
590
}
591
hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4));
592
/* to avoid deadlock, don't spin except for first */
593
if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
594
if (n)
595
break;
596
while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
597
cpu_relax();
598
}
599
found = 0;
600
hp0 = be64_to_cpu(hp[0]);
601
hp1 = be64_to_cpu(hp[1]);
602
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
603
hp0 = hpte_new_to_old_v(hp0, hp1);
604
hp1 = hpte_new_to_old_r(hp1);
605
}
606
if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
607
switch (flags & 3) {
608
case 0: /* absolute */
609
found = 1;
610
break;
611
case 1: /* andcond */
612
if (!(hp0 & args[j + 1]))
613
found = 1;
614
break;
615
case 2: /* AVPN */
616
if ((hp0 & ~0x7fUL) == args[j + 1])
617
found = 1;
618
break;
619
}
620
}
621
if (!found) {
622
hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
623
args[j] = ((0x90 | flags) << 56) + pte_index;
624
continue;
625
}
626
627
args[j] = ((0x80 | flags) << 56) + pte_index;
628
rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
629
note_hpte_modification(kvm, rev);
630
631
if (!(hp0 & HPTE_V_VALID)) {
632
/* insert R and C bits from PTE */
633
rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
634
args[j] |= rcbits << (56 - 5);
635
hp[0] = 0;
636
if (is_mmio_hpte(hp0, hp1))
637
atomic64_inc(&kvm->arch.mmio_update);
638
continue;
639
}
640
641
/* leave it locked */
642
hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
643
tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index);
644
indexes[n] = j;
645
hptes[n] = hp;
646
revs[n] = rev;
647
++n;
648
}
649
650
if (!n)
651
break;
652
653
/* Now that we've collected a batch, do the tlbies */
654
do_tlbies(kvm, tlbrb, n, global, true);
655
656
/* Read PTE low words after tlbie to get final R/C values */
657
for (k = 0; k < n; ++k) {
658
j = indexes[k];
659
pte_index = args[j] & ((1ul << 56) - 1);
660
hp = hptes[k];
661
rev = revs[k];
662
remove_revmap_chain(kvm, pte_index, rev,
663
be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
664
rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
665
args[j] |= rcbits << (56 - 5);
666
__unlock_hpte(hp, 0);
667
}
668
}
669
670
return ret;
671
}
672
EXPORT_SYMBOL_GPL(kvmppc_h_bulk_remove);
673
674
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
675
unsigned long pte_index, unsigned long avpn)
676
{
677
struct kvm *kvm = vcpu->kvm;
678
__be64 *hpte;
679
struct revmap_entry *rev;
680
unsigned long v, r, rb, mask, bits;
681
u64 pte_v, pte_r;
682
683
if (kvm_is_radix(kvm))
684
return H_FUNCTION;
685
if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
686
return H_PARAMETER;
687
688
hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
689
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
690
cpu_relax();
691
v = pte_v = be64_to_cpu(hpte[0]);
692
if (cpu_has_feature(CPU_FTR_ARCH_300))
693
v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1]));
694
if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
695
((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) {
696
__unlock_hpte(hpte, pte_v);
697
return H_NOT_FOUND;
698
}
699
700
pte_r = be64_to_cpu(hpte[1]);
701
bits = (flags << 55) & HPTE_R_PP0;
702
bits |= (flags << 48) & HPTE_R_KEY_HI;
703
bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
704
705
/* Update guest view of 2nd HPTE dword */
706
mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
707
HPTE_R_KEY_HI | HPTE_R_KEY_LO;
708
rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
709
if (rev) {
710
r = (rev->guest_rpte & ~mask) | bits;
711
rev->guest_rpte = r;
712
note_hpte_modification(kvm, rev);
713
}
714
715
/* Update HPTE */
716
if (v & HPTE_V_VALID) {
717
/*
718
* If the page is valid, don't let it transition from
719
* readonly to writable. If it should be writable, we'll
720
* take a trap and let the page fault code sort it out.
721
*/
722
r = (pte_r & ~mask) | bits;
723
if (hpte_is_writable(r) && !hpte_is_writable(pte_r))
724
r = hpte_make_readonly(r);
725
/* If the PTE is changing, invalidate it first */
726
if (r != pte_r) {
727
rb = compute_tlbie_rb(v, r, pte_index);
728
hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
729
HPTE_V_ABSENT);
730
do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
731
/* Don't lose R/C bit updates done by hardware */
732
r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
733
hpte[1] = cpu_to_be64(r);
734
}
735
}
736
unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK);
737
asm volatile("ptesync" : : : "memory");
738
if (is_mmio_hpte(v, pte_r))
739
atomic64_inc(&kvm->arch.mmio_update);
740
741
return H_SUCCESS;
742
}
743
EXPORT_SYMBOL_GPL(kvmppc_h_protect);
744
745
long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
746
unsigned long pte_index)
747
{
748
struct kvm *kvm = vcpu->kvm;
749
__be64 *hpte;
750
unsigned long v, r;
751
int i, n = 1;
752
struct revmap_entry *rev = NULL;
753
754
if (kvm_is_radix(kvm))
755
return H_FUNCTION;
756
if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
757
return H_PARAMETER;
758
if (flags & H_READ_4) {
759
pte_index &= ~3;
760
n = 4;
761
}
762
rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
763
for (i = 0; i < n; ++i, ++pte_index) {
764
hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
765
v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
766
r = be64_to_cpu(hpte[1]);
767
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
768
v = hpte_new_to_old_v(v, r);
769
r = hpte_new_to_old_r(r);
770
}
771
if (v & HPTE_V_ABSENT) {
772
v &= ~HPTE_V_ABSENT;
773
v |= HPTE_V_VALID;
774
}
775
if (v & HPTE_V_VALID) {
776
r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
777
r &= ~HPTE_GR_RESERVED;
778
}
779
kvmppc_set_gpr(vcpu, 4 + i * 2, v);
780
kvmppc_set_gpr(vcpu, 5 + i * 2, r);
781
}
782
return H_SUCCESS;
783
}
784
EXPORT_SYMBOL_GPL(kvmppc_h_read);
785
786
long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
787
unsigned long pte_index)
788
{
789
struct kvm *kvm = vcpu->kvm;
790
__be64 *hpte;
791
unsigned long v, r, gr;
792
struct revmap_entry *rev;
793
unsigned long *rmap;
794
long ret = H_NOT_FOUND;
795
796
if (kvm_is_radix(kvm))
797
return H_FUNCTION;
798
if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
799
return H_PARAMETER;
800
801
rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
802
hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
803
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
804
cpu_relax();
805
v = be64_to_cpu(hpte[0]);
806
r = be64_to_cpu(hpte[1]);
807
if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
808
goto out;
809
810
gr = rev->guest_rpte;
811
if (rev->guest_rpte & HPTE_R_R) {
812
rev->guest_rpte &= ~HPTE_R_R;
813
note_hpte_modification(kvm, rev);
814
}
815
if (v & HPTE_V_VALID) {
816
gr |= r & (HPTE_R_R | HPTE_R_C);
817
if (r & HPTE_R_R) {
818
kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
819
rmap = revmap_for_hpte(kvm, v, gr, NULL, NULL);
820
if (rmap) {
821
lock_rmap(rmap);
822
*rmap |= KVMPPC_RMAP_REFERENCED;
823
unlock_rmap(rmap);
824
}
825
}
826
}
827
kvmppc_set_gpr(vcpu, 4, gr);
828
ret = H_SUCCESS;
829
out:
830
unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
831
return ret;
832
}
833
EXPORT_SYMBOL_GPL(kvmppc_h_clear_ref);
834
835
long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
836
unsigned long pte_index)
837
{
838
struct kvm *kvm = vcpu->kvm;
839
__be64 *hpte;
840
unsigned long v, r, gr;
841
struct revmap_entry *rev;
842
long ret = H_NOT_FOUND;
843
844
if (kvm_is_radix(kvm))
845
return H_FUNCTION;
846
if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
847
return H_PARAMETER;
848
849
rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
850
hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
851
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
852
cpu_relax();
853
v = be64_to_cpu(hpte[0]);
854
r = be64_to_cpu(hpte[1]);
855
if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
856
goto out;
857
858
gr = rev->guest_rpte;
859
if (gr & HPTE_R_C) {
860
rev->guest_rpte &= ~HPTE_R_C;
861
note_hpte_modification(kvm, rev);
862
}
863
if (v & HPTE_V_VALID) {
864
/* need to make it temporarily absent so C is stable */
865
hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
866
kvmppc_invalidate_hpte(kvm, hpte, pte_index);
867
r = be64_to_cpu(hpte[1]);
868
gr |= r & (HPTE_R_R | HPTE_R_C);
869
if (r & HPTE_R_C) {
870
hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
871
eieio();
872
kvmppc_set_dirty_from_hpte(kvm, v, gr);
873
}
874
}
875
kvmppc_set_gpr(vcpu, 4, gr);
876
ret = H_SUCCESS;
877
out:
878
unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
879
return ret;
880
}
881
EXPORT_SYMBOL_GPL(kvmppc_h_clear_mod);
882
883
static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
884
unsigned long gpa, int writing, unsigned long *hpa,
885
struct kvm_memory_slot **memslot_p)
886
{
887
struct kvm *kvm = vcpu->kvm;
888
struct kvm_memory_slot *memslot;
889
unsigned long gfn, hva, pa, psize = PAGE_SHIFT;
890
unsigned int shift;
891
pte_t *ptep, pte;
892
893
/* Find the memslot for this address */
894
gfn = gpa >> PAGE_SHIFT;
895
memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
896
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
897
return H_PARAMETER;
898
899
/* Translate to host virtual address */
900
hva = __gfn_to_hva_memslot(memslot, gfn);
901
902
/* Try to find the host pte for that virtual address */
903
ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
904
if (!ptep)
905
return H_TOO_HARD;
906
pte = kvmppc_read_update_linux_pte(ptep, writing);
907
if (!pte_present(pte))
908
return H_TOO_HARD;
909
910
/* Convert to a physical address */
911
if (shift)
912
psize = 1UL << shift;
913
pa = pte_pfn(pte) << PAGE_SHIFT;
914
pa |= hva & (psize - 1);
915
pa |= gpa & ~PAGE_MASK;
916
917
if (hpa)
918
*hpa = pa;
919
if (memslot_p)
920
*memslot_p = memslot;
921
922
return H_SUCCESS;
923
}
924
925
static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu,
926
unsigned long dest)
927
{
928
struct kvm_memory_slot *memslot;
929
struct kvm *kvm = vcpu->kvm;
930
unsigned long pa, mmu_seq;
931
long ret = H_SUCCESS;
932
int i;
933
934
/* Used later to detect if we might have been invalidated */
935
mmu_seq = kvm->mmu_invalidate_seq;
936
smp_rmb();
937
938
arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
939
940
ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &pa, &memslot);
941
if (ret != H_SUCCESS)
942
goto out_unlock;
943
944
/* Zero the page */
945
for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES)
946
dcbz((void *)pa);
947
kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE);
948
949
out_unlock:
950
arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
951
return ret;
952
}
953
954
static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu,
955
unsigned long dest, unsigned long src)
956
{
957
unsigned long dest_pa, src_pa, mmu_seq;
958
struct kvm_memory_slot *dest_memslot;
959
struct kvm *kvm = vcpu->kvm;
960
long ret = H_SUCCESS;
961
962
/* Used later to detect if we might have been invalidated */
963
mmu_seq = kvm->mmu_invalidate_seq;
964
smp_rmb();
965
966
arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
967
ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &dest_pa, &dest_memslot);
968
if (ret != H_SUCCESS)
969
goto out_unlock;
970
971
ret = kvmppc_get_hpa(vcpu, mmu_seq, src, 0, &src_pa, NULL);
972
if (ret != H_SUCCESS)
973
goto out_unlock;
974
975
/* Copy the page */
976
memcpy((void *)dest_pa, (void *)src_pa, SZ_4K);
977
978
kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE);
979
980
out_unlock:
981
arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
982
return ret;
983
}
984
985
long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
986
unsigned long dest, unsigned long src)
987
{
988
struct kvm *kvm = vcpu->kvm;
989
u64 pg_mask = SZ_4K - 1; /* 4K page size */
990
long ret = H_SUCCESS;
991
992
/* Don't handle radix mode here, go up to the virtual mode handler */
993
if (kvm_is_radix(kvm))
994
return H_TOO_HARD;
995
996
/* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
997
if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
998
H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
999
return H_PARAMETER;
1000
1001
/* dest (and src if copy_page flag set) must be page aligned */
1002
if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
1003
return H_PARAMETER;
1004
1005
/* zero and/or copy the page as determined by the flags */
1006
if (flags & H_COPY_PAGE)
1007
ret = kvmppc_do_h_page_init_copy(vcpu, dest, src);
1008
else if (flags & H_ZERO_PAGE)
1009
ret = kvmppc_do_h_page_init_zero(vcpu, dest);
1010
1011
/* We can ignore the other flags */
1012
1013
return ret;
1014
}
1015
1016
/*
 * Clear HPTE_V_VALID in the HPTE at @hptep and invalidate its translation
 * with a global tlbie (preceded by a ptesync).
 */
void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
			unsigned long pte_index)
{
	unsigned long rb;
	u64 v, r;

	hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);

	v = be64_to_cpu(hptep[0]);
	r = be64_to_cpu(hptep[1]);
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		/* compute_tlbie_rb() is given the old-format words. */
		v = hpte_new_to_old_v(v, r);
		r = hpte_new_to_old_r(r);
	}

	rb = compute_tlbie_rb(v, r, pte_index);
	do_tlbies(kvm, &rb, 1, 1, true);
}
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
1033
1034
/*
 * Clear the reference (R) bit in the HPTE at @hptep with a single byte
 * store, then invalidate its translation with a global tlbie (without a
 * leading ptesync).
 */
void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
			   unsigned long pte_index)
{
	unsigned long rb;
	unsigned char ref_byte;
	u64 v, r;

	v = be64_to_cpu(hptep[0]);
	r = be64_to_cpu(hptep[1]);
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		v = hpte_new_to_old_v(v, r);
		r = hpte_new_to_old_r(r);
	}
	rb = compute_tlbie_rb(v, r, pte_index);

	/* The byte is taken from a fresh read of the second dword. */
	ref_byte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
	/* modify only the second-last byte, which contains the ref bit */
	*((char *)hptep + 14) = ref_byte;

	do_tlbies(kvm, &rb, 1, 1, false);
}
EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);
1054
1055
/*
 * Base page shift for each value of the two-bit field extracted with
 * (slb_v & SLB_VSID_LP) >> 4.  Read-only lookup table.
 */
static const int slb_base_page_shift[4] = {
	24,	/* 16M */
	16,	/* 64k */
	34,	/* 16G */
	20,	/* 1M, unsupported */
};
1061
1062
static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu,
1063
unsigned long eaddr, unsigned long slb_v, long mmio_update)
1064
{
1065
struct mmio_hpte_cache_entry *entry = NULL;
1066
unsigned int pshift;
1067
unsigned int i;
1068
1069
for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) {
1070
entry = &vcpu->arch.mmio_cache.entry[i];
1071
if (entry->mmio_update == mmio_update) {
1072
pshift = entry->slb_base_pshift;
1073
if ((entry->eaddr >> pshift) == (eaddr >> pshift) &&
1074
entry->slb_v == slb_v)
1075
return entry;
1076
}
1077
}
1078
return NULL;
1079
}
1080
1081
static struct mmio_hpte_cache_entry *
1082
next_mmio_cache_entry(struct kvm_vcpu *vcpu)
1083
{
1084
unsigned int index = vcpu->arch.mmio_cache.index;
1085
1086
vcpu->arch.mmio_cache.index++;
1087
if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE)
1088
vcpu->arch.mmio_cache.index = 0;
1089
1090
return &vcpu->arch.mmio_cache.entry[index];
1091
}
1092
1093
/* When called from virtmode, this func should be protected by
1094
* preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK
1095
* can trigger deadlock issue.
1096
*/
1097
long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
1098
unsigned long valid)
1099
{
1100
unsigned int i;
1101
unsigned int pshift;
1102
unsigned long somask;
1103
unsigned long vsid, hash;
1104
unsigned long avpn;
1105
__be64 *hpte;
1106
unsigned long mask, val;
1107
unsigned long v, r, orig_v;
1108
1109
/* Get page shift, work out hash and AVPN etc. */
1110
mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
1111
val = 0;
1112
pshift = 12;
1113
if (slb_v & SLB_VSID_L) {
1114
mask |= HPTE_V_LARGE;
1115
val |= HPTE_V_LARGE;
1116
pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
1117
}
1118
if (slb_v & SLB_VSID_B_1T) {
1119
somask = (1UL << 40) - 1;
1120
vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
1121
vsid ^= vsid << 25;
1122
} else {
1123
somask = (1UL << 28) - 1;
1124
vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
1125
}
1126
hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt);
1127
avpn = slb_v & ~(somask >> 16); /* also includes B */
1128
avpn |= (eaddr & somask) >> 16;
1129
1130
if (pshift >= 24)
1131
avpn &= ~((1UL << (pshift - 16)) - 1);
1132
else
1133
avpn &= ~0x7fUL;
1134
val |= avpn;
1135
1136
for (;;) {
1137
hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7));
1138
1139
for (i = 0; i < 16; i += 2) {
1140
/* Read the PTE racily */
1141
v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
1142
if (cpu_has_feature(CPU_FTR_ARCH_300))
1143
v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1]));
1144
1145
/* Check valid/absent, hash, segment size and AVPN */
1146
if (!(v & valid) || (v & mask) != val)
1147
continue;
1148
1149
/* Lock the PTE and read it under the lock */
1150
while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
1151
cpu_relax();
1152
v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
1153
r = be64_to_cpu(hpte[i+1]);
1154
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
1155
v = hpte_new_to_old_v(v, r);
1156
r = hpte_new_to_old_r(r);
1157
}
1158
1159
/*
1160
* Check the HPTE again, including base page size
1161
*/
1162
if ((v & valid) && (v & mask) == val &&
1163
kvmppc_hpte_base_page_shift(v, r) == pshift)
1164
/* Return with the HPTE still locked */
1165
return (hash << 3) + (i >> 1);
1166
1167
__unlock_hpte(&hpte[i], orig_v);
1168
}
1169
1170
if (val & HPTE_V_SECONDARY)
1171
break;
1172
val |= HPTE_V_SECONDARY;
1173
hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt);
1174
}
1175
return -1;
1176
}
1177
EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
1178
1179
/*
1180
* Called in real mode to check whether an HPTE not found fault
1181
* is due to accessing a paged-out page or an emulated MMIO page,
1182
* or if a protection fault is due to accessing a page that the
1183
* guest wanted read/write access to but which we made read-only.
1184
* Returns a possibly modified status (DSISR) value if not
1185
* (i.e. pass the interrupt to the guest),
1186
* -1 to pass the fault up to host kernel mode code, -2 to do that
1187
* and also load the instruction word (for MMIO emulation),
1188
* or 0 if we should make the guest retry the access.
1189
*/
1190
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
1191
unsigned long slb_v, unsigned int status, bool data)
1192
{
1193
struct kvm *kvm = vcpu->kvm;
1194
long int index;
1195
unsigned long v, r, gr, orig_v;
1196
__be64 *hpte;
1197
unsigned long valid;
1198
struct revmap_entry *rev;
1199
unsigned long pp, key;
1200
struct mmio_hpte_cache_entry *cache_entry = NULL;
1201
long mmio_update = 0;
1202
1203
/* For protection fault, expect to find a valid HPTE */
1204
valid = HPTE_V_VALID;
1205
if (status & DSISR_NOHPTE) {
1206
valid |= HPTE_V_ABSENT;
1207
mmio_update = atomic64_read(&kvm->arch.mmio_update);
1208
cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update);
1209
}
1210
if (cache_entry) {
1211
index = cache_entry->pte_index;
1212
v = cache_entry->hpte_v;
1213
r = cache_entry->hpte_r;
1214
gr = cache_entry->rpte;
1215
} else {
1216
index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
1217
if (index < 0) {
1218
if (status & DSISR_NOHPTE)
1219
return status; /* there really was no HPTE */
1220
return 0; /* for prot fault, HPTE disappeared */
1221
}
1222
hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
1223
v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
1224
r = be64_to_cpu(hpte[1]);
1225
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
1226
v = hpte_new_to_old_v(v, r);
1227
r = hpte_new_to_old_r(r);
1228
}
1229
rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]);
1230
gr = rev->guest_rpte;
1231
1232
unlock_hpte(hpte, orig_v);
1233
}
1234
1235
/* For not found, if the HPTE is valid by now, retry the instruction */
1236
if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
1237
return 0;
1238
1239
/* Check access permissions to the page */
1240
pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
1241
key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
1242
status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */
1243
if (!data) {
1244
if (gr & (HPTE_R_N | HPTE_R_G))
1245
return status | SRR1_ISI_N_G_OR_CIP;
1246
if (!hpte_read_permission(pp, slb_v & key))
1247
return status | SRR1_ISI_PROT;
1248
} else if (status & DSISR_ISSTORE) {
1249
/* check write permission */
1250
if (!hpte_write_permission(pp, slb_v & key))
1251
return status | DSISR_PROTFAULT;
1252
} else {
1253
if (!hpte_read_permission(pp, slb_v & key))
1254
return status | DSISR_PROTFAULT;
1255
}
1256
1257
/* Check storage key, if applicable */
1258
if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
1259
unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
1260
if (status & DSISR_ISSTORE)
1261
perm >>= 1;
1262
if (perm & 1)
1263
return status | DSISR_KEYFAULT;
1264
}
1265
1266
/* Save HPTE info for virtual-mode handler */
1267
vcpu->arch.pgfault_addr = addr;
1268
vcpu->arch.pgfault_index = index;
1269
vcpu->arch.pgfault_hpte[0] = v;
1270
vcpu->arch.pgfault_hpte[1] = r;
1271
vcpu->arch.pgfault_cache = cache_entry;
1272
1273
/* Check the storage key to see if it is possibly emulated MMIO */
1274
if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
1275
(HPTE_R_KEY_HI | HPTE_R_KEY_LO)) {
1276
if (!cache_entry) {
1277
unsigned int pshift = 12;
1278
unsigned int pshift_index;
1279
1280
if (slb_v & SLB_VSID_L) {
1281
pshift_index = ((slb_v & SLB_VSID_LP) >> 4);
1282
pshift = slb_base_page_shift[pshift_index];
1283
}
1284
cache_entry = next_mmio_cache_entry(vcpu);
1285
cache_entry->eaddr = addr;
1286
cache_entry->slb_base_pshift = pshift;
1287
cache_entry->pte_index = index;
1288
cache_entry->hpte_v = v;
1289
cache_entry->hpte_r = r;
1290
cache_entry->rpte = gr;
1291
cache_entry->slb_v = slb_v;
1292
cache_entry->mmio_update = mmio_update;
1293
}
1294
if (data && (vcpu->arch.shregs.msr & MSR_IR))
1295
return -2; /* MMIO emulation - load instr word */
1296
}
1297
1298
return -1; /* send fault up to host kernel mode */
1299
}
1300
EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault);
1301
1302