GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/kvm/book3s_hv_p9_entry.c
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <asm/asm-prototypes.h>
#include <asm/dbell.h>
#include <asm/ppc-opcode.h>

#include "book3s_hv.h"

static void load_spr_state(struct kvm_vcpu *vcpu,
			   struct p9_host_os_sprs *host_os_sprs)
{
	/* TAR is very fast */
	mtspr(SPRN_TAR, vcpu->arch.tar);

#ifdef CONFIG_ALTIVEC
	if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
	    current->thread.vrsave != vcpu->arch.vrsave)
		mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
#endif

	if (vcpu->arch.hfscr & HFSCR_EBB) {
		if (current->thread.ebbhr != vcpu->arch.ebbhr)
			mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
		if (current->thread.ebbrr != vcpu->arch.ebbrr)
			mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
		if (current->thread.bescr != vcpu->arch.bescr)
			mtspr(SPRN_BESCR, vcpu->arch.bescr);
	}

	if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
	    current->thread.tidr != vcpu->arch.tid)
		mtspr(SPRN_TIDR, vcpu->arch.tid);
	if (host_os_sprs->iamr != vcpu->arch.iamr)
		mtspr(SPRN_IAMR, vcpu->arch.iamr);
	if (host_os_sprs->amr != vcpu->arch.amr)
		mtspr(SPRN_AMR, vcpu->arch.amr);
	if (vcpu->arch.uamor != 0)
		mtspr(SPRN_UAMOR, vcpu->arch.uamor);
	if (current->thread.fscr != vcpu->arch.fscr)
		mtspr(SPRN_FSCR, vcpu->arch.fscr);
	if (current->thread.dscr != vcpu->arch.dscr)
		mtspr(SPRN_DSCR, vcpu->arch.dscr);
	if (vcpu->arch.pspb != 0)
		mtspr(SPRN_PSPB, vcpu->arch.pspb);

	/*
	 * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
	 * clear (or hstate set appropriately to catch those registers
	 * being clobbered if we take a MCE or SRESET), so those are done
	 * later.
	 */

	if (!(vcpu->arch.ctrl & 1))
		mtspr(SPRN_CTRLT, 0);
}

static void store_spr_state(struct kvm_vcpu *vcpu)
{
	vcpu->arch.tar = mfspr(SPRN_TAR);

#ifdef CONFIG_ALTIVEC
	if (cpu_has_feature(CPU_FTR_ALTIVEC))
		vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
#endif

	if (vcpu->arch.hfscr & HFSCR_EBB) {
		vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
		vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
		vcpu->arch.bescr = mfspr(SPRN_BESCR);
	}

	if (cpu_has_feature(CPU_FTR_P9_TIDR))
		vcpu->arch.tid = mfspr(SPRN_TIDR);
	vcpu->arch.iamr = mfspr(SPRN_IAMR);
	vcpu->arch.amr = mfspr(SPRN_AMR);
	vcpu->arch.uamor = mfspr(SPRN_UAMOR);
	vcpu->arch.fscr = mfspr(SPRN_FSCR);
	vcpu->arch.dscr = mfspr(SPRN_DSCR);
	vcpu->arch.pspb = mfspr(SPRN_PSPB);

	vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
}

/* Returns true if current MSR and/or guest MSR may have changed */
bool load_vcpu_state(struct kvm_vcpu *vcpu,
		     struct p9_host_os_sprs *host_os_sprs)
{
	bool ret = false;

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	if (cpu_has_feature(CPU_FTR_TM) ||
	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
		unsigned long guest_msr = vcpu->arch.shregs.msr;
		if (MSR_TM_ACTIVE(guest_msr)) {
			kvmppc_restore_tm_hv(vcpu, guest_msr, true);
			ret = true;
		} else if (vcpu->arch.hfscr & HFSCR_TM) {
			mtspr(SPRN_TEXASR, vcpu->arch.texasr);
			mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
			mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
		}
	}
#endif

	load_spr_state(vcpu, host_os_sprs);

	load_fp_state(&vcpu->arch.fp);
#ifdef CONFIG_ALTIVEC
	load_vr_state(&vcpu->arch.vr);
#endif

	return ret;
}
EXPORT_SYMBOL_GPL(load_vcpu_state);

void store_vcpu_state(struct kvm_vcpu *vcpu)
{
	store_spr_state(vcpu);

	store_fp_state(&vcpu->arch.fp);
#ifdef CONFIG_ALTIVEC
	store_vr_state(&vcpu->arch.vr);
#endif

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	if (cpu_has_feature(CPU_FTR_TM) ||
	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
		unsigned long guest_msr = vcpu->arch.shregs.msr;
		if (MSR_TM_ACTIVE(guest_msr)) {
			kvmppc_save_tm_hv(vcpu, guest_msr, true);
		} else if (vcpu->arch.hfscr & HFSCR_TM) {
			vcpu->arch.texasr = mfspr(SPRN_TEXASR);
			vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
			vcpu->arch.tfiar = mfspr(SPRN_TFIAR);

			if (!vcpu->arch.nested) {
				vcpu->arch.load_tm++; /* see load_ebb comment */
				if (!vcpu->arch.load_tm)
					vcpu->arch.hfscr &= ~HFSCR_TM;
			}
		}
	}
#endif
}
EXPORT_SYMBOL_GPL(store_vcpu_state);
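
/*
 * Usage sketch (editor's addition, taken from kvmhv_vcpu_entry_p9() below):
 * load_vcpu_state() and store_vcpu_state() bracket the guest run, with the
 * same p9_host_os_sprs snapshot later handed to restore_p9_host_os_sprs():
 *
 *	save_p9_host_os_sprs(&host_os_sprs);
 *	...
 *	if (unlikely(load_vcpu_state(vcpu, &host_os_sprs)))
 *		msr = mfmsr();		// MSR may have been updated
 *	... switch MMU, enter guest, handle the exit ...
 *	store_vcpu_state(vcpu);
 *	...
 *	restore_p9_host_os_sprs(vcpu, &host_os_sprs);
 */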

void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
{
	host_os_sprs->iamr = mfspr(SPRN_IAMR);
	host_os_sprs->amr = mfspr(SPRN_AMR);
}
EXPORT_SYMBOL_GPL(save_p9_host_os_sprs);

/* vcpu guest regs must already be saved */
void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
			     struct p9_host_os_sprs *host_os_sprs)
{
	/*
	 * current->thread.xxx registers must all be restored to host
	 * values before a potential context switch, otherwise the context
	 * switch itself will overwrite current->thread.xxx with the values
	 * from the guest SPRs.
	 */

	mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);

	if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
	    current->thread.tidr != vcpu->arch.tid)
		mtspr(SPRN_TIDR, current->thread.tidr);
	if (host_os_sprs->iamr != vcpu->arch.iamr)
		mtspr(SPRN_IAMR, host_os_sprs->iamr);
	if (vcpu->arch.uamor != 0)
		mtspr(SPRN_UAMOR, 0);
	if (host_os_sprs->amr != vcpu->arch.amr)
		mtspr(SPRN_AMR, host_os_sprs->amr);
	if (current->thread.fscr != vcpu->arch.fscr)
		mtspr(SPRN_FSCR, current->thread.fscr);
	if (current->thread.dscr != vcpu->arch.dscr)
		mtspr(SPRN_DSCR, current->thread.dscr);
	if (vcpu->arch.pspb != 0)
		mtspr(SPRN_PSPB, 0);

	/* Guest CTRL was already saved in store_spr_state(); set runlatch back to 1 */
	if (!(vcpu->arch.ctrl & 1))
		mtspr(SPRN_CTRLT, 1);

#ifdef CONFIG_ALTIVEC
	if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
	    vcpu->arch.vrsave != current->thread.vrsave)
		mtspr(SPRN_VRSAVE, current->thread.vrsave);
#endif
	if (vcpu->arch.hfscr & HFSCR_EBB) {
		if (vcpu->arch.bescr != current->thread.bescr)
			mtspr(SPRN_BESCR, current->thread.bescr);
		if (vcpu->arch.ebbhr != current->thread.ebbhr)
			mtspr(SPRN_EBBHR, current->thread.ebbhr);
		if (vcpu->arch.ebbrr != current->thread.ebbrr)
			mtspr(SPRN_EBBRR, current->thread.ebbrr);

		if (!vcpu->arch.nested) {
			/*
			 * This is like load_fp in context switching: turn the
			 * facility off after the u8 counter wraps, to try to
			 * avoid saving and restoring these registers on every
			 * partition switch.
			 */
			vcpu->arch.load_ebb++;
			if (!vcpu->arch.load_ebb)
				vcpu->arch.hfscr &= ~HFSCR_EBB;
		}
	}

	if (vcpu->arch.tar != current->thread.tar)
		mtspr(SPRN_TAR, current->thread.tar);
}
EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs);
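
/*
 * Editor's note (assumption, not an original comment): the repeated
 *
 *	if (host_value != guest_value)
 *		mtspr(SPRN_X, host_value);
 *
 * pattern above and in load_spr_state() skips the mtspr entirely when the
 * host and guest values already match, avoiding SPR writes on the fast
 * entry/exit path.
 */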

#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING
void accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	struct kvmhv_tb_accumulator *curr;
	u64 tb = mftb() - vc->tb_offset_applied;
	u64 prev_tb;
	u64 delta;
	u64 seq;

	curr = vcpu->arch.cur_activity;
	vcpu->arch.cur_activity = next;
	prev_tb = vcpu->arch.cur_tb_start;
	vcpu->arch.cur_tb_start = tb;

	if (!curr)
		return;

	delta = tb - prev_tb;

	seq = curr->seqcount;
	curr->seqcount = seq + 1;
	smp_wmb();
	curr->tb_total += delta;
	if (seq == 0 || delta < curr->tb_min)
		curr->tb_min = delta;
	if (delta > curr->tb_max)
		curr->tb_max = delta;
	smp_wmb();
	curr->seqcount = seq + 2;
}
EXPORT_SYMBOL_GPL(accumulate_time);
#endif
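
/*
 * Reader-side sketch (editor's assumption, not part of the original file):
 * accumulate_time() above is the writer side of a seqlock-style protocol,
 * so a hypothetical consumer of a kvmhv_tb_accumulator would retry until it
 * observes a stable, even sequence count:
 *
 *	do {
 *		seq = READ_ONCE(acc->seqcount);
 *		smp_rmb();
 *		total = acc->tb_total;
 *		min = acc->tb_min;
 *		max = acc->tb_max;
 *		smp_rmb();
 *	} while ((seq & 1) || READ_ONCE(acc->seqcount) != seq);
 */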

static inline u64 mfslbv(unsigned int idx)
{
	u64 slbev;

	asm volatile("slbmfev %0,%1" : "=r" (slbev) : "r" (idx));

	return slbev;
}

static inline u64 mfslbe(unsigned int idx)
{
	u64 slbee;

	asm volatile("slbmfee %0,%1" : "=r" (slbee) : "r" (idx));

	return slbee;
}

static inline void mtslb(u64 slbee, u64 slbev)
{
	asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
}
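
/*
 * Editor's note (assumption, not an original comment): slbmte takes RS,RB
 * with RS holding the VSID half of the entry (what slbmfev reads back) and
 * RB the ESID/index half (what slbmfee reads back), which is why the asm
 * above passes (slbev, slbee) even though the C parameters read
 * (slbee, slbev).
 */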

static inline void clear_slb_entry(unsigned int idx)
{
	mtslb(idx, 0);
}

static inline void slb_clear_invalidate_partition(void)
{
	clear_slb_entry(0);
	asm volatile(PPC_SLBIA(6));
}

/*
 * Malicious or buggy radix guests may have inserted SLB entries
 * (only 0..3 because radix always runs with UPRT=1), so these must
 * be cleared here to avoid side-channels. slbmte is used rather
 * than slbia, as it won't clear cached translations.
 */
static void radix_clear_slb(void)
{
	int i;

	for (i = 0; i < 4; i++)
		clear_slb_entry(i);
}

static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
{
	struct kvm_nested_guest *nested = vcpu->arch.nested;
	u32 lpid;
	u32 pid;

	lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
	pid = kvmppc_get_pid(vcpu);

	/*
	 * Prior memory accesses to host PID Q3 must be completed before we
	 * start switching, and stores must be drained to avoid not-my-LPAR
	 * logic (see switch_mmu_to_host).
	 */
	asm volatile("hwsync" ::: "memory");
	isync();
	mtspr(SPRN_LPID, lpid);
	mtspr(SPRN_LPCR, lpcr);
	mtspr(SPRN_PID, pid);
	/*
	 * isync not required here because we are HRFID'ing to guest before
	 * any guest context access, which is context synchronising.
	 */
}

static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
{
	u32 lpid;
	u32 pid;
	int i;

	lpid = kvm->arch.lpid;
	pid = kvmppc_get_pid(vcpu);

	/*
	 * See switch_mmu_to_guest_radix. ptesync should not be required here
	 * even if the host is in HPT mode because speculative accesses would
	 * not cause RC updates (we are in real mode).
	 */
	asm volatile("hwsync" ::: "memory");
	isync();
	mtspr(SPRN_LPID, lpid);
	mtspr(SPRN_LPCR, lpcr);
	mtspr(SPRN_PID, pid);

	for (i = 0; i < vcpu->arch.slb_max; i++)
		mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
	/*
	 * isync not required here, see switch_mmu_to_guest_radix.
	 */
}

static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
{
	u32 lpid = kvm->arch.host_lpid;
	u64 lpcr = kvm->arch.host_lpcr;

	/*
	 * The guest has exited, so guest MMU context is no longer being
	 * non-speculatively accessed, but a hwsync is needed before the
	 * mtLPIDR / mtPIDR switch, in order to ensure all stores are drained,
	 * so the not-my-LPAR tlbie logic does not overlook them.
	 */
	asm volatile("hwsync" ::: "memory");
	isync();
	mtspr(SPRN_PID, pid);
	mtspr(SPRN_LPID, lpid);
	mtspr(SPRN_LPCR, lpcr);
	/*
	 * isync is not required after the switch, because mtmsrd with L=0
	 * is performed after this switch, which is context synchronising.
	 */

	if (!radix_enabled())
		slb_restore_bolted_realmode();
}

static void save_clear_host_mmu(struct kvm *kvm)
{
	if (!radix_enabled()) {
		/*
		 * Hash host could save and restore host SLB entries to
		 * reduce SLB fault overheads of VM exits, but for now the
		 * existing code clears all entries and restores just the
		 * bolted ones when switching back to host.
		 */
		slb_clear_invalidate_partition();
	}
}

static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
	if (kvm_is_radix(kvm)) {
		radix_clear_slb();
	} else {
		int i;
		int nr = 0;

		/*
		 * This must run before switching to host (radix host can't
		 * access all SLBs).
		 */
		for (i = 0; i < vcpu->arch.slb_nr; i++) {
			u64 slbee, slbev;

			slbee = mfslbe(i);
			if (slbee & SLB_ESID_V) {
				slbev = mfslbv(i);
				vcpu->arch.slb[nr].orige = slbee | i;
				vcpu->arch.slb[nr].origv = slbev;
				nr++;
			}
		}
		vcpu->arch.slb_max = nr;
		slb_clear_invalidate_partition();
	}
}
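
/*
 * Call-order sketch (editor's addition, taken from kvmhv_vcpu_entry_p9()
 * below): the guest SLB must be read out and cleared while the guest
 * partition context is still current, and only then is the MMU switched
 * back to the host:
 *
 *	save_clear_guest_mmu(kvm, vcpu);
 *	switch_mmu_to_host(kvm, host_pidr);
 */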

static void flush_guest_tlb(struct kvm *kvm)
{
	unsigned long rb, set;

	rb = PPC_BIT(52);	/* IS = 2 */
	if (kvm_is_radix(kvm)) {
		/* R=1 PRS=1 RIC=2 */
		asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
			     : : "r" (rb), "i" (1), "i" (1), "i" (2),
			       "r" (0) : "memory");
		for (set = 1; set < kvm->arch.tlb_sets; ++set) {
			rb += PPC_BIT(51);	/* increment set number */
			/* R=1 PRS=1 RIC=0 */
			asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
				     : : "r" (rb), "i" (1), "i" (1), "i" (0),
				       "r" (0) : "memory");
		}
		asm volatile("ptesync": : :"memory");
		// POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
		asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
	} else {
		for (set = 0; set < kvm->arch.tlb_sets; ++set) {
			/* R=0 PRS=0 RIC=0 */
			asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
				     : : "r" (rb), "i" (0), "i" (0), "i" (0),
				       "r" (0) : "memory");
			rb += PPC_BIT(51);	/* increment set number */
		}
		asm volatile("ptesync": : :"memory");
		// POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
		asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
	}
}

static void check_need_tlb_flush(struct kvm *kvm, int pcpu,
				 struct kvm_nested_guest *nested)
{
	cpumask_t *need_tlb_flush;
	bool all_set = true;
	int i;

	if (nested)
		need_tlb_flush = &nested->need_tlb_flush;
	else
		need_tlb_flush = &kvm->arch.need_tlb_flush;

	if (likely(!cpumask_test_cpu(pcpu, need_tlb_flush)))
		return;

	/*
	 * Individual threads can come in here, but the TLB is shared between
	 * the 4 threads in a core, hence invalidating on one thread
	 * invalidates for all, so only invalidate the first time (if all bits
	 * were set). The others must still execute a ptesync.
	 *
	 * If a race occurs and two threads do the TLB flush, that is not a
	 * problem, just sub-optimal.
	 */
	for (i = cpu_first_tlb_thread_sibling(pcpu);
	     i <= cpu_last_tlb_thread_sibling(pcpu);
	     i += cpu_tlb_thread_sibling_step()) {
		if (!cpumask_test_cpu(i, need_tlb_flush)) {
			all_set = false;
			break;
		}
	}
	if (all_set)
		flush_guest_tlb(kvm);
	else
		asm volatile("ptesync" ::: "memory");

	/* Clear the bit after the TLB flush */
	cpumask_clear_cpu(pcpu, need_tlb_flush);
}

unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr)
{
	unsigned long msr_needed = 0;

	msr &= ~MSR_EE;

	/* MSR bits may have been cleared by context switch so must recheck */
	if (IS_ENABLED(CONFIG_PPC_FPU))
		msr_needed |= MSR_FP;
	if (cpu_has_feature(CPU_FTR_ALTIVEC))
		msr_needed |= MSR_VEC;
	if (cpu_has_feature(CPU_FTR_VSX))
		msr_needed |= MSR_VSX;
	if ((cpu_has_feature(CPU_FTR_TM) ||
	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
			(vcpu->arch.hfscr & HFSCR_TM))
		msr_needed |= MSR_TM;

	/*
	 * This could be combined with MSR[RI] clearing, but that expands
	 * the unrecoverable window. It would be better to cover unrecoverable
	 * with KVM bad interrupt handling rather than use MSR[RI] at all.
	 *
	 * Much more difficult and less worthwhile to combine with IR/DR
	 * disable.
	 */
	if ((msr & msr_needed) != msr_needed) {
		msr |= msr_needed;
		__mtmsrd(msr, 0);
	} else {
		__hard_irq_disable();
	}
	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;

	return msr;
}
EXPORT_SYMBOL_GPL(kvmppc_msr_hard_disable_set_facilities);
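
/*
 * Call-pattern sketch (editor's addition, taken from kvmhv_vcpu_entry_p9()
 * below): the caller hard-disables interrupts and enables the needed guest
 * facilities in one MSR update, then bails out if a lazily-masked interrupt
 * became pending in the meantime:
 *
 *	msr = kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
 *	if (lazy_irq_pending()) {
 *		trap = 0;
 *		goto out;
 *	}
 */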

int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb)
{
	struct p9_host_os_sprs host_os_sprs;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *nested = vcpu->arch.nested;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	s64 hdec, dec;
	u64 purr, spurr;
	u64 *exsave;
	int trap;
	unsigned long msr;
	unsigned long host_hfscr;
	unsigned long host_ciabr;
	unsigned long host_dawr0;
	unsigned long host_dawrx0;
	unsigned long host_psscr;
	unsigned long host_hpsscr;
	unsigned long host_pidr;
	unsigned long host_dawr1;
	unsigned long host_dawrx1;
	unsigned long dpdes;

	hdec = time_limit - *tb;
	if (hdec < 0)
		return BOOK3S_INTERRUPT_HV_DECREMENTER;

	WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
	WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));

	vcpu->arch.ceded = 0;

	/* Save MSR for restore, with EE clear. */
	msr = mfmsr() & ~MSR_EE;

	host_hfscr = mfspr(SPRN_HFSCR);
	host_ciabr = mfspr(SPRN_CIABR);
	host_psscr = mfspr(SPRN_PSSCR_PR);
	if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
		host_hpsscr = mfspr(SPRN_PSSCR);
	host_pidr = mfspr(SPRN_PID);

	if (dawr_enabled()) {
		host_dawr0 = mfspr(SPRN_DAWR0);
		host_dawrx0 = mfspr(SPRN_DAWRX0);
		if (cpu_has_feature(CPU_FTR_DAWR1)) {
			host_dawr1 = mfspr(SPRN_DAWR1);
			host_dawrx1 = mfspr(SPRN_DAWRX1);
		}
	}

	local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
	local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);

	save_p9_host_os_sprs(&host_os_sprs);

	msr = kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
	if (lazy_irq_pending()) {
		trap = 0;
		goto out;
	}

	if (unlikely(load_vcpu_state(vcpu, &host_os_sprs)))
		msr = mfmsr(); /* MSR may have been updated */

	if (vc->tb_offset) {
		u64 new_tb = *tb + vc->tb_offset;
		mtspr(SPRN_TBU40, new_tb);
		if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
			new_tb += 0x1000000;
			mtspr(SPRN_TBU40, new_tb);
		}
		*tb = new_tb;
		vc->tb_offset_applied = vc->tb_offset;
	}
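	/*
	 * Editor's note (assumption, not an original comment): writing TBU40
	 * sets only the upper 40 bits of the timebase and leaves the low 24
	 * bits running. If the low 24 bits wrapped between sampling *tb and
	 * the write, the resulting timebase would be 2^24 short of the
	 * intended value, so new_tb is bumped by 0x1000000 and written
	 * again. The exit path below undoes the offset with the same pattern.
	 */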

	mtspr(SPRN_VTB, vc->vtb);
	mtspr(SPRN_PURR, vcpu->arch.purr);
	mtspr(SPRN_SPURR, vcpu->arch.spurr);

	if (vc->pcr)
		mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
	if (vcpu->arch.doorbell_request) {
		vcpu->arch.doorbell_request = 0;
		mtspr(SPRN_DPDES, 1);
	}

	if (dawr_enabled()) {
		if (vcpu->arch.dawr0 != host_dawr0)
			mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
		if (vcpu->arch.dawrx0 != host_dawrx0)
			mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
		if (cpu_has_feature(CPU_FTR_DAWR1)) {
			if (vcpu->arch.dawr1 != host_dawr1)
				mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
			if (vcpu->arch.dawrx1 != host_dawrx1)
				mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
		}
	}
	if (vcpu->arch.ciabr != host_ciabr)
		mtspr(SPRN_CIABR, vcpu->arch.ciabr);

	if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
		mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
		      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
	} else {
		if (vcpu->arch.psscr != host_psscr)
			mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
	}

	mtspr(SPRN_HFSCR, vcpu->arch.hfscr);

	mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
	mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);

	/*
	 * On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage
	 * Interrupt (HDSI) the HDSISR is not updated at all.
	 *
	 * To work around this we put a canary value into the HDSISR before
	 * returning to a guest and then check for this canary when we take a
	 * HDSI. If we find the canary on a HDSI, we know the hardware didn't
	 * update the HDSISR. In this case we return to the guest to retake the
	 * HDSI, which should correctly update the HDSISR on the second HDSI
	 * entry.
	 *
	 * The "radix prefetch bug" test can be used to test for this bug, as
	 * it also exists for DD2.1 and below.
	 */
	if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		mtspr(SPRN_HDSISR, HDSISR_CANARY);

	mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
	mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
	mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
	mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);

	/*
	 * It might be preferable to call load_vcpu_state() here, in order to
	 * get the GPR/FP register loads executing in parallel with the
	 * previous mtSPR instructions, but for now that can't be done because
	 * the TM handling in load_vcpu_state can change some SPRs and vcpu
	 * state (nip, msr). But TM could be split out if this would be a
	 * significant benefit.
	 */

	/*
	 * MSR[RI] does not need to be cleared (and is not, for radix guests
	 * with no prefetch bug), because in_guest is set. If we take a SRESET
	 * or MCE with in_guest set but still in HV mode, then
	 * kvmppc_p9_bad_interrupt handles the interrupt, which effectively
	 * clears MSR[RI] and doesn't return.
	 */
	WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_HV_P9);
	barrier(); /* Open in_guest critical section */

	/*
	 * Hash host, hash guest, or radix guest with prefetch bug, all have
	 * to disable the MMU before switching to guest MMU state.
	 */
	if (!radix_enabled() || !kvm_is_radix(kvm) ||
	    cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);

	save_clear_host_mmu(kvm);

	if (kvm_is_radix(kvm))
		switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
	else
		switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);

	/* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
	check_need_tlb_flush(kvm, vc->pcpu, nested);

	/*
	 * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
	 * so set guest LPCR (with HDICE) before writing HDEC.
	 */
	mtspr(SPRN_HDEC, hdec);

	mtspr(SPRN_DEC, vcpu->arch.dec_expires - *tb);

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
tm_return_to_guest:
#endif
	mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
	mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
	mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
	mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);

	switch_pmu_to_guest(vcpu, &host_os_sprs);
	accumulate_time(vcpu, &vcpu->arch.in_guest);

	kvmppc_p9_enter_guest(vcpu);

	accumulate_time(vcpu, &vcpu->arch.guest_exit);
	switch_pmu_to_host(vcpu, &host_os_sprs);

	/* XXX: Could get these from r11/12 and paca exsave instead */
	vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
	vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1);
	vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
	vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);

	/* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
	trap = local_paca->kvm_hstate.scratch0 & ~0x2;

	if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK))
		exsave = local_paca->exgen;
	else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET)
		exsave = local_paca->exnmi;
	else /* trap == 0x200 */
		exsave = local_paca->exmc;

	vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
	vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;

	/*
	 * After reading machine check regs (DAR, DSISR, SRR0/1) and hstate
	 * scratch (which we need to move into exsave to make re-entrant vs
	 * SRESET/MCE), register state is protected from reentrancy. However
	 * the timebase, MMU, and other state are still set to the guest, so
	 * don't enable MSR[RI] here. It gets enabled at the end, after
	 * in_guest is cleared.
	 *
	 * It is possible an NMI could come in here, which is why it is
	 * important to save the above state early so it can be debugged.
	 */

	vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
	vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
	vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
	vcpu->arch.regs.gpr[12] = exsave[EX_R12/sizeof(u64)];
	vcpu->arch.regs.gpr[13] = exsave[EX_R13/sizeof(u64)];
	vcpu->arch.ppr = exsave[EX_PPR/sizeof(u64)];
	vcpu->arch.cfar = exsave[EX_CFAR/sizeof(u64)];
	vcpu->arch.regs.ctr = exsave[EX_CTR/sizeof(u64)];

	vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;

	if (unlikely(trap == BOOK3S_INTERRUPT_MACHINE_CHECK)) {
		vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
		vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
		kvmppc_realmode_machine_check(vcpu);

	} else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
		kvmppc_p9_realmode_hmi_handler(vcpu);

	} else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
		vcpu->arch.emul_inst = mfspr(SPRN_HEIR);

	} else if (trap == BOOK3S_INTERRUPT_H_DATA_STORAGE) {
		vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
		vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
		vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);

	} else if (trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
		vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);

	} else if (trap == BOOK3S_INTERRUPT_H_FAC_UNAVAIL) {
		vcpu->arch.hfscr = mfspr(SPRN_HFSCR);

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	/*
	 * Softpatch interrupt for transactional memory emulation cases
	 * on POWER9 DD2.2. This is early in the guest exit path - we
	 * haven't saved registers or done a treclaim yet.
	 */
	} else if (trap == BOOK3S_INTERRUPT_HV_SOFTPATCH) {
		vcpu->arch.emul_inst = mfspr(SPRN_HEIR);

		/*
		 * The cases we want to handle here are those where the guest
		 * is in real suspend mode and is trying to transition to
		 * transactional mode.
		 */
		if (!local_paca->kvm_hstate.fake_suspend &&
		    (vcpu->arch.shregs.msr & MSR_TS_S)) {
			if (kvmhv_p9_tm_emulation_early(vcpu)) {
				/*
				 * Go straight back into the guest with the
				 * new NIP/MSR as set by TM emulation.
				 */
				mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
				mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr);
				goto tm_return_to_guest;
			}
		}
#endif
	}

	/* Advance host PURR/SPURR by the amount used by guest */
	purr = mfspr(SPRN_PURR);
	spurr = mfspr(SPRN_SPURR);
	local_paca->kvm_hstate.host_purr += purr - vcpu->arch.purr;
	local_paca->kvm_hstate.host_spurr += spurr - vcpu->arch.spurr;
	vcpu->arch.purr = purr;
	vcpu->arch.spurr = spurr;

	vcpu->arch.ic = mfspr(SPRN_IC);
	vcpu->arch.pid = mfspr(SPRN_PID);
	vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);

	vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
	vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
	vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
	vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);

	dpdes = mfspr(SPRN_DPDES);
	if (dpdes)
		vcpu->arch.doorbell_request = 1;

	vc->vtb = mfspr(SPRN_VTB);

	dec = mfspr(SPRN_DEC);
	if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
		dec = (s32) dec;
	*tb = mftb();
	vcpu->arch.dec_expires = dec + *tb;

	if (vc->tb_offset_applied) {
		u64 new_tb = *tb - vc->tb_offset_applied;
		mtspr(SPRN_TBU40, new_tb);
		if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
			new_tb += 0x1000000;
			mtspr(SPRN_TBU40, new_tb);
		}
		*tb = new_tb;
		vc->tb_offset_applied = 0;
	}

	save_clear_guest_mmu(kvm, vcpu);
	switch_mmu_to_host(kvm, host_pidr);

	/*
	 * Enable MSR bits here in order to have the facilities needed to
	 * save guest registers. This also re-enables the MMU (if we were in
	 * real mode), so it must only be done after the MMU has been
	 * switched back to the host, to avoid the P9_RADIX_PREFETCH_BUG or
	 * touching hash guest context.
	 */
	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
	    vcpu->arch.shregs.msr & MSR_TS_MASK)
		msr |= MSR_TS_S;
	__mtmsrd(msr, 0);

	store_vcpu_state(vcpu);

	mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr);
	mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr);

	if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
		/* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
		mtspr(SPRN_PSSCR, host_hpsscr |
		      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
	}

	mtspr(SPRN_HFSCR, host_hfscr);
	if (vcpu->arch.ciabr != host_ciabr)
		mtspr(SPRN_CIABR, host_ciabr);

	if (dawr_enabled()) {
		if (vcpu->arch.dawr0 != host_dawr0)
			mtspr(SPRN_DAWR0, host_dawr0);
		if (vcpu->arch.dawrx0 != host_dawrx0)
			mtspr(SPRN_DAWRX0, host_dawrx0);
		if (cpu_has_feature(CPU_FTR_DAWR1)) {
			if (vcpu->arch.dawr1 != host_dawr1)
				mtspr(SPRN_DAWR1, host_dawr1);
			if (vcpu->arch.dawrx1 != host_dawrx1)
				mtspr(SPRN_DAWRX1, host_dawrx1);
		}
	}

	if (dpdes)
		mtspr(SPRN_DPDES, 0);
	if (vc->pcr)
		mtspr(SPRN_PCR, PCR_MASK);

	/* HDEC must be at least as large as DEC, so decrementer_max fits */
	mtspr(SPRN_HDEC, decrementer_max);

	timer_rearm_host_dec(*tb);

	restore_p9_host_os_sprs(vcpu, &host_os_sprs);

	barrier(); /* Close in_guest critical section */
	WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_NONE);
	/* Interrupts are recoverable at this point */

	/*
	 * cp_abort is required if the processor supports local copy-paste
	 * to clear the copy buffer that was under control of the guest.
	 */
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		asm volatile(PPC_CP_ABORT);

out:
	return trap;
}
EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9);