GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/kvm/svm.c
1
/*
2
* Kernel-based Virtual Machine driver for Linux
3
*
4
* AMD SVM support
5
*
6
* Copyright (C) 2006 Qumranet, Inc.
7
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
8
*
9
* Authors:
10
* Yaniv Kamay <[email protected]>
11
* Avi Kivity <[email protected]>
12
*
13
* This work is licensed under the terms of the GNU GPL, version 2. See
14
* the COPYING file in the top-level directory.
15
*
16
*/
17
#include <linux/kvm_host.h>
18
19
#include "irq.h"
20
#include "mmu.h"
21
#include "kvm_cache_regs.h"
22
#include "x86.h"
23
24
#include <linux/module.h>
25
#include <linux/kernel.h>
26
#include <linux/vmalloc.h>
27
#include <linux/highmem.h>
28
#include <linux/sched.h>
29
#include <linux/ftrace_event.h>
30
#include <linux/slab.h>
31
32
#include <asm/tlbflush.h>
33
#include <asm/desc.h>
34
#include <asm/kvm_para.h>
35
36
#include <asm/virtext.h>
37
#include "trace.h"
38
39
#define __ex(x) __kvm_handle_fault_on_reboot(x)
40
41
MODULE_AUTHOR("Qumranet");
42
MODULE_LICENSE("GPL");
43
44
#define IOPM_ALLOC_ORDER 2
45
#define MSRPM_ALLOC_ORDER 1
46
47
#define SEG_TYPE_LDT 2
48
#define SEG_TYPE_BUSY_TSS16 3
49
50
#define SVM_FEATURE_NPT (1 << 0)
51
#define SVM_FEATURE_LBRV (1 << 1)
52
#define SVM_FEATURE_SVML (1 << 2)
53
#define SVM_FEATURE_NRIP (1 << 3)
54
#define SVM_FEATURE_TSC_RATE (1 << 4)
55
#define SVM_FEATURE_VMCB_CLEAN (1 << 5)
56
#define SVM_FEATURE_FLUSH_ASID (1 << 6)
57
#define SVM_FEATURE_DECODE_ASSIST (1 << 7)
58
#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
59
60
#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
61
#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
62
#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
63
64
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
65
66
#define TSC_RATIO_RSVD 0xffffff0000000000ULL
67
#define TSC_RATIO_MIN 0x0000000000000001ULL
68
#define TSC_RATIO_MAX 0x000000ffffffffffULL
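/*
 * MSR_AMD64_TSC_RATIO is an 8.32 fixed-point multiplier: the integer part
 * sits in bits 39:32 and the fraction in bits 31:0, which is why bits
 * 63:40 are reserved (TSC_RATIO_RSVD) and why a ratio of 1.0 is
 * 1ULL << 32 (TSC_RATIO_DEFAULT below). __scale_tsc() splits the ratio
 * the same way.
 */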
69
70
static bool erratum_383_found __read_mostly;
71
72
static const u32 host_save_user_msrs[] = {
73
#ifdef CONFIG_X86_64
74
MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
75
MSR_FS_BASE,
76
#endif
77
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
78
};
79
80
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
81
82
struct kvm_vcpu;
83
84
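/*
 * hsave holds the host (L1) state and intercepts while a nested guest is
 * running: get_host_vmcb() returns it whenever the vcpu is in guest mode,
 * and recalc_intercepts() ORs its intercept bits with the ones cached in
 * this structure.
 */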
struct nested_state {
85
struct vmcb *hsave;
86
u64 hsave_msr;
87
u64 vm_cr_msr;
88
u64 vmcb;
89
90
/* These are the merged vectors */
91
u32 *msrpm;
92
93
/* gpa pointers to the real vectors */
94
u64 vmcb_msrpm;
95
u64 vmcb_iopm;
96
97
/* A VMEXIT is required but not yet emulated */
98
bool exit_required;
99
100
/* cache for intercepts of the guest */
101
u32 intercept_cr;
102
u32 intercept_dr;
103
u32 intercept_exceptions;
104
u64 intercept;
105
106
/* Nested Paging related state */
107
u64 nested_cr3;
108
};
109
110
#define MSRPM_OFFSETS 16
111
static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
112
113
struct vcpu_svm {
114
struct kvm_vcpu vcpu;
115
struct vmcb *vmcb;
116
unsigned long vmcb_pa;
117
struct svm_cpu_data *svm_data;
118
uint64_t asid_generation;
119
uint64_t sysenter_esp;
120
uint64_t sysenter_eip;
121
122
u64 next_rip;
123
124
u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
125
struct {
126
u16 fs;
127
u16 gs;
128
u16 ldt;
129
u64 gs_base;
130
} host;
131
132
u32 *msrpm;
133
134
ulong nmi_iret_rip;
135
136
struct nested_state nested;
137
138
bool nmi_singlestep;
139
140
unsigned int3_injected;
141
unsigned long int3_rip;
142
u32 apf_reason;
143
144
u64 tsc_ratio;
145
};
146
147
static DEFINE_PER_CPU(u64, current_tsc_ratio);
148
#define TSC_RATIO_DEFAULT 0x0100000000ULL
149
150
#define MSR_INVALID 0xffffffffU
151
152
static struct svm_direct_access_msrs {
153
u32 index; /* Index of the MSR */
154
bool always; /* True if intercept is always on */
155
} direct_access_msrs[] = {
156
{ .index = MSR_STAR, .always = true },
157
{ .index = MSR_IA32_SYSENTER_CS, .always = true },
158
#ifdef CONFIG_X86_64
159
{ .index = MSR_GS_BASE, .always = true },
160
{ .index = MSR_FS_BASE, .always = true },
161
{ .index = MSR_KERNEL_GS_BASE, .always = true },
162
{ .index = MSR_LSTAR, .always = true },
163
{ .index = MSR_CSTAR, .always = true },
164
{ .index = MSR_SYSCALL_MASK, .always = true },
165
#endif
166
{ .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
167
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
168
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
169
{ .index = MSR_IA32_LASTINTTOIP, .always = false },
170
{ .index = MSR_INVALID, .always = false },
171
};
172
173
/* enable NPT for AMD64 and X86 with PAE */
174
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
175
static bool npt_enabled = true;
176
#else
177
static bool npt_enabled;
178
#endif
179
static int npt = 1;
180
181
module_param(npt, int, S_IRUGO);
182
183
static int nested = 1;
184
module_param(nested, int, S_IRUGO);
185
186
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
187
static void svm_complete_interrupts(struct vcpu_svm *svm);
188
189
static int nested_svm_exit_handled(struct vcpu_svm *svm);
190
static int nested_svm_intercept(struct vcpu_svm *svm);
191
static int nested_svm_vmexit(struct vcpu_svm *svm);
192
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
193
bool has_error_code, u32 error_code);
194
static u64 __scale_tsc(u64 ratio, u64 tsc);
195
196
enum {
197
VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
198
pause filter count */
199
VMCB_PERM_MAP, /* IOPM Base and MSRPM Base */
200
VMCB_ASID, /* ASID */
201
VMCB_INTR, /* int_ctl, int_vector */
202
VMCB_NPT, /* npt_en, nCR3, gPAT */
203
VMCB_CR, /* CR0, CR3, CR4, EFER */
204
VMCB_DR, /* DR6, DR7 */
205
VMCB_DT, /* GDT, IDT */
206
VMCB_SEG, /* CS, DS, SS, ES, CPL */
207
VMCB_CR2, /* CR2 only */
208
VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
209
VMCB_DIRTY_MAX,
210
};
211
212
/* TPR and CR2 are always written before VMRUN */
213
#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
214
215
static inline void mark_all_dirty(struct vmcb *vmcb)
216
{
217
vmcb->control.clean = 0;
218
}
219
220
static inline void mark_all_clean(struct vmcb *vmcb)
221
{
222
vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
223
& ~VMCB_ALWAYS_DIRTY_MASK;
224
}
225
226
static inline void mark_dirty(struct vmcb *vmcb, int bit)
227
{
228
vmcb->control.clean &= ~(1 << bit);
229
}
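/*
 * The clean bits let hardware skip reloading VMCB fields that were not
 * touched since the last VMRUN. Example: new_asid() only changes
 * control.asid, so it just calls mark_dirty(vmcb, VMCB_ASID), while
 * anything that rewrites the whole VMCB (init_vmcb(), vcpu migration in
 * svm_vcpu_load()) uses mark_all_dirty().
 */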
230
231
static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
232
{
233
return container_of(vcpu, struct vcpu_svm, vcpu);
234
}
235
236
static void recalc_intercepts(struct vcpu_svm *svm)
237
{
238
struct vmcb_control_area *c, *h;
239
struct nested_state *g;
240
241
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
242
243
if (!is_guest_mode(&svm->vcpu))
244
return;
245
246
c = &svm->vmcb->control;
247
h = &svm->nested.hsave->control;
248
g = &svm->nested;
249
250
c->intercept_cr = h->intercept_cr | g->intercept_cr;
251
c->intercept_dr = h->intercept_dr | g->intercept_dr;
252
c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
253
c->intercept = h->intercept | g->intercept;
254
}
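/*
 * While in guest mode the hardware VMCB must intercept everything that
 * either the host (hsave) or the nested hypervisor asked for, hence the
 * bitwise OR above. The set_/clr_ helpers below always edit the host copy
 * returned by get_host_vmcb() and then redo this merge.
 */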
255
256
static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
257
{
258
if (is_guest_mode(&svm->vcpu))
259
return svm->nested.hsave;
260
else
261
return svm->vmcb;
262
}
263
264
static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
265
{
266
struct vmcb *vmcb = get_host_vmcb(svm);
267
268
vmcb->control.intercept_cr |= (1U << bit);
269
270
recalc_intercepts(svm);
271
}
272
273
static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
274
{
275
struct vmcb *vmcb = get_host_vmcb(svm);
276
277
vmcb->control.intercept_cr &= ~(1U << bit);
278
279
recalc_intercepts(svm);
280
}
281
282
static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
283
{
284
struct vmcb *vmcb = get_host_vmcb(svm);
285
286
return vmcb->control.intercept_cr & (1U << bit);
287
}
288
289
static inline void set_dr_intercept(struct vcpu_svm *svm, int bit)
290
{
291
struct vmcb *vmcb = get_host_vmcb(svm);
292
293
vmcb->control.intercept_dr |= (1U << bit);
294
295
recalc_intercepts(svm);
296
}
297
298
static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit)
299
{
300
struct vmcb *vmcb = get_host_vmcb(svm);
301
302
vmcb->control.intercept_dr &= ~(1U << bit);
303
304
recalc_intercepts(svm);
305
}
306
307
static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
308
{
309
struct vmcb *vmcb = get_host_vmcb(svm);
310
311
vmcb->control.intercept_exceptions |= (1U << bit);
312
313
recalc_intercepts(svm);
314
}
315
316
static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
317
{
318
struct vmcb *vmcb = get_host_vmcb(svm);
319
320
vmcb->control.intercept_exceptions &= ~(1U << bit);
321
322
recalc_intercepts(svm);
323
}
324
325
static inline void set_intercept(struct vcpu_svm *svm, int bit)
326
{
327
struct vmcb *vmcb = get_host_vmcb(svm);
328
329
vmcb->control.intercept |= (1ULL << bit);
330
331
recalc_intercepts(svm);
332
}
333
334
static inline void clr_intercept(struct vcpu_svm *svm, int bit)
335
{
336
struct vmcb *vmcb = get_host_vmcb(svm);
337
338
vmcb->control.intercept &= ~(1ULL << bit);
339
340
recalc_intercepts(svm);
341
}
342
343
static inline void enable_gif(struct vcpu_svm *svm)
344
{
345
svm->vcpu.arch.hflags |= HF_GIF_MASK;
346
}
347
348
static inline void disable_gif(struct vcpu_svm *svm)
349
{
350
svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
351
}
352
353
static inline bool gif_set(struct vcpu_svm *svm)
354
{
355
return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
356
}
357
358
static unsigned long iopm_base;
359
360
struct kvm_ldttss_desc {
361
u16 limit0;
362
u16 base0;
363
unsigned base1:8, type:5, dpl:2, p:1;
364
unsigned limit1:4, zero0:3, g:1, base2:8;
365
u32 base3;
366
u32 zero1;
367
} __attribute__((packed));
368
369
struct svm_cpu_data {
370
int cpu;
371
372
u64 asid_generation;
373
u32 max_asid;
374
u32 next_asid;
375
struct kvm_ldttss_desc *tss_desc;
376
377
struct page *save_area;
378
};
379
380
static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
381
382
struct svm_init_data {
383
int cpu;
384
int r;
385
};
386
387
static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
388
389
#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
390
#define MSRS_RANGE_SIZE 2048
391
#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
392
393
static u32 svm_msrpm_offset(u32 msr)
394
{
395
u32 offset;
396
int i;
397
398
for (i = 0; i < NUM_MSR_MAPS; i++) {
399
if (msr < msrpm_ranges[i] ||
400
msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
401
continue;
402
403
offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
404
offset += (i * MSRS_RANGE_SIZE); /* add range offset */
405
406
/* Now we have the u8 offset - but need the u32 offset */
407
return offset / 4;
408
}
409
410
/* MSR not in any range */
411
return MSR_INVALID;
412
}
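/*
 * Worked example: MSR_STAR (0xc0000081) lies in the second range.
 *   u8 offset  = (0xc0000081 - 0xc0000000) / 4 = 0x20    (4 MSRs per byte)
 *   u8 offset += 1 * MSRS_RANGE_SIZE            = 0x820   (skip range 0)
 *   u32 offset = 0x820 / 4                      = 0x208
 * Each byte covers 4 MSRs at 2 permission bits apiece, so each u32 covers
 * 16 MSRs; set_msr_interception() picks the read/write bit pair inside
 * that u32 via (msr & 0x0f).
 */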
413
414
#define MAX_INST_SIZE 15
415
416
static inline void clgi(void)
417
{
418
asm volatile (__ex(SVM_CLGI));
419
}
420
421
static inline void stgi(void)
422
{
423
asm volatile (__ex(SVM_STGI));
424
}
425
426
static inline void invlpga(unsigned long addr, u32 asid)
427
{
428
asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
429
}
430
431
static int get_npt_level(void)
432
{
433
#ifdef CONFIG_X86_64
434
return PT64_ROOT_LEVEL;
435
#else
436
return PT32E_ROOT_LEVEL;
437
#endif
438
}
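/* Four paging levels in long mode, three-level PAE paging otherwise. */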
439
440
static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
441
{
442
vcpu->arch.efer = efer;
443
if (!npt_enabled && !(efer & EFER_LMA))
444
efer &= ~EFER_LME;
445
446
to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
447
mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
448
}
449
450
static int is_external_interrupt(u32 info)
451
{
452
info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
453
return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
454
}
455
456
static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
457
{
458
struct vcpu_svm *svm = to_svm(vcpu);
459
u32 ret = 0;
460
461
if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
462
ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
463
return ret & mask;
464
}
465
466
static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
467
{
468
struct vcpu_svm *svm = to_svm(vcpu);
469
470
if (mask == 0)
471
svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
472
else
473
svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
474
475
}
476
477
static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
478
{
479
struct vcpu_svm *svm = to_svm(vcpu);
480
481
if (svm->vmcb->control.next_rip != 0)
482
svm->next_rip = svm->vmcb->control.next_rip;
483
484
if (!svm->next_rip) {
485
if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
486
EMULATE_DONE)
487
printk(KERN_DEBUG "%s: NOP\n", __func__);
488
return;
489
}
490
if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
491
printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
492
__func__, kvm_rip_read(vcpu), svm->next_rip);
493
494
kvm_rip_write(vcpu, svm->next_rip);
495
svm_set_interrupt_shadow(vcpu, 0);
496
}
497
498
static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
499
bool has_error_code, u32 error_code,
500
bool reinject)
501
{
502
struct vcpu_svm *svm = to_svm(vcpu);
503
504
/*
505
* If we are within a nested VM we'd better #VMEXIT and let the guest
506
* handle the exception
507
*/
508
if (!reinject &&
509
nested_svm_check_exception(svm, nr, has_error_code, error_code))
510
return;
511
512
if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
513
unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
514
515
/*
516
* For guest debugging where we have to reinject #BP if some
517
* INT3 is guest-owned:
518
* Emulate nRIP by moving RIP forward. Will fail if injection
519
* raises a fault that is not intercepted. Still better than
520
* failing in all cases.
521
*/
522
skip_emulated_instruction(&svm->vcpu);
523
rip = kvm_rip_read(&svm->vcpu);
524
svm->int3_rip = rip + svm->vmcb->save.cs.base;
525
svm->int3_injected = rip - old_rip;
526
}
527
528
svm->vmcb->control.event_inj = nr
529
| SVM_EVTINJ_VALID
530
| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
531
| SVM_EVTINJ_TYPE_EXEPT;
532
svm->vmcb->control.event_inj_err = error_code;
533
}
534
535
static void svm_init_erratum_383(void)
536
{
537
u32 low, high;
538
int err;
539
u64 val;
540
541
if (!cpu_has_amd_erratum(amd_erratum_383))
542
return;
543
544
/* Use _safe variants to not break nested virtualization */
545
val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
546
if (err)
547
return;
548
549
val |= (1ULL << 47);
550
551
low = lower_32_bits(val);
552
high = upper_32_bits(val);
553
554
native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
555
556
erratum_383_found = true;
557
}
558
559
static int has_svm(void)
560
{
561
const char *msg;
562
563
if (!cpu_has_svm(&msg)) {
564
printk(KERN_INFO "has_svm: %s\n", msg);
565
return 0;
566
}
567
568
return 1;
569
}
570
571
static void svm_hardware_disable(void *garbage)
572
{
573
/* Make sure we clean up behind us */
574
if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
575
wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
576
577
cpu_svm_disable();
578
}
579
580
static int svm_hardware_enable(void *garbage)
581
{
582
583
struct svm_cpu_data *sd;
584
uint64_t efer;
585
struct desc_ptr gdt_descr;
586
struct desc_struct *gdt;
587
int me = raw_smp_processor_id();
588
589
rdmsrl(MSR_EFER, efer);
590
if (efer & EFER_SVME)
591
return -EBUSY;
592
593
if (!has_svm()) {
594
printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
595
me);
596
return -EINVAL;
597
}
598
sd = per_cpu(svm_data, me);
599
600
if (!sd) {
601
printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
602
me);
603
return -EINVAL;
604
}
605
606
sd->asid_generation = 1;
607
sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
608
sd->next_asid = sd->max_asid + 1;
609
610
native_store_gdt(&gdt_descr);
611
gdt = (struct desc_struct *)gdt_descr.address;
612
sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
613
614
wrmsrl(MSR_EFER, efer | EFER_SVME);
615
616
wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
617
618
if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
619
wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
620
__get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
621
}
622
623
svm_init_erratum_383();
624
625
return 0;
626
}
627
628
static void svm_cpu_uninit(int cpu)
629
{
630
struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
631
632
if (!sd)
633
return;
634
635
per_cpu(svm_data, raw_smp_processor_id()) = NULL;
636
__free_page(sd->save_area);
637
kfree(sd);
638
}
639
640
static int svm_cpu_init(int cpu)
641
{
642
struct svm_cpu_data *sd;
643
int r;
644
645
sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
646
if (!sd)
647
return -ENOMEM;
648
sd->cpu = cpu;
649
sd->save_area = alloc_page(GFP_KERNEL);
650
r = -ENOMEM;
651
if (!sd->save_area)
652
goto err_1;
653
654
per_cpu(svm_data, cpu) = sd;
655
656
return 0;
657
658
err_1:
659
kfree(sd);
660
return r;
661
662
}
663
664
static bool valid_msr_intercept(u32 index)
665
{
666
int i;
667
668
for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
669
if (direct_access_msrs[i].index == index)
670
return true;
671
672
return false;
673
}
674
675
static void set_msr_interception(u32 *msrpm, unsigned msr,
676
int read, int write)
677
{
678
u8 bit_read, bit_write;
679
unsigned long tmp;
680
u32 offset;
681
682
/*
683
* If this warning triggers extend the direct_access_msrs list at the
684
* beginning of the file
685
*/
686
WARN_ON(!valid_msr_intercept(msr));
687
688
offset = svm_msrpm_offset(msr);
689
bit_read = 2 * (msr & 0x0f);
690
bit_write = 2 * (msr & 0x0f) + 1;
691
tmp = msrpm[offset];
692
693
BUG_ON(offset == MSR_INVALID);
694
695
read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp);
696
write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
697
698
msrpm[offset] = tmp;
699
}
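/*
 * Example: allowing direct access to MSR_STAR clears bits 2 (read) and
 * 3 (write) of msrpm[0x208] -- see the svm_msrpm_offset() example above.
 * A set bit means "intercept", which is why svm_vcpu_init_msrpm() starts
 * from an all-0xff map and only clears the pairs for the 'always' MSRs.
 */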
700
701
static void svm_vcpu_init_msrpm(u32 *msrpm)
702
{
703
int i;
704
705
memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
706
707
for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
708
if (!direct_access_msrs[i].always)
709
continue;
710
711
set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
712
}
713
}
714
715
static void add_msr_offset(u32 offset)
716
{
717
int i;
718
719
for (i = 0; i < MSRPM_OFFSETS; ++i) {
720
721
/* Offset already in list? */
722
if (msrpm_offsets[i] == offset)
723
return;
724
725
/* Slot used by another offset? */
726
if (msrpm_offsets[i] != MSR_INVALID)
727
continue;
728
729
/* Add offset to list */
730
msrpm_offsets[i] = offset;
731
732
return;
733
}
734
735
/*
736
* If this BUG triggers the msrpm_offsets table has an overflow. Just
737
* increase MSRPM_OFFSETS in this case.
738
*/
739
BUG();
740
}
741
742
static void init_msrpm_offsets(void)
743
{
744
int i;
745
746
memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
747
748
for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
749
u32 offset;
750
751
offset = svm_msrpm_offset(direct_access_msrs[i].index);
752
BUG_ON(offset == MSR_INVALID);
753
754
add_msr_offset(offset);
755
}
756
}
757
758
static void svm_enable_lbrv(struct vcpu_svm *svm)
759
{
760
u32 *msrpm = svm->msrpm;
761
762
svm->vmcb->control.lbr_ctl = 1;
763
set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
764
set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
765
set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
766
set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
767
}
768
769
static void svm_disable_lbrv(struct vcpu_svm *svm)
770
{
771
u32 *msrpm = svm->msrpm;
772
773
svm->vmcb->control.lbr_ctl = 0;
774
set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
775
set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
776
set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
777
set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
778
}
779
780
static __init int svm_hardware_setup(void)
781
{
782
int cpu;
783
struct page *iopm_pages;
784
void *iopm_va;
785
int r;
786
787
iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
788
789
if (!iopm_pages)
790
return -ENOMEM;
791
792
iopm_va = page_address(iopm_pages);
793
memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
794
iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
795
796
init_msrpm_offsets();
797
798
if (boot_cpu_has(X86_FEATURE_NX))
799
kvm_enable_efer_bits(EFER_NX);
800
801
if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
802
kvm_enable_efer_bits(EFER_FFXSR);
803
804
if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
805
u64 max;
806
807
kvm_has_tsc_control = true;
808
809
/*
810
* Make sure the user can only configure tsc_khz values that
811
* fit into a signed integer.
812
* A min value is not calculated because it will always
813
* be 1 on all machines and a value of 0 is used to disable
814
* tsc-scaling for the vcpu.
815
*/
816
max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
817
818
kvm_max_guest_tsc_khz = max;
819
}
820
821
if (nested) {
822
printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
823
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
824
}
825
826
for_each_possible_cpu(cpu) {
827
r = svm_cpu_init(cpu);
828
if (r)
829
goto err;
830
}
831
832
if (!boot_cpu_has(X86_FEATURE_NPT))
833
npt_enabled = false;
834
835
if (npt_enabled && !npt) {
836
printk(KERN_INFO "kvm: Nested Paging disabled\n");
837
npt_enabled = false;
838
}
839
840
if (npt_enabled) {
841
printk(KERN_INFO "kvm: Nested Paging enabled\n");
842
kvm_enable_tdp();
843
} else
844
kvm_disable_tdp();
845
846
return 0;
847
848
err:
849
__free_pages(iopm_pages, IOPM_ALLOC_ORDER);
850
iopm_base = 0;
851
return r;
852
}
853
854
static __exit void svm_hardware_unsetup(void)
855
{
856
int cpu;
857
858
for_each_possible_cpu(cpu)
859
svm_cpu_uninit(cpu);
860
861
__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
862
iopm_base = 0;
863
}
864
865
static void init_seg(struct vmcb_seg *seg)
866
{
867
seg->selector = 0;
868
seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
869
SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
870
seg->limit = 0xffff;
871
seg->base = 0;
872
}
873
874
static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
875
{
876
seg->selector = 0;
877
seg->attrib = SVM_SELECTOR_P_MASK | type;
878
seg->limit = 0xffff;
879
seg->base = 0;
880
}
881
882
static u64 __scale_tsc(u64 ratio, u64 tsc)
883
{
884
u64 mult, frac, _tsc;
885
886
mult = ratio >> 32;
887
frac = ratio & ((1ULL << 32) - 1);
888
889
_tsc = tsc;
890
_tsc *= mult;
891
_tsc += (tsc >> 32) * frac;
892
_tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
893
894
return _tsc;
895
}
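/*
 * The three partial products above compute (tsc * ratio) >> 32 without a
 * 128-bit intermediate: tsc times the integer part, plus the high half of
 * tsc times the fraction, plus the low half of tsc times the fraction
 * shifted back down. Example: ratio = 0x0080000000 (0.5) makes the guest
 * see the TSC advancing at half the host rate.
 */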
896
897
static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
898
{
899
struct vcpu_svm *svm = to_svm(vcpu);
900
u64 _tsc = tsc;
901
902
if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
903
_tsc = __scale_tsc(svm->tsc_ratio, tsc);
904
905
return _tsc;
906
}
907
908
static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
909
{
910
struct vcpu_svm *svm = to_svm(vcpu);
911
u64 ratio;
912
u64 khz;
913
914
/* TSC scaling supported? */
915
if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR))
916
return;
917
918
/* TSC-Scaling disabled or guest TSC same frequency as host TSC? */
919
if (user_tsc_khz == 0) {
920
vcpu->arch.virtual_tsc_khz = 0;
921
svm->tsc_ratio = TSC_RATIO_DEFAULT;
922
return;
923
}
924
925
khz = user_tsc_khz;
926
927
/* TSC scaling required - calculate ratio */
928
ratio = khz << 32;
929
do_div(ratio, tsc_khz);
930
931
if (ratio == 0 || ratio & TSC_RATIO_RSVD) {
932
WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
933
user_tsc_khz);
934
return;
935
}
936
vcpu->arch.virtual_tsc_khz = user_tsc_khz;
937
svm->tsc_ratio = ratio;
938
}
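/*
 * Example: a guest configured for 2000000 kHz on a 2500000 kHz host gets
 * ratio = (2000000 << 32) / 2500000 = 0xcccccccc, i.e. 0.8 in 8.32
 * fixed-point, so the guest TSC appears to run at 80% of the host rate.
 */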
939
940
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
941
{
942
struct vcpu_svm *svm = to_svm(vcpu);
943
u64 g_tsc_offset = 0;
944
945
if (is_guest_mode(vcpu)) {
946
g_tsc_offset = svm->vmcb->control.tsc_offset -
947
svm->nested.hsave->control.tsc_offset;
948
svm->nested.hsave->control.tsc_offset = offset;
949
}
950
951
svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
952
953
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
954
}
955
956
static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
957
{
958
struct vcpu_svm *svm = to_svm(vcpu);
959
960
svm->vmcb->control.tsc_offset += adjustment;
961
if (is_guest_mode(vcpu))
962
svm->nested.hsave->control.tsc_offset += adjustment;
963
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
964
}
965
966
static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
967
{
968
u64 tsc;
969
970
tsc = svm_scale_tsc(vcpu, native_read_tsc());
971
972
return target_tsc - tsc;
973
}
974
975
static void init_vmcb(struct vcpu_svm *svm)
976
{
977
struct vmcb_control_area *control = &svm->vmcb->control;
978
struct vmcb_save_area *save = &svm->vmcb->save;
979
980
svm->vcpu.fpu_active = 1;
981
svm->vcpu.arch.hflags = 0;
982
983
set_cr_intercept(svm, INTERCEPT_CR0_READ);
984
set_cr_intercept(svm, INTERCEPT_CR3_READ);
985
set_cr_intercept(svm, INTERCEPT_CR4_READ);
986
set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
987
set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
988
set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
989
set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
990
991
set_dr_intercept(svm, INTERCEPT_DR0_READ);
992
set_dr_intercept(svm, INTERCEPT_DR1_READ);
993
set_dr_intercept(svm, INTERCEPT_DR2_READ);
994
set_dr_intercept(svm, INTERCEPT_DR3_READ);
995
set_dr_intercept(svm, INTERCEPT_DR4_READ);
996
set_dr_intercept(svm, INTERCEPT_DR5_READ);
997
set_dr_intercept(svm, INTERCEPT_DR6_READ);
998
set_dr_intercept(svm, INTERCEPT_DR7_READ);
999
1000
set_dr_intercept(svm, INTERCEPT_DR0_WRITE);
1001
set_dr_intercept(svm, INTERCEPT_DR1_WRITE);
1002
set_dr_intercept(svm, INTERCEPT_DR2_WRITE);
1003
set_dr_intercept(svm, INTERCEPT_DR3_WRITE);
1004
set_dr_intercept(svm, INTERCEPT_DR4_WRITE);
1005
set_dr_intercept(svm, INTERCEPT_DR5_WRITE);
1006
set_dr_intercept(svm, INTERCEPT_DR6_WRITE);
1007
set_dr_intercept(svm, INTERCEPT_DR7_WRITE);
1008
1009
set_exception_intercept(svm, PF_VECTOR);
1010
set_exception_intercept(svm, UD_VECTOR);
1011
set_exception_intercept(svm, MC_VECTOR);
1012
1013
set_intercept(svm, INTERCEPT_INTR);
1014
set_intercept(svm, INTERCEPT_NMI);
1015
set_intercept(svm, INTERCEPT_SMI);
1016
set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
1017
set_intercept(svm, INTERCEPT_CPUID);
1018
set_intercept(svm, INTERCEPT_INVD);
1019
set_intercept(svm, INTERCEPT_HLT);
1020
set_intercept(svm, INTERCEPT_INVLPG);
1021
set_intercept(svm, INTERCEPT_INVLPGA);
1022
set_intercept(svm, INTERCEPT_IOIO_PROT);
1023
set_intercept(svm, INTERCEPT_MSR_PROT);
1024
set_intercept(svm, INTERCEPT_TASK_SWITCH);
1025
set_intercept(svm, INTERCEPT_SHUTDOWN);
1026
set_intercept(svm, INTERCEPT_VMRUN);
1027
set_intercept(svm, INTERCEPT_VMMCALL);
1028
set_intercept(svm, INTERCEPT_VMLOAD);
1029
set_intercept(svm, INTERCEPT_VMSAVE);
1030
set_intercept(svm, INTERCEPT_STGI);
1031
set_intercept(svm, INTERCEPT_CLGI);
1032
set_intercept(svm, INTERCEPT_SKINIT);
1033
set_intercept(svm, INTERCEPT_WBINVD);
1034
set_intercept(svm, INTERCEPT_MONITOR);
1035
set_intercept(svm, INTERCEPT_MWAIT);
1036
set_intercept(svm, INTERCEPT_XSETBV);
1037
1038
control->iopm_base_pa = iopm_base;
1039
control->msrpm_base_pa = __pa(svm->msrpm);
1040
control->int_ctl = V_INTR_MASKING_MASK;
1041
1042
init_seg(&save->es);
1043
init_seg(&save->ss);
1044
init_seg(&save->ds);
1045
init_seg(&save->fs);
1046
init_seg(&save->gs);
1047
1048
save->cs.selector = 0xf000;
1049
/* Executable/Readable Code Segment */
1050
save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1051
SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1052
save->cs.limit = 0xffff;
1053
/*
1054
* cs.base should really be 0xffff0000, but vmx can't handle that, so
1055
* be consistent with it.
1056
*
1057
* Replace when we have real mode working for vmx.
1058
*/
1059
save->cs.base = 0xf0000;
1060
1061
save->gdtr.limit = 0xffff;
1062
save->idtr.limit = 0xffff;
1063
1064
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1065
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1066
1067
svm_set_efer(&svm->vcpu, 0);
1068
save->dr6 = 0xffff0ff0;
1069
save->dr7 = 0x400;
1070
kvm_set_rflags(&svm->vcpu, 2);
1071
save->rip = 0x0000fff0;
1072
svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
1073
1074
/*
1075
* This is the guest-visible cr0 value.
1076
* svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
1077
*/
1078
svm->vcpu.arch.cr0 = 0;
1079
(void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
1080
1081
save->cr4 = X86_CR4_PAE;
1082
/* rdx = ?? */
1083
1084
if (npt_enabled) {
1085
/* Setup VMCB for Nested Paging */
1086
control->nested_ctl = 1;
1087
clr_intercept(svm, INTERCEPT_TASK_SWITCH);
1088
clr_intercept(svm, INTERCEPT_INVLPG);
1089
clr_exception_intercept(svm, PF_VECTOR);
1090
clr_cr_intercept(svm, INTERCEPT_CR3_READ);
1091
clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1092
save->g_pat = 0x0007040600070406ULL;
1093
save->cr3 = 0;
1094
save->cr4 = 0;
1095
}
1096
svm->asid_generation = 0;
1097
1098
svm->nested.vmcb = 0;
1099
svm->vcpu.arch.hflags = 0;
1100
1101
if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
1102
control->pause_filter_count = 3000;
1103
set_intercept(svm, INTERCEPT_PAUSE);
1104
}
1105
1106
mark_all_dirty(svm->vmcb);
1107
1108
enable_gif(svm);
1109
}
1110
1111
static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
1112
{
1113
struct vcpu_svm *svm = to_svm(vcpu);
1114
1115
init_vmcb(svm);
1116
1117
if (!kvm_vcpu_is_bsp(vcpu)) {
1118
kvm_rip_write(vcpu, 0);
1119
svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
1120
svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
1121
}
1122
vcpu->arch.regs_avail = ~0;
1123
vcpu->arch.regs_dirty = ~0;
1124
1125
return 0;
1126
}
1127
1128
static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1129
{
1130
struct vcpu_svm *svm;
1131
struct page *page;
1132
struct page *msrpm_pages;
1133
struct page *hsave_page;
1134
struct page *nested_msrpm_pages;
1135
int err;
1136
1137
svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1138
if (!svm) {
1139
err = -ENOMEM;
1140
goto out;
1141
}
1142
1143
svm->tsc_ratio = TSC_RATIO_DEFAULT;
1144
1145
err = kvm_vcpu_init(&svm->vcpu, kvm, id);
1146
if (err)
1147
goto free_svm;
1148
1149
err = -ENOMEM;
1150
page = alloc_page(GFP_KERNEL);
1151
if (!page)
1152
goto uninit;
1153
1154
msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1155
if (!msrpm_pages)
1156
goto free_page1;
1157
1158
nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1159
if (!nested_msrpm_pages)
1160
goto free_page2;
1161
1162
hsave_page = alloc_page(GFP_KERNEL);
1163
if (!hsave_page)
1164
goto free_page3;
1165
1166
svm->nested.hsave = page_address(hsave_page);
1167
1168
svm->msrpm = page_address(msrpm_pages);
1169
svm_vcpu_init_msrpm(svm->msrpm);
1170
1171
svm->nested.msrpm = page_address(nested_msrpm_pages);
1172
svm_vcpu_init_msrpm(svm->nested.msrpm);
1173
1174
svm->vmcb = page_address(page);
1175
clear_page(svm->vmcb);
1176
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
1177
svm->asid_generation = 0;
1178
init_vmcb(svm);
1179
kvm_write_tsc(&svm->vcpu, 0);
1180
1181
err = fx_init(&svm->vcpu);
1182
if (err)
1183
goto free_page4;
1184
1185
svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
1186
if (kvm_vcpu_is_bsp(&svm->vcpu))
1187
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
1188
1189
return &svm->vcpu;
1190
1191
free_page4:
1192
__free_page(hsave_page);
1193
free_page3:
1194
__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
1195
free_page2:
1196
__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
1197
free_page1:
1198
__free_page(page);
1199
uninit:
1200
kvm_vcpu_uninit(&svm->vcpu);
1201
free_svm:
1202
kmem_cache_free(kvm_vcpu_cache, svm);
1203
out:
1204
return ERR_PTR(err);
1205
}
1206
1207
static void svm_free_vcpu(struct kvm_vcpu *vcpu)
1208
{
1209
struct vcpu_svm *svm = to_svm(vcpu);
1210
1211
__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
1212
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
1213
__free_page(virt_to_page(svm->nested.hsave));
1214
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
1215
kvm_vcpu_uninit(vcpu);
1216
kmem_cache_free(kvm_vcpu_cache, svm);
1217
}
1218
1219
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1220
{
1221
struct vcpu_svm *svm = to_svm(vcpu);
1222
int i;
1223
1224
if (unlikely(cpu != vcpu->cpu)) {
1225
svm->asid_generation = 0;
1226
mark_all_dirty(svm->vmcb);
1227
}
1228
1229
#ifdef CONFIG_X86_64
1230
rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
1231
#endif
1232
savesegment(fs, svm->host.fs);
1233
savesegment(gs, svm->host.gs);
1234
svm->host.ldt = kvm_read_ldt();
1235
1236
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1237
rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1238
1239
if (static_cpu_has(X86_FEATURE_TSCRATEMSR) &&
1240
svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) {
1241
__get_cpu_var(current_tsc_ratio) = svm->tsc_ratio;
1242
wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio);
1243
}
1244
}
1245
1246
static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1247
{
1248
struct vcpu_svm *svm = to_svm(vcpu);
1249
int i;
1250
1251
++vcpu->stat.host_state_reload;
1252
kvm_load_ldt(svm->host.ldt);
1253
#ifdef CONFIG_X86_64
1254
loadsegment(fs, svm->host.fs);
1255
wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
1256
load_gs_index(svm->host.gs);
1257
#else
1258
#ifdef CONFIG_X86_32_LAZY_GS
1259
loadsegment(gs, svm->host.gs);
1260
#endif
1261
#endif
1262
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1263
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1264
}
1265
1266
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1267
{
1268
return to_svm(vcpu)->vmcb->save.rflags;
1269
}
1270
1271
static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1272
{
1273
to_svm(vcpu)->vmcb->save.rflags = rflags;
1274
}
1275
1276
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
1277
{
1278
switch (reg) {
1279
case VCPU_EXREG_PDPTR:
1280
BUG_ON(!npt_enabled);
1281
load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
1282
break;
1283
default:
1284
BUG();
1285
}
1286
}
1287
1288
static void svm_set_vintr(struct vcpu_svm *svm)
1289
{
1290
set_intercept(svm, INTERCEPT_VINTR);
1291
}
1292
1293
static void svm_clear_vintr(struct vcpu_svm *svm)
1294
{
1295
clr_intercept(svm, INTERCEPT_VINTR);
1296
}
1297
1298
static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
1299
{
1300
struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1301
1302
switch (seg) {
1303
case VCPU_SREG_CS: return &save->cs;
1304
case VCPU_SREG_DS: return &save->ds;
1305
case VCPU_SREG_ES: return &save->es;
1306
case VCPU_SREG_FS: return &save->fs;
1307
case VCPU_SREG_GS: return &save->gs;
1308
case VCPU_SREG_SS: return &save->ss;
1309
case VCPU_SREG_TR: return &save->tr;
1310
case VCPU_SREG_LDTR: return &save->ldtr;
1311
}
1312
BUG();
1313
return NULL;
1314
}
1315
1316
static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1317
{
1318
struct vmcb_seg *s = svm_seg(vcpu, seg);
1319
1320
return s->base;
1321
}
1322
1323
static void svm_get_segment(struct kvm_vcpu *vcpu,
1324
struct kvm_segment *var, int seg)
1325
{
1326
struct vmcb_seg *s = svm_seg(vcpu, seg);
1327
1328
var->base = s->base;
1329
var->limit = s->limit;
1330
var->selector = s->selector;
1331
var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
1332
var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
1333
var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
1334
var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
1335
var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
1336
var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
1337
var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
1338
var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
1339
1340
/*
1341
* AMD's VMCB does not have an explicit unusable field, so emulate it
1342
* for cross vendor migration purposes by "not present"
1343
*/
1344
var->unusable = !var->present || (var->type == 0);
1345
1346
switch (seg) {
1347
case VCPU_SREG_CS:
1348
/*
1349
* SVM always stores 0 for the 'G' bit in the CS selector in
1350
* the VMCB on a VMEXIT. This hurts cross-vendor migration:
1351
* Intel's VMENTRY has a check on the 'G' bit.
1352
*/
1353
var->g = s->limit > 0xfffff;
1354
break;
1355
case VCPU_SREG_TR:
1356
/*
1357
* Work around a bug where the busy flag in the tr selector
1358
* isn't exposed
1359
*/
1360
var->type |= 0x2;
1361
break;
1362
case VCPU_SREG_DS:
1363
case VCPU_SREG_ES:
1364
case VCPU_SREG_FS:
1365
case VCPU_SREG_GS:
1366
/*
1367
* The accessed bit must always be set in the segment
1368
* descriptor cache, although it can be cleared in the
1369
* descriptor, the cached bit always remains at 1. Since
1370
* Intel has a check on this, set it here to support
1371
* cross-vendor migration.
1372
*/
1373
if (!var->unusable)
1374
var->type |= 0x1;
1375
break;
1376
case VCPU_SREG_SS:
1377
/*
1378
* On AMD CPUs sometimes the DB bit in the segment
1379
* descriptor is left as 1, although the whole segment has
1380
* been made unusable. Clear it here to pass an Intel VMX
1381
* entry check when cross vendor migrating.
1382
*/
1383
if (var->unusable)
1384
var->db = 0;
1385
break;
1386
}
1387
}
1388
1389
static int svm_get_cpl(struct kvm_vcpu *vcpu)
1390
{
1391
struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1392
1393
return save->cpl;
1394
}
1395
1396
static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1397
{
1398
struct vcpu_svm *svm = to_svm(vcpu);
1399
1400
dt->size = svm->vmcb->save.idtr.limit;
1401
dt->address = svm->vmcb->save.idtr.base;
1402
}
1403
1404
static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1405
{
1406
struct vcpu_svm *svm = to_svm(vcpu);
1407
1408
svm->vmcb->save.idtr.limit = dt->size;
1409
svm->vmcb->save.idtr.base = dt->address ;
1410
mark_dirty(svm->vmcb, VMCB_DT);
1411
}
1412
1413
static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1414
{
1415
struct vcpu_svm *svm = to_svm(vcpu);
1416
1417
dt->size = svm->vmcb->save.gdtr.limit;
1418
dt->address = svm->vmcb->save.gdtr.base;
1419
}
1420
1421
static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1422
{
1423
struct vcpu_svm *svm = to_svm(vcpu);
1424
1425
svm->vmcb->save.gdtr.limit = dt->size;
1426
svm->vmcb->save.gdtr.base = dt->address ;
1427
mark_dirty(svm->vmcb, VMCB_DT);
1428
}
1429
1430
static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
1431
{
1432
}
1433
1434
static void svm_decache_cr3(struct kvm_vcpu *vcpu)
1435
{
1436
}
1437
1438
static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1439
{
1440
}
1441
1442
static void update_cr0_intercept(struct vcpu_svm *svm)
1443
{
1444
ulong gcr0 = svm->vcpu.arch.cr0;
1445
u64 *hcr0 = &svm->vmcb->save.cr0;
1446
1447
if (!svm->vcpu.fpu_active)
1448
*hcr0 |= SVM_CR0_SELECTIVE_MASK;
1449
else
1450
*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
1451
| (gcr0 & SVM_CR0_SELECTIVE_MASK);
1452
1453
mark_dirty(svm->vmcb, VMCB_CR);
1454
1455
if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
1456
clr_cr_intercept(svm, INTERCEPT_CR0_READ);
1457
clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1458
} else {
1459
set_cr_intercept(svm, INTERCEPT_CR0_READ);
1460
set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1461
}
1462
}
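/*
 * When the guest-visible CR0 already matches the value the hardware runs
 * with (and the FPU is active), the full CR0 read/write intercepts are
 * dropped and only the SELECTIVE_CR0 intercept installed by init_vmcb()
 * remains.
 */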
1463
1464
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1465
{
1466
struct vcpu_svm *svm = to_svm(vcpu);
1467
1468
#ifdef CONFIG_X86_64
1469
if (vcpu->arch.efer & EFER_LME) {
1470
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
1471
vcpu->arch.efer |= EFER_LMA;
1472
svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
1473
}
1474
1475
if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
1476
vcpu->arch.efer &= ~EFER_LMA;
1477
svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
1478
}
1479
}
1480
#endif
1481
vcpu->arch.cr0 = cr0;
1482
1483
if (!npt_enabled)
1484
cr0 |= X86_CR0_PG | X86_CR0_WP;
1485
1486
if (!vcpu->fpu_active)
1487
cr0 |= X86_CR0_TS;
1488
/*
1489
* re-enable caching here because the QEMU bios
1490
* does not do it - this results in some delay at
1491
* reboot
1492
*/
1493
cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
1494
svm->vmcb->save.cr0 = cr0;
1495
mark_dirty(svm->vmcb, VMCB_CR);
1496
update_cr0_intercept(svm);
1497
}
1498
1499
static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1500
{
1501
unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
1502
unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
1503
1504
if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
1505
svm_flush_tlb(vcpu);
1506
1507
vcpu->arch.cr4 = cr4;
1508
if (!npt_enabled)
1509
cr4 |= X86_CR4_PAE;
1510
cr4 |= host_cr4_mce;
1511
to_svm(vcpu)->vmcb->save.cr4 = cr4;
1512
mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
1513
}
1514
1515
static void svm_set_segment(struct kvm_vcpu *vcpu,
1516
struct kvm_segment *var, int seg)
1517
{
1518
struct vcpu_svm *svm = to_svm(vcpu);
1519
struct vmcb_seg *s = svm_seg(vcpu, seg);
1520
1521
s->base = var->base;
1522
s->limit = var->limit;
1523
s->selector = var->selector;
1524
if (var->unusable)
1525
s->attrib = 0;
1526
else {
1527
s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
1528
s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
1529
s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
1530
s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
1531
s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
1532
s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
1533
s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
1534
s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1535
}
1536
if (seg == VCPU_SREG_CS)
1537
svm->vmcb->save.cpl
1538
= (svm->vmcb->save.cs.attrib
1539
>> SVM_SELECTOR_DPL_SHIFT) & 3;
1540
1541
mark_dirty(svm->vmcb, VMCB_SEG);
1542
}
1543
1544
static void update_db_intercept(struct kvm_vcpu *vcpu)
1545
{
1546
struct vcpu_svm *svm = to_svm(vcpu);
1547
1548
clr_exception_intercept(svm, DB_VECTOR);
1549
clr_exception_intercept(svm, BP_VECTOR);
1550
1551
if (svm->nmi_singlestep)
1552
set_exception_intercept(svm, DB_VECTOR);
1553
1554
if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
1555
if (vcpu->guest_debug &
1556
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
1557
set_exception_intercept(svm, DB_VECTOR);
1558
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
1559
set_exception_intercept(svm, BP_VECTOR);
1560
} else
1561
vcpu->guest_debug = 0;
1562
}
1563
1564
static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1565
{
1566
struct vcpu_svm *svm = to_svm(vcpu);
1567
1568
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1569
svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
1570
else
1571
svm->vmcb->save.dr7 = vcpu->arch.dr7;
1572
1573
mark_dirty(svm->vmcb, VMCB_DR);
1574
1575
update_db_intercept(vcpu);
1576
}
1577
1578
static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1579
{
1580
if (sd->next_asid > sd->max_asid) {
1581
++sd->asid_generation;
1582
sd->next_asid = 1;
1583
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1584
}
1585
1586
svm->asid_generation = sd->asid_generation;
1587
svm->vmcb->control.asid = sd->next_asid++;
1588
1589
mark_dirty(svm->vmcb, VMCB_ASID);
1590
}
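/*
 * ASIDs are handed out per physical CPU; once next_asid exceeds max_asid
 * the generation is bumped and the TLB is flushed so old ASIDs can be
 * reused. svm_vcpu_load() zeroes svm->asid_generation when a vcpu changes
 * CPUs, so the mismatch is noticed and a fresh ASID gets allocated on the
 * new CPU.
 */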
1591
1592
static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1593
{
1594
struct vcpu_svm *svm = to_svm(vcpu);
1595
1596
svm->vmcb->save.dr7 = value;
1597
mark_dirty(svm->vmcb, VMCB_DR);
1598
}
1599
1600
static int pf_interception(struct vcpu_svm *svm)
1601
{
1602
u64 fault_address = svm->vmcb->control.exit_info_2;
1603
u32 error_code;
1604
int r = 1;
1605
1606
switch (svm->apf_reason) {
1607
default:
1608
error_code = svm->vmcb->control.exit_info_1;
1609
1610
trace_kvm_page_fault(fault_address, error_code);
1611
if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
1612
kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
1613
r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
1614
svm->vmcb->control.insn_bytes,
1615
svm->vmcb->control.insn_len);
1616
break;
1617
case KVM_PV_REASON_PAGE_NOT_PRESENT:
1618
svm->apf_reason = 0;
1619
local_irq_disable();
1620
kvm_async_pf_task_wait(fault_address);
1621
local_irq_enable();
1622
break;
1623
case KVM_PV_REASON_PAGE_READY:
1624
svm->apf_reason = 0;
1625
local_irq_disable();
1626
kvm_async_pf_task_wake(fault_address);
1627
local_irq_enable();
1628
break;
1629
}
1630
return r;
1631
}
1632
1633
static int db_interception(struct vcpu_svm *svm)
1634
{
1635
struct kvm_run *kvm_run = svm->vcpu.run;
1636
1637
if (!(svm->vcpu.guest_debug &
1638
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
1639
!svm->nmi_singlestep) {
1640
kvm_queue_exception(&svm->vcpu, DB_VECTOR);
1641
return 1;
1642
}
1643
1644
if (svm->nmi_singlestep) {
1645
svm->nmi_singlestep = false;
1646
if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
1647
svm->vmcb->save.rflags &=
1648
~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1649
update_db_intercept(&svm->vcpu);
1650
}
1651
1652
if (svm->vcpu.guest_debug &
1653
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
1654
kvm_run->exit_reason = KVM_EXIT_DEBUG;
1655
kvm_run->debug.arch.pc =
1656
svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1657
kvm_run->debug.arch.exception = DB_VECTOR;
1658
return 0;
1659
}
1660
1661
return 1;
1662
}
1663
1664
static int bp_interception(struct vcpu_svm *svm)
1665
{
1666
struct kvm_run *kvm_run = svm->vcpu.run;
1667
1668
kvm_run->exit_reason = KVM_EXIT_DEBUG;
1669
kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1670
kvm_run->debug.arch.exception = BP_VECTOR;
1671
return 0;
1672
}
1673
1674
static int ud_interception(struct vcpu_svm *svm)
1675
{
1676
int er;
1677
1678
er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
1679
if (er != EMULATE_DONE)
1680
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1681
return 1;
1682
}
1683
1684
static void svm_fpu_activate(struct kvm_vcpu *vcpu)
1685
{
1686
struct vcpu_svm *svm = to_svm(vcpu);
1687
1688
clr_exception_intercept(svm, NM_VECTOR);
1689
1690
svm->vcpu.fpu_active = 1;
1691
update_cr0_intercept(svm);
1692
}
1693
1694
static int nm_interception(struct vcpu_svm *svm)
1695
{
1696
svm_fpu_activate(&svm->vcpu);
1697
return 1;
1698
}
1699
1700
static bool is_erratum_383(void)
1701
{
1702
int err, i;
1703
u64 value;
1704
1705
if (!erratum_383_found)
1706
return false;
1707
1708
value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
1709
if (err)
1710
return false;
1711
1712
/* Bit 62 may or may not be set for this mce */
1713
value &= ~(1ULL << 62);
1714
1715
if (value != 0xb600000000010015ULL)
1716
return false;
1717
1718
/* Clear MCi_STATUS registers */
1719
for (i = 0; i < 6; ++i)
1720
native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
1721
1722
value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
1723
if (!err) {
1724
u32 low, high;
1725
1726
value &= ~(1ULL << 2);
1727
low = lower_32_bits(value);
1728
high = upper_32_bits(value);
1729
1730
native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
1731
}
1732
1733
/* Flush tlb to evict multi-match entries */
1734
__flush_tlb_all();
1735
1736
return true;
1737
}
1738
1739
static void svm_handle_mce(struct vcpu_svm *svm)
1740
{
1741
if (is_erratum_383()) {
1742
/*
1743
* Erratum 383 triggered. Guest state is corrupt so kill the
1744
* guest.
1745
*/
1746
pr_err("KVM: Guest triggered AMD Erratum 383\n");
1747
1748
kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
1749
1750
return;
1751
}
1752
1753
/*
1754
* On an #MC intercept the MCE handler is not called automatically in
1755
* the host. So do it by hand here.
1756
*/
1757
asm volatile (
1758
"int $0x12\n");
1759
/* not sure if we ever come back to this point */
1760
1761
return;
1762
}
1763
1764
static int mc_interception(struct vcpu_svm *svm)
1765
{
1766
return 1;
1767
}
1768
1769
static int shutdown_interception(struct vcpu_svm *svm)
1770
{
1771
struct kvm_run *kvm_run = svm->vcpu.run;
1772
1773
/*
1774
* VMCB is undefined after a SHUTDOWN intercept
1775
* so reinitialize it.
1776
*/
1777
clear_page(svm->vmcb);
1778
init_vmcb(svm);
1779
1780
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
1781
return 0;
1782
}
1783
1784
static int io_interception(struct vcpu_svm *svm)
1785
{
1786
struct kvm_vcpu *vcpu = &svm->vcpu;
1787
u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1788
int size, in, string;
1789
unsigned port;
1790
1791
++svm->vcpu.stat.io_exits;
1792
string = (io_info & SVM_IOIO_STR_MASK) != 0;
1793
in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1794
if (string || in)
1795
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
1796
1797
port = io_info >> 16;
1798
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1799
svm->next_rip = svm->vmcb->control.exit_info_2;
1800
skip_emulated_instruction(&svm->vcpu);
1801
1802
return kvm_fast_pio_out(vcpu, size, port);
1803
}
1804
1805
static int nmi_interception(struct vcpu_svm *svm)
1806
{
1807
return 1;
1808
}
1809
1810
static int intr_interception(struct vcpu_svm *svm)
1811
{
1812
++svm->vcpu.stat.irq_exits;
1813
return 1;
1814
}
1815
1816
static int nop_on_interception(struct vcpu_svm *svm)
1817
{
1818
return 1;
1819
}
1820
1821
static int halt_interception(struct vcpu_svm *svm)
1822
{
1823
svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
1824
skip_emulated_instruction(&svm->vcpu);
1825
return kvm_emulate_halt(&svm->vcpu);
1826
}
1827
1828
static int vmmcall_interception(struct vcpu_svm *svm)
1829
{
1830
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1831
skip_emulated_instruction(&svm->vcpu);
1832
kvm_emulate_hypercall(&svm->vcpu);
1833
return 1;
1834
}
1835
1836
static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
1837
{
1838
struct vcpu_svm *svm = to_svm(vcpu);
1839
1840
return svm->nested.nested_cr3;
1841
}
1842
1843
static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
1844
unsigned long root)
1845
{
1846
struct vcpu_svm *svm = to_svm(vcpu);
1847
1848
svm->vmcb->control.nested_cr3 = root;
1849
mark_dirty(svm->vmcb, VMCB_NPT);
1850
svm_flush_tlb(vcpu);
1851
}
1852
1853
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
1854
struct x86_exception *fault)
1855
{
1856
struct vcpu_svm *svm = to_svm(vcpu);
1857
1858
svm->vmcb->control.exit_code = SVM_EXIT_NPF;
1859
svm->vmcb->control.exit_code_hi = 0;
1860
svm->vmcb->control.exit_info_1 = fault->error_code;
1861
svm->vmcb->control.exit_info_2 = fault->address;
1862
1863
nested_svm_vmexit(svm);
1864
}
1865
1866
static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
1867
{
1868
int r;
1869
1870
r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
1871
1872
vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
1873
vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
1874
vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
1875
vcpu->arch.mmu.shadow_root_level = get_npt_level();
1876
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
1877
1878
return r;
1879
}
1880
1881
static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
1882
{
1883
vcpu->arch.walk_mmu = &vcpu->arch.mmu;
1884
}
1885
1886
static int nested_svm_check_permissions(struct vcpu_svm *svm)
1887
{
1888
if (!(svm->vcpu.arch.efer & EFER_SVME)
1889
|| !is_paging(&svm->vcpu)) {
1890
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1891
return 1;
1892
}
1893
1894
if (svm->vmcb->save.cpl) {
1895
kvm_inject_gp(&svm->vcpu, 0);
1896
return 1;
1897
}
1898
1899
return 0;
1900
}
1901
1902
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
1903
bool has_error_code, u32 error_code)
1904
{
1905
int vmexit;
1906
1907
if (!is_guest_mode(&svm->vcpu))
1908
return 0;
1909
1910
svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
1911
svm->vmcb->control.exit_code_hi = 0;
1912
svm->vmcb->control.exit_info_1 = error_code;
1913
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
1914
1915
vmexit = nested_svm_intercept(svm);
1916
if (vmexit == NESTED_EXIT_DONE)
1917
svm->nested.exit_required = true;
1918
1919
return vmexit;
1920
}
1921
1922
/* This function returns true if it is save to enable the irq window */
1923
static inline bool nested_svm_intr(struct vcpu_svm *svm)
1924
{
1925
if (!is_guest_mode(&svm->vcpu))
1926
return true;
1927
1928
if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1929
return true;
1930
1931
if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
1932
return false;
1933
1934
/*
1935
* if vmexit was already requested (by intercepted exception
1936
* for instance) do not overwrite it with "external interrupt"
1937
* vmexit.
1938
*/
1939
if (svm->nested.exit_required)
1940
return false;
1941
1942
svm->vmcb->control.exit_code = SVM_EXIT_INTR;
1943
svm->vmcb->control.exit_info_1 = 0;
1944
svm->vmcb->control.exit_info_2 = 0;
1945
1946
if (svm->nested.intercept & 1ULL) {
1947
/*
1948
* The #vmexit can't be emulated here directly because this
1949
* code path runs with irqs and preemption disabled. A
1950
* #vmexit emulation might sleep. Only signal request for
1951
* the #vmexit here.
1952
*/
1953
svm->nested.exit_required = true;
1954
trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
1955
return false;
1956
}
1957
1958
return true;
1959
}
1960
1961
/* This function returns true if it is save to enable the nmi window */
1962
static inline bool nested_svm_nmi(struct vcpu_svm *svm)
1963
{
1964
if (!is_guest_mode(&svm->vcpu))
1965
return true;
1966
1967
if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
1968
return true;
1969
1970
svm->vmcb->control.exit_code = SVM_EXIT_NMI;
1971
svm->nested.exit_required = true;
1972
1973
return false;
1974
}
1975
1976
static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
1977
{
1978
struct page *page;
1979
1980
might_sleep();
1981
1982
page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
1983
if (is_error_page(page))
1984
goto error;
1985
1986
*_page = page;
1987
1988
return kmap(page);
1989
1990
error:
1991
kvm_release_page_clean(page);
1992
kvm_inject_gp(&svm->vcpu, 0);
1993
1994
return NULL;
1995
}
1996
1997
static void nested_svm_unmap(struct page *page)
1998
{
1999
kunmap(page);
2000
kvm_release_page_dirty(page);
2001
}
2002
2003
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
2004
{
2005
unsigned port;
2006
u8 val, bit;
2007
u64 gpa;
2008
2009
if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
2010
return NESTED_EXIT_HOST;
2011
2012
port = svm->vmcb->control.exit_info_1 >> 16;
2013
gpa = svm->nested.vmcb_iopm + (port / 8);
2014
bit = port % 8;
2015
val = 0;
2016
2017
if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1))
2018
val &= (1 << bit);
2019
2020
return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
2021
}
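/*
 * Example: for an exit on port 0x3f8 the permission bit is looked up at
 * byte 0x3f8 / 8 = 0x7f of the nested hypervisor's IOPM, bit
 * 0x3f8 % 8 = 0.
 */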
2022
2023
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
2024
{
2025
u32 offset, msr, value;
2026
int write, mask;
2027
2028
if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
2029
return NESTED_EXIT_HOST;
2030
2031
msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2032
offset = svm_msrpm_offset(msr);
2033
write = svm->vmcb->control.exit_info_1 & 1;
2034
mask = 1 << ((2 * (msr & 0xf)) + write);
2035
2036
if (offset == MSR_INVALID)
2037
return NESTED_EXIT_DONE;
2038
2039
/* Offset is in 32 bit units but we need it in 8 bit units */
2040
offset *= 4;
2041
2042
if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4))
2043
return NESTED_EXIT_DONE;
2044
2045
return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
2046
}
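/*
 * Same layout as svm_msrpm_offset()/set_msr_interception(), but the
 * lookup goes against the nested hypervisor's MSRPM: the read (even) or
 * write (odd) bit for the MSR in RCX decides whether this exit is
 * reflected to it.
 */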
2047
2048
static int nested_svm_exit_special(struct vcpu_svm *svm)
2049
{
2050
u32 exit_code = svm->vmcb->control.exit_code;
2051
2052
switch (exit_code) {
2053
case SVM_EXIT_INTR:
2054
case SVM_EXIT_NMI:
2055
case SVM_EXIT_EXCP_BASE + MC_VECTOR:
2056
return NESTED_EXIT_HOST;
2057
case SVM_EXIT_NPF:
2058
/* For now we are always handling NPFs when using them */
2059
if (npt_enabled)
2060
return NESTED_EXIT_HOST;
2061
break;
2062
case SVM_EXIT_EXCP_BASE + PF_VECTOR:
2063
/* When we're shadowing, trap PFs, but not async PF */
2064
if (!npt_enabled && svm->apf_reason == 0)
2065
return NESTED_EXIT_HOST;
2066
break;
2067
case SVM_EXIT_EXCP_BASE + NM_VECTOR:
2068
nm_interception(svm);
2069
break;
2070
default:
2071
break;
2072
}
2073
2074
return NESTED_EXIT_CONTINUE;
2075
}
2076
2077
/*
2078
* If this function returns true, this #vmexit was already handled
2079
*/
2080
static int nested_svm_intercept(struct vcpu_svm *svm)
2081
{
2082
u32 exit_code = svm->vmcb->control.exit_code;
2083
int vmexit = NESTED_EXIT_HOST;
2084
2085
switch (exit_code) {
2086
case SVM_EXIT_MSR:
2087
vmexit = nested_svm_exit_handled_msr(svm);
2088
break;
2089
case SVM_EXIT_IOIO:
2090
vmexit = nested_svm_intercept_ioio(svm);
2091
break;
2092
case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
2093
u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
2094
if (svm->nested.intercept_cr & bit)
2095
vmexit = NESTED_EXIT_DONE;
2096
break;
2097
}
2098
case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
2099
u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
2100
if (svm->nested.intercept_dr & bit)
2101
vmexit = NESTED_EXIT_DONE;
2102
break;
2103
}
2104
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
2105
u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
2106
if (svm->nested.intercept_exceptions & excp_bits)
2107
vmexit = NESTED_EXIT_DONE;
2108
/* an async page fault always causes a vmexit */
2109
else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
2110
svm->apf_reason != 0)
2111
vmexit = NESTED_EXIT_DONE;
2112
break;
2113
}
2114
case SVM_EXIT_ERR: {
2115
vmexit = NESTED_EXIT_DONE;
2116
break;
2117
}
2118
default: {
2119
u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
2120
if (svm->nested.intercept & exit_bits)
2121
vmexit = NESTED_EXIT_DONE;
2122
}
2123
}
2124
2125
return vmexit;
2126
}
2127
2128
static int nested_svm_exit_handled(struct vcpu_svm *svm)
2129
{
2130
int vmexit;
2131
2132
vmexit = nested_svm_intercept(svm);
2133
2134
if (vmexit == NESTED_EXIT_DONE)
2135
nested_svm_vmexit(svm);
2136
2137
return vmexit;
2138
}
2139
2140
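/* Copy the VMCB control area fields relevant to nested SVM from @from_vmcb to @dst_vmcb. */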
static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
2141
{
2142
struct vmcb_control_area *dst = &dst_vmcb->control;
2143
struct vmcb_control_area *from = &from_vmcb->control;
2144
2145
dst->intercept_cr = from->intercept_cr;
2146
dst->intercept_dr = from->intercept_dr;
2147
dst->intercept_exceptions = from->intercept_exceptions;
2148
dst->intercept = from->intercept;
2149
dst->iopm_base_pa = from->iopm_base_pa;
2150
dst->msrpm_base_pa = from->msrpm_base_pa;
2151
dst->tsc_offset = from->tsc_offset;
2152
dst->asid = from->asid;
2153
dst->tlb_ctl = from->tlb_ctl;
2154
dst->int_ctl = from->int_ctl;
2155
dst->int_vector = from->int_vector;
2156
dst->int_state = from->int_state;
2157
dst->exit_code = from->exit_code;
2158
dst->exit_code_hi = from->exit_code_hi;
2159
dst->exit_info_1 = from->exit_info_1;
2160
dst->exit_info_2 = from->exit_info_2;
2161
dst->exit_int_info = from->exit_int_info;
2162
dst->exit_int_info_err = from->exit_int_info_err;
2163
dst->nested_ctl = from->nested_ctl;
2164
dst->event_inj = from->event_inj;
2165
dst->event_inj_err = from->event_inj_err;
2166
dst->nested_cr3 = from->nested_cr3;
2167
dst->lbr_ctl = from->lbr_ctl;
2168
}
2169
2170
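/*
* Emulate a #VMEXIT to the L1 hypervisor: copy the current guest state
* into the nested VMCB, restore the L1 state saved in hsave and leave
* guest mode.
*/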
static int nested_svm_vmexit(struct vcpu_svm *svm)
2171
{
2172
struct vmcb *nested_vmcb;
2173
struct vmcb *hsave = svm->nested.hsave;
2174
struct vmcb *vmcb = svm->vmcb;
2175
struct page *page;
2176
2177
trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
2178
vmcb->control.exit_info_1,
2179
vmcb->control.exit_info_2,
2180
vmcb->control.exit_int_info,
2181
vmcb->control.exit_int_info_err);
2182
2183
nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
2184
if (!nested_vmcb)
2185
return 1;
2186
2187
/* Exit Guest-Mode */
2188
leave_guest_mode(&svm->vcpu);
2189
svm->nested.vmcb = 0;
2190
2191
/* Give the current vmcb to the guest */
2192
disable_gif(svm);
2193
2194
nested_vmcb->save.es = vmcb->save.es;
2195
nested_vmcb->save.cs = vmcb->save.cs;
2196
nested_vmcb->save.ss = vmcb->save.ss;
2197
nested_vmcb->save.ds = vmcb->save.ds;
2198
nested_vmcb->save.gdtr = vmcb->save.gdtr;
2199
nested_vmcb->save.idtr = vmcb->save.idtr;
2200
nested_vmcb->save.efer = svm->vcpu.arch.efer;
2201
nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
2202
nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
2203
nested_vmcb->save.cr2 = vmcb->save.cr2;
2204
nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
2205
nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
2206
nested_vmcb->save.rip = vmcb->save.rip;
2207
nested_vmcb->save.rsp = vmcb->save.rsp;
2208
nested_vmcb->save.rax = vmcb->save.rax;
2209
nested_vmcb->save.dr7 = vmcb->save.dr7;
2210
nested_vmcb->save.dr6 = vmcb->save.dr6;
2211
nested_vmcb->save.cpl = vmcb->save.cpl;
2212
2213
nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
2214
nested_vmcb->control.int_vector = vmcb->control.int_vector;
2215
nested_vmcb->control.int_state = vmcb->control.int_state;
2216
nested_vmcb->control.exit_code = vmcb->control.exit_code;
2217
nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
2218
nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
2219
nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
2220
nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
2221
nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
2222
nested_vmcb->control.next_rip = vmcb->control.next_rip;
2223
2224
/*
2225
* If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
2226
* to make sure that we do not lose injected events. So check event_inj
2227
* here and copy it to exit_int_info if it is valid.
2228
* Exit_int_info and event_inj can't both be valid because the case
2229
* below only happens on a VMRUN instruction intercept which has
2230
* no valid exit_int_info set.
2231
*/
2232
if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
2233
struct vmcb_control_area *nc = &nested_vmcb->control;
2234
2235
nc->exit_int_info = vmcb->control.event_inj;
2236
nc->exit_int_info_err = vmcb->control.event_inj_err;
2237
}
2238
2239
nested_vmcb->control.tlb_ctl = 0;
2240
nested_vmcb->control.event_inj = 0;
2241
nested_vmcb->control.event_inj_err = 0;
2242
2243
/* We always set V_INTR_MASKING and remember the old value in hflags */
2244
if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
2245
nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
2246
2247
/* Restore the original control entries */
2248
copy_vmcb_control_area(vmcb, hsave);
2249
2250
kvm_clear_exception_queue(&svm->vcpu);
2251
kvm_clear_interrupt_queue(&svm->vcpu);
2252
2253
svm->nested.nested_cr3 = 0;
2254
2255
/* Restore selected save entries */
2256
svm->vmcb->save.es = hsave->save.es;
2257
svm->vmcb->save.cs = hsave->save.cs;
2258
svm->vmcb->save.ss = hsave->save.ss;
2259
svm->vmcb->save.ds = hsave->save.ds;
2260
svm->vmcb->save.gdtr = hsave->save.gdtr;
2261
svm->vmcb->save.idtr = hsave->save.idtr;
2262
kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
2263
svm_set_efer(&svm->vcpu, hsave->save.efer);
2264
svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
2265
svm_set_cr4(&svm->vcpu, hsave->save.cr4);
2266
if (npt_enabled) {
2267
svm->vmcb->save.cr3 = hsave->save.cr3;
2268
svm->vcpu.arch.cr3 = hsave->save.cr3;
2269
} else {
2270
(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
2271
}
2272
kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
2273
kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
2274
kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
2275
svm->vmcb->save.dr7 = 0;
2276
svm->vmcb->save.cpl = 0;
2277
svm->vmcb->control.exit_int_info = 0;
2278
2279
mark_all_dirty(svm->vmcb);
2280
2281
nested_svm_unmap(page);
2282
2283
nested_svm_uninit_mmu_context(&svm->vcpu);
2284
kvm_mmu_reset_context(&svm->vcpu);
2285
kvm_mmu_load(&svm->vcpu);
2286
2287
return 0;
2288
}
2289
2290
static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
2291
{
2292
/*
2293
* This function merges the msr permission bitmaps of kvm and the
2294
* nested vmcb. It is optimized in that it only merges the parts where
2295
* the kvm msr permission bitmap may contain zero bits
2296
*/
2297
int i;
2298
2299
if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
2300
return true;
2301
2302
for (i = 0; i < MSRPM_OFFSETS; i++) {
2303
u32 value, p;
2304
u64 offset;
2305
2306
if (msrpm_offsets[i] == 0xffffffff)
2307
break;
2308
2309
p = msrpm_offsets[i];
2310
offset = svm->nested.vmcb_msrpm + (p * 4);
2311
2312
if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
2313
return false;
2314
2315
svm->nested.msrpm[p] = svm->msrpm[p] | value;
2316
}
2317
2318
svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
2319
2320
return true;
2321
}
2322
2323
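/* Minimal consistency checks on a nested VMCB before VMRUN is emulated. */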
static bool nested_vmcb_checks(struct vmcb *vmcb)
2324
{
2325
if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
2326
return false;
2327
2328
if (vmcb->control.asid == 0)
2329
return false;
2330
2331
if (vmcb->control.nested_ctl && !npt_enabled)
2332
return false;
2333
2334
return true;
2335
}
2336
2337
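/*
* Emulate VMRUN: save the current L1 state into hsave, load the nested
* guest state from the VMCB referenced by RAX and enter guest mode.
*/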
static bool nested_svm_vmrun(struct vcpu_svm *svm)
2338
{
2339
struct vmcb *nested_vmcb;
2340
struct vmcb *hsave = svm->nested.hsave;
2341
struct vmcb *vmcb = svm->vmcb;
2342
struct page *page;
2343
u64 vmcb_gpa;
2344
2345
vmcb_gpa = svm->vmcb->save.rax;
2346
2347
nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2348
if (!nested_vmcb)
2349
return false;
2350
2351
if (!nested_vmcb_checks(nested_vmcb)) {
2352
nested_vmcb->control.exit_code = SVM_EXIT_ERR;
2353
nested_vmcb->control.exit_code_hi = 0;
2354
nested_vmcb->control.exit_info_1 = 0;
2355
nested_vmcb->control.exit_info_2 = 0;
2356
2357
nested_svm_unmap(page);
2358
2359
return false;
2360
}
2361
2362
trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
2363
nested_vmcb->save.rip,
2364
nested_vmcb->control.int_ctl,
2365
nested_vmcb->control.event_inj,
2366
nested_vmcb->control.nested_ctl);
2367
2368
trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
2369
nested_vmcb->control.intercept_cr >> 16,
2370
nested_vmcb->control.intercept_exceptions,
2371
nested_vmcb->control.intercept);
2372
2373
/* Clear internal status */
2374
kvm_clear_exception_queue(&svm->vcpu);
2375
kvm_clear_interrupt_queue(&svm->vcpu);
2376
2377
/*
2378
* Save the old vmcb, so we don't need to pick what we save, but can
2379
* restore everything when a VMEXIT occurs
2380
*/
2381
hsave->save.es = vmcb->save.es;
2382
hsave->save.cs = vmcb->save.cs;
2383
hsave->save.ss = vmcb->save.ss;
2384
hsave->save.ds = vmcb->save.ds;
2385
hsave->save.gdtr = vmcb->save.gdtr;
2386
hsave->save.idtr = vmcb->save.idtr;
2387
hsave->save.efer = svm->vcpu.arch.efer;
2388
hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
2389
hsave->save.cr4 = svm->vcpu.arch.cr4;
2390
hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
2391
hsave->save.rip = kvm_rip_read(&svm->vcpu);
2392
hsave->save.rsp = vmcb->save.rsp;
2393
hsave->save.rax = vmcb->save.rax;
2394
if (npt_enabled)
2395
hsave->save.cr3 = vmcb->save.cr3;
2396
else
2397
hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
2398
2399
copy_vmcb_control_area(hsave, vmcb);
2400
2401
if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
2402
svm->vcpu.arch.hflags |= HF_HIF_MASK;
2403
else
2404
svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
2405
2406
if (nested_vmcb->control.nested_ctl) {
2407
kvm_mmu_unload(&svm->vcpu);
2408
svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
2409
nested_svm_init_mmu_context(&svm->vcpu);
2410
}
2411
2412
/* Load the nested guest state */
2413
svm->vmcb->save.es = nested_vmcb->save.es;
2414
svm->vmcb->save.cs = nested_vmcb->save.cs;
2415
svm->vmcb->save.ss = nested_vmcb->save.ss;
2416
svm->vmcb->save.ds = nested_vmcb->save.ds;
2417
svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
2418
svm->vmcb->save.idtr = nested_vmcb->save.idtr;
2419
kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
2420
svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
2421
svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
2422
svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
2423
if (npt_enabled) {
2424
svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
2425
svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
2426
} else
2427
(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
2428
2429
/* Guest paging mode is active - reset mmu */
2430
kvm_mmu_reset_context(&svm->vcpu);
2431
2432
svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
2433
kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
2434
kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
2435
kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
2436
2437
/* In case we don't even reach vcpu_run, the fields are not updated */
2438
svm->vmcb->save.rax = nested_vmcb->save.rax;
2439
svm->vmcb->save.rsp = nested_vmcb->save.rsp;
2440
svm->vmcb->save.rip = nested_vmcb->save.rip;
2441
svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
2442
svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
2443
svm->vmcb->save.cpl = nested_vmcb->save.cpl;
2444
2445
svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
2446
svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
2447
2448
/* cache intercepts */
2449
svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
2450
svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
2451
svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
2452
svm->nested.intercept = nested_vmcb->control.intercept;
2453
2454
svm_flush_tlb(&svm->vcpu);
2455
svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
2456
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
2457
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
2458
else
2459
svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
2460
2461
if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
2462
/* We only want the cr8 intercept bits of the guest */
2463
clr_cr_intercept(svm, INTERCEPT_CR8_READ);
2464
clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
2465
}
2466
2467
/* We don't want to see VMMCALLs from a nested guest */
2468
clr_intercept(svm, INTERCEPT_VMMCALL);
2469
2470
svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
2471
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
2472
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
2473
svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
2474
svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
2475
svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
2476
2477
nested_svm_unmap(page);
2478
2479
/* Enter Guest-Mode */
2480
enter_guest_mode(&svm->vcpu);
2481
2482
/*
2483
* Merge guest and host intercepts - must be called with vcpu in
2484
* guest-mode to take effect here
2485
*/
2486
recalc_intercepts(svm);
2487
2488
svm->nested.vmcb = vmcb_gpa;
2489
2490
enable_gif(svm);
2491
2492
mark_all_dirty(svm->vmcb);
2493
2494
return true;
2495
}
2496
2497
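/*
* Copy the state that VMLOAD/VMSAVE transfers (FS/GS/TR/LDTR, the
* SYSCALL/SYSENTER MSRs and KERNEL_GS_BASE) between two VMCBs.
*/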
static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
2498
{
2499
to_vmcb->save.fs = from_vmcb->save.fs;
2500
to_vmcb->save.gs = from_vmcb->save.gs;
2501
to_vmcb->save.tr = from_vmcb->save.tr;
2502
to_vmcb->save.ldtr = from_vmcb->save.ldtr;
2503
to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
2504
to_vmcb->save.star = from_vmcb->save.star;
2505
to_vmcb->save.lstar = from_vmcb->save.lstar;
2506
to_vmcb->save.cstar = from_vmcb->save.cstar;
2507
to_vmcb->save.sfmask = from_vmcb->save.sfmask;
2508
to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
2509
to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
2510
to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
2511
}
2512
2513
static int vmload_interception(struct vcpu_svm *svm)
2514
{
2515
struct vmcb *nested_vmcb;
2516
struct page *page;
2517
2518
if (nested_svm_check_permissions(svm))
2519
return 1;
2520
2521
nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2522
if (!nested_vmcb)
2523
return 1;
2524
2525
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2526
skip_emulated_instruction(&svm->vcpu);
2527
2528
nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
2529
nested_svm_unmap(page);
2530
2531
return 1;
2532
}
2533
2534
static int vmsave_interception(struct vcpu_svm *svm)
2535
{
2536
struct vmcb *nested_vmcb;
2537
struct page *page;
2538
2539
if (nested_svm_check_permissions(svm))
2540
return 1;
2541
2542
nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2543
if (!nested_vmcb)
2544
return 1;
2545
2546
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2547
skip_emulated_instruction(&svm->vcpu);
2548
2549
nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
2550
nested_svm_unmap(page);
2551
2552
return 1;
2553
}
2554
2555
static int vmrun_interception(struct vcpu_svm *svm)
2556
{
2557
if (nested_svm_check_permissions(svm))
2558
return 1;
2559
2560
/* Save rip after vmrun instruction */
2561
kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
2562
2563
if (!nested_svm_vmrun(svm))
2564
return 1;
2565
2566
if (!nested_svm_vmrun_msrpm(svm))
2567
goto failed;
2568
2569
return 1;
2570
2571
failed:
2572
2573
svm->vmcb->control.exit_code = SVM_EXIT_ERR;
2574
svm->vmcb->control.exit_code_hi = 0;
2575
svm->vmcb->control.exit_info_1 = 0;
2576
svm->vmcb->control.exit_info_2 = 0;
2577
2578
nested_svm_vmexit(svm);
2579
2580
return 1;
2581
}
2582
2583
static int stgi_interception(struct vcpu_svm *svm)
2584
{
2585
if (nested_svm_check_permissions(svm))
2586
return 1;
2587
2588
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2589
skip_emulated_instruction(&svm->vcpu);
2590
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
2591
2592
enable_gif(svm);
2593
2594
return 1;
2595
}
2596
2597
static int clgi_interception(struct vcpu_svm *svm)
2598
{
2599
if (nested_svm_check_permissions(svm))
2600
return 1;
2601
2602
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2603
skip_emulated_instruction(&svm->vcpu);
2604
2605
disable_gif(svm);
2606
2607
/* After a CLGI no interrupts should come */
2608
svm_clear_vintr(svm);
2609
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
2610
2611
mark_dirty(svm->vmcb, VMCB_INTR);
2612
2613
return 1;
2614
}
2615
2616
static int invlpga_interception(struct vcpu_svm *svm)
2617
{
2618
struct kvm_vcpu *vcpu = &svm->vcpu;
2619
2620
trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
2621
vcpu->arch.regs[VCPU_REGS_RAX]);
2622
2623
/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
2624
kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
2625
2626
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2627
skip_emulated_instruction(&svm->vcpu);
2628
return 1;
2629
}
2630
2631
static int skinit_interception(struct vcpu_svm *svm)
2632
{
2633
trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
2634
2635
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2636
return 1;
2637
}
2638
2639
static int xsetbv_interception(struct vcpu_svm *svm)
2640
{
2641
u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
2642
u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
2643
2644
if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
2645
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2646
skip_emulated_instruction(&svm->vcpu);
2647
}
2648
2649
return 1;
2650
}
2651
2652
static int invalid_op_interception(struct vcpu_svm *svm)
2653
{
2654
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2655
return 1;
2656
}
2657
2658
static int task_switch_interception(struct vcpu_svm *svm)
2659
{
2660
u16 tss_selector;
2661
int reason;
2662
int int_type = svm->vmcb->control.exit_int_info &
2663
SVM_EXITINTINFO_TYPE_MASK;
2664
int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
2665
uint32_t type =
2666
svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
2667
uint32_t idt_v =
2668
svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
2669
bool has_error_code = false;
2670
u32 error_code = 0;
2671
2672
tss_selector = (u16)svm->vmcb->control.exit_info_1;
2673
2674
if (svm->vmcb->control.exit_info_2 &
2675
(1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
2676
reason = TASK_SWITCH_IRET;
2677
else if (svm->vmcb->control.exit_info_2 &
2678
(1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
2679
reason = TASK_SWITCH_JMP;
2680
else if (idt_v)
2681
reason = TASK_SWITCH_GATE;
2682
else
2683
reason = TASK_SWITCH_CALL;
2684
2685
if (reason == TASK_SWITCH_GATE) {
2686
switch (type) {
2687
case SVM_EXITINTINFO_TYPE_NMI:
2688
svm->vcpu.arch.nmi_injected = false;
2689
break;
2690
case SVM_EXITINTINFO_TYPE_EXEPT:
2691
if (svm->vmcb->control.exit_info_2 &
2692
(1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
2693
has_error_code = true;
2694
error_code =
2695
(u32)svm->vmcb->control.exit_info_2;
2696
}
2697
kvm_clear_exception_queue(&svm->vcpu);
2698
break;
2699
case SVM_EXITINTINFO_TYPE_INTR:
2700
kvm_clear_interrupt_queue(&svm->vcpu);
2701
break;
2702
default:
2703
break;
2704
}
2705
}
2706
2707
if (reason != TASK_SWITCH_GATE ||
2708
int_type == SVM_EXITINTINFO_TYPE_SOFT ||
2709
(int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
2710
(int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
2711
skip_emulated_instruction(&svm->vcpu);
2712
2713
if (kvm_task_switch(&svm->vcpu, tss_selector, reason,
2714
has_error_code, error_code) == EMULATE_FAIL) {
2715
svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
2716
svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
2717
svm->vcpu.run->internal.ndata = 0;
2718
return 0;
2719
}
2720
return 1;
2721
}
2722
2723
static int cpuid_interception(struct vcpu_svm *svm)
2724
{
2725
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2726
kvm_emulate_cpuid(&svm->vcpu);
2727
return 1;
2728
}
2729
2730
static int iret_interception(struct vcpu_svm *svm)
2731
{
2732
++svm->vcpu.stat.nmi_window_exits;
2733
clr_intercept(svm, INTERCEPT_IRET);
2734
svm->vcpu.arch.hflags |= HF_IRET_MASK;
2735
svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
2736
return 1;
2737
}
2738
2739
static int invlpg_interception(struct vcpu_svm *svm)
2740
{
2741
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2742
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
2743
2744
kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
2745
skip_emulated_instruction(&svm->vcpu);
2746
return 1;
2747
}
2748
2749
static int emulate_on_interception(struct vcpu_svm *svm)
2750
{
2751
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
2752
}
2753
2754
bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
2755
{
2756
unsigned long cr0 = svm->vcpu.arch.cr0;
2757
bool ret = false;
2758
u64 intercept;
2759
2760
intercept = svm->nested.intercept;
2761
2762
if (!is_guest_mode(&svm->vcpu) ||
2763
(!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
2764
return false;
2765
2766
cr0 &= ~SVM_CR0_SELECTIVE_MASK;
2767
val &= ~SVM_CR0_SELECTIVE_MASK;
2768
2769
if (cr0 ^ val) {
2770
svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
2771
ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
2772
}
2773
2774
return ret;
2775
}
2776
2777
#define CR_VALID (1ULL << 63)
2778
2779
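/*
* Handle CR accesses using decode assists: the exit code encodes which
* CR was accessed and the direction, exit_info_1 holds the GPR involved.
*/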
static int cr_interception(struct vcpu_svm *svm)
2780
{
2781
int reg, cr;
2782
unsigned long val;
2783
int err;
2784
2785
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2786
return emulate_on_interception(svm);
2787
2788
if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
2789
return emulate_on_interception(svm);
2790
2791
reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2792
cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
2793
2794
err = 0;
2795
if (cr >= 16) { /* mov to cr */
2796
cr -= 16;
2797
val = kvm_register_read(&svm->vcpu, reg);
2798
switch (cr) {
2799
case 0:
2800
if (!check_selective_cr0_intercepted(svm, val))
2801
err = kvm_set_cr0(&svm->vcpu, val);
2802
else
2803
return 1;
2804
2805
break;
2806
case 3:
2807
err = kvm_set_cr3(&svm->vcpu, val);
2808
break;
2809
case 4:
2810
err = kvm_set_cr4(&svm->vcpu, val);
2811
break;
2812
case 8:
2813
err = kvm_set_cr8(&svm->vcpu, val);
2814
break;
2815
default:
2816
WARN(1, "unhandled write to CR%d", cr);
2817
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2818
return 1;
2819
}
2820
} else { /* mov from cr */
2821
switch (cr) {
2822
case 0:
2823
val = kvm_read_cr0(&svm->vcpu);
2824
break;
2825
case 2:
2826
val = svm->vcpu.arch.cr2;
2827
break;
2828
case 3:
2829
val = kvm_read_cr3(&svm->vcpu);
2830
break;
2831
case 4:
2832
val = kvm_read_cr4(&svm->vcpu);
2833
break;
2834
case 8:
2835
val = kvm_get_cr8(&svm->vcpu);
2836
break;
2837
default:
2838
WARN(1, "unhandled read from CR%d", cr);
2839
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2840
return 1;
2841
}
2842
kvm_register_write(&svm->vcpu, reg, val);
2843
}
2844
kvm_complete_insn_gp(&svm->vcpu, err);
2845
2846
return 1;
2847
}
2848
2849
static int dr_interception(struct vcpu_svm *svm)
2850
{
2851
int reg, dr;
2852
unsigned long val;
2853
int err;
2854
2855
if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
2856
return emulate_on_interception(svm);
2857
2858
reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2859
dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
2860
2861
if (dr >= 16) { /* mov to DRn */
2862
val = kvm_register_read(&svm->vcpu, reg);
2863
kvm_set_dr(&svm->vcpu, dr - 16, val);
2864
} else {
2865
err = kvm_get_dr(&svm->vcpu, dr, &val);
2866
if (!err)
2867
kvm_register_write(&svm->vcpu, reg, val);
2868
}
2869
2870
skip_emulated_instruction(&svm->vcpu);
2871
2872
return 1;
2873
}
2874
2875
static int cr8_write_interception(struct vcpu_svm *svm)
2876
{
2877
struct kvm_run *kvm_run = svm->vcpu.run;
2878
int r;
2879
2880
u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
2881
/* instruction emulation calls kvm_set_cr8() */
2882
r = cr_interception(svm);
2883
if (irqchip_in_kernel(svm->vcpu.kvm)) {
2884
clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
2885
return r;
2886
}
2887
if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
2888
return r;
2889
kvm_run->exit_reason = KVM_EXIT_SET_TPR;
2890
return 0;
2891
}
2892
2893
static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
2894
{
2895
struct vcpu_svm *svm = to_svm(vcpu);
2896
2897
switch (ecx) {
2898
case MSR_IA32_TSC: {
2899
struct vmcb *vmcb = get_host_vmcb(svm);
2900
2901
*data = vmcb->control.tsc_offset +
2902
svm_scale_tsc(vcpu, native_read_tsc());
2903
2904
break;
2905
}
2906
case MSR_STAR:
2907
*data = svm->vmcb->save.star;
2908
break;
2909
#ifdef CONFIG_X86_64
2910
case MSR_LSTAR:
2911
*data = svm->vmcb->save.lstar;
2912
break;
2913
case MSR_CSTAR:
2914
*data = svm->vmcb->save.cstar;
2915
break;
2916
case MSR_KERNEL_GS_BASE:
2917
*data = svm->vmcb->save.kernel_gs_base;
2918
break;
2919
case MSR_SYSCALL_MASK:
2920
*data = svm->vmcb->save.sfmask;
2921
break;
2922
#endif
2923
case MSR_IA32_SYSENTER_CS:
2924
*data = svm->vmcb->save.sysenter_cs;
2925
break;
2926
case MSR_IA32_SYSENTER_EIP:
2927
*data = svm->sysenter_eip;
2928
break;
2929
case MSR_IA32_SYSENTER_ESP:
2930
*data = svm->sysenter_esp;
2931
break;
2932
/*
2933
* Nobody will change the following 5 values in the VMCB so we can
2934
* safely return them on rdmsr. They will always be 0 until LBRV is
2935
* implemented.
2936
*/
2937
case MSR_IA32_DEBUGCTLMSR:
2938
*data = svm->vmcb->save.dbgctl;
2939
break;
2940
case MSR_IA32_LASTBRANCHFROMIP:
2941
*data = svm->vmcb->save.br_from;
2942
break;
2943
case MSR_IA32_LASTBRANCHTOIP:
2944
*data = svm->vmcb->save.br_to;
2945
break;
2946
case MSR_IA32_LASTINTFROMIP:
2947
*data = svm->vmcb->save.last_excp_from;
2948
break;
2949
case MSR_IA32_LASTINTTOIP:
2950
*data = svm->vmcb->save.last_excp_to;
2951
break;
2952
case MSR_VM_HSAVE_PA:
2953
*data = svm->nested.hsave_msr;
2954
break;
2955
case MSR_VM_CR:
2956
*data = svm->nested.vm_cr_msr;
2957
break;
2958
case MSR_IA32_UCODE_REV:
2959
*data = 0x01000065;
2960
break;
2961
default:
2962
return kvm_get_msr_common(vcpu, ecx, data);
2963
}
2964
return 0;
2965
}
2966
2967
static int rdmsr_interception(struct vcpu_svm *svm)
2968
{
2969
u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2970
u64 data;
2971
2972
if (svm_get_msr(&svm->vcpu, ecx, &data)) {
2973
trace_kvm_msr_read_ex(ecx);
2974
kvm_inject_gp(&svm->vcpu, 0);
2975
} else {
2976
trace_kvm_msr_read(ecx, data);
2977
2978
svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
2979
svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
2980
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2981
skip_emulated_instruction(&svm->vcpu);
2982
}
2983
return 1;
2984
}
2985
2986
static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
2987
{
2988
struct vcpu_svm *svm = to_svm(vcpu);
2989
int svm_dis, chg_mask;
2990
2991
if (data & ~SVM_VM_CR_VALID_MASK)
2992
return 1;
2993
2994
chg_mask = SVM_VM_CR_VALID_MASK;
2995
2996
if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
2997
chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
2998
2999
svm->nested.vm_cr_msr &= ~chg_mask;
3000
svm->nested.vm_cr_msr |= (data & chg_mask);
3001
3002
svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
3003
3004
/* check for svm_disable while efer.svme is set */
3005
if (svm_dis && (vcpu->arch.efer & EFER_SVME))
3006
return 1;
3007
3008
return 0;
3009
}
3010
3011
static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
3012
{
3013
struct vcpu_svm *svm = to_svm(vcpu);
3014
3015
switch (ecx) {
3016
case MSR_IA32_TSC:
3017
kvm_write_tsc(vcpu, data);
3018
break;
3019
case MSR_STAR:
3020
svm->vmcb->save.star = data;
3021
break;
3022
#ifdef CONFIG_X86_64
3023
case MSR_LSTAR:
3024
svm->vmcb->save.lstar = data;
3025
break;
3026
case MSR_CSTAR:
3027
svm->vmcb->save.cstar = data;
3028
break;
3029
case MSR_KERNEL_GS_BASE:
3030
svm->vmcb->save.kernel_gs_base = data;
3031
break;
3032
case MSR_SYSCALL_MASK:
3033
svm->vmcb->save.sfmask = data;
3034
break;
3035
#endif
3036
case MSR_IA32_SYSENTER_CS:
3037
svm->vmcb->save.sysenter_cs = data;
3038
break;
3039
case MSR_IA32_SYSENTER_EIP:
3040
svm->sysenter_eip = data;
3041
svm->vmcb->save.sysenter_eip = data;
3042
break;
3043
case MSR_IA32_SYSENTER_ESP:
3044
svm->sysenter_esp = data;
3045
svm->vmcb->save.sysenter_esp = data;
3046
break;
3047
case MSR_IA32_DEBUGCTLMSR:
3048
if (!boot_cpu_has(X86_FEATURE_LBRV)) {
3049
pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
3050
__func__, data);
3051
break;
3052
}
3053
if (data & DEBUGCTL_RESERVED_BITS)
3054
return 1;
3055
3056
svm->vmcb->save.dbgctl = data;
3057
mark_dirty(svm->vmcb, VMCB_LBR);
3058
if (data & (1ULL<<0))
3059
svm_enable_lbrv(svm);
3060
else
3061
svm_disable_lbrv(svm);
3062
break;
3063
case MSR_VM_HSAVE_PA:
3064
svm->nested.hsave_msr = data;
3065
break;
3066
case MSR_VM_CR:
3067
return svm_set_vm_cr(vcpu, data);
3068
case MSR_VM_IGNNE:
3069
pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
3070
break;
3071
default:
3072
return kvm_set_msr_common(vcpu, ecx, data);
3073
}
3074
return 0;
3075
}
3076
3077
static int wrmsr_interception(struct vcpu_svm *svm)
3078
{
3079
u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3080
u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
3081
| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
3082
3083
3084
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3085
if (svm_set_msr(&svm->vcpu, ecx, data)) {
3086
trace_kvm_msr_write_ex(ecx, data);
3087
kvm_inject_gp(&svm->vcpu, 0);
3088
} else {
3089
trace_kvm_msr_write(ecx, data);
3090
skip_emulated_instruction(&svm->vcpu);
3091
}
3092
return 1;
3093
}
3094
3095
static int msr_interception(struct vcpu_svm *svm)
3096
{
3097
if (svm->vmcb->control.exit_info_1)
3098
return wrmsr_interception(svm);
3099
else
3100
return rdmsr_interception(svm);
3101
}
3102
3103
static int interrupt_window_interception(struct vcpu_svm *svm)
3104
{
3105
struct kvm_run *kvm_run = svm->vcpu.run;
3106
3107
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3108
svm_clear_vintr(svm);
3109
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
3110
mark_dirty(svm->vmcb, VMCB_INTR);
3111
/*
3112
* If user space is waiting to inject interrupts, exit as soon as
3113
* possible
3114
*/
3115
if (!irqchip_in_kernel(svm->vcpu.kvm) &&
3116
kvm_run->request_interrupt_window &&
3117
!kvm_cpu_has_interrupt(&svm->vcpu)) {
3118
++svm->vcpu.stat.irq_window_exits;
3119
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
3120
return 0;
3121
}
3122
3123
return 1;
3124
}
3125
3126
static int pause_interception(struct vcpu_svm *svm)
3127
{
3128
kvm_vcpu_on_spin(&(svm->vcpu));
3129
return 1;
3130
}
3131
3132
static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
3133
[SVM_EXIT_READ_CR0] = cr_interception,
3134
[SVM_EXIT_READ_CR3] = cr_interception,
3135
[SVM_EXIT_READ_CR4] = cr_interception,
3136
[SVM_EXIT_READ_CR8] = cr_interception,
3137
[SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
3138
[SVM_EXIT_WRITE_CR0] = cr_interception,
3139
[SVM_EXIT_WRITE_CR3] = cr_interception,
3140
[SVM_EXIT_WRITE_CR4] = cr_interception,
3141
[SVM_EXIT_WRITE_CR8] = cr8_write_interception,
3142
[SVM_EXIT_READ_DR0] = dr_interception,
3143
[SVM_EXIT_READ_DR1] = dr_interception,
3144
[SVM_EXIT_READ_DR2] = dr_interception,
3145
[SVM_EXIT_READ_DR3] = dr_interception,
3146
[SVM_EXIT_READ_DR4] = dr_interception,
3147
[SVM_EXIT_READ_DR5] = dr_interception,
3148
[SVM_EXIT_READ_DR6] = dr_interception,
3149
[SVM_EXIT_READ_DR7] = dr_interception,
3150
[SVM_EXIT_WRITE_DR0] = dr_interception,
3151
[SVM_EXIT_WRITE_DR1] = dr_interception,
3152
[SVM_EXIT_WRITE_DR2] = dr_interception,
3153
[SVM_EXIT_WRITE_DR3] = dr_interception,
3154
[SVM_EXIT_WRITE_DR4] = dr_interception,
3155
[SVM_EXIT_WRITE_DR5] = dr_interception,
3156
[SVM_EXIT_WRITE_DR6] = dr_interception,
3157
[SVM_EXIT_WRITE_DR7] = dr_interception,
3158
[SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
3159
[SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
3160
[SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
3161
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
3162
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
3163
[SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
3164
[SVM_EXIT_INTR] = intr_interception,
3165
[SVM_EXIT_NMI] = nmi_interception,
3166
[SVM_EXIT_SMI] = nop_on_interception,
3167
[SVM_EXIT_INIT] = nop_on_interception,
3168
[SVM_EXIT_VINTR] = interrupt_window_interception,
3169
[SVM_EXIT_CPUID] = cpuid_interception,
3170
[SVM_EXIT_IRET] = iret_interception,
3171
[SVM_EXIT_INVD] = emulate_on_interception,
3172
[SVM_EXIT_PAUSE] = pause_interception,
3173
[SVM_EXIT_HLT] = halt_interception,
3174
[SVM_EXIT_INVLPG] = invlpg_interception,
3175
[SVM_EXIT_INVLPGA] = invlpga_interception,
3176
[SVM_EXIT_IOIO] = io_interception,
3177
[SVM_EXIT_MSR] = msr_interception,
3178
[SVM_EXIT_TASK_SWITCH] = task_switch_interception,
3179
[SVM_EXIT_SHUTDOWN] = shutdown_interception,
3180
[SVM_EXIT_VMRUN] = vmrun_interception,
3181
[SVM_EXIT_VMMCALL] = vmmcall_interception,
3182
[SVM_EXIT_VMLOAD] = vmload_interception,
3183
[SVM_EXIT_VMSAVE] = vmsave_interception,
3184
[SVM_EXIT_STGI] = stgi_interception,
3185
[SVM_EXIT_CLGI] = clgi_interception,
3186
[SVM_EXIT_SKINIT] = skinit_interception,
3187
[SVM_EXIT_WBINVD] = emulate_on_interception,
3188
[SVM_EXIT_MONITOR] = invalid_op_interception,
3189
[SVM_EXIT_MWAIT] = invalid_op_interception,
3190
[SVM_EXIT_XSETBV] = xsetbv_interception,
3191
[SVM_EXIT_NPF] = pf_interception,
3192
};
3193
3194
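/* Dump the VMCB control and save areas to the kernel log. */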
static void dump_vmcb(struct kvm_vcpu *vcpu)
3195
{
3196
struct vcpu_svm *svm = to_svm(vcpu);
3197
struct vmcb_control_area *control = &svm->vmcb->control;
3198
struct vmcb_save_area *save = &svm->vmcb->save;
3199
3200
pr_err("VMCB Control Area:\n");
3201
pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
3202
pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
3203
pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
3204
pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
3205
pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
3206
pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
3207
pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
3208
pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
3209
pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
3210
pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
3211
pr_err("%-20s%d\n", "asid:", control->asid);
3212
pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
3213
pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
3214
pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
3215
pr_err("%-20s%08x\n", "int_state:", control->int_state);
3216
pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
3217
pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
3218
pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
3219
pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
3220
pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
3221
pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
3222
pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
3223
pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
3224
pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
3225
pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
3226
pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
3227
pr_err("VMCB State Save Area:\n");
3228
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3229
"es:",
3230
save->es.selector, save->es.attrib,
3231
save->es.limit, save->es.base);
3232
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3233
"cs:",
3234
save->cs.selector, save->cs.attrib,
3235
save->cs.limit, save->cs.base);
3236
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3237
"ss:",
3238
save->ss.selector, save->ss.attrib,
3239
save->ss.limit, save->ss.base);
3240
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3241
"ds:",
3242
save->ds.selector, save->ds.attrib,
3243
save->ds.limit, save->ds.base);
3244
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3245
"fs:",
3246
save->fs.selector, save->fs.attrib,
3247
save->fs.limit, save->fs.base);
3248
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3249
"gs:",
3250
save->gs.selector, save->gs.attrib,
3251
save->gs.limit, save->gs.base);
3252
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3253
"gdtr:",
3254
save->gdtr.selector, save->gdtr.attrib,
3255
save->gdtr.limit, save->gdtr.base);
3256
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3257
"ldtr:",
3258
save->ldtr.selector, save->ldtr.attrib,
3259
save->ldtr.limit, save->ldtr.base);
3260
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3261
"idtr:",
3262
save->idtr.selector, save->idtr.attrib,
3263
save->idtr.limit, save->idtr.base);
3264
pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3265
"tr:",
3266
save->tr.selector, save->tr.attrib,
3267
save->tr.limit, save->tr.base);
3268
pr_err("cpl: %d efer: %016llx\n",
3269
save->cpl, save->efer);
3270
pr_err("%-15s %016llx %-13s %016llx\n",
3271
"cr0:", save->cr0, "cr2:", save->cr2);
3272
pr_err("%-15s %016llx %-13s %016llx\n",
3273
"cr3:", save->cr3, "cr4:", save->cr4);
3274
pr_err("%-15s %016llx %-13s %016llx\n",
3275
"dr6:", save->dr6, "dr7:", save->dr7);
3276
pr_err("%-15s %016llx %-13s %016llx\n",
3277
"rip:", save->rip, "rflags:", save->rflags);
3278
pr_err("%-15s %016llx %-13s %016llx\n",
3279
"rsp:", save->rsp, "rax:", save->rax);
3280
pr_err("%-15s %016llx %-13s %016llx\n",
3281
"star:", save->star, "lstar:", save->lstar);
3282
pr_err("%-15s %016llx %-13s %016llx\n",
3283
"cstar:", save->cstar, "sfmask:", save->sfmask);
3284
pr_err("%-15s %016llx %-13s %016llx\n",
3285
"kernel_gs_base:", save->kernel_gs_base,
3286
"sysenter_cs:", save->sysenter_cs);
3287
pr_err("%-15s %016llx %-13s %016llx\n",
3288
"sysenter_esp:", save->sysenter_esp,
3289
"sysenter_eip:", save->sysenter_eip);
3290
pr_err("%-15s %016llx %-13s %016llx\n",
3291
"gpat:", save->g_pat, "dbgctl:", save->dbgctl);
3292
pr_err("%-15s %016llx %-13s %016llx\n",
3293
"br_from:", save->br_from, "br_to:", save->br_to);
3294
pr_err("%-15s %016llx %-13s %016llx\n",
3295
"excp_from:", save->last_excp_from,
3296
"excp_to:", save->last_excp_to);
3297
}
3298
3299
static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
3300
{
3301
struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
3302
3303
*info1 = control->exit_info_1;
3304
*info2 = control->exit_info_2;
3305
}
3306
3307
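/*
* Top level #VMEXIT handler: give nested SVM a chance to claim the exit
* and then dispatch to the per-exit-code handler.
*/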
static int handle_exit(struct kvm_vcpu *vcpu)
3308
{
3309
struct vcpu_svm *svm = to_svm(vcpu);
3310
struct kvm_run *kvm_run = vcpu->run;
3311
u32 exit_code = svm->vmcb->control.exit_code;
3312
3313
trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
3314
3315
if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
3316
vcpu->arch.cr0 = svm->vmcb->save.cr0;
3317
if (npt_enabled)
3318
vcpu->arch.cr3 = svm->vmcb->save.cr3;
3319
3320
if (unlikely(svm->nested.exit_required)) {
3321
nested_svm_vmexit(svm);
3322
svm->nested.exit_required = false;
3323
3324
return 1;
3325
}
3326
3327
if (is_guest_mode(vcpu)) {
3328
int vmexit;
3329
3330
trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
3331
svm->vmcb->control.exit_info_1,
3332
svm->vmcb->control.exit_info_2,
3333
svm->vmcb->control.exit_int_info,
3334
svm->vmcb->control.exit_int_info_err);
3335
3336
vmexit = nested_svm_exit_special(svm);
3337
3338
if (vmexit == NESTED_EXIT_CONTINUE)
3339
vmexit = nested_svm_exit_handled(svm);
3340
3341
if (vmexit == NESTED_EXIT_DONE)
3342
return 1;
3343
}
3344
3345
svm_complete_interrupts(svm);
3346
3347
if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
3348
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3349
kvm_run->fail_entry.hardware_entry_failure_reason
3350
= svm->vmcb->control.exit_code;
3351
pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
3352
dump_vmcb(vcpu);
3353
return 0;
3354
}
3355
3356
if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
3357
exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
3358
exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
3359
exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
3360
printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
3361
"exit_code 0x%x\n",
3362
__func__, svm->vmcb->control.exit_int_info,
3363
exit_code);
3364
3365
if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
3366
|| !svm_exit_handlers[exit_code]) {
3367
kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
3368
kvm_run->hw.hardware_exit_reason = exit_code;
3369
return 0;
3370
}
3371
3372
return svm_exit_handlers[exit_code](svm);
3373
}
3374
3375
static void reload_tss(struct kvm_vcpu *vcpu)
3376
{
3377
int cpu = raw_smp_processor_id();
3378
3379
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
3380
sd->tss_desc->type = 9; /* available 32/64-bit TSS */
3381
load_TR_desc();
3382
}
3383
3384
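/* Allocate a new ASID if this vcpu last ran on a different ASID generation. */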
static void pre_svm_run(struct vcpu_svm *svm)
3385
{
3386
int cpu = raw_smp_processor_id();
3387
3388
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
3389
3390
/* FIXME: handle wraparound of asid_generation */
3391
if (svm->asid_generation != sd->asid_generation)
3392
new_asid(svm, sd);
3393
}
3394
3395
static void svm_inject_nmi(struct kvm_vcpu *vcpu)
3396
{
3397
struct vcpu_svm *svm = to_svm(vcpu);
3398
3399
svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
3400
vcpu->arch.hflags |= HF_NMI_MASK;
3401
set_intercept(svm, INTERCEPT_IRET);
3402
++vcpu->stat.nmi_injections;
3403
}
3404
3405
static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
3406
{
3407
struct vmcb_control_area *control;
3408
3409
control = &svm->vmcb->control;
3410
control->int_vector = irq;
3411
control->int_ctl &= ~V_INTR_PRIO_MASK;
3412
control->int_ctl |= V_IRQ_MASK |
3413
((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
3414
mark_dirty(svm->vmcb, VMCB_INTR);
3415
}
3416
3417
static void svm_set_irq(struct kvm_vcpu *vcpu)
3418
{
3419
struct vcpu_svm *svm = to_svm(vcpu);
3420
3421
BUG_ON(!(gif_set(svm)));
3422
3423
trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
3424
++vcpu->stat.irq_injections;
3425
3426
svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
3427
SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
3428
}
3429
3430
static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3431
{
3432
struct vcpu_svm *svm = to_svm(vcpu);
3433
3434
if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3435
return;
3436
3437
if (irr == -1)
3438
return;
3439
3440
if (tpr >= irr)
3441
set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3442
}
3443
3444
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
3445
{
3446
struct vcpu_svm *svm = to_svm(vcpu);
3447
struct vmcb *vmcb = svm->vmcb;
3448
int ret;
3449
ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
3450
!(svm->vcpu.arch.hflags & HF_NMI_MASK);
3451
ret = ret && gif_set(svm) && nested_svm_nmi(svm);
3452
3453
return ret;
3454
}
3455
3456
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
3457
{
3458
struct vcpu_svm *svm = to_svm(vcpu);
3459
3460
return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
3461
}
3462
3463
static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
3464
{
3465
struct vcpu_svm *svm = to_svm(vcpu);
3466
3467
if (masked) {
3468
svm->vcpu.arch.hflags |= HF_NMI_MASK;
3469
set_intercept(svm, INTERCEPT_IRET);
3470
} else {
3471
svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
3472
clr_intercept(svm, INTERCEPT_IRET);
3473
}
3474
}
3475
3476
static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
3477
{
3478
struct vcpu_svm *svm = to_svm(vcpu);
3479
struct vmcb *vmcb = svm->vmcb;
3480
int ret;
3481
3482
if (!gif_set(svm) ||
3483
(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
3484
return 0;
3485
3486
ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
3487
3488
if (is_guest_mode(vcpu))
3489
return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
3490
3491
return ret;
3492
}
3493
3494
static void enable_irq_window(struct kvm_vcpu *vcpu)
3495
{
3496
struct vcpu_svm *svm = to_svm(vcpu);
3497
3498
/*
3499
* In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
3500
* 1, because that's a separate STGI/VMRUN intercept. The next time we
3501
* get that intercept, this function will be called again though and
3502
* we'll get the vintr intercept.
3503
*/
3504
if (gif_set(svm) && nested_svm_intr(svm)) {
3505
svm_set_vintr(svm);
3506
svm_inject_irq(svm, 0x0);
3507
}
3508
}
3509
3510
static void enable_nmi_window(struct kvm_vcpu *vcpu)
3511
{
3512
struct vcpu_svm *svm = to_svm(vcpu);
3513
3514
if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
3515
== HF_NMI_MASK)
3516
return; /* IRET will cause a vm exit */
3517
3518
/*
3519
* Something prevents NMI from being injected. Single step over the possible
3520
* problem (IRET or exception injection or interrupt shadow)
3521
*/
3522
svm->nmi_singlestep = true;
3523
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
3524
update_db_intercept(vcpu);
3525
}
3526
3527
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
3528
{
3529
return 0;
3530
}
3531
3532
static void svm_flush_tlb(struct kvm_vcpu *vcpu)
3533
{
3534
struct vcpu_svm *svm = to_svm(vcpu);
3535
3536
if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
3537
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
3538
else
3539
svm->asid_generation--;
3540
}
3541
3542
static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
3543
{
3544
}
3545
3546
static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
3547
{
3548
struct vcpu_svm *svm = to_svm(vcpu);
3549
3550
if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3551
return;
3552
3553
if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
3554
int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
3555
kvm_set_cr8(vcpu, cr8);
3556
}
3557
}
3558
3559
static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
3560
{
3561
struct vcpu_svm *svm = to_svm(vcpu);
3562
u64 cr8;
3563
3564
if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3565
return;
3566
3567
cr8 = kvm_get_cr8(vcpu);
3568
svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
3569
svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
3570
}
3571
3572
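/*
* Re-queue an event that was pending at #VMEXIT time (recorded in
* exit_int_info) so it gets re-injected on the next guest entry.
*/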
static void svm_complete_interrupts(struct vcpu_svm *svm)
3573
{
3574
u8 vector;
3575
int type;
3576
u32 exitintinfo = svm->vmcb->control.exit_int_info;
3577
unsigned int3_injected = svm->int3_injected;
3578
3579
svm->int3_injected = 0;
3580
3581
/*
3582
* If we've made progress since setting HF_IRET_MASK, we've
3583
* executed an IRET and can allow NMI injection.
3584
*/
3585
if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
3586
&& kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
3587
svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
3588
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3589
}
3590
3591
svm->vcpu.arch.nmi_injected = false;
3592
kvm_clear_exception_queue(&svm->vcpu);
3593
kvm_clear_interrupt_queue(&svm->vcpu);
3594
3595
if (!(exitintinfo & SVM_EXITINTINFO_VALID))
3596
return;
3597
3598
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3599
3600
vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
3601
type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
3602
3603
switch (type) {
3604
case SVM_EXITINTINFO_TYPE_NMI:
3605
svm->vcpu.arch.nmi_injected = true;
3606
break;
3607
case SVM_EXITINTINFO_TYPE_EXEPT:
3608
/*
3609
* In case of software exceptions, do not reinject the vector,
3610
* but re-execute the instruction instead. Rewind RIP first
3611
* if we emulated INT3 before.
3612
*/
3613
if (kvm_exception_is_soft(vector)) {
3614
if (vector == BP_VECTOR && int3_injected &&
3615
kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
3616
kvm_rip_write(&svm->vcpu,
3617
kvm_rip_read(&svm->vcpu) -
3618
int3_injected);
3619
break;
3620
}
3621
if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
3622
u32 err = svm->vmcb->control.exit_int_info_err;
3623
kvm_requeue_exception_e(&svm->vcpu, vector, err);
3624
3625
} else
3626
kvm_requeue_exception(&svm->vcpu, vector);
3627
break;
3628
case SVM_EXITINTINFO_TYPE_INTR:
3629
kvm_queue_interrupt(&svm->vcpu, vector, false);
3630
break;
3631
default:
3632
break;
3633
}
3634
}
3635
3636
static void svm_cancel_injection(struct kvm_vcpu *vcpu)
3637
{
3638
struct vcpu_svm *svm = to_svm(vcpu);
3639
struct vmcb_control_area *control = &svm->vmcb->control;
3640
3641
control->exit_int_info = control->event_inj;
3642
control->exit_int_info_err = control->event_inj_err;
3643
control->event_inj = 0;
3644
svm_complete_interrupts(svm);
3645
}
3646
3647
#ifdef CONFIG_X86_64
3648
#define R "r"
3649
#else
3650
#define R "e"
3651
#endif
3652
3653
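/*
* Enter the guest: load the guest GPRs, execute VMLOAD/VMRUN/VMSAVE and
* save the guest register and control state again afterwards.
*/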
static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3654
{
3655
struct vcpu_svm *svm = to_svm(vcpu);
3656
3657
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
3658
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
3659
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
3660
3661
/*
3662
* A vmexit emulation is required before the vcpu can be executed
3663
* again.
3664
*/
3665
if (unlikely(svm->nested.exit_required))
3666
return;
3667
3668
pre_svm_run(svm);
3669
3670
sync_lapic_to_cr8(vcpu);
3671
3672
svm->vmcb->save.cr2 = vcpu->arch.cr2;
3673
3674
clgi();
3675
3676
local_irq_enable();
3677
3678
asm volatile (
3679
"push %%"R"bp; \n\t"
3680
"mov %c[rbx](%[svm]), %%"R"bx \n\t"
3681
"mov %c[rcx](%[svm]), %%"R"cx \n\t"
3682
"mov %c[rdx](%[svm]), %%"R"dx \n\t"
3683
"mov %c[rsi](%[svm]), %%"R"si \n\t"
3684
"mov %c[rdi](%[svm]), %%"R"di \n\t"
3685
"mov %c[rbp](%[svm]), %%"R"bp \n\t"
3686
#ifdef CONFIG_X86_64
3687
"mov %c[r8](%[svm]), %%r8 \n\t"
3688
"mov %c[r9](%[svm]), %%r9 \n\t"
3689
"mov %c[r10](%[svm]), %%r10 \n\t"
3690
"mov %c[r11](%[svm]), %%r11 \n\t"
3691
"mov %c[r12](%[svm]), %%r12 \n\t"
3692
"mov %c[r13](%[svm]), %%r13 \n\t"
3693
"mov %c[r14](%[svm]), %%r14 \n\t"
3694
"mov %c[r15](%[svm]), %%r15 \n\t"
3695
#endif
3696
3697
/* Enter guest mode */
3698
"push %%"R"ax \n\t"
3699
"mov %c[vmcb](%[svm]), %%"R"ax \n\t"
3700
__ex(SVM_VMLOAD) "\n\t"
3701
__ex(SVM_VMRUN) "\n\t"
3702
__ex(SVM_VMSAVE) "\n\t"
3703
"pop %%"R"ax \n\t"
3704
3705
/* Save guest registers, load host registers */
3706
"mov %%"R"bx, %c[rbx](%[svm]) \n\t"
3707
"mov %%"R"cx, %c[rcx](%[svm]) \n\t"
3708
"mov %%"R"dx, %c[rdx](%[svm]) \n\t"
3709
"mov %%"R"si, %c[rsi](%[svm]) \n\t"
3710
"mov %%"R"di, %c[rdi](%[svm]) \n\t"
3711
"mov %%"R"bp, %c[rbp](%[svm]) \n\t"
3712
#ifdef CONFIG_X86_64
3713
"mov %%r8, %c[r8](%[svm]) \n\t"
3714
"mov %%r9, %c[r9](%[svm]) \n\t"
3715
"mov %%r10, %c[r10](%[svm]) \n\t"
3716
"mov %%r11, %c[r11](%[svm]) \n\t"
3717
"mov %%r12, %c[r12](%[svm]) \n\t"
3718
"mov %%r13, %c[r13](%[svm]) \n\t"
3719
"mov %%r14, %c[r14](%[svm]) \n\t"
3720
"mov %%r15, %c[r15](%[svm]) \n\t"
3721
#endif
3722
"pop %%"R"bp"
3723
:
3724
: [svm]"a"(svm),
3725
[vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
3726
[rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
3727
[rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
3728
[rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
3729
[rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
3730
[rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
3731
[rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
3732
#ifdef CONFIG_X86_64
3733
, [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
3734
[r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
3735
[r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
3736
[r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
3737
[r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
3738
[r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
3739
[r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
3740
[r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
3741
#endif
3742
: "cc", "memory"
3743
, R"bx", R"cx", R"dx", R"si", R"di"
3744
#ifdef CONFIG_X86_64
3745
, "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
3746
#endif
3747
);
3748
3749
#ifdef CONFIG_X86_64
3750
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
3751
#else
3752
loadsegment(fs, svm->host.fs);
3753
#ifndef CONFIG_X86_32_LAZY_GS
3754
loadsegment(gs, svm->host.gs);
3755
#endif
3756
#endif
3757
3758
reload_tss(vcpu);
3759
3760
local_irq_disable();
3761
3762
vcpu->arch.cr2 = svm->vmcb->save.cr2;
3763
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
3764
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
3765
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
3766
3767
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3768
kvm_before_handle_nmi(&svm->vcpu);
3769
3770
stgi();
3771
3772
/* Any pending NMI will happen here */
3773
3774
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3775
kvm_after_handle_nmi(&svm->vcpu);
3776
3777
sync_cr8_to_lapic(vcpu);
3778
3779
svm->next_rip = 0;
3780
3781
svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
3782
3783
/* If the exit was due to a #PF, check for an async PF reason */
3784
if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
3785
svm->apf_reason = kvm_read_and_reset_pf_reason();
3786
3787
if (npt_enabled) {
3788
vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
3789
vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
3790
}
3791
3792
/*
3793
* We need to handle MC intercepts here before the vcpu has a chance to
3794
* change the physical cpu
3795
*/
3796
if (unlikely(svm->vmcb->control.exit_code ==
3797
SVM_EXIT_EXCP_BASE + MC_VECTOR))
3798
svm_handle_mce(svm);
3799
3800
mark_all_clean(svm->vmcb);
3801
}
3802
3803
#undef R
3804
3805
static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
3806
{
3807
struct vcpu_svm *svm = to_svm(vcpu);
3808
3809
svm->vmcb->save.cr3 = root;
3810
mark_dirty(svm->vmcb, VMCB_CR);
3811
svm_flush_tlb(vcpu);
3812
}
3813
3814
static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
3815
{
3816
struct vcpu_svm *svm = to_svm(vcpu);
3817
3818
svm->vmcb->control.nested_cr3 = root;
3819
mark_dirty(svm->vmcb, VMCB_NPT);
3820
3821
/* Also sync guest cr3 here in case we live migrate */
3822
svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
3823
mark_dirty(svm->vmcb, VMCB_CR);
3824
3825
svm_flush_tlb(vcpu);
3826
}
3827
3828
static int is_disabled(void)
3829
{
3830
u64 vm_cr;
3831
3832
rdmsrl(MSR_VM_CR, vm_cr);
3833
if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
3834
return 1;
3835
3836
return 0;
3837
}
3838
3839
static void
3840
svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
3841
{
3842
/*
3843
* Patch in the VMMCALL instruction:
3844
*/
3845
hypercall[0] = 0x0f;
3846
hypercall[1] = 0x01;
3847
hypercall[2] = 0xd9;
3848
}
3849
3850
static void svm_check_processor_compat(void *rtn)
3851
{
3852
*(int *)rtn = 0;
3853
}
3854
3855
static bool svm_cpu_has_accelerated_tpr(void)
3856
{
3857
return false;
3858
}
3859
3860
static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3861
{
3862
return 0;
3863
}
3864
3865
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
3866
{
3867
}
3868
3869
static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
3870
{
3871
switch (func) {
3872
case 0x80000001:
3873
if (nested)
3874
entry->ecx |= (1 << 2); /* Set SVM bit */
3875
break;
3876
case 0x8000000A:
3877
entry->eax = 1; /* SVM revision 1 */
3878
entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper
3879
ASID emulation to nested SVM */
3880
entry->ecx = 0; /* Reserved */
3881
entry->edx = 0; /* By default do not support any
3882
additional features */
3883
3884
/* Support next_rip if host supports it */
3885
if (boot_cpu_has(X86_FEATURE_NRIPS))
3886
entry->edx |= SVM_FEATURE_NRIP;
3887
3888
/* Support NPT for the guest if enabled */
3889
if (npt_enabled)
3890
entry->edx |= SVM_FEATURE_NPT;
3891
3892
break;
3893
}
3894
}
3895
3896
static const struct trace_print_flags svm_exit_reasons_str[] = {
	{ SVM_EXIT_READ_CR0, "read_cr0" },
	{ SVM_EXIT_READ_CR3, "read_cr3" },
	{ SVM_EXIT_READ_CR4, "read_cr4" },
	{ SVM_EXIT_READ_CR8, "read_cr8" },
	{ SVM_EXIT_WRITE_CR0, "write_cr0" },
	{ SVM_EXIT_WRITE_CR3, "write_cr3" },
	{ SVM_EXIT_WRITE_CR4, "write_cr4" },
	{ SVM_EXIT_WRITE_CR8, "write_cr8" },
	{ SVM_EXIT_READ_DR0, "read_dr0" },
	{ SVM_EXIT_READ_DR1, "read_dr1" },
	{ SVM_EXIT_READ_DR2, "read_dr2" },
	{ SVM_EXIT_READ_DR3, "read_dr3" },
	{ SVM_EXIT_WRITE_DR0, "write_dr0" },
	{ SVM_EXIT_WRITE_DR1, "write_dr1" },
	{ SVM_EXIT_WRITE_DR2, "write_dr2" },
	{ SVM_EXIT_WRITE_DR3, "write_dr3" },
	{ SVM_EXIT_WRITE_DR5, "write_dr5" },
	{ SVM_EXIT_WRITE_DR7, "write_dr7" },
	{ SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" },
	{ SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" },
	{ SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" },
	{ SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" },
	{ SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" },
	{ SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" },
	{ SVM_EXIT_INTR, "interrupt" },
	{ SVM_EXIT_NMI, "nmi" },
	{ SVM_EXIT_SMI, "smi" },
	{ SVM_EXIT_INIT, "init" },
	{ SVM_EXIT_VINTR, "vintr" },
	{ SVM_EXIT_CPUID, "cpuid" },
	{ SVM_EXIT_INVD, "invd" },
	{ SVM_EXIT_HLT, "hlt" },
	{ SVM_EXIT_INVLPG, "invlpg" },
	{ SVM_EXIT_INVLPGA, "invlpga" },
	{ SVM_EXIT_IOIO, "io" },
	{ SVM_EXIT_MSR, "msr" },
	{ SVM_EXIT_TASK_SWITCH, "task_switch" },
	{ SVM_EXIT_SHUTDOWN, "shutdown" },
	{ SVM_EXIT_VMRUN, "vmrun" },
	{ SVM_EXIT_VMMCALL, "hypercall" },
	{ SVM_EXIT_VMLOAD, "vmload" },
	{ SVM_EXIT_VMSAVE, "vmsave" },
	{ SVM_EXIT_STGI, "stgi" },
	{ SVM_EXIT_CLGI, "clgi" },
	{ SVM_EXIT_SKINIT, "skinit" },
	{ SVM_EXIT_WBINVD, "wbinvd" },
	{ SVM_EXIT_MONITOR, "monitor" },
	{ SVM_EXIT_MWAIT, "mwait" },
	{ SVM_EXIT_XSETBV, "xsetbv" },
	{ SVM_EXIT_NPF, "npf" },
	{ -1, NULL }
};

static int svm_get_lpage_level(void)
{
	return PT_PDPE_LEVEL;
}

static bool svm_rdtscp_supported(void)
{
	return false;
}

static bool svm_has_wbinvd_exit(void)
{
	return true;
}

static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	set_exception_intercept(svm, NM_VECTOR);
	update_cr0_intercept(svm);
}

#define PRE_EX(exit)  { .exit_code = (exit), \
			.stage = X86_ICPT_PRE_EXCEPT, }
#define POST_EX(exit) { .exit_code = (exit), \
			.stage = X86_ICPT_POST_EXCEPT, }
#define POST_MEM(exit) { .exit_code = (exit), \
			.stage = X86_ICPT_POST_MEMACCESS, }

static struct __x86_intercept {
	u32 exit_code;
	enum x86_intercept_stage stage;
} x86_intercept_map[] = {
	[x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
	[x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0),
	[x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0),
	[x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0),
	[x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0),
	[x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
	[x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0),
	[x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ),
	[x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ),
	[x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE),
	[x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE),
	[x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ),
	[x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ),
	[x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE),
	[x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE),
	[x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN),
	[x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL),
	[x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD),
	[x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE),
	[x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI),
	[x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI),
	[x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT),
	[x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA),
	[x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP),
	[x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR),
	[x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT),
	[x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG),
	[x86_intercept_invd] = POST_EX(SVM_EXIT_INVD),
	[x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD),
	[x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
	[x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC),
	[x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR),
	[x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC),
	[x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID),
	[x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM),
	[x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE),
	[x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF),
	[x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF),
	[x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT),
	[x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET),
	[x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP),
	[x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT),
	[x86_intercept_in] = POST_EX(SVM_EXIT_IOIO),
	[x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
	[x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
	[x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
};

#undef PRE_EX
#undef POST_EX
#undef POST_MEM

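/*
 * Called by the x86 emulator while emulating an instruction on behalf of a
 * nested guest.  It translates the emulator's intercept id into the SVM
 * exit code and exit info L1 would see, and asks nested_svm_exit_handled()
 * whether L1 wants a vmexit for it.
 */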
static int svm_check_intercept(struct kvm_vcpu *vcpu,
			       struct x86_instruction_info *info,
			       enum x86_intercept_stage stage)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int vmexit, ret = X86EMUL_CONTINUE;
	struct __x86_intercept icpt_info;
	struct vmcb *vmcb = svm->vmcb;

	if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
		goto out;

	icpt_info = x86_intercept_map[info->intercept];

	if (stage != icpt_info.stage)
		goto out;

	switch (icpt_info.exit_code) {
	case SVM_EXIT_READ_CR0:
		if (info->intercept == x86_intercept_cr_read)
			icpt_info.exit_code += info->modrm_reg;
		break;
	case SVM_EXIT_WRITE_CR0: {
		unsigned long cr0, val;
		u64 intercept;

		if (info->intercept == x86_intercept_cr_write)
			icpt_info.exit_code += info->modrm_reg;

		if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0)
			break;

		intercept = svm->nested.intercept;

		if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
			break;

		cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
		val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;

		if (info->intercept == x86_intercept_lmsw) {
			cr0 &= 0xfUL;
			val &= 0xfUL;
			/* lmsw can't clear PE - catch this here */
			if (cr0 & X86_CR0_PE)
				val |= X86_CR0_PE;
		}

		if (cr0 ^ val)
			icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;

		break;
	}
	case SVM_EXIT_READ_DR0:
	case SVM_EXIT_WRITE_DR0:
		icpt_info.exit_code += info->modrm_reg;
		break;
	case SVM_EXIT_MSR:
		if (info->intercept == x86_intercept_wrmsr)
			vmcb->control.exit_info_1 = 1;
		else
			vmcb->control.exit_info_1 = 0;
		break;
	case SVM_EXIT_PAUSE:
		/*
		 * PAUSE is encoded as REP NOP and a plain NOP lands here as
		 * well, so only treat it as a PAUSE intercept when the REP
		 * prefix is present.
		 */
		if (info->rep_prefix != REPE_PREFIX)
			goto out;
		break;	/* do not fall through into the IOIO handling */
	case SVM_EXIT_IOIO: {
		u64 exit_info;
		u32 bytes;

		exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16;

		if (info->intercept == x86_intercept_in ||
		    info->intercept == x86_intercept_ins) {
			exit_info |= SVM_IOIO_TYPE_MASK;
			bytes = info->src_bytes;
		} else {
			bytes = info->dst_bytes;
		}

		if (info->intercept == x86_intercept_outs ||
		    info->intercept == x86_intercept_ins)
			exit_info |= SVM_IOIO_STR_MASK;

		if (info->rep_prefix)
			exit_info |= SVM_IOIO_REP_MASK;

		bytes = min(bytes, 4u);

		exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;

		exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);

		vmcb->control.exit_info_1 = exit_info;
		vmcb->control.exit_info_2 = info->next_rip;

		break;
	}
	default:
		break;
	}

	vmcb->control.next_rip = info->next_rip;
	vmcb->control.exit_code = icpt_info.exit_code;
	vmexit = nested_svm_exit_handled(svm);

	ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
					   : X86EMUL_CONTINUE;

out:
	return ret;
}

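/*
 * The kvm_x86_ops instance that plugs the SVM implementation into the
 * generic KVM x86 code; passed to kvm_init() below.
 */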
static struct kvm_x86_ops svm_x86_ops = {
	.cpu_has_kvm_support = has_svm,
	.disabled_by_bios = is_disabled,
	.hardware_setup = svm_hardware_setup,
	.hardware_unsetup = svm_hardware_unsetup,
	.check_processor_compatibility = svm_check_processor_compat,
	.hardware_enable = svm_hardware_enable,
	.hardware_disable = svm_hardware_disable,
	.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,

	.vcpu_create = svm_create_vcpu,
	.vcpu_free = svm_free_vcpu,
	.vcpu_reset = svm_vcpu_reset,

	.prepare_guest_switch = svm_prepare_guest_switch,
	.vcpu_load = svm_vcpu_load,
	.vcpu_put = svm_vcpu_put,

	.set_guest_debug = svm_guest_debug,
	.get_msr = svm_get_msr,
	.set_msr = svm_set_msr,
	.get_segment_base = svm_get_segment_base,
	.get_segment = svm_get_segment,
	.set_segment = svm_set_segment,
	.get_cpl = svm_get_cpl,
	.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
	.decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
	.decache_cr3 = svm_decache_cr3,
	.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
	.set_cr0 = svm_set_cr0,
	.set_cr3 = svm_set_cr3,
	.set_cr4 = svm_set_cr4,
	.set_efer = svm_set_efer,
	.get_idt = svm_get_idt,
	.set_idt = svm_set_idt,
	.get_gdt = svm_get_gdt,
	.set_gdt = svm_set_gdt,
	.set_dr7 = svm_set_dr7,
	.cache_reg = svm_cache_reg,
	.get_rflags = svm_get_rflags,
	.set_rflags = svm_set_rflags,
	.fpu_activate = svm_fpu_activate,
	.fpu_deactivate = svm_fpu_deactivate,

	.tlb_flush = svm_flush_tlb,

	.run = svm_vcpu_run,
	.handle_exit = handle_exit,
	.skip_emulated_instruction = skip_emulated_instruction,
	.set_interrupt_shadow = svm_set_interrupt_shadow,
	.get_interrupt_shadow = svm_get_interrupt_shadow,
	.patch_hypercall = svm_patch_hypercall,
	.set_irq = svm_set_irq,
	.set_nmi = svm_inject_nmi,
	.queue_exception = svm_queue_exception,
	.cancel_injection = svm_cancel_injection,
	.interrupt_allowed = svm_interrupt_allowed,
	.nmi_allowed = svm_nmi_allowed,
	.get_nmi_mask = svm_get_nmi_mask,
	.set_nmi_mask = svm_set_nmi_mask,
	.enable_nmi_window = enable_nmi_window,
	.enable_irq_window = enable_irq_window,
	.update_cr8_intercept = update_cr8_intercept,

	.set_tss_addr = svm_set_tss_addr,
	.get_tdp_level = get_npt_level,
	.get_mt_mask = svm_get_mt_mask,

	.get_exit_info = svm_get_exit_info,
	.exit_reasons_str = svm_exit_reasons_str,

	.get_lpage_level = svm_get_lpage_level,

	.cpuid_update = svm_cpuid_update,

	.rdtscp_supported = svm_rdtscp_supported,

	.set_supported_cpuid = svm_set_supported_cpuid,

	.has_wbinvd_exit = svm_has_wbinvd_exit,

	.set_tsc_khz = svm_set_tsc_khz,
	.write_tsc_offset = svm_write_tsc_offset,
	.adjust_tsc_offset = svm_adjust_tsc_offset,
	.compute_tsc_offset = svm_compute_tsc_offset,

	.set_tdp_cr3 = set_tdp_cr3,

	.check_intercept = svm_check_intercept,
};

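/*
 * Module entry points: register the SVM backend and the vcpu_svm size and
 * alignment with the generic KVM module, and unregister on exit.
 */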
static int __init svm_init(void)
{
	return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
			__alignof__(struct vcpu_svm), THIS_MODULE);
}

static void __exit svm_exit(void)
{
	kvm_exit();
}

module_init(svm_init)
module_exit(svm_exit)