GitHub Repository: torvalds/linux
Path: arch/x86/virt/hw.c
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/kvm_types.h>
#include <linux/list.h>
#include <linux/percpu.h>

#include <asm/perf_event.h>
#include <asm/processor.h>
#include <asm/virt.h>
#include <asm/vmx.h>

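/*
 * Vendor-neutral hooks for enabling and disabling virtualization on a CPU,
 * filled in at boot with the VMX or SVM implementation (see x86_virt_init()).
 */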
struct x86_virt_ops {
	int feature;
	int (*enable_virtualization_cpu)(void);
	int (*disable_virtualization_cpu)(void);
	void (*emergency_disable_virtualization_cpu)(void);
};
static struct x86_virt_ops virt_ops __ro_after_init;

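/*
 * Set when virtualization is being torn down for reboot/crash; KVM and the
 * kernel use this to eat faults on VMX/SVM instructions during the teardown.
 */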
__visible bool virt_rebooting;
EXPORT_SYMBOL_FOR_KVM(virt_rebooting);

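/* Number of active virtualization users on this CPU, see x86_virt_get_ref(). */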
static DEFINE_PER_CPU(int, virtualization_nr_users);

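/*
 * KVM's emergency callback, invoked from the emergency disable path, possibly
 * in NMI context, so that KVM can quiesce, e.g. VMCLEAR its VMCSes, before
 * virtualization is turned off.
 */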
static cpu_emergency_virt_cb __rcu *kvm_emergency_callback;

void x86_virt_register_emergency_callback(cpu_emergency_virt_cb *callback)
{
	if (WARN_ON_ONCE(rcu_access_pointer(kvm_emergency_callback)))
		return;

	rcu_assign_pointer(kvm_emergency_callback, callback);
}
EXPORT_SYMBOL_FOR_KVM(x86_virt_register_emergency_callback);

void x86_virt_unregister_emergency_callback(cpu_emergency_virt_cb *callback)
{
	if (WARN_ON_ONCE(rcu_access_pointer(kvm_emergency_callback) != callback))
		return;

	rcu_assign_pointer(kvm_emergency_callback, NULL);
	/* Wait for in-flight invocations, e.g. from NMI, to complete. */
	synchronize_rcu();
}
EXPORT_SYMBOL_FOR_KVM(x86_virt_unregister_emergency_callback);

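/*
 * Invoke KVM's callback, if one is registered.  This can run in NMI context,
 * and relies on RCU to synchronize against a racing unregister.
 */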
static void x86_virt_invoke_kvm_emergency_callback(void)
{
	cpu_emergency_virt_cb *kvm_callback;

	kvm_callback = rcu_dereference(kvm_emergency_callback);
	if (kvm_callback)
		kvm_callback();
}

#if IS_ENABLED(CONFIG_KVM_INTEL)
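/* Per-CPU VMXON region: one zeroed page tagged with the CPU's VMCS revision ID. */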
static DEFINE_PER_CPU(struct vmcs *, root_vmcs);

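/*
 * Enter VMX operation on this CPU.  CR4.VMXE must be set before VMXON, and
 * VMXON takes the physical address of the per-CPU root VMCS.
 */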
static int x86_virt_cpu_vmxon(void)
{
	u64 vmxon_pointer = __pa(per_cpu(root_vmcs, raw_smp_processor_id()));
	u64 msr;

	cr4_set_bits(X86_CR4_VMXE);

	asm goto("1: vmxon %[vmxon_pointer]\n\t"
		 _ASM_EXTABLE(1b, %l[fault])
		 : : [vmxon_pointer] "m"(vmxon_pointer)
		 : : fault);
	return 0;

fault:
	WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
		  rdmsrq_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
	cr4_clear_bits(X86_CR4_VMXE);

	return -EFAULT;
}

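/*
 * Enable VMX on this CPU.  CR4.VMXE already being set means VMX is already in
 * use, e.g. by another hypervisor, so bail with -EBUSY rather than clobber the
 * existing user.  Intel PT is notified around the transition, as PT usage
 * interacts with VMX operation.
 */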
static int x86_vmx_enable_virtualization_cpu(void)
{
	int r;

	if (cr4_read_shadow() & X86_CR4_VMXE)
		return -EBUSY;

	intel_pt_handle_vmx(1);

	r = x86_virt_cpu_vmxon();
	if (r) {
		intel_pt_handle_vmx(0);
		return r;
	}

	return 0;
}

/*
 * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
 *
 * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
 * atomically track post-VMXON state, e.g. this may be called in NMI context.
 * Eat all faults, as all other faults on VMXOFF are mode related, i.e. the
 * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
 * magically in RM, VM86, compat mode, or at CPL>0.
 */
static int x86_vmx_disable_virtualization_cpu(void)
{
	int r = -EIO;

	asm goto("1: vmxoff\n\t"
		 _ASM_EXTABLE(1b, %l[fault])
		 ::: "cc", "memory" : fault);
	r = 0;

fault:
	cr4_clear_bits(X86_CR4_VMXE);
	intel_pt_handle_vmx(0);
	return r;
}

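/*
 * Emergency disable, e.g. for reboot or crash, potentially from NMI context:
 * let KVM VMCLEAR its VMCSes via the callback, then execute VMXOFF.
 */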
static void x86_vmx_emergency_disable_virtualization_cpu(void)
{
	virt_rebooting = true;

	/*
	 * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
	 * set in task context.  If this races with _another_ emergency call
	 * from NMI context, VMCLEAR (in KVM) and VMXOFF may #UD, but KVM and
	 * the kernel will eat those faults due to virt_rebooting being set by
	 * the interrupting NMI callback.
	 */
	if (!(__read_cr4() & X86_CR4_VMXE))
		return;

	x86_virt_invoke_kvm_emergency_callback();

	x86_vmx_disable_virtualization_cpu();
}

static __init void x86_vmx_exit(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		free_page((unsigned long)per_cpu(root_vmcs, cpu));
		per_cpu(root_vmcs, cpu) = NULL;
	}
}

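/*
 * Allocate a root VMCS for every possible CPU and install the VMX ops;
 * allocation failures unwind via x86_vmx_exit().
 */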
static __init int __x86_vmx_init(void)
{
	const struct x86_virt_ops vmx_ops = {
		.feature = X86_FEATURE_VMX,
		.enable_virtualization_cpu = x86_vmx_enable_virtualization_cpu,
		.disable_virtualization_cpu = x86_vmx_disable_virtualization_cpu,
		.emergency_disable_virtualization_cpu = x86_vmx_emergency_disable_virtualization_cpu,
	};
	u64 basic_msr;
	u32 rev_id;
	int cpu;

	if (!cpu_feature_enabled(X86_FEATURE_VMX))
		return -EOPNOTSUPP;

	rdmsrq(MSR_IA32_VMX_BASIC, basic_msr);

	/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
	if (WARN_ON_ONCE(vmx_basic_vmcs_size(basic_msr) > PAGE_SIZE))
		return -EIO;

	/*
	 * Even if eVMCS is, or will be, enabled, and even though it's not
	 * explicitly documented by the TLFS, the root VMCS passed to VMXON
	 * must still be marked with the revision_id reported by the physical
	 * CPU.
	 */
	rev_id = vmx_basic_vmcs_revision_id(basic_msr);

	for_each_possible_cpu(cpu) {
		int node = cpu_to_node(cpu);
		struct page *page;
		struct vmcs *vmcs;

		page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
		if (WARN_ON_ONCE(!page)) {
			x86_vmx_exit();
			return -ENOMEM;
		}

		vmcs = page_address(page);
		vmcs->hdr.revision_id = rev_id;
		per_cpu(root_vmcs, cpu) = vmcs;
	}

	memcpy(&virt_ops, &vmx_ops, sizeof(virt_ops));
	return 0;
}

static __init int x86_vmx_init(void)
{
	int r;

	r = __x86_vmx_init();
	if (r)
		/* Hide VMX from the rest of the kernel if init failed. */
		setup_clear_cpu_cap(X86_FEATURE_VMX);
	return r;
}
#else
static __init int x86_vmx_init(void) { return -EOPNOTSUPP; }
static __init void x86_vmx_exit(void) { }
#endif

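/*
 * SVM is enabled and disabled purely via EFER.SVME; unlike VMX, there is no
 * dedicated per-CPU memory region to allocate.
 */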
#if IS_ENABLED(CONFIG_KVM_AMD)
static int x86_svm_enable_virtualization_cpu(void)
{
	u64 efer;

	rdmsrq(MSR_EFER, efer);
	if (efer & EFER_SVME)
		return -EBUSY;

	wrmsrq(MSR_EFER, efer | EFER_SVME);
	return 0;
}

static int x86_svm_disable_virtualization_cpu(void)
{
	int r = -EIO;
	u64 efer;

	/*
	 * Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
	 * NMI aren't blocked.
	 */
	asm goto("1: stgi\n\t"
		 _ASM_EXTABLE(1b, %l[fault])
		 ::: "memory" : fault);
	r = 0;

fault:
	rdmsrq(MSR_EFER, efer);
	wrmsrq(MSR_EFER, efer & ~EFER_SVME);
	return r;
}

static void x86_svm_emergency_disable_virtualization_cpu(void)
{
	u64 efer;

	virt_rebooting = true;

	rdmsrq(MSR_EFER, efer);
	if (!(efer & EFER_SVME))
		return;

	x86_virt_invoke_kvm_emergency_callback();

	x86_svm_disable_virtualization_cpu();
}

static __init int x86_svm_init(void)
{
	const struct x86_virt_ops svm_ops = {
		.feature = X86_FEATURE_SVM,
		.enable_virtualization_cpu = x86_svm_enable_virtualization_cpu,
		.disable_virtualization_cpu = x86_svm_disable_virtualization_cpu,
		.emergency_disable_virtualization_cpu = x86_svm_emergency_disable_virtualization_cpu,
	};

	/* SVM is unsupported when the kernel is running as an SEV guest. */
	if (!cpu_feature_enabled(X86_FEATURE_SVM) ||
	    cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
		return -EOPNOTSUPP;

	memcpy(&virt_ops, &svm_ops, sizeof(virt_ops));
	return 0;
}
#else
static __init int x86_svm_init(void) { return -EOPNOTSUPP; }
#endif

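/*
 * Take a reference to hardware virtualization for @feat on the current CPU;
 * the vendor enable hook runs only on the 0=>1 transition of the per-CPU
 * user count.
 */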
int x86_virt_get_ref(int feat)
{
	int r;

	/* Ensure the !feature check can't get false positives. */
	BUILD_BUG_ON(!X86_FEATURE_SVM || !X86_FEATURE_VMX);

	if (!virt_ops.feature || virt_ops.feature != feat)
		return -EOPNOTSUPP;

	guard(preempt)();

	if (this_cpu_inc_return(virtualization_nr_users) > 1)
		return 0;

	r = virt_ops.enable_virtualization_cpu();
	if (r)
		WARN_ON_ONCE(this_cpu_dec_return(virtualization_nr_users));

	return r;
}
EXPORT_SYMBOL_FOR_KVM(x86_virt_get_ref);

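/*
 * Drop a reference on the current CPU; virtualization is disabled in hardware
 * on the 1=>0 transition.  Failure to disable is fatal unless the system is
 * rebooting.
 */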
void x86_virt_put_ref(int feat)
{
	guard(preempt)();

	if (WARN_ON_ONCE(!this_cpu_read(virtualization_nr_users)) ||
	    this_cpu_dec_return(virtualization_nr_users))
		return;

	BUG_ON(virt_ops.disable_virtualization_cpu() && !virt_rebooting);
}
EXPORT_SYMBOL_FOR_KVM(x86_virt_put_ref);

/*
 * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
 * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
 * GIF=0, i.e. if the crash occurred between CLGI and STGI.
 */
int x86_virt_emergency_disable_virtualization_cpu(void)
{
	if (!virt_ops.feature)
		return -EOPNOTSUPP;

	/*
	 * IRQs must be disabled as virtualization is enabled in hardware via
	 * function call IPIs, i.e. IRQs need to be disabled to guarantee
	 * virtualization stays disabled.
	 */
	lockdep_assert_irqs_disabled();

	/*
	 * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
	 * other CPUs may have virtualization enabled.
	 *
	 * TODO: Track whether or not virtualization might be enabled on other
	 * CPUs?  May not be worth avoiding the NMI shootdown...
	 */
	virt_ops.emergency_disable_virtualization_cpu();
	return 0;
}

void __init x86_virt_init(void)
{
	/*
	 * Attempt to initialize both SVM and VMX, and simply use whichever one
	 * is present.  Refuse to enable/use SVM or VMX if both are somehow
	 * supported.  No known CPU supports both SVM and VMX.
	 */
	bool has_vmx = !x86_vmx_init();
	bool has_svm = !x86_svm_init();

	if (WARN_ON_ONCE(has_vmx && has_svm)) {
		x86_vmx_exit();
		memset(&virt_ops, 0, sizeof(virt_ops));
	}
}