Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kvm/smm.c
26424 views
1
/* SPDX-License-Identifier: GPL-2.0 */
2
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
3
4
#include <linux/kvm_host.h>
5
#include "x86.h"
6
#include "kvm_cache_regs.h"
7
#include "kvm_emulate.h"
8
#include "smm.h"
9
#include "cpuid.h"
10
#include "trace.h"
11
12
/*
 * Layout checks for the 32-bit and 64-bit SMRAM state images.  Callers
 * pass 'offset' in 0xFE00-based form; 0xFE00 is subtracted to obtain the
 * expected offset of 'field' inside the corresponding structure.
 */
#define CHECK_SMRAM32_OFFSET(field, offset)				\
	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field,		\
			     (offset) - 0xFE00)

#define CHECK_SMRAM64_OFFSET(field, offset)				\
	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field,		\
			     (offset) - 0xFE00)
17
18
static void check_smram_offsets(void)
19
{
20
/* 32 bit SMRAM image */
21
CHECK_SMRAM32_OFFSET(reserved1, 0xFE00);
22
CHECK_SMRAM32_OFFSET(smbase, 0xFEF8);
23
CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC);
24
CHECK_SMRAM32_OFFSET(io_inst_restart, 0xFF00);
25
CHECK_SMRAM32_OFFSET(auto_hlt_restart, 0xFF02);
26
CHECK_SMRAM32_OFFSET(io_restart_rdi, 0xFF04);
27
CHECK_SMRAM32_OFFSET(io_restart_rcx, 0xFF08);
28
CHECK_SMRAM32_OFFSET(io_restart_rsi, 0xFF0C);
29
CHECK_SMRAM32_OFFSET(io_restart_rip, 0xFF10);
30
CHECK_SMRAM32_OFFSET(cr4, 0xFF14);
31
CHECK_SMRAM32_OFFSET(reserved2, 0xFF18);
32
CHECK_SMRAM32_OFFSET(int_shadow, 0xFF1A);
33
CHECK_SMRAM32_OFFSET(reserved3, 0xFF1B);
34
CHECK_SMRAM32_OFFSET(ds, 0xFF2C);
35
CHECK_SMRAM32_OFFSET(fs, 0xFF38);
36
CHECK_SMRAM32_OFFSET(gs, 0xFF44);
37
CHECK_SMRAM32_OFFSET(idtr, 0xFF50);
38
CHECK_SMRAM32_OFFSET(tr, 0xFF5C);
39
CHECK_SMRAM32_OFFSET(gdtr, 0xFF6C);
40
CHECK_SMRAM32_OFFSET(ldtr, 0xFF78);
41
CHECK_SMRAM32_OFFSET(es, 0xFF84);
42
CHECK_SMRAM32_OFFSET(cs, 0xFF90);
43
CHECK_SMRAM32_OFFSET(ss, 0xFF9C);
44
CHECK_SMRAM32_OFFSET(es_sel, 0xFFA8);
45
CHECK_SMRAM32_OFFSET(cs_sel, 0xFFAC);
46
CHECK_SMRAM32_OFFSET(ss_sel, 0xFFB0);
47
CHECK_SMRAM32_OFFSET(ds_sel, 0xFFB4);
48
CHECK_SMRAM32_OFFSET(fs_sel, 0xFFB8);
49
CHECK_SMRAM32_OFFSET(gs_sel, 0xFFBC);
50
CHECK_SMRAM32_OFFSET(ldtr_sel, 0xFFC0);
51
CHECK_SMRAM32_OFFSET(tr_sel, 0xFFC4);
52
CHECK_SMRAM32_OFFSET(dr7, 0xFFC8);
53
CHECK_SMRAM32_OFFSET(dr6, 0xFFCC);
54
CHECK_SMRAM32_OFFSET(gprs, 0xFFD0);
55
CHECK_SMRAM32_OFFSET(eip, 0xFFF0);
56
CHECK_SMRAM32_OFFSET(eflags, 0xFFF4);
57
CHECK_SMRAM32_OFFSET(cr3, 0xFFF8);
58
CHECK_SMRAM32_OFFSET(cr0, 0xFFFC);
59
60
/* 64 bit SMRAM image */
61
CHECK_SMRAM64_OFFSET(es, 0xFE00);
62
CHECK_SMRAM64_OFFSET(cs, 0xFE10);
63
CHECK_SMRAM64_OFFSET(ss, 0xFE20);
64
CHECK_SMRAM64_OFFSET(ds, 0xFE30);
65
CHECK_SMRAM64_OFFSET(fs, 0xFE40);
66
CHECK_SMRAM64_OFFSET(gs, 0xFE50);
67
CHECK_SMRAM64_OFFSET(gdtr, 0xFE60);
68
CHECK_SMRAM64_OFFSET(ldtr, 0xFE70);
69
CHECK_SMRAM64_OFFSET(idtr, 0xFE80);
70
CHECK_SMRAM64_OFFSET(tr, 0xFE90);
71
CHECK_SMRAM64_OFFSET(io_restart_rip, 0xFEA0);
72
CHECK_SMRAM64_OFFSET(io_restart_rcx, 0xFEA8);
73
CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0);
74
CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8);
75
CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0);
76
CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4);
77
CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8);
78
CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9);
79
CHECK_SMRAM64_OFFSET(amd_nmi_mask, 0xFECA);
80
CHECK_SMRAM64_OFFSET(int_shadow, 0xFECB);
81
CHECK_SMRAM64_OFFSET(reserved2, 0xFECC);
82
CHECK_SMRAM64_OFFSET(efer, 0xFED0);
83
CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8);
84
CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0);
85
CHECK_SMRAM64_OFFSET(svm_guest_virtual_int, 0xFEE8);
86
CHECK_SMRAM64_OFFSET(reserved3, 0xFEF0);
87
CHECK_SMRAM64_OFFSET(smm_revison, 0xFEFC);
88
CHECK_SMRAM64_OFFSET(smbase, 0xFF00);
89
CHECK_SMRAM64_OFFSET(reserved4, 0xFF04);
90
CHECK_SMRAM64_OFFSET(ssp, 0xFF18);
91
CHECK_SMRAM64_OFFSET(svm_guest_pat, 0xFF20);
92
CHECK_SMRAM64_OFFSET(svm_host_efer, 0xFF28);
93
CHECK_SMRAM64_OFFSET(svm_host_cr4, 0xFF30);
94
CHECK_SMRAM64_OFFSET(svm_host_cr3, 0xFF38);
95
CHECK_SMRAM64_OFFSET(svm_host_cr0, 0xFF40);
96
CHECK_SMRAM64_OFFSET(cr4, 0xFF48);
97
CHECK_SMRAM64_OFFSET(cr3, 0xFF50);
98
CHECK_SMRAM64_OFFSET(cr0, 0xFF58);
99
CHECK_SMRAM64_OFFSET(dr7, 0xFF60);
100
CHECK_SMRAM64_OFFSET(dr6, 0xFF68);
101
CHECK_SMRAM64_OFFSET(rflags, 0xFF70);
102
CHECK_SMRAM64_OFFSET(rip, 0xFF78);
103
CHECK_SMRAM64_OFFSET(gprs, 0xFF80);
104
105
BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
106
}
107
108
/* The layout-check helpers are not needed past this point. */
#undef CHECK_SMRAM32_OFFSET
#undef CHECK_SMRAM64_OFFSET
110
111
112
/*
 * Update vCPU bookkeeping for an SMM transition.
 *
 * @vcpu:         vCPU whose SMM state changed.
 * @entering_smm: true when entering SMM, false when leaving it.
 *
 * The transition is traced, vcpu->arch.hflags is updated, and the MMU
 * context is reset in both directions.
 */
void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
{
	trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);

	if (!entering_smm) {
		vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);

		/* Process a latched INIT or SMI, if any. */
		kvm_make_request(KVM_REQ_EVENT, vcpu);

		/*
		 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
		 * on SMM exit we still need to reload them from
		 * guest memory.
		 */
		vcpu->arch.pdptrs_from_userspace = false;
	} else {
		vcpu->arch.hflags |= HF_SMM_MASK;
	}

	kvm_mmu_reset_context(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_smm_changed);
135
136
void process_smi(struct kvm_vcpu *vcpu)
137
{
138
vcpu->arch.smi_pending = true;
139
kvm_make_request(KVM_REQ_EVENT, vcpu);
140
}
141
142
static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
143
{
144
u32 flags = 0;
145
flags |= seg->g << 23;
146
flags |= seg->db << 22;
147
flags |= seg->l << 21;
148
flags |= seg->avl << 20;
149
flags |= seg->present << 15;
150
flags |= seg->dpl << 13;
151
flags |= seg->s << 12;
152
flags |= seg->type << 8;
153
return flags;
154
}
155
156
static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
157
struct kvm_smm_seg_state_32 *state,
158
u32 *selector, int n)
159
{
160
struct kvm_segment seg;
161
162
kvm_get_segment(vcpu, &seg, n);
163
*selector = seg.selector;
164
state->base = seg.base;
165
state->limit = seg.limit;
166
state->flags = enter_smm_get_segment_flags(&seg);
167
}
168
169
#ifdef CONFIG_X86_64
/*
 * Read segment register @n of @vcpu and store it in 64-bit image form.
 * The attributes field receives the packed segment flags shifted right
 * by 8 bits.
 */
static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
				  struct kvm_smm_seg_state_64 *state,
				  int n)
{
	struct kvm_segment cur;

	kvm_get_segment(vcpu, &cur, n);

	state->base = cur.base;
	state->limit = cur.limit;
	state->attributes = enter_smm_get_segment_flags(&cur) >> 8;
	state->selector = cur.selector;
}
#endif
183
184
static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
185
struct kvm_smram_state_32 *smram)
186
{
187
struct desc_ptr dt;
188
int i;
189
190
smram->cr0 = kvm_read_cr0(vcpu);
191
smram->cr3 = kvm_read_cr3(vcpu);
192
smram->eflags = kvm_get_rflags(vcpu);
193
smram->eip = kvm_rip_read(vcpu);
194
195
for (i = 0; i < 8; i++)
196
smram->gprs[i] = kvm_register_read_raw(vcpu, i);
197
198
smram->dr6 = (u32)vcpu->arch.dr6;
199
smram->dr7 = (u32)vcpu->arch.dr7;
200
201
enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
202
enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);
203
204
kvm_x86_call(get_gdt)(vcpu, &dt);
205
smram->gdtr.base = dt.address;
206
smram->gdtr.limit = dt.size;
207
208
kvm_x86_call(get_idt)(vcpu, &dt);
209
smram->idtr.base = dt.address;
210
smram->idtr.limit = dt.size;
211
212
enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
213
enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
214
enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);
215
216
enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
217
enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
218
enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);
219
220
smram->cr4 = kvm_read_cr4(vcpu);
221
smram->smm_revision = 0x00020000;
222
smram->smbase = vcpu->arch.smbase;
223
224
smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
225
}
226
227
#ifdef CONFIG_X86_64
/*
 * Fill the 64-bit SMRAM image @smram from the current state of @vcpu:
 * all sixteen GPRs, RIP/RFLAGS, DR6/DR7, CR0/CR3/CR4, SMBASE, the
 * revision field, EFER, segment and descriptor-table state and the
 * interrupt shadow.
 */
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
				    struct kvm_smram_state_64 *smram)
{
	struct desc_ptr table;
	int reg;

	/* GPR number 'reg' lands in slot 15 - reg, i.e. in reverse order. */
	for (reg = 0; reg < 16; reg++)
		smram->gprs[15 - reg] = kvm_register_read_raw(vcpu, reg);

	smram->rip = kvm_rip_read(vcpu);
	smram->rflags = kvm_get_rflags(vcpu);

	smram->dr6 = vcpu->arch.dr6;
	smram->dr7 = vcpu->arch.dr7;

	smram->cr0 = kvm_read_cr0(vcpu);
	smram->cr3 = kvm_read_cr3(vcpu);
	smram->cr4 = kvm_read_cr4(vcpu);

	smram->smbase = vcpu->arch.smbase;
	smram->smm_revison = 0x00020064;

	smram->efer = vcpu->arch.efer;

	enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);

	kvm_x86_call(get_idt)(vcpu, &table);
	smram->idtr.limit = table.size;
	smram->idtr.base = table.address;

	enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);

	kvm_x86_call(get_gdt)(vcpu, &table);
	smram->gdtr.limit = table.size;
	smram->gdtr.base = table.address;

	enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
	enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
	enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
	enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
	enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
	enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);

	smram->int_shadow = kvm_x86_call(get_interrupt_shadow)(vcpu);
}
#endif
274
275
void enter_smm(struct kvm_vcpu *vcpu)
276
{
277
struct kvm_segment cs, ds;
278
struct desc_ptr dt;
279
unsigned long cr0;
280
union kvm_smram smram;
281
282
check_smram_offsets();
283
284
memset(smram.bytes, 0, sizeof(smram.bytes));
285
286
#ifdef CONFIG_X86_64
287
if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
288
enter_smm_save_state_64(vcpu, &smram.smram64);
289
else
290
#endif
291
enter_smm_save_state_32(vcpu, &smram.smram32);
292
293
/*
294
* Give enter_smm() a chance to make ISA-specific changes to the vCPU
295
* state (e.g. leave guest mode) after we've saved the state into the
296
* SMM state-save area.
297
*
298
* Kill the VM in the unlikely case of failure, because the VM
299
* can be in undefined state in this case.
300
*/
301
if (kvm_x86_call(enter_smm)(vcpu, &smram))
302
goto error;
303
304
kvm_smm_changed(vcpu, true);
305
306
if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
307
goto error;
308
309
if (kvm_x86_call(get_nmi_mask)(vcpu))
310
vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
311
else
312
kvm_x86_call(set_nmi_mask)(vcpu, true);
313
314
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
315
kvm_rip_write(vcpu, 0x8000);
316
317
kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
318
319
cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
320
kvm_x86_call(set_cr0)(vcpu, cr0);
321
322
kvm_x86_call(set_cr4)(vcpu, 0);
323
324
/* Undocumented: IDT limit is set to zero on entry to SMM. */
325
dt.address = dt.size = 0;
326
kvm_x86_call(set_idt)(vcpu, &dt);
327
328
if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
329
goto error;
330
331
cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
332
cs.base = vcpu->arch.smbase;
333
334
ds.selector = 0;
335
ds.base = 0;
336
337
cs.limit = ds.limit = 0xffffffff;
338
cs.type = ds.type = 0x3;
339
cs.dpl = ds.dpl = 0;
340
cs.db = ds.db = 0;
341
cs.s = ds.s = 1;
342
cs.l = ds.l = 0;
343
cs.g = ds.g = 1;
344
cs.avl = ds.avl = 0;
345
cs.present = ds.present = 1;
346
cs.unusable = ds.unusable = 0;
347
cs.padding = ds.padding = 0;
348
349
kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
350
kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
351
kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
352
kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
353
kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
354
kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
355
356
#ifdef CONFIG_X86_64
357
if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
358
if (kvm_x86_call(set_efer)(vcpu, 0))
359
goto error;
360
#endif
361
362
vcpu->arch.cpuid_dynamic_bits_dirty = true;
363
kvm_mmu_reset_context(vcpu);
364
return;
365
error:
366
kvm_vm_dead(vcpu->kvm);
367
}
368
369
/*
 * Unpack a 32-bit attribute word into the fields of @desc.  Bit positions
 * mirror enter_smm_get_segment_flags(): type at 8 (4 bits), s at 12,
 * dpl at 13 (2 bits), present at 15, avl at 20, l at 21, db at 22 and
 * g at 23.  The segment is marked unusable when it is not present.
 */
static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
	desc->type    = (flags >> 8) & 0xf;
	desc->s       = (flags >> 12) & 0x1;
	desc->dpl     = (flags >> 13) & 0x3;
	desc->present = (flags >> 15) & 0x1;
	desc->avl     = (flags >> 20) & 0x1;
	desc->l       = (flags >> 21) & 0x1;
	desc->db      = (flags >> 22) & 0x1;
	desc->g       = (flags >> 23) & 0x1;

	desc->unusable = !desc->present;
	desc->padding = 0;
}
383
384
static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
385
const struct kvm_smm_seg_state_32 *state,
386
u16 selector, int n)
387
{
388
struct kvm_segment desc;
389
390
desc.selector = selector;
391
desc.base = state->base;
392
desc.limit = state->limit;
393
rsm_set_desc_flags(&desc, state->flags);
394
kvm_set_segment(vcpu, &desc, n);
395
return X86EMUL_CONTINUE;
396
}
397
398
#ifdef CONFIG_X86_64

/*
 * Load segment register @n of @vcpu from a 64-bit image entry @state.
 * The stored attributes are shifted left by 8 bits before being unpacked,
 * the inverse of the shift applied when saving.
 *
 * Always returns X86EMUL_CONTINUE.
 */
static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
			   const struct kvm_smm_seg_state_64 *state,
			   int n)
{
	struct kvm_segment seg;

	rsm_set_desc_flags(&seg, state->attributes << 8);
	seg.selector = state->selector;
	seg.base = state->base;
	seg.limit = state->limit;

	kvm_set_segment(vcpu, &seg, n);
	return X86EMUL_CONTINUE;
}
#endif
414
415
/*
 * Load CR3, CR4 and CR0 on @vcpu in an order that allows CR4.PCIDE to be
 * enabled last.
 *
 * Returns X86EMUL_CONTINUE on success, or X86EMUL_UNHANDLEABLE as soon as
 * one of the control-register writes is rejected.
 */
static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
				    u64 cr0, u64 cr3, u64 cr4)
{
	u64 pcid = 0;

	/* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
	if (cr4 & X86_CR4_PCIDE) {
		pcid = cr3 & 0xfff;
		cr3 &= ~0xfff;
	}

	if (kvm_set_cr3(vcpu, cr3))
		return X86EMUL_UNHANDLEABLE;

	/*
	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
	 * Then enable protected mode.  However, PCID cannot be enabled
	 * if EFER.LMA=0, so set it separately.
	 */
	if (kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE))
		return X86EMUL_UNHANDLEABLE;

	if (kvm_set_cr0(vcpu, cr0))
		return X86EMUL_UNHANDLEABLE;

	/* Nothing more to do unless PCIDE was requested. */
	if (!(cr4 & X86_CR4_PCIDE))
		return X86EMUL_CONTINUE;

	if (kvm_set_cr4(vcpu, cr4))
		return X86EMUL_UNHANDLEABLE;

	/* Restore the PCID bits that were stripped from CR3 above. */
	if (pcid && kvm_set_cr3(vcpu, cr3 | pcid))
		return X86EMUL_UNHANDLEABLE;

	return X86EMUL_CONTINUE;
}
459
460
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
461
const struct kvm_smram_state_32 *smstate)
462
{
463
struct kvm_vcpu *vcpu = ctxt->vcpu;
464
struct desc_ptr dt;
465
int i, r;
466
467
ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
468
ctxt->_eip = smstate->eip;
469
470
for (i = 0; i < 8; i++)
471
*reg_write(ctxt, i) = smstate->gprs[i];
472
473
if (kvm_set_dr(vcpu, 6, smstate->dr6))
474
return X86EMUL_UNHANDLEABLE;
475
if (kvm_set_dr(vcpu, 7, smstate->dr7))
476
return X86EMUL_UNHANDLEABLE;
477
478
rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
479
rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);
480
481
dt.address = smstate->gdtr.base;
482
dt.size = smstate->gdtr.limit;
483
kvm_x86_call(set_gdt)(vcpu, &dt);
484
485
dt.address = smstate->idtr.base;
486
dt.size = smstate->idtr.limit;
487
kvm_x86_call(set_idt)(vcpu, &dt);
488
489
rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
490
rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
491
rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);
492
493
rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
494
rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
495
rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);
496
497
vcpu->arch.smbase = smstate->smbase;
498
499
r = rsm_enter_protected_mode(vcpu, smstate->cr0,
500
smstate->cr3, smstate->cr4);
501
502
if (r != X86EMUL_CONTINUE)
503
return r;
504
505
kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
506
ctxt->interruptibility = (u8)smstate->int_shadow;
507
508
return r;
509
}
510
511
#ifdef CONFIG_X86_64
/*
 * Restore vCPU state from the 64-bit SMRAM image @smstate.
 *
 * The sixteen GPRs (stored in reverse order), RIP and RFLAGS go into the
 * emulator context @ctxt; debug registers, SMBASE, EFER (with LMA
 * cleared), descriptor tables, control registers and segments are loaded
 * into the vCPU.
 *
 * Returns X86EMUL_CONTINUE on success, X86EMUL_UNHANDLEABLE if a debug
 * register or EFER cannot be set, or the error from
 * rsm_enter_protected_mode().
 */
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
			     const struct kvm_smram_state_64 *smstate)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;
	struct desc_ptr table;
	int reg, rc;

	for (reg = 0; reg < 16; reg++)
		*reg_write(ctxt, reg) = smstate->gprs[15 - reg];

	ctxt->_eip = smstate->rip;
	ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;

	if (kvm_set_dr(vcpu, 6, smstate->dr6) ||
	    kvm_set_dr(vcpu, 7, smstate->dr7))
		return X86EMUL_UNHANDLEABLE;

	vcpu->arch.smbase = smstate->smbase;

	if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
		return X86EMUL_UNHANDLEABLE;

	rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);

	table.size = smstate->idtr.limit;
	table.address = smstate->idtr.base;
	kvm_x86_call(set_idt)(vcpu, &table);

	rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);

	table.size = smstate->gdtr.limit;
	table.address = smstate->gdtr.base;
	kvm_x86_call(set_gdt)(vcpu, &table);

	rc = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3,
				      smstate->cr4);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
	rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
	rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
	rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
	rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
	rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);

	/*
	 * Clear the vCPU's interrupt shadow and hand the saved value to the
	 * emulator context instead.
	 */
	kvm_x86_call(set_interrupt_shadow)(vcpu, 0);
	ctxt->interruptibility = (u8)smstate->int_shadow;

	return X86EMUL_CONTINUE;
}
#endif
564
565
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
566
{
567
struct kvm_vcpu *vcpu = ctxt->vcpu;
568
unsigned long cr0;
569
union kvm_smram smram;
570
u64 smbase;
571
int ret;
572
573
smbase = vcpu->arch.smbase;
574
575
ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
576
if (ret < 0)
577
return X86EMUL_UNHANDLEABLE;
578
579
if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
580
kvm_x86_call(set_nmi_mask)(vcpu, false);
581
582
kvm_smm_changed(vcpu, false);
583
584
/*
585
* Get back to real mode, to prepare a safe state in which to load
586
* CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
587
* supports long mode.
588
*/
589
#ifdef CONFIG_X86_64
590
if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
591
struct kvm_segment cs_desc;
592
unsigned long cr4;
593
594
/* Zero CR4.PCIDE before CR0.PG. */
595
cr4 = kvm_read_cr4(vcpu);
596
if (cr4 & X86_CR4_PCIDE)
597
kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
598
599
/* A 32-bit code segment is required to clear EFER.LMA. */
600
memset(&cs_desc, 0, sizeof(cs_desc));
601
cs_desc.type = 0xb;
602
cs_desc.s = cs_desc.g = cs_desc.present = 1;
603
kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
604
}
605
#endif
606
607
/* For the 64-bit case, this will clear EFER.LMA. */
608
cr0 = kvm_read_cr0(vcpu);
609
if (cr0 & X86_CR0_PE)
610
kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
611
612
#ifdef CONFIG_X86_64
613
if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
614
unsigned long cr4, efer;
615
616
/* Clear CR4.PAE before clearing EFER.LME. */
617
cr4 = kvm_read_cr4(vcpu);
618
if (cr4 & X86_CR4_PAE)
619
kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);
620
621
/* And finally go back to 32-bit mode. */
622
efer = 0;
623
kvm_set_msr(vcpu, MSR_EFER, efer);
624
}
625
#endif
626
627
/*
628
* FIXME: When resuming L2 (a.k.a. guest mode), the transition to guest
629
* mode should happen _after_ loading state from SMRAM. However, KVM
630
* piggybacks the nested VM-Enter flows (which is wrong for many other
631
* reasons), and so nSVM/nVMX would clobber state that is loaded from
632
* SMRAM and from the VMCS/VMCB.
633
*/
634
if (kvm_x86_call(leave_smm)(vcpu, &smram))
635
return X86EMUL_UNHANDLEABLE;
636
637
#ifdef CONFIG_X86_64
638
if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
639
ret = rsm_load_state_64(ctxt, &smram.smram64);
640
else
641
#endif
642
ret = rsm_load_state_32(ctxt, &smram.smram32);
643
644
/*
645
* If RSM fails and triggers shutdown, architecturally the shutdown
646
* occurs *before* the transition to guest mode. But due to KVM's
647
* flawed handling of RSM to L2 (see above), the vCPU may already be
648
* in_guest_mode(). Force the vCPU out of guest mode before delivering
649
* the shutdown, so that L1 enters shutdown instead of seeing a VM-Exit
650
* that architecturally shouldn't be possible.
651
*/
652
if (ret != X86EMUL_CONTINUE && is_guest_mode(vcpu))
653
kvm_leave_nested(vcpu);
654
return ret;
655
}
656
657