freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/arm64/vmm/vmm.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <[email protected]>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>
#include <dev/vmm/vmm_vm.h>

#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"
static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

struct vmm_regs {
	uint64_t	id_aa64afr0;
	uint64_t	id_aa64afr1;
	uint64_t	id_aa64dfr0;
	uint64_t	id_aa64dfr1;
	uint64_t	id_aa64isar0;
	uint64_t	id_aa64isar1;
	uint64_t	id_aa64isar2;
	uint64_t	id_aa64mmfr0;
	uint64_t	id_aa64mmfr1;
	uint64_t	id_aa64mmfr2;
	uint64_t	id_aa64pfr0;
	uint64_t	id_aa64pfr1;
};
static const struct vmm_regs vmm_arch_regs_masks = {
	.id_aa64dfr0 =
	    ID_AA64DFR0_CTX_CMPs_MASK |
	    ID_AA64DFR0_WRPs_MASK |
	    ID_AA64DFR0_BRPs_MASK |
	    ID_AA64DFR0_PMUVer_3 |
	    ID_AA64DFR0_DebugVer_8,
	.id_aa64isar0 =
	    ID_AA64ISAR0_TLB_TLBIOSR |
	    ID_AA64ISAR0_SHA3_IMPL |
	    ID_AA64ISAR0_RDM_IMPL |
	    ID_AA64ISAR0_Atomic_IMPL |
	    ID_AA64ISAR0_CRC32_BASE |
	    ID_AA64ISAR0_SHA2_512 |
	    ID_AA64ISAR0_SHA1_BASE |
	    ID_AA64ISAR0_AES_PMULL,
	.id_aa64mmfr0 =
	    ID_AA64MMFR0_TGran4_IMPL |
	    ID_AA64MMFR0_TGran64_IMPL |
	    ID_AA64MMFR0_TGran16_IMPL |
	    ID_AA64MMFR0_ASIDBits_16 |
	    ID_AA64MMFR0_PARange_4P,
	.id_aa64mmfr1 =
	    ID_AA64MMFR1_SpecSEI_IMPL |
	    ID_AA64MMFR1_PAN_ATS1E1 |
	    ID_AA64MMFR1_HAFDBS_AF,
	.id_aa64pfr0 =
	    ID_AA64PFR0_GIC_CPUIF_NONE |
	    ID_AA64PFR0_AdvSIMD_HP |
	    ID_AA64PFR0_FP_HP |
	    ID_AA64PFR0_EL3_64 |
	    ID_AA64PFR0_EL2_64 |
	    ID_AA64PFR0_EL1_64 |
	    ID_AA64PFR0_EL0_64,
};

/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt");
VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");
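/*
 * Read the host's ID registers into *regs, keeping only the feature
 * fields permitted by the corresponding mask in *masks.
 */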
static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define _FETCH_KERN_REG(reg, field) do { \
	regs->field = vmm_arch_regs_masks.field; \
	get_kernel_reg_iss_masked(reg ## _ISS, &regs->field, \
	    masks->field); \
} while (0)
	_FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
	_FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
	_FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
	_FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
	_FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
	_FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
	_FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
	_FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
	_FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
	_FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
	_FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
	_FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
	return (0);
}
static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
		free(vcpu, M_VMM);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_unsupported_quirk(void)
{
	/*
	 * Known to not load on Ampere eMAG
	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=285051
	 */
	if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_APM,
	    CPU_PART_EMAG8180, 0, 0))
		return (ENXIO);

	return (0);
}
int
vmm_modinit(void)
{
	int error;

	error = vmm_unsupported_quirk();
	if (error != 0)
		return (error);

	error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
	if (error != 0)
		return (error);

	return (vmmops_modinit(0));
}

int
vmm_modcleanup(void)
{
	return (vmmops_modcleanup());
}

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm_vmspace(vm)));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
	memset(vm->special_reg, 0, sizeof(vm->special_reg));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		/* Some interrupt controllers may have a CPU limit */
		if (vcpuid >= vgic_max_cpu_count(vm->cookie)) {
			sx_xunlock(&vm->vcpus_init_lock);
			return (NULL);
		}

		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}
int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	int error;

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	error = vm_mem_init(&vm->mem, 0, 1ul << 39);
	if (error != 0) {
		free(vm, M_VMM);
		return (error);
	}
	strcpy(vm->name, name);
	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;		/* XXX backwards compatibility */
	vm->threads = 1;	/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	pmap_t pmap __diagused;
	int i;

	if (destroy) {
		vm_xlock_memsegs(vm);
		pmap = vmspace_pmap(vm_vmspace(vm));
		sched_pin();
		PCPU_SET(curvmpmap, NULL);
		sched_unpin();
		CPU_FOREACH(i) {
			MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
		}
	} else
		vm_assert_memseg_xlocked(vm);

	vgic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	vm_mem_cleanup(vm);
	if (destroy) {
		vm_mem_destroy(vm);

		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
	}
}

void
vm_destroy(struct vm *vm)
{
	vm_cleanup(vm, true);
	free(vm, M_VMM);
}

void
vm_reset(struct vm *vm)
{
	vm_cleanup(vm, false);
	vm_init(vm, false);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{
	return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault));
}
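/* Read-as-zero handler for emulated special registers. */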
static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = 0;
	return (0);
}
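/* Return the fixed value that arg points at, e.g. a masked host ID register. */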
static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	*rval = *(uint64_t *)arg;
	return (0);
}
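/* Write-ignore handler for emulated special registers. */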
static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	return (0);
}

static int
vmm_write_oslar_el1(struct vcpu *vcpu, uint64_t wval, void *arg)
{
	struct hypctx *hypctx;

	hypctx = vcpu_get_cookie(vcpu);
	/* All other fields are RES0 & we don't do anything with this */
	/* TODO: Disable access to other debug state when locked */
	hypctx->dbg_oslock = (wval & OSLAR_OSLK) == OSLAR_OSLK;
	return (0);
}

static int
vmm_read_oslsr_el1(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
	struct hypctx *hypctx;
	uint64_t val;

	hypctx = vcpu_get_cookie(vcpu);
	val = OSLSR_OSLM_1;
	if (hypctx->dbg_oslock)
		val |= OSLSR_OSLK;
	*rval = val;

	return (0);
}
static const struct vmm_special_reg vmm_special_regs[] = {
#define SPECIAL_REG(_reg, _read, _write) \
	{ \
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \
		.esr_mask = ISS_MSR_REG_MASK, \
		.reg_read = (_read), \
		.reg_write = (_write), \
		.arg = NULL, \
	}
#define ID_SPECIAL_REG(_reg, _name) \
	{ \
		.esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \
		    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \
		    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \
		    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \
		    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \
		.esr_mask = ISS_MSR_REG_MASK, \
		.reg_read = vmm_reg_read_arg, \
		.reg_write = vmm_reg_wi, \
		.arg = &(vmm_arch_regs._name), \
	}

	/* ID registers */
	ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
	ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
	ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
	ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
	ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),

	/*
	 * All other ID registers are read as zero.
	 * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
	 */
	{
		.esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
		    (0 << ISS_MSR_OP1_SHIFT) |
		    (0 << ISS_MSR_CRn_SHIFT) |
		    (0 << ISS_MSR_CRm_SHIFT),
		.esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
		    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
		.reg_read = vmm_reg_raz,
		.reg_write = vmm_reg_wi,
		.arg = NULL,
	},

	/* Counter physical registers */
	SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
	SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
	    vtimer_phys_cval_write),
	SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
	    vtimer_phys_tval_write),
	SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),

	/* Debug registers */
	SPECIAL_REG(DBGPRCR_EL1, vmm_reg_raz, vmm_reg_wi),
	SPECIAL_REG(OSDLR_EL1, vmm_reg_raz, vmm_reg_wi),
	/* TODO: Exceptions on invalid access */
	SPECIAL_REG(OSLAR_EL1, vmm_reg_raz, vmm_write_oslar_el1),
	SPECIAL_REG(OSLSR_EL1, vmm_read_oslsr_el1, vmm_reg_wi),
#undef SPECIAL_REG
};
void
vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
    reg_read_t reg_read, reg_write_t reg_write, void *arg)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0) {
			vm->special_reg[i].esr_iss = iss;
			vm->special_reg[i].esr_mask = mask;
			vm->special_reg[i].reg_read = reg_read;
			vm->special_reg[i].reg_write = reg_write;
			vm->special_reg[i].arg = arg;
			return;
		}
	}

	panic("%s: No free special register slot", __func__);
}

void
vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
{
	int i;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == iss &&
		    vm->special_reg[i].esr_mask == mask) {
			memset(&vm->special_reg[i], 0,
			    sizeof(vm->special_reg[i]));
			return;
		}
	}

	panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
	    mask);
}
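/*
 * Emulate a trapped MSR/MRS access: try the handlers registered on this
 * VM first, then the global vmm_special_regs table, and otherwise let
 * userspace handle the exit.
 */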
static int
vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vre *vre;
	int i, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;
	vre = &vme->u.reg_emul.vre;

	for (i = 0; i < nitems(vm->special_reg); i++) {
		if (vm->special_reg[i].esr_iss == 0 &&
		    vm->special_reg[i].esr_mask == 0)
			continue;

		if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
		    vm->special_reg[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vm->special_reg[i].reg_read,
			    vm->special_reg[i].reg_write,
			    vm->special_reg[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}
	for (i = 0; i < nitems(vmm_special_regs); i++) {
		if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
		    vmm_special_regs[i].esr_iss) {
			rv = vmm_emulate_register(vcpu, vre,
			    vmm_special_regs[i].reg_read,
			    vmm_special_regs[i].reg_write,
			    vmm_special_regs[i].arg);
			if (rv == 0) {
				*retu = false;
			}
			return (rv);
		}
	}

	*retu = true;
	return (0);
}
void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}
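/*
 * In-kernel MMIO emulation: look up the registered region covering the
 * faulting IPA and emulate the access, or bounce the exit to userspace.
 */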
static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->vgic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}
static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* flush host state to the pcb */
	vfp_save_state(curthread, curthread->td_pcb);
	/* Ensure the VFP state will be re-loaded when exiting the guest */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	vfp_enable();
	vfp_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	vfp_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
	if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
	    CPACR_FPEN_TRAP_ALL1)
		panic("VFP not enabled in host!");

	/* save guest FPU state */
	vfp_enable();
	vfp_store(vcpu->guestfpu);
	vfp_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}
int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
	return (vcpu->cookie);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{
	if (reg < 0 || reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg < 0 || reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_PC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
	return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
	return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
	return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
	/* TODO: Should we raise an SError? */
	return (vgic_inject_msi(vm->cookie, msg, addr));
}
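/*
 * Pass an SMCCC call (e.g. PSCI) to userspace.  Only HVC with an
 * immediate of zero is treated as an SMCCC call.
 */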
static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct hypctx *hypctx;
	int i;

	hypctx = vcpu_get_cookie(vcpu);

	if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
		return (1);

	vme->exitcode = VM_EXITCODE_SMCCC;
	vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
	for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
		vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

	*retu = true;
	return (0);
}
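/*
 * Block the vcpu until an interrupt is pending for it, the VM is
 * suspended, or the scheduler wants the CPU back.
 */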
static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct vm *vm;

	vm = vcpu->vm;
	vcpu_lock(vcpu);
	while (1) {
		if (vm->suspend)
			break;

		if (vgic_has_pending_irq(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;
	return (0);
}
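/*
 * Handle a stage 2 fault: first let pmap_fault() fix up the page tables,
 * otherwise call vm_fault() on the guest's vmspace to fault the page in.
 */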
static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr, esr;
	pmap_t pmap;
	int ftype, rv;

	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vm_vmspace(vcpu->vm));
	addr = vme->u.paging.gpa;
	esr = vme->u.paging.esr;

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
		return (0);

	switch (ESR_ELx_EXCEPTION(esr)) {
	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
		break;
	default:
		panic("%s: Invalid exception (esr = %lx)", __func__, esr);
	}

	map = &vm_vmspace(vm)->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS)
		return (EFAULT);

	return (0);
}
static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}
int
vm_run(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;
	struct vm_eventinfo evinfo;
	int error, vcpuid;
	struct vm_exit *vme;
	bool retu;
	pmap_t pmap;

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm_vmspace(vm));
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_REG_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_reg_emul(vcpu, &retu);
			break;

		case VM_EXITCODE_HVC:
			/*
			 * The HVC instruction saves the address for the
			 * next instruction as the return address.
			 */
			vcpu->nextpc = vme->pc;
			/*
			 * The PSCI call can change the exit information in the
			 * case of suspend/reset/poweroff/cpu off/cpu on.
			 */
			error = vm_handle_smccc_call(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;

		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;

		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;

		default:
			/* Handle in userland */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}