GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/arm64/vmm/vmm.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <[email protected]>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/fpu.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>
#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>

#include "arm64.h"
#include "mmu.h"

#include "io/vgic.h"
#include "io/vtimer.h"

struct vcpu {
        int flags;
        enum vcpu_state state;
        struct mtx mtx;
        int hostcpu; /* host cpuid this vcpu last ran on */
        int vcpuid;
        void *stats;
        struct vm_exit exitinfo;
        uint64_t nextpc; /* (x) next instruction to execute */
        struct vm *vm; /* (o) */
        void *cookie; /* (i) cpu-specific data */
        struct vfpstate *guestfpu; /* (a,i) guest fpu state */
};

#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define vcpu_lock_destroy(v) mtx_destroy(&((v)->mtx))
#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)

struct vmm_mmio_region {
        uint64_t start;
        uint64_t end;
        mem_region_read_t read;
        mem_region_write_t write;
};
#define VM_MAX_MMIO_REGIONS 4

struct vmm_special_reg {
        uint32_t esr_iss;
        uint32_t esr_mask;
        reg_read_t reg_read;
        reg_write_t reg_write;
        void *arg;
};
#define VM_MAX_SPECIAL_REGS 16

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
        void *cookie; /* (i) cpu-specific data */
        volatile cpuset_t active_cpus; /* (i) active vcpus */
        volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */
        int suspend; /* (i) stop VM execution */
        bool dying; /* (o) is dying */
        volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
        volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
        struct vmspace *vmspace; /* (o) guest's address space */
        struct vm_mem mem; /* (i) guest memory */
        char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
        struct vcpu **vcpu; /* (i) guest vcpus */
        struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
                                        /* (o) guest MMIO regions */
        struct vmm_special_reg special_reg[VM_MAX_SPECIAL_REGS];
        /* The following describe the vm cpu topology */
        uint16_t sockets; /* (o) num of sockets */
        uint16_t cores; /* (o) num of cores/socket */
        uint16_t threads; /* (o) num of threads/core */
        uint16_t maxcpus; /* (o) max pluggable cpus */
        struct sx vcpus_init_lock; /* (o) */
};

static bool vmm_initialized = false;

static int vm_handle_wfi(struct vcpu *vcpu,
    struct vm_exit *vme, bool *retu);

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

struct vmm_regs {
        uint64_t id_aa64afr0;
        uint64_t id_aa64afr1;
        uint64_t id_aa64dfr0;
        uint64_t id_aa64dfr1;
        uint64_t id_aa64isar0;
        uint64_t id_aa64isar1;
        uint64_t id_aa64isar2;
        uint64_t id_aa64mmfr0;
        uint64_t id_aa64mmfr1;
        uint64_t id_aa64mmfr2;
        uint64_t id_aa64pfr0;
        uint64_t id_aa64pfr1;
};

static const struct vmm_regs vmm_arch_regs_masks = {
        .id_aa64dfr0 =
            ID_AA64DFR0_CTX_CMPs_MASK |
            ID_AA64DFR0_WRPs_MASK |
            ID_AA64DFR0_BRPs_MASK |
            ID_AA64DFR0_PMUVer_3 |
            ID_AA64DFR0_DebugVer_8,
        .id_aa64isar0 =
            ID_AA64ISAR0_TLB_TLBIOSR |
            ID_AA64ISAR0_SHA3_IMPL |
            ID_AA64ISAR0_RDM_IMPL |
            ID_AA64ISAR0_Atomic_IMPL |
            ID_AA64ISAR0_CRC32_BASE |
            ID_AA64ISAR0_SHA2_512 |
            ID_AA64ISAR0_SHA1_BASE |
            ID_AA64ISAR0_AES_PMULL,
        .id_aa64mmfr0 =
            ID_AA64MMFR0_TGran4_IMPL |
            ID_AA64MMFR0_TGran64_IMPL |
            ID_AA64MMFR0_TGran16_IMPL |
            ID_AA64MMFR0_ASIDBits_16 |
            ID_AA64MMFR0_PARange_4P,
        .id_aa64mmfr1 =
            ID_AA64MMFR1_SpecSEI_IMPL |
            ID_AA64MMFR1_PAN_ATS1E1 |
            ID_AA64MMFR1_HAFDBS_AF,
        .id_aa64pfr0 =
            ID_AA64PFR0_GIC_CPUIF_NONE |
            ID_AA64PFR0_AdvSIMD_HP |
            ID_AA64PFR0_FP_HP |
            ID_AA64PFR0_EL3_64 |
            ID_AA64PFR0_EL2_64 |
            ID_AA64PFR0_EL1_64 |
            ID_AA64PFR0_EL0_64,
};

/* Host registers masked by vmm_arch_regs_masks. */
static struct vmm_regs vmm_arch_regs;

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_UNKNOWN, "number of vmexits for the unknown exception");
VMM_STAT(VMEXIT_WFI, "number of times wfi was intercepted");
VMM_STAT(VMEXIT_WFE, "number of times wfe was intercepted");
VMM_STAT(VMEXIT_HVC, "number of times hvc was intercepted");
VMM_STAT(VMEXIT_MSR, "number of times msr/mrs was intercepted");
VMM_STAT(VMEXIT_DATA_ABORT, "number of vmexits for a data abort");
VMM_STAT(VMEXIT_INSN_ABORT, "number of vmexits for an instruction abort");
VMM_STAT(VMEXIT_UNHANDLED_SYNC, "number of vmexits for an unhandled synchronous exception");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_FIQ, "number of vmexits for an interrupt");
VMM_STAT(VMEXIT_BRK, "number of vmexits for a breakpoint exception");
VMM_STAT(VMEXIT_SS, "number of vmexits for a single-step exception");
VMM_STAT(VMEXIT_UNHANDLED_EL2, "number of vmexits for an unhandled EL2 exception");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define VM_MAXCPU MIN(0xffff - 1, CPU_SETSIZE)

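/*
 * Build the guest-visible ID register values: each register is taken from
 * the host, limited to the fields permitted by "masks"; a register that
 * cannot be read is exposed as zero.
 */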
static int
vmm_regs_init(struct vmm_regs *regs, const struct vmm_regs *masks)
{
#define _FETCH_KERN_REG(reg, field) do { \
        regs->field = vmm_arch_regs_masks.field; \
        if (!get_kernel_reg_iss_masked(reg ## _ISS, &regs->field, \
            masks->field)) \
                regs->field = 0; \
} while (0)
        _FETCH_KERN_REG(ID_AA64AFR0_EL1, id_aa64afr0);
        _FETCH_KERN_REG(ID_AA64AFR1_EL1, id_aa64afr1);
        _FETCH_KERN_REG(ID_AA64DFR0_EL1, id_aa64dfr0);
        _FETCH_KERN_REG(ID_AA64DFR1_EL1, id_aa64dfr1);
        _FETCH_KERN_REG(ID_AA64ISAR0_EL1, id_aa64isar0);
        _FETCH_KERN_REG(ID_AA64ISAR1_EL1, id_aa64isar1);
        _FETCH_KERN_REG(ID_AA64ISAR2_EL1, id_aa64isar2);
        _FETCH_KERN_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0);
        _FETCH_KERN_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1);
        _FETCH_KERN_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2);
        _FETCH_KERN_REG(ID_AA64PFR0_EL1, id_aa64pfr0);
        _FETCH_KERN_REG(ID_AA64PFR1_EL1, id_aa64pfr1);
#undef _FETCH_KERN_REG
        return (0);
}

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
        vmmops_vcpu_cleanup(vcpu->cookie);
        vcpu->cookie = NULL;
        if (destroy) {
                vmm_stat_free(vcpu->stats);
                fpu_save_area_free(vcpu->guestfpu);
                vcpu_lock_destroy(vcpu);
        }
}

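/*
 * Allocate and minimally initialize a vcpu structure.  The backend-specific
 * state is set up later by vcpu_init().
 */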
static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
        struct vcpu *vcpu;

        KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
            ("vcpu_alloc: invalid vcpu %d", vcpu_id));

        vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
        vcpu_lock_init(vcpu);
        vcpu->state = VCPU_IDLE;
        vcpu->hostcpu = NOCPU;
        vcpu->vcpuid = vcpu_id;
        vcpu->vm = vm;
        vcpu->guestfpu = fpu_save_area_alloc();
        vcpu->stats = vmm_stat_alloc();
        return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
        vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
        MPASS(vcpu->cookie != NULL);
        fpu_save_area_reset(vcpu->guestfpu);
        vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
        return (&vcpu->exitinfo);
}

static int
vmm_unsupported_quirk(void)
{
        /*
         * Known to not load on Ampere eMAG
         * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=285051
         */
        if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_APM,
            CPU_PART_EMAG8180, 0, 0))
                return (ENXIO);

        return (0);
}

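/*
 * Module-load time initialization: size vm_maxcpu from the hw.vmm.maxcpu
 * tunable (clamped to VM_MAXCPU), snapshot the masked ID registers and hand
 * off to the backend via vmmops_modinit().
 */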
static int
vmm_init(void)
{
        int error;

        vm_maxcpu = mp_ncpus;
        TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

        if (vm_maxcpu > VM_MAXCPU) {
                printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
                vm_maxcpu = VM_MAXCPU;
        }
        if (vm_maxcpu == 0)
                vm_maxcpu = 1;

        error = vmm_regs_init(&vmm_arch_regs, &vmm_arch_regs_masks);
        if (error != 0)
                return (error);

        return (vmmops_modinit(0));
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
        int error;

        switch (what) {
        case MOD_LOAD:
                error = vmm_unsupported_quirk();
                if (error != 0)
                        break;
                error = vmmdev_init();
                if (error != 0)
                        break;
                error = vmm_init();
                if (error == 0)
                        vmm_initialized = true;
                else
                        (void)vmmdev_cleanup();
                break;
        case MOD_UNLOAD:
                error = vmmdev_cleanup();
                if (error == 0 && vmm_initialized) {
                        error = vmmops_modcleanup();
                        if (error) {
                                /*
                                 * Something bad happened - prevent new
                                 * VMs from being created
                                 */
                                vmm_initialized = false;
                        }
                }
                break;
        default:
                error = 0;
                break;
        }
        return (error);
}

static moduledata_t vmm_kmod = {
        "vmm",
        vmm_handler,
        NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - HYP initialization requires smp_rendezvous() and therefore must happen
 *   after SMP is fully functional (after SI_SUB_SMP).
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, MAX(SI_SUB_SMP, SI_SUB_DEVFS) + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

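/*
 * Initialize the per-VM state.  Called both when the VM is first created
 * (create == true) and when it is reinitialized via vm_reinit().
 */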
static void
vm_init(struct vm *vm, bool create)
{
        int i;

        vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
        MPASS(vm->cookie != NULL);

        CPU_ZERO(&vm->active_cpus);
        CPU_ZERO(&vm->debug_cpus);

        vm->suspend = 0;
        CPU_ZERO(&vm->suspended_cpus);

        memset(vm->mmio_region, 0, sizeof(vm->mmio_region));
        memset(vm->special_reg, 0, sizeof(vm->special_reg));

        if (!create) {
                for (i = 0; i < vm->maxcpus; i++) {
                        if (vm->vcpu[i] != NULL)
                                vcpu_init(vm->vcpu[i]);
                }
        }
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
        sx_xlock(&vm->vcpus_init_lock);
        vm->dying = true;
        sx_xunlock(&vm->vcpus_init_lock);
}

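/*
 * Return the vcpu with the given id, creating it on first use.  Lookups are
 * lock-free; creation is serialized by vcpus_init_lock and refused once the
 * VM is dying.
 */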
struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
        struct vcpu *vcpu;

        if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
                return (NULL);

        /* Some interrupt controllers may have a CPU limit */
        if (vcpuid >= vgic_max_cpu_count(vm->cookie))
                return (NULL);

        vcpu = (struct vcpu *)
            atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
        if (__predict_true(vcpu != NULL))
                return (vcpu);

        sx_xlock(&vm->vcpus_init_lock);
        vcpu = vm->vcpu[vcpuid];
        if (vcpu == NULL && !vm->dying) {
                vcpu = vcpu_alloc(vm, vcpuid);
                vcpu_init(vcpu);

                /*
                 * Ensure vCPU is fully created before updating pointer
                 * to permit unlocked reads above.
                 */
                atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
                    (uintptr_t)vcpu);
        }
        sx_xunlock(&vm->vcpus_init_lock);
        return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
        sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
        sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
        struct vm *vm;
        struct vmspace *vmspace;

        /*
         * If vmm.ko could not be successfully initialized then don't attempt
         * to create the virtual machine.
         */
        if (!vmm_initialized)
                return (ENXIO);

        if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
                return (EINVAL);

        vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
        if (vmspace == NULL)
                return (ENOMEM);

        vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
        strcpy(vm->name, name);
        vm->vmspace = vmspace;
        vm_mem_init(&vm->mem);
        sx_init(&vm->vcpus_init_lock, "vm vcpus");

        vm->sockets = 1;
        vm->cores = 1; /* XXX backwards compatibility */
        vm->threads = 1; /* XXX backwards compatibility */
        vm->maxcpus = vm_maxcpu;

        vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
            M_WAITOK | M_ZERO);

        vm_init(vm, true);

        *retvm = vm;
        return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
        *sockets = vm->sockets;
        *cores = vm->cores;
        *threads = vm->threads;
        *maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
        return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
        /* Ignore maxcpus. */
        if ((sockets * cores * threads) > vm->maxcpus)
                return (EINVAL);
        vm->sockets = sockets;
        vm->cores = cores;
        vm->threads = threads;
        return(0);
}

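/*
 * Tear down VM state.  With destroy == false only the state rebuilt by
 * vm_init()/vcpu_init() is released (used by vm_reinit()); with
 * destroy == true the vcpus, memory segments and vmspace are freed as well.
 */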
static void
vm_cleanup(struct vm *vm, bool destroy)
{
        pmap_t pmap __diagused;
        int i;

        if (destroy) {
                vm_xlock_memsegs(vm);
                pmap = vmspace_pmap(vm->vmspace);
                sched_pin();
                PCPU_SET(curvmpmap, NULL);
                sched_unpin();
                CPU_FOREACH(i) {
                        MPASS(cpuid_to_pcpu[i]->pc_curvmpmap != pmap);
                }
        } else
                vm_assert_memseg_xlocked(vm);


        vgic_detach_from_vm(vm->cookie);

        for (i = 0; i < vm->maxcpus; i++) {
                if (vm->vcpu[i] != NULL)
                        vcpu_cleanup(vm->vcpu[i], destroy);
        }

        vmmops_cleanup(vm->cookie);

        vm_mem_cleanup(vm);
        if (destroy) {
                vm_mem_destroy(vm);

                vmmops_vmspace_free(vm->vmspace);
                vm->vmspace = NULL;

                for (i = 0; i < vm->maxcpus; i++)
                        free(vm->vcpu[i], M_VMM);
                free(vm->vcpu, M_VMM);
                sx_destroy(&vm->vcpus_init_lock);
        }
}

void
vm_destroy(struct vm *vm)
{
        vm_cleanup(vm, true);
        free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
        int error;

        /*
         * A virtual machine can be reset only if all vcpus are suspended.
         */
        if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
                vm_cleanup(vm, false);
                vm_init(vm, false);
                error = 0;
        } else {
                error = EBUSY;
        }

        return (error);
}

const char *
vm_name(struct vm *vm)
{
        return (vm->name);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{
        return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault));
}

static int
vmm_reg_raz(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
        *rval = 0;
        return (0);
}

static int
vmm_reg_read_arg(struct vcpu *vcpu, uint64_t *rval, void *arg)
{
        *rval = *(uint64_t *)arg;
        return (0);
}

static int
vmm_reg_wi(struct vcpu *vcpu, uint64_t wval, void *arg)
{
        return (0);
}

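/*
 * Special register handlers that apply to every VM: the masked ID registers
 * are exposed read-only and the physical counter/timer registers are handled
 * by the vtimer code.  Handlers registered per-VM with
 * vm_register_reg_handler() are consulted before this table.
 */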
static const struct vmm_special_reg vmm_special_regs[] = {
#define SPECIAL_REG(_reg, _read, _write) \
        { \
                .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \
                    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \
                    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \
                    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \
                    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \
                .esr_mask = ISS_MSR_REG_MASK, \
                .reg_read = (_read), \
                .reg_write = (_write), \
                .arg = NULL, \
        }
#define ID_SPECIAL_REG(_reg, _name) \
        { \
                .esr_iss = ((_reg ## _op0) << ISS_MSR_OP0_SHIFT) | \
                    ((_reg ## _op1) << ISS_MSR_OP1_SHIFT) | \
                    ((_reg ## _CRn) << ISS_MSR_CRn_SHIFT) | \
                    ((_reg ## _CRm) << ISS_MSR_CRm_SHIFT) | \
                    ((_reg ## _op2) << ISS_MSR_OP2_SHIFT), \
                .esr_mask = ISS_MSR_REG_MASK, \
                .reg_read = vmm_reg_read_arg, \
                .reg_write = vmm_reg_wi, \
                .arg = &(vmm_arch_regs._name), \
        }

        /* ID registers */
        ID_SPECIAL_REG(ID_AA64PFR0_EL1, id_aa64pfr0),
        ID_SPECIAL_REG(ID_AA64DFR0_EL1, id_aa64dfr0),
        ID_SPECIAL_REG(ID_AA64ISAR0_EL1, id_aa64isar0),
        ID_SPECIAL_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0),
        ID_SPECIAL_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1),

        /*
         * All other ID registers are read as zero.
         * They are all in the op0=3, op1=0, CRn=0, CRm={0..7} space.
         */
        {
                .esr_iss = (3 << ISS_MSR_OP0_SHIFT) |
                    (0 << ISS_MSR_OP1_SHIFT) |
                    (0 << ISS_MSR_CRn_SHIFT) |
                    (0 << ISS_MSR_CRm_SHIFT),
                .esr_mask = ISS_MSR_OP0_MASK | ISS_MSR_OP1_MASK |
                    ISS_MSR_CRn_MASK | (0x8 << ISS_MSR_CRm_SHIFT),
                .reg_read = vmm_reg_raz,
                .reg_write = vmm_reg_wi,
                .arg = NULL,
        },

        /* Counter physical registers */
        SPECIAL_REG(CNTP_CTL_EL0, vtimer_phys_ctl_read, vtimer_phys_ctl_write),
        SPECIAL_REG(CNTP_CVAL_EL0, vtimer_phys_cval_read,
            vtimer_phys_cval_write),
        SPECIAL_REG(CNTP_TVAL_EL0, vtimer_phys_tval_read,
            vtimer_phys_tval_write),
        SPECIAL_REG(CNTPCT_EL0, vtimer_phys_cnt_read, vtimer_phys_cnt_write),
#undef SPECIAL_REG
};

void
vm_register_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask,
    reg_read_t reg_read, reg_write_t reg_write, void *arg)
{
        int i;

        for (i = 0; i < nitems(vm->special_reg); i++) {
                if (vm->special_reg[i].esr_iss == 0 &&
                    vm->special_reg[i].esr_mask == 0) {
                        vm->special_reg[i].esr_iss = iss;
                        vm->special_reg[i].esr_mask = mask;
                        vm->special_reg[i].reg_read = reg_read;
                        vm->special_reg[i].reg_write = reg_write;
                        vm->special_reg[i].arg = arg;
                        return;
                }
        }

        panic("%s: No free special register slot", __func__);
}

void
vm_deregister_reg_handler(struct vm *vm, uint64_t iss, uint64_t mask)
{
        int i;

        for (i = 0; i < nitems(vm->special_reg); i++) {
                if (vm->special_reg[i].esr_iss == iss &&
                    vm->special_reg[i].esr_mask == mask) {
                        memset(&vm->special_reg[i], 0,
                            sizeof(vm->special_reg[i]));
                        return;
                }
        }

        panic("%s: Invalid special register: iss %lx mask %lx", __func__, iss,
            mask);
}

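/*
 * Emulate a trapped MSR/MRS access.  The per-VM special register table is
 * searched first, then the global vmm_special_regs table; if no handler
 * matches, the exit is forwarded to userspace (*retu = true).
 */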
static int
vm_handle_reg_emul(struct vcpu *vcpu, bool *retu)
{
        struct vm *vm;
        struct vm_exit *vme;
        struct vre *vre;
        int i, rv;

        vm = vcpu->vm;
        vme = &vcpu->exitinfo;
        vre = &vme->u.reg_emul.vre;

        for (i = 0; i < nitems(vm->special_reg); i++) {
                if (vm->special_reg[i].esr_iss == 0 &&
                    vm->special_reg[i].esr_mask == 0)
                        continue;

                if ((vre->inst_syndrome & vm->special_reg[i].esr_mask) ==
                    vm->special_reg[i].esr_iss) {
                        rv = vmm_emulate_register(vcpu, vre,
                            vm->special_reg[i].reg_read,
                            vm->special_reg[i].reg_write,
                            vm->special_reg[i].arg);
                        if (rv == 0) {
                                *retu = false;
                        }
                        return (rv);
                }
        }
        for (i = 0; i < nitems(vmm_special_regs); i++) {
                if ((vre->inst_syndrome & vmm_special_regs[i].esr_mask) ==
                    vmm_special_regs[i].esr_iss) {
                        rv = vmm_emulate_register(vcpu, vre,
                            vmm_special_regs[i].reg_read,
                            vmm_special_regs[i].reg_write,
                            vmm_special_regs[i].arg);
                        if (rv == 0) {
                                *retu = false;
                        }
                        return (rv);
                }
        }


        *retu = true;
        return (0);
}

void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
        int i;

        for (i = 0; i < nitems(vm->mmio_region); i++) {
                if (vm->mmio_region[i].start == 0 &&
                    vm->mmio_region[i].end == 0) {
                        vm->mmio_region[i].start = start;
                        vm->mmio_region[i].end = start + size;
                        vm->mmio_region[i].read = mmio_read;
                        vm->mmio_region[i].write = mmio_write;
                        return;
                }
        }

        panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
        int i;

        for (i = 0; i < nitems(vm->mmio_region); i++) {
                if (vm->mmio_region[i].start == start &&
                    vm->mmio_region[i].end == start + size) {
                        memset(&vm->mmio_region[i], 0,
                            sizeof(vm->mmio_region[i]));
                        return;
                }
        }

        panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
            start + size);
}

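/*
 * Emulate an access to an emulated device.  The fault IPA is matched against
 * the registered MMIO regions; anything else, including all faults taken
 * before the vgic is attached, is forwarded to userspace.
 */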
static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
        struct vm *vm;
        struct vm_exit *vme;
        struct vie *vie;
        struct hyp *hyp;
        uint64_t fault_ipa;
        struct vm_guest_paging *paging;
        struct vmm_mmio_region *vmr;
        int error, i;

        vm = vcpu->vm;
        hyp = vm->cookie;
        if (!hyp->vgic_attached)
                goto out_user;

        vme = &vcpu->exitinfo;
        vie = &vme->u.inst_emul.vie;
        paging = &vme->u.inst_emul.paging;

        fault_ipa = vme->u.inst_emul.gpa;

        vmr = NULL;
        for (i = 0; i < nitems(vm->mmio_region); i++) {
                if (vm->mmio_region[i].start <= fault_ipa &&
                    vm->mmio_region[i].end > fault_ipa) {
                        vmr = &vm->mmio_region[i];
                        break;
                }
        }
        if (vmr == NULL)
                goto out_user;

        error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
            vmr->read, vmr->write, retu);
        return (error);

out_user:
        *retu = true;
        return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
        int i;

        if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
                return (EINVAL);

        if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
                VM_CTR2(vm, "virtual machine already suspended %d/%d",
                    vm->suspend, how);
                return (EALREADY);
        }

        VM_CTR1(vm, "virtual machine successfully suspended %d", how);

        /*
         * Notify all active vcpus that they are now suspended.
         */
        for (i = 0; i < vm->maxcpus; i++) {
                if (CPU_ISSET(i, &vm->active_cpus))
                        vcpu_notify_event(vm_vcpu(vm, i));
        }

        return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
        struct vm *vm = vcpu->vm;
        struct vm_exit *vmexit;

        KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
            ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

        vmexit = vm_exitinfo(vcpu);
        vmexit->pc = pc;
        vmexit->inst_length = 4;
        vmexit->exitcode = VM_EXITCODE_SUSPENDED;
        vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
        struct vm_exit *vmexit;

        vmexit = vm_exitinfo(vcpu);
        vmexit->pc = pc;
        vmexit->inst_length = 4;
        vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
        struct vm *vm = vcpu->vm;

        if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
                return (EBUSY);

        CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
        return (0);

}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
        if (vcpu == NULL) {
                vm->debug_cpus = vm->active_cpus;
                for (int i = 0; i < vm->maxcpus; i++) {
                        if (CPU_ISSET(i, &vm->active_cpus))
                                vcpu_notify_event(vm_vcpu(vm, i));
                }
        } else {
                if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
                        return (EINVAL);

                CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
                vcpu_notify_event(vcpu);
        }
        return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

        if (vcpu == NULL) {
                CPU_ZERO(&vm->debug_cpus);
        } else {
                if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
                        return (EINVAL);

                CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
        }
        return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

        return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

        return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

        return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

        return (vm->suspended_cpus);
}


void *
vcpu_stats(struct vcpu *vcpu)
{

        return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be directed
 *   to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
        int hostcpu;

        hostcpu = vcpu->hostcpu;
        if (vcpu->state == VCPU_RUNNING) {
                KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
                if (hostcpu != curcpu) {
                        ipi_cpu(hostcpu, vmm_ipinum);
                } else {
                        /*
                         * If the 'vcpu' is running on 'curcpu' then it must
                         * be sending a notification to itself (e.g. SELF_IPI).
                         * The pending event will be picked up when the vcpu
                         * transitions back to guest context.
                         */
                }
        } else {
                KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
                    "with hostcpu %d", vcpu->state, hostcpu));
                if (vcpu->state == VCPU_SLEEPING)
                        wakeup_one(vcpu);
        }
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
        vcpu_lock(vcpu);
        vcpu_notify_event_locked(vcpu);
        vcpu_unlock(vcpu);
}

struct vmspace *
vm_vmspace(struct vm *vm)
{
        return (vm->vmspace);
}

struct vm_mem *
vm_mem(struct vm *vm)
{
        return (&vm->mem);
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

        /* flush host state to the pcb */
        vfp_save_state(curthread, curthread->td_pcb);
        /* Ensure the VFP state will be re-loaded when exiting the guest */
        PCPU_SET(fpcurthread, NULL);

        /* restore guest FPU state */
        vfp_enable();
        vfp_restore(vcpu->guestfpu);

        /*
         * The FPU is now "dirty" with the guest's state so turn on emulation
         * to trap any access to the FPU by the host.
         */
        vfp_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{
        if ((READ_SPECIALREG(cpacr_el1) & CPACR_FPEN_MASK) !=
            CPACR_FPEN_TRAP_ALL1)
                panic("VFP not enabled in host!");

        /* save guest FPU state */
        vfp_enable();
        vfp_store(vcpu->guestfpu);
        vfp_disable();

        KASSERT(PCPU_GET(fpcurthread) == NULL,
            ("%s: fpcurthread set with guest registers", __func__));
}
static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
        int error;

        vcpu_assert_locked(vcpu);

        /*
         * State transitions from the vmmdev_ioctl() must always begin from
         * the VCPU_IDLE state. This guarantees that there is only a single
         * ioctl() operating on a vcpu at any point.
         */
        if (from_idle) {
                while (vcpu->state != VCPU_IDLE) {
                        vcpu_notify_event_locked(vcpu);
                        msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
                }
        } else {
                KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
                    "vcpu idle state"));
        }

        if (vcpu->state == VCPU_RUNNING) {
                KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
                    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
        } else {
                KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
                    "vcpu that is not running", vcpu->hostcpu));
        }

        /*
         * The following state transitions are allowed:
         * IDLE -> FROZEN -> IDLE
         * FROZEN -> RUNNING -> FROZEN
         * FROZEN -> SLEEPING -> FROZEN
         */
        switch (vcpu->state) {
        case VCPU_IDLE:
        case VCPU_RUNNING:
        case VCPU_SLEEPING:
                error = (newstate != VCPU_FROZEN);
                break;
        case VCPU_FROZEN:
                error = (newstate == VCPU_FROZEN);
                break;
        default:
                error = 1;
                break;
        }

        if (error)
                return (EBUSY);

        vcpu->state = newstate;
        if (newstate == VCPU_RUNNING)
                vcpu->hostcpu = curcpu;
        else
                vcpu->hostcpu = NOCPU;

        if (newstate == VCPU_IDLE)
                wakeup(&vcpu->state);

        return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
        int error;

        if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
                panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
        int error;

        if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
                panic("Error %d setting state to %d", error, newstate);
}

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{
        if (type < 0 || type >= VM_CAP_MAX)
                return (EINVAL);

        return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{
        if (type < 0 || type >= VM_CAP_MAX)
                return (EINVAL);

        return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{
        return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{
        return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{
        return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{
        return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
        int error;

        vcpu_lock(vcpu);
        error = vcpu_set_state_locked(vcpu, newstate, from_idle);
        vcpu_unlock(vcpu);

        return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
        enum vcpu_state state;

        vcpu_lock(vcpu);
        state = vcpu->state;
        if (hostcpu != NULL)
                *hostcpu = vcpu->hostcpu;
        vcpu_unlock(vcpu);

        return (state);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{

        if (reg >= VM_REG_LAST)
                return (EINVAL);

        return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
        int error;

        if (reg >= VM_REG_LAST)
                return (EINVAL);
        error = vmmops_setreg(vcpu->cookie, reg, val);
        if (error || reg != VM_REG_GUEST_PC)
                return (error);

        vcpu->nextpc = val;

        return (0);
}

void *
vm_get_cookie(struct vm *vm)
{
        return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t esr, uint64_t far)
{
        return (vmmops_exception(vcpu->cookie, esr, far));
}

int
vm_attach_vgic(struct vm *vm, struct vm_vgic_descr *descr)
{
        return (vgic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{
        return (vgic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{
        return (vgic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{
        /* TODO: Should we raise an SError? */
        return (vgic_inject_msi(vm->cookie, msg, addr));
}

static int
vm_handle_smccc_call(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
        struct hypctx *hypctx;
        int i;

        hypctx = vcpu_get_cookie(vcpu);

        if ((hypctx->tf.tf_esr & ESR_ELx_ISS_MASK) != 0)
                return (1);

        vme->exitcode = VM_EXITCODE_SMCCC;
        vme->u.smccc_call.func_id = hypctx->tf.tf_x[0];
        for (i = 0; i < nitems(vme->u.smccc_call.args); i++)
                vme->u.smccc_call.args[i] = hypctx->tf.tf_x[i + 1];

        *retu = true;
        return (0);
}

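/*
 * Block the vcpu after a WFI trap until an interrupt is pending for it, the
 * VM is suspended or the thread should yield.  The sleep is bounded so that
 * pending signals are noticed.
 */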
static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
        struct vm *vm;

        vm = vcpu->vm;
        vcpu_lock(vcpu);
        while (1) {
                if (vm->suspend)
                        break;

                if (vgic_has_pending_irq(vcpu->cookie))
                        break;

                if (vcpu_should_yield(vcpu))
                        break;

                vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
                /*
                 * XXX msleep_spin() cannot be interrupted by signals so
                 * wake up periodically to check pending signals.
                 */
                msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
                vcpu_require_state_locked(vcpu, VCPU_FROZEN);
        }
        vcpu_unlock(vcpu);

        *retu = false;
        return (0);
}

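/*
 * Handle a guest paging exit.  First let pmap_fault() try to update the page
 * tables; if that fails, fault the page in through the guest's vmspace.
 * Returns EFAULT when the address is not backed by guest memory.
 */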
static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
        struct vm *vm = vcpu->vm;
        struct vm_exit *vme;
        struct vm_map *map;
        uint64_t addr, esr;
        pmap_t pmap;
        int ftype, rv;

        vme = &vcpu->exitinfo;

        pmap = vmspace_pmap(vcpu->vm->vmspace);
        addr = vme->u.paging.gpa;
        esr = vme->u.paging.esr;

        /* The page exists, but the page table needs to be updated. */
        if (pmap_fault(pmap, esr, addr) == KERN_SUCCESS)
                return (0);

        switch (ESR_ELx_EXCEPTION(esr)) {
        case EXCP_INSN_ABORT_L:
        case EXCP_DATA_ABORT_L:
                ftype = VM_PROT_EXECUTE | VM_PROT_READ | VM_PROT_WRITE;
                break;
        default:
                panic("%s: Invalid exception (esr = %lx)", __func__, esr);
        }

        map = &vm->vmspace->vm_map;
        rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
        if (rv != KERN_SUCCESS)
                return (EFAULT);

        return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
        struct vm *vm = vcpu->vm;
        int error, i;
        struct thread *td;

        error = 0;
        td = curthread;

        CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

        /*
         * Wait until all 'active_cpus' have suspended themselves.
         *
         * Since a VM may be suspended at any time including when one or
         * more vcpus are doing a rendezvous we need to call the rendezvous
         * handler while we are waiting to prevent a deadlock.
         */
        vcpu_lock(vcpu);
        while (error == 0) {
                if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
                        break;

                vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
                msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
                vcpu_require_state_locked(vcpu, VCPU_FROZEN);
                if (td_ast_pending(td, TDA_SUSPEND)) {
                        vcpu_unlock(vcpu);
                        error = thread_check_susp(td, false);
                        vcpu_lock(vcpu);
                }
        }
        vcpu_unlock(vcpu);

        /*
         * Wakeup the other sleeping vcpus and return to userspace.
         */
        for (i = 0; i < vm->maxcpus; i++) {
                if (CPU_ISSET(i, &vm->suspended_cpus)) {
                        vcpu_notify_event(vm_vcpu(vm, i));
                }
        }

        *retu = true;
        return (error);
}

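/*
 * Run the vcpu until an exit that must be handled in userspace.  Guest FPU
 * state is switched in around each entry, and exits that can be handled in
 * the kernel (MMIO, register emulation, WFI, paging, ...) are processed
 * in-place before resuming the guest.
 */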
int
vm_run(struct vcpu *vcpu)
{
        struct vm *vm = vcpu->vm;
        struct vm_eventinfo evinfo;
        int error, vcpuid;
        struct vm_exit *vme;
        bool retu;
        pmap_t pmap;

        vcpuid = vcpu->vcpuid;

        if (!CPU_ISSET(vcpuid, &vm->active_cpus))
                return (EINVAL);

        if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
                return (EINVAL);

        pmap = vmspace_pmap(vm->vmspace);
        vme = &vcpu->exitinfo;
        evinfo.rptr = NULL;
        evinfo.sptr = &vm->suspend;
        evinfo.iptr = NULL;
restart:
        critical_enter();

        restore_guest_fpustate(vcpu);

        vcpu_require_state(vcpu, VCPU_RUNNING);
        error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
        vcpu_require_state(vcpu, VCPU_FROZEN);

        save_guest_fpustate(vcpu);

        critical_exit();

        if (error == 0) {
                retu = false;
                switch (vme->exitcode) {
                case VM_EXITCODE_INST_EMUL:
                        vcpu->nextpc = vme->pc + vme->inst_length;
                        error = vm_handle_inst_emul(vcpu, &retu);
                        break;

                case VM_EXITCODE_REG_EMUL:
                        vcpu->nextpc = vme->pc + vme->inst_length;
                        error = vm_handle_reg_emul(vcpu, &retu);
                        break;

                case VM_EXITCODE_HVC:
                        /*
                         * The HVC instruction saves the address for the
                         * next instruction as the return address.
                         */
                        vcpu->nextpc = vme->pc;
                        /*
                         * The PSCI call can change the exit information in the
                         * case of suspend/reset/poweroff/cpu off/cpu on.
                         */
                        error = vm_handle_smccc_call(vcpu, vme, &retu);
                        break;

                case VM_EXITCODE_WFI:
                        vcpu->nextpc = vme->pc + vme->inst_length;
                        error = vm_handle_wfi(vcpu, vme, &retu);
                        break;

                case VM_EXITCODE_PAGING:
                        vcpu->nextpc = vme->pc;
                        error = vm_handle_paging(vcpu, &retu);
                        break;

                case VM_EXITCODE_SUSPENDED:
                        vcpu->nextpc = vme->pc;
                        error = vm_handle_suspend(vcpu, &retu);
                        break;

                default:
                        /* Handle in userland */
                        vcpu->nextpc = vme->pc;
                        retu = true;
                        break;
                }
        }

        if (error == 0 && retu == false)
                goto restart;

        return (error);
}