GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/riscv/vmm/vmm.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2015 Mihai Carabas <[email protected]>
 * Copyright (c) 2024 Ruslan Bukin <[email protected]>
 *
 * This software was developed by the University of Cambridge Computer
 * Laboratory (Department of Computer Science and Technology) under Innovate
 * UK project 105694, "Digital Security by Design (DSbD) Technology Platform
 * Prototype".
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/riscvreg.h>
#include <machine/cpu.h>
#include <machine/fpe.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/vm.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <dev/pci/pcireg.h>

#include <dev/vmm/vmm_dev.h>
#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_mem.h>

#include "vmm_stat.h"
#include "riscv.h"

#include "vmm_aplic.h"

struct vcpu {
	int		flags;
	enum vcpu_state	state;
	struct mtx	mtx;
	int		hostcpu;	/* host cpuid this vcpu last ran on */
	int		vcpuid;
	void		*stats;
	struct vm_exit	exitinfo;
	uint64_t	nextpc;		/* (x) next instruction to execute */
	struct vm	*vm;		/* (o) */
	void		*cookie;	/* (i) cpu-specific data */
	struct fpreg	*guestfpu;	/* (a,i) guest fpu state */
};

#define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
#define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define	vcpu_lock_destroy(v)	mtx_destroy(&((v)->mtx))
#define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
#define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
#define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)

struct vmm_mmio_region {
	uint64_t start;
	uint64_t end;
	mem_region_read_t read;
	mem_region_write_t write;
};
#define	VM_MAX_MMIO_REGIONS	4

/*
 * Initialization:
 * (o) initialized the first time the VM is created
 * (i) initialized when VM is created and when it is reinitialized
 * (x) initialized before use
 */
struct vm {
	void		*cookie;		/* (i) cpu-specific data */
	volatile cpuset_t active_cpus;		/* (i) active vcpus */
	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
	int		suspend;		/* (i) stop VM execution */
	bool		dying;			/* (o) is dying */
	volatile cpuset_t suspended_cpus;	/* (i) suspended vcpus */
	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
	struct vmspace	*vmspace;		/* (o) guest's address space */
	struct vm_mem	mem;			/* (i) [m+v] guest memory */
	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
	struct vcpu	**vcpu;			/* (i) guest vcpus */
	struct vmm_mmio_region mmio_region[VM_MAX_MMIO_REGIONS];
						/* (o) guest MMIO regions */
	/* The following describe the vm cpu topology */
	uint16_t	sockets;		/* (o) num of sockets */
	uint16_t	cores;			/* (o) num of cores/socket */
	uint16_t	threads;		/* (o) num of threads/core */
	uint16_t	maxcpus;		/* (o) max pluggable cpus */
	struct sx	vcpus_init_lock;	/* (o) */
};

static bool vmm_initialized = false;

static MALLOC_DEFINE(M_VMM, "vmm", "vmm");

/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

static int vmm_ipinum;
SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
    "IPI vector used for vcpu notifications");

u_int vm_maxcpu;
SYSCTL_UINT(_hw_vmm, OID_AUTO, maxcpu, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &vm_maxcpu, 0, "Maximum number of vCPUs");

static void vcpu_notify_event_locked(struct vcpu *vcpu);

/* global statistics */
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_IRQ, "number of vmexits for an irq");
VMM_STAT(VMEXIT_UNHANDLED, "number of vmexits for an unhandled exception");

/*
 * Upper limit on vm_maxcpu. We could increase this to 28 bits, but this
 * is a safe value for now.
 */
#define	VM_MAXCPU	MIN(0xffff - 1, CPU_SETSIZE)

static void
vcpu_cleanup(struct vcpu *vcpu, bool destroy)
{
	vmmops_vcpu_cleanup(vcpu->cookie);
	vcpu->cookie = NULL;
	if (destroy) {
		vmm_stat_free(vcpu->stats);
		fpu_save_area_free(vcpu->guestfpu);
		vcpu_lock_destroy(vcpu);
	}
}

static struct vcpu *
vcpu_alloc(struct vm *vm, int vcpu_id)
{
	struct vcpu *vcpu;

	KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
	    ("vcpu_alloc: invalid vcpu %d", vcpu_id));

	vcpu = malloc(sizeof(*vcpu), M_VMM, M_WAITOK | M_ZERO);
	vcpu_lock_init(vcpu);
	vcpu->state = VCPU_IDLE;
	vcpu->hostcpu = NOCPU;
	vcpu->vcpuid = vcpu_id;
	vcpu->vm = vm;
	vcpu->guestfpu = fpu_save_area_alloc();
	vcpu->stats = vmm_stat_alloc();
	return (vcpu);
}

static void
vcpu_init(struct vcpu *vcpu)
{
	vcpu->cookie = vmmops_vcpu_init(vcpu->vm->cookie, vcpu, vcpu->vcpuid);
	MPASS(vcpu->cookie != NULL);
	fpu_save_area_reset(vcpu->guestfpu);
	vmm_stat_init(vcpu->stats);
}

struct vm_exit *
vm_exitinfo(struct vcpu *vcpu)
{
	return (&vcpu->exitinfo);
}

static int
vmm_init(void)
{

	vm_maxcpu = mp_ncpus;

	TUNABLE_INT_FETCH("hw.vmm.maxcpu", &vm_maxcpu);

	if (vm_maxcpu > VM_MAXCPU) {
		printf("vmm: vm_maxcpu clamped to %u\n", VM_MAXCPU);
		vm_maxcpu = VM_MAXCPU;
	}

	if (vm_maxcpu == 0)
		vm_maxcpu = 1;

	return (vmmops_modinit());
}

static int
vmm_handler(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = vmmdev_init();
		if (error != 0)
			break;
		error = vmm_init();
		if (error == 0)
			vmm_initialized = true;
		else
			(void)vmmdev_cleanup();
		break;
	case MOD_UNLOAD:
		error = vmmdev_cleanup();
		if (error == 0 && vmm_initialized) {
			error = vmmops_modcleanup();
			if (error) {
				/*
				 * Something bad happened - prevent new
				 * VMs from being created
				 */
				vmm_initialized = false;
			}
		}
		break;
	default:
		error = 0;
		break;
	}
	return (error);
}

static moduledata_t vmm_kmod = {
	"vmm",
	vmm_handler,
	NULL
};

/*
 * vmm initialization has the following dependencies:
 *
 * - vmm device initialization requires an initialized devfs.
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_DEVFS + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

static void
vm_init(struct vm *vm, bool create)
{
	int i;

	vm->cookie = vmmops_init(vm, vmspace_pmap(vm->vmspace));
	MPASS(vm->cookie != NULL);

	CPU_ZERO(&vm->active_cpus);
	CPU_ZERO(&vm->debug_cpus);

	vm->suspend = 0;
	CPU_ZERO(&vm->suspended_cpus);

	memset(vm->mmio_region, 0, sizeof(vm->mmio_region));

	if (!create) {
		for (i = 0; i < vm->maxcpus; i++) {
			if (vm->vcpu[i] != NULL)
				vcpu_init(vm->vcpu[i]);
		}
	}
}

void
vm_disable_vcpu_creation(struct vm *vm)
{
	sx_xlock(&vm->vcpus_init_lock);
	vm->dying = true;
	sx_xunlock(&vm->vcpus_init_lock);
}

struct vcpu *
vm_alloc_vcpu(struct vm *vm, int vcpuid)
{
	struct vcpu *vcpu;

	if (vcpuid < 0 || vcpuid >= vm_get_maxcpus(vm))
		return (NULL);

	/* Some interrupt controllers may have a CPU limit */
	if (vcpuid >= aplic_max_cpu_count(vm->cookie))
		return (NULL);

	vcpu = (struct vcpu *)
	    atomic_load_acq_ptr((uintptr_t *)&vm->vcpu[vcpuid]);
	if (__predict_true(vcpu != NULL))
		return (vcpu);

	sx_xlock(&vm->vcpus_init_lock);
	vcpu = vm->vcpu[vcpuid];
	if (vcpu == NULL && !vm->dying) {
		vcpu = vcpu_alloc(vm, vcpuid);
		vcpu_init(vcpu);

		/*
		 * Ensure vCPU is fully created before updating pointer
		 * to permit unlocked reads above.
		 */
		atomic_store_rel_ptr((uintptr_t *)&vm->vcpu[vcpuid],
		    (uintptr_t)vcpu);
	}
	sx_xunlock(&vm->vcpus_init_lock);
	return (vcpu);
}

void
vm_slock_vcpus(struct vm *vm)
{
	sx_slock(&vm->vcpus_init_lock);
}

void
vm_unlock_vcpus(struct vm *vm)
{
	sx_unlock(&vm->vcpus_init_lock);
}

int
vm_create(const char *name, struct vm **retvm)
{
	struct vm *vm;
	struct vmspace *vmspace;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = vmmops_vmspace_alloc(0, 1ul << 39);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VMM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	vm_mem_init(&vm->mem);
	sx_init(&vm->vcpus_init_lock, "vm vcpus");

	vm->sockets = 1;
	vm->cores = 1;			/* XXX backwards compatibility */
	vm->threads = 1;		/* XXX backwards compatibility */
	vm->maxcpus = vm_maxcpu;

	vm->vcpu = malloc(sizeof(*vm->vcpu) * vm->maxcpus, M_VMM,
	    M_WAITOK | M_ZERO);

	vm_init(vm, true);

	*retvm = vm;
	return (0);
}

void
vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
    uint16_t *threads, uint16_t *maxcpus)
{
	*sockets = vm->sockets;
	*cores = vm->cores;
	*threads = vm->threads;
	*maxcpus = vm->maxcpus;
}

uint16_t
vm_get_maxcpus(struct vm *vm)
{
	return (vm->maxcpus);
}

int
vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
    uint16_t threads, uint16_t maxcpus)
{
	/* Ignore maxcpus. */
	if ((sockets * cores * threads) > vm->maxcpus)
		return (EINVAL);
	vm->sockets = sockets;
	vm->cores = cores;
	vm->threads = threads;
	return (0);
}

static void
vm_cleanup(struct vm *vm, bool destroy)
{
	int i;

	if (destroy)
		vm_xlock_memsegs(vm);
	else
		vm_assert_memseg_xlocked(vm);

	aplic_detach_from_vm(vm->cookie);

	for (i = 0; i < vm->maxcpus; i++) {
		if (vm->vcpu[i] != NULL)
			vcpu_cleanup(vm->vcpu[i], destroy);
	}

	vmmops_cleanup(vm->cookie);

	vm_mem_cleanup(vm);
	if (destroy) {
		vm_mem_destroy(vm);

		vmmops_vmspace_free(vm->vmspace);
		vm->vmspace = NULL;

		for (i = 0; i < vm->maxcpus; i++)
			free(vm->vcpu[i], M_VMM);
		free(vm->vcpu, M_VMM);
		sx_destroy(&vm->vcpus_init_lock);
	}
}

void
vm_destroy(struct vm *vm)
{

	vm_cleanup(vm, true);

	free(vm, M_VMM);
}

int
vm_reinit(struct vm *vm)
{
	int error;

	/*
	 * A virtual machine can be reset only if all vcpus are suspended.
	 */
	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
		vm_cleanup(vm, false);
		vm_init(vm, false);
		error = 0;
	} else {
		error = EBUSY;
	}

	return (error);
}

const char *
vm_name(struct vm *vm)
{
	return (vm->name);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *is_fault)
{
	return (vmmops_gla2gpa(vcpu->cookie, paging, gla, prot, gpa, is_fault));
}

void
vm_register_inst_handler(struct vm *vm, uint64_t start, uint64_t size,
    mem_region_read_t mmio_read, mem_region_write_t mmio_write)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == 0 &&
		    vm->mmio_region[i].end == 0) {
			vm->mmio_region[i].start = start;
			vm->mmio_region[i].end = start + size;
			vm->mmio_region[i].read = mmio_read;
			vm->mmio_region[i].write = mmio_write;
			return;
		}
	}

	panic("%s: No free MMIO region", __func__);
}

void
vm_deregister_inst_handler(struct vm *vm, uint64_t start, uint64_t size)
{
	int i;

	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start == start &&
		    vm->mmio_region[i].end == start + size) {
			memset(&vm->mmio_region[i], 0,
			    sizeof(vm->mmio_region[i]));
			return;
		}
	}

	panic("%s: Invalid MMIO region: %lx - %lx", __func__, start,
	    start + size);
}

static int
vm_handle_inst_emul(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vie *vie;
	struct hyp *hyp;
	uint64_t fault_ipa;
	struct vm_guest_paging *paging;
	struct vmm_mmio_region *vmr;
	int error, i;

	vm = vcpu->vm;
	hyp = vm->cookie;
	if (!hyp->aplic_attached)
		goto out_user;

	vme = &vcpu->exitinfo;
	vie = &vme->u.inst_emul.vie;
	paging = &vme->u.inst_emul.paging;

	fault_ipa = vme->u.inst_emul.gpa;

	vmr = NULL;
	for (i = 0; i < nitems(vm->mmio_region); i++) {
		if (vm->mmio_region[i].start <= fault_ipa &&
		    vm->mmio_region[i].end > fault_ipa) {
			vmr = &vm->mmio_region[i];
			break;
		}
	}
	if (vmr == NULL)
		goto out_user;

	error = vmm_emulate_instruction(vcpu, fault_ipa, vie, paging,
	    vmr->read, vmr->write, retu);
	return (error);

out_user:
	*retu = true;
	return (0);
}

int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
	int i;

	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
		return (EINVAL);

	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
		VM_CTR2(vm, "virtual machine already suspended %d/%d",
		    vm->suspend, how);
		return (EALREADY);
	}

	VM_CTR1(vm, "virtual machine successfully suspended %d", how);

	/*
	 * Notify all active vcpus that they are now suspended.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->active_cpus))
			vcpu_notify_event(vm_vcpu(vm, i));
	}

	return (0);
}

void
vm_exit_suspended(struct vcpu *vcpu, uint64_t pc)
{
	struct vm *vm = vcpu->vm;
	struct vm_exit *vmexit;

	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_SUSPENDED;
	vmexit->u.suspended.how = vm->suspend;
}

void
vm_exit_debug(struct vcpu *vcpu, uint64_t pc)
{
	struct vm_exit *vmexit;

	vmexit = vm_exitinfo(vcpu);
	vmexit->pc = pc;
	vmexit->inst_length = 4;
	vmexit->exitcode = VM_EXITCODE_DEBUG;
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm *vm = vcpu->vm;

	if (CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
		return (EBUSY);

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->active_cpus);
	return (0);
}

int
vm_suspend_cpu(struct vm *vm, struct vcpu *vcpu)
{
	if (vcpu == NULL) {
		vm->debug_cpus = vm->active_cpus;
		for (int i = 0; i < vm->maxcpus; i++) {
			if (CPU_ISSET(i, &vm->active_cpus))
				vcpu_notify_event(vm_vcpu(vm, i));
		}
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->active_cpus))
			return (EINVAL);

		CPU_SET_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
		vcpu_notify_event(vcpu);
	}
	return (0);
}

int
vm_resume_cpu(struct vm *vm, struct vcpu *vcpu)
{

	if (vcpu == NULL) {
		CPU_ZERO(&vm->debug_cpus);
	} else {
		if (!CPU_ISSET(vcpu->vcpuid, &vm->debug_cpus))
			return (EINVAL);

		CPU_CLR_ATOMIC(vcpu->vcpuid, &vm->debug_cpus);
	}
	return (0);
}

int
vcpu_debugged(struct vcpu *vcpu)
{

	return (CPU_ISSET(vcpu->vcpuid, &vcpu->vm->debug_cpus));
}

cpuset_t
vm_active_cpus(struct vm *vm)
{

	return (vm->active_cpus);
}

cpuset_t
vm_debug_cpus(struct vm *vm)
{

	return (vm->debug_cpus);
}

cpuset_t
vm_suspended_cpus(struct vm *vm)
{

	return (vm->suspended_cpus);
}

void *
vcpu_stats(struct vcpu *vcpu)
{

	return (vcpu->stats);
}

/*
 * This function is called to ensure that a vcpu "sees" a pending event
 * as soon as possible:
 * - If the vcpu thread is sleeping then it is woken up.
 * - If the vcpu is running on a different host_cpu then an IPI will be directed
 *   to the host_cpu to cause the vcpu to trap into the hypervisor.
 */
static void
vcpu_notify_event_locked(struct vcpu *vcpu)
{
	int hostcpu;

	hostcpu = vcpu->hostcpu;
	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
		if (hostcpu != curcpu) {
			ipi_cpu(hostcpu, vmm_ipinum);
		} else {
			/*
			 * If the 'vcpu' is running on 'curcpu' then it must
			 * be sending a notification to itself (e.g. SELF_IPI).
			 * The pending event will be picked up when the vcpu
			 * transitions back to guest context.
			 */
		}
	} else {
		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
		    "with hostcpu %d", vcpu->state, hostcpu));
		if (vcpu->state == VCPU_SLEEPING)
			wakeup_one(vcpu);
	}
}

void
vcpu_notify_event(struct vcpu *vcpu)
{
	vcpu_lock(vcpu);
	vcpu_notify_event_locked(vcpu);
	vcpu_unlock(vcpu);
}

struct vmspace *
vm_vmspace(struct vm *vm)
{
	return (vm->vmspace);
}

struct vm_mem *
vm_mem(struct vm *vm)
{
	return (&vm->mem);
}

static void
restore_guest_fpustate(struct vcpu *vcpu)
{

	/* Flush host state to the pcb. */
	fpe_state_save(curthread);

	/* Ensure the VFP state will be re-loaded when exiting the guest. */
	PCPU_SET(fpcurthread, NULL);

	/* restore guest FPU state */
	fpe_enable();
	fpe_restore(vcpu->guestfpu);

	/*
	 * The FPU is now "dirty" with the guest's state so turn on emulation
	 * to trap any access to the FPU by the host.
	 */
	fpe_disable();
}

static void
save_guest_fpustate(struct vcpu *vcpu)
{

	/* Save guest FPE state. */
	fpe_enable();
	fpe_store(vcpu->guestfpu);
	fpe_disable();

	KASSERT(PCPU_GET(fpcurthread) == NULL,
	    ("%s: fpcurthread set with guest registers", __func__));
}

static int
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
    bool from_idle)
{
	int error;

	vcpu_assert_locked(vcpu);

	/*
	 * State transitions from the vmmdev_ioctl() must always begin from
	 * the VCPU_IDLE state. This guarantees that there is only a single
	 * ioctl() operating on a vcpu at any point.
	 */
	if (from_idle) {
		while (vcpu->state != VCPU_IDLE) {
			vcpu_notify_event_locked(vcpu);
			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
		}
	} else {
		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
		    "vcpu idle state"));
	}

	if (vcpu->state == VCPU_RUNNING) {
		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
	} else {
		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
		    "vcpu that is not running", vcpu->hostcpu));
	}

	/*
	 * The following state transitions are allowed:
	 * IDLE -> FROZEN -> IDLE
	 * FROZEN -> RUNNING -> FROZEN
	 * FROZEN -> SLEEPING -> FROZEN
	 */
	switch (vcpu->state) {
	case VCPU_IDLE:
	case VCPU_RUNNING:
	case VCPU_SLEEPING:
		error = (newstate != VCPU_FROZEN);
		break;
	case VCPU_FROZEN:
		error = (newstate == VCPU_FROZEN);
		break;
	default:
		error = 1;
		break;
	}

	if (error)
		return (EBUSY);

	vcpu->state = newstate;
	if (newstate == VCPU_RUNNING)
		vcpu->hostcpu = curcpu;
	else
		vcpu->hostcpu = NOCPU;

	if (newstate == VCPU_IDLE)
		wakeup(&vcpu->state);

	return (0);
}

static void
vcpu_require_state(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d\n", error, newstate);
}

static void
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
{
	int error;

	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
		panic("Error %d setting state to %d", error, newstate);
}

int
vm_get_capability(struct vcpu *vcpu, int type, int *retval)
{

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_getcap(vcpu->cookie, type, retval));
}

int
vm_set_capability(struct vcpu *vcpu, int type, int val)
{

	if (type < 0 || type >= VM_CAP_MAX)
		return (EINVAL);

	return (vmmops_setcap(vcpu->cookie, type, val));
}

struct vm *
vcpu_vm(struct vcpu *vcpu)
{

	return (vcpu->vm);
}

int
vcpu_vcpuid(struct vcpu *vcpu)
{

	return (vcpu->vcpuid);
}

void *
vcpu_get_cookie(struct vcpu *vcpu)
{

	return (vcpu->cookie);
}

struct vcpu *
vm_vcpu(struct vm *vm, int vcpuid)
{

	return (vm->vcpu[vcpuid]);
}

int
vcpu_set_state(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle)
{
	int error;

	vcpu_lock(vcpu);
	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
	vcpu_unlock(vcpu);

	return (error);
}

enum vcpu_state
vcpu_get_state(struct vcpu *vcpu, int *hostcpu)
{
	enum vcpu_state state;

	vcpu_lock(vcpu);
	state = vcpu->state;
	if (hostcpu != NULL)
		*hostcpu = vcpu->hostcpu;
	vcpu_unlock(vcpu);

	return (state);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *retval)
{

	if (reg >= VM_REG_LAST)
		return (EINVAL);

	return (vmmops_getreg(vcpu->cookie, reg, retval));
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	if (reg >= VM_REG_LAST)
		return (EINVAL);
	error = vmmops_setreg(vcpu->cookie, reg, val);
	if (error || reg != VM_REG_GUEST_SEPC)
		return (error);

	vcpu->nextpc = val;

	return (0);
}

void *
vm_get_cookie(struct vm *vm)
{

	return (vm->cookie);
}

int
vm_inject_exception(struct vcpu *vcpu, uint64_t scause)
{

	return (vmmops_exception(vcpu->cookie, scause));
}

int
vm_attach_aplic(struct vm *vm, struct vm_aplic_descr *descr)
{

	return (aplic_attach_to_vm(vm->cookie, descr));
}

int
vm_assert_irq(struct vm *vm, uint32_t irq)
{

	return (aplic_inject_irq(vm->cookie, -1, irq, true));
}

int
vm_deassert_irq(struct vm *vm, uint32_t irq)
{

	return (aplic_inject_irq(vm->cookie, -1, irq, false));
}

int
vm_raise_msi(struct vm *vm, uint64_t msg, uint64_t addr, int bus, int slot,
    int func)
{

	return (aplic_inject_msi(vm->cookie, msg, addr));
}

static int
vm_handle_wfi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct vm *vm;

	vm = vcpu->vm;
	vcpu_lock(vcpu);
	while (1) {
		if (vm->suspend)
			break;

		if (aplic_check_pending(vcpu->cookie))
			break;

		if (riscv_check_ipi(vcpu->cookie, false))
			break;

		if (riscv_check_interrupts_pending(vcpu->cookie))
			break;

		if (vcpu_should_yield(vcpu))
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		/*
		 * XXX msleep_spin() cannot be interrupted by signals so
		 * wake up periodically to check pending signals.
		 */
		msleep_spin(vcpu, &vcpu->mtx, "vmidle", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
	}
	vcpu_unlock(vcpu);

	*retu = false;

	return (0);
}

static int
vm_handle_paging(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm;
	struct vm_exit *vme;
	struct vm_map *map;
	uint64_t addr;
	pmap_t pmap;
	int ftype, rv;

	vm = vcpu->vm;
	vme = &vcpu->exitinfo;

	pmap = vmspace_pmap(vm->vmspace);
	addr = (vme->htval << 2) & ~(PAGE_SIZE - 1);

	dprintf("%s: %lx\n", __func__, addr);

	switch (vme->scause) {
	case SCAUSE_STORE_GUEST_PAGE_FAULT:
		ftype = VM_PROT_WRITE;
		break;
	case SCAUSE_FETCH_GUEST_PAGE_FAULT:
		ftype = VM_PROT_EXECUTE;
		break;
	case SCAUSE_LOAD_GUEST_PAGE_FAULT:
		ftype = VM_PROT_READ;
		break;
	default:
		panic("unknown page trap: %lu", vme->scause);
	}

	/* The page exists, but the page table needs to be updated. */
	if (pmap_fault(pmap, addr, ftype))
		return (0);

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, addr, ftype, VM_FAULT_NORMAL, NULL);
	if (rv != KERN_SUCCESS) {
		printf("%s: vm_fault failed, addr %lx, ftype %d, err %d\n",
		    __func__, addr, ftype, rv);
		return (EFAULT);
	}

	return (0);
}

static int
vm_handle_suspend(struct vcpu *vcpu, bool *retu)
{
	struct vm *vm = vcpu->vm;
	int error, i;
	struct thread *td;

	error = 0;
	td = curthread;

	CPU_SET_ATOMIC(vcpu->vcpuid, &vm->suspended_cpus);

	/*
	 * Wait until all 'active_cpus' have suspended themselves.
	 *
	 * Since a VM may be suspended at any time including when one or
	 * more vcpus are doing a rendezvous we need to call the rendezvous
	 * handler while we are waiting to prevent a deadlock.
	 */
	vcpu_lock(vcpu);
	while (error == 0) {
		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0)
			break;

		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
		msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
		if (td_ast_pending(td, TDA_SUSPEND)) {
			vcpu_unlock(vcpu);
			error = thread_check_susp(td, false);
			vcpu_lock(vcpu);
		}
	}
	vcpu_unlock(vcpu);

	/*
	 * Wakeup the other sleeping vcpus and return to userspace.
	 */
	for (i = 0; i < vm->maxcpus; i++) {
		if (CPU_ISSET(i, &vm->suspended_cpus)) {
			vcpu_notify_event(vm_vcpu(vm, i));
		}
	}

	*retu = true;
	return (error);
}

int
vm_run(struct vcpu *vcpu)
{
	struct vm_eventinfo evinfo;
	struct vm_exit *vme;
	struct vm *vm;
	pmap_t pmap;
	int error;
	int vcpuid;
	bool retu;

	vm = vcpu->vm;

	dprintf("%s\n", __func__);

	vcpuid = vcpu->vcpuid;

	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
		return (EINVAL);

	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vme = &vcpu->exitinfo;
	evinfo.rptr = NULL;
	evinfo.sptr = &vm->suspend;
	evinfo.iptr = NULL;
restart:
	critical_enter();

	restore_guest_fpustate(vcpu);

	vcpu_require_state(vcpu, VCPU_RUNNING);
	error = vmmops_run(vcpu->cookie, vcpu->nextpc, pmap, &evinfo);
	vcpu_require_state(vcpu, VCPU_FROZEN);

	save_guest_fpustate(vcpu);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_INST_EMUL:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_inst_emul(vcpu, &retu);
			break;
		case VM_EXITCODE_WFI:
			vcpu->nextpc = vme->pc + vme->inst_length;
			error = vm_handle_wfi(vcpu, vme, &retu);
			break;
		case VM_EXITCODE_ECALL:
			/* Handle in userland. */
			vcpu->nextpc = vme->pc + vme->inst_length;
			retu = true;
			break;
		case VM_EXITCODE_PAGING:
			vcpu->nextpc = vme->pc;
			error = vm_handle_paging(vcpu, &retu);
			break;
		case VM_EXITCODE_BOGUS:
			vcpu->nextpc = vme->pc;
			retu = false;
			error = 0;
			break;
		case VM_EXITCODE_SUSPENDED:
			vcpu->nextpc = vme->pc;
			error = vm_handle_suspend(vcpu, &retu);
			break;
		default:
			/* Handle in userland. */
			vcpu->nextpc = vme->pc;
			retu = true;
			break;
		}
	}

	if (error == 0 && retu == false)
		goto restart;

	return (error);
}