Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/arm64/vmm/vmm_arm64.c
107847 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (C) 2015 Mihai Carabas <[email protected]>
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE.
27
*/
28
29
#include <sys/cdefs.h>
30
#include <sys/param.h>
31
#include <sys/systm.h>
32
#include <sys/smp.h>
33
#include <sys/kernel.h>
34
#include <sys/malloc.h>
35
#include <sys/mman.h>
36
#include <sys/pcpu.h>
37
#include <sys/proc.h>
38
#include <sys/sysctl.h>
39
#include <sys/lock.h>
40
#include <sys/mutex.h>
41
#include <sys/vmem.h>
42
43
#include <vm/vm.h>
44
#include <vm/pmap.h>
45
#include <vm/vm_extern.h>
46
#include <vm/vm_map.h>
47
#include <vm/vm_page.h>
48
#include <vm/vm_param.h>
49
50
#include <machine/vm.h>
51
#include <machine/cpufunc.h>
52
#include <machine/cpu.h>
53
#include <machine/machdep.h>
54
#include <machine/vmm.h>
55
#include <machine/atomic.h>
56
#include <machine/hypervisor.h>
57
#include <machine/pmap.h>
58
59
#include <dev/vmm/vmm_mem.h>
60
#include <dev/vmm/vmm_vm.h>
61
62
#include "mmu.h"
63
#include "arm64.h"
64
#include "hyp.h"
65
#include "reset.h"
66
#include "io/vgic.h"
67
#include "io/vgic_v3.h"
68
#include "io/vtimer.h"
69
#include "vmm_handlers.h"
70
#include "vmm_stat.h"
71
72
#define HANDLED 1
73
#define UNHANDLED 0
74
75
/* Number of bits in an EL2 virtual address */
76
#define EL2_VIRT_BITS 48
77
CTASSERT((1ul << EL2_VIRT_BITS) >= HYP_VM_MAX_ADDRESS);
78
79
/* TODO: Move the host hypctx off the stack */
80
#define VMM_STACK_PAGES 4
81
#define VMM_STACK_SIZE (VMM_STACK_PAGES * PAGE_SIZE)
82
83
static int vmm_pmap_levels, vmm_virt_bits, vmm_max_ipa_bits;
84
85
/*
 * EL2 register values computed once by vmmops_modinit() and handed to
 * arm_setup_vectors() so each CPU can program them into the hypervisor.
 */
struct vmm_init_regs {
	uint64_t	tcr_el2;	/* Only filled in and used when not in VHE */
	uint64_t	vtcr_el2;	/* Stage 2 translation control value */
};
90
91
MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP");
92
93
extern char hyp_init_vectors[];
94
extern char hyp_vectors[];
95
extern char hyp_stub_vectors[];
96
97
static vm_paddr_t hyp_code_base;
98
static size_t hyp_code_len;
99
100
static char *stack[MAXCPU];
101
static vm_offset_t stack_hyp_va[MAXCPU];
102
103
static vmem_t *el2_mem_alloc;
104
105
static void arm_setup_vectors(void *arg);
106
107
DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);
108
109
static inline void
110
arm64_set_active_vcpu(struct hypctx *hypctx)
111
{
112
DPCPU_SET(vcpu, hypctx);
113
}
114
115
struct hypctx *
116
arm64_get_active_vcpu(void)
117
{
118
return (DPCPU_GET(vcpu));
119
}
120
121
static void
122
arm_setup_vectors(void *arg)
123
{
124
struct vmm_init_regs *el2_regs;
125
uintptr_t stack_top;
126
uint32_t sctlr_el2;
127
register_t daif;
128
129
el2_regs = arg;
130
arm64_set_active_vcpu(NULL);
131
132
/*
133
* Configure the system control register for EL2:
134
*
135
* SCTLR_EL2_M: MMU on
136
* SCTLR_EL2_C: Data cacheability not affected
137
* SCTLR_EL2_I: Instruction cacheability not affected
138
* SCTLR_EL2_A: Instruction alignment check
139
* SCTLR_EL2_SA: Stack pointer alignment check
140
* SCTLR_EL2_WXN: Treat writable memory as execute never
141
* ~SCTLR_EL2_EE: Data accesses are little-endian
142
*/
143
sctlr_el2 = SCTLR_EL2_RES1;
144
sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I;
145
sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA;
146
sctlr_el2 |= SCTLR_EL2_WXN;
147
sctlr_el2 &= ~SCTLR_EL2_EE;
148
149
daif = intr_disable();
150
151
if (in_vhe()) {
152
WRITE_SPECIALREG(vtcr_el2, el2_regs->vtcr_el2);
153
} else {
154
/*
155
* Install the temporary vectors which will be responsible for
156
* initializing the VMM when we next trap into EL2.
157
*
158
* x0: the exception vector table responsible for hypervisor
159
* initialization on the next call.
160
*/
161
vmm_call_hyp(vtophys(&vmm_hyp_code));
162
163
/* Create and map the hypervisor stack */
164
stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE;
165
166
/* Special call to initialize EL2 */
167
vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2,
168
sctlr_el2, el2_regs->vtcr_el2);
169
}
170
171
intr_restore(daif);
172
}
173
174
static void
175
arm_teardown_vectors(void *arg)
176
{
177
register_t daif;
178
179
/*
180
* vmm_cleanup() will disable the MMU. For the next few instructions,
181
* before the hardware disables the MMU, one of the following is
182
* possible:
183
*
184
* a. The instruction addresses are fetched with the MMU disabled,
185
* and they must represent the actual physical addresses. This will work
186
* because we call the vmm_cleanup() function by its physical address.
187
*
188
* b. The instruction addresses are fetched using the old translation
189
* tables. This will work because we have an identity mapping in place
190
* in the translation tables and vmm_cleanup() is called by its physical
191
* address.
192
*/
193
daif = intr_disable();
194
/* TODO: Invalidate the cache */
195
vmm_call_hyp(HYP_CLEANUP, vtophys(hyp_stub_vectors));
196
intr_restore(daif);
197
198
arm64_set_active_vcpu(NULL);
199
}
200
201
static uint64_t
202
vmm_vtcr_el2_sl(u_int levels)
203
{
204
#if PAGE_SIZE == PAGE_SIZE_4K
205
switch (levels) {
206
case 2:
207
return (VTCR_EL2_SL0_4K_LVL2);
208
case 3:
209
return (VTCR_EL2_SL0_4K_LVL1);
210
case 4:
211
return (VTCR_EL2_SL0_4K_LVL0);
212
default:
213
panic("%s: Invalid number of page table levels %u", __func__,
214
levels);
215
}
216
#elif PAGE_SIZE == PAGE_SIZE_16K
217
switch (levels) {
218
case 2:
219
return (VTCR_EL2_SL0_16K_LVL2);
220
case 3:
221
return (VTCR_EL2_SL0_16K_LVL1);
222
case 4:
223
return (VTCR_EL2_SL0_16K_LVL0);
224
default:
225
panic("%s: Invalid number of page table levels %u", __func__,
226
levels);
227
}
228
#else
229
#error Unsupported page size
230
#endif
231
}
232
233
int
234
vmmops_modinit(int ipinum)
235
{
236
struct vmm_init_regs el2_regs;
237
vm_offset_t next_hyp_va;
238
vm_paddr_t vmm_base;
239
uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field;
240
int cpu, i;
241
bool rv __diagused;
242
243
if (!has_hyp()) {
244
printf(
245
"vmm: Processor doesn't have support for virtualization\n");
246
return (ENXIO);
247
}
248
249
if (!vgic_present()) {
250
printf("vmm: No vgic found\n");
251
return (ENODEV);
252
}
253
254
get_kernel_reg(ID_AA64MMFR0_EL1, &id_aa64mmfr0_el1);
255
pa_range_field = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1);
256
/*
257
* Use 3 levels to give us up to 39 bits with 4k pages, or
258
* 47 bits with 16k pages.
259
*/
260
/* TODO: Check the number of levels for 64k pages */
261
vmm_pmap_levels = 3;
262
switch (pa_range_field) {
263
case ID_AA64MMFR0_PARange_4G:
264
printf("vmm: Not enough physical address bits\n");
265
return (ENXIO);
266
case ID_AA64MMFR0_PARange_64G:
267
vmm_virt_bits = 36;
268
#if PAGE_SIZE == PAGE_SIZE_16K
269
vmm_pmap_levels = 2;
270
#endif
271
break;
272
default:
273
vmm_virt_bits = 39;
274
break;
275
}
276
pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT;
277
278
if (!in_vhe()) {
279
/* Initialise the EL2 MMU */
280
if (!vmmpmap_init()) {
281
printf("vmm: Failed to init the EL2 MMU\n");
282
return (ENOMEM);
283
}
284
}
285
286
/* Set up the stage 2 pmap callbacks */
287
MPASS(pmap_clean_stage2_tlbi == NULL);
288
pmap_clean_stage2_tlbi = vmm_clean_s2_tlbi;
289
pmap_stage2_invalidate_range = vmm_s2_tlbi_range;
290
pmap_stage2_invalidate_all = vmm_s2_tlbi_all;
291
292
if (!in_vhe()) {
293
/*
294
* Create an allocator for the virtual address space used by
295
* EL2. EL2 code is identity-mapped; the allocator is used to
296
* find space for VM structures.
297
*/
298
el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0,
299
M_WAITOK);
300
301
/* Create the mappings for the hypervisor translation table. */
302
hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code);
303
304
/* We need an physical identity mapping for when we activate the MMU */
305
hyp_code_base = vmm_base = vtophys(&vmm_hyp_code);
306
rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base,
307
VM_PROT_READ | VM_PROT_EXECUTE);
308
MPASS(rv);
309
310
next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE);
311
312
/* Create a per-CPU hypervisor stack */
313
CPU_FOREACH(cpu) {
314
stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP, M_WAITOK | M_ZERO);
315
stack_hyp_va[cpu] = next_hyp_va;
316
317
for (i = 0; i < VMM_STACK_PAGES; i++) {
318
rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i),
319
PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)),
320
VM_PROT_READ | VM_PROT_WRITE);
321
MPASS(rv);
322
}
323
next_hyp_va += L2_SIZE;
324
}
325
326
el2_regs.tcr_el2 = TCR_EL2_RES1;
327
el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT,
328
TCR_EL2_PS_52BITS);
329
el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS);
330
el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA;
331
#if PAGE_SIZE == PAGE_SIZE_4K
332
el2_regs.tcr_el2 |= TCR_EL2_TG0_4K;
333
#elif PAGE_SIZE == PAGE_SIZE_16K
334
el2_regs.tcr_el2 |= TCR_EL2_TG0_16K;
335
#else
336
#error Unsupported page size
337
#endif
338
#ifdef SMP
339
el2_regs.tcr_el2 |= TCR_EL2_SH0_IS;
340
#endif
341
}
342
343
switch (pa_range_bits << TCR_EL2_PS_SHIFT) {
344
case TCR_EL2_PS_32BITS:
345
vmm_max_ipa_bits = 32;
346
break;
347
case TCR_EL2_PS_36BITS:
348
vmm_max_ipa_bits = 36;
349
break;
350
case TCR_EL2_PS_40BITS:
351
vmm_max_ipa_bits = 40;
352
break;
353
case TCR_EL2_PS_42BITS:
354
vmm_max_ipa_bits = 42;
355
break;
356
case TCR_EL2_PS_44BITS:
357
vmm_max_ipa_bits = 44;
358
break;
359
case TCR_EL2_PS_48BITS:
360
vmm_max_ipa_bits = 48;
361
break;
362
case TCR_EL2_PS_52BITS:
363
default:
364
vmm_max_ipa_bits = 52;
365
break;
366
}
367
368
/*
369
* Configure the Stage 2 translation control register:
370
*
371
* VTCR_IRGN0_WBWA: Translation table walks access inner cacheable
372
* normal memory
373
* VTCR_ORGN0_WBWA: Translation table walks access outer cacheable
374
* normal memory
375
* VTCR_EL2_TG0_4K/16K: Stage 2 uses the same page size as the kernel
376
* VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables
377
* VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner
378
* shareable
379
*/
380
el2_regs.vtcr_el2 = VTCR_EL2_RES1;
381
el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA;
382
el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits);
383
el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels);
384
#if PAGE_SIZE == PAGE_SIZE_4K
385
el2_regs.vtcr_el2 |= VTCR_EL2_TG0_4K;
386
#elif PAGE_SIZE == PAGE_SIZE_16K
387
el2_regs.vtcr_el2 |= VTCR_EL2_TG0_16K;
388
#else
389
#error Unsupported page size
390
#endif
391
#ifdef SMP
392
el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS;
393
#endif
394
/*
395
* If FEAT_LPA2 is enabled in the host then we need to enable it here
396
* so the page tables created by pmap.c are correct. The meaning of
397
* the shareability field changes to become address bits when this
398
* is set.
399
*/
400
if ((READ_SPECIALREG(tcr_el1) & TCR_DS) != 0) {
401
el2_regs.vtcr_el2 |= VTCR_EL2_DS;
402
el2_regs.vtcr_el2 |=
403
min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_52BIT);
404
} else {
405
el2_regs.vtcr_el2 |=
406
min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT);
407
}
408
409
smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs);
410
411
if (!in_vhe()) {
412
/* Add memory to the vmem allocator (checking there is space) */
413
if (vmm_base > (L2_SIZE + PAGE_SIZE)) {
414
/*
415
* Ensure there is an L2 block before the vmm code to check
416
* for buffer overflows on earlier data. Include the PAGE_SIZE
417
* of the minimum we can allocate.
418
*/
419
vmm_base -= L2_SIZE + PAGE_SIZE;
420
vmm_base = rounddown2(vmm_base, L2_SIZE);
421
422
/*
423
* Check there is memory before the vmm code to add.
424
*
425
* Reserve the L2 block at address 0 so NULL dereference will
426
* raise an exception.
427
*/
428
if (vmm_base > L2_SIZE)
429
vmem_add(el2_mem_alloc, L2_SIZE, vmm_base - L2_SIZE,
430
M_WAITOK);
431
}
432
433
/*
434
* Add the memory after the stacks. There is most of an L2 block
435
* between the last stack and the first allocation so this should
436
* be safe without adding more padding.
437
*/
438
if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE)
439
vmem_add(el2_mem_alloc, next_hyp_va,
440
HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK);
441
}
442
443
vgic_init();
444
vtimer_init();
445
446
return (0);
447
}
448
449
int
450
vmmops_modcleanup(void)
451
{
452
int cpu;
453
454
if (!in_vhe()) {
455
smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL);
456
457
CPU_FOREACH(cpu) {
458
vmmpmap_remove(stack_hyp_va[cpu],
459
VMM_STACK_PAGES * PAGE_SIZE, false);
460
}
461
462
vmmpmap_remove(hyp_code_base, hyp_code_len, false);
463
}
464
465
vtimer_cleanup();
466
467
if (!in_vhe()) {
468
vmmpmap_fini();
469
470
CPU_FOREACH(cpu)
471
free(stack[cpu], M_HYP);
472
}
473
474
pmap_clean_stage2_tlbi = NULL;
475
pmap_stage2_invalidate_range = NULL;
476
pmap_stage2_invalidate_all = NULL;
477
478
return (0);
479
}
480
481
static vm_size_t
482
el2_hyp_size(struct vm *vm)
483
{
484
return (round_page(sizeof(struct hyp) +
485
sizeof(struct hypctx *) * vm_get_maxcpus(vm)));
486
}
487
488
static vm_size_t
489
el2_hypctx_size(void)
490
{
491
return (round_page(sizeof(struct hypctx)));
492
}
493
494
static vm_offset_t
495
el2_map_enter(vm_offset_t data, vm_size_t size, vm_prot_t prot)
496
{
497
vmem_addr_t addr;
498
int err __diagused;
499
bool rv __diagused;
500
501
err = vmem_alloc(el2_mem_alloc, size, M_NEXTFIT | M_WAITOK, &addr);
502
MPASS(err == 0);
503
rv = vmmpmap_enter(addr, size, vtophys(data), prot);
504
MPASS(rv);
505
506
return (addr);
507
}
508
509
void *
510
vmmops_init(struct vm *vm, pmap_t pmap)
511
{
512
struct hyp *hyp;
513
vm_size_t size;
514
uint64_t idreg;
515
516
size = el2_hyp_size(vm);
517
hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
518
519
hyp->vm = vm;
520
hyp->vgic_attached = false;
521
522
get_kernel_reg(ID_AA64MMFR0_EL1, &idreg);
523
if (ID_AA64MMFR0_ECV_VAL(idreg) >= ID_AA64MMFR0_ECV_POFF)
524
hyp->feats |= HYP_FEAT_ECV_POFF;
525
526
switch (ID_AA64MMFR0_FGT_VAL(idreg)) {
527
case ID_AA64MMFR0_FGT_NONE:
528
break;
529
default:
530
case ID_AA64MMFR0_FGT_8_9:
531
hyp->feats |= HYP_FEAT_FGT2;
532
/* FALLTHROUGH */
533
case ID_AA64MMFR0_FGT_8_6:
534
hyp->feats |= HYP_FEAT_FGT;
535
break;
536
}
537
538
get_kernel_reg(ID_AA64MMFR1_EL1, &idreg);
539
if (ID_AA64MMFR1_HCX_VAL(idreg) >= ID_AA64MMFR1_HCX_IMPL)
540
hyp->feats |= HYP_FEAT_HCX;
541
542
vtimer_vminit(hyp);
543
vgic_vminit(hyp);
544
545
if (!in_vhe())
546
hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size,
547
VM_PROT_READ | VM_PROT_WRITE);
548
549
return (hyp);
550
}
551
552
void *
553
vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
554
{
555
struct hyp *hyp = vmi;
556
struct hypctx *hypctx;
557
vm_size_t size;
558
559
size = el2_hypctx_size();
560
hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);
561
562
KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
563
("%s: Invalid vcpuid %d", __func__, vcpuid));
564
hyp->ctx[vcpuid] = hypctx;
565
566
hypctx->hyp = hyp;
567
hypctx->vcpu = vcpu1;
568
569
reset_vm_el01_regs(hypctx);
570
reset_vm_el2_regs(hypctx);
571
572
vtimer_cpuinit(hypctx);
573
vgic_cpuinit(hypctx);
574
575
if (!in_vhe())
576
hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size,
577
VM_PROT_READ | VM_PROT_WRITE);
578
579
return (hypctx);
580
}
581
582
static int
583
arm_vmm_pinit(pmap_t pmap)
584
{
585
586
pmap_pinit_stage(pmap, PM_STAGE2, vmm_pmap_levels);
587
return (1);
588
}
589
590
struct vmspace *
591
vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
592
{
593
return (vmspace_alloc(min, max, arm_vmm_pinit));
594
}
595
596
void
597
vmmops_vmspace_free(struct vmspace *vmspace)
598
{
599
600
pmap_remove_pages(vmspace_pmap(vmspace));
601
vmspace_free(vmspace);
602
}
603
604
static inline void
605
arm64_print_hyp_regs(struct vm_exit *vme)
606
{
607
printf("esr_el2: 0x%016lx\n", vme->u.hyp.esr_el2);
608
printf("far_el2: 0x%016lx\n", vme->u.hyp.far_el2);
609
printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2);
610
printf("elr_el2: 0x%016lx\n", vme->pc);
611
}
612
613
static void
614
arm64_gen_inst_emul_data(struct hypctx *hypctx, uint32_t esr_iss,
615
struct vm_exit *vme_ret)
616
{
617
struct vm_guest_paging *paging;
618
struct vie *vie;
619
uint32_t esr_sas, reg_num;
620
621
/*
622
* Get the page address from HPFAR_EL2.
623
*/
624
vme_ret->u.inst_emul.gpa =
625
HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
626
/* Bits [11:0] are the same as bits [11:0] from the virtual address. */
627
vme_ret->u.inst_emul.gpa += hypctx->exit_info.far_el2 &
628
FAR_EL2_HPFAR_PAGE_MASK;
629
630
esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT;
631
reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT;
632
633
vie = &vme_ret->u.inst_emul.vie;
634
vie->access_size = 1 << esr_sas;
635
vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0;
636
vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ;
637
vie->reg = reg_num;
638
639
paging = &vme_ret->u.inst_emul.paging;
640
paging->ttbr0_addr = hypctx->ttbr0_el1 & ~(TTBR_ASID_MASK | TTBR_CnP);
641
paging->ttbr1_addr = hypctx->ttbr1_el1 & ~(TTBR_ASID_MASK | TTBR_CnP);
642
paging->tcr_el1 = hypctx->tcr_el1;
643
paging->tcr2_el1 = hypctx->tcr2_el1;
644
paging->flags = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);
645
if ((hypctx->sctlr_el1 & SCTLR_M) != 0)
646
paging->flags |= VM_GP_MMU_ENABLED;
647
}
648
649
static void
650
arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret)
651
{
652
uint32_t reg_num;
653
struct vre *vre;
654
655
/* u.hyp member will be replaced by u.reg_emul */
656
vre = &vme_ret->u.reg_emul.vre;
657
658
vre->inst_syndrome = esr_iss;
659
/* ARMv8 Architecture Manual, p. D7-2273: 1 means read */
660
vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE;
661
reg_num = ISS_MSR_Rt(esr_iss);
662
vre->reg = reg_num;
663
}
664
665
void
666
raise_data_insn_abort(struct hypctx *hypctx, uint64_t far, bool dabort, int fsc)
667
{
668
uint64_t esr;
669
670
if ((hypctx->tf.tf_spsr & PSR_M_MASK) == PSR_M_EL0t)
671
esr = EXCP_INSN_ABORT_L << ESR_ELx_EC_SHIFT;
672
else
673
esr = EXCP_INSN_ABORT << ESR_ELx_EC_SHIFT;
674
/* Set the bit that changes from insn -> data abort */
675
if (dabort)
676
esr |= EXCP_DATA_ABORT_L << ESR_ELx_EC_SHIFT;
677
/* Set the IL bit if set by hardware */
678
esr |= hypctx->tf.tf_esr & ESR_ELx_IL;
679
680
vmmops_exception(hypctx, esr | fsc, far);
681
}
682
683
static int
684
handle_el1_sync_excp(struct hypctx *hypctx, struct vm_exit *vme_ret,
685
pmap_t pmap)
686
{
687
uint64_t gpa;
688
uint32_t esr_ec, esr_iss;
689
690
esr_ec = ESR_ELx_EXCEPTION(hypctx->tf.tf_esr);
691
esr_iss = hypctx->tf.tf_esr & ESR_ELx_ISS_MASK;
692
693
switch (esr_ec) {
694
case EXCP_UNKNOWN:
695
vmm_stat_incr(hypctx->vcpu, VMEXIT_UNKNOWN, 1);
696
arm64_print_hyp_regs(vme_ret);
697
vme_ret->exitcode = VM_EXITCODE_HYP;
698
break;
699
case EXCP_TRAP_WFI_WFE:
700
if ((hypctx->tf.tf_esr & 0x3) == 0) { /* WFI */
701
vmm_stat_incr(hypctx->vcpu, VMEXIT_WFI, 1);
702
vme_ret->exitcode = VM_EXITCODE_WFI;
703
} else {
704
vmm_stat_incr(hypctx->vcpu, VMEXIT_WFE, 1);
705
vme_ret->exitcode = VM_EXITCODE_HYP;
706
}
707
break;
708
case EXCP_HVC:
709
vmm_stat_incr(hypctx->vcpu, VMEXIT_HVC, 1);
710
vme_ret->exitcode = VM_EXITCODE_HVC;
711
break;
712
case EXCP_MSR:
713
vmm_stat_incr(hypctx->vcpu, VMEXIT_MSR, 1);
714
arm64_gen_reg_emul_data(esr_iss, vme_ret);
715
vme_ret->exitcode = VM_EXITCODE_REG_EMUL;
716
break;
717
case EXCP_BRK:
718
vmm_stat_incr(hypctx->vcpu, VMEXIT_BRK, 1);
719
vme_ret->exitcode = VM_EXITCODE_BRK;
720
break;
721
case EXCP_SOFTSTP_EL0:
722
vmm_stat_incr(hypctx->vcpu, VMEXIT_SS, 1);
723
vme_ret->exitcode = VM_EXITCODE_SS;
724
break;
725
case EXCP_INSN_ABORT_L:
726
case EXCP_DATA_ABORT_L:
727
vmm_stat_incr(hypctx->vcpu, esr_ec == EXCP_DATA_ABORT_L ?
728
VMEXIT_DATA_ABORT : VMEXIT_INSN_ABORT, 1);
729
switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) {
730
case ISS_DATA_DFSC_TF_L0:
731
case ISS_DATA_DFSC_TF_L1:
732
case ISS_DATA_DFSC_TF_L2:
733
case ISS_DATA_DFSC_TF_L3:
734
case ISS_DATA_DFSC_AFF_L1:
735
case ISS_DATA_DFSC_AFF_L2:
736
case ISS_DATA_DFSC_AFF_L3:
737
case ISS_DATA_DFSC_PF_L1:
738
case ISS_DATA_DFSC_PF_L2:
739
case ISS_DATA_DFSC_PF_L3:
740
gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
741
/* Check the IPA is valid */
742
if (gpa >= (1ul << vmm_max_ipa_bits)) {
743
raise_data_insn_abort(hypctx,
744
hypctx->exit_info.far_el2,
745
esr_ec == EXCP_DATA_ABORT_L,
746
ISS_DATA_DFSC_ASF_L0);
747
vme_ret->inst_length = 0;
748
return (HANDLED);
749
}
750
751
if (vm_mem_allocated(hypctx->vcpu, gpa)) {
752
vme_ret->exitcode = VM_EXITCODE_PAGING;
753
vme_ret->inst_length = 0;
754
vme_ret->u.paging.esr = hypctx->tf.tf_esr;
755
vme_ret->u.paging.gpa = gpa;
756
} else if (esr_ec == EXCP_INSN_ABORT_L) {
757
/*
758
* Raise an external abort. Device memory is
759
* not executable
760
*/
761
raise_data_insn_abort(hypctx,
762
hypctx->exit_info.far_el2, false,
763
ISS_DATA_DFSC_EXT);
764
vme_ret->inst_length = 0;
765
return (HANDLED);
766
} else {
767
arm64_gen_inst_emul_data(hypctx, esr_iss,
768
vme_ret);
769
vme_ret->exitcode = VM_EXITCODE_INST_EMUL;
770
}
771
break;
772
default:
773
arm64_print_hyp_regs(vme_ret);
774
vme_ret->exitcode = VM_EXITCODE_HYP;
775
break;
776
}
777
778
break;
779
780
default:
781
vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_SYNC, 1);
782
arm64_print_hyp_regs(vme_ret);
783
vme_ret->exitcode = VM_EXITCODE_HYP;
784
break;
785
}
786
787
/* We don't don't do any instruction emulation here */
788
return (UNHANDLED);
789
}
790
791
static int
792
arm64_handle_world_switch(struct hypctx *hypctx, int excp_type,
793
struct vm_exit *vme, pmap_t pmap)
794
{
795
int handled;
796
797
switch (excp_type) {
798
case EXCP_TYPE_EL1_SYNC:
799
/* The exit code will be set by handle_el1_sync_excp(). */
800
handled = handle_el1_sync_excp(hypctx, vme, pmap);
801
break;
802
803
case EXCP_TYPE_EL1_IRQ:
804
case EXCP_TYPE_EL1_FIQ:
805
/* The host kernel will handle IRQs and FIQs. */
806
vmm_stat_incr(hypctx->vcpu,
807
excp_type == EXCP_TYPE_EL1_IRQ ? VMEXIT_IRQ : VMEXIT_FIQ,1);
808
vme->exitcode = VM_EXITCODE_BOGUS;
809
handled = UNHANDLED;
810
break;
811
812
case EXCP_TYPE_EL1_ERROR:
813
case EXCP_TYPE_EL2_SYNC:
814
case EXCP_TYPE_EL2_IRQ:
815
case EXCP_TYPE_EL2_FIQ:
816
case EXCP_TYPE_EL2_ERROR:
817
vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_EL2, 1);
818
vme->exitcode = VM_EXITCODE_BOGUS;
819
handled = UNHANDLED;
820
break;
821
822
default:
823
vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
824
vme->exitcode = VM_EXITCODE_BOGUS;
825
handled = UNHANDLED;
826
break;
827
}
828
829
return (handled);
830
}
831
832
/*
 * Release the hold recorded in *cookie, if there is one, and reset the
 * cookie to NULL.
 */
static void
ptp_release(void **cookie)
{

	if (*cookie == NULL)
		return;

	vm_gpa_release(*cookie);
	*cookie = NULL;
}
840
841
static void *
842
ptp_hold(struct vcpu *vcpu, vm_paddr_t ptpphys, size_t len, void **cookie)
843
{
844
void *ptr;
845
846
ptp_release(cookie);
847
ptr = vm_gpa_hold(vcpu, ptpphys, len, VM_PROT_RW, cookie);
848
return (ptr);
849
}
850
851
/* log2 of the number of bytes in a page table entry */
852
#define PTE_SHIFT 3
853
int
854
vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
855
int prot, uint64_t *gpa, int *is_fault)
856
{
857
struct hypctx *hypctx;
858
void *cookie;
859
uint64_t mask, *ptep, pte, pte_addr;
860
int address_bits, granule_shift, ia_bits, levels, pte_shift, tsz;
861
bool is_el0;
862
863
/* Check if the MMU is off */
864
if ((paging->flags & VM_GP_MMU_ENABLED) == 0) {
865
*is_fault = 0;
866
*gpa = gla;
867
return (0);
868
}
869
870
is_el0 = (paging->flags & PSR_M_MASK) == PSR_M_EL0t;
871
872
if (ADDR_IS_KERNEL(gla)) {
873
/* If address translation is disabled raise an exception */
874
if ((paging->tcr_el1 & TCR_EPD1) != 0) {
875
*is_fault = 1;
876
return (0);
877
}
878
if (is_el0 && (paging->tcr_el1 & TCR_E0PD1) != 0) {
879
*is_fault = 1;
880
return (0);
881
}
882
pte_addr = paging->ttbr1_addr;
883
tsz = (paging->tcr_el1 & TCR_T1SZ_MASK) >> TCR_T1SZ_SHIFT;
884
/* Clear the top byte if TBI is on */
885
if ((paging->tcr_el1 & TCR_TBI1) != 0)
886
gla |= (0xfful << 56);
887
switch (paging->tcr_el1 & TCR_TG1_MASK) {
888
case TCR_TG1_4K:
889
granule_shift = PAGE_SHIFT_4K;
890
break;
891
case TCR_TG1_16K:
892
granule_shift = PAGE_SHIFT_16K;
893
break;
894
case TCR_TG1_64K:
895
granule_shift = PAGE_SHIFT_64K;
896
break;
897
default:
898
*is_fault = 1;
899
return (EINVAL);
900
}
901
} else {
902
/* If address translation is disabled raise an exception */
903
if ((paging->tcr_el1 & TCR_EPD0) != 0) {
904
*is_fault = 1;
905
return (0);
906
}
907
if (is_el0 && (paging->tcr_el1 & TCR_E0PD0) != 0) {
908
*is_fault = 1;
909
return (0);
910
}
911
pte_addr = paging->ttbr0_addr;
912
tsz = (paging->tcr_el1 & TCR_T0SZ_MASK) >> TCR_T0SZ_SHIFT;
913
/* Clear the top byte if TBI is on */
914
if ((paging->tcr_el1 & TCR_TBI0) != 0)
915
gla &= ~(0xfful << 56);
916
switch (paging->tcr_el1 & TCR_TG0_MASK) {
917
case TCR_TG0_4K:
918
granule_shift = PAGE_SHIFT_4K;
919
break;
920
case TCR_TG0_16K:
921
granule_shift = PAGE_SHIFT_16K;
922
break;
923
case TCR_TG0_64K:
924
granule_shift = PAGE_SHIFT_64K;
925
break;
926
default:
927
*is_fault = 1;
928
return (EINVAL);
929
}
930
}
931
932
/*
933
* TODO: Support FEAT_TTST for smaller tsz values and FEAT_LPA2
934
* for larger values.
935
*/
936
switch (granule_shift) {
937
case PAGE_SHIFT_4K:
938
case PAGE_SHIFT_16K:
939
/*
940
* See "Table D8-11 4KB granule, determining stage 1 initial
941
* lookup level" and "Table D8-21 16KB granule, determining
942
* stage 1 initial lookup level" from the "Arm Architecture
943
* Reference Manual for A-Profile architecture" revision I.a
944
* for the minimum and maximum values.
945
*
946
* TODO: Support less than 16 when FEAT_LPA2 is implemented
947
* and TCR_EL1.DS == 1
948
* TODO: Support more than 39 when FEAT_TTST is implemented
949
*/
950
if (tsz < 16 || tsz > 39) {
951
*is_fault = 1;
952
return (EINVAL);
953
}
954
break;
955
case PAGE_SHIFT_64K:
956
/* TODO: Support 64k granule. It will probably work, but is untested */
957
default:
958
*is_fault = 1;
959
return (EINVAL);
960
}
961
962
/*
963
* Calculate the input address bits. These are 64 bit in an address
964
* with the top tsz bits being all 0 or all 1.
965
*/
966
ia_bits = 64 - tsz;
967
968
/*
969
* Calculate the number of address bits used in the page table
970
* calculation. This is ia_bits minus the bottom granule_shift
971
* bits that are passed to the output address.
972
*/
973
address_bits = ia_bits - granule_shift;
974
975
/*
976
* Calculate the number of levels. Each level uses
977
* granule_shift - PTE_SHIFT bits of the input address.
978
* This is because the table is 1 << granule_shift and each
979
* entry is 1 << PTE_SHIFT bytes.
980
*/
981
levels = howmany(address_bits, granule_shift - PTE_SHIFT);
982
983
/* Mask of the upper unused bits in the virtual address */
984
gla &= (1ul << ia_bits) - 1;
985
hypctx = (struct hypctx *)vcpui;
986
cookie = NULL;
987
/* TODO: Check if the level supports block descriptors */
988
for (;levels > 0; levels--) {
989
int idx;
990
991
pte_shift = (levels - 1) * (granule_shift - PTE_SHIFT) +
992
granule_shift;
993
idx = (gla >> pte_shift) &
994
((1ul << (granule_shift - PTE_SHIFT)) - 1);
995
while (idx > PAGE_SIZE / sizeof(pte)) {
996
idx -= PAGE_SIZE / sizeof(pte);
997
pte_addr += PAGE_SIZE;
998
}
999
1000
ptep = ptp_hold(hypctx->vcpu, pte_addr, PAGE_SIZE, &cookie);
1001
if (ptep == NULL)
1002
goto error;
1003
pte = ptep[idx];
1004
1005
/* Calculate the level we are looking at */
1006
switch (levels) {
1007
default:
1008
goto fault;
1009
/* TODO: Level -1 when FEAT_LPA2 is implemented */
1010
case 4: /* Level 0 */
1011
if ((pte & ATTR_DESCR_MASK) != L0_TABLE)
1012
goto fault;
1013
/* FALLTHROUGH */
1014
case 3: /* Level 1 */
1015
case 2: /* Level 2 */
1016
switch (pte & ATTR_DESCR_MASK) {
1017
/* Use L1 macro as all levels are the same */
1018
case L1_TABLE:
1019
/* Check if EL0 can access this address space */
1020
if (is_el0 &&
1021
(pte & TATTR_AP_TABLE_NO_EL0) != 0)
1022
goto fault;
1023
/* Check if the address space is writable */
1024
if ((prot & PROT_WRITE) != 0 &&
1025
(pte & TATTR_AP_TABLE_RO) != 0)
1026
goto fault;
1027
if ((prot & PROT_EXEC) != 0) {
1028
/* Check the table exec attribute */
1029
if ((is_el0 &&
1030
(pte & TATTR_UXN_TABLE) != 0) ||
1031
(!is_el0 &&
1032
(pte & TATTR_PXN_TABLE) != 0))
1033
goto fault;
1034
}
1035
pte_addr = pte & ~ATTR_MASK;
1036
break;
1037
case L1_BLOCK:
1038
goto done;
1039
default:
1040
goto fault;
1041
}
1042
break;
1043
case 1: /* Level 3 */
1044
if ((pte & ATTR_DESCR_MASK) == L3_PAGE)
1045
goto done;
1046
goto fault;
1047
}
1048
}
1049
1050
done:
1051
/* Check if EL0 has access to the block/page */
1052
if (is_el0 && (pte & ATTR_S1_AP(ATTR_S1_AP_USER)) == 0)
1053
goto fault;
1054
if ((prot & PROT_WRITE) != 0 && (pte & ATTR_S1_AP_RW_BIT) != 0)
1055
goto fault;
1056
if ((prot & PROT_EXEC) != 0) {
1057
if ((is_el0 && (pte & ATTR_S1_UXN) != 0) ||
1058
(!is_el0 && (pte & ATTR_S1_PXN) != 0))
1059
goto fault;
1060
}
1061
mask = (1ul << pte_shift) - 1;
1062
*gpa = (pte & ~ATTR_MASK) | (gla & mask);
1063
*is_fault = 0;
1064
ptp_release(&cookie);
1065
return (0);
1066
1067
error:
1068
ptp_release(&cookie);
1069
return (EFAULT);
1070
fault:
1071
*is_fault = 1;
1072
ptp_release(&cookie);
1073
return (0);
1074
}
1075
1076
int
1077
vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
1078
{
1079
uint64_t excp_type;
1080
int handled;
1081
register_t daif;
1082
struct hyp *hyp;
1083
struct hypctx *hypctx;
1084
struct vcpu *vcpu;
1085
struct vm_exit *vme;
1086
int mode;
1087
1088
hypctx = (struct hypctx *)vcpui;
1089
hyp = hypctx->hyp;
1090
vcpu = hypctx->vcpu;
1091
vme = vm_exitinfo(vcpu);
1092
1093
hypctx->tf.tf_elr = (uint64_t)pc;
1094
1095
for (;;) {
1096
if (hypctx->has_exception) {
1097
hypctx->has_exception = false;
1098
hypctx->elr_el1 = hypctx->tf.tf_elr;
1099
1100
mode = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);
1101
1102
if (mode == PSR_M_EL1t) {
1103
hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x0;
1104
} else if (mode == PSR_M_EL1h) {
1105
hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x200;
1106
} else if ((mode & PSR_M_32) == PSR_M_64) {
1107
/* 64-bit EL0 */
1108
hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x400;
1109
} else {
1110
/* 32-bit EL0 */
1111
hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x600;
1112
}
1113
1114
/* Set the new spsr */
1115
hypctx->spsr_el1 = hypctx->tf.tf_spsr;
1116
1117
/* Set the new cpsr */
1118
hypctx->tf.tf_spsr = hypctx->spsr_el1 & PSR_FLAGS;
1119
hypctx->tf.tf_spsr |= PSR_DAIF | PSR_M_EL1h;
1120
1121
/*
1122
* Update fields that may change on exeption entry
1123
* based on how sctlr_el1 is configured.
1124
*/
1125
if ((hypctx->sctlr_el1 & SCTLR_SPAN) == 0)
1126
hypctx->tf.tf_spsr |= PSR_PAN;
1127
if ((hypctx->sctlr_el1 & SCTLR_DSSBS) == 0)
1128
hypctx->tf.tf_spsr &= ~PSR_SSBS;
1129
else
1130
hypctx->tf.tf_spsr |= PSR_SSBS;
1131
}
1132
1133
daif = intr_disable();
1134
1135
/* Check if the vcpu is suspended */
1136
if (vcpu_suspended(evinfo)) {
1137
intr_restore(daif);
1138
vm_exit_suspended(vcpu, pc);
1139
break;
1140
}
1141
1142
if (vcpu_debugged(vcpu)) {
1143
intr_restore(daif);
1144
vm_exit_debug(vcpu, pc);
1145
break;
1146
}
1147
1148
/* Activate the stage2 pmap so the vmid is valid */
1149
pmap_activate_vm(pmap);
1150
hyp->vttbr_el2 = pmap_to_ttbr0(pmap);
1151
1152
/*
1153
* TODO: What happens if a timer interrupt is asserted exactly
1154
* here, but for the previous VM?
1155
*/
1156
arm64_set_active_vcpu(hypctx);
1157
vgic_flush_hwstate(hypctx);
1158
1159
/* Call into EL2 to switch to the guest */
1160
excp_type = vmm_enter_guest(hyp, hypctx);
1161
1162
vgic_sync_hwstate(hypctx);
1163
vtimer_sync_hwstate(hypctx);
1164
1165
/*
1166
* Deactivate the stage2 pmap.
1167
*/
1168
PCPU_SET(curvmpmap, NULL);
1169
intr_restore(daif);
1170
1171
vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
1172
if (excp_type == EXCP_TYPE_MAINT_IRQ)
1173
continue;
1174
1175
vme->pc = hypctx->tf.tf_elr;
1176
vme->inst_length = INSN_SIZE;
1177
vme->u.hyp.exception_nr = excp_type;
1178
vme->u.hyp.esr_el2 = hypctx->tf.tf_esr;
1179
vme->u.hyp.far_el2 = hypctx->exit_info.far_el2;
1180
vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2;
1181
1182
handled = arm64_handle_world_switch(hypctx, excp_type, vme,
1183
pmap);
1184
if (handled == UNHANDLED)
1185
/* Exit loop to emulate instruction. */
1186
break;
1187
else
1188
/* Resume guest execution from the next instruction. */
1189
hypctx->tf.tf_elr += vme->inst_length;
1190
}
1191
1192
return (0);
1193
}
1194
1195
static void
1196
arm_pcpu_vmcleanup(void *arg)
1197
{
1198
struct hyp *hyp;
1199
int i, maxcpus;
1200
1201
hyp = arg;
1202
maxcpus = vm_get_maxcpus(hyp->vm);
1203
for (i = 0; i < maxcpus; i++) {
1204
if (arm64_get_active_vcpu() == hyp->ctx[i]) {
1205
arm64_set_active_vcpu(NULL);
1206
break;
1207
}
1208
}
1209
}
1210
1211
void
1212
vmmops_vcpu_cleanup(void *vcpui)
1213
{
1214
struct hypctx *hypctx = vcpui;
1215
1216
vtimer_cpucleanup(hypctx);
1217
vgic_cpucleanup(hypctx);
1218
1219
if (!in_vhe())
1220
vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true);
1221
1222
free(hypctx, M_HYP);
1223
}
1224
1225
void
1226
vmmops_cleanup(void *vmi)
1227
{
1228
struct hyp *hyp = vmi;
1229
1230
vtimer_vmcleanup(hyp);
1231
vgic_vmcleanup(hyp);
1232
1233
smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp);
1234
1235
if (!in_vhe())
1236
vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true);
1237
1238
free(hyp, M_HYP);
1239
}
1240
1241
/*
1242
* Return register value. Registers have different sizes and an explicit cast
1243
* must be made to ensure proper conversion.
1244
*/
1245
static uint64_t *
1246
hypctx_regptr(struct hypctx *hypctx, int reg)
1247
{
1248
switch (reg) {
1249
case VM_REG_GUEST_X0 ... VM_REG_GUEST_X29:
1250
return (&hypctx->tf.tf_x[reg]);
1251
case VM_REG_GUEST_LR:
1252
return (&hypctx->tf.tf_lr);
1253
case VM_REG_GUEST_SP:
1254
return (&hypctx->tf.tf_sp);
1255
case VM_REG_GUEST_CPSR:
1256
return (&hypctx->tf.tf_spsr);
1257
case VM_REG_GUEST_PC:
1258
return (&hypctx->tf.tf_elr);
1259
case VM_REG_GUEST_SCTLR_EL1:
1260
return (&hypctx->sctlr_el1);
1261
case VM_REG_GUEST_TTBR0_EL1:
1262
return (&hypctx->ttbr0_el1);
1263
case VM_REG_GUEST_TTBR1_EL1:
1264
return (&hypctx->ttbr1_el1);
1265
case VM_REG_GUEST_TCR_EL1:
1266
return (&hypctx->tcr_el1);
1267
case VM_REG_GUEST_TCR2_EL1:
1268
return (&hypctx->tcr2_el1);
1269
case VM_REG_GUEST_MPIDR_EL1:
1270
return (&hypctx->vmpidr_el2);
1271
default:
1272
break;
1273
}
1274
return (NULL);
1275
}
1276
1277
int
1278
vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
1279
{
1280
uint64_t *regp;
1281
int running, hostcpu;
1282
struct hypctx *hypctx = vcpui;
1283
1284
running = vcpu_is_running(hypctx->vcpu, &hostcpu);
1285
if (running && hostcpu != curcpu)
1286
panic("arm_getreg: %s%d is running", vm_name(hypctx->hyp->vm),
1287
vcpu_vcpuid(hypctx->vcpu));
1288
1289
regp = hypctx_regptr(hypctx, reg);
1290
if (regp == NULL)
1291
return (EINVAL);
1292
1293
*retval = *regp;
1294
return (0);
1295
}
1296
1297
int
1298
vmmops_setreg(void *vcpui, int reg, uint64_t val)
1299
{
1300
uint64_t *regp;
1301
struct hypctx *hypctx = vcpui;
1302
int running, hostcpu;
1303
1304
running = vcpu_is_running(hypctx->vcpu, &hostcpu);
1305
if (running && hostcpu != curcpu)
1306
panic("arm_setreg: %s%d is running", vm_name(hypctx->hyp->vm),
1307
vcpu_vcpuid(hypctx->vcpu));
1308
1309
regp = hypctx_regptr(hypctx, reg);
1310
if (regp == NULL)
1311
return (EINVAL);
1312
1313
*regp = val;
1314
return (0);
1315
}
1316
1317
int
1318
vmmops_exception(void *vcpui, uint64_t esr, uint64_t far)
1319
{
1320
struct hypctx *hypctx = vcpui;
1321
int running, hostcpu;
1322
1323
running = vcpu_is_running(hypctx->vcpu, &hostcpu);
1324
if (running && hostcpu != curcpu)
1325
panic("%s: %s%d is running", __func__, vm_name(hypctx->hyp->vm),
1326
vcpu_vcpuid(hypctx->vcpu));
1327
1328
hypctx->far_el1 = far;
1329
hypctx->esr_el1 = esr;
1330
hypctx->has_exception = true;
1331
1332
return (0);
1333
}
1334
1335
int
1336
vmmops_getcap(void *vcpui, int num, int *retval)
1337
{
1338
struct hypctx *hypctx = vcpui;
1339
int ret;
1340
1341
ret = ENOENT;
1342
1343
switch (num) {
1344
case VM_CAP_UNRESTRICTED_GUEST:
1345
*retval = 1;
1346
ret = 0;
1347
break;
1348
case VM_CAP_BRK_EXIT:
1349
case VM_CAP_SS_EXIT:
1350
case VM_CAP_MASK_HWINTR:
1351
*retval = (hypctx->setcaps & (1ul << num)) != 0;
1352
break;
1353
default:
1354
break;
1355
}
1356
1357
return (ret);
1358
}
1359
1360
int
1361
vmmops_setcap(void *vcpui, int num, int val)
1362
{
1363
struct hypctx *hypctx = vcpui;
1364
int ret;
1365
1366
ret = 0;
1367
1368
switch (num) {
1369
case VM_CAP_BRK_EXIT:
1370
if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
1371
break;
1372
if (val != 0)
1373
hypctx->mdcr_el2 |= MDCR_EL2_TDE;
1374
else if ((hypctx->setcaps & (1ul << VM_CAP_SS_EXIT)) == 0)
1375
hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
1376
break;
1377
case VM_CAP_SS_EXIT:
1378
if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
1379
break;
1380
1381
if (val != 0) {
1382
hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS);
1383
hypctx->debug_mdscr |= (hypctx->mdscr_el1 & MDSCR_SS);
1384
1385
hypctx->tf.tf_spsr |= PSR_SS;
1386
hypctx->mdscr_el1 |= MDSCR_SS;
1387
hypctx->mdcr_el2 |= MDCR_EL2_TDE;
1388
} else {
1389
hypctx->tf.tf_spsr &= ~PSR_SS;
1390
hypctx->tf.tf_spsr |= hypctx->debug_spsr;
1391
hypctx->debug_spsr &= ~PSR_SS;
1392
hypctx->mdscr_el1 &= ~MDSCR_SS;
1393
hypctx->mdscr_el1 |= hypctx->debug_mdscr;
1394
hypctx->debug_mdscr &= ~MDSCR_SS;
1395
if ((hypctx->setcaps & (1ul << VM_CAP_BRK_EXIT)) == 0)
1396
hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
1397
}
1398
break;
1399
case VM_CAP_MASK_HWINTR:
1400
if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
1401
break;
1402
1403
if (val != 0) {
1404
hypctx->debug_spsr |= (hypctx->tf.tf_spsr &
1405
(PSR_I | PSR_F));
1406
hypctx->tf.tf_spsr |= PSR_I | PSR_F;
1407
} else {
1408
hypctx->tf.tf_spsr &= ~(PSR_I | PSR_F);
1409
hypctx->tf.tf_spsr |= (hypctx->debug_spsr &
1410
(PSR_I | PSR_F));
1411
hypctx->debug_spsr &= ~(PSR_I | PSR_F);
1412
}
1413
break;
1414
default:
1415
ret = ENOENT;
1416
break;
1417
}
1418
1419
if (ret == 0) {
1420
if (val == 0)
1421
hypctx->setcaps &= ~(1ul << num);
1422
else
1423
hypctx->setcaps |= (1ul << num);
1424
}
1425
1426
return (ret);
1427
}
1428
1429