GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/arm64/vmm/vmm_arm64.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (C) 2015 Mihai Carabas <[email protected]>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/smp.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/vmem.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>

#include <machine/armreg.h>
#include <machine/vm.h>
#include <machine/cpufunc.h>
#include <machine/cpu.h>
#include <machine/machdep.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/atomic.h>
#include <machine/hypervisor.h>
#include <machine/pmap.h>

#include <dev/vmm/vmm_mem.h>

#include "mmu.h"
#include "arm64.h"
#include "hyp.h"
#include "reset.h"
#include "io/vgic.h"
#include "io/vgic_v3.h"
#include "io/vtimer.h"
#include "vmm_handlers.h"
#include "vmm_stat.h"

#define	HANDLED		1
#define	UNHANDLED	0

/* Number of bits in an EL2 virtual address */
#define	EL2_VIRT_BITS	48
CTASSERT((1ul << EL2_VIRT_BITS) >= HYP_VM_MAX_ADDRESS);

/* TODO: Move the host hypctx off the stack */
#define	VMM_STACK_PAGES	4
#define	VMM_STACK_SIZE	(VMM_STACK_PAGES * PAGE_SIZE)

static int vmm_pmap_levels, vmm_virt_bits, vmm_max_ipa_bits;

/* Register values passed to arm_setup_vectors to set in the hypervisor */
struct vmm_init_regs {
	uint64_t tcr_el2;
	uint64_t vtcr_el2;
};

MALLOC_DEFINE(M_HYP, "ARM VMM HYP", "ARM VMM HYP");

extern char hyp_init_vectors[];
extern char hyp_vectors[];
extern char hyp_stub_vectors[];

static vm_paddr_t hyp_code_base;
static size_t hyp_code_len;

static char *stack[MAXCPU];
static vm_offset_t stack_hyp_va[MAXCPU];

static vmem_t *el2_mem_alloc;

static void arm_setup_vectors(void *arg);

DPCPU_DEFINE_STATIC(struct hypctx *, vcpu);

static inline void
arm64_set_active_vcpu(struct hypctx *hypctx)
{
	DPCPU_SET(vcpu, hypctx);
}

struct hypctx *
arm64_get_active_vcpu(void)
{
	return (DPCPU_GET(vcpu));
}

static void
arm_setup_vectors(void *arg)
{
	struct vmm_init_regs *el2_regs;
	uintptr_t stack_top;
	uint32_t sctlr_el2;
	register_t daif;

	el2_regs = arg;
	arm64_set_active_vcpu(NULL);

	/*
	 * Configure the system control register for EL2:
	 *
	 * SCTLR_EL2_M: MMU on
	 * SCTLR_EL2_C: Data cacheability not affected
	 * SCTLR_EL2_I: Instruction cacheability not affected
	 * SCTLR_EL2_A: Instruction alignment check
	 * SCTLR_EL2_SA: Stack pointer alignment check
	 * SCTLR_EL2_WXN: Treat writable memory as execute never
	 * ~SCTLR_EL2_EE: Data accesses are little-endian
	 */
	sctlr_el2 = SCTLR_EL2_RES1;
	sctlr_el2 |= SCTLR_EL2_M | SCTLR_EL2_C | SCTLR_EL2_I;
	sctlr_el2 |= SCTLR_EL2_A | SCTLR_EL2_SA;
	sctlr_el2 |= SCTLR_EL2_WXN;
	sctlr_el2 &= ~SCTLR_EL2_EE;

	daif = intr_disable();

	if (in_vhe()) {
		WRITE_SPECIALREG(vtcr_el2, el2_regs->vtcr_el2);
	} else {
		/*
		 * Install the temporary vectors which will be responsible for
		 * initializing the VMM when we next trap into EL2.
		 *
		 * x0: the exception vector table responsible for hypervisor
		 * initialization on the next call.
		 */
		vmm_call_hyp(vtophys(&vmm_hyp_code));

		/* Create and map the hypervisor stack */
		stack_top = stack_hyp_va[PCPU_GET(cpuid)] + VMM_STACK_SIZE;

		/* Special call to initialize EL2 */
		vmm_call_hyp(vmmpmap_to_ttbr0(), stack_top, el2_regs->tcr_el2,
		    sctlr_el2, el2_regs->vtcr_el2);
	}

	intr_restore(daif);
}

static void
arm_teardown_vectors(void *arg)
{
	register_t daif;

	/*
	 * vmm_cleanup() will disable the MMU. For the next few instructions,
	 * before the hardware disables the MMU, one of the following is
	 * possible:
	 *
	 * a. The instruction addresses are fetched with the MMU disabled,
	 * and they must represent the actual physical addresses. This will
	 * work because we call the vmm_cleanup() function by its physical
	 * address.
	 *
	 * b. The instruction addresses are fetched using the old translation
	 * tables. This will work because we have an identity mapping in place
	 * in the translation tables and vmm_cleanup() is called by its
	 * physical address.
	 */
	daif = intr_disable();
	/* TODO: Invalidate the cache */
	vmm_call_hyp(HYP_CLEANUP, vtophys(hyp_stub_vectors));
	intr_restore(daif);

	arm64_set_active_vcpu(NULL);
}

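/*
 * Map the number of stage 2 page table levels to the VTCR_EL2.SL0
 * starting-level field. For example, with 4K pages a 3-level walk
 * starts at level 1 and a 4-level walk starts at level 0.
 */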
static uint64_t
vmm_vtcr_el2_sl(u_int levels)
{
#if PAGE_SIZE == PAGE_SIZE_4K
	switch (levels) {
	case 2:
		return (VTCR_EL2_SL0_4K_LVL2);
	case 3:
		return (VTCR_EL2_SL0_4K_LVL1);
	case 4:
		return (VTCR_EL2_SL0_4K_LVL0);
	default:
		panic("%s: Invalid number of page table levels %u", __func__,
		    levels);
	}
#elif PAGE_SIZE == PAGE_SIZE_16K
	switch (levels) {
	case 2:
		return (VTCR_EL2_SL0_16K_LVL2);
	case 3:
		return (VTCR_EL2_SL0_16K_LVL1);
	case 4:
		return (VTCR_EL2_SL0_16K_LVL0);
	default:
		panic("%s: Invalid number of page table levels %u", __func__,
		    levels);
	}
#else
#error Unsupported page size
#endif
}

int
vmmops_modinit(int ipinum)
{
	struct vmm_init_regs el2_regs;
	vm_offset_t next_hyp_va;
	vm_paddr_t vmm_base;
	uint64_t id_aa64mmfr0_el1, pa_range_bits, pa_range_field;
	int cpu, i;
	bool rv __diagused;

	if (!has_hyp()) {
		printf(
		    "vmm: Processor doesn't have support for virtualization\n");
		return (ENXIO);
	}

	if (!vgic_present()) {
		printf("vmm: No vgic found\n");
		return (ENODEV);
	}

	if (!get_kernel_reg(ID_AA64MMFR0_EL1, &id_aa64mmfr0_el1)) {
		printf("vmm: Unable to read ID_AA64MMFR0_EL1\n");
		return (ENXIO);
	}
	pa_range_field = ID_AA64MMFR0_PARange_VAL(id_aa64mmfr0_el1);
	/*
	 * Use 3 levels to give us up to 39 bits with 4k pages, or
	 * 47 bits with 16k pages.
	 */
	/* TODO: Check the number of levels for 64k pages */
	vmm_pmap_levels = 3;
	switch (pa_range_field) {
	case ID_AA64MMFR0_PARange_4G:
		printf("vmm: Not enough physical address bits\n");
		return (ENXIO);
	case ID_AA64MMFR0_PARange_64G:
		vmm_virt_bits = 36;
#if PAGE_SIZE == PAGE_SIZE_16K
		vmm_pmap_levels = 2;
#endif
		break;
	default:
		vmm_virt_bits = 39;
		break;
	}
	pa_range_bits = pa_range_field >> ID_AA64MMFR0_PARange_SHIFT;

	if (!in_vhe()) {
		/* Initialise the EL2 MMU */
		if (!vmmpmap_init()) {
			printf("vmm: Failed to init the EL2 MMU\n");
			return (ENOMEM);
		}
	}

	/* Set up the stage 2 pmap callbacks */
	MPASS(pmap_clean_stage2_tlbi == NULL);
	pmap_clean_stage2_tlbi = vmm_clean_s2_tlbi;
	pmap_stage2_invalidate_range = vmm_s2_tlbi_range;
	pmap_stage2_invalidate_all = vmm_s2_tlbi_all;

	if (!in_vhe()) {
		/*
		 * Create an allocator for the virtual address space used by
		 * EL2. EL2 code is identity-mapped; the allocator is used to
		 * find space for VM structures.
		 */
		el2_mem_alloc = vmem_create("VMM EL2", 0, 0, PAGE_SIZE, 0,
		    M_WAITOK);

		/* Create the mappings for the hypervisor translation table. */
		hyp_code_len = round_page(&vmm_hyp_code_end - &vmm_hyp_code);

		/* We need a physical identity mapping for when we activate the MMU */
		hyp_code_base = vmm_base = vtophys(&vmm_hyp_code);
		rv = vmmpmap_enter(vmm_base, hyp_code_len, vmm_base,
		    VM_PROT_READ | VM_PROT_EXECUTE);
		MPASS(rv);

		next_hyp_va = roundup2(vmm_base + hyp_code_len, L2_SIZE);

		/* Create a per-CPU hypervisor stack */
		CPU_FOREACH(cpu) {
			stack[cpu] = malloc(VMM_STACK_SIZE, M_HYP,
			    M_WAITOK | M_ZERO);
			stack_hyp_va[cpu] = next_hyp_va;

			for (i = 0; i < VMM_STACK_PAGES; i++) {
				rv = vmmpmap_enter(stack_hyp_va[cpu] + ptoa(i),
				    PAGE_SIZE, vtophys(stack[cpu] + ptoa(i)),
				    VM_PROT_READ | VM_PROT_WRITE);
				MPASS(rv);
			}
			next_hyp_va += L2_SIZE;
		}

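		/*
		 * Configure the EL2 translation control register. With
		 * EL2_VIRT_BITS == 48 the T0SZ field below is 64 - 48 = 16,
		 * selecting a 48-bit EL2 virtual address space.
		 */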
		el2_regs.tcr_el2 = TCR_EL2_RES1;
		el2_regs.tcr_el2 |= min(pa_range_bits << TCR_EL2_PS_SHIFT,
		    TCR_EL2_PS_52BITS);
		el2_regs.tcr_el2 |= TCR_EL2_T0SZ(64 - EL2_VIRT_BITS);
		el2_regs.tcr_el2 |= TCR_EL2_IRGN0_WBWA | TCR_EL2_ORGN0_WBWA;
#if PAGE_SIZE == PAGE_SIZE_4K
		el2_regs.tcr_el2 |= TCR_EL2_TG0_4K;
#elif PAGE_SIZE == PAGE_SIZE_16K
		el2_regs.tcr_el2 |= TCR_EL2_TG0_16K;
#else
#error Unsupported page size
#endif
#ifdef SMP
		el2_regs.tcr_el2 |= TCR_EL2_SH0_IS;
#endif
	}

	switch (pa_range_bits << TCR_EL2_PS_SHIFT) {
	case TCR_EL2_PS_32BITS:
		vmm_max_ipa_bits = 32;
		break;
	case TCR_EL2_PS_36BITS:
		vmm_max_ipa_bits = 36;
		break;
	case TCR_EL2_PS_40BITS:
		vmm_max_ipa_bits = 40;
		break;
	case TCR_EL2_PS_42BITS:
		vmm_max_ipa_bits = 42;
		break;
	case TCR_EL2_PS_44BITS:
		vmm_max_ipa_bits = 44;
		break;
	case TCR_EL2_PS_48BITS:
		vmm_max_ipa_bits = 48;
		break;
	case TCR_EL2_PS_52BITS:
	default:
		vmm_max_ipa_bits = 52;
		break;
	}

	/*
	 * Configure the Stage 2 translation control register:
	 *
	 * VTCR_IRGN0_WBWA: Translation table walks access inner cacheable
	 * normal memory
	 * VTCR_ORGN0_WBWA: Translation table walks access outer cacheable
	 * normal memory
	 * VTCR_EL2_TG0_4K/16K: Stage 2 uses the same page size as the kernel
	 * VTCR_EL2_SL0_4K_LVL1: Stage 2 uses concatenated level 1 tables
	 * VTCR_EL2_SH0_IS: Memory associated with Stage 2 walks is inner
	 * shareable
	 */
	el2_regs.vtcr_el2 = VTCR_EL2_RES1;
	el2_regs.vtcr_el2 |= VTCR_EL2_IRGN0_WBWA | VTCR_EL2_ORGN0_WBWA;
	el2_regs.vtcr_el2 |= VTCR_EL2_T0SZ(64 - vmm_virt_bits);
	el2_regs.vtcr_el2 |= vmm_vtcr_el2_sl(vmm_pmap_levels);
#if PAGE_SIZE == PAGE_SIZE_4K
	el2_regs.vtcr_el2 |= VTCR_EL2_TG0_4K;
#elif PAGE_SIZE == PAGE_SIZE_16K
	el2_regs.vtcr_el2 |= VTCR_EL2_TG0_16K;
#else
#error Unsupported page size
#endif
#ifdef SMP
	el2_regs.vtcr_el2 |= VTCR_EL2_SH0_IS;
#endif
	/*
	 * If FEAT_LPA2 is enabled in the host then we need to enable it here
	 * so the page tables created by pmap.c are correct. The meaning of
	 * the shareability field changes to become address bits when this
	 * is set.
	 */
	if ((READ_SPECIALREG(tcr_el1) & TCR_DS) != 0) {
		el2_regs.vtcr_el2 |= VTCR_EL2_DS;
		el2_regs.vtcr_el2 |=
		    min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_52BIT);
	} else {
		el2_regs.vtcr_el2 |=
		    min(pa_range_bits << VTCR_EL2_PS_SHIFT, VTCR_EL2_PS_48BIT);
	}

	smp_rendezvous(NULL, arm_setup_vectors, NULL, &el2_regs);

	if (!in_vhe()) {
		/* Add memory to the vmem allocator (checking there is space) */
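		/*
		 * The resulting EL2 VA layout, from low to high, is roughly:
		 * an unmapped L2 block at 0 (to catch NULL dereferences),
		 * vmem space, an L2 guard block, the identity-mapped hyp
		 * code and per-CPU stacks, then vmem space up to
		 * HYP_VM_MAX_ADDRESS.
		 */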
		if (vmm_base > (L2_SIZE + PAGE_SIZE)) {
			/*
			 * Ensure there is an L2 block before the vmm code to
			 * check for buffer overflows on earlier data. Include
			 * the PAGE_SIZE of the minimum we can allocate.
			 */
			vmm_base -= L2_SIZE + PAGE_SIZE;
			vmm_base = rounddown2(vmm_base, L2_SIZE);

			/*
			 * Check there is memory before the vmm code to add.
			 *
			 * Reserve the L2 block at address 0 so NULL
			 * dereference will raise an exception.
			 */
			if (vmm_base > L2_SIZE)
				vmem_add(el2_mem_alloc, L2_SIZE,
				    vmm_base - L2_SIZE, M_WAITOK);
		}

		/*
		 * Add the memory after the stacks. There is most of an L2
		 * block between the last stack and the first allocation so
		 * this should be safe without adding more padding.
		 */
		if (next_hyp_va < HYP_VM_MAX_ADDRESS - PAGE_SIZE)
			vmem_add(el2_mem_alloc, next_hyp_va,
			    HYP_VM_MAX_ADDRESS - next_hyp_va, M_WAITOK);
	}

	vgic_init();
	vtimer_init();

	return (0);
}

int
vmmops_modcleanup(void)
{
	int cpu;

	if (!in_vhe()) {
		smp_rendezvous(NULL, arm_teardown_vectors, NULL, NULL);

		CPU_FOREACH(cpu) {
			vmmpmap_remove(stack_hyp_va[cpu],
			    VMM_STACK_PAGES * PAGE_SIZE, false);
		}

		vmmpmap_remove(hyp_code_base, hyp_code_len, false);
	}

	vtimer_cleanup();

	if (!in_vhe()) {
		vmmpmap_fini();

		CPU_FOREACH(cpu)
			free(stack[cpu], M_HYP);
	}

	pmap_clean_stage2_tlbi = NULL;
	pmap_stage2_invalidate_range = NULL;
	pmap_stage2_invalidate_all = NULL;

	return (0);
}

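/*
 * The EL2 mappings are created at page granularity, so the VM and vCPU
 * structures are sized as whole pages: struct hyp is followed by one
 * hypctx pointer per possible vCPU, rounded up to a page boundary.
 */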
static vm_size_t
el2_hyp_size(struct vm *vm)
{
	return (round_page(sizeof(struct hyp) +
	    sizeof(struct hypctx *) * vm_get_maxcpus(vm)));
}

static vm_size_t
el2_hypctx_size(void)
{
	return (round_page(sizeof(struct hypctx)));
}

static vm_offset_t
el2_map_enter(vm_offset_t data, vm_size_t size, vm_prot_t prot)
{
	vmem_addr_t addr;
	int err __diagused;
	bool rv __diagused;

	err = vmem_alloc(el2_mem_alloc, size, M_NEXTFIT | M_WAITOK, &addr);
	MPASS(err == 0);
	rv = vmmpmap_enter(addr, size, vtophys(data), prot);
	MPASS(rv);

	return (addr);
}

void *
vmmops_init(struct vm *vm, pmap_t pmap)
{
	struct hyp *hyp;
	vm_size_t size;
	uint64_t idreg;

	size = el2_hyp_size(vm);
	hyp = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);

	hyp->vm = vm;
	hyp->vgic_attached = false;

	if (get_kernel_reg(ID_AA64MMFR0_EL1, &idreg)) {
		if (ID_AA64MMFR0_ECV_VAL(idreg) >= ID_AA64MMFR0_ECV_POFF)
			hyp->feats |= HYP_FEAT_ECV_POFF;
	}

	if (get_kernel_reg(ID_AA64MMFR1_EL1, &idreg)) {
		if (ID_AA64MMFR1_HCX_VAL(idreg) >= ID_AA64MMFR1_HCX_IMPL)
			hyp->feats |= HYP_FEAT_HCX;
	}

	vtimer_vminit(hyp);
	vgic_vminit(hyp);

	if (!in_vhe())
		hyp->el2_addr = el2_map_enter((vm_offset_t)hyp, size,
		    VM_PROT_READ | VM_PROT_WRITE);

	return (hyp);
}

void *
vmmops_vcpu_init(void *vmi, struct vcpu *vcpu1, int vcpuid)
{
	struct hyp *hyp = vmi;
	struct hypctx *hypctx;
	vm_size_t size;

	size = el2_hypctx_size();
	hypctx = malloc_aligned(size, PAGE_SIZE, M_HYP, M_WAITOK | M_ZERO);

	KASSERT(vcpuid >= 0 && vcpuid < vm_get_maxcpus(hyp->vm),
	    ("%s: Invalid vcpuid %d", __func__, vcpuid));
	hyp->ctx[vcpuid] = hypctx;

	hypctx->hyp = hyp;
	hypctx->vcpu = vcpu1;

	reset_vm_el01_regs(hypctx);
	reset_vm_el2_regs(hypctx);

	vtimer_cpuinit(hypctx);
	vgic_cpuinit(hypctx);

	if (!in_vhe())
		hypctx->el2_addr = el2_map_enter((vm_offset_t)hypctx, size,
		    VM_PROT_READ | VM_PROT_WRITE);

	return (hypctx);
}

static int
arm_vmm_pinit(pmap_t pmap)
{

	pmap_pinit_stage(pmap, PM_STAGE2, vmm_pmap_levels);
	return (1);
}

struct vmspace *
vmmops_vmspace_alloc(vm_offset_t min, vm_offset_t max)
{
	return (vmspace_alloc(min, max, arm_vmm_pinit));
}

void
vmmops_vmspace_free(struct vmspace *vmspace)
{

	pmap_remove_pages(vmspace_pmap(vmspace));
	vmspace_free(vmspace);
}

static inline void
arm64_print_hyp_regs(struct vm_exit *vme)
{
	printf("esr_el2:   0x%016lx\n", vme->u.hyp.esr_el2);
	printf("far_el2:   0x%016lx\n", vme->u.hyp.far_el2);
	printf("hpfar_el2: 0x%016lx\n", vme->u.hyp.hpfar_el2);
	printf("elr_el2:   0x%016lx\n", vme->pc);
}

static void
arm64_gen_inst_emul_data(struct hypctx *hypctx, uint32_t esr_iss,
    struct vm_exit *vme_ret)
{
	struct vm_guest_paging *paging;
	struct vie *vie;
	uint32_t esr_sas, reg_num;

	/*
	 * Get the page address from HPFAR_EL2.
	 */
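	/*
	 * HPFAR_EL2 only reports the page-aligned part of the faulting
	 * IPA. For example, a fault at IPA 0x40001234 shows up as a
	 * faulting page of 0x40001000 here, with the 0x234 offset
	 * recovered from FAR_EL2 below.
	 */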
	vme_ret->u.inst_emul.gpa =
	    HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
	/* Bits [11:0] are the same as bits [11:0] from the virtual address. */
	vme_ret->u.inst_emul.gpa += hypctx->exit_info.far_el2 &
	    FAR_EL2_HPFAR_PAGE_MASK;

	esr_sas = (esr_iss & ISS_DATA_SAS_MASK) >> ISS_DATA_SAS_SHIFT;
	reg_num = (esr_iss & ISS_DATA_SRT_MASK) >> ISS_DATA_SRT_SHIFT;

	vie = &vme_ret->u.inst_emul.vie;
	vie->access_size = 1 << esr_sas;
	vie->sign_extend = (esr_iss & ISS_DATA_SSE) ? 1 : 0;
	vie->dir = (esr_iss & ISS_DATA_WnR) ? VM_DIR_WRITE : VM_DIR_READ;
	vie->reg = reg_num;

	paging = &vme_ret->u.inst_emul.paging;
	paging->ttbr0_addr = hypctx->ttbr0_el1 & ~(TTBR_ASID_MASK | TTBR_CnP);
	paging->ttbr1_addr = hypctx->ttbr1_el1 & ~(TTBR_ASID_MASK | TTBR_CnP);
	paging->tcr_el1 = hypctx->tcr_el1;
	paging->tcr2_el1 = hypctx->tcr2_el1;
	paging->flags = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);
	if ((hypctx->sctlr_el1 & SCTLR_M) != 0)
		paging->flags |= VM_GP_MMU_ENABLED;
}

static void
arm64_gen_reg_emul_data(uint32_t esr_iss, struct vm_exit *vme_ret)
{
	uint32_t reg_num;
	struct vre *vre;

	/* u.hyp member will be replaced by u.reg_emul */
	vre = &vme_ret->u.reg_emul.vre;

	vre->inst_syndrome = esr_iss;
	/* ARMv8 Architecture Manual, p. D7-2273: 1 means read */
	vre->dir = (esr_iss & ISS_MSR_DIR) ? VM_DIR_READ : VM_DIR_WRITE;
	reg_num = ISS_MSR_Rt(esr_iss);
	vre->reg = reg_num;
}

void
raise_data_insn_abort(struct hypctx *hypctx, uint64_t far, bool dabort, int fsc)
{
	uint64_t esr;

	if ((hypctx->tf.tf_spsr & PSR_M_MASK) == PSR_M_EL0t)
		esr = EXCP_INSN_ABORT_L << ESR_ELx_EC_SHIFT;
	else
		esr = EXCP_INSN_ABORT << ESR_ELx_EC_SHIFT;
	/* Set the bit that changes from insn -> data abort */
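	/*
	 * This relies on the EC encodings differing in a single bit:
	 * EXCP_INSN_ABORT(_L) is 0x21 (0x20) and EXCP_DATA_ABORT(_L) is
	 * 0x25 (0x24), so OR-ing in the data abort code converts an insn
	 * abort EC into the corresponding data abort EC.
	 */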
	if (dabort)
		esr |= EXCP_DATA_ABORT_L << ESR_ELx_EC_SHIFT;
	/* Set the IL bit if set by hardware */
	esr |= hypctx->tf.tf_esr & ESR_ELx_IL;

	vmmops_exception(hypctx, esr | fsc, far);
}

static int
handle_el1_sync_excp(struct hypctx *hypctx, struct vm_exit *vme_ret,
    pmap_t pmap)
{
	uint64_t gpa;
	uint32_t esr_ec, esr_iss;

	esr_ec = ESR_ELx_EXCEPTION(hypctx->tf.tf_esr);
	esr_iss = hypctx->tf.tf_esr & ESR_ELx_ISS_MASK;

	switch (esr_ec) {
	case EXCP_UNKNOWN:
		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNKNOWN, 1);
		arm64_print_hyp_regs(vme_ret);
		vme_ret->exitcode = VM_EXITCODE_HYP;
		break;
	case EXCP_TRAP_WFI_WFE:
		if ((hypctx->tf.tf_esr & 0x3) == 0) { /* WFI */
			vmm_stat_incr(hypctx->vcpu, VMEXIT_WFI, 1);
			vme_ret->exitcode = VM_EXITCODE_WFI;
		} else {
			vmm_stat_incr(hypctx->vcpu, VMEXIT_WFE, 1);
			vme_ret->exitcode = VM_EXITCODE_HYP;
		}
		break;
	case EXCP_HVC:
		vmm_stat_incr(hypctx->vcpu, VMEXIT_HVC, 1);
		vme_ret->exitcode = VM_EXITCODE_HVC;
		break;
	case EXCP_MSR:
		vmm_stat_incr(hypctx->vcpu, VMEXIT_MSR, 1);
		arm64_gen_reg_emul_data(esr_iss, vme_ret);
		vme_ret->exitcode = VM_EXITCODE_REG_EMUL;
		break;
	case EXCP_BRK:
		vmm_stat_incr(hypctx->vcpu, VMEXIT_BRK, 1);
		vme_ret->exitcode = VM_EXITCODE_BRK;
		break;
	case EXCP_SOFTSTP_EL0:
		vmm_stat_incr(hypctx->vcpu, VMEXIT_SS, 1);
		vme_ret->exitcode = VM_EXITCODE_SS;
		break;
	case EXCP_INSN_ABORT_L:
	case EXCP_DATA_ABORT_L:
		vmm_stat_incr(hypctx->vcpu, esr_ec == EXCP_DATA_ABORT_L ?
		    VMEXIT_DATA_ABORT : VMEXIT_INSN_ABORT, 1);
		switch (hypctx->tf.tf_esr & ISS_DATA_DFSC_MASK) {
		case ISS_DATA_DFSC_TF_L0:
		case ISS_DATA_DFSC_TF_L1:
		case ISS_DATA_DFSC_TF_L2:
		case ISS_DATA_DFSC_TF_L3:
		case ISS_DATA_DFSC_AFF_L1:
		case ISS_DATA_DFSC_AFF_L2:
		case ISS_DATA_DFSC_AFF_L3:
		case ISS_DATA_DFSC_PF_L1:
		case ISS_DATA_DFSC_PF_L2:
		case ISS_DATA_DFSC_PF_L3:
			gpa = HPFAR_EL2_FIPA_ADDR(hypctx->exit_info.hpfar_el2);
			/* Check the IPA is valid */
			if (gpa >= (1ul << vmm_max_ipa_bits)) {
				raise_data_insn_abort(hypctx,
				    hypctx->exit_info.far_el2,
				    esr_ec == EXCP_DATA_ABORT_L,
				    ISS_DATA_DFSC_ASF_L0);
				vme_ret->inst_length = 0;
				return (HANDLED);
			}

			if (vm_mem_allocated(hypctx->vcpu, gpa)) {
				vme_ret->exitcode = VM_EXITCODE_PAGING;
				vme_ret->inst_length = 0;
				vme_ret->u.paging.esr = hypctx->tf.tf_esr;
				vme_ret->u.paging.gpa = gpa;
			} else if (esr_ec == EXCP_INSN_ABORT_L) {
				/*
				 * Raise an external abort. Device memory is
				 * not executable
				 */
				raise_data_insn_abort(hypctx,
				    hypctx->exit_info.far_el2, false,
				    ISS_DATA_DFSC_EXT);
				vme_ret->inst_length = 0;
				return (HANDLED);
			} else {
				arm64_gen_inst_emul_data(hypctx, esr_iss,
				    vme_ret);
				vme_ret->exitcode = VM_EXITCODE_INST_EMUL;
			}
			break;
		default:
			arm64_print_hyp_regs(vme_ret);
			vme_ret->exitcode = VM_EXITCODE_HYP;
			break;
		}

		break;

	default:
		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_SYNC, 1);
		arm64_print_hyp_regs(vme_ret);
		vme_ret->exitcode = VM_EXITCODE_HYP;
		break;
	}

	/* We don't do any instruction emulation here */
	return (UNHANDLED);
}

static int
arm64_handle_world_switch(struct hypctx *hypctx, int excp_type,
    struct vm_exit *vme, pmap_t pmap)
{
	int handled;

	switch (excp_type) {
	case EXCP_TYPE_EL1_SYNC:
		/* The exit code will be set by handle_el1_sync_excp(). */
		handled = handle_el1_sync_excp(hypctx, vme, pmap);
		break;

	case EXCP_TYPE_EL1_IRQ:
	case EXCP_TYPE_EL1_FIQ:
		/* The host kernel will handle IRQs and FIQs. */
		vmm_stat_incr(hypctx->vcpu,
		    excp_type == EXCP_TYPE_EL1_IRQ ? VMEXIT_IRQ : VMEXIT_FIQ,
		    1);
		vme->exitcode = VM_EXITCODE_BOGUS;
		handled = UNHANDLED;
		break;

	case EXCP_TYPE_EL1_ERROR:
	case EXCP_TYPE_EL2_SYNC:
	case EXCP_TYPE_EL2_IRQ:
	case EXCP_TYPE_EL2_FIQ:
	case EXCP_TYPE_EL2_ERROR:
		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED_EL2, 1);
		vme->exitcode = VM_EXITCODE_BOGUS;
		handled = UNHANDLED;
		break;

	default:
		vmm_stat_incr(hypctx->vcpu, VMEXIT_UNHANDLED, 1);
		vme->exitcode = VM_EXITCODE_BOGUS;
		handled = UNHANDLED;
		break;
	}

	return (handled);
}

static void
ptp_release(void **cookie)
{
	if (*cookie != NULL) {
		vm_gpa_release(*cookie);
		*cookie = NULL;
	}
}

static void *
ptp_hold(struct vcpu *vcpu, vm_paddr_t ptpphys, size_t len, void **cookie)
{
	void *ptr;

	ptp_release(cookie);
	ptr = vm_gpa_hold(vcpu, ptpphys, len, VM_PROT_RW, cookie);
	return (ptr);
}

/* log2 of the number of bytes in a page table entry */
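/*
 * With a 4K granule this gives 4096 / 8 = 512 entries per table, i.e.
 * each level of the walk resolves 9 bits of the virtual address.
 */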
#define	PTE_SHIFT	3
int
vmmops_gla2gpa(void *vcpui, struct vm_guest_paging *paging, uint64_t gla,
    int prot, uint64_t *gpa, int *is_fault)
{
	struct hypctx *hypctx;
	void *cookie;
	uint64_t mask, *ptep, pte, pte_addr;
	int address_bits, granule_shift, ia_bits, levels, pte_shift, tsz;
	bool is_el0;

	/* Check if the MMU is off */
	if ((paging->flags & VM_GP_MMU_ENABLED) == 0) {
		*is_fault = 0;
		*gpa = gla;
		return (0);
	}

	is_el0 = (paging->flags & PSR_M_MASK) == PSR_M_EL0t;

	if (ADDR_IS_KERNEL(gla)) {
		/* If address translation is disabled raise an exception */
		if ((paging->tcr_el1 & TCR_EPD1) != 0) {
			*is_fault = 1;
			return (0);
		}
		if (is_el0 && (paging->tcr_el1 & TCR_E0PD1) != 0) {
			*is_fault = 1;
			return (0);
		}
		pte_addr = paging->ttbr1_addr;
		tsz = (paging->tcr_el1 & TCR_T1SZ_MASK) >> TCR_T1SZ_SHIFT;
		/* If TBI is on, set the top (tag) byte back to all ones */
		if ((paging->tcr_el1 & TCR_TBI1) != 0)
			gla |= (0xfful << 56);
		switch (paging->tcr_el1 & TCR_TG1_MASK) {
		case TCR_TG1_4K:
			granule_shift = PAGE_SHIFT_4K;
			break;
		case TCR_TG1_16K:
			granule_shift = PAGE_SHIFT_16K;
			break;
		case TCR_TG1_64K:
			granule_shift = PAGE_SHIFT_64K;
			break;
		default:
			*is_fault = 1;
			return (EINVAL);
		}
	} else {
		/* If address translation is disabled raise an exception */
		if ((paging->tcr_el1 & TCR_EPD0) != 0) {
			*is_fault = 1;
			return (0);
		}
		if (is_el0 && (paging->tcr_el1 & TCR_E0PD0) != 0) {
			*is_fault = 1;
			return (0);
		}
		pte_addr = paging->ttbr0_addr;
		tsz = (paging->tcr_el1 & TCR_T0SZ_MASK) >> TCR_T0SZ_SHIFT;
		/* Clear the top (tag) byte if TBI is on */
		if ((paging->tcr_el1 & TCR_TBI0) != 0)
			gla &= ~(0xfful << 56);
		switch (paging->tcr_el1 & TCR_TG0_MASK) {
		case TCR_TG0_4K:
			granule_shift = PAGE_SHIFT_4K;
			break;
		case TCR_TG0_16K:
			granule_shift = PAGE_SHIFT_16K;
			break;
		case TCR_TG0_64K:
			granule_shift = PAGE_SHIFT_64K;
			break;
		default:
			*is_fault = 1;
			return (EINVAL);
		}
	}

	/*
	 * TODO: Support FEAT_TTST for smaller tsz values and FEAT_LPA2
	 * for larger values.
	 */
	switch (granule_shift) {
	case PAGE_SHIFT_4K:
	case PAGE_SHIFT_16K:
		/*
		 * See "Table D8-11 4KB granule, determining stage 1 initial
		 * lookup level" and "Table D8-21 16KB granule, determining
		 * stage 1 initial lookup level" from the "Arm Architecture
		 * Reference Manual for A-Profile architecture" revision I.a
		 * for the minimum and maximum values.
		 *
		 * TODO: Support less than 16 when FEAT_LPA2 is implemented
		 * and TCR_EL1.DS == 1
		 * TODO: Support more than 39 when FEAT_TTST is implemented
		 */
		if (tsz < 16 || tsz > 39) {
			*is_fault = 1;
			return (EINVAL);
		}
		break;
	case PAGE_SHIFT_64K:
	/* TODO: Support 64k granule. It will probably work, but is untested */
	default:
		*is_fault = 1;
		return (EINVAL);
	}

	/*
	 * Calculate the input address bits. These are 64 bit in an address
	 * with the top tsz bits being all 0 or all 1.
	 */
	ia_bits = 64 - tsz;

	/*
	 * Calculate the number of address bits used in the page table
	 * calculation. This is ia_bits minus the bottom granule_shift
	 * bits that are passed to the output address.
	 */
	address_bits = ia_bits - granule_shift;

	/*
	 * Calculate the number of levels. Each level uses
	 * granule_shift - PTE_SHIFT bits of the input address.
	 * This is because the table is 1 << granule_shift and each
	 * entry is 1 << PTE_SHIFT bytes.
	 */
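	/*
	 * For example, with a 4K granule (granule_shift 12, so 9 bits
	 * per level) and tsz 25: ia_bits = 39, address_bits = 27 and
	 * levels = howmany(27, 9) = 3.
	 */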
	levels = howmany(address_bits, granule_shift - PTE_SHIFT);

	/* Mask off the unused upper bits of the virtual address */
	gla &= (1ul << ia_bits) - 1;
	hypctx = (struct hypctx *)vcpui;
	cookie = NULL;
	/* TODO: Check if the level supports block descriptors */
	for (; levels > 0; levels--) {
		int idx;

		pte_shift = (levels - 1) * (granule_shift - PTE_SHIFT) +
		    granule_shift;
		idx = (gla >> pte_shift) &
		    ((1ul << (granule_shift - PTE_SHIFT)) - 1);
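		/*
		 * The guest's granule may be larger than the host page size,
		 * so step pte_addr forward to the host page that holds the
		 * indexed entry.
		 */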
		while (idx > PAGE_SIZE / sizeof(pte)) {
			idx -= PAGE_SIZE / sizeof(pte);
			pte_addr += PAGE_SIZE;
		}

		ptep = ptp_hold(hypctx->vcpu, pte_addr, PAGE_SIZE, &cookie);
		if (ptep == NULL)
			goto error;
		pte = ptep[idx];

		/* Calculate the level we are looking at */
		switch (levels) {
		default:
			goto fault;
		/* TODO: Level -1 when FEAT_LPA2 is implemented */
		case 4: /* Level 0 */
			if ((pte & ATTR_DESCR_MASK) != L0_TABLE)
				goto fault;
			/* FALLTHROUGH */
		case 3: /* Level 1 */
		case 2: /* Level 2 */
			switch (pte & ATTR_DESCR_MASK) {
			/* Use L1 macro as all levels are the same */
			case L1_TABLE:
				/* Check if EL0 can access this address space */
				if (is_el0 &&
				    (pte & TATTR_AP_TABLE_NO_EL0) != 0)
					goto fault;
				/* Check if the address space is writable */
				if ((prot & PROT_WRITE) != 0 &&
				    (pte & TATTR_AP_TABLE_RO) != 0)
					goto fault;
				if ((prot & PROT_EXEC) != 0) {
					/* Check the table exec attribute */
					if ((is_el0 &&
					    (pte & TATTR_UXN_TABLE) != 0) ||
					    (!is_el0 &&
					    (pte & TATTR_PXN_TABLE) != 0))
						goto fault;
				}
				pte_addr = pte & ~ATTR_MASK;
				break;
			case L1_BLOCK:
				goto done;
			default:
				goto fault;
			}
			break;
		case 1: /* Level 3 */
			if ((pte & ATTR_DESCR_MASK) == L3_PAGE)
				goto done;
			goto fault;
		}
	}

done:
	/* Check if EL0 has access to the block/page */
	if (is_el0 && (pte & ATTR_S1_AP(ATTR_S1_AP_USER)) == 0)
		goto fault;
	if ((prot & PROT_WRITE) != 0 && (pte & ATTR_S1_AP_RW_BIT) != 0)
		goto fault;
	if ((prot & PROT_EXEC) != 0) {
		if ((is_el0 && (pte & ATTR_S1_UXN) != 0) ||
		    (!is_el0 && (pte & ATTR_S1_PXN) != 0))
			goto fault;
	}
	mask = (1ul << pte_shift) - 1;
	*gpa = (pte & ~ATTR_MASK) | (gla & mask);
	*is_fault = 0;
	ptp_release(&cookie);
	return (0);

error:
	ptp_release(&cookie);
	return (EFAULT);
fault:
	*is_fault = 1;
	ptp_release(&cookie);
	return (0);
}

int
vmmops_run(void *vcpui, register_t pc, pmap_t pmap, struct vm_eventinfo *evinfo)
{
	uint64_t excp_type;
	int handled;
	register_t daif;
	struct hyp *hyp;
	struct hypctx *hypctx;
	struct vcpu *vcpu;
	struct vm_exit *vme;
	int mode;

	hypctx = (struct hypctx *)vcpui;
	hyp = hypctx->hyp;
	vcpu = hypctx->vcpu;
	vme = vm_exitinfo(vcpu);

	hypctx->tf.tf_elr = (uint64_t)pc;

	for (;;) {
		if (hypctx->has_exception) {
			hypctx->has_exception = false;
			hypctx->elr_el1 = hypctx->tf.tf_elr;

			mode = hypctx->tf.tf_spsr & (PSR_M_MASK | PSR_M_32);

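			/*
			 * Vector to the guest's synchronous exception
			 * handler in VBAR_EL1: offset 0x0 is for EL1 with
			 * SP_EL0, 0x200 for EL1 with SP_EL1, 0x400 for
			 * 64-bit EL0 and 0x600 for 32-bit EL0.
			 */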
			if (mode == PSR_M_EL1t) {
				hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x0;
			} else if (mode == PSR_M_EL1h) {
				hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x200;
			} else if ((mode & PSR_M_32) == PSR_M_64) {
				/* 64-bit EL0 */
				hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x400;
			} else {
				/* 32-bit EL0 */
				hypctx->tf.tf_elr = hypctx->vbar_el1 + 0x600;
			}

			/* Set the new spsr */
			hypctx->spsr_el1 = hypctx->tf.tf_spsr;

			/* Set the new cpsr */
			hypctx->tf.tf_spsr = hypctx->spsr_el1 & PSR_FLAGS;
			hypctx->tf.tf_spsr |= PSR_DAIF | PSR_M_EL1h;

			/*
			 * Update fields that may change on exception entry
			 * based on how sctlr_el1 is configured.
			 */
			if ((hypctx->sctlr_el1 & SCTLR_SPAN) == 0)
				hypctx->tf.tf_spsr |= PSR_PAN;
			if ((hypctx->sctlr_el1 & SCTLR_DSSBS) == 0)
				hypctx->tf.tf_spsr &= ~PSR_SSBS;
			else
				hypctx->tf.tf_spsr |= PSR_SSBS;
		}

		daif = intr_disable();

		/* Check if the vcpu is suspended */
		if (vcpu_suspended(evinfo)) {
			intr_restore(daif);
			vm_exit_suspended(vcpu, pc);
			break;
		}

		if (vcpu_debugged(vcpu)) {
			intr_restore(daif);
			vm_exit_debug(vcpu, pc);
			break;
		}

		/* Activate the stage2 pmap so the vmid is valid */
		pmap_activate_vm(pmap);
		hyp->vttbr_el2 = pmap_to_ttbr0(pmap);

		/*
		 * TODO: What happens if a timer interrupt is asserted exactly
		 * here, but for the previous VM?
		 */
		arm64_set_active_vcpu(hypctx);
		vgic_flush_hwstate(hypctx);

		/* Call into EL2 to switch to the guest */
		excp_type = vmm_enter_guest(hyp, hypctx);

		vgic_sync_hwstate(hypctx);
		vtimer_sync_hwstate(hypctx);

		/*
		 * Deactivate the stage2 pmap.
		 */
		PCPU_SET(curvmpmap, NULL);
		intr_restore(daif);

		vmm_stat_incr(vcpu, VMEXIT_COUNT, 1);
		if (excp_type == EXCP_TYPE_MAINT_IRQ)
			continue;

		vme->pc = hypctx->tf.tf_elr;
		vme->inst_length = INSN_SIZE;
		vme->u.hyp.exception_nr = excp_type;
		vme->u.hyp.esr_el2 = hypctx->tf.tf_esr;
		vme->u.hyp.far_el2 = hypctx->exit_info.far_el2;
		vme->u.hyp.hpfar_el2 = hypctx->exit_info.hpfar_el2;

		handled = arm64_handle_world_switch(hypctx, excp_type, vme,
		    pmap);
		if (handled == UNHANDLED)
			/* Exit loop to emulate instruction. */
			break;
		else
			/* Resume guest execution from the next instruction. */
			hypctx->tf.tf_elr += vme->inst_length;
	}

	return (0);
}

static void
arm_pcpu_vmcleanup(void *arg)
{
	struct hyp *hyp;
	int i, maxcpus;

	hyp = arg;
	maxcpus = vm_get_maxcpus(hyp->vm);
	for (i = 0; i < maxcpus; i++) {
		if (arm64_get_active_vcpu() == hyp->ctx[i]) {
			arm64_set_active_vcpu(NULL);
			break;
		}
	}
}

void
vmmops_vcpu_cleanup(void *vcpui)
{
	struct hypctx *hypctx = vcpui;

	vtimer_cpucleanup(hypctx);
	vgic_cpucleanup(hypctx);

	if (!in_vhe())
		vmmpmap_remove(hypctx->el2_addr, el2_hypctx_size(), true);

	free(hypctx, M_HYP);
}

void
vmmops_cleanup(void *vmi)
{
	struct hyp *hyp = vmi;

	vtimer_vmcleanup(hyp);
	vgic_vmcleanup(hyp);

	smp_rendezvous(NULL, arm_pcpu_vmcleanup, NULL, hyp);

	if (!in_vhe())
		vmmpmap_remove(hyp->el2_addr, el2_hyp_size(hyp->vm), true);

	free(hyp, M_HYP);
}

/*
 * Return register value. Registers have different sizes and an explicit cast
 * must be made to ensure proper conversion.
 */
static uint64_t *
hypctx_regptr(struct hypctx *hypctx, int reg)
{
	switch (reg) {
	case VM_REG_GUEST_X0 ... VM_REG_GUEST_X29:
		return (&hypctx->tf.tf_x[reg]);
	case VM_REG_GUEST_LR:
		return (&hypctx->tf.tf_lr);
	case VM_REG_GUEST_SP:
		return (&hypctx->tf.tf_sp);
	case VM_REG_GUEST_CPSR:
		return (&hypctx->tf.tf_spsr);
	case VM_REG_GUEST_PC:
		return (&hypctx->tf.tf_elr);
	case VM_REG_GUEST_SCTLR_EL1:
		return (&hypctx->sctlr_el1);
	case VM_REG_GUEST_TTBR0_EL1:
		return (&hypctx->ttbr0_el1);
	case VM_REG_GUEST_TTBR1_EL1:
		return (&hypctx->ttbr1_el1);
	case VM_REG_GUEST_TCR_EL1:
		return (&hypctx->tcr_el1);
	case VM_REG_GUEST_TCR2_EL1:
		return (&hypctx->tcr2_el1);
	case VM_REG_GUEST_MPIDR_EL1:
		return (&hypctx->vmpidr_el2);
	default:
		break;
	}
	return (NULL);
}

int
vmmops_getreg(void *vcpui, int reg, uint64_t *retval)
{
	uint64_t *regp;
	int running, hostcpu;
	struct hypctx *hypctx = vcpui;

	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("arm_getreg: %s%d is running", vm_name(hypctx->hyp->vm),
		    vcpu_vcpuid(hypctx->vcpu));

	regp = hypctx_regptr(hypctx, reg);
	if (regp == NULL)
		return (EINVAL);

	*retval = *regp;
	return (0);
}

int
vmmops_setreg(void *vcpui, int reg, uint64_t val)
{
	uint64_t *regp;
	struct hypctx *hypctx = vcpui;
	int running, hostcpu;

	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("arm_setreg: %s%d is running", vm_name(hypctx->hyp->vm),
		    vcpu_vcpuid(hypctx->vcpu));

	regp = hypctx_regptr(hypctx, reg);
	if (regp == NULL)
		return (EINVAL);

	*regp = val;
	return (0);
}

int
vmmops_exception(void *vcpui, uint64_t esr, uint64_t far)
{
	struct hypctx *hypctx = vcpui;
	int running, hostcpu;

	running = vcpu_is_running(hypctx->vcpu, &hostcpu);
	if (running && hostcpu != curcpu)
		panic("%s: %s%d is running", __func__,
		    vm_name(hypctx->hyp->vm), vcpu_vcpuid(hypctx->vcpu));

	hypctx->far_el1 = far;
	hypctx->esr_el1 = esr;
	hypctx->has_exception = true;

	return (0);
}

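/*
 * Capability state is tracked in hypctx->setcaps as a bitmask indexed by
 * capability number, e.g. bit (1ul << VM_CAP_BRK_EXIT) records that BRK
 * exits were enabled via vmmops_setcap().
 */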
int
vmmops_getcap(void *vcpui, int num, int *retval)
{
	struct hypctx *hypctx = vcpui;
	int ret;

	ret = ENOENT;

	switch (num) {
	case VM_CAP_UNRESTRICTED_GUEST:
		*retval = 1;
		ret = 0;
		break;
	case VM_CAP_BRK_EXIT:
	case VM_CAP_SS_EXIT:
	case VM_CAP_MASK_HWINTR:
		*retval = (hypctx->setcaps & (1ul << num)) != 0;
		break;
	default:
		break;
	}

	return (ret);
}

int
vmmops_setcap(void *vcpui, int num, int val)
{
	struct hypctx *hypctx = vcpui;
	int ret;

	ret = 0;

	switch (num) {
	case VM_CAP_BRK_EXIT:
		if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
			break;
		if (val != 0)
			hypctx->mdcr_el2 |= MDCR_EL2_TDE;
		else
			hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
		break;
	case VM_CAP_SS_EXIT:
		if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
			break;

		if (val != 0) {
			hypctx->debug_spsr |= (hypctx->tf.tf_spsr & PSR_SS);
			hypctx->debug_mdscr |= hypctx->mdscr_el1 &
			    (MDSCR_SS | MDSCR_KDE);

			hypctx->tf.tf_spsr |= PSR_SS;
			hypctx->mdscr_el1 |= MDSCR_SS | MDSCR_KDE;
			hypctx->mdcr_el2 |= MDCR_EL2_TDE;
		} else {
			hypctx->tf.tf_spsr &= ~PSR_SS;
			hypctx->tf.tf_spsr |= hypctx->debug_spsr;
			hypctx->debug_spsr &= ~PSR_SS;
			hypctx->mdscr_el1 &= ~(MDSCR_SS | MDSCR_KDE);
			hypctx->mdscr_el1 |= hypctx->debug_mdscr;
			hypctx->debug_mdscr &= ~(MDSCR_SS | MDSCR_KDE);
			hypctx->mdcr_el2 &= ~MDCR_EL2_TDE;
		}
		break;
	case VM_CAP_MASK_HWINTR:
		if ((val != 0) == ((hypctx->setcaps & (1ul << num)) != 0))
			break;

		if (val != 0) {
			hypctx->debug_spsr |= (hypctx->tf.tf_spsr &
			    (PSR_I | PSR_F));
			hypctx->tf.tf_spsr |= PSR_I | PSR_F;
		} else {
			hypctx->tf.tf_spsr &= ~(PSR_I | PSR_F);
			hypctx->tf.tf_spsr |= (hypctx->debug_spsr &
			    (PSR_I | PSR_F));
			hypctx->debug_spsr &= ~(PSR_I | PSR_F);
		}
		break;
	default:
		ret = ENOENT;
		break;
	}

	if (ret == 0) {
		if (val == 0)
			hypctx->setcaps &= ~(1ul << num);
		else
			hypctx->setcaps |= (1ul << num);
	}

	return (ret);
}