GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/amd64/vmm/io/vlapic.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright (c) 2019 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_bhyve_snapshot.h"

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/smp.h>

#include <x86/specialreg.h>
#include <x86/apicreg.h>

#include <machine/clock.h>
#include <machine/smp.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>

#include <dev/vmm/vmm_ktr.h>
#include <dev/vmm/vmm_vm.h>

#include "vmm_lapic.h"
#include "vmm_stat.h"

#include "vlapic.h"
#include "vlapic_priv.h"
#include "vioapic.h"

#define PRIO(x)		((x) >> 4)

#define VLAPIC_VERSION	(0x14)

#define x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)

/*
 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
 * vlapic_callout_handler() and vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - timer LVT register
 */
#define VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
#define VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
#define VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))

/*
 * APIC timer frequency:
 * - arbitrary but chosen to be in the ballpark of contemporary hardware.
 * - power-of-two to avoid loss of precision when converted to a bintime.
 */
#define VLAPIC_BUS_FREQ	(128 * 1024 * 1024)

static void vlapic_set_error(struct vlapic *, uint32_t, bool);
static void vlapic_callout_handler(void *arg);
static void vlapic_reset(struct vlapic *vlapic);

static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{

	if (x2apic(vlapic))
		return (vlapic->vcpuid);
	else
		return (vlapic->vcpuid << 24);
}

static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
	int apicid;
	uint32_t ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1 << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}
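
/*
 * Worked example of the LDR derivation above: in x2APIC mode the 32-bit
 * logical ID is (cluster << 16) | (1 << position), where the cluster is
 * 'apicid >> 4' and the position is 'apicid & 0xf'.  A vcpu with APIC ID
 * 0x21 therefore gets ldr = 0x00020002 (cluster 2, bit 1 set).
 */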

void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (x2apic(vlapic)) {
		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
		    lapic->dfr);
		lapic->dfr = 0;
		return;
	}

	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;

	if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
	else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
	else
		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
}

void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (x2apic(vlapic)) {
		VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
		    lapic->ldr);
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
	}
}

void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}

static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}
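
/*
 * The divide configuration register encodes the divisor in bits 0, 1 and
 * 3 (bit 2 is reserved), hence the 'dcr & 0xB' mask above.  For example,
 * per the APIC_TDCR_* encodings in x86/apicreg.h, a guest writing
 * APIC_TDCR_8 divides VLAPIC_BUS_FREQ (128 * 1024 * 1024 Hz) by 8,
 * giving a 16 * 1024 * 1024 Hz timer tick.
 */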

#if 0
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif

static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct bintime bt_now, bt_rem;
	struct LAPIC *lapic __diagused;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */
		binuptime(&bt_now);
		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
			bt_rem = vlapic->timer_fire_bt;
			bintime_sub(&bt_rem, &bt_now);
			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
	    "icr_timer is %#x", ccr, lapic->icr_timer));
	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
	    ccr, lapic->icr_timer);
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}
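
/*
 * Illustration of the conversion above: 'timer_freq_bt' holds the
 * duration of one timer tick, so the remaining bintime is split into
 * whole seconds (each worth BT2FREQ ticks) plus a fractional part.
 * With a 16 * 1024 * 1024 Hz tick and 0.5 s remaining, the fractional
 * term is 2^63 / (2^64 / 16M) = 8 * 1024 * 1024 ticks.
 */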

void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	divisor = vlapic_timer_divisor(lapic->dcr_timer);
	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
	    lapic->dcr_timer, divisor);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}

int
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
		    "interrupt %d", vector);
		return (0);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR,
		    false);
		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
		    vector);
		return (1);
	}

	if (vlapic->ops.set_intr_ready)
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));

	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);

	irrptr = &lapic->irr0;
	atomic_set_int(&irrptr[idx], mask);

	/*
	 * Verify that the trigger-mode of the interrupt matches with
	 * the vlapic TMR registers.
	 */
	tmrptr = &lapic->tmr0;
	if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
		VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
		    "interrupt is %s-triggered", idx / 4, tmrptr[idx],
		    level ? "level" : "edge");
	}

	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
	return (1);
}
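
/*
 * Illustration of the IRR indexing above: the 256 vectors are spread
 * across eight 32-bit registers laid out 16 bytes apart in struct LAPIC,
 * so 'idx' advances in units of four uint32_t's.  Vector 0x45 (69) lands
 * in IRR2 with idx = (69 / 32) * 4 = 8 and mask = 1 << (69 % 32) = 1 << 5.
 */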

static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int i;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		return (&lapic->lvt_cmci);
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
		return ((&lapic->lvt_timer) + i);
	default:
		panic("vlapic_get_lvt: invalid LVT\n");
	}
}

static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %#x", index, offset));

	return (index);
}

static __inline uint32_t
vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
{
	int idx;
	uint32_t val;

	idx = lvt_off_to_idx(offset);
	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
	return (val);
}

void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}

static void
vlapic_mask_lvts(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	lapic->lvt_cmci |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);

	lapic->lvt_timer |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);

	lapic->lvt_thermal |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);

	lapic->lvt_pcint |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);

	lapic->lvt_lint0 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);

	lapic->lvt_lint1 |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);

	lapic->lvt_error |= APIC_LVT_M;
	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
}

static int
vlapic_fire_lvt(struct vlapic *vlapic, u_int lvt)
{
	uint32_t mode, reg, vec;

	reg = atomic_load_acq_32(&vlapic->lvt_last[lvt]);

	if (reg & APIC_LVT_M)
		return (0);
	vec = reg & APIC_LVT_VECTOR;
	mode = reg & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    lvt == APIC_LVT_ERROR);
			return (0);
		}
		if (vlapic_set_intr_ready(vlapic, vec, false))
			vcpu_notify_lapic(vlapic->vcpu);
		break;
	case APIC_LVT_DM_NMI:
		vm_inject_nmi(vlapic->vcpu);
		break;
	case APIC_LVT_DM_EXTINT:
		vm_inject_extint(vlapic->vcpu);
		break;
	default:
		/* Other modes ignored */
		return (0);
	}
	return (1);
}

#if 1
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrptr;

	isrptr = &vlapic->apic_page->isr0;
	for (i = 0; i < 8; i++)
		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif

/*
 * Algorithm adopted from section "Interrupt, Task and Processor Priority"
 * in Intel Architecture Manual Vol 3a.
 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	/*
	 * Note that the value on the stack at index 0 is always 0.
	 *
	 * This is a placeholder for the value of ISRV when none of the
	 * bits is set in the ISRx registers.
	 */
	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
	tpr = vlapic->apic_page->tpr;

#if 1
	{
		int i, lastprio, curprio, vector, idx;
		uint32_t *isrptr;

		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
			panic("isrvec_stk is corrupted: %d", isrvec);

		/*
		 * Make sure that the priority of the nested interrupts is
		 * always increasing.
		 */
		lastprio = -1;
		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
			curprio = PRIO(vlapic->isrvec_stk[i]);
			if (curprio <= lastprio) {
				dump_isrvec_stk(vlapic);
				panic("isrvec_stk does not satisfy invariant");
			}
			lastprio = curprio;
		}

		/*
		 * Make sure that each bit set in the ISRx registers has a
		 * corresponding entry on the isrvec stack.
		 */
		i = 1;
		isrptr = &vlapic->apic_page->isr0;
		for (vector = 0; vector < 256; vector++) {
			idx = (vector / 32) * 4;
			if (isrptr[idx] & (1 << (vector % 32))) {
				if (i > vlapic->isrvec_stk_top ||
				    vlapic->isrvec_stk[i] != vector) {
					dump_isrvec_stk(vlapic);
					panic("ISR and isrvec_stk out of sync");
				}
				i++;
			}
		}
	}
#endif

	if (PRIO(tpr) >= PRIO(isrvec))
		ppr = tpr;
	else
		ppr = isrvec & 0xf0;

	vlapic->apic_page->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}
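
/*
 * Worked example of the PPR rule above: with TPR = 0x40 and in-service
 * vector 0x35, PRIO(0x40) = 4 >= PRIO(0x35) = 3, so PPR = 0x40 and only
 * pending vectors above priority class 4 can be delivered.  With
 * TPR = 0x20 instead, PPR = 0x35 & 0xf0 = 0x30.
 */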

void
vlapic_sync_tpr(struct vlapic *vlapic)
{
	vlapic_update_ppr(vlapic);
}

static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt");

static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *isrptr, *tmrptr;
	int i, idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		bitpos = fls(isrptr[idx]);
		if (bitpos-- != 0) {
			if (vlapic->isrvec_stk_top <= 0) {
				panic("invalid vlapic isrvec_stk_top %d",
				    vlapic->isrvec_stk_top);
			}
			isrptr[idx] &= ~(1 << bitpos);
			vector = i * 32 + bitpos;
			VLAPIC_CTR1(vlapic, "EOI vector %d", vector);
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
			vlapic->isrvec_stk_top--;
			vlapic_update_ppr(vlapic);
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vioapic_process_eoi(vlapic->vm, vector);
			}
			return;
		}
	}
	VLAPIC_CTR0(vlapic, "Gratuitous EOI");
	vmm_stat_incr(vlapic->vcpu, VLAPIC_GRATUITOUS_EOI, 1);
}
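
/*
 * Note that a single EOI retires only the highest-priority in-service
 * vector: if vectors 0x81 and 0x45 are both set in the ISR, the scan
 * from ISR7 downward finds 0x81 first, clears it, pops the isrvec stack
 * and recomputes the PPR; vector 0x45 stays in service until the next
 * EOI.
 */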

static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{

	return (lvt & mask);
}

static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}

static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");

static void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask, bool lvt_error)
{

	vlapic->esr_pending |= mask;

	/*
	 * Avoid infinite recursion if the error LVT itself is configured with
	 * an illegal vector.
	 */
	if (lvt_error)
		return;

	if (vlapic_fire_lvt(vlapic, APIC_LVT_ERROR)) {
		vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_ERROR, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");

static void
vlapic_fire_timer(struct vlapic *vlapic)
{

	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));

	if (vlapic_fire_lvt(vlapic, APIC_LVT_TIMER)) {
		VLAPIC_CTR0(vlapic, "vlapic timer fired");
		vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_TIMER, 1);
	}
}

static VMM_STAT(VLAPIC_INTR_CMC,
    "corrected machine check interrupts generated by vlapic");

void
vlapic_fire_cmci(struct vlapic *vlapic)
{

	if (vlapic_fire_lvt(vlapic, APIC_LVT_CMCI)) {
		vmm_stat_incr(vlapic->vcpu, VLAPIC_INTR_CMC, 1);
	}
}

static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
    "lvts triggered");

int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{

	if (vlapic_enabled(vlapic) == false) {
		/*
		 * When the local APIC is global/hardware disabled,
		 * LINT[1:0] pins are configured as INTR and NMI pins,
		 * respectively.
		 */
		switch (vector) {
		case APIC_LVT_LINT0:
			vm_inject_extint(vlapic->vcpu);
			break;
		case APIC_LVT_LINT1:
			vm_inject_nmi(vlapic->vcpu);
			break;
		default:
			break;
		}
		return (0);
	}

	switch (vector) {
	case APIC_LVT_LINT0:
	case APIC_LVT_LINT1:
	case APIC_LVT_TIMER:
	case APIC_LVT_ERROR:
	case APIC_LVT_PMC:
	case APIC_LVT_THERMAL:
	case APIC_LVT_CMCI:
		if (vlapic_fire_lvt(vlapic, vector)) {
			vmm_stat_array_incr(vlapic->vcpu, LVTS_TRIGGERRED,
			    vector, 1);
		}
		break;
	default:
		return (EINVAL);
	}
	return (0);
}

static void
vlapic_callout_reset(struct vlapic *vlapic, sbintime_t t)
{
	callout_reset_sbt_curcpu(&vlapic->callout, t, 0,
	    vlapic_callout_handler, vlapic, 0);
}

static void
vlapic_callout_handler(void *arg)
{
	struct vlapic *vlapic;
	struct bintime bt, btnow;
	sbintime_t rem_sbt;

	vlapic = arg;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_pending(&vlapic->callout))	/* callout was reset */
		goto done;

	if (!callout_active(&vlapic->callout))	/* callout was stopped */
		goto done;

	callout_deactivate(&vlapic->callout);

	vlapic_fire_timer(vlapic);

	if (vlapic_periodic_timer(vlapic)) {
		binuptime(&btnow);
		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
		    ("vlapic callout at %#lx.%#lx, expected at %#lx.%#lx",
		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
		    vlapic->timer_fire_bt.frac));

		/*
		 * Compute the delta between when the timer was supposed to
		 * fire and the present time.
		 */
		bt = btnow;
		bintime_sub(&bt, &vlapic->timer_fire_bt);

		rem_sbt = bttosbt(vlapic->timer_period_bt);
		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
			/*
			 * Adjust the time until the next countdown downward
			 * to account for the lost time.
			 */
			rem_sbt -= bttosbt(bt);
		} else {
			/*
			 * If the delta is greater than the timer period then
			 * just reset our time base instead of trying to catch
			 * up.
			 */
			vlapic->timer_fire_bt = btnow;
			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
			    "usecs, period is %lu usecs - resetting time base",
			    bttosbt(bt) / SBT_1US,
			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
		}

		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
		vlapic_callout_reset(vlapic, rem_sbt);
	}
done:
	VLAPIC_TIMER_UNLOCK(vlapic);
}

void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	sbintime_t sbt;
	uint32_t icr_timer;

	VLAPIC_TIMER_LOCK(vlapic);

	lapic = vlapic->apic_page;
	icr_timer = lapic->icr_timer;

	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, icr_timer);

	if (icr_timer != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);

		sbt = bttosbt(vlapic->timer_period_bt);
		vlapic_callout_reset(vlapic, sbt);
	} else
		callout_stop(&vlapic->callout);

	VLAPIC_TIMER_UNLOCK(vlapic);
}
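
/*
 * Worked example combining the DCR and ICR handlers: with the divisor
 * set to 8 the timer ticks at VLAPIC_BUS_FREQ / 8 = 16 * 1024 * 1024 Hz,
 * so a guest writing 0x1000000 (16 * 1024 * 1024) to the initial count
 * register arms a callout one second in the future; in periodic mode the
 * timer then fires once per second.
 */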

/*
 * This function populates 'dmask' with the set of vcpus that match the
 * addressing specified by the (dest, phys, lowprio) tuple.
 *
 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
 * or xAPIC (8-bit) destination field.
 */
static void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		amask = vm_active_cpus(vm);
		if (vcpuid < vm_get_maxcpus(vm) && CPU_ISSET(vcpuid, &amask))
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		CPU_FOREACH_ISSET(vcpuid, &amask) {
			vlapic = vm_lapic(vm_vcpu(vm, vcpuid));
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				if (x2apic(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
				    "model %x - cannot deliver interrupt", dfr);
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}
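
/*
 * Worked example of the logical matching above, in the xAPIC flat model:
 * a message with dest = 0x05 selects every vcpu whose LDR high byte
 * intersects 0x05, so vcpus with ldr = 0x01000000 and ldr = 0x04000000
 * both match, while a lowprio request stops at the first matching vcpu.
 */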

static VMM_STAT(VLAPIC_IPI_SEND, "ipis sent from vcpu");
static VMM_STAT(VLAPIC_IPI_RECV, "ipis received by vcpu");

static void
vlapic_set_tpr(struct vlapic *vlapic, uint8_t val)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if (lapic->tpr != val) {
		VLAPIC_CTR2(vlapic, "vlapic TPR changed from %#x to %#x",
		    lapic->tpr, val);
		lapic->tpr = val;
		vlapic_update_ppr(vlapic);
	}
}

static uint8_t
vlapic_get_tpr(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	return (lapic->tpr);
}

void
vlapic_set_cr8(struct vlapic *vlapic, uint64_t val)
{
	uint8_t tpr;

	if (val & ~0xf) {
		vm_inject_gp(vlapic->vcpu);
		return;
	}

	tpr = val << 4;
	vlapic_set_tpr(vlapic, tpr);
}

uint64_t
vlapic_get_cr8(struct vlapic *vlapic)
{
	uint8_t tpr;

	tpr = vlapic_get_tpr(vlapic);
	return (tpr >> 4);
}
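
/*
 * CR8 and the TPR carry the same priority at different offsets: CR8
 * holds the 4-bit priority class and the TPR stores it in bits 7:4.  A
 * guest 'mov $0x3, %cr8' is thus equivalent to writing 0x30 to the TPR,
 * and reading CR8 back yields 0x3.
 */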

static bool
vlapic_is_icr_valid(uint64_t icrval)
{
	uint32_t mode = icrval & APIC_DELMODE_MASK;
	uint32_t level = icrval & APIC_LEVEL_MASK;
	uint32_t trigger = icrval & APIC_TRIGMOD_MASK;
	uint32_t shorthand = icrval & APIC_DEST_MASK;

	switch (mode) {
	case APIC_DELMODE_FIXED:
		if (trigger == APIC_TRIGMOD_EDGE)
			return (true);
		/*
		 * AMD allows a level assert IPI and Intel converts a level
		 * assert IPI into an edge IPI.
		 */
		if (trigger == APIC_TRIGMOD_LEVEL && level == APIC_LEVEL_ASSERT)
			return (true);
		break;
	case APIC_DELMODE_LOWPRIO:
	case APIC_DELMODE_SMI:
	case APIC_DELMODE_NMI:
	case APIC_DELMODE_INIT:
		if (trigger == APIC_TRIGMOD_EDGE &&
		    (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF))
			return (true);
		/*
		 * AMD allows a level assert IPI and Intel converts a level
		 * assert IPI into an edge IPI.
		 */
		if (trigger == APIC_TRIGMOD_LEVEL &&
		    level == APIC_LEVEL_ASSERT &&
		    (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF))
			return (true);
		/*
		 * A level-triggered deassert INIT is defined in the Intel
		 * Multiprocessor Specification and the Intel Software Developer
		 * Manual. Per the MPS it's required to send a level assert
		 * INIT to a cpu and then a level deassert INIT. Some operating
		 * systems, e.g. FreeBSD or Linux, use that algorithm. According
		 * to the SDM a level deassert INIT is only supported by Pentium
		 * and P6 processors. It's always sent to all cpus regardless of
		 * the destination or shorthand field. It resets the arbitration
		 * id register. This register is not software accessible and
		 * only required for the APIC bus arbitration. So, the level
		 * deassert INIT doesn't need any emulation and we should ignore
		 * it. The SDM also defines that newer processors don't support
		 * the level deassert INIT and it's not valid any more. As it's
		 * defined for older systems, it can't be invalid per se.
		 * Otherwise, backward compatibility would be broken. However,
		 * when returning false here, it'll be ignored which is the
		 * desired behaviour.
		 */
		if (mode == APIC_DELMODE_INIT &&
		    trigger == APIC_TRIGMOD_LEVEL &&
		    level == APIC_LEVEL_DEASSERT)
			return (false);
		break;
	case APIC_DELMODE_STARTUP:
		if (shorthand == APIC_DEST_DESTFLD ||
		    shorthand == APIC_DEST_ALLESELF)
			return (true);
		break;
	case APIC_DELMODE_RR:
		/* Only available on AMD! */
		if (trigger == APIC_TRIGMOD_EDGE &&
		    shorthand == APIC_DEST_DESTFLD)
			return (true);
		break;
	case APIC_DELMODE_RESV:
		return (false);
	default:
		__assert_unreachable();
	}

	return (false);
}

int
vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
{
	int i;
	bool phys;
	cpuset_t dmask, ipimask;
	uint64_t icrval;
	uint32_t dest, vec, mode, shorthand;
	struct vcpu *vcpu;
	struct vm_exit *vmexit;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	if (x2apic(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;
	phys = (icrval & APIC_DESTMODE_LOG) == 0;
	shorthand = icrval & APIC_DEST_MASK;

	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);

	switch (shorthand) {
	case APIC_DEST_DESTFLD:
		vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
		    x2apic(vlapic));
		break;
	case APIC_DEST_SELF:
		CPU_SETOF(vlapic->vcpuid, &dmask);
		break;
	case APIC_DEST_ALLISELF:
		dmask = vm_active_cpus(vlapic->vm);
		break;
	case APIC_DEST_ALLESELF:
		dmask = vm_active_cpus(vlapic->vm);
		CPU_CLR(vlapic->vcpuid, &dmask);
		break;
	default:
		__assert_unreachable();
	}

	/*
	 * Ignore invalid combinations of the icr.
	 */
	if (!vlapic_is_icr_valid(icrval)) {
		VLAPIC_CTR1(vlapic, "Ignoring invalid ICR %016lx", icrval);
		return (0);
	}

	/*
	 * ipimask is a set of vCPUs needing userland handling of the current
	 * IPI.
	 */
	CPU_ZERO(&ipimask);

	switch (mode) {
	case APIC_DELMODE_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR,
			    false);
			VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
			return (0);
		}

		CPU_FOREACH_ISSET(i, &dmask) {
			vcpu = vm_vcpu(vlapic->vm, i);
			lapic_intr_edge(vcpu, vec);
			vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_SEND, 1);
			vmm_stat_incr(vcpu, VLAPIC_IPI_RECV, 1);
			VLAPIC_CTR2(vlapic,
			    "vlapic sending ipi %d to vcpuid %d", vec, i);
		}

		break;
	case APIC_DELMODE_NMI:
		CPU_FOREACH_ISSET(i, &dmask) {
			vcpu = vm_vcpu(vlapic->vm, i);
			vm_inject_nmi(vcpu);
			VLAPIC_CTR1(vlapic,
			    "vlapic sending ipi nmi to vcpuid %d", i);
		}

		break;
	case APIC_DELMODE_INIT:
	case APIC_DELMODE_STARTUP:
		if (!vlapic->ipi_exit) {
			if (!phys)
				break;

			i = vm_apicid2vcpuid(vlapic->vm, dest);
			if (i >= vm_get_maxcpus(vlapic->vm) ||
			    i == vlapic->vcpuid)
				break;

			CPU_SETOF(i, &ipimask);

			break;
		}

		CPU_COPY(&dmask, &ipimask);
		break;
	default:
		return (1);
	}

	if (!CPU_EMPTY(&ipimask)) {
		vmexit = vm_exitinfo(vlapic->vcpu);
		vmexit->exitcode = VM_EXITCODE_IPI;
		vmexit->u.ipi.mode = mode;
		vmexit->u.ipi.vector = vec;
		*vm_exitinfo_cpuset(vlapic->vcpu) = ipimask;

		*retu = true;
	}

	return (0);
}
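
/*
 * Example of the field extraction above: an xAPIC guest writing icr_hi =
 * 0x02000000 and then icr_lo = 0x000000fd produces icrval =
 * 0x02000000000000fd, i.e. dest = 0x02 (bits 63:56), vec = 0xfd, fixed
 * delivery mode, physical destination and no shorthand, so the IPI is
 * delivered as an edge interrupt to the vcpu whose APIC ID is 2.
 */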

static void
vlapic_handle_init(struct vcpu *vcpu, void *arg)
{
	struct vlapic *vlapic = vm_lapic(vcpu);

	vlapic_reset(vlapic);
}

int
vm_handle_ipi(struct vcpu *vcpu, struct vm_exit *vme, bool *retu)
{
	struct vlapic *vlapic = vm_lapic(vcpu);
	cpuset_t *dmask = vm_exitinfo_cpuset(vcpu);
	uint8_t vec = vme->u.ipi.vector;

	*retu = true;
	switch (vme->u.ipi.mode) {
	case APIC_DELMODE_INIT: {
		cpuset_t active, reinit;

		active = vm_active_cpus(vcpu_vm(vcpu));
		CPU_AND(&reinit, &active, dmask);
		if (!CPU_EMPTY(&reinit)) {
			vm_smp_rendezvous(vcpu, reinit, vlapic_handle_init,
			    NULL);
		}
		vm_await_start(vcpu_vm(vcpu), dmask);

		if (!vlapic->ipi_exit)
			*retu = false;

		break;
	}
	case APIC_DELMODE_STARTUP:
		/*
		 * Ignore SIPIs in any state other than wait-for-SIPI.
		 */
		*dmask = vm_start_cpus(vcpu_vm(vcpu), dmask);

		if (CPU_EMPTY(dmask)) {
			*retu = false;
			break;
		}

		/*
		 * Old bhyve versions don't support the IPI
		 * exit. Translate it into the old style.
		 */
		if (!vlapic->ipi_exit) {
			vme->exitcode = VM_EXITCODE_SPINUP_AP;
			vme->u.spinup_ap.vcpu = CPU_FFS(dmask) - 1;
			vme->u.spinup_ap.rip = vec << PAGE_SHIFT;
		}

		break;
	default:
		__assert_unreachable();
	}

	return (0);
}
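
/*
 * The SPINUP_AP translation above follows the SIPI convention that the
 * 8-bit vector names a real-mode page: a SIPI with vector 0x9f starts
 * the AP at rip = 0x9f << PAGE_SHIFT = 0x9f000.
 */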

void
vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val)
{
	int vec;

	KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode"));

	vec = val & 0xff;
	lapic_intr_edge(vlapic->vcpu, vec);
	vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_SEND, 1);
	vmm_stat_incr(vlapic->vcpu, VLAPIC_IPI_RECV, 1);
	VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec);
}

int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC *lapic = vlapic->apic_page;
	int idx, i, bitpos, vector;
	uint32_t *irrptr, val;

	vlapic_update_ppr(vlapic);

	if (vlapic->ops.pending_intr)
		return ((*vlapic->ops.pending_intr)(vlapic, vecptr));

	irrptr = &lapic->irr0;

	for (i = 7; i >= 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}

void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *irrptr, *isrptr;
	int idx, stk_top;

	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * Clear the ready bit for the vector being accepted in the irr
	 * and set the vector as in service in the isr.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);
	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");

	/*
	 * Update the PPR.
	 */
	vlapic->isrvec_stk_top++;

	stk_top = vlapic->isrvec_stk_top;
	if (stk_top >= ISRVEC_STK_SIZE)
		panic("isrvec_stk_top overflow %d", stk_top);

	vlapic->isrvec_stk[stk_top] = vector;
}
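
/*
 * Lifecycle sketch of the paths above: vlapic_set_intr_ready() latches a
 * vector in the IRR; once vlapic_pending_intr() reports it and the guest
 * can take it, vlapic_intr_accepted() moves it from the IRR to the ISR
 * and pushes it on the isrvec stack; the guest's EOI write then clears
 * it from the ISR in vlapic_process_eoi() and pops the stack.
 */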

void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}

int
vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t *data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *reg;
	int i;

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode",
		    offset);
		*data = 0;
		goto done;
	}

	if (!x2apic(vlapic) && !mmio_access) {
		/*
		 * XXX Generate GP fault for MSR accesses in xAPIC mode
		 */
		VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in "
		    "xAPIC mode", offset);
		*data = 0;
		goto done;
	}

	if (offset > sizeof(*lapic)) {
		*data = 0;
		goto done;
	}

	offset &= ~3;
	switch (offset) {
	case APIC_OFFSET_ID:
		*data = lapic->id;
		break;
	case APIC_OFFSET_VER:
		*data = lapic->version;
		break;
	case APIC_OFFSET_TPR:
		*data = vlapic_get_tpr(vlapic);
		break;
	case APIC_OFFSET_APR:
		*data = lapic->apr;
		break;
	case APIC_OFFSET_PPR:
		*data = lapic->ppr;
		break;
	case APIC_OFFSET_EOI:
		*data = lapic->eoi;
		break;
	case APIC_OFFSET_LDR:
		*data = lapic->ldr;
		break;
	case APIC_OFFSET_DFR:
		*data = lapic->dfr;
		break;
	case APIC_OFFSET_SVR:
		*data = lapic->svr;
		break;
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		i = (offset - APIC_OFFSET_ISR0) >> 2;
		reg = &lapic->isr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		i = (offset - APIC_OFFSET_TMR0) >> 2;
		reg = &lapic->tmr0;
		*data = *(reg + i);
		break;
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		i = (offset - APIC_OFFSET_IRR0) >> 2;
		reg = &lapic->irr0;
		*data = atomic_load_acq_int(reg + i);
		break;
	case APIC_OFFSET_ESR:
		*data = lapic->esr;
		break;
	case APIC_OFFSET_ICR_LOW:
		*data = lapic->icr_lo;
		if (x2apic(vlapic))
			*data |= (uint64_t)lapic->icr_hi << 32;
		break;
	case APIC_OFFSET_ICR_HI:
		*data = lapic->icr_hi;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		*data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
		reg = vlapic_get_lvtptr(vlapic, offset);
		KASSERT(*data == *reg, ("inconsistent lvt value at "
		    "offset %#lx: %#lx/%#x", offset, *data, *reg));
#endif
		break;
	case APIC_OFFSET_TIMER_ICR:
		*data = lapic->icr_timer;
		break;
	case APIC_OFFSET_TIMER_CCR:
		*data = vlapic_get_ccr(vlapic);
		break;
	case APIC_OFFSET_TIMER_DCR:
		*data = lapic->dcr_timer;
		break;
	case APIC_OFFSET_SELF_IPI:
		/*
		 * XXX generate a GP fault if vlapic is in x2apic mode
		 */
		*data = 0;
		break;
	case APIC_OFFSET_RRR:
	default:
		*data = 0;
		break;
	}
done:
	VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data);
	return (0);
}

int
vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
    uint64_t data, bool *retu)
{
	struct LAPIC *lapic = vlapic->apic_page;
	uint32_t *regptr;
	int retval;

	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
	    ("vlapic_write: invalid offset %#lx", offset));

	VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx",
	    offset, data);

	if (offset > sizeof(*lapic))
		return (0);

	/* Ignore MMIO accesses in x2APIC mode */
	if (x2apic(vlapic) && mmio_access) {
		VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx "
		    "in x2APIC mode", data, offset);
		return (0);
	}

	/*
	 * XXX Generate GP fault for MSR accesses in xAPIC mode
	 */
	if (!x2apic(vlapic) && !mmio_access) {
		VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx "
		    "in xAPIC mode", data, offset);
		return (0);
	}

	retval = 0;
	switch (offset) {
	case APIC_OFFSET_ID:
		lapic->id = data;
		vlapic_id_write_handler(vlapic);
		break;
	case APIC_OFFSET_TPR:
		vlapic_set_tpr(vlapic, data & 0xff);
		break;
	case APIC_OFFSET_EOI:
		vlapic_process_eoi(vlapic);
		break;
	case APIC_OFFSET_LDR:
		lapic->ldr = data;
		vlapic_ldr_write_handler(vlapic);
		break;
	case APIC_OFFSET_DFR:
		lapic->dfr = data;
		vlapic_dfr_write_handler(vlapic);
		break;
	case APIC_OFFSET_SVR:
		lapic->svr = data;
		vlapic_svr_write_handler(vlapic);
		break;
	case APIC_OFFSET_ICR_LOW:
		lapic->icr_lo = data;
		if (x2apic(vlapic))
			lapic->icr_hi = data >> 32;
		retval = vlapic_icrlo_write_handler(vlapic, retu);
		break;
	case APIC_OFFSET_ICR_HI:
		lapic->icr_hi = data;
		break;
	case APIC_OFFSET_CMCI_LVT:
	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
		regptr = vlapic_get_lvtptr(vlapic, offset);
		*regptr = data;
		vlapic_lvt_write_handler(vlapic, offset);
		break;
	case APIC_OFFSET_TIMER_ICR:
		lapic->icr_timer = data;
		vlapic_icrtmr_write_handler(vlapic);
		break;

	case APIC_OFFSET_TIMER_DCR:
		lapic->dcr_timer = data;
		vlapic_dcr_write_handler(vlapic);
		break;

	case APIC_OFFSET_ESR:
		vlapic_esr_write_handler(vlapic);
		break;

	case APIC_OFFSET_SELF_IPI:
		if (x2apic(vlapic))
			vlapic_self_ipi_handler(vlapic, data);
		break;

	case APIC_OFFSET_VER:
	case APIC_OFFSET_APR:
	case APIC_OFFSET_PPR:
	case APIC_OFFSET_RRR:
	case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
	case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
	case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
	case APIC_OFFSET_TIMER_CCR:
	default:
		/* Read only. */
		break;
	}

	return (retval);
}

static void
vlapic_reset(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	bzero(lapic, sizeof(struct LAPIC));

	lapic->id = vlapic_get_id(vlapic);
	lapic->version = VLAPIC_VERSION;
	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
	lapic->dfr = 0xffffffff;
	lapic->svr = APIC_SVR_VECTOR;
	vlapic_mask_lvts(vlapic);
	vlapic_reset_tmr(vlapic);

	lapic->dcr_timer = 0;
	vlapic_dcr_write_handler(vlapic);

	vlapic->svr_last = lapic->svr;
}

void
vlapic_init(struct vlapic *vlapic)
{
	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
	KASSERT(vlapic->vcpuid >= 0 &&
	    vlapic->vcpuid < vm_get_maxcpus(vlapic->vm),
	    ("vlapic_init: vcpuid is not initialized"));
	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
	    "initialized"));

	/*
	 * If the vlapic is configured in x2apic mode then it will be
	 * accessed in the critical section via the MSR emulation code.
	 *
	 * Therefore the timer mutex must be a spinlock because blockable
	 * mutexes cannot be acquired in a critical section.
	 */
	mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
	callout_init(&vlapic->callout, 1);

	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;

	if (vlapic->vcpuid == 0)
		vlapic->msr_apicbase |= APICBASE_BSP;

	vlapic->ipi_exit = false;

	vlapic_reset(vlapic);
}

void
vlapic_cleanup(struct vlapic *vlapic)
{

	callout_drain(&vlapic->callout);
	mtx_destroy(&vlapic->timer_mtx);
}

uint64_t
vlapic_get_apicbase(struct vlapic *vlapic)
{

	return (vlapic->msr_apicbase);
}

int
vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new)
{

	if (vlapic->msr_apicbase != new) {
		VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx "
		    "not supported", vlapic->msr_apicbase, new);
		return (-1);
	}

	return (0);
}

void
vlapic_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state)
{
	struct vlapic *vlapic;
	struct LAPIC *lapic;

	vlapic = vm_lapic(vcpu);

	if (state == X2APIC_DISABLED)
		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
	else
		vlapic->msr_apicbase |= APICBASE_X2APIC;

	/*
	 * Reset the local APIC registers whose values are mode-dependent.
	 *
	 * XXX this works because the APIC mode can be changed only at vcpu
	 * initialization time.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
	if (x2apic(vlapic)) {
		lapic->ldr = x2apic_ldr(vlapic);
		lapic->dfr = 0;
	} else {
		lapic->ldr = 0;
		lapic->dfr = 0xffffffff;
	}

	if (state == X2APIC_ENABLED) {
		if (vlapic->ops.enable_x2apic_mode)
			(*vlapic->ops.enable_x2apic_mode)(vlapic);
	}
}

void
vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
    int delmode, int vec)
{
	struct vcpu *vcpu;
	bool lowprio;
	int vcpuid;
	cpuset_t dmask;

	if (delmode != IOART_DELFIXED &&
	    delmode != IOART_DELLOPRI &&
	    delmode != IOART_DELEXINT) {
		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
		return;
	}
	lowprio = (delmode == IOART_DELLOPRI);

	/*
	 * We don't provide any virtual interrupt redirection hardware so
	 * all interrupts originating from the ioapic or MSI specify the
	 * 'dest' in the legacy xAPIC format.
	 */
	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);

	CPU_FOREACH_ISSET(vcpuid, &dmask) {
		vcpu = vm_vcpu(vm, vcpuid);
		if (delmode == IOART_DELEXINT) {
			vm_inject_extint(vcpu);
		} else {
			lapic_set_intr(vcpu, vec, level);
		}
	}
}

void
vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum)
{
	/*
	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
	 *
	 * This is done by leveraging features like Posted Interrupts (Intel)
	 * and the Doorbell MSR (AMD AVIC) that avoid a VM exit.
	 *
	 * If neither of these features is available then fall back to
	 * sending an IPI to 'hostcpu'.
	 */
	if (vlapic->ops.post_intr)
		(*vlapic->ops.post_intr)(vlapic, hostcpu);
	else
		ipi_cpu(hostcpu, ipinum);
}

bool
vlapic_enabled(struct vlapic *vlapic)
{
	struct LAPIC *lapic = vlapic->apic_page;

	if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
	    (lapic->svr & APIC_SVR_ENABLE) != 0)
		return (true);
	else
		return (false);
}

static void
vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *tmrptr, mask;
	int idx;

	lapic = vlapic->apic_page;
	tmrptr = &lapic->tmr0;
	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);
	if (level)
		tmrptr[idx] |= mask;
	else
		tmrptr[idx] &= ~mask;

	if (vlapic->ops.set_tmr != NULL)
		(*vlapic->ops.set_tmr)(vlapic, vector, level);
}

void
vlapic_reset_tmr(struct vlapic *vlapic)
{
	int vector;

	VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");

	for (vector = 0; vector <= 255; vector++)
		vlapic_set_tmr(vlapic, vector, false);
}

void
vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
    int delmode, int vector)
{
	cpuset_t dmask;
	bool lowprio;

	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));

	/*
	 * A level trigger is valid only for fixed and lowprio delivery modes.
	 */
	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
		VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
		    "delivery-mode %d", delmode);
		return;
	}

	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
	vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false);

	if (!CPU_ISSET(vlapic->vcpuid, &dmask))
		return;

	VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
	vlapic_set_tmr(vlapic, vector, true);
}

#ifdef BHYVE_SNAPSHOT
static void
vlapic_reset_callout(struct vlapic *vlapic, uint32_t ccr)
{
	/*
	 * The implementation is similar to the one in
	 * vlapic_icrtmr_write_handler().
	 */
	sbintime_t sbt;
	struct bintime bt;

	VLAPIC_TIMER_LOCK(vlapic);

	bt = vlapic->timer_freq_bt;
	bintime_mul(&bt, ccr);

	if (ccr != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &bt);

		sbt = bttosbt(bt);
		vlapic_callout_reset(vlapic, sbt);
	} else {
		/* Even if the CCR was 0, periodic timers should be reset. */
		if (vlapic_periodic_timer(vlapic)) {
			binuptime(&vlapic->timer_fire_bt);
			bintime_add(&vlapic->timer_fire_bt,
			    &vlapic->timer_period_bt);
			sbt = bttosbt(vlapic->timer_period_bt);

			callout_stop(&vlapic->callout);
			vlapic_callout_reset(vlapic, sbt);
		}
	}

	VLAPIC_TIMER_UNLOCK(vlapic);
}

int
vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta)
{
	int ret;
	struct vcpu *vcpu;
	struct vlapic *vlapic;
	struct LAPIC *lapic;
	uint32_t ccr;
	uint16_t i, maxcpus;

	KASSERT(vm != NULL, ("%s: arg was NULL", __func__));

	ret = 0;

	maxcpus = vm_get_maxcpus(vm);
	for (i = 0; i < maxcpus; i++) {
		vcpu = vm_vcpu(vm, i);
		if (vcpu == NULL)
			continue;
		vlapic = vm_lapic(vcpu);

		/* Snapshot the page first; the timer period depends on icr_timer. */
		lapic = vlapic->apic_page;
		SNAPSHOT_BUF_OR_LEAVE(lapic, PAGE_SIZE, meta, ret, done);

		SNAPSHOT_VAR_OR_LEAVE(vlapic->esr_pending, meta, ret, done);

		SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.sec,
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.frac,
		    meta, ret, done);

		/*
		 * Timer period is equal to 'icr_timer' ticks at a frequency of
		 * 'timer_freq_bt'.
		 */
		if (meta->op == VM_SNAPSHOT_RESTORE) {
			vlapic->timer_period_bt = vlapic->timer_freq_bt;
			bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
		}

		SNAPSHOT_BUF_OR_LEAVE(vlapic->isrvec_stk,
		    sizeof(vlapic->isrvec_stk),
		    meta, ret, done);
		SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done);

		SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last,
		    sizeof(vlapic->lvt_last),
		    meta, ret, done);

		if (meta->op == VM_SNAPSHOT_SAVE)
			ccr = vlapic_get_ccr(vlapic);

		SNAPSHOT_VAR_OR_LEAVE(ccr, meta, ret, done);

		if (meta->op == VM_SNAPSHOT_RESTORE &&
		    vlapic_enabled(vlapic) && lapic->icr_timer != 0) {
			/*
			 * Reset the 'timer_fire_bt' and the vlapic callout
			 * based on the current count register saved when the
			 * VM snapshot was created.  If the initial count
			 * register is 0, the timer is not in use; see
			 * "10.5.4 APIC Timer" in the Intel SDM.
			 */
			vlapic_reset_callout(vlapic, ccr);
		}
	}

done:
	return (ret);
}
#endif