GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kvm/lapic.c
1
// SPDX-License-Identifier: GPL-2.0-only
2
3
/*
4
* Local APIC virtualization
5
*
6
* Copyright (C) 2006 Qumranet, Inc.
7
* Copyright (C) 2007 Novell
8
* Copyright (C) 2007 Intel
9
* Copyright 2009 Red Hat, Inc. and/or its affiliates.
10
*
11
* Authors:
12
* Dor Laor <[email protected]>
13
* Gregory Haskins <[email protected]>
14
* Yaozu (Eddie) Dong <[email protected]>
15
*
16
* Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
17
*/
18
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20
#include <linux/kvm_host.h>
21
#include <linux/kvm.h>
22
#include <linux/mm.h>
23
#include <linux/highmem.h>
24
#include <linux/smp.h>
25
#include <linux/hrtimer.h>
26
#include <linux/io.h>
27
#include <linux/export.h>
28
#include <linux/math64.h>
29
#include <linux/slab.h>
30
#include <asm/apic.h>
31
#include <asm/processor.h>
32
#include <asm/mce.h>
33
#include <asm/msr.h>
34
#include <asm/page.h>
35
#include <asm/current.h>
36
#include <asm/apicdef.h>
37
#include <asm/delay.h>
38
#include <linux/atomic.h>
39
#include <linux/jump_label.h>
40
#include "kvm_cache_regs.h"
41
#include "irq.h"
42
#include "ioapic.h"
43
#include "trace.h"
44
#include "x86.h"
45
#include "xen.h"
46
#include "cpuid.h"
47
#include "hyperv.h"
48
#include "smm.h"
49
50
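/* 32-bit kernels lack a native 64-bit modulo, so open-code it via div64_u64(). */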
#ifndef CONFIG_X86_64
51
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
52
#else
53
#define mod_64(x, y) ((x) % (y))
54
#endif
55
56
/* 14 is the version for Xeon and Pentium 8.4.8 */
57
#define APIC_VERSION 0x14UL
58
#define LAPIC_MMIO_LENGTH (1 << 12)
59
60
/*
61
* Enable local APIC timer advancement (tscdeadline mode only) with adaptive
62
* tuning. When enabled, KVM programs the host timer event to fire early, i.e.
63
* before the deadline expires, to account for the delay between taking the
64
* VM-Exit (to inject the guest event) and the subsequent VM-Enter to resume
65
* the guest, i.e. so that the interrupt arrives in the guest with minimal
66
* latency relative to the deadline programmed by the guest.
67
*/
68
static bool lapic_timer_advance __read_mostly = true;
69
module_param(lapic_timer_advance, bool, 0444);
70
71
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */
72
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */
73
#define LAPIC_TIMER_ADVANCE_NS_INIT 1000
74
#define LAPIC_TIMER_ADVANCE_NS_MAX 5000
75
/* step-by-step approximation to mitigate fluctuation */
76
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
77
static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data);
78
static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data);
79
80
static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
81
{
82
apic_set_reg(apic->regs, reg_off, val);
83
}
84
85
static __always_inline u64 kvm_lapic_get_reg64(struct kvm_lapic *apic, int reg)
86
{
87
return apic_get_reg64(apic->regs, reg);
88
}
89
90
static __always_inline void kvm_lapic_set_reg64(struct kvm_lapic *apic,
91
int reg, u64 val)
92
{
93
apic_set_reg64(apic->regs, reg, val);
94
}
95
96
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
97
{
98
struct kvm_lapic *apic = vcpu->arch.apic;
99
100
return apic_test_vector(vector, apic->regs + APIC_ISR) ||
101
apic_test_vector(vector, apic->regs + APIC_IRR);
102
}
103
104
__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
105
EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
106
107
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
108
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
109
110
static inline int apic_enabled(struct kvm_lapic *apic)
111
{
112
return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
113
}
114
115
#define LVT_MASK \
116
(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
117
118
#define LINT_MASK \
119
(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
120
APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
121
122
static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
123
{
124
return apic->vcpu->vcpu_id;
125
}
126
127
static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
128
{
129
return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
130
(kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
131
}
132
133
bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
134
{
135
return kvm_x86_ops.set_hv_timer
136
&& !(kvm_mwait_in_guest(vcpu->kvm) ||
137
kvm_can_post_timer_interrupt(vcpu));
138
}
139
140
static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
141
{
142
return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
143
}
144
145
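/*
* The x2APIC logical ID is derived from the x2APIC ID: bits 31:16 hold the
* cluster (ID / 16) and bits 15:0 hold a one-hot position within the
* cluster (1 << (ID % 16)).
*/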
static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
146
{
147
return ((id >> 4) << 16) | (1 << (id & 0xf));
148
}
149
150
static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
151
u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
152
switch (map->logical_mode) {
153
case KVM_APIC_MODE_SW_DISABLED:
154
/* Arbitrarily use the flat map so that @cluster isn't NULL. */
155
*cluster = map->xapic_flat_map;
156
*mask = 0;
157
return true;
158
case KVM_APIC_MODE_X2APIC: {
159
u32 offset = (dest_id >> 16) * 16;
160
u32 max_apic_id = map->max_apic_id;
161
162
if (offset <= max_apic_id) {
163
u8 cluster_size = min(max_apic_id - offset + 1, 16U);
164
165
offset = array_index_nospec(offset, map->max_apic_id + 1);
166
*cluster = &map->phys_map[offset];
167
*mask = dest_id & (0xffff >> (16 - cluster_size));
168
} else {
169
*mask = 0;
170
}
171
172
return true;
173
}
174
case KVM_APIC_MODE_XAPIC_FLAT:
175
*cluster = map->xapic_flat_map;
176
*mask = dest_id & 0xff;
177
return true;
178
case KVM_APIC_MODE_XAPIC_CLUSTER:
179
*cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
180
*mask = dest_id & 0xf;
181
return true;
182
case KVM_APIC_MODE_MAP_DISABLED:
183
return false;
184
default:
185
WARN_ON_ONCE(1);
186
return false;
187
}
188
}
189
190
static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
191
struct kvm_vcpu *vcpu,
192
bool *xapic_id_mismatch)
193
{
194
struct kvm_lapic *apic = vcpu->arch.apic;
195
u32 x2apic_id = kvm_x2apic_id(apic);
196
u32 xapic_id = kvm_xapic_id(apic);
197
u32 physical_id;
198
199
/*
200
* For simplicity, KVM always allocates enough space for all possible
201
* xAPIC IDs. Yell, but don't kill the VM, as KVM can continue on
202
* without the optimized map.
203
*/
204
if (WARN_ON_ONCE(xapic_id > new->max_apic_id))
205
return -EINVAL;
206
207
/*
208
* Bail if a vCPU was added and/or enabled its APIC between allocating
209
* the map and doing the actual calculations for the map. Note, KVM
210
* hardcodes the x2APIC ID to vcpu_id, i.e. there's no TOCTOU bug if
211
* the compiler decides to reload x2apic_id after this check.
212
*/
213
if (x2apic_id > new->max_apic_id)
214
return -E2BIG;
215
216
/*
217
* Deliberately truncate the vCPU ID when detecting a mismatched APIC
218
* ID to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a
219
* 32-bit value. Any unwanted aliasing due to truncation results will
220
* be detected below.
221
*/
222
if (!apic_x2apic_mode(apic) && xapic_id != (u8)vcpu->vcpu_id)
223
*xapic_id_mismatch = true;
224
225
/*
226
* Apply KVM's hotplug hack if userspace has enable 32-bit APIC IDs.
227
* Allow sending events to vCPUs by their x2APIC ID even if the target
228
* vCPU is in legacy xAPIC mode, and silently ignore aliased xAPIC IDs
229
* (the x2APIC ID is truncated to 8 bits, causing IDs > 0xff to wrap
230
* and collide).
231
*
232
* Honor the architectural (and KVM's non-optimized) behavior if
233
* userspace has not enabled 32-bit x2APIC IDs. Each APIC is supposed
234
* to process messages independently. If multiple vCPUs have the same
235
* effective APIC ID, e.g. due to the x2APIC wrap or because the guest
236
* manually modified its xAPIC IDs, events targeting that ID are
237
* supposed to be recognized by all vCPUs with said ID.
238
*/
239
if (vcpu->kvm->arch.x2apic_format) {
240
/* See also kvm_apic_match_physical_addr(). */
241
if (apic_x2apic_mode(apic) || x2apic_id > 0xff)
242
new->phys_map[x2apic_id] = apic;
243
244
if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
245
new->phys_map[xapic_id] = apic;
246
} else {
247
/*
248
* Disable the optimized map if the physical APIC ID is already
249
* mapped, i.e. is aliased to multiple vCPUs. The optimized
250
* map requires a strict 1:1 mapping between IDs and vCPUs.
251
*/
252
if (apic_x2apic_mode(apic))
253
physical_id = x2apic_id;
254
else
255
physical_id = xapic_id;
256
257
if (new->phys_map[physical_id])
258
return -EINVAL;
259
260
new->phys_map[physical_id] = apic;
261
}
262
263
return 0;
264
}
265
266
static void kvm_recalculate_logical_map(struct kvm_apic_map *new,
267
struct kvm_vcpu *vcpu)
268
{
269
struct kvm_lapic *apic = vcpu->arch.apic;
270
enum kvm_apic_logical_mode logical_mode;
271
struct kvm_lapic **cluster;
272
u16 mask;
273
u32 ldr;
274
275
if (new->logical_mode == KVM_APIC_MODE_MAP_DISABLED)
276
return;
277
278
if (!kvm_apic_sw_enabled(apic))
279
return;
280
281
ldr = kvm_lapic_get_reg(apic, APIC_LDR);
282
if (!ldr)
283
return;
284
285
if (apic_x2apic_mode(apic)) {
286
logical_mode = KVM_APIC_MODE_X2APIC;
287
} else {
288
ldr = GET_APIC_LOGICAL_ID(ldr);
289
if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
290
logical_mode = KVM_APIC_MODE_XAPIC_FLAT;
291
else
292
logical_mode = KVM_APIC_MODE_XAPIC_CLUSTER;
293
}
294
295
/*
296
* To optimize logical mode delivery, all software-enabled APICs must
297
* be configured for the same mode.
298
*/
299
if (new->logical_mode == KVM_APIC_MODE_SW_DISABLED) {
300
new->logical_mode = logical_mode;
301
} else if (new->logical_mode != logical_mode) {
302
new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
303
return;
304
}
305
306
/*
307
* In x2APIC mode, the LDR is read-only and derived directly from the
308
* x2APIC ID, thus is guaranteed to be addressable. KVM reuses
309
* kvm_apic_map.phys_map to optimize logical mode x2APIC interrupts by
310
* reversing the LDR calculation to get cluster of APICs, i.e. no
311
* additional work is required.
312
*/
313
if (apic_x2apic_mode(apic))
314
return;
315
316
if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr,
317
&cluster, &mask))) {
318
new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
319
return;
320
}
321
322
if (!mask)
323
return;
324
325
ldr = ffs(mask) - 1;
326
if (!is_power_of_2(mask) || cluster[ldr])
327
new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
328
else
329
cluster[ldr] = apic;
330
}
331
332
/*
333
* CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
334
*
335
* DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
336
* apic_map_lock_held.
337
*/
338
enum {
339
CLEAN,
340
UPDATE_IN_PROGRESS,
341
DIRTY
342
};
343
344
static void kvm_recalculate_apic_map(struct kvm *kvm)
345
{
346
struct kvm_apic_map *new, *old = NULL;
347
struct kvm_vcpu *vcpu;
348
unsigned long i;
349
u32 max_id = 255; /* enough space for any xAPIC ID */
350
bool xapic_id_mismatch;
351
int r;
352
353
/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */
354
if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
355
return;
356
357
WARN_ONCE(!irqchip_in_kernel(kvm),
358
"Dirty APIC map without an in-kernel local APIC");
359
360
mutex_lock(&kvm->arch.apic_map_lock);
361
362
retry:
363
/*
364
* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map (if clean)
365
* or the APIC registers (if dirty). Note, on retry the map may have
366
* not yet been marked dirty by whatever task changed a vCPU's x2APIC
367
* ID, i.e. the map may still show up as in-progress. In that case
368
* this task still needs to retry and complete its calculation.
369
*/
370
if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
371
DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
372
/* Someone else has updated the map. */
373
mutex_unlock(&kvm->arch.apic_map_lock);
374
return;
375
}
376
377
/*
378
* Reset the mismatch flag between attempts so that KVM does the right
379
* thing if a vCPU changes its xAPIC ID, but do NOT reset max_id, i.e.
380
* keep max_id strictly increasing. Disallowing max_id from shrinking
381
* ensures KVM won't get stuck in an infinite loop, e.g. if the vCPU
382
* with the highest x2APIC ID is toggling its APIC on and off.
383
*/
384
xapic_id_mismatch = false;
385
386
kvm_for_each_vcpu(i, vcpu, kvm)
387
if (kvm_apic_present(vcpu))
388
max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
389
390
new = kvzalloc(sizeof(struct kvm_apic_map) +
391
sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
392
GFP_KERNEL_ACCOUNT);
393
394
if (!new)
395
goto out;
396
397
new->max_apic_id = max_id;
398
new->logical_mode = KVM_APIC_MODE_SW_DISABLED;
399
400
kvm_for_each_vcpu(i, vcpu, kvm) {
401
if (!kvm_apic_present(vcpu))
402
continue;
403
404
r = kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch);
405
if (r) {
406
kvfree(new);
407
new = NULL;
408
if (r == -E2BIG) {
409
cond_resched();
410
goto retry;
411
}
412
413
goto out;
414
}
415
416
kvm_recalculate_logical_map(new, vcpu);
417
}
418
out:
419
/*
420
* The optimized map is effectively KVM's internal version of APICv,
421
* and all unwanted aliasing that results in disabling the optimized
422
* map also applies to APICv.
423
*/
424
if (!new)
425
kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED);
426
else
427
kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED);
428
429
if (!new || new->logical_mode == KVM_APIC_MODE_MAP_DISABLED)
430
kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED);
431
else
432
kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED);
433
434
if (xapic_id_mismatch)
435
kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
436
else
437
kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
438
439
old = rcu_dereference_protected(kvm->arch.apic_map,
440
lockdep_is_held(&kvm->arch.apic_map_lock));
441
rcu_assign_pointer(kvm->arch.apic_map, new);
442
/*
443
* Write kvm->arch.apic_map before clearing apic->apic_map_dirty.
444
* If another update has come in, leave it DIRTY.
445
*/
446
atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
447
UPDATE_IN_PROGRESS, CLEAN);
448
mutex_unlock(&kvm->arch.apic_map_lock);
449
450
if (old)
451
kvfree_rcu(old, rcu);
452
453
kvm_make_scan_ioapic_request(kvm);
454
}
455
456
static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
457
{
458
bool enabled = val & APIC_SPIV_APIC_ENABLED;
459
460
kvm_lapic_set_reg(apic, APIC_SPIV, val);
461
462
if (enabled != apic->sw_enabled) {
463
apic->sw_enabled = enabled;
464
if (enabled)
465
static_branch_slow_dec_deferred(&apic_sw_disabled);
466
else
467
static_branch_inc(&apic_sw_disabled.key);
468
469
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
470
}
471
472
/* Check if there are APF page ready requests pending */
473
if (enabled) {
474
kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
475
kvm_xen_sw_enable_lapic(apic->vcpu);
476
}
477
}
478
479
static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
480
{
481
kvm_lapic_set_reg(apic, APIC_ID, id << 24);
482
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
483
}
484
485
static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
486
{
487
kvm_lapic_set_reg(apic, APIC_LDR, id);
488
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
489
}
490
491
static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
492
{
493
kvm_lapic_set_reg(apic, APIC_DFR, val);
494
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
495
}
496
497
static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
498
{
499
u32 ldr = kvm_apic_calc_x2apic_ldr(id);
500
501
WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
502
503
kvm_lapic_set_reg(apic, APIC_ID, id);
504
kvm_lapic_set_reg(apic, APIC_LDR, ldr);
505
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
506
}
507
508
static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
509
{
510
return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
511
}
512
513
static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
514
{
515
return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
516
}
517
518
static inline int apic_lvtt_period(struct kvm_lapic *apic)
519
{
520
return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
521
}
522
523
static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
524
{
525
return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
526
}
527
528
static inline int apic_lvt_nmi_mode(u32 lvt_val)
529
{
530
return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
531
}
532
533
static inline bool kvm_lapic_lvt_supported(struct kvm_lapic *apic, int lvt_index)
534
{
535
return apic->nr_lvt_entries > lvt_index;
536
}
537
538
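/*
* The CMCI LVT is the last of KVM's LVT entries; drop it if the vCPU does
* not advertise CMCI support in IA32_MCG_CAP.
*/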
static inline int kvm_apic_calc_nr_lvt_entries(struct kvm_vcpu *vcpu)
539
{
540
return KVM_APIC_MAX_NR_LVT_ENTRIES - !(vcpu->arch.mcg_cap & MCG_CMCI_P);
541
}
542
543
void kvm_apic_set_version(struct kvm_vcpu *vcpu)
544
{
545
struct kvm_lapic *apic = vcpu->arch.apic;
546
u32 v = 0;
547
548
if (!lapic_in_kernel(vcpu))
549
return;
550
551
v = APIC_VERSION | ((apic->nr_lvt_entries - 1) << 16);
552
553
/*
554
* KVM emulates the 82093AA datasheet (with the in-kernel IOAPIC
555
* implementation), which doesn't have an EOI register. Some buggy OSes
556
* (e.g. Windows with the Hyper-V role) disable EOI broadcast in the LAPIC
557
* without checking the IOAPIC version first, so level-triggered interrupts
558
* never get EOIed in the IOAPIC.
559
*/
560
if (guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) &&
561
!ioapic_in_kernel(vcpu->kvm))
562
v |= APIC_LVR_DIRECTED_EOI;
563
kvm_lapic_set_reg(apic, APIC_LVR, v);
564
}
565
566
void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu)
567
{
568
int nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
569
struct kvm_lapic *apic = vcpu->arch.apic;
570
int i;
571
572
if (!lapic_in_kernel(vcpu) || nr_lvt_entries == apic->nr_lvt_entries)
573
return;
574
575
/* Initialize/mask any "new" LVT entries. */
576
for (i = apic->nr_lvt_entries; i < nr_lvt_entries; i++)
577
kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);
578
579
apic->nr_lvt_entries = nr_lvt_entries;
580
581
/* The number of LVT entries is reflected in the version register. */
582
kvm_apic_set_version(vcpu);
583
}
584
585
static const unsigned int apic_lvt_mask[KVM_APIC_MAX_NR_LVT_ENTRIES] = {
586
[LVT_TIMER] = LVT_MASK, /* timer mode mask added at runtime */
587
[LVT_THERMAL_MONITOR] = LVT_MASK | APIC_MODE_MASK,
588
[LVT_PERFORMANCE_COUNTER] = LVT_MASK | APIC_MODE_MASK,
589
[LVT_LINT0] = LINT_MASK,
590
[LVT_LINT1] = LINT_MASK,
591
[LVT_ERROR] = LVT_MASK,
592
[LVT_CMCI] = LVT_MASK | APIC_MODE_MASK
593
};
594
595
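/*
* Count the vectors set in a 256-bit IRR/ISR-style bitmap by summing the
* popcount of each of its eight 32-bit registers.
*/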
static u8 count_vectors(void *bitmap)
596
{
597
int vec;
598
u32 *reg;
599
u8 count = 0;
600
601
for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
602
reg = bitmap + APIC_VECTOR_TO_REG_OFFSET(vec);
603
count += hweight32(*reg);
604
}
605
606
return count;
607
}
608
609
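/*
* Move pending vectors harvested from the posted-interrupt request (PIR)
* descriptor into the vIRR. Returns true iff the highest vector newly set
* from the PIR is also the highest vector pending in the IRR.
*/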
bool __kvm_apic_update_irr(unsigned long *pir, void *regs, int *max_irr)
610
{
611
unsigned long pir_vals[NR_PIR_WORDS];
612
u32 *__pir = (void *)pir_vals;
613
u32 i, vec;
614
u32 irr_val, prev_irr_val;
615
int max_updated_irr;
616
617
max_updated_irr = -1;
618
*max_irr = -1;
619
620
if (!pi_harvest_pir(pir, pir_vals))
621
return false;
622
623
for (i = vec = 0; i <= 7; i++, vec += 32) {
624
u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10);
625
626
irr_val = READ_ONCE(*p_irr);
627
628
if (__pir[i]) {
629
prev_irr_val = irr_val;
630
do {
631
irr_val = prev_irr_val | __pir[i];
632
} while (prev_irr_val != irr_val &&
633
!try_cmpxchg(p_irr, &prev_irr_val, irr_val));
634
635
if (prev_irr_val != irr_val)
636
max_updated_irr = __fls(irr_val ^ prev_irr_val) + vec;
637
}
638
if (irr_val)
639
*max_irr = __fls(irr_val) + vec;
640
}
641
642
return ((max_updated_irr != -1) &&
643
(max_updated_irr == *max_irr));
644
}
645
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
646
647
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, unsigned long *pir, int *max_irr)
648
{
649
struct kvm_lapic *apic = vcpu->arch.apic;
650
bool irr_updated = __kvm_apic_update_irr(pir, apic->regs, max_irr);
651
652
if (unlikely(!apic->apicv_active && irr_updated))
653
apic->irr_pending = true;
654
return irr_updated;
655
}
656
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
657
658
static inline int apic_search_irr(struct kvm_lapic *apic)
659
{
660
return apic_find_highest_vector(apic->regs + APIC_IRR);
661
}
662
663
static inline int apic_find_highest_irr(struct kvm_lapic *apic)
664
{
665
int result;
666
667
/*
668
* Note that irr_pending is just a hint. It will always be
669
* true with virtual interrupt delivery enabled.
670
*/
671
if (!apic->irr_pending)
672
return -1;
673
674
result = apic_search_irr(apic);
675
ASSERT(result == -1 || result >= 16);
676
677
return result;
678
}
679
680
static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
681
{
682
if (unlikely(apic->apicv_active)) {
683
apic_clear_vector(vec, apic->regs + APIC_IRR);
684
} else {
685
apic->irr_pending = false;
686
apic_clear_vector(vec, apic->regs + APIC_IRR);
687
if (apic_search_irr(apic) != -1)
688
apic->irr_pending = true;
689
}
690
}
691
692
void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
693
{
694
apic_clear_irr(vec, vcpu->arch.apic);
695
}
696
EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
697
698
static void *apic_vector_to_isr(int vec, struct kvm_lapic *apic)
699
{
700
return apic->regs + APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(vec);
701
}
702
703
static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
704
{
705
if (__test_and_set_bit(APIC_VECTOR_TO_BIT_NUMBER(vec),
706
apic_vector_to_isr(vec, apic)))
707
return;
708
709
/*
710
* With APIC virtualization enabled, all caching is disabled
711
* because the processor can modify ISR under the hood. Instead
712
* just set SVI.
713
*/
714
if (unlikely(apic->apicv_active))
715
kvm_x86_call(hwapic_isr_update)(apic->vcpu, vec);
716
else {
717
++apic->isr_count;
718
BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
719
/*
720
* ISR (in service register) bit is set when injecting an interrupt.
721
* The highest vector is injected. Thus the latest bit set matches
722
* the highest bit in ISR.
723
*/
724
apic->highest_isr_cache = vec;
725
}
726
}
727
728
static inline int apic_find_highest_isr(struct kvm_lapic *apic)
729
{
730
int result;
731
732
/*
733
* Note that isr_count is always 1, and highest_isr_cache
734
* is always -1, with APIC virtualization enabled.
735
*/
736
if (!apic->isr_count)
737
return -1;
738
if (likely(apic->highest_isr_cache != -1))
739
return apic->highest_isr_cache;
740
741
result = apic_find_highest_vector(apic->regs + APIC_ISR);
742
ASSERT(result == -1 || result >= 16);
743
744
return result;
745
}
746
747
static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
748
{
749
if (!__test_and_clear_bit(APIC_VECTOR_TO_BIT_NUMBER(vec),
750
apic_vector_to_isr(vec, apic)))
751
return;
752
753
/*
754
* We do get here for APIC virtualization enabled if the guest
755
* uses the Hyper-V APIC enlightenment. In this case we may need
756
* to trigger a new interrupt delivery by writing the SVI field;
757
* on the other hand isr_count and highest_isr_cache are unused
758
* and must be left alone.
759
*/
760
if (unlikely(apic->apicv_active))
761
kvm_x86_call(hwapic_isr_update)(apic->vcpu, apic_find_highest_isr(apic));
762
else {
763
--apic->isr_count;
764
BUG_ON(apic->isr_count < 0);
765
apic->highest_isr_cache = -1;
766
}
767
}
768
769
void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu)
770
{
771
struct kvm_lapic *apic = vcpu->arch.apic;
772
773
if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)) || !apic->apicv_active)
774
return;
775
776
kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
777
}
778
EXPORT_SYMBOL_GPL(kvm_apic_update_hwapic_isr);
779
780
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
781
{
782
/* This may race with setting of irr in __apic_accept_irq() and
783
* the value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
784
* will cause vmexit immediately and the value will be recalculated
785
* on the next vmentry.
786
*/
787
return apic_find_highest_irr(vcpu->arch.apic);
788
}
789
EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
790
791
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
792
int vector, int level, int trig_mode,
793
struct dest_map *dest_map);
794
795
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
796
struct dest_map *dest_map)
797
{
798
struct kvm_lapic *apic = vcpu->arch.apic;
799
800
return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
801
irq->level, irq->trig_mode, dest_map);
802
}
803
804
static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
805
struct kvm_lapic_irq *irq, u32 min)
806
{
807
int i, count = 0;
808
struct kvm_vcpu *vcpu;
809
810
if (min > map->max_apic_id)
811
return 0;
812
813
min = array_index_nospec(min, map->max_apic_id + 1);
814
815
for_each_set_bit(i, ipi_bitmap,
816
min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
817
if (map->phys_map[min + i]) {
818
vcpu = map->phys_map[min + i]->vcpu;
819
count += kvm_apic_set_irq(vcpu, irq, NULL);
820
}
821
}
822
823
return count;
824
}
825
826
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
827
unsigned long ipi_bitmap_high, u32 min,
828
unsigned long icr, int op_64_bit)
829
{
830
struct kvm_apic_map *map;
831
struct kvm_lapic_irq irq = {0};
832
int cluster_size = op_64_bit ? 64 : 32;
833
int count;
834
835
if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
836
return -KVM_EINVAL;
837
838
irq.vector = icr & APIC_VECTOR_MASK;
839
irq.delivery_mode = icr & APIC_MODE_MASK;
840
irq.level = (icr & APIC_INT_ASSERT) != 0;
841
irq.trig_mode = icr & APIC_INT_LEVELTRIG;
842
843
rcu_read_lock();
844
map = rcu_dereference(kvm->arch.apic_map);
845
846
count = -EOPNOTSUPP;
847
if (likely(map)) {
848
count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
849
min += cluster_size;
850
count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
851
}
852
853
rcu_read_unlock();
854
return count;
855
}
856
857
static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
858
{
859
860
return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
861
sizeof(val));
862
}
863
864
static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
865
{
866
867
return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
868
sizeof(*val));
869
}
870
871
static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
872
{
873
return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
874
}
875
876
static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
877
{
878
if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0)
879
return;
880
881
__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
882
}
883
884
static bool pv_eoi_test_and_clr_pending(struct kvm_vcpu *vcpu)
885
{
886
u8 val;
887
888
if (pv_eoi_get_user(vcpu, &val) < 0)
889
return false;
890
891
val &= KVM_PV_EOI_ENABLED;
892
893
if (val && pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0)
894
return false;
895
896
/*
897
* Clear pending bit in any case: it will be set again on vmentry.
898
* While this might not be ideal from a performance point of view,
899
* this makes sure pv eoi is only enabled when we know it's safe.
900
*/
901
__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
902
903
return val;
904
}
905
906
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
907
{
908
int highest_irr;
909
if (kvm_x86_ops.sync_pir_to_irr)
910
highest_irr = kvm_x86_call(sync_pir_to_irr)(apic->vcpu);
911
else
912
highest_irr = apic_find_highest_irr(apic);
913
if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
914
return -1;
915
return highest_irr;
916
}
917
918
static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
919
{
920
u32 tpr, isrv, ppr, old_ppr;
921
int isr;
922
923
old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
924
tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
925
isr = apic_find_highest_isr(apic);
926
isrv = (isr != -1) ? isr : 0;
927
928
if ((tpr & 0xf0) >= (isrv & 0xf0))
929
ppr = tpr & 0xff;
930
else
931
ppr = isrv & 0xf0;
932
933
*new_ppr = ppr;
934
if (old_ppr != ppr)
935
kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
936
937
return ppr < old_ppr;
938
}
939
940
static void apic_update_ppr(struct kvm_lapic *apic)
941
{
942
u32 ppr;
943
944
if (__apic_update_ppr(apic, &ppr) &&
945
apic_has_interrupt_for_ppr(apic, ppr) != -1)
946
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
947
}
948
949
void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
950
{
951
apic_update_ppr(vcpu->arch.apic);
952
}
953
EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
954
955
static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
956
{
957
kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
958
apic_update_ppr(apic);
959
}
960
961
static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
962
{
963
return mda == (apic_x2apic_mode(apic) ?
964
X2APIC_BROADCAST : APIC_BROADCAST);
965
}
966
967
static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
968
{
969
if (kvm_apic_broadcast(apic, mda))
970
return true;
971
972
/*
973
* Hotplug hack: Accept interrupts for vCPUs in xAPIC mode as if they
974
* were in x2APIC mode if the target APIC ID can't be encoded as an
975
* xAPIC ID. This allows unique addressing of hotplugged vCPUs (which
976
* start in xAPIC mode) with an APIC ID that is unaddressable in xAPIC
977
* mode. Match the x2APIC ID if and only if the target APIC ID can't
978
* be encoded in xAPIC to avoid spurious matches against a vCPU that
979
* changed its (addressable) xAPIC ID (which is writable).
980
*/
981
if (apic_x2apic_mode(apic) || mda > 0xff)
982
return mda == kvm_x2apic_id(apic);
983
984
return mda == kvm_xapic_id(apic);
985
}
986
987
static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
988
{
989
u32 logical_id;
990
991
if (kvm_apic_broadcast(apic, mda))
992
return true;
993
994
logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
995
996
if (apic_x2apic_mode(apic))
997
return ((logical_id >> 16) == (mda >> 16))
998
&& (logical_id & mda & 0xffff) != 0;
999
1000
logical_id = GET_APIC_LOGICAL_ID(logical_id);
1001
1002
switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
1003
case APIC_DFR_FLAT:
1004
return (logical_id & mda) != 0;
1005
case APIC_DFR_CLUSTER:
1006
return ((logical_id >> 4) == (mda >> 4))
1007
&& (logical_id & mda & 0xf) != 0;
1008
default:
1009
return false;
1010
}
1011
}
1012
1013
/* The KVM local APIC implementation has two quirks:
1014
*
1015
* - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
1016
* in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
1017
* KVM doesn't do that aliasing.
1018
*
1019
* - in-kernel IOAPIC messages have to be delivered directly to
1020
* x2APIC, because the kernel does not support interrupt remapping.
1021
* In order to support broadcast without interrupt remapping, x2APIC
1022
* rewrites the destination of non-IPI messages from APIC_BROADCAST
1023
* to X2APIC_BROADCAST.
1024
*
1025
* The broadcast quirk can be disabled with KVM_CAP_X2APIC_API. This is
1026
* important when userspace wants to use x2APIC-format MSIs, because
1027
* APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
1028
*/
1029
static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
1030
struct kvm_lapic *source, struct kvm_lapic *target)
1031
{
1032
bool ipi = source != NULL;
1033
1034
if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
1035
!ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
1036
return X2APIC_BROADCAST;
1037
1038
return dest_id;
1039
}
1040
1041
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
1042
int shorthand, unsigned int dest, int dest_mode)
1043
{
1044
struct kvm_lapic *target = vcpu->arch.apic;
1045
u32 mda = kvm_apic_mda(vcpu, dest, source, target);
1046
1047
ASSERT(target);
1048
switch (shorthand) {
1049
case APIC_DEST_NOSHORT:
1050
if (dest_mode == APIC_DEST_PHYSICAL)
1051
return kvm_apic_match_physical_addr(target, mda);
1052
else
1053
return kvm_apic_match_logical_addr(target, mda);
1054
case APIC_DEST_SELF:
1055
return target == source;
1056
case APIC_DEST_ALLINC:
1057
return true;
1058
case APIC_DEST_ALLBUT:
1059
return target != source;
1060
default:
1061
return false;
1062
}
1063
}
1064
EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
1065
1066
int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
1067
const unsigned long *bitmap, u32 bitmap_size)
1068
{
1069
u32 mod;
1070
int i, idx = -1;
1071
1072
mod = vector % dest_vcpus;
1073
1074
for (i = 0; i <= mod; i++) {
1075
idx = find_next_bit(bitmap, bitmap_size, idx + 1);
1076
BUG_ON(idx == bitmap_size);
1077
}
1078
1079
return idx;
1080
}
1081
1082
static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
1083
{
1084
if (!kvm->arch.disabled_lapic_found) {
1085
kvm->arch.disabled_lapic_found = true;
1086
pr_info("Disabled LAPIC found during irq injection\n");
1087
}
1088
}
1089
1090
static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
1091
struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
1092
{
1093
if (kvm->arch.x2apic_broadcast_quirk_disabled) {
1094
if ((irq->dest_id == APIC_BROADCAST &&
1095
map->logical_mode != KVM_APIC_MODE_X2APIC))
1096
return true;
1097
if (irq->dest_id == X2APIC_BROADCAST)
1098
return true;
1099
} else {
1100
bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
1101
if (irq->dest_id == (x2apic_ipi ?
1102
X2APIC_BROADCAST : APIC_BROADCAST))
1103
return true;
1104
}
1105
1106
return false;
1107
}
1108
1109
/* Return true if the interrupt can be handled by using *bitmap as index mask
1110
* for valid destinations in *dst array.
1111
* Return false if kvm_apic_map_get_dest_lapic did nothing useful.
1112
* Note: we may have zero kvm_lapic destinations when we return true, which
1113
* means that the interrupt should be dropped. In this case, *bitmap would be
1114
* zero and *dst undefined.
1115
*/
1116
static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
1117
struct kvm_lapic **src, struct kvm_lapic_irq *irq,
1118
struct kvm_apic_map *map, struct kvm_lapic ***dst,
1119
unsigned long *bitmap)
1120
{
1121
int i, lowest;
1122
1123
if (irq->shorthand == APIC_DEST_SELF && src) {
1124
*dst = src;
1125
*bitmap = 1;
1126
return true;
1127
} else if (irq->shorthand)
1128
return false;
1129
1130
if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
1131
return false;
1132
1133
if (irq->dest_mode == APIC_DEST_PHYSICAL) {
1134
if (irq->dest_id > map->max_apic_id) {
1135
*bitmap = 0;
1136
} else {
1137
u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
1138
*dst = &map->phys_map[dest_id];
1139
*bitmap = 1;
1140
}
1141
return true;
1142
}
1143
1144
*bitmap = 0;
1145
if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
1146
(u16 *)bitmap))
1147
return false;
1148
1149
if (!kvm_lowest_prio_delivery(irq))
1150
return true;
1151
1152
if (!kvm_vector_hashing_enabled()) {
1153
lowest = -1;
1154
for_each_set_bit(i, bitmap, 16) {
1155
if (!(*dst)[i])
1156
continue;
1157
if (lowest < 0)
1158
lowest = i;
1159
else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
1160
(*dst)[lowest]->vcpu) < 0)
1161
lowest = i;
1162
}
1163
} else {
1164
if (!*bitmap)
1165
return true;
1166
1167
lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
1168
bitmap, 16);
1169
1170
if (!(*dst)[lowest]) {
1171
kvm_apic_disabled_lapic_found(kvm);
1172
*bitmap = 0;
1173
return true;
1174
}
1175
}
1176
1177
*bitmap = (lowest >= 0) ? 1 << lowest : 0;
1178
1179
return true;
1180
}
1181
1182
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
1183
struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
1184
{
1185
struct kvm_apic_map *map;
1186
unsigned long bitmap;
1187
struct kvm_lapic **dst = NULL;
1188
int i;
1189
bool ret;
1190
1191
*r = -1;
1192
1193
if (irq->shorthand == APIC_DEST_SELF) {
1194
if (KVM_BUG_ON(!src, kvm)) {
1195
*r = 0;
1196
return true;
1197
}
1198
*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
1199
return true;
1200
}
1201
1202
rcu_read_lock();
1203
map = rcu_dereference(kvm->arch.apic_map);
1204
1205
ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
1206
if (ret) {
1207
*r = 0;
1208
for_each_set_bit(i, &bitmap, 16) {
1209
if (!dst[i])
1210
continue;
1211
*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
1212
}
1213
}
1214
1215
rcu_read_unlock();
1216
return ret;
1217
}
1218
1219
/*
1220
* This routine tries to handle interrupts in posted mode, here is how
1221
* it deals with different cases:
1222
* - For single-destination interrupts, handle it in posted mode
1223
* - Else if vector hashing is enabled and it is a lowest-priority
1224
* interrupt, handle it in posted mode and use the following mechanism
1225
* to find the destination vCPU.
1226
* 1. For lowest-priority interrupts, store all the possible
1227
* destination vCPUs in an array.
1228
* 2. Use "guest vector % max number of destination vCPUs" to find
1229
* the right destination vCPU in the array for the lowest-priority
1230
* interrupt.
1231
* - Otherwise, use remapped mode to inject the interrupt.
1232
*/
1233
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
1234
struct kvm_vcpu **dest_vcpu)
1235
{
1236
struct kvm_apic_map *map;
1237
unsigned long bitmap;
1238
struct kvm_lapic **dst = NULL;
1239
bool ret = false;
1240
1241
if (irq->shorthand)
1242
return false;
1243
1244
rcu_read_lock();
1245
map = rcu_dereference(kvm->arch.apic_map);
1246
1247
if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
1248
hweight16(bitmap) == 1) {
1249
unsigned long i = find_first_bit(&bitmap, 16);
1250
1251
if (dst[i]) {
1252
*dest_vcpu = dst[i]->vcpu;
1253
ret = true;
1254
}
1255
}
1256
1257
rcu_read_unlock();
1258
return ret;
1259
}
1260
1261
/*
1262
* Add a pending IRQ into lapic.
1263
* Return 1 if successfully added and 0 if discarded.
1264
*/
1265
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
1266
int vector, int level, int trig_mode,
1267
struct dest_map *dest_map)
1268
{
1269
int result = 0;
1270
struct kvm_vcpu *vcpu = apic->vcpu;
1271
1272
trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
1273
trig_mode, vector);
1274
switch (delivery_mode) {
1275
case APIC_DM_LOWEST:
1276
vcpu->arch.apic_arb_prio++;
1277
fallthrough;
1278
case APIC_DM_FIXED:
1279
if (unlikely(trig_mode && !level))
1280
break;
1281
1282
/* FIXME add logic for vcpu on reset */
1283
if (unlikely(!apic_enabled(apic)))
1284
break;
1285
1286
result = 1;
1287
1288
if (dest_map) {
1289
__set_bit(vcpu->vcpu_id, dest_map->map);
1290
dest_map->vectors[vcpu->vcpu_id] = vector;
1291
}
1292
1293
if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
1294
if (trig_mode)
1295
apic_set_vector(vector, apic->regs + APIC_TMR);
1296
else
1297
apic_clear_vector(vector, apic->regs + APIC_TMR);
1298
}
1299
1300
kvm_x86_call(deliver_interrupt)(apic, delivery_mode,
1301
trig_mode, vector);
1302
break;
1303
1304
case APIC_DM_REMRD:
1305
result = 1;
1306
vcpu->arch.pv.pv_unhalted = 1;
1307
kvm_make_request(KVM_REQ_EVENT, vcpu);
1308
kvm_vcpu_kick(vcpu);
1309
break;
1310
1311
case APIC_DM_SMI:
1312
if (!kvm_inject_smi(vcpu)) {
1313
kvm_vcpu_kick(vcpu);
1314
result = 1;
1315
}
1316
break;
1317
1318
case APIC_DM_NMI:
1319
result = 1;
1320
kvm_inject_nmi(vcpu);
1321
kvm_vcpu_kick(vcpu);
1322
break;
1323
1324
case APIC_DM_INIT:
1325
if (!trig_mode || level) {
1326
result = 1;
1327
/* assumes that there are only KVM_APIC_INIT/SIPI */
1328
apic->pending_events = (1UL << KVM_APIC_INIT);
1329
kvm_make_request(KVM_REQ_EVENT, vcpu);
1330
kvm_vcpu_kick(vcpu);
1331
}
1332
break;
1333
1334
case APIC_DM_STARTUP:
1335
result = 1;
1336
apic->sipi_vector = vector;
1337
/* make sure sipi_vector is visible for the receiver */
1338
smp_wmb();
1339
set_bit(KVM_APIC_SIPI, &apic->pending_events);
1340
kvm_make_request(KVM_REQ_EVENT, vcpu);
1341
kvm_vcpu_kick(vcpu);
1342
break;
1343
1344
case APIC_DM_EXTINT:
1345
/*
1346
* Should only be called by kvm_apic_local_deliver() with LVT0,
1347
* before NMI watchdog was enabled. Already handled by
1348
* kvm_apic_accept_pic_intr().
1349
*/
1350
break;
1351
1352
default:
1353
printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
1354
delivery_mode);
1355
break;
1356
}
1357
return result;
1358
}
1359
1360
/*
1361
* This routine identifies the mask of destination vCPUs meant to receive an
1362
* IOAPIC interrupt. It either uses kvm_apic_map_get_dest_lapic() to find
1363
* the destination vCPU array and set the bitmap, or it traverses all
1364
* available vCPUs and matches each one against the destination.
1365
*/
1366
void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
1367
unsigned long *vcpu_bitmap)
1368
{
1369
struct kvm_lapic **dest_vcpu = NULL;
1370
struct kvm_lapic *src = NULL;
1371
struct kvm_apic_map *map;
1372
struct kvm_vcpu *vcpu;
1373
unsigned long bitmap, i;
1374
int vcpu_idx;
1375
bool ret;
1376
1377
rcu_read_lock();
1378
map = rcu_dereference(kvm->arch.apic_map);
1379
1380
ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
1381
&bitmap);
1382
if (ret) {
1383
for_each_set_bit(i, &bitmap, 16) {
1384
if (!dest_vcpu[i])
1385
continue;
1386
vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
1387
__set_bit(vcpu_idx, vcpu_bitmap);
1388
}
1389
} else {
1390
kvm_for_each_vcpu(i, vcpu, kvm) {
1391
if (!kvm_apic_present(vcpu))
1392
continue;
1393
if (!kvm_apic_match_dest(vcpu, NULL,
1394
irq->shorthand,
1395
irq->dest_id,
1396
irq->dest_mode))
1397
continue;
1398
__set_bit(i, vcpu_bitmap);
1399
}
1400
}
1401
rcu_read_unlock();
1402
}
1403
1404
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1405
{
1406
return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
1407
}
1408
1409
static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
1410
{
1411
return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
1412
}
1413
1414
static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1415
{
1416
int __maybe_unused trigger_mode;
1417
1418
/* Forward the EOI to the ioapic only if the ioapic handles the vector. */
1419
if (!kvm_ioapic_handles_vector(apic, vector))
1420
return;
1421
1422
/*
1423
* If the intercepted EOI is for an IRQ that was pending from previous
1424
* routing, then re-scan the I/O APIC routes as EOIs for the IRQ likely
1425
* no longer need to be intercepted.
1426
*/
1427
if (apic->vcpu->arch.highest_stale_pending_ioapic_eoi == vector)
1428
kvm_make_request(KVM_REQ_SCAN_IOAPIC, apic->vcpu);
1429
1430
/* Request a KVM exit to inform the userspace IOAPIC. */
1431
if (irqchip_split(apic->vcpu->kvm)) {
1432
apic->vcpu->arch.pending_ioapic_eoi = vector;
1433
kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1434
return;
1435
}
1436
1437
#ifdef CONFIG_KVM_IOAPIC
1438
if (apic_test_vector(vector, apic->regs + APIC_TMR))
1439
trigger_mode = IOAPIC_LEVEL_TRIG;
1440
else
1441
trigger_mode = IOAPIC_EDGE_TRIG;
1442
1443
kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1444
#endif
1445
}
1446
1447
static int apic_set_eoi(struct kvm_lapic *apic)
1448
{
1449
int vector = apic_find_highest_isr(apic);
1450
1451
trace_kvm_eoi(apic, vector);
1452
1453
/*
1454
* Not every write EOI will has corresponding ISR,
1455
* one example is when Kernel check timer on setup_IO_APIC
1456
*/
1457
if (vector == -1)
1458
return vector;
1459
1460
apic_clear_isr(vector, apic);
1461
apic_update_ppr(apic);
1462
1463
if (kvm_hv_synic_has_vector(apic->vcpu, vector))
1464
kvm_hv_synic_send_eoi(apic->vcpu, vector);
1465
1466
kvm_ioapic_send_eoi(apic, vector);
1467
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1468
return vector;
1469
}
1470
1471
/*
1472
* this interface assumes a trap-like exit, which has already finished
1473
* desired side effect including vISR and vPPR update.
1474
*/
1475
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1476
{
1477
struct kvm_lapic *apic = vcpu->arch.apic;
1478
1479
trace_kvm_eoi(apic, vector);
1480
1481
kvm_ioapic_send_eoi(apic, vector);
1482
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1483
}
1484
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1485
1486
void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
1487
{
1488
struct kvm_lapic_irq irq;
1489
1490
/* KVM has no delay and should always clear the BUSY/PENDING flag. */
1491
WARN_ON_ONCE(icr_low & APIC_ICR_BUSY);
1492
1493
irq.vector = icr_low & APIC_VECTOR_MASK;
1494
irq.delivery_mode = icr_low & APIC_MODE_MASK;
1495
irq.dest_mode = icr_low & APIC_DEST_MASK;
1496
irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1497
irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1498
irq.shorthand = icr_low & APIC_SHORT_MASK;
1499
irq.msi_redir_hint = false;
1500
if (apic_x2apic_mode(apic))
1501
irq.dest_id = icr_high;
1502
else
1503
irq.dest_id = GET_XAPIC_DEST_FIELD(icr_high);
1504
1505
trace_kvm_apic_ipi(icr_low, irq.dest_id);
1506
1507
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1508
}
1509
EXPORT_SYMBOL_GPL(kvm_apic_send_ipi);
1510
1511
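/*
* Derive the Timer Current Count (TMCCT) from the time remaining until the
* programmed expiration, taken modulo the period and converted back to
* timer ticks using the APIC bus cycle length and the divide count.
*/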
static u32 apic_get_tmcct(struct kvm_lapic *apic)
1512
{
1513
ktime_t remaining, now;
1514
s64 ns;
1515
1516
ASSERT(apic != NULL);
1517
1518
/* if initial count is 0, current count should also be 0 */
1519
if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1520
apic->lapic_timer.period == 0)
1521
return 0;
1522
1523
now = ktime_get();
1524
remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1525
if (ktime_to_ns(remaining) < 0)
1526
remaining = 0;
1527
1528
ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1529
return div64_u64(ns, (apic->vcpu->kvm->arch.apic_bus_cycle_ns *
1530
apic->divide_count));
1531
}
1532
1533
static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1534
{
1535
struct kvm_vcpu *vcpu = apic->vcpu;
1536
struct kvm_run *run = vcpu->run;
1537
1538
kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1539
run->tpr_access.rip = kvm_rip_read(vcpu);
1540
run->tpr_access.is_write = write;
1541
}
1542
1543
static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1544
{
1545
if (apic->vcpu->arch.tpr_access_reporting)
1546
__report_tpr_access(apic, write);
1547
}
1548
1549
static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1550
{
1551
u32 val = 0;
1552
1553
if (offset >= LAPIC_MMIO_LENGTH)
1554
return 0;
1555
1556
switch (offset) {
1557
case APIC_ARBPRI:
1558
break;
1559
1560
case APIC_TMCCT: /* Timer CCR */
1561
if (apic_lvtt_tscdeadline(apic))
1562
return 0;
1563
1564
val = apic_get_tmcct(apic);
1565
break;
1566
case APIC_PROCPRI:
1567
apic_update_ppr(apic);
1568
val = kvm_lapic_get_reg(apic, offset);
1569
break;
1570
case APIC_TASKPRI:
1571
report_tpr_access(apic, false);
1572
fallthrough;
1573
default:
1574
val = kvm_lapic_get_reg(apic, offset);
1575
break;
1576
}
1577
1578
return val;
1579
}
1580
1581
static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1582
{
1583
return container_of(dev, struct kvm_lapic, dev);
1584
}
1585
1586
#define APIC_REG_MASK(reg) (1ull << ((reg) >> 4))
1587
#define APIC_REGS_MASK(first, count) \
1588
(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
1589
1590
u64 kvm_lapic_readable_reg_mask(struct kvm_lapic *apic)
1591
{
1592
/* Leave bits '0' for reserved and write-only registers. */
1593
u64 valid_reg_mask =
1594
APIC_REG_MASK(APIC_ID) |
1595
APIC_REG_MASK(APIC_LVR) |
1596
APIC_REG_MASK(APIC_TASKPRI) |
1597
APIC_REG_MASK(APIC_PROCPRI) |
1598
APIC_REG_MASK(APIC_LDR) |
1599
APIC_REG_MASK(APIC_SPIV) |
1600
APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
1601
APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
1602
APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
1603
APIC_REG_MASK(APIC_ESR) |
1604
APIC_REG_MASK(APIC_ICR) |
1605
APIC_REG_MASK(APIC_LVTT) |
1606
APIC_REG_MASK(APIC_LVTTHMR) |
1607
APIC_REG_MASK(APIC_LVTPC) |
1608
APIC_REG_MASK(APIC_LVT0) |
1609
APIC_REG_MASK(APIC_LVT1) |
1610
APIC_REG_MASK(APIC_LVTERR) |
1611
APIC_REG_MASK(APIC_TMICT) |
1612
APIC_REG_MASK(APIC_TMCCT) |
1613
APIC_REG_MASK(APIC_TDCR);
1614
1615
if (kvm_lapic_lvt_supported(apic, LVT_CMCI))
1616
valid_reg_mask |= APIC_REG_MASK(APIC_LVTCMCI);
1617
1618
/* ARBPRI, DFR, and ICR2 are not valid in x2APIC mode. */
1619
if (!apic_x2apic_mode(apic))
1620
valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI) |
1621
APIC_REG_MASK(APIC_DFR) |
1622
APIC_REG_MASK(APIC_ICR2);
1623
1624
return valid_reg_mask;
1625
}
1626
EXPORT_SYMBOL_GPL(kvm_lapic_readable_reg_mask);
1627
1628
static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1629
void *data)
1630
{
1631
unsigned char alignment = offset & 0xf;
1632
u32 result;
1633
1634
/*
1635
* WARN if KVM reads ICR in x2APIC mode, as it's an 8-byte register in
1636
* x2APIC and needs to be manually handled by the caller.
1637
*/
1638
WARN_ON_ONCE(apic_x2apic_mode(apic) && offset == APIC_ICR);
1639
1640
if (alignment + len > 4)
1641
return 1;
1642
1643
if (offset > 0x3f0 ||
1644
!(kvm_lapic_readable_reg_mask(apic) & APIC_REG_MASK(offset)))
1645
return 1;
1646
1647
result = __apic_read(apic, offset & ~0xf);
1648
1649
trace_kvm_apic_read(offset, result);
1650
1651
switch (len) {
1652
case 1:
1653
case 2:
1654
case 4:
1655
memcpy(data, (char *)&result + alignment, len);
1656
break;
1657
default:
1658
printk(KERN_ERR "Local APIC read with len = %x, "
1659
"should be 1,2, or 4 instead\n", len);
1660
break;
1661
}
1662
return 0;
1663
}
1664
1665
static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1666
{
1667
return addr >= apic->base_address &&
1668
addr < apic->base_address + LAPIC_MMIO_LENGTH;
1669
}
1670
1671
static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1672
gpa_t address, int len, void *data)
1673
{
1674
struct kvm_lapic *apic = to_lapic(this);
1675
u32 offset = address - apic->base_address;
1676
1677
if (!apic_mmio_in_range(apic, address))
1678
return -EOPNOTSUPP;
1679
1680
if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
1681
if (!kvm_check_has_quirk(vcpu->kvm,
1682
KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
1683
return -EOPNOTSUPP;
1684
1685
memset(data, 0xff, len);
1686
return 0;
1687
}
1688
1689
kvm_lapic_reg_read(apic, offset, len, data);
1690
1691
return 0;
1692
}
1693
1694
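/*
* Decode the timer Divide Configuration Register: bits 0, 1 and 3 select a
* power-of-two divisor, with the all-set encoding (0b1011) meaning divide
* by one.
*/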
static void update_divide_count(struct kvm_lapic *apic)
1695
{
1696
u32 tmp1, tmp2, tdcr;
1697
1698
tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1699
tmp1 = tdcr & 0xf;
1700
tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1701
apic->divide_count = 0x1 << (tmp2 & 0x7);
1702
}
1703
1704
static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1705
{
1706
/*
1707
* Do not allow the guest to program periodic timers with a small
1708
* interval, since the hrtimers are not throttled by the host
1709
* scheduler.
1710
*/
1711
if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1712
s64 min_period = min_timer_period_us * 1000LL;
1713
1714
if (apic->lapic_timer.period < min_period) {
1715
pr_info_once(
1716
"vcpu %i: requested %lld ns "
1717
"lapic timer period limited to %lld ns\n",
1718
apic->vcpu->vcpu_id,
1719
apic->lapic_timer.period, min_period);
1720
apic->lapic_timer.period = min_period;
1721
}
1722
}
1723
}
1724
1725
static void cancel_hv_timer(struct kvm_lapic *apic);
1726
1727
static void cancel_apic_timer(struct kvm_lapic *apic)
1728
{
1729
hrtimer_cancel(&apic->lapic_timer.timer);
1730
preempt_disable();
1731
if (apic->lapic_timer.hv_timer_in_use)
1732
cancel_hv_timer(apic);
1733
preempt_enable();
1734
atomic_set(&apic->lapic_timer.pending, 0);
1735
}
1736
1737
static void apic_update_lvtt(struct kvm_lapic *apic)
1738
{
1739
u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1740
apic->lapic_timer.timer_mode_mask;
1741
1742
if (apic->lapic_timer.timer_mode != timer_mode) {
1743
if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1744
APIC_LVT_TIMER_TSCDEADLINE)) {
1745
cancel_apic_timer(apic);
1746
kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1747
apic->lapic_timer.period = 0;
1748
apic->lapic_timer.tscdeadline = 0;
1749
}
1750
apic->lapic_timer.timer_mode = timer_mode;
1751
limit_periodic_timer_frequency(apic);
1752
}
1753
}
1754
1755
/*
1756
* On APICv, this test will cause a busy wait
1757
* during a higher-priority task.
1758
*/
1759
1760
static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1761
{
1762
struct kvm_lapic *apic = vcpu->arch.apic;
1763
u32 reg;
1764
1765
/*
1766
* Assume a timer IRQ was "injected" if the APIC is protected. KVM's
1767
* copy of the vIRR is bogus; it's the responsibility of the caller to
1768
* precisely check whether or not a timer IRQ is pending.
1769
*/
1770
if (apic->guest_apic_protected)
1771
return true;
1772
1773
reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1774
if (kvm_apic_hw_enabled(apic)) {
1775
int vec = reg & APIC_VECTOR_MASK;
1776
void *bitmap = apic->regs + APIC_ISR;
1777
1778
if (apic->apicv_active)
1779
bitmap = apic->regs + APIC_IRR;
1780
1781
if (apic_test_vector(vec, bitmap))
1782
return true;
1783
}
1784
return false;
1785
}
1786
1787
static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
1788
{
1789
u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
1790
1791
/*
1792
* If the guest TSC is running at a different ratio than the host, then
1793
* convert the delay to nanoseconds to achieve an accurate delay. Note
1794
* that __delay() uses delay_tsc whenever the hardware has TSC, thus
1795
* always for VMX enabled hardware.
1796
*/
1797
if (vcpu->arch.tsc_scaling_ratio == kvm_caps.default_tsc_scaling_ratio) {
1798
__delay(min(guest_cycles,
1799
nsec_to_cycles(vcpu, timer_advance_ns)));
1800
} else {
1801
u64 delay_ns = guest_cycles * 1000000ULL;
1802
do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
1803
ndelay(min_t(u32, delay_ns, timer_advance_ns));
1804
}
1805
}
1806
1807
static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
1808
s64 advance_expire_delta)
1809
{
1810
struct kvm_lapic *apic = vcpu->arch.apic;
1811
u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1812
u64 ns;
1813
1814
/* Do not adjust for tiny fluctuations or large random spikes. */
1815
if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
1816
abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
1817
return;
1818
1819
/* too early */
1820
if (advance_expire_delta < 0) {
1821
ns = -advance_expire_delta * 1000000ULL;
1822
do_div(ns, vcpu->arch.virtual_tsc_khz);
1823
timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1824
} else {
1825
/* too late */
1826
ns = advance_expire_delta * 1000000ULL;
1827
do_div(ns, vcpu->arch.virtual_tsc_khz);
1828
timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1829
}
1830
1831
if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
1832
timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
1833
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1834
}
1835
1836
static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1837
{
1838
struct kvm_lapic *apic = vcpu->arch.apic;
1839
u64 guest_tsc, tsc_deadline;
1840
1841
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1842
apic->lapic_timer.expired_tscdeadline = 0;
1843
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1844
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
1845
1846
adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
1847
1848
/*
1849
* If the timer fired early, reread the TSC to account for the overhead
1850
* of the above adjustment to avoid waiting longer than is necessary.
1851
*/
1852
if (guest_tsc < tsc_deadline)
1853
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1854
1855
if (guest_tsc < tsc_deadline)
1856
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1857
}
1858
1859
void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1860
{
1861
if (lapic_in_kernel(vcpu) &&
1862
vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1863
vcpu->arch.apic->lapic_timer.timer_advance_ns &&
1864
lapic_timer_int_injected(vcpu))
1865
__kvm_wait_lapic_expire(vcpu);
1866
}
1867
EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
1868
1869
static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1870
{
1871
struct kvm_timer *ktimer = &apic->lapic_timer;
1872
1873
kvm_apic_local_deliver(apic, APIC_LVTT);
1874
if (apic_lvtt_tscdeadline(apic)) {
1875
ktimer->tscdeadline = 0;
1876
} else if (apic_lvtt_oneshot(apic)) {
1877
ktimer->tscdeadline = 0;
1878
ktimer->target_expiration = 0;
1879
}
1880
}
1881
1882
static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
1883
{
1884
struct kvm_vcpu *vcpu = apic->vcpu;
1885
struct kvm_timer *ktimer = &apic->lapic_timer;
1886
1887
if (atomic_read(&apic->lapic_timer.pending))
1888
return;
1889
1890
if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1891
ktimer->expired_tscdeadline = ktimer->tscdeadline;
1892
1893
if (!from_timer_fn && apic->apicv_active) {
1894
WARN_ON(kvm_get_running_vcpu() != vcpu);
1895
kvm_apic_inject_pending_timer_irqs(apic);
1896
return;
1897
}
1898
1899
if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
1900
/*
1901
* Ensure the guest's timer has truly expired before posting an
1902
* interrupt. Open code the relevant checks to avoid querying
1903
* lapic_timer_int_injected(), which will be false since the
1904
* interrupt isn't yet injected. Waiting until after injecting
1905
* is not an option since that won't help a posted interrupt.
1906
*/
1907
if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1908
vcpu->arch.apic->lapic_timer.timer_advance_ns)
1909
__kvm_wait_lapic_expire(vcpu);
1910
kvm_apic_inject_pending_timer_irqs(apic);
1911
return;
1912
}
1913
1914
atomic_inc(&apic->lapic_timer.pending);
1915
kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
1916
if (from_timer_fn)
1917
kvm_vcpu_kick(vcpu);
1918
}
1919
1920
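/*
 * Arm the hrtimer for TSC-deadline mode.  The remaining guest TSC cycles
 * are converted to nanoseconds using the vCPU's virtual TSC frequency, and
 * the timer is programmed to fire timer_advance_ns early.  If the deadline
 * has already passed, or falls within the advancement window, expire the
 * timer immediately.
 */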
static void start_sw_tscdeadline(struct kvm_lapic *apic)
1921
{
1922
struct kvm_timer *ktimer = &apic->lapic_timer;
1923
u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
1924
u64 ns = 0;
1925
ktime_t expire;
1926
struct kvm_vcpu *vcpu = apic->vcpu;
1927
u32 this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1928
unsigned long flags;
1929
ktime_t now;
1930
1931
if (unlikely(!tscdeadline || !this_tsc_khz))
1932
return;
1933
1934
local_irq_save(flags);
1935
1936
now = ktime_get();
1937
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1938
1939
ns = (tscdeadline - guest_tsc) * 1000000ULL;
1940
do_div(ns, this_tsc_khz);
1941
1942
if (likely(tscdeadline > guest_tsc) &&
1943
likely(ns > apic->lapic_timer.timer_advance_ns)) {
1944
expire = ktime_add_ns(now, ns);
1945
expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
1946
hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
1947
} else
1948
apic_timer_expired(apic, false);
1949
1950
local_irq_restore(flags);
1951
}
1952
1953
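/*
 * Convert a count register value to nanoseconds: one count takes
 * apic_bus_cycle_ns * divide_count nanoseconds to elapse.
 */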
static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
1954
{
1955
return (u64)tmict * apic->vcpu->kvm->arch.apic_bus_cycle_ns *
1956
(u64)apic->divide_count;
1957
}
1958
1959
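/*
 * The divide configuration changed while the timer is armed: rescale the
 * time remaining until expiration by the new divisor and adjust both the
 * hrtimer target and the TSC deadline to match.
 */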
static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
1960
{
1961
ktime_t now, remaining;
1962
u64 ns_remaining_old, ns_remaining_new;
1963
1964
apic->lapic_timer.period =
1965
tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1966
limit_periodic_timer_frequency(apic);
1967
1968
now = ktime_get();
1969
remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1970
if (ktime_to_ns(remaining) < 0)
1971
remaining = 0;
1972
1973
ns_remaining_old = ktime_to_ns(remaining);
1974
ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
1975
apic->divide_count, old_divisor);
1976
1977
apic->lapic_timer.tscdeadline +=
1978
nsec_to_cycles(apic->vcpu, ns_remaining_new) -
1979
nsec_to_cycles(apic->vcpu, ns_remaining_old);
1980
apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
1981
}
1982
1983
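/*
 * Compute the period and the absolute expiration, both as a ktime and as a
 * guest TSC deadline, for a one-shot or periodic timer.  When restoring
 * from a current-count register (count_reg != APIC_TMICT), start from the
 * remaining count, clamped to the initial count.  Returns false if the
 * initial count is zero, i.e. the timer isn't running.
 */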
static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
1984
{
1985
ktime_t now;
1986
u64 tscl = rdtsc();
1987
s64 deadline;
1988
1989
now = ktime_get();
1990
apic->lapic_timer.period =
1991
tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1992
1993
if (!apic->lapic_timer.period) {
1994
apic->lapic_timer.tscdeadline = 0;
1995
return false;
1996
}
1997
1998
limit_periodic_timer_frequency(apic);
1999
deadline = apic->lapic_timer.period;
2000
2001
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
2002
if (unlikely(count_reg != APIC_TMICT)) {
2003
deadline = tmict_to_ns(apic,
2004
kvm_lapic_get_reg(apic, count_reg));
2005
if (unlikely(deadline <= 0)) {
2006
if (apic_lvtt_period(apic))
2007
deadline = apic->lapic_timer.period;
2008
else
2009
deadline = 0;
2010
}
2011
else if (unlikely(deadline > apic->lapic_timer.period)) {
2012
pr_info_ratelimited(
2013
"vcpu %i: requested lapic timer restore with "
2014
"starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
2015
"Using initial count to start timer.\n",
2016
apic->vcpu->vcpu_id,
2017
count_reg,
2018
kvm_lapic_get_reg(apic, count_reg),
2019
deadline, apic->lapic_timer.period);
2020
kvm_lapic_set_reg(apic, count_reg, 0);
2021
deadline = apic->lapic_timer.period;
2022
}
2023
}
2024
}
2025
2026
apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
2027
nsec_to_cycles(apic->vcpu, deadline);
2028
apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
2029
2030
return true;
2031
}
2032
2033
static void advance_periodic_target_expiration(struct kvm_lapic *apic)
2034
{
2035
ktime_t now = ktime_get();
2036
u64 tscl = rdtsc();
2037
ktime_t delta;
2038
2039
/*
2040
* Synchronize both deadlines to the same time source or
2041
* differences in the periods (caused by differences in the
2042
* underlying clocks or numerical approximation errors) will
2043
* cause the two to drift apart over time as the errors
2044
* accumulate.
2045
*/
2046
apic->lapic_timer.target_expiration =
2047
ktime_add_ns(apic->lapic_timer.target_expiration,
2048
apic->lapic_timer.period);
2049
delta = ktime_sub(apic->lapic_timer.target_expiration, now);
2050
apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
2051
nsec_to_cycles(apic->vcpu, delta);
2052
}
2053
2054
static void start_sw_period(struct kvm_lapic *apic)
2055
{
2056
if (!apic->lapic_timer.period)
2057
return;
2058
2059
if (ktime_after(ktime_get(),
2060
apic->lapic_timer.target_expiration)) {
2061
apic_timer_expired(apic, false);
2062
2063
if (apic_lvtt_oneshot(apic))
2064
return;
2065
2066
advance_periodic_target_expiration(apic);
2067
}
2068
2069
hrtimer_start(&apic->lapic_timer.timer,
2070
apic->lapic_timer.target_expiration,
2071
HRTIMER_MODE_ABS_HARD);
2072
}
2073
2074
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
2075
{
2076
if (!lapic_in_kernel(vcpu))
2077
return false;
2078
2079
return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
2080
}
2081
2082
static void cancel_hv_timer(struct kvm_lapic *apic)
2083
{
2084
WARN_ON(preemptible());
2085
WARN_ON(!apic->lapic_timer.hv_timer_in_use);
2086
kvm_x86_call(cancel_hv_timer)(apic->vcpu);
2087
apic->lapic_timer.hv_timer_in_use = false;
2088
}
2089
2090
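/*
 * Try to emulate the APIC timer with a hardware-assisted timer, e.g. the
 * VMX preemption timer.  On success the hrtimer is cancelled; returning
 * false tells the caller to fall back to the software timer.
 */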
static bool start_hv_timer(struct kvm_lapic *apic)
2091
{
2092
struct kvm_timer *ktimer = &apic->lapic_timer;
2093
struct kvm_vcpu *vcpu = apic->vcpu;
2094
bool expired;
2095
2096
WARN_ON(preemptible());
2097
if (!kvm_can_use_hv_timer(vcpu))
2098
return false;
2099
2100
if (!ktimer->tscdeadline)
2101
return false;
2102
2103
if (kvm_x86_call(set_hv_timer)(vcpu, ktimer->tscdeadline, &expired))
2104
return false;
2105
2106
ktimer->hv_timer_in_use = true;
2107
hrtimer_cancel(&ktimer->timer);
2108
2109
/*
2110
* To simplify handling the periodic timer, leave the hv timer running
2111
* even if the deadline timer has expired, i.e. rely on the resulting
2112
* VM-Exit to recompute the periodic timer's target expiration.
2113
*/
2114
if (!apic_lvtt_period(apic)) {
2115
/*
2116
* Cancel the hv timer if the sw timer fired while the hv timer
2117
* was being programmed, or if the hv timer itself expired.
2118
*/
2119
if (atomic_read(&ktimer->pending)) {
2120
cancel_hv_timer(apic);
2121
} else if (expired) {
2122
apic_timer_expired(apic, false);
2123
cancel_hv_timer(apic);
2124
}
2125
}
2126
2127
trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
2128
2129
return true;
2130
}
2131
2132
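/*
 * Emulate the APIC timer in software with an hrtimer, cancelling the
 * hardware-assisted timer if it's currently in use.
 */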
static void start_sw_timer(struct kvm_lapic *apic)
2133
{
2134
struct kvm_timer *ktimer = &apic->lapic_timer;
2135
2136
WARN_ON(preemptible());
2137
if (apic->lapic_timer.hv_timer_in_use)
2138
cancel_hv_timer(apic);
2139
if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
2140
return;
2141
2142
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
2143
start_sw_period(apic);
2144
else if (apic_lvtt_tscdeadline(apic))
2145
start_sw_tscdeadline(apic);
2146
trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
2147
}
2148
2149
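/*
 * (Re)arm the APIC timer, preferring the hardware-assisted timer and
 * falling back to the software timer, unless a non-periodic timer has
 * already expired and is pending injection.
 */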
static void restart_apic_timer(struct kvm_lapic *apic)
2150
{
2151
preempt_disable();
2152
2153
if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
2154
goto out;
2155
2156
if (!start_hv_timer(apic))
2157
start_sw_timer(apic);
2158
out:
2159
preempt_enable();
2160
}
2161
2162
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
2163
{
2164
struct kvm_lapic *apic = vcpu->arch.apic;
2165
2166
preempt_disable();
2167
/* If the preempt notifier has already run, it also called apic_timer_expired */
2168
if (!apic->lapic_timer.hv_timer_in_use)
2169
goto out;
2170
WARN_ON(kvm_vcpu_is_blocking(vcpu));
2171
apic_timer_expired(apic, false);
2172
cancel_hv_timer(apic);
2173
2174
if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
2175
advance_periodic_target_expiration(apic);
2176
restart_apic_timer(apic);
2177
}
2178
out:
2179
preempt_enable();
2180
}
2181
EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
2182
2183
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
2184
{
2185
restart_apic_timer(vcpu->arch.apic);
2186
}
2187
2188
void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
2189
{
2190
struct kvm_lapic *apic = vcpu->arch.apic;
2191
2192
preempt_disable();
2193
/* Possibly the TSC deadline timer is not enabled yet */
2194
if (apic->lapic_timer.hv_timer_in_use)
2195
start_sw_timer(apic);
2196
preempt_enable();
2197
}
2198
2199
void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
2200
{
2201
struct kvm_lapic *apic = vcpu->arch.apic;
2202
2203
WARN_ON(!apic->lapic_timer.hv_timer_in_use);
2204
restart_apic_timer(apic);
2205
}
2206
2207
static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
2208
{
2209
atomic_set(&apic->lapic_timer.pending, 0);
2210
2211
if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
2212
&& !set_target_expiration(apic, count_reg))
2213
return;
2214
2215
restart_apic_timer(apic);
2216
}
2217
2218
static void start_apic_timer(struct kvm_lapic *apic)
2219
{
2220
__start_apic_timer(apic, APIC_TMICT);
2221
}
2222
2223
static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
2224
{
2225
bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
2226
2227
if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
2228
apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
2229
if (lvt0_in_nmi_mode) {
2230
atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
2231
} else
2232
atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
2233
}
2234
}
2235
2236
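/*
 * Map an LVT register offset to its index in apic_lvt_mask[], or return -1
 * for registers that aren't LVT entries.
 */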
static int get_lvt_index(u32 reg)
2237
{
2238
if (reg == APIC_LVTCMCI)
2239
return LVT_CMCI;
2240
if (reg < APIC_LVTT || reg > APIC_LVTERR)
2241
return -1;
2242
return array_index_nospec(
2243
(reg - APIC_LVTT) >> 4, KVM_APIC_MAX_NR_LVT_ENTRIES);
2244
}
2245
2246
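/*
 * Emulate a 32-bit write to a local APIC register, shared by the xAPIC
 * MMIO path and the x2APIC/Hyper-V MSR paths.  Returns non-zero if the
 * register is reserved or read-only in the current mode and the write
 * should be rejected.
 */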
static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
2247
{
2248
int ret = 0;
2249
2250
trace_kvm_apic_write(reg, val);
2251
2252
switch (reg) {
2253
case APIC_ID: /* Local APIC ID */
2254
if (!apic_x2apic_mode(apic)) {
2255
kvm_apic_set_xapic_id(apic, val >> 24);
2256
} else {
2257
ret = 1;
2258
}
2259
break;
2260
2261
case APIC_TASKPRI:
2262
report_tpr_access(apic, true);
2263
apic_set_tpr(apic, val & 0xff);
2264
break;
2265
2266
case APIC_EOI:
2267
apic_set_eoi(apic);
2268
break;
2269
2270
case APIC_LDR:
2271
if (!apic_x2apic_mode(apic))
2272
kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
2273
else
2274
ret = 1;
2275
break;
2276
2277
case APIC_DFR:
2278
if (!apic_x2apic_mode(apic))
2279
kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
2280
else
2281
ret = 1;
2282
break;
2283
2284
case APIC_SPIV: {
2285
u32 mask = 0x3ff;
2286
if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
2287
mask |= APIC_SPIV_DIRECTED_EOI;
2288
apic_set_spiv(apic, val & mask);
2289
if (!(val & APIC_SPIV_APIC_ENABLED)) {
2290
int i;
2291
2292
for (i = 0; i < apic->nr_lvt_entries; i++) {
2293
kvm_lapic_set_reg(apic, APIC_LVTx(i),
2294
kvm_lapic_get_reg(apic, APIC_LVTx(i)) | APIC_LVT_MASKED);
2295
}
2296
apic_update_lvtt(apic);
2297
atomic_set(&apic->lapic_timer.pending, 0);
2298
2299
}
2300
break;
2301
}
2302
case APIC_ICR:
2303
WARN_ON_ONCE(apic_x2apic_mode(apic));
2304
2305
/* No delay here, so we always clear the pending bit */
2306
val &= ~APIC_ICR_BUSY;
2307
kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
2308
kvm_lapic_set_reg(apic, APIC_ICR, val);
2309
break;
2310
case APIC_ICR2:
2311
if (apic_x2apic_mode(apic))
2312
ret = 1;
2313
else
2314
kvm_lapic_set_reg(apic, APIC_ICR2, val & 0xff000000);
2315
break;
2316
2317
case APIC_LVT0:
2318
apic_manage_nmi_watchdog(apic, val);
2319
fallthrough;
2320
case APIC_LVTTHMR:
2321
case APIC_LVTPC:
2322
case APIC_LVT1:
2323
case APIC_LVTERR:
2324
case APIC_LVTCMCI: {
2325
u32 index = get_lvt_index(reg);
2326
if (!kvm_lapic_lvt_supported(apic, index)) {
2327
ret = 1;
2328
break;
2329
}
2330
if (!kvm_apic_sw_enabled(apic))
2331
val |= APIC_LVT_MASKED;
2332
val &= apic_lvt_mask[index];
2333
kvm_lapic_set_reg(apic, reg, val);
2334
break;
2335
}
2336
2337
case APIC_LVTT:
2338
if (!kvm_apic_sw_enabled(apic))
2339
val |= APIC_LVT_MASKED;
2340
val &= (apic_lvt_mask[LVT_TIMER] | apic->lapic_timer.timer_mode_mask);
2341
kvm_lapic_set_reg(apic, APIC_LVTT, val);
2342
apic_update_lvtt(apic);
2343
break;
2344
2345
case APIC_TMICT:
2346
if (apic_lvtt_tscdeadline(apic))
2347
break;
2348
2349
cancel_apic_timer(apic);
2350
kvm_lapic_set_reg(apic, APIC_TMICT, val);
2351
start_apic_timer(apic);
2352
break;
2353
2354
case APIC_TDCR: {
2355
uint32_t old_divisor = apic->divide_count;
2356
2357
kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
2358
update_divide_count(apic);
2359
if (apic->divide_count != old_divisor &&
2360
apic->lapic_timer.period) {
2361
hrtimer_cancel(&apic->lapic_timer.timer);
2362
update_target_expiration(apic, old_divisor);
2363
restart_apic_timer(apic);
2364
}
2365
break;
2366
}
2367
case APIC_ESR:
2368
if (apic_x2apic_mode(apic) && val != 0)
2369
ret = 1;
2370
break;
2371
2372
case APIC_SELF_IPI:
2373
/*
2374
* Self-IPI exists only when x2APIC is enabled. Bits 7:0 hold
2375
* the vector, everything else is reserved.
2376
*/
2377
if (!apic_x2apic_mode(apic) || (val & ~APIC_VECTOR_MASK))
2378
ret = 1;
2379
else
2380
kvm_apic_send_ipi(apic, APIC_DEST_SELF | val, 0);
2381
break;
2382
default:
2383
ret = 1;
2384
break;
2385
}
2386
2387
/*
2388
* Recalculate APIC maps if necessary, e.g. if the software enable bit
2389
* was toggled, the APIC ID changed, etc... The maps are marked dirty
2390
* on relevant changes, i.e. this is a nop for most writes.
2391
*/
2392
kvm_recalculate_apic_map(apic->vcpu->kvm);
2393
2394
return ret;
2395
}
2396
2397
static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
2398
gpa_t address, int len, const void *data)
2399
{
2400
struct kvm_lapic *apic = to_lapic(this);
2401
unsigned int offset = address - apic->base_address;
2402
u32 val;
2403
2404
if (!apic_mmio_in_range(apic, address))
2405
return -EOPNOTSUPP;
2406
2407
if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2408
if (!kvm_check_has_quirk(vcpu->kvm,
2409
KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2410
return -EOPNOTSUPP;
2411
2412
return 0;
2413
}
2414
2415
/*
2416
* APIC registers must be aligned on a 128-bit boundary.
2417
* 32/64/128-bit registers must be accessed through 32-bit reads/writes.
2418
* Refer to SDM 8.4.1.
2419
*/
2420
if (len != 4 || (offset & 0xf))
2421
return 0;
2422
2423
val = *(u32*)data;
2424
2425
kvm_lapic_reg_write(apic, offset & 0xff0, val);
2426
2427
return 0;
2428
}
2429
2430
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
2431
{
2432
kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
2433
}
2434
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
2435
2436
#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13))
2437
2438
int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
2439
{
2440
if (data & X2APIC_ICR_RESERVED_BITS)
2441
return 1;
2442
2443
/*
2444
* The BUSY bit is reserved on both Intel and AMD in x2APIC mode, but
2445
* only AMD requires it to be zero, Intel essentially just ignores the
2446
* bit. And if IPI virtualization (Intel) or x2AVIC (AMD) is enabled,
2447
* the CPU performs the reserved bits checks, i.e. the underlying CPU
2448
* behavior will "win". Arbitrarily clear the BUSY bit, as there is no
2449
* sane way to provide consistent behavior with respect to hardware.
2450
*/
2451
data &= ~APIC_ICR_BUSY;
2452
2453
kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
2454
if (kvm_x86_ops.x2apic_icr_is_split) {
2455
kvm_lapic_set_reg(apic, APIC_ICR, data);
2456
kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32);
2457
} else {
2458
kvm_lapic_set_reg64(apic, APIC_ICR, data);
2459
}
2460
trace_kvm_apic_write(APIC_ICR, data);
2461
return 0;
2462
}
2463
2464
static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic)
2465
{
2466
if (kvm_x86_ops.x2apic_icr_is_split)
2467
return (u64)kvm_lapic_get_reg(apic, APIC_ICR) |
2468
(u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32;
2469
2470
return kvm_lapic_get_reg64(apic, APIC_ICR);
2471
}
2472
2473
/* emulate APIC access in a trap manner */
2474
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2475
{
2476
struct kvm_lapic *apic = vcpu->arch.apic;
2477
2478
/*
2479
* ICR is a single 64-bit register when x2APIC is enabled, all other
2480
* registers hold 32-bit values. For legacy xAPIC, ICR writes need to
2481
* go down the common path to get the upper half from ICR2.
2482
*
2483
* Note, using the write helpers may incur an unnecessary write to the
2484
* virtual APIC state, but KVM needs to conditionally modify the value
2485
* in certain cases, e.g. to clear the ICR busy bit. The cost of extra
2486
* conditional branches is likely a wash relative to the cost of the
2487
* maybe-unnecessary write, and both are in the noise anyway.
2488
*/
2489
if (apic_x2apic_mode(apic) && offset == APIC_ICR)
2490
WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic)));
2491
else
2492
kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
2493
}
2494
EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
2495
2496
void kvm_free_lapic(struct kvm_vcpu *vcpu)
2497
{
2498
struct kvm_lapic *apic = vcpu->arch.apic;
2499
2500
if (!vcpu->arch.apic) {
2501
static_branch_dec(&kvm_has_noapic_vcpu);
2502
return;
2503
}
2504
2505
hrtimer_cancel(&apic->lapic_timer.timer);
2506
2507
if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
2508
static_branch_slow_dec_deferred(&apic_hw_disabled);
2509
2510
if (!apic->sw_enabled)
2511
static_branch_slow_dec_deferred(&apic_sw_disabled);
2512
2513
if (apic->regs)
2514
free_page((unsigned long)apic->regs);
2515
2516
kfree(apic);
2517
}
2518
2519
/*
2520
*----------------------------------------------------------------------
2521
* LAPIC interface
2522
*----------------------------------------------------------------------
2523
*/
2524
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2525
{
2526
struct kvm_lapic *apic = vcpu->arch.apic;
2527
2528
if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2529
return 0;
2530
2531
return apic->lapic_timer.tscdeadline;
2532
}
2533
2534
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2535
{
2536
struct kvm_lapic *apic = vcpu->arch.apic;
2537
2538
if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2539
return;
2540
2541
hrtimer_cancel(&apic->lapic_timer.timer);
2542
apic->lapic_timer.tscdeadline = data;
2543
start_apic_timer(apic);
2544
}
2545
2546
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
2547
{
2548
apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4);
2549
}
2550
2551
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2552
{
2553
u64 tpr;
2554
2555
tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
2556
2557
return (tpr & 0xf0) >> 4;
2558
}
2559
2560
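/*
 * Update the APIC base MSR and everything derived from it: the hardware
 * enable/disable jump labels, the xAPIC/x2APIC ID, the virtual APIC mode,
 * the MMIO base address, and the APICv inhibit applied when the base is
 * moved away from the default address.
 */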
static void __kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value)
2561
{
2562
u64 old_value = vcpu->arch.apic_base;
2563
struct kvm_lapic *apic = vcpu->arch.apic;
2564
2565
vcpu->arch.apic_base = value;
2566
2567
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2568
vcpu->arch.cpuid_dynamic_bits_dirty = true;
2569
2570
if (!apic)
2571
return;
2572
2573
/* update jump label if enable bit changes */
2574
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
2575
if (value & MSR_IA32_APICBASE_ENABLE) {
2576
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2577
static_branch_slow_dec_deferred(&apic_hw_disabled);
2578
/* Check if there are APF page ready requests pending */
2579
kvm_make_request(KVM_REQ_APF_READY, vcpu);
2580
} else {
2581
static_branch_inc(&apic_hw_disabled.key);
2582
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2583
}
2584
}
2585
2586
if ((old_value ^ value) & X2APIC_ENABLE) {
2587
if (value & X2APIC_ENABLE)
2588
kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2589
else if (value & MSR_IA32_APICBASE_ENABLE)
2590
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2591
}
2592
2593
if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) {
2594
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
2595
kvm_x86_call(set_virtual_apic_mode)(vcpu);
2596
}
2597
2598
apic->base_address = apic->vcpu->arch.apic_base &
2599
MSR_IA32_APICBASE_BASE;
2600
2601
if ((value & MSR_IA32_APICBASE_ENABLE) &&
2602
apic->base_address != APIC_DEFAULT_PHYS_BASE) {
2603
kvm_set_apicv_inhibit(apic->vcpu->kvm,
2604
APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
2605
}
2606
}
2607
2608
int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated)
2609
{
2610
enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
2611
enum lapic_mode new_mode = kvm_apic_mode(value);
2612
2613
if (vcpu->arch.apic_base == value)
2614
return 0;
2615
2616
u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff |
2617
(guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
2618
2619
if ((value & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
2620
return 1;
2621
if (!host_initiated) {
2622
if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
2623
return 1;
2624
if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
2625
return 1;
2626
}
2627
2628
__kvm_apic_set_base(vcpu, value);
2629
kvm_recalculate_apic_map(vcpu->kvm);
2630
return 0;
2631
}
2632
EXPORT_SYMBOL_GPL(kvm_apic_set_base);
2633
2634
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
2635
{
2636
struct kvm_lapic *apic = vcpu->arch.apic;
2637
2638
/*
2639
* When APICv is enabled, KVM must always search the IRR for a pending
2640
* IRQ, as other vCPUs and devices can set IRR bits even if the vCPU
2641
* isn't running. If APICv is disabled, KVM _should_ search the IRR
2642
* for a pending IRQ. But KVM currently doesn't ensure *all* hardware,
2643
* e.g. CPUs and IOMMUs, has seen the change in state, i.e. searching
2644
* the IRR at this time could race with IRQ delivery from hardware that
2645
* still sees APICv as being enabled.
2646
*
2647
* FIXME: Ensure other vCPUs and devices observe the change in APICv
2648
* state prior to updating KVM's metadata caches, so that KVM
2649
* can safely search the IRR and set irr_pending accordingly.
2650
*/
2651
apic->irr_pending = true;
2652
2653
if (apic->apicv_active)
2654
apic->isr_count = 1;
2655
else
2656
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
2657
2658
apic->highest_isr_cache = -1;
2659
}
2660
2661
int kvm_alloc_apic_access_page(struct kvm *kvm)
2662
{
2663
void __user *hva;
2664
int ret = 0;
2665
2666
mutex_lock(&kvm->slots_lock);
2667
if (kvm->arch.apic_access_memslot_enabled ||
2668
kvm->arch.apic_access_memslot_inhibited)
2669
goto out;
2670
2671
hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
2672
APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
2673
if (IS_ERR(hva)) {
2674
ret = PTR_ERR(hva);
2675
goto out;
2676
}
2677
2678
kvm->arch.apic_access_memslot_enabled = true;
2679
out:
2680
mutex_unlock(&kvm->slots_lock);
2681
return ret;
2682
}
2683
EXPORT_SYMBOL_GPL(kvm_alloc_apic_access_page);
2684
2685
void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu)
2686
{
2687
struct kvm *kvm = vcpu->kvm;
2688
2689
if (!kvm->arch.apic_access_memslot_enabled)
2690
return;
2691
2692
kvm_vcpu_srcu_read_unlock(vcpu);
2693
2694
mutex_lock(&kvm->slots_lock);
2695
2696
if (kvm->arch.apic_access_memslot_enabled) {
2697
__x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
2698
/*
2699
* Clear "enabled" after the memslot is deleted so that a
2700
* different vCPU doesn't get a false negative when checking
2701
* the flag outside of slots_lock. No additional memory barrier is
2702
* needed as modifying memslots requires waiting for other vCPUs to
2703
* drop SRCU (see above), and false positives are ok as the
2704
* flag is rechecked after acquiring slots_lock.
2705
*/
2706
kvm->arch.apic_access_memslot_enabled = false;
2707
2708
/*
2709
* Mark the memslot as inhibited to prevent reallocating the
2710
* memslot during vCPU creation, e.g. if a vCPU is hotplugged.
2711
*/
2712
kvm->arch.apic_access_memslot_inhibited = true;
2713
}
2714
2715
mutex_unlock(&kvm->slots_lock);
2716
2717
kvm_vcpu_srcu_read_lock(vcpu);
2718
}
2719
2720
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2721
{
2722
struct kvm_lapic *apic = vcpu->arch.apic;
2723
u64 msr_val;
2724
int i;
2725
2726
kvm_x86_call(apicv_pre_state_restore)(vcpu);
2727
2728
if (!init_event) {
2729
msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
2730
if (kvm_vcpu_is_reset_bsp(vcpu))
2731
msr_val |= MSR_IA32_APICBASE_BSP;
2732
2733
/*
2734
* Use the inner helper to avoid an extra recalculation of the
2735
* optimized APIC map if some other task has dirtied the map.
2736
* The recalculation needed for this vCPU will be done after
2737
* all APIC state has been initialized (see below).
2738
*/
2739
__kvm_apic_set_base(vcpu, msr_val);
2740
}
2741
2742
if (!apic)
2743
return;
2744
2745
/* Stop the timer in case it's a reset to an active apic */
2746
hrtimer_cancel(&apic->lapic_timer.timer);
2747
2748
/* The xAPIC ID is set at RESET even if the APIC was already enabled. */
2749
if (!init_event)
2750
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2751
kvm_apic_set_version(apic->vcpu);
2752
2753
for (i = 0; i < apic->nr_lvt_entries; i++)
2754
kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);
2755
apic_update_lvtt(apic);
2756
if (kvm_vcpu_is_reset_bsp(vcpu) &&
2757
kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2758
kvm_lapic_set_reg(apic, APIC_LVT0,
2759
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2760
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2761
2762
kvm_apic_set_dfr(apic, 0xffffffffU);
2763
apic_set_spiv(apic, 0xff);
2764
kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2765
if (!apic_x2apic_mode(apic))
2766
kvm_apic_set_ldr(apic, 0);
2767
kvm_lapic_set_reg(apic, APIC_ESR, 0);
2768
if (!apic_x2apic_mode(apic)) {
2769
kvm_lapic_set_reg(apic, APIC_ICR, 0);
2770
kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2771
} else {
2772
kvm_lapic_set_reg64(apic, APIC_ICR, 0);
2773
}
2774
kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2775
kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2776
for (i = 0; i < 8; i++) {
2777
kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2778
kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2779
kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2780
}
2781
kvm_apic_update_apicv(vcpu);
2782
update_divide_count(apic);
2783
atomic_set(&apic->lapic_timer.pending, 0);
2784
2785
vcpu->arch.pv_eoi.msr_val = 0;
2786
apic_update_ppr(apic);
2787
if (apic->apicv_active) {
2788
kvm_x86_call(apicv_post_state_restore)(vcpu);
2789
kvm_x86_call(hwapic_isr_update)(vcpu, -1);
2790
}
2791
2792
vcpu->arch.apic_arb_prio = 0;
2793
vcpu->arch.apic_attention = 0;
2794
2795
kvm_recalculate_apic_map(vcpu->kvm);
2796
}
2797
2798
/*
2799
*----------------------------------------------------------------------
2800
* timer interface
2801
*----------------------------------------------------------------------
2802
*/
2803
2804
static bool lapic_is_periodic(struct kvm_lapic *apic)
2805
{
2806
return apic_lvtt_period(apic);
2807
}
2808
2809
int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2810
{
2811
struct kvm_lapic *apic = vcpu->arch.apic;
2812
2813
if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2814
return atomic_read(&apic->lapic_timer.pending);
2815
2816
return 0;
2817
}
2818
2819
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2820
{
2821
u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2822
int vector, mode, trig_mode;
2823
int r;
2824
2825
if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2826
vector = reg & APIC_VECTOR_MASK;
2827
mode = reg & APIC_MODE_MASK;
2828
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2829
2830
r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
2831
if (r && lvt_type == APIC_LVTPC &&
2832
guest_cpuid_is_intel_compatible(apic->vcpu))
2833
kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
2834
return r;
2835
}
2836
return 0;
2837
}
2838
2839
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2840
{
2841
struct kvm_lapic *apic = vcpu->arch.apic;
2842
2843
if (apic)
2844
kvm_apic_local_deliver(apic, APIC_LVT0);
2845
}
2846
2847
static const struct kvm_io_device_ops apic_mmio_ops = {
2848
.read = apic_mmio_read,
2849
.write = apic_mmio_write,
2850
};
2851
2852
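/*
 * hrtimer callback for the software APIC timer.  A periodic timer re-arms
 * itself for the next period; one-shot and tscdeadline timers do not
 * restart.
 */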
static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2853
{
2854
struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2855
struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2856
2857
apic_timer_expired(apic, true);
2858
2859
if (lapic_is_periodic(apic)) {
2860
advance_periodic_target_expiration(apic);
2861
hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2862
return HRTIMER_RESTART;
2863
} else
2864
return HRTIMER_NORESTART;
2865
}
2866
2867
int kvm_create_lapic(struct kvm_vcpu *vcpu)
2868
{
2869
struct kvm_lapic *apic;
2870
2871
ASSERT(vcpu != NULL);
2872
2873
if (!irqchip_in_kernel(vcpu->kvm)) {
2874
static_branch_inc(&kvm_has_noapic_vcpu);
2875
return 0;
2876
}
2877
2878
apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
2879
if (!apic)
2880
goto nomem;
2881
2882
vcpu->arch.apic = apic;
2883
2884
if (kvm_x86_ops.alloc_apic_backing_page)
2885
apic->regs = kvm_x86_call(alloc_apic_backing_page)(vcpu);
2886
else
2887
apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
2888
if (!apic->regs) {
2889
printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
2890
vcpu->vcpu_id);
2891
goto nomem_free_apic;
2892
}
2893
apic->vcpu = vcpu;
2894
2895
apic->nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
2896
2897
hrtimer_setup(&apic->lapic_timer.timer, apic_timer_fn, CLOCK_MONOTONIC,
2898
HRTIMER_MODE_ABS_HARD);
2899
if (lapic_timer_advance)
2900
apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
2901
2902
/*
2903
* Stuff the APIC ENABLE bit in lieu of temporarily incrementing
2904
* apic_hw_disabled; the full RESET value is set by kvm_lapic_reset().
2905
*/
2906
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2907
static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2908
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2909
2910
/*
2911
* Defer evaluating inhibits until the vCPU is first run, as this vCPU
2912
* will not get notified of any changes until this vCPU is visible to
2913
* other vCPUs (marked online and added to the set of vCPUs).
2914
*
2915
* Opportunistically mark APICv active, as VMX in particular is highly
2916
* unlikely to have inhibits. Ignore the current per-VM APICv state so
2917
* that vCPU creation is guaranteed to run with a deterministic value;
2918
* the request will ensure the vCPU gets the correct state before VM-Entry.
2919
*/
2920
if (enable_apicv) {
2921
apic->apicv_active = true;
2922
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
2923
}
2924
2925
return 0;
2926
nomem_free_apic:
2927
kfree(apic);
2928
vcpu->arch.apic = NULL;
2929
nomem:
2930
return -ENOMEM;
2931
}
2932
2933
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2934
{
2935
struct kvm_lapic *apic = vcpu->arch.apic;
2936
u32 ppr;
2937
2938
if (!kvm_apic_present(vcpu))
2939
return -1;
2940
2941
if (apic->guest_apic_protected)
2942
return -1;
2943
2944
__apic_update_ppr(apic, &ppr);
2945
return apic_has_interrupt_for_ppr(apic, ppr);
2946
}
2947
EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
2948
2949
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2950
{
2951
u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2952
2953
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2954
return 1;
2955
if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2956
GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2957
return 1;
2958
return 0;
2959
}
2960
2961
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2962
{
2963
struct kvm_lapic *apic = vcpu->arch.apic;
2964
2965
if (atomic_read(&apic->lapic_timer.pending) > 0) {
2966
kvm_apic_inject_pending_timer_irqs(apic);
2967
atomic_set(&apic->lapic_timer.pending, 0);
2968
}
2969
}
2970
2971
void kvm_apic_ack_interrupt(struct kvm_vcpu *vcpu, int vector)
2972
{
2973
struct kvm_lapic *apic = vcpu->arch.apic;
2974
u32 ppr;
2975
2976
if (WARN_ON_ONCE(vector < 0 || !apic))
2977
return;
2978
2979
/*
2980
* We get here even with APIC virtualization enabled, if doing
2981
* nested virtualization and L1 runs with the "acknowledge interrupt
2982
* on exit" mode. Then we cannot inject the interrupt via RVI,
2983
* because the process would deliver it through the IDT.
2984
*/
2985
2986
apic_clear_irr(vector, apic);
2987
if (kvm_hv_synic_auto_eoi_set(vcpu, vector)) {
2988
/*
2989
* For auto-EOI interrupts, there might be another pending
2990
* interrupt above PPR, so check whether to raise another
2991
* KVM_REQ_EVENT.
2992
*/
2993
apic_update_ppr(apic);
2994
} else {
2995
/*
2996
* For normal interrupts, PPR has been raised and there cannot
2997
* be a higher-priority pending interrupt---except if there was
2998
* a concurrent interrupt injection, but that would have
2999
* triggered KVM_REQ_EVENT already.
3000
*/
3001
apic_set_isr(vector, apic);
3002
__apic_update_ppr(apic, &ppr);
3003
}
3004
3005
}
3006
EXPORT_SYMBOL_GPL(kvm_apic_ack_interrupt);
3007
3008
static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
3009
struct kvm_lapic_state *s, bool set)
3010
{
3011
if (apic_x2apic_mode(vcpu->arch.apic)) {
3012
u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic);
3013
u32 *id = (u32 *)(s->regs + APIC_ID);
3014
u32 *ldr = (u32 *)(s->regs + APIC_LDR);
3015
u64 icr;
3016
3017
if (vcpu->kvm->arch.x2apic_format) {
3018
if (*id != x2apic_id)
3019
return -EINVAL;
3020
} else {
3021
/*
3022
* Ignore the userspace value when setting APIC state.
3023
* KVM's model is that the x2APIC ID is readonly, e.g.
3024
* KVM only supports delivering interrupts to KVM's
3025
* version of the x2APIC ID. However, for backwards
3026
* compatibility, don't reject attempts to set a
3027
* mismatched ID for userspace that hasn't opted into
3028
* x2apic_format.
3029
*/
3030
if (set)
3031
*id = x2apic_id;
3032
else
3033
*id = x2apic_id << 24;
3034
}
3035
3036
/*
3037
* In x2APIC mode, the LDR is fixed and based on the id. And
3038
* if the ICR is _not_ split, ICR is internally a single 64-bit
3039
* register, but needs to be split to ICR+ICR2 in userspace for
3040
* backwards compatibility.
3041
*/
3042
if (set)
3043
*ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
3044
3045
if (!kvm_x86_ops.x2apic_icr_is_split) {
3046
if (set) {
3047
icr = apic_get_reg(s->regs, APIC_ICR) |
3048
(u64)apic_get_reg(s->regs, APIC_ICR2) << 32;
3049
apic_set_reg64(s->regs, APIC_ICR, icr);
3050
} else {
3051
icr = apic_get_reg64(s->regs, APIC_ICR);
3052
apic_set_reg(s->regs, APIC_ICR2, icr >> 32);
3053
}
3054
}
3055
}
3056
3057
return 0;
3058
}
3059
3060
int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
3061
{
3062
memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
3063
3064
/*
3065
* Get calculated timer current count for remaining timer period (if
3066
* any) and store it in the returned register set.
3067
*/
3068
apic_set_reg(s->regs, APIC_TMCCT, __apic_read(vcpu->arch.apic, APIC_TMCCT));
3069
3070
return kvm_apic_state_fixup(vcpu, s, false);
3071
}
3072
3073
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
3074
{
3075
struct kvm_lapic *apic = vcpu->arch.apic;
3076
int r;
3077
3078
kvm_x86_call(apicv_pre_state_restore)(vcpu);
3079
3080
/* set SPIV separately to get count of SW disabled APICs right */
3081
apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
3082
3083
r = kvm_apic_state_fixup(vcpu, s, true);
3084
if (r) {
3085
kvm_recalculate_apic_map(vcpu->kvm);
3086
return r;
3087
}
3088
memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
3089
3090
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
3091
kvm_recalculate_apic_map(vcpu->kvm);
3092
kvm_apic_set_version(vcpu);
3093
3094
apic_update_ppr(apic);
3095
cancel_apic_timer(apic);
3096
apic->lapic_timer.expired_tscdeadline = 0;
3097
apic_update_lvtt(apic);
3098
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
3099
update_divide_count(apic);
3100
__start_apic_timer(apic, APIC_TMCCT);
3101
kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
3102
kvm_apic_update_apicv(vcpu);
3103
if (apic->apicv_active) {
3104
kvm_x86_call(apicv_post_state_restore)(vcpu);
3105
kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
3106
}
3107
kvm_make_request(KVM_REQ_EVENT, vcpu);
3108
3109
#ifdef CONFIG_KVM_IOAPIC
3110
if (ioapic_in_kernel(vcpu->kvm))
3111
kvm_rtc_eoi_tracking_restore_one(vcpu);
3112
#endif
3113
3114
vcpu->arch.apic_arb_prio = 0;
3115
3116
return 0;
3117
}
3118
3119
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
3120
{
3121
struct hrtimer *timer;
3122
3123
if (!lapic_in_kernel(vcpu) ||
3124
kvm_can_post_timer_interrupt(vcpu))
3125
return;
3126
3127
timer = &vcpu->arch.apic->lapic_timer.timer;
3128
if (hrtimer_cancel(timer))
3129
hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
3130
}
3131
3132
/*
3133
* apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
3134
*
3135
* Detect whether guest triggered PV EOI since the
3136
* last entry. If yes, set EOI on the guest's behalf.
3137
* Clear PV EOI in guest memory in any case.
3138
*/
3139
static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
3140
struct kvm_lapic *apic)
3141
{
3142
int vector;
3143
/*
3144
* PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
3145
* and KVM_PV_EOI_ENABLED in guest memory as follows:
3146
*
3147
* KVM_APIC_PV_EOI_PENDING is unset:
3148
* -> host disabled PV EOI.
3149
* KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
3150
* -> host enabled PV EOI, guest did not execute EOI yet.
3151
* KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
3152
* -> host enabled PV EOI, guest executed EOI.
3153
*/
3154
BUG_ON(!pv_eoi_enabled(vcpu));
3155
3156
if (pv_eoi_test_and_clr_pending(vcpu))
3157
return;
3158
vector = apic_set_eoi(apic);
3159
trace_kvm_pv_eoi(apic, vector);
3160
}
3161
3162
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
3163
{
3164
u32 data;
3165
3166
if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
3167
apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
3168
3169
if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
3170
return;
3171
3172
if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
3173
sizeof(u32)))
3174
return;
3175
3176
apic_set_tpr(vcpu->arch.apic, data & 0xff);
3177
}
3178
3179
/*
3180
* apic_sync_pv_eoi_to_guest - called before vmentry
3181
*
3182
* Detect whether it's safe to enable PV EOI and
3183
* if yes do so.
3184
*/
3185
static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
3186
struct kvm_lapic *apic)
3187
{
3188
if (!pv_eoi_enabled(vcpu) ||
3189
/* IRR set or many bits in ISR: could be nested. */
3190
apic->irr_pending ||
3191
/* Cache not set: could be safe but we don't bother. */
3192
apic->highest_isr_cache == -1 ||
3193
/* Need EOI to update ioapic. */
3194
kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
3195
/*
3196
* PV EOI was disabled by apic_sync_pv_eoi_from_guest
3197
* so we need not do anything here.
3198
*/
3199
return;
3200
}
3201
3202
pv_eoi_set_pending(apic->vcpu);
3203
}
3204
3205
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
3206
{
3207
u32 data, tpr;
3208
int max_irr, max_isr;
3209
struct kvm_lapic *apic = vcpu->arch.apic;
3210
3211
apic_sync_pv_eoi_to_guest(vcpu, apic);
3212
3213
if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
3214
return;
3215
3216
tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
3217
max_irr = apic_find_highest_irr(apic);
3218
if (max_irr < 0)
3219
max_irr = 0;
3220
max_isr = apic_find_highest_isr(apic);
3221
if (max_isr < 0)
3222
max_isr = 0;
3223
data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
3224
3225
kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
3226
sizeof(u32));
3227
}
3228
3229
int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
3230
{
3231
if (vapic_addr) {
3232
if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
3233
&vcpu->arch.apic->vapic_cache,
3234
vapic_addr, sizeof(u32)))
3235
return -EINVAL;
3236
__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
3237
} else {
3238
__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
3239
}
3240
3241
vcpu->arch.apic->vapic_addr = vapic_addr;
3242
return 0;
3243
}
3244
3245
static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
3246
{
3247
u32 low;
3248
3249
if (reg == APIC_ICR) {
3250
*data = kvm_x2apic_icr_read(apic);
3251
return 0;
3252
}
3253
3254
if (kvm_lapic_reg_read(apic, reg, 4, &low))
3255
return 1;
3256
3257
*data = low;
3258
3259
return 0;
3260
}
3261
3262
static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data)
3263
{
3264
/*
3265
* ICR is a 64-bit register in x2APIC mode (and Hyper-V PV vAPIC) and
3266
* can be written as such, all other registers remain accessible only
3267
* through 32-bit reads/writes.
3268
*/
3269
if (reg == APIC_ICR)
3270
return kvm_x2apic_icr_write(apic, data);
3271
3272
/* Bits 63:32 are reserved in all other registers. */
3273
if (data >> 32)
3274
return 1;
3275
3276
return kvm_lapic_reg_write(apic, reg, (u32)data);
3277
}
3278
3279
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
3280
{
3281
struct kvm_lapic *apic = vcpu->arch.apic;
3282
u32 reg = (msr - APIC_BASE_MSR) << 4;
3283
3284
if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
3285
return 1;
3286
3287
return kvm_lapic_msr_write(apic, reg, data);
3288
}
3289
3290
int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
3291
{
3292
struct kvm_lapic *apic = vcpu->arch.apic;
3293
u32 reg = (msr - APIC_BASE_MSR) << 4;
3294
3295
if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
3296
return 1;
3297
3298
return kvm_lapic_msr_read(apic, reg, data);
3299
}
3300
3301
int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
3302
{
3303
if (!lapic_in_kernel(vcpu))
3304
return 1;
3305
3306
return kvm_lapic_msr_write(vcpu->arch.apic, reg, data);
3307
}
3308
3309
int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
3310
{
3311
if (!lapic_in_kernel(vcpu))
3312
return 1;
3313
3314
return kvm_lapic_msr_read(vcpu->arch.apic, reg, data);
3315
}
3316
3317
int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
3318
{
3319
u64 addr = data & ~KVM_MSR_ENABLED;
3320
struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
3321
unsigned long new_len;
3322
int ret;
3323
3324
if (!IS_ALIGNED(addr, 4))
3325
return 1;
3326
3327
if (data & KVM_MSR_ENABLED) {
3328
if (addr == ghc->gpa && len <= ghc->len)
3329
new_len = ghc->len;
3330
else
3331
new_len = len;
3332
3333
ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
3334
if (ret)
3335
return ret;
3336
}
3337
3338
vcpu->arch.pv_eoi.msr_val = data;
3339
3340
return 0;
3341
}
3342
3343
int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
3344
{
3345
struct kvm_lapic *apic = vcpu->arch.apic;
3346
u8 sipi_vector;
3347
int r;
3348
3349
if (!kvm_apic_has_pending_init_or_sipi(vcpu))
3350
return 0;
3351
3352
if (is_guest_mode(vcpu)) {
3353
r = kvm_check_nested_events(vcpu);
3354
if (r < 0)
3355
return r == -EBUSY ? 0 : r;
3356
/*
3357
* Continue processing INIT/SIPI even if a nested VM-Exit
3358
* occurred, e.g. pending SIPIs should be dropped if INIT+SIPI
3359
* are blocked as a result of transitioning to VMX root mode.
3360
*/
3361
}
3362
3363
/*
3364
* INITs are blocked while the CPU is in specific states (SMM, VMX root
3365
* mode, SVM with GIF=0), while SIPIs are dropped if the CPU isn't in
3366
* wait-for-SIPI (WFS).
3367
*/
3368
if (!kvm_apic_init_sipi_allowed(vcpu)) {
3369
WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
3370
clear_bit(KVM_APIC_SIPI, &apic->pending_events);
3371
return 0;
3372
}
3373
3374
if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
3375
kvm_vcpu_reset(vcpu, true);
3376
if (kvm_vcpu_is_bsp(apic->vcpu))
3377
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
3378
else
3379
kvm_set_mp_state(vcpu, KVM_MP_STATE_INIT_RECEIVED);
3380
}
3381
if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) {
3382
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
3383
/* evaluate pending_events before reading the vector */
3384
smp_rmb();
3385
sipi_vector = apic->sipi_vector;
3386
kvm_x86_call(vcpu_deliver_sipi_vector)(vcpu,
3387
sipi_vector);
3388
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
3389
}
3390
}
3391
return 0;
3392
}
3393
3394
void kvm_lapic_exit(void)
3395
{
3396
static_key_deferred_flush(&apic_hw_disabled);
3397
WARN_ON(static_branch_unlikely(&apic_hw_disabled.key));
3398
static_key_deferred_flush(&apic_sw_disabled);
3399
WARN_ON(static_branch_unlikely(&apic_sw_disabled.key));
3400
}
3401
3402