GitHub Repository: torvalds/linux
Path: blob/master/arch/arm64/kvm/vgic/vgic.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015, 2016 ARM Ltd.
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list_sort.h>
#include <linux/nospec.h>

#include <asm/kvm_hyp.h>

#include "vgic.h"

#define CREATE_TRACE_POINTS
#include "trace.h"

struct vgic_global kvm_vgic_global_state __ro_after_init = {
        .gicv3_cpuif = STATIC_KEY_FALSE_INIT,
};

/*
 * Locking order is always:
 * kvm->lock (mutex)
 *   vcpu->mutex (mutex)
 *     kvm->arch.config_lock (mutex)
 *       its->cmd_lock (mutex)
 *         its->its_lock (mutex)
 *           vgic_cpu->ap_list_lock         must be taken with IRQs disabled
 *             vgic_dist->lpi_xa.xa_lock    must be taken with IRQs disabled
 *               vgic_irq->irq_lock         must be taken with IRQs disabled
 *
 * As the ap_list_lock might be taken from the timer interrupt handler,
 * we have to disable IRQs before taking this lock and everything lower
 * than it.
 *
 * The config_lock has additional ordering requirements:
 * kvm->slots_lock
 *   kvm->srcu
 *     kvm->arch.config_lock
 *
 * If you need to take multiple locks, always take the upper lock first,
 * then the lower ones, e.g. first take the its_lock, then the irq_lock.
 * If you are already holding a lock and need to take a higher one, you
 * have to drop the lower ranking lock first and re-acquire it after having
 * taken the upper one.
 *
 * When taking more than one ap_list_lock at the same time, always take the
 * lowest numbered VCPU's ap_list_lock first, so:
 *   vcpuX->vcpu_id < vcpuY->vcpu_id:
 *     raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
 *     raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
 *
 * Since the VGIC must support injecting virtual interrupts from ISRs, we have
 * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer
 * spinlocks for any lock that may be taken while injecting an interrupt.
 */
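
/*
 * Illustrative sketch (not part of the upstream file): the drop-and-reacquire
 * rule above in code form. If a path holds an irq_lock and then discovers it
 * needs the higher-ranking ap_list_lock, it must back off first:
 *
 *      raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 *      raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
 *      raw_spin_lock(&irq->irq_lock);
 *      // ... re-validate any state sampled before the locks were dropped ...
 *
 * vgic_queue_irq_unlock() below follows exactly this pattern.
 */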

/*
 * Index the VM's xarray of mapped LPIs and return a reference to the IRQ
 * structure. The caller is expected to call vgic_put_irq() later once it's
 * finished with the IRQ.
 */
static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
{
        struct vgic_dist *dist = &kvm->arch.vgic;
        struct vgic_irq *irq = NULL;

        rcu_read_lock();

        irq = xa_load(&dist->lpi_xa, intid);
        if (!vgic_try_get_irq_kref(irq))
                irq = NULL;

        rcu_read_unlock();

        return irq;
}

/*
 * This looks up the virtual interrupt ID to get the corresponding
 * struct vgic_irq. It also increases the refcount, so any caller is expected
 * to call vgic_put_irq() once it's finished with this IRQ.
 */
struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid)
{
        /* SPIs */
        if (intid >= VGIC_NR_PRIVATE_IRQS &&
            intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
                intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
                return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
        }

        /* LPIs */
        if (intid >= VGIC_MIN_LPI)
                return vgic_get_lpi(kvm, intid);

        return NULL;
}
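
/*
 * Illustrative usage sketch (not part of the upstream file): a typical
 * lookup/use/release sequence for an SPI or LPI, following the contract
 * documented above. "my_intid" is a hypothetical interrupt number.
 *
 *      struct vgic_irq *irq = vgic_get_irq(kvm, my_intid);
 *
 *      if (irq) {
 *              // ... inspect or modify the IRQ under irq->irq_lock ...
 *              vgic_put_irq(kvm, irq);    // drop the reference taken above
 *      }
 */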

struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid)
{
        if (WARN_ON(!vcpu))
                return NULL;

        /* SGIs and PPIs */
        if (intid < VGIC_NR_PRIVATE_IRQS) {
                intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS);
                return &vcpu->arch.vgic_cpu.private_irqs[intid];
        }

        return vgic_get_irq(vcpu->kvm, intid);
}

/*
 * We can't do anything in here, because we lack the kvm pointer to
 * lock and remove the item from the lpi_list. So we keep this function
 * empty and use the return value of kref_put() to trigger the freeing.
 */
static void vgic_irq_release(struct kref *ref)
{
}

void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
        struct vgic_dist *dist = &kvm->arch.vgic;
        unsigned long flags;

        if (irq->intid < VGIC_MIN_LPI)
                return;

        if (!kref_put(&irq->refcount, vgic_irq_release))
                return;

        xa_lock_irqsave(&dist->lpi_xa, flags);
        __xa_erase(&dist->lpi_xa, irq->intid);
        xa_unlock_irqrestore(&dist->lpi_xa, flags);

        kfree_rcu(irq, rcu);
}

void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq, *tmp;
        unsigned long flags;

        raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);

        list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
                if (irq->intid >= VGIC_MIN_LPI) {
                        raw_spin_lock(&irq->irq_lock);
                        list_del(&irq->ap_list);
                        irq->vcpu = NULL;
                        raw_spin_unlock(&irq->irq_lock);
                        vgic_put_irq(vcpu->kvm, irq);
                }
        }

        raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
}

void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
{
        WARN_ON(irq_set_irqchip_state(irq->host_irq,
                                      IRQCHIP_STATE_PENDING,
                                      pending));
}

bool vgic_get_phys_line_level(struct vgic_irq *irq)
{
        bool line_level;

        BUG_ON(!irq->hw);

        if (irq->ops && irq->ops->get_input_level)
                return irq->ops->get_input_level(irq->intid);

        WARN_ON(irq_get_irqchip_state(irq->host_irq,
                                      IRQCHIP_STATE_PENDING,
                                      &line_level));
        return line_level;
}

/* Set/Clear the physical active state */
void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
{

        BUG_ON(!irq->hw);
        WARN_ON(irq_set_irqchip_state(irq->host_irq,
                                      IRQCHIP_STATE_ACTIVE,
                                      active));
}

/**
 * vgic_target_oracle - compute the target vcpu for an irq
 *
 * @irq:	The irq to route. Must be already locked.
 *
 * Based on the current state of the interrupt (enabled, pending,
 * active, vcpu and target_vcpu), compute the next vcpu this should be
 * given to. Return NULL if this shouldn't be injected at all.
 *
 * Requires the IRQ lock to be held.
 */
static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
{
        lockdep_assert_held(&irq->irq_lock);

        /* If the interrupt is active, it must stay on the current vcpu */
        if (irq->active)
                return irq->vcpu ? : irq->target_vcpu;

        /*
         * If the IRQ is not active but enabled and pending, we should direct
         * it to its configured target VCPU.
         * If the distributor is disabled, pending interrupts shouldn't be
         * forwarded.
         */
        if (irq->enabled && irq_is_pending(irq)) {
                if (unlikely(irq->target_vcpu &&
                             !irq->target_vcpu->kvm->arch.vgic.enabled))
                        return NULL;

                return irq->target_vcpu;
        }

        /* If neither active nor pending and enabled, then this IRQ should not
         * be queued to any VCPU.
         */
        return NULL;
}
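
/*
 * Illustrative summary (not part of the upstream file) of the oracle above:
 *
 *      active                           -> irq->vcpu (or target_vcpu if unset)
 *      enabled && pending, dist enabled -> irq->target_vcpu
 *      enabled && pending, dist disabled-> NULL (not forwarded)
 *      otherwise                        -> NULL (nothing to queue)
 */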

/*
 * The order of items in the ap_lists defines how we'll pack things in LRs as
 * well, the first items in the list being the first things populated in the
 * LRs.
 *
 * A hard rule is that active interrupts can never be pushed out of the LRs
 * (and therefore take priority) since we cannot reliably trap on deactivation
 * of IRQs and therefore they have to be present in the LRs.
 *
 * Otherwise things should be sorted by the priority field and the GIC
 * hardware support will take care of preemption of priority groups etc.
 *
 * Return negative if "a" sorts before "b", 0 to preserve order, and positive
 * to sort "b" before "a".
 */
static int vgic_irq_cmp(void *priv, const struct list_head *a,
                        const struct list_head *b)
{
        struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
        struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
        bool penda, pendb;
        int ret;

        /*
         * list_sort may call this function with the same element when
         * the list is fairly long.
         */
        if (unlikely(irqa == irqb))
                return 0;

        raw_spin_lock(&irqa->irq_lock);
        raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);

        if (irqa->active || irqb->active) {
                ret = (int)irqb->active - (int)irqa->active;
                goto out;
        }

        penda = irqa->enabled && irq_is_pending(irqa);
        pendb = irqb->enabled && irq_is_pending(irqb);

        if (!penda || !pendb) {
                ret = (int)pendb - (int)penda;
                goto out;
        }

        /* Both pending and enabled, sort by priority */
        ret = irqa->priority - irqb->priority;
out:
        raw_spin_unlock(&irqb->irq_lock);
        raw_spin_unlock(&irqa->irq_lock);
        return ret;
}
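
/*
 * Illustrative example (not part of the upstream file): given three IRQs on
 * an ap_list - A (active), B (pending+enabled, priority 0xa0) and
 * C (pending+enabled, priority 0x20) - the comparator above orders them
 * A, C, B: active entries first, then pending+enabled entries by ascending
 * priority value (a lower value means a higher priority on the GIC).
 */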

/* Must be called with the ap_list_lock held */
static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

        lockdep_assert_held(&vgic_cpu->ap_list_lock);

        list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
}

/*
 * Only valid injection if changing level for level-triggered IRQs or for a
 * rising edge, and in-kernel connected IRQ lines can only be controlled by
 * their owner.
 */
static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
{
        if (irq->owner != owner)
                return false;

        switch (irq->config) {
        case VGIC_CONFIG_LEVEL:
                return irq->line_level != level;
        case VGIC_CONFIG_EDGE:
                return level;
        }

        return false;
}
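
/*
 * Illustrative examples (not part of the upstream file) of the check above,
 * assuming the caller owns the line:
 *
 *      level-triggered, line_level == false, level == true  -> valid (rising)
 *      level-triggered, line_level == true,  level == true  -> ignored (no change)
 *      edge-triggered,  level == true                       -> valid (trigger)
 *      edge-triggered,  level == false                      -> ignored
 */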

/*
 * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
 * Do the queuing if necessary, taking the right locks in the right order.
 * Returns true when the IRQ was queued, false otherwise.
 *
 * Needs to be entered with the IRQ lock already held, but will return
 * with all locks dropped.
 */
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
                           unsigned long flags) __releases(&irq->irq_lock)
{
        struct kvm_vcpu *vcpu;

        lockdep_assert_held(&irq->irq_lock);

retry:
        vcpu = vgic_target_oracle(irq);
        if (irq->vcpu || !vcpu) {
                /*
                 * If this IRQ is already on a VCPU's ap_list, then it
                 * cannot be moved or modified and there is no more work for
                 * us to do.
                 *
                 * Otherwise, if the irq is not pending and enabled, it does
                 * not need to be inserted into an ap_list and there is also
                 * no more work for us to do.
                 */
                raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

                /*
                 * We have to kick the VCPU here, because we could be
                 * queueing an edge-triggered interrupt for which we
                 * get no EOI maintenance interrupt. In that case,
                 * while the IRQ is already on the VCPU's AP list, the
                 * VCPU could have EOI'ed the original interrupt and
                 * won't see this one until it exits for some other
                 * reason.
                 */
                if (vcpu) {
                        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
                        kvm_vcpu_kick(vcpu);
                }
                return false;
        }

        /*
         * We must unlock the irq lock to take the ap_list_lock where
         * we are going to insert this new pending interrupt.
         */
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

        /* someone can do stuff here, which we re-check below */

        raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
        raw_spin_lock(&irq->irq_lock);

        /*
         * Did something change behind our backs?
         *
         * There are two cases:
         * 1) The irq lost its pending state or was disabled behind our
         *    backs and/or it was queued to another VCPU's ap_list.
         * 2) Someone changed the affinity on this irq behind our
         *    backs and we are now holding the wrong ap_list_lock.
         *
         * In both cases, drop the locks and retry.
         */

        if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
                raw_spin_unlock(&irq->irq_lock);
                raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock,
                                           flags);

                raw_spin_lock_irqsave(&irq->irq_lock, flags);
                goto retry;
        }

        /*
         * Grab a reference to the irq to reflect the fact that it is
         * now in the ap_list. This is safe as the caller must already hold a
         * reference on the irq.
         */
        vgic_get_irq_kref(irq);
        list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
        irq->vcpu = vcpu;

        raw_spin_unlock(&irq->irq_lock);
        raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);

        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
        kvm_vcpu_kick(vcpu);

        return true;
}
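
/*
 * Illustrative caller sketch (not part of the upstream file): the usual
 * pattern is to take the irq_lock, update the pending state, and then hand
 * the still-locked IRQ to vgic_queue_irq_unlock(), which drops all locks:
 *
 *      raw_spin_lock_irqsave(&irq->irq_lock, flags);
 *      irq->pending_latch = true;
 *      vgic_queue_irq_unlock(kvm, irq, flags);
 *
 * kvm_vgic_inject_irq() below is one such caller.
 */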

/**
 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
 * @kvm:	The VM structure pointer
 * @vcpu:	The CPU for PPIs or NULL for global interrupts
 * @intid:	The INTID to inject a new state to.
 * @level:	Edge-triggered:  true:  to trigger the interrupt
 *			         false: to ignore the call
 *		Level-sensitive  true:  raise the input signal
 *			         false: lower the input signal
 * @owner:	The opaque pointer to the owner of the IRQ being raised to verify
 *		that the caller is allowed to inject this IRQ.  Userspace
 *		injections will have owner == NULL.
 *
 * The VGIC is not concerned with devices being active-LOW or active-HIGH for
 * level-sensitive interrupts.  You can think of the level parameter as 1
 * being HIGH and 0 being LOW and all devices being active-HIGH.
 */
int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
                        unsigned int intid, bool level, void *owner)
{
        struct vgic_irq *irq;
        unsigned long flags;
        int ret;

        ret = vgic_lazy_init(kvm);
        if (ret)
                return ret;

        if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
                return -EINVAL;

        trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level);

        if (intid < VGIC_NR_PRIVATE_IRQS)
                irq = vgic_get_vcpu_irq(vcpu, intid);
        else
                irq = vgic_get_irq(kvm, intid);
        if (!irq)
                return -EINVAL;

        raw_spin_lock_irqsave(&irq->irq_lock, flags);

        if (!vgic_validate_injection(irq, level, owner)) {
                /* Nothing to see here, move along... */
                raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
                vgic_put_irq(kvm, irq);
                return 0;
        }

        if (irq->config == VGIC_CONFIG_LEVEL)
                irq->line_level = level;
        else
                irq->pending_latch = true;

        vgic_queue_irq_unlock(kvm, irq, flags);
        vgic_put_irq(kvm, irq);

        return 0;
}
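
/*
 * Illustrative usage sketch (not part of the upstream file): an in-kernel
 * device model raising and lowering a level-sensitive SPI. "MY_SPI_INTID"
 * and "my_dev" are hypothetical.
 *
 *      kvm_vgic_inject_irq(kvm, NULL, MY_SPI_INTID, true, my_dev);   // assert
 *      // ...
 *      kvm_vgic_inject_irq(kvm, NULL, MY_SPI_INTID, false, my_dev);  // deassert
 *
 * A userspace injection would pass owner == NULL, and a PPI would pass the
 * target vcpu instead of NULL for @vcpu.
 */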

/* @irq->irq_lock must be held */
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
                            unsigned int host_irq,
                            struct irq_ops *ops)
{
        struct irq_desc *desc;
        struct irq_data *data;

        /*
         * Find the physical IRQ number corresponding to @host_irq
         */
        desc = irq_to_desc(host_irq);
        if (!desc) {
                kvm_err("%s: no interrupt descriptor\n", __func__);
                return -EINVAL;
        }
        data = irq_desc_get_irq_data(desc);
        while (data->parent_data)
                data = data->parent_data;

        irq->hw = true;
        irq->host_irq = host_irq;
        irq->hwintid = data->hwirq;
        irq->ops = ops;
        return 0;
}

/* @irq->irq_lock must be held */
static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
{
        irq->hw = false;
        irq->hwintid = 0;
        irq->ops = NULL;
}

int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
                          u32 vintid, struct irq_ops *ops)
{
        struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
        unsigned long flags;
        int ret;

        BUG_ON(!irq);

        raw_spin_lock_irqsave(&irq->irq_lock, flags);
        ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
        vgic_put_irq(vcpu->kvm, irq);

        return ret;
}
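
/*
 * Illustrative usage sketch (not part of the upstream file): forwarding a
 * physical interrupt to a virtual one, optionally providing a callback so
 * the VGIC can sample the device's line level (see
 * vgic_get_phys_line_level() above). "my_get_level", "MY_HOST_IRQ" and
 * "MY_VINTID" are hypothetical.
 *
 *      static struct irq_ops my_ops = {
 *              .get_input_level = my_get_level,
 *      };
 *
 *      kvm_vgic_map_phys_irq(vcpu, MY_HOST_IRQ, MY_VINTID, &my_ops);
 *      // ...
 *      kvm_vgic_unmap_phys_irq(vcpu, MY_VINTID);
 */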

/**
 * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
 * @vcpu: The VCPU pointer
 * @vintid: The INTID of the interrupt
 *
 * Reset the active and pending states of a mapped interrupt. Kernel
 * subsystems injecting mapped interrupts should reset their interrupt lines
 * when we are doing a reset of the VM.
 */
void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
{
        struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
        unsigned long flags;

        if (!irq->hw)
                goto out;

        raw_spin_lock_irqsave(&irq->irq_lock, flags);
        irq->active = false;
        irq->pending_latch = false;
        irq->line_level = false;
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
out:
        vgic_put_irq(vcpu->kvm, irq);
}

int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
{
        struct vgic_irq *irq;
        unsigned long flags;

        if (!vgic_initialized(vcpu->kvm))
                return -EAGAIN;

        irq = vgic_get_vcpu_irq(vcpu, vintid);
        BUG_ON(!irq);

        raw_spin_lock_irqsave(&irq->irq_lock, flags);
        kvm_vgic_unmap_irq(irq);
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
        vgic_put_irq(vcpu->kvm, irq);

        return 0;
}

int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid)
{
        struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
        unsigned long flags;
        int ret = -1;

        raw_spin_lock_irqsave(&irq->irq_lock, flags);
        if (irq->hw)
                ret = irq->hwintid;
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

        vgic_put_irq(vcpu->kvm, irq);
        return ret;
}

/**
 * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
 *
 * @vcpu:   Pointer to the VCPU (used for PPIs)
 * @intid:  The virtual INTID identifying the interrupt (PPI or SPI)
 * @owner:  Opaque pointer to the owner
 *
 * Returns 0 if intid is not already used by another in-kernel device and the
 * owner is set, otherwise returns an error code.
 */
int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
{
        struct vgic_irq *irq;
        unsigned long flags;
        int ret = 0;

        if (!vgic_initialized(vcpu->kvm))
                return -EAGAIN;

        /* SGIs and LPIs cannot be wired up to any device */
        if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
                return -EINVAL;

        irq = vgic_get_vcpu_irq(vcpu, intid);
        raw_spin_lock_irqsave(&irq->irq_lock, flags);
        if (irq->owner && irq->owner != owner)
                ret = -EEXIST;
        else
                irq->owner = owner;
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

        return ret;
}
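
/*
 * Illustrative usage sketch (not part of the upstream file): an in-kernel
 * device claims its PPI once at init time and then passes the same opaque
 * pointer when injecting, so vgic_validate_injection() can check it.
 * "MY_PPI_INTID" and "my_dev" are hypothetical.
 *
 *      if (kvm_vgic_set_owner(vcpu, MY_PPI_INTID, my_dev))
 *              return -EBUSY;  // someone else already owns this line
 *      // ...
 *      kvm_vgic_inject_irq(vcpu->kvm, vcpu, MY_PPI_INTID, true, my_dev);
 */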

/**
 * vgic_prune_ap_list - Remove non-relevant interrupts from the list
 *
 * @vcpu: The VCPU pointer
 *
 * Go over the list of "interesting" interrupts, and prune those that we
 * won't have to consider in the near future.
 */
static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq, *tmp;

        DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

retry:
        raw_spin_lock(&vgic_cpu->ap_list_lock);

        list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
                struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
                bool target_vcpu_needs_kick = false;

                raw_spin_lock(&irq->irq_lock);

                BUG_ON(vcpu != irq->vcpu);

                target_vcpu = vgic_target_oracle(irq);

                if (!target_vcpu) {
                        /*
                         * We don't need to process this interrupt any
                         * further, move it off the list.
                         */
                        list_del(&irq->ap_list);
                        irq->vcpu = NULL;
                        raw_spin_unlock(&irq->irq_lock);

                        /*
                         * This vgic_put_irq call matches the
                         * vgic_get_irq_kref in vgic_queue_irq_unlock,
                         * where we added the LPI to the ap_list. As
                         * we remove the irq from the list, we also
                         * drop the refcount.
                         */
                        vgic_put_irq(vcpu->kvm, irq);
                        continue;
                }

                if (target_vcpu == vcpu) {
                        /* We're on the right CPU */
                        raw_spin_unlock(&irq->irq_lock);
                        continue;
                }

                /* This interrupt looks like it has to be migrated. */

                raw_spin_unlock(&irq->irq_lock);
                raw_spin_unlock(&vgic_cpu->ap_list_lock);

                /*
                 * Ensure locking order by always locking the smallest
                 * ID first.
                 */
                if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
                        vcpuA = vcpu;
                        vcpuB = target_vcpu;
                } else {
                        vcpuA = target_vcpu;
                        vcpuB = vcpu;
                }

                raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
                raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
                                     SINGLE_DEPTH_NESTING);
                raw_spin_lock(&irq->irq_lock);

                /*
                 * If the affinity has been preserved, move the
                 * interrupt around. Otherwise, it means things have
                 * changed while the interrupt was unlocked, and we
                 * need to replay this.
                 *
                 * In all cases, we cannot trust the list not to have
                 * changed, so we restart from the beginning.
                 */
                if (target_vcpu == vgic_target_oracle(irq)) {
                        struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;

                        list_del(&irq->ap_list);
                        irq->vcpu = target_vcpu;
                        list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
                        target_vcpu_needs_kick = true;
                }

                raw_spin_unlock(&irq->irq_lock);
                raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
                raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);

                if (target_vcpu_needs_kick) {
                        kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
                        kvm_vcpu_kick(target_vcpu);
                }

                goto retry;
        }

        raw_spin_unlock(&vgic_cpu->ap_list_lock);
}

static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
{
        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_fold_lr_state(vcpu);
        else
                vgic_v3_fold_lr_state(vcpu);
}

/* Requires the irq_lock to be held. */
static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
                                    struct vgic_irq *irq, int lr)
{
        lockdep_assert_held(&irq->irq_lock);

        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_populate_lr(vcpu, irq, lr);
        else
                vgic_v3_populate_lr(vcpu, irq, lr);
}

static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
{
        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_clear_lr(vcpu, lr);
        else
                vgic_v3_clear_lr(vcpu, lr);
}

static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
{
        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_set_underflow(vcpu);
        else
                vgic_v3_set_underflow(vcpu);
}

/* Requires the ap_list_lock to be held. */
static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
                                 bool *multi_sgi)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq;
        int count = 0;

        *multi_sgi = false;

        lockdep_assert_held(&vgic_cpu->ap_list_lock);

        list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
                int w;

                raw_spin_lock(&irq->irq_lock);
                /* GICv2 SGIs can count for more than one... */
                w = vgic_irq_get_lr_count(irq);
                raw_spin_unlock(&irq->irq_lock);

                count += w;
                *multi_sgi |= (w > 1);
        }
        return count;
}

/* Requires the VCPU's ap_list_lock to be held. */
static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq;
        int count;
        bool multi_sgi;
        u8 prio = 0xff;
        int i = 0;

        lockdep_assert_held(&vgic_cpu->ap_list_lock);

        count = compute_ap_list_depth(vcpu, &multi_sgi);
        if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
                vgic_sort_ap_list(vcpu);

        count = 0;

        list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
                raw_spin_lock(&irq->irq_lock);

                /*
                 * If we have multi-SGIs in the pipeline, we need to
                 * guarantee that they are all seen before any IRQ of
                 * lower priority. In that case, we need to filter out
                 * these interrupts by exiting early. This is easy as
                 * the AP list has been sorted already.
                 */
                if (multi_sgi && irq->priority > prio) {
                        _raw_spin_unlock(&irq->irq_lock);
                        break;
                }

                if (likely(vgic_target_oracle(irq) == vcpu)) {
                        vgic_populate_lr(vcpu, irq, count++);

                        if (irq->source)
                                prio = irq->priority;
                }

                raw_spin_unlock(&irq->irq_lock);

                if (count == kvm_vgic_global_state.nr_lr) {
                        if (!list_is_last(&irq->ap_list,
                                          &vgic_cpu->ap_list_head))
                                vgic_set_underflow(vcpu);
                        break;
                }
        }

        /* Nuke remaining LRs */
        for (i = count ; i < kvm_vgic_global_state.nr_lr; i++)
                vgic_clear_lr(vcpu, i);

        if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
                vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
        else
                vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
}

static inline bool can_access_vgic_from_kernel(void)
{
        /*
         * GICv2 can always be accessed from the kernel because it is
         * memory-mapped, and VHE systems can access GICv3 EL2 system
         * registers.
         */
        return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
}

static inline void vgic_save_state(struct kvm_vcpu *vcpu)
{
        if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
                vgic_v2_save_state(vcpu);
        else
                __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
}

/* Sync back the hardware VGIC state into our emulation after a guest's run. */
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
        int used_lrs;

        /* If nesting, emulate the HW effect from L0 to L1 */
        if (vgic_state_is_nested(vcpu)) {
                vgic_v3_sync_nested(vcpu);
                return;
        }

        if (vcpu_has_nv(vcpu))
                vgic_v3_nested_update_mi(vcpu);

        /* An empty ap_list_head implies used_lrs == 0 */
        if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
                return;

        if (can_access_vgic_from_kernel())
                vgic_save_state(vcpu);

        if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
                used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
        else
                used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;

        if (used_lrs)
                vgic_fold_lr_state(vcpu);
        vgic_prune_ap_list(vcpu);
}

static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
{
        if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
                vgic_v2_restore_state(vcpu);
        else
                __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
}

/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
        /*
         * If in a nested state, we must return early. Two possibilities:
         *
         * - If we have any pending IRQ for the guest and the guest
         *   expects IRQs to be handled in its virtual EL2 mode (the
         *   virtual IMO bit is set) and it is not already running in
         *   virtual EL2 mode, then we have to emulate an IRQ
         *   exception to virtual EL2.
         *
         *   We do that by placing a request to ourselves which will
         *   abort the entry procedure and inject the exception at the
         *   beginning of the run loop.
         *
         * - Otherwise, do exactly *NOTHING*. The guest state is
         *   already loaded, and we can carry on with running it.
         *
         * If we have NV, but are not in a nested state, compute the
         * maintenance interrupt state, as it may fire.
         */
        if (vgic_state_is_nested(vcpu)) {
                if (kvm_vgic_vcpu_pending_irq(vcpu))
                        kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu);

                return;
        }

        if (vcpu_has_nv(vcpu))
                vgic_v3_nested_update_mi(vcpu);

        /*
         * If there are no virtual interrupts active or pending for this
         * VCPU, then there is no work to do and we can bail out without
         * taking any lock.  There is a potential race with someone injecting
         * interrupts to the VCPU, but it is a benign race as the VCPU will
         * either observe the new interrupt before or after doing this check,
         * and introducing additional synchronization mechanism doesn't change
         * this.
         *
         * Note that we still need to go through the whole thing if anything
         * can be directly injected (GICv4).
         */
        if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
            !vgic_supports_direct_irqs(vcpu->kvm))
                return;

        DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

        if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
                raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
                vgic_flush_lr_state(vcpu);
                raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
        }

        if (can_access_vgic_from_kernel())
                vgic_restore_state(vcpu);

        if (vgic_supports_direct_irqs(vcpu->kvm))
                vgic_v4_commit(vcpu);
}
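
/*
 * Illustrative ordering sketch (not part of the upstream file): per guest
 * entry/exit, the run loop pairs these two calls roughly as follows:
 *
 *      kvm_vgic_flush_hwstate(vcpu);   // ap_list -> list registers
 *      // ... run the guest ...
 *      kvm_vgic_sync_hwstate(vcpu);    // list registers -> ap_list, then prune
 */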

void kvm_vgic_load(struct kvm_vcpu *vcpu)
{
        if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
                if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
                        __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
                return;
        }

        if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
                vgic_v2_load(vcpu);
        else
                vgic_v3_load(vcpu);
}

void kvm_vgic_put(struct kvm_vcpu *vcpu)
{
        if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
                if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
                        __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
                return;
        }

        if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
                vgic_v2_put(vcpu);
        else
                vgic_v3_put(vcpu);
}

int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq;
        bool pending = false;
        unsigned long flags;
        struct vgic_vmcr vmcr;

        if (!vcpu->kvm->arch.vgic.enabled)
                return false;

        if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
                return true;

        vgic_get_vmcr(vcpu, &vmcr);

        raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);

        list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
                raw_spin_lock(&irq->irq_lock);
                pending = irq_is_pending(irq) && irq->enabled &&
                          !irq->active &&
                          irq->priority < vmcr.pmr;
                raw_spin_unlock(&irq->irq_lock);

                if (pending)
                        break;
        }

        raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);

        return pending;
}

void vgic_kick_vcpus(struct kvm *kvm)
{
        struct kvm_vcpu *vcpu;
        unsigned long c;

        /*
         * We've injected an interrupt, time to find out who deserves
         * a good kick...
         */
        kvm_for_each_vcpu(c, vcpu, kvm) {
                if (kvm_vgic_vcpu_pending_irq(vcpu)) {
                        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
                        kvm_vcpu_kick(vcpu);
                }
        }
}

bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
{
        struct vgic_irq *irq;
        bool map_is_active;
        unsigned long flags;

        if (!vgic_initialized(vcpu->kvm))
                return false;

        irq = vgic_get_vcpu_irq(vcpu, vintid);
        raw_spin_lock_irqsave(&irq->irq_lock, flags);
        map_is_active = irq->hw && irq->active;
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
        vgic_put_irq(vcpu->kvm, irq);

        return map_is_active;
}

/*
 * Level-triggered mapped IRQs are special because we only observe rising
 * edges as input to the VGIC.
 *
 * If the guest never acked the interrupt we have to sample the physical
 * line and set the line level, because the device state could have changed
 * or we simply need to process the still pending interrupt later.
 *
 * We could also have entered the guest with the interrupt active+pending.
 * On the next exit, we need to re-evaluate the pending state, as it could
 * otherwise result in a spurious interrupt by injecting a now potentially
 * stale pending state.
 *
 * If this causes us to lower the level, we have to also clear the physical
 * active state, since we will otherwise never be told when the interrupt
 * becomes asserted again.
 *
 * Another case is when the interrupt requires a helping hand on
 * deactivation (no HW deactivation, for example).
 */
void vgic_irq_handle_resampling(struct vgic_irq *irq,
                                bool lr_deactivated, bool lr_pending)
{
        if (vgic_irq_is_mapped_level(irq)) {
                bool resample = false;

                if (unlikely(vgic_irq_needs_resampling(irq))) {
                        resample = !(irq->active || irq->pending_latch);
                } else if (lr_pending || (lr_deactivated && irq->line_level)) {
                        irq->line_level = vgic_get_phys_line_level(irq);
                        resample = !irq->line_level;
                }

                if (resample)
                        vgic_irq_set_phys_active(irq, false);
        }
}
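
/*
 * Illustrative walk-through (not part of the upstream file): for a mapped
 * level-triggered IRQ that the guest has just EOI'ed (lr_deactivated), the
 * code above re-samples the physical line; if the device has dropped its
 * request, line_level becomes false and the physical active state is
 * cleared so the host can observe the next assertion.
 */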