CoCalc -- vgic.c

GitHub Repository: torvalds/linux
Path: blob/master/arch/arm64/kvm/vgic/vgic.c
²⁶⁵³² views
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015, 2016 ARM Ltd.
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list_sort.h>
#include <linux/nospec.h>

#include <asm/kvm_hyp.h>

#include "vgic.h"

/* Defined ahead of trace.h so the tracepoints are instantiated here. */
#define CREATE_TRACE_POINTS
#include "trace.h"

/*
 * Global VGIC state. Marked __ro_after_init; the gicv3_cpuif static key
 * starts out false.
 */
struct vgic_global kvm_vgic_global_state __ro_after_init = {
	.gicv3_cpuif = STATIC_KEY_FALSE_INIT,
};

/*
 * Locking order is always:
 * kvm->lock (mutex)
 *   vcpu->mutex (mutex)
 *     kvm->arch.config_lock (mutex)
 *       its->cmd_lock (mutex)
 *         its->its_lock (mutex)
 *           vgic_cpu->ap_list_lock		must be taken with IRQs disabled
 *             vgic_dist->lpi_xa.xa_lock	must be taken with IRQs disabled
 *               vgic_irq->irq_lock		must be taken with IRQs disabled
 *
 * As the ap_list_lock might be taken from the timer interrupt handler,
 * we have to disable IRQs before taking this lock and everything lower
 * than it.
 *
 * The config_lock has additional ordering requirements:
 * kvm->slots_lock
 *   kvm->srcu
 *     kvm->arch.config_lock
 *
 * If you need to take multiple locks, always take the upper lock first,
 * then the lower ones, e.g. first take the its_lock, then the irq_lock.
 * If you are already holding a lock and need to take a higher one, you
 * have to drop the lower ranking lock first and re-acquire it after having
 * taken the upper one.
 *
 * When taking more than one ap_list_lock at the same time, always take the
 * lowest numbered VCPU's ap_list_lock first, so:
 *   vcpuX->vcpu_id < vcpuY->vcpu_id:
 *     raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
 *     raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
 *
 * Since the VGIC must support injecting virtual interrupts from ISRs, we have
 * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer
 * spinlocks for any lock that may be taken while injecting an interrupt.
 */

/*
62
 * Index the VM's xarray of mapped LPIs and return a reference to the IRQ
63
 * structure. The caller is expected to call vgic_put_irq() later once it's
64
 * finished with the IRQ.
65
 */
66
static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
67
{
68
	struct vgic_dist *dist = &kvm->arch.vgic;
69
	struct vgic_irq *irq = NULL;
70

71
	rcu_read_lock();
72

73
	irq = xa_load(&dist->lpi_xa, intid);
74
	if (!vgic_try_get_irq_kref(irq))
75
		irq = NULL;
76

77
	rcu_read_unlock();
78

79
	return irq;
80
}
81

82
/*
83
 * This looks up the virtual interrupt ID to get the corresponding
84
 * struct vgic_irq. It also increases the refcount, so any caller is expected
85
 * to call vgic_put_irq() once it's finished with this IRQ.
86
 */
87
struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid)
88
{
89
	/* SPIs */
90
	if (intid >= VGIC_NR_PRIVATE_IRQS &&
91
	    intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
92
		intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
93
		return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
94
	}
95

96
	/* LPIs */
97
	if (intid >= VGIC_MIN_LPI)
98
		return vgic_get_lpi(kvm, intid);
99

100
	return NULL;
101
}
102

103
/*
 * Per-vcpu flavour of vgic_get_irq(): INTIDs below VGIC_NR_PRIVATE_IRQS
 * (SGIs and PPIs) resolve to the vcpu's private_irqs array, anything else
 * is forwarded to vgic_get_irq(). Warns and returns NULL for a NULL @vcpu.
 */
struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid)
{
	if (WARN_ON(!vcpu))
		return NULL;

	/* Not an SGI/PPI: defer to the per-VM lookup. */
	if (intid >= VGIC_NR_PRIVATE_IRQS)
		return vgic_get_irq(vcpu->kvm, intid);

	intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS);
	return &vcpu->arch.vgic_cpu.private_irqs[intid];
}

/*
 * kref release callback. Nothing can be done here because only the kref is
 * passed in: there is no kvm pointer with which to lock the LPI xarray and
 * erase the entry. The callback is therefore a no-op, and vgic_put_irq()
 * uses the return value of kref_put() to trigger the actual freeing.
 */
static void vgic_irq_release(struct kref *ref)
{
	/* Intentionally empty. */
}

void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
127
{
128
	struct vgic_dist *dist = &kvm->arch.vgic;
129
	unsigned long flags;
130

131
	if (irq->intid < VGIC_MIN_LPI)
132
		return;
133

134
	if (!kref_put(&irq->refcount, vgic_irq_release))
135
		return;
136

137
	xa_lock_irqsave(&dist->lpi_xa, flags);
138
	__xa_erase(&dist->lpi_xa, irq->intid);
139
	xa_unlock_irqrestore(&dist->lpi_xa, flags);
140

141
	kfree_rcu(irq, rcu);
142
}
143

144
void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
145
{
146
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
147
	struct vgic_irq *irq, *tmp;
148
	unsigned long flags;
149

150
	raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
151

152
	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
153
		if (irq->intid >= VGIC_MIN_LPI) {
154
			raw_spin_lock(&irq->irq_lock);
155
			list_del(&irq->ap_list);
156
			irq->vcpu = NULL;
157
			raw_spin_unlock(&irq->irq_lock);
158
			vgic_put_irq(vcpu->kvm, irq);
159
		}
160
	}
161

162
	raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
163
}
164

165
/*
 * Set or clear the pending state of the host interrupt backing @irq.
 * A failure from the irqchip layer is reported with a WARN.
 */
void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
{
	int err = irq_set_irqchip_state(irq->host_irq, IRQCHIP_STATE_PENDING,
					pending);

	WARN_ON(err);
}

bool vgic_get_phys_line_level(struct vgic_irq *irq)
173
{
174
	bool line_level;
175

176
	BUG_ON(!irq->hw);
177

178
	if (irq->ops && irq->ops->get_input_level)
179
		return irq->ops->get_input_level(irq->intid);
180

181
	WARN_ON(irq_get_irqchip_state(irq->host_irq,
182
				      IRQCHIP_STATE_PENDING,
183
				      &line_level));
184
	return line_level;
185
}
186

187
/* Set/Clear the physical active state */
188
void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
189
{
190

191
	BUG_ON(!irq->hw);
192
	WARN_ON(irq_set_irqchip_state(irq->host_irq,
193
				      IRQCHIP_STATE_ACTIVE,
194
				      active));
195
}
196

197
/**
198
 * vgic_target_oracle - compute the target vcpu for an irq
199
 *
200
 * @irq:	The irq to route. Must be already locked.
201
 *
202
 * Based on the current state of the interrupt (enabled, pending,
203
 * active, vcpu and target_vcpu), compute the next vcpu this should be
204
 * given to. Return NULL if this shouldn't be injected at all.
205
 *
206
 * Requires the IRQ lock to be held.
207
 */
208
static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
209
{
210
	lockdep_assert_held(&irq->irq_lock);
211

212
	/* If the interrupt is active, it must stay on the current vcpu */
213
	if (irq->active)
214
		return irq->vcpu ? : irq->target_vcpu;
215

216
	/*
217
	 * If the IRQ is not active but enabled and pending, we should direct
218
	 * it to its configured target VCPU.
219
	 * If the distributor is disabled, pending interrupts shouldn't be
220
	 * forwarded.
221
	 */
222
	if (irq->enabled && irq_is_pending(irq)) {
223
		if (unlikely(irq->target_vcpu &&
224
			     !irq->target_vcpu->kvm->arch.vgic.enabled))
225
			return NULL;
226

227
		return irq->target_vcpu;
228
	}
229

230
	/* If neither active nor pending and enabled, then this IRQ should not
231
	 * be queued to any VCPU.
232
	 */
233
	return NULL;
234
}
235

236
/*
237
 * The order of items in the ap_lists defines how we'll pack things in LRs as
238
 * well, the first items in the list being the first things populated in the
239
 * LRs.
240
 *
241
 * A hard rule is that active interrupts can never be pushed out of the LRs
242
 * (and therefore take priority) since we cannot reliably trap on deactivation
243
 * of IRQs and therefore they have to be present in the LRs.
244
 *
245
 * Otherwise things should be sorted by the priority field and the GIC
246
 * hardware support will take care of preemption of priority groups etc.
247
 *
248
 * Return negative if "a" sorts before "b", 0 to preserve order, and positive
249
 * to sort "b" before "a".
250
 */
251
static int vgic_irq_cmp(void *priv, const struct list_head *a,
252
			const struct list_head *b)
253
{
254
	struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
255
	struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
256
	bool penda, pendb;
257
	int ret;
258

259
	/*
260
	 * list_sort may call this function with the same element when
261
	 * the list is fairly long.
262
	 */
263
	if (unlikely(irqa == irqb))
264
		return 0;
265

266
	raw_spin_lock(&irqa->irq_lock);
267
	raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
268

269
	if (irqa->active || irqb->active) {
270
		ret = (int)irqb->active - (int)irqa->active;
271
		goto out;
272
	}
273

274
	penda = irqa->enabled && irq_is_pending(irqa);
275
	pendb = irqb->enabled && irq_is_pending(irqb);
276

277
	if (!penda || !pendb) {
278
		ret = (int)pendb - (int)penda;
279
		goto out;
280
	}
281

282
	/* Both pending and enabled, sort by priority */
283
	ret = irqa->priority - irqb->priority;
284
out:
285
	raw_spin_unlock(&irqb->irq_lock);
286
	raw_spin_unlock(&irqa->irq_lock);
287
	return ret;
288
}
289

290
/* Must be called with the ap_list_lock held */
291
static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
292
{
293
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
294

295
	lockdep_assert_held(&vgic_cpu->ap_list_lock);
296

297
	list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
298
}
299

300
/*
301
 * Only valid injection if changing level for level-triggered IRQs or for a
302
 * rising edge, and in-kernel connected IRQ lines can only be controlled by
303
 * their owner.
304
 */
305
static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
306
{
307
	if (irq->owner != owner)
308
		return false;
309

310
	switch (irq->config) {
311
	case VGIC_CONFIG_LEVEL:
312
		return irq->line_level != level;
313
	case VGIC_CONFIG_EDGE:
314
		return level;
315
	}
316

317
	return false;
318
}
319

320
/*
321
 * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
322
 * Do the queuing if necessary, taking the right locks in the right order.
323
 * Returns true when the IRQ was queued, false otherwise.
324
 *
325
 * Needs to be entered with the IRQ lock already held, but will return
326
 * with all locks dropped.
327
 */
328
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
329
			   unsigned long flags) __releases(&irq->irq_lock)
330
{
331
	struct kvm_vcpu *vcpu;
332

333
	lockdep_assert_held(&irq->irq_lock);
334

335
retry:
336
	vcpu = vgic_target_oracle(irq);
337
	if (irq->vcpu || !vcpu) {
338
		/*
339
		 * If this IRQ is already on a VCPU's ap_list, then it
340
		 * cannot be moved or modified and there is no more work for
341
		 * us to do.
342
		 *
343
		 * Otherwise, if the irq is not pending and enabled, it does
344
		 * not need to be inserted into an ap_list and there is also
345
		 * no more work for us to do.
346
		 */
347
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
348

349
		/*
350
		 * We have to kick the VCPU here, because we could be
351
		 * queueing an edge-triggered interrupt for which we
352
		 * get no EOI maintenance interrupt. In that case,
353
		 * while the IRQ is already on the VCPU's AP list, the
354
		 * VCPU could have EOI'ed the original interrupt and
355
		 * won't see this one until it exits for some other
356
		 * reason.
357
		 */
358
		if (vcpu) {
359
			kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
360
			kvm_vcpu_kick(vcpu);
361
		}
362
		return false;
363
	}
364

365
	/*
366
	 * We must unlock the irq lock to take the ap_list_lock where
367
	 * we are going to insert this new pending interrupt.
368
	 */
369
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
370

371
	/* someone can do stuff here, which we re-check below */
372

373
	raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
374
	raw_spin_lock(&irq->irq_lock);
375

376
	/*
377
	 * Did something change behind our backs?
378
	 *
379
	 * There are two cases:
380
	 * 1) The irq lost its pending state or was disabled behind our
381
	 *    backs and/or it was queued to another VCPU's ap_list.
382
	 * 2) Someone changed the affinity on this irq behind our
383
	 *    backs and we are now holding the wrong ap_list_lock.
384
	 *
385
	 * In both cases, drop the locks and retry.
386
	 */
387

388
	if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
389
		raw_spin_unlock(&irq->irq_lock);
390
		raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock,
391
					   flags);
392

393
		raw_spin_lock_irqsave(&irq->irq_lock, flags);
394
		goto retry;
395
	}
396

397
	/*
398
	 * Grab a reference to the irq to reflect the fact that it is
399
	 * now in the ap_list. This is safe as the caller must already hold a
400
	 * reference on the irq.
401
	 */
402
	vgic_get_irq_kref(irq);
403
	list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
404
	irq->vcpu = vcpu;
405

406
	raw_spin_unlock(&irq->irq_lock);
407
	raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
408

409
	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
410
	kvm_vcpu_kick(vcpu);
411

412
	return true;
413
}
414

415
/**
416
 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
417
 * @kvm:     The VM structure pointer
418
 * @vcpu:    The CPU for PPIs or NULL for global interrupts
419
 * @intid:   The INTID to inject a new state to.
420
 * @level:   Edge-triggered:  true:  to trigger the interrupt
421
 *			      false: to ignore the call
422
 *	     Level-sensitive  true:  raise the input signal
423
 *			      false: lower the input signal
424
 * @owner:   The opaque pointer to the owner of the IRQ being raised to verify
425
 *           that the caller is allowed to inject this IRQ.  Userspace
426
 *           injections will have owner == NULL.
427
 *
428
 * The VGIC is not concerned with devices being active-LOW or active-HIGH for
429
 * level-sensitive interrupts.  You can think of the level parameter as 1
430
 * being HIGH and 0 being LOW and all devices being active-HIGH.
431
 */
432
int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
433
			unsigned int intid, bool level, void *owner)
434
{
435
	struct vgic_irq *irq;
436
	unsigned long flags;
437
	int ret;
438

439
	ret = vgic_lazy_init(kvm);
440
	if (ret)
441
		return ret;
442

443
	if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
444
		return -EINVAL;
445

446
	trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level);
447

448
	if (intid < VGIC_NR_PRIVATE_IRQS)
449
		irq = vgic_get_vcpu_irq(vcpu, intid);
450
	else
451
		irq = vgic_get_irq(kvm, intid);
452
	if (!irq)
453
		return -EINVAL;
454

455
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
456

457
	if (!vgic_validate_injection(irq, level, owner)) {
458
		/* Nothing to see here, move along... */
459
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
460
		vgic_put_irq(kvm, irq);
461
		return 0;
462
	}
463

464
	if (irq->config == VGIC_CONFIG_LEVEL)
465
		irq->line_level = level;
466
	else
467
		irq->pending_latch = true;
468

469
	vgic_queue_irq_unlock(kvm, irq, flags);
470
	vgic_put_irq(kvm, irq);
471

472
	return 0;
473
}
474

475
/* @irq->irq_lock must be held */
476
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
477
			    unsigned int host_irq,
478
			    struct irq_ops *ops)
479
{
480
	struct irq_desc *desc;
481
	struct irq_data *data;
482

483
	/*
484
	 * Find the physical IRQ number corresponding to @host_irq
485
	 */
486
	desc = irq_to_desc(host_irq);
487
	if (!desc) {
488
		kvm_err("%s: no interrupt descriptor\n", __func__);
489
		return -EINVAL;
490
	}
491
	data = irq_desc_get_irq_data(desc);
492
	while (data->parent_data)
493
		data = data->parent_data;
494

495
	irq->hw = true;
496
	irq->host_irq = host_irq;
497
	irq->hwintid = data->hwirq;
498
	irq->ops = ops;
499
	return 0;
500
}
501

502
/* @irq->irq_lock must be held */
503
static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
504
{
505
	irq->hw = false;
506
	irq->hwintid = 0;
507
	irq->ops = NULL;
508
}
509

510
int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
511
			  u32 vintid, struct irq_ops *ops)
512
{
513
	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
514
	unsigned long flags;
515
	int ret;
516

517
	BUG_ON(!irq);
518

519
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
520
	ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
521
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
522
	vgic_put_irq(vcpu->kvm, irq);
523

524
	return ret;
525
}
526

527
/**
528
 * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
529
 * @vcpu: The VCPU pointer
530
 * @vintid: The INTID of the interrupt
531
 *
532
 * Reset the active and pending states of a mapped interrupt.  Kernel
533
 * subsystems injecting mapped interrupts should reset their interrupt lines
534
 * when we are doing a reset of the VM.
535
 */
536
void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
537
{
538
	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
539
	unsigned long flags;
540

541
	if (!irq->hw)
542
		goto out;
543

544
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
545
	irq->active = false;
546
	irq->pending_latch = false;
547
	irq->line_level = false;
548
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
549
out:
550
	vgic_put_irq(vcpu->kvm, irq);
551
}
552

553
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
554
{
555
	struct vgic_irq *irq;
556
	unsigned long flags;
557

558
	if (!vgic_initialized(vcpu->kvm))
559
		return -EAGAIN;
560

561
	irq = vgic_get_vcpu_irq(vcpu, vintid);
562
	BUG_ON(!irq);
563

564
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
565
	kvm_vgic_unmap_irq(irq);
566
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
567
	vgic_put_irq(vcpu->kvm, irq);
568

569
	return 0;
570
}
571

572
int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid)
573
{
574
	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
575
	unsigned long flags;
576
	int ret = -1;
577

578
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
579
	if (irq->hw)
580
		ret = irq->hwintid;
581
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
582

583
	vgic_put_irq(vcpu->kvm, irq);
584
	return ret;
585
}
586

587
/**
588
 * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
589
 *
590
 * @vcpu:   Pointer to the VCPU (used for PPIs)
591
 * @intid:  The virtual INTID identifying the interrupt (PPI or SPI)
592
 * @owner:  Opaque pointer to the owner
593
 *
594
 * Returns 0 if intid is not already used by another in-kernel device and the
595
 * owner is set, otherwise returns an error code.
596
 */
597
int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
598
{
599
	struct vgic_irq *irq;
600
	unsigned long flags;
601
	int ret = 0;
602

603
	if (!vgic_initialized(vcpu->kvm))
604
		return -EAGAIN;
605

606
	/* SGIs and LPIs cannot be wired up to any device */
607
	if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
608
		return -EINVAL;
609

610
	irq = vgic_get_vcpu_irq(vcpu, intid);
611
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
612
	if (irq->owner && irq->owner != owner)
613
		ret = -EEXIST;
614
	else
615
		irq->owner = owner;
616
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
617

618
	return ret;
619
}
620

621
/**
622
 * vgic_prune_ap_list - Remove non-relevant interrupts from the list
623
 *
624
 * @vcpu: The VCPU pointer
625
 *
626
 * Go over the list of "interesting" interrupts, and prune those that we
627
 * won't have to consider in the near future.
628
 */
629
static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
630
{
631
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
632
	struct vgic_irq *irq, *tmp;
633

634
	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
635

636
retry:
637
	raw_spin_lock(&vgic_cpu->ap_list_lock);
638

639
	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
640
		struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
641
		bool target_vcpu_needs_kick = false;
642

643
		raw_spin_lock(&irq->irq_lock);
644

645
		BUG_ON(vcpu != irq->vcpu);
646

647
		target_vcpu = vgic_target_oracle(irq);
648

649
		if (!target_vcpu) {
650
			/*
651
			 * We don't need to process this interrupt any
652
			 * further, move it off the list.
653
			 */
654
			list_del(&irq->ap_list);
655
			irq->vcpu = NULL;
656
			raw_spin_unlock(&irq->irq_lock);
657

658
			/*
659
			 * This vgic_put_irq call matches the
660
			 * vgic_get_irq_kref in vgic_queue_irq_unlock,
661
			 * where we added the LPI to the ap_list. As
662
			 * we remove the irq from the list, we drop
663
			 * also drop the refcount.
664
			 */
665
			vgic_put_irq(vcpu->kvm, irq);
666
			continue;
667
		}
668

669
		if (target_vcpu == vcpu) {
670
			/* We're on the right CPU */
671
			raw_spin_unlock(&irq->irq_lock);
672
			continue;
673
		}
674

675
		/* This interrupt looks like it has to be migrated. */
676

677
		raw_spin_unlock(&irq->irq_lock);
678
		raw_spin_unlock(&vgic_cpu->ap_list_lock);
679

680
		/*
681
		 * Ensure locking order by always locking the smallest
682
		 * ID first.
683
		 */
684
		if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
685
			vcpuA = vcpu;
686
			vcpuB = target_vcpu;
687
		} else {
688
			vcpuA = target_vcpu;
689
			vcpuB = vcpu;
690
		}
691

692
		raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
693
		raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
694
				      SINGLE_DEPTH_NESTING);
695
		raw_spin_lock(&irq->irq_lock);
696

697
		/*
698
		 * If the affinity has been preserved, move the
699
		 * interrupt around. Otherwise, it means things have
700
		 * changed while the interrupt was unlocked, and we
701
		 * need to replay this.
702
		 *
703
		 * In all cases, we cannot trust the list not to have
704
		 * changed, so we restart from the beginning.
705
		 */
706
		if (target_vcpu == vgic_target_oracle(irq)) {
707
			struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;
708

709
			list_del(&irq->ap_list);
710
			irq->vcpu = target_vcpu;
711
			list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
712
			target_vcpu_needs_kick = true;
713
		}
714

715
		raw_spin_unlock(&irq->irq_lock);
716
		raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
717
		raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
718

719
		if (target_vcpu_needs_kick) {
720
			kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
721
			kvm_vcpu_kick(target_vcpu);
722
		}
723

724
		goto retry;
725
	}
726

727
	raw_spin_unlock(&vgic_cpu->ap_list_lock);
728
}
729

730
static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
731
{
732
	if (kvm_vgic_global_state.type == VGIC_V2)
733
		vgic_v2_fold_lr_state(vcpu);
734
	else
735
		vgic_v3_fold_lr_state(vcpu);
736
}
737

738
/* Requires the irq_lock to be held. */
739
static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
740
				    struct vgic_irq *irq, int lr)
741
{
742
	lockdep_assert_held(&irq->irq_lock);
743

744
	if (kvm_vgic_global_state.type == VGIC_V2)
745
		vgic_v2_populate_lr(vcpu, irq, lr);
746
	else
747
		vgic_v3_populate_lr(vcpu, irq, lr);
748
}
749

750
static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
751
{
752
	if (kvm_vgic_global_state.type == VGIC_V2)
753
		vgic_v2_clear_lr(vcpu, lr);
754
	else
755
		vgic_v3_clear_lr(vcpu, lr);
756
}
757

758
static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
759
{
760
	if (kvm_vgic_global_state.type == VGIC_V2)
761
		vgic_v2_set_underflow(vcpu);
762
	else
763
		vgic_v3_set_underflow(vcpu);
764
}
765

766
/* Requires the ap_list_lock to be held. */
767
static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
768
				 bool *multi_sgi)
769
{
770
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
771
	struct vgic_irq *irq;
772
	int count = 0;
773

774
	*multi_sgi = false;
775

776
	lockdep_assert_held(&vgic_cpu->ap_list_lock);
777

778
	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
779
		int w;
780

781
		raw_spin_lock(&irq->irq_lock);
782
		/* GICv2 SGIs can count for more than one... */
783
		w = vgic_irq_get_lr_count(irq);
784
		raw_spin_unlock(&irq->irq_lock);
785

786
		count += w;
787
		*multi_sgi |= (w > 1);
788
	}
789
	return count;
790
}
791

792
/* Requires the VCPU's ap_list_lock to be held. */
793
static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
794
{
795
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
796
	struct vgic_irq *irq;
797
	int count;
798
	bool multi_sgi;
799
	u8 prio = 0xff;
800
	int i = 0;
801

802
	lockdep_assert_held(&vgic_cpu->ap_list_lock);
803

804
	count = compute_ap_list_depth(vcpu, &multi_sgi);
805
	if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
806
		vgic_sort_ap_list(vcpu);
807

808
	count = 0;
809

810
	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
811
		raw_spin_lock(&irq->irq_lock);
812

813
		/*
814
		 * If we have multi-SGIs in the pipeline, we need to
815
		 * guarantee that they are all seen before any IRQ of
816
		 * lower priority. In that case, we need to filter out
817
		 * these interrupts by exiting early. This is easy as
818
		 * the AP list has been sorted already.
819
		 */
820
		if (multi_sgi && irq->priority > prio) {
821
			_raw_spin_unlock(&irq->irq_lock);
822
			break;
823
		}
824

825
		if (likely(vgic_target_oracle(irq) == vcpu)) {
826
			vgic_populate_lr(vcpu, irq, count++);
827

828
			if (irq->source)
829
				prio = irq->priority;
830
		}
831

832
		raw_spin_unlock(&irq->irq_lock);
833

834
		if (count == kvm_vgic_global_state.nr_lr) {
835
			if (!list_is_last(&irq->ap_list,
836
					  &vgic_cpu->ap_list_head))
837
				vgic_set_underflow(vcpu);
838
			break;
839
		}
840
	}
841

842
	/* Nuke remaining LRs */
843
	for (i = count ; i < kvm_vgic_global_state.nr_lr; i++)
844
		vgic_clear_lr(vcpu, i);
845

846
	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
847
		vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
848
	else
849
		vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
850
}
851

852
static inline bool can_access_vgic_from_kernel(void)
853
{
854
	/*
855
	 * GICv2 can always be accessed from the kernel because it is
856
	 * memory-mapped, and VHE systems can access GICv3 EL2 system
857
	 * registers.
858
	 */
859
	return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
860
}
861

862
static inline void vgic_save_state(struct kvm_vcpu *vcpu)
863
{
864
	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
865
		vgic_v2_save_state(vcpu);
866
	else
867
		__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
868
}
869

870
/* Sync back the hardware VGIC state into our emulation after a guest's run. */
871
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
872
{
873
	int used_lrs;
874

875
	/* If nesting, emulate the HW effect from L0 to L1 */
876
	if (vgic_state_is_nested(vcpu)) {
877
		vgic_v3_sync_nested(vcpu);
878
		return;
879
	}
880

881
	if (vcpu_has_nv(vcpu))
882
		vgic_v3_nested_update_mi(vcpu);
883

884
	/* An empty ap_list_head implies used_lrs == 0 */
885
	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
886
		return;
887

888
	if (can_access_vgic_from_kernel())
889
		vgic_save_state(vcpu);
890

891
	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
892
		used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
893
	else
894
		used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
895

896
	if (used_lrs)
897
		vgic_fold_lr_state(vcpu);
898
	vgic_prune_ap_list(vcpu);
899
}
900

901
static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
902
{
903
	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
904
		vgic_v2_restore_state(vcpu);
905
	else
906
		__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
907
}
908

909
/* Flush our emulation state into the GIC hardware before entering the guest. */
910
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
911
{
912
	/*
913
	 * If in a nested state, we must return early. Two possibilities:
914
	 *
915
	 * - If we have any pending IRQ for the guest and the guest
916
	 *   expects IRQs to be handled in its virtual EL2 mode (the
917
	 *   virtual IMO bit is set) and it is not already running in
918
	 *   virtual EL2 mode, then we have to emulate an IRQ
919
	 *   exception to virtual EL2.
920
	 *
921
	 *   We do that by placing a request to ourselves which will
922
	 *   abort the entry procedure and inject the exception at the
923
	 *   beginning of the run loop.
924
	 *
925
	 * - Otherwise, do exactly *NOTHING*. The guest state is
926
	 *   already loaded, and we can carry on with running it.
927
	 *
928
	 * If we have NV, but are not in a nested state, compute the
929
	 * maintenance interrupt state, as it may fire.
930
	 */
931
	if (vgic_state_is_nested(vcpu)) {
932
		if (kvm_vgic_vcpu_pending_irq(vcpu))
933
			kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu);
934

935
		return;
936
	}
937

938
	if (vcpu_has_nv(vcpu))
939
		vgic_v3_nested_update_mi(vcpu);
940

941
	/*
942
	 * If there are no virtual interrupts active or pending for this
943
	 * VCPU, then there is no work to do and we can bail out without
944
	 * taking any lock.  There is a potential race with someone injecting
945
	 * interrupts to the VCPU, but it is a benign race as the VCPU will
946
	 * either observe the new interrupt before or after doing this check,
947
	 * and introducing additional synchronization mechanism doesn't change
948
	 * this.
949
	 *
950
	 * Note that we still need to go through the whole thing if anything
951
	 * can be directly injected (GICv4).
952
	 */
953
	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
954
	    !vgic_supports_direct_irqs(vcpu->kvm))
955
		return;
956

957
	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
958

959
	if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
960
		raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
961
		vgic_flush_lr_state(vcpu);
962
		raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
963
	}
964

965
	if (can_access_vgic_from_kernel())
966
		vgic_restore_state(vcpu);
967

968
	if (vgic_supports_direct_irqs(vcpu->kvm))
969
		vgic_v4_commit(vcpu);
970
}
971

972
void kvm_vgic_load(struct kvm_vcpu *vcpu)
973
{
974
	if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
975
		if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
976
			__vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
977
		return;
978
	}
979

980
	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
981
		vgic_v2_load(vcpu);
982
	else
983
		vgic_v3_load(vcpu);
984
}
985

986
void kvm_vgic_put(struct kvm_vcpu *vcpu)
987
{
988
	if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
989
		if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
990
			__vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
991
		return;
992
	}
993

994
	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
995
		vgic_v2_put(vcpu);
996
	else
997
		vgic_v3_put(vcpu);
998
}
999

1000
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
1001
{
1002
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1003
	struct vgic_irq *irq;
1004
	bool pending = false;
1005
	unsigned long flags;
1006
	struct vgic_vmcr vmcr;
1007

1008
	if (!vcpu->kvm->arch.vgic.enabled)
1009
		return false;
1010

1011
	if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
1012
		return true;
1013

1014
	vgic_get_vmcr(vcpu, &vmcr);
1015

1016
	raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
1017

1018
	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
1019
		raw_spin_lock(&irq->irq_lock);
1020
		pending = irq_is_pending(irq) && irq->enabled &&
1021
			  !irq->active &&
1022
			  irq->priority < vmcr.pmr;
1023
		raw_spin_unlock(&irq->irq_lock);
1024

1025
		if (pending)
1026
			break;
1027
	}
1028

1029
	raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
1030

1031
	return pending;
1032
}
1033

1034
void vgic_kick_vcpus(struct kvm *kvm)
1035
{
1036
	struct kvm_vcpu *vcpu;
1037
	unsigned long c;
1038

1039
	/*
1040
	 * We've injected an interrupt, time to find out who deserves
1041
	 * a good kick...
1042
	 */
1043
	kvm_for_each_vcpu(c, vcpu, kvm) {
1044
		if (kvm_vgic_vcpu_pending_irq(vcpu)) {
1045
			kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
1046
			kvm_vcpu_kick(vcpu);
1047
		}
1048
	}
1049
}
1050

1051
bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
1052
{
1053
	struct vgic_irq *irq;
1054
	bool map_is_active;
1055
	unsigned long flags;
1056

1057
	if (!vgic_initialized(vcpu->kvm))
1058
		return false;
1059

1060
	irq = vgic_get_vcpu_irq(vcpu, vintid);
1061
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
1062
	map_is_active = irq->hw && irq->active;
1063
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
1064
	vgic_put_irq(vcpu->kvm, irq);
1065

1066
	return map_is_active;
1067
}
1068

1069
/*
1070
 * Level-triggered mapped IRQs are special because we only observe rising
1071
 * edges as input to the VGIC.
1072
 *
1073
 * If the guest never acked the interrupt we have to sample the physical
1074
 * line and set the line level, because the device state could have changed
1075
 * or we simply need to process the still pending interrupt later.
1076
 *
1077
 * We could also have entered the guest with the interrupt active+pending.
1078
 * On the next exit, we need to re-evaluate the pending state, as it could
1079
 * otherwise result in a spurious interrupt by injecting a now potentially
1080
 * stale pending state.
1081
 *
1082
 * If this causes us to lower the level, we have to also clear the physical
1083
 * active state, since we will otherwise never be told when the interrupt
1084
 * becomes asserted again.
1085
 *
1086
 * Another case is when the interrupt requires a helping hand on
1087
 * deactivation (no HW deactivation, for example).
1088
 */
1089
void vgic_irq_handle_resampling(struct vgic_irq *irq,
1090
				bool lr_deactivated, bool lr_pending)
1091
{
1092
	if (vgic_irq_is_mapped_level(irq)) {
1093
		bool resample = false;
1094

1095
		if (unlikely(vgic_irq_needs_resampling(irq))) {
1096
			resample = !(irq->active || irq->pending_latch);
1097
		} else if (lr_pending || (lr_deactivated && irq->line_level)) {
1098
			irq->line_level = vgic_get_phys_line_level(irq);
1099
			resample = !irq->line_level;
1100
		}
1101

1102
		if (resample)
1103
			vgic_irq_set_phys_active(irq, false);
1104
	}
1105
}
1106

1107
Product

Resources

Company