// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 ARM Ltd.
 * Author: Marc Zyngier <[email protected]>
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/kvm_host.h>
#include <linux/irqchip/arm-gic-v3.h>

#include "vgic.h"

/*
 * How KVM uses GICv4 (insert rude comments here):
 *
 * The vgic-v4 layer acts as a bridge between several entities:
 * - The GICv4 ITS representation offered by the ITS driver
 * - VFIO, which is in charge of the PCI endpoint
 * - The virtual ITS, which is the only thing the guest sees
 *
 * The configuration of VLPIs is triggered by a callback from VFIO,
 * instructing KVM that a PCI device has been configured to deliver
 * MSIs to a vITS.
 *
 * kvm_vgic_v4_set_forwarding() is thus called with the routing entry,
 * and this is used to find the corresponding vITS data structures
 * (ITS instance, device, event and irq) using a process that is
 * extremely similar to the injection of an MSI.
 *
 * At this stage, we can link the guest's view of an LPI (uniquely
 * identified by the routing entry) and the host irq, using the GICv4
 * driver mapping operation. Should the mapping succeed, we've then
 * successfully upgraded the guest's LPI to a VLPI. We can then start
 * with updating GICv4's view of the property table and generating an
 * INValidation in order to kickstart the delivery of this VLPI to the
 * guest directly, without software intervention. Well, almost.
 *
 * When the PCI endpoint is deconfigured, this operation is reversed
 * with VFIO calling kvm_vgic_v4_unset_forwarding().
 *
 * Once the VLPI has been mapped, it needs to follow any change the
 * guest performs on its LPI through the vITS. For that, a number of
 * command handlers have hooks to communicate these changes to the HW:
 * - Any invalidation triggers a call to its_prop_update_vlpi()
 * - The INT command results in an irq_set_irqchip_state(), which
 *   generates an INT on the corresponding VLPI.
 * - The CLEAR command results in an irq_set_irqchip_state(), which
 *   generates a CLEAR on the corresponding VLPI.
 * - DISCARD translates into an unmap, similar to a call to
 *   kvm_vgic_v4_unset_forwarding().
 * - MOVI is translated into an update of the existing mapping,
 *   changing the target vcpu, resulting in a VMOVI being generated.
 * - MOVALL is translated into a string of mapping updates (similar to
 *   the handling of MOVI). MOVALL is horrible.
 *
 * Note that a DISCARD/MAPTI sequence emitted from the guest without
 * reprogramming the PCI endpoint after MAPTI does not result in a
 * VLPI being mapped, as there is no callback from VFIO (the guest
 * will get the interrupt via the normal SW injection). Fixing this is
 * not trivial, and requires some horrible messing with the VFIO
 * internals. Not fun. Don't do that.
 *
 * Then there is the scheduling. Each time a vcpu is about to run on a
 * physical CPU, KVM must tell the corresponding redistributor about
 * it. And if we've migrated our vcpu from one CPU to another, we must
 * tell the ITS (so that the messages reach the right redistributor).
 * This is done in two steps: first issue an irq_set_affinity() on the
 * irq corresponding to the vcpu, then call its_make_vpe_resident()
 * (see the sketch right after this comment). You must be in a
 * non-preemptible context. On exit, a call to
 * its_make_vpe_non_resident() tells the redistributor that we're done
 * with the vcpu.
 *
 * Finally, the doorbell handling: Each vcpu is allocated an interrupt
 * which will fire each time a VLPI is made pending whilst the vcpu is
 * not running. Each time the vcpu gets blocked, the doorbell
 * interrupt gets enabled. When the vcpu is unblocked (for whatever
 * reason), the doorbell interrupt is disabled.
 */
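
/*
 * For illustration only: a minimal sketch of the two-step residency
 * sequence described above, using a hypothetical helper name and
 * assuming the caller already runs in a non-preemptible context. The
 * real implementation lives in vgic_v4_load()/vgic_v4_put() below.
 */
static int __maybe_unused vgic_v4_example_schedule(struct its_vpe *vpe)
{
	int err;

	/* Point the vPE doorbell at the CPU we're about to run on */
	err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id()));
	if (err)
		return err;

	/* Tell the redistributor that the vPE is now scheduled here */
	return its_make_vpe_resident(vpe, false /* g0 */, true /* assume g1 */);
}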

#define DB_IRQ_FLAGS	(IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING)

static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
{
	struct kvm_vcpu *vcpu = info;

	/* We got the message, no need to fire again */
	if (!kvm_vgic_global_state.has_gicv4_1 &&
	    !irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
		disable_irq_nosync(irq);

	/*
	 * The v4.1 doorbell can fire concurrently with the vPE being
	 * made non-resident. Ensure we only update pending_last
	 * *after* the non-residency sequence has completed.
	 */
	raw_spin_lock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock);
	vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
	raw_spin_unlock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock);

	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
	kvm_vcpu_kick(vcpu);

	return IRQ_HANDLED;
}

static void vgic_v4_sync_sgi_config(struct its_vpe *vpe, struct vgic_irq *irq)
{
	vpe->sgi_config[irq->intid].enabled	= irq->enabled;
	vpe->sgi_config[irq->intid].group	= irq->group;
	vpe->sgi_config[irq->intid].priority	= irq->priority;
}

static void vgic_v4_enable_vsgis(struct kvm_vcpu *vcpu)
{
	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
	int i;

	/*
	 * With GICv4.1, every virtual SGI can be directly injected. So
	 * let's pretend that they are HW interrupts, tied to a host
	 * IRQ. The SGI code will do its magic.
	 */
	for (i = 0; i < VGIC_NR_SGIS; i++) {
		struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i);
		struct irq_desc *desc;
		unsigned long flags;
		int ret;

		raw_spin_lock_irqsave(&irq->irq_lock, flags);

		if (irq->hw)
			goto unlock;

		irq->hw = true;
		irq->host_irq = irq_find_mapping(vpe->sgi_domain, i);

		/* Transfer the full irq state to the vPE */
		vgic_v4_sync_sgi_config(vpe, irq);
		desc = irq_to_desc(irq->host_irq);
		ret = irq_domain_activate_irq(irq_desc_get_irq_data(desc),
					      false);
		if (!WARN_ON(ret)) {
			/* Transfer pending state */
			ret = irq_set_irqchip_state(irq->host_irq,
						    IRQCHIP_STATE_PENDING,
						    irq->pending_latch);
			WARN_ON(ret);
			irq->pending_latch = false;
		}
unlock:
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
		vgic_put_irq(vcpu->kvm, irq);
	}
}

static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
{
	int i;

	for (i = 0; i < VGIC_NR_SGIS; i++) {
		struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i);
		struct irq_desc *desc;
		unsigned long flags;
		bool pending;
		int ret;

		raw_spin_lock_irqsave(&irq->irq_lock, flags);

		if (!irq->hw)
			goto unlock;

		irq->hw = false;
		ret = irq_get_irqchip_state(irq->host_irq,
					    IRQCHIP_STATE_PENDING,
					    &pending);
		WARN_ON(ret);

		irq->pending_latch = pending;

		desc = irq_to_desc(irq->host_irq);
		irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
unlock:
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
		vgic_put_irq(vcpu->kvm, irq);
	}
}

void vgic_v4_configure_vsgis(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct kvm_vcpu *vcpu;
	unsigned long i;

	lockdep_assert_held(&kvm->arch.config_lock);

	kvm_arm_halt_guest(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (dist->nassgireq)
			vgic_v4_enable_vsgis(vcpu);
		else
			vgic_v4_disable_vsgis(vcpu);
	}

	kvm_arm_resume_guest(kvm);
}
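
/*
 * For illustration only: how a write that flips GICD_CTLR.nASSGIreq
 * would be propagated by a hypothetical caller, assuming it already
 * holds the config_lock that vgic_v4_configure_vsgis() asserts.
 */
static void __maybe_unused vgic_v4_example_nassgireq(struct kvm *kvm, bool en)
{
	lockdep_assert_held(&kvm->arch.config_lock);

	kvm->arch.vgic.nassgireq = en;
	vgic_v4_configure_vsgis(kvm);	/* halts and resumes the guest */
}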

/*
 * Must be called with GICv4.1 and with the vPE unmapped, which
 * guarantees that any VPT caches associated with the vPE have been
 * invalidated, so we can get the VLPI state by peeking at the VPT.
 */
void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val)
{
	struct its_vpe *vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
	int mask = BIT(irq->intid % BITS_PER_BYTE);
	void *va;
	u8 *ptr;

	va = page_address(vpe->vpt_page);
	ptr = va + irq->intid / BITS_PER_BYTE;

	*val = !!(*ptr & mask);
}
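
/*
 * For illustration only: the VPT is a plain bitmap with one pending
 * bit per intid, so intid 8197 lives at byte 8197 / 8 = 1024, bit
 * 8197 % 8 = 5. A hypothetical standalone reader looks like this:
 */
static bool __maybe_unused vgic_v4_example_peek_vpt(void *vpt_va, u32 intid)
{
	u8 *ptr = (u8 *)vpt_va + intid / BITS_PER_BYTE;

	return !!(*ptr & BIT(intid % BITS_PER_BYTE));
}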

int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq)
{
	return request_irq(irq, vgic_v4_doorbell_handler, 0, "vcpu", vcpu);
}

/**
 * vgic_v4_init - Initialize the GICv4 data structures
 * @kvm: Pointer to the VM being initialized
 *
 * We may be called each time a vITS is created, or when the
 * vgic is initialized. In both cases, the number of vcpus
 * should now be fixed.
 */
int vgic_v4_init(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct kvm_vcpu *vcpu;
	int nr_vcpus, ret;
	unsigned long i;

	lockdep_assert_held(&kvm->arch.config_lock);

	if (!kvm_vgic_global_state.has_gicv4)
		return 0; /* Nothing to see here... move along. */

	if (dist->its_vm.vpes)
		return 0;

	nr_vcpus = atomic_read(&kvm->online_vcpus);

	dist->its_vm.vpes = kcalloc(nr_vcpus, sizeof(*dist->its_vm.vpes),
				    GFP_KERNEL_ACCOUNT);
	if (!dist->its_vm.vpes)
		return -ENOMEM;

	dist->its_vm.nr_vpes = nr_vcpus;

	kvm_for_each_vcpu(i, vcpu, kvm)
		dist->its_vm.vpes[i] = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;

	ret = its_alloc_vcpu_irqs(&dist->its_vm);
	if (ret < 0) {
		kvm_err("VPE IRQ allocation failure\n");
		kfree(dist->its_vm.vpes);
		dist->its_vm.nr_vpes = 0;
		dist->its_vm.vpes = NULL;
		return ret;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		int irq = dist->its_vm.vpes[i]->irq;
		unsigned long irq_flags = DB_IRQ_FLAGS;

		/*
		 * Don't automatically enable the doorbell, as we're
		 * flipping it back and forth when the vcpu gets
		 * blocked. Also disable the lazy disabling, as the
		 * doorbell could kick us out of the guest too
		 * early...
		 *
		 * On GICv4.1, the doorbell is managed in HW and must
		 * be left enabled.
		 */
		if (kvm_vgic_global_state.has_gicv4_1)
			irq_flags &= ~IRQ_NOAUTOEN;
		irq_set_status_flags(irq, irq_flags);

		ret = vgic_v4_request_vpe_irq(vcpu, irq);
		if (ret) {
			kvm_err("failed to allocate vcpu IRQ%d\n", irq);
			/*
			 * Trick: adjust the number of vpes so we know
			 * how many to nuke on teardown...
			 */
			dist->its_vm.nr_vpes = i;
			break;
		}
	}

	if (ret)
		vgic_v4_teardown(kvm);

	return ret;
}

/**
 * vgic_v4_teardown - Free the GICv4 data structures
 * @kvm: Pointer to the VM being destroyed
 */
void vgic_v4_teardown(struct kvm *kvm)
{
	struct its_vm *its_vm = &kvm->arch.vgic.its_vm;
	int i;

	lockdep_assert_held(&kvm->arch.config_lock);

	if (!its_vm->vpes)
		return;

	for (i = 0; i < its_vm->nr_vpes; i++) {
		struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i);
		int irq = its_vm->vpes[i]->irq;

		irq_clear_status_flags(irq, DB_IRQ_FLAGS);
		free_irq(irq, vcpu);
	}

	its_free_vcpu_irqs(its_vm);
	kfree(its_vm->vpes);
	its_vm->nr_vpes = 0;
	its_vm->vpes = NULL;
}
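
/*
 * For illustration only: on GICv4.0 the doorbell is only left armed
 * while the vcpu is blocked, as described in the comment at the top
 * of this file. A hypothetical blocking hook would do something like
 * this with the per-vPE irq allocated by vgic_v4_init():
 */
static void __maybe_unused vgic_v4_example_block(struct its_vpe *vpe,
						 bool blocked)
{
	if (kvm_vgic_global_state.has_gicv4_1)
		return;		/* v4.1 doorbells are managed in HW */

	if (blocked)
		enable_irq(vpe->irq);
	else
		disable_irq(vpe->irq);
}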

static inline bool vgic_v4_want_doorbell(struct kvm_vcpu *vcpu)
{
	if (vcpu_get_flag(vcpu, IN_WFI))
		return true;

	if (likely(!vcpu_has_nv(vcpu)))
		return false;

	/*
	 * GICv4 hardware is only ever used for the L1. Mark the vPE (i.e. the
	 * L1 context) nonresident and request a doorbell to kick us out of the
	 * L2 when an IRQ becomes pending.
	 */
	return vcpu_get_flag(vcpu, IN_NESTED_ERET);
}

int vgic_v4_put(struct kvm_vcpu *vcpu)
{
	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;

	if (!vgic_supports_direct_irqs(vcpu->kvm) || !vpe->resident)
		return 0;

	return its_make_vpe_non_resident(vpe, vgic_v4_want_doorbell(vcpu));
}

int vgic_v4_load(struct kvm_vcpu *vcpu)
{
	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
	int err;

	if (!vgic_supports_direct_irqs(vcpu->kvm) || vpe->resident)
		return 0;

	if (vcpu_get_flag(vcpu, IN_WFI))
		return 0;

	/*
	 * Before making the VPE resident, make sure the redistributor
	 * corresponding to our current CPU expects us here. See the
	 * doc in drivers/irqchip/irq-gic-v4.c to understand how this
	 * turns into a VMOVP command at the ITS level.
	 */
	err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id()));
	if (err)
		return err;

	err = its_make_vpe_resident(vpe, false, vcpu->kvm->arch.vgic.enabled);
	if (err)
		return err;

	/*
	 * Now that the VPE is resident, let's get rid of a potential
	 * doorbell interrupt that would still be pending. This is a
	 * GICv4.0 only "feature"...
	 */
	if (!kvm_vgic_global_state.has_gicv4_1)
		err = irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false);

	return err;
}

void vgic_v4_commit(struct kvm_vcpu *vcpu)
{
	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;

	/*
	 * No need to wait for the vPE to be ready across a shallow guest
	 * exit, as only a vcpu_put will invalidate it.
	 */
	if (!vpe->ready)
		its_commit_vpe(vpe);
}
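
/*
 * For illustration only: the order in which the helpers above are
 * used around a single guest entry, in a hypothetical and heavily
 * simplified run loop (preemption already disabled, error handling
 * mostly elided).
 */
static int __maybe_unused vgic_v4_example_run_once(struct kvm_vcpu *vcpu)
{
	int err;

	err = vgic_v4_load(vcpu);	/* make the vPE resident */
	if (err)
		return err;

	vgic_v4_commit(vcpu);		/* wait for residency to take effect */

	/* ... enter the guest here ... */

	return vgic_v4_put(vcpu);	/* non-resident, maybe with doorbell */
}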

static struct vgic_its *vgic_get_its(struct kvm *kvm,
				     struct kvm_kernel_irq_routing_entry *irq_entry)
{
	struct kvm_msi msi = (struct kvm_msi) {
		.address_lo	= irq_entry->msi.address_lo,
		.address_hi	= irq_entry->msi.address_hi,
		.data		= irq_entry->msi.data,
		.flags		= irq_entry->msi.flags,
		.devid		= irq_entry->msi.devid,
	};

	return vgic_msi_to_its(kvm, &msi);
}

int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
			       struct kvm_kernel_irq_routing_entry *irq_entry)
{
	struct vgic_its *its;
	struct vgic_irq *irq;
	struct its_vlpi_map map;
	unsigned long flags;
	int ret = 0;

	if (!vgic_supports_direct_msis(kvm))
		return 0;

	/*
	 * Get the ITS, and escape early on error (not a valid
	 * doorbell for any of our vITSs).
	 */
	its = vgic_get_its(kvm, irq_entry);
	if (IS_ERR(its))
		return 0;

	guard(mutex)(&its->its_lock);

	/*
	 * Perform the actual DevID/EventID -> LPI translation.
	 *
	 * Silently exit if translation fails as the guest (or userspace!)
	 * has managed to do something stupid. Emulated LPI injection will
	 * still work if the guest figures itself out at a later time.
	 */
	if (vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
				 irq_entry->msi.data, &irq))
		return 0;

	raw_spin_lock_irqsave(&irq->irq_lock, flags);

	/* Silently exit if the vLPI is already mapped */
	if (irq->hw)
		goto out_unlock_irq;

	/*
	 * Emit the mapping request. If it fails, the ITS probably
	 * isn't v4 compatible, so let's silently bail out. Holding
	 * the ITS lock should ensure that nothing can modify the
	 * target vcpu.
	 */
	map = (struct its_vlpi_map) {
		.vm		= &kvm->arch.vgic.its_vm,
		.vpe		= &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe,
		.vintid		= irq->intid,
		.properties	= ((irq->priority & 0xfc) |
				   (irq->enabled ? LPI_PROP_ENABLED : 0) |
				   LPI_PROP_GROUP1),
		.db_enabled	= true,
	};

	ret = its_map_vlpi(virq, &map);
	if (ret)
		goto out_unlock_irq;

	irq->hw		= true;
	irq->host_irq	= virq;
	atomic_inc(&map.vpe->vlpi_count);

	/* Transfer pending state */
	if (!irq->pending_latch)
		goto out_unlock_irq;

	ret = irq_set_irqchip_state(irq->host_irq, IRQCHIP_STATE_PENDING,
				    irq->pending_latch);
	WARN_RATELIMIT(ret, "IRQ %d", irq->host_irq);

	/*
	 * Clear pending_latch and communicate this state
	 * change via vgic_queue_irq_unlock.
	 */
	irq->pending_latch = false;
	vgic_queue_irq_unlock(kvm, irq, flags);
	return ret;

out_unlock_irq:
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
	return ret;
}

static struct vgic_irq *__vgic_host_irq_get_vlpi(struct kvm *kvm, int host_irq)
{
	struct vgic_irq *irq;
	unsigned long idx;

	guard(rcu)();
	xa_for_each(&kvm->arch.vgic.lpi_xa, idx, irq) {
		if (!irq->hw || irq->host_irq != host_irq)
			continue;

		if (!vgic_try_get_irq_ref(irq))
			return NULL;

		return irq;
	}

	return NULL;
}

void kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq)
{
	struct vgic_irq *irq;
	unsigned long flags;

	if (!vgic_supports_direct_msis(kvm))
		return;

	irq = __vgic_host_irq_get_vlpi(kvm, host_irq);
	if (!irq)
		return;

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	WARN_ON(irq->hw && irq->host_irq != host_irq);
	if (irq->hw) {
		atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
		irq->hw = false;
		its_unmap_vlpi(host_irq);
	}

	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
	vgic_put_irq(kvm, irq);
}
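
/*
 * For illustration only: a hypothetical caller (loosely modelled on
 * the VFIO path described at the top of this file) pairs the two
 * entry points around the lifetime of the host IRQ -> guest MSI
 * association.
 */
static void __maybe_unused vgic_v4_example_forward(struct kvm *kvm, int host_irq,
						   struct kvm_kernel_irq_routing_entry *e)
{
	/* Try to upgrade the LPI to a VLPI; SW injection still works if not */
	if (kvm_vgic_v4_set_forwarding(kvm, host_irq, e))
		return;

	/* ... the device now delivers MSIs directly to the vPE ... */

	/* Reverse the operation when the endpoint is deconfigured */
	kvm_vgic_v4_unset_forwarding(kvm, host_irq);
}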