// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 ARM Ltd.
 * Author: Marc Zyngier <[email protected]>
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/kvm_host.h>
#include <linux/irqchip/arm-gic-v3.h>

#include "vgic.h"

/*
 * How KVM uses GICv4 (insert rude comments here):
 *
 * The vgic-v4 layer acts as a bridge between several entities:
 * - The GICv4 ITS representation offered by the ITS driver
 * - VFIO, which is in charge of the PCI endpoint
 * - The virtual ITS, which is the only thing the guest sees
 *
 * The configuration of VLPIs is triggered by a callback from VFIO,
 * instructing KVM that a PCI device has been configured to deliver
 * MSIs to a vITS.
 *
 * kvm_vgic_v4_set_forwarding() is thus called with the routing entry,
 * and this is used to find the corresponding vITS data structures
 * (ITS instance, device, event and irq) using a process that is
 * extremely similar to the injection of an MSI.
 *
 * At this stage, we can link the guest's view of an LPI (uniquely
 * identified by the routing entry) and the host irq, using the GICv4
 * driver mapping operation. Should the mapping succeed, we've then
 * successfully upgraded the guest's LPI to a VLPI. We can then start
 * with updating GICv4's view of the property table and generating an
 * INValidation in order to kickstart the delivery of this VLPI to the
 * guest directly, without software intervention. Well, almost.
 *
 * When the PCI endpoint is deconfigured, this operation is reversed
 * with VFIO calling kvm_vgic_v4_unset_forwarding().
 *
 * Once the VLPI has been mapped, it needs to follow any change the
 * guest performs on its LPI through the vITS. For that, a number of
 * command handlers have hooks to communicate these changes to the HW:
 * - Any invalidation triggers a call to its_prop_update_vlpi()
 * - The INT command results in an irq_set_irqchip_state(), which
 *   generates an INT on the corresponding VLPI.
 * - The CLEAR command results in an irq_set_irqchip_state(), which
 *   generates a CLEAR on the corresponding VLPI.
 * - DISCARD translates into an unmap, similar to a call to
 *   kvm_vgic_v4_unset_forwarding().
 * - MOVI is translated into an update of the existing mapping, changing
 *   the target vcpu, resulting in a VMOVI being generated.
 * - MOVALL is translated into a string of mapping updates (similar to
 *   the handling of MOVI). MOVALL is horrible.
 *
 * Note that a DISCARD/MAPTI sequence emitted from the guest without
 * reprogramming the PCI endpoint after MAPTI does not result in a
 * VLPI being mapped, as there is no callback from VFIO (the guest
 * will get the interrupt via the normal SW injection). Fixing this is
 * not trivial, and requires some horrible messing with the VFIO
 * internals. Not fun. Don't do that.
 *
 * Then there is the scheduling. Each time a vcpu is about to run on a
 * physical CPU, KVM must tell the corresponding redistributor about
 * it. And if we've migrated our vcpu from one CPU to another, we must
 * tell the ITS (so that the messages reach the right redistributor).
 * This is done in two steps: first issue an irq_set_affinity() on the
 * irq corresponding to the vcpu, then call its_make_vpe_resident().
 * You must be in a non-preemptible context.
 * On exit, a call to its_make_vpe_non_resident() tells the
 * redistributor that we're done with the vcpu.
 *
 * Finally, the doorbell handling: Each vcpu is allocated an interrupt
 * which will fire each time a VLPI is made pending whilst the vcpu is
 * not running. Each time the vcpu gets blocked, the doorbell
 * interrupt gets enabled. When the vcpu is unblocked (for whatever
 * reason), the doorbell interrupt is disabled.
 */

#define DB_IRQ_FLAGS (IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING)

static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
{
        struct kvm_vcpu *vcpu = info;

        /* We got the message, no need to fire again */
        if (!kvm_vgic_global_state.has_gicv4_1 &&
            !irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
                disable_irq_nosync(irq);

        /*
         * The v4.1 doorbell can fire concurrently with the vPE being
         * made non-resident. Ensure we only update pending_last
         * *after* the non-residency sequence has completed.
         */
        raw_spin_lock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock);
        vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
        raw_spin_unlock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock);

        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
        kvm_vcpu_kick(vcpu);

        return IRQ_HANDLED;
}

static void vgic_v4_sync_sgi_config(struct its_vpe *vpe, struct vgic_irq *irq)
{
        vpe->sgi_config[irq->intid].enabled = irq->enabled;
        vpe->sgi_config[irq->intid].group = irq->group;
        vpe->sgi_config[irq->intid].priority = irq->priority;
}

static void vgic_v4_enable_vsgis(struct kvm_vcpu *vcpu)
{
        struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
        int i;

        /*
         * With GICv4.1, every virtual SGI can be directly injected. So
         * let's pretend that they are HW interrupts, tied to a host
         * IRQ. The SGI code will do its magic.
         */
        for (i = 0; i < VGIC_NR_SGIS; i++) {
                struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i);
                struct irq_desc *desc;
                unsigned long flags;
                int ret;

                raw_spin_lock_irqsave(&irq->irq_lock, flags);

                if (irq->hw)
                        goto unlock;

                irq->hw = true;
                irq->host_irq = irq_find_mapping(vpe->sgi_domain, i);

                /* Transfer the full irq state to the vPE */
                vgic_v4_sync_sgi_config(vpe, irq);
                desc = irq_to_desc(irq->host_irq);
                ret = irq_domain_activate_irq(irq_desc_get_irq_data(desc),
                                              false);
                if (!WARN_ON(ret)) {
                        /* Transfer pending state */
                        ret = irq_set_irqchip_state(irq->host_irq,
                                                    IRQCHIP_STATE_PENDING,
                                                    irq->pending_latch);
                        WARN_ON(ret);
                        irq->pending_latch = false;
                }
        unlock:
                raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
                vgic_put_irq(vcpu->kvm, irq);
        }
}

static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
{
        int i;

        for (i = 0; i < VGIC_NR_SGIS; i++) {
                struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i);
                struct irq_desc *desc;
                unsigned long flags;
                int ret;

                raw_spin_lock_irqsave(&irq->irq_lock, flags);

                if (!irq->hw)
                        goto unlock;

                irq->hw = false;
                ret = irq_get_irqchip_state(irq->host_irq,
                                            IRQCHIP_STATE_PENDING,
                                            &irq->pending_latch);
                WARN_ON(ret);

                desc = irq_to_desc(irq->host_irq);
                irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
        unlock:
                raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
                vgic_put_irq(vcpu->kvm, irq);
        }
}

void vgic_v4_configure_vsgis(struct kvm *kvm)
{
        struct vgic_dist *dist = &kvm->arch.vgic;
        struct kvm_vcpu *vcpu;
        unsigned long i;

        lockdep_assert_held(&kvm->arch.config_lock);

        kvm_arm_halt_guest(kvm);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (dist->nassgireq)
                        vgic_v4_enable_vsgis(vcpu);
                else
                        vgic_v4_disable_vsgis(vcpu);
        }

        kvm_arm_resume_guest(kvm);
}

/*
 * Must be called with GICv4.1 and the vPE unmapped, which
 * indicates the invalidation of any VPT caches associated
 * with the vPE, thus we can get the VLPI state by peeking
 * at the VPT.
 */
void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val)
{
        struct its_vpe *vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
        int mask = BIT(irq->intid % BITS_PER_BYTE);
        void *va;
        u8 *ptr;

        va = page_address(vpe->vpt_page);
        ptr = va + irq->intid / BITS_PER_BYTE;

        *val = !!(*ptr & mask);
}

int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq)
{
        return request_irq(irq, vgic_v4_doorbell_handler, 0, "vcpu", vcpu);
}

/**
 * vgic_v4_init - Initialize the GICv4 data structures
 * @kvm: Pointer to the VM being initialized
 *
 * We may be called each time a vITS is created, or when the
 * vgic is initialized. In both cases, the number of vcpus
 * should now be fixed.
 */
int vgic_v4_init(struct kvm *kvm)
{
        struct vgic_dist *dist = &kvm->arch.vgic;
        struct kvm_vcpu *vcpu;
        int nr_vcpus, ret;
        unsigned long i;

        lockdep_assert_held(&kvm->arch.config_lock);

        if (!kvm_vgic_global_state.has_gicv4)
                return 0; /* Nothing to see here... move along. */

        if (dist->its_vm.vpes)
                return 0;

        nr_vcpus = atomic_read(&kvm->online_vcpus);

        dist->its_vm.vpes = kcalloc(nr_vcpus, sizeof(*dist->its_vm.vpes),
                                    GFP_KERNEL_ACCOUNT);
        if (!dist->its_vm.vpes)
                return -ENOMEM;

        dist->its_vm.nr_vpes = nr_vcpus;

        kvm_for_each_vcpu(i, vcpu, kvm)
                dist->its_vm.vpes[i] = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;

        ret = its_alloc_vcpu_irqs(&dist->its_vm);
        if (ret < 0) {
                kvm_err("VPE IRQ allocation failure\n");
                kfree(dist->its_vm.vpes);
                dist->its_vm.nr_vpes = 0;
                dist->its_vm.vpes = NULL;
                return ret;
        }

        kvm_for_each_vcpu(i, vcpu, kvm) {
                int irq = dist->its_vm.vpes[i]->irq;
                unsigned long irq_flags = DB_IRQ_FLAGS;

                /*
                 * Don't automatically enable the doorbell, as we're
                 * flipping it back and forth when the vcpu gets
                 * blocked. Also disable the lazy disabling, as the
                 * doorbell could kick us out of the guest too
                 * early...
                 *
                 * On GICv4.1, the doorbell is managed in HW and must
                 * be left enabled.
                 */
                if (kvm_vgic_global_state.has_gicv4_1)
                        irq_flags &= ~IRQ_NOAUTOEN;
                irq_set_status_flags(irq, irq_flags);

                ret = vgic_v4_request_vpe_irq(vcpu, irq);
                if (ret) {
                        kvm_err("failed to allocate vcpu IRQ%d\n", irq);
                        /*
                         * Trick: adjust the number of vpes so we know
                         * how many to nuke on teardown...
                         */
                        dist->its_vm.nr_vpes = i;
                        break;
                }
        }

        if (ret)
                vgic_v4_teardown(kvm);

        return ret;
}

/**
 * vgic_v4_teardown - Free the GICv4 data structures
 * @kvm: Pointer to the VM being destroyed
 */
void vgic_v4_teardown(struct kvm *kvm)
{
        struct its_vm *its_vm = &kvm->arch.vgic.its_vm;
        int i;

        lockdep_assert_held(&kvm->arch.config_lock);

        if (!its_vm->vpes)
                return;

        for (i = 0; i < its_vm->nr_vpes; i++) {
                struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i);
                int irq = its_vm->vpes[i]->irq;

                irq_clear_status_flags(irq, DB_IRQ_FLAGS);
                free_irq(irq, vcpu);
        }

        its_free_vcpu_irqs(its_vm);
        kfree(its_vm->vpes);
        its_vm->nr_vpes = 0;
        its_vm->vpes = NULL;
}

static inline bool vgic_v4_want_doorbell(struct kvm_vcpu *vcpu)
{
        if (vcpu_get_flag(vcpu, IN_WFI))
                return true;

        if (likely(!vcpu_has_nv(vcpu)))
                return false;

        /*
         * GICv4 hardware is only ever used for the L1. Mark the vPE (i.e. the
         * L1 context) nonresident and request a doorbell to kick us out of the
         * L2 when an IRQ becomes pending.
         */
        return vcpu_get_flag(vcpu, IN_NESTED_ERET);
}

int vgic_v4_put(struct kvm_vcpu *vcpu)
{
        struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;

        if (!vgic_supports_direct_irqs(vcpu->kvm) || !vpe->resident)
                return 0;

        return its_make_vpe_non_resident(vpe, vgic_v4_want_doorbell(vcpu));
}

int vgic_v4_load(struct kvm_vcpu *vcpu)
{
        struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
        int err;

        if (!vgic_supports_direct_irqs(vcpu->kvm) || vpe->resident)
                return 0;

        if (vcpu_get_flag(vcpu, IN_WFI))
                return 0;

        /*
         * Before making the VPE resident, make sure the redistributor
         * corresponding to our current CPU expects us here. See the
         * doc in drivers/irqchip/irq-gic-v4.c to understand how this
         * turns into a VMOVP command at the ITS level.
         */
        err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id()));
        if (err)
                return err;

        err = its_make_vpe_resident(vpe, false, vcpu->kvm->arch.vgic.enabled);
        if (err)
                return err;

        /*
         * Now that the VPE is resident, let's get rid of a potential
         * doorbell interrupt that would still be pending. This is a
         * GICv4.0 only "feature"...
         */
        if (!kvm_vgic_global_state.has_gicv4_1)
                err = irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false);

        return err;
}

void vgic_v4_commit(struct kvm_vcpu *vcpu)
{
        struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;

        /*
         * No need to wait for the vPE to be ready across a shallow guest
         * exit, as only a vcpu_put will invalidate it.
         */
        if (!vpe->ready)
                its_commit_vpe(vpe);
}

static struct vgic_its *vgic_get_its(struct kvm *kvm,
                                     struct kvm_kernel_irq_routing_entry *irq_entry)
{
        struct kvm_msi msi = (struct kvm_msi) {
                .address_lo = irq_entry->msi.address_lo,
                .address_hi = irq_entry->msi.address_hi,
                .data = irq_entry->msi.data,
                .flags = irq_entry->msi.flags,
                .devid = irq_entry->msi.devid,
        };

        return vgic_msi_to_its(kvm, &msi);
}

int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
                               struct kvm_kernel_irq_routing_entry *irq_entry)
{
        struct vgic_its *its;
        struct vgic_irq *irq;
        struct its_vlpi_map map;
        unsigned long flags;
        int ret = 0;

        if (!vgic_supports_direct_msis(kvm))
                return 0;

        /*
         * Get the ITS, and escape early on error (not a valid
         * doorbell for any of our vITSs).
         */
        its = vgic_get_its(kvm, irq_entry);
        if (IS_ERR(its))
                return 0;

        guard(mutex)(&its->its_lock);

        /*
         * Perform the actual DevID/EventID -> LPI translation.
         *
         * Silently exit if translation fails as the guest (or userspace!) has
         * managed to do something stupid. Emulated LPI injection will still
         * work if the guest figures itself out at a later time.
         */
        if (vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
                                 irq_entry->msi.data, &irq))
                return 0;

        raw_spin_lock_irqsave(&irq->irq_lock, flags);

        /* Silently exit if the vLPI is already mapped */
        if (irq->hw)
                goto out_unlock_irq;

        /*
         * Emit the mapping request. If it fails, the ITS probably
         * isn't v4 compatible, so let's silently bail out. Holding
         * the ITS lock should ensure that nothing can modify the
         * target vcpu.
         */
        map = (struct its_vlpi_map) {
                .vm = &kvm->arch.vgic.its_vm,
                .vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe,
                .vintid = irq->intid,
                .properties = ((irq->priority & 0xfc) |
                               (irq->enabled ? LPI_PROP_ENABLED : 0) |
                               LPI_PROP_GROUP1),
                .db_enabled = true,
        };

        ret = its_map_vlpi(virq, &map);
        if (ret)
                goto out_unlock_irq;

        irq->hw = true;
        irq->host_irq = virq;
        atomic_inc(&map.vpe->vlpi_count);

        /* Transfer pending state */
        if (!irq->pending_latch)
                goto out_unlock_irq;

        ret = irq_set_irqchip_state(irq->host_irq, IRQCHIP_STATE_PENDING,
                                    irq->pending_latch);
        WARN_RATELIMIT(ret, "IRQ %d", irq->host_irq);

        /*
         * Clear pending_latch and communicate this state
         * change via vgic_queue_irq_unlock.
         */
        irq->pending_latch = false;
        vgic_queue_irq_unlock(kvm, irq, flags);
        return ret;

out_unlock_irq:
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
        return ret;
}

static struct vgic_irq *__vgic_host_irq_get_vlpi(struct kvm *kvm, int host_irq)
{
        struct vgic_irq *irq;
        unsigned long idx;

        guard(rcu)();
        xa_for_each(&kvm->arch.vgic.lpi_xa, idx, irq) {
                if (!irq->hw || irq->host_irq != host_irq)
                        continue;

                if (!vgic_try_get_irq_kref(irq))
                        return NULL;

                return irq;
        }

        return NULL;
}

void kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq)
{
        struct vgic_irq *irq;
        unsigned long flags;

        if (!vgic_supports_direct_msis(kvm))
                return;

        irq = __vgic_host_irq_get_vlpi(kvm, host_irq);
        if (!irq)
                return;

        raw_spin_lock_irqsave(&irq->irq_lock, flags);
        WARN_ON(irq->hw && irq->host_irq != host_irq);
        if (irq->hw) {
                atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
                irq->hw = false;
                its_unmap_vlpi(host_irq);
        }

        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
        vgic_put_irq(kvm, irq);
}