Path: blob/master/arch/powerpc/kvm/book3s_xive_native.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <linux/irqdomain.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
        u64 val;

        /*
         * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10
         * load operation, so there is no need to enforce load-after-store
         * ordering.
         */

        val = in_be64(xd->eoi_mmio + offset);
        return (u8)val;
}

static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct xive_q *q = &xc->queues[prio];

        xive_native_disable_queue(xc->vp_id, q, prio);
        if (q->qpage) {
                put_page(virt_to_page(q->qpage));
                q->qpage = NULL;
        }
}

static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
                                              u8 prio, __be32 *qpage,
                                              u32 order, bool can_escalate)
{
        int rc;
        __be32 *qpage_prev = q->qpage;

        rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
                                         can_escalate);
        if (rc)
                return rc;

        if (qpage_prev)
                put_page(virt_to_page(qpage_prev));

        return rc;
}

void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        int i;

        if (!kvmppc_xive_enabled(vcpu))
                return;

        if (!xc)
                return;

        pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

        /* Ensure no interrupt is still routed to that VP */
        xc->valid = false;
        kvmppc_xive_disable_vcpu_interrupts(vcpu);

        /* Free escalations */
        for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
                /* Free the escalation irq */
                if (xc->esc_virq[i]) {
                        if (kvmppc_xive_has_single_escalation(xc->xive))
                                xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]);
                        free_irq(xc->esc_virq[i], vcpu);
                        irq_dispose_mapping(xc->esc_virq[i]);
                        kfree(xc->esc_virq_names[i]);
                        xc->esc_virq[i] = 0;
                }
        }

        /* Disable the VP */
        xive_native_disable_vp(xc->vp_id);

        /* Clear the cam word so guest entry won't try to push context */
        vcpu->arch.xive_cam_word = 0;

        /* Free the queues */
        for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
                kvmppc_xive_native_cleanup_queue(vcpu, i);
        }

        /* Free the VP */
        kfree(xc);

        /* Cleanup the vcpu */
        vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
        vcpu->arch.xive_vcpu = NULL;
}

int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
                                    struct kvm_vcpu *vcpu, u32 server_num)
{
        struct kvmppc_xive *xive = dev->private;
        struct kvmppc_xive_vcpu *xc = NULL;
        int rc;
        u32 vp_id;

        pr_devel("native_connect_vcpu(server=%d)\n", server_num);

        if (dev->ops != &kvm_xive_native_ops) {
                pr_devel("Wrong ops !\n");
                return -EPERM;
        }
        if (xive->kvm != vcpu->kvm)
                return -EPERM;
        if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
                return -EBUSY;

        mutex_lock(&xive->lock);

        rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
        if (rc)
                goto bail;

        xc = kzalloc(sizeof(*xc), GFP_KERNEL);
        if (!xc) {
                rc = -ENOMEM;
                goto bail;
        }

        vcpu->arch.xive_vcpu = xc;
        xc->xive = xive;
        xc->vcpu = vcpu;
        xc->server_num = server_num;

        xc->vp_id = vp_id;
        xc->valid = true;
        vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

        rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
        if (rc) {
                pr_err("Failed to get VP info from OPAL: %d\n", rc);
                goto bail;
        }

        if (!kvmppc_xive_check_save_restore(vcpu)) {
                pr_err("inconsistent save-restore setup for VCPU %d\n", server_num);
                rc = -EIO;
                goto bail;
        }

        /*
         * Enable the VP first as the single escalation mode will
         * affect escalation interrupts numbering
         */
        rc = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive));
        if (rc) {
                pr_err("Failed to enable VP in OPAL: %d\n", rc);
                goto bail;
        }

        /* Configure VCPU fields for use by assembly push/pull */
        vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
        vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

        /* TODO: reset all queues to a clean state ? */
bail:
        mutex_unlock(&xive->lock);
        if (rc)
                kvmppc_xive_native_cleanup_vcpu(vcpu);

        return rc;
}

/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
        struct kvmppc_xive *xive = kvm->arch.xive;
        pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

        if (irq >= KVMPPC_XIVE_NR_IRQS)
                return -EINVAL;

        /*
         * Clear the ESB pages of the IRQ number being mapped (or
         * unmapped) into the guest and let the VM fault handler
         * repopulate with the appropriate ESB pages (device or IC)
         */
        pr_debug("clearing esb pages for girq 0x%lx\n", irq);
        mutex_lock(&xive->mapping_lock);
        if (xive->mapping)
                unmap_mapping_range(xive->mapping,
                                    esb_pgoff << PAGE_SHIFT,
                                    2ull << PAGE_SHIFT, 1);
        mutex_unlock(&xive->mapping_lock);
        return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
        .reset_mapped = kvmppc_xive_native_reset_mapped,
};

static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;
        struct kvm_device *dev = vma->vm_file->private_data;
        struct kvmppc_xive *xive = dev->private;
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        struct xive_irq_data *xd;
        u32 hw_num;
        u16 src;
        u64 page;
        unsigned long irq;
        u64 page_offset;

        /*
         * Linux/KVM uses a two pages ESB setting, one for trigger and
         * one for EOI
         */
        page_offset = vmf->pgoff - vma->vm_pgoff;
        irq = page_offset / 2;

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb) {
                pr_devel("%s: source %lx not found !\n", __func__, irq);
                return VM_FAULT_SIGBUS;
        }

        state = &sb->irq_state[src];

        /* Some sanity checking */
        if (!state->valid) {
                pr_devel("%s: source %lx invalid !\n", __func__, irq);
                return VM_FAULT_SIGBUS;
        }

        kvmppc_xive_select_irq(state, &hw_num, &xd);

        arch_spin_lock(&sb->lock);

        /*
         * first/even page is for trigger
         * second/odd page is for EOI and management.
         */
        page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
        arch_spin_unlock(&sb->lock);

        if (WARN_ON(!page)) {
                pr_err("%s: accessing invalid ESB page for source %lx !\n",
                       __func__, irq);
                return VM_FAULT_SIGBUS;
        }

        vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
        return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
        .fault = xive_native_esb_fault,
};

static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;

        switch (vmf->pgoff - vma->vm_pgoff) {
        case 0: /* HW - forbid access */
        case 1: /* HV - forbid access */
                return VM_FAULT_SIGBUS;
        case 2: /* OS */
                vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
                return VM_FAULT_NOPAGE;
        case 3: /* USER - TODO */
        default:
                return VM_FAULT_SIGBUS;
        }
}

static const struct vm_operations_struct xive_native_tima_vmops = {
        .fault = xive_native_tima_fault,
};

static int kvmppc_xive_native_mmap(struct kvm_device *dev,
                                   struct vm_area_struct *vma)
{
        struct kvmppc_xive *xive = dev->private;

        /* We only allow mappings at fixed offset for now */
        if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
                if (vma_pages(vma) > 4)
                        return -EINVAL;
                vma->vm_ops = &xive_native_tima_vmops;
        } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
                if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
                        return -EINVAL;
                vma->vm_ops = &xive_native_esb_vmops;
        } else {
                return -EINVAL;
        }

        vm_flags_set(vma, VM_IO | VM_PFNMAP);
        vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

        /*
         * Grab the KVM device file address_space to be able to clear
         * the ESB pages mapping when a device is passed-through into
         * the guest.
         */
        xive->mapping = vma->vm_file->f_mapping;
        return 0;
}

static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
                                         u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        u64 __user *ubufp = (u64 __user *) addr;
        u64 val;
        u16 idx;
        int rc;

        pr_devel("%s irq=0x%lx\n", __func__, irq);

        if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
                return -E2BIG;

        sb = kvmppc_xive_find_source(xive, irq, &idx);
        if (!sb) {
                pr_debug("No source, creating source block...\n");
                sb = kvmppc_xive_create_src_block(xive, irq);
                if (!sb) {
                        pr_err("Failed to create block...\n");
                        return -ENOMEM;
                }
        }
        state = &sb->irq_state[idx];

        if (get_user(val, ubufp)) {
                pr_err("fault getting user info !\n");
                return -EFAULT;
        }

        arch_spin_lock(&sb->lock);

        /*
         * If the source doesn't already have an IPI, allocate
         * one and get the corresponding data
         */
        if (!state->ipi_number) {
                state->ipi_number = xive_native_alloc_irq();
                if (state->ipi_number == 0) {
                        pr_err("Failed to allocate IRQ !\n");
                        rc = -ENXIO;
                        goto unlock;
                }
                xive_native_populate_irq_data(state->ipi_number,
                                              &state->ipi_data);
                pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
                         state->ipi_number, irq);
        }

        /* Restore LSI state */
        if (val & KVM_XIVE_LEVEL_SENSITIVE) {
                state->lsi = true;
                if (val & KVM_XIVE_LEVEL_ASSERTED)
                        state->asserted = true;
                pr_devel("  LSI ! Asserted=%d\n", state->asserted);
        }

        /* Mask IRQ to start with */
        state->act_server = 0;
        state->act_priority = MASKED;
        xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
        xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

        /* Increment the number of valid sources and mark this one valid */
        if (!state->valid)
                xive->src_count++;
        state->valid = true;

        rc = 0;

unlock:
        arch_spin_unlock(&sb->lock);

        return rc;
}

static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
                                        struct kvmppc_xive_src_block *sb,
                                        struct kvmppc_xive_irq_state *state,
                                        u32 server, u8 priority, bool masked,
                                        u32 eisn)
{
        struct kvm *kvm = xive->kvm;
        u32 hw_num;
        int rc = 0;

        arch_spin_lock(&sb->lock);

        if (state->act_server == server && state->act_priority == priority &&
            state->eisn == eisn)
                goto unlock;

        pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
                 priority, server, masked, state->act_server,
                 state->act_priority);

        kvmppc_xive_select_irq(state, &hw_num, NULL);

        if (priority != MASKED && !masked) {
                rc = kvmppc_xive_select_target(kvm, &server, priority);
                if (rc)
                        goto unlock;

                state->act_priority = priority;
                state->act_server = server;
                state->eisn = eisn;

                rc = xive_native_configure_irq(hw_num,
                                               kvmppc_xive_vp(xive, server),
                                               priority, eisn);
        } else {
                state->act_priority = MASKED;
                state->act_server = 0;
                state->eisn = 0;

                rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
        }

unlock:
        arch_spin_unlock(&sb->lock);
        return rc;
}

static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
                                                long irq, u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        u64 __user *ubufp = (u64 __user *) addr;
        u16 src;
        u64 kvm_cfg;
        u32 server;
        u8 priority;
        bool masked;
        u32 eisn;

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb)
                return -ENOENT;

        state = &sb->irq_state[src];

        if (!state->valid)
                return -EINVAL;

        if (get_user(kvm_cfg, ubufp))
                return -EFAULT;

        pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

        priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
                KVM_XIVE_SOURCE_PRIORITY_SHIFT;
        server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
                KVM_XIVE_SOURCE_SERVER_SHIFT;
        masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
                KVM_XIVE_SOURCE_MASKED_SHIFT;
        eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
                KVM_XIVE_SOURCE_EISN_SHIFT;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("invalid priority for queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }

        return kvmppc_xive_native_update_source_config(xive, sb, state, server,
                                                       priority, masked, eisn);
}

static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
                                          long irq, u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        struct xive_irq_data *xd;
        u32 hw_num;
        u16 src;
        int rc = 0;

        pr_devel("%s irq=0x%lx", __func__, irq);

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb)
                return -ENOENT;

        state = &sb->irq_state[src];

        rc = -EINVAL;

        arch_spin_lock(&sb->lock);

        if (state->valid) {
                kvmppc_xive_select_irq(state, &hw_num, &xd);
                xive_native_sync_source(hw_num);
                rc = 0;
        }

        arch_spin_unlock(&sb->lock);
        return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
        /*
         * We only support 64K pages for the moment. This is also
         * advertised in the DT property "ibm,xive-eq-sizes"
         */
        switch (qshift) {
        case 0: /* EQ reset */
        case 16:
                return 0;
        case 12:
        case 21:
        case 24:
        default:
                return -EINVAL;
        }
}

static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
                                               long eq_idx, u64 addr)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        struct kvmppc_xive_vcpu *xc;
        void __user *ubufp = (void __user *) addr;
        u32 server;
        u8 priority;
        struct kvm_ppc_xive_eq kvm_eq;
        int rc;
        __be32 *qaddr = NULL;
        struct page *page;
        struct xive_q *q;
        gfn_t gfn;
        unsigned long page_size;
        int srcu_idx;

        /*
         * Demangle priority/server tuple from the EQ identifier
         */
        priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
                KVM_XIVE_EQ_PRIORITY_SHIFT;
        server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
                KVM_XIVE_EQ_SERVER_SHIFT;

        if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
                return -EFAULT;

        vcpu = kvmppc_xive_find_server(kvm, server);
        if (!vcpu) {
                pr_err("Can't find server %d\n", server);
                return -ENOENT;
        }
        xc = vcpu->arch.xive_vcpu;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("Trying to restore invalid queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }
        q = &xc->queues[priority];

        pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
                 __func__, server, priority, kvm_eq.flags,
                 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

        /* reset queue and disable queueing */
        if (!kvm_eq.qshift) {
                q->guest_qaddr = 0;
                q->guest_qshift = 0;

                rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
                                                        NULL, 0, true);
                if (rc) {
                        pr_err("Failed to reset queue %d for VCPU %d: %d\n",
                               priority, xc->server_num, rc);
                        return rc;
                }

                return 0;
        }

        /*
         * sPAPR specifies a "Unconditional Notify (n) flag" for the
         * H_INT_SET_QUEUE_CONFIG hcall which forces notification
         * without using the coalescing mechanisms provided by the
         * XIVE END ESBs. This is required on KVM as notification
         * using the END ESBs is not supported.
         */
        if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
                pr_err("invalid flags %d\n", kvm_eq.flags);
                return -EINVAL;
        }

        rc = xive_native_validate_queue_size(kvm_eq.qshift);
        if (rc) {
                pr_err("invalid queue size %d\n", kvm_eq.qshift);
                return rc;
        }

        if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
                pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
                       1ull << kvm_eq.qshift);
                return -EINVAL;
        }

        srcu_idx = srcu_read_lock(&kvm->srcu);
        gfn = gpa_to_gfn(kvm_eq.qaddr);

        page_size = kvm_host_page_size(vcpu, gfn);
        if (1ull << kvm_eq.qshift > page_size) {
                srcu_read_unlock(&kvm->srcu, srcu_idx);
                pr_warn("Incompatible host page size %lx!\n", page_size);
                return -EINVAL;
        }

        page = gfn_to_page(kvm, gfn);
        if (!page) {
                srcu_read_unlock(&kvm->srcu, srcu_idx);
                pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
                return -EINVAL;
        }

        qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
        srcu_read_unlock(&kvm->srcu, srcu_idx);

        /*
         * Backup the queue page guest address to the mark EQ page
         * dirty for migration.
         */
        q->guest_qaddr = kvm_eq.qaddr;
        q->guest_qshift = kvm_eq.qshift;

        /*
         * Unconditional Notification is forced by default at the
         * OPAL level because the use of END ESBs is not supported by
         * Linux.
         */
        rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
                                        (__be32 *) qaddr, kvm_eq.qshift, true);
        if (rc) {
                pr_err("Failed to configure queue %d for VCPU %d: %d\n",
                       priority, xc->server_num, rc);
                put_page(page);
                return rc;
        }

        /*
         * Only restore the queue state when needed. When doing the
         * H_INT_SET_SOURCE_CONFIG hcall, it should not.
         */
        if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
                rc = xive_native_set_queue_state(xc->vp_id, priority,
                                                 kvm_eq.qtoggle,
                                                 kvm_eq.qindex);
                if (rc)
                        goto error;
        }

        rc = kvmppc_xive_attach_escalation(vcpu, priority,
                                           kvmppc_xive_has_single_escalation(xive));
error:
        if (rc)
                kvmppc_xive_native_cleanup_queue(vcpu, priority);
        return rc;
}

static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
                                               long eq_idx, u64 addr)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        struct kvmppc_xive_vcpu *xc;
        struct xive_q *q;
        void __user *ubufp = (u64 __user *) addr;
        u32 server;
        u8 priority;
        struct kvm_ppc_xive_eq kvm_eq;
        u64 qaddr;
        u64 qshift;
        u64 qeoi_page;
        u32 escalate_irq;
        u64 qflags;
        int rc;

        /*
         * Demangle priority/server tuple from the EQ identifier
         */
        priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
                KVM_XIVE_EQ_PRIORITY_SHIFT;
        server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
                KVM_XIVE_EQ_SERVER_SHIFT;

        vcpu = kvmppc_xive_find_server(kvm, server);
        if (!vcpu) {
                pr_err("Can't find server %d\n", server);
                return -ENOENT;
        }
        xc = vcpu->arch.xive_vcpu;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("invalid priority for queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }
        q = &xc->queues[priority];

        memset(&kvm_eq, 0, sizeof(kvm_eq));

        if (!q->qpage)
                return 0;

        rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
                                        &qeoi_page, &escalate_irq, &qflags);
        if (rc)
                return rc;

        kvm_eq.flags = 0;
        if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
                kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

        kvm_eq.qshift = q->guest_qshift;
        kvm_eq.qaddr = q->guest_qaddr;

        rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
                                         &kvm_eq.qindex);
        if (rc)
                return rc;

        pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
                 __func__, server, priority, kvm_eq.flags,
                 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

        if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
                return -EFAULT;

        return 0;
}

static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
        int i;

        for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
                struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

                if (!state->valid)
                        continue;

                if (state->act_priority == MASKED)
                        continue;

                state->eisn = 0;
                state->act_server = 0;
                state->act_priority = MASKED;
                xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
                xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
                if (state->pt_number) {
                        xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
                        xive_native_configure_irq(state->pt_number,
                                                  0, MASKED, 0);
                }
        }
}

static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned long i;

        pr_devel("%s\n", __func__);

        mutex_lock(&xive->lock);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
                unsigned int prio;

                if (!xc)
                        continue;

                kvmppc_xive_disable_vcpu_interrupts(vcpu);

                for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

                        /* Single escalation, no queue 7 */
                        if (prio == 7 && kvmppc_xive_has_single_escalation(xive))
                                break;

                        if (xc->esc_virq[prio]) {
                                free_irq(xc->esc_virq[prio], vcpu);
                                irq_dispose_mapping(xc->esc_virq[prio]);
                                kfree(xc->esc_virq_names[prio]);
                                xc->esc_virq[prio] = 0;
                        }

                        kvmppc_xive_native_cleanup_queue(vcpu, prio);
                }
        }

        for (i = 0; i <= xive->max_sbid; i++) {
                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

                if (sb) {
                        arch_spin_lock(&sb->lock);
                        kvmppc_xive_reset_sources(sb);
                        arch_spin_unlock(&sb->lock);
                }
        }

        mutex_unlock(&xive->lock);

        return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
        int j;

        for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
                struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
                struct xive_irq_data *xd;
                u32 hw_num;

                if (!state->valid)
                        continue;

                /*
                 * The struct kvmppc_xive_irq_state reflects the state
                 * of the EAS configuration and not the state of the
                 * source. The source is masked setting the PQ bits to
                 * '-Q', which is what is being done before calling
                 * the KVM_DEV_XIVE_EQ_SYNC control.
                 *
                 * If a source EAS is configured, OPAL syncs the XIVE
                 * IC of the source and the XIVE IC of the previous
                 * target if any.
                 *
                 * So it should be fine ignoring MASKED sources as
                 * they have been synced already.
                 */
                if (state->act_priority == MASKED)
                        continue;

                kvmppc_xive_select_irq(state, &hw_num, &xd);
                xive_native_sync_source(hw_num);
                xive_native_sync_queue(hw_num);
        }
}

static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        unsigned int prio;
        int srcu_idx;

        if (!xc)
                return -ENOENT;

        for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
                struct xive_q *q = &xc->queues[prio];

                if (!q->qpage)
                        continue;

                /* Mark EQ page dirty for migration */
                srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
                mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
                srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
        }
        return 0;
}

static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned long i;

        pr_devel("%s\n", __func__);

        mutex_lock(&xive->lock);
        for (i = 0; i <= xive->max_sbid; i++) {
                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

                if (sb) {
                        arch_spin_lock(&sb->lock);
                        kvmppc_xive_native_sync_sources(sb);
                        arch_spin_unlock(&sb->lock);
                }
        }

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvmppc_xive_native_vcpu_eq_sync(vcpu);
        }
        mutex_unlock(&xive->lock);

        return 0;
}

static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        struct kvmppc_xive *xive = dev->private;

        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_CTRL:
                switch (attr->attr) {
                case KVM_DEV_XIVE_RESET:
                        return kvmppc_xive_reset(xive);
                case KVM_DEV_XIVE_EQ_SYNC:
                        return kvmppc_xive_native_eq_sync(xive);
                case KVM_DEV_XIVE_NR_SERVERS:
                        return kvmppc_xive_set_nr_servers(xive, attr->addr);
                }
                break;
        case KVM_DEV_XIVE_GRP_SOURCE:
                return kvmppc_xive_native_set_source(xive, attr->attr,
                                                     attr->addr);
        case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
                return kvmppc_xive_native_set_source_config(xive, attr->attr,
                                                            attr->addr);
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return kvmppc_xive_native_set_queue_config(xive, attr->attr,
                                                           attr->addr);
        case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
                return kvmppc_xive_native_sync_source(xive, attr->attr,
                                                      attr->addr);
        }
        return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        struct kvmppc_xive *xive = dev->private;

        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return kvmppc_xive_native_get_queue_config(xive, attr->attr,
                                                           attr->addr);
        }
        return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_CTRL:
                switch (attr->attr) {
                case KVM_DEV_XIVE_RESET:
                case KVM_DEV_XIVE_EQ_SYNC:
                case KVM_DEV_XIVE_NR_SERVERS:
                        return 0;
                }
                break;
        case KVM_DEV_XIVE_GRP_SOURCE:
        case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
        case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
                if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
                    attr->attr < KVMPPC_XIVE_NR_IRQS)
                        return 0;
                break;
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return 0;
        }
        return -ENXIO;
}

/*
 * Called when device fd is closed. kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
        struct kvmppc_xive *xive = dev->private;
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned long i;

        pr_devel("Releasing xive native device\n");

        /*
         * Clear the KVM device file address_space which is used to
         * unmap the ESB pages when a device is passed-through.
         */
        mutex_lock(&xive->mapping_lock);
        xive->mapping = NULL;
        mutex_unlock(&xive->mapping_lock);

        /*
         * Since this is the device release function, we know that
         * userspace does not have any open fd or mmap referring to
         * the device. Therefore there can not be any of the
         * device attribute set/get, mmap, or page fault functions
         * being executed concurrently, and similarly, the
         * connect_vcpu and set/clr_mapped functions also cannot
         * be being executed.
         */

        debugfs_remove(xive->dentry);

        /*
         * We should clean up the vCPU interrupt presenters first.
         */
        kvm_for_each_vcpu(i, vcpu, kvm) {
                /*
                 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
                 * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done.
                 * Holding the vcpu->mutex also means that the vcpu cannot
                 * be executing the KVM_RUN ioctl, and therefore it cannot
                 * be executing the XIVE push or pull code or accessing
                 * the XIVE MMIO regions.
                 */
                mutex_lock(&vcpu->mutex);
                kvmppc_xive_native_cleanup_vcpu(vcpu);
                mutex_unlock(&vcpu->mutex);
        }

        /*
         * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
         * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
         * against xive code getting called during vcpu execution or
         * set/get one_reg operations.
         */
        kvm->arch.xive = NULL;

        for (i = 0; i <= xive->max_sbid; i++) {
                if (xive->src_blocks[i])
                        kvmppc_xive_free_sources(xive->src_blocks[i]);
                kfree(xive->src_blocks[i]);
                xive->src_blocks[i] = NULL;
        }

        if (xive->vp_base != XIVE_INVALID_VP)
                xive_native_free_vp_block(xive->vp_base);

        /*
         * A reference of the kvmppc_xive pointer is now kept under
         * the xive_devices struct of the machine for reuse. It is
         * freed when the VM is destroyed for now until we fix all the
         * execution paths.
         */

        kfree(dev);
}

/*
 * Create a XIVE device. kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
        struct kvmppc_xive *xive;
        struct kvm *kvm = dev->kvm;

        pr_devel("Creating xive native device\n");

        if (kvm->arch.xive)
                return -EEXIST;

        xive = kvmppc_xive_get_device(kvm, type);
        if (!xive)
                return -ENOMEM;

        dev->private = xive;
        xive->dev = dev;
        xive->kvm = kvm;
        mutex_init(&xive->mapping_lock);
        mutex_init(&xive->lock);

        /* VP allocation is delayed to the first call to connect_vcpu */
        xive->vp_base = XIVE_INVALID_VP;
        /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets
         * on a POWER9 system.
         */
        xive->nr_servers = KVM_MAX_VCPUS;

        if (xive_native_has_single_escalation())
                xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;

        if (xive_native_has_save_restore())
                xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE;

        xive->ops = &kvmppc_xive_native_ops;

        kvm->arch.xive = xive;
        return 0;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT)

int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        u64 opal_state;
        int rc;

        if (!kvmppc_xive_enabled(vcpu))
                return -EPERM;

        if (!xc)
                return -ENOENT;

        /* Thread context registers. We only care about IPB and CPPR */
        val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

        /* Get the VP state from OPAL */
        rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
        if (rc)
                return rc;

        /*
         * Capture the backup of IPB register in the NVT structure and
         * merge it in our KVM VP state.
         */
        val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

        pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
                 __func__,
                 vcpu->arch.xive_saved_state.nsr,
                 vcpu->arch.xive_saved_state.cppr,
                 vcpu->arch.xive_saved_state.ipb,
                 vcpu->arch.xive_saved_state.pipr,
                 vcpu->arch.xive_saved_state.w01,
                 (u32) vcpu->arch.xive_cam_word, opal_state);

        return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

        pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
                 val->xive_timaval[0], val->xive_timaval[1]);

        if (!kvmppc_xive_enabled(vcpu))
                return -EPERM;

        if (!xc || !xive)
                return -ENOENT;

        /* We can't update the state of a "pushed" VCPU */
        if (WARN_ON(vcpu->arch.xive_pushed))
                return -EBUSY;

        /*
         * Restore the thread context registers. IPB and CPPR should
         * be the only ones that matter.
         */
        vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

        /*
         * There is no need to restore the XIVE internal state (IPB
         * stored in the NVT) as the IPB register was merged in KVM VP
         * state when captured.
         */
        return 0;
}

bool kvmppc_xive_native_supported(void)
{
        return xive_native_has_queue_state_support();
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
        struct kvmppc_xive *xive = m->private;
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned long i;

        if (!kvm)
                return 0;

        seq_puts(m, "=========\nVCPU state\n=========\n");

        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

                if (!xc)
                        continue;

                seq_printf(m, "VCPU %d: VP=%#x/%02x\n"
                           "    NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
                           xc->server_num, xc->vp_id, xc->vp_chip_id,
                           vcpu->arch.xive_saved_state.nsr,
                           vcpu->arch.xive_saved_state.cppr,
                           vcpu->arch.xive_saved_state.ipb,
                           vcpu->arch.xive_saved_state.pipr,
                           be64_to_cpu(vcpu->arch.xive_saved_state.w01),
                           be32_to_cpu(vcpu->arch.xive_cam_word));

                kvmppc_xive_debug_show_queues(m, vcpu);
        }

        seq_puts(m, "=========\nSources\n=========\n");

        for (i = 0; i <= xive->max_sbid; i++) {
                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

                if (sb) {
                        arch_spin_lock(&sb->lock);
                        kvmppc_xive_debug_show_sources(m, sb);
                        arch_spin_unlock(&sb->lock);
                }
        }

        return 0;
}

DEFINE_SHOW_ATTRIBUTE(xive_native_debug);

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
        xive->dentry = debugfs_create_file("xive", 0444, xive->kvm->debugfs_dentry,
                                           xive, &xive_native_debug_fops);

        pr_debug("%s: created\n", __func__);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
        struct kvmppc_xive *xive = dev->private;

        /* Register some debug interfaces */
        xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
        .name = "kvm-xive-native",
        .create = kvmppc_xive_native_create,
        .init = kvmppc_xive_native_init,
        .release = kvmppc_xive_native_release,
        .set_attr = kvmppc_xive_native_set_attr,
        .get_attr = kvmppc_xive_native_get_attr,
        .has_attr = kvmppc_xive_native_has_attr,
        .mmap = kvmppc_xive_native_mmap,
};