#define pr_fmt(fmt) "xive-kvm: " fmt
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/uaccess.h>
#include <linux/irqdomain.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/time.h>
#include <asm/opal.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include "book3s_xive.h"
#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_mmio))
#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_mmio))
#define XICS_DUMMY 1
static void xive_vm_ack_pending(struct kvmppc_xive_vcpu *xc)
{
u8 cppr;
u16 ack;
eieio();
ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
mb();
if (!((ack >> 8) & TM_QW1_NSR_EO))
return;
cppr = ack & 0xff;
if (cppr < 8)
xc->pending |= 1 << cppr;
if (cppr >= xc->hw_cppr)
pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
smp_processor_id(), cppr, xc->hw_cppr);
xc->hw_cppr = cppr;
}
static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
u64 val;
if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
offset |= XIVE_ESB_LD_ST_MO;
val = __raw_readq(__x_eoi_page(xd) + offset);
#ifdef __LITTLE_ENDIAN__
val >>= 64-8;
#endif
return (u8)val;
}
static void xive_vm_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
{
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
__raw_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
__raw_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
} else {
uint64_t eoi_val;
eoi_val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_00);
if ((eoi_val & 1) && __x_trig_page(xd))
__raw_writeq(0, __x_trig_page(xd));
}
}
enum {
scan_fetch,
scan_poll,
scan_eoi,
};
static u32 xive_vm_scan_interrupts(struct kvmppc_xive_vcpu *xc,
u8 pending, int scan_type)
{
u32 hirq = 0;
u8 prio = 0xff;
while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
struct xive_q *q;
u32 idx, toggle;
__be32 *qpage;
prio = ffs(pending) - 1;
if (prio >= xc->cppr || prio > 7) {
if (xc->mfrr < xc->cppr) {
prio = xc->mfrr;
hirq = XICS_IPI;
}
break;
}
q = &xc->queues[prio];
idx = q->idx;
toggle = q->toggle;
qpage = READ_ONCE(q->qpage);
skip_ipi:
hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
if (scan_type == scan_fetch) {
xive_vm_source_eoi(xc->vp_ipi,
&xc->vp_ipi_data);
q->idx = idx;
q->toggle = toggle;
}
WARN_ON(hirq && hirq != XICS_IPI);
if (hirq)
goto skip_ipi;
}
if (hirq == XICS_DUMMY)
goto skip_ipi;
if (!hirq) {
pending &= ~(1 << prio);
if (atomic_read(&q->pending_count)) {
int p = atomic_xchg(&q->pending_count, 0);
if (p) {
WARN_ON(p > atomic_read(&q->count));
atomic_sub(p, &q->count);
}
}
}
if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
prio = xc->mfrr;
hirq = XICS_IPI;
break;
}
if (scan_type == scan_fetch) {
q->idx = idx;
q->toggle = toggle;
}
}
if (scan_type == scan_poll)
return hirq;
xc->pending = pending;
if (scan_type == scan_eoi)
return hirq;
if (hirq)
xc->cppr = prio;
if (xc->cppr != xc->hw_cppr) {
xc->hw_cppr = xc->cppr;
__raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
}
return hirq;
}
static unsigned long xive_vm_h_xirr(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
u8 old_cppr;
u32 hirq;
pr_devel("H_XIRR\n");
xc->stat_vm_h_xirr++;
xive_vm_ack_pending(xc);
pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
xc->pending, xc->hw_cppr, xc->cppr);
old_cppr = xive_prio_to_guest(xc->cppr);
hirq = xive_vm_scan_interrupts(xc, xc->pending, scan_fetch);
pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
hirq, xc->hw_cppr, xc->cppr);
if (hirq & 0xff000000)
pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
kvmppc_set_gpr(vcpu, 4, hirq | (old_cppr << 24));
return H_SUCCESS;
}
static unsigned long xive_vm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
u8 pending = xc->pending;
u32 hirq;
pr_devel("H_IPOLL(server=%ld)\n", server);
xc->stat_vm_h_ipoll++;
if (xc->server_num != server) {
vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
if (!vcpu)
return H_PARAMETER;
xc = vcpu->arch.xive_vcpu;
pending = 0xff;
} else {
__be64 qw1 = __raw_readq(xive_tima + TM_QW1_OS);
u8 pipr = be64_to_cpu(qw1) & 0xff;
if (pipr < 8)
pending |= 1 << pipr;
}
hirq = xive_vm_scan_interrupts(xc, pending, scan_poll);
kvmppc_set_gpr(vcpu, 4, hirq | (xc->cppr << 24));
return H_SUCCESS;
}
static void xive_vm_push_pending_to_hw(struct kvmppc_xive_vcpu *xc)
{
u8 pending, prio;
pending = xc->pending;
if (xc->mfrr != 0xff) {
if (xc->mfrr < 8)
pending |= 1 << xc->mfrr;
else
pending |= 0x80;
}
if (!pending)
return;
prio = ffs(pending) - 1;
__raw_writeb(prio, xive_tima + TM_SPC_SET_OS_PENDING);
}
static void xive_vm_scan_for_rerouted_irqs(struct kvmppc_xive *xive,
struct kvmppc_xive_vcpu *xc)
{
unsigned int prio;
for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
struct xive_q *q = &xc->queues[prio];
struct kvmppc_xive_irq_state *state;
struct kvmppc_xive_src_block *sb;
u32 idx, toggle, entry, irq, hw_num;
struct xive_irq_data *xd;
__be32 *qpage;
u16 src;
idx = q->idx;
toggle = q->toggle;
qpage = READ_ONCE(q->qpage);
if (!qpage)
continue;
for (;;) {
entry = be32_to_cpup(qpage + idx);
if ((entry >> 31) == toggle)
break;
irq = entry & 0x7fffffff;
if (irq == XICS_DUMMY || irq == XICS_IPI)
goto next;
sb = kvmppc_xive_find_source(xive, irq, &src);
if (!sb)
goto next;
state = &sb->irq_state[src];
if (xc->server_num == state->act_server)
goto next;
qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
kvmppc_xive_select_irq(state, &hw_num, &xd);
if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
xive_vm_source_eoi(hw_num, xd);
next:
idx = (idx + 1) & q->msk;
if (idx == 0)
toggle ^= 1;
}
}
}
static int xive_vm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
u8 old_cppr;
pr_devel("H_CPPR(cppr=%ld)\n", cppr);
xc->stat_vm_h_cppr++;
cppr = xive_prio_from_guest(cppr);
old_cppr = xc->cppr;
xc->cppr = cppr;
smp_mb();
if (cppr > old_cppr) {
xive_vm_push_pending_to_hw(xc);
} else {
xive_vm_scan_for_rerouted_irqs(xive, xc);
}
xc->hw_cppr = cppr;
__raw_writeb(cppr, xive_tima + TM_QW1_OS + TM_CPPR);
return H_SUCCESS;
}
static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct xive_irq_data *xd;
u8 new_cppr = xirr >> 24;
u32 irq = xirr & 0x00ffffff, hw_num;
u16 src;
int rc = 0;
pr_devel("H_EOI(xirr=%08lx)\n", xirr);
xc->stat_vm_h_eoi++;
xc->cppr = xive_prio_from_guest(new_cppr);
if (irq == XICS_IPI || irq == 0) {
smp_mb();
goto bail;
}
sb = kvmppc_xive_find_source(xive, irq, &src);
if (!sb) {
pr_devel(" source not found !\n");
rc = H_PARAMETER;
smp_mb();
goto bail;
}
state = &sb->irq_state[src];
kvmppc_xive_select_irq(state, &hw_num, &xd);
state->in_eoi = true;
smp_mb();
again:
if (state->guest_priority == MASKED) {
arch_spin_lock(&sb->lock);
if (state->guest_priority != MASKED) {
arch_spin_unlock(&sb->lock);
goto again;
}
pr_devel(" EOI on saved P...\n");
state->old_p = false;
arch_spin_unlock(&sb->lock);
} else {
pr_devel(" EOI on source...\n");
xive_vm_source_eoi(hw_num, xd);
if (state->lsi && state->asserted)
__raw_writeq(0, __x_trig_page(xd));
}
mb();
state->in_eoi = false;
bail:
xive_vm_scan_interrupts(xc, xc->pending, scan_eoi);
xive_vm_push_pending_to_hw(xc);
pr_devel(" after scan pending=%02x\n", xc->pending);
xc->hw_cppr = xc->cppr;
__raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
return rc;
}
static int xive_vm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
xc->stat_vm_h_ipi++;
vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
if (!vcpu)
return H_PARAMETER;
xc = vcpu->arch.xive_vcpu;
xc->mfrr = mfrr;
mb();
if (mfrr < xc->cppr)
__raw_writeq(0, __x_trig_page(&xc->vp_ipi_data));
return H_SUCCESS;
}
#define XIVE_Q_GAP 2
static bool kvmppc_xive_vcpu_has_save_restore(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
return xc->vp_cam & TM_QW1W2_HO;
}
bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct kvmppc_xive *xive = xc->xive;
if (xive->flags & KVMPPC_XIVE_FLAG_SAVE_RESTORE)
return kvmppc_xive_vcpu_has_save_restore(vcpu);
return true;
}
void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
{
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
u64 pq;
if (!tima || !vcpu->arch.xive_cam_word)
return;
eieio();
if (!kvmppc_xive_vcpu_has_save_restore(vcpu))
__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
vcpu->arch.xive_pushed = 1;
eieio();
vcpu->arch.irq_pending = 0;
if (vcpu->arch.xive_esc_on) {
pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
XIVE_ESB_SET_PQ_01));
mb();
if (!(pq & XIVE_ESB_VAL_P))
vcpu->arch.xive_esc_on = 0;
}
}
EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
{
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
if (!vcpu->arch.xive_pushed)
return;
if (WARN_ON(!tima))
return;
eieio();
__raw_readl(tima + TM_SPC_PULL_OS_CTX);
if (!kvmppc_xive_vcpu_has_save_restore(vcpu))
vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
vcpu->arch.xive_saved_state.lsmfb = 0;
vcpu->arch.xive_saved_state.ack = 0xff;
vcpu->arch.xive_pushed = 0;
eieio();
}
EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
{
void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
bool ret = true;
if (!esc_vaddr)
return ret;
if (vcpu->arch.xive_esc_on) {
ret = false;
__raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10);
} else {
vcpu->arch.xive_esc_on = true;
mb();
__raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
}
mb();
return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
static bool xive_irq_trigger(struct xive_irq_data *xd)
{
if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI))
return false;
if (WARN_ON(!xd->trig_mmio))
return false;
out_be64(xd->trig_mmio, 0);
return true;
}
static irqreturn_t xive_esc_irq(int irq, void *data)
{
struct kvm_vcpu *vcpu = data;
vcpu->arch.irq_pending = 1;
smp_mb();
if (vcpu->arch.ceded || vcpu->arch.nested)
kvmppc_fast_vcpu_kick(vcpu);
vcpu->arch.xive_esc_on = false;
smp_wmb();
return IRQ_HANDLED;
}
int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
bool single_escalation)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct xive_q *q = &xc->queues[prio];
char *name = NULL;
int rc;
if (xc->esc_virq[prio])
return 0;
xc->esc_virq[prio] = irq_create_mapping(NULL, q->esc_irq);
if (!xc->esc_virq[prio]) {
pr_err("Failed to map escalation interrupt for queue %d of VCPU %d\n",
prio, xc->server_num);
return -EIO;
}
if (single_escalation)
name = kasprintf(GFP_KERNEL, "kvm-%lld-%d",
vcpu->kvm->arch.lpid, xc->server_num);
else
name = kasprintf(GFP_KERNEL, "kvm-%lld-%d-%d",
vcpu->kvm->arch.lpid, xc->server_num, prio);
if (!name) {
pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
prio, xc->server_num);
rc = -ENOMEM;
goto error;
}
pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
IRQF_NO_THREAD, name, vcpu);
if (rc) {
pr_err("Failed to request escalation interrupt for queue %d of VCPU %d\n",
prio, xc->server_num);
goto error;
}
xc->esc_virq_names[prio] = name;
if (single_escalation) {
struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
vcpu->arch.xive_esc_raddr = xd->eoi_page;
vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
xd->flags |= XIVE_IRQ_FLAG_NO_EOI;
}
return 0;
error:
irq_dispose_mapping(xc->esc_virq[prio]);
xc->esc_virq[prio] = 0;
kfree(name);
return rc;
}
static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct kvmppc_xive *xive = xc->xive;
struct xive_q *q = &xc->queues[prio];
void *qpage;
int rc;
if (WARN_ON(q->qpage))
return 0;
qpage = (__be32 *)__get_free_pages(GFP_KERNEL, xive->q_page_order);
if (!qpage) {
pr_err("Failed to allocate queue %d for VCPU %d\n",
prio, xc->server_num);
return -ENOMEM;
}
memset(qpage, 0, 1 << xive->q_order);
rc = xive_native_configure_queue(xc->vp_id, q, prio, qpage,
xive->q_order, true);
if (rc)
pr_err("Failed to configure queue %d for VCPU %d\n",
prio, xc->server_num);
return rc;
}
static int xive_check_provisioning(struct kvm *kvm, u8 prio)
{
struct kvmppc_xive *xive = kvm->arch.xive;
struct kvm_vcpu *vcpu;
unsigned long i;
int rc;
lockdep_assert_held(&xive->lock);
if (xive->qmap & (1 << prio))
return 0;
pr_devel("Provisioning prio... %d\n", prio);
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!vcpu->arch.xive_vcpu)
continue;
rc = xive_provision_queue(vcpu, prio);
if (rc == 0 && !kvmppc_xive_has_single_escalation(xive))
kvmppc_xive_attach_escalation(vcpu, prio,
kvmppc_xive_has_single_escalation(xive));
if (rc)
return rc;
}
mb();
xive->qmap |= (1 << prio);
return 0;
}
static void xive_inc_q_pending(struct kvm *kvm, u32 server, u8 prio)
{
struct kvm_vcpu *vcpu;
struct kvmppc_xive_vcpu *xc;
struct xive_q *q;
vcpu = kvmppc_xive_find_server(kvm, server);
if (!vcpu) {
pr_warn("%s: Can't find server %d\n", __func__, server);
return;
}
xc = vcpu->arch.xive_vcpu;
if (WARN_ON(!xc))
return;
q = &xc->queues[prio];
atomic_inc(&q->pending_count);
}
static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct xive_q *q;
u32 max;
if (WARN_ON(!xc))
return -ENXIO;
if (!xc->valid)
return -ENXIO;
q = &xc->queues[prio];
if (WARN_ON(!q->qpage))
return -ENXIO;
max = (q->msk + 1) - XIVE_Q_GAP;
return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY;
}
int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
{
struct kvm_vcpu *vcpu;
unsigned long i;
int rc;
vcpu = kvmppc_xive_find_server(kvm, *server);
if (!vcpu) {
pr_devel("Can't find server %d\n", *server);
return -EINVAL;
}
pr_devel("Finding irq target on 0x%x/%d...\n", *server, prio);
rc = xive_try_pick_queue(vcpu, prio);
if (rc == 0)
return rc;
pr_devel(" .. failed, looking up candidate...\n");
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!vcpu->arch.xive_vcpu)
continue;
rc = xive_try_pick_queue(vcpu, prio);
if (rc == 0) {
*server = vcpu->arch.xive_vcpu->server_num;
pr_devel(" found on 0x%x/%d\n", *server, prio);
return rc;
}
}
pr_devel(" no available target !\n");
return -EBUSY;
}
static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
struct kvmppc_xive_src_block *sb,
struct kvmppc_xive_irq_state *state)
{
struct xive_irq_data *xd;
u32 hw_num;
u8 old_prio;
u64 val;
for (;;) {
arch_spin_lock(&sb->lock);
old_prio = state->guest_priority;
state->guest_priority = MASKED;
mb();
if (!state->in_eoi)
break;
state->guest_priority = old_prio;
arch_spin_unlock(&sb->lock);
}
if (old_prio == MASKED)
return old_prio;
kvmppc_xive_select_irq(state, &hw_num, &xd);
val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10);
state->old_p = !!(val & 2);
state->old_q = !!(val & 1);
xive_native_sync_source(hw_num);
return old_prio;
}
static void xive_lock_for_unmask(struct kvmppc_xive_src_block *sb,
struct kvmppc_xive_irq_state *state)
{
for (;;) {
arch_spin_lock(&sb->lock);
if (!state->in_eoi)
break;
arch_spin_unlock(&sb->lock);
}
}
static void xive_finish_unmask(struct kvmppc_xive *xive,
struct kvmppc_xive_src_block *sb,
struct kvmppc_xive_irq_state *state,
u8 prio)
{
struct xive_irq_data *xd;
u32 hw_num;
if (state->guest_priority != MASKED)
goto bail;
kvmppc_xive_select_irq(state, &hw_num, &xd);
if (state->old_q)
xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
if (!state->old_p)
xive_vm_source_eoi(hw_num, xd);
mb();
bail:
state->guest_priority = prio;
}
static int xive_target_interrupt(struct kvm *kvm,
struct kvmppc_xive_irq_state *state,
u32 server, u8 prio)
{
struct kvmppc_xive *xive = kvm->arch.xive;
u32 hw_num;
int rc;
rc = kvmppc_xive_select_target(kvm, &server, prio);
if (rc)
return rc;
if (state->act_priority != MASKED)
xive_inc_q_pending(kvm,
state->act_server,
state->act_priority);
state->act_priority = prio;
state->act_server = server;
kvmppc_xive_select_irq(state, &hw_num, NULL);
return xive_native_configure_irq(hw_num,
kvmppc_xive_vp(xive, server),
prio, state->number);
}
int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority)
{
struct kvmppc_xive *xive = kvm->arch.xive;
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
u8 new_act_prio;
int rc = 0;
u16 idx;
if (!xive)
return -ENODEV;
pr_devel("set_xive ! irq 0x%x server 0x%x prio %d\n",
irq, server, priority);
if (priority != MASKED) {
mutex_lock(&xive->lock);
rc = xive_check_provisioning(xive->kvm,
xive_prio_from_guest(priority));
mutex_unlock(&xive->lock);
}
if (rc) {
pr_devel(" provisioning failure %d !\n", rc);
return rc;
}
sb = kvmppc_xive_find_source(xive, irq, &idx);
if (!sb)
return -EINVAL;
state = &sb->irq_state[idx];
if (priority == MASKED)
xive_lock_and_mask(xive, sb, state);
else
xive_lock_for_unmask(sb, state);
new_act_prio = state->act_priority;
if (priority != MASKED)
new_act_prio = xive_prio_from_guest(priority);
pr_devel(" new_act_prio=%x act_server=%x act_prio=%x\n",
new_act_prio, state->act_server, state->act_priority);
if (new_act_prio != MASKED &&
(state->act_server != server ||
state->act_priority != new_act_prio))
rc = xive_target_interrupt(kvm, state, server, new_act_prio);
if (priority != MASKED)
xive_finish_unmask(xive, sb, state, priority);
state->saved_priority = priority;
arch_spin_unlock(&sb->lock);
return rc;
}
int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
u32 *priority)
{
struct kvmppc_xive *xive = kvm->arch.xive;
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
u16 idx;
if (!xive)
return -ENODEV;
sb = kvmppc_xive_find_source(xive, irq, &idx);
if (!sb)
return -EINVAL;
state = &sb->irq_state[idx];
arch_spin_lock(&sb->lock);
*server = state->act_server;
*priority = state->guest_priority;
arch_spin_unlock(&sb->lock);
return 0;
}
int kvmppc_xive_int_on(struct kvm *kvm, u32 irq)
{
struct kvmppc_xive *xive = kvm->arch.xive;
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
u16 idx;
if (!xive)
return -ENODEV;
sb = kvmppc_xive_find_source(xive, irq, &idx);
if (!sb)
return -EINVAL;
state = &sb->irq_state[idx];
pr_devel("int_on(irq=0x%x)\n", irq);
if (state->act_priority == MASKED) {
pr_devel("int_on on untargetted interrupt\n");
return -EINVAL;
}
if (state->saved_priority == MASKED)
return 0;
xive_lock_for_unmask(sb, state);
xive_finish_unmask(xive, sb, state, state->saved_priority);
arch_spin_unlock(&sb->lock);
return 0;
}
int kvmppc_xive_int_off(struct kvm *kvm, u32 irq)
{
struct kvmppc_xive *xive = kvm->arch.xive;
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
u16 idx;
if (!xive)
return -ENODEV;
sb = kvmppc_xive_find_source(xive, irq, &idx);
if (!sb)
return -EINVAL;
state = &sb->irq_state[idx];
pr_devel("int_off(irq=0x%x)\n", irq);
state->saved_priority = xive_lock_and_mask(xive, sb, state);
arch_spin_unlock(&sb->lock);
return 0;
}
static bool xive_restore_pending_irq(struct kvmppc_xive *xive, u32 irq)
{
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
u16 idx;
sb = kvmppc_xive_find_source(xive, irq, &idx);
if (!sb)
return false;
state = &sb->irq_state[idx];
if (!state->valid)
return false;
xive_irq_trigger(&state->ipi_data);
return true;
}
u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
if (!xc)
return 0;
return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
(u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
(u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
}
int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
u8 cppr, mfrr;
u32 xisr;
if (!xc || !xive)
return -ENOENT;
cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
KVM_REG_PPC_ICP_XISR_MASK;
mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
pr_devel("set_icp vcpu %d cppr=0x%x mfrr=0x%x xisr=0x%x\n",
xc->server_num, cppr, mfrr, xisr);
if (WARN_ON(vcpu->arch.xive_pushed))
return -EIO;
vcpu->arch.xive_saved_state.cppr = cppr;
xc->hw_cppr = xc->cppr = cppr;
xc->mfrr = mfrr;
if (mfrr < cppr)
xive_irq_trigger(&xc->vp_ipi_data);
if (xisr > XICS_IPI && !xive_restore_pending_irq(xive, xisr)) {
xc->delayed_irq = xisr;
xive->delayed_irqs++;
pr_devel(" xisr restore delayed\n");
}
return 0;
}
int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
unsigned long host_irq)
{
struct kvmppc_xive *xive = kvm->arch.xive;
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
struct irq_data *host_data =
irq_domain_get_irq_data(irq_get_default_domain(), host_irq);
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data);
u16 idx;
u8 prio;
int rc;
if (!xive)
return -ENODEV;
pr_debug("%s: GIRQ 0x%lx host IRQ %ld XIVE HW IRQ 0x%x\n",
__func__, guest_irq, host_irq, hw_irq);
sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
if (!sb)
return -EINVAL;
state = &sb->irq_state[idx];
rc = irq_set_vcpu_affinity(host_irq, state);
if (rc) {
pr_err("Failed to set VCPU affinity for host IRQ %ld\n", host_irq);
return rc;
}
prio = xive_lock_and_mask(xive, sb, state);
pr_devel(" old IPI prio %02x P:%d Q:%d\n", prio,
state->old_p, state->old_q);
xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
if (xive->ops && xive->ops->reset_mapped)
xive->ops->reset_mapped(kvm, guest_irq);
state->pt_number = hw_irq;
state->pt_data = irq_data_get_irq_handler_data(host_data);
xive_native_configure_irq(hw_irq,
kvmppc_xive_vp(xive, state->act_server),
state->act_priority, state->number);
if (prio != MASKED && !state->old_p)
xive_vm_source_eoi(hw_irq, state->pt_data);
state->old_p = state->old_q = false;
mb();
state->guest_priority = prio;
arch_spin_unlock(&sb->lock);
return 0;
}
EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped);
int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
unsigned long host_irq)
{
struct kvmppc_xive *xive = kvm->arch.xive;
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
u16 idx;
u8 prio;
int rc;
if (!xive)
return -ENODEV;
pr_debug("%s: GIRQ 0x%lx host IRQ %ld\n", __func__, guest_irq, host_irq);
sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
if (!sb)
return -EINVAL;
state = &sb->irq_state[idx];
prio = xive_lock_and_mask(xive, sb, state);
pr_devel(" old IRQ prio %02x P:%d Q:%d\n", prio,
state->old_p, state->old_q);
if (state->old_p)
xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_11);
rc = irq_set_vcpu_affinity(host_irq, NULL);
if (rc) {
pr_err("Failed to clr VCPU affinity for host IRQ %ld\n", host_irq);
return rc;
}
state->pt_number = 0;
state->pt_data = NULL;
if (xive->ops && xive->ops->reset_mapped) {
xive->ops->reset_mapped(kvm, guest_irq);
}
xive_native_configure_irq(state->ipi_number,
kvmppc_xive_vp(xive, state->act_server),
state->act_priority, state->number);
if (prio == MASKED || state->old_p)
xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_10);
else
xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_00);
mb();
state->guest_priority = prio;
arch_spin_unlock(&sb->lock);
return 0;
}
EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped);
void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct kvm *kvm = vcpu->kvm;
struct kvmppc_xive *xive = kvm->arch.xive;
int i, j;
for (i = 0; i <= xive->max_sbid; i++) {
struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
if (!sb)
continue;
for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
if (!state->valid)
continue;
if (state->act_priority == MASKED)
continue;
if (state->act_server != xc->server_num)
continue;
arch_spin_lock(&sb->lock);
state->act_priority = MASKED;
xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
if (state->pt_number) {
xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
xive_native_configure_irq(state->pt_number, 0, MASKED, 0);
}
arch_spin_unlock(&sb->lock);
}
}
if (vcpu->arch.xive_esc_on) {
__raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
XIVE_ESB_SET_PQ_01));
vcpu->arch.xive_esc_on = false;
}
vcpu->arch.xive_esc_vaddr = 0;
vcpu->arch.xive_esc_raddr = 0;
}
void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
xd->stale_p = false;
smp_mb();
if (!vcpu->arch.xive_esc_on)
xd->stale_p = true;
}
void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
int i;
if (!kvmppc_xics_enabled(vcpu))
return;
if (!xc)
return;
pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
xc->valid = false;
kvmppc_xive_disable_vcpu_interrupts(vcpu);
xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
if (xc->esc_virq[i]) {
if (kvmppc_xive_has_single_escalation(xc->xive))
xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
}
}
xive_native_disable_vp(xc->vp_id);
vcpu->arch.xive_cam_word = 0;
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
struct xive_q *q = &xc->queues[i];
xive_native_disable_queue(xc->vp_id, q, i);
if (q->qpage) {
free_pages((unsigned long)q->qpage,
xive->q_page_order);
q->qpage = NULL;
}
}
if (xc->vp_ipi) {
xive_cleanup_irq_data(&xc->vp_ipi_data);
xive_native_free_irq(xc->vp_ipi);
}
kfree(xc);
vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
vcpu->arch.xive_vcpu = NULL;
}
static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu)
{
return kvmppc_pack_vcpu_id(xive->kvm, cpu) < xive->nr_servers;
}
int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp)
{
u32 vp_id;
if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) {
pr_devel("Out of bounds !\n");
return -EINVAL;
}
if (xive->vp_base == XIVE_INVALID_VP) {
xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers);
pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers);
if (xive->vp_base == XIVE_INVALID_VP)
return -ENOSPC;
}
vp_id = kvmppc_xive_vp(xive, cpu);
if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
pr_devel("Duplicate !\n");
return -EEXIST;
}
*vp = vp_id;
return 0;
}
int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu)
{
struct kvmppc_xive *xive = dev->private;
struct kvmppc_xive_vcpu *xc;
int i, r = -EBUSY;
u32 vp_id;
pr_devel("connect_vcpu(cpu=%d)\n", cpu);
if (dev->ops != &kvm_xive_ops) {
pr_devel("Wrong ops !\n");
return -EPERM;
}
if (xive->kvm != vcpu->kvm)
return -EPERM;
if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY;
mutex_lock(&xive->lock);
r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id);
if (r)
goto bail;
xc = kzalloc(sizeof(*xc), GFP_KERNEL);
if (!xc) {
r = -ENOMEM;
goto bail;
}
vcpu->arch.xive_vcpu = xc;
xc->xive = xive;
xc->vcpu = vcpu;
xc->server_num = cpu;
xc->vp_id = vp_id;
xc->mfrr = 0xff;
xc->valid = true;
r = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
if (r)
goto bail;
if (!kvmppc_xive_check_save_restore(vcpu)) {
pr_err("inconsistent save-restore setup for VCPU %d\n", cpu);
r = -EIO;
goto bail;
}
vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
xc->vp_ipi = xive_native_alloc_irq();
if (!xc->vp_ipi) {
pr_err("Failed to allocate xive irq for VCPU IPI\n");
r = -EIO;
goto bail;
}
pr_devel(" IPI=0x%x\n", xc->vp_ipi);
r = xive_native_populate_irq_data(xc->vp_ipi, &xc->vp_ipi_data);
if (r)
goto bail;
r = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive));
if (r) {
pr_err("Failed to enable VP in OPAL, err %d\n", r);
goto bail;
}
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
struct xive_q *q = &xc->queues[i];
if (i == 7 && kvmppc_xive_has_single_escalation(xive))
break;
if (xive->qmap & (1 << i)) {
r = xive_provision_queue(vcpu, i);
if (r == 0 && !kvmppc_xive_has_single_escalation(xive))
kvmppc_xive_attach_escalation(
vcpu, i, kvmppc_xive_has_single_escalation(xive));
if (r)
goto bail;
} else {
r = xive_native_configure_queue(xc->vp_id,
q, i, NULL, 0, true);
if (r) {
pr_err("Failed to configure queue %d for VCPU %d\n",
i, cpu);
goto bail;
}
}
}
r = kvmppc_xive_attach_escalation(vcpu, 0, kvmppc_xive_has_single_escalation(xive));
if (r)
goto bail;
r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
if (!r)
xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_00);
bail:
mutex_unlock(&xive->lock);
if (r) {
kvmppc_xive_cleanup_vcpu(vcpu);
return r;
}
vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
return 0;
}
static void xive_pre_save_set_queued(struct kvmppc_xive *xive, u32 irq)
{
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
u16 idx;
sb = kvmppc_xive_find_source(xive, irq, &idx);
if (!sb)
return;
state = &sb->irq_state[idx];
if (!state->valid) {
pr_err("invalid irq 0x%x in cpu queue!\n", irq);
return;
}
if (!state->saved_p)
pr_err("Interrupt 0x%x is marked in a queue but P not set !\n", irq);
state->in_queue = true;
}
static void xive_pre_save_mask_irq(struct kvmppc_xive *xive,
struct kvmppc_xive_src_block *sb,
u32 irq)
{
struct kvmppc_xive_irq_state *state = &sb->irq_state[irq];
if (!state->valid)
return;
state->saved_scan_prio = xive_lock_and_mask(xive, sb, state);
state->saved_p = state->old_p;
state->saved_q = state->old_q;
arch_spin_unlock(&sb->lock);
}
static void xive_pre_save_unmask_irq(struct kvmppc_xive *xive,
struct kvmppc_xive_src_block *sb,
u32 irq)
{
struct kvmppc_xive_irq_state *state = &sb->irq_state[irq];
if (!state->valid)
return;
xive_lock_for_unmask(sb, state);
if (state->saved_scan_prio != MASKED)
xive_finish_unmask(xive, sb, state, state->saved_scan_prio);
arch_spin_unlock(&sb->lock);
}
static void xive_pre_save_queue(struct kvmppc_xive *xive, struct xive_q *q)
{
u32 idx = q->idx;
u32 toggle = q->toggle;
u32 irq;
do {
irq = __xive_read_eq(q->qpage, q->msk, &idx, &toggle);
if (irq > XICS_IPI)
xive_pre_save_set_queued(xive, irq);
} while(irq);
}
static void xive_pre_save_scan(struct kvmppc_xive *xive)
{
struct kvm_vcpu *vcpu = NULL;
unsigned long i;
int j;
for (i = 0; i <= xive->max_sbid; i++) {
struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
if (!sb)
continue;
for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++)
xive_pre_save_mask_irq(xive, sb, j);
}
kvm_for_each_vcpu(i, vcpu, xive->kvm) {
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
if (!xc)
continue;
for (j = 0; j < KVMPPC_XIVE_Q_COUNT; j++) {
if (xc->queues[j].qpage)
xive_pre_save_queue(xive, &xc->queues[j]);
}
}
for (i = 0; i <= xive->max_sbid; i++) {
struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
if (!sb)
continue;
for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++)
xive_pre_save_unmask_irq(xive, sb, j);
}
}
static void xive_post_save_scan(struct kvmppc_xive *xive)
{
u32 i, j;
for (i = 0; i <= xive->max_sbid; i++) {
struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
if (!sb)
continue;
for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++)
sb->irq_state[j].in_queue = false;
}
xive->saved_src_count = 0;
}
static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr)
{
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
u64 __user *ubufp = (u64 __user *) addr;
u64 val, prio;
u16 idx;
sb = kvmppc_xive_find_source(xive, irq, &idx);
if (!sb)
return -ENOENT;
state = &sb->irq_state[idx];
if (!state->valid)
return -ENOENT;
pr_devel("get_source(%ld)...\n", irq);
if (xive->saved_src_count == 0)
xive_pre_save_scan(xive);
xive->saved_src_count++;
val = state->act_server;
prio = state->saved_scan_prio;
if (prio == MASKED) {
val |= KVM_XICS_MASKED;
prio = state->saved_priority;
}
val |= prio << KVM_XICS_PRIORITY_SHIFT;
if (state->lsi) {
val |= KVM_XICS_LEVEL_SENSITIVE;
if (state->saved_p)
val |= KVM_XICS_PENDING;
} else {
if (state->saved_p)
val |= KVM_XICS_PRESENTED;
if (state->saved_q)
val |= KVM_XICS_QUEUED;
if (state->in_queue || (prio == MASKED && state->saved_q))
val |= KVM_XICS_PENDING;
}
if (xive->saved_src_count == xive->src_count)
xive_post_save_scan(xive);
if (put_user(val, ubufp))
return -EFAULT;
return 0;
}
struct kvmppc_xive_src_block *kvmppc_xive_create_src_block(
struct kvmppc_xive *xive, int irq)
{
struct kvmppc_xive_src_block *sb;
int i, bid;
bid = irq >> KVMPPC_XICS_ICS_SHIFT;
mutex_lock(&xive->lock);
if (xive->src_blocks[bid])
goto out;
sb = kzalloc(sizeof(*sb), GFP_KERNEL);
if (!sb)
goto out;
sb->id = bid;
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i;
sb->irq_state[i].eisn = 0;
sb->irq_state[i].guest_priority = MASKED;
sb->irq_state[i].saved_priority = MASKED;
sb->irq_state[i].act_priority = MASKED;
}
smp_wmb();
xive->src_blocks[bid] = sb;
if (bid > xive->max_sbid)
xive->max_sbid = bid;
out:
mutex_unlock(&xive->lock);
return xive->src_blocks[bid];
}
static bool xive_check_delayed_irq(struct kvmppc_xive *xive, u32 irq)
{
struct kvm *kvm = xive->kvm;
struct kvm_vcpu *vcpu = NULL;
unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
if (!xc)
continue;
if (xc->delayed_irq == irq) {
xc->delayed_irq = 0;
xive->delayed_irqs--;
return true;
}
}
return false;
}
static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
{
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
u64 __user *ubufp = (u64 __user *) addr;
u16 idx;
u64 val;
u8 act_prio, guest_prio;
u32 server;
int rc = 0;
if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
return -ENOENT;
pr_devel("set_source(irq=0x%lx)\n", irq);
sb = kvmppc_xive_find_source(xive, irq, &idx);
if (!sb) {
pr_devel("No source, creating source block...\n");
sb = kvmppc_xive_create_src_block(xive, irq);
if (!sb) {
pr_devel("Failed to create block...\n");
return -ENOMEM;
}
}
state = &sb->irq_state[idx];
if (get_user(val, ubufp)) {
pr_devel("fault getting user info !\n");
return -EFAULT;
}
server = val & KVM_XICS_DESTINATION_MASK;
guest_prio = val >> KVM_XICS_PRIORITY_SHIFT;
pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n",
val, server, guest_prio);
if (!state->ipi_number) {
state->ipi_number = xive_native_alloc_irq();
if (state->ipi_number == 0) {
pr_devel("Failed to allocate IPI !\n");
return -ENOMEM;
}
xive_native_populate_irq_data(state->ipi_number, &state->ipi_data);
pr_devel(" src_ipi=0x%x\n", state->ipi_number);
}
state->guest_priority = 0;
xive_lock_and_mask(xive, sb, state);
act_prio = xive_prio_from_guest(guest_prio);
state->act_priority = MASKED;
arch_spin_unlock(&sb->lock);
if (act_prio != MASKED) {
mutex_lock(&xive->lock);
rc = xive_check_provisioning(xive->kvm, act_prio);
mutex_unlock(&xive->lock);
if (rc == 0)
rc = xive_target_interrupt(xive->kvm, state,
server, act_prio);
}
if (xive->delayed_irqs && xive_check_delayed_irq(xive, irq)) {
val |= KVM_XICS_PENDING;
pr_devel(" Found delayed ! forcing PENDING !\n");
}
state->old_p = false;
state->old_q = false;
state->lsi = false;
state->asserted = false;
if (val & KVM_XICS_LEVEL_SENSITIVE) {
state->lsi = true;
if (val & KVM_XICS_PENDING)
state->asserted = true;
pr_devel(" LSI ! Asserted=%d\n", state->asserted);
}
if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
state->old_p = true;
if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
state->old_q = true;
pr_devel(" P=%d, Q=%d\n", state->old_p, state->old_q);
if (val & KVM_XICS_MASKED) {
pr_devel(" masked, saving prio\n");
state->guest_priority = MASKED;
state->saved_priority = guest_prio;
} else {
pr_devel(" unmasked, restoring to prio %d\n", guest_prio);
xive_finish_unmask(xive, sb, state, guest_prio);
state->saved_priority = guest_prio;
}
if (!state->valid)
xive->src_count++;
state->valid = true;
return 0;
}
int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status)
{
struct kvmppc_xive *xive = kvm->arch.xive;
struct kvmppc_xive_src_block *sb;
struct kvmppc_xive_irq_state *state;
u16 idx;
if (!xive)
return -ENODEV;
sb = kvmppc_xive_find_source(xive, irq, &idx);
if (!sb)
return -EINVAL;
state = &sb->irq_state[idx];
if (!state->valid)
return -EINVAL;
if (state->pt_number)
return -EINVAL;
if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL)
state->asserted = true;
else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
state->asserted = false;
return 0;
}
xive_irq_trigger(&state->ipi_data);
return 0;
}
int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr)
{
u32 __user *ubufp = (u32 __user *) addr;
u32 nr_servers;
int rc = 0;
if (get_user(nr_servers, ubufp))
return -EFAULT;
pr_devel("%s nr_servers=%u\n", __func__, nr_servers);
if (!nr_servers || nr_servers > KVM_MAX_VCPU_IDS)
return -EINVAL;
mutex_lock(&xive->lock);
if (xive->vp_base != XIVE_INVALID_VP)
rc = -EBUSY;
else if (nr_servers > KVM_MAX_VCPUS)
xive->nr_servers = KVM_MAX_VCPUS;
else
xive->nr_servers = nr_servers;
mutex_unlock(&xive->lock);
return rc;
}
static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
struct kvmppc_xive *xive = dev->private;
switch (attr->group) {
case KVM_DEV_XICS_GRP_SOURCES:
return xive_set_source(xive, attr->attr, attr->addr);
case KVM_DEV_XICS_GRP_CTRL:
switch (attr->attr) {
case KVM_DEV_XICS_NR_SERVERS:
return kvmppc_xive_set_nr_servers(xive, attr->addr);
}
}
return -ENXIO;
}
static int xive_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
struct kvmppc_xive *xive = dev->private;
switch (attr->group) {
case KVM_DEV_XICS_GRP_SOURCES:
return xive_get_source(xive, attr->attr, attr->addr);
}
return -ENXIO;
}
static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
switch (attr->group) {
case KVM_DEV_XICS_GRP_SOURCES:
if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
attr->attr < KVMPPC_XICS_NR_IRQS)
return 0;
break;
case KVM_DEV_XICS_GRP_CTRL:
switch (attr->attr) {
case KVM_DEV_XICS_NR_SERVERS:
return 0;
}
}
return -ENXIO;
}
static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd)
{
xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
xive_native_configure_irq(hw_num, 0, MASKED, 0);
}
void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
{
int i;
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
if (!state->valid)
continue;
kvmppc_xive_cleanup_irq(state->ipi_number, &state->ipi_data);
xive_cleanup_irq_data(&state->ipi_data);
xive_native_free_irq(state->ipi_number);
if (state->pt_number)
kvmppc_xive_cleanup_irq(state->pt_number, state->pt_data);
state->valid = false;
}
}
static void kvmppc_xive_release(struct kvm_device *dev)
{
struct kvmppc_xive *xive = dev->private;
struct kvm *kvm = xive->kvm;
struct kvm_vcpu *vcpu;
unsigned long i;
pr_devel("Releasing xive device\n");
debugfs_remove(xive->dentry);
kvm_for_each_vcpu(i, vcpu, kvm) {
mutex_lock(&vcpu->mutex);
kvmppc_xive_cleanup_vcpu(vcpu);
mutex_unlock(&vcpu->mutex);
}
kvm->arch.xive = NULL;
for (i = 0; i <= xive->max_sbid; i++) {
if (xive->src_blocks[i])
kvmppc_xive_free_sources(xive->src_blocks[i]);
kfree(xive->src_blocks[i]);
xive->src_blocks[i] = NULL;
}
if (xive->vp_base != XIVE_INVALID_VP)
xive_native_free_vp_block(xive->vp_base);
kfree(dev);
}
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type)
{
struct kvmppc_xive **kvm_xive_device = type == KVM_DEV_TYPE_XIVE ?
&kvm->arch.xive_devices.native :
&kvm->arch.xive_devices.xics_on_xive;
struct kvmppc_xive *xive = *kvm_xive_device;
if (!xive) {
xive = kzalloc(sizeof(*xive), GFP_KERNEL);
*kvm_xive_device = xive;
} else {
memset(xive, 0, sizeof(*xive));
}
return xive;
}
static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
{
struct kvmppc_xive *xive;
struct kvm *kvm = dev->kvm;
pr_devel("Creating xive for partition\n");
if (kvm->arch.xive)
return -EEXIST;
xive = kvmppc_xive_get_device(kvm, type);
if (!xive)
return -ENOMEM;
dev->private = xive;
xive->dev = dev;
xive->kvm = kvm;
mutex_init(&xive->lock);
xive->q_order = xive_native_default_eq_shift();
if (xive->q_order < PAGE_SHIFT)
xive->q_page_order = 0;
else
xive->q_page_order = xive->q_order - PAGE_SHIFT;
xive->vp_base = XIVE_INVALID_VP;
xive->nr_servers = KVM_MAX_VCPUS;
if (xive_native_has_single_escalation())
xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;
if (xive_native_has_save_restore())
xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE;
kvm->arch.xive = xive;
return 0;
}
int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
switch (req) {
case H_XIRR:
return xive_vm_h_xirr(vcpu);
case H_CPPR:
return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
case H_EOI:
return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
case H_IPI:
return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5));
case H_IPOLL:
return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
case H_XIRR_X:
xive_vm_h_xirr(vcpu);
kvmppc_set_gpr(vcpu, 5, get_tb() + kvmppc_get_tb_offset(vcpu));
return H_SUCCESS;
}
return H_UNSUPPORTED;
}
EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall);
int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
unsigned int i;
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
struct xive_q *q = &xc->queues[i];
u32 i0, i1, idx;
if (!q->qpage && !xc->esc_virq[i])
continue;
if (q->qpage) {
seq_printf(m, " q[%d]: ", i);
idx = q->idx;
i0 = be32_to_cpup(q->qpage + idx);
idx = (idx + 1) & q->msk;
i1 = be32_to_cpup(q->qpage + idx);
seq_printf(m, "T=%d %08x %08x...\n", q->toggle,
i0, i1);
}
if (xc->esc_virq[i]) {
struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
struct xive_irq_data *xd =
irq_data_get_irq_handler_data(d);
u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
seq_printf(m, " ESC %d %c%c EOI @%llx",
xc->esc_virq[i],
(pq & XIVE_ESB_VAL_P) ? 'P' : '-',
(pq & XIVE_ESB_VAL_Q) ? 'Q' : '-',
xd->eoi_page);
seq_puts(m, "\n");
}
}
return 0;
}
void kvmppc_xive_debug_show_sources(struct seq_file *m,
struct kvmppc_xive_src_block *sb)
{
int i;
seq_puts(m, " LISN HW/CHIP TYPE PQ EISN CPU/PRIO\n");
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
struct xive_irq_data *xd;
u64 pq;
u32 hw_num;
if (!state->valid)
continue;
kvmppc_xive_select_irq(state, &hw_num, &xd);
pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
seq_printf(m, "%08x %08x/%02x", state->number, hw_num,
xd->src_chip);
if (state->lsi)
seq_printf(m, " %cLSI", state->asserted ? '^' : ' ');
else
seq_puts(m, " MSI");
seq_printf(m, " %s %c%c %08x % 4d/%d",
state->ipi_number == hw_num ? "IPI" : " PT",
pq & XIVE_ESB_VAL_P ? 'P' : '-',
pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
state->eisn, state->act_server,
state->act_priority);
seq_puts(m, "\n");
}
}
static int xive_debug_show(struct seq_file *m, void *private)
{
struct kvmppc_xive *xive = m->private;
struct kvm *kvm = xive->kvm;
struct kvm_vcpu *vcpu;
u64 t_rm_h_xirr = 0;
u64 t_rm_h_ipoll = 0;
u64 t_rm_h_cppr = 0;
u64 t_rm_h_eoi = 0;
u64 t_rm_h_ipi = 0;
u64 t_vm_h_xirr = 0;
u64 t_vm_h_ipoll = 0;
u64 t_vm_h_cppr = 0;
u64 t_vm_h_eoi = 0;
u64 t_vm_h_ipi = 0;
unsigned long i;
if (!kvm)
return 0;
seq_puts(m, "=========\nVCPU state\n=========\n");
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
if (!xc)
continue;
seq_printf(m, "VCPU %d: VP:%#x/%02x\n"
" CPPR:%#x HWCPPR:%#x MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
xc->server_num, xc->vp_id, xc->vp_chip_id,
xc->cppr, xc->hw_cppr,
xc->mfrr, xc->pending,
xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
kvmppc_xive_debug_show_queues(m, vcpu);
t_rm_h_xirr += xc->stat_rm_h_xirr;
t_rm_h_ipoll += xc->stat_rm_h_ipoll;
t_rm_h_cppr += xc->stat_rm_h_cppr;
t_rm_h_eoi += xc->stat_rm_h_eoi;
t_rm_h_ipi += xc->stat_rm_h_ipi;
t_vm_h_xirr += xc->stat_vm_h_xirr;
t_vm_h_ipoll += xc->stat_vm_h_ipoll;
t_vm_h_cppr += xc->stat_vm_h_cppr;
t_vm_h_eoi += xc->stat_vm_h_eoi;
t_vm_h_ipi += xc->stat_vm_h_ipi;
}
seq_puts(m, "Hcalls totals\n");
seq_printf(m, " H_XIRR R=%10lld V=%10lld\n", t_rm_h_xirr, t_vm_h_xirr);
seq_printf(m, " H_IPOLL R=%10lld V=%10lld\n", t_rm_h_ipoll, t_vm_h_ipoll);
seq_printf(m, " H_CPPR R=%10lld V=%10lld\n", t_rm_h_cppr, t_vm_h_cppr);
seq_printf(m, " H_EOI R=%10lld V=%10lld\n", t_rm_h_eoi, t_vm_h_eoi);
seq_printf(m, " H_IPI R=%10lld V=%10lld\n", t_rm_h_ipi, t_vm_h_ipi);
seq_puts(m, "=========\nSources\n=========\n");
for (i = 0; i <= xive->max_sbid; i++) {
struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
if (sb) {
arch_spin_lock(&sb->lock);
kvmppc_xive_debug_show_sources(m, sb);
arch_spin_unlock(&sb->lock);
}
}
return 0;
}
DEFINE_SHOW_ATTRIBUTE(xive_debug);
static void xive_debugfs_init(struct kvmppc_xive *xive)
{
xive->dentry = debugfs_create_file("xive", S_IRUGO, xive->kvm->debugfs_dentry,
xive, &xive_debug_fops);
pr_debug("%s: created\n", __func__);
}
static void kvmppc_xive_init(struct kvm_device *dev)
{
struct kvmppc_xive *xive = dev->private;
xive_debugfs_init(xive);
}
struct kvm_device_ops kvm_xive_ops = {
.name = "kvm-xive",
.create = kvmppc_xive_create,
.init = kvmppc_xive_init,
.release = kvmppc_xive_release,
.set_attr = xive_set_attr,
.get_attr = xive_get_attr,
.has_attr = xive_has_attr,
};