GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/kvm/book3s_xive_native.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <linux/irqdomain.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

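/*
 * ESB management page load helper: performs an MMIO load at the given
 * ESB offset and returns the PQ state byte of the source.
 */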
static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	/*
	 * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10
	 * load operation, so there is no need to enforce load-after-store
	 * ordering.
	 */

	val = in_be64(xd->eoi_mmio + offset);
	return (u8)val;
}

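/* Disable the EQ in HW and release the queue page backing it, if any. */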
static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct xive_q *q = &xc->queues[prio];

	xive_native_disable_queue(xc->vp_id, q, prio);
	if (q->qpage) {
		put_page(virt_to_page(q->qpage));
		q->qpage = NULL;
	}
}

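/*
 * Configure the HW queue. The reference on the previous queue page,
 * if any, is only dropped once the new configuration is in place.
 */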
static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
					      u8 prio, __be32 *qpage,
					      u32 order, bool can_escalate)
{
	int rc;
	__be32 *qpage_prev = q->qpage;

	rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
					 can_escalate);
	if (rc)
		return rc;

	if (qpage_prev)
		put_page(virt_to_page(qpage_prev));

	return rc;
}

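/*
 * Tear down the XIVE state of a VCPU: free the escalation interrupts,
 * disable the VP and release the queues.
 */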
void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	int i;

	if (!kvmppc_xive_enabled(vcpu))
		return;

	if (!xc)
		return;

	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

	/* Ensure no interrupt is still routed to that VP */
	xc->valid = false;
	kvmppc_xive_disable_vcpu_interrupts(vcpu);

	/* Free escalations */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		/* Free the escalation irq */
		if (xc->esc_virq[i]) {
			if (kvmppc_xive_has_single_escalation(xc->xive))
				xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]);
			free_irq(xc->esc_virq[i], vcpu);
			irq_dispose_mapping(xc->esc_virq[i]);
			kfree(xc->esc_virq_names[i]);
			xc->esc_virq[i] = 0;
		}
	}

	/* Disable the VP */
	xive_native_disable_vp(xc->vp_id);

	/* Clear the cam word so guest entry won't try to push context */
	vcpu->arch.xive_cam_word = 0;

	/* Free the queues */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		kvmppc_xive_native_cleanup_queue(vcpu, i);
	}

	/* Free the VP */
	kfree(xc);

	/* Cleanup the vcpu */
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
	vcpu->arch.xive_vcpu = NULL;
}

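/*
 * Connect a VCPU to the device: allocate the per-VCPU XIVE state,
 * enable the VP at the OPAL level and initialize the fields used by
 * the assembly context push/pull code.
 */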
int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
				    struct kvm_vcpu *vcpu, u32 server_num)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_vcpu *xc = NULL;
	int rc;
	u32 vp_id;

	pr_devel("native_connect_vcpu(server=%d)\n", server_num);

	if (dev->ops != &kvm_xive_native_ops) {
		pr_devel("Wrong ops !\n");
		return -EPERM;
	}
	if (xive->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
		return -EBUSY;

	mutex_lock(&xive->lock);

	rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
	if (rc)
		goto bail;

	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
	if (!xc) {
		rc = -ENOMEM;
		goto bail;
	}

	vcpu->arch.xive_vcpu = xc;
	xc->xive = xive;
	xc->vcpu = vcpu;
	xc->server_num = server_num;

	xc->vp_id = vp_id;
	xc->valid = true;
	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
	if (rc) {
		pr_err("Failed to get VP info from OPAL: %d\n", rc);
		goto bail;
	}

	if (!kvmppc_xive_check_save_restore(vcpu)) {
		pr_err("inconsistent save-restore setup for VCPU %d\n", server_num);
		rc = -EIO;
		goto bail;
	}

	/*
	 * Enable the VP first as the single escalation mode will
	 * affect the escalation interrupt numbering
	 */
	rc = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive));
	if (rc) {
		pr_err("Failed to enable VP in OPAL: %d\n", rc);
		goto bail;
	}

	/* Configure VCPU fields for use by assembly push/pull */
	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

	/* TODO: reset all queues to a clean state ? */
bail:
	mutex_unlock(&xive->lock);
	if (rc)
		kvmppc_xive_native_cleanup_vcpu(vcpu);

	return rc;
}

/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
	struct kvmppc_xive *xive = kvm->arch.xive;
	pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

	if (irq >= KVMPPC_XIVE_NR_IRQS)
		return -EINVAL;

	/*
	 * Clear the ESB pages of the IRQ number being mapped (or
	 * unmapped) into the guest and let the VM fault handler
	 * repopulate with the appropriate ESB pages (device or IC)
	 */
	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
	mutex_lock(&xive->mapping_lock);
	if (xive->mapping)
		unmap_mapping_range(xive->mapping,
				    esb_pgoff << PAGE_SHIFT,
				    2ull << PAGE_SHIFT, 1);
	mutex_unlock(&xive->mapping_lock);
	return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
	.reset_mapped = kvmppc_xive_native_reset_mapped,
};

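/*
 * Fault handler for the ESB mapping: compute the source from the
 * faulting offset and insert its trigger page (even page) or its
 * EOI/management page (odd page).
 */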
static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct kvm_device *dev = vma->vm_file->private_data;
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	u64 page;
	unsigned long irq;
	u64 page_offset;

	/*
	 * Linux/KVM uses a two-page ESB setting, one for trigger and
	 * one for EOI
	 */
	page_offset = vmf->pgoff - vma->vm_pgoff;
	irq = page_offset / 2;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb) {
		pr_devel("%s: source %lx not found !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	state = &sb->irq_state[src];

	/* Some sanity checking */
	if (!state->valid) {
		pr_devel("%s: source %lx invalid !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	kvmppc_xive_select_irq(state, &hw_num, &xd);

	arch_spin_lock(&sb->lock);

	/*
	 * first/even page is for trigger
	 * second/odd page is for EOI and management.
	 */
	page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
	arch_spin_unlock(&sb->lock);

	if (WARN_ON(!page)) {
		pr_err("%s: accessing invalid ESB page for source %lx !\n",
		       __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
	.fault = xive_native_esb_fault,
};

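/*
 * Fault handler for the TIMA mapping: only the OS page is exposed to
 * the guest. Accesses to the HW and HV pages, and to the USER page
 * which is not supported yet, are rejected.
 */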
static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;

	switch (vmf->pgoff - vma->vm_pgoff) {
	case 0: /* HW - forbid access */
	case 1: /* HV - forbid access */
		return VM_FAULT_SIGBUS;
	case 2: /* OS */
		vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
		return VM_FAULT_NOPAGE;
	case 3: /* USER - TODO */
	default:
		return VM_FAULT_SIGBUS;
	}
}

static const struct vm_operations_struct xive_native_tima_vmops = {
	.fault = xive_native_tima_fault,
};

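/*
 * mmap handler of the device: exposes the TIMA pages and the ESB
 * pages of the sources at fixed page offsets.
 */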
static int kvmppc_xive_native_mmap(struct kvm_device *dev,
				   struct vm_area_struct *vma)
{
	struct kvmppc_xive *xive = dev->private;

	/* We only allow mappings at fixed offset for now */
	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
		if (vma_pages(vma) > 4)
			return -EINVAL;
		vma->vm_ops = &xive_native_tima_vmops;
	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
		if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
			return -EINVAL;
		vma->vm_ops = &xive_native_esb_vmops;
	} else {
		return -EINVAL;
	}

	vm_flags_set(vma, VM_IO | VM_PFNMAP);
	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

	/*
	 * Grab the KVM device file address_space to be able to clear
	 * the ESB pages mapping when a device is passed-through into
	 * the guest.
	 */
	xive->mapping = vma->vm_file->f_mapping;
	return 0;
}

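/*
 * KVM_DEV_XIVE_GRP_SOURCE: create or restore an interrupt source. An
 * IPI is allocated for the source if it does not have one yet, and
 * the source starts off masked.
 */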
static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
					 u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u64 val;
	u16 idx;
	int rc;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
		return -E2BIG;

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb) {
		pr_debug("No source, creating source block...\n");
		sb = kvmppc_xive_create_src_block(xive, irq);
		if (!sb) {
			pr_err("Failed to create block...\n");
			return -ENOMEM;
		}
	}
	state = &sb->irq_state[idx];

	if (get_user(val, ubufp)) {
		pr_err("fault getting user info !\n");
		return -EFAULT;
	}

	arch_spin_lock(&sb->lock);

	/*
	 * If the source doesn't already have an IPI, allocate
	 * one and get the corresponding data
	 */
	if (!state->ipi_number) {
		state->ipi_number = xive_native_alloc_irq();
		if (state->ipi_number == 0) {
			pr_err("Failed to allocate IRQ !\n");
			rc = -ENXIO;
			goto unlock;
		}
		xive_native_populate_irq_data(state->ipi_number,
					      &state->ipi_data);
		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
			 state->ipi_number, irq);
	}

	/* Restore LSI state */
	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
		state->lsi = true;
		if (val & KVM_XIVE_LEVEL_ASSERTED)
			state->asserted = true;
		pr_devel(" LSI ! Asserted=%d\n", state->asserted);
	}

	/* Mask IRQ to start with */
	state->act_server = 0;
	state->act_priority = MASKED;
	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

	/* Increment the number of valid sources and mark this one valid */
	if (!state->valid)
		xive->src_count++;
	state->valid = true;

	rc = 0;

unlock:
	arch_spin_unlock(&sb->lock);

	return rc;
}

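/*
 * Retarget the source to a new server/priority at the HW level, or
 * mask it if requested.
 */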
static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
					struct kvmppc_xive_src_block *sb,
					struct kvmppc_xive_irq_state *state,
					u32 server, u8 priority, bool masked,
					u32 eisn)
{
	struct kvm *kvm = xive->kvm;
	u32 hw_num;
	int rc = 0;

	arch_spin_lock(&sb->lock);

	if (state->act_server == server && state->act_priority == priority &&
	    state->eisn == eisn)
		goto unlock;

	pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
		 priority, server, masked, state->act_server,
		 state->act_priority);

	kvmppc_xive_select_irq(state, &hw_num, NULL);

	if (priority != MASKED && !masked) {
		rc = kvmppc_xive_select_target(kvm, &server, priority);
		if (rc)
			goto unlock;

		state->act_priority = priority;
		state->act_server = server;
		state->eisn = eisn;

		rc = xive_native_configure_irq(hw_num,
					       kvmppc_xive_vp(xive, server),
					       priority, eisn);
	} else {
		state->act_priority = MASKED;
		state->act_server = 0;
		state->eisn = 0;

		rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
	}

unlock:
	arch_spin_unlock(&sb->lock);
	return rc;
}

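/*
 * KVM_DEV_XIVE_GRP_SOURCE_CONFIG: decode the configuration word
 * passed by userspace (priority, server, masked, EISN) and apply it
 * to the source.
 */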
static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
						long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 src;
	u64 kvm_cfg;
	u32 server;
	u8 priority;
	bool masked;
	u32 eisn;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	if (!state->valid)
		return -EINVAL;

	if (get_user(kvm_cfg, ubufp))
		return -EFAULT;

	pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

	priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
		KVM_XIVE_SOURCE_PRIORITY_SHIFT;
	server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
		KVM_XIVE_SOURCE_SERVER_SHIFT;
	masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
		KVM_XIVE_SOURCE_MASKED_SHIFT;
	eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
		KVM_XIVE_SOURCE_EISN_SHIFT;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}

	return kvmppc_xive_native_update_source_config(xive, sb, state, server,
						       priority, masked, eisn);
}

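/* KVM_DEV_XIVE_GRP_SOURCE_SYNC: sync the source at the XIVE IC level. */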
static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
					  long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	int rc = 0;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	rc = -EINVAL;

	arch_spin_lock(&sb->lock);

	if (state->valid) {
		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		rc = 0;
	}

	arch_spin_unlock(&sb->lock);
	return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
	/*
	 * We only support 64K pages for the moment. This is also
	 * advertised in the DT property "ibm,xive-eq-sizes"
	 */
	switch (qshift) {
	case 0: /* EQ reset */
	case 16:
		return 0;
	case 12:
	case 21:
	case 24:
	default:
		return -EINVAL;
	}
}

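/*
 * KVM_DEV_XIVE_GRP_EQ_CONFIG (set): configure or reset the event
 * queue of the server/priority pair encoded in the EQ identifier.
 */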
static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	int rc;
	__be32 *qaddr = NULL;
	struct page *page;
	struct xive_q *q;
	gfn_t gfn;
	unsigned long page_size;
	int srcu_idx;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
		return -EFAULT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	/* reset queue and disable queueing */
	if (!kvm_eq.qshift) {
		q->guest_qaddr = 0;
		q->guest_qshift = 0;

		rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
							NULL, 0, true);
		if (rc) {
			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
			       priority, xc->server_num, rc);
			return rc;
		}

		return 0;
	}

	/*
	 * sPAPR specifies an "Unconditional Notify (n) flag" for the
	 * H_INT_SET_QUEUE_CONFIG hcall which forces notification
	 * without using the coalescing mechanisms provided by the
	 * XIVE END ESBs. This is required on KVM as notification
	 * using the END ESBs is not supported.
	 */
	if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
		pr_err("invalid flags %d\n", kvm_eq.flags);
		return -EINVAL;
	}

	rc = xive_native_validate_queue_size(kvm_eq.qshift);
	if (rc) {
		pr_err("invalid queue size %d\n", kvm_eq.qshift);
		return rc;
	}

	if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
		pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
		       1ull << kvm_eq.qshift);
		return -EINVAL;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	gfn = gpa_to_gfn(kvm_eq.qaddr);

	page_size = kvm_host_page_size(vcpu, gfn);
	if (1ull << kvm_eq.qshift > page_size) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_warn("Incompatible host page size %lx!\n", page_size);
		return -EINVAL;
	}

	page = gfn_to_page(kvm, gfn);
	if (!page) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
		return -EINVAL;
	}

	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	/*
	 * Back up the queue page guest address; it is used to mark
	 * the EQ page dirty for migration.
	 */
	q->guest_qaddr = kvm_eq.qaddr;
	q->guest_qshift = kvm_eq.qshift;

	/*
	 * Unconditional Notification is forced by default at the
	 * OPAL level because the use of END ESBs is not supported by
	 * Linux.
	 */
	rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
					(__be32 *) qaddr, kvm_eq.qshift, true);
	if (rc) {
		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
		       priority, xc->server_num, rc);
		put_page(page);
		return rc;
	}

	/*
	 * Only restore the queue state when needed. When doing the
	 * H_INT_SET_SOURCE_CONFIG hcall, it should not.
	 */
	if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
		rc = xive_native_set_queue_state(xc->vp_id, priority,
						 kvm_eq.qtoggle,
						 kvm_eq.qindex);
		if (rc)
			goto error;
	}

	rc = kvmppc_xive_attach_escalation(vcpu, priority,
					   kvmppc_xive_has_single_escalation(xive));
error:
	if (rc)
		kvmppc_xive_native_cleanup_queue(vcpu, priority);
	return rc;
}

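/*
 * KVM_DEV_XIVE_GRP_EQ_CONFIG (get): return the current configuration
 * and state of the event queue to userspace.
 */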
static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	struct xive_q *q;
	void __user *ubufp = (u64 __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	u64 qaddr;
	u64 qshift;
	u64 qeoi_page;
	u32 escalate_irq;
	u64 qflags;
	int rc;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	memset(&kvm_eq, 0, sizeof(kvm_eq));

	if (!q->qpage)
		return 0;

	rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
					&qeoi_page, &escalate_irq, &qflags);
	if (rc)
		return rc;

	kvm_eq.flags = 0;
	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
		kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

	kvm_eq.qshift = q->guest_qshift;
	kvm_eq.qaddr = q->guest_qaddr;

	rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
					 &kvm_eq.qindex);
	if (rc)
		return rc;

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
		return -EFAULT;

	return 0;
}

static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
	int i;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

		if (!state->valid)
			continue;

		if (state->act_priority == MASKED)
			continue;

		state->eisn = 0;
		state->act_server = 0;
		state->act_priority = MASKED;
		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
		if (state->pt_number) {
			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
			xive_native_configure_irq(state->pt_number,
						  0, MASKED, 0);
		}
	}
}

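/*
 * KVM_DEV_XIVE_RESET: mask all sources, free the escalation
 * interrupts and reset the queues of every VCPU.
 */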
static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
		unsigned int prio;

		if (!xc)
			continue;

		kvmppc_xive_disable_vcpu_interrupts(vcpu);

		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

			/* Single escalation, no queue 7 */
			if (prio == 7 && kvmppc_xive_has_single_escalation(xive))
				break;

			if (xc->esc_virq[prio]) {
				free_irq(xc->esc_virq[prio], vcpu);
				irq_dispose_mapping(xc->esc_virq[prio]);
				kfree(xc->esc_virq_names[prio]);
				xc->esc_virq[prio] = 0;
			}

			kvmppc_xive_native_cleanup_queue(vcpu, prio);
		}
	}

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_reset_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	mutex_unlock(&xive->lock);

	return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
	int j;

	for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
		struct xive_irq_data *xd;
		u32 hw_num;

		if (!state->valid)
			continue;

		/*
		 * The struct kvmppc_xive_irq_state reflects the state
		 * of the EAS configuration and not the state of the
		 * source. The source is masked by setting the PQ bits
		 * to '-Q', which is what is being done before calling
		 * the KVM_DEV_XIVE_EQ_SYNC control.
		 *
		 * If a source EAS is configured, OPAL syncs the XIVE
		 * IC of the source and the XIVE IC of the previous
		 * target if any.
		 *
		 * So it should be fine ignoring MASKED sources as
		 * they have been synced already.
		 */
		if (state->act_priority == MASKED)
			continue;

		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		xive_native_sync_queue(hw_num);
	}
}

static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	unsigned int prio;
	int srcu_idx;

	if (!xc)
		return -ENOENT;

	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
		struct xive_q *q = &xc->queues[prio];

		if (!q->qpage)
			continue;

		/* Mark EQ page dirty for migration */
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	}
	return 0;
}

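/*
 * KVM_DEV_XIVE_EQ_SYNC: sync all sources and queues at the XIVE IC
 * level and mark the EQ pages dirty so that they are transferred by
 * the migration.
 */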
static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);
	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_native_sync_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvmppc_xive_native_vcpu_eq_sync(vcpu);
	}
	mutex_unlock(&xive->lock);

	return 0;
}

static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
			return kvmppc_xive_reset(xive);
		case KVM_DEV_XIVE_EQ_SYNC:
			return kvmppc_xive_native_eq_sync(xive);
		case KVM_DEV_XIVE_NR_SERVERS:
			return kvmppc_xive_set_nr_servers(xive, attr->addr);
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
		return kvmppc_xive_native_set_source(xive, attr->attr,
						     attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
		return kvmppc_xive_native_set_source_config(xive, attr->attr,
							    attr->addr);
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_set_queue_config(xive, attr->attr,
							   attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		return kvmppc_xive_native_sync_source(xive, attr->attr,
						      attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_get_queue_config(xive, attr->attr,
							   attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
		case KVM_DEV_XIVE_EQ_SYNC:
		case KVM_DEV_XIVE_NR_SERVERS:
			return 0;
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
		    attr->attr < KVMPPC_XIVE_NR_IRQS)
			return 0;
		break;
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return 0;
	}
	return -ENXIO;
}

/*
 * Called when device fd is closed. kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;

	pr_devel("Releasing xive native device\n");

	/*
	 * Clear the KVM device file address_space which is used to
	 * unmap the ESB pages when a device is passed-through.
	 */
	mutex_lock(&xive->mapping_lock);
	xive->mapping = NULL;
	mutex_unlock(&xive->mapping_lock);

	/*
	 * Since this is the device release function, we know that
	 * userspace does not have any open fd or mmap referring to
	 * the device. Therefore there can not be any of the
	 * device attribute set/get, mmap, or page fault functions
	 * being executed concurrently, and similarly, the
	 * connect_vcpu and set/clr_mapped functions also cannot
	 * be running.
	 */

	debugfs_remove(xive->dentry);

	/*
	 * We should clean up the vCPU interrupt presenters first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
		 * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress.
		 * Holding the vcpu->mutex also means that the vcpu cannot
		 * be executing the KVM_RUN ioctl, and therefore it cannot
		 * be executing the XIVE push or pull code or accessing
		 * the XIVE MMIO regions.
		 */
		mutex_lock(&vcpu->mutex);
		kvmppc_xive_native_cleanup_vcpu(vcpu);
		mutex_unlock(&vcpu->mutex);
	}

	/*
	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
	 * against xive code getting called during vcpu execution or
	 * set/get one_reg operations.
	 */
	kvm->arch.xive = NULL;

	for (i = 0; i <= xive->max_sbid; i++) {
		if (xive->src_blocks[i])
			kvmppc_xive_free_sources(xive->src_blocks[i]);
		kfree(xive->src_blocks[i]);
		xive->src_blocks[i] = NULL;
	}

	if (xive->vp_base != XIVE_INVALID_VP)
		xive_native_free_vp_block(xive->vp_base);

	/*
	 * A reference of the kvmppc_xive pointer is now kept under
	 * the xive_devices struct of the machine for reuse. It is
	 * freed when the VM is destroyed for now until we fix all the
	 * execution paths.
	 */

	kfree(dev);
}

/*
 * Create a XIVE device. kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xive *xive;
	struct kvm *kvm = dev->kvm;

	pr_devel("Creating xive native device\n");

	if (kvm->arch.xive)
		return -EEXIST;

	xive = kvmppc_xive_get_device(kvm, type);
	if (!xive)
		return -ENOMEM;

	dev->private = xive;
	xive->dev = dev;
	xive->kvm = kvm;
	mutex_init(&xive->mapping_lock);
	mutex_init(&xive->lock);

	/* VP allocation is delayed to the first call to connect_vcpu */
	xive->vp_base = XIVE_INVALID_VP;
	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
	 * on a POWER9 system.
	 */
	xive->nr_servers = KVM_MAX_VCPUS;

	if (xive_native_has_single_escalation())
		xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;

	if (xive_native_has_save_restore())
		xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE;

	xive->ops = &kvmppc_xive_native_ops;

	kvm->arch.xive = xive;
	return 0;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK  (((u64) 0xFF) << TM_IPB_SHIFT)

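/*
 * Capture the VP state for migration: the thread context registers
 * saved in w01, plus the IPB backed up in the NVT by OPAL.
 */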
int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	u64 opal_state;
	int rc;

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc)
		return -ENOENT;

	/* Thread context registers. We only care about IPB and CPPR */
	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

	/* Get the VP state from OPAL */
	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
	if (rc)
		return rc;

	/*
	 * Capture the backup of IPB register in the NVT structure and
	 * merge it in our KVM VP state.
	 */
	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

	pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
		 __func__,
		 vcpu->arch.xive_saved_state.nsr,
		 vcpu->arch.xive_saved_state.cppr,
		 vcpu->arch.xive_saved_state.ipb,
		 vcpu->arch.xive_saved_state.pipr,
		 vcpu->arch.xive_saved_state.w01,
		 (u32) vcpu->arch.xive_cam_word, opal_state);

	return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
		 val->xive_timaval[0], val->xive_timaval[1]);

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc || !xive)
		return -ENOENT;

	/* We can't update the state of a "pushed" VCPU */
	if (WARN_ON(vcpu->arch.xive_pushed))
		return -EBUSY;

	/*
	 * Restore the thread context registers. IPB and CPPR should
	 * be the only ones that matter.
	 */
	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

	/*
	 * There is no need to restore the XIVE internal state (IPB
	 * stored in the NVT) as the IPB register was merged in KVM VP
	 * state when captured.
	 */
	return 0;
}

bool kvmppc_xive_native_supported(void)
{
	return xive_native_has_queue_state_support();
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xive *xive = m->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;

	if (!kvm)
		return 0;

	seq_puts(m, "=========\nVCPU state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

		if (!xc)
			continue;

		seq_printf(m, "VCPU %d: VP=%#x/%02x\n"
			   "    NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
			   xc->server_num, xc->vp_id, xc->vp_chip_id,
			   vcpu->arch.xive_saved_state.nsr,
			   vcpu->arch.xive_saved_state.cppr,
			   vcpu->arch.xive_saved_state.ipb,
			   vcpu->arch.xive_saved_state.pipr,
			   be64_to_cpu(vcpu->arch.xive_saved_state.w01),
			   be32_to_cpu(vcpu->arch.xive_cam_word));

		kvmppc_xive_debug_show_queues(m, vcpu);
	}

	seq_puts(m, "=========\nSources\n=========\n");

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_debug_show_sources(m, sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(xive_native_debug);

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
	xive->dentry = debugfs_create_file("xive", 0444, xive->kvm->debugfs_dentry,
					   xive, &xive_native_debug_fops);

	pr_debug("%s: created\n", __func__);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;

	/* Register some debug interfaces */
	xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
	.name = "kvm-xive-native",
	.create = kvmppc_xive_native_create,
	.init = kvmppc_xive_native_init,
	.release = kvmppc_xive_native_release,
	.set_attr = kvmppc_xive_native_set_attr,
	.get_attr = kvmppc_xive_native_get_attr,
	.has_attr = kvmppc_xive_native_has_attr,
	.mmap = kvmppc_xive_native_mmap,
};