GitHub Repository: torvalds/linux
Path: arch/s390/kvm/pci.c
// SPDX-License-Identifier: GPL-2.0
/*
 * s390 kvm PCI passthrough support
 *
 * Copyright IBM Corp. 2022
 *
 * Author(s): Matthew Rosato <[email protected]>
 */

#include <linux/kvm_host.h>
#include <linux/pci.h>
#include <asm/pci.h>
#include <asm/pci_insn.h>
#include <asm/pci_io.h>
#include <asm/sclp.h>
#include "pci.h"
#include "kvm-s390.h"

struct zpci_aift *aift;

static inline int __set_irq_noiib(u16 ctl, u8 isc)
{
        union zpci_sic_iib iib = {{0}};

        return zpci_set_irq_ctrl(ctl, isc, &iib);
}
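
/*
 * Illustrative note (not part of the original file): "noiib" means the
 * Set Interruption Controls operation is issued with an empty
 * interruption information block. For example, kvm_s390_pci_aen_init()
 * below calls __set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc) to enable
 * floating adapter interrupts for the forwarding ISC without supplying
 * an IIB.
 */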

void kvm_s390_pci_aen_exit(void)
{
        unsigned long flags;
        struct kvm_zdev **gait_kzdev;

        lockdep_assert_held(&aift->aift_lock);

        /*
         * Contents of the aipb remain registered for the life of the host
         * kernel; the information is preserved in zpci_aipb and zpci_aif_sbv
         * in case the KVM module is inserted again later. Clear the AIFT
         * information and free anything not registered with the underlying
         * firmware.
         */
        spin_lock_irqsave(&aift->gait_lock, flags);
        gait_kzdev = aift->kzdev;
        aift->gait = NULL;
        aift->sbv = NULL;
        aift->kzdev = NULL;
        spin_unlock_irqrestore(&aift->gait_lock, flags);

        kfree(gait_kzdev);
}

static int zpci_setup_aipb(u8 nisc)
{
        struct page *page;
        int size, rc;

        zpci_aipb = kzalloc(sizeof(union zpci_sic_iib), GFP_KERNEL);
        if (!zpci_aipb)
                return -ENOMEM;

        aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
        if (!aift->sbv) {
                rc = -ENOMEM;
                goto free_aipb;
        }
        zpci_aif_sbv = aift->sbv;
        size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES *
                                    sizeof(struct zpci_gaite)));
        page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size);
        if (!page) {
                rc = -ENOMEM;
                goto free_sbv;
        }
        aift->gait = (struct zpci_gaite *)page_to_virt(page);

        zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector);
        zpci_aipb->aipb.gait = virt_to_phys(aift->gait);
        zpci_aipb->aipb.afi = nisc;
        zpci_aipb->aipb.faal = ZPCI_NR_DEVICES;

        /* Setup Adapter Event Notification Interpretation */
        if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) {
                rc = -EIO;
                goto free_gait;
        }

        return 0;

free_gait:
        free_pages((unsigned long)aift->gait, size);
free_sbv:
        airq_iv_release(aift->sbv);
        zpci_aif_sbv = NULL;
free_aipb:
        kfree(zpci_aipb);
        zpci_aipb = NULL;

        return rc;
}

static int zpci_reset_aipb(u8 nisc)
{
        /*
         * AEN registration can only happen once per system boot. If
         * an aipb already exists then AEN was already registered and
         * we can reuse the aipb contents. This can only happen if
         * the KVM module was removed and re-inserted. However, we must
         * ensure that the same forwarding ISC is used as this is assigned
         * during KVM module load.
         */
        if (zpci_aipb->aipb.afi != nisc)
                return -EINVAL;

        aift->sbv = zpci_aif_sbv;
        aift->gait = phys_to_virt(zpci_aipb->aipb.gait);

        return 0;
}

int kvm_s390_pci_aen_init(u8 nisc)
{
        int rc = 0;

        /* If already enabled for AEN, bail out now */
        if (aift->gait || aift->sbv)
                return -EPERM;

        mutex_lock(&aift->aift_lock);
        aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev *),
                              GFP_KERNEL);
        if (!aift->kzdev) {
                rc = -ENOMEM;
                goto unlock;
        }

        if (!zpci_aipb)
                rc = zpci_setup_aipb(nisc);
        else
                rc = zpci_reset_aipb(nisc);
        if (rc)
                goto free_zdev;

        /* Enable floating IRQs */
        if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) {
                rc = -EIO;
                kvm_s390_pci_aen_exit();
        }

        goto unlock;

free_zdev:
        kfree(aift->kzdev);
unlock:
        mutex_unlock(&aift->aift_lock);
        return rc;
}

/* Modify PCI: Register floating adapter interruption forwarding */
static int kvm_zpci_set_airq(struct zpci_dev *zdev)
{
        u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
        struct zpci_fib fib = {};
        u8 status;

        fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc;
        fib.fmt0.sum = 1;       /* enable summary notifications */
        fib.fmt0.noi = airq_iv_end(zdev->aibv);
        fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
        fib.fmt0.aibvo = 0;
        fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
        fib.fmt0.aisbo = zdev->aisb & 63;
        fib.gd = zdev->gisa;

        return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}
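
/*
 * Illustrative note (not part of the original file): the summary vector
 * is addressed in 64-bit words. For a hypothetical zdev->aisb of 70,
 * (70 / 64) * 8 = 8 is the byte offset of the word containing the
 * summary bit, so fib.fmt0.aisb points at the second word of
 * aift->sbv->vector, and fib.fmt0.aisbo = 70 & 63 = 6 is the bit
 * offset within that word.
 */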

/* Modify PCI: Unregister floating adapter interruption forwarding */
static int kvm_zpci_clear_airq(struct zpci_dev *zdev)
{
        u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
        struct zpci_fib fib = {};
        u8 cc, status;

        fib.gd = zdev->gisa;

        cc = zpci_mod_fc(req, &fib, &status);
        if (cc == 3 || (cc == 1 && status == 24))
                /* Function already gone or IRQs already deregistered. */
                cc = 0;

        return cc ? -EIO : 0;
}

static inline void unaccount_mem(unsigned long nr_pages)
{
        struct user_struct *user = get_uid(current_user());

        if (user)
                atomic_long_sub(nr_pages, &user->locked_vm);
        if (current->mm)
                atomic64_sub(nr_pages, &current->mm->pinned_vm);
}

static inline int account_mem(unsigned long nr_pages)
{
        struct user_struct *user = get_uid(current_user());
        unsigned long page_limit, cur_pages, new_pages;

        page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

        cur_pages = atomic_long_read(&user->locked_vm);
        do {
                new_pages = cur_pages + nr_pages;
                if (new_pages > page_limit)
                        return -ENOMEM;
        } while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages));

        atomic64_add(nr_pages, &current->mm->pinned_vm);

        return 0;
}
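
/*
 * Illustrative note (not part of the original file): account_mem()
 * charges pinned pages against RLIMIT_MEMLOCK using a lock-free retry
 * loop; when the cmpxchg fails, cur_pages is refreshed with the current
 * counter value and the limit is rechecked. For a hypothetical limit of
 * 16 pages with locked_vm at 15, account_mem(2) returns -ENOMEM while
 * account_mem(1) succeeds and raises locked_vm to 16. Callers below
 * pair each successful account_mem(pcount) with unaccount_mem(pcount)
 * after unpinning.
 */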

static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
                                   bool assist)
{
        struct page *pages[1], *aibv_page, *aisb_page = NULL;
        unsigned int msi_vecs, idx;
        struct zpci_gaite *gaite;
        unsigned long hva, bit;
        struct kvm *kvm;
        phys_addr_t gaddr;
        int rc = 0, gisc, npages, pcount = 0;

        /*
         * Interrupt forwarding is only applicable if the device is already
         * enabled for interpretation
         */
        if (zdev->gisa == 0)
                return -EINVAL;

        kvm = zdev->kzdev->kvm;
        msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);

        /* Get the associated forwarding ISC - if invalid, return the error */
        gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc);
        if (gisc < 0)
                return gisc;

        /* Replace AIBV address */
        idx = srcu_read_lock(&kvm->srcu);
        hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
        npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages);
        srcu_read_unlock(&kvm->srcu, idx);
        if (npages < 1) {
                rc = -EIO;
                goto out;
        }
        aibv_page = pages[0];
        pcount++;
        gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
        fib->fmt0.aibv = gaddr;

        /* Pin the guest AISB if one was specified */
        if (fib->fmt0.sum == 1) {
                idx = srcu_read_lock(&kvm->srcu);
                hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
                npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM,
                                             pages);
                srcu_read_unlock(&kvm->srcu, idx);
                if (npages < 1) {
                        rc = -EIO;
                        goto unpin1;
                }
                aisb_page = pages[0];
                pcount++;
        }

        /* Account for pinned pages, roll back on failure */
        if (account_mem(pcount))
                goto unpin2;

        /* AISB must be allocated before we can fill in GAITE */
        mutex_lock(&aift->aift_lock);
        bit = airq_iv_alloc_bit(aift->sbv);
        if (bit == -1UL)
                goto unlock;
        zdev->aisb = bit; /* store the summary bit number */
        zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA |
                                    AIRQ_IV_BITLOCK |
                                    AIRQ_IV_GUESTVEC,
                                    phys_to_virt(fib->fmt0.aibv));

        spin_lock_irq(&aift->gait_lock);
        gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
                                                   sizeof(struct zpci_gaite));

        /* If assist not requested, host will get all alerts */
        if (assist)
                gaite->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
        else
                gaite->gisa = 0;

        gaite->gisc = fib->fmt0.isc;
        gaite->count++;
        gaite->aisbo = fib->fmt0.aisbo;
        gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb &
                                                              ~PAGE_MASK));
        aift->kzdev[zdev->aisb] = zdev->kzdev;
        spin_unlock_irq(&aift->gait_lock);

        /* Update guest FIB for re-issue */
        fib->fmt0.aisbo = zdev->aisb & 63;
        fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
        fib->fmt0.isc = gisc;

        /* Save some guest fib values in the host for later use */
        zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc;
        zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv;
        mutex_unlock(&aift->aift_lock);

        /* Issue the clp to setup the irq now */
        rc = kvm_zpci_set_airq(zdev);
        return rc;

unlock:
        mutex_unlock(&aift->aift_lock);
unpin2:
        if (fib->fmt0.sum == 1)
                unpin_user_page(aisb_page);
unpin1:
        unpin_user_page(aibv_page);
out:
        return rc;
}
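
/*
 * Illustrative note (not part of the original file): on success,
 * kvm_s390_pci_aif_enable() has rewritten the guest's FIB in place:
 * fmt0.aibv now holds the host physical address of the pinned AIBV page
 * (plus the original page offset), fmt0.isc holds the registered
 * forwarding GISC, and fmt0.aisb/aisbo point at the slot of the host
 * summary vector chosen by airq_iv_alloc_bit(). The translated isc and
 * aibv are stashed in zdev->kzdev->fib; kvm_s390_pci_aif_disable()
 * later uses the saved aibv to locate and unpin the AIBV page.
 */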

static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
{
        struct kvm_zdev *kzdev = zdev->kzdev;
        struct zpci_gaite *gaite;
        struct page *vpage = NULL, *spage = NULL;
        int rc, pcount = 0;
        u8 isc;

        if (zdev->gisa == 0)
                return -EINVAL;

        mutex_lock(&aift->aift_lock);

        /*
         * If the clear fails due to an error, leave now unless we know this
         * device is about to go away (force); in that case clear the GAITE
         * regardless.
         */
        rc = kvm_zpci_clear_airq(zdev);
        if (rc && !force)
                goto out;

        if (zdev->kzdev->fib.fmt0.aibv == 0)
                goto out;
        spin_lock_irq(&aift->gait_lock);
        gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
                                                   sizeof(struct zpci_gaite));
        isc = gaite->gisc;
        gaite->count--;
        if (gaite->count == 0) {
                /* Release guest AIBV and AISB */
                vpage = phys_to_page(kzdev->fib.fmt0.aibv);
                if (gaite->aisb != 0)
                        spage = phys_to_page(gaite->aisb);
                /* Clear the GAIT entry */
                gaite->aisb = 0;
                gaite->gisc = 0;
                gaite->aisbo = 0;
                gaite->gisa = 0;
                aift->kzdev[zdev->aisb] = NULL;
                /* Clear zdev info */
                airq_iv_free_bit(aift->sbv, zdev->aisb);
                airq_iv_release(zdev->aibv);
                zdev->aisb = 0;
                zdev->aibv = NULL;
        }
        spin_unlock_irq(&aift->gait_lock);
        kvm_s390_gisc_unregister(kzdev->kvm, isc);
        kzdev->fib.fmt0.isc = 0;
        kzdev->fib.fmt0.aibv = 0;

        if (vpage) {
                unpin_user_page(vpage);
                pcount++;
        }
        if (spage) {
                unpin_user_page(spage);
                pcount++;
        }
        if (pcount > 0)
                unaccount_mem(pcount);
out:
        mutex_unlock(&aift->aift_lock);

        return rc;
}

static int kvm_s390_pci_dev_open(struct zpci_dev *zdev)
{
        struct kvm_zdev *kzdev;

        kzdev = kzalloc(sizeof(struct kvm_zdev), GFP_KERNEL);
        if (!kzdev)
                return -ENOMEM;

        kzdev->zdev = zdev;
        zdev->kzdev = kzdev;

        return 0;
}

static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
{
        struct kvm_zdev *kzdev;

        kzdev = zdev->kzdev;
        WARN_ON(kzdev->zdev != zdev);
        zdev->kzdev = NULL;
        kfree(kzdev);
}

/*
 * Register device with the specified KVM. If interpretation facilities are
 * available, enable them and let userspace indicate whether or not they will
 * be used (specify SHM bit to disable).
 */
static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
{
        struct zpci_dev *zdev = opaque;
        int rc;

        if (!zdev)
                return -EINVAL;

        mutex_lock(&zdev->kzdev_lock);

        if (zdev->kzdev || zdev->gisa != 0 || !kvm) {
                mutex_unlock(&zdev->kzdev_lock);
                return -EINVAL;
        }

        kvm_get_kvm(kvm);

        mutex_lock(&kvm->lock);

        rc = kvm_s390_pci_dev_open(zdev);
        if (rc)
                goto err;

        /*
         * If interpretation facilities aren't available, add the device to
         * the kzdev list but don't enable for interpretation.
         */
        if (!kvm_s390_pci_interp_allowed())
                goto out;

        /*
         * If this is the first request to use an interpreted device, make the
         * necessary vcpu changes
         */
        if (!kvm->arch.use_zpci_interp)
                kvm_s390_vcpu_pci_enable_interp(kvm);

        if (zdev_enabled(zdev)) {
                rc = zpci_disable_device(zdev);
                if (rc)
                        goto err;
        }

        /*
         * Store information about the identity of the kvm guest allowed to
         * access this device via interpretation to be used by host CLP
         */
        zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);

        rc = zpci_reenable_device(zdev);
        if (rc)
                goto clear_gisa;

out:
        zdev->kzdev->kvm = kvm;

        spin_lock(&kvm->arch.kzdev_list_lock);
        list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list);
        spin_unlock(&kvm->arch.kzdev_list_lock);

        mutex_unlock(&kvm->lock);
        mutex_unlock(&zdev->kzdev_lock);
        return 0;

clear_gisa:
        zdev->gisa = 0;
err:
        if (zdev->kzdev)
                kvm_s390_pci_dev_release(zdev);
        mutex_unlock(&kvm->lock);
        mutex_unlock(&zdev->kzdev_lock);
        kvm_put_kvm(kvm);
        return rc;
}

static void kvm_s390_pci_unregister_kvm(void *opaque)
{
        struct zpci_dev *zdev = opaque;
        struct kvm *kvm;

        if (!zdev)
                return;

        mutex_lock(&zdev->kzdev_lock);

        if (WARN_ON(!zdev->kzdev)) {
                mutex_unlock(&zdev->kzdev_lock);
                return;
        }

        kvm = zdev->kzdev->kvm;
        mutex_lock(&kvm->lock);

        /*
         * A 0 gisa means interpretation was never enabled, just remove the
         * device from the list.
         */
        if (zdev->gisa == 0)
                goto out;

        /* Forwarding must be turned off before interpretation */
        if (zdev->kzdev->fib.fmt0.aibv != 0)
                kvm_s390_pci_aif_disable(zdev, true);

        /* Remove the host CLP guest designation */
        zdev->gisa = 0;

        if (zdev_enabled(zdev)) {
                if (zpci_disable_device(zdev))
                        goto out;
        }

        zpci_reenable_device(zdev);

out:
        spin_lock(&kvm->arch.kzdev_list_lock);
        list_del(&zdev->kzdev->entry);
        spin_unlock(&kvm->arch.kzdev_list_lock);
        kvm_s390_pci_dev_release(zdev);

        mutex_unlock(&kvm->lock);
        mutex_unlock(&zdev->kzdev_lock);

        kvm_put_kvm(kvm);
}

void kvm_s390_pci_init_list(struct kvm *kvm)
{
        spin_lock_init(&kvm->arch.kzdev_list_lock);
        INIT_LIST_HEAD(&kvm->arch.kzdev_list);
}

void kvm_s390_pci_clear_list(struct kvm *kvm)
{
        /*
         * This list should already be empty, either via vfio device closures
         * or kvm fd cleanup.
         */
        spin_lock(&kvm->arch.kzdev_list_lock);
        WARN_ON_ONCE(!list_empty(&kvm->arch.kzdev_list));
        spin_unlock(&kvm->arch.kzdev_list_lock);
}

static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh)
{
        struct zpci_dev *zdev = NULL;
        struct kvm_zdev *kzdev;

        spin_lock(&kvm->arch.kzdev_list_lock);
        list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
                if (kzdev->zdev->fh == fh) {
                        zdev = kzdev->zdev;
                        break;
                }
        }
        spin_unlock(&kvm->arch.kzdev_list_lock);

        return zdev;
}

static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev,
                                     struct kvm_s390_zpci_op *args)
{
        struct zpci_fib fib = {};
        bool hostflag;

        fib.fmt0.aibv = args->u.reg_aen.ibv;
        fib.fmt0.isc = args->u.reg_aen.isc;
        fib.fmt0.noi = args->u.reg_aen.noi;
        if (args->u.reg_aen.sb != 0) {
                fib.fmt0.aisb = args->u.reg_aen.sb;
                fib.fmt0.aisbo = args->u.reg_aen.sbo;
                fib.fmt0.sum = 1;
        } else {
                fib.fmt0.aisb = 0;
                fib.fmt0.aisbo = 0;
                fib.fmt0.sum = 0;
        }

        hostflag = !(args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST);
        return kvm_s390_pci_aif_enable(zdev, &fib, hostflag);
}

int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args)
{
        struct kvm_zdev *kzdev;
        struct zpci_dev *zdev;
        int r;

        zdev = get_zdev_from_kvm_by_fh(kvm, args->fh);
        if (!zdev)
                return -ENODEV;

        mutex_lock(&zdev->kzdev_lock);
        mutex_lock(&kvm->lock);

        kzdev = zdev->kzdev;
        if (!kzdev) {
                r = -ENODEV;
                goto out;
        }
        if (kzdev->kvm != kvm) {
                r = -EPERM;
                goto out;
        }

        switch (args->op) {
        case KVM_S390_ZPCIOP_REG_AEN:
                /* Fail on unknown flags */
                if (args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST) {
                        r = -EINVAL;
                        break;
                }
                r = kvm_s390_pci_zpci_reg_aen(zdev, args);
                break;
        case KVM_S390_ZPCIOP_DEREG_AEN:
                r = kvm_s390_pci_aif_disable(zdev, false);
                break;
        default:
                r = -EINVAL;
        }

out:
        mutex_unlock(&kvm->lock);
        mutex_unlock(&zdev->kzdev_lock);
        return r;
}
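
/*
 * Illustrative usage sketch (not part of the original file):
 * kvm_s390_pci_zpci_op() backs the KVM_S390_ZPCI_OP vm ioctl. A
 * userspace VMM might register adapter event notification for a
 * passthrough device roughly as follows; all field values here are
 * hypothetical:
 *
 *      struct kvm_s390_zpci_op args = {
 *              .fh = fh,                 // device function handle
 *              .op = KVM_S390_ZPCIOP_REG_AEN,
 *              .u.reg_aen = {
 *                      .ibv = aibv_gpa,  // guest address of the IBV
 *                      .sb  = aisb_gpa,  // guest address of summary bit
 *                      .sbo = sbo,       // summary bit offset
 *                      .noi = nvec,      // number of interrupts
 *                      .isc = isc,       // guest interrupt subclass
 *                      .flags = 0,       // 0: firmware delivers alerts
 *                                        // to the guest via its GISA
 *              },
 *      };
 *      rc = ioctl(vm_fd, KVM_S390_ZPCI_OP, &args);
 *
 * Setting KVM_S390_ZPCIOP_REGAEN_HOST in flags instead directs all
 * alerts to the host (assist disabled).
 */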

int __init kvm_s390_pci_init(void)
{
        zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm;
        zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm;

        if (!kvm_s390_pci_interp_allowed())
                return 0;

        aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
        if (!aift)
                return -ENOMEM;

        spin_lock_init(&aift->gait_lock);
        mutex_init(&aift->aift_lock);

        return 0;
}

void kvm_s390_pci_exit(void)
{
        zpci_kvm_hook.kvm_register = NULL;
        zpci_kvm_hook.kvm_unregister = NULL;

        if (!kvm_s390_pci_interp_allowed())
                return;

        mutex_destroy(&aift->aift_lock);

        kfree(aift);
}