Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/um/drivers/vfio_kern.c
26424 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* Copyright (C) 2025 Ant Group
4
* Author: Tiwei Bie <[email protected]>
5
*/
6
7
#define pr_fmt(fmt) "vfio-uml: " fmt
8
9
#include <linux/module.h>
10
#include <linux/logic_iomem.h>
11
#include <linux/mutex.h>
12
#include <linux/list.h>
13
#include <linux/string.h>
14
#include <linux/unaligned.h>
15
#include <irq_kern.h>
16
#include <init.h>
17
#include <os.h>
18
19
#include "mconsole_kern.h"
20
#include "virt-pci.h"
21
#include "vfio_user.h"
22
23
/* Recover the wrapping uml_vfio_device from its embedded um_pci_device. */
#define to_vdev(_pdev) container_of(_pdev, struct uml_vfio_device, pdev)
24
25
/* Per-vector interrupt context tying an eventfd-backed IRQ to its device. */
struct uml_vfio_intr_ctx {
	struct uml_vfio_device *dev;	/* owning device */
	int irq;			/* UML IRQ number, or -1 when the vector is disarmed */
};
29
30
/* State for one PCI device passed through to UML via VFIO. */
struct uml_vfio_device {
	const char *name;	/* PCI address string (kstrdup'd, owned) */
	int group;		/* VFIO group fd */

	struct um_pci_device pdev;		/* embedded UM PCI device */
	struct uml_vfio_user_device udev;	/* userspace VFIO handle */
	struct uml_vfio_intr_ctx *intr_ctx;	/* one entry per MSI-X vector */

	/* Cached MSI-X capability geometry, filled by uml_vfio_read_msix_table(). */
	int msix_cap;		/* config-space offset of the MSI-X capability */
	int msix_bar;		/* BAR holding the MSI-X table */
	int msix_offset;	/* table offset within that BAR */
	int msix_size;		/* table size in bytes */
	u32 *msix_data;		/* shadow table: vector index -> guest IRQ number */

	struct list_head list;	/* link in uml_vfio_devices */
};
46
47
/* A refcounted, already-open VFIO group. */
struct uml_vfio_group {
	int id;			/* VFIO group number */
	int fd;			/* open group fd */
	int users;		/* number of devices using this group */
	struct list_head list;	/* link in uml_vfio_groups */
};
53
54
/*
 * Shared VFIO container; fd stays -1 until the first device is added.
 * Protected by uml_vfio_container_mtx.
 */
static struct {
	int fd;
	int users;
} uml_vfio_container = { .fd = -1 };
static DEFINE_MUTEX(uml_vfio_container_mtx);

/* Open VFIO groups, protected by uml_vfio_groups_mtx. */
static LIST_HEAD(uml_vfio_groups);
static DEFINE_MUTEX(uml_vfio_groups_mtx);

/* All known pass-through devices, protected by uml_vfio_devices_mtx. */
static LIST_HEAD(uml_vfio_devices);
static DEFINE_MUTEX(uml_vfio_devices_mtx);
65
66
/*
 * Bind a VFIO group to the shared container and, for the first user only,
 * set up the IOMMU.  Serialized by uml_vfio_container_mtx (released
 * automatically by guard()).  Returns 0 on success or a negative errno.
 */
static int uml_vfio_set_container(int group_fd)
{
	int err;

	guard(mutex)(&uml_vfio_container_mtx);

	err = uml_vfio_user_set_container(uml_vfio_container.fd, group_fd);
	if (err)
		return err;

	uml_vfio_container.users++;
	if (uml_vfio_container.users > 1)
		return 0;

	/* First user: configure the IOMMU exactly once. */
	err = uml_vfio_user_setup_iommu(uml_vfio_container.fd);
	if (err) {
		/* Roll back both the binding and the user count on failure. */
		uml_vfio_user_unset_container(uml_vfio_container.fd, group_fd);
		uml_vfio_container.users--;
	}
	return err;
}
87
88
static void uml_vfio_unset_container(int group_fd)
89
{
90
guard(mutex)(&uml_vfio_container_mtx);
91
92
uml_vfio_user_unset_container(uml_vfio_container.fd, group_fd);
93
uml_vfio_container.users--;
94
}
95
96
/*
 * Return an fd for VFIO group @group_id, opening the group and binding it
 * to the container on first use; later calls for the same id just take a
 * reference.  Returns the group fd on success or a negative errno.
 */
static int uml_vfio_open_group(int group_id)
{
	struct uml_vfio_group *group;
	int err;

	guard(mutex)(&uml_vfio_groups_mtx);

	/* Reuse an already-open group when possible. */
	list_for_each_entry(group, &uml_vfio_groups, list) {
		if (group->id == group_id) {
			group->users++;
			return group->fd;
		}
	}

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return -ENOMEM;

	group->fd = uml_vfio_user_open_group(group_id);
	if (group->fd < 0) {
		err = group->fd;
		goto free_group;
	}

	err = uml_vfio_set_container(group->fd);
	if (err)
		goto close_group;

	group->id = group_id;
	group->users = 1;

	list_add(&group->list, &uml_vfio_groups);

	return group->fd;

close_group:
	os_close_file(group->fd);
free_group:
	kfree(group);
	return err;
}
137
138
static int uml_vfio_release_group(int group_fd)
139
{
140
struct uml_vfio_group *group;
141
142
guard(mutex)(&uml_vfio_groups_mtx);
143
144
list_for_each_entry(group, &uml_vfio_groups, list) {
145
if (group->fd == group_fd) {
146
group->users--;
147
if (group->users == 0) {
148
uml_vfio_unset_container(group_fd);
149
os_close_file(group_fd);
150
list_del(&group->list);
151
kfree(group);
152
}
153
return 0;
154
}
155
}
156
157
return -ENOENT;
158
}
159
160
/*
 * IRQ handler for one MSI-X vector.  Drains the vector's eventfd and
 * injects the corresponding guest interrupt once per 8-byte counter
 * value read.
 */
static irqreturn_t uml_vfio_interrupt(int unused, void *opaque)
{
	struct uml_vfio_intr_ctx *ctx = opaque;
	struct uml_vfio_device *dev = ctx->dev;
	int index = ctx - dev->intr_ctx;	/* vector number from array position */
	int irqfd = dev->udev.irqfd[index];
	int irq = dev->msix_data[index];	/* guest IRQ the guest wrote to the MSI-X entry */
	uint64_t v;
	int r;

	do {
		r = os_read_file(irqfd, &v, sizeof(v));
		if (r == sizeof(v))
			generic_handle_irq(irq);
	} while (r == sizeof(v) || r == -EINTR);
	/* A fully drained non-blocking eventfd must end with -EAGAIN. */
	WARN(r != -EAGAIN, "read returned %d\n", r);

	return IRQ_HANDLED;
}
179
180
/*
 * Arm MSI-X vector @index: obtain an eventfd from VFIO, attach it to a
 * UML IRQ and register it for SIGIO notification.  A no-op if the vector
 * is already armed.  Returns 0 on success or a negative errno.
 */
static int uml_vfio_activate_irq(struct uml_vfio_device *dev, int index)
{
	struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index];
	int err, irqfd;

	if (ctx->irq >= 0)
		return 0;	/* already active */

	irqfd = uml_vfio_user_activate_irq(&dev->udev, index);
	if (irqfd < 0)
		return irqfd;

	ctx->irq = um_request_irq(UM_IRQ_ALLOC, irqfd, IRQ_READ,
				  uml_vfio_interrupt, 0,
				  "vfio-uml", ctx);
	if (ctx->irq < 0) {
		err = ctx->irq;
		goto deactivate;
	}

	err = add_sigio_fd(irqfd);
	if (err)
		goto free_irq;

	return 0;

	/* Unwind in reverse order of setup. */
free_irq:
	um_free_irq(ctx->irq, ctx);
	ctx->irq = -1;
deactivate:
	uml_vfio_user_deactivate_irq(&dev->udev, index);
	return err;
}
213
214
static int uml_vfio_deactivate_irq(struct uml_vfio_device *dev, int index)
215
{
216
struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index];
217
218
if (ctx->irq >= 0) {
219
ignore_sigio_fd(dev->udev.irqfd[index]);
220
um_free_irq(ctx->irq, ctx);
221
uml_vfio_user_deactivate_irq(&dev->udev, index);
222
ctx->irq = -1;
223
}
224
return 0;
225
}
226
227
/*
 * Intercept config-space writes to the MSI-X message-control word.  When
 * the write toggles PCI_MSIX_FLAGS_ENABLE (with no other control bits),
 * resync the host-side IRQ eventfds.  Everything else is ignored.
 */
static int uml_vfio_update_msix_cap(struct uml_vfio_device *dev,
				    unsigned int offset, int size,
				    unsigned long val)
{
	/*
	 * Here, we handle only the operations we care about,
	 * ignoring the rest.
	 */
	if (size == 2 && offset == dev->msix_cap + PCI_MSIX_FLAGS) {
		/* Mask off the table-size field before matching the flags. */
		switch (val & ~PCI_MSIX_FLAGS_QSIZE) {
		case PCI_MSIX_FLAGS_ENABLE:
		case 0:
			return uml_vfio_user_update_irqs(&dev->udev);
		}
	}
	return 0;
}
244
245
/*
 * Intercept aligned 32-bit writes to the "message data" dword of an
 * MSI-X table entry.  The value written is the guest IRQ number for that
 * vector: non-zero arms the vector, zero disarms it.
 */
static int uml_vfio_update_msix_table(struct uml_vfio_device *dev,
				      unsigned int offset, int size,
				      unsigned long val)
{
	int index;

	/*
	 * Here, we handle only the operations we care about,
	 * ignoring the rest.
	 */
	offset -= dev->msix_offset + PCI_MSIX_ENTRY_DATA;

	/*
	 * Writes to fields other than ENTRY_DATA end up misaligned with
	 * respect to PCI_MSIX_ENTRY_SIZE (possibly after the unsigned
	 * subtraction above wraps) and are deliberately ignored here.
	 */
	if (size != 4 || offset % PCI_MSIX_ENTRY_SIZE != 0)
		return 0;

	index = offset / PCI_MSIX_ENTRY_SIZE;
	if (index >= dev->udev.irq_count)
		return -EINVAL;

	/* Shadow the guest IRQ so uml_vfio_interrupt() can deliver it. */
	dev->msix_data[index] = val;

	return val ? uml_vfio_activate_irq(dev, index) :
		uml_vfio_deactivate_irq(dev, index);
}
269
270
static unsigned long __uml_vfio_cfgspace_read(struct uml_vfio_device *dev,
271
unsigned int offset, int size)
272
{
273
u8 data[8];
274
275
memset(data, 0xff, sizeof(data));
276
277
if (uml_vfio_user_cfgspace_read(&dev->udev, offset, data, size))
278
return ULONG_MAX;
279
280
switch (size) {
281
case 1:
282
return data[0];
283
case 2:
284
return le16_to_cpup((void *)data);
285
case 4:
286
return le32_to_cpup((void *)data);
287
#ifdef CONFIG_64BIT
288
case 8:
289
return le64_to_cpup((void *)data);
290
#endif
291
default:
292
return ULONG_MAX;
293
}
294
}
295
296
/* um_pci config-space read hook: thin wrapper around the internal helper. */
static unsigned long uml_vfio_cfgspace_read(struct um_pci_device *pdev,
					    unsigned int offset, int size)
{
	return __uml_vfio_cfgspace_read(to_vdev(pdev), offset, size);
}
303
304
/*
 * Write @val to PCI config space at @offset as a little-endian
 * @size-byte quantity (1/2/4/8).
 *
 * NOTE(review): a size outside 1/2/4/8 would leave @data uninitialized
 * yet still issue the write; callers appear to pass only the handled
 * sizes -- confirm before relying on other sizes.
 */
static void __uml_vfio_cfgspace_write(struct uml_vfio_device *dev,
				      unsigned int offset, int size,
				      unsigned long val)
{
	u8 data[8];

	switch (size) {
	case 1:
		data[0] = (u8)val;
		break;
	case 2:
		put_unaligned_le16(val, (void *)data);
		break;
	case 4:
		put_unaligned_le32(val, (void *)data);
		break;
#ifdef CONFIG_64BIT
	case 8:
		put_unaligned_le64(val, (void *)data);
		break;
#endif
	}

	WARN_ON(uml_vfio_user_cfgspace_write(&dev->udev, offset, data, size));
}
329
330
/*
 * um_pci config-space write hook: snoop writes that overlap the MSI-X
 * capability before forwarding them to the device.
 */
static void uml_vfio_cfgspace_write(struct um_pci_device *pdev,
				    unsigned int offset, int size,
				    unsigned long val)
{
	struct uml_vfio_device *dev = to_vdev(pdev);

	/* Does [offset, offset+size) overlap [msix_cap, msix_cap+SIZEOF)? */
	if (offset < dev->msix_cap + PCI_CAP_MSIX_SIZEOF &&
	    offset + size > dev->msix_cap)
		WARN_ON(uml_vfio_update_msix_cap(dev, offset, size, val));

	__uml_vfio_cfgspace_write(dev, offset, size, val);
}
342
343
static void uml_vfio_bar_copy_from(struct um_pci_device *pdev, int bar,
344
void *buffer, unsigned int offset, int size)
345
{
346
struct uml_vfio_device *dev = to_vdev(pdev);
347
348
memset(buffer, 0xff, size);
349
uml_vfio_user_bar_read(&dev->udev, bar, offset, buffer, size);
350
}
351
352
static unsigned long uml_vfio_bar_read(struct um_pci_device *pdev, int bar,
353
unsigned int offset, int size)
354
{
355
u8 data[8];
356
357
uml_vfio_bar_copy_from(pdev, bar, data, offset, size);
358
359
switch (size) {
360
case 1:
361
return data[0];
362
case 2:
363
return le16_to_cpup((void *)data);
364
case 4:
365
return le32_to_cpup((void *)data);
366
#ifdef CONFIG_64BIT
367
case 8:
368
return le64_to_cpup((void *)data);
369
#endif
370
default:
371
return ULONG_MAX;
372
}
373
}
374
375
static void uml_vfio_bar_copy_to(struct um_pci_device *pdev, int bar,
376
unsigned int offset, const void *buffer,
377
int size)
378
{
379
struct uml_vfio_device *dev = to_vdev(pdev);
380
381
uml_vfio_user_bar_write(&dev->udev, bar, offset, buffer, size);
382
}
383
384
/*
 * um_pci BAR write hook: snoop writes that overlap the MSI-X table, then
 * forward @val to the device encoded as a little-endian @size-byte value.
 */
static void uml_vfio_bar_write(struct um_pci_device *pdev, int bar,
			       unsigned int offset, int size,
			       unsigned long val)
{
	struct uml_vfio_device *dev = to_vdev(pdev);
	u8 data[8];

	/* Does [offset, offset+size) overlap the MSI-X table? */
	if (bar == dev->msix_bar && offset + size > dev->msix_offset &&
	    offset < dev->msix_offset + dev->msix_size)
		WARN_ON(uml_vfio_update_msix_table(dev, offset, size, val));

	switch (size) {
	case 1:
		data[0] = (u8)val;
		break;
	case 2:
		put_unaligned_le16(val, (void *)data);
		break;
	case 4:
		put_unaligned_le32(val, (void *)data);
		break;
#ifdef CONFIG_64BIT
	case 8:
		put_unaligned_le64(val, (void *)data);
		break;
#endif
	}

	uml_vfio_bar_copy_to(pdev, bar, offset, data, size);
}
414
415
/* Fill @size bytes of BAR @bar starting at @offset with @value (memset-like). */
static void uml_vfio_bar_set(struct um_pci_device *pdev, int bar,
			     unsigned int offset, u8 value, int size)
{
	struct uml_vfio_device *vdev = to_vdev(pdev);
	int pos = 0;

	/* The userspace helper has no fill primitive; write byte by byte. */
	while (pos < size) {
		uml_vfio_user_bar_write(&vdev->udev, bar, offset + pos, &value, 1);
		pos++;
	}
}
424
425
/* um_pci backend operations, implemented on top of userspace VFIO. */
static const struct um_pci_ops uml_vfio_um_pci_ops = {
	.cfgspace_read = uml_vfio_cfgspace_read,
	.cfgspace_write = uml_vfio_cfgspace_write,
	.bar_read = uml_vfio_bar_read,
	.bar_write = uml_vfio_bar_write,
	.bar_copy_from = uml_vfio_bar_copy_from,
	.bar_copy_to = uml_vfio_bar_copy_to,
	.bar_set = uml_vfio_bar_set,
};
434
435
/*
 * Walk the device's PCI capability list and return the config-space
 * offset of capability @cap, or 0 if it is absent.  The traversal is
 * bounded to guard against malformed (looping) capability chains.
 */
static u8 uml_vfio_find_capability(struct uml_vfio_device *dev, u8 cap)
{
	int ttl;
	u8 pos;

	pos = __uml_vfio_cfgspace_read(dev, PCI_CAPABILITY_LIST, sizeof(pos));

	for (ttl = 48 /* PCI_FIND_CAP_TTL */; pos && ttl; ttl--) {
		u16 ent = __uml_vfio_cfgspace_read(dev, pos, sizeof(ent));
		u8 id = ent & 0xff;

		if (id == 0xff)
			break;
		if (id == cap)
			return pos;

		pos = ent >> 8;	/* next-pointer lives in the high byte */
	}

	return 0;
}
457
458
/*
 * Locate the MSI-X capability and cache its geometry (BAR, offset, table
 * size in bytes), then allocate the shadow table mapping vector index to
 * guest IRQ number.  Returns 0 on success, -ENOTSUPP if the device has
 * no MSI-X capability, or -ENOMEM.
 */
static int uml_vfio_read_msix_table(struct uml_vfio_device *dev)
{
	unsigned int off;
	u16 flags;
	u32 tbl;

	off = uml_vfio_find_capability(dev, PCI_CAP_ID_MSIX);
	if (!off)
		return -ENOTSUPP;

	dev->msix_cap = off;

	tbl = __uml_vfio_cfgspace_read(dev, off + PCI_MSIX_TABLE, sizeof(tbl));
	flags = __uml_vfio_cfgspace_read(dev, off + PCI_MSIX_FLAGS, sizeof(flags));

	dev->msix_bar = tbl & PCI_MSIX_TABLE_BIR;
	dev->msix_offset = tbl & PCI_MSIX_TABLE_OFFSET;
	/* QSIZE encodes (number of entries - 1). */
	dev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * PCI_MSIX_ENTRY_SIZE;

	/*
	 * NOTE(review): msix_size is the table size in bytes, so this
	 * allocates more u32 slots than there are vectors (16 bytes per
	 * entry vs. 4 bytes per shadow slot) -- harmless over-allocation,
	 * but worth confirming it is intentional.
	 */
	dev->msix_data = kzalloc(dev->msix_size, GFP_KERNEL);
	if (!dev->msix_data)
		return -ENOMEM;

	return 0;
}
483
484
/*
 * Open @dev through VFIO and register it as a UM PCI device.  @dev must
 * already be linked into uml_vfio_devices.  On any failure everything
 * set up so far is unwound and @dev itself is unlinked and freed, so the
 * caller must not touch @dev afterwards.
 */
static void uml_vfio_open_device(struct uml_vfio_device *dev)
{
	struct uml_vfio_intr_ctx *ctx;
	int err, group_id, i;

	group_id = uml_vfio_user_get_group_id(dev->name);
	if (group_id < 0) {
		pr_err("Failed to get group id (%s), error %d\n",
		       dev->name, group_id);
		goto free_dev;
	}

	dev->group = uml_vfio_open_group(group_id);
	if (dev->group < 0) {
		pr_err("Failed to open group %d (%s), error %d\n",
		       group_id, dev->name, dev->group);
		goto free_dev;
	}

	err = uml_vfio_user_setup_device(&dev->udev, dev->group, dev->name);
	if (err) {
		pr_err("Failed to setup device (%s), error %d\n",
		       dev->name, err);
		goto release_group;
	}

	err = uml_vfio_read_msix_table(dev);
	if (err) {
		pr_err("Failed to read MSI-X table (%s), error %d\n",
		       dev->name, err);
		goto teardown_udev;
	}

	/* One interrupt context per available vector. */
	dev->intr_ctx = kmalloc_array(dev->udev.irq_count,
				      sizeof(struct uml_vfio_intr_ctx),
				      GFP_KERNEL);
	if (!dev->intr_ctx) {
		pr_err("Failed to allocate interrupt context (%s)\n",
		       dev->name);
		goto free_msix;
	}

	/* Every vector starts out disarmed (irq == -1). */
	for (i = 0; i < dev->udev.irq_count; i++) {
		ctx = &dev->intr_ctx[i];
		ctx->dev = dev;
		ctx->irq = -1;
	}

	dev->pdev.ops = &uml_vfio_um_pci_ops;

	err = um_pci_device_register(&dev->pdev);
	if (err) {
		pr_err("Failed to register UM PCI device (%s), error %d\n",
		       dev->name, err);
		goto free_intr_ctx;
	}

	return;

	/* Error unwind: release everything in reverse order of setup. */
free_intr_ctx:
	kfree(dev->intr_ctx);
free_msix:
	kfree(dev->msix_data);
teardown_udev:
	uml_vfio_user_teardown_device(&dev->udev);
release_group:
	uml_vfio_release_group(dev->group);
free_dev:
	list_del(&dev->list);
	kfree(dev->name);
	kfree(dev);
}
556
557
/*
 * Full teardown of an opened device: disarm every MSI-X vector, sync the
 * IRQ state with VFIO, unregister the UM PCI device and free all
 * resources, including @dev itself (which is unlinked from the list).
 */
static void uml_vfio_release_device(struct uml_vfio_device *dev)
{
	int i;

	for (i = 0; i < dev->udev.irq_count; i++)
		uml_vfio_deactivate_irq(dev, i);
	uml_vfio_user_update_irqs(&dev->udev);

	um_pci_device_unregister(&dev->pdev);
	kfree(dev->intr_ctx);
	kfree(dev->msix_data);
	uml_vfio_user_teardown_device(&dev->udev);
	uml_vfio_release_group(dev->group);
	list_del(&dev->list);
	kfree(dev->name);
	kfree(dev);
}
574
575
static struct uml_vfio_device *uml_vfio_find_device(const char *device)
576
{
577
struct uml_vfio_device *dev;
578
579
list_for_each_entry(dev, &uml_vfio_devices, list) {
580
if (!strcmp(dev->name, device))
581
return dev;
582
}
583
return NULL;
584
}
585
586
/*
 * Allocate a device record for @device (a PCI address string) and append
 * it to uml_vfio_devices, lazily opening the shared VFIO container on
 * first use.  Serialized by uml_vfio_devices_mtx.  Returns the record,
 * or an ERR_PTR on failure (-EEXIST if already added).
 */
static struct uml_vfio_device *uml_vfio_add_device(const char *device)
{
	struct uml_vfio_device *dev;
	int fd;

	guard(mutex)(&uml_vfio_devices_mtx);

	/* The container is shared by all devices; open it exactly once. */
	if (uml_vfio_container.fd < 0) {
		fd = uml_vfio_user_open_container();
		if (fd < 0)
			return ERR_PTR(fd);
		uml_vfio_container.fd = fd;
	}

	if (uml_vfio_find_device(device))
		return ERR_PTR(-EEXIST);

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	dev->name = kstrdup(device, GFP_KERNEL);
	if (!dev->name) {
		kfree(dev);
		return ERR_PTR(-ENOMEM);
	}

	list_add_tail(&dev->list, &uml_vfio_devices);
	return dev;
}
616
617
/*
 * Kernel-parameter setter: queue @device for opening at late_initcall
 * time.  Returns 0 or a negative errno.
 */
static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *kp)
{
	struct uml_vfio_device *dev = uml_vfio_add_device(device);

	return IS_ERR(dev) ? PTR_ERR(dev) : 0;
}
626
627
/* Kernel-parameter getter: nothing is reported back (empty output). */
static int uml_vfio_cmdline_get(char *buffer, const struct kernel_param *kp)
{
	return 0;
}
631
632
/* Accept "vfio_uml.device=..." on the kernel command line. */
static const struct kernel_param_ops uml_vfio_cmdline_param_ops = {
	.set = uml_vfio_cmdline_set,
	.get = uml_vfio_cmdline_get,
};

device_param_cb(device, &uml_vfio_cmdline_param_ops, NULL, 0400);
__uml_help(uml_vfio_cmdline_param_ops,
"vfio_uml.device=<domain:bus:slot.function>\n"
"  Pass through a PCI device to UML via VFIO. Currently, only MSI-X\n"
"  capable devices are supported, and it is assumed that drivers will\n"
"  use MSI-X. This parameter can be specified multiple times to pass\n"
"  through multiple PCI devices to UML.\n\n"
);
645
646
static int uml_vfio_mc_config(char *str, char **error_out)
647
{
648
struct uml_vfio_device *dev;
649
650
if (*str != '=') {
651
*error_out = "Invalid config";
652
return -EINVAL;
653
}
654
str += 1;
655
656
dev = uml_vfio_add_device(str);
657
if (IS_ERR(dev))
658
return PTR_ERR(dev);
659
uml_vfio_open_device(dev);
660
return 0;
661
}
662
663
/* mconsole "id" handler: identification is not supported. */
static int uml_vfio_mc_id(char **str, int *start_out, int *end_out)
{
	return -EOPNOTSUPP;
}
667
668
/* mconsole "remove" handler: hot-unplug is not supported. */
static int uml_vfio_mc_remove(int n, char **error_out)
{
	return -EOPNOTSUPP;
}
672
673
/* mconsole device: enables "config vfio_uml.device=<addr>" at runtime. */
static struct mc_device uml_vfio_mc = {
	.list = LIST_HEAD_INIT(uml_vfio_mc.list),
	.name = "vfio_uml.device",
	.config = uml_vfio_mc_config,
	.get_config = NULL,
	.id = uml_vfio_mc_id,
	.remove = uml_vfio_mc_remove,
};
681
682
/*
 * Late initcall: open every device queued from the kernel command line
 * and register the mconsole handler for runtime configuration.
 */
static int __init uml_vfio_init(void)
{
	struct uml_vfio_device *dev, *n;

	sigio_broken();

	/* If the opening fails, the device will be released. */
	list_for_each_entry_safe(dev, n, &uml_vfio_devices, list)
		uml_vfio_open_device(dev);

	mconsole_register_dev(&uml_vfio_mc);

	return 0;
}
late_initcall(uml_vfio_init);
697
698
/* Module exit: release every device, then close the shared container. */
static void __exit uml_vfio_exit(void)
{
	struct uml_vfio_device *dev, *n;

	list_for_each_entry_safe(dev, n, &uml_vfio_devices, list)
		uml_vfio_release_device(dev);

	if (uml_vfio_container.fd >= 0)
		os_close_file(uml_vfio_container.fd);
}
module_exit(uml_vfio_exit);
709
710