Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/usr.sbin/bhyve/amd64/pci_gvt-d.c
108696 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
5
* Author: Corvin Köhne <[email protected]>
6
*/
7
8
#include <sys/types.h>
9
#include <sys/mman.h>
10
#include <sys/sysctl.h>
11
12
#include <dev/pci/pcireg.h>
13
14
#include <err.h>
15
#include <errno.h>
16
#include <fcntl.h>
17
#include <string.h>
18
#include <unistd.h>
19
20
#include "amd64/e820.h"
21
#include "pci_gvt-d-opregion.h"
22
#include "pci_passthru.h"
23
#include "pciids_intel_gpus.h"
24
25
#define KB (1024UL)
26
#define MB (1024 * KB)
27
#define GB (1024 * MB)
28
29
#ifndef _PATH_MEM
30
#define _PATH_MEM "/dev/mem"
31
#endif
32
33
#define PCI_VENDOR_INTEL 0x8086
34
35
#define PCIR_BDSM 0x5C /* Base of Data Stolen Memory register */
36
#define PCIR_BDSM_GEN11 0xC0
37
#define PCIR_ASLS_CTL 0xFC /* Opregion start address register */
38
39
#define PCIM_BDSM_GSM_ALIGNMENT \
40
0x00100000 /* Graphics Stolen Memory is 1 MB aligned */
41
42
#define BDSM_GEN11_MMIO_ADDRESS 0x1080C0
43
44
#define GVT_D_MAP_GSM 0
45
#define GVT_D_MAP_OPREGION 1
46
#define GVT_D_MAP_VBT 2
47
48
static uint64_t
49
gvt_d_dsmbase_read(struct pci_devinst *pi, int baridx __unused, uint64_t offset,
50
int size)
51
{
52
switch (size) {
53
case 1:
54
return (pci_get_cfgdata8(pi, PCIR_BDSM_GEN11 + offset));
55
case 2:
56
return (pci_get_cfgdata16(pi, PCIR_BDSM_GEN11 + offset));
57
case 4:
58
return (pci_get_cfgdata32(pi, PCIR_BDSM_GEN11 + offset));
59
default:
60
return (UINT64_MAX);
61
}
62
}
63
64
static void
65
gvt_d_dsmbase_write(struct pci_devinst *pi, int baridx __unused,
66
uint64_t offset, int size, uint64_t val)
67
{
68
switch (size) {
69
case 1:
70
pci_set_cfgdata8(pi, PCIR_BDSM_GEN11 + offset, val);
71
break;
72
case 2:
73
pci_set_cfgdata16(pi, PCIR_BDSM_GEN11 + offset, val);
74
break;
75
case 4:
76
pci_set_cfgdata32(pi, PCIR_BDSM_GEN11 + offset, val);
77
break;
78
default:
79
break;
80
}
81
}
82
83
static int
84
set_bdsm_gen3(struct pci_devinst *const pi, vm_paddr_t bdsm_gpa)
85
{
86
struct passthru_softc *sc = pi->pi_arg;
87
uint32_t bdsm;
88
int error;
89
90
bdsm = pci_host_read_config(passthru_get_sel(sc), PCIR_BDSM, 4);
91
92
/* Protect the BDSM register in PCI space. */
93
pci_set_cfgdata32(pi, PCIR_BDSM,
94
bdsm_gpa | (bdsm & (PCIM_BDSM_GSM_ALIGNMENT - 1)));
95
error = set_pcir_handler(sc, PCIR_BDSM, 4, passthru_cfgread_emulate,
96
passthru_cfgwrite_emulate);
97
if (error) {
98
warnx("%s: Failed to setup handler for BDSM register!", __func__);
99
return (error);
100
}
101
102
return (0);
103
}
104
105
static int
106
set_bdsm_gen11(struct pci_devinst *const pi, vm_paddr_t bdsm_gpa)
107
{
108
struct passthru_softc *sc = pi->pi_arg;
109
uint64_t bdsm;
110
int error;
111
112
bdsm = pci_host_read_config(passthru_get_sel(sc), PCIR_BDSM_GEN11, 8);
113
114
/* Protect the BDSM register in PCI space. */
115
pci_set_cfgdata32(pi, PCIR_BDSM_GEN11,
116
bdsm_gpa | (bdsm & (PCIM_BDSM_GSM_ALIGNMENT - 1)));
117
pci_set_cfgdata32(pi, PCIR_BDSM_GEN11 + 4, bdsm_gpa >> 32);
118
error = set_pcir_handler(sc, PCIR_BDSM_GEN11, 8, passthru_cfgread_emulate,
119
passthru_cfgwrite_emulate);
120
if (error) {
121
warnx("%s: Failed to setup handler for BDSM register!\n", __func__);
122
return (error);
123
}
124
125
/* Protect the BDSM register in MMIO space. */
126
error = passthru_set_bar_handler(sc, 0, BDSM_GEN11_MMIO_ADDRESS, sizeof(uint64_t),
127
gvt_d_dsmbase_read, gvt_d_dsmbase_write);
128
if (error) {
129
warnx("%s: Failed to setup handler for BDSM mirror!\n", __func__);
130
return (error);
131
}
132
133
return (0);
134
}
135
136
/*
 * Per-generation hooks. Currently only the routine that programs the
 * Base of Data Stolen Memory (BDSM) register differs between devices.
 */
struct igd_ops {
	int (*set_bdsm)(struct pci_devinst *const pi, vm_paddr_t bdsm_gpa);
};
139
140
/* Legacy devices: 32 bit BDSM at config offset PCIR_BDSM (0x5C). */
static const struct igd_ops igd_ops_gen3 = { .set_bdsm = set_bdsm_gen3 };

/* Gen11+ devices: 64 bit BDSM at 0xC0 plus an MMIO mirror in BAR 0. */
static const struct igd_ops igd_ops_gen11 = { .set_bdsm = set_bdsm_gen11 };
143
144
/* Maps one PCI device ID to the ops table for its IGD generation. */
struct igd_device {
	uint32_t device_id;
	const struct igd_ops *ops;
};
148
149
/*
 * Table-entry constructor handed to the INTEL_*_IDS() list macros from
 * pciids_intel_gpus.h; expands to one igd_device initializer per ID.
 */
#define IGD_DEVICE(_device_id, _ops) \
	{ \
		.device_id = (_device_id), \
		.ops = (_ops), \
	}
154
155
/*
 * Device-ID lookup table, populated via the INTEL_*_IDS() macros from
 * pciids_intel_gpus.h. Families mapped to igd_ops_gen3 use the legacy
 * 32 bit BDSM register; Ice Lake (ICL) and newer use the gen11 variant.
 */
static const struct igd_device igd_devices[] = {
	INTEL_I915G_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_I915GM_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_I945G_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_I945GM_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_VLV_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_PNV_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_I965GM_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_GM45_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_G45_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_ILK_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_SNB_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_IVB_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_HSW_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_BDW_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_CHV_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_SKL_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_BXT_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_KBL_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_CFL_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_WHL_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_CML_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_GLK_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_CNL_IDS(IGD_DEVICE, &igd_ops_gen3),
	INTEL_ICL_IDS(IGD_DEVICE, &igd_ops_gen11),
	INTEL_EHL_IDS(IGD_DEVICE, &igd_ops_gen11),
	INTEL_JSL_IDS(IGD_DEVICE, &igd_ops_gen11),
	INTEL_TGL_IDS(IGD_DEVICE, &igd_ops_gen11),
	INTEL_RKL_IDS(IGD_DEVICE, &igd_ops_gen11),
	INTEL_ADLS_IDS(IGD_DEVICE, &igd_ops_gen11),
	INTEL_ADLP_IDS(IGD_DEVICE, &igd_ops_gen11),
	INTEL_ADLN_IDS(IGD_DEVICE, &igd_ops_gen11),
	INTEL_RPLS_IDS(IGD_DEVICE, &igd_ops_gen11),
	INTEL_RPLU_IDS(IGD_DEVICE, &igd_ops_gen11),
	INTEL_RPLP_IDS(IGD_DEVICE, &igd_ops_gen11),
};
191
192
static const struct igd_ops *
193
get_igd_ops(struct pci_devinst *const pi)
194
{
195
struct passthru_softc *sc = pi->pi_arg;
196
uint16_t device_id;
197
198
device_id = pci_host_read_config(passthru_get_sel(sc), PCIR_DEVICE,
199
0x02);
200
for (size_t i = 0; i < nitems(igd_devices); i++) {
201
if (igd_devices[i].device_id != device_id)
202
continue;
203
204
return (igd_devices[i].ops);
205
}
206
207
return (NULL);
208
}
209
210
static int
211
gvt_d_probe(struct pci_devinst *const pi)
212
{
213
struct passthru_softc *sc;
214
uint16_t vendor;
215
uint8_t class;
216
217
sc = pi->pi_arg;
218
219
vendor = pci_host_read_config(passthru_get_sel(sc), PCIR_VENDOR, 0x02);
220
if (vendor != PCI_VENDOR_INTEL)
221
return (ENXIO);
222
223
class = pci_host_read_config(passthru_get_sel(sc), PCIR_CLASS, 0x01);
224
if (class != PCIC_DISPLAY)
225
return (ENXIO);
226
227
return (0);
228
}
229
230
static vm_paddr_t
231
gvt_d_alloc_mmio_memory(const vm_paddr_t host_address, const vm_paddr_t length,
232
const vm_paddr_t alignment, const enum e820_memory_type type)
233
{
234
vm_paddr_t address;
235
236
/* Try to reuse host address. */
237
address = e820_alloc(host_address, length, E820_ALIGNMENT_NONE, type,
238
E820_ALLOCATE_SPECIFIC);
239
if (address != 0) {
240
return (address);
241
}
242
243
/*
244
* We're not able to reuse the host address. Fall back to the highest usable
245
* address below 4 GB.
246
*/
247
return (
248
e820_alloc(4 * GB, length, alignment, type, E820_ALLOCATE_HIGHEST));
249
}
250
251
/*
252
 * Note that the graphics stolen memory is somewhat confusing. On the one hand
253
* the Intel Open Source HD Graphics Programmers' Reference Manual states that
254
* it's only GPU accessible. As the CPU can't access the area, the guest
255
* shouldn't need it. On the other hand, the Intel GOP driver refuses to work
256
* properly, if it's not set to a proper address.
257
*
258
* Intel itself maps it into the guest by EPT [1]. At the moment, we're not
259
* aware of any situation where this EPT mapping is required, so we don't do it
260
* yet.
261
*
262
* Intel also states that the Windows driver for Tiger Lake reads the address of
263
* the graphics stolen memory [2]. As the GVT-d code doesn't support Tiger Lake
264
* in its first implementation, we can't check how it behaves. We should keep an
265
* eye on it.
266
*
267
* [1]
268
* https://github.com/projectacrn/acrn-hypervisor/blob/e28d6fbfdfd556ff1bc3ff330e41d4ddbaa0f897/devicemodel/hw/pci/passthrough.c#L655-L657
269
* [2]
270
* https://github.com/projectacrn/acrn-hypervisor/blob/e28d6fbfdfd556ff1bc3ff330e41d4ddbaa0f897/devicemodel/hw/pci/passthrough.c#L626-L629
271
*/
272
/*
 * Reserve guest-physical space for the Graphics Stolen Memory (GSM) and
 * point the device's BDSM register at it.
 *
 * The host GSM base and size are read from the
 * hw.intel_graphics_stolen_{base,size} sysctls. The region is only
 * reserved in the E820 table, not mapped into the guest (see the comment
 * above gvt_d_setup_gsm for the reasoning).
 *
 * Returns 0 on success, non-zero otherwise.
 */
static int
gvt_d_setup_gsm(struct pci_devinst *const pi)
{
	struct passthru_softc *sc;
	struct passthru_mmio_mapping *gsm;
	const struct igd_ops *igd_ops;
	size_t sysctl_len;
	int error;

	sc = pi->pi_arg;

	gsm = passthru_get_mmio(sc, GVT_D_MAP_GSM);
	if (gsm == NULL) {
		warnx("%s: Unable to access gsm", __func__);
		return (-1);
	}

	/* Host-physical base and length of the stolen memory. */
	sysctl_len = sizeof(gsm->hpa);
	error = sysctlbyname("hw.intel_graphics_stolen_base", &gsm->hpa,
	    &sysctl_len, NULL, 0);
	if (error) {
		warn("%s: Unable to get graphics stolen memory base",
		    __func__);
		return (-1);
	}
	sysctl_len = sizeof(gsm->len);
	error = sysctlbyname("hw.intel_graphics_stolen_size", &gsm->len,
	    &sysctl_len, NULL, 0);
	if (error) {
		warn("%s: Unable to get graphics stolen memory length",
		    __func__);
		return (-1);
	}
	gsm->hva = NULL; /* unused */
	gsm->gva = NULL; /* unused */
	gsm->gpa = gvt_d_alloc_mmio_memory(gsm->hpa, gsm->len,
	    PCIM_BDSM_GSM_ALIGNMENT, E820_TYPE_RESERVED);
	if (gsm->gpa == 0) {
		warnx(
		    "%s: Unable to add Graphics Stolen Memory to E820 table (hpa 0x%lx len 0x%lx)",
		    __func__, gsm->hpa, gsm->len);
		e820_dump_table();
		return (-1);
	}
	if (gsm->gpa != gsm->hpa) {
		/*
		 * ACRN source code implies that graphics driver for newer Intel
		 * platforms like Tiger Lake will read the Graphics Stolen Memory
		 * address from an MMIO register. We have three options to solve this
		 * issue:
		 *    1. Patch the value in the MMIO register
		 *       This could have unintended side effects. Without any
		 *       documentation how this register is used by the GPU, don't do
		 *       it.
		 *    2. Trap the MMIO register
		 *       It's not possible to trap a single MMIO register. We need to
		 *       trap a whole page. Trapping a bunch of MMIO register could
		 *       degrade the performance noticeably. We have to test it.
		 *    3. Use an 1:1 host to guest mapping
		 *       Maybe not always possible. As far as we know, no supported
		 *       platform requires a 1:1 mapping. For that reason, just log a
		 *       warning.
		 */
		warnx(
		    "Warning: Unable to reuse host address of Graphics Stolen Memory. GPU passthrough might not work properly.");
	}

	igd_ops = get_igd_ops(pi);
	if (igd_ops == NULL) {
		/*
		 * Use warnx(3) here: get_igd_ops() does not set errno, so
		 * warn(3) would append a stale, meaningless error string.
		 */
		warnx("%s: Unknown IGD device. It's not supported yet!",
		    __func__);
		return (-1);
	}

	return (igd_ops->set_bdsm(pi, gsm->gpa));
}
348
349
static int
350
gvt_d_setup_vbt(struct pci_devinst *const pi, int memfd, uint64_t vbt_hpa,
351
uint64_t vbt_len, vm_paddr_t *vbt_gpa)
352
{
353
struct passthru_softc *sc;
354
struct passthru_mmio_mapping *vbt;
355
356
sc = pi->pi_arg;
357
358
vbt = passthru_get_mmio(sc, GVT_D_MAP_VBT);
359
if (vbt == NULL) {
360
warnx("%s: Unable to access VBT", __func__);
361
return (-1);
362
}
363
364
vbt->hpa = vbt_hpa;
365
vbt->len = vbt_len;
366
367
vbt->hva = mmap(NULL, vbt->len, PROT_READ, MAP_SHARED, memfd, vbt->hpa);
368
if (vbt->hva == MAP_FAILED) {
369
warn("%s: Unable to map VBT", __func__);
370
return (-1);
371
}
372
373
vbt->gpa = gvt_d_alloc_mmio_memory(vbt->hpa, vbt->len,
374
E820_ALIGNMENT_NONE, E820_TYPE_NVS);
375
if (vbt->gpa == 0) {
376
warnx(
377
"%s: Unable to add VBT to E820 table (hpa 0x%lx len 0x%lx)",
378
__func__, vbt->hpa, vbt->len);
379
munmap(vbt->hva, vbt->len);
380
e820_dump_table();
381
return (-1);
382
}
383
vbt->gva = vm_map_gpa(pi->pi_vmctx, vbt->gpa, vbt->len);
384
if (vbt->gva == NULL) {
385
warnx("%s: Unable to map guest VBT", __func__);
386
munmap(vbt->hva, vbt->len);
387
return (-1);
388
}
389
390
if (vbt->gpa != vbt->hpa) {
391
/*
392
* A 1:1 host to guest mapping is not required but this could
393
* change in the future.
394
*/
395
warnx(
396
"Warning: Unable to reuse host address of VBT. GPU passthrough might not work properly.");
397
}
398
399
memcpy(vbt->gva, vbt->hva, vbt->len);
400
401
/*
402
* Return the guest physical address. It's used to patch the OpRegion
403
* properly.
404
*/
405
*vbt_gpa = vbt->gpa;
406
407
return (0);
408
}
409
410
static int
411
gvt_d_setup_opregion(struct pci_devinst *const pi)
412
{
413
struct passthru_softc *sc;
414
struct passthru_mmio_mapping *opregion;
415
struct igd_opregion *opregion_ptr;
416
struct igd_opregion_header *header;
417
vm_paddr_t vbt_gpa = 0;
418
vm_paddr_t vbt_hpa;
419
uint64_t asls;
420
int error = 0;
421
int memfd;
422
423
sc = pi->pi_arg;
424
425
memfd = open(_PATH_MEM, O_RDONLY, 0);
426
if (memfd < 0) {
427
warn("%s: Failed to open %s", __func__, _PATH_MEM);
428
return (-1);
429
}
430
431
opregion = passthru_get_mmio(sc, GVT_D_MAP_OPREGION);
432
if (opregion == NULL) {
433
warnx("%s: Unable to access opregion", __func__);
434
close(memfd);
435
return (-1);
436
}
437
438
asls = pci_host_read_config(passthru_get_sel(sc), PCIR_ASLS_CTL, 4);
439
440
header = mmap(NULL, sizeof(*header), PROT_READ, MAP_SHARED, memfd,
441
asls);
442
if (header == MAP_FAILED) {
443
warn("%s: Unable to map OpRegion header", __func__);
444
close(memfd);
445
return (-1);
446
}
447
if (memcmp(header->sign, IGD_OPREGION_HEADER_SIGN,
448
sizeof(header->sign)) != 0) {
449
warnx("%s: Invalid OpRegion signature", __func__);
450
munmap(header, sizeof(*header));
451
close(memfd);
452
return (-1);
453
}
454
455
opregion->hpa = asls;
456
opregion->len = header->size * KB;
457
munmap(header, sizeof(*header));
458
459
if (opregion->len != sizeof(struct igd_opregion)) {
460
warnx("%s: Invalid OpRegion size of 0x%lx", __func__,
461
opregion->len);
462
close(memfd);
463
return (-1);
464
}
465
466
opregion->hva = mmap(NULL, opregion->len, PROT_READ, MAP_SHARED, memfd,
467
opregion->hpa);
468
if (opregion->hva == MAP_FAILED) {
469
warn("%s: Unable to map host OpRegion", __func__);
470
close(memfd);
471
return (-1);
472
}
473
474
opregion_ptr = (struct igd_opregion *)opregion->hva;
475
if (opregion_ptr->mbox3.rvda != 0) {
476
/*
477
* OpRegion v2.0 contains a physical address to the VBT. This
478
* address is useless in a guest environment. It's possible to
479
* patch that but we don't support that yet. So, the only thing
480
* we can do is give up.
481
*/
482
if (opregion_ptr->header.over == 0x02000000) {
483
warnx(
484
"%s: VBT lays outside OpRegion. That's not yet supported for a version 2.0 OpRegion",
485
__func__);
486
close(memfd);
487
return (-1);
488
}
489
vbt_hpa = opregion->hpa + opregion_ptr->mbox3.rvda;
490
if (vbt_hpa < opregion->hpa) {
491
warnx(
492
"%s: overflow when calculating VBT address (OpRegion @ 0x%lx, RVDA = 0x%lx)",
493
__func__, opregion->hpa, opregion_ptr->mbox3.rvda);
494
close(memfd);
495
return (-1);
496
}
497
498
if ((error = gvt_d_setup_vbt(pi, memfd, vbt_hpa,
499
opregion_ptr->mbox3.rvds, &vbt_gpa)) != 0) {
500
close(memfd);
501
return (error);
502
}
503
}
504
505
close(memfd);
506
507
opregion->gpa = gvt_d_alloc_mmio_memory(opregion->hpa, opregion->len,
508
E820_ALIGNMENT_NONE, E820_TYPE_NVS);
509
if (opregion->gpa == 0) {
510
warnx(
511
"%s: Unable to add OpRegion to E820 table (hpa 0x%lx len 0x%lx)",
512
__func__, opregion->hpa, opregion->len);
513
e820_dump_table();
514
return (-1);
515
}
516
opregion->gva = vm_map_gpa(pi->pi_vmctx, opregion->gpa, opregion->len);
517
if (opregion->gva == NULL) {
518
warnx("%s: Unable to map guest OpRegion", __func__);
519
return (-1);
520
}
521
if (opregion->gpa != opregion->hpa) {
522
/*
523
* A 1:1 host to guest mapping is not required but this could
524
* change in the future.
525
*/
526
warnx(
527
"Warning: Unable to reuse host address of OpRegion. GPU passthrough might not work properly.");
528
}
529
530
memcpy(opregion->gva, opregion->hva, opregion->len);
531
532
/*
533
* Patch the VBT address to match our guest physical address.
534
*/
535
if (vbt_gpa != 0) {
536
if (vbt_gpa < opregion->gpa) {
537
warnx(
538
"%s: invalid guest VBT address 0x%16lx (OpRegion @ 0x%16lx)",
539
__func__, vbt_gpa, opregion->gpa);
540
return (-1);
541
}
542
543
((struct igd_opregion *)opregion->gva)->mbox3.rvda = vbt_gpa - opregion->gpa;
544
}
545
546
pci_set_cfgdata32(pi, PCIR_ASLS_CTL, opregion->gpa);
547
548
return (set_pcir_handler(sc, PCIR_ASLS_CTL, 4, passthru_cfgread_emulate,
549
passthru_cfgwrite_emulate));
550
}
551
552
static int
553
gvt_d_init(struct pci_devinst *const pi, nvlist_t *const nvl __unused)
554
{
555
int error;
556
557
if ((error = gvt_d_setup_gsm(pi)) != 0) {
558
warnx("%s: Unable to setup Graphics Stolen Memory", __func__);
559
goto done;
560
}
561
562
if ((error = gvt_d_setup_opregion(pi)) != 0) {
563
warnx("%s: Unable to setup OpRegion", __func__);
564
goto done;
565
}
566
567
done:
568
return (error);
569
}
570
571
static void
572
gvt_d_deinit(struct pci_devinst *const pi)
573
{
574
struct passthru_softc *sc;
575
struct passthru_mmio_mapping *opregion;
576
577
sc = pi->pi_arg;
578
579
opregion = passthru_get_mmio(sc, GVT_D_MAP_OPREGION);
580
581
/* HVA is only set, if it's initialized */
582
if (opregion->hva)
583
munmap((void *)opregion->hva, opregion->len);
584
}
585
586
/* Passthru sub-driver registration for Intel integrated GPUs (GVT-d). */
static struct passthru_dev gvt_d_dev = {
	.probe = gvt_d_probe,
	.init = gvt_d_init,
	.deinit = gvt_d_deinit,
};
PASSTHRU_DEV_SET(gvt_d_dev);
592
593