Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
google
GitHub Repository: google/crosvm
Path: blob/main/devices/src/pci/vfio_pci.rs
5394 views
1
// Copyright 2019 The ChromiumOS Authors
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
use std::cmp::max;
6
use std::cmp::Reverse;
7
use std::collections::BTreeMap;
8
use std::collections::BTreeSet;
9
use std::fs;
10
use std::path::Path;
11
use std::path::PathBuf;
12
use std::str::FromStr;
13
use std::sync::Arc;
14
15
use acpi_tables::aml::Aml;
16
use base::debug;
17
use base::error;
18
use base::pagesize;
19
use base::warn;
20
use base::AsRawDescriptor;
21
use base::AsRawDescriptors;
22
use base::Event;
23
use base::EventToken;
24
use base::MemoryMapping;
25
use base::Protection;
26
use base::RawDescriptor;
27
use base::Tube;
28
use base::WaitContext;
29
use base::WorkerThread;
30
use hypervisor::MemCacheType;
31
use resources::AddressRange;
32
use resources::Alloc;
33
use resources::AllocOptions;
34
use resources::MmioType;
35
use resources::SystemAllocator;
36
use sync::Mutex;
37
use vfio_sys::vfio::VFIO_PCI_ACPI_NTFY_IRQ_INDEX;
38
use vfio_sys::*;
39
use vm_control::api::VmMemoryClient;
40
use vm_control::HotPlugDeviceInfo;
41
use vm_control::HotPlugDeviceType;
42
use vm_control::PciId;
43
use vm_control::VmMemoryDestination;
44
use vm_control::VmMemoryRegionId;
45
use vm_control::VmMemorySource;
46
use vm_control::VmRequest;
47
use vm_control::VmResponse;
48
49
use crate::pci::acpi::DeviceVcfgRegister;
50
use crate::pci::acpi::DsmMethod;
51
use crate::pci::acpi::PowerResourceMethod;
52
use crate::pci::acpi::SHM_OFFSET;
53
use crate::pci::msi::MsiConfig;
54
use crate::pci::msi::MsiStatus;
55
use crate::pci::msi::PCI_MSI_FLAGS;
56
use crate::pci::msi::PCI_MSI_FLAGS_64BIT;
57
use crate::pci::msi::PCI_MSI_FLAGS_MASKBIT;
58
use crate::pci::msi::PCI_MSI_NEXT_POINTER;
59
use crate::pci::msix::MsixConfig;
60
use crate::pci::msix::MsixStatus;
61
use crate::pci::msix::BITS_PER_PBA_ENTRY;
62
use crate::pci::msix::MSIX_PBA_ENTRIES_MODULO;
63
use crate::pci::msix::MSIX_TABLE_ENTRIES_MODULO;
64
use crate::pci::pci_device::BarRange;
65
use crate::pci::pci_device::Error as PciDeviceError;
66
use crate::pci::pci_device::PciDevice;
67
use crate::pci::pci_device::PreferredIrq;
68
use crate::pci::pm::PciPmCap;
69
use crate::pci::pm::PmConfig;
70
use crate::pci::pm::PM_CAP_LENGTH;
71
use crate::pci::PciAddress;
72
use crate::pci::PciBarConfiguration;
73
use crate::pci::PciBarIndex;
74
use crate::pci::PciBarPrefetchable;
75
use crate::pci::PciBarRegionType;
76
use crate::pci::PciCapabilityID;
77
use crate::pci::PciClassCode;
78
use crate::pci::PciInterruptPin;
79
use crate::pci::PCI_VCFG_DSM;
80
use crate::pci::PCI_VCFG_NOTY;
81
use crate::pci::PCI_VCFG_PM;
82
use crate::pci::PCI_VENDOR_ID_INTEL;
83
use crate::vfio::VfioDevice;
84
use crate::vfio::VfioError;
85
use crate::vfio::VfioIrqType;
86
use crate::vfio::VfioPciConfig;
87
use crate::IrqLevelEvent;
88
use crate::Suspendable;
89
90
// Offsets of registers in the standard PCI configuration space header.
const PCI_VENDOR_ID: u32 = 0x0;
const PCI_DEVICE_ID: u32 = 0x2;
const PCI_COMMAND: u32 = 0x4;
// Memory Space Enable bit in the Command register.
const PCI_COMMAND_MEMORY: u8 = 0x2;
const PCI_BASE_CLASS_CODE: u32 = 0x0B;
const PCI_INTERRUPT_NUM: u32 = 0x3C;
const PCI_INTERRUPT_PIN: u32 = 0x3D;

// Offset of the capability-list head pointer in the config header.
const PCI_CAPABILITY_LIST: u32 = 0x34;
// Capability IDs from the PCI specification.
const PCI_CAP_ID_MSI: u8 = 0x05;
const PCI_CAP_ID_MSIX: u8 = 0x11;
const PCI_CAP_ID_PM: u8 = 0x01;

// Size of the standard PCI config space
const PCI_CONFIG_SPACE_SIZE: u32 = 0x100;
// Size of the standard PCIe config space: 4KB
const PCIE_CONFIG_SPACE_SIZE: u32 = 0x1000;

// Extended Capabilities
const PCI_EXT_CAP_ID_CAC: u16 = 0x0C;
const PCI_EXT_CAP_ID_ARI: u16 = 0x0E;
const PCI_EXT_CAP_ID_SRIOV: u16 = 0x10;
const PCI_EXT_CAP_ID_REBAR: u16 = 0x15;
113
114
/// Wrapper around the device's PCI power-management capability.
///
/// The capability header dword is read once from the physical device; the
/// guest-visible control/status state is emulated by `PmConfig` rather
/// than being forwarded to hardware.
struct VfioPmCap {
    // Offset of the PM capability within the PCI config space.
    offset: u32,
    // Cached capability header dword (cap ID / next pointer in the low 16
    // bits; the PMC field in the upper 16 bits has crosvm's default
    // capabilities OR'd in).
    capabilities: u32,
    // Emulated PM control/status registers.
    config: PmConfig,
}

impl VfioPmCap {
    /// Builds the emulated PM capability from the device's config space at
    /// `cap_start`.
    fn new(config: &VfioPciConfig, cap_start: u32) -> Self {
        let mut capabilities: u32 = config.read_config(cap_start);
        // OR crosvm's default PM capabilities into the PMC field
        // (upper 16 bits of the header dword).
        capabilities |= (PciPmCap::default_cap() as u32) << 16;
        VfioPmCap {
            offset: cap_start,
            capabilities,
            config: PmConfig::new(false),
        }
    }

    /// Whether a PME should be injected; delegates to the emulated config.
    pub fn should_trigger_pme(&mut self) -> bool {
        self.config.should_trigger_pme()
    }

    /// True when `offset` (absolute config-space offset) falls inside this
    /// capability's register range.
    fn is_pm_reg(&self, offset: u32) -> bool {
        (offset >= self.offset) && (offset < self.offset + PM_CAP_LENGTH as u32)
    }

    /// Reads a dword from the capability. Offset 0 (capability-relative)
    /// returns the cached header; anything else is served from the
    /// emulated PM registers.
    pub fn read(&self, offset: u32) -> u32 {
        let offset = offset - self.offset;
        if offset == 0 {
            self.capabilities
        } else {
            let mut data = 0;
            self.config.read(&mut data);
            data
        }
    }

    /// Handles a guest write. The first dword (the header) is read-only;
    /// writes past it are forwarded to the emulated PM registers at a
    /// header-relative offset.
    pub fn write(&mut self, offset: u64, data: &[u8]) {
        let offset = offset - self.offset as u64;
        if offset >= std::mem::size_of::<u32>() as u64 {
            let offset = offset - std::mem::size_of::<u32>() as u64;
            self.config.write(offset, data);
        }
    }
}
158
159
// Result of a guest write to the MSI/MSI-X capability: whether interrupt
// delivery was just disabled, just enabled, or reconfigured while already
// enabled (e.g. the MSI-X function mask toggled).
enum VfioMsiChange {
    Disable,
    Enable,
    FunctionChanged,
}
164
165
/// Emulated MSI capability backed by `MsiConfig`.
struct VfioMsiCap {
    // Emulated MSI registers plus the irqfd used for injection.
    config: MsiConfig,
    // Offset of the MSI capability within the PCI config space.
    offset: u32,
}

impl VfioMsiCap {
    /// Reads the MSI message-control register from the physical device to
    /// learn 64-bit-address and per-vector-masking support, then builds
    /// the emulated capability.
    fn new(
        config: &VfioPciConfig,
        msi_cap_start: u32,
        vm_socket_irq: Tube,
        device_id: u32,
        device_name: String,
    ) -> Self {
        let msi_ctl: u16 = config.read_config(msi_cap_start + PCI_MSI_FLAGS);
        let is_64bit = (msi_ctl & PCI_MSI_FLAGS_64BIT) != 0;
        let mask_cap = (msi_ctl & PCI_MSI_FLAGS_MASKBIT) != 0;

        VfioMsiCap {
            config: MsiConfig::new(is_64bit, mask_cap, vm_socket_irq, device_id, device_name),
            offset: msi_cap_start,
        }
    }

    /// Whether a config access of `len` bytes at absolute offset `index`
    /// touches this capability.
    fn is_msi_reg(&self, index: u64, len: usize) -> bool {
        self.config.is_msi_reg(self.offset, index, len)
    }

    /// Forwards a guest write to the emulated MSI registers and reports
    /// any resulting enable/disable transition.
    fn write_msi_reg(&mut self, index: u64, data: &[u8]) -> Option<VfioMsiChange> {
        let offset = index as u32 - self.offset;
        match self.config.write_msi_capability(offset, data) {
            MsiStatus::Enabled => Some(VfioMsiChange::Enable),
            MsiStatus::Disabled => Some(VfioMsiChange::Disable),
            MsiStatus::NothingToDo => None,
        }
    }

    // The irqfd to hand to VFIO, if one has been allocated.
    fn get_msi_irqfd(&self) -> Option<&Event> {
        self.config.get_irqfd()
    }

    // Releases resources held by the emulated MSI config.
    fn destroy(&mut self) {
        self.config.destroy()
    }
}
209
210
// MSI-X registers in MSI-X capability.
// Offsets below are relative to the start of the MSI-X capability.
const PCI_MSIX_FLAGS: u32 = 0x02; // Message Control
const PCI_MSIX_FLAGS_QSIZE: u16 = 0x07FF; // Table size (N-1 encoded)
const PCI_MSIX_TABLE: u32 = 0x04; // Table offset
const PCI_MSIX_TABLE_BIR: u32 = 0x07; // BAR index
const PCI_MSIX_TABLE_OFFSET: u32 = 0xFFFFFFF8; // Offset into specified BAR
const PCI_MSIX_PBA: u32 = 0x08; // Pending bit Array offset
const PCI_MSIX_PBA_BIR: u32 = 0x07; // BAR index
const PCI_MSIX_PBA_OFFSET: u32 = 0xFFFFFFF8; // Offset into specified BAR
219
220
// Emulated MSI-X capability plus the location of its table and
// pending-bit array (PBA) within the device's BARs.
struct VfioMsixCap {
    // Emulated MSI-X registers, table and PBA.
    config: MsixConfig,
    // Offset of the MSI-X capability within the PCI config space.
    offset: u32,
    // Number of MSI-X table entries (vectors).
    table_size: u16,
    // BAR holding the MSI-X table.
    table_pci_bar: PciBarIndex,
    // Byte offset of the table within that BAR.
    table_offset: u64,
    // Size of the table in bytes.
    table_size_bytes: u64,
    // BAR holding the pending-bit array.
    pba_pci_bar: PciBarIndex,
    // Byte offset of the PBA within that BAR.
    pba_offset: u64,
    // Size of the PBA in bytes.
    pba_size_bytes: u64,
    // One event per vector, substituted for the real irqfd while that
    // vector is masked (see `get_msix_irqfd`); the worker thread waits on
    // clones of these and re-triggers through the emulated config.
    msix_interrupt_evt: Vec<Event>,
}
232
233
impl VfioMsixCap {
    /// Reads the MSI-X control and Table/PBA location registers from the
    /// physical device and builds the emulated capability.
    fn new(
        config: &VfioPciConfig,
        msix_cap_start: u32,
        vm_socket_irq: Tube,
        pci_id: u32,
        device_name: String,
    ) -> Self {
        let msix_ctl: u16 = config.read_config(msix_cap_start + PCI_MSIX_FLAGS);
        let table: u32 = config.read_config(msix_cap_start + PCI_MSIX_TABLE);
        // Low 3 bits are the BAR index (BIR); the rest is the 8-byte
        // aligned offset into that BAR.
        let table_pci_bar = (table & PCI_MSIX_TABLE_BIR) as PciBarIndex;
        let table_offset = (table & PCI_MSIX_TABLE_OFFSET) as u64;
        let pba: u32 = config.read_config(msix_cap_start + PCI_MSIX_PBA);
        let pba_pci_bar = (pba & PCI_MSIX_PBA_BIR) as PciBarIndex;
        let pba_offset = (pba & PCI_MSIX_PBA_OFFSET) as u64;

        // The Table Size field is N-1 encoded.
        let mut table_size = (msix_ctl & PCI_MSIX_FLAGS_QSIZE) as u64 + 1;
        // Guard against devices whose advertised table would overlap the
        // PBA in the same BAR: clamp the table to end where the PBA begins.
        if table_pci_bar == pba_pci_bar
            && pba_offset > table_offset
            && (table_offset + table_size * MSIX_TABLE_ENTRIES_MODULO) > pba_offset
        {
            table_size = (pba_offset - table_offset) / MSIX_TABLE_ENTRIES_MODULO;
        }

        let table_size_bytes = table_size * MSIX_TABLE_ENTRIES_MODULO;
        // One pending bit per vector, rounded up to whole PBA entries.
        let pba_size_bytes =
            table_size.div_ceil(BITS_PER_PBA_ENTRY as u64) * MSIX_PBA_ENTRIES_MODULO;
        // Per-vector placeholder events, used while a vector is masked.
        let mut msix_interrupt_evt = Vec::new();
        for _ in 0..table_size {
            msix_interrupt_evt.push(Event::new().expect("failed to create msix interrupt"));
        }
        VfioMsixCap {
            config: MsixConfig::new(table_size as u16, vm_socket_irq, pci_id, device_name),
            offset: msix_cap_start,
            table_size: table_size as u16,
            table_pci_bar,
            table_offset,
            table_size_bytes,
            pba_pci_bar,
            pba_offset,
            pba_size_bytes,
            msix_interrupt_evt,
        }
    }

    // only msix control register is writable and need special handle in pci r/w
    fn is_msix_control_reg(&self, offset: u32, size: u32) -> bool {
        let control_start = self.offset + PCI_MSIX_FLAGS;
        let control_end = control_start + 2;

        // True when [offset, offset + size) overlaps the 2-byte control
        // register.
        offset < control_end && offset + size > control_start
    }

    // Serves a read of the message control register from the emulation,
    // overlaying the emulated bits on the value read from hardware.
    fn read_msix_control(&self, data: &mut u32) {
        *data = self.config.read_msix_capability(*data);
    }

    /// Applies a guest write to the message control register and reports
    /// the resulting enable/mask transition, if any.
    fn write_msix_control(&mut self, data: &[u8]) -> Option<VfioMsiChange> {
        let old_enabled = self.config.enabled();
        let old_masked = self.config.masked();

        self.config
            .write_msix_capability(PCI_MSIX_FLAGS.into(), data);

        let new_enabled = self.config.enabled();
        let new_masked = self.config.masked();

        if !old_enabled && new_enabled {
            Some(VfioMsiChange::Enable)
        } else if old_enabled && !new_enabled {
            Some(VfioMsiChange::Disable)
        } else if new_enabled && old_masked != new_masked {
            // Function mask toggled while enabled: per-vector routing must
            // be refreshed by the caller.
            Some(VfioMsiChange::FunctionChanged)
        } else {
            None
        }
    }

    // Whether (bar_index, offset) falls inside the MSI-X table.
    fn is_msix_table(&self, bar_index: PciBarIndex, offset: u64) -> bool {
        bar_index == self.table_pci_bar
            && offset >= self.table_offset
            && offset < self.table_offset + self.table_size_bytes
    }

    // Range the MSI-X table occupies in `bar_index`, if it lives there.
    fn get_msix_table(&self, bar_index: PciBarIndex) -> Option<AddressRange> {
        if bar_index == self.table_pci_bar {
            AddressRange::from_start_and_size(self.table_offset, self.table_size_bytes)
        } else {
            None
        }
    }

    // Reads from the emulated MSI-X table; `offset` is BAR-relative.
    fn read_table(&self, offset: u64, data: &mut [u8]) {
        let offset = offset - self.table_offset;
        self.config.read_msix_table(offset, data);
    }

    // Writes to the emulated MSI-X table; `offset` is BAR-relative.
    fn write_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
        let offset = offset - self.table_offset;
        self.config.write_msix_table(offset, data)
    }

    // Whether (bar_index, offset) falls inside the pending-bit array.
    fn is_msix_pba(&self, bar_index: PciBarIndex, offset: u64) -> bool {
        bar_index == self.pba_pci_bar
            && offset >= self.pba_offset
            && offset < self.pba_offset + self.pba_size_bytes
    }

    // Range the PBA occupies in `bar_index`, if it lives there.
    fn get_msix_pba(&self, bar_index: PciBarIndex) -> Option<AddressRange> {
        if bar_index == self.pba_pci_bar {
            AddressRange::from_start_and_size(self.pba_offset, self.pba_size_bytes)
        } else {
            None
        }
    }

    // Reads from the emulated PBA; `offset` is BAR-relative.
    fn read_pba(&self, offset: u64, data: &mut [u8]) {
        let offset = offset - self.pba_offset;
        self.config.read_pba_entries(offset, data);
    }

    // Writes to the emulated PBA; `offset` is BAR-relative.
    fn write_pba(&mut self, offset: u64, data: &[u8]) {
        let offset = offset - self.pba_offset;
        self.config.write_pba_entries(offset, data);
    }

    /// Event to register with VFIO for vector `index`: the real irqfd when
    /// the vector is live, the local placeholder event while the vector is
    /// masked (so the interrupt can be latched and re-triggered through
    /// the emulation), or `None` when no irqfd is allocated.
    fn get_msix_irqfd(&self, index: usize) -> Option<&Event> {
        let irqfd = self.config.get_irqfd(index);
        if let Some(fd) = irqfd {
            if self.msix_vector_masked(index) {
                Some(&self.msix_interrupt_evt[index])
            } else {
                Some(fd)
            }
        } else {
            None
        }
    }

    // The irqfds for all vectors, in table order.
    fn get_msix_irqfds(&self) -> Vec<Option<&Event>> {
        let mut irqfds = Vec::new();

        for i in 0..self.table_size {
            irqfds.push(self.get_msix_irqfd(i as usize));
        }

        irqfds
    }

    // Number of vectors in the table.
    fn table_size(&self) -> usize {
        self.table_size.into()
    }

    // Duplicates the per-vector placeholder events (e.g. for the worker
    // thread to wait on).
    fn clone_msix_evt(&self) -> Vec<Event> {
        self.msix_interrupt_evt
            .iter()
            .map(|irq| irq.try_clone().unwrap())
            .collect()
    }

    // A vector is masked when MSI-X is disabled, the function is masked,
    // or the vector's own table entry is masked.
    fn msix_vector_masked(&self, index: usize) -> bool {
        !self.config.enabled() || self.config.masked() || self.config.table_masked(index)
    }

    // Injects (or latches as pending, if masked) the interrupt for
    // vector `index` through the emulated config.
    fn trigger(&mut self, index: usize) {
        self.config.trigger(index as u16);
    }

    // Releases resources held by the emulated MSI-X config.
    fn destroy(&mut self) {
        self.config.destroy()
    }
}
405
406
impl AsRawDescriptors for VfioMsixCap {
407
fn as_raw_descriptors(&self) -> Vec<RawDescriptor> {
408
let mut rds = vec![self.config.as_raw_descriptor()];
409
rds.extend(
410
self.msix_interrupt_evt
411
.iter()
412
.map(|evt| evt.as_raw_descriptor()),
413
);
414
rds
415
}
416
}
417
418
// Simple address-range allocator used to carve up a BAR's sparse-mmap
// areas. Free space is tracked as a set of disjoint ranges.
struct VfioResourceAllocator {
    // The region that is not allocated yet.
    regions: BTreeSet<AddressRange>,
}

impl VfioResourceAllocator {
    // Creates a new `VfioResourceAllocator` managing the given range.
    // Returns `Err` if `pool` is empty.
    //
    // * `pool` - The address range to manage.
    fn new(pool: AddressRange) -> Result<Self, PciDeviceError> {
        if pool.is_empty() {
            return Err(PciDeviceError::SizeZero);
        }
        let mut regions = BTreeSet::new();
        regions.insert(pool);
        Ok(VfioResourceAllocator { regions })
    }

    // Carves `range` out of the free region `slot`, re-inserting any
    // leftover space before/after `range` as free regions. `slot` must
    // currently be a member of `self.regions`. Returns `range.start`.
    fn internal_allocate_from_slot(
        &mut self,
        slot: AddressRange,
        range: AddressRange,
    ) -> Result<u64, PciDeviceError> {
        let slot_was_present = self.regions.remove(&slot);
        assert!(slot_was_present);

        let (before, after) = slot.non_overlapping_ranges(range);

        if !before.is_empty() {
            self.regions.insert(before);
        }
        if !after.is_empty() {
            self.regions.insert(after);
        }

        Ok(range.start)
    }

    // Allocates a range of addresses from the managed region with a minimal alignment.
    // Overlapping with a previous allocation is _not_ allowed.
    // Returns allocated address.
    fn allocate_with_align(&mut self, size: u64, alignment: u64) -> Result<u64, PciDeviceError> {
        if size == 0 {
            return Err(PciDeviceError::SizeZero);
        }
        if !alignment.is_power_of_two() {
            return Err(PciDeviceError::BadAlignment);
        }

        // finds first region matching alignment and size.
        // The checked_add guards against u64 overflow when computing the
        // (aligned) end address of a candidate allocation.
        let region = self.regions.iter().find(|range| {
            match range.start % alignment {
                0 => range.start.checked_add(size - 1),
                r => range.start.checked_add(size - 1 + alignment - r),
            }
            .is_some_and(|end| end <= range.end)
        });

        match region {
            Some(&slot) => {
                // Round the region start up to the requested alignment.
                let start = match slot.start % alignment {
                    0 => slot.start,
                    r => slot.start + alignment - r,
                };
                let end = start + size - 1;
                let range = AddressRange::from_start_and_end(start, end);

                self.internal_allocate_from_slot(slot, range)
            }
            None => Err(PciDeviceError::OutOfSpace),
        }
    }

    // Allocates a range of addresses from the managed region with a required location.
    // Overlapping with a previous allocation is allowed.
    fn allocate_at_can_overlap(&mut self, range: AddressRange) -> Result<(), PciDeviceError> {
        if range.is_empty() {
            return Err(PciDeviceError::SizeZero);
        }

        // Carve `range` out of every free region it overlaps; parts of
        // `range` that were already allocated are silently skipped.
        while let Some(&slot) = self
            .regions
            .iter()
            .find(|avail_range| avail_range.overlaps(range))
        {
            let _address = self.internal_allocate_from_slot(slot, range)?;
        }
        Ok(())
    }
}
510
511
// Background worker that services device events: host-initiated device
// removal, PME generation on wakeup, ACPI notifications, and re-triggering
// of masked MSI-X vectors.
struct VfioPciWorker {
    // Guest PCI address; used as the PME requester id.
    address: PciAddress,
    // Host sysfs path, sent along with the hot-unplug request.
    sysfs_path: PathBuf,
    // Tube for issuing VmRequests (hot-unplug, PME, GPE).
    vm_socket: Tube,
    // Device name, used in log messages.
    name: String,
    pm_cap: Option<Arc<Mutex<VfioPmCap>>>,
    msix_cap: Option<Arc<Mutex<VfioMsixCap>>>,
}

impl VfioPciWorker {
    /// Worker event loop. Runs until `kill_evt` fires or the device is
    /// successfully hot-removed.
    ///
    /// * `req_irq_evt` - on signal, sends a hot-unplug VmRequest and exits
    ///   on success.
    /// * `wakeup_evt` - on signal, may send a PME if the device is in a
    ///   low-power state and the PM cap requests one.
    /// * `acpi_notify_evt` - counter event; its value is recorded in
    ///   `notification_val` and a GPE request is sent.
    /// * `kill_evt` - shutdown request.
    /// * `msix_evt` - per-vector events; a signal re-triggers that vector
    ///   through the emulated MSI-X config.
    fn run(
        &mut self,
        req_irq_evt: Event,
        wakeup_evt: Event,
        acpi_notify_evt: Event,
        kill_evt: Event,
        msix_evt: Vec<Event>,
        is_in_low_power: Arc<Mutex<bool>>,
        gpe: Option<u32>,
        notification_val: Arc<Mutex<Vec<u32>>>,
    ) {
        #[derive(EventToken, Debug)]
        enum Token {
            ReqIrq,
            WakeUp,
            AcpiNotifyEvent,
            Kill,
            MsixIrqi { index: usize },
        }

        let wait_ctx: WaitContext<Token> = match WaitContext::build_with(&[
            (&req_irq_evt, Token::ReqIrq),
            (&wakeup_evt, Token::WakeUp),
            (&acpi_notify_evt, Token::AcpiNotifyEvent),
            (&kill_evt, Token::Kill),
        ]) {
            Ok(pc) => pc,
            Err(e) => {
                error!(
                    "{} failed creating vfio WaitContext: {}",
                    self.name.clone(),
                    e
                );
                return;
            }
        };

        // Register one token per MSI-X vector placeholder event.
        for (index, msix_int) in msix_evt.iter().enumerate() {
            wait_ctx
                .add(msix_int, Token::MsixIrqi { index })
                .expect("Failed to create vfio WaitContext for msix interrupt event")
        }

        'wait: loop {
            let events = match wait_ctx.wait() {
                Ok(v) => v,
                Err(e) => {
                    error!("{} failed polling vfio events: {}", self.name.clone(), e);
                    break;
                }
            };

            for event in events.iter().filter(|e| e.is_readable) {
                match event.token {
                    Token::MsixIrqi { index } => {
                        // A masked vector fired: latch/re-trigger it via
                        // the emulated MSI-X config.
                        if let Some(msix_cap) = &self.msix_cap {
                            msix_cap.lock().trigger(index);
                        }
                    }
                    Token::ReqIrq => {
                        // Ask the VM to hot-remove this device; on success
                        // the worker's job is done.
                        let device = HotPlugDeviceInfo {
                            device_type: HotPlugDeviceType::EndPoint,
                            path: self.sysfs_path.clone(),
                            hp_interrupt: false,
                        };

                        let request = VmRequest::HotPlugVfioCommand { device, add: false };
                        if self.vm_socket.send(&request).is_ok() {
                            if let Err(e) = self.vm_socket.recv::<VmResponse>() {
                                error!("{} failed to remove vfio_device: {}", self.name.clone(), e);
                            } else {
                                break 'wait;
                            }
                        }
                    }
                    Token::WakeUp => {
                        // Drain the event, then raise a PME only if the
                        // device is in a low-power state and the PM cap
                        // says one is due.
                        let _ = wakeup_evt.wait();

                        if *is_in_low_power.lock() {
                            if let Some(pm_cap) = &self.pm_cap {
                                if pm_cap.lock().should_trigger_pme() {
                                    let request =
                                        VmRequest::PciPme(self.address.pme_requester_id());
                                    if self.vm_socket.send(&request).is_ok() {
                                        if let Err(e) = self.vm_socket.recv::<VmResponse>() {
                                            error!(
                                                "{} failed to send PME: {}",
                                                self.name.clone(),
                                                e
                                            );
                                        }
                                    }
                                }
                            }
                        }
                    }
                    Token::AcpiNotifyEvent => {
                        // Record the notification value and forward it to
                        // the VM as a GPE. Ignored when no GPE number was
                        // assigned.
                        if let Some(gpe) = gpe {
                            if let Ok(val) = base::EventExt::read_count(&acpi_notify_evt) {
                                notification_val.lock().push(val as u32);
                                let request = VmRequest::Gpe {
                                    gpe,
                                    clear_evt: None,
                                };
                                if self.vm_socket.send(&request).is_ok() {
                                    if let Err(e) = self.vm_socket.recv::<VmResponse>() {
                                        error!("{} failed to send GPE: {}", self.name.clone(), e);
                                    }
                                }
                            } else {
                                error!("{} failed to read acpi_notify_evt", self.name.clone());
                            }
                        }
                    }
                    Token::Kill => break 'wait,
                }
            }
        }
    }
}
641
642
/// Extracts the "next capability offset" field (bits 31:20) from a PCI
/// Express extended capability header dword. The bottom two bits are
/// forced to zero since capability offsets are DWORD aligned.
fn get_next_from_extcap_header(cap_header: u32) -> u32 {
    let next = cap_header >> 20;
    next & 0xfff & !0x3
}
645
646
fn is_skipped_ext_cap(cap_id: u16) -> bool {
647
matches!(
648
cap_id,
649
// SR-IOV/ARI/Resizable_BAR capabilities are not well handled and should not be exposed
650
PCI_EXT_CAP_ID_ARI | PCI_EXT_CAP_ID_SRIOV | PCI_EXT_CAP_ID_REBAR
651
)
652
}
653
654
// Device-kind specific state.
enum DeviceData {
    // Intel graphics: BAR index used for the OpRegion
    // (u32::MAX until assigned).
    IntelGfxData { opregion_index: u32 },
}
657
658
/// PCI Express Extended Capabilities information
#[derive(Copy, Clone)]
struct ExtCap {
    /// cap offset in Configuration Space
    offset: u32,
    /// cap size in bytes (distance to the next capability by offset,
    /// computed after sorting)
    size: u32,
    /// next offset, set next non-skipped offset for non-skipped ext cap
    /// (i.e. the virtualized next pointer presented to the guest)
    next: u16,
    /// whether this cap is hidden from the guest (`true` = skipped)
    is_skipped: bool,
}
670
671
/// Implements the Vfio Pci device, then a pci device is added into vm
pub struct VfioPciDevice {
    // Handle to the underlying VFIO device.
    device: Arc<VfioDevice>,
    // Access to the device's PCI config space.
    config: VfioPciConfig,
    // Whether the device is hot-plugged rather than present at boot.
    hotplug: bool,
    // PCIe bus number to place the device on when hot-plugged.
    hotplug_bus_number: Option<u8>,
    // Requested guest address; the actual assignment is `pci_address`.
    preferred_address: PciAddress,
    // Guest PCI address, once one has been allocated.
    pci_address: Option<PciAddress>,
    // Trigger/resample event pair for legacy INTx.
    interrupt_evt: Option<IrqLevelEvent>,
    // Event signaled for ACPI notifications.
    acpi_notification_evt: Option<Event>,
    mmio_regions: Vec<PciBarConfiguration>,
    io_regions: Vec<PciBarConfiguration>,
    // Emulated capabilities discovered in the device's capability list.
    pm_cap: Option<Arc<Mutex<VfioPmCap>>>,
    msi_cap: Option<VfioMsiCap>,
    msix_cap: Option<Arc<Mutex<VfioMsixCap>>>,
    // Interrupt mode currently programmed into VFIO (INTx/MSI/MSI-X).
    irq_type: Option<VfioIrqType>,
    // Client used to map/unmap guest memory regions for BAR mmaps.
    vm_memory_client: VmMemoryClient,
    // Device-kind specific state (currently only Intel graphics).
    device_data: Option<DeviceData>,
    pm_evt: Option<Event>,
    // Set while the device is in a low-power state; read by the worker.
    is_in_low_power: Arc<Mutex<bool>>,
    worker_thread: Option<WorkerThread<VfioPciWorker>>,
    vm_socket_vm: Option<Tube>,
    // Host sysfs path of the device.
    sysfs_path: PathBuf,
    // PCI Express Extended Capabilities
    ext_caps: Vec<ExtCap>,
    vcfg_shm_mmap: Option<MemoryMapping>,
    // Per-BAR guest address and memory-region ids of the active mmaps.
    mapped_mmio_bars: BTreeMap<PciBarIndex, (u64, Vec<VmMemoryRegionId>)>,
    activated: bool,
    // Values delivered via ACPI notifications.
    acpi_notifier_val: Arc<Mutex<Vec<u32>>>,
    gpe: Option<u32>,
    base_class_code: PciClassCode,
}
703
704
impl VfioPciDevice {
705
/// Constructs a new Vfio Pci device for the give Vfio device
pub fn new(
    sysfs_path: &Path,
    device: VfioDevice,
    hotplug: bool,
    hotplug_bus_number: Option<u8>,
    guest_address: Option<PciAddress>,
    vfio_device_socket_msi: Tube,
    vfio_device_socket_msix: Tube,
    vm_memory_client: VmMemoryClient,
    vfio_device_socket_vm: Tube,
) -> Result<Self, PciDeviceError> {
    // Preferred guest address: a hotplug bus wins, then an explicit guest
    // address, falling back to the device's host address.
    let preferred_address = if let Some(bus_num) = hotplug_bus_number {
        debug!("hotplug bus {}", bus_num);
        PciAddress {
            // Caller specify pcie bus number for hotplug device
            bus: bus_num,
            // devfn should be 0, otherwise pcie root port couldn't detect it
            dev: 0,
            func: 0,
        }
    } else if let Some(guest_address) = guest_address {
        debug!("guest PCI address {}", guest_address);
        guest_address
    } else {
        let addr = PciAddress::from_str(device.device_name()).map_err(|e| {
            PciDeviceError::PciAddressParseFailure(device.device_name().clone(), e)
        })?;
        debug!("parsed device PCI address {}", addr);
        addr
    };

    let dev = Arc::new(device);
    let config = VfioPciConfig::new(Arc::clone(&dev));
    let mut msi_socket = Some(vfio_device_socket_msi);
    let mut msix_socket = Some(vfio_device_socket_msix);
    let mut msi_cap: Option<VfioMsiCap> = None;
    let mut msix_cap: Option<Arc<Mutex<VfioMsixCap>>> = None;
    let mut pm_cap: Option<Arc<Mutex<VfioPmCap>>> = None;

    let mut is_pcie = false;
    let mut cap_next: u32 = config.read_config::<u8>(PCI_CAPABILITY_LIST).into();
    let vendor_id: u16 = config.read_config(PCI_VENDOR_ID);
    let device_id: u16 = config.read_config(PCI_DEVICE_ID);
    let base_class_code = PciClassCode::try_from(config.read_config::<u8>(PCI_BASE_CLASS_CODE))
        .unwrap_or(PciClassCode::Other);

    let pci_id = PciId::new(vendor_id, device_id);

    // Walk the classic capability list, emulating PM/MSI/MSI-X and
    // detecting whether the device is PCI Express.
    while cap_next != 0 {
        let cap_id: u8 = config.read_config(cap_next);
        if cap_id == PCI_CAP_ID_PM {
            pm_cap = Some(Arc::new(Mutex::new(VfioPmCap::new(&config, cap_next))));
        } else if cap_id == PCI_CAP_ID_MSI {
            // The socket is take()n, so only the first MSI cap found is
            // emulated.
            if let Some(msi_socket) = msi_socket.take() {
                msi_cap = Some(VfioMsiCap::new(
                    &config,
                    cap_next,
                    msi_socket,
                    pci_id.into(),
                    dev.device_name().to_string(),
                ));
            }
        } else if cap_id == PCI_CAP_ID_MSIX {
            // Likewise only the first MSI-X cap is emulated.
            if let Some(msix_socket) = msix_socket.take() {
                msix_cap = Some(Arc::new(Mutex::new(VfioMsixCap::new(
                    &config,
                    cap_next,
                    msix_socket,
                    pci_id.into(),
                    dev.device_name().to_string(),
                ))));
            }
        } else if cap_id == PciCapabilityID::PciExpress as u8 {
            is_pcie = true;
        }
        let offset = cap_next + PCI_MSI_NEXT_POINTER;
        cap_next = config.read_config::<u8>(offset).into();
    }

    let mut ext_caps: Vec<ExtCap> = Vec::new();
    if is_pcie {
        // Walk the extended capability chain, which starts immediately
        // after the standard config space.
        let mut ext_cap_next: u32 = PCI_CONFIG_SPACE_SIZE;
        while ext_cap_next != 0 {
            let ext_cap_config: u32 = config.read_config::<u32>(ext_cap_next);
            if ext_cap_config == 0 {
                break;
            }
            ext_caps.push(ExtCap {
                offset: ext_cap_next,
                // Calculate the size later
                size: 0,
                // init as the real value
                next: get_next_from_extcap_header(ext_cap_config) as u16,
                is_skipped: is_skipped_ext_cap((ext_cap_config & 0xffff) as u16),
            });
            ext_cap_next = get_next_from_extcap_header(ext_cap_config);
        }

        // Manage extended caps
        //
        // Extended capabilities are chained with each pointing to the next, so
        // we can drop anything other than the head of the chain simply by
        // modifying the previous next pointer. For the head of the chain, we
        // can modify the capability ID to something that cannot match a valid
        // capability. ID PCI_EXT_CAP_ID_CAC is for this since it is no longer
        // supported.
        //
        // reverse order by offset
        ext_caps.sort_by(|a, b| b.offset.cmp(&a.offset));
        let mut next_offset: u32 = PCIE_CONFIG_SPACE_SIZE;
        let mut non_skipped_next: u16 = 0;
        for ext_cap in ext_caps.iter_mut() {
            if !ext_cap.is_skipped {
                // Re-thread the guest-visible chain through only the
                // non-skipped caps.
                ext_cap.next = non_skipped_next;
                non_skipped_next = ext_cap.offset as u16;
            } else if ext_cap.offset == PCI_CONFIG_SPACE_SIZE {
                // A skipped head of the chain still needs a valid next
                // pointer so the rest of the chain stays reachable.
                ext_cap.next = non_skipped_next;
            }
            // Size is the distance to the next-higher cap offset.
            ext_cap.size = next_offset - ext_cap.offset;
            next_offset = ext_cap.offset;
        }
        // order by offset
        ext_caps.reverse();
    }

    let is_intel_gfx =
        base_class_code == PciClassCode::DisplayController && vendor_id == PCI_VENDOR_ID_INTEL;
    let device_data = if is_intel_gfx {
        Some(DeviceData::IntelGfxData {
            // u32::MAX is a sentinel: no OpRegion index assigned yet.
            opregion_index: u32::MAX,
        })
    } else {
        None
    };

    Ok(VfioPciDevice {
        device: dev,
        config,
        hotplug,
        hotplug_bus_number,
        preferred_address,
        pci_address: None,
        interrupt_evt: None,
        acpi_notification_evt: None,
        mmio_regions: Vec::new(),
        io_regions: Vec::new(),
        pm_cap,
        msi_cap,
        msix_cap,
        irq_type: None,
        vm_memory_client,
        device_data,
        pm_evt: None,
        is_in_low_power: Arc::new(Mutex::new(false)),
        worker_thread: None,
        vm_socket_vm: Some(vfio_device_socket_vm),
        sysfs_path: sysfs_path.to_path_buf(),
        ext_caps,
        vcfg_shm_mmap: None,
        mapped_mmio_bars: BTreeMap::new(),
        activated: false,
        acpi_notifier_val: Arc::new(Mutex::new(Vec::new())),
        gpe: None,
        base_class_code,
    })
}
872
873
/// Gets the pci address of the device, if one has already been allocated.
/// Returns `None` until an address has been assigned on the guest bus.
pub fn pci_address(&self) -> Option<PciAddress> {
    self.pci_address
}
877
878
pub fn is_gfx(&self) -> bool {
879
self.base_class_code == PciClassCode::DisplayController
880
}
881
882
fn is_intel_gfx(&self) -> bool {
883
matches!(self.device_data, Some(DeviceData::IntelGfxData { .. }))
884
}
885
886
fn enable_acpi_notification(&mut self) -> Result<(), PciDeviceError> {
887
if let Some(ref acpi_notification_evt) = self.acpi_notification_evt {
888
return self
889
.device
890
.acpi_notification_evt_enable(acpi_notification_evt, VFIO_PCI_ACPI_NTFY_IRQ_INDEX)
891
.map_err(|_| PciDeviceError::AcpiNotifySetupFailed);
892
}
893
Err(PciDeviceError::AcpiNotifySetupFailed)
894
}
895
896
#[allow(dead_code)]
897
fn disable_acpi_notification(&mut self) -> Result<(), PciDeviceError> {
898
if let Some(ref _acpi_notification_evt) = self.acpi_notification_evt {
899
return self
900
.device
901
.acpi_notification_disable(VFIO_PCI_ACPI_NTFY_IRQ_INDEX)
902
.map_err(|_| PciDeviceError::AcpiNotifyDeactivationFailed);
903
}
904
Err(PciDeviceError::AcpiNotifyDeactivationFailed)
905
}
906
907
#[allow(dead_code)]
908
fn test_acpi_notification(&mut self, val: u32) -> Result<(), PciDeviceError> {
909
if let Some(ref _acpi_notification_evt) = self.acpi_notification_evt {
910
return self
911
.device
912
.acpi_notification_test(VFIO_PCI_ACPI_NTFY_IRQ_INDEX, val)
913
.map_err(|_| PciDeviceError::AcpiNotifyTestFailed);
914
}
915
Err(PciDeviceError::AcpiNotifyTestFailed)
916
}
917
918
/// Programs legacy INTx into VFIO. No-op when no `interrupt_evt` has been
/// assigned.
///
/// The sequence matters: enable the IRQ, mask it, register the resample
/// event, then unmask. Failure at any step after `irq_enable` rolls the
/// device back via `disable_intx`; `irq_type` is set only on full success.
fn enable_intx(&mut self) {
    if let Some(ref interrupt_evt) = self.interrupt_evt {
        if let Err(e) = self.device.irq_enable(
            &[Some(interrupt_evt.get_trigger())],
            VFIO_PCI_INTX_IRQ_INDEX,
            0,
        ) {
            error!("{} Intx enable failed: {}", self.debug_label(), e);
            return;
        }
        // Keep the line masked while the resample event is hooked up.
        if let Err(e) = self.device.irq_mask(VFIO_PCI_INTX_IRQ_INDEX) {
            error!("{} Intx mask failed: {}", self.debug_label(), e);
            self.disable_intx();
            return;
        }
        if let Err(e) = self
            .device
            .resample_virq_enable(interrupt_evt.get_resample(), VFIO_PCI_INTX_IRQ_INDEX)
        {
            error!("{} resample enable failed: {}", self.debug_label(), e);
            self.disable_intx();
            return;
        }
        if let Err(e) = self.device.irq_unmask(VFIO_PCI_INTX_IRQ_INDEX) {
            error!("{} Intx unmask failed: {}", self.debug_label(), e);
            self.disable_intx();
            return;
        }
        self.irq_type = Some(VfioIrqType::Intx);
    }
}
949
950
fn disable_intx(&mut self) {
951
if let Err(e) = self.device.irq_disable(VFIO_PCI_INTX_IRQ_INDEX) {
952
error!("{} Intx disable failed: {}", self.debug_label(), e);
953
}
954
self.irq_type = None;
955
}
956
957
fn disable_irqs(&mut self) {
958
match self.irq_type {
959
Some(VfioIrqType::Msi) => self.disable_msi(),
960
Some(VfioIrqType::Msix) => self.disable_msix(),
961
_ => (),
962
}
963
964
// Above disable_msi() or disable_msix() will enable intx again.
965
// so disable_intx here again.
966
if let Some(VfioIrqType::Intx) = self.irq_type {
967
self.disable_intx();
968
}
969
}
970
971
fn enable_msi(&mut self) {
972
self.disable_irqs();
973
974
let irqfd = match &self.msi_cap {
975
Some(cap) => {
976
if let Some(fd) = cap.get_msi_irqfd() {
977
fd
978
} else {
979
self.enable_intx();
980
return;
981
}
982
}
983
None => {
984
self.enable_intx();
985
return;
986
}
987
};
988
989
if let Err(e) = self
990
.device
991
.irq_enable(&[Some(irqfd)], VFIO_PCI_MSI_IRQ_INDEX, 0)
992
{
993
error!("{} failed to enable msi: {}", self.debug_label(), e);
994
self.enable_intx();
995
return;
996
}
997
998
self.irq_type = Some(VfioIrqType::Msi);
999
}
1000
1001
fn disable_msi(&mut self) {
1002
if let Err(e) = self.device.irq_disable(VFIO_PCI_MSI_IRQ_INDEX) {
1003
error!("{} failed to disable msi: {}", self.debug_label(), e);
1004
return;
1005
}
1006
self.irq_type = None;
1007
1008
self.enable_intx();
1009
}
1010
1011
fn enable_msix(&mut self) {
1012
if self.msix_cap.is_none() {
1013
return;
1014
}
1015
1016
self.disable_irqs();
1017
let cap = self.msix_cap.as_ref().unwrap().lock();
1018
let vector_in_use = cap.get_msix_irqfds().iter().any(|&irq| irq.is_some());
1019
1020
let mut failed = false;
1021
if !vector_in_use {
1022
// If there are no msix vectors currently in use, we explicitly assign a new eventfd
1023
// to vector 0. Then we enable it and immediately disable it, so that vfio will
1024
// activate physical device. If there are available msix vectors, just enable them
1025
// instead.
1026
let fd = Event::new().expect("failed to create event");
1027
let table_size = cap.table_size();
1028
let mut irqfds = vec![None; table_size];
1029
irqfds[0] = Some(&fd);
1030
for fd in irqfds.iter_mut().skip(1) {
1031
*fd = None;
1032
}
1033
if let Err(e) = self.device.irq_enable(&irqfds, VFIO_PCI_MSIX_IRQ_INDEX, 0) {
1034
error!("{} failed to enable msix: {}", self.debug_label(), e);
1035
failed = true;
1036
}
1037
irqfds[0] = None;
1038
if let Err(e) = self.device.irq_enable(&irqfds, VFIO_PCI_MSIX_IRQ_INDEX, 0) {
1039
error!("{} failed to enable msix: {}", self.debug_label(), e);
1040
failed = true;
1041
}
1042
} else {
1043
let result = self
1044
.device
1045
.irq_enable(&cap.get_msix_irqfds(), VFIO_PCI_MSIX_IRQ_INDEX, 0);
1046
if let Err(e) = result {
1047
error!("{} failed to enable msix: {}", self.debug_label(), e);
1048
failed = true;
1049
}
1050
}
1051
1052
std::mem::drop(cap);
1053
if failed {
1054
self.enable_intx();
1055
return;
1056
}
1057
self.irq_type = Some(VfioIrqType::Msix);
1058
}
1059
1060
fn disable_msix(&mut self) {
1061
if self.msix_cap.is_none() {
1062
return;
1063
}
1064
if let Err(e) = self.device.irq_disable(VFIO_PCI_MSIX_IRQ_INDEX) {
1065
error!("{} failed to disable msix: {}", self.debug_label(), e);
1066
return;
1067
}
1068
self.irq_type = None;
1069
self.enable_intx();
1070
}
1071
1072
    /// Re-registers the full set of MSI-X irqfds with VFIO.
    ///
    /// Used when the MSI-X function state changed and every vector must be
    /// refreshed in one ioctl. No-op if the device has no MSI-X capability.
    fn msix_vectors_update(&self) -> Result<(), VfioError> {
        if let Some(cap) = &self.msix_cap {
            self.device
                .irq_enable(&cap.lock().get_msix_irqfds(), VFIO_PCI_MSIX_IRQ_INDEX, 0)?;
        }
        Ok(())
    }
1079
1080
fn msix_vector_update(&self, index: usize, irqfd: Option<&Event>) {
1081
if let Err(e) = self
1082
.device
1083
.irq_enable(&[irqfd], VFIO_PCI_MSIX_IRQ_INDEX, index as u32)
1084
{
1085
error!(
1086
"{} failed to update msix vector {}: {}",
1087
self.debug_label(),
1088
index,
1089
e
1090
);
1091
}
1092
}
1093
1094
    /// Subtracts `remove_mmaps` from the device-provided sparse mmap areas.
    ///
    /// Each input area is loaded into a `VfioResourceAllocator`, the (page
    /// aligned) remove ranges are carved out of it, and whatever remains is
    /// returned as a new sparse mmap list. Returns an empty list on internal
    /// allocator failure, which disables mmap for the BAR entirely.
    fn adjust_bar_mmap(
        &self,
        bar_mmaps: Vec<vfio_region_sparse_mmap_area>,
        remove_mmaps: &[AddressRange],
    ) -> Vec<vfio_region_sparse_mmap_area> {
        let mut mmaps: Vec<vfio_region_sparse_mmap_area> = Vec::with_capacity(bar_mmaps.len());
        let pgmask = (pagesize() as u64) - 1;

        for mmap in bar_mmaps.iter() {
            // Skip degenerate (empty/overflowing) areas.
            let mmap_range = if let Some(mmap_range) =
                AddressRange::from_start_and_size(mmap.offset, mmap.size)
            {
                mmap_range
            } else {
                continue;
            };
            let mut to_mmap = match VfioResourceAllocator::new(mmap_range) {
                Ok(a) => a,
                Err(e) => {
                    error!("{} adjust_bar_mmap failed: {}", self.debug_label(), e);
                    mmaps.clear();
                    return mmaps;
                }
            };

            for &(mut remove_range) in remove_mmaps.iter() {
                // Only the part overlapping this mmap area matters.
                remove_range = remove_range.intersect(mmap_range);
                if !remove_range.is_empty() {
                    // align offsets to page size
                    let begin = remove_range.start & !pgmask;
                    let end = ((remove_range.end + 1 + pgmask) & !pgmask) - 1;
                    let remove_range = AddressRange::from_start_and_end(begin, end);
                    // "Allocating" the range punches it out of the remaining regions.
                    if let Err(e) = to_mmap.allocate_at_can_overlap(remove_range) {
                        error!("{} adjust_bar_mmap failed: {}", self.debug_label(), e);
                    }
                }
            }

            // Whatever the allocator still holds is safe to mmap.
            for mmap in to_mmap.regions {
                mmaps.push(vfio_region_sparse_mmap_area {
                    offset: mmap.start,
                    size: mmap.end - mmap.start + 1,
                });
            }
        }

        mmaps
    }
1142
1143
    /// Removes the MSI-X table and PBA ranges from a BAR's sparse mmap list so
    /// those accesses stay trapped and can be emulated.
    ///
    /// Caller must ensure `self.msix_cap` is `Some` (unwrapped here).
    fn remove_bar_mmap_msix(
        &self,
        bar_index: PciBarIndex,
        bar_mmaps: Vec<vfio_region_sparse_mmap_area>,
    ) -> Vec<vfio_region_sparse_mmap_area> {
        let msix_cap = &self.msix_cap.as_ref().unwrap().lock();
        let mut msix_regions = Vec::new();

        // Either structure may live in this BAR (or neither).
        if let Some(t) = msix_cap.get_msix_table(bar_index) {
            msix_regions.push(t);
        }
        if let Some(p) = msix_cap.get_msix_pba(bar_index) {
            msix_regions.push(p);
        }

        // No MSI-X structures in this BAR: mmap list is unchanged.
        if msix_regions.is_empty() {
            return bar_mmaps;
        }

        self.adjust_bar_mmap(bar_mmaps, &msix_regions)
    }
1164
1165
    /// Maps the mmap-able parts of BAR `index` into guest memory at `bar_addr`.
    ///
    /// Returns the ids of all regions registered with the VM, so they can be
    /// unregistered later. Returns an empty list if the BAR cannot be mmapped
    /// or registration fails partway (already-registered regions are kept).
    fn add_bar_mmap(&self, index: PciBarIndex, bar_addr: u64) -> Vec<VmMemoryRegionId> {
        let mut mmaps_ids: Vec<VmMemoryRegionId> = Vec::new();
        if self.device.get_region_flags(index) & VFIO_REGION_INFO_FLAG_MMAP != 0 {
            // the bar storing msix table and pba couldn't mmap.
            // these bars should be trapped, so that msix could be emulated.
            let mut mmaps = self.device.get_region_mmap(index);

            if self.msix_cap.is_some() && !self.device.get_region_msix_mmappable(index) {
                mmaps = self.remove_bar_mmap_msix(index, mmaps);
            }
            if mmaps.is_empty() {
                return mmaps_ids;
            }

            for mmap in mmaps.iter() {
                let mmap_offset = mmap.offset;
                let mmap_size = mmap.size;
                // Guest physical address of this sparse area.
                let guest_map_start = bar_addr + mmap_offset;
                // Offset within the VFIO device fd where the area lives.
                let region_offset = self.device.get_region_offset(index);
                let offset = region_offset + mmap_offset;
                let descriptor = match self.device.device_file().try_clone() {
                    Ok(device_file) => device_file.into(),
                    Err(_) => break,
                };
                match self.vm_memory_client.register_memory(
                    VmMemorySource::Descriptor {
                        descriptor,
                        offset,
                        size: mmap_size,
                    },
                    VmMemoryDestination::GuestPhysicalAddress(guest_map_start),
                    Protection::read_write(),
                    MemCacheType::CacheCoherent,
                ) {
                    Ok(id) => {
                        mmaps_ids.push(id);
                    }
                    Err(e) => {
                        // Stop on first failure; the ids collected so far are
                        // still returned so the caller can track/undo them.
                        error!("register_memory failed: {}", e);
                        break;
                    }
                }
            }
        }

        mmaps_ids
    }
1212
1213
fn remove_bar_mmap(&self, mmap_ids: &[VmMemoryRegionId]) {
1214
for mmap_id in mmap_ids {
1215
if let Err(e) = self.vm_memory_client.unregister_memory(*mmap_id) {
1216
error!("unregister_memory failed: {}", e);
1217
}
1218
}
1219
}
1220
1221
fn disable_bars_mmap(&mut self) {
1222
for (_, (_, mmap_ids)) in self.mapped_mmio_bars.iter() {
1223
self.remove_bar_mmap(mmap_ids);
1224
}
1225
self.mapped_mmio_bars.clear();
1226
}
1227
1228
    /// Reconciles guest BAR mappings with the current BAR addresses.
    ///
    /// BARs whose address is unchanged keep their existing mapping; BARs that
    /// moved are unmapped first and remapped at the new address afterwards.
    fn commit_bars_mmap(&mut self) {
        // Unmap all bars before remapping bars, to prevent issues with overlap
        let mut needs_map = Vec::new();
        for mmio_info in self.mmio_regions.iter() {
            let bar_idx = mmio_info.bar_index();
            let addr = mmio_info.address();

            if let Some((cur_addr, ids)) = self.mapped_mmio_bars.remove(&bar_idx) {
                if cur_addr == addr {
                    // Address unchanged: put the entry back and keep the mapping.
                    self.mapped_mmio_bars.insert(bar_idx, (cur_addr, ids));
                    continue;
                } else {
                    // Address changed: drop the stale mapping now.
                    self.remove_bar_mmap(&ids);
                }
            }

            // addr == 0 means the BAR is not (yet) programmed by the guest.
            if addr != 0 {
                needs_map.push((bar_idx, addr));
            }
        }

        // Second pass: create the new mappings once all stale ones are gone.
        for (bar_idx, addr) in needs_map.iter() {
            let ids = self.add_bar_mmap(*bar_idx, *addr);
            self.mapped_mmio_bars.insert(*bar_idx, (*addr, ids));
        }
    }
1254
1255
    /// Releases all resources held for the device: MSI/MSI-X state, guest BAR
    /// mappings, and finally the underlying VFIO device itself.
    fn close(&mut self) {
        if let Some(msi) = self.msi_cap.as_mut() {
            msi.destroy();
        }
        if let Some(msix) = &self.msix_cap {
            msix.lock().destroy();
        }
        self.disable_bars_mmap();
        self.device.close();
    }
1265
1266
    /// Spawns the `VfioPciWorker` thread that services device requests
    /// (VFIO req irq), PM wakeups, ACPI notifications, and MSI-X events.
    ///
    /// Takes ownership of `vm_socket_vm`; returns silently if it was already
    /// taken (worker running) or if event setup fails.
    fn start_work_thread(&mut self) {
        let vm_socket = match self.vm_socket_vm.take() {
            Some(socket) => socket,
            None => return,
        };

        // Event signaled by VFIO when the host requests the device back.
        let req_evt = match Event::new() {
            Ok(evt) => {
                if let Err(e) = self
                    .device
                    .irq_enable(&[Some(&evt)], VFIO_PCI_REQ_IRQ_INDEX, 0)
                {
                    error!("{} enable req_irq failed: {}", self.debug_label(), e);
                    return;
                }
                evt
            }
            Err(_) => return,
        };

        // Cloned pair: one end kept on self, the other given to the worker.
        let (self_pm_evt, pm_evt) = match Event::new().and_then(|e| Ok((e.try_clone()?, e))) {
            Ok(v) => v,
            Err(e) => {
                error!(
                    "{} failed creating PM Event pair: {}",
                    self.debug_label(),
                    e
                );
                return;
            }
        };
        self.pm_evt = Some(self_pm_evt);

        // Same cloned-pair pattern for ACPI notifications.
        let (self_acpi_notify_evt, acpi_notify_evt) =
            match Event::new().and_then(|e| Ok((e.try_clone()?, e))) {
                Ok(v) => v,
                Err(e) => {
                    error!(
                        "{} failed creating ACPI Event pair: {}",
                        self.debug_label(),
                        e
                    );
                    return;
                }
            };
        self.acpi_notification_evt = Some(self_acpi_notify_evt);

        // Best effort: ACPI notification support is optional.
        if let Err(e) = self.enable_acpi_notification() {
            error!("{}: {}", self.debug_label(), e);
        }

        let mut msix_evt = Vec::new();
        if let Some(msix_cap) = &self.msix_cap {
            msix_evt = msix_cap.lock().clone_msix_evt();
        }

        // Everything the worker needs is cloned/moved before the thread starts.
        let name = self.device.device_name().to_string();
        let address = self.pci_address.expect("Unassigned PCI Address.");
        let sysfs_path = self.sysfs_path.clone();
        let pm_cap = self.pm_cap.clone();
        let msix_cap = self.msix_cap.clone();
        let is_in_low_power = self.is_in_low_power.clone();
        let gpe_nr = self.gpe;
        let notification_val = self.acpi_notifier_val.clone();
        self.worker_thread = Some(WorkerThread::start("vfio_pci", move |kill_evt| {
            let mut worker = VfioPciWorker {
                address,
                sysfs_path,
                vm_socket,
                name,
                pm_cap,
                msix_cap,
            };
            worker.run(
                req_evt,
                pm_evt,
                acpi_notify_evt,
                kill_evt,
                msix_evt,
                is_in_low_power,
                gpe_nr,
                notification_val,
            );
            // Returned worker state is recovered by sleep() via stop().
            worker
        }));
        self.activated = true;
    }
1353
1354
    /// Probes all BARs (and the expansion ROM) using the standard
    /// write-all-ones/read-back protocol.
    ///
    /// Memory BARs are returned for later address allocation; I/O BARs are
    /// recorded directly in `self.io_regions`.
    fn collect_bars(&mut self) -> Vec<PciBarConfiguration> {
        let mut i = VFIO_PCI_BAR0_REGION_INDEX;
        let mut mem_bars: Vec<PciBarConfiguration> = Vec::new();

        while i <= VFIO_PCI_ROM_REGION_INDEX {
            let mut low: u32 = 0xffffffff;
            // ROM BAR lives at config offset 0x30; regular BARs at 0x10 + 4*i.
            let offset: u32 = if i == VFIO_PCI_ROM_REGION_INDEX {
                0x30
            } else {
                0x10 + i * 4
            };
            // Write all-ones and read back: hardwired zero bits encode the size.
            self.config.write_config(low, offset);
            low = self.config.read_config(offset);

            let low_flag = low & 0xf;
            let is_64bit = low_flag & 0x4 == 0x4;
            // Bit 0 clear => memory BAR (ROM BAR has no type bit); low == 0 => absent.
            if (low_flag & 0x1 == 0 || i == VFIO_PCI_ROM_REGION_INDEX) && low != 0 {
                let mut upper: u32 = 0xffffffff;
                if is_64bit {
                    self.config.write_config(upper, offset + 4);
                    upper = self.config.read_config(offset + 4);
                }

                low &= 0xffff_fff0;
                let mut size: u64 = u64::from(upper);
                size <<= 32;
                size |= u64::from(low);
                // Two's complement: size = 2^k for the first writable bit.
                size = !size + 1;
                let region_type = if is_64bit {
                    PciBarRegionType::Memory64BitRegion
                } else {
                    PciBarRegionType::Memory32BitRegion
                };
                let prefetch = if low_flag & 0x8 == 0x8 {
                    PciBarPrefetchable::Prefetchable
                } else {
                    PciBarPrefetchable::NotPrefetchable
                };
                mem_bars.push(PciBarConfiguration::new(
                    i as usize,
                    size,
                    region_type,
                    prefetch,
                ));
            } else if low_flag & 0x1 == 0x1 {
                // I/O BAR: size mask uses bits [31:2].
                let size = !(low & 0xffff_fffc) + 1;
                self.io_regions.push(PciBarConfiguration::new(
                    i as usize,
                    size.into(),
                    PciBarRegionType::IoRegion,
                    PciBarPrefetchable::NotPrefetchable,
                ));
            }

            // 64-bit BARs consume two consecutive BAR slots.
            if is_64bit {
                i += 2;
            } else {
                i += 1;
            }
        }
        mem_bars
    }
1416
1417
    /// Records `bar_info` at `bar_addr` in `self.mmio_regions` and programs the
    /// address into the device's config space (both halves for 64-bit BARs).
    fn configure_barmem(&mut self, bar_info: &PciBarConfiguration, bar_addr: u64) {
        let offset: u32 = bar_info.reg_index() as u32 * 4;
        let mmio_region = *bar_info;
        self.mmio_regions.push(mmio_region.set_address(bar_addr));

        // Preserve the low 4 flag bits (type/prefetch) the device reports.
        let val: u32 = self.config.read_config(offset);
        let low = ((bar_addr & !0xf) as u32) | (val & 0xf);
        self.config.write_config(low, offset);
        if bar_info.is_64bit_memory() {
            let upper = (bar_addr >> 32) as u32;
            self.config.write_config(upper, offset + 4);
        }
    }
1430
1431
    /// Allocates guest MMIO for each memory BAR of a root-bus device and
    /// programs the BARs with the chosen addresses.
    ///
    /// For hotplugged devices no allocation happens (address stays 0): the
    /// guest OS assigns addresses from the parent bridge window instead, and
    /// consequently no `BarRange` is reported for them.
    fn allocate_root_barmem(
        &mut self,
        mem_bars: &[PciBarConfiguration],
        resources: &mut SystemAllocator,
    ) -> Result<Vec<BarRange>, PciDeviceError> {
        let address = self.pci_address.unwrap();
        let mut ranges: Vec<BarRange> = Vec::new();
        for mem_bar in mem_bars {
            let bar_size = mem_bar.size();
            let mut bar_addr: u64 = 0;
            // Don't allocate mmio for hotplug device, OS will allocate it from
            // its parent's bridge window.
            if !self.hotplug {
                bar_addr = resources
                    .allocate_mmio(
                        bar_size,
                        Alloc::PciBar {
                            bus: address.bus,
                            dev: address.dev,
                            func: address.func,
                            bar: mem_bar.bar_index() as u8,
                        },
                        "vfio_bar".to_string(),
                        AllocOptions::new()
                            .prefetchable(mem_bar.is_prefetchable())
                            // 32-bit BARs must land below 4 GiB.
                            .max_address(if mem_bar.is_64bit_memory() {
                                u64::MAX
                            } else {
                                u32::MAX.into()
                            })
                            // PCI BARs are naturally aligned to their size.
                            .align(bar_size),
                    )
                    .map_err(|e| PciDeviceError::IoAllocationFailed(bar_size, e))?;
                ranges.push(BarRange {
                    addr: bar_addr,
                    size: bar_size,
                    prefetchable: mem_bar.is_prefetchable(),
                });
            }
            self.configure_barmem(mem_bar, bar_addr);
        }
        Ok(ranges)
    }
1474
1475
fn allocate_nonroot_barmem(
1476
&mut self,
1477
mem_bars: &mut [PciBarConfiguration],
1478
resources: &mut SystemAllocator,
1479
) -> Result<Vec<BarRange>, PciDeviceError> {
1480
const NON_PREFETCHABLE: usize = 0;
1481
const PREFETCHABLE: usize = 1;
1482
const ARRAY_SIZE: usize = 2;
1483
let mut membars: [Vec<PciBarConfiguration>; ARRAY_SIZE] = [Vec::new(), Vec::new()];
1484
let mut allocator: [VfioResourceAllocator; ARRAY_SIZE] = [
1485
match VfioResourceAllocator::new(AddressRange::from_start_and_end(0, u32::MAX as u64)) {
1486
Ok(a) => a,
1487
Err(e) => {
1488
error!(
1489
"{} init nonroot VfioResourceAllocator failed: {}",
1490
self.debug_label(),
1491
e
1492
);
1493
return Err(e);
1494
}
1495
},
1496
match VfioResourceAllocator::new(AddressRange::from_start_and_end(0, u64::MAX)) {
1497
Ok(a) => a,
1498
Err(e) => {
1499
error!(
1500
"{} init nonroot VfioResourceAllocator failed: {}",
1501
self.debug_label(),
1502
e
1503
);
1504
return Err(e);
1505
}
1506
},
1507
];
1508
let mut memtype: [MmioType; ARRAY_SIZE] = [MmioType::Low, MmioType::High];
1509
// the window must be 1M-aligned as per the PCI spec
1510
let mut window_sz: [u64; ARRAY_SIZE] = [0; 2];
1511
let mut alignment: [u64; ARRAY_SIZE] = [0x100000; 2];
1512
1513
// Descend by bar size, this could reduce allocated size for all the bars.
1514
mem_bars.sort_by_key(|a| Reverse(a.size()));
1515
for mem_bar in mem_bars {
1516
let prefetchable = mem_bar.is_prefetchable();
1517
let is_64bit = mem_bar.is_64bit_memory();
1518
1519
// if one prefetchable bar is 32bit, all the prefetchable bars should be in Low MMIO,
1520
// as all the prefetchable bars should be in one region
1521
if prefetchable && !is_64bit {
1522
memtype[PREFETCHABLE] = MmioType::Low;
1523
}
1524
let i = if prefetchable {
1525
PREFETCHABLE
1526
} else {
1527
NON_PREFETCHABLE
1528
};
1529
let bar_size = mem_bar.size();
1530
let start = match allocator[i].allocate_with_align(bar_size, bar_size) {
1531
Ok(s) => s,
1532
Err(e) => {
1533
error!(
1534
"{} nonroot allocate_wit_align failed: {}",
1535
self.debug_label(),
1536
e
1537
);
1538
return Err(e);
1539
}
1540
};
1541
window_sz[i] = max(window_sz[i], start + bar_size);
1542
alignment[i] = max(alignment[i], bar_size);
1543
let mem_info = (*mem_bar).set_address(start);
1544
membars[i].push(mem_info);
1545
}
1546
1547
let address = self.pci_address.unwrap();
1548
let mut ranges: Vec<BarRange> = Vec::new();
1549
for (index, bars) in membars.iter().enumerate() {
1550
if bars.is_empty() {
1551
continue;
1552
}
1553
1554
let i = if index == 1 {
1555
PREFETCHABLE
1556
} else {
1557
NON_PREFETCHABLE
1558
};
1559
let mut window_addr: u64 = 0;
1560
// Don't allocate mmio for hotplug device, OS will allocate it from
1561
// its parent's bridge window.
1562
if !self.hotplug {
1563
window_sz[i] = (window_sz[i] + 0xfffff) & !0xfffff;
1564
let alloc = if i == NON_PREFETCHABLE {
1565
Alloc::PciBridgeWindow {
1566
bus: address.bus,
1567
dev: address.dev,
1568
func: address.func,
1569
}
1570
} else {
1571
Alloc::PciBridgePrefetchWindow {
1572
bus: address.bus,
1573
dev: address.dev,
1574
func: address.func,
1575
}
1576
};
1577
window_addr = resources
1578
.mmio_allocator(memtype[i])
1579
.allocate_with_align(
1580
window_sz[i],
1581
alloc,
1582
"vfio_bar_window".to_string(),
1583
alignment[i],
1584
)
1585
.map_err(|e| PciDeviceError::IoAllocationFailed(window_sz[i], e))?;
1586
for mem_info in bars {
1587
let bar_addr = window_addr + mem_info.address();
1588
ranges.push(BarRange {
1589
addr: bar_addr,
1590
size: mem_info.size(),
1591
prefetchable: mem_info.is_prefetchable(),
1592
});
1593
}
1594
}
1595
1596
for mem_info in bars {
1597
let bar_addr = window_addr + mem_info.address();
1598
self.configure_barmem(mem_info, bar_addr);
1599
}
1600
}
1601
Ok(ranges)
1602
}
1603
1604
    /// Return the supported iova max address of the Vfio Pci device
    pub fn get_max_iova(&self) -> u64 {
        self.device.get_max_addr()
    }
1608
1609
fn get_ext_cap_by_reg(&self, reg: u32) -> Option<ExtCap> {
1610
self.ext_caps
1611
.iter()
1612
.find(|ext_cap| reg >= ext_cap.offset && reg < ext_cap.offset + ext_cap.size)
1613
.cloned()
1614
}
1615
1616
fn is_skipped_reg(&self, reg: u32) -> bool {
1617
// fast handle for pci config space
1618
if reg < PCI_CONFIG_SPACE_SIZE {
1619
return false;
1620
}
1621
1622
self.get_ext_cap_by_reg(reg)
1623
.is_some_and(|cap| cap.is_skipped)
1624
}
1625
}
1626
1627
impl PciDevice for VfioPciDevice {
1628
    /// Human-readable label used in log messages for this device.
    fn debug_label(&self) -> String {
        format!("vfio {} device", self.device.device_name())
    }
1631
1632
    /// The guest PCI address this device would like to be enumerated at.
    fn preferred_address(&self) -> Option<PciAddress> {
        Some(self.preferred_address)
    }
1635
1636
fn allocate_address(
1637
&mut self,
1638
resources: &mut SystemAllocator,
1639
) -> Result<PciAddress, PciDeviceError> {
1640
if self.pci_address.is_none() {
1641
let mut address = self.preferred_address;
1642
while address.func < 8 {
1643
if resources.reserve_pci(address, self.debug_label()) {
1644
self.pci_address = Some(address);
1645
break;
1646
} else if self.hotplug_bus_number.is_none() {
1647
break;
1648
} else {
1649
address.func += 1;
1650
}
1651
}
1652
if let Some(msi_cap) = &mut self.msi_cap {
1653
msi_cap.config.set_pci_address(self.pci_address.unwrap());
1654
}
1655
if let Some(msix_cap) = &mut self.msix_cap {
1656
msix_cap
1657
.lock()
1658
.config
1659
.set_pci_address(self.pci_address.unwrap());
1660
}
1661
}
1662
self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
1663
}
1664
1665
    /// Raw descriptors that must stay open across sandbox fork/minijail setup:
    /// the VFIO device fds, interrupt events, control tubes, and MSI/MSI-X
    /// resources.
    fn keep_rds(&self) -> Vec<RawDescriptor> {
        let mut rds = self.device.keep_rds();
        if let Some(ref interrupt_evt) = self.interrupt_evt {
            rds.extend(interrupt_evt.as_raw_descriptors());
        }
        rds.push(self.vm_memory_client.as_raw_descriptor());
        if let Some(vm_socket_vm) = &self.vm_socket_vm {
            rds.push(vm_socket_vm.as_raw_descriptor());
        }
        if let Some(msi_cap) = &self.msi_cap {
            rds.push(msi_cap.config.get_msi_socket());
        }
        if let Some(msix_cap) = &self.msix_cap {
            rds.extend(msix_cap.lock().as_raw_descriptors());
        }
        rds
    }
1682
1683
    /// Reports the INTx pin/GSI the device wants, mirroring the host wiring.
    ///
    /// Returns `PreferredIrq::None` when the device exposes no interrupt pin.
    fn preferred_irq(&self) -> PreferredIrq {
        // Is INTx configured?
        let pin = match self.config.read_config::<u8>(PCI_INTERRUPT_PIN) {
            1 => PciInterruptPin::IntA,
            2 => PciInterruptPin::IntB,
            3 => PciInterruptPin::IntC,
            4 => PciInterruptPin::IntD,
            _ => return PreferredIrq::None,
        };

        // TODO: replace sysfs/irq value parsing with vfio interface
        // reporting host allocated interrupt number and type.
        let path = self.sysfs_path.join("irq");
        // Missing or unparsable sysfs value falls back to GSI 0.
        let gsi = fs::read_to_string(path)
            .map(|v| v.trim().parse::<u32>().unwrap_or(0))
            .unwrap_or(0);

        PreferredIrq::Fixed { pin, gsi }
    }
1702
1703
    /// Wires the INTx level event into the device and reflects the assigned
    /// pin/line number in the emulated config space.
    fn assign_irq(&mut self, irq_evt: IrqLevelEvent, pin: PciInterruptPin, irq_num: u32) {
        // Keep event/resample event references.
        self.interrupt_evt = Some(irq_evt);

        // enable INTX
        self.enable_intx();

        self.config
            .write_config(pin.to_mask() as u8, PCI_INTERRUPT_PIN);
        self.config.write_config(irq_num as u8, PCI_INTERRUPT_NUM);
    }
1714
1715
    /// Probes the device's BARs and allocates guest MMIO for the memory BARs,
    /// using the root-bus or behind-bridge strategy depending on the bus.
    fn allocate_io_bars(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> Result<Vec<BarRange>, PciDeviceError> {
        let address = self
            .pci_address
            .expect("allocate_address must be called prior to allocate_device_bars");

        let mut mem_bars = self.collect_bars();

        let ranges = if address.bus == 0 {
            self.allocate_root_barmem(&mem_bars, resources)?
        } else {
            // Behind a bridge: BARs are packed into bridge windows.
            self.allocate_nonroot_barmem(&mut mem_bars, resources)?
        };

        // Quirk, enable igd memory for guest vga arbitrate, otherwise kernel vga arbitrate
        // driver doesn't claim this vga device, then xorg couldn't boot up.
        if self.is_intel_gfx() {
            let mut cmd = self.config.read_config::<u8>(PCI_COMMAND);
            cmd |= PCI_COMMAND_MEMORY;
            self.config.write_config(cmd, PCI_COMMAND);
        }
        Ok(ranges)
    }
1740
1741
    /// Allocates device-specific (non-standard) BARs.
    ///
    /// Only does work for Intel graphics: exposes the IGD OpRegion as an extra
    /// 32-bit MMIO region and writes its guest address into config register
    /// 0xFC, where the guest driver expects to find it.
    fn allocate_device_bars(
        &mut self,
        resources: &mut SystemAllocator,
    ) -> Result<Vec<BarRange>, PciDeviceError> {
        let mut ranges: Vec<BarRange> = Vec::new();

        if !self.is_intel_gfx() {
            return Ok(ranges);
        }

        // Make intel gfx's opregion as mmio bar, and allocate a gpa for it
        // then write this gpa into pci cfg register
        if let Some((index, size)) = self.device.get_cap_type_info(
            VFIO_REGION_TYPE_PCI_VENDOR_TYPE | (PCI_VENDOR_ID_INTEL as u32),
            VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
        ) {
            let address = self
                .pci_address
                .expect("allocate_address must be called prior to allocate_device_bars");
            let bar_addr = resources
                .allocate_mmio(
                    size,
                    Alloc::PciBar {
                        bus: address.bus,
                        dev: address.dev,
                        func: address.func,
                        bar: (index * 4) as u8,
                    },
                    "vfio_bar".to_string(),
                    // OpRegion address register is 32 bits wide.
                    AllocOptions::new().max_address(u32::MAX.into()),
                )
                .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))?;
            ranges.push(BarRange {
                addr: bar_addr,
                size,
                prefetchable: false,
            });
            self.device_data = Some(DeviceData::IntelGfxData {
                opregion_index: index,
            });

            self.mmio_regions.push(
                PciBarConfiguration::new(
                    index as usize,
                    size,
                    PciBarRegionType::Memory32BitRegion,
                    PciBarPrefetchable::NotPrefetchable,
                )
                .set_address(bar_addr),
            );
            // ASLS register: guest-visible OpRegion address.
            self.config.write_config(bar_addr as u32, 0xFC);
        }

        Ok(ranges)
    }
1796
1797
    /// Returns the configuration of BAR `bar_num`, or `None` for I/O BARs and
    /// for memory BARs while memory decoding is disabled in PCI_COMMAND.
    fn get_bar_configuration(&self, bar_num: usize) -> Option<PciBarConfiguration> {
        for region in self.mmio_regions.iter().chain(self.io_regions.iter()) {
            if region.bar_index() == bar_num {
                let command: u8 = self.config.read_config(PCI_COMMAND);
                if (region.is_memory() && (command & PCI_COMMAND_MEMORY == 0)) || region.is_io() {
                    return None;
                } else {
                    return Some(*region);
                }
            }
        }

        None
    }
1811
1812
    /// No extra capabilities need registering: the passthrough device's own
    /// capability list is exposed via its config space.
    fn register_device_capabilities(&mut self) -> Result<(), PciDeviceError> {
        Ok(())
    }
1815
1816
    /// Reads a 32-bit config register, applying the emulation overlays:
    /// extended-capability chain rewriting, I/O BAR hiding, MSI-X/PM register
    /// emulation, and the Intel graphics stolen-memory quirk.
    fn read_config_register(&self, reg_idx: usize) -> u32 {
        let reg: u32 = (reg_idx * 4) as u32;
        let mut config: u32 = self.config.read_config(reg);

        // See VfioPciDevice::new for details how extended caps are managed
        if reg >= PCI_CONFIG_SPACE_SIZE {
            let ext_cap = self.get_ext_cap_by_reg(reg);
            if let Some(ext_cap) = ext_cap {
                // Header dword: patch the "next capability" pointer (bits 31:20).
                if ext_cap.offset == reg {
                    config = (config & !(0xffc << 20)) | (((ext_cap.next & 0xffc) as u32) << 20);
                }

                if ext_cap.is_skipped {
                    // The first skipped cap at 0x100 is replaced by a CAC header
                    // to keep the chain walkable; the rest read as zero.
                    if reg == PCI_CONFIG_SPACE_SIZE {
                        config = (config & (0xffc << 20)) | (PCI_EXT_CAP_ID_CAC as u32);
                    } else {
                        config = 0;
                    }
                }
            }
        }

        // Ignore IO bar
        if (0x10..=0x24).contains(&reg) {
            let bar_idx = (reg as usize - 0x10) / 4;
            if let Some(bar) = self.get_bar_configuration(bar_idx) {
                if bar.is_io() {
                    config = 0;
                }
            }
        } else if let Some(msix_cap) = &self.msix_cap {
            // NOTE(review): this `else if` chain means the PM branch below is
            // never consulted on a device that also has MSI-X — confirm that is
            // intended.
            let msix_cap = msix_cap.lock();
            if msix_cap.is_msix_control_reg(reg, 4) {
                msix_cap.read_msix_control(&mut config);
            }
        } else if let Some(pm_cap) = &self.pm_cap {
            let pm_cap = pm_cap.lock();
            if pm_cap.is_pm_reg(reg) {
                config = pm_cap.read(reg);
            }
        }

        // Quirk for intel graphic, set stolen memory size to 0 in pci_cfg[0x51]
        if self.is_intel_gfx() && reg == 0x50 {
            config &= 0xffff00ff;
        }

        config
    }
1865
1866
    /// Handles a guest config-space write: routes it through the PM/MSI/MSI-X
    /// emulation, forwards it to the physical device (unless the register is
    /// hidden), and keeps the guest BAR mappings in sync with address changes.
    fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
        // When guest write config register at the first time, start worker thread
        if self.worker_thread.is_none() && self.vm_socket_vm.is_some() {
            self.start_work_thread();
        };

        // Absolute byte offset into config space.
        let start = (reg_idx * 4) as u64 + offset;

        if let Some(pm_cap) = self.pm_cap.as_mut() {
            let mut pm_cap = pm_cap.lock();
            if pm_cap.is_pm_reg(start as u32) {
                pm_cap.write(start, data);
            }
        }

        // Track whether the write toggled MSI state.
        let mut msi_change: Option<VfioMsiChange> = None;
        if let Some(msi_cap) = self.msi_cap.as_mut() {
            if msi_cap.is_msi_reg(start, data.len()) {
                msi_change = msi_cap.write_msi_reg(start, data);
            }
        }

        match msi_change {
            Some(VfioMsiChange::Enable) => self.enable_msi(),
            Some(VfioMsiChange::Disable) => self.disable_msi(),
            _ => (),
        }

        // Same dance for MSI-X (reusing the variable).
        msi_change = None;
        if let Some(msix_cap) = &self.msix_cap {
            let mut msix_cap = msix_cap.lock();
            if msix_cap.is_msix_control_reg(start as u32, data.len() as u32) {
                msi_change = msix_cap.write_msix_control(data);
            }
        }

        match msi_change {
            Some(VfioMsiChange::Enable) => self.enable_msix(),
            Some(VfioMsiChange::Disable) => self.disable_msix(),
            Some(VfioMsiChange::FunctionChanged) => {
                if let Err(e) = self.msix_vectors_update() {
                    error!("update msix vectors failed: {}", e);
                }
            }
            _ => (),
        }

        // Forward to the physical device unless this register is hidden
        // (part of a skipped extended capability).
        if !self.is_skipped_reg(start as u32) {
            self.device
                .region_write(VFIO_PCI_CONFIG_REGION_INDEX as usize, data, start);
        }

        // if guest enable memory access, then enable bar mappable once
        if start == PCI_COMMAND as u64
            && data.len() == 2
            && data[0] & PCI_COMMAND_MEMORY == PCI_COMMAND_MEMORY
        {
            self.commit_bars_mmap();
        } else if (0x10..=0x24).contains(&start) && data.len() == 4 {
            // A 32-bit write to one of the BAR registers.
            let bar_idx = (start as u32 - 0x10) / 4;
            let value: [u8; 4] = [data[0], data[1], data[2], data[3]];
            let val = u32::from_le_bytes(value);
            let mut modify = false;
            for region in self.mmio_regions.iter_mut() {
                if region.bar_index() == bar_idx as usize {
                    let old_addr = region.address();
                    let new_addr = val & 0xFFFFFFF0;
                    if !region.is_64bit_memory() && (old_addr as u32) != new_addr {
                        // Change 32bit bar address
                        *region = region.set_address(u64::from(new_addr));
                        modify = true;
                    } else if region.is_64bit_memory() && (old_addr as u32) != new_addr {
                        // Change 64bit bar low address
                        *region =
                            region.set_address(u64::from(new_addr) | ((old_addr >> 32) << 32));
                        modify = true;
                    }
                    break;
                } else if region.is_64bit_memory()
                    && ((bar_idx % 2) == 1)
                    && (region.bar_index() + 1 == bar_idx as usize)
                {
                    // Change 64bit bar high address
                    let old_addr = region.address();
                    if val != (old_addr >> 32) as u32 {
                        let mut new_addr = (u64::from(val)) << 32;
                        new_addr |= old_addr & 0xFFFFFFFF;
                        *region = region.set_address(new_addr);
                        modify = true;
                    }
                    break;
                }
            }
            if modify {
                // if bar is changed under memory enabled, mmap the
                // new bar immediately.
                let cmd = self.config.read_config::<u8>(PCI_COMMAND);
                if cmd & PCI_COMMAND_MEMORY == PCI_COMMAND_MEMORY {
                    self.commit_bars_mmap();
                }
            }
        }
    }
1969
1970
fn read_virtual_config_register(&self, reg_idx: usize) -> u32 {
1971
if reg_idx == PCI_VCFG_NOTY {
1972
let mut q = self.acpi_notifier_val.lock();
1973
let mut val = 0;
1974
if !q.is_empty() {
1975
val = q.remove(0);
1976
}
1977
drop(q);
1978
return val;
1979
}
1980
1981
warn!(
1982
"{} read unsupported vcfg register {}",
1983
self.debug_label(),
1984
reg_idx
1985
);
1986
0xFFFF_FFFF
1987
}
1988
1989
    /// Writes a virtual (crosvm-defined) config register.
    ///
    /// `PCI_VCFG_PM`: value 0 requests low-power entry (with wakeup if a PM
    /// event is registered), any other value exits low power.
    /// `PCI_VCFG_DSM`: runs an ACPI _DSM call using the shared-memory page as
    /// both argument and result buffer.
    fn write_virtual_config_register(&mut self, reg_idx: usize, value: u32) {
        match reg_idx {
            PCI_VCFG_PM => {
                match value {
                    0 => {
                        if let Some(pm_evt) =
                            self.pm_evt.as_ref().map(|evt| evt.try_clone().unwrap())
                        {
                            *self.is_in_low_power.lock() = true;
                            // Best effort; errors are intentionally ignored.
                            let _ = self.device.pm_low_power_enter_with_wakeup(pm_evt);
                        } else {
                            let _ = self.device.pm_low_power_enter();
                        }
                    }
                    _ => {
                        *self.is_in_low_power.lock() = false;
                        let _ = self.device.pm_low_power_exit();
                    }
                };
            }
            PCI_VCFG_DSM => {
                if let Some(shm) = &self.vcfg_shm_mmap {
                    // The guest placed the DSM arguments in the shm page.
                    let mut args = [0u8; 4096];
                    if let Err(e) = shm.read_slice(&mut args, 0) {
                        error!("failed to read DSM Args: {}", e);
                        return;
                    }
                    let res = match self.device.acpi_dsm(&args) {
                        Ok(r) => r,
                        Err(e) => {
                            error!("failed to call DSM: {}", e);
                            return;
                        }
                    };
                    // Write the result back where the guest will read it.
                    if let Err(e) = shm.write_slice(&res, 0) {
                        error!("failed to write DSM result: {}", e);
                        return;
                    }
                    if let Err(e) = shm.msync() {
                        error!("failed to msync: {}", e)
                    }
                }
            }
            _ => warn!(
                "{} write unsupported vcfg register {}",
                self.debug_label(),
                reg_idx
            ),
        };
    }
2039
2040
    /// Reads from a BAR: MSI-X table/PBA accesses are served from the
    /// emulation; everything else goes straight to the physical device.
    fn read_bar(&mut self, bar_index: PciBarIndex, offset: u64, data: &mut [u8]) {
        if let Some(msix_cap) = &self.msix_cap {
            let msix_cap = msix_cap.lock();
            if msix_cap.is_msix_table(bar_index, offset) {
                msix_cap.read_table(offset, data);
                return;
            } else if msix_cap.is_msix_pba(bar_index, offset) {
                msix_cap.read_pba(offset, data);
                return;
            }
        }
        self.device.region_read(bar_index, data, offset);
    }
2053
2054
    /// Writes to a BAR: IGD OpRegion writes are dropped, MSI-X table/PBA
    /// writes go to the emulation (re-registering a vector's irqfd when its
    /// entry changes), the rest is forwarded to the physical device.
    fn write_bar(&mut self, bar_index: PciBarIndex, offset: u64, data: &[u8]) {
        // Ignore igd opregion's write
        if let Some(device_data) = &self.device_data {
            match *device_data {
                DeviceData::IntelGfxData { opregion_index } => {
                    if opregion_index == bar_index as u32 {
                        return;
                    }
                }
            }
        }

        if let Some(msix_cap) = &self.msix_cap {
            let mut msix_cap = msix_cap.lock();
            if msix_cap.is_msix_table(bar_index, offset) {
                let behavior = msix_cap.write_table(offset, data);
                // A modified table entry needs its irqfd re-registered.
                if let MsixStatus::EntryChanged(index) = behavior {
                    let irqfd = msix_cap.get_msix_irqfd(index);
                    self.msix_vector_update(index, irqfd);
                }
                return;
            } else if msix_cap.is_msix_pba(bar_index, offset) {
                msix_cap.write_pba(offset, data);
                return;
            }
        }

        self.device.region_write(bar_index, data, offset);
    }
2083
2084
    /// Releases all device resources (see `close`).
    fn destroy_device(&mut self) {
        self.close();
    }
2087
2088
    /// Generates the AML for this device's virtual config register, power
    /// resource, and (optionally) _DSM methods.
    ///
    /// Returns the AML bytes plus, when the vcfg shared-memory page could be
    /// mapped, its (offset, mapping) pair for the caller to register.
    fn generate_acpi_methods(&mut self) -> (Vec<u8>, Option<(u32, MemoryMapping)>) {
        let mut amls = Vec::new();
        let mut shm = None;
        if let Some(pci_address) = self.pci_address {
            let vcfg_offset = pci_address.to_config_address(0, 13);
            if let Ok(vcfg_register) = DeviceVcfgRegister::new(vcfg_offset) {
                vcfg_register.to_aml_bytes(&mut amls);
                // One mapping is handed back to the caller...
                shm = vcfg_register
                    .create_shm_mmap()
                    .map(|shm| (vcfg_offset + SHM_OFFSET, shm));
                // ...and a second, independent mapping of the same shm is kept
                // locally for servicing _DSM calls.
                self.vcfg_shm_mmap = vcfg_register.create_shm_mmap();
                // All vfio-pci devices should have virtual _PRx method, otherwise
                // host couldn't know whether device has enter into suspend state,
                // host would always think it is in active state, so its parent PCIe
                // switch couldn't enter into suspend state.
                PowerResourceMethod {}.to_aml_bytes(&mut amls);
                // TODO: WIP: Ideally, we should generate DSM only if the physical
                // device has a _DSM; however, such information is not provided by
                // Linux. As a temporary workaround, we check whether there is an
                // associated ACPI companion device node and skip generating guest
                // _DSM if there is none.
                let acpi_path = self.sysfs_path.join("firmware_node/path");
                if acpi_path.exists() {
                    DsmMethod {}.to_aml_bytes(&mut amls);
                }
            }
        }

        (amls, shm)
    }
2118
2119
    /// Allocates a General Purpose Event number for ACPI notifications and
    /// remembers it. Returns the assigned GPE, or `None` if none was available.
    fn set_gpe(&mut self, resources: &mut SystemAllocator) -> Option<u32> {
        if let Some(gpe_nr) = resources.allocate_gpe() {
            base::debug!("set_gpe: gpe-nr {} addr {:?}", gpe_nr, self.pci_address);
            self.gpe = Some(gpe_nr);
        }
        self.gpe
    }
2126
}
2127
2128
impl Suspendable for VfioPciDevice {
    /// Stops the worker thread and reclaims the state it was holding
    /// (PCI address, sysfs path, PM/MSI-X caps, VM socket) so the device can
    /// be restarted after wake.
    fn sleep(&mut self) -> anyhow::Result<()> {
        if let Some(worker_thread) = self.worker_thread.take() {
            let res = worker_thread.stop();
            self.pci_address = Some(res.address);
            self.sysfs_path = res.sysfs_path;
            self.pm_cap = res.pm_cap;
            self.msix_cap = res.msix_cap;
            self.vm_socket_vm = Some(res.vm_socket);
        }
        Ok(())
    }

    /// Restarts the worker thread, but only if the device had been activated
    /// (i.e. the guest already touched its config space) before sleeping.
    fn wake(&mut self) -> anyhow::Result<()> {
        if self.activated {
            self.start_work_thread();
        }
        Ok(())
    }
}
2148
2149
#[cfg(test)]
2150
mod tests {
2151
use resources::AddressRange;
2152
2153
use super::VfioResourceAllocator;
2154
2155
    #[test]
    fn no_overlap() {
        // Removing ranges entirely outside the allocator's span must leave
        // its free region untouched.
        // regions [32, 95]
        let mut memory =
            VfioResourceAllocator::new(AddressRange::from_start_and_end(32, 95)).unwrap();
        memory
            .allocate_at_can_overlap(AddressRange::from_start_and_end(0, 15))
            .unwrap();
        memory
            .allocate_at_can_overlap(AddressRange::from_start_and_end(100, 115))
            .unwrap();

        let mut iter = memory.regions.iter();
        assert_eq!(iter.next(), Some(&AddressRange::from_start_and_end(32, 95)));
    }
2170
2171
#[test]
2172
fn complete_overlap() {
2173
// regions [32, 95]
2174
let mut memory =
2175
VfioResourceAllocator::new(AddressRange::from_start_and_end(32, 95)).unwrap();
2176
// regions [32, 47], [64, 95]
2177
memory
2178
.allocate_at_can_overlap(AddressRange::from_start_and_end(48, 63))
2179
.unwrap();
2180
// regions [64, 95]
2181
memory
2182
.allocate_at_can_overlap(AddressRange::from_start_and_end(32, 47))
2183
.unwrap();
2184
2185
let mut iter = memory.regions.iter();
2186
assert_eq!(iter.next(), Some(&AddressRange::from_start_and_end(64, 95)));
2187
}
2188
2189
#[test]
2190
fn partial_overlap_one() {
2191
// regions [32, 95]
2192
let mut memory =
2193
VfioResourceAllocator::new(AddressRange::from_start_and_end(32, 95)).unwrap();
2194
// regions [32, 47], [64, 95]
2195
memory
2196
.allocate_at_can_overlap(AddressRange::from_start_and_end(48, 63))
2197
.unwrap();
2198
// regions [32, 39], [64, 95]
2199
memory
2200
.allocate_at_can_overlap(AddressRange::from_start_and_end(40, 55))
2201
.unwrap();
2202
2203
let mut iter = memory.regions.iter();
2204
assert_eq!(iter.next(), Some(&AddressRange::from_start_and_end(32, 39)));
2205
assert_eq!(iter.next(), Some(&AddressRange::from_start_and_end(64, 95)));
2206
}
2207
2208
#[test]
2209
fn partial_overlap_two() {
2210
// regions [32, 95]
2211
let mut memory =
2212
VfioResourceAllocator::new(AddressRange::from_start_and_end(32, 95)).unwrap();
2213
// regions [32, 47], [64, 95]
2214
memory
2215
.allocate_at_can_overlap(AddressRange::from_start_and_end(48, 63))
2216
.unwrap();
2217
// regions [32, 39], [72, 95]
2218
memory
2219
.allocate_at_can_overlap(AddressRange::from_start_and_end(40, 71))
2220
.unwrap();
2221
2222
let mut iter = memory.regions.iter();
2223
assert_eq!(iter.next(), Some(&AddressRange::from_start_and_end(32, 39)));
2224
assert_eq!(iter.next(), Some(&AddressRange::from_start_and_end(72, 95)));
2225
}
2226
2227
#[test]
2228
fn partial_overlap_three() {
2229
// regions [32, 95]
2230
let mut memory =
2231
VfioResourceAllocator::new(AddressRange::from_start_and_end(32, 95)).unwrap();
2232
// regions [32, 39], [48, 95]
2233
memory
2234
.allocate_at_can_overlap(AddressRange::from_start_and_end(40, 47))
2235
.unwrap();
2236
// regions [32, 39], [48, 63], [72, 95]
2237
memory
2238
.allocate_at_can_overlap(AddressRange::from_start_and_end(64, 71))
2239
.unwrap();
2240
// regions [32, 35], [76, 95]
2241
memory
2242
.allocate_at_can_overlap(AddressRange::from_start_and_end(36, 75))
2243
.unwrap();
2244
2245
let mut iter = memory.regions.iter();
2246
assert_eq!(iter.next(), Some(&AddressRange::from_start_and_end(32, 35)));
2247
assert_eq!(iter.next(), Some(&AddressRange::from_start_and_end(76, 95)));
2248
}
2249
}
2250
2251