Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
google
GitHub Repository: google/crosvm
Path: blob/main/devices/src/vfio.rs
5392 views
1
// Copyright 2019 The ChromiumOS Authors
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
use std::collections::HashMap;
6
use std::ffi::CString;
7
use std::fs::File;
8
use std::fs::OpenOptions;
9
use std::io;
10
use std::mem;
11
use std::os::raw::c_ulong;
12
use std::os::unix::prelude::FileExt;
13
use std::path::Path;
14
use std::path::PathBuf;
15
#[cfg(all(target_os = "android", target_arch = "aarch64"))]
16
use std::ptr::addr_of_mut;
17
use std::result;
18
use std::slice;
19
use std::sync::Arc;
20
use std::sync::OnceLock;
21
22
use base::error;
23
use base::ioctl;
24
use base::ioctl_with_mut_ptr;
25
use base::ioctl_with_mut_ref;
26
use base::ioctl_with_ptr;
27
use base::ioctl_with_ref;
28
use base::ioctl_with_val;
29
use base::warn;
30
use base::AsRawDescriptor;
31
use base::Error;
32
use base::Event;
33
use base::FromRawDescriptor;
34
use base::RawDescriptor;
35
use base::SafeDescriptor;
36
use cfg_if::cfg_if;
37
use data_model::vec_with_array_field;
38
use hypervisor::DeviceKind;
39
use hypervisor::Vm;
40
use rand::seq::index::sample;
41
use remain::sorted;
42
use resources::address_allocator::AddressAllocator;
43
use resources::AddressRange;
44
use resources::Alloc;
45
use resources::Error as ResourcesError;
46
use sync::Mutex;
47
use thiserror::Error;
48
use vfio_sys::vfio::vfio_acpi_dsm;
49
use vfio_sys::vfio::VFIO_IRQ_SET_DATA_BOOL;
50
use vfio_sys::*;
51
use zerocopy::FromBytes;
52
use zerocopy::Immutable;
53
use zerocopy::IntoBytes;
54
55
use crate::IommuDevType;
56
57
/// Errors that can occur while opening, configuring, or operating VFIO
/// containers, groups, and devices. Each variant's `#[error]` string is the
/// user-facing description; variants wrapping `Error` carry the underlying
/// OS error from the failed ioctl/syscall.
#[sorted]
#[derive(Error, Debug)]
pub enum VfioError {
    #[error("failed to duplicate VfioContainer")]
    ContainerDupError,
    #[error("failed to set container's IOMMU driver type as {0:?}: {1}")]
    ContainerSetIOMMU(IommuType, Error),
    #[error("failed to create KVM vfio device")]
    CreateVfioKvmDevice,
    #[error("failed to get Group Status: {0}")]
    GetGroupStatus(Error),
    #[error("failed to get vfio device fd: {0}")]
    GroupGetDeviceFD(Error),
    #[error("failed to add vfio group into vfio container: {0}")]
    GroupSetContainer(Error),
    #[error("group is inviable")]
    GroupViable,
    #[error("invalid region index: {0}")]
    InvalidIndex(usize),
    #[error("invalid operation")]
    InvalidOperation,
    #[error("invalid file path")]
    InvalidPath,
    #[error("failed to add guest memory map into iommu table: {0}")]
    IommuDmaMap(Error),
    #[error("failed to remove guest memory map from iommu table: {0}")]
    IommuDmaUnmap(Error),
    #[error("failed to get IOMMU cap info from host")]
    IommuGetCapInfo,
    #[error("failed to get IOMMU info from host: {0}")]
    IommuGetInfo(Error),
    #[error("failed to attach device to pKVM pvIOMMU: {0}")]
    KvmPviommuSetConfig(Error),
    #[error("failed to set KVM vfio device's attribute: {0}")]
    KvmSetDeviceAttr(Error),
    #[error("AddressAllocator is unavailable")]
    NoRescAlloc,
    #[error("failed to open /dev/vfio/vfio container: {0}")]
    OpenContainer(io::Error),
    #[error("failed to open {1} group: {0}")]
    OpenGroup(io::Error, String),
    #[error("failed to read {1} link: {0}")]
    ReadLink(io::Error, PathBuf),
    #[error("resources error: {0}")]
    Resources(ResourcesError),
    #[error("unknown vfio device type (flags: {0:#x})")]
    UnknownDeviceType(u32),
    #[error("failed to call vfio device's ACPI _DSM: {0}")]
    VfioAcpiDsm(Error),
    #[error("failed to disable vfio device's acpi notification: {0}")]
    VfioAcpiNotificationDisable(Error),
    #[error("failed to enable vfio device's acpi notification: {0}")]
    VfioAcpiNotificationEnable(Error),
    #[error("failed to test vfio device's acpi notification: {0}")]
    VfioAcpiNotificationTest(Error),
    #[error(
        "vfio API version doesn't match with VFIO_API_VERSION defined in vfio_sys/src/vfio.rs"
    )]
    VfioApiVersion,
    #[error("failed to get vfio device's info or info doesn't match: {0}")]
    VfioDeviceGetInfo(Error),
    #[error("failed to get vfio device's region info: {0}")]
    VfioDeviceGetRegionInfo(Error),
    #[error("container doesn't support IOMMU driver type {0:?}")]
    VfioIommuSupport(IommuType),
    #[error("failed to disable vfio device's irq: {0}")]
    VfioIrqDisable(Error),
    #[error("failed to enable vfio device's irq: {0}")]
    VfioIrqEnable(Error),
    #[error("failed to mask vfio device's irq: {0}")]
    VfioIrqMask(Error),
    #[error("failed to unmask vfio device's irq: {0}")]
    VfioIrqUnmask(Error),
    #[error("failed to enter vfio device's low power state: {0}")]
    VfioPmLowPowerEnter(Error),
    #[error("failed to exit vfio device's low power state: {0}")]
    VfioPmLowPowerExit(Error),
    #[error("failed to probe support for VFIO low power state entry: {0}")]
    VfioProbePmLowPowerEntry(Error),
    #[error("failed to probe support for VFIO low power state exit: {0}")]
    VfioProbePmLowPowerExit(Error),
}
139
140
/// Module-local result alias: every fallible operation here yields `VfioError`.
type Result<T> = std::result::Result<T, VfioError>;

/// Captures the most recent OS error (errno). Call immediately after a failed
/// ioctl/syscall, before anything else can overwrite it.
fn get_error() -> Error {
    Error::last()
}
145
146
static KVM_VFIO_FILE: OnceLock<Option<SafeDescriptor>> = OnceLock::new();
147
148
fn create_kvm_vfio_file(vm: &impl Vm) -> Option<&'static SafeDescriptor> {
149
KVM_VFIO_FILE
150
.get_or_init(|| vm.create_device(DeviceKind::Vfio).ok())
151
.as_ref()
152
}
153
154
fn kvm_vfio_file() -> Option<&'static SafeDescriptor> {
155
match KVM_VFIO_FILE.get() {
156
Some(Some(v)) => Some(v),
157
_ => None,
158
}
159
}
160
161
/// The kind of hardware device exposed through a VFIO file descriptor.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum VfioDeviceType {
    /// PCI/PCIe endpoint (vfio-pci).
    Pci,
    /// Platform (non-PCI, e.g. device-tree described) device (vfio-platform).
    Platform,
}
166
167
/// Operation to perform on the KVM VFIO pseudo-device when a VFIO group's
/// membership changes (see `VfioGroup::kvm_device_set_group`).
enum KvmVfioGroupOps {
    // Register a group with KVM (KVM_DEV_VFIO_GROUP_ADD).
    Add,
    // Unregister a group from KVM (KVM_DEV_VFIO_GROUP_DEL).
    Delete,
}
171
172
/// Handle to a pKVM pvIOMMU instance obtained from the KVM VFIO device
/// (KVM_DEV_VFIO_PVIOMMU_ATTACH). The wrapped file descriptor doubles as the
/// pvIOMMU's identifier (see `id`).
#[derive(Debug)]
pub struct KvmVfioPviommu {
    // FD returned by the attach ioctl; owns the pvIOMMU instance.
    file: File,
}
176
177
impl KvmVfioPviommu {
    /// Creates a new pvIOMMU instance via the KVM VFIO device.
    ///
    /// Only implemented for Android/aarch64 (pKVM); panics with
    /// `unimplemented!` elsewhere.
    pub fn new(vm: &impl Vm) -> Result<Self> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                let file = Self::ioctl_kvm_dev_vfio_pviommu_attach(vm)?;

                Ok(Self { file })
            } else {
                let _ = vm;
                unimplemented!()
            }
        }
    }

    /// Maps `device`'s physical stream ID at index `sid_idx` to the virtual
    /// stream ID `vsid` on this pvIOMMU (KVM_PVIOMMU_SET_CONFIG).
    ///
    /// Only implemented for Android/aarch64 (pKVM).
    pub fn attach<T: AsRawDescriptor>(&self, device: &T, sid_idx: u32, vsid: u32) -> Result<()> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                self.ioctl_kvm_pviommu_set_config(device, sid_idx, vsid)
            } else {
                let _ = device;
                let _ = sid_idx;
                let _ = vsid;
                unimplemented!()
            }
        }
    }

    /// Returns this pvIOMMU's identifier.
    pub fn id(&self) -> u32 {
        let fd = self.as_raw_descriptor();
        // Guests identify pvIOMMUs to the hypervisor using the corresponding VMM FDs.
        // An open FD is non-negative, so the i32 -> u32 conversion cannot fail.
        fd.try_into().unwrap()
    }

    /// Queries how many stream IDs `device` exposes
    /// (KVM_DEV_VFIO_PVIOMMU_GET_INFO). Only implemented for Android/aarch64.
    pub fn get_sid_count<T: AsRawDescriptor>(vm: &impl Vm, device: &T) -> Result<u32> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                let info = Self::ioctl_kvm_dev_vfio_pviommu_get_info(vm, device)?;

                Ok(info.nr_sids)
            } else {
                let _ = vm;
                let _ = device;
                unimplemented!()
            }
        }
    }

    // Issues KVM_DEV_VFIO_PVIOMMU_ATTACH; on success the ioctl's return value
    // is a new FD owning the pvIOMMU instance.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_dev_vfio_pviommu_attach(vm: &impl Vm) -> Result<File> {
        let kvm_vfio_file = create_kvm_vfio_file(vm).ok_or(VfioError::CreateVfioKvmDevice)?;

        let vfio_dev_attr = kvm_sys::kvm_device_attr {
            flags: 0,
            group: kvm_sys::KVM_DEV_VFIO_PVIOMMU,
            attr: kvm_sys::KVM_DEV_VFIO_PVIOMMU_ATTACH as u64,
            addr: 0,
        };

        // SAFETY:
        // Safe as we are the owner of vfio_dev_attr, which is valid.
        let ret =
            unsafe { ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr) };

        if ret < 0 {
            Err(VfioError::KvmSetDeviceAttr(get_error()))
        } else {
            // SAFETY: Safe as we verify the return value.
            Ok(unsafe { File::from_raw_descriptor(ret) })
        }
    }

    // Issues KVM_PVIOMMU_SET_CONFIG on this pvIOMMU's FD to bind one
    // (sid_idx -> vsid) mapping for `device`.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_pviommu_set_config<T: AsRawDescriptor>(
        &self,
        device: &T,
        sid_idx: u32,
        vsid: u32,
    ) -> Result<()> {
        let config = kvm_sys::kvm_vfio_iommu_config {
            size: mem::size_of::<kvm_sys::kvm_vfio_iommu_config>() as u32,
            device_fd: device.as_raw_descriptor(),
            sid_idx,
            vsid,
            __reserved: 0,
        };

        // SAFETY:
        // Safe as we are the owner of device and config which are valid, and we verify the return
        // value.
        let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_PVIOMMU_SET_CONFIG, &config) };

        if ret < 0 {
            Err(VfioError::KvmPviommuSetConfig(get_error()))
        } else {
            Ok(())
        }
    }

    // Issues KVM_DEV_VFIO_PVIOMMU_GET_INFO; the kernel fills `info` (notably
    // `nr_sids`) through the pointer passed in `addr`.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_dev_vfio_pviommu_get_info<T: AsRawDescriptor>(
        vm: &impl Vm,
        device: &T,
    ) -> Result<kvm_sys::kvm_vfio_iommu_info> {
        let kvm_vfio_file = create_kvm_vfio_file(vm).ok_or(VfioError::CreateVfioKvmDevice)?;

        let mut info = kvm_sys::kvm_vfio_iommu_info {
            size: mem::size_of::<kvm_sys::kvm_vfio_iommu_info>() as u32,
            device_fd: device.as_raw_descriptor(),
            nr_sids: 0,
            __reserved: 0,
        };

        let vfio_dev_attr = kvm_sys::kvm_device_attr {
            flags: 0,
            group: kvm_sys::KVM_DEV_VFIO_PVIOMMU,
            attr: kvm_sys::KVM_DEV_VFIO_PVIOMMU_GET_INFO as u64,
            addr: addr_of_mut!(info) as usize as u64,
        };

        // SAFETY:
        // Safe as we are the owner of vfio_dev_attr, which is valid.
        let ret =
            unsafe { ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr) };

        if ret < 0 {
            Err(VfioError::KvmSetDeviceAttr(get_error()))
        } else {
            Ok(info)
        }
    }
}
308
309
impl AsRawDescriptor for KvmVfioPviommu {
    // Exposes the pvIOMMU FD; also the source of `KvmVfioPviommu::id`.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.file.as_raw_descriptor()
    }
}
314
315
/// IOMMU driver types that can be selected for a VFIO container via
/// VFIO_SET_IOMMU. Discriminants are the kernel's extension/driver IDs.
#[repr(u32)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum IommuType {
    Type1V2 = VFIO_TYPE1v2_IOMMU,
    PkvmPviommu = VFIO_PKVM_PVIOMMU,
    // ChromeOS specific vfio_iommu_type1 implementation that is optimized for
    // small, dynamic mappings. For clients which create large, relatively
    // static mappings, Type1V2 is still preferred.
    //
    // See crrev.com/c/3593528 for the implementation.
    Type1ChromeOS = 100001,
}
327
328
/// A VfioContainer contains multiple VfioGroups and delegates one IOMMU domain table.
pub struct VfioContainer {
    // Open handle to /dev/vfio/vfio (or a duplicated container FD).
    container: File,
    // Groups added to this container, keyed by IOMMU group id.
    groups: HashMap<u32, Arc<Mutex<VfioGroup>>>,
    // Driver type selected with VFIO_SET_IOMMU; `None` until the first group
    // is added and `set_iommu_checked` succeeds.
    iommu_type: Option<IommuType>,
}
334
335
fn extract_vfio_struct<T>(bytes: &[u8], offset: usize) -> Option<T>
336
where
337
T: FromBytes,
338
{
339
Some(T::read_from_prefix(bytes.get(offset..)?).ok()?.0)
340
}
341
342
// VFIO API major version this code expects; verified against
// VFIO_GET_API_VERSION when a container is constructed.
const VFIO_API_VERSION: u8 = 0;
impl VfioContainer {
    /// Opens /dev/vfio/vfio and wraps it in a new, empty container.
    pub fn new() -> Result<Self> {
        let container = OpenOptions::new()
            .read(true)
            .write(true)
            .open("/dev/vfio/vfio")
            .map_err(VfioError::OpenContainer)?;

        Self::new_from_container(container)
    }

    // Constructs a VfioContainer from an existing container file, verifying
    // the kernel's VFIO API version first.
    pub fn new_from_container(container: File) -> Result<Self> {
        // SAFETY:
        // Safe as file is vfio container descriptor and ioctl is defined by kernel.
        let version = unsafe { ioctl(&container, VFIO_GET_API_VERSION) };
        if version as u8 != VFIO_API_VERSION {
            return Err(VfioError::VfioApiVersion);
        }

        Ok(VfioContainer {
            container,
            groups: HashMap::new(),
            iommu_type: None,
        })
    }

    // True when the group with `group_id` has already been added to this container.
    fn is_group_set(&self, group_id: u32) -> bool {
        self.groups.contains_key(&group_id)
    }

    // Asks the kernel (VFIO_CHECK_EXTENSION) whether this container supports
    // the given IOMMU driver type.
    fn check_extension(&self, val: IommuType) -> bool {
        // SAFETY:
        // Safe as file is vfio container and make sure val is valid.
        let ret = unsafe { ioctl_with_val(self, VFIO_CHECK_EXTENSION, val as c_ulong) };
        ret != 0
    }

    // Selects the IOMMU driver (VFIO_SET_IOMMU); returns the raw ioctl result
    // (0 on success).
    fn set_iommu(&mut self, val: IommuType) -> i32 {
        // SAFETY:
        // Safe as file is vfio container and make sure val is valid.
        unsafe { ioctl_with_val(self, VFIO_SET_IOMMU, val as c_ulong) }
    }

    // Verifies support for `val`, selects it, and records the choice in
    // `self.iommu_type` on success.
    fn set_iommu_checked(&mut self, val: IommuType) -> Result<()> {
        if !self.check_extension(val) {
            Err(VfioError::VfioIommuSupport(val))
        } else if self.set_iommu(val) != 0 {
            Err(VfioError::ContainerSetIOMMU(val, get_error()))
        } else {
            self.iommu_type = Some(val);
            Ok(())
        }
    }

    /// Maps `[iova, iova + size)` to host virtual address `user_addr` in this
    /// container's IOMMU domain. Panics if called before an IOMMU driver has
    /// been configured; returns `InvalidOperation` for pKVM pvIOMMU.
    ///
    /// # Safety
    ///
    /// The caller is responsible for determining the safety of the VFIO_IOMMU_MAP_DMA ioctl.
    pub unsafe fn vfio_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        match self
            .iommu_type
            .expect("vfio_dma_map called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_dma_map(iova, size, user_addr, write_en)
            }
            IommuType::PkvmPviommu => Err(VfioError::InvalidOperation),
        }
    }

    // Type1 backend of `vfio_dma_map`: issues VFIO_IOMMU_MAP_DMA with READ
    // always set and WRITE added when `write_en` is true.
    /// # Safety
    ///
    /// The caller is responsible for determining the safety of the VFIO_IOMMU_MAP_DMA ioctl.
    unsafe fn vfio_iommu_type1_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        let mut dma_map = vfio_iommu_type1_dma_map {
            argsz: mem::size_of::<vfio_iommu_type1_dma_map>() as u32,
            flags: VFIO_DMA_MAP_FLAG_READ,
            vaddr: user_addr,
            iova,
            size,
        };

        if write_en {
            dma_map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
        }

        let ret = ioctl_with_ref(self, VFIO_IOMMU_MAP_DMA, &dma_map);
        if ret != 0 {
            return Err(VfioError::IommuDmaMap(get_error()));
        }

        Ok(())
    }

    /// Unmaps `[iova, iova + size)` from this container's IOMMU domain.
    /// Panics if called before an IOMMU driver has been configured; returns
    /// `InvalidOperation` for pKVM pvIOMMU.
    pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        match self
            .iommu_type
            .expect("vfio_dma_unmap called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_dma_unmap(iova, size)
            }
            IommuType::PkvmPviommu => Err(VfioError::InvalidOperation),
        }
    }

    // Type1 backend of `vfio_dma_unmap`. The kernel reports the number of
    // bytes actually unmapped in `dma_unmap.size`; anything short of the
    // request is treated as failure.
    fn vfio_iommu_type1_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        let mut dma_unmap = vfio_iommu_type1_dma_unmap {
            argsz: mem::size_of::<vfio_iommu_type1_dma_unmap>() as u32,
            flags: 0,
            iova,
            size,
            ..Default::default()
        };

        // SAFETY:
        // Safe as file is vfio container, dma_unmap is constructed by us, and
        // we check the return value
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_UNMAP_DMA, &mut dma_unmap) };
        if ret != 0 || dma_unmap.size != size {
            return Err(VfioError::IommuDmaUnmap(get_error()));
        }

        Ok(())
    }

    /// Returns the bitmask of IOMMU page sizes supported by this container
    /// (0 for pKVM pvIOMMU). Panics if called before an IOMMU driver has been
    /// configured.
    pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
        match self
            .iommu_type
            .expect("vfio_get_iommu_page_size_mask called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_get_iommu_page_size_mask()
            }
            IommuType::PkvmPviommu => Ok(0),
        }
    }

    // Type1 backend: VFIO_IOMMU_GET_INFO, validated to include the PGSIZES flag.
    fn vfio_iommu_type1_get_iommu_page_size_mask(&self) -> Result<u64> {
        let mut iommu_info = vfio_iommu_type1_info {
            argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
            flags: 0,
            iova_pgsizes: 0,
            ..Default::default()
        };

        // SAFETY:
        // Safe as file is vfio container, iommu_info has valid values,
        // and we check the return value
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO, &mut iommu_info) };
        if ret != 0 || (iommu_info.flags & VFIO_IOMMU_INFO_PGSIZES) == 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        Ok(iommu_info.iova_pgsizes)
    }

    /// Returns the valid IOVA ranges of this container's IOMMU (empty for
    /// pKVM pvIOMMU). Panics if called before an IOMMU driver has been
    /// configured.
    pub fn vfio_iommu_iova_get_iova_ranges(&self) -> Result<Vec<AddressRange>> {
        match self
            .iommu_type
            .expect("vfio_iommu_iova_get_iova_ranges called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_get_iova_ranges()
            }
            IommuType::PkvmPviommu => Ok(Vec::new()),
        }
    }

    // Type1 backend: performs the two-call VFIO_IOMMU_GET_INFO dance (first
    // to learn the required argsz, then to fetch the capability chain) and
    // walks the chain looking for VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE.
    fn vfio_iommu_type1_get_iova_ranges(&self) -> Result<Vec<AddressRange>> {
        // Query the buffer size needed fetch the capabilities.
        let mut iommu_info_argsz = vfio_iommu_type1_info {
            argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
            flags: 0,
            iova_pgsizes: 0,
            ..Default::default()
        };

        // SAFETY:
        // Safe as file is vfio container, iommu_info_argsz has valid values,
        // and we check the return value
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO, &mut iommu_info_argsz) };
        if ret != 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        if (iommu_info_argsz.flags & VFIO_IOMMU_INFO_CAPS) == 0 {
            return Err(VfioError::IommuGetCapInfo);
        }

        // Allocate a buffer large enough for the header plus the extra
        // capability bytes the kernel asked for.
        let mut iommu_info = vec_with_array_field::<vfio_iommu_type1_info, u8>(
            iommu_info_argsz.argsz as usize - mem::size_of::<vfio_iommu_type1_info>(),
        );
        iommu_info[0].argsz = iommu_info_argsz.argsz;
        let ret =
            // SAFETY:
            // Safe as file is vfio container, iommu_info has valid values,
            // and we check the return value
            unsafe { ioctl_with_mut_ptr(self, VFIO_IOMMU_GET_INFO, iommu_info.as_mut_ptr()) };
        if ret != 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        // SAFETY:
        // Safe because we initialized iommu_info with enough space, u8 has less strict
        // alignment, and since it will no longer be mutated.
        let info_bytes = unsafe {
            std::slice::from_raw_parts(
                iommu_info.as_ptr() as *const u8,
                iommu_info_argsz.argsz as usize,
            )
        };

        if (iommu_info[0].flags & VFIO_IOMMU_INFO_CAPS) == 0 {
            return Err(VfioError::IommuGetCapInfo);
        }

        // Walk the capability chain; offset 0 terminates the chain.
        let mut offset = iommu_info[0].cap_offset as usize;
        while offset != 0 {
            let header = extract_vfio_struct::<vfio_info_cap_header>(info_bytes, offset)
                .ok_or(VfioError::IommuGetCapInfo)?;

            if header.id == VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE as u16 && header.version == 1 {
                let iova_header =
                    extract_vfio_struct::<vfio_iommu_type1_info_cap_iova_range_header>(
                        info_bytes, offset,
                    )
                    .ok_or(VfioError::IommuGetCapInfo)?;
                let range_offset = offset + mem::size_of::<vfio_iommu_type1_info_cap_iova_range>();
                let mut ret = Vec::new();
                for i in 0..iova_header.nr_iovas {
                    ret.push(
                        extract_vfio_struct::<vfio_iova_range>(
                            info_bytes,
                            range_offset + i as usize * mem::size_of::<vfio_iova_range>(),
                        )
                        .ok_or(VfioError::IommuGetCapInfo)?,
                    );
                }
                return Ok(ret
                    .iter()
                    .map(|range| AddressRange {
                        start: range.start,
                        end: range.end,
                    })
                    .collect());
            }
            offset = header.next as usize;
        }

        Err(VfioError::IommuGetCapInfo)
    }

    // Picks and sets the IOMMU driver type appropriate for `iommu_dev`.
    fn set_iommu_from(&mut self, iommu_dev: IommuDevType) -> Result<()> {
        match iommu_dev {
            IommuDevType::CoIommu | IommuDevType::VirtioIommu => {
                // If we expect granular, dynamic mappings, try the ChromeOS Type1ChromeOS first,
                // then fall back to upstream versions.
                self.set_iommu_checked(IommuType::Type1ChromeOS)
                    .or_else(|_| self.set_iommu_checked(IommuType::Type1V2))
            }
            IommuDevType::NoIommu => self.set_iommu_checked(IommuType::Type1V2),
            IommuDevType::PkvmPviommu => self.set_iommu_checked(IommuType::PkvmPviommu),
        }
    }

    // Returns (creating and registering with KVM if needed) the group `id`.
    // On the first group, configures the IOMMU driver and — for NoIommu —
    // statically maps all of guest memory.
    fn get_group_with_vm(
        &mut self,
        id: u32,
        vm: &impl Vm,
        iommu_dev: IommuDevType,
    ) -> Result<Arc<Mutex<VfioGroup>>> {
        if let Some(group) = self.groups.get(&id) {
            return Ok(group.clone());
        }

        let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));
        if self.groups.is_empty() {
            self.set_iommu_from(iommu_dev)?;
            // Before the first group is added into container, do once per container
            // initialization. Both coiommu and virtio-iommu rely on small, dynamic
            // mappings. However, if an iommu is not enabled, then we map the entirety
            // of guest memory as a small number of large, static mappings.
            match iommu_dev {
                IommuDevType::CoIommu | IommuDevType::PkvmPviommu | IommuDevType::VirtioIommu => {}
                IommuDevType::NoIommu => {
                    for region in vm.get_memory().regions() {
                        // SAFETY:
                        // Safe because the guest regions are guaranteed not to overlap
                        unsafe {
                            self.vfio_dma_map(
                                region.guest_addr.0,
                                region.size as u64,
                                region.host_addr as u64,
                                true,
                            )
                        }?;
                    }
                }
            }
        }

        let kvm_vfio_file = create_kvm_vfio_file(vm).ok_or(VfioError::CreateVfioKvmDevice)?;
        group
            .lock()
            .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Add)?;

        self.groups.insert(id, group.clone());

        Ok(group)
    }

    // Returns (creating if needed) the group `id` without a VM: used for
    // containers whose mappings are managed externally. Selects Type1V2 when
    // this is the container's first group.
    fn get_group(&mut self, id: u32) -> Result<Arc<Mutex<VfioGroup>>> {
        if let Some(group) = self.groups.get(&id) {
            return Ok(group.clone());
        }

        let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));

        if self.groups.is_empty() {
            // Before the first group is added into container, do once per
            // container initialization.
            self.set_iommu_checked(IommuType::Type1V2)?;
        }

        self.groups.insert(id, group.clone());
        Ok(group)
    }

    // Drops group `id` once its device count reaches zero, unregistering it
    // from the KVM VFIO device first. `reduce` decrements the count before
    // the check.
    fn remove_group(&mut self, id: u32, reduce: bool) {
        let mut remove = false;

        if let Some(group) = self.groups.get(&id) {
            if reduce {
                group.lock().reduce_device_num();
            }
            if group.lock().device_num() == 0 {
                let kvm_vfio_file = kvm_vfio_file().expect("kvm vfio file isn't created");
                if group
                    .lock()
                    .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Delete)
                    .is_err()
                {
                    warn!("failing in remove vfio group from kvm device");
                }
                remove = true;
            }
        }

        if remove {
            self.groups.remove(&id);
        }
    }

    /// Duplicates the container FD (via `dup(2)`); the caller owns the
    /// returned descriptor.
    pub fn clone_as_raw_descriptor(&self) -> Result<RawDescriptor> {
        // SAFETY: this call is safe because it doesn't modify any memory and we
        // check the return value.
        let raw_descriptor = unsafe { libc::dup(self.container.as_raw_descriptor()) };
        if raw_descriptor < 0 {
            Err(VfioError::ContainerDupError)
        } else {
            Ok(raw_descriptor)
        }
    }

    // Gets group ids for all groups in the container.
    pub fn group_ids(&self) -> Vec<&u32> {
        self.groups.keys().collect()
    }
}
725
726
impl AsRawDescriptor for VfioContainer {
    // Exposes the /dev/vfio/vfio FD so ioctl helpers can target the container.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.container.as_raw_descriptor()
    }
}
731
732
/// An open VFIO group (/dev/vfio/<id>) attached to a container, with a count
/// of the devices currently obtained from it.
struct VfioGroup {
    // Open handle to /dev/vfio/<id>.
    group: File,
    // Number of devices fetched via `get_device` and still in use; the
    // container removes the group when this drops to zero.
    device_num: u32,
}
736
737
impl VfioGroup {
    // Opens /dev/vfio/<id>, verifies the group is viable, and attaches it to
    // `container` (VFIO_GROUP_SET_CONTAINER).
    fn new(container: &VfioContainer, id: u32) -> Result<Self> {
        let group_path = format!("/dev/vfio/{id}");
        let group_file = OpenOptions::new()
            .read(true)
            .write(true)
            .open(Path::new(&group_path))
            .map_err(|e| VfioError::OpenGroup(e, group_path))?;

        let mut group_status = vfio_group_status {
            argsz: mem::size_of::<vfio_group_status>() as u32,
            flags: 0,
        };
        let mut ret =
            // SAFETY:
            // Safe as we are the owner of group_file and group_status which are valid value.
            unsafe { ioctl_with_mut_ref(&group_file, VFIO_GROUP_GET_STATUS, &mut group_status) };
        if ret < 0 {
            return Err(VfioError::GetGroupStatus(get_error()));
        }

        // A group is only usable when all of its devices are bound to vfio.
        if group_status.flags != VFIO_GROUP_FLAGS_VIABLE {
            return Err(VfioError::GroupViable);
        }

        let container_raw_descriptor = container.as_raw_descriptor();
        // SAFETY:
        // Safe as we are the owner of group_file and container_raw_descriptor which are valid
        // value, and we verify the ret value
        ret = unsafe {
            ioctl_with_ref(
                &group_file,
                VFIO_GROUP_SET_CONTAINER,
                &container_raw_descriptor,
            )
        };
        if ret < 0 {
            return Err(VfioError::GroupSetContainer(get_error()));
        }

        Ok(VfioGroup {
            group: group_file,
            device_num: 0,
        })
    }

    // Resolves a device's IOMMU group id by reading the
    // <sysfspath>/iommu_group symlink and parsing its basename as u32.
    fn get_group_id<P: AsRef<Path>>(sysfspath: P) -> Result<u32> {
        let mut uuid_path = PathBuf::new();
        uuid_path.push(sysfspath);
        uuid_path.push("iommu_group");
        let group_path = uuid_path
            .read_link()
            .map_err(|e| VfioError::ReadLink(e, uuid_path))?;
        let group_osstr = group_path.file_name().ok_or(VfioError::InvalidPath)?;
        let group_str = group_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let group_id = group_str
            .parse::<u32>()
            .map_err(|_| VfioError::InvalidPath)?;

        Ok(group_id)
    }

    // Registers (Add) or unregisters (Delete) this group with the KVM VFIO
    // pseudo-device via KVM_SET_DEVICE_ATTR; the kernel reads the group FD
    // through the pointer passed in `addr`.
    fn kvm_device_set_group(
        &self,
        kvm_vfio_file: &SafeDescriptor,
        ops: KvmVfioGroupOps,
    ) -> Result<()> {
        let group_descriptor = self.as_raw_descriptor();
        let group_descriptor_ptr = &group_descriptor as *const i32;
        let vfio_dev_attr = match ops {
            KvmVfioGroupOps::Add => kvm_sys::kvm_device_attr {
                flags: 0,
                group: kvm_sys::KVM_DEV_VFIO_GROUP,
                attr: kvm_sys::KVM_DEV_VFIO_GROUP_ADD as u64,
                addr: group_descriptor_ptr as u64,
            },
            KvmVfioGroupOps::Delete => kvm_sys::kvm_device_attr {
                flags: 0,
                group: kvm_sys::KVM_DEV_VFIO_GROUP,
                attr: kvm_sys::KVM_DEV_VFIO_GROUP_DEL as u64,
                addr: group_descriptor_ptr as u64,
            },
        };

        // SAFETY:
        // Safe as we are the owner of vfio_dev_descriptor and vfio_dev_attr which are valid value,
        // and we verify the return value.
        if 0 != unsafe {
            ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr)
        } {
            return Err(VfioError::KvmSetDeviceAttr(get_error()));
        }

        Ok(())
    }

    // Fetches a device FD from this group (VFIO_GROUP_GET_DEVICE_FD) by its
    // kernel name, e.g. "0000:02:00.0".
    fn get_device(&self, name: &str) -> Result<File> {
        let path: CString = CString::new(name.as_bytes()).expect("CString::new() failed");
        let path_ptr = path.as_ptr();

        // SAFETY:
        // Safe as we are the owner of self and path_ptr which are valid value.
        let ret = unsafe { ioctl_with_ptr(self, VFIO_GROUP_GET_DEVICE_FD, path_ptr) };
        if ret < 0 {
            return Err(VfioError::GroupGetDeviceFD(get_error()));
        }

        // SAFETY:
        // Safe as ret is valid descriptor
        Ok(unsafe { File::from_raw_descriptor(ret) })
    }

    // Increments the in-use device count.
    fn add_device_num(&mut self) {
        self.device_num += 1;
    }

    // Decrements the in-use device count.
    fn reduce_device_num(&mut self) {
        self.device_num -= 1;
    }

    // Current in-use device count.
    fn device_num(&self) -> u32 {
        self.device_num
    }
}
861
862
impl AsRawDescriptor for VfioGroup {
    // Exposes the /dev/vfio/<id> FD for ioctl helpers and KVM registration.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.group.as_raw_descriptor()
    }
}
867
868
/// A helper struct for managing VFIO containers.
///
/// Caches containers per IOMMU flavor so that devices sharing an IOVA space
/// also share a container (see `get_container` for the policy).
#[derive(Default)]
pub struct VfioContainerManager {
    /// One VFIO container shared by all VFIO devices that don't attach to any IOMMU device.
    no_iommu_container: Option<Arc<Mutex<VfioContainer>>>,

    /// For IOMMU enabled devices, all VFIO groups that share the same IOVA space are managed by
    /// one VFIO container.
    iommu_containers: Vec<Arc<Mutex<VfioContainer>>>,

    /// One VFIO container shared by all VFIO devices that attach to the CoIOMMU device.
    coiommu_container: Option<Arc<Mutex<VfioContainer>>>,

    /// One VFIO container shared by all VFIO devices that attach to pKVM.
    pkvm_iommu_container: Option<Arc<Mutex<VfioContainer>>>,
}
884
885
impl VfioContainerManager {
886
pub fn new() -> Self {
887
Self::default()
888
}
889
890
/// The single place to create a VFIO container for a PCI endpoint.
891
///
892
/// The policy to determine whether an individual or a shared VFIO container
893
/// will be created for this device is governed by the physical PCI topology,
894
/// and the argument iommu_type.
895
///
896
/// # Arguments
897
///
898
/// * `sysfspath` - the path to the PCI device, e.g. /sys/bus/pci/devices/0000:02:00.0
899
/// * `iommu_type` - which type of IOMMU is enabled on this device
900
pub fn get_container<P: AsRef<Path>>(
901
&mut self,
902
iommu_type: IommuDevType,
903
sysfspath: Option<P>,
904
) -> Result<Arc<Mutex<VfioContainer>>> {
905
match iommu_type {
906
IommuDevType::NoIommu => {
907
// One VFIO container is used for all IOMMU disabled groups.
908
if let Some(container) = &self.no_iommu_container {
909
Ok(container.clone())
910
} else {
911
let container = Arc::new(Mutex::new(VfioContainer::new()?));
912
self.no_iommu_container = Some(container.clone());
913
Ok(container)
914
}
915
}
916
IommuDevType::VirtioIommu => {
917
let path = sysfspath.ok_or(VfioError::InvalidPath)?;
918
let group_id = VfioGroup::get_group_id(path)?;
919
920
// One VFIO container is used for all devices that belong to one VFIO group.
921
// NOTE: vfio_wrapper relies on each container containing exactly one group.
922
if let Some(container) = self
923
.iommu_containers
924
.iter()
925
.find(|container| container.lock().is_group_set(group_id))
926
{
927
Ok(container.clone())
928
} else {
929
let container = Arc::new(Mutex::new(VfioContainer::new()?));
930
self.iommu_containers.push(container.clone());
931
Ok(container)
932
}
933
}
934
IommuDevType::CoIommu => {
935
// One VFIO container is used for devices attached to CoIommu
936
if let Some(container) = &self.coiommu_container {
937
Ok(container.clone())
938
} else {
939
let container = Arc::new(Mutex::new(VfioContainer::new()?));
940
self.coiommu_container = Some(container.clone());
941
Ok(container)
942
}
943
}
944
IommuDevType::PkvmPviommu => {
945
// One VFIO container is used for devices attached to pKVM
946
if let Some(container) = &self.pkvm_iommu_container {
947
Ok(container.clone())
948
} else {
949
let container = Arc::new(Mutex::new(VfioContainer::new()?));
950
self.pkvm_iommu_container = Some(container.clone());
951
Ok(container)
952
}
953
}
954
}
955
}
956
}
957
958
/// Vfio Irq type used to enable/disable/mask/unmask vfio irq.
pub enum VfioIrqType {
    /// Legacy INTx line interrupt.
    Intx,
    /// Message Signaled Interrupt.
    Msi,
    /// Extended Message Signaled Interrupt.
    Msix,
}
964
965
/// Vfio Irq information used to assign and enable/disable/mask/unmask vfio irq.
pub struct VfioIrq {
    /// VFIO_IRQ_INFO_* capability flags reported by the kernel for this irq.
    pub flags: u32,
    /// Index of this irq within the device's irq list.
    pub index: u32,
}
970
971
/// Address on VFIO memory region.
#[derive(Debug, Default, Clone)]
pub struct VfioRegionAddr {
    /// region number.
    pub index: usize,
    /// offset in the region.
    pub addr: u64,
}
979
980
/// Describes one region of a VFIO device as reported by the kernel's region
/// info ioctl: its access flags, size, file offset, and mmap layout.
#[derive(Debug)]
pub struct VfioRegion {
    // flags for this region: read/write/mmap
    flags: u32,
    // total size of the region in bytes
    size: u64,
    // region offset used to read/write with vfio device descriptor
    offset: u64,
    // vectors for mmap offset and size
    mmaps: Vec<vfio_region_sparse_mmap_area>,
    // type and subtype for cap type
    cap_info: Option<(u32, u32)>,
    // if true, then the caller can safely mmap the MSIX region
    // if false, the caller should remove the MSIX part of the region before mmapping
    msix_region_mmappable: bool,
}
995
996
/// Vfio device for exposing regions which could be read/write to kernel vfio device.
pub struct VfioDevice {
    // FD obtained from the group via VFIO_GROUP_GET_DEVICE_FD.
    dev: File,
    // Kernel device name, e.g. the sysfs basename such as "0000:02:00.0".
    name: String,
    // Container this device's group is attached to.
    container: Arc<Mutex<VfioContainer>>,
    // Whether this is a PCI or a platform device.
    dev_type: VfioDeviceType,
    // Raw FD of the owning group, kept for group bookkeeping.
    group_descriptor: RawDescriptor,
    // IOMMU group id this device belongs to.
    group_id: u32,
    // vec for vfio device's regions
    regions: Vec<VfioRegion>,
    // Number of irqs the device reports.
    num_irqs: u32,

    // Allocator over the container's valid IOVA ranges.
    iova_alloc: Arc<Mutex<AddressAllocator>>,
    // Optional device-tree symbol associated with the device, if provided by
    // the caller of `new_passthrough`.
    dt_symbol: Option<String>,
    // For pKVM: the attached pvIOMMU and the virtual stream IDs assigned to
    // this device; `None` for all other IOMMU types.
    pviommu: Option<(Arc<Mutex<KvmVfioPviommu>>, Vec<u32>)>,
}
1012
1013
impl VfioDevice {
1014
    /// Create a new vfio device, then guest read/write on this device could be
    /// transferred into kernel vfio.
    /// sysfspath specify the vfio device path in sys file system.
    ///
    /// `vm` is used to register the group with the hypervisor's VFIO support,
    /// and `iommu_dev` selects the IOMMU flavor; for `PkvmPviommu` a pvIOMMU
    /// instance is created and attached with randomly chosen vSIDs.
    pub fn new_passthrough<P: AsRef<Path>>(
        sysfspath: &P,
        vm: &impl Vm,
        container: Arc<Mutex<VfioContainer>>,
        iommu_dev: IommuDevType,
        dt_symbol: Option<String>,
    ) -> Result<Self> {
        let group_id = VfioGroup::get_group_id(sysfspath)?;

        let group = container
            .lock()
            .get_group_with_vm(group_id, vm, iommu_dev)?;
        // The device name is the final path component (e.g. a PCI BDF).
        let name_osstr = sysfspath
            .as_ref()
            .file_name()
            .ok_or(VfioError::InvalidPath)?;
        let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let name = String::from(name_str);
        let dev = group.lock().get_device(&name)?;
        let (dev_info, dev_type) = Self::get_device_info(&dev)?;
        let regions = Self::get_regions(&dev, dev_info.num_regions)?;
        group.lock().add_device_num();
        let group_descriptor = group.lock().as_raw_descriptor();

        let iova_ranges = container.lock().vfio_iommu_iova_get_iova_ranges()?;
        let iova_alloc = AddressAllocator::new_from_list(iova_ranges, None, None)
            .map_err(VfioError::Resources)?;

        let pviommu = if matches!(iommu_dev, IommuDevType::PkvmPviommu) {
            // We currently have a 1-to-1 mapping between pvIOMMUs and VFIO devices.
            let pviommu = KvmVfioPviommu::new(vm)?;

            // Draw `vsids_len` distinct values from [0, u32::MAX) so the vSIDs
            // are unique, then attach each one at its index.
            let vsids_len = KvmVfioPviommu::get_sid_count(vm, &dev)?.try_into().unwrap();
            let max_vsid = u32::MAX.try_into().unwrap();
            let random_vsids = sample(&mut rand::rng(), max_vsid, vsids_len).into_iter();
            let vsids = Vec::from_iter(random_vsids.map(|v| u32::try_from(v).unwrap()));
            for (i, vsid) in vsids.iter().enumerate() {
                pviommu.attach(&dev, i.try_into().unwrap(), *vsid)?;
            }

            Some((Arc::new(Mutex::new(pviommu)), vsids))
        } else {
            None
        };

        Ok(VfioDevice {
            dev,
            name,
            container,
            dev_type,
            group_descriptor,
            group_id,
            regions,
            num_irqs: dev_info.num_irqs,
            iova_alloc: Arc::new(Mutex::new(iova_alloc)),
            dt_symbol,
            pviommu,
        })
    }
1076
1077
pub fn new<P: AsRef<Path>>(
1078
sysfspath: &P,
1079
container: Arc<Mutex<VfioContainer>>,
1080
) -> Result<Self> {
1081
let group_id = VfioGroup::get_group_id(sysfspath)?;
1082
let group = container.lock().get_group(group_id)?;
1083
let name_osstr = sysfspath
1084
.as_ref()
1085
.file_name()
1086
.ok_or(VfioError::InvalidPath)?;
1087
let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
1088
let name = String::from(name_str);
1089
1090
let dev = match group.lock().get_device(&name) {
1091
Ok(dev) => dev,
1092
Err(e) => {
1093
container.lock().remove_group(group_id, false);
1094
return Err(e);
1095
}
1096
};
1097
let (dev_info, dev_type) = match Self::get_device_info(&dev) {
1098
Ok(dev_info) => dev_info,
1099
Err(e) => {
1100
container.lock().remove_group(group_id, false);
1101
return Err(e);
1102
}
1103
};
1104
let regions = match Self::get_regions(&dev, dev_info.num_regions) {
1105
Ok(regions) => regions,
1106
Err(e) => {
1107
container.lock().remove_group(group_id, false);
1108
return Err(e);
1109
}
1110
};
1111
group.lock().add_device_num();
1112
let group_descriptor = group.lock().as_raw_descriptor();
1113
1114
let iova_ranges = container.lock().vfio_iommu_iova_get_iova_ranges()?;
1115
let iova_alloc = AddressAllocator::new_from_list(iova_ranges, None, None)
1116
.map_err(VfioError::Resources)?;
1117
1118
Ok(VfioDevice {
1119
dev,
1120
name,
1121
container,
1122
dev_type,
1123
group_descriptor,
1124
group_id,
1125
regions,
1126
num_irqs: dev_info.num_irqs,
1127
iova_alloc: Arc::new(Mutex::new(iova_alloc)),
1128
dt_symbol: None,
1129
pviommu: None,
1130
})
1131
}
1132
1133
/// Returns the file for this device.
1134
pub fn dev_file(&self) -> &File {
1135
&self.dev
1136
}
1137
1138
    /// Returns PCI device name, formatted as BUS:DEVICE.FUNCTION string.
    ///
    /// For platform devices this is simply the sysfs node name that was passed
    /// at construction time.
    pub fn device_name(&self) -> &String {
        &self.name
    }
1142
1143
    /// Returns the type of this VFIO device (PCI or platform), as detected
    /// from the device-info flags in `get_device_info`.
    pub fn device_type(&self) -> VfioDeviceType {
        self.dev_type
    }
1147
1148
    /// Returns the DT symbol (node label) of this VFIO device.
    ///
    /// Only present when a symbol was supplied to `new_passthrough`.
    pub fn dt_symbol(&self) -> Option<&str> {
        self.dt_symbol.as_deref()
    }
1152
1153
/// Returns the type and indentifier (if applicable) of the IOMMU used by this VFIO device and
1154
/// its master IDs.
1155
pub fn iommu(&self) -> Option<(IommuDevType, Option<u32>, &[u32])> {
1156
// We currently only report IommuDevType::PkvmPviommu.
1157
if let Some((ref pviommu, ref ids)) = self.pviommu {
1158
Some((
1159
IommuDevType::PkvmPviommu,
1160
Some(pviommu.lock().id()),
1161
ids.as_ref(),
1162
))
1163
} else {
1164
None
1165
}
1166
}
1167
1168
/// Probes support for VFIO LOW_POWER features.
1169
pub fn supports_pm_low_power(&self) -> bool {
1170
if self.probe_pm_low_power_entry().is_err() {
1171
false
1172
} else if self.probe_pm_low_power_exit().is_err() {
1173
warn!("VFIO supports LOW_POWER_ENTRY but not LOW_POWER_EXIT: ignoring feature");
1174
false
1175
} else {
1176
true
1177
}
1178
}
1179
1180
    /// enter the device's low power state
    ///
    /// Issues VFIO_DEVICE_FEATURE with the LOW_POWER_ENTRY feature set.
    pub fn pm_low_power_enter(&self) -> Result<()> {
        self.device_feature(VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY)
            .map_err(VfioError::VfioPmLowPowerEnter)
    }
1185
1186
    /// enter the device's low power state with wakeup notification
    ///
    /// The kernel signals `wakeup_evt` when the device wakes from low power.
    /// NOTE(review): `wakeup_evt` is consumed and dropped when this returns;
    /// presumably the kernel keeps its own reference to the eventfd once the
    /// ioctl succeeds — confirm against the VFIO feature documentation.
    pub fn pm_low_power_enter_with_wakeup(&self, wakeup_evt: Event) -> Result<()> {
        let payload = vfio_device_low_power_entry_with_wakeup {
            wakeup_eventfd: wakeup_evt.as_raw_descriptor(),
            reserved: 0,
        };
        // The feature payload is appended after the vfio_device_feature header.
        let payload_size = mem::size_of::<vfio_device_low_power_entry_with_wakeup>();
        let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(payload_size);
        device_feature[0].argsz = (mem::size_of::<vfio_device_feature>() + payload_size) as u32;
        device_feature[0].flags =
            VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP;
        // SAFETY:
        // Safe as we know vfio_device_low_power_entry_with_wakeup has two 32-bit int fields
        unsafe {
            device_feature[0]
                .data
                .as_mut_slice(payload_size)
                .copy_from_slice(
                    mem::transmute::<vfio_device_low_power_entry_with_wakeup, [u8; 8]>(payload)
                        .as_slice(),
                );
        }
        // SAFETY:
        // Safe as we are the owner of self and power_management which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE, &device_feature[0]) };
        if ret < 0 {
            Err(VfioError::VfioPmLowPowerEnter(get_error()))
        } else {
            Ok(())
        }
    }
1217
1218
    /// exit the device's low power state
    ///
    /// Issues VFIO_DEVICE_FEATURE with the LOW_POWER_EXIT feature set.
    pub fn pm_low_power_exit(&self) -> Result<()> {
        self.device_feature(VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_EXIT)
            .map_err(VfioError::VfioPmLowPowerExit)
    }
1223
1224
    // Probe (without applying) whether the kernel supports LOW_POWER_ENTRY.
    fn probe_pm_low_power_entry(&self) -> Result<()> {
        self.device_feature(VFIO_DEVICE_FEATURE_PROBE | VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY)
            .map_err(VfioError::VfioProbePmLowPowerEntry)
    }
1228
1229
    // Probe (without applying) whether the kernel supports LOW_POWER_EXIT.
    fn probe_pm_low_power_exit(&self) -> Result<()> {
        self.device_feature(VFIO_DEVICE_FEATURE_PROBE | VFIO_DEVICE_FEATURE_LOW_POWER_EXIT)
            .map_err(VfioError::VfioProbePmLowPowerExit)
    }
1233
1234
    // Issue a payload-less VFIO_DEVICE_FEATURE ioctl with the given `flags`
    // (a VFIO_DEVICE_FEATURE_SET/GET/PROBE bit or'd with a feature number).
    // Returns the raw errno on failure so callers can wrap it in the
    // appropriate VfioError variant.
    fn device_feature(&self, flags: u32) -> result::Result<(), Error> {
        let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(0);
        device_feature[0].argsz = mem::size_of::<vfio_device_feature>() as u32;
        device_feature[0].flags = flags;
        // SAFETY:
        // Safe as we are the owner of self and device_feature which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE, &device_feature[0]) };
        if ret < 0 {
            Err(get_error())
        } else {
            Ok(())
        }
    }
1247
1248
/// call _DSM from the device's ACPI table
1249
pub fn acpi_dsm(&self, args: &[u8]) -> Result<Vec<u8>> {
1250
let count = args.len();
1251
let mut dsm = vec_with_array_field::<vfio_acpi_dsm, u8>(count);
1252
dsm[0].argsz = (mem::size_of::<vfio_acpi_dsm>() + mem::size_of_val(args)) as u32;
1253
dsm[0].padding = 0;
1254
// SAFETY:
1255
// Safe as we allocated enough space to hold args
1256
unsafe {
1257
dsm[0].args.as_mut_slice(count).clone_from_slice(args);
1258
}
1259
// SAFETY:
1260
// Safe as we are the owner of self and dsm which are valid value
1261
let ret = unsafe { ioctl_with_mut_ref(&self.dev, VFIO_DEVICE_ACPI_DSM, &mut dsm[0]) };
1262
if ret < 0 {
1263
Err(VfioError::VfioAcpiDsm(get_error()))
1264
} else {
1265
// SAFETY:
1266
// Safe as we allocated enough space to hold args
1267
let res = unsafe { dsm[0].args.as_slice(count) };
1268
Ok(res.to_vec())
1269
}
1270
}
1271
1272
    /// Enable vfio device's ACPI notifications and associate EventFD with device.
    ///
    /// `index` selects the device's ACPI notification IRQ index; the kernel
    /// signals `acpi_notification_eventfd` on each notification.
    pub fn acpi_notification_evt_enable(
        &self,
        acpi_notification_eventfd: &Event,
        index: u32,
    ) -> Result<()> {
        let u32_size = mem::size_of::<u32>();
        let count = 1;

        // SET_IRQS payload: header followed by one eventfd (as a u32).
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = count as u32;

        // SAFETY:
        // It is safe as enough space is reserved through vec_with_array_field(u32)<count>.
        let data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
        data.copy_from_slice(&acpi_notification_eventfd.as_raw_descriptor().to_ne_bytes()[..]);

        // SAFETY:
        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioAcpiNotificationEnable(get_error()))
        } else {
            Ok(())
        }
    }
1302
1303
    /// Disable vfio device's ACPI notification and disconnect EventFd with device.
    ///
    /// Uses DATA_NONE with count = 0, which the kernel interprets as
    /// "disable the trigger for this index".
    pub fn acpi_notification_disable(&self, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 0;

        // SAFETY:
        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioAcpiNotificationDisable(get_error()))
        } else {
            Ok(())
        }
    }
1321
1322
    /// Test vfio device's ACPI notification by simulating hardware triggering.
    /// When the signaling mechanism is set, the VFIO_IRQ_SET_DATA_BOOL can be used with
    /// VFIO_IRQ_SET_ACTION_TRIGGER to perform kernel level interrupt loopback testing.
    ///
    /// `val` is the boolean payload (nonzero = trigger) written after the header.
    pub fn acpi_notification_test(&self, index: u32, val: u32) -> Result<()> {
        let u32_size = mem::size_of::<u32>();
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_BOOL | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        // SAFETY:
        // It is safe as enough space is reserved through vec_with_array_field(u32)<count>.
        let data = unsafe { irq_set[0].data.as_mut_slice(u32_size) };
        data.copy_from_slice(&val.to_ne_bytes()[..]);

        // SAFETY:
        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioAcpiNotificationTest(get_error()))
        } else {
            Ok(())
        }
    }
1348
1349
    /// Enable vfio device's irq and associate Irqfd Event with device.
    /// When MSIx is enabled, multi vectors will be supported, and vectors starting from subindex to
    /// subindex + descriptors length will be assigned with irqfd in the descriptors array.
    /// when index = VFIO_PCI_REQ_IRQ_INDEX, kernel vfio will trigger this event when physical
    /// device is removed.
    /// If descriptor is None, -1 is assigned to the irq. A value of -1 is used to either de-assign
    /// interrupts if already assigned or skip un-assigned interrupts.
    pub fn irq_enable(
        &self,
        descriptors: &[Option<&Event>],
        index: u32,
        subindex: u32,
    ) -> Result<()> {
        let count = descriptors.len();
        let u32_size = mem::size_of::<u32>();
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
        irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = subindex;
        irq_set[0].count = count as u32;

        // SAFETY:
        // irq_set.data could be none, bool or descriptor according to flags, so irq_set.data
        // is u8 default, here irq_set.data is descriptor as u32, so 4 default u8 are combined
        // together as u32. It is safe as enough space is reserved through
        // vec_with_array_field(u32)<count>.
        let mut data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
        // Write one u32 per vector, consuming the buffer 4 bytes at a time.
        for descriptor in descriptors.iter().take(count) {
            let (left, right) = data.split_at_mut(u32_size);
            match descriptor {
                Some(fd) => left.copy_from_slice(&fd.as_raw_descriptor().to_ne_bytes()[..]),
                None => left.copy_from_slice(&(-1i32).to_ne_bytes()[..]),
            }
            data = right;
        }

        // SAFETY:
        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqEnable(get_error()))
        } else {
            Ok(())
        }
    }
1395
1396
/// When intx is enabled, irqfd is used to trigger a level interrupt into guest, resample irqfd
1397
/// is used to get guest EOI notification.
1398
/// When host hw generates interrupt, vfio irq handler in host kernel receive and handle it,
1399
/// this handler disable hw irq first, then trigger irqfd to inject interrupt into guest. When
1400
/// resample irqfd is triggered by guest EOI, vfio kernel could enable hw irq, so hw could
1401
/// generate another interrupts.
1402
/// This function enable resample irqfd and let vfio kernel could get EOI notification.
1403
///
1404
/// descriptor: should be resample IrqFd.
1405
pub fn resample_virq_enable(&self, descriptor: &Event, index: u32) -> Result<()> {
1406
let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
1407
irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + mem::size_of::<u32>()) as u32;
1408
irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
1409
irq_set[0].index = index;
1410
irq_set[0].start = 0;
1411
irq_set[0].count = 1;
1412
1413
{
1414
// SAFETY:
1415
// irq_set.data could be none, bool or descriptor according to flags, so irq_set.data is
1416
// u8 default, here irq_set.data is descriptor as u32, so 4 default u8 are combined
1417
// together as u32. It is safe as enough space is reserved through
1418
// vec_with_array_field(u32)<1>.
1419
let descriptors = unsafe { irq_set[0].data.as_mut_slice(4) };
1420
descriptors.copy_from_slice(&descriptor.as_raw_descriptor().to_le_bytes()[..]);
1421
}
1422
1423
// SAFETY:
1424
// Safe as we are the owner of self and irq_set which are valid value
1425
let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1426
if ret < 0 {
1427
Err(VfioError::VfioIrqEnable(get_error()))
1428
} else {
1429
Ok(())
1430
}
1431
}
1432
1433
    /// disable vfio device's irq and disconnect Irqfd Event with device
    ///
    /// DATA_NONE + TRIGGER with count = 0 tells the kernel to tear down the
    /// trigger for the whole `index`.
    pub fn irq_disable(&self, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 0;

        // SAFETY:
        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqDisable(get_error()))
        } else {
            Ok(())
        }
    }
1451
1452
    /// Unmask vfio device irq
    ///
    /// Unmasks the first (and only, per get_irqs) interrupt of `index`.
    pub fn irq_unmask(&self, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        // SAFETY:
        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqUnmask(get_error()))
        } else {
            Ok(())
        }
    }
1470
1471
    /// Mask vfio device irq
    ///
    /// Masks the first (and only, per get_irqs) interrupt of `index`.
    pub fn irq_mask(&self, index: u32) -> Result<()> {
        let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
        irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
        irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
        irq_set[0].index = index;
        irq_set[0].start = 0;
        irq_set[0].count = 1;

        // SAFETY:
        // Safe as we are the owner of self and irq_set which are valid value
        let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
        if ret < 0 {
            Err(VfioError::VfioIrqMask(get_error()))
        } else {
            Ok(())
        }
    }
1489
1490
    /// Get and validate VFIO device information.
    ///
    /// Queries VFIO_DEVICE_GET_INFO and derives the device type from the
    /// returned flags. PCI devices must report at least the config region and
    /// the MSI-X IRQ index; anything that is neither PCI nor platform is
    /// rejected with UnknownDeviceType.
    fn get_device_info(device_file: &File) -> Result<(vfio_device_info, VfioDeviceType)> {
        let mut dev_info = vfio_device_info {
            argsz: mem::size_of::<vfio_device_info>() as u32,
            flags: 0,
            num_regions: 0,
            num_irqs: 0,
            ..Default::default()
        };

        // SAFETY:
        // Safe as we are the owner of device_file and dev_info which are valid value,
        // and we verify the return value.
        let ret = unsafe { ioctl_with_mut_ref(device_file, VFIO_DEVICE_GET_INFO, &mut dev_info) };
        if ret < 0 {
            return Err(VfioError::VfioDeviceGetInfo(get_error()));
        }

        let dev_type = if (dev_info.flags & VFIO_DEVICE_FLAGS_PCI) != 0 {
            if dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1
                || dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1
            {
                // NOTE(review): the ioctl itself succeeded here, so get_error()
                // reports whatever errno happened to be left over rather than a
                // failure from this check — the error's inner value may be
                // misleading.
                return Err(VfioError::VfioDeviceGetInfo(get_error()));
            }

            VfioDeviceType::Pci
        } else if (dev_info.flags & VFIO_DEVICE_FLAGS_PLATFORM) != 0 {
            VfioDeviceType::Platform
        } else {
            return Err(VfioError::UnknownDeviceType(dev_info.flags));
        };

        Ok((dev_info, dev_type))
    }
1524
1525
    /// Query interrupt information
    /// return: Vector of interrupts information, each of which contains flags and index
    ///
    /// Every IRQ index is required to report exactly one interrupt
    /// (`count == 1`); any other count is treated as an error.
    pub fn get_irqs(&self) -> Result<Vec<VfioIrq>> {
        let mut irqs: Vec<VfioIrq> = Vec::new();

        for i in 0..self.num_irqs {
            let argsz = mem::size_of::<vfio_irq_info>() as u32;
            let mut irq_info = vfio_irq_info {
                argsz,
                flags: 0,
                index: i,
                count: 0,
            };
            // SAFETY:
            // Safe as we are the owner of dev and irq_info which are valid value,
            // and we verify the return value.
            let ret = unsafe {
                ioctl_with_mut_ref(self.device_file(), VFIO_DEVICE_GET_IRQ_INFO, &mut irq_info)
            };
            // NOTE(review): when the ioctl succeeds but count != 1, get_error()
            // reports a stale errno unrelated to this validation failure.
            if ret < 0 || irq_info.count != 1 {
                return Err(VfioError::VfioDeviceGetInfo(get_error()));
            }

            let irq = VfioIrq {
                flags: irq_info.flags,
                index: irq_info.index,
            };
            irqs.push(irq);
        }
        Ok(irqs)
    }
1556
1557
    /// Enumerate the regions reported by the kernel VFIO driver for `dev`.
    ///
    /// For each region index this queries VFIO_DEVICE_GET_REGION_INFO. When
    /// the kernel asks for a larger buffer (argsz grew), the query is repeated
    /// with room for the capability chain, which is then walked to collect
    /// sparse-mmap areas, the region type/subtype, and MSI-X mmappability.
    ///
    /// NOTE(review): indices into the returned Vec need not match kernel
    /// region indices — a region is skipped (`continue`) when its info ioctl
    /// fails or when its extended info lacks VFIO_REGION_INFO_FLAG_CAPS.
    /// Confirm callers tolerate this before relying on positional lookup.
    #[allow(clippy::cast_ptr_alignment)]
    fn get_regions(dev: &File, num_regions: u32) -> Result<Vec<VfioRegion>> {
        let mut regions: Vec<VfioRegion> = Vec::new();
        for i in 0..num_regions {
            let argsz = mem::size_of::<vfio_region_info>() as u32;
            let mut reg_info = vfio_region_info {
                argsz,
                flags: 0,
                index: i,
                cap_offset: 0,
                size: 0,
                offset: 0,
            };
            let ret =
                // SAFETY:
                // Safe as we are the owner of dev and reg_info which are valid value,
                // and we verify the return value.
                unsafe { ioctl_with_mut_ref(dev, VFIO_DEVICE_GET_REGION_INFO, &mut reg_info) };
            if ret < 0 {
                continue;
            }

            let mut mmaps: Vec<vfio_region_sparse_mmap_area> = Vec::new();
            let mut cap_info: Option<(u32, u32)> = None;
            let mut msix_region_mmappable = false;
            // argsz grew: the kernel has capability data that did not fit in
            // the base struct. Re-query with a buffer of the requested size.
            if reg_info.argsz > argsz {
                let cap_len: usize = (reg_info.argsz - argsz) as usize;
                let mut region_with_cap =
                    vec_with_array_field::<vfio_region_info_with_cap, u8>(cap_len);
                region_with_cap[0].region_info.argsz = reg_info.argsz;
                region_with_cap[0].region_info.flags = 0;
                region_with_cap[0].region_info.index = i;
                region_with_cap[0].region_info.cap_offset = 0;
                region_with_cap[0].region_info.size = 0;
                region_with_cap[0].region_info.offset = 0;
                // SAFETY:
                // Safe as we are the owner of dev and region_info which are valid value,
                // and we verify the return value.
                let ret = unsafe {
                    ioctl_with_mut_ref(
                        dev,
                        VFIO_DEVICE_GET_REGION_INFO,
                        &mut (region_with_cap[0].region_info),
                    )
                };
                if ret < 0 {
                    return Err(VfioError::VfioDeviceGetRegionInfo(get_error()));
                }

                // Some drivers (e.g. for NVIDIA vGPUs) do not fully populate the
                // `vfio_region_info` structure in response to the
                // `VFIO_DEVICE_GET_REGION_INFO` call if the passed size is not enough
                // to hold the entirety of the data.
                // This ensures we use complete data when we construct the `VfioRegion`
                // instance.
                reg_info = region_with_cap[0].region_info;

                if region_with_cap[0].region_info.flags & VFIO_REGION_INFO_FLAG_CAPS == 0 {
                    continue;
                }

                let cap_header_sz = mem::size_of::<vfio_info_cap_header>() as u32;
                let mmap_cap_sz = mem::size_of::<vfio_region_info_cap_sparse_mmap>() as u32;
                let mmap_area_sz = mem::size_of::<vfio_region_sparse_mmap_area>() as u32;
                let type_cap_sz = mem::size_of::<vfio_region_info_cap_type>() as u32;
                let region_info_sz = reg_info.argsz;

                // region_with_cap[0].cap_info may contain many structures, like
                // vfio_region_info_cap_sparse_mmap struct or vfio_region_info_cap_type struct.
                // Both of them begin with vfio_info_cap_header, so we will get individual cap from
                // vfio_into_cap_header.
                // Go through all the cap structs.
                let info_ptr = region_with_cap.as_ptr() as *mut u8;
                let mut offset = region_with_cap[0].region_info.cap_offset;
                // Each header's `next` field chains to the following capability;
                // 0 terminates the chain. Every offset is bounds-checked against
                // the kernel-reported argsz before dereferencing.
                while offset != 0 {
                    if offset + cap_header_sz > region_info_sz {
                        break;
                    }
                    // SAFETY:
                    // Safe, as cap_header struct is in this function allocated region_with_cap
                    // vec.
                    let cap_ptr = unsafe { info_ptr.offset(offset as isize) };
                    // SAFETY:
                    // Safe, as cap_header struct is in this function allocated region_with_cap
                    // vec.
                    let cap_header = unsafe { &*(cap_ptr as *const vfio_info_cap_header) };
                    if cap_header.id as u32 == VFIO_REGION_INFO_CAP_SPARSE_MMAP {
                        if offset + mmap_cap_sz > region_info_sz {
                            break;
                        }
                        // cap_ptr is vfio_region_info_cap_sparse_mmap here
                        let sparse_mmap =
                            // SAFETY:
                            // Safe, this vfio_region_info_cap_sparse_mmap is in this function
                            // allocated region_with_cap vec.
                            unsafe { &*(cap_ptr as *const vfio_region_info_cap_sparse_mmap) };

                        let area_num = sparse_mmap.nr_areas;
                        if offset + mmap_cap_sz + area_num * mmap_area_sz > region_info_sz {
                            break;
                        }
                        let areas =
                            // SAFETY:
                            // Safe, these vfio_region_sparse_mmap_area are in this function allocated
                            // region_with_cap vec.
                            unsafe { sparse_mmap.areas.as_slice(sparse_mmap.nr_areas as usize) };
                        for area in areas.iter() {
                            mmaps.push(*area);
                        }

                        // Sparse regions means the driver can decide which parts of the BAR are
                        // safe to mmap. If that overlaps with the MSIX
                        // data, that's the decision of the driver.
                        // This is required for some devices (e.g. NVIDIA vGPUs).
                        msix_region_mmappable = true;
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_TYPE {
                        if offset + type_cap_sz > region_info_sz {
                            break;
                        }
                        // cap_ptr is vfio_region_info_cap_type here
                        let cap_type_info =
                            // SAFETY:
                            // Safe, this vfio_region_info_cap_type is in this function allocated
                            // region_with_cap vec
                            unsafe { &*(cap_ptr as *const vfio_region_info_cap_type) };

                        cap_info = Some((cap_type_info.type_, cap_type_info.subtype));
                    } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_MSIX_MAPPABLE {
                        mmaps.push(vfio_region_sparse_mmap_area {
                            offset: 0,
                            size: region_with_cap[0].region_info.size,
                        });
                        msix_region_mmappable = true;
                    }

                    offset = cap_header.next;
                }
            } else if reg_info.flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
                // No capability chain: a plain mmap-able region covers itself.
                mmaps.push(vfio_region_sparse_mmap_area {
                    offset: 0,
                    size: reg_info.size,
                });
            }

            let region = VfioRegion {
                flags: reg_info.flags,
                size: reg_info.size,
                offset: reg_info.offset,
                mmaps,
                cap_info,
                msix_region_mmappable,
            };
            regions.push(region);
        }

        Ok(regions)
    }
1714
1715
/// get a region's flag
1716
/// the return's value may conatin:
1717
/// VFIO_REGION_INFO_FLAG_READ: region supports read
1718
/// VFIO_REGION_INFO_FLAG_WRITE: region supports write
1719
/// VFIO_REGION_INFO_FLAG_MMAP: region supports mmap
1720
/// VFIO_REGION_INFO_FLAG_CAPS: region's info supports caps
1721
pub fn get_region_flags(&self, index: usize) -> u32 {
1722
match self.regions.get(index) {
1723
Some(v) => v.flags,
1724
None => {
1725
warn!("get_region_flags() with invalid index: {}", index);
1726
0
1727
}
1728
}
1729
}
1730
1731
/// get a region's offset
1732
/// return: Region offset from the start of vfio device descriptor
1733
pub fn get_region_offset(&self, index: usize) -> u64 {
1734
match self.regions.get(index) {
1735
Some(v) => v.offset,
1736
None => {
1737
warn!("get_region_offset with invalid index: {}", index);
1738
0
1739
}
1740
}
1741
}
1742
1743
/// get a region's size
1744
/// return: Region size from the start of vfio device descriptor
1745
pub fn get_region_size(&self, index: usize) -> u64 {
1746
match self.regions.get(index) {
1747
Some(v) => v.size,
1748
None => {
1749
warn!("get_region_size with invalid index: {}", index);
1750
0
1751
}
1752
}
1753
}
1754
1755
    /// get a number of regions
    /// return: Number of regions of vfio device descriptor
    ///
    /// Note this counts the regions cached by get_regions(), which may be
    /// fewer than the kernel's region count when some regions were skipped.
    pub fn get_region_count(&self) -> usize {
        self.regions.len()
    }
1760
1761
/// get a region's mmap info vector
1762
pub fn get_region_mmap(&self, index: usize) -> Vec<vfio_region_sparse_mmap_area> {
1763
match self.regions.get(index) {
1764
Some(v) => v.mmaps.clone(),
1765
None => {
1766
warn!("get_region_mmap with invalid index: {}", index);
1767
Vec::new()
1768
}
1769
}
1770
}
1771
1772
/// get if the MSIX data with a region is safe to mmap, or if it should be removed
1773
/// before mmapping
1774
pub fn get_region_msix_mmappable(&self, index: usize) -> bool {
1775
match self.regions.get(index) {
1776
Some(v) => v.msix_region_mmappable,
1777
None => {
1778
warn!("get_region_msix_mmappable with invalid index: {}", index);
1779
false
1780
}
1781
}
1782
}
1783
1784
/// find the specified cap type in device regions
1785
/// Input:
1786
/// type_: cap type
1787
/// sub_type: cap sub_type
1788
/// Output:
1789
/// None: device doesn't have the specified cap type
1790
/// Some((bar_index, region_size)): device has the specified cap type, return region's
1791
/// index and size
1792
pub fn get_cap_type_info(&self, type_: u32, sub_type: u32) -> Option<(u32, u64)> {
1793
for (index, region) in self.regions.iter().enumerate() {
1794
if let Some(cap_info) = &region.cap_info {
1795
if cap_info.0 == type_ && cap_info.1 == sub_type {
1796
return Some((index as u32, region.size));
1797
}
1798
}
1799
}
1800
1801
None
1802
}
1803
1804
/// Returns file offset corresponding to the given `VfioRegionAddr`.
1805
/// The offset can be used when reading/writing the VFIO device's FD directly.
1806
pub fn get_offset_for_addr(&self, addr: &VfioRegionAddr) -> Result<u64> {
1807
let region = self
1808
.regions
1809
.get(addr.index)
1810
.ok_or(VfioError::InvalidIndex(addr.index))?;
1811
Ok(region.offset + addr.addr)
1812
}
1813
1814
/// Read region's data from VFIO device into buf
1815
/// index: region num
1816
/// buf: data destination and buf length is read size
1817
/// addr: offset in the region
1818
pub fn region_read(&self, index: usize, buf: &mut [u8], addr: u64) {
1819
let stub: &VfioRegion = self
1820
.regions
1821
.get(index)
1822
.unwrap_or_else(|| panic!("tried to read VFIO with an invalid index: {index}"));
1823
1824
let size = buf.len() as u64;
1825
if size > stub.size || addr + size > stub.size {
1826
panic!(
1827
"tried to read VFIO region with invalid arguments: index={index}, addr=0x{addr:x}, size=0x{size:x}"
1828
);
1829
}
1830
1831
self.dev
1832
.read_exact_at(buf, stub.offset + addr)
1833
.unwrap_or_else(|e| {
1834
panic!("failed to read region: index={index}, addr=0x{addr:x}, error={e}")
1835
});
1836
}
1837
1838
    /// Reads a value from the specified `VfioRegionAddr.addr` + `offset`.
    ///
    /// Panics (via region_read) if the target range is outside the region.
    pub fn region_read_from_addr<T: FromBytes>(&self, addr: &VfioRegionAddr, offset: u64) -> T {
        // Zero-init a T-sized buffer, fill it from the region, then assume init.
        let mut val = mem::MaybeUninit::zeroed();
        let buf =
            // SAFETY:
            // Safe because we have zero-initialized `size_of::<T>()` bytes.
            unsafe { slice::from_raw_parts_mut(val.as_mut_ptr() as *mut u8, mem::size_of::<T>()) };
        self.region_read(addr.index, buf, addr.addr + offset);
        // SAFETY:
        // Safe because any bit pattern is valid for a type that implements FromBytes.
        unsafe { val.assume_init() }
    }
1850
1851
/// write the data from buf into a vfio device region
1852
/// index: region num
1853
/// buf: data src and buf length is write size
1854
/// addr: offset in the region
1855
pub fn region_write(&self, index: usize, buf: &[u8], addr: u64) {
1856
let stub: &VfioRegion = self
1857
.regions
1858
.get(index)
1859
.unwrap_or_else(|| panic!("tried to write VFIO with an invalid index: {index}"));
1860
1861
let size = buf.len() as u64;
1862
if size > stub.size
1863
|| addr + size > stub.size
1864
|| (stub.flags & VFIO_REGION_INFO_FLAG_WRITE) == 0
1865
{
1866
panic!(
1867
"tried to write VFIO region with invalid arguments: index={index}, addr=0x{addr:x}, size=0x{size:x}"
1868
);
1869
}
1870
1871
self.dev
1872
.write_all_at(buf, stub.offset + addr)
1873
.unwrap_or_else(|e| {
1874
panic!("failed to write region: index={index}, addr=0x{addr:x}, error={e}")
1875
});
1876
}
1877
1878
    /// Writes data into the specified `VfioRegionAddr.addr` + `offset`.
    ///
    /// Panics (via region_write) on out-of-range or read-only targets.
    pub fn region_write_to_addr(&self, data: &[u8], addr: &VfioRegionAddr, offset: u64) {
        self.region_write(addr.index, data, addr.addr + offset);
    }
1882
1883
    /// get vfio device's descriptors which are passed into minijail process
    ///
    /// The device FD, group FD, and shared container FD must all survive the
    /// sandbox's descriptor filtering for the device to remain usable.
    pub fn keep_rds(&self) -> Vec<RawDescriptor> {
        vec![
            self.dev.as_raw_descriptor(),
            self.group_descriptor,
            self.container.lock().as_raw_descriptor(),
        ]
    }
1891
1892
    /// Add (iova, user_addr) map into vfio container iommu table
    ///
    /// Delegates to the shared container, which performs VFIO_IOMMU_MAP_DMA.
    ///
    /// # Safety
    ///
    /// The caller is responsible for determining the safety of the VFIO_IOMMU_MAP_DMA ioctl.
    pub unsafe fn vfio_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        self.container
            .lock()
            .vfio_dma_map(iova, size, user_addr, write_en)
    }
1907
1908
/// Remove (iova, user_addr) map from vfio container iommu table
1909
pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
1910
self.container.lock().vfio_dma_unmap(iova, size)
1911
}
1912
1913
pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
1914
self.container.lock().vfio_get_iommu_page_size_mask()
1915
}
1916
1917
pub fn alloc_iova(&self, size: u64, align_size: u64, alloc: Alloc) -> Result<u64> {
1918
self.iova_alloc
1919
.lock()
1920
.allocate_with_align(size, alloc, "alloc_iova".to_owned(), align_size)
1921
.map_err(VfioError::Resources)
1922
}
1923
1924
pub fn get_iova(&self, alloc: &Alloc) -> Option<AddressRange> {
1925
self.iova_alloc.lock().get(alloc).map(|res| res.0)
1926
}
1927
1928
pub fn release_iova(&self, alloc: Alloc) -> Result<AddressRange> {
1929
self.iova_alloc
1930
.lock()
1931
.release(alloc)
1932
.map_err(VfioError::Resources)
1933
}
1934
1935
pub fn get_max_addr(&self) -> u64 {
1936
self.iova_alloc.lock().get_max_addr()
1937
}
1938
1939
/// Returns a reference to the `File` backing this vfio device.
pub fn device_file(&self) -> &File {
    &self.dev
}
/// close vfio device
1945
pub fn close(&self) {
1946
self.container.lock().remove_group(self.group_id, true);
1947
}
1948
}
/// Accessor for the PCI configuration space of a [`VfioDevice`].
pub struct VfioPciConfig {
    // Shared handle to the underlying vfio device whose config region is read/written.
    device: Arc<VfioDevice>,
}
impl VfioPciConfig {
1955
pub fn new(device: Arc<VfioDevice>) -> Self {
1956
VfioPciConfig { device }
1957
}
1958
1959
pub fn read_config<T: IntoBytes + FromBytes>(&self, offset: u32) -> T {
1960
let mut config = T::new_zeroed();
1961
self.device.region_read(
1962
VFIO_PCI_CONFIG_REGION_INDEX as usize,
1963
config.as_mut_bytes(),
1964
offset.into(),
1965
);
1966
config
1967
}
1968
1969
pub fn write_config<T: Immutable + IntoBytes>(&self, config: T, offset: u32) {
1970
self.device.region_write(
1971
VFIO_PCI_CONFIG_REGION_INDEX as usize,
1972
config.as_bytes(),
1973
offset.into(),
1974
);
1975
}
1976
1977
/// Set the VFIO device this config refers to as the bus master.
1978
pub fn set_bus_master(&self) {
1979
/// Constant definitions from `linux/pci_regs.h`.
1980
const PCI_COMMAND: u32 = 0x4;
1981
/// Enable bus mastering
1982
const PCI_COMMAND_MASTER: u16 = 0x4;
1983
1984
let mut cmd: u16 = self.read_config(PCI_COMMAND);
1985
1986
if cmd & PCI_COMMAND_MASTER != 0 {
1987
return;
1988
}
1989
1990
cmd |= PCI_COMMAND_MASTER;
1991
1992
self.write_config(cmd, PCI_COMMAND);
1993
}
1994
}
1995
1996
impl AsRawDescriptor for VfioDevice {
    /// Returns the raw descriptor of the file backing this vfio device.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.dev.as_raw_descriptor()
    }
}