Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
google
GitHub Repository: google/crosvm
Path: blob/main/devices/src/pci/coiommu.rs
5394 views
1
// Copyright 2022 The ChromiumOS Authors
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
//! This is the CoIOMMU backend implementation. CoIOMMU is a virtual device
//! which provides fine-grained pinning for VFIO pci-passthrough devices
//! so that the hypervisor doesn't need to pin the entire VM's memory, improving
//! memory utilization. CoIOMMU doesn't provide intra-guest protection, so it
//! can only be used for TRUSTED passthrough devices.
10
//!
11
//! CoIOMMU is presented at KVM forum 2020:
12
//! <https://kvmforum2020.sched.com/event/eE2z/a-virtual-iommu-with-cooperative-dma-buffer-tracking-yu-zhang-intel>
13
//!
14
//! Also presented at usenix ATC20:
15
//! <https://www.usenix.org/conference/atc20/presentation/tian>
16
17
use std::collections::VecDeque;
18
use std::convert::TryInto;
19
use std::default::Default;
20
use std::fmt;
21
use std::mem;
22
use std::panic;
23
use std::sync::atomic::fence;
24
use std::sync::atomic::AtomicU32;
25
use std::sync::atomic::Ordering;
26
use std::sync::Arc;
27
use std::thread;
28
use std::time::Duration;
29
30
use anyhow::bail;
31
use anyhow::ensure;
32
use anyhow::Context;
33
use anyhow::Result;
34
use base::error;
35
use base::info;
36
use base::AsRawDescriptor;
37
use base::Event;
38
use base::EventToken;
39
use base::MemoryMapping;
40
use base::MemoryMappingBuilder;
41
use base::Protection;
42
use base::RawDescriptor;
43
use base::SafeDescriptor;
44
use base::SharedMemory;
45
use base::Timer;
46
use base::TimerTrait;
47
use base::Tube;
48
use base::TubeError;
49
use base::WaitContext;
50
use base::WorkerThread;
51
use hypervisor::Datamatch;
52
use hypervisor::MemCacheType;
53
use resources::Alloc;
54
use resources::AllocOptions;
55
use resources::SystemAllocator;
56
use serde::Deserialize;
57
use serde::Deserializer;
58
use serde::Serialize;
59
use serde_keyvalue::FromKeyValues;
60
use sync::Mutex;
61
use thiserror::Error as ThisError;
62
use vm_control::api::VmMemoryClient;
63
use vm_control::VmMemoryDestination;
64
use vm_control::VmMemorySource;
65
use vm_memory::GuestAddress;
66
use vm_memory::GuestMemory;
67
use zerocopy::FromBytes;
68
use zerocopy::IntoBytes;
69
70
use crate::pci::pci_configuration::PciBarConfiguration;
71
use crate::pci::pci_configuration::PciBarPrefetchable;
72
use crate::pci::pci_configuration::PciBarRegionType;
73
use crate::pci::pci_configuration::PciClassCode;
74
use crate::pci::pci_configuration::PciConfiguration;
75
use crate::pci::pci_configuration::PciHeaderType;
76
use crate::pci::pci_configuration::PciOtherSubclass;
77
use crate::pci::pci_configuration::COMMAND_REG;
78
use crate::pci::pci_configuration::COMMAND_REG_MEMORY_SPACE_MASK;
79
use crate::pci::pci_device::BarRange;
80
use crate::pci::pci_device::PciDevice;
81
use crate::pci::pci_device::Result as PciResult;
82
use crate::pci::PciAddress;
83
use crate::pci::PciBarIndex;
84
use crate::pci::PciDeviceError;
85
use crate::vfio::VfioContainer;
86
use crate::Suspendable;
87
use crate::UnpinRequest;
88
use crate::UnpinResponse;
89
90
const PCI_VENDOR_ID_COIOMMU: u16 = 0x1234;
91
const PCI_DEVICE_ID_COIOMMU: u16 = 0xabcd;
92
const COIOMMU_CMD_DEACTIVATE: u64 = 0;
93
const COIOMMU_CMD_ACTIVATE: u64 = 1;
94
const COIOMMU_CMD_PARK_UNPIN: u64 = 2;
95
const COIOMMU_CMD_UNPARK_UNPIN: u64 = 3;
96
const COIOMMU_REVISION_ID: u8 = 0x10;
97
const COIOMMU_MMIO_BAR: PciBarIndex = 0;
98
const COIOMMU_MMIO_BAR_SIZE: u64 = 0x2000;
99
const COIOMMU_NOTIFYMAP_BAR: PciBarIndex = 2;
100
const COIOMMU_NOTIFYMAP_SIZE: usize = 0x2000;
101
const COIOMMU_TOPOLOGYMAP_BAR: u8 = 4;
102
const COIOMMU_TOPOLOGYMAP_SIZE: usize = 0x2000;
103
const PAGE_SIZE_4K: u64 = 4096;
104
const PAGE_SHIFT_4K: u64 = 12;
105
const PIN_PAGES_IN_BATCH: u64 = 1 << 63;
106
107
const DTTE_PINNED_FLAG: u32 = 1 << 31;
108
const DTTE_ACCESSED_FLAG: u32 = 1 << 30;
109
const DTT_ENTRY_PRESENT: u64 = 1;
110
const DTT_ENTRY_PFN_SHIFT: u64 = 12;
111
112
#[derive(ThisError, Debug)]
113
enum Error {
114
#[error("CoIommu failed to create shared memory")]
115
CreateSharedMemory,
116
#[error("Failed to get DTT entry")]
117
GetDTTEntry,
118
}
119
120
//default interval is 60s
121
const UNPIN_DEFAULT_INTERVAL: Duration = Duration::from_secs(60);
122
const UNPIN_GEN_DEFAULT_THRES: u64 = 10;
123
/// Holds the coiommu unpin policy
124
#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Serialize, Deserialize)]
125
#[serde(rename_all = "kebab-case")]
126
pub enum CoIommuUnpinPolicy {
127
#[default]
128
Off,
129
Lru,
130
}
131
132
impl fmt::Display for CoIommuUnpinPolicy {
133
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
134
use self::CoIommuUnpinPolicy::*;
135
136
match self {
137
Off => write!(f, "off"),
138
Lru => write!(f, "lru"),
139
}
140
}
141
}
142
143
fn deserialize_unpin_interval<'de, D: Deserializer<'de>>(
144
deserializer: D,
145
) -> Result<Duration, D::Error> {
146
let secs = u64::deserialize(deserializer)?;
147
148
Ok(Duration::from_secs(secs))
149
}
150
151
fn deserialize_unpin_limit<'de, D: Deserializer<'de>>(
152
deserializer: D,
153
) -> Result<Option<u64>, D::Error> {
154
let limit = u64::deserialize(deserializer)?;
155
156
match limit {
157
0 => Err(serde::de::Error::custom(
158
"Please use non-zero unpin_limit value",
159
)),
160
limit => Ok(Some(limit)),
161
}
162
}
163
164
fn unpin_interval_default() -> Duration {
165
UNPIN_DEFAULT_INTERVAL
166
}
167
168
fn unpin_gen_threshold_default() -> u64 {
169
UNPIN_GEN_DEFAULT_THRES
170
}
171
172
/// Holds the parameters for a coiommu device
173
#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize, FromKeyValues)]
174
#[serde(deny_unknown_fields)]
175
pub struct CoIommuParameters {
176
#[serde(default)]
177
pub unpin_policy: CoIommuUnpinPolicy,
178
#[serde(
179
deserialize_with = "deserialize_unpin_interval",
180
default = "unpin_interval_default"
181
)]
182
pub unpin_interval: Duration,
183
#[serde(deserialize_with = "deserialize_unpin_limit", default)]
184
pub unpin_limit: Option<u64>,
185
// Number of unpin intervals a pinned page must be busy for to be aged into the
186
// older, less frequently checked generation.
187
#[serde(default = "unpin_gen_threshold_default")]
188
pub unpin_gen_threshold: u64,
189
}
190
191
impl Default for CoIommuParameters {
192
fn default() -> Self {
193
Self {
194
unpin_policy: CoIommuUnpinPolicy::Off,
195
unpin_interval: UNPIN_DEFAULT_INTERVAL,
196
unpin_limit: None,
197
unpin_gen_threshold: UNPIN_GEN_DEFAULT_THRES,
198
}
199
}
200
}
201
202
#[derive(Default, Debug, Copy, Clone)]
203
struct CoIommuReg {
204
dtt_root: u64,
205
cmd: u64,
206
dtt_level: u64,
207
}
208
209
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
210
struct PinnedPageInfo {
211
gfn: u64,
212
unpin_busy_cnt: u64,
213
}
214
215
impl PinnedPageInfo {
216
fn new(gfn: u64, unpin_busy_cnt: u64) -> Self {
217
PinnedPageInfo {
218
gfn,
219
unpin_busy_cnt,
220
}
221
}
222
}
223
224
#[derive(PartialEq, Debug, Eq)]
225
enum UnpinThreadState {
226
Unparked,
227
Parked,
228
}
229
230
struct CoIommuPinState {
231
new_gen_pinned_pages: VecDeque<PinnedPageInfo>,
232
old_gen_pinned_pages: VecDeque<u64>,
233
unpin_thread_state: UnpinThreadState,
234
unpin_park_count: u64,
235
}
236
237
unsafe fn vfio_map(
238
vfio_container: &Arc<Mutex<VfioContainer>>,
239
iova: u64,
240
size: u64,
241
user_addr: u64,
242
) -> bool {
243
match vfio_container
244
.lock()
245
.vfio_dma_map(iova, size, user_addr, true)
246
{
247
Ok(_) => true,
248
Err(e) => {
249
if let Some(errno) = std::io::Error::last_os_error().raw_os_error() {
250
if errno == libc::EEXIST {
251
// Already pinned. set PINNED flag
252
error!("CoIommu: iova 0x{:x} already pinned", iova);
253
return true;
254
}
255
}
256
error!("CoIommu: failed to map iova 0x{:x}: {}", iova, e);
257
false
258
}
259
}
260
}
261
262
fn vfio_unmap(vfio_container: &Arc<Mutex<VfioContainer>>, iova: u64, size: u64) -> bool {
263
match vfio_container.lock().vfio_dma_unmap(iova, size) {
264
Ok(_) => true,
265
Err(e) => {
266
error!("CoIommu: failed to unmap iova 0x{:x}: {}", iova, e);
267
false
268
}
269
}
270
}
271
272
#[derive(Default, Debug, Copy, Clone, FromBytes, IntoBytes)]
273
#[repr(C)]
274
struct PinPageInfo {
275
bdf: u16,
276
pad: [u16; 3],
277
nr_pages: u64,
278
}
279
280
const COIOMMU_UPPER_LEVEL_STRIDE: u64 = 9;
281
const COIOMMU_UPPER_LEVEL_MASK: u64 = (1 << COIOMMU_UPPER_LEVEL_STRIDE) - 1;
282
const COIOMMU_PT_LEVEL_STRIDE: u64 = 10;
283
const COIOMMU_PT_LEVEL_MASK: u64 = (1 << COIOMMU_PT_LEVEL_STRIDE) - 1;
284
285
fn level_to_offset(gfn: u64, level: u64) -> Result<u64> {
286
if level == 1 {
287
return Ok(gfn & COIOMMU_PT_LEVEL_MASK);
288
}
289
290
if level == 0 {
291
bail!("Invalid level for gfn 0x{:x}", gfn);
292
}
293
294
let offset = COIOMMU_PT_LEVEL_STRIDE + (level - 2) * COIOMMU_UPPER_LEVEL_STRIDE;
295
296
Ok((gfn >> offset) & COIOMMU_UPPER_LEVEL_MASK)
297
}
298
299
struct DTTIter {
300
ptr: *const u8,
301
gfn: u64,
302
}
303
304
impl Default for DTTIter {
305
fn default() -> Self {
306
DTTIter {
307
ptr: std::ptr::null(),
308
gfn: 0,
309
}
310
}
311
}
312
313
// Get a DMA Tracking Table(DTT) entry associated with the gfn.
314
//
315
// There are two ways to get the entry:
316
// #1. Walking the DMA Tracking Table(DTT) by the GFN to get the
317
// corresponding entry. The DTT is shared between frontend and
318
// backend. It is page-table-like strctures and the entry is indexed
319
// by GFN. The argument dtt_root represents the root page
320
// pga and dtt_level represents the maximum page table level.
321
//
322
// #2. Calculate the entry address via the argument dtt_iter. dtt_iter
323
// stores an entry address and the associated gfn. If the target gfn is
324
// in the same page table page with the gfn in dtt_iter, then can
325
// calculate the target entry address based on the entry address in
326
// dtt_iter.
327
//
328
// As the DTT entry is shared between frontend and backend, the accessing
329
// should be atomic. So the returned value is converted to an AtomicU32
330
// pointer.
331
fn gfn_to_dtt_pte(
332
mem: &GuestMemory,
333
dtt_level: u64,
334
dtt_root: u64,
335
dtt_iter: &mut DTTIter,
336
gfn: u64,
337
) -> Result<*const AtomicU32> {
338
let ptr = if dtt_iter.ptr.is_null()
339
|| dtt_iter.gfn >> COIOMMU_PT_LEVEL_STRIDE != gfn >> COIOMMU_PT_LEVEL_STRIDE
340
{
341
// Slow path to walk the DTT to get the pte entry
342
let mut level = dtt_level;
343
let mut pt_gpa = dtt_root;
344
let dtt_nonleaf_entry_size = mem::size_of::<u64>() as u64;
345
346
while level != 1 {
347
let index = level_to_offset(gfn, level)? * dtt_nonleaf_entry_size;
348
let parent_pt = mem
349
.read_obj_from_addr::<u64>(GuestAddress(pt_gpa + index))
350
.context(Error::GetDTTEntry)?;
351
352
if (parent_pt & DTT_ENTRY_PRESENT) == 0 {
353
bail!("DTT absent at level {} for gfn 0x{:x}", level, gfn);
354
}
355
356
pt_gpa = (parent_pt >> DTT_ENTRY_PFN_SHIFT) << PAGE_SHIFT_4K;
357
level -= 1;
358
}
359
360
let index = level_to_offset(gfn, level)? * mem::size_of::<u32>() as u64;
361
362
mem.get_host_address(GuestAddress(pt_gpa + index))
363
.context(Error::GetDTTEntry)?
364
} else if gfn > dtt_iter.gfn {
365
// SAFETY:
366
// Safe because we checked that dtt_iter.ptr is valid and that the dtt_pte
367
// for gfn lies on the same dtt page as the dtt_pte for dtt_iter.gfn, which
368
// means the calculated ptr will point to the same page as dtt_iter.ptr
369
unsafe {
370
dtt_iter
371
.ptr
372
.add(mem::size_of::<AtomicU32>() * (gfn - dtt_iter.gfn) as usize)
373
}
374
} else {
375
// SAFETY:
376
// Safe because we checked that dtt_iter.ptr is valid and that the dtt_pte
377
// for gfn lies on the same dtt page as the dtt_pte for dtt_iter.gfn, which
378
// means the calculated ptr will point to the same page as dtt_iter.ptr
379
unsafe {
380
dtt_iter
381
.ptr
382
.sub(mem::size_of::<AtomicU32>() * (dtt_iter.gfn - gfn) as usize)
383
}
384
};
385
386
dtt_iter.ptr = ptr;
387
dtt_iter.gfn = gfn;
388
389
Ok(ptr as *const AtomicU32)
390
}
391
392
fn pin_page(
393
pinstate: &mut CoIommuPinState,
394
policy: CoIommuUnpinPolicy,
395
vfio_container: &Arc<Mutex<VfioContainer>>,
396
mem: &GuestMemory,
397
dtt_level: u64,
398
dtt_root: u64,
399
dtt_iter: &mut DTTIter,
400
gfn: u64,
401
) -> Result<()> {
402
let leaf_entry = gfn_to_dtt_pte(mem, dtt_level, dtt_root, dtt_iter, gfn)?;
403
404
let gpa = gfn << PAGE_SHIFT_4K;
405
let host_addr = mem
406
.get_host_address_range(GuestAddress(gpa), PAGE_SIZE_4K as usize)
407
.context("failed to get host address")? as u64;
408
409
// SAFETY:
410
// Safe because ptr is valid and guaranteed by the gfn_to_dtt_pte.
411
// Test PINNED flag
412
if (unsafe { (*leaf_entry).load(Ordering::Relaxed) } & DTTE_PINNED_FLAG) != 0 {
413
info!("CoIommu: gfn 0x{:x} already pinned", gfn);
414
return Ok(());
415
}
416
417
// SAFETY:
418
// Safe because the gpa is valid from the gfn_to_dtt_pte and the host_addr
419
// is guaranteed by MemoryMapping interface.
420
if unsafe { vfio_map(vfio_container, gpa, PAGE_SIZE_4K, host_addr) } {
421
// SAFETY:
422
// Safe because ptr is valid and guaranteed by the gfn_to_dtt_pte.
423
// set PINNED flag
424
unsafe { (*leaf_entry).fetch_or(DTTE_PINNED_FLAG, Ordering::SeqCst) };
425
if policy == CoIommuUnpinPolicy::Lru {
426
pinstate
427
.new_gen_pinned_pages
428
.push_back(PinnedPageInfo::new(gfn, 0));
429
}
430
}
431
432
Ok(())
433
}
434
435
#[derive(PartialEq, Debug, Eq)]
436
enum UnpinResult {
437
UnpinlistEmpty,
438
Unpinned,
439
NotPinned,
440
NotUnpinned,
441
FailedUnpin,
442
UnpinParked,
443
}
444
445
fn unpin_page(
446
pinstate: &mut CoIommuPinState,
447
vfio_container: &Arc<Mutex<VfioContainer>>,
448
mem: &GuestMemory,
449
dtt_level: u64,
450
dtt_root: u64,
451
dtt_iter: &mut DTTIter,
452
gfn: u64,
453
force: bool,
454
) -> UnpinResult {
455
if pinstate.unpin_thread_state == UnpinThreadState::Parked {
456
return UnpinResult::UnpinParked;
457
}
458
459
let leaf_entry = match gfn_to_dtt_pte(mem, dtt_level, dtt_root, dtt_iter, gfn) {
460
Ok(v) => v,
461
Err(_) => {
462
// The case force == true may try to unpin a page which is not
463
// mapped in the dtt. For such page, the pte doesn't exist yet
464
// thus don't need to report any error log.
465
// The case force == false is used by coiommu to periodically
466
// unpin the pages which have been mapped in dtt, thus the pte
467
// for such page does exist. However with the unpin request from
468
// virtio balloon, such pages can be unpinned already and the DTT
469
// pages might be reclaimed by the Guest OS kernel as well, thus
470
// it is also possible to be here. Not to report an error log.
471
return UnpinResult::NotPinned;
472
}
473
};
474
475
if force {
476
// SAFETY:
477
// Safe because leaf_entry is valid and guaranteed by the gfn_to_dtt_pte.
478
// This case is for balloon to evict pages so these pages should
479
// already been locked by balloon and no device driver in VM is
480
// able to access these pages, so just clear ACCESSED flag first
481
// to make sure the following unpin can be success.
482
unsafe { (*leaf_entry).fetch_and(!DTTE_ACCESSED_FLAG, Ordering::SeqCst) };
483
}
484
485
// SAFETY:
486
// Safe because leaf_entry is valid and guaranteed by the gfn_to_dtt_pte.
487
if let Err(entry) = unsafe {
488
(*leaf_entry).compare_exchange(DTTE_PINNED_FLAG, 0, Ordering::SeqCst, Ordering::SeqCst)
489
} {
490
// The compare_exchange failed as the original leaf entry is
491
// not DTTE_PINNED_FLAG so cannot do the unpin.
492
if entry == 0 {
493
// The GFN is already unpinned. This is very similar to the
494
// gfn_to_dtt_pte error case, with the only difference being
495
// that the dtt_pte happens to be on a present page table.
496
UnpinResult::NotPinned
497
} else {
498
if !force {
499
// SAFETY:
500
// Safe because leaf_entry is valid and guaranteed by the gfn_to_dtt_pte.
501
// The ACCESSED_FLAG is set by the guest if guest requires DMA map for
502
// this page. It represents whether or not this page is touched by the
503
// guest. By clearing this flag after an unpin work, we can detect if
504
// this page has been touched by the guest in the next round of unpin
505
// work. If the ACCESSED_FLAG is set at the next round, unpin this page
506
// will be failed and we will be here again to clear this flag. If this
507
// flag is not set at the next round, unpin this page will be probably
508
// success.
509
unsafe { (*leaf_entry).fetch_and(!DTTE_ACCESSED_FLAG, Ordering::SeqCst) };
510
} else {
511
// If we're here, then the guest is trying to release a page via the
512
// balloon that it still has pinned. This most likely that something is
513
// wrong in the guest kernel. Just leave the page pinned and log
514
// an error.
515
// This failure blocks the balloon from removing the page, which ensures
516
// that the guest's view of memory will remain consistent with device
517
// DMA's view of memory. Also note that the host kernel maintains an
518
// elevated refcount for pinned pages, which is a second guarantee the
519
// pages accessible by device DMA won't be freed until after they are
520
// unpinned.
521
error!(
522
"CoIommu: force case cannot pin gfn 0x{:x} entry 0x{:x}",
523
gfn, entry
524
);
525
}
526
// GFN cannot be unpinned either because the unmap count
527
// is non-zero or the it has accessed flag set.
528
UnpinResult::NotUnpinned
529
}
530
} else {
531
// The compare_exchange success as the original leaf entry is
532
// DTTE_PINNED_FLAG and the new leaf entry is 0 now. Unpin the
533
// page.
534
let gpa = gfn << PAGE_SHIFT_4K;
535
if vfio_unmap(vfio_container, gpa, PAGE_SIZE_4K) {
536
UnpinResult::Unpinned
537
} else {
538
// SAFETY:
539
// Safe because leaf_entry is valid and guaranteed by the gfn_to_dtt_pte.
540
// make sure the pinned flag is set
541
unsafe { (*leaf_entry).fetch_or(DTTE_PINNED_FLAG, Ordering::SeqCst) };
542
// need to put this gfn back to pinned vector
543
UnpinResult::FailedUnpin
544
}
545
}
546
}
547
548
struct PinWorker {
549
mem: GuestMemory,
550
endpoints: Vec<u16>,
551
notifymap_mmap: Arc<MemoryMapping>,
552
dtt_level: u64,
553
dtt_root: u64,
554
ioevents: Vec<Event>,
555
vfio_container: Arc<Mutex<VfioContainer>>,
556
pinstate: Arc<Mutex<CoIommuPinState>>,
557
params: CoIommuParameters,
558
}
559
560
impl PinWorker {
561
fn debug_label(&self) -> &'static str {
562
"CoIommuPinWorker"
563
}
564
565
fn run(&mut self, kill_evt: Event) {
566
#[derive(EventToken)]
567
enum Token {
568
Kill,
569
Pin { index: usize },
570
}
571
572
let wait_ctx: WaitContext<Token> =
573
match WaitContext::build_with(&[(&kill_evt, Token::Kill)]) {
574
Ok(pc) => pc,
575
Err(e) => {
576
error!("{}: failed creating WaitContext: {}", self.debug_label(), e);
577
return;
578
}
579
};
580
581
for (index, event) in self.ioevents.iter().enumerate() {
582
match wait_ctx.add(event, Token::Pin { index }) {
583
Ok(_) => {}
584
Err(e) => {
585
error!(
586
"{}: failed to add ioevent for index {}: {}",
587
self.debug_label(),
588
index,
589
e
590
);
591
return;
592
}
593
}
594
}
595
596
'wait: loop {
597
let events = match wait_ctx.wait() {
598
Ok(v) => v,
599
Err(e) => {
600
error!("{}: failed polling for events: {}", self.debug_label(), e);
601
break;
602
}
603
};
604
605
for event in events.iter().filter(|e| e.is_readable) {
606
match event.token {
607
Token::Kill => break 'wait,
608
Token::Pin { index } => {
609
let offset = index * mem::size_of::<u64>();
610
if let Some(event) = self.ioevents.get(index) {
611
if let Err(e) = event.wait() {
612
error!(
613
"{}: failed reading event {}: {}",
614
self.debug_label(),
615
index,
616
e
617
);
618
self.notifymap_mmap.write_obj::<u64>(0, offset).unwrap();
619
break 'wait;
620
}
621
}
622
if let Ok(data) = self.notifymap_mmap.read_obj::<u64>(offset) {
623
if let Err(e) = self.pin_pages(data) {
624
error!("{}: {}", self.debug_label(), e);
625
}
626
}
627
fence(Ordering::SeqCst);
628
self.notifymap_mmap.write_obj::<u64>(0, offset).unwrap();
629
}
630
}
631
}
632
}
633
}
634
635
fn pin_pages_in_batch(&mut self, gpa: u64) -> Result<()> {
636
let pin_page_info = self
637
.mem
638
.read_obj_from_addr::<PinPageInfo>(GuestAddress(gpa))
639
.context("failed to get pin page info")?;
640
641
let bdf = pin_page_info.bdf;
642
ensure!(
643
self.endpoints.contains(&bdf),
644
"pin page for unexpected bdf 0x{:x}",
645
bdf
646
);
647
648
let mut nr_pages = pin_page_info.nr_pages;
649
let mut offset = mem::size_of::<PinPageInfo>() as u64;
650
let mut dtt_iter: DTTIter = Default::default();
651
let mut pinstate = self.pinstate.lock();
652
while nr_pages > 0 {
653
let gfn = self
654
.mem
655
.read_obj_from_addr::<u64>(GuestAddress(gpa + offset))
656
.context("failed to get pin page gfn")?;
657
658
pin_page(
659
&mut pinstate,
660
self.params.unpin_policy,
661
&self.vfio_container,
662
&self.mem,
663
self.dtt_level,
664
self.dtt_root,
665
&mut dtt_iter,
666
gfn,
667
)?;
668
669
offset += mem::size_of::<u64>() as u64;
670
nr_pages -= 1;
671
}
672
673
Ok(())
674
}
675
676
fn pin_pages(&mut self, gfn_bdf: u64) -> Result<()> {
677
if gfn_bdf & PIN_PAGES_IN_BATCH != 0 {
678
let gpa = gfn_bdf & !PIN_PAGES_IN_BATCH;
679
self.pin_pages_in_batch(gpa)
680
} else {
681
let bdf = (gfn_bdf & 0xffff) as u16;
682
let gfn = gfn_bdf >> 16;
683
let mut dtt_iter: DTTIter = Default::default();
684
ensure!(
685
self.endpoints.contains(&bdf),
686
"pin page for unexpected bdf 0x{:x}",
687
bdf
688
);
689
690
let mut pinstate = self.pinstate.lock();
691
pin_page(
692
&mut pinstate,
693
self.params.unpin_policy,
694
&self.vfio_container,
695
&self.mem,
696
self.dtt_level,
697
self.dtt_root,
698
&mut dtt_iter,
699
gfn,
700
)
701
}
702
}
703
}
704
705
struct UnpinWorker {
706
mem: GuestMemory,
707
dtt_level: u64,
708
dtt_root: u64,
709
vfio_container: Arc<Mutex<VfioContainer>>,
710
unpin_tube: Option<Tube>,
711
pinstate: Arc<Mutex<CoIommuPinState>>,
712
params: CoIommuParameters,
713
unpin_gen_threshold: u64,
714
}
715
716
impl UnpinWorker {
717
fn debug_label(&self) -> &'static str {
718
"CoIommuUnpinWorker"
719
}
720
721
fn run(&mut self, kill_evt: Event) {
722
#[derive(EventToken)]
723
enum Token {
724
UnpinTimer,
725
UnpinReq,
726
Kill,
727
}
728
729
let wait_ctx: WaitContext<Token> =
730
match WaitContext::build_with(&[(&kill_evt, Token::Kill)]) {
731
Ok(pc) => pc,
732
Err(e) => {
733
error!("{}: failed creating WaitContext: {}", self.debug_label(), e);
734
return;
735
}
736
};
737
738
if let Some(tube) = &self.unpin_tube {
739
if let Err(e) = wait_ctx.add(tube, Token::UnpinReq) {
740
error!("{}: failed creating WaitContext: {}", self.debug_label(), e);
741
return;
742
}
743
}
744
745
let mut unpin_timer = if self.params.unpin_policy != CoIommuUnpinPolicy::Off
746
&& !self.params.unpin_interval.is_zero()
747
{
748
let mut timer = match Timer::new() {
749
Ok(t) => t,
750
Err(e) => {
751
error!(
752
"{}: failed to create the unpin timer: {}",
753
self.debug_label(),
754
e
755
);
756
return;
757
}
758
};
759
if let Err(e) = timer.reset_repeating(self.params.unpin_interval) {
760
error!(
761
"{}: failed to start the unpin timer: {}",
762
self.debug_label(),
763
e
764
);
765
return;
766
}
767
if let Err(e) = wait_ctx.add(&timer, Token::UnpinTimer) {
768
error!("{}: failed creating WaitContext: {}", self.debug_label(), e);
769
return;
770
}
771
Some(timer)
772
} else {
773
None
774
};
775
776
let unpin_tube = self.unpin_tube.take();
777
'wait: loop {
778
let events = match wait_ctx.wait() {
779
Ok(v) => v,
780
Err(e) => {
781
error!("{}: failed polling for events: {}", self.debug_label(), e);
782
break;
783
}
784
};
785
786
for event in events.iter().filter(|e| e.is_readable) {
787
match event.token {
788
Token::UnpinTimer => {
789
self.unpin_pages();
790
if let Some(timer) = &mut unpin_timer {
791
if let Err(e) = timer.mark_waited() {
792
error!(
793
"{}: failed to clear unpin timer: {}",
794
self.debug_label(),
795
e
796
);
797
break 'wait;
798
}
799
}
800
}
801
Token::UnpinReq => {
802
if let Some(tube) = &unpin_tube {
803
match tube.recv::<UnpinRequest>() {
804
Ok(req) => {
805
let mut unpin_done = true;
806
for range in req.ranges {
807
// Locking with respect to pin_pages isn't necessary
808
// for this case because the unpinned pages in the range
809
// should all be in the balloon and so nothing will attempt
810
// to pin them.
811
if !self.unpin_pages_in_range(range.0, range.1) {
812
unpin_done = false;
813
break;
814
}
815
}
816
let resp = if unpin_done {
817
UnpinResponse::Success
818
} else {
819
UnpinResponse::Failed
820
};
821
if let Err(e) = tube.send(&resp) {
822
error!(
823
"{}: failed to send unpin response {}",
824
self.debug_label(),
825
e
826
);
827
}
828
}
829
Err(e) => {
830
if let TubeError::Disconnected = e {
831
if let Err(e) = wait_ctx.delete(tube) {
832
error!(
833
"{}: failed to remove unpin_tube: {}",
834
self.debug_label(),
835
e
836
);
837
}
838
} else {
839
error!(
840
"{}: failed to recv Unpin Request: {}",
841
self.debug_label(),
842
e
843
);
844
}
845
}
846
}
847
}
848
}
849
Token::Kill => break 'wait,
850
}
851
}
852
}
853
self.unpin_tube = unpin_tube;
854
}
855
856
fn unpin_pages(&mut self) {
857
if self.params.unpin_policy == CoIommuUnpinPolicy::Lru {
858
self.lru_unpin_pages();
859
}
860
}
861
862
fn lru_unpin_page(
863
&mut self,
864
dtt_iter: &mut DTTIter,
865
new_gen: bool,
866
) -> (UnpinResult, Option<PinnedPageInfo>) {
867
let mut pinstate = self.pinstate.lock();
868
let pageinfo = if new_gen {
869
pinstate.new_gen_pinned_pages.pop_front()
870
} else {
871
pinstate
872
.old_gen_pinned_pages
873
.pop_front()
874
.map(|gfn| PinnedPageInfo::new(gfn, 0))
875
};
876
877
pageinfo.map_or((UnpinResult::UnpinlistEmpty, None), |pageinfo| {
878
(
879
unpin_page(
880
&mut pinstate,
881
&self.vfio_container,
882
&self.mem,
883
self.dtt_level,
884
self.dtt_root,
885
dtt_iter,
886
pageinfo.gfn,
887
false,
888
),
889
Some(pageinfo),
890
)
891
})
892
}
893
894
fn lru_unpin_pages_in_loop(&mut self, unpin_limit: Option<u64>, new_gen: bool) -> u64 {
895
let mut not_unpinned_new_gen_pages = VecDeque::new();
896
let mut not_unpinned_old_gen_pages = VecDeque::new();
897
let mut unpinned_count = 0;
898
let has_limit = unpin_limit.is_some();
899
let limit_count = unpin_limit.unwrap_or(0);
900
let mut dtt_iter: DTTIter = Default::default();
901
902
// If has_limit is true but limit_count is 0, will not do the unpin
903
while !has_limit || unpinned_count != limit_count {
904
let (result, pinned_page) = self.lru_unpin_page(&mut dtt_iter, new_gen);
905
match result {
906
UnpinResult::UnpinlistEmpty => break,
907
UnpinResult::Unpinned => unpinned_count += 1,
908
UnpinResult::NotPinned => {}
909
UnpinResult::NotUnpinned => {
910
if let Some(mut page) = pinned_page {
911
if self.params.unpin_gen_threshold != 0 {
912
page.unpin_busy_cnt += 1;
913
// Unpin from new_gen queue but not
914
// successfully unpinned. Need to check
915
// the unpin_gen threshold. If reach, put
916
// it to old_gen queue.
917
// And if it is not from new_gen, directly
918
// put into old_gen queue.
919
if !new_gen || page.unpin_busy_cnt >= self.params.unpin_gen_threshold {
920
not_unpinned_old_gen_pages.push_back(page.gfn);
921
} else {
922
not_unpinned_new_gen_pages.push_back(page);
923
}
924
}
925
}
926
}
927
UnpinResult::FailedUnpin | UnpinResult::UnpinParked => {
928
// Although UnpinParked means we didn't actually try to unpin
929
// gfn, it's not worth specifically handing since parking is
930
// expected to be relatively rare.
931
if let Some(page) = pinned_page {
932
if new_gen {
933
not_unpinned_new_gen_pages.push_back(page);
934
} else {
935
not_unpinned_old_gen_pages.push_back(page.gfn);
936
}
937
}
938
if result == UnpinResult::UnpinParked {
939
thread::park();
940
}
941
}
942
}
943
}
944
945
if !not_unpinned_new_gen_pages.is_empty() {
946
let mut pinstate = self.pinstate.lock();
947
pinstate
948
.new_gen_pinned_pages
949
.append(&mut not_unpinned_new_gen_pages);
950
}
951
952
if !not_unpinned_old_gen_pages.is_empty() {
953
let mut pinstate = self.pinstate.lock();
954
pinstate
955
.old_gen_pinned_pages
956
.append(&mut not_unpinned_old_gen_pages);
957
}
958
959
unpinned_count
960
}
961
962
fn lru_unpin_pages(&mut self) {
963
let mut unpin_count = 0;
964
if self.params.unpin_gen_threshold != 0 {
965
self.unpin_gen_threshold += 1;
966
if self.unpin_gen_threshold == self.params.unpin_gen_threshold {
967
self.unpin_gen_threshold = 0;
968
// Try to unpin inactive queue first if reaches the thres hold
969
unpin_count = self.lru_unpin_pages_in_loop(self.params.unpin_limit, false);
970
}
971
}
972
// Unpin the new_gen queue with the updated unpin_limit after unpin old_gen queue
973
self.lru_unpin_pages_in_loop(
974
self.params
975
.unpin_limit
976
.map(|limit| limit.saturating_sub(unpin_count)),
977
true,
978
);
979
}
980
981
fn unpin_pages_in_range(&self, gfn: u64, count: u64) -> bool {
982
let mut dtt_iter: DTTIter = Default::default();
983
let mut index = 0;
984
while index != count {
985
let mut pinstate = self.pinstate.lock();
986
let result = unpin_page(
987
&mut pinstate,
988
&self.vfio_container,
989
&self.mem,
990
self.dtt_level,
991
self.dtt_root,
992
&mut dtt_iter,
993
gfn + index,
994
true,
995
);
996
drop(pinstate);
997
998
match result {
999
UnpinResult::Unpinned | UnpinResult::NotPinned => {}
1000
UnpinResult::UnpinParked => {
1001
thread::park();
1002
continue;
1003
}
1004
_ => {
1005
error!("coiommu: force unpin failed by {:?}", result);
1006
return false;
1007
}
1008
}
1009
index += 1;
1010
}
1011
true
1012
}
1013
}
1014
1015
pub struct CoIommuDev {
1016
config_regs: PciConfiguration,
1017
pci_address: Option<PciAddress>,
1018
mem: GuestMemory,
1019
coiommu_reg: CoIommuReg,
1020
endpoints: Vec<u16>,
1021
notifymap_mem: SafeDescriptor,
1022
notifymap_mmap: Arc<MemoryMapping>,
1023
notifymap_addr: Option<u64>,
1024
topologymap_mem: SafeDescriptor,
1025
topologymap_addr: Option<u64>,
1026
mmapped: bool,
1027
vm_memory_client: VmMemoryClient,
1028
pin_thread: Option<WorkerThread<PinWorker>>,
1029
unpin_thread: Option<WorkerThread<UnpinWorker>>,
1030
unpin_tube: Option<Tube>,
1031
ioevents: Vec<Event>,
1032
vfio_container: Arc<Mutex<VfioContainer>>,
1033
pinstate: Arc<Mutex<CoIommuPinState>>,
1034
params: CoIommuParameters,
1035
}
1036
1037
impl CoIommuDev {
    /// Creates a new CoIOMMU device.
    ///
    /// Builds the PCI config space, allocates the shared-memory regions that
    /// back the notify and topology BARs, seeds the topology map with the
    /// endpoint count followed by each endpoint id, and creates one ioevent
    /// per vCPU for the per-vCPU notify registers.
    ///
    /// # Errors
    /// Fails if the shared memory or its mapping cannot be created, or if
    /// the endpoint list does not fit into the topology map.
    pub fn new(
        mem: GuestMemory,
        vfio_container: Arc<Mutex<VfioContainer>>,
        vm_memory_client: VmMemoryClient,
        unpin_tube: Option<Tube>,
        endpoints: Vec<u16>,
        vcpu_count: u64,
        params: CoIommuParameters,
    ) -> Result<Self> {
        let config_regs = PciConfiguration::new(
            PCI_VENDOR_ID_COIOMMU,
            PCI_DEVICE_ID_COIOMMU,
            PciClassCode::Other,
            &PciOtherSubclass::Other,
            None, // No Programming interface.
            PciHeaderType::Device,
            PCI_VENDOR_ID_COIOMMU,
            PCI_DEVICE_ID_COIOMMU,
            COIOMMU_REVISION_ID,
        );

        // notifymap_mem is used as Bar2 for Guest to check if request is completed by coIOMMU.
        let notifymap_mem = SharedMemory::new("coiommu_notifymap", COIOMMU_NOTIFYMAP_SIZE as u64)
            .context(Error::CreateSharedMemory)?;
        let notifymap_mmap = Arc::new(
            MemoryMappingBuilder::new(COIOMMU_NOTIFYMAP_SIZE)
                .from_shared_memory(&notifymap_mem)
                .offset(0)
                .build()?,
        );

        // topologymap_mem is used as Bar4 for Guest to check which device is on top of coIOMMU.
        let topologymap_mem =
            SharedMemory::new("coiommu_topologymap", COIOMMU_TOPOLOGYMAP_SIZE as u64)
                .context(Error::CreateSharedMemory)?;
        let topologymap_mmap = Arc::new(
            MemoryMappingBuilder::new(COIOMMU_TOPOLOGYMAP_SIZE)
                .from_shared_memory(&topologymap_mem)
                .offset(0)
                .build()?,
        );

        // The topology map holds a leading u16 element count plus one u16
        // per endpoint; the whole table must fit in the shared region.
        ensure!(
            (endpoints.len() + 1) * mem::size_of::<u16>() <= COIOMMU_TOPOLOGYMAP_SIZE,
            "Coiommu: too many endpoints"
        );
        topologymap_mmap.write_obj::<u16>(endpoints.len() as u16, 0)?;
        for (index, endpoint) in endpoints.iter().enumerate() {
            topologymap_mmap.write_obj::<u16>(*endpoint, (index + 1) * mem::size_of::<u16>())?;
        }

        // One ioevent per vCPU; each backs a per-vCPU notify register in
        // BAR0 (registered in start_pin_thread).
        let mut ioevents = Vec::new();
        for _ in 0..vcpu_count {
            ioevents.push(Event::new().context("CoIommu failed to create event fd")?);
        }

        Ok(Self {
            config_regs,
            pci_address: None,
            mem,
            coiommu_reg: Default::default(),
            endpoints,
            notifymap_mem: notifymap_mem.into(),
            notifymap_mmap,
            notifymap_addr: None,
            topologymap_mem: topologymap_mem.into(),
            topologymap_addr: None,
            mmapped: false,
            vm_memory_client,
            pin_thread: None,
            unpin_thread: None,
            unpin_tube,
            ioevents,
            vfio_container,
            pinstate: Arc::new(Mutex::new(CoIommuPinState {
                new_gen_pinned_pages: VecDeque::new(),
                old_gen_pinned_pages: VecDeque::new(),
                unpin_thread_state: UnpinThreadState::Unparked,
                unpin_park_count: 0,
            })),
            params,
        })
    }

    /// Registers `size` bytes of `descriptor` (starting at `offset`) into
    /// the guest address space at physical address `gpa` with protection
    /// `prot`, via the VM memory client.
    fn register_mmap(
        &self,
        descriptor: SafeDescriptor,
        size: usize,
        offset: u64,
        gpa: u64,
        prot: Protection,
    ) -> Result<()> {
        let _region = self
            .vm_memory_client
            .register_memory(
                VmMemorySource::Descriptor {
                    descriptor,
                    offset,
                    size: size as u64,
                },
                VmMemoryDestination::GuestPhysicalAddress(gpa),
                prot,
                MemCacheType::CacheCoherent,
            )
            .context("register_mmap register_memory failed")?;
        Ok(())
    }

    /// Maps the notifymap (read-write) and topologymap (read-only) shared
    /// memory into the guest once their BAR addresses have been assigned.
    ///
    /// Idempotent via the `mmapped` flag. A mapping failure panics: the
    /// passthrough devices relying on CoIOMMU cannot work without these
    /// regions.
    fn mmap(&mut self) {
        if self.mmapped {
            return;
        }

        if let Some(gpa) = self.notifymap_addr {
            match self.register_mmap(
                self.notifymap_mem.try_clone().unwrap(),
                COIOMMU_NOTIFYMAP_SIZE,
                0,
                gpa,
                Protection::read_write(),
            ) {
                Ok(_) => {}
                Err(e) => {
                    panic!("{}: map notifymap failed: {}", self.debug_label(), e);
                }
            }
        }

        if let Some(gpa) = self.topologymap_addr {
            match self.register_mmap(
                self.topologymap_mem.try_clone().unwrap(),
                COIOMMU_TOPOLOGYMAP_SIZE,
                0,
                gpa,
                Protection::read(),
            ) {
                Ok(_) => {}
                Err(e) => {
                    panic!("{}: map topologymap failed: {}", self.debug_label(), e);
                }
            }
        }

        self.mmapped = true;
    }

    /// Spawns the pin and unpin worker threads if not already running.
    fn start_workers(&mut self) {
        if self.pin_thread.is_none() {
            self.start_pin_thread();
        }

        if self.unpin_thread.is_none() {
            self.start_unpin_thread();
        }
    }

    /// Starts the pin worker thread.
    ///
    /// Registers one ioevent per vCPU on the notify registers that follow
    /// `CoIommuReg` in BAR0 (one byte apart), then hands clones of the
    /// shared state to a `PinWorker` running on a dedicated thread.
    /// Ioevent registration failure is fatal (expect/panic).
    fn start_pin_thread(&mut self) {
        let mem = self.mem.clone();
        let endpoints = self.endpoints.to_vec();
        let notifymap_mmap = self.notifymap_mmap.clone();
        let dtt_root = self.coiommu_reg.dtt_root;
        let dtt_level = self.coiommu_reg.dtt_level;
        let ioevents: Vec<Event> = self
            .ioevents
            .iter()
            .map(|e| e.try_clone().unwrap())
            .collect();

        // The per-vCPU notify registers start right after CoIommuReg in
        // BAR0; register an ioevent on each so guest writes don't vmexit.
        let bar0 = self.config_regs.get_bar_addr(COIOMMU_MMIO_BAR);
        let notify_base = bar0 + mem::size_of::<CoIommuReg>() as u64;
        for (i, evt) in self.ioevents.iter().enumerate() {
            self.vm_memory_client
                .register_io_event(
                    evt.try_clone().expect("failed to clone event"),
                    notify_base + i as u64,
                    Datamatch::AnyLength,
                )
                .expect("failed to register ioevent");
        }

        let vfio_container = self.vfio_container.clone();
        let pinstate = self.pinstate.clone();
        let params = self.params;

        self.pin_thread = Some(WorkerThread::start("coiommu_pin", move |kill_evt| {
            let mut worker = PinWorker {
                mem,
                endpoints,
                notifymap_mmap,
                dtt_root,
                dtt_level,
                ioevents,
                vfio_container,
                pinstate,
                params,
            };
            worker.run(kill_evt);
            worker
        }));
    }

    /// Starts the unpin worker thread.
    ///
    /// Moves `unpin_tube` out of `self` into the worker, so it is only
    /// available to the first worker started.
    fn start_unpin_thread(&mut self) {
        let mem = self.mem.clone();
        let dtt_root = self.coiommu_reg.dtt_root;
        let dtt_level = self.coiommu_reg.dtt_level;
        let vfio_container = self.vfio_container.clone();
        let unpin_tube = self.unpin_tube.take();
        let pinstate = self.pinstate.clone();
        let params = self.params;
        self.unpin_thread = Some(WorkerThread::start("coiommu_unpin", move |kill_evt| {
            let mut worker = UnpinWorker {
                mem,
                dtt_level,
                dtt_root,
                vfio_container,
                unpin_tube,
                pinstate,
                params,
                unpin_gen_threshold: 0,
            };
            worker.run(kill_evt);
            worker
        }));
    }

    /// Allocates a size-aligned, prefetchable, 64-bit MMIO region of `size`
    /// bytes and records it as PCI BAR `bar_num`, returning the allocated
    /// guest address.
    fn allocate_bar_address(
        &mut self,
        resources: &mut SystemAllocator,
        address: PciAddress,
        size: u64,
        bar_num: u8,
        name: &str,
    ) -> PciResult<u64> {
        let addr = resources
            .allocate_mmio(
                size,
                Alloc::PciBar {
                    bus: address.bus,
                    dev: address.dev,
                    func: address.func,
                    bar: bar_num,
                },
                name.to_string(),
                AllocOptions::new().prefetchable(true).align(size),
            )
            .map_err(|e| PciDeviceError::IoAllocationFailed(size, e))?;

        let bar = PciBarConfiguration::new(
            bar_num as usize,
            size,
            PciBarRegionType::Memory64BitRegion,
            PciBarPrefetchable::Prefetchable,
        )
        .set_address(addr);

        self.config_regs
            .add_pci_bar(bar)
            .map_err(|e| PciDeviceError::IoRegistrationFailed(addr, e))?;

        Ok(addr)
    }

    /// Handles a guest read of the `CoIommuReg` registers in BAR0.
    ///
    /// Only 8-byte reads at 8-byte-aligned offsets are valid; anything else
    /// logs an error and leaves `data` untouched. Register 0 is dtt_root,
    /// register 1 is cmd, register 2 is dtt_level.
    fn read_mmio(&mut self, offset: u64, data: &mut [u8]) {
        if offset >= mem::size_of::<CoIommuReg>() as u64 {
            error!(
                "{}: read_mmio: invalid offset 0x{:x}",
                self.debug_label(),
                offset
            );
            return;
        }

        // Sanity check, must be 64bit aligned accessing
        if offset % 8 != 0 || data.len() != 8 {
            error!(
                "{}: read_mmio: unaligned accessing: offset 0x{:x} actual len {} expect len 8",
                self.debug_label(),
                offset,
                data.len()
            );
            return;
        }

        let v = match offset / 8 {
            0 => self.coiommu_reg.dtt_root,
            1 => self.coiommu_reg.cmd,
            2 => self.coiommu_reg.dtt_level,
            _ => return,
        };

        data.copy_from_slice(&v.to_ne_bytes());
    }

    /// Handles a guest write to BAR0.
    ///
    /// Offsets at or past the end of `CoIommuReg` are the per-vCPU notify
    /// registers (one byte each): such a write signals the matching ioevent.
    /// Writes inside `CoIommuReg` must be 8-byte-aligned 8-byte accesses;
    /// dtt_root and dtt_level are write-once (ignored once nonzero) and the
    /// cmd register executes activate/deactivate/park/unpark commands.
    fn write_mmio(&mut self, offset: u64, data: &[u8]) {
        let mmio_len = mem::size_of::<CoIommuReg>() as u64;
        if offset >= mmio_len {
            if data.len() != 1 {
                error!(
                    "{}: write_mmio: unaligned accessing: offset 0x{:x} actual len {} expect len 1",
                    self.debug_label(),
                    offset,
                    data.len()
                );
                return;
            }

            // Normally this path is not taken: the per-vCPU notify registers
            // are monitored through ioevents, so guest writes to them do not
            // trap here. Notify registers beyond the ioevent-covered range
            // are not used by the frontend driver. If the driver lands here
            // anyway, handle the write minimally so the driver is not
            // blocked, and log an error.
            let index = (offset - mmio_len) as usize;
            if let Some(event) = self.ioevents.get(index) {
                let _ = event.signal();
            } else {
                // NOTE(review): assumes `index` stays within the notifymap;
                // an offset past its end would panic in write_obj — confirm
                // the BAR0 size bounds this.
                self.notifymap_mmap
                    .write_obj::<u64>(0, index * mem::size_of::<u64>())
                    .unwrap();
                error!(
                    "{}: No page will be pinned as driver is accessing unused trigger register: offset 0x{:x}",
                    self.debug_label(),
                    offset
                );
            }
            return;
        }

        // Sanity check, must be 64bit aligned accessing for CoIommuReg
        if offset % 8 != 0 || data.len() != 8 {
            error!(
                "{}: write_mmio: unaligned accessing: offset 0x{:x} actual len {} expect len 8",
                self.debug_label(),
                offset,
                data.len()
            );
            return;
        }

        let index = offset / 8;
        let v = u64::from_ne_bytes(data.try_into().unwrap());
        match index {
            0 => {
                // dtt_root is write-once: ignore attempts to change it.
                if self.coiommu_reg.dtt_root == 0 {
                    self.coiommu_reg.dtt_root = v;
                }
            }
            1 => match v {
                // Deactivate can happen if the frontend driver in the guest
                // fails during probing or if the CoIommu device is removed
                // by the guest. Neither of these cases is expected, and if
                // either happens the guest will be non-functional due to
                // pass-through devices which rely on CoIommu not working.
                // So just fail hard and panic.
                COIOMMU_CMD_DEACTIVATE => {
                    panic!("{}: Deactivate is not supported", self.debug_label())
                }
                COIOMMU_CMD_ACTIVATE => {
                    // Workers only start once both DTT registers are set.
                    if self.coiommu_reg.dtt_root != 0 && self.coiommu_reg.dtt_level != 0 {
                        self.start_workers();
                    }
                }
                COIOMMU_CMD_PARK_UNPIN => {
                    // Park requests nest: count them so unpark only takes
                    // effect after every park has been matched.
                    let mut pinstate = self.pinstate.lock();
                    pinstate.unpin_thread_state = UnpinThreadState::Parked;
                    if let Some(v) = pinstate.unpin_park_count.checked_add(1) {
                        pinstate.unpin_park_count = v;
                    } else {
                        panic!("{}: Park request overflowing", self.debug_label());
                    }
                }
                COIOMMU_CMD_UNPARK_UNPIN => {
                    let mut pinstate = self.pinstate.lock();
                    if pinstate.unpin_thread_state == UnpinThreadState::Parked {
                        if let Some(v) = pinstate.unpin_park_count.checked_sub(1) {
                            pinstate.unpin_park_count = v;
                            if pinstate.unpin_park_count == 0 {
                                if let Some(worker_thread) = &self.unpin_thread {
                                    worker_thread.thread().unpark();
                                }
                                pinstate.unpin_thread_state = UnpinThreadState::Unparked;
                            }
                        } else {
                            error!("{}: Park count is already reached to 0", self.debug_label());
                        }
                    }
                }
                _ => {}
            },
            2 => {
                // dtt_level is write-once as well.
                if self.coiommu_reg.dtt_level == 0 {
                    self.coiommu_reg.dtt_level = v;
                }
            }
            _ => {}
        }
    }
}
impl PciDevice for CoIommuDev {
1438
fn debug_label(&self) -> String {
1439
"CoIommu".to_owned()
1440
}
1441
1442
fn allocate_address(&mut self, resources: &mut SystemAllocator) -> PciResult<PciAddress> {
1443
if self.pci_address.is_none() {
1444
self.pci_address = resources.allocate_pci(0, self.debug_label());
1445
}
1446
self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
1447
}
1448
1449
fn allocate_io_bars(&mut self, resources: &mut SystemAllocator) -> PciResult<Vec<BarRange>> {
1450
let address = self
1451
.pci_address
1452
.expect("allocate_address must be called prior to allocate_io_bars");
1453
1454
// Allocate one bar for the structures pointed to by the capability structures.
1455
let mut ranges: Vec<BarRange> = Vec::new();
1456
1457
let mmio_addr = self.allocate_bar_address(
1458
resources,
1459
address,
1460
COIOMMU_MMIO_BAR_SIZE,
1461
COIOMMU_MMIO_BAR as u8,
1462
"coiommu-mmiobar",
1463
)?;
1464
1465
ranges.push(BarRange {
1466
addr: mmio_addr,
1467
size: COIOMMU_MMIO_BAR_SIZE,
1468
prefetchable: false,
1469
});
1470
1471
Ok(ranges)
1472
}
1473
1474
fn allocate_device_bars(
1475
&mut self,
1476
resources: &mut SystemAllocator,
1477
) -> PciResult<Vec<BarRange>> {
1478
let address = self
1479
.pci_address
1480
.expect("allocate_address must be called prior to allocate_device_bars");
1481
1482
let mut ranges: Vec<BarRange> = Vec::new();
1483
1484
let topologymap_addr = self.allocate_bar_address(
1485
resources,
1486
address,
1487
COIOMMU_TOPOLOGYMAP_SIZE as u64,
1488
COIOMMU_TOPOLOGYMAP_BAR,
1489
"coiommu-topology",
1490
)?;
1491
self.topologymap_addr = Some(topologymap_addr);
1492
ranges.push(BarRange {
1493
addr: topologymap_addr,
1494
size: COIOMMU_TOPOLOGYMAP_SIZE as u64,
1495
prefetchable: false,
1496
});
1497
1498
let notifymap_addr = self.allocate_bar_address(
1499
resources,
1500
address,
1501
COIOMMU_NOTIFYMAP_SIZE as u64,
1502
COIOMMU_NOTIFYMAP_BAR as u8,
1503
"coiommu-notifymap",
1504
)?;
1505
self.notifymap_addr = Some(notifymap_addr);
1506
ranges.push(BarRange {
1507
addr: notifymap_addr,
1508
size: COIOMMU_NOTIFYMAP_SIZE as u64,
1509
prefetchable: false,
1510
});
1511
1512
Ok(ranges)
1513
}
1514
1515
fn read_config_register(&self, reg_idx: usize) -> u32 {
1516
self.config_regs.read_reg(reg_idx)
1517
}
1518
1519
fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) {
1520
if reg_idx == COMMAND_REG
1521
&& data.len() == 2
1522
&& data[0] & COMMAND_REG_MEMORY_SPACE_MASK as u8 != 0
1523
&& !self.mmapped
1524
{
1525
self.mmap();
1526
}
1527
1528
self.config_regs.write_reg(reg_idx, offset, data);
1529
}
1530
1531
fn keep_rds(&self) -> Vec<RawDescriptor> {
1532
let mut rds = vec![
1533
self.vfio_container.lock().as_raw_descriptor(),
1534
self.vm_memory_client.as_raw_descriptor(),
1535
self.notifymap_mem.as_raw_descriptor(),
1536
self.topologymap_mem.as_raw_descriptor(),
1537
];
1538
if let Some(unpin_tube) = &self.unpin_tube {
1539
rds.push(unpin_tube.as_raw_descriptor());
1540
}
1541
rds.extend(self.ioevents.iter().map(Event::as_raw_descriptor));
1542
rds
1543
}
1544
1545
fn read_bar(&mut self, bar_index: PciBarIndex, offset: u64, data: &mut [u8]) {
1546
match bar_index {
1547
COIOMMU_MMIO_BAR => self.read_mmio(offset, data),
1548
COIOMMU_NOTIFYMAP_BAR => {
1549
// With coiommu device activated, the accessing the notifymap bar
1550
// won't cause vmexit. If goes here, means the coiommu device is
1551
// deactivated, and will not do the pin/unpin work. Thus no need
1552
// to handle this notifymap read.
1553
}
1554
_ => {}
1555
}
1556
}
1557
1558
fn write_bar(&mut self, bar_index: PciBarIndex, offset: u64, data: &[u8]) {
1559
match bar_index {
1560
COIOMMU_MMIO_BAR => self.write_mmio(offset, data),
1561
COIOMMU_NOTIFYMAP_BAR => {
1562
// With coiommu device activated, the accessing the notifymap bar
1563
// won't cause vmexit. If goes here, means the coiommu device is
1564
// deactivated, and will not do the pin/unpin work. Thus no need
1565
// to handle this notifymap write.
1566
}
1567
_ => {}
1568
}
1569
}
1570
1571
fn get_bar_configuration(&self, bar_num: usize) -> Option<PciBarConfiguration> {
1572
self.config_regs.get_bar_configuration(bar_num)
1573
}
1574
}
// CoIommu has no device-specific suspend/resume handling; rely on the
// `Suspendable` trait's default method implementations.
impl Suspendable for CoIommuDev {}