Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
google
GitHub Repository: google/crosvm
Path: blob/main/devices/src/pci/msix.rs
5394 views
1
// Copyright 2019 The ChromiumOS Authors
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
use std::convert::TryInto;
6
7
use anyhow::Context;
8
use base::error;
9
use base::info;
10
use base::AsRawDescriptor;
11
use base::Error as SysError;
12
use base::Event;
13
use base::RawDescriptor;
14
use base::Tube;
15
use base::TubeError;
16
use bit_field::*;
17
use remain::sorted;
18
use serde::Deserialize;
19
use serde::Serialize;
20
use snapshot::AnySnapshot;
21
use thiserror::Error;
22
use vm_control::PciId;
23
use vm_control::VmIrqRequest;
24
use vm_control::VmIrqResponse;
25
use zerocopy::FromBytes;
26
use zerocopy::Immutable;
27
use zerocopy::IntoBytes;
28
use zerocopy::KnownLayout;
29
30
use crate::pci::pci_configuration::PciCapConfig;
31
use crate::pci::pci_configuration::PciCapConfigWriteResult;
32
use crate::pci::PciCapability;
33
use crate::pci::PciCapabilityID;
34
35
// Upper bound on vectors a single device may expose. The MSI-X Table Size
// field is N-1 encoded, allowing at most 2048 entries.
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
// Each MSI-X table entry occupies 16 bytes (4 DWORDs).
pub const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
// Each Pending Bit Array entry occupies 8 bytes (one QWORD).
pub const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
// Number of pending bits packed into a single PBA entry (u64).
pub const BITS_PER_PBA_ENTRY: usize = 64;
// Message Control word bit 14: Function Mask.
const FUNCTION_MASK_BIT: u16 = 0x4000;
// Message Control word bit 15: MSI-X Enable.
const MSIX_ENABLE_BIT: u16 = 0x8000;
// Vector Control DWORD bit 0: per-vector Mask Bit.
const MSIX_TABLE_ENTRY_MASK_BIT: u32 = 0x1;
42
43
/// One guest-visible MSI-X table entry (16 bytes: 4 DWORDs).
#[derive(Serialize, Deserialize, Clone, Default)]
struct MsixTableEntry {
    // Low 32 bits of the Message Address.
    msg_addr_lo: u32,
    // High 32 bits of the Message Address.
    msg_addr_hi: u32,
    // Message Data written during the MSI memory-write transaction.
    msg_data: u32,
    // Vector Control DWORD; only bit 0 (Mask Bit) is meaningful here.
    vector_ctl: u32,
}
50
51
impl MsixTableEntry {
52
fn masked(&self) -> bool {
53
self.vector_ctl & MSIX_TABLE_ENTRY_MASK_BIT == MSIX_TABLE_ENTRY_MASK_BIT
54
}
55
}
56
57
/// A vector's event fd paired with the GSI it was registered under.
struct IrqfdGsi {
    // Event signalled to inject the interrupt into the guest.
    irqfd: Event,
    // Global System Interrupt number allocated by the irqchip.
    gsi: u32,
}
61
62
/// Wrapper over MSI-X Capability Structure and MSI-X Tables
pub struct MsixConfig {
    // Guest-visible MSI-X table; one entry per vector.
    table_entries: Vec<MsixTableEntry>,
    // Pending Bit Array, 64 pending bits packed per u64 entry.
    pba_entries: Vec<u64>,
    // Per-vector irqfd + GSI; None until the vector has been enabled and
    // registered with the irqchip.
    irq_vec: Vec<Option<IrqfdGsi>>,
    // Function Mask bit of the Message Control word.
    masked: bool,
    // MSI-X Enable bit of the Message Control word.
    enabled: bool,
    // Tube used to exchange VmIrqRequest/VmIrqResponse with the irqchip.
    msi_device_socket: Tube,
    // Number of vectors (length of table_entries and irq_vec).
    msix_num: u16,
    // Identifier forwarded to the irqchip when allocating MSIs (see PciId::from).
    pci_id: u32,
    // PCI address of the device; required before config writes (used by
    // AddMsiRoute on aarch64).
    pci_address: Option<resources::PciAddress>,
    // Device name used for logging and MSI allocation bookkeeping.
    device_name: String,
}
75
76
/// Serializable image of [MsixConfig]. Note irqfds themselves are not
/// snapshotted; only the GSI numbers are, and the fds are re-created on
/// restore.
#[derive(Serialize, Deserialize)]
struct MsixConfigSnapshot {
    table_entries: Vec<MsixTableEntry>,
    pba_entries: Vec<u64>,
    /// Just like MsixConfig::irq_vec, but only the GSI.
    irq_gsi_vec: Vec<Option<u32>>,
    masked: bool,
    enabled: bool,
    msix_num: u16,
    pci_id: u32,
    pci_address: Option<resources::PciAddress>,
    device_name: String,
}
89
90
/// Errors produced by [MsixConfig] MSI allocation, routing, and
/// snapshot/restore. Variants are kept alphabetized by `#[sorted]`.
#[sorted]
#[derive(Error, Debug)]
pub enum MsixError {
    #[error("AddMsiRoute failed: {0}")]
    AddMsiRoute(SysError),
    #[error("failed to receive AddMsiRoute response: {0}")]
    AddMsiRouteRecv(TubeError),
    #[error("failed to send AddMsiRoute request: {0}")]
    AddMsiRouteSend(TubeError),
    #[error("AllocateOneMsi failed: {0}")]
    AllocateOneMsi(SysError),
    #[error("failed to receive AllocateOneMsi response: {0}")]
    AllocateOneMsiRecv(TubeError),
    #[error("failed to send AllocateOneMsi request: {0}")]
    AllocateOneMsiSend(TubeError),
    #[error("failed to deserialize snapshot: {0}")]
    DeserializationFailed(anyhow::Error),
    #[error("invalid vector length in snapshot: {0}")]
    InvalidVectorLength(std::num::TryFromIntError),
    #[error("ReleaseOneIrq failed: {0}")]
    ReleaseOneIrq(base::Error),
    #[error("failed to receive ReleaseOneIrq response: {0}")]
    ReleaseOneIrqRecv(TubeError),
    #[error("failed to send ReleaseOneIrq request: {0}")]
    ReleaseOneIrqSend(TubeError),
}
116
117
// Shorthand for results of MSI-X plumbing operations.
type MsixResult<T> = std::result::Result<T, MsixError>;
118
119
/// Outcome of a guest write to MSI-X state, reported back to the caller so it
/// can react to mask-state changes.
#[derive(Copy, Clone)]
pub enum MsixStatus {
    /// The function-wide mask state changed.
    Changed,
    /// A single table entry (by index) changed mask state.
    EntryChanged(usize),
    /// The write did not change any mask state.
    NothingToDo,
}
125
126
// Allows MsixStatus to be returned from PciCapConfig::write_reg.
impl PciCapConfigWriteResult for MsixStatus {}
127
128
impl MsixConfig {
129
pub fn new(msix_vectors: u16, vm_socket: Tube, pci_id: u32, device_name: String) -> Self {
130
assert!(msix_vectors <= MAX_MSIX_VECTORS_PER_DEVICE);
131
132
let mut table_entries: Vec<MsixTableEntry> = Vec::new();
133
table_entries.resize_with(msix_vectors as usize, Default::default);
134
table_entries
135
.iter_mut()
136
.for_each(|entry| entry.vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT);
137
let mut pba_entries: Vec<u64> = Vec::new();
138
let num_pba_entries: usize = (msix_vectors as usize).div_ceil(BITS_PER_PBA_ENTRY);
139
pba_entries.resize_with(num_pba_entries, Default::default);
140
141
let mut irq_vec = Vec::new();
142
irq_vec.resize_with(msix_vectors.into(), || None::<IrqfdGsi>);
143
144
MsixConfig {
145
table_entries,
146
pba_entries,
147
irq_vec,
148
masked: false,
149
enabled: false,
150
msi_device_socket: vm_socket,
151
msix_num: msix_vectors,
152
pci_id,
153
pci_address: None,
154
device_name,
155
}
156
}
157
158
/// PCI address of the associated device.
///
/// Must be called before any config writes: `add_msi_route` panics if the
/// address has not been set.
pub fn set_pci_address(&mut self, pci_address: resources::PciAddress) {
    self.pci_address = Some(pci_address);
}
162
163
/// Get the number of MSI-X vectors in this configuration.
pub fn num_vectors(&self) -> u16 {
    self.msix_num
}
167
168
/// Check whether the Function Mask bit in Message Control word in set or not.
/// if 1, all of the vectors associated with the function are masked,
/// regardless of their per-vector Mask bit states.
/// If 0, each vector's Mask bit determines whether the vector is masked or not.
pub fn masked(&self) -> bool {
    self.masked
}
175
176
/// Check whether the Function Mask bit in MSIX table Message Control
177
/// word in set or not.
178
/// If true, the vector is masked.
179
/// If false, the vector is unmasked.
180
pub fn table_masked(&self, index: usize) -> bool {
181
if index >= self.table_entries.len() {
182
true
183
} else {
184
self.table_entries[index].masked()
185
}
186
}
187
188
/// Check whether the MSI-X Enable bit in Message Control word in set or not.
/// if 1, the function is permitted to use MSI-X to request service.
pub fn enabled(&self) -> bool {
    self.enabled
}
193
194
/// Read the MSI-X Capability Structure.
195
/// The top 2 bits in Message Control word are emulated and all other
196
/// bits are read only.
197
pub fn read_msix_capability(&self, data: u32) -> u32 {
198
let mut msg_ctl = (data >> 16) as u16;
199
msg_ctl &= !(MSIX_ENABLE_BIT | FUNCTION_MASK_BIT);
200
201
if self.enabled {
202
msg_ctl |= MSIX_ENABLE_BIT;
203
}
204
if self.masked {
205
msg_ctl |= FUNCTION_MASK_BIT;
206
}
207
(msg_ctl as u32) << 16 | (data & u16::MAX as u32)
208
}
209
210
/// Write to the MSI-X Capability Structure.
/// Only the top 2 bits in Message Control Word are writable.
///
/// Returns `MsixStatus::Changed` when the function-wide mask state flipped,
/// otherwise `MsixStatus::NothingToDo`.
pub fn write_msix_capability(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
    // Only a 2-byte write at offset 2 (the Message Control word) is valid.
    if offset == 2 && data.len() == 2 {
        let reg = u16::from_le_bytes([data[0], data[1]]);
        let old_masked = self.masked;
        let old_enabled = self.enabled;

        self.masked = (reg & FUNCTION_MASK_BIT) == FUNCTION_MASK_BIT;
        self.enabled = (reg & MSIX_ENABLE_BIT) == MSIX_ENABLE_BIT;

        // On the disabled -> enabled transition, wire up all eligible
        // vectors; roll back the enable on failure.
        if !old_enabled && self.enabled {
            if let Err(e) = self.msix_enable_all() {
                error!("failed to enable MSI-X: {}", e);
                self.enabled = false;
            }
        }

        // If the Function Mask bit was set, and has just been cleared, it's
        // important to go through the entire PBA to check if there was any
        // pending MSI-X message to inject, given that the vector is not
        // masked.
        if old_masked && !self.masked {
            // The table is cloned so the `&mut self` injection call below
            // doesn't conflict with the iteration borrow.
            for (index, entry) in self.table_entries.clone().iter().enumerate() {
                if !entry.masked() && self.get_pba_bit(index as u16) == 1 {
                    self.inject_msix_and_clear_pba(index);
                }
            }
            return MsixStatus::Changed;
        } else if !old_masked && self.masked {
            return MsixStatus::Changed;
        }
    } else {
        error!(
            "invalid write to MSI-X Capability Structure offset {:x}",
            offset
        );
    }
    MsixStatus::NothingToDo
}
250
251
/// Create a snapshot of the current MsixConfig struct for use in
/// snapshotting.
///
/// Only the GSI numbers of active vectors are captured (irq_gsi_vec); the
/// irqfds themselves are re-created by [MsixConfig::restore].
pub fn snapshot(&mut self) -> anyhow::Result<AnySnapshot> {
    AnySnapshot::to_any(MsixConfigSnapshot {
        table_entries: self.table_entries.clone(),
        pba_entries: self.pba_entries.clone(),
        masked: self.masked,
        enabled: self.enabled,
        msix_num: self.msix_num,
        pci_id: self.pci_id,
        pci_address: self.pci_address,
        device_name: self.device_name.clone(),
        irq_gsi_vec: self
            .irq_vec
            .iter()
            .map(|irq_opt| irq_opt.as_ref().map(|irq| irq.gsi))
            .collect(),
    })
    .context("failed to serialize MsixConfigSnapshot")
}
271
272
/// Restore a MsixConfig struct based on a snapshot. In short, this will
/// restore all data exposed via MMIO, and recreate all MSI-X vectors (they
/// will be re-wired to the irq chip).
pub fn restore(&mut self, snapshot: AnySnapshot) -> MsixResult<()> {
    let snapshot: MsixConfigSnapshot =
        AnySnapshot::from_any(snapshot).map_err(MsixError::DeserializationFailed)?;

    // Overwrite all MMIO-visible state from the snapshot.
    self.table_entries = snapshot.table_entries;
    self.pba_entries = snapshot.pba_entries;
    self.masked = snapshot.masked;
    self.enabled = snapshot.enabled;
    self.msix_num = snapshot.msix_num;
    self.pci_id = snapshot.pci_id;
    self.pci_address = snapshot.pci_address;
    self.device_name = snapshot.device_name;

    // Drop any pre-existing vectors (warm restore) before re-allocating at
    // the snapshotted GSIs.
    self.msix_release_all()?;
    self.irq_vec
        .resize_with(snapshot.irq_gsi_vec.len(), || None::<IrqfdGsi>);
    for (vector, gsi) in snapshot.irq_gsi_vec.iter().enumerate() {
        if let Some(gsi_num) = gsi {
            self.msix_restore_one(vector, *gsi_num)?;
        } else {
            info!(
                "skipping restore of vector {} for device {}",
                vector, self.device_name
            );
        }
    }
    Ok(())
}
303
304
/// Restore the specified MSI-X vector.
///
/// Note: we skip the checks from [MsixConfig::msix_enable_one] because for
/// an interrupt to be present in [MsixConfigSnapshot::irq_gsi_vec], it must
/// have passed those checks.
fn msix_restore_one(&mut self, index: usize, gsi: u32) -> MsixResult<()> {
    let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?;
    // Unlike msix_enable_one, the GSI is pinned to its snapshotted value.
    let request = VmIrqRequest::AllocateOneMsiAtGsi {
        irqfd,
        gsi,
        device_id: PciId::from(self.pci_id).into(),
        queue_id: index,
        device_name: self.device_name.clone(),
    };
    self.msi_device_socket
        .send(&request)
        .map_err(MsixError::AllocateOneMsiSend)?;
    if let VmIrqResponse::Err(e) = self
        .msi_device_socket
        .recv()
        .map_err(MsixError::AllocateOneMsiRecv)?
    {
        return Err(MsixError::AllocateOneMsi(e));
    };

    // The irqfd was moved into the request above; pull it back out so it
    // can be stored next to its GSI.
    self.irq_vec[index] = Some(IrqfdGsi {
        irqfd: match request {
            VmIrqRequest::AllocateOneMsiAtGsi { irqfd, .. } => irqfd,
            _ => unreachable!(),
        },
        gsi,
    });
    self.add_msi_route(index as u16, gsi)?;
    Ok(())
}
339
340
/// On warm restore, there could already be MSIs registered. We need to
/// release them in case the routing has changed (e.g. different
/// data <-> GSI).
fn msix_release_all(&mut self) -> MsixResult<()> {
    // drain(..) empties irq_vec; flatten skips vectors that were never
    // enabled (None entries).
    for irqfd_gsi in self.irq_vec.drain(..).flatten() {
        let request = VmIrqRequest::ReleaseOneIrq {
            gsi: irqfd_gsi.gsi,
            irqfd: irqfd_gsi.irqfd,
        };

        self.msi_device_socket
            .send(&request)
            .map_err(MsixError::ReleaseOneIrqSend)?;
        if let VmIrqResponse::Err(e) = self
            .msi_device_socket
            .recv()
            .map_err(MsixError::ReleaseOneIrqRecv)?
        {
            return Err(MsixError::ReleaseOneIrq(e));
        }
    }
    Ok(())
}
363
364
/// Registers the address/data pair of table entry `index` as the MSI route
/// for `gsi` with the irqchip. A zero message address is treated as
/// "not yet programmed" and skipped.
///
/// Panics if set_pci_address() has not been called.
fn add_msi_route(&mut self, index: u16, gsi: u32) -> MsixResult<()> {
    // Read the 64-bit message address (entry offset 0) and 32-bit message
    // data (entry offset 8) back out of the emulated table.
    let mut data: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
    self.read_msix_table((index * 16).into(), data.as_mut());
    let msi_address: u64 = u64::from_le_bytes(data);
    let mut data: [u8; 4] = [0, 0, 0, 0];
    self.read_msix_table((index * 16 + 8).into(), data.as_mut());
    let msi_data: u32 = u32::from_le_bytes(data);

    if msi_address == 0 {
        return Ok(());
    }

    // Only used on aarch64, but make sure it is initialized correctly on all archs for better
    // test coverage.
    #[allow(unused_variables)]
    let pci_address = self
        .pci_address
        .expect("MsixConfig: must call set_pci_address before config writes");

    self.msi_device_socket
        .send(&VmIrqRequest::AddMsiRoute {
            gsi,
            msi_address,
            msi_data,
            #[cfg(target_arch = "aarch64")]
            pci_address,
        })
        .map_err(MsixError::AddMsiRouteSend)?;
    if let VmIrqResponse::Err(e) = self
        .msi_device_socket
        .recv()
        .map_err(MsixError::AddMsiRouteRecv)?
    {
        return Err(MsixError::AddMsiRoute(e));
    }
    Ok(())
}
401
402
// Enable MSI-X
403
fn msix_enable_all(&mut self) -> MsixResult<()> {
404
for index in 0..self.irq_vec.len() {
405
self.msix_enable_one(index)?;
406
}
407
Ok(())
408
}
409
410
// Use a new MSI-X vector
// Create a new eventfd and bind them to a new msi
fn msix_enable_one(&mut self, index: usize) -> MsixResult<()> {
    // Nothing to do if the vector is already wired up, or if it cannot
    // currently deliver interrupts (function disabled, function masked,
    // or this vector masked).
    if self.irq_vec[index].is_some()
        || !self.enabled()
        || self.masked()
        || self.table_masked(index)
    {
        return Ok(());
    }
    let irqfd = Event::new().map_err(MsixError::AllocateOneMsi)?;
    let request = VmIrqRequest::AllocateOneMsi {
        irqfd,
        device_id: vm_control::PciId::from(self.pci_id).into(),
        queue_id: index,
        device_name: self.device_name.clone(),
    };
    self.msi_device_socket
        .send(&request)
        .map_err(MsixError::AllocateOneMsiSend)?;
    // The irqchip picks the GSI for us, unlike msix_restore_one.
    let irq_num: u32 = match self
        .msi_device_socket
        .recv()
        .map_err(MsixError::AllocateOneMsiRecv)?
    {
        VmIrqResponse::AllocateOneMsi { gsi } => gsi,
        VmIrqResponse::Err(e) => return Err(MsixError::AllocateOneMsi(e)),
        _ => unreachable!(),
    };
    // The irqfd was moved into the request above; pull it back out so it
    // can be stored next to the allocated GSI.
    self.irq_vec[index] = Some(IrqfdGsi {
        irqfd: match request {
            VmIrqRequest::AllocateOneMsi { irqfd, .. } => irqfd,
            _ => unreachable!(),
        },
        gsi: irq_num,
    });

    self.add_msi_route(index as u16, irq_num)?;
    Ok(())
}
450
451
/// Read MSI-X table
/// # Arguments
/// * 'offset' - the offset within the MSI-X Table
/// * 'data' - used to store the read results
///
/// For all accesses to MSI-X Table and MSI-X PBA fields, software must use aligned full
/// DWORD or aligned full QWORD transactions; otherwise, the result is undefined.
///
/// location: DWORD3 DWORD2 DWORD1 DWORD0
/// entry 0: Vector Control Msg Data Msg Upper Addr Msg Addr
/// entry 1: Vector Control Msg Data Msg Upper Addr Msg Addr
/// entry 2: Vector Control Msg Data Msg Upper Addr Msg Addr
/// ...
pub fn read_msix_table(&self, offset: u64, data: &mut [u8]) {
    // Each entry is 16 bytes: split into entry index and offset within it.
    let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
    let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

    if index >= self.table_entries.len() {
        error!("invalid MSI-X table index {}", index);
        return;
    }

    match data.len() {
        4 => {
            // DWORD read: one field, selected by the in-entry offset.
            let value = match modulo_offset {
                0x0 => self.table_entries[index].msg_addr_lo,
                0x4 => self.table_entries[index].msg_addr_hi,
                0x8 => self.table_entries[index].msg_data,
                0xc => self.table_entries[index].vector_ctl,
                _ => {
                    error!("invalid offset");
                    0
                }
            };

            data.copy_from_slice(&value.to_le_bytes());
        }
        8 => {
            // QWORD read: two adjacent fields combined little-endian.
            let value = match modulo_offset {
                0x0 => {
                    (u64::from(self.table_entries[index].msg_addr_hi) << 32)
                        | u64::from(self.table_entries[index].msg_addr_lo)
                }
                0x8 => {
                    (u64::from(self.table_entries[index].vector_ctl) << 32)
                        | u64::from(self.table_entries[index].msg_data)
                }
                _ => {
                    error!("invalid offset");
                    0
                }
            };

            data.copy_from_slice(&value.to_le_bytes());
        }
        _ => error!("invalid data length"),
    };
}
509
510
/// Write to MSI-X table
///
/// Message Address: the contents of this field specifies the address
/// for the memory write transaction; different MSI-X vectors have
/// different Message Address values
/// Message Data: the contents of this field specifies the data driven
/// on AD\[31::00\] during the memory write transaction's data phase.
/// Vector Control: only bit 0 (Mask Bit) is not reserved: when this bit
/// is set, the function is prohibited from sending a message using
/// this MSI-X Table entry.
pub fn write_msix_table(&mut self, offset: u64, data: &[u8]) -> MsixStatus {
    // Each entry is 16 bytes: split into entry index and offset within it.
    let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
    let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

    if index >= self.table_entries.len() {
        error!("invalid MSI-X table index {}", index);
        return MsixStatus::NothingToDo;
    }

    // Store the value of the entry before modification
    let old_entry = self.table_entries[index].clone();

    match data.len() {
        4 => {
            // DWORD write updates a single field of the entry.
            let value = u32::from_le_bytes(data.try_into().unwrap());
            match modulo_offset {
                0x0 => self.table_entries[index].msg_addr_lo = value,
                0x4 => self.table_entries[index].msg_addr_hi = value,
                0x8 => self.table_entries[index].msg_data = value,
                0xc => self.table_entries[index].vector_ctl = value,
                _ => error!("invalid offset"),
            };
        }
        8 => {
            // QWORD write updates two adjacent fields (little-endian).
            let value = u64::from_le_bytes(data.try_into().unwrap());
            match modulo_offset {
                0x0 => {
                    self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
                    self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
                }
                0x8 => {
                    self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
                    self.table_entries[index].vector_ctl = (value >> 32) as u32;
                }
                _ => error!("invalid offset"),
            };
        }
        _ => error!("invalid data length"),
    };

    let new_entry = self.table_entries[index].clone();

    // This MSI-X vector is enabled for the first time.
    if self.enabled()
        && !self.masked()
        && self.irq_vec[index].is_none()
        && old_entry.masked()
        && !new_entry.masked()
    {
        if let Err(e) = self.msix_enable_one(index) {
            error!("failed to enable MSI-X vector {}: {}", index, e);
            // Re-mask the entry so it reflects that it is not usable.
            self.table_entries[index].vector_ctl |= MSIX_TABLE_ENTRY_MASK_BIT;
        }
        return MsixStatus::EntryChanged(index);
    }

    // Address/data of an already-active vector changed: refresh the route
    // registered with the irqchip.
    if self.enabled()
        && (old_entry.msg_addr_lo != new_entry.msg_addr_lo
            || old_entry.msg_addr_hi != new_entry.msg_addr_hi
            || old_entry.msg_data != new_entry.msg_data)
    {
        if let Some(irqfd_gsi) = &self.irq_vec[index] {
            let irq_num = irqfd_gsi.gsi;
            if let Err(e) = self.add_msi_route(index as u16, irq_num) {
                error!("add_msi_route failed: {}", e);
            }
        }
    }

    // After the MSI-X table entry has been updated, it is necessary to
    // check if the vector control masking bit has changed. In case the
    // bit has been flipped from 1 to 0, we need to inject a MSI message
    // if the corresponding pending bit from the PBA is set. Once the MSI
    // has been injected, the pending bit in the PBA needs to be cleared.
    // All of this is valid only if MSI-X has not been masked for the whole
    // device.

    // Check if bit has been flipped
    if !self.masked() {
        if old_entry.masked() && !self.table_entries[index].masked() {
            if self.get_pba_bit(index as u16) == 1 {
                self.inject_msix_and_clear_pba(index);
            }
            return MsixStatus::EntryChanged(index);
        } else if !old_entry.masked() && self.table_entries[index].masked() {
            return MsixStatus::EntryChanged(index);
        }
    }
    MsixStatus::NothingToDo
}
610
611
/// Read PBA Entries
/// # Arguments
/// * 'offset' - the offset within the PBA entries
/// * 'data' - used to store the read results
///
/// Pending Bits\[63::00\]: For each Pending Bit that is set, the function
/// has a pending message for the associated MSI-X Table entry.
pub fn read_pba_entries(&self, offset: u64, data: &mut [u8]) {
    // Each PBA entry is 8 bytes: split into entry index and offset within it.
    let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
    let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;

    if index >= self.pba_entries.len() {
        error!("invalid PBA index {}", index);
        return;
    }

    match data.len() {
        4 => {
            // DWORD read: low or high half of the 64-bit entry.
            let value: u32 = match modulo_offset {
                0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
                0x4 => (self.pba_entries[index] >> 32) as u32,
                _ => {
                    error!("invalid offset");
                    0
                }
            };

            data.copy_from_slice(&value.to_le_bytes());
        }
        8 => {
            // QWORD read must be aligned to the entry.
            let value: u64 = match modulo_offset {
                0x0 => self.pba_entries[index],
                _ => {
                    error!("invalid offset");
                    0
                }
            };

            data.copy_from_slice(&value.to_le_bytes());
        }
        _ => error!("invalid data length"),
    }
}
654
655
/// Write to PBA Entries
///
/// Software should never write, and should only read Pending Bits.
/// If software writes to Pending Bits, the result is undefined.
pub fn write_pba_entries(&mut self, _offset: u64, _data: &[u8]) {
    // Writes are ignored; only log that the guest misbehaved.
    error!("Pending Bit Array is read only");
}
662
663
fn set_pba_bit(&mut self, vector: u16, set: bool) {
664
assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);
665
666
let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
667
let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
668
let mut mask: u64 = (1 << shift) as u64;
669
670
if set {
671
self.pba_entries[index] |= mask;
672
} else {
673
mask = !mask;
674
self.pba_entries[index] &= mask;
675
}
676
}
677
678
/// Returns 1 if the pending bit for `vector` is set in the PBA, else 0.
///
/// Panics if `vector` is not below MAX_MSIX_VECTORS_PER_DEVICE.
fn get_pba_bit(&self, vector: u16) -> u8 {
    assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

    let entry = usize::from(vector) / BITS_PER_PBA_ENTRY;
    let bit = usize::from(vector) % BITS_PER_PBA_ENTRY;

    ((self.pba_entries[entry] >> bit) & 1) as u8
}
686
687
// Fire the vector's irqfd (if one is registered) and clear its pending bit.
fn inject_msix_and_clear_pba(&mut self, vector: usize) {
    if let Some(irq) = &self.irq_vec[vector] {
        irq.irqfd.signal().unwrap();
    }

    // Clear the bit from PBA
    self.set_pba_bit(vector as u16, false);
}
695
696
/// Inject virtual interrupt to the guest
///
/// # Arguments
/// * 'vector' - the index to the MSI-X Table entry
///
/// PCI Spec 3.0 6.8.3.5: while a vector is masked, the function is
/// prohibited from sending the associated message, and the function
/// must set the associated Pending bit whenever the function would
/// otherwise send the message. When software unmasks a vector whose
/// associated Pending bit is set, the function must schedule sending
/// the associated message, and clear the Pending bit as soon as the
/// message has been sent.
///
/// If the vector is unmasked, writing to irqfd which wakes up KVM to
/// inject virtual interrupt to the guest.
pub fn trigger(&mut self, vector: u16) {
    // NOTE(review): indexing table_entries panics for an out-of-range
    // vector, while the irq_vec lookup below is bounds-checked — callers
    // must pass a valid vector.
    if self.table_entries[vector as usize].masked() || self.masked() {
        self.set_pba_bit(vector, true);
    } else if let Some(irq) = self.irq_vec.get(vector as usize).unwrap_or(&None) {
        irq.irqfd.signal().unwrap();
    }
}
718
719
/// Return the raw descriptor of the MSI device socket
pub fn get_msi_socket(&self) -> RawDescriptor {
    self.msi_device_socket.as_raw_descriptor()
}
723
724
/// Return irqfd of MSI-X Table entry
725
///
726
/// # Arguments
727
/// * 'vector' - the index to the MSI-X table entry
728
pub fn get_irqfd(&self, vector: usize) -> Option<&Event> {
729
match self.irq_vec.get(vector).unwrap_or(&None) {
730
Some(irq) => Some(&irq.irqfd),
731
None => None,
732
}
733
}
734
735
/// Release every allocated GSI back to the irqchip, emptying irq_vec.
/// Best-effort: socket errors and error responses are ignored since the
/// device is being torn down.
pub fn destroy(&mut self) {
    while let Some(irq) = self.irq_vec.pop() {
        if let Some(irq) = irq {
            let request = VmIrqRequest::ReleaseOneIrq {
                gsi: irq.gsi,
                irqfd: irq.irqfd,
            };
            if self.msi_device_socket.send(&request).is_err() {
                continue;
            }
            // Drain the response so the tube stays in sync.
            let _ = self.msi_device_socket.recv::<VmIrqResponse>();
        }
    }
}
749
}
750
751
// Only bits 31:30 of the first capability register — the MSI-X Enable and
// Function Mask bits of the Message Control word at bytes 2-3 — are read
// from this emulation (see read_msix_capability); all other bits come from
// the static capability bytes.
const MSIX_CONFIG_READ_MASK: [u32; 3] = [0xc000_0000, 0, 0];
752
753
impl PciCapConfig for MsixConfig {
    // Bits of each register that this emulation supplies on reads.
    fn read_mask(&self) -> &'static [u32] {
        &MSIX_CONFIG_READ_MASK
    }

    // Only register 0 (which contains Message Control) has emulated bits.
    fn read_reg(&self, reg_idx: usize) -> u32 {
        if reg_idx == 0 {
            self.read_msix_capability(0)
        } else {
            0
        }
    }

    // Forward Message Control writes; other registers are read-only here.
    fn write_reg(
        &mut self,
        reg_idx: usize,
        offset: u64,
        data: &[u8],
    ) -> Option<Box<dyn PciCapConfigWriteResult>> {
        let status = if reg_idx == 0 {
            self.write_msix_capability(offset, data)
        } else {
            MsixStatus::NothingToDo
        };
        Some(Box::new(status))
    }
}
780
781
// Expose the MSI tube's descriptor so the device can be added to wait
// contexts / descriptor lists.
impl AsRawDescriptor for MsixConfig {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.msi_device_socket.as_raw_descriptor()
    }
}
786
787
/// Message Control Register
788
// 10-0: MSI-X Table size
789
// 13-11: Reserved
790
// 14: Mask. Mask all MSI-X when set.
791
// 15: Enable. Enable all MSI-X when set.
792
// See <https://wiki.osdev.org/PCI#Enabling_MSI-X> for the details.
793
#[bitfield]
794
#[derive(Copy, Clone, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
795
pub struct MsixCtrl {
796
table_size: B10,
797
reserved: B4,
798
mask: B1,
799
enable: B1,
800
}
801
802
#[allow(dead_code)]
#[repr(C)]
#[derive(Clone, Copy, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
/// MSI-X Capability Structure
///
/// Field order matches the on-wire PCI capability layout so the struct can
/// be exposed byte-for-byte via `as_bytes()`.
pub struct MsixCap {
    // To make add_capability() happy
    _cap_vndr: u8,
    _cap_next: u8,
    // Message Control Register
    msg_ctl: MsixCtrl,
    // Table. Contains the offset and the BAR indicator (BIR)
    // 2-0: Table BAR indicator (BIR). Can be 0 to 5.
    // 31-3: Table offset in the BAR pointed by the BIR.
    table: u32,
    // Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    // 2-0: PBA BAR indicator (BIR). Can be 0 to 5.
    // 31-3: PBA offset in the BAR pointed by the BIR.
    pba: u32,
}
821
822
impl PciCapability for MsixCap {
823
fn bytes(&self) -> &[u8] {
824
self.as_bytes()
825
}
826
827
fn id(&self) -> PciCapabilityID {
828
PciCapabilityID::Msix
829
}
830
831
fn writable_bits(&self) -> Vec<u32> {
832
// Only msg_ctl[15:14] is writable
833
vec![0x3000_0000, 0, 0]
834
}
835
}
836
837
impl MsixCap {
838
pub fn new(
839
table_pci_bar: u8,
840
table_size: u16,
841
table_off: u32,
842
pba_pci_bar: u8,
843
pba_off: u32,
844
) -> Self {
845
assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);
846
847
// Set the table size and enable MSI-X.
848
let mut msg_ctl = MsixCtrl::new();
849
msg_ctl.set_enable(1);
850
// Table Size is N - 1 encoded.
851
msg_ctl.set_table_size(table_size - 1);
852
853
MsixCap {
854
_cap_vndr: 0,
855
_cap_next: 0,
856
msg_ctl,
857
table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
858
pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
859
}
860
}
861
}
862
863
#[cfg(test)]
mod tests {

    use std::thread;

    use super::*;

    // Receives an AllocateOneMsiAtGsi request from the fake-irqchip tube and
    // returns the requested GSI.
    #[track_caller]
    fn recv_allocate_msi(t: &Tube) -> u32 {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::AllocateOneMsiAtGsi { gsi, .. } => gsi,
            msg => panic!("unexpected irqchip message: {msg:?}"),
        }
    }

    // Comparable capture of an AddMsiRoute request's fields.
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    struct MsiRouteDetails {
        gsi: u32,
        msi_address: u64,
        msi_data: u32,
        #[cfg(target_arch = "aarch64")]
        pci_address: resources::PciAddress,
    }

    const TEST_PCI_ADDRESS: resources::PciAddress = resources::PciAddress {
        bus: 1,
        dev: 2,
        func: 3,
    };

    // Receives an AddMsiRoute request and unpacks it into MsiRouteDetails.
    #[track_caller]
    fn recv_add_msi_route(t: &Tube) -> MsiRouteDetails {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::AddMsiRoute {
                gsi,
                msi_address,
                msi_data,
                #[cfg(target_arch = "aarch64")]
                pci_address,
            } => MsiRouteDetails {
                gsi,
                msi_address,
                msi_data,
                #[cfg(target_arch = "aarch64")]
                pci_address,
            },
            msg => panic!("unexpected irqchip message: {msg:?}"),
        }
    }

    // Receives a ReleaseOneIrq request and returns the GSI being released.
    #[track_caller]
    fn recv_release_one_irq(t: &Tube) -> u32 {
        match t.recv::<VmIrqRequest>().unwrap() {
            VmIrqRequest::ReleaseOneIrq { gsi, irqfd: _ } => gsi,
            msg => panic!("unexpected irqchip message: {msg:?}"),
        }
    }

    // Replies VmIrqResponse::Ok to the most recent request.
    #[track_caller]
    fn send_ok(t: &Tube) {
        t.send(&VmIrqResponse::Ok).unwrap();
    }

    /// Tests a cold restore where there are no existing vectors at the time
    /// restore is called.
    #[test]
    fn verify_msix_restore_cold_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();
        let (_unused, unused_config_tube) = Tube::pair().unwrap();

        let mut cfg = MsixConfig::new(2, unused_config_tube, 0, "test_device".to_owned());
        cfg.set_pci_address(TEST_PCI_ADDRESS);

        // Set up two MSI-X vectors (0 and 1).
        // Data is 0xdVEC_NUM. Address is 0xaVEC_NUM.
        cfg.table_entries[0].msg_data = 0xd0;
        cfg.table_entries[0].msg_addr_lo = 0xa0;
        cfg.table_entries[0].msg_addr_hi = 0;
        cfg.table_entries[1].msg_data = 0xd1;
        cfg.table_entries[1].msg_addr_lo = 0xa1;
        cfg.table_entries[1].msg_addr_hi = 0;

        // Pretend that these vectors were hooked up to GSIs 10 & 20,
        // respectively.
        cfg.irq_vec = vec![
            Some(IrqfdGsi {
                gsi: 10,
                irqfd: Event::new().unwrap(),
            }),
            Some(IrqfdGsi {
                gsi: 20,
                irqfd: Event::new().unwrap(),
            }),
        ];

        // Take a snapshot of MsixConfig.
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests
        let irqchip_fake = thread::spawn(move || {
            assert_eq!(recv_allocate_msi(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            assert_eq!(
                recv_add_msi_route(&irqchip_tube),
                MsiRouteDetails {
                    gsi: 10,
                    msi_address: 0xa0,
                    msi_data: 0xd0,
                    #[cfg(target_arch = "aarch64")]
                    pci_address: TEST_PCI_ADDRESS,
                }
            );
            send_ok(&irqchip_tube);

            assert_eq!(recv_allocate_msi(&irqchip_tube), 20);
            send_ok(&irqchip_tube);
            assert_eq!(
                recv_add_msi_route(&irqchip_tube),
                MsiRouteDetails {
                    gsi: 20,
                    msi_address: 0xa1,
                    msi_data: 0xd1,
                    #[cfg(target_arch = "aarch64")]
                    pci_address: TEST_PCI_ADDRESS,
                }
            );
            send_ok(&irqchip_tube);
            irqchip_tube
        });

        // Restore into a config created with different parameters; restore()
        // is expected to overwrite them from the snapshot.
        let mut restored_cfg = MsixConfig::new(10, msix_config_tube, 10, "some_device".to_owned());
        restored_cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        assert_eq!(restored_cfg.pci_id, 0);
        assert_eq!(restored_cfg.device_name, "test_device");
    }

    /// Tests a warm restore where there are existing vectors at the time
    /// restore is called. These vectors need to be released first.
    #[test]
    fn verify_msix_restore_warm_smoke() {
        let (irqchip_tube, msix_config_tube) = Tube::pair().unwrap();

        let mut cfg = MsixConfig::new(2, msix_config_tube, 0, "test_device".to_owned());
        cfg.set_pci_address(TEST_PCI_ADDRESS);

        // Set up two MSI-X vectors (0 and 1).
        // Data is 0xdVEC_NUM. Address is 0xaVEC_NUM.
        cfg.table_entries[0].msg_data = 0xd0;
        cfg.table_entries[0].msg_addr_lo = 0xa0;
        cfg.table_entries[0].msg_addr_hi = 0;
        cfg.table_entries[1].msg_data = 0xd1;
        cfg.table_entries[1].msg_addr_lo = 0xa1;
        cfg.table_entries[1].msg_addr_hi = 0;

        // Pretend that these vectors were hooked up to GSIs 10 & 20,
        // respectively.
        cfg.irq_vec = vec![
            Some(IrqfdGsi {
                gsi: 10,
                irqfd: Event::new().unwrap(),
            }),
            Some(IrqfdGsi {
                gsi: 20,
                irqfd: Event::new().unwrap(),
            }),
        ];

        // Take a snapshot of MsixConfig.
        let snapshot = cfg.snapshot().unwrap();

        // Create a fake irqchip to respond to our requests
        let irqchip_fake = thread::spawn(move || {
            // First, we free the existing vectors / GSIs.
            assert_eq!(recv_release_one_irq(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            assert_eq!(recv_release_one_irq(&irqchip_tube), 20);
            send_ok(&irqchip_tube);

            // Now we re-allocate them.
            assert_eq!(recv_allocate_msi(&irqchip_tube), 10);
            send_ok(&irqchip_tube);
            assert_eq!(
                recv_add_msi_route(&irqchip_tube),
                MsiRouteDetails {
                    gsi: 10,
                    msi_address: 0xa0,
                    msi_data: 0xd0,
                    #[cfg(target_arch = "aarch64")]
                    pci_address: TEST_PCI_ADDRESS,
                }
            );
            send_ok(&irqchip_tube);

            assert_eq!(recv_allocate_msi(&irqchip_tube), 20);
            send_ok(&irqchip_tube);
            assert_eq!(
                recv_add_msi_route(&irqchip_tube),
                MsiRouteDetails {
                    gsi: 20,
                    msi_address: 0xa1,
                    msi_data: 0xd1,
                    #[cfg(target_arch = "aarch64")]
                    pci_address: TEST_PCI_ADDRESS,
                }
            );
            send_ok(&irqchip_tube);
            irqchip_tube
        });

        cfg.restore(snapshot).unwrap();
        irqchip_fake.join().unwrap();

        assert_eq!(cfg.pci_id, 0);
        assert_eq!(cfg.device_name, "test_device");
    }
}
1081
1082