Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
google
GitHub Repository: google/crosvm
Path: blob/main/devices/src/pci/pcie/pcie_host.rs
5394 views
1
// Copyright 2021 The ChromiumOS Authors
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
use std::fs::read;
6
use std::fs::write;
7
use std::fs::File;
8
use std::fs::OpenOptions;
9
use std::os::unix::fs::FileExt;
10
use std::path::Path;
11
use std::path::PathBuf;
12
use std::sync::Arc;
13
use std::thread;
14
15
use anyhow::anyhow;
16
use anyhow::bail;
17
use anyhow::Context;
18
use anyhow::Result;
19
use base::error;
20
use base::Tube;
21
use sync::Mutex;
22
use vm_control::HotPlugDeviceInfo;
23
use vm_control::HotPlugDeviceType;
24
use vm_control::VmRequest;
25
use vm_control::VmResponse;
26
use zerocopy::FromBytes;
27
use zerocopy::IntoBytes;
28
29
use crate::pci::pci_configuration::PciBridgeSubclass;
30
use crate::pci::pci_configuration::CAPABILITY_LIST_HEAD_OFFSET;
31
use crate::pci::pci_configuration::HEADER_TYPE_REG;
32
use crate::pci::pci_configuration::PCI_CAP_NEXT_POINTER;
33
use crate::pci::pcie::pci_bridge::PciBridgeBusRange;
34
use crate::pci::pcie::pci_bridge::BR_BUS_NUMBER_REG;
35
use crate::pci::pcie::pci_bridge::BR_MEM_BASE_MASK;
36
use crate::pci::pcie::pci_bridge::BR_MEM_BASE_SHIFT;
37
use crate::pci::pcie::pci_bridge::BR_MEM_LIMIT_MASK;
38
use crate::pci::pcie::pci_bridge::BR_MEM_MINIMUM;
39
use crate::pci::pcie::pci_bridge::BR_MEM_REG;
40
use crate::pci::pcie::pci_bridge::BR_PREF_MEM_64BIT;
41
use crate::pci::pcie::pci_bridge::BR_PREF_MEM_BASE_HIGH_REG;
42
use crate::pci::pcie::pci_bridge::BR_PREF_MEM_LIMIT_HIGH_REG;
43
use crate::pci::pcie::pci_bridge::BR_PREF_MEM_LOW_REG;
44
use crate::pci::pcie::pci_bridge::BR_WINDOW_ALIGNMENT;
45
use crate::pci::pcie::PcieDevicePortType;
46
use crate::pci::PciCapabilityID;
47
use crate::pci::PciClassCode;
48
49
/// Handle to a host PCI device's sysfs `config` file.
///
/// All configuration-space accesses made on behalf of the host device go
/// through this file.
struct PciHostConfig {
    /// The device's sysfs `config` file.
    config_file: File,
}
53
54
impl PciHostConfig {
55
// Create a new host pci device's sysfs config file
56
fn new(host_sysfs_path: &Path) -> Result<Self> {
57
let mut config_path = PathBuf::new();
58
config_path.push(host_sysfs_path);
59
config_path.push("config");
60
let f = OpenOptions::new()
61
.write(true)
62
.read(true)
63
.open(config_path.as_path())
64
.with_context(|| format!("failed to open: {}", config_path.display()))?;
65
Ok(PciHostConfig { config_file: f })
66
}
67
68
// Read host pci device's config register
69
fn read_config<T: IntoBytes + FromBytes + Copy + Default>(&self, offset: u64) -> T {
70
let length = std::mem::size_of::<T>();
71
let mut val = T::default();
72
if offset % length as u64 != 0 {
73
error!(
74
"read_config, offset {} isn't aligned to length {}",
75
offset, length
76
);
77
} else if let Err(e) = self.config_file.read_exact_at(val.as_mut_bytes(), offset) {
78
error!("failed to read host sysfs config: {}", e);
79
}
80
81
val
82
}
83
84
// write host pci device's config register
85
#[allow(dead_code)]
86
fn write_config(&self, offset: u64, data: &[u8]) {
87
if offset % data.len() as u64 != 0 {
88
error!(
89
"write_config, offset {} isn't aligned to length {}",
90
offset,
91
data.len()
92
);
93
return;
94
}
95
if let Err(e) = self.config_file.write_all_at(data, offset) {
96
error!("failed to write host sysfs config: {}", e);
97
}
98
}
99
}
100
101
// Find all the added pcie devices
102
fn visit_children(dir: &Path, children: &mut Vec<HotPlugDeviceInfo>) -> Result<()> {
103
// Each pci device has a sysfs directory
104
if !dir.is_dir() {
105
bail!("{} isn't directory", dir.display());
106
}
107
// Loop device sysfs subdirectory
108
let entries = dir
109
.read_dir()
110
.with_context(|| format!("failed to read dir {}", dir.display()))?;
111
let mut devices = Vec::new();
112
for entry in entries {
113
let sub_dir = match entry {
114
Ok(sub) => sub,
115
_ => continue,
116
};
117
118
if !sub_dir.path().is_dir() {
119
continue;
120
}
121
122
let name = sub_dir
123
.file_name()
124
.into_string()
125
.map_err(|_| anyhow!("failed to get dir name"))?;
126
// Child pci device has name format 0000:xx:xx.x, length is 12
127
if name.len() != 12 || !name.starts_with("0000:") {
128
continue;
129
}
130
let child_path = dir.join(name);
131
devices.push(child_path);
132
}
133
devices.reverse();
134
let mut iter = devices.iter().peekable();
135
while let Some(device) = iter.next() {
136
let class_path = device.join("class");
137
let class_id = read(class_path.as_path())
138
.with_context(|| format!("failed to read {}", class_path.display()))?;
139
let hp_interrupt = iter.peek().is_none();
140
if !class_id.starts_with("0x0604".as_bytes()) {
141
// If the device isn't pci bridge, this is a pcie endpoint device
142
children.push(HotPlugDeviceInfo {
143
device_type: HotPlugDeviceType::EndPoint,
144
path: device.to_path_buf(),
145
hp_interrupt,
146
});
147
// No need to look further
148
return Ok(());
149
} else {
150
// Find the pci express cap to get the port type of the pcie bridge
151
let host_config = PciHostConfig::new(device)?;
152
let mut cap_pointer: u8 = host_config.read_config(CAPABILITY_LIST_HEAD_OFFSET as u64);
153
while cap_pointer != 0x0 {
154
let cap_id: u8 = host_config.read_config(cap_pointer as u64);
155
if cap_id == PciCapabilityID::PciExpress as u8 {
156
break;
157
}
158
cap_pointer = host_config.read_config(cap_pointer as u64 + 0x1);
159
}
160
if cap_pointer == 0x0 {
161
bail!(
162
"Failed to get pcie express capability for {}",
163
device.display()
164
);
165
}
166
let express_cap_reg: u16 = host_config.read_config(cap_pointer as u64 + 0x2);
167
match (express_cap_reg & 0xf0) >> 4 {
168
x if x == PcieDevicePortType::UpstreamPort as u16 => {
169
children.push(HotPlugDeviceInfo {
170
device_type: HotPlugDeviceType::UpstreamPort,
171
path: device.to_path_buf(),
172
hp_interrupt,
173
})
174
}
175
x if x == PcieDevicePortType::DownstreamPort as u16 => {
176
children.push(HotPlugDeviceInfo {
177
device_type: HotPlugDeviceType::DownstreamPort,
178
path: device.to_path_buf(),
179
hp_interrupt,
180
})
181
}
182
_ => (),
183
}
184
}
185
}
186
for device in devices.iter() {
187
visit_children(device.as_path(), children)?;
188
}
189
Ok(())
190
}
191
192
/// Performs one hotplug probe for a host PCIe port.
struct HotplugWorker {
    /// Name of the host port's directory under `/sys/bus/pci/devices/`.
    host_name: String,
}
195
196
impl HotplugWorker {
197
fn run(&self, vm_socket: Arc<Mutex<Tube>>, child_exist: Arc<Mutex<bool>>) -> Result<()> {
198
let mut host_sysfs = PathBuf::new();
199
host_sysfs.push("/sys/bus/pci/devices/");
200
host_sysfs.push(self.host_name.clone());
201
let rescan_path = host_sysfs.join("rescan");
202
// Let pcie root port rescan to find the added or removed children devices
203
write(rescan_path.as_path(), "1")
204
.with_context(|| format!("failed to write {}", rescan_path.display()))?;
205
206
// If child device existed, but code run here again, this means host has a
207
// hotplug out event, after the above rescan, host should find the removed
208
// child device, and host vfio-pci kernel driver should notify crosvm vfio-pci
209
// devie such hotplug out event, so nothing is needed to do here, just return
210
// it now.
211
let mut child_exist = child_exist.lock();
212
if *child_exist {
213
return Ok(());
214
}
215
216
// Probe the new added pcie endpoint devices
217
let mut children: Vec<HotPlugDeviceInfo> = Vec::new();
218
visit_children(host_sysfs.as_path(), &mut children)?;
219
220
// Without reverse children, physical larger BDF device is at the top, it will be
221
// added into guest first with smaller virtual function number, so physical smaller
222
// BDF device has larger virtual function number, phyiscal larger BDF device has
223
// smaller virtual function number. During hotplug out process, host pcie root port
224
// driver remove physical smaller BDF pcie endpoint device first, so host vfio-pci
225
// driver send plug out event first for smaller BDF device and wait for this device
226
// removed from crosvm, when crosvm receives this plug out event, crosvm will remove
227
// all the children devices, crosvm remove smaller virtual function number device
228
// first, this isn't the target device which host vfio-pci driver is waiting for.
229
// Host vfio-pci driver holds a lock when it is waiting, when crosvm remove another
230
// device throgh vfio-pci which try to get the same lock, so deadlock happens in
231
// host kernel.
232
//
233
// In order to fix the deadlock, children is reversed, so physical smaller BDF
234
// device has smaller virtual function number, and it will have the same order
235
// between host kernel and crosvm during hotplug out process.
236
children.reverse();
237
while let Some(child) = children.pop() {
238
if let HotPlugDeviceType::EndPoint = child.device_type {
239
// In order to bind device to vfio-pci driver, get device VID and DID
240
let vendor_path = child.path.join("vendor");
241
let vendor_id = read(vendor_path.as_path())
242
.with_context(|| format!("failed to read {}", vendor_path.display()))?;
243
// Remove the first two elements 0x
244
let prefix: &str = "0x";
245
let vendor = match vendor_id.strip_prefix(prefix.as_bytes()) {
246
Some(v) => v.to_vec(),
247
None => vendor_id,
248
};
249
let device_path = child.path.join("device");
250
let device_id = read(device_path.as_path())
251
.with_context(|| format!("failed to read {}", device_path.display()))?;
252
// Remove the first two elements 0x
253
let device = match device_id.strip_prefix(prefix.as_bytes()) {
254
Some(d) => d.to_vec(),
255
None => device_id,
256
};
257
let new_id = [
258
String::from_utf8_lossy(&vendor),
259
String::from_utf8_lossy(&device),
260
]
261
.join(" ");
262
if Path::new("/sys/bus/pci/drivers/vfio-pci-pm/new_id").exists() {
263
let _ = write("/sys/bus/pci/drivers/vfio-pci-pm/new_id", &new_id);
264
}
265
// This is normal - either the kernel doesn't support vfio-pci-pm driver,
266
// or the device failed to attach to vfio-pci-pm driver (most likely due to
267
// lack of power management capability).
268
if !child.path.join("driver/unbind").exists() {
269
write("/sys/bus/pci/drivers/vfio-pci/new_id", &new_id).with_context(|| {
270
format!("failed to write {new_id} into vfio-pci/new_id")
271
})?;
272
}
273
}
274
// Request to hotplug the new added pcie device into guest
275
let request = VmRequest::HotPlugVfioCommand {
276
device: child.clone(),
277
add: true,
278
};
279
let vm_socket = vm_socket.lock();
280
vm_socket
281
.send(&request)
282
.with_context(|| format!("failed to send hotplug request for {child:?}"))?;
283
let response = vm_socket
284
.recv::<VmResponse>()
285
.with_context(|| format!("failed to receive hotplug response for {child:?}"))?;
286
match response {
287
VmResponse::Ok => {}
288
_ => bail!("unexpected hotplug response: {response}"),
289
};
290
if !*child_exist {
291
*child_exist = true;
292
}
293
}
294
295
Ok(())
296
}
297
}
298
299
/// Byte offset of the 16-bit device ID in PCI configuration space.
const PCI_CONFIG_DEVICE_ID: u64 = 0x02;
/// Byte offset of the 8-bit base class code in PCI configuration space.
const PCI_BASE_CLASS_CODE: u64 = 0x0B;
/// Byte offset of the 8-bit sub-class code in PCI configuration space.
const PCI_SUB_CLASS_CODE: u64 = 0x0A;
302
303
/// Pcie root port device has a corresponding host pcie root port.
304
pub struct PcieHostPort {
305
host_config: PciHostConfig,
306
host_name: String,
307
hotplug_in_process: Arc<Mutex<bool>>,
308
hotplug_child_exist: Arc<Mutex<bool>>,
309
vm_socket: Arc<Mutex<Tube>>,
310
}
311
312
impl PcieHostPort {
313
/// Create PcieHostPort, host_syfsfs_patch specify host pcie port
314
/// sysfs path.
315
pub fn new(host_sysfs_path: &Path, socket: Tube) -> Result<Self> {
316
let host_config = PciHostConfig::new(host_sysfs_path)?;
317
let host_name = host_sysfs_path
318
.file_name()
319
.unwrap()
320
.to_str()
321
.unwrap()
322
.to_owned();
323
let base_class: u8 = host_config.read_config(PCI_BASE_CLASS_CODE);
324
if base_class != PciClassCode::BridgeDevice.get_register_value() {
325
return Err(anyhow!("host {} isn't bridge", host_name));
326
}
327
let sub_class: u8 = host_config.read_config(PCI_SUB_CLASS_CODE);
328
if sub_class != PciBridgeSubclass::PciToPciBridge as u8 {
329
return Err(anyhow!("host {} isn't pci to pci bridge", host_name));
330
}
331
332
let mut pcie_cap_reg: u8 = 0;
333
334
let mut cap_next: u8 = host_config.read_config(CAPABILITY_LIST_HEAD_OFFSET as u64);
335
let mut counter: u16 = 0;
336
while cap_next != 0 && counter < 256 {
337
let cap_id: u8 = host_config.read_config(cap_next.into());
338
if cap_id == PciCapabilityID::PciExpress as u8 {
339
pcie_cap_reg = cap_next;
340
break;
341
}
342
let offset = cap_next as u64 + PCI_CAP_NEXT_POINTER as u64;
343
cap_next = host_config.read_config(offset);
344
counter += 1;
345
}
346
347
if pcie_cap_reg == 0 {
348
return Err(anyhow!("host {} isn't pcie device", host_name));
349
}
350
351
Ok(PcieHostPort {
352
host_config,
353
host_name,
354
hotplug_in_process: Arc::new(Mutex::new(false)),
355
hotplug_child_exist: Arc::new(Mutex::new(false)),
356
vm_socket: Arc::new(Mutex::new(socket)),
357
})
358
}
359
360
pub fn get_bus_range(&self) -> PciBridgeBusRange {
361
let bus_num: u32 = self.host_config.read_config((BR_BUS_NUMBER_REG * 4) as u64);
362
let primary = (bus_num & 0xFF) as u8;
363
let secondary = ((bus_num >> 8) & 0xFF) as u8;
364
let subordinate = ((bus_num >> 16) & 0xFF) as u8;
365
366
PciBridgeBusRange {
367
primary,
368
secondary,
369
subordinate,
370
}
371
}
372
373
pub fn read_device_id(&self) -> u16 {
374
self.host_config.read_config::<u16>(PCI_CONFIG_DEVICE_ID)
375
}
376
377
pub fn host_name(&self) -> String {
378
self.host_name.clone()
379
}
380
381
pub fn read_config(&self, reg_idx: usize, data: &mut u32) {
382
if reg_idx == HEADER_TYPE_REG {
383
*data = self.host_config.read_config((HEADER_TYPE_REG as u64) * 4)
384
}
385
}
386
387
pub fn write_config(&mut self, _reg_idx: usize, _offset: u64, _data: &[u8]) {}
388
389
pub fn get_bridge_window_size(&self) -> (u64, u64) {
390
let br_memory: u32 = self.host_config.read_config(BR_MEM_REG as u64 * 4);
391
let mem_base = (br_memory & BR_MEM_BASE_MASK) << BR_MEM_BASE_SHIFT;
392
let mem_limit = br_memory & BR_MEM_LIMIT_MASK;
393
let mem_size = if mem_limit > mem_base {
394
(mem_limit - mem_base) as u64 + BR_WINDOW_ALIGNMENT
395
} else {
396
BR_MEM_MINIMUM
397
};
398
let br_pref_mem_low: u32 = self.host_config.read_config(BR_PREF_MEM_LOW_REG as u64 * 4);
399
let pref_mem_base_low = (br_pref_mem_low & BR_MEM_BASE_MASK) << BR_MEM_BASE_SHIFT;
400
let pref_mem_limit_low = br_pref_mem_low & BR_MEM_LIMIT_MASK;
401
let mut pref_mem_base: u64 = pref_mem_base_low as u64;
402
let mut pref_mem_limit: u64 = pref_mem_limit_low as u64;
403
if br_pref_mem_low & BR_PREF_MEM_64BIT == BR_PREF_MEM_64BIT {
404
// 64bit prefetch memory
405
let pref_mem_base_high: u32 = self
406
.host_config
407
.read_config(BR_PREF_MEM_BASE_HIGH_REG as u64 * 4);
408
let pref_mem_limit_high: u32 = self
409
.host_config
410
.read_config(BR_PREF_MEM_LIMIT_HIGH_REG as u64 * 4);
411
pref_mem_base = ((pref_mem_base_high as u64) << 32) | (pref_mem_base_low as u64);
412
pref_mem_limit = ((pref_mem_limit_high as u64) << 32) | (pref_mem_limit_low as u64);
413
}
414
let pref_mem_size = if pref_mem_limit > pref_mem_base {
415
pref_mem_limit - pref_mem_base + BR_WINDOW_ALIGNMENT
416
} else {
417
BR_MEM_MINIMUM
418
};
419
420
(mem_size, pref_mem_size)
421
}
422
423
pub fn hotplug_probe(&mut self) {
424
if *self.hotplug_in_process.lock() {
425
return;
426
}
427
428
let hotplug_process = self.hotplug_in_process.clone();
429
let child_exist = self.hotplug_child_exist.clone();
430
let socket = self.vm_socket.clone();
431
let name = self.host_name.clone();
432
let _ = thread::Builder::new()
433
.name("pcie_hotplug".to_string())
434
.spawn(move || {
435
let mut hotplug = hotplug_process.lock();
436
*hotplug = true;
437
let hotplug_worker = HotplugWorker { host_name: name };
438
let _ = hotplug_worker.run(socket, child_exist);
439
*hotplug = false;
440
});
441
}
442
443
pub fn hot_unplug(&mut self) {
444
*self.hotplug_child_exist.lock() = false;
445
}
446
}
447
448