Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
google
GitHub Repository: google/crosvm
Path: blob/main/arch/src/lib.rs
5394 views
1
// Copyright 2018 The ChromiumOS Authors
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
//! Virtual machine architecture support code.
6
7
pub mod android;
8
pub mod fdt;
9
pub mod pstore;
10
pub mod serial;
11
12
pub mod sys;
13
14
use std::collections::BTreeMap;
15
use std::error::Error as StdError;
16
use std::fs::File;
17
use std::io;
18
use std::ops::Deref;
19
use std::path::PathBuf;
20
use std::str::FromStr;
21
use std::sync::mpsc;
22
use std::sync::mpsc::SendError;
23
use std::sync::Arc;
24
25
use acpi_tables::sdt::SDT;
26
use base::syslog;
27
use base::AsRawDescriptors;
28
use base::FileGetLen;
29
use base::FileReadWriteAtVolatile;
30
use base::RecvTube;
31
use base::SendTube;
32
use base::Tube;
33
use devices::virtio::VirtioDevice;
34
use devices::BarRange;
35
use devices::Bus;
36
use devices::BusDevice;
37
use devices::BusDeviceObj;
38
use devices::BusError;
39
use devices::BusResumeDevice;
40
use devices::FwCfgParameters;
41
use devices::GpeScope;
42
use devices::HotPlugBus;
43
use devices::IrqChip;
44
use devices::IrqEventSource;
45
use devices::PciAddress;
46
use devices::PciBus;
47
use devices::PciDevice;
48
use devices::PciDeviceError;
49
use devices::PciInterruptPin;
50
use devices::PciRoot;
51
use devices::PciRootCommand;
52
use devices::PreferredIrq;
53
#[cfg(any(target_os = "android", target_os = "linux"))]
54
use devices::ProxyDevice;
55
use devices::SerialHardware;
56
use devices::SerialParameters;
57
pub use fdt::apply_device_tree_overlays;
58
pub use fdt::DtbOverlay;
59
#[cfg(feature = "gdb")]
60
use gdbstub::arch::Arch;
61
use hypervisor::MemCacheType;
62
use hypervisor::Vm;
63
#[cfg(windows)]
64
use jail::FakeMinijailStub as Minijail;
65
#[cfg(any(target_os = "android", target_os = "linux"))]
66
use minijail::Minijail;
67
use remain::sorted;
68
use resources::SystemAllocator;
69
use resources::SystemAllocatorConfig;
70
use serde::de::Visitor;
71
use serde::Deserialize;
72
use serde::Serialize;
73
use serde_keyvalue::FromKeyValues;
74
pub use serial::add_serial_devices;
75
pub use serial::get_serial_cmdline;
76
pub use serial::set_default_serial_parameters;
77
pub use serial::GetSerialCmdlineError;
78
pub use serial::SERIAL_ADDR;
79
use sync::Condvar;
80
use sync::Mutex;
81
#[cfg(any(target_os = "android", target_os = "linux"))]
82
pub use sys::linux::PlatformBusResources;
83
use thiserror::Error;
84
use uuid::Uuid;
85
use vm_control::BatControl;
86
use vm_control::BatteryType;
87
use vm_control::PmResource;
88
use vm_memory::GuestAddress;
89
use vm_memory::GuestMemory;
90
use vm_memory::GuestMemoryError;
91
use vm_memory::MemoryRegionInformation;
92
use vm_memory::MemoryRegionOptions;
93
94
// Select architecture-specific type aliases at compile time so the rest of the
// crate can refer to `IrqChipArch`, `VcpuArch`, `VmArch`, etc. without per-arch
// cfg noise at each use site. Exactly one branch is compiled in.
cfg_if::cfg_if! {
    if #[cfg(target_arch = "aarch64")] {
        pub use devices::IrqChipAArch64 as IrqChipArch;
        #[cfg(feature = "gdb")]
        pub use gdbstub_arch::aarch64::AArch64 as GdbArch;
        pub use hypervisor::CpuConfigAArch64 as CpuConfigArch;
        pub use hypervisor::Hypervisor as HypervisorArch;
        pub use hypervisor::VcpuAArch64 as VcpuArch;
        pub use hypervisor::VcpuInitAArch64 as VcpuInitArch;
        pub use hypervisor::VmAArch64 as VmArch;
    } else if #[cfg(target_arch = "riscv64")] {
        pub use devices::IrqChipRiscv64 as IrqChipArch;
        #[cfg(feature = "gdb")]
        pub use gdbstub_arch::riscv::Riscv64 as GdbArch;
        pub use hypervisor::CpuConfigRiscv64 as CpuConfigArch;
        pub use hypervisor::Hypervisor as HypervisorArch;
        pub use hypervisor::VcpuInitRiscv64 as VcpuInitArch;
        pub use hypervisor::VcpuRiscv64 as VcpuArch;
        pub use hypervisor::VmRiscv64 as VmArch;
    } else if #[cfg(target_arch = "x86_64")] {
        pub use devices::IrqChipX86_64 as IrqChipArch;
        #[cfg(feature = "gdb")]
        pub use gdbstub_arch::x86::X86_64_SSE as GdbArch;
        pub use hypervisor::CpuConfigX86_64 as CpuConfigArch;
        // Note: x86_64 uses the extended `HypervisorX86_64` trait, unlike the
        // other arches which alias the base `Hypervisor` trait.
        pub use hypervisor::HypervisorX86_64 as HypervisorArch;
        pub use hypervisor::VcpuInitX86_64 as VcpuInitArch;
        pub use hypervisor::VcpuX86_64 as VcpuArch;
        pub use hypervisor::VmX86_64 as VmArch;
    }
}
124
125
/// The payload a VM boots from: either a kernel image booted directly, or a
/// BIOS/firmware image (exact load semantics are arch-specific — see the
/// per-arch `build_vm` implementations).
pub enum VmImage {
    /// Raw kernel image file.
    Kernel(File),
    /// BIOS/firmware image file.
    Bios(File),
}
129
130
/// Configuration of the file backing the guest's pstore (persistent storage
/// used for kernel crash logs; see the `pstore` module).
#[derive(Clone, Debug, Deserialize, Serialize, FromKeyValues, PartialEq, Eq)]
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub struct Pstore {
    /// Host path of the backing file.
    pub path: PathBuf,
    /// Size of the pstore region — presumably in bytes; confirm against the
    /// `pstore` module's users.
    pub size: u32,
}
136
137
/// Placement of the flattened device tree (FDT) within guest memory.
#[derive(Clone, Copy, Debug, Serialize, Deserialize, FromKeyValues)]
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub enum FdtPosition {
    /// At the start of RAM.
    Start,
    /// Near the end of RAM.
    End,
    /// After the payload, with some padding for alignment.
    AfterPayload,
}
147
148
/// Set of CPU cores.
///
/// A thin newtype over `Vec<usize>` holding core indices in insertion order.
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct CpuSet(Vec<usize>);

impl CpuSet {
    /// Builds a `CpuSet` from any iterator of core indices.
    pub fn new<I: IntoIterator<Item = usize>>(cpus: I) -> Self {
        Self(Vec::from_iter(cpus))
    }

    /// Borrowing iterator over the contained core indices.
    pub fn iter(&self) -> std::slice::Iter<'_, usize> {
        self.0.iter()
    }
}

impl FromIterator<usize> for CpuSet {
    fn from_iter<T>(iter: T) -> Self
    where
        T: IntoIterator<Item = usize>,
    {
        // Collect directly into the inner vector.
        Self(iter.into_iter().collect())
    }
}
170
171
// Serde default for `SveConfig::auto`: auto-detect SVE support unless
// configured otherwise.
#[cfg(target_arch = "aarch64")]
fn sve_auto_default() -> bool {
    true
}

/// The SVE config for Vcpus.
#[cfg(target_arch = "aarch64")]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub struct SveConfig {
    /// Detect if SVE is available and enable accordingly. `enable` is ignored if auto is true.
    #[serde(default = "sve_auto_default")]
    pub auto: bool,
}

// Manual `Default` (rather than `#[derive(Default)]`) so that the default value
// matches the serde default (`auto: true`) instead of `bool`'s default `false`.
#[cfg(target_arch = "aarch64")]
impl Default for SveConfig {
    fn default() -> Self {
        SveConfig {
            auto: sve_auto_default(),
        }
    }
}
194
195
/// FFA config (NOTE(review): presumably Arm FF-A, the Firmware Framework for
/// A-profile — confirm against the consumer of this config).
// For now this is limited to android, will be opened to other aarch64 based pVMs after
// corresponding kernel APIs are upstreamed.
#[cfg(all(target_os = "android", target_arch = "aarch64"))]
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize, FromKeyValues)]
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub struct FfaConfig {
    /// Just enable FFA, don't care about the negotiated version.
    #[serde(default)]
    pub auto: bool,
}
206
207
/// Parses a single CPU index (`"5"`) or an inclusive range (`"2-4"`) and
/// appends every covered index to `cpuset`.
///
/// Returns a human-readable error message for non-numeric input or a range
/// whose upper bound is below its lower bound.
fn parse_cpu_range(s: &str, cpuset: &mut Vec<usize>) -> Result<(), String> {
    // Parse one decimal CPU index, mapping failure to a descriptive message.
    let parse_cpu = |s: &str| -> Result<usize, String> {
        s.parse()
            .map_err(|_| format!("invalid CPU index {s} - index must be a non-negative integer"))
    };

    // Build the inclusive range covered by this fragment.
    let range = if let Some((low, high)) = s.split_once('-') {
        let low = parse_cpu(low)?;
        let high = parse_cpu(high)?;
        if high < low {
            return Err(format!(
                "invalid CPU range {s} - ranges must be from low to high"
            ));
        }
        low..=high
    } else {
        // A bare index is treated as a degenerate one-element range.
        let cpu = parse_cpu(s)?;
        cpu..=cpu
    };

    cpuset.extend(range);

    Ok(())
}
235
236
impl FromStr for CpuSet {
237
type Err = String;
238
239
fn from_str(s: &str) -> Result<Self, Self::Err> {
240
let mut cpuset = Vec::new();
241
for part in s.split(',') {
242
parse_cpu_range(part, &mut cpuset)?;
243
}
244
Ok(CpuSet::new(cpuset))
245
}
246
}
247
248
// Expose the inner `Vec<usize>` so callers can use slice/vec methods
// (`len`, `contains`, indexing, iteration) directly on a `CpuSet`.
impl Deref for CpuSet {
    type Target = Vec<usize>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}
255
256
// Consuming iteration over the core indices (`for cpu in cpu_set { .. }`).
impl IntoIterator for CpuSet {
    type Item = usize;
    type IntoIter = std::vec::IntoIter<Self::Item>;

    fn into_iter(self) -> Self::IntoIter {
        self.0.into_iter()
    }
}
264
265
/// Selects the interface for guest-controlled power management of assigned devices.
///
/// Parsed from the command-line token `"pkvm-hvc"` (see the `FromStr` impl below).
#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Eq, Serialize)]
pub enum DevicePowerManagerConfig {
    /// Uses the protected KVM hypercall interface.
    PkvmHvc,
}
271
272
impl FromStr for DevicePowerManagerConfig {
273
type Err = String;
274
275
fn from_str(s: &str) -> Result<Self, Self::Err> {
276
match s {
277
"pkvm-hvc" => Ok(Self::PkvmHvc),
278
_ => Err(format!("DevicePowerManagerConfig '{s}' not supported")),
279
}
280
}
281
}
282
283
/// Deserializes a `CpuSet` from a sequence whose elements can either be integers, or strings
/// representing CPU ranges (e.g. `5-8`).
impl<'de> Deserialize<'de> for CpuSet {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        struct CpuSetVisitor;
        impl<'de> Visitor<'de> for CpuSetVisitor {
            type Value = CpuSet;

            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
                formatter.write_str("CpuSet")
            }

            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
            where
                A: serde::de::SeqAccess<'de>,
            {
                // Each sequence element is either a bare integer index or a
                // borrowed string encoding a range such as "5-8"; `untagged`
                // lets serde pick whichever variant the input matches.
                #[derive(Deserialize)]
                #[serde(untagged)]
                enum CpuSetValue<'a> {
                    Single(usize),
                    Range(&'a str),
                }

                let mut cpus = Vec::new();
                while let Some(cpuset) = seq.next_element::<CpuSetValue>()? {
                    match cpuset {
                        CpuSetValue::Single(cpu) => cpus.push(cpu),
                        CpuSetValue::Range(range) => {
                            // Reuse the same "a-b" parser as `FromStr`,
                            // converting its String error into a serde error.
                            parse_cpu_range(range, &mut cpus).map_err(serde::de::Error::custom)?;
                        }
                    }
                }

                Ok(CpuSet::new(cpus))
            }
        }

        deserializer.deserialize_seq(CpuSetVisitor)
    }
}
326
327
/// Serializes a `CpuSet` into a sequence of integers and strings representing CPU ranges.
328
impl Serialize for CpuSet {
329
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
330
where
331
S: serde::Serializer,
332
{
333
use serde::ser::SerializeSeq;
334
335
let mut seq = serializer.serialize_seq(None)?;
336
337
// Factorize ranges into "a-b" strings.
338
let mut serialize_range = |start: usize, end: usize| -> Result<(), S::Error> {
339
if start == end {
340
seq.serialize_element(&start)?;
341
} else {
342
seq.serialize_element(&format!("{start}-{end}"))?;
343
}
344
345
Ok(())
346
};
347
348
// Current range.
349
let mut range = None;
350
for core in &self.0 {
351
range = match range {
352
None => Some((core, core)),
353
Some((start, end)) if *end == *core - 1 => Some((start, core)),
354
Some((start, end)) => {
355
serialize_range(*start, *end)?;
356
Some((core, core))
357
}
358
};
359
}
360
361
if let Some((start, end)) = range {
362
serialize_range(*start, *end)?;
363
}
364
365
seq.end()
366
}
367
}
368
369
/// Mapping of guest VCPU threads to host CPU cores.
// Serialized/deserialized as part of the VM configuration (hence the serde derives).
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)]
pub enum VcpuAffinity {
    /// All VCPU threads will be pinned to the same set of host CPU cores.
    Global(CpuSet),
    /// Each VCPU may be pinned to a set of host CPU cores.
    /// The map key is a guest VCPU index, and the corresponding value is the set of
    /// host CPU indices that the VCPU thread will be allowed to run on.
    /// If a VCPU index is not present in the map, its affinity will not be set.
    PerVcpu(BTreeMap<usize, CpuSet>),
}
380
381
/// Memory region with optional size.
///
/// Used by `PciConfig` to describe guest address-space windows; `start` is a
/// guest-physical address. When `size` is `None`, the region size is
/// presumably chosen by the arch-specific layout code — confirm there.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, FromKeyValues)]
pub struct MemoryRegionConfig {
    /// Start address of the region.
    pub start: u64,
    /// Optional region size in bytes.
    pub size: Option<u64>,
}
387
388
/// General PCI config.
///
/// Each field overrides the default placement of a PCI-related guest memory
/// region; `None` leaves the choice to the arch-specific layout code. The
/// config-access field is arch-specific: CAM on aarch64, ECAM on x86_64.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize, FromKeyValues)]
pub struct PciConfig {
    /// region for PCI Configuration Access Mechanism
    #[cfg(target_arch = "aarch64")]
    pub cam: Option<MemoryRegionConfig>,
    /// region for PCIe Enhanced Configuration Access Mechanism
    #[cfg(target_arch = "x86_64")]
    pub ecam: Option<MemoryRegionConfig>,
    /// region for non-prefetchable PCI device memory below 4G
    pub mem: Option<MemoryRegionConfig>,
}
400
401
/// Holds the pieces needed to build a VM. Passed to `build_vm` in the `LinuxArch` trait below to
/// create a `RunnableLinuxVm`.
// `#[sorted]` (remain crate) makes compilation fail if fields fall out of
// alphabetical order — keep new fields sorted.
#[sorted]
pub struct VmComponents {
    #[cfg(all(target_arch = "x86_64", unix))]
    pub ac_adapter: bool,
    /// Extra ACPI tables (SDTs) to expose to the guest.
    pub acpi_sdts: Vec<SDT>,
    /// Android fstab file, if the guest needs the Android device-tree node.
    pub android_fstab: Option<File>,
    /// Index of the vCPU used for boot — presumably the BSP; confirm with
    /// per-arch `build_vm` implementations.
    pub boot_cpu: usize,
    /// Raw blob passed to the guest via fw_cfg to control boot order.
    pub bootorder_fw_cfg_blob: Vec<u8>,
    #[cfg(target_arch = "x86_64")]
    pub break_linux_pci_config_io: bool,
    /// Per-CPU capacity values, keyed by CPU index.
    pub cpu_capacity: BTreeMap<usize, u32>,
    /// Groups of CPUs forming clusters (e.g. big/little) exposed to the guest.
    pub cpu_clusters: Vec<CpuSet>,
    #[cfg(all(
        target_arch = "aarch64",
        any(target_os = "android", target_os = "linux")
    ))]
    pub cpu_frequencies: BTreeMap<usize, Vec<u32>>,
    pub delay_rt: bool,
    /// Guest-controlled device power management interface, if enabled.
    pub dev_pm: Option<DevicePowerManagerConfig>,
    pub dynamic_power_coefficient: BTreeMap<usize, u32>,
    /// Additional parameters appended to the kernel command line.
    pub extra_kernel_params: Vec<String>,
    #[cfg(target_arch = "x86_64")]
    pub force_s2idle: bool,
    /// Whether to instantiate the fw_cfg device.
    pub fw_cfg_enable: bool,
    pub fw_cfg_parameters: Vec<FwCfgParameters>,
    /// Mirror the host CPU topology into the guest.
    pub host_cpu_topology: bool,
    pub hugepages: bool,
    /// Hypervisor configuration (including the protection type checked by `pvm_fw`).
    pub hv_cfg: hypervisor::Config,
    pub initrd_image: Option<File>,
    pub itmt: bool,
    /// Guest RAM size — presumably in bytes; confirm with callers.
    pub memory_size: u64,
    pub no_i8042: bool,
    pub no_rtc: bool,
    pub no_smt: bool,
    #[cfg(all(
        target_arch = "aarch64",
        any(target_os = "android", target_os = "linux")
    ))]
    pub normalized_cpu_ipc_ratios: BTreeMap<usize, u32>,
    pub pci_config: PciConfig,
    pub pflash_block_size: u32,
    pub pflash_image: Option<File>,
    /// pstore (crash log persistence) configuration, if requested.
    pub pstore: Option<Pstore>,
    /// A file to load as pVM firmware. Must be `Some` iff
    /// `hv_cfg.protection_type == ProtectionType::UnprotectedWithFirmware`.
    pub pvm_fw: Option<File>,
    /// CPUs whose vCPU threads run with real-time priority.
    pub rt_cpus: CpuSet,
    #[cfg(target_arch = "x86_64")]
    pub smbios: SmbiosOptions,
    pub smccc_trng: bool,
    #[cfg(target_arch = "aarch64")]
    pub sve_config: SveConfig,
    /// swiotlb size, if one is requested — units not evident here; confirm.
    pub swiotlb: Option<u64>,
    pub vcpu_affinity: Option<VcpuAffinity>,
    pub vcpu_count: usize,
    #[cfg(all(
        target_arch = "aarch64",
        any(target_os = "android", target_os = "linux")
    ))]
    pub vcpu_domain_paths: BTreeMap<usize, PathBuf>,
    #[cfg(all(
        target_arch = "aarch64",
        any(target_os = "android", target_os = "linux")
    ))]
    pub vcpu_domains: BTreeMap<usize, u32>,
    #[cfg(any(target_os = "android", target_os = "linux"))]
    pub vfio_platform_pm: bool,
    #[cfg(all(
        target_arch = "aarch64",
        any(target_os = "android", target_os = "linux")
    ))]
    pub virt_cpufreq_v2: bool,
    /// Kernel or BIOS image the VM boots from.
    pub vm_image: VmImage,
}
477
478
/// Holds the elements needed to run a Linux VM. Created by `build_vm`.
// `#[sorted]` (remain crate) enforces alphabetical field order.
#[sorted]
pub struct RunnableLinuxVm<V: VmArch, Vcpu: VcpuArch> {
    pub bat_control: Option<BatControl>,
    pub delay_rt: bool,
    /// Join handle for the device-servicing thread, if one was spawned.
    pub devices_thread: Option<std::thread::JoinHandle<()>>,
    /// Hotplug buses keyed by bus number.
    pub hotplug_bus: BTreeMap<u8, Arc<Mutex<dyn HotPlugBus>>>,
    pub hypercall_bus: Arc<Bus>,
    /// Port I/O bus.
    pub io_bus: Arc<Bus>,
    pub irq_chip: Box<dyn IrqChipArch>,
    /// Memory-mapped I/O bus; device BARs are inserted here (see
    /// `configure_pci_device`).
    pub mmio_bus: Arc<Bus>,
    pub no_smt: bool,
    /// Maps jailed-device child PIDs to debug labels (populated when devices
    /// run as `ProxyDevice` children).
    pub pid_debug_label_map: BTreeMap<u32, String>,
    #[cfg(any(target_os = "android", target_os = "linux"))]
    pub platform_devices: Vec<Arc<Mutex<dyn BusDevice>>>,
    pub pm: Option<Arc<Mutex<dyn PmResource + Send>>>,
    /// Devices to be notified before the system resumes from the S3 suspended state.
    pub resume_notify_devices: Vec<Arc<Mutex<dyn BusResumeDevice>>>,
    pub root_config: Arc<Mutex<PciRoot>>,
    pub rt_cpus: CpuSet,
    pub suspend_tube: (Arc<Mutex<SendTube>>, RecvTube),
    pub vcpu_affinity: Option<VcpuAffinity>,
    pub vcpu_count: usize,
    pub vcpu_init: Vec<VcpuInitArch>,
    /// If vcpus is None, then it's the responsibility of the vcpu thread to create vcpus.
    /// If it's Some, then `build_vm` already created the vcpus.
    pub vcpus: Option<Vec<Vcpu>>,
    pub vm: V,
    pub vm_request_tubes: Vec<Tube>,
}
508
509
/// The device and optional jail.
pub struct VirtioDeviceStub {
    /// The virtio device implementation.
    pub dev: Box<dyn VirtioDevice>,
    /// Minijail to sandbox the device in; `None` means the device runs unjailed.
    pub jail: Option<Minijail>,
}
514
515
/// Trait which is implemented for each Linux Architecture in order to
/// set up the memory, cpus, and system devices and to boot the kernel.
pub trait LinuxArch {
    /// Error type returned by this architecture's setup methods.
    type Error: StdError;
    /// Architecture-specific memory layout details, computed once by
    /// `arch_memory_layout` and threaded through the later setup stages.
    type ArchMemoryLayout;

    /// Decide architecture specific memory layout details to be used by later stages of the VM
    /// setup.
    fn arch_memory_layout(
        components: &VmComponents,
    ) -> std::result::Result<Self::ArchMemoryLayout, Self::Error>;

    /// Returns a Vec of the valid memory addresses as tuples of address, length, and region
    /// options. These should be used to configure the `GuestMemory` structure for the platform.
    ///
    /// # Arguments
    ///
    /// * `components` - Parts used to determine the memory layout.
    fn guest_memory_layout(
        components: &VmComponents,
        arch_memory_layout: &Self::ArchMemoryLayout,
        hypervisor: &impl hypervisor::Hypervisor,
    ) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error>;

    /// Gets the configuration for a new `SystemAllocator` that fits the given `Vm`'s memory layout.
    ///
    /// This is the per-architecture template for constructing the `SystemAllocator`. Platform
    /// agnostic modifications may be made to this configuration, but the final `SystemAllocator`
    /// will be at least as strict as this configuration.
    ///
    /// # Arguments
    ///
    /// * `vm` - The virtual machine to be used as a template for the `SystemAllocator`.
    fn get_system_allocator_config<V: Vm>(
        vm: &V,
        arch_memory_layout: &Self::ArchMemoryLayout,
    ) -> SystemAllocatorConfig;

    /// Takes `VmComponents` and generates a `RunnableLinuxVm`.
    ///
    /// # Arguments
    ///
    /// * `components` - Parts to use to build the VM.
    /// * `vm_evt_wrtube` - Tube used by sub-devices to request that crosvm exit because guest wants
    ///   to stop/shut down or requested reset.
    /// * `system_allocator` - Allocator created by this trait's implementation of
    ///   `get_system_allocator_config`.
    /// * `serial_parameters` - Definitions for how the serial devices should be configured.
    /// * `serial_jail` - Jail used for serial devices created here.
    /// * `battery` - Defines what battery device will be created.
    /// * `vm` - A VM implementation to build upon.
    /// * `ramoops_region` - Region allocated for ramoops.
    /// * `devices` - The devices to be built into the VM.
    /// * `irq_chip` - The IRQ chip implementation for the VM.
    /// * `debugcon_jail` - Jail used for debugcon devices created here.
    /// * `pflash_jail` - Jail used for pflash device created here.
    /// * `fw_cfg_jail` - Jail used for fw_cfg device created here.
    /// * `device_tree_overlays` - Device tree overlay binaries
    fn build_vm<V, Vcpu>(
        components: VmComponents,
        arch_memory_layout: &Self::ArchMemoryLayout,
        vm_evt_wrtube: &SendTube,
        system_allocator: &mut SystemAllocator,
        serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
        serial_jail: Option<Minijail>,
        battery: (Option<BatteryType>, Option<Minijail>),
        vm: V,
        ramoops_region: Option<pstore::RamoopsRegion>,
        devices: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
        irq_chip: &mut dyn IrqChipArch,
        vcpu_ids: &mut Vec<usize>,
        dump_device_tree_blob: Option<PathBuf>,
        debugcon_jail: Option<Minijail>,
        #[cfg(target_arch = "x86_64")] pflash_jail: Option<Minijail>,
        #[cfg(target_arch = "x86_64")] fw_cfg_jail: Option<Minijail>,
        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
        guest_suspended_cvar: Option<Arc<(Mutex<bool>, Condvar)>>,
        device_tree_overlays: Vec<DtbOverlay>,
        fdt_position: Option<FdtPosition>,
        no_pmu: bool,
    ) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
    where
        V: VmArch,
        Vcpu: VcpuArch;

    /// Configures the vcpu and should be called once per vcpu from the vcpu's thread.
    ///
    /// # Arguments
    ///
    /// * `vm` - The virtual machine object.
    /// * `hypervisor` - The `Hypervisor` that created the vcpu.
    /// * `irq_chip` - The `IrqChip` associated with this vm.
    /// * `vcpu` - The VCPU object to configure.
    /// * `vcpu_init` - The data required to initialize VCPU registers and other state.
    /// * `vcpu_id` - The id of the given `vcpu`.
    /// * `num_cpus` - Number of virtual CPUs the guest will have.
    /// * `cpu_config` - CPU feature configurations.
    fn configure_vcpu<V: Vm>(
        vm: &V,
        hypervisor: &dyn HypervisorArch,
        irq_chip: &mut dyn IrqChipArch,
        vcpu: &mut dyn VcpuArch,
        vcpu_init: VcpuInitArch,
        vcpu_id: usize,
        num_cpus: usize,
        cpu_config: Option<CpuConfigArch>,
    ) -> Result<(), Self::Error>;

    /// Configures and adds a PCI device to the VM, returning its assigned address.
    fn register_pci_device<V: VmArch, Vcpu: VcpuArch>(
        linux: &mut RunnableLinuxVm<V, Vcpu>,
        device: Box<dyn PciDevice>,
        #[cfg(any(target_os = "android", target_os = "linux"))] minijail: Option<Minijail>,
        resources: &mut SystemAllocator,
        hp_control_tube: &mpsc::Sender<PciRootCommand>,
        #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
    ) -> Result<PciAddress, Self::Error>;

    /// Returns frequency map for each of the host's logical cores.
    fn get_host_cpu_frequencies_khz() -> Result<BTreeMap<usize, Vec<u32>>, Self::Error>;

    /// Returns max-freq map of the host's logical cores.
    fn get_host_cpu_max_freq_khz() -> Result<BTreeMap<usize, u32>, Self::Error>;

    /// Returns capacity map of the host's logical cores.
    fn get_host_cpu_capacity() -> Result<BTreeMap<usize, u32>, Self::Error>;

    /// Returns cluster masks for each of the host's logical cores.
    fn get_host_cpu_clusters() -> Result<Vec<CpuSet>, Self::Error>;
}
645
646
/// Operations the gdb stub needs to inspect and control a guest vCPU and its memory.
#[cfg(feature = "gdb")]
pub trait GdbOps<T: VcpuArch> {
    /// Error type returned by all debugging operations.
    type Error: StdError;

    /// Reads vCPU's registers.
    fn read_registers(vcpu: &T) -> Result<<GdbArch as Arch>::Registers, Self::Error>;

    /// Writes vCPU's registers.
    fn write_registers(vcpu: &T, regs: &<GdbArch as Arch>::Registers) -> Result<(), Self::Error>;

    /// Reads bytes from the guest memory.
    fn read_memory(
        vcpu: &T,
        guest_mem: &GuestMemory,
        vaddr: GuestAddress,
        len: usize,
    ) -> Result<Vec<u8>, Self::Error>;

    /// Writes bytes to the specified guest memory.
    fn write_memory(
        vcpu: &T,
        guest_mem: &GuestMemory,
        vaddr: GuestAddress,
        buf: &[u8],
    ) -> Result<(), Self::Error>;

    /// Reads bytes from the guest register.
    ///
    /// Returns an empty vector if `reg_id` is valid but the register is not available.
    fn read_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>, Self::Error>;

    /// Writes bytes to the specified guest register.
    fn write_register(
        vcpu: &T,
        reg_id: <GdbArch as Arch>::RegId,
        data: &[u8],
    ) -> Result<(), Self::Error>;

    /// Make the next vCPU's run single-step.
    fn enable_singlestep(vcpu: &T) -> Result<(), Self::Error>;

    /// Get maximum number of hardware breakpoints.
    fn get_max_hw_breakpoints(vcpu: &T) -> Result<usize, Self::Error>;

    /// Set hardware breakpoints at the given addresses.
    fn set_hw_breakpoints(vcpu: &T, breakpoints: &[GuestAddress]) -> Result<(), Self::Error>;
}
693
694
/// Errors for device manager.
// `#[sorted]` (remain crate) enforces alphabetical variant order.
#[sorted]
#[derive(Error, Debug)]
pub enum DeviceRegistrationError {
    /// No more MMIO space available.
    #[error("no more addresses are available")]
    AddrsExhausted,
    /// Could not allocate device address space for the device.
    #[error("Allocating device addresses: {0}")]
    AllocateDeviceAddrs(PciDeviceError),
    /// Could not allocate IO space for the device.
    #[error("Allocating IO addresses: {0}")]
    AllocateIoAddrs(PciDeviceError),
    /// Could not allocate MMIO or IO resource for the device.
    #[error("Allocating IO resource: {0}")]
    AllocateIoResource(resources::Error),
    /// Could not allocate an IRQ number.
    #[error("Allocating IRQ number")]
    AllocateIrq,
    /// Could not allocate IRQ resource for the device.
    #[cfg(any(target_os = "android", target_os = "linux"))]
    #[error("Allocating IRQ resource: {0}")]
    AllocateIrqResource(devices::vfio::VfioError),
    /// Could not attach the device to its power domain.
    #[error("failed to attach the device to its power domain: {0}")]
    AttachDevicePowerDomain(anyhow::Error),
    /// Broken pci topology.
    #[error("pci topology is broken")]
    BrokenPciTopology,
    /// Unable to clone a jail for the device.
    #[cfg(any(target_os = "android", target_os = "linux"))]
    #[error("failed to clone jail: {0}")]
    CloneJail(minijail::Error),
    /// Appending to kernel command line failed.
    #[error("unable to add device to kernel command line: {0}")]
    Cmdline(kernel_cmdline::Error),
    /// Configure window size failed.
    #[error("failed to configure window size: {0}")]
    ConfigureWindowSize(PciDeviceError),
    /// Unable to create a pipe.
    #[error("failed to create pipe: {0}")]
    CreatePipe(base::Error),
    /// Unable to create a root.
    #[error("failed to create pci root: {0}")]
    CreateRoot(anyhow::Error),
    /// Unable to create serial device from serial parameters.
    #[error("failed to create serial device: {0}")]
    CreateSerialDevice(devices::SerialError),
    /// Unable to create tube.
    #[error("failed to create tube: {0}")]
    CreateTube(base::TubeError),
    /// Could not clone an event.
    #[error("failed to clone event: {0}")]
    EventClone(base::Error),
    /// Could not create an event.
    #[error("failed to create event: {0}")]
    EventCreate(base::Error),
    /// Failed to generate ACPI content.
    #[error("failed to generate ACPI content")]
    GenerateAcpi,
    /// No more IRQs are available.
    #[error("no more IRQs are available")]
    IrqsExhausted,
    /// VFIO device is missing a DT symbol.
    #[error("cannot match VFIO device to DT node due to a missing symbol")]
    MissingDeviceTreeSymbol,
    /// Missing a required serial device.
    #[error("missing required serial device {0}")]
    MissingRequiredSerialDevice(u8),
    /// Could not add a device to the mmio bus.
    #[error("failed to add to mmio bus: {0}")]
    MmioInsert(BusError),
    /// Failed to insert device into PCI root.
    #[error("failed to insert device into PCI root: {0}")]
    PciRootAddDevice(PciDeviceError),
    #[cfg(any(target_os = "android", target_os = "linux"))]
    /// Failed to initialize proxy device for jailed device.
    #[error("failed to create proxy device: {0}")]
    ProxyDeviceCreation(devices::ProxyError),
    #[cfg(any(target_os = "android", target_os = "linux"))]
    /// Failed to register battery device.
    #[error("failed to register battery device to VM: {0}")]
    RegisterBattery(devices::BatteryError),
    /// Could not register PCI device to pci root bus.
    #[error("failed to register PCI device to pci root bus")]
    RegisterDevice(SendError<PciRootCommand>),
    /// Could not register PCI device capabilities.
    #[error("could not register PCI device capabilities: {0}")]
    RegisterDeviceCapabilities(PciDeviceError),
    /// Failed to register ioevent with VM.
    #[error("failed to register ioevent to VM: {0}")]
    RegisterIoevent(base::Error),
    /// Failed to register irq event with VM.
    #[error("failed to register irq event to VM: {0}")]
    RegisterIrqfd(base::Error),
    /// Could not setup VFIO platform IRQ for the device.
    #[error("Setting up VFIO platform IRQ: {0}")]
    SetupVfioPlatformIrq(anyhow::Error),
}
792
793
/// Configures a PCI device for use by this VM.
///
/// Allocates the device's PCI address and BARs, wires up a fixed INTx IRQ if the
/// device requests one, optionally wraps the device in a jailed `ProxyDevice`
/// (unix-like targets only), registers it with the PCI root, and routes its BAR
/// ranges on the MMIO bus. Returns the assigned `PciAddress`.
pub fn configure_pci_device<V: VmArch, Vcpu: VcpuArch>(
    linux: &mut RunnableLinuxVm<V, Vcpu>,
    mut device: Box<dyn PciDevice>,
    #[cfg(any(target_os = "android", target_os = "linux"))] jail: Option<Minijail>,
    resources: &mut SystemAllocator,
    hp_control_tube: &mpsc::Sender<PciRootCommand>,
    #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
) -> Result<PciAddress, DeviceRegistrationError> {
    // Allocate PCI device address before allocating BARs.
    let pci_address = device
        .allocate_address(resources)
        .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;

    // Allocate ranges that may need to be in the low MMIO region (MmioType::Low).
    let mmio_ranges = device
        .allocate_io_bars(resources)
        .map_err(DeviceRegistrationError::AllocateIoAddrs)?;

    // Allocate device ranges that may be in low or high MMIO after low-only ranges.
    let device_ranges = device
        .allocate_device_bars(resources)
        .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;

    // If device is a pcie bridge, add its pci bus to pci root
    if let Some(pci_bus) = device.get_new_pci_bus() {
        hp_control_tube
            .send(PciRootCommand::AddBridge(pci_bus))
            .map_err(DeviceRegistrationError::RegisterDevice)?;
        // The bridge window is configured with an empty BAR list here —
        // NOTE(review): presumably children arrive later via hotplug; confirm.
        let bar_ranges = Vec::new();
        device
            .configure_bridge_window(resources, &bar_ranges)
            .map_err(DeviceRegistrationError::ConfigureWindowSize)?;
    }

    // Do not suggest INTx for hot-plug devices.
    let intx_event = devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;

    // Only wire a level-triggered INTx line if the device asks for a fixed pin/GSI.
    if let PreferredIrq::Fixed { pin, gsi } = device.preferred_irq() {
        resources.reserve_irq(gsi);

        device.assign_irq(
            intx_event
                .try_clone()
                .map_err(DeviceRegistrationError::EventClone)?,
            pin,
            gsi,
        );

        linux
            .irq_chip
            .as_irq_chip_mut()
            .register_level_irq_event(gsi, &intx_event, IrqEventSource::from_device(&device))
            .map_err(DeviceRegistrationError::RegisterIrqfd)?;
    }

    // Descriptors the device must keep open across the jail fork, plus those
    // needed by logging, tracing, and metrics inside the child.
    let mut keep_rds = device.keep_rds();
    syslog::push_descriptors(&mut keep_rds);
    cros_tracing::push_descriptors!(&mut keep_rds);
    metrics::push_descriptors(&mut keep_rds);

    device
        .register_device_capabilities()
        .map_err(DeviceRegistrationError::RegisterDeviceCapabilities)?;

    // Unix-like targets: run the device in a jailed child process (ProxyDevice)
    // when a jail was provided, otherwise in-process.
    #[cfg(any(target_os = "android", target_os = "linux"))]
    let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
        let proxy = ProxyDevice::new(
            device,
            jail,
            keep_rds,
            #[cfg(feature = "swap")]
            swap_controller,
        )
        .map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
        // Record the child PID so it can be labeled in debug output.
        linux
            .pid_debug_label_map
            .insert(proxy.pid() as u32, proxy.debug_label());
        Arc::new(Mutex::new(proxy))
    } else {
        device.on_sandboxed();
        Arc::new(Mutex::new(device))
    };

    // Windows has no minijail; the device always runs in-process.
    #[cfg(windows)]
    let arced_dev = {
        device.on_sandboxed();
        Arc::new(Mutex::new(device))
    };

    #[cfg(any(target_os = "android", target_os = "linux"))]
    hp_control_tube
        .send(PciRootCommand::Add(pci_address, arced_dev.clone()))
        .map_err(DeviceRegistrationError::RegisterDevice)?;

    // Route accesses to both the low-MMIO BARs and the device BARs to this device.
    for range in &mmio_ranges {
        linux
            .mmio_bus
            .insert(arced_dev.clone(), range.addr, range.size)
            .map_err(DeviceRegistrationError::MmioInsert)?;
    }

    for range in &device_ranges {
        linux
            .mmio_bus
            .insert(arced_dev.clone(), range.addr, range.size)
            .map_err(DeviceRegistrationError::MmioInsert)?;
    }

    Ok(pci_address)
}
904
905
// Generate pci topology starting from parent bus
//
// Walks `device_addrs` looking for devices on `parent_bus`'s bus number. Bridge
// devices (those that report a child `PciBus` via `get_new_pci_bus()`) are
// recursed into first so their secondary buses are fully allocated before the
// bridge window is configured; non-bridge devices get their BARs allocated in a
// second pass. Allocated I/O and device BAR ranges are recorded per device
// index in `io_ranges` / `device_ranges`.
//
// Returns the accumulated BAR ranges for everything at or below this bus, plus
// the highest (subordinate) bus number reachable through this bus.
fn generate_pci_topology(
    parent_bus: Arc<Mutex<PciBus>>,
    resources: &mut SystemAllocator,
    io_ranges: &mut BTreeMap<usize, Vec<BarRange>>,
    device_ranges: &mut BTreeMap<usize, Vec<BarRange>>,
    device_addrs: &[PciAddress],
    devices: &mut Vec<(Box<dyn PciDevice>, Option<Minijail>)>,
) -> Result<(Vec<BarRange>, u8), DeviceRegistrationError> {
    let mut bar_ranges = Vec::new();
    let bus_num = parent_bus.lock().get_bus_num();
    // A leaf bus's subordinate bus number is its own bus number.
    let mut subordinate_bus = bus_num;
    for (dev_idx, addr) in device_addrs.iter().enumerate() {
        // Only target for devices that located on this bus
        if addr.bus == bus_num {
            // If this device is a pci bridge (a.k.a., it has a pci bus structure),
            // create its topology recursively
            if let Some(child_bus) = devices[dev_idx].0.get_new_pci_bus() {
                let (child_bar_ranges, child_sub_bus) = generate_pci_topology(
                    child_bus.clone(),
                    resources,
                    io_ranges,
                    device_ranges,
                    device_addrs,
                    devices,
                )?;
                // Re-borrow after recursion (the recursive call needed `devices`).
                let device = &mut devices[dev_idx].0;
                parent_bus
                    .lock()
                    .add_child_bus(child_bus.clone())
                    .map_err(|_| DeviceRegistrationError::BrokenPciTopology)?;
                // Size the bridge window to cover everything allocated below it.
                let bridge_window = device
                    .configure_bridge_window(resources, &child_bar_ranges)
                    .map_err(DeviceRegistrationError::ConfigureWindowSize)?;
                bar_ranges.extend(bridge_window);

                let ranges = device
                    .allocate_io_bars(resources)
                    .map_err(DeviceRegistrationError::AllocateIoAddrs)?;
                io_ranges.insert(dev_idx, ranges.clone());
                bar_ranges.extend(ranges);

                let ranges = device
                    .allocate_device_bars(resources)
                    .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
                device_ranges.insert(dev_idx, ranges.clone());
                bar_ranges.extend(ranges);

                device.set_subordinate_bus(child_sub_bus);

                subordinate_bus = std::cmp::max(subordinate_bus, child_sub_bus);
            }
        }
    }

    for (dev_idx, addr) in device_addrs.iter().enumerate() {
        if addr.bus == bus_num {
            let device = &mut devices[dev_idx].0;
            // Allocate MMIO for non-bridge devices
            if device.get_new_pci_bus().is_none() {
                let ranges = device
                    .allocate_io_bars(resources)
                    .map_err(DeviceRegistrationError::AllocateIoAddrs)?;
                io_ranges.insert(dev_idx, ranges.clone());
                bar_ranges.extend(ranges);

                let ranges = device
                    .allocate_device_bars(resources)
                    .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
                device_ranges.insert(dev_idx, ranges.clone());
                bar_ranges.extend(ranges);
            }
        }
    }
    Ok((bar_ranges, subordinate_bus))
}
/// Ensure all PCI devices have an assigned PCI address.
983
pub fn assign_pci_addresses(
984
devices: &mut [(Box<dyn BusDeviceObj>, Option<Minijail>)],
985
resources: &mut SystemAllocator,
986
) -> Result<(), DeviceRegistrationError> {
987
// First allocate devices with a preferred address.
988
for pci_device in devices
989
.iter_mut()
990
.filter_map(|(device, _jail)| device.as_pci_device_mut())
991
.filter(|pci_device| pci_device.preferred_address().is_some())
992
{
993
let _ = pci_device
994
.allocate_address(resources)
995
.map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
996
}
997
998
// Then allocate addresses for the remaining devices.
999
for pci_device in devices
1000
.iter_mut()
1001
.filter_map(|(device, _jail)| device.as_pci_device_mut())
1002
.filter(|pci_device| pci_device.preferred_address().is_none())
1003
{
1004
let _ = pci_device
1005
.allocate_address(resources)
1006
.map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
1007
}
1008
1009
Ok(())
1010
}
/// Creates a root PCI device for use by this Vm.
///
/// Assigns PCI addresses and BARs to `devices` (building bridge topology via
/// `generate_pci_topology`), allocates or shares legacy INTx IRQs (capped at
/// `max_irqs` distinct lines), registers each device with a newly created
/// `PciRoot`, and inserts its BAR ranges into `mmio_bus`. On Android/Linux,
/// devices with a jail are wrapped in `ProxyDevice`; on Windows jails are
/// always `None`.
///
/// On success returns a tuple of:
/// * the populated `PciRoot`,
/// * `(PciAddress, gsi, PciInterruptPin)` INTx assignments,
/// * proxy-device PID -> debug label map (only populated on Android/Linux),
/// * per-device ACPI method AML keyed by PCI address,
/// * per-device GPE scope AML keyed by PCI address.
pub fn generate_pci_root(
    mut devices: Vec<(Box<dyn PciDevice>, Option<Minijail>)>,
    irq_chip: &mut dyn IrqChip,
    mmio_bus: Arc<Bus>,
    mmio_base: GuestAddress,
    mmio_register_bit_num: usize,
    io_bus: Arc<Bus>,
    resources: &mut SystemAllocator,
    vm: &mut impl Vm,
    max_irqs: usize,
    vcfg_base: Option<u64>,
    #[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
) -> Result<
    (
        PciRoot,
        Vec<(PciAddress, u32, PciInterruptPin)>,
        BTreeMap<u32, String>,
        BTreeMap<PciAddress, Vec<u8>>,
        BTreeMap<PciAddress, Vec<u8>>,
    ),
    DeviceRegistrationError,
> {
    // `device_addrs[i]` is the PCI address of `devices[i]`.
    let mut device_addrs = Vec::new();

    for (device, _jail) in devices.iter_mut() {
        let address = device
            .allocate_address(resources)
            .map_err(DeviceRegistrationError::AllocateDeviceAddrs)?;
        device_addrs.push(address);
    }

    let mut device_ranges = BTreeMap::new();
    let mut io_ranges = BTreeMap::new();
    let root_bus = Arc::new(Mutex::new(PciBus::new(0, 0, false)));

    generate_pci_topology(
        root_bus.clone(),
        resources,
        &mut io_ranges,
        &mut device_ranges,
        &device_addrs,
        &mut devices,
    )?;

    let mut root = PciRoot::new(
        vm,
        Arc::downgrade(&mmio_bus),
        mmio_base,
        mmio_register_bit_num,
        Arc::downgrade(&io_bus),
        root_bus,
    )
    .map_err(DeviceRegistrationError::CreateRoot)?;
    // On Windows no ProxyDevice is ever created, so `pid_labels` is never
    // mutated there — suppress the unused_mut warning.
    #[cfg_attr(windows, allow(unused_mut))]
    let mut pid_labels = BTreeMap::new();

    // Allocate legacy INTx
    let mut pci_irqs = Vec::new();
    let mut irqs: Vec<u32> = Vec::new();

    // Mapping of (bus, dev, pin) -> IRQ number.
    let mut dev_pin_irq = BTreeMap::new();

    for (dev_idx, (device, _jail)) in devices.iter_mut().enumerate() {
        let pci_address = device_addrs[dev_idx];

        let irq = match device.preferred_irq() {
            PreferredIrq::Fixed { pin, gsi } => {
                // The device reported a preferred IRQ, so use that rather than allocating one.
                resources.reserve_irq(gsi);
                Some((pin, gsi))
            }
            PreferredIrq::Any => {
                // The device did not provide a preferred IRQ but requested one, so allocate one.

                // Choose a pin based on the slot's function number. Function 0 must always use
                // INTA# for single-function devices per the PCI spec, and we choose to use INTA#
                // for function 0 on multifunction devices and distribute the remaining functions
                // evenly across the other pins.
                let pin = match pci_address.func % 4 {
                    0 => PciInterruptPin::IntA,
                    1 => PciInterruptPin::IntB,
                    2 => PciInterruptPin::IntC,
                    _ => PciInterruptPin::IntD,
                };

                // If an IRQ number has already been assigned for a different function with this
                // (bus, device, pin) combination, use it. Otherwise allocate a new one and insert
                // it into the map.
                let pin_key = (pci_address.bus, pci_address.dev, pin);
                let irq_num = if let Some(irq_num) = dev_pin_irq.get(&pin_key) {
                    *irq_num
                } else {
                    // If we have allocated fewer than `max_irqs` total, add a new irq to the `irqs`
                    // pool. Otherwise, share one of the existing `irqs`.
                    let irq_num = if irqs.len() < max_irqs {
                        let irq_num = resources
                            .allocate_irq()
                            .ok_or(DeviceRegistrationError::AllocateIrq)?;
                        irqs.push(irq_num);
                        irq_num
                    } else {
                        // Pick one of the existing IRQs to share, using `dev_idx` to distribute IRQ
                        // sharing evenly across devices.
                        irqs[dev_idx % max_irqs]
                    };

                    dev_pin_irq.insert(pin_key, irq_num);
                    irq_num
                };
                Some((pin, irq_num))
            }
            PreferredIrq::None => {
                // The device does not want an INTx# IRQ.
                None
            }
        };

        if let Some((pin, gsi)) = irq {
            let intx_event =
                devices::IrqLevelEvent::new().map_err(DeviceRegistrationError::EventCreate)?;

            device.assign_irq(
                intx_event
                    .try_clone()
                    .map_err(DeviceRegistrationError::EventClone)?,
                pin,
                gsi,
            );

            irq_chip
                .register_level_irq_event(gsi, &intx_event, IrqEventSource::from_device(device))
                .map_err(DeviceRegistrationError::RegisterIrqfd)?;

            pci_irqs.push((pci_address, gsi, pin));
        }
    }

    // To prevent issues where device's on_sandbox may spawn thread before all
    // sandboxed devices are sandboxed we partition iterator to go over sandboxed
    // first. This is needed on linux platforms. On windows, this is a no-op since
    // jails are always None, even for sandboxed devices.
    let devices = {
        let (sandboxed, non_sandboxed): (Vec<_>, Vec<_>) = devices
            .into_iter()
            .enumerate()
            .partition(|(_, (_, jail))| jail.is_some());
        sandboxed.into_iter().chain(non_sandboxed)
    };

    let mut amls = BTreeMap::new();
    let mut gpe_scope_amls = BTreeMap::new();
    for (dev_idx, dev_value) in devices {
        #[cfg(any(target_os = "android", target_os = "linux"))]
        let (mut device, jail) = dev_value;
        #[cfg(windows)]
        let (mut device, _) = dev_value;
        let address = device_addrs[dev_idx];

        // Collect descriptors that must stay open inside the device's sandbox:
        // the device's own, plus logging/tracing/metrics and guest memory.
        let mut keep_rds = device.keep_rds();
        syslog::push_descriptors(&mut keep_rds);
        cros_tracing::push_descriptors!(&mut keep_rds);
        metrics::push_descriptors(&mut keep_rds);
        keep_rds.append(&mut vm.get_memory().as_raw_descriptors());

        let ranges = io_ranges.remove(&dev_idx).unwrap_or_default();
        let device_ranges = device_ranges.remove(&dev_idx).unwrap_or_default();
        device
            .register_device_capabilities()
            .map_err(DeviceRegistrationError::RegisterDeviceCapabilities)?;

        if let Some(vcfg_base) = vcfg_base {
            let (methods, shm) = device.generate_acpi_methods();
            if !methods.is_empty() {
                amls.insert(address, methods);
            }
            if let Some((offset, mmap)) = shm {
                // Best effort: a failure to map the device's ACPI shared memory
                // region is ignored here.
                let _ = vm.add_memory_region(
                    GuestAddress(vcfg_base + offset as u64),
                    Box::new(mmap),
                    false,
                    false,
                    MemCacheType::CacheCoherent,
                );
            }
        }
        let gpe_nr = device.set_gpe(resources);

        #[cfg(any(target_os = "android", target_os = "linux"))]
        let arced_dev: Arc<Mutex<dyn BusDevice>> = if let Some(jail) = jail {
            let proxy = ProxyDevice::new(
                device,
                jail,
                keep_rds,
                #[cfg(feature = "swap")]
                swap_controller,
            )
            .map_err(DeviceRegistrationError::ProxyDeviceCreation)?;
            pid_labels.insert(proxy.pid() as u32, proxy.debug_label());
            Arc::new(Mutex::new(proxy))
        } else {
            device.on_sandboxed();
            Arc::new(Mutex::new(device))
        };
        #[cfg(windows)]
        let arced_dev = {
            device.on_sandboxed();
            Arc::new(Mutex::new(device))
        };
        root.add_device(address, arced_dev.clone(), vm)
            .map_err(DeviceRegistrationError::PciRootAddDevice)?;
        for range in &ranges {
            mmio_bus
                .insert(arced_dev.clone(), range.addr, range.size)
                .map_err(DeviceRegistrationError::MmioInsert)?;
        }

        for range in &device_ranges {
            mmio_bus
                .insert(arced_dev.clone(), range.addr, range.size)
                .map_err(DeviceRegistrationError::MmioInsert)?;
        }

        if let Some(gpe_nr) = gpe_nr {
            if let Some(acpi_path) = root.acpi_path(&address) {
                let mut gpe_aml = Vec::new();

                GpeScope {}.cast_to_aml_bytes(
                    &mut gpe_aml,
                    gpe_nr,
                    format!("\\{acpi_path}").as_str(),
                );
                if !gpe_aml.is_empty() {
                    gpe_scope_amls.insert(address, gpe_aml);
                }
            }
        }
    }

    Ok((root, pci_irqs, pid_labels, amls, gpe_scope_amls))
}
/// Errors for image loading.
// Variants are kept in alphabetical order; `#[sorted]` enforces this at
// compile time.
#[sorted]
#[derive(Error, Debug)]
pub enum LoadImageError {
    #[error("Alignment not a power of two: {0}")]
    BadAlignment(u64),
    #[error("Getting image size failed: {0}")]
    GetLen(io::Error),
    #[error("GuestMemory get slice failed: {0}")]
    GuestMemorySlice(GuestMemoryError),
    #[error("Image size too large: {0}")]
    ImageSizeTooLarge(u64),
    #[error("No suitable memory region found")]
    NoSuitableMemoryRegion,
    #[error("Reading image into memory failed: {0}")]
    ReadToMemory(io::Error),
    #[error("Cannot load zero-sized image")]
    ZeroSizedImage,
}
/// Load an image from a file into guest memory.
1276
///
1277
/// # Arguments
1278
///
1279
/// * `guest_mem` - The memory to be used by the guest.
1280
/// * `guest_addr` - The starting address to load the image in the guest memory.
1281
/// * `max_size` - The amount of space in bytes available in the guest memory for the image.
1282
/// * `image` - The file containing the image to be loaded.
1283
///
1284
/// The size in bytes of the loaded image is returned.
1285
pub fn load_image<F>(
1286
guest_mem: &GuestMemory,
1287
image: &mut F,
1288
guest_addr: GuestAddress,
1289
max_size: u64,
1290
) -> Result<u32, LoadImageError>
1291
where
1292
F: FileReadWriteAtVolatile + FileGetLen,
1293
{
1294
let size = image.get_len().map_err(LoadImageError::GetLen)?;
1295
1296
if size > u32::MAX as u64 || size > max_size {
1297
return Err(LoadImageError::ImageSizeTooLarge(size));
1298
}
1299
1300
// This is safe due to the bounds check above.
1301
let size = size as u32;
1302
1303
let guest_slice = guest_mem
1304
.get_slice_at_addr(guest_addr, size as usize)
1305
.map_err(LoadImageError::GuestMemorySlice)?;
1306
image
1307
.read_exact_at_volatile(guest_slice, 0)
1308
.map_err(LoadImageError::ReadToMemory)?;
1309
1310
Ok(size)
1311
}
/// Load an image from a file into guest memory at the highest possible address.
///
/// # Arguments
///
/// * `guest_mem` - The memory to be used by the guest.
/// * `image` - The file containing the image to be loaded.
/// * `min_guest_addr` - The minimum address of the start of the image.
/// * `max_guest_addr` - The address to load the last byte of the image.
/// * `region_filter` - The optional filter function for determining if the given guest memory
///   region is suitable for loading the image into it.
/// * `align` - The minimum alignment of the start address of the image in bytes (must be a power of
///   two).
///
/// The guest address and size in bytes of the loaded image are returned.
pub fn load_image_high<F>(
    guest_mem: &GuestMemory,
    image: &mut F,
    min_guest_addr: GuestAddress,
    max_guest_addr: GuestAddress,
    region_filter: Option<fn(&MemoryRegionInformation) -> bool>,
    align: u64,
) -> Result<(GuestAddress, u32), LoadImageError>
where
    F: FileReadWriteAtVolatile + FileGetLen,
{
    if !align.is_power_of_two() {
        return Err(LoadImageError::BadAlignment(align));
    }

    // Largest image that could fit between min and max, rounded down to the
    // alignment. `align - 1` is a valid low-bit mask since `align` is a power
    // of two (checked above).
    let max_size = max_guest_addr.offset_from(min_guest_addr) & !(align - 1);
    let size = image.get_len().map_err(LoadImageError::GetLen)?;

    if size == 0 {
        return Err(LoadImageError::ZeroSizedImage);
    }

    if size > u32::MAX as u64 || size > max_size {
        return Err(LoadImageError::ImageSizeTooLarge(size));
    }

    // Sort the list of guest memory regions by address so we can iterate over them in reverse order
    // (high to low).
    let mut regions: Vec<_> = guest_mem
        .regions()
        .filter(region_filter.unwrap_or(|_| true))
        .collect();
    regions.sort_unstable_by(|a, b| a.guest_addr.cmp(&b.guest_addr));

    // Find the highest valid address inside a guest memory region that satisfies the requested
    // alignment and min/max address requirements while having enough space for the image.
    let guest_addr = regions
        .into_iter()
        .rev()
        .filter_map(|r| {
            // Highest address within this region (clamped to the caller's max).
            // `checked_sub(1)?` skips zero-sized regions; `checked_add(...)?`
            // skips regions whose end would overflow the address type.
            let rgn_max_addr = r
                .guest_addr
                .checked_add((r.size as u64).checked_sub(1)?)?
                .min(max_guest_addr);
            // Lowest aligned address within this region.
            let rgn_start_aligned = r.guest_addr.align(align)?;
            // Hypothetical address of the image if loaded at the end of the region.
            let image_addr = rgn_max_addr.checked_sub(size - 1)? & !(align - 1);

            // Would the image fit within the region?
            if image_addr >= rgn_start_aligned {
                Some(image_addr)
            } else {
                None
            }
        })
        .find(|&addr| addr >= min_guest_addr)
        .ok_or(LoadImageError::NoSuitableMemoryRegion)?;

    // This is safe due to the bounds check above.
    let size = size as u32;

    let guest_slice = guest_mem
        .get_slice_at_addr(guest_addr, size as usize)
        .map_err(LoadImageError::GuestMemorySlice)?;
    image
        .read_exact_at_volatile(guest_slice, 0)
        .map_err(LoadImageError::ReadToMemory)?;

    Ok((guest_addr, size))
}
/// SMBIOS table configuration
// Parsed from key-value syntax (kebab-case keys); unknown keys are rejected by
// `deny_unknown_fields`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, FromKeyValues, PartialEq, Eq)]
#[serde(deny_unknown_fields, rename_all = "kebab-case")]
pub struct SmbiosOptions {
    /// BIOS vendor name.
    pub bios_vendor: Option<String>,

    /// BIOS version number (free-form string).
    pub bios_version: Option<String>,

    /// System manufacturer name.
    pub manufacturer: Option<String>,

    /// System product name.
    pub product_name: Option<String>,

    /// System serial number (free-form string).
    pub serial_number: Option<String>,

    /// System UUID.
    pub uuid: Option<Uuid>,

    /// Additional OEM strings to add to SMBIOS table.
    // `default` allows the field to be omitted entirely (yielding an empty list).
    #[serde(default)]
    pub oem_strings: Vec<String>,
}
#[cfg(test)]
mod tests {
    use serde_keyvalue::from_key_values;
    use tempfile::tempfile;

    use super::*;

    #[test]
    fn parse_pstore() {
        // A fully-specified config parses into both fields.
        assert_eq!(
            from_key_values::<Pstore>("path=/some/path,size=16384").unwrap(),
            Pstore {
                path: "/some/path".into(),
                size: 16384,
            }
        );

        // `path` and `size` are both mandatory; omitting either (or both) fails.
        for bad_input in ["path=/some/path", "size=16384", ""] {
            assert!(from_key_values::<Pstore>(bad_input).is_err());
        }
    }

    #[test]
    fn deserialize_cpuset_serde_kv() {
        // Explicit CPU list.
        assert_eq!(
            from_key_values::<CpuSet>("[0,4,7]").unwrap(),
            CpuSet::new(vec![0, 4, 7])
        );

        // Inclusive range shorthand.
        assert_eq!(
            from_key_values::<CpuSet>("[9-12]").unwrap(),
            CpuSet::new(vec![9, 10, 11, 12])
        );

        // Mix of single CPUs and a range.
        assert_eq!(
            from_key_values::<CpuSet>("[0,4,7,9-12,15]").unwrap(),
            CpuSet::new(vec![0, 4, 7, 9, 10, 11, 12, 15])
        );
    }

    #[test]
    fn deserialize_serialize_cpuset_json() {
        // Each case round-trips: JSON -> CpuSet must match, and CpuSet -> JSON
        // must reproduce the exact original string.
        let cases = [
            ("[0,4,7]", CpuSet::new(vec![0, 4, 7])),
            (r#"["9-12"]"#, CpuSet::new(vec![9, 10, 11, 12])),
            (
                r#"[0,4,7,"9-12",15]"#,
                CpuSet::new(vec![0, 4, 7, 9, 10, 11, 12, 15]),
            ),
        ];
        for (json_str, cpuset) in cases {
            let parsed: CpuSet = serde_json::from_str(json_str).unwrap();
            assert_eq!(parsed, cpuset);
            assert_eq!(serde_json::to_string(&cpuset).unwrap(), json_str);
        }
    }

    #[test]
    fn load_image_high_max_4g() {
        // Two RAM regions with a gap between them.
        let mem = GuestMemory::new(&[
            (GuestAddress(0x0000_0000), 0x4000_0000), // 0x00000000..0x40000000
            (GuestAddress(0x8000_0000), 0x4000_0000), // 0x80000000..0xC0000000
        ])
        .unwrap();

        const TEST_IMAGE_SIZE: u64 = 1234;
        let mut test_image = tempfile().unwrap();
        test_image.set_len(TEST_IMAGE_SIZE).unwrap();

        const TEST_ALIGN: u64 = 0x8000;
        let (addr, size) = load_image_high(
            &mem,
            &mut test_image,
            GuestAddress(0x8000),
            GuestAddress(0xFFFF_FFFF), // max_guest_addr beyond highest guest memory region
            None,
            TEST_ALIGN,
        )
        .unwrap();

        // The image should land aligned at the top of the highest region.
        assert_eq!(addr, GuestAddress(0xBFFF_8000));
        assert_eq!(addr.offset() % TEST_ALIGN, 0);
        assert_eq!(size, TEST_IMAGE_SIZE as u32);
    }
}