Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
google
GitHub Repository: google/crosvm
Path: blob/main/hypervisor/src/whpx/vm.rs
5394 views
1
// Copyright 2022 The ChromiumOS Authors
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
use core::ffi::c_void;
6
use std::cmp::Reverse;
7
use std::collections::BTreeMap;
8
use std::collections::BinaryHeap;
9
use std::convert::TryInto;
10
use std::sync::Arc;
11
12
use base::error;
13
use base::info;
14
use base::pagesize;
15
use base::AsRawDescriptor;
16
use base::Error;
17
use base::Event;
18
use base::MappedRegion;
19
use base::MmapError;
20
use base::Protection;
21
use base::RawDescriptor;
22
use base::Result;
23
use base::SafeDescriptor;
24
use base::SendTube;
25
use fnv::FnvHashMap;
26
use libc::EEXIST;
27
use libc::EFAULT;
28
use libc::EINVAL;
29
use libc::EIO;
30
use libc::ENODEV;
31
use libc::ENOENT;
32
use libc::ENOSPC;
33
use libc::ENOTSUP;
34
use libc::EOVERFLOW;
35
use sync::Mutex;
36
use vm_memory::GuestAddress;
37
use vm_memory::GuestMemory;
38
use winapi::shared::winerror::ERROR_BUSY;
39
use winapi::shared::winerror::ERROR_SUCCESS;
40
use winapi::um::memoryapi::OfferVirtualMemory;
41
use winapi::um::memoryapi::ReclaimVirtualMemory;
42
use winapi::um::memoryapi::VmOfferPriorityBelowNormal;
43
use winapi::um::winnt::RtlZeroMemory;
44
45
use super::types::*;
46
use super::*;
47
use crate::host_phys_addr_bits;
48
use crate::whpx::whpx_sys::*;
49
use crate::BalloonEvent;
50
use crate::ClockState;
51
use crate::Datamatch;
52
use crate::DeliveryMode;
53
use crate::DestinationMode;
54
use crate::DeviceKind;
55
use crate::HypervisorKind;
56
use crate::IoEventAddress;
57
use crate::LapicState;
58
use crate::MemCacheType;
59
use crate::MemSlot;
60
use crate::TriggerMode;
61
use crate::VcpuX86_64;
62
use crate::Vm;
63
use crate::VmCap;
64
use crate::VmX86_64;
65
66
/// A virtual machine backed by the Windows Hypervisor Platform (WHPX).
///
/// Wraps a single WHPX partition together with the bookkeeping crosvm needs:
/// the guest memory layout, dynamically added memory regions (keyed by slot),
/// registered ioevents, and an optional tube for reporting VM events.
pub struct WhpxVm {
    // Handle to the Whpx hypervisor instance that created this VM.
    whpx: Whpx,
    // reference counted, since we need to implement try_clone or some variation.
    // There is only ever 1 create/1 delete partition unlike dup/close handle variations.
    vm_partition: Arc<SafePartition>,
    // The guest's boot memory layout.
    guest_mem: GuestMemory,
    // Memory regions added after VM creation, keyed by slot number. Shared with clones.
    mem_regions: Arc<Mutex<BTreeMap<MemSlot, (GuestAddress, Box<dyn MappedRegion>)>>>,
    /// A min heap of MemSlot numbers that were used and then removed and can now be re-used
    mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
    // WHPX's implementation of ioevents makes several assumptions about how crosvm uses ioevents:
    // 1. All ioevents are registered during device setup, and thus can be cloned when the vm is
    //    cloned instead of locked in an Arc<Mutex<>>. This will make handling ioevents in each
    //    vcpu thread easier because no locks will need to be acquired.
    // 2. All ioevents use Datamatch::AnyLength. We don't bother checking the datamatch, which
    //    will make this faster.
    // 3. We only ever register one eventfd to each address. This simplifies our data structure.
    ioevents: FnvHashMap<IoEventAddress, Event>,
    // Tube to send events to control.
    vm_evt_wrtube: Option<SendTube>,
}
86
87
impl WhpxVm {
    /// Creates a new WHPX partition configured with `cpu_count` processors, the
    /// supplied `cpuid` results, optional local APIC emulation, and the given
    /// guest memory (every boot region is mapped read/write into the partition).
    ///
    /// The partition is fully set up (`WHvSetupPartition`) before this returns,
    /// so the returned `WhpxVm` is ready for vcpu creation.
    pub fn new(
        whpx: &Whpx,
        cpu_count: usize,
        guest_mem: GuestMemory,
        cpuid: CpuId,
        apic_emulation: bool,
        vm_evt_wrtube: Option<SendTube>,
    ) -> WhpxResult<WhpxVm> {
        let partition = SafePartition::new()?;
        // setup partition defaults.
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        property.ProcessorCount = cpu_count as u32;
        // safe because we own this partition, and the partition property is allocated on the stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeProcessorCount,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetProcessorCount)?;

        // Pre-set any cpuid results in cpuid.
        let mut cpuid_results: Vec<WHV_X64_CPUID_RESULT> = cpuid
            .cpu_id_entries
            .iter()
            .map(WHV_X64_CPUID_RESULT::from)
            .collect();

        // Leaf HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS tells linux that it's running under Hyper-V.
        cpuid_results.push(WHV_X64_CPUID_RESULT {
            Function: HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
            Reserved: [0u32; 3],
            // HYPERV_CPUID_MIN is the minimum leaf that we need to support returning to the guest
            Eax: HYPERV_CPUID_MIN,
            // Ebx/Ecx/Edx spell out the "Microsoft Hv" vendor signature.
            Ebx: u32::from_le_bytes([b'M', b'i', b'c', b'r']),
            Ecx: u32::from_le_bytes([b'o', b's', b'o', b'f']),
            Edx: u32::from_le_bytes([b't', b' ', b'H', b'v']),
        });

        // HYPERV_CPUID_FEATURES leaf tells linux which Hyper-V features we support
        cpuid_results.push(WHV_X64_CPUID_RESULT {
            Function: HYPERV_CPUID_FEATURES,
            Reserved: [0u32; 3],
            // We only support frequency MSRs and the HV_ACCESS_TSC_INVARIANT feature, which means
            // TSC scaling/offseting is handled in hardware, not the guest.
            Eax: HV_ACCESS_FREQUENCY_MSRS
                | HV_ACCESS_TSC_INVARIANT
                | HV_MSR_REFERENCE_TSC_AVAILABLE,
            Ebx: 0,
            Edx: HV_FEATURE_FREQUENCY_MSRS_AVAILABLE,
            Ecx: 0,
        });

        // safe because we own this partition, and the cpuid_results vec is local to this function.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeCpuidResultList,
                cpuid_results.as_ptr() as *const _ as *const c_void,
                (std::mem::size_of::<WHV_X64_CPUID_RESULT>() * cpuid_results.len()) as UINT32,
            )
        })
        .map_err(WhpxError::SetCpuidResultList)?;

        // Setup exiting for cpuid leaves that we want crosvm to adjust, but that we can't pre-set.
        // We can't pre-set leaves that rely on irqchip information, and we cannot pre-set leaves
        // that return different results per-cpu.
        let exit_list: Vec<u32> = vec![0x1, 0x4, 0xB, 0x1F, 0x15];
        // safe because we own this partition, and the exit_list vec local to this function.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeCpuidExitList,
                exit_list.as_ptr() as *const _ as *const c_void,
                (std::mem::size_of::<u32>() * exit_list.len()) as UINT32,
            )
        })
        .map_err(WhpxError::SetCpuidExitList)?;

        // Setup exits for CPUID instruction.
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        // safe because we own this partition, and the partition property is allocated on the stack.
        unsafe {
            property
                .ExtendedVmExits
                .__bindgen_anon_1
                .set_X64CpuidExit(1);
            // X64MsrExit essentially causes WHPX to exit to crosvm when it would normally fail an
            // MSR access and inject a GP fault. Crosvm, in turn, now handles select MSR accesses
            // related to Hyper-V (see the handle_msr_* functions in vcpu.rs) and injects a GP
            // fault for any unhandled MSR accesses.
            property.ExtendedVmExits.__bindgen_anon_1.set_X64MsrExit(1);
        }
        // safe because we own this partition, and the partition property is allocated on the stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeExtendedVmExits,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetExtendedVmExits)?;

        // Fail early if the caller asked for APIC emulation but the host cannot provide it.
        if apic_emulation && !Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)? {
            return Err(WhpxError::LocalApicEmulationNotSupported);
        }

        // Setup apic emulation mode
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        property.LocalApicEmulationMode = if apic_emulation {
            // TODO(b/180966070): figure out if x2apic emulation mode is available on the host and
            // enable it if it is.
            WHV_X64_LOCAL_APIC_EMULATION_MODE_WHvX64LocalApicEmulationModeXApic
        } else {
            WHV_X64_LOCAL_APIC_EMULATION_MODE_WHvX64LocalApicEmulationModeNone
        };

        // safe because we own this partition, and the partition property is allocated on the stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeLocalApicEmulationMode,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetLocalApicEmulationMode)?;

        // safe because we own this partition
        check_whpx!(unsafe { WHvSetupPartition(partition.partition) })
            .map_err(WhpxError::SetupPartition)?;

        // Map every boot guest-memory region into the partition read/write,
        // without dirty-page tracking.
        for region in guest_mem.regions() {
            unsafe {
                // Safe because the guest regions are guaranteed not to overlap.
                set_user_memory_region(
                    &partition,
                    false, // read_only
                    false, // track dirty pages
                    region.guest_addr.offset(),
                    region.size as u64,
                    region.host_addr as *mut u8,
                )
            }
            .map_err(WhpxError::MapGpaRange)?;
        }

        Ok(WhpxVm {
            whpx: whpx.clone(),
            vm_partition: Arc::new(partition),
            guest_mem,
            mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
            mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            ioevents: FnvHashMap::default(),
            vm_evt_wrtube,
        })
    }

    /// Get the current state of the specified VCPU's local APIC
    pub fn get_vcpu_lapic_state(&self, vcpu_id: usize) -> Result<LapicState> {
        let buffer = WhpxLapicState { regs: [0u32; 1024] };
        let mut written_size = 0u32;
        let size = std::mem::size_of::<WhpxLapicState>();

        // Safety note: `buffer` lives on the stack for the duration of the call
        // and `size` matches its allocation; WHPX writes at most `size` bytes.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorInterruptControllerState(
                self.vm_partition.partition,
                vcpu_id as u32,
                buffer.regs.as_ptr() as *mut c_void,
                size as u32,
                &mut written_size,
            )
        })?;

        Ok(LapicState::from(&buffer))
    }

    /// Set the current state of the specified VCPU's local APIC
    pub fn set_vcpu_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()> {
        let buffer = WhpxLapicState::from(state);
        check_whpx!(unsafe {
            WHvSetVirtualProcessorInterruptControllerState(
                self.vm_partition.partition,
                vcpu_id as u32,
                buffer.regs.as_ptr() as *mut c_void,
                std::mem::size_of::<WhpxLapicState>() as u32,
            )
        })?;
        Ok(())
    }

    /// Request an interrupt be delivered to one or more virtualized interrupt controllers. This
    /// should only be used with ApicEmulationModeXApic or ApicEmulationModeX2Apic.
    ///
    /// Returns `ENOTSUP` for delivery modes WHPX cannot express (SMI, remote
    /// read, and external interrupts).
    pub fn request_interrupt(
        &self,
        vector: u8,
        dest_id: u8,
        dest_mode: DestinationMode,
        trigger: TriggerMode,
        delivery: DeliveryMode,
    ) -> Result<()> {
        // The WHV_INTERRUPT_CONTROL does not seem to support the dest_shorthand
        let mut interrupt = WHV_INTERRUPT_CONTROL {
            Destination: dest_id as u32,
            Vector: vector as u32,
            ..Default::default()
        };
        interrupt.set_DestinationMode(match dest_mode {
            DestinationMode::Physical => {
                WHV_INTERRUPT_DESTINATION_MODE_WHvX64InterruptDestinationModePhysical
            }
            DestinationMode::Logical => {
                WHV_INTERRUPT_DESTINATION_MODE_WHvX64InterruptDestinationModeLogical
            }
        } as u64);
        interrupt.set_TriggerMode(match trigger {
            TriggerMode::Edge => WHV_INTERRUPT_TRIGGER_MODE_WHvX64InterruptTriggerModeEdge,
            TriggerMode::Level => WHV_INTERRUPT_TRIGGER_MODE_WHvX64InterruptTriggerModeLevel,
        } as u64);
        interrupt.set_Type(match delivery {
            DeliveryMode::Fixed => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeFixed,
            DeliveryMode::Lowest => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeLowestPriority,
            DeliveryMode::SMI => {
                error!("WHPX does not support requesting an SMI");
                return Err(Error::new(ENOTSUP));
            }
            DeliveryMode::RemoteRead => {
                // This is also no longer supported by intel.
                error!("Remote Read interrupts are not supported by WHPX");
                return Err(Error::new(ENOTSUP));
            }
            DeliveryMode::NMI => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeNmi,
            DeliveryMode::Init => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeInit,
            DeliveryMode::Startup => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeSipi,
            DeliveryMode::External => {
                error!("WHPX does not support requesting an external interrupt");
                return Err(Error::new(ENOTSUP));
            }
        } as u64);

        check_whpx!(unsafe {
            WHvRequestInterrupt(
                self.vm_partition.partition,
                &interrupt,
                std::mem::size_of::<WHV_INTERRUPT_CONTROL>() as u32,
            )
        })
    }

    /// In order to fully unmap a memory range such that the host can reclaim the memory,
    /// we unmap it from the hypervisor partition, and then mark crosvm's process as uninterested
    /// in the memory.
    ///
    /// This will make crosvm unable to access the memory, and allow Windows to reclaim it for other
    /// uses when memory is in demand.
    fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        info!(
            "Balloon: Requested WHPX unmap of addr: {:?}, size: {:?}",
            guest_address, size
        );
        // Safe because WHPX does proper error checking, even if an out-of-bounds address is
        // provided.
        unsafe {
            check_whpx!(WHvUnmapGpaRange(
                self.vm_partition.partition,
                guest_address.offset(),
                size,
            ))?;
        }

        let host_address = self
            .guest_mem
            .get_host_address(guest_address)
            .map_err(|_| Error::new(1))? as *mut c_void;

        // Safe because we have just successfully unmapped this range from the
        // guest partition, so we know it's unused.
        let result =
            unsafe { OfferVirtualMemory(host_address, size as usize, VmOfferPriorityBelowNormal) };

        if result != ERROR_SUCCESS {
            let err = Error::new(result);
            error!("Freeing memory failed with error: {}", err);
            return Err(err);
        }
        Ok(())
    }

    /// Remap memory that has previously been unmapped with #handle_inflate. Note
    /// that attempts to remap pages that were not previously unmapped, or addresses that are not
    /// page-aligned, will result in failure.
    ///
    /// To do this, reclaim the memory from Windows first, then remap it into the hypervisor
    /// partition. Remapped memory has no guarantee of content, and the guest should not expect
    /// it to.
    fn handle_deflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        info!(
            "Balloon: Requested WHPX unmap of addr: {:?}, size: {:?}",
            guest_address, size
        );

        let host_address = self
            .guest_mem
            .get_host_address(guest_address)
            .map_err(|_| Error::new(1))? as *const c_void;

        // Note that we aren't doing any validation here that this range was previously unmapped.
        // However, we can avoid that expensive validation by relying on Windows error checking for
        // ReclaimVirtualMemory. The call will fail if:
        //      - If the range is not currently "offered"
        //      - The range is outside of current guest mem (GuestMemory will fail to convert the
        //        address)
        // In short, security is guaranteed by ensuring the guest can never reclaim ranges it
        // hadn't previously forfeited (and even then, the contents will be zeroed).
        //
        // Safe because the memory ranges in question are managed by Windows, not Rust.
        // Also, ReclaimVirtualMemory has built-in error checking for bad parameters.
        let result = unsafe { ReclaimVirtualMemory(host_address, size as usize) };

        if result == ERROR_BUSY || result == ERROR_SUCCESS {
            // In either of these cases, the contents of the reclaimed memory
            // are preserved or undefined. Regardless, zero the memory
            // to ensure no unintentional memory contents are shared.
            //
            // Safe because we just reclaimed the region in question and haven't yet remapped
            // it to the guest partition, so we know it's unused.
            unsafe { RtlZeroMemory(host_address as RawDescriptor, size as usize) };
        } else {
            let err = Error::new(result);
            error!("Reclaiming memory failed with error: {}", err);
            return Err(err);
        }

        // Safe because no-overlap is guaranteed by the success of ReclaimVirtualMemory,
        // Which would fail if it was called on areas which were not unmapped.
        unsafe {
            set_user_memory_region(
                &self.vm_partition,
                false, // read_only
                false, // track dirty pages
                guest_address.offset(),
                size,
                host_address as *mut u8,
            )
        }
    }
}
438
439
/// Wrapper around WHvMapGpaRange, which creates, modifies, or deletes a mapping
/// from guest physical to host user pages.
///
/// The mapping is always readable and executable; write access is granted
/// unless `read_only` is set, and dirty-page tracking is enabled when
/// `track_dirty_pages` is set.
///
/// # Safety
///
/// Safe when the guest regions are guaranteed not to overlap, and when
/// `userspace_addr` points to at least `memory_size` bytes of valid host
/// memory that outlives the mapping.
unsafe fn set_user_memory_region(
    partition: &SafePartition,
    read_only: bool,
    track_dirty_pages: bool,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    // Base permissions: read + execute; write is added below unless read_only.
    let mut flags = WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagRead
        | WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagExecute;
    if !read_only {
        flags |= WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagWrite
    }
    if track_dirty_pages {
        flags |= WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagTrackDirtyPages;
    }

    let ret = WHvMapGpaRange(
        partition.partition,
        userspace_addr as *mut c_void,
        guest_addr,
        memory_size,
        flags,
    );
    check_whpx!(ret)
}
469
470
/// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
471
/// size.
472
///
473
/// # Arguments
474
///
475
/// * `size` - Number of bytes in the memory region being queried.
476
pub fn dirty_log_bitmap_size(size: usize) -> usize {
477
let page_size = pagesize();
478
(((size + page_size - 1) / page_size) + 7) / 8
479
}
480
481
impl Vm for WhpxVm {
    /// Makes a shallow clone of this `Vm`.
    ///
    /// The partition handle and region/slot maps are shared via `Arc`; the
    /// ioevents and the event tube are duplicated per-clone.
    fn try_clone(&self) -> Result<Self> {
        let mut ioevents = FnvHashMap::default();
        for (addr, evt) in self.ioevents.iter() {
            ioevents.insert(*addr, evt.try_clone()?);
        }
        Ok(WhpxVm {
            whpx: self.whpx.try_clone()?,
            vm_partition: self.vm_partition.clone(),
            guest_mem: self.guest_mem.clone(),
            mem_regions: self.mem_regions.clone(),
            mem_slot_gaps: self.mem_slot_gaps.clone(),
            ioevents,
            vm_evt_wrtube: self
                .vm_evt_wrtube
                .as_ref()
                .map(|t| t.try_clone().expect("could not clone vm_evt_wrtube")),
        })
    }

    // A WHPX partition is not represented by a single OS descriptor, so this
    // is unsupported.
    fn try_clone_descriptor(&self) -> Result<SafeDescriptor> {
        Err(Error::new(ENOTSUP))
    }

    fn hypervisor_kind(&self) -> HypervisorKind {
        HypervisorKind::Whpx
    }

    /// Reports which optional VM capabilities this WHPX backend provides.
    fn check_capability(&self, c: VmCap) -> bool {
        match c {
            // Dirty-page tracking is a host WHPX feature; probe for it and
            // treat a probe failure as "not supported".
            VmCap::DirtyLog => Whpx::check_whpx_feature(WhpxFeature::DirtyPageTracking)
                .unwrap_or_else(|e| {
                    error!(
                        "failed to check whpx feature {:?}: {}",
                        WhpxFeature::DirtyPageTracking,
                        e
                    );
                    false
                }),
            // there is a pvclock like thing already done w/ hyperv, but we can't get the state.
            VmCap::PvClock => false,
            VmCap::Protected => false,
            // whpx initializes cpuid early during VM creation.
            VmCap::EarlyInitCpuid => true,
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => false,
            VmCap::ReadOnlyMemoryRegion => true,
            VmCap::MemNoncoherentDma => false,
        }
    }

    fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    /// Maps `mem` into the guest at `guest_addr` and returns the slot number
    /// assigned to the region. Slots freed by `remove_memory_region` are
    /// re-used (smallest first) before new slot numbers are minted.
    fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
        _cache: MemCacheType,
    ) -> Result<MemSlot> {
        let size = mem.size() as u64;
        let end_addr = guest_addr.checked_add(size).ok_or(Error::new(EOVERFLOW))?;
        // Reject regions that overlap the fixed boot memory.
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
        };

        // Safe because we check that the given guest address is valid and has no overlaps. We also
        // know that the pointer and size are correct because the MemoryMapping interface ensures
        // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
        // is removed.
        let res = unsafe {
            set_user_memory_region(
                &self.vm_partition,
                read_only,
                log_dirty_pages,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            // Return the unused slot number to the free pool before bailing.
            gaps.push(Reverse(slot));
            return Err(e);
        }
        regions.insert(slot, (guest_addr, mem));
        Ok(slot)
    }

    fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, mem) = regions.get_mut(&slot).ok_or(Error::new(ENOENT))?;

        // Translate mmap-layer errors into errno-style base errors.
        mem.msync(offset, size).map_err(|err| match err {
            MmapError::InvalidAddress => Error::new(EFAULT),
            MmapError::NotPageAligned => Error::new(EINVAL),
            MmapError::SystemCallFailed(e) => e,
            _ => Error::new(EIO),
        })
    }

    /// Unmaps the region in `slot` from the partition, recycles the slot
    /// number, and returns the owned mapping to the caller.
    fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        if let Some((guest_addr, mem)) = regions.get(&slot) {
            // Safe because the slot is checked against the list of memory slots.
            unsafe {
                check_whpx!(WHvUnmapGpaRange(
                    self.vm_partition.partition,
                    guest_addr.offset(),
                    mem.size() as u64,
                ))?;
            }
            self.mem_slot_gaps.lock().push(Reverse(slot));
            Ok(regions.remove(&slot).unwrap().1)
        } else {
            Err(Error::new(ENOENT))
        }
    }

    fn create_device(&self, _kind: DeviceKind) -> Result<SafeDescriptor> {
        // Whpx does not support in-kernel devices
        Err(Error::new(libc::ENXIO))
    }

    /// Copies the dirty-page bitmap for the region in `slot` into `dirty_log`
    /// (one bit per page). Fails with `EINVAL` if `dirty_log` is too small.
    fn get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()> {
        let regions = self.mem_regions.lock();
        if let Some((guest_addr, mem)) = regions.get(&slot) {
            // Ensures that there are as many bytes in dirty_log as there are pages in the mmap.
            if dirty_log_bitmap_size(mem.size()) > dirty_log.len() {
                return Err(Error::new(EINVAL));
            }
            // WHPX wants a u64 buffer; size it to cover dirty_log, rounding up.
            let bitmap_size = if dirty_log.len() % 8 == 0 {
                dirty_log.len() / 8
            } else {
                dirty_log.len() / 8 + 1
            };
            let mut bitmap = vec![0u64; bitmap_size];
            check_whpx!(unsafe {
                WHvQueryGpaRangeDirtyBitmap(
                    self.vm_partition.partition,
                    guest_addr.offset(),
                    mem.size() as u64,
                    bitmap.as_mut_ptr() as *mut u64,
                    (bitmap.len() * 8) as u32,
                )
            })?;
            // safe because we have allocated a vec of u64, which we can cast to a u8 slice.
            let buffer = unsafe {
                std::slice::from_raw_parts(bitmap.as_ptr() as *const u8, bitmap.len() * 8)
            };
            dirty_log.copy_from_slice(&buffer[..dirty_log.len()]);
            Ok(())
        } else {
            Err(Error::new(ENOENT))
        }
    }

    /// Registers `evt` to be signaled when the guest accesses `addr`.
    ///
    /// WHPX restrictions: only `Datamatch::AnyLength` is accepted, and at most
    /// one event may be registered per address (see the `ioevents` field docs).
    fn register_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        if datamatch != Datamatch::AnyLength {
            error!("WHPX currently only supports Datamatch::AnyLength");
            return Err(Error::new(ENOTSUP));
        }

        if self.ioevents.contains_key(&addr) {
            error!("WHPX does not support multiple ioevents for the same address");
            return Err(Error::new(EEXIST));
        }

        self.ioevents.insert(addr, evt.try_clone()?);

        Ok(())
    }

    /// Removes the ioevent registered at `addr`; `evt` must match the event
    /// that was registered there.
    fn unregister_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        if datamatch != Datamatch::AnyLength {
            error!("WHPX only supports Datamatch::AnyLength");
            return Err(Error::new(ENOTSUP));
        }

        match self.ioevents.get(&addr) {
            Some(existing_evt) => {
                // evt should match the existing evt associated with addr
                if evt != existing_evt {
                    return Err(Error::new(ENOENT));
                }
                self.ioevents.remove(&addr);
            }

            None => {
                return Err(Error::new(ENOENT));
            }
        };
        Ok(())
    }

    /// Trigger any io events based on the memory mapped IO at `addr`. If the hypervisor does
    /// in-kernel IO event delivery, this is a no-op.
    fn handle_io_events(&self, addr: IoEventAddress, _data: &[u8]) -> Result<()> {
        match self.ioevents.get(&addr) {
            None => {}
            Some(evt) => {
                evt.signal()?;
            }
        };
        Ok(())
    }

    fn enable_hypercalls(&mut self, _nr: u64, _count: usize) -> Result<()> {
        Err(Error::new(ENOTSUP))
    }

    // No pvclock support on WHPX (see check_capability).
    fn get_pvclock(&self) -> Result<ClockState> {
        Err(Error::new(ENODEV))
    }

    fn set_pvclock(&self, _state: &ClockState) -> Result<()> {
        Err(Error::new(ENODEV))
    }

    /// Maps `fd` at `fd_offset` into the region in `slot` at `offset`,
    /// delegating to the region's own `add_fd_mapping`.
    fn add_fd_mapping(
        &mut self,
        slot: u32,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, region) = regions.get_mut(&slot).ok_or(Error::new(EINVAL))?;

        match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, region) = regions.get_mut(&slot).ok_or(Error::new(EINVAL))?;

        match region.remove_mapping(offset, size) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    /// Dispatches balloon inflate/deflate requests to the WHPX-specific
    /// unmap/remap handlers above.
    fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
        match event {
            BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
            BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
            BalloonEvent::BalloonTargetReached(_) => Ok(()),
        }
    }

    fn get_guest_phys_addr_bits(&self) -> u8 {
        // Assume the guest physical address size is the same as the host.
        host_phys_addr_bits()
    }
}
766
767
impl VmX86_64 for WhpxVm {
    fn get_hypervisor(&self) -> &dyn HypervisorX86_64 {
        &self.whpx
    }

    /// Creates a vcpu bound to this VM's partition with the given apic/vcpu id.
    fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>> {
        Ok(Box::new(WhpxVcpu::new(
            self.vm_partition.clone(),
            // id is a small vcpu index; the conversion to the vcpu id type is
            // expected to always succeed.
            id.try_into().unwrap(),
        )?))
    }

    /// Sets the address of the three-page region in the VM's address space.
    /// This function is only necessary for unrestricted_guest_mode=0, which we do not support for
    /// WHPX.
    fn set_tss_addr(&self, _addr: GuestAddress) -> Result<()> {
        Ok(())
    }

    /// Sets the address of a one-page region in the VM's address space.
    /// This function is only necessary for unrestricted_guest_mode=0, which we do not support for
    /// WHPX.
    fn set_identity_map_addr(&self, _addr: GuestAddress) -> Result<()> {
        Ok(())
    }

    fn load_protected_vm_firmware(
        &mut self,
        _fw_addr: GuestAddress,
        _fw_max_size: u64,
    ) -> Result<()> {
        // WHPX does not support protected VMs
        Err(Error::new(libc::ENXIO))
    }
}
802
803
// NOTE: WHPX Tests need to be run serially as otherwise it barfs unless we map new regions of guest
804
// memory.
805
#[cfg(test)]
806
mod tests {
807
use std::thread;
808
use std::time::Duration;
809
810
use base::EventWaitResult;
811
use base::MemoryMappingBuilder;
812
use base::SharedMemory;
813
814
use super::*;
815
816
fn new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm {
817
let whpx = Whpx::new().expect("failed to instantiate whpx");
818
let local_apic_supported = Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
819
.expect("failed to get whpx features");
820
WhpxVm::new(
821
&whpx,
822
cpu_count,
823
mem,
824
CpuId::new(0),
825
local_apic_supported,
826
None,
827
)
828
.expect("failed to create whpx vm")
829
}
830
831
#[test]
832
fn create_vm() {
833
if !Whpx::is_enabled() {
834
return;
835
}
836
let cpu_count = 1;
837
let mem =
838
GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
839
new_vm(cpu_count, mem);
840
}
841
842
#[test]
843
fn create_vcpu() {
844
if !Whpx::is_enabled() {
845
return;
846
}
847
let cpu_count = 1;
848
let mem =
849
GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
850
let vm = new_vm(cpu_count, mem);
851
vm.create_vcpu(0).expect("failed to create vcpu");
852
}
853
854
#[test]
855
fn try_clone() {
856
if !Whpx::is_enabled() {
857
return;
858
}
859
let cpu_count = 1;
860
let mem =
861
GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
862
let vm = new_vm(cpu_count, mem);
863
let _vm_clone = vm.try_clone().expect("failed to clone whpx vm");
864
}
865
866
#[test]
867
fn send_vm() {
868
if !Whpx::is_enabled() {
869
return;
870
}
871
let cpu_count = 1;
872
let mem =
873
GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
874
let vm = new_vm(cpu_count, mem);
875
thread::spawn(move || {
876
let _vm = vm;
877
})
878
.join()
879
.unwrap();
880
}
881
882
#[test]
883
fn check_vm_capability() {
884
if !Whpx::is_enabled() {
885
return;
886
}
887
let cpu_count = 1;
888
let mem =
889
GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
890
let vm = new_vm(cpu_count, mem);
891
assert!(vm.check_capability(VmCap::DirtyLog));
892
assert!(!vm.check_capability(VmCap::PvClock));
893
}
894
895
#[test]
896
fn dirty_log_size() {
897
let page_size = pagesize();
898
assert_eq!(dirty_log_bitmap_size(0), 0);
899
assert_eq!(dirty_log_bitmap_size(page_size), 1);
900
assert_eq!(dirty_log_bitmap_size(page_size * 8), 1);
901
assert_eq!(dirty_log_bitmap_size(page_size * 8 + 1), 2);
902
assert_eq!(dirty_log_bitmap_size(page_size * 100), 13);
903
}
904
905
#[test]
906
fn register_ioevent() {
907
if !Whpx::is_enabled() {
908
return;
909
}
910
let cpu_count = 1;
911
let mem =
912
GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
913
let mut vm = new_vm(cpu_count, mem);
914
let evt = Event::new().expect("failed to create event");
915
let otherevt = Event::new().expect("failed to create event");
916
vm.register_ioevent(&evt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
917
.unwrap();
918
vm.register_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
919
.unwrap();
920
921
vm.register_ioevent(
922
&otherevt,
923
IoEventAddress::Mmio(0x1000),
924
Datamatch::AnyLength,
925
)
926
.expect_err("WHPX should not allow you to register two events for the same address");
927
928
vm.register_ioevent(
929
&otherevt,
930
IoEventAddress::Mmio(0x1000),
931
Datamatch::U8(None),
932
)
933
.expect_err(
934
"WHPX should not allow you to register ioevents with Datamatches other than AnyLength",
935
);
936
937
vm.register_ioevent(
938
&otherevt,
939
IoEventAddress::Mmio(0x1000),
940
Datamatch::U32(Some(0xf6)),
941
)
942
.expect_err(
943
"WHPX should not allow you to register ioevents with Datamatches other than AnyLength",
944
);
945
946
vm.unregister_ioevent(&otherevt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
947
.expect_err("unregistering an unknown event should fail");
948
vm.unregister_ioevent(&evt, IoEventAddress::Pio(0xf5), Datamatch::AnyLength)
949
.expect_err("unregistering an unknown PIO address should fail");
950
vm.unregister_ioevent(&evt, IoEventAddress::Pio(0x1000), Datamatch::AnyLength)
951
.expect_err("unregistering an unknown PIO address should fail");
952
vm.unregister_ioevent(&evt, IoEventAddress::Mmio(0xf4), Datamatch::AnyLength)
953
.expect_err("unregistering an unknown MMIO address should fail");
954
vm.unregister_ioevent(&evt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
955
.unwrap();
956
vm.unregister_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
957
.unwrap();
958
}
959
960
#[test]
fn handle_io_events() {
    // These tests only make sense on hosts where WHPX is actually available.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let mut vm = new_vm(/* cpu_count= */ 1, guest_mem);
    let pio_evt = Event::new().expect("failed to create event");
    let mmio_evt = Event::new().expect("failed to create event");
    vm.register_ioevent(&pio_evt, IoEventAddress::Pio(0x1000), Datamatch::AnyLength)
        .unwrap();
    vm.register_ioevent(&mmio_evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
        .unwrap();

    // Asserts that `evt` has been signaled: a short wait must not time out.
    let assert_signaled = |evt: &Event| {
        assert_ne!(
            evt.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
    };
    // Asserts that `evt` has not been signaled: a short wait must time out.
    let assert_untouched = |evt: &Event| {
        assert_eq!(
            evt.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
    };

    // A PIO access at a registered address should trigger only the PIO event.
    vm.handle_io_events(IoEventAddress::Pio(0x1000), &[])
        .expect("failed to handle_io_events");
    assert_signaled(&pio_evt);
    assert_untouched(&mmio_evt);

    // An MMIO access at a registered address should trigger only the MMIO event.
    vm.handle_io_events(IoEventAddress::Mmio(0x1000), &[])
        .expect("failed to handle_io_events");
    assert_untouched(&pio_evt);
    assert_signaled(&mmio_evt);

    // An address that matches no registered ioevent should trigger neither event.
    vm.handle_io_events(IoEventAddress::Pio(0x1001), &[])
        .expect("failed to handle_io_events");
    assert_untouched(&pio_evt);
    assert_untouched(&mmio_evt);
}
1017
1018
#[test]
fn add_memory_ro() {
    // Skip when the host does not support WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let mut vm = new_vm(/* cpu_count= */ 1, guest_mem);

    // Back a 4 KiB region with shared memory and add it to the VM read-only;
    // registration should succeed.
    let mem_size = 0x1000;
    let shm = SharedMemory::new("test", mem_size as u64).unwrap();
    let mapping = MemoryMappingBuilder::new(mem_size)
        .from_shared_memory(&shm)
        .build()
        .unwrap();
    vm.add_memory_region(
        GuestAddress(0x1000),
        Box::new(mapping),
        /* read_only= */ true,
        /* log_dirty_pages= */ false,
        MemCacheType::CacheCoherent,
    )
    .unwrap();
}
1042
1043
#[test]
fn remove_memory() {
    // Skip when the host does not support WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let mut vm = new_vm(/* cpu_count= */ 1, guest_mem);

    // Register a 4 KiB shared-memory-backed region.
    let mem_size = 0x1000;
    let shm = SharedMemory::new("test", mem_size as u64).unwrap();
    let mapping = MemoryMappingBuilder::new(mem_size)
        .from_shared_memory(&shm)
        .build()
        .unwrap();
    // Remember the host pointer so we can verify the same mapping comes back.
    let mapping_ptr = mapping.as_ptr();
    let slot = vm
        .add_memory_region(
            GuestAddress(0x1000),
            Box::new(mapping),
            /* read_only= */ false,
            /* log_dirty_pages= */ false,
            MemCacheType::CacheCoherent,
        )
        .unwrap();

    // Removing the slot should hand back the identical mapping (same size and
    // same host address).
    let removed_mem = vm.remove_memory_region(slot).unwrap();
    assert_eq!(removed_mem.size(), mem_size);
    assert_eq!(removed_mem.as_ptr(), mapping_ptr);
}
1072
1073
#[test]
fn remove_invalid_memory() {
    // Skip when the host does not support WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let mut vm = new_vm(/* cpu_count= */ 1, guest_mem);
    // No user memory regions have been added, so removing slot 0 must fail.
    assert!(vm.remove_memory_region(0).is_err());
}
1084
1085
#[test]
fn overlap_memory() {
    // Skip when the host does not support WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    // Guest memory covers [0, 0x10000); the region below would overlap it.
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x10000)]).expect("failed to create guest memory");
    let mut vm = new_vm(/* cpu_count= */ 1, guest_mem);
    let mem_size = 0x2000;
    let shm = SharedMemory::new("test", mem_size as u64).unwrap();
    let mapping = MemoryMappingBuilder::new(mem_size)
        .from_shared_memory(&shm)
        .build()
        .unwrap();
    // Registering a region that overlaps existing guest memory must be rejected.
    assert!(vm
        .add_memory_region(
            GuestAddress(0x2000),
            Box::new(mapping),
            /* read_only= */ false,
            /* log_dirty_pages= */ false,
            MemCacheType::CacheCoherent
        )
        .is_err());
}
1110
1111
#[test]
fn sync_memory() {
    // Skip when the host does not support WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let mut vm = new_vm(/* cpu_count= */ 1, guest_mem);

    // Register a 4 KiB shared-memory-backed region at 0x10000, well clear of
    // the guest memory at [0, 0x1000).
    let mem_size = 0x1000;
    let shm = SharedMemory::new("test", mem_size as u64).unwrap();
    let mapping = MemoryMappingBuilder::new(mem_size)
        .from_shared_memory(&shm)
        .build()
        .unwrap();
    let slot = vm
        .add_memory_region(
            GuestAddress(0x10000),
            Box::new(mapping),
            /* read_only= */ false,
            /* log_dirty_pages= */ false,
            MemCacheType::CacheCoherent,
        )
        .unwrap();

    // In-bounds syncs succeed, whether at the last byte or covering the whole
    // region.
    vm.msync_memory_region(slot, mem_size - 1, 0).unwrap();
    vm.msync_memory_region(slot, 0, mem_size).unwrap();
    // An offset past the end of the region must fail.
    assert!(vm.msync_memory_region(slot, mem_size, 0).is_err());
    // An unknown slot must fail.
    assert!(vm.msync_memory_region(slot + 1, mem_size, 0).is_err());
}
1140
}
1141
1142