Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
google
GitHub Repository: google/crosvm
Path: blob/main/hypervisor/src/whpx/vm.rs
5394 views
1
// Copyright 2022 The ChromiumOS Authors
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
use core::ffi::c_void;
6
use std::cmp::Reverse;
7
use std::collections::BTreeMap;
8
use std::collections::BinaryHeap;
9
use std::convert::TryInto;
10
use std::sync::Arc;
11
12
use base::error;
13
use base::info;
14
use base::pagesize;
15
use base::AsRawDescriptor;
16
use base::Error;
17
use base::Event;
18
use base::MappedRegion;
19
use base::MmapError;
20
use base::Protection;
21
use base::RawDescriptor;
22
use base::Result;
23
use base::SafeDescriptor;
24
use base::SendTube;
25
use fnv::FnvHashMap;
26
use libc::EEXIST;
27
use libc::EFAULT;
28
use libc::EINVAL;
29
use libc::EIO;
30
use libc::ENODEV;
31
use libc::ENOENT;
32
use libc::ENOSPC;
33
use libc::ENOTSUP;
34
use libc::EOVERFLOW;
35
use sync::Mutex;
36
use vm_memory::GuestAddress;
37
use vm_memory::GuestMemory;
38
use winapi::shared::winerror::ERROR_BUSY;
39
use winapi::shared::winerror::ERROR_SUCCESS;
40
use winapi::um::memoryapi::OfferVirtualMemory;
41
use winapi::um::memoryapi::ReclaimVirtualMemory;
42
use winapi::um::memoryapi::VmOfferPriorityBelowNormal;
43
use winapi::um::winnt::RtlZeroMemory;
44
45
use super::types::*;
46
use super::*;
47
use crate::host_phys_addr_bits;
48
use crate::whpx::whpx_sys::*;
49
use crate::BalloonEvent;
50
use crate::ClockState;
51
use crate::Datamatch;
52
use crate::DeliveryMode;
53
use crate::DestinationMode;
54
use crate::DeviceKind;
55
use crate::HypervisorKind;
56
use crate::IoEventAddress;
57
use crate::LapicState;
58
use crate::MemCacheType;
59
use crate::MemSlot;
60
use crate::TriggerMode;
61
use crate::VcpuX86_64;
62
use crate::Vm;
63
use crate::VmCap;
64
use crate::VmX86_64;
65
66
/// A virtual machine backed by the Windows Hypervisor Platform (WHPX).
///
/// Wraps a single WHPX partition together with the bookkeeping crosvm needs:
/// the guest memory layout, dynamically added memory regions (keyed by slot),
/// registered ioevents, and an optional tube for reporting VM events.
pub struct WhpxVm {
    // Handle to the Whpx hypervisor instance that created this VM.
    whpx: Whpx,
    // reference counted, since we need to implement try_clone or some variation.
    // There is only ever 1 create/1 delete partition unlike dup/close handle variations.
    vm_partition: Arc<SafePartition>,
    // The guest's boot memory layout.
    guest_mem: GuestMemory,
    // Memory regions added after VM creation, keyed by slot number. Shared with clones.
    mem_regions: Arc<Mutex<BTreeMap<MemSlot, (GuestAddress, Box<dyn MappedRegion>)>>>,
    /// A min heap of MemSlot numbers that were used and then removed and can now be re-used
    mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
    // WHPX's implementation of ioevents makes several assumptions about how crosvm uses ioevents:
    // 1. All ioevents are registered during device setup, and thus can be cloned when the vm is
    //    cloned instead of locked in an Arc<Mutex<>>. This will make handling ioevents in each
    //    vcpu thread easier because no locks will need to be acquired.
    // 2. All ioevents use Datamatch::AnyLength. We don't bother checking the datamatch, which
    //    will make this faster.
    // 3. We only ever register one eventfd to each address. This simplifies our data structure.
    ioevents: FnvHashMap<IoEventAddress, Event>,
    // Tube to send events to control.
    vm_evt_wrtube: Option<SendTube>,
}
86
87
impl WhpxVm {
    /// Creates a new WHPX partition configured with `cpu_count` processors, the
    /// supplied `cpuid` results, optional local APIC emulation, and the given
    /// guest memory (every boot region is mapped read/write into the partition).
    ///
    /// The partition is fully set up (`WHvSetupPartition`) before this returns,
    /// so the returned `WhpxVm` is ready for vcpu creation.
    pub fn new(
        whpx: &Whpx,
        cpu_count: usize,
        guest_mem: GuestMemory,
        cpuid: CpuId,
        apic_emulation: bool,
        vm_evt_wrtube: Option<SendTube>,
    ) -> WhpxResult<WhpxVm> {
        let partition = SafePartition::new()?;
        // setup partition defaults.
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        property.ProcessorCount = cpu_count as u32;
        // safe because we own this partition, and the partition property is allocated on the stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeProcessorCount,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetProcessorCount)?;

        // Pre-set any cpuid results in cpuid.
        let mut cpuid_results: Vec<WHV_X64_CPUID_RESULT> = cpuid
            .cpu_id_entries
            .iter()
            .map(WHV_X64_CPUID_RESULT::from)
            .collect();

        // Leaf HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS tells linux that it's running under Hyper-V.
        cpuid_results.push(WHV_X64_CPUID_RESULT {
            Function: HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
            Reserved: [0u32; 3],
            // HYPERV_CPUID_MIN is the minimum leaf that we need to support returning to the guest
            Eax: HYPERV_CPUID_MIN,
            // Ebx/Ecx/Edx spell out the "Microsoft Hv" vendor signature.
            Ebx: u32::from_le_bytes([b'M', b'i', b'c', b'r']),
            Ecx: u32::from_le_bytes([b'o', b's', b'o', b'f']),
            Edx: u32::from_le_bytes([b't', b' ', b'H', b'v']),
        });

        // HYPERV_CPUID_FEATURES leaf tells linux which Hyper-V features we support
        cpuid_results.push(WHV_X64_CPUID_RESULT {
            Function: HYPERV_CPUID_FEATURES,
            Reserved: [0u32; 3],
            // We only support frequency MSRs and the HV_ACCESS_TSC_INVARIANT feature, which means
            // TSC scaling/offseting is handled in hardware, not the guest.
            Eax: HV_ACCESS_FREQUENCY_MSRS
                | HV_ACCESS_TSC_INVARIANT
                | HV_MSR_REFERENCE_TSC_AVAILABLE,
            Ebx: 0,
            Edx: HV_FEATURE_FREQUENCY_MSRS_AVAILABLE,
            Ecx: 0,
        });

        // safe because we own this partition, and the cpuid_results vec is local to this function.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeCpuidResultList,
                cpuid_results.as_ptr() as *const _ as *const c_void,
                (std::mem::size_of::<WHV_X64_CPUID_RESULT>() * cpuid_results.len()) as UINT32,
            )
        })
        .map_err(WhpxError::SetCpuidResultList)?;

        // Setup exiting for cpuid leaves that we want crosvm to adjust, but that we can't pre-set.
        // We can't pre-set leaves that rely on irqchip information, and we cannot pre-set leaves
        // that return different results per-cpu.
        let exit_list: Vec<u32> = vec![0x1, 0x4, 0xB, 0x1F, 0x15];
        // safe because we own this partition, and the exit_list vec local to this function.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeCpuidExitList,
                exit_list.as_ptr() as *const _ as *const c_void,
                (std::mem::size_of::<u32>() * exit_list.len()) as UINT32,
            )
        })
        .map_err(WhpxError::SetCpuidExitList)?;

        // Setup exits for CPUID instruction.
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        // safe because we own this partition, and the partition property is allocated on the stack.
        unsafe {
            property
                .ExtendedVmExits
                .__bindgen_anon_1
                .set_X64CpuidExit(1);
            // X64MsrExit essentially causes WHPX to exit to crosvm when it would normally fail an
            // MSR access and inject a GP fault. Crosvm, in turn, now handles select MSR accesses
            // related to Hyper-V (see the handle_msr_* functions in vcpu.rs) and injects a GP
            // fault for any unhandled MSR accesses.
            property.ExtendedVmExits.__bindgen_anon_1.set_X64MsrExit(1);
        }
        // safe because we own this partition, and the partition property is allocated on the stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeExtendedVmExits,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetExtendedVmExits)?;

        // Fail early if the caller asked for APIC emulation but the host cannot provide it.
        if apic_emulation && !Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)? {
            return Err(WhpxError::LocalApicEmulationNotSupported);
        }

        // Setup apic emulation mode
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        property.LocalApicEmulationMode = if apic_emulation {
            // TODO(b/180966070): figure out if x2apic emulation mode is available on the host and
            // enable it if it is.
            WHV_X64_LOCAL_APIC_EMULATION_MODE_WHvX64LocalApicEmulationModeXApic
        } else {
            WHV_X64_LOCAL_APIC_EMULATION_MODE_WHvX64LocalApicEmulationModeNone
        };

        // safe because we own this partition, and the partition property is allocated on the stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeLocalApicEmulationMode,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetLocalApicEmulationMode)?;

        // safe because we own this partition
        check_whpx!(unsafe { WHvSetupPartition(partition.partition) })
            .map_err(WhpxError::SetupPartition)?;

        // Map every boot guest-memory region into the partition read/write,
        // without dirty-page tracking.
        for region in guest_mem.regions() {
            unsafe {
                // Safe because the guest regions are guaranteed not to overlap.
                set_user_memory_region(
                    &partition,
                    false, // read_only
                    false, // track dirty pages
                    region.guest_addr.offset(),
                    region.size as u64,
                    region.host_addr as *mut u8,
                )
            }
            .map_err(WhpxError::MapGpaRange)?;
        }

        Ok(WhpxVm {
            whpx: whpx.clone(),
            vm_partition: Arc::new(partition),
            guest_mem,
            mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
            mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            ioevents: FnvHashMap::default(),
            vm_evt_wrtube,
        })
    }

    /// Get the current state of the specified VCPU's local APIC
    pub fn get_vcpu_lapic_state(&self, vcpu_id: usize) -> Result<LapicState> {
        let buffer = WhpxLapicState { regs: [0u32; 1024] };
        let mut written_size = 0u32;
        let size = std::mem::size_of::<WhpxLapicState>();

        // Safety note: `buffer` lives on the stack for the duration of the call
        // and `size` matches its allocation; WHPX writes at most `size` bytes.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorInterruptControllerState(
                self.vm_partition.partition,
                vcpu_id as u32,
                buffer.regs.as_ptr() as *mut c_void,
                size as u32,
                &mut written_size,
            )
        })?;

        Ok(LapicState::from(&buffer))
    }

    /// Set the current state of the specified VCPU's local APIC
    pub fn set_vcpu_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()> {
        let buffer = WhpxLapicState::from(state);
        check_whpx!(unsafe {
            WHvSetVirtualProcessorInterruptControllerState(
                self.vm_partition.partition,
                vcpu_id as u32,
                buffer.regs.as_ptr() as *mut c_void,
                std::mem::size_of::<WhpxLapicState>() as u32,
            )
        })?;
        Ok(())
    }

    /// Request an interrupt be delivered to one or more virtualized interrupt controllers. This
    /// should only be used with ApicEmulationModeXApic or ApicEmulationModeX2Apic.
    ///
    /// Returns `ENOTSUP` for delivery modes WHPX cannot express (SMI, remote
    /// read, and external interrupts).
    pub fn request_interrupt(
        &self,
        vector: u8,
        dest_id: u8,
        dest_mode: DestinationMode,
        trigger: TriggerMode,
        delivery: DeliveryMode,
    ) -> Result<()> {
        // The WHV_INTERRUPT_CONTROL does not seem to support the dest_shorthand
        let mut interrupt = WHV_INTERRUPT_CONTROL {
            Destination: dest_id as u32,
            Vector: vector as u32,
            ..Default::default()
        };
        interrupt.set_DestinationMode(match dest_mode {
            DestinationMode::Physical => {
                WHV_INTERRUPT_DESTINATION_MODE_WHvX64InterruptDestinationModePhysical
            }
            DestinationMode::Logical => {
                WHV_INTERRUPT_DESTINATION_MODE_WHvX64InterruptDestinationModeLogical
            }
        } as u64);
        interrupt.set_TriggerMode(match trigger {
            TriggerMode::Edge => WHV_INTERRUPT_TRIGGER_MODE_WHvX64InterruptTriggerModeEdge,
            TriggerMode::Level => WHV_INTERRUPT_TRIGGER_MODE_WHvX64InterruptTriggerModeLevel,
        } as u64);
        interrupt.set_Type(match delivery {
            DeliveryMode::Fixed => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeFixed,
            DeliveryMode::Lowest => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeLowestPriority,
            DeliveryMode::SMI => {
                error!("WHPX does not support requesting an SMI");
                return Err(Error::new(ENOTSUP));
            }
            DeliveryMode::RemoteRead => {
                // This is also no longer supported by intel.
                error!("Remote Read interrupts are not supported by WHPX");
                return Err(Error::new(ENOTSUP));
            }
            DeliveryMode::NMI => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeNmi,
            DeliveryMode::Init => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeInit,
            DeliveryMode::Startup => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeSipi,
            DeliveryMode::External => {
                error!("WHPX does not support requesting an external interrupt");
                return Err(Error::new(ENOTSUP));
            }
        } as u64);

        check_whpx!(unsafe {
            WHvRequestInterrupt(
                self.vm_partition.partition,
                &interrupt,
                std::mem::size_of::<WHV_INTERRUPT_CONTROL>() as u32,
            )
        })
    }

    /// In order to fully unmap a memory range such that the host can reclaim the memory,
    /// we unmap it from the hypervisor partition, and then mark crosvm's process as uninterested
    /// in the memory.
    ///
    /// This will make crosvm unable to access the memory, and allow Windows to reclaim it for other
    /// uses when memory is in demand.
    fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        info!(
            "Balloon: Requested WHPX unmap of addr: {:?}, size: {:?}",
            guest_address, size
        );
        // Safe because WHPX does proper error checking, even if an out-of-bounds address is
        // provided.
        unsafe {
            check_whpx!(WHvUnmapGpaRange(
                self.vm_partition.partition,
                guest_address.offset(),
                size,
            ))?;
        }

        let host_address = self
            .guest_mem
            .get_host_address(guest_address)
            .map_err(|_| Error::new(1))? as *mut c_void;

        // Safe because we have just successfully unmapped this range from the
        // guest partition, so we know it's unused.
        let result =
            unsafe { OfferVirtualMemory(host_address, size as usize, VmOfferPriorityBelowNormal) };

        if result != ERROR_SUCCESS {
            let err = Error::new(result);
            error!("Freeing memory failed with error: {}", err);
            return Err(err);
        }
        Ok(())
    }

    /// Remap memory that has previously been unmapped with #handle_inflate. Note
    /// that attempts to remap pages that were not previously unmapped, or addresses that are not
    /// page-aligned, will result in failure.
    ///
    /// To do this, reclaim the memory from Windows first, then remap it into the hypervisor
    /// partition. Remapped memory has no guarantee of content, and the guest should not expect
    /// it to.
    fn handle_deflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        info!(
            "Balloon: Requested WHPX unmap of addr: {:?}, size: {:?}",
            guest_address, size
        );

        let host_address = self
            .guest_mem
            .get_host_address(guest_address)
            .map_err(|_| Error::new(1))? as *const c_void;

        // Note that we aren't doing any validation here that this range was previously unmapped.
        // However, we can avoid that expensive validation by relying on Windows error checking for
        // ReclaimVirtualMemory. The call will fail if:
        //      - If the range is not currently "offered"
        //      - The range is outside of current guest mem (GuestMemory will fail to convert the
        //        address)
        // In short, security is guaranteed by ensuring the guest can never reclaim ranges it
        // hadn't previously forfeited (and even then, the contents will be zeroed).
        //
        // Safe because the memory ranges in question are managed by Windows, not Rust.
        // Also, ReclaimVirtualMemory has built-in error checking for bad parameters.
        let result = unsafe { ReclaimVirtualMemory(host_address, size as usize) };

        if result == ERROR_BUSY || result == ERROR_SUCCESS {
            // In either of these cases, the contents of the reclaimed memory
            // are preserved or undefined. Regardless, zero the memory
            // to ensure no unintentional memory contents are shared.
            //
            // Safe because we just reclaimed the region in question and haven't yet remapped
            // it to the guest partition, so we know it's unused.
            unsafe { RtlZeroMemory(host_address as RawDescriptor, size as usize) };
        } else {
            let err = Error::new(result);
            error!("Reclaiming memory failed with error: {}", err);
            return Err(err);
        }

        // Safe because no-overlap is guaranteed by the success of ReclaimVirtualMemory,
        // Which would fail if it was called on areas which were not unmapped.
        unsafe {
            set_user_memory_region(
                &self.vm_partition,
                false, // read_only
                false, // track dirty pages
                guest_address.offset(),
                size,
                host_address as *mut u8,
            )
        }
    }
}
438
439
/// Wrapper around WHvMapGpaRange, which creates, modifies, or deletes a mapping
/// from guest physical to host user pages.
///
/// The mapping is always readable and executable; write access is granted
/// unless `read_only` is set, and dirty-page tracking is enabled when
/// `track_dirty_pages` is set.
///
/// # Safety
///
/// Safe when the guest regions are guaranteed not to overlap, and when
/// `userspace_addr` points to at least `memory_size` bytes of valid host
/// memory that outlives the mapping.
unsafe fn set_user_memory_region(
    partition: &SafePartition,
    read_only: bool,
    track_dirty_pages: bool,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    // Base permissions: read + execute; write is added below unless read_only.
    let mut flags = WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagRead
        | WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagExecute;
    if !read_only {
        flags |= WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagWrite
    }
    if track_dirty_pages {
        flags |= WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagTrackDirtyPages;
    }

    let ret = WHvMapGpaRange(
        partition.partition,
        userspace_addr as *mut c_void,
        guest_addr,
        memory_size,
        flags,
    );
    check_whpx!(ret)
}
469
470
/// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
471
/// size.
472
///
473
/// # Arguments
474
///
475
/// * `size` - Number of bytes in the memory region being queried.
476
pub fn dirty_log_bitmap_size(size: usize) -> usize {
477
let page_size = pagesize();
478
(((size + page_size - 1) / page_size) + 7) / 8
479
}
480
481
impl Vm for WhpxVm {
    /// Makes a shallow clone of this `Vm`.
    ///
    /// The partition handle and region/slot maps are shared via `Arc`; the
    /// ioevents and the event tube are duplicated per-clone.
    fn try_clone(&self) -> Result<Self> {
        let mut ioevents = FnvHashMap::default();
        for (addr, evt) in self.ioevents.iter() {
            ioevents.insert(*addr, evt.try_clone()?);
        }
        Ok(WhpxVm {
            whpx: self.whpx.try_clone()?,
            vm_partition: self.vm_partition.clone(),
            guest_mem: self.guest_mem.clone(),
            mem_regions: self.mem_regions.clone(),
            mem_slot_gaps: self.mem_slot_gaps.clone(),
            ioevents,
            vm_evt_wrtube: self
                .vm_evt_wrtube
                .as_ref()
                .map(|t| t.try_clone().expect("could not clone vm_evt_wrtube")),
        })
    }

    // A WHPX partition is not represented by a single OS descriptor, so this
    // is unsupported.
    fn try_clone_descriptor(&self) -> Result<SafeDescriptor> {
        Err(Error::new(ENOTSUP))
    }

    fn hypervisor_kind(&self) -> HypervisorKind {
        HypervisorKind::Whpx
    }

    /// Reports which optional VM capabilities this WHPX backend provides.
    fn check_capability(&self, c: VmCap) -> bool {
        match c {
            // Dirty-page tracking is a host WHPX feature; probe for it and
            // treat a probe failure as "not supported".
            VmCap::DirtyLog => Whpx::check_whpx_feature(WhpxFeature::DirtyPageTracking)
                .unwrap_or_else(|e| {
                    error!(
                        "failed to check whpx feature {:?}: {}",
                        WhpxFeature::DirtyPageTracking,
                        e
                    );
                    false
                }),
            // there is a pvclock like thing already done w/ hyperv, but we can't get the state.
            VmCap::PvClock => false,
            VmCap::Protected => false,
            // whpx initializes cpuid early during VM creation.
            VmCap::EarlyInitCpuid => true,
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => false,
            VmCap::ReadOnlyMemoryRegion => true,
            VmCap::MemNoncoherentDma => false,
        }
    }

    fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    /// Maps `mem` into the guest at `guest_addr` and returns the slot number
    /// assigned to the region. Slots freed by `remove_memory_region` are
    /// re-used (smallest first) before new slot numbers are minted.
    fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
        _cache: MemCacheType,
    ) -> Result<MemSlot> {
        let size = mem.size() as u64;
        let end_addr = guest_addr.checked_add(size).ok_or(Error::new(EOVERFLOW))?;
        // Reject regions that overlap the fixed boot memory.
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
        };

        // Safe because we check that the given guest address is valid and has no overlaps. We also
        // know that the pointer and size are correct because the MemoryMapping interface ensures
        // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
        // is removed.
        let res = unsafe {
            set_user_memory_region(
                &self.vm_partition,
                read_only,
                log_dirty_pages,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            // Return the unused slot number to the free pool before bailing.
            gaps.push(Reverse(slot));
            return Err(e);
        }
        regions.insert(slot, (guest_addr, mem));
        Ok(slot)
    }

    fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, mem) = regions.get_mut(&slot).ok_or(Error::new(ENOENT))?;

        // Translate mmap-layer errors into errno-style base errors.
        mem.msync(offset, size).map_err(|err| match err {
            MmapError::InvalidAddress => Error::new(EFAULT),
            MmapError::NotPageAligned => Error::new(EINVAL),
            MmapError::SystemCallFailed(e) => e,
            _ => Error::new(EIO),
        })
    }

    /// Unmaps the region in `slot` from the partition, recycles the slot
    /// number, and returns the owned mapping to the caller.
    fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        if let Some((guest_addr, mem)) = regions.get(&slot) {
            // Safe because the slot is checked against the list of memory slots.
            unsafe {
                check_whpx!(WHvUnmapGpaRange(
                    self.vm_partition.partition,
                    guest_addr.offset(),
                    mem.size() as u64,
                ))?;
            }
            self.mem_slot_gaps.lock().push(Reverse(slot));
            Ok(regions.remove(&slot).unwrap().1)
        } else {
            Err(Error::new(ENOENT))
        }
    }

    fn create_device(&self, _kind: DeviceKind) -> Result<SafeDescriptor> {
        // Whpx does not support in-kernel devices
        Err(Error::new(libc::ENXIO))
    }

    /// Copies the dirty-page bitmap for the region in `slot` into `dirty_log`
    /// (one bit per page). Fails with `EINVAL` if `dirty_log` is too small.
    fn get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()> {
        let regions = self.mem_regions.lock();
        if let Some((guest_addr, mem)) = regions.get(&slot) {
            // Ensures that there are as many bytes in dirty_log as there are pages in the mmap.
            if dirty_log_bitmap_size(mem.size()) > dirty_log.len() {
                return Err(Error::new(EINVAL));
            }
            // WHPX wants a u64 buffer; size it to cover dirty_log, rounding up.
            let bitmap_size = if dirty_log.len() % 8 == 0 {
                dirty_log.len() / 8
            } else {
                dirty_log.len() / 8 + 1
            };
            let mut bitmap = vec![0u64; bitmap_size];
            check_whpx!(unsafe {
                WHvQueryGpaRangeDirtyBitmap(
                    self.vm_partition.partition,
                    guest_addr.offset(),
                    mem.size() as u64,
                    bitmap.as_mut_ptr() as *mut u64,
                    (bitmap.len() * 8) as u32,
                )
            })?;
            // safe because we have allocated a vec of u64, which we can cast to a u8 slice.
            let buffer = unsafe {
                std::slice::from_raw_parts(bitmap.as_ptr() as *const u8, bitmap.len() * 8)
            };
            dirty_log.copy_from_slice(&buffer[..dirty_log.len()]);
            Ok(())
        } else {
            Err(Error::new(ENOENT))
        }
    }

    /// Registers `evt` to be signaled when the guest accesses `addr`.
    ///
    /// WHPX restrictions: only `Datamatch::AnyLength` is accepted, and at most
    /// one event may be registered per address (see the `ioevents` field docs).
    fn register_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        if datamatch != Datamatch::AnyLength {
            error!("WHPX currently only supports Datamatch::AnyLength");
            return Err(Error::new(ENOTSUP));
        }

        if self.ioevents.contains_key(&addr) {
            error!("WHPX does not support multiple ioevents for the same address");
            return Err(Error::new(EEXIST));
        }

        self.ioevents.insert(addr, evt.try_clone()?);

        Ok(())
    }

    /// Removes the ioevent registered at `addr`; `evt` must match the event
    /// that was registered there.
    fn unregister_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        if datamatch != Datamatch::AnyLength {
            error!("WHPX only supports Datamatch::AnyLength");
            return Err(Error::new(ENOTSUP));
        }

        match self.ioevents.get(&addr) {
            Some(existing_evt) => {
                // evt should match the existing evt associated with addr
                if evt != existing_evt {
                    return Err(Error::new(ENOENT));
                }
                self.ioevents.remove(&addr);
            }

            None => {
                return Err(Error::new(ENOENT));
            }
        };
        Ok(())
    }

    /// Trigger any io events based on the memory mapped IO at `addr`. If the hypervisor does
    /// in-kernel IO event delivery, this is a no-op.
    fn handle_io_events(&self, addr: IoEventAddress, _data: &[u8]) -> Result<()> {
        match self.ioevents.get(&addr) {
            None => {}
            Some(evt) => {
                evt.signal()?;
            }
        };
        Ok(())
    }

    fn enable_hypercalls(&mut self, _nr: u64, _count: usize) -> Result<()> {
        Err(Error::new(ENOTSUP))
    }

    // No pvclock support on WHPX (see check_capability).
    fn get_pvclock(&self) -> Result<ClockState> {
        Err(Error::new(ENODEV))
    }

    fn set_pvclock(&self, _state: &ClockState) -> Result<()> {
        Err(Error::new(ENODEV))
    }

    /// Maps `fd` at `fd_offset` into the region in `slot` at `offset`,
    /// delegating to the region's own `add_fd_mapping`.
    fn add_fd_mapping(
        &mut self,
        slot: u32,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, region) = regions.get_mut(&slot).ok_or(Error::new(EINVAL))?;

        match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, region) = regions.get_mut(&slot).ok_or(Error::new(EINVAL))?;

        match region.remove_mapping(offset, size) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    /// Dispatches balloon inflate/deflate requests to the WHPX-specific
    /// unmap/remap handlers above.
    fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
        match event {
            BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
            BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
            BalloonEvent::BalloonTargetReached(_) => Ok(()),
        }
    }

    fn get_guest_phys_addr_bits(&self) -> u8 {
        // Assume the guest physical address size is the same as the host.
        host_phys_addr_bits()
    }
}
766
767
impl VmX86_64 for WhpxVm {
    fn get_hypervisor(&self) -> &dyn HypervisorX86_64 {
        &self.whpx
    }

    /// Creates a vcpu bound to this VM's partition with the given apic/vcpu id.
    fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>> {
        Ok(Box::new(WhpxVcpu::new(
            self.vm_partition.clone(),
            // id is a small vcpu index; the conversion to the vcpu id type is
            // expected to always succeed.
            id.try_into().unwrap(),
        )?))
    }

    /// Sets the address of the three-page region in the VM's address space.
    /// This function is only necessary for unrestricted_guest_mode=0, which we do not support for
    /// WHPX.
    fn set_tss_addr(&self, _addr: GuestAddress) -> Result<()> {
        Ok(())
    }

    /// Sets the address of a one-page region in the VM's address space.
    /// This function is only necessary for unrestricted_guest_mode=0, which we do not support for
    /// WHPX.
    fn set_identity_map_addr(&self, _addr: GuestAddress) -> Result<()> {
        Ok(())
    }

    fn load_protected_vm_firmware(
        &mut self,
        _fw_addr: GuestAddress,
        _fw_max_size: u64,
    ) -> Result<()> {
        // WHPX does not support protected VMs
        Err(Error::new(libc::ENXIO))
    }
}
802
803
// NOTE: WHPX Tests need to be run serially as otherwise it barfs unless we map new regions of guest
804
// memory.
805
#[cfg(test)]
806
mod tests {
807
use std::thread;
808
use std::time::Duration;
809
810
use base::EventWaitResult;
811
use base::MemoryMappingBuilder;
812
use base::SharedMemory;
813
814
use super::*;
815
816
fn new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm {
817
let whpx = Whpx::new().expect("failed to instantiate whpx");
818
let local_apic_supported = Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
819
.expect("failed to get whpx features");
820
WhpxVm::new(
821
&whpx,
822
cpu_count,
823
mem,
824
CpuId::new(0),
825
local_apic_supported,
826
None,
827
)
828
.expect("failed to create whpx vm")
829
}
830
831
#[test]
832
fn create_vm() {
833
if !Whpx::is_enabled() {
834
return;
835
}
836
let cpu_count = 1;
837
let mem =
838
GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
839
new_vm(cpu_count, mem);
840
}
841
842
#[test]
843
fn create_vcpu() {
844
if !Whpx::is_enabled() {
845
return;
846
}
847
let cpu_count = 1;
848
let mem =
849
GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
850
let vm = new_vm(cpu_count, mem);
851
vm.create_vcpu(0).expect("failed to create vcpu");
852
}
853
854
#[test]
855
fn try_clone() {
856
if !Whpx::is_enabled() {
857
return;
858
}
859
let cpu_count = 1;
860
let mem =
861
GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
862
let vm = new_vm(cpu_count, mem);
863
let _vm_clone = vm.try_clone().expect("failed to clone whpx vm");
864
}
865
866
#[test]
867
fn send_vm() {
868
if !Whpx::is_enabled() {
869
return;
870
}
871
let cpu_count = 1;
872
let mem =
873
GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
874
let vm = new_vm(cpu_count, mem);
875
thread::spawn(move || {
876
let _vm = vm;
877
})
878
.join()
879
.unwrap();
880
}
881
882
#[test]
883
fn check_vm_capability() {
884
if !Whpx::is_enabled() {
885
return;
886
}
887
let cpu_count = 1;
888
let mem =
889
GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
890
let vm = new_vm(cpu_count, mem);
891
assert!(vm.check_capability(VmCap::DirtyLog));
892
assert!(!vm.check_capability(VmCap::PvClock));
893
}
894
895
#[test]
896
fn dirty_log_size() {
897
let page_size = pagesize();
898
assert_eq!(dirty_log_bitmap_size(0), 0);
899
assert_eq!(dirty_log_bitmap_size(page_size), 1);
900
assert_eq!(dirty_log_bitmap_size(page_size * 8), 1);
901
assert_eq!(dirty_log_bitmap_size(page_size * 8 + 1), 2);
902
assert_eq!(dirty_log_bitmap_size(page_size * 100), 13);
903
}
904
905
#[test]
906
fn register_ioevent() {
907
if !Whpx::is_enabled() {
908
return;
909
}
910
let cpu_count = 1;
911
let mem =
912
GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
913
let mut vm = new_vm(cpu_count, mem);
914
let evt = Event::new().expect("failed to create event");
915
let otherevt = Event::new().expect("failed to create event");
916
vm.register_ioevent(&evt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
917
.unwrap();
918
vm.register_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
919
.unwrap();
920
921
vm.register_ioevent(
922
&otherevt,
923
IoEventAddress::Mmio(0x1000),
924
Datamatch::AnyLength,
925
)
926
.expect_err("WHPX should not allow you to register two events for the same address");
927
928
vm.register_ioevent(
929
&otherevt,
930
IoEventAddress::Mmio(0x1000),
931
Datamatch::U8(None),
932
)
933
.expect_err(
934
"WHPX should not allow you to register ioevents with Datamatches other than AnyLength",
935
);
936
937
vm.register_ioevent(
938
&otherevt,
939
IoEventAddress::Mmio(0x1000),
940
Datamatch::U32(Some(0xf6)),
941
)
942
.expect_err(
943
"WHPX should not allow you to register ioevents with Datamatches other than AnyLength",
944
);
945
946
vm.unregister_ioevent(&otherevt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
947
.expect_err("unregistering an unknown event should fail");
948
vm.unregister_ioevent(&evt, IoEventAddress::Pio(0xf5), Datamatch::AnyLength)
949
.expect_err("unregistering an unknown PIO address should fail");
950
vm.unregister_ioevent(&evt, IoEventAddress::Pio(0x1000), Datamatch::AnyLength)
951
.expect_err("unregistering an unknown PIO address should fail");
952
vm.unregister_ioevent(&evt, IoEventAddress::Mmio(0xf4), Datamatch::AnyLength)
953
.expect_err("unregistering an unknown MMIO address should fail");
954
vm.unregister_ioevent(&evt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
955
.unwrap();
956
vm.unregister_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
957
.unwrap();
958
}
959
960
#[test]
fn handle_io_events() {
    // These tests only make sense on hosts where WHPX is actually available.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let mut vm = new_vm(/* cpu_count= */ 1, guest_mem);
    let pio_evt = Event::new().expect("failed to create event");
    let mmio_evt = Event::new().expect("failed to create event");
    vm.register_ioevent(&pio_evt, IoEventAddress::Pio(0x1000), Datamatch::AnyLength)
        .unwrap();
    vm.register_ioevent(&mmio_evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
        .unwrap();

    // Asserts that `evt` has been signaled: a short wait must not time out.
    let assert_signaled = |evt: &Event| {
        assert_ne!(
            evt.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
    };
    // Asserts that `evt` has not been signaled: a short wait must time out.
    let assert_untouched = |evt: &Event| {
        assert_eq!(
            evt.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
    };

    // A PIO access at a registered address should trigger only the PIO event.
    vm.handle_io_events(IoEventAddress::Pio(0x1000), &[])
        .expect("failed to handle_io_events");
    assert_signaled(&pio_evt);
    assert_untouched(&mmio_evt);

    // An MMIO access at a registered address should trigger only the MMIO event.
    vm.handle_io_events(IoEventAddress::Mmio(0x1000), &[])
        .expect("failed to handle_io_events");
    assert_untouched(&pio_evt);
    assert_signaled(&mmio_evt);

    // An address that matches no registered ioevent should trigger neither event.
    vm.handle_io_events(IoEventAddress::Pio(0x1001), &[])
        .expect("failed to handle_io_events");
    assert_untouched(&pio_evt);
    assert_untouched(&mmio_evt);
}
1017
1018
#[test]
fn add_memory_ro() {
    // Skip when the host does not support WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let mut vm = new_vm(/* cpu_count= */ 1, guest_mem);

    // Back a 4 KiB region with shared memory and add it to the VM read-only;
    // registration should succeed.
    let mem_size = 0x1000;
    let shm = SharedMemory::new("test", mem_size as u64).unwrap();
    let mapping = MemoryMappingBuilder::new(mem_size)
        .from_shared_memory(&shm)
        .build()
        .unwrap();
    vm.add_memory_region(
        GuestAddress(0x1000),
        Box::new(mapping),
        /* read_only= */ true,
        /* log_dirty_pages= */ false,
        MemCacheType::CacheCoherent,
    )
    .unwrap();
}
1042
1043
#[test]
fn remove_memory() {
    // Skip when the host does not support WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let mut vm = new_vm(/* cpu_count= */ 1, guest_mem);

    // Register a 4 KiB shared-memory-backed region.
    let mem_size = 0x1000;
    let shm = SharedMemory::new("test", mem_size as u64).unwrap();
    let mapping = MemoryMappingBuilder::new(mem_size)
        .from_shared_memory(&shm)
        .build()
        .unwrap();
    // Remember the host pointer so we can verify the same mapping comes back.
    let mapping_ptr = mapping.as_ptr();
    let slot = vm
        .add_memory_region(
            GuestAddress(0x1000),
            Box::new(mapping),
            /* read_only= */ false,
            /* log_dirty_pages= */ false,
            MemCacheType::CacheCoherent,
        )
        .unwrap();

    // Removing the slot should hand back the identical mapping (same size and
    // same host address).
    let removed_mem = vm.remove_memory_region(slot).unwrap();
    assert_eq!(removed_mem.size(), mem_size);
    assert_eq!(removed_mem.as_ptr(), mapping_ptr);
}
1072
1073
#[test]
fn remove_invalid_memory() {
    // Skip when the host does not support WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let mut vm = new_vm(/* cpu_count= */ 1, guest_mem);
    // No user memory regions have been added, so removing slot 0 must fail.
    assert!(vm.remove_memory_region(0).is_err());
}
1084
1085
#[test]
fn overlap_memory() {
    // Skip when the host does not support WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    // Guest memory covers [0, 0x10000); the region below would overlap it.
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x10000)]).expect("failed to create guest memory");
    let mut vm = new_vm(/* cpu_count= */ 1, guest_mem);
    let mem_size = 0x2000;
    let shm = SharedMemory::new("test", mem_size as u64).unwrap();
    let mapping = MemoryMappingBuilder::new(mem_size)
        .from_shared_memory(&shm)
        .build()
        .unwrap();
    // Registering a region that overlaps existing guest memory must be rejected.
    assert!(vm
        .add_memory_region(
            GuestAddress(0x2000),
            Box::new(mapping),
            /* read_only= */ false,
            /* log_dirty_pages= */ false,
            MemCacheType::CacheCoherent
        )
        .is_err());
}
1110
1111
#[test]
fn sync_memory() {
    // Skip when the host does not support WHPX.
    if !Whpx::is_enabled() {
        return;
    }
    let guest_mem =
        GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
    let mut vm = new_vm(/* cpu_count= */ 1, guest_mem);

    // Register a 4 KiB shared-memory-backed region at 0x10000, well clear of
    // the guest memory at [0, 0x1000).
    let mem_size = 0x1000;
    let shm = SharedMemory::new("test", mem_size as u64).unwrap();
    let mapping = MemoryMappingBuilder::new(mem_size)
        .from_shared_memory(&shm)
        .build()
        .unwrap();
    let slot = vm
        .add_memory_region(
            GuestAddress(0x10000),
            Box::new(mapping),
            /* read_only= */ false,
            /* log_dirty_pages= */ false,
            MemCacheType::CacheCoherent,
        )
        .unwrap();

    // In-bounds syncs succeed, whether at the last byte or covering the whole
    // region.
    vm.msync_memory_region(slot, mem_size - 1, 0).unwrap();
    vm.msync_memory_region(slot, 0, mem_size).unwrap();
    // An offset past the end of the region must fail.
    assert!(vm.msync_memory_region(slot, mem_size, 0).is_err());
    // An unknown slot must fail.
    assert!(vm.msync_memory_region(slot + 1, mem_size, 0).is_err());
}
1140
}
1141
1142