// Copyright 2020 The ChromiumOS Authors1// Use of this source code is governed by a BSD-style license that can be2// found in the LICENSE file.34use std::arch::x86_64::CpuidResult;5#[cfg(any(unix, feature = "haxm", feature = "whpx"))]6use std::arch::x86_64::__cpuid;7use std::arch::x86_64::_rdtsc;8use std::collections::BTreeMap;9use std::collections::HashSet;1011use anyhow::Context;12use base::custom_serde::deserialize_seq_to_arr;13use base::custom_serde::serialize_arr;14use base::error;15use base::warn;16use base::Result;17use bit_field::*;18use downcast_rs::impl_downcast;19use libc::c_void;20use serde::Deserialize;21use serde::Serialize;22use snapshot::AnySnapshot;23use vm_memory::GuestAddress;2425use crate::Hypervisor;26use crate::IrqRoute;27use crate::IrqSource;28use crate::IrqSourceChip;29use crate::Vcpu;30use crate::Vm;3132const MSR_F15H_PERF_CTL0: u32 = 0xc0010200;33const MSR_F15H_PERF_CTL1: u32 = 0xc0010202;34const MSR_F15H_PERF_CTL2: u32 = 0xc0010204;35const MSR_F15H_PERF_CTL3: u32 = 0xc0010206;36const MSR_F15H_PERF_CTL4: u32 = 0xc0010208;37const MSR_F15H_PERF_CTL5: u32 = 0xc001020a;38const MSR_F15H_PERF_CTR0: u32 = 0xc0010201;39const MSR_F15H_PERF_CTR1: u32 = 0xc0010203;40const MSR_F15H_PERF_CTR2: u32 = 0xc0010205;41const MSR_F15H_PERF_CTR3: u32 = 0xc0010207;42const MSR_F15H_PERF_CTR4: u32 = 0xc0010209;43const MSR_F15H_PERF_CTR5: u32 = 0xc001020b;44const MSR_IA32_PERF_CAPABILITIES: u32 = 0x00000345;4546/// A trait for managing cpuids for an x86_64 hypervisor and for checking its capabilities.47pub trait HypervisorX86_64: Hypervisor {48/// Get the system supported CPUID values.49fn get_supported_cpuid(&self) -> Result<CpuId>;5051/// Gets the list of supported MSRs.52fn get_msr_index_list(&self) -> Result<Vec<u32>>;53}5455/// A wrapper for using a VM on x86_64 and getting/setting its state.56pub trait VmX86_64: Vm {57/// Gets the `HypervisorX86_64` that created this VM.58fn get_hypervisor(&self) -> &dyn HypervisorX86_64;5960/// Create a Vcpu with the specified Vcpu ID.61fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>>;6263/// Sets the address of the three-page region in the VM's address space.64fn set_tss_addr(&self, addr: GuestAddress) -> Result<()>;6566/// Sets the address of a one-page region in the VM's address space.67fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()>;6869/// Load pVM firmware for the VM, creating a memslot for it as needed.70///71/// Only works on protected VMs (i.e. those with vm_type == KVM_X86_PKVM_PROTECTED_VM).72fn load_protected_vm_firmware(&mut self, fw_addr: GuestAddress, fw_max_size: u64)73-> Result<()>;74}7576/// A wrapper around creating and using a VCPU on x86_64.77pub trait VcpuX86_64: Vcpu {78/// Sets or clears the flag that requests the VCPU to exit when it becomes possible to inject79/// interrupts into the guest.80fn set_interrupt_window_requested(&self, requested: bool);8182/// Checks if we can inject an interrupt into the VCPU.83fn ready_for_interrupt(&self) -> bool;8485/// Injects interrupt vector `irq` into the VCPU.86///87/// This function should only be called when [`Self::ready_for_interrupt`] returns true.88/// Otherwise the interrupt injection may fail or the next VCPU run may fail. However, if89/// [`Self::interrupt`] returns [`Ok`], the implementation must guarantee that the interrupt90/// isn't injected in an uninterruptible window (e.g. right after the mov ss instruction).91///92/// The caller should avoid calling this function more than 1 time for one VMEXIT, because the93/// hypervisor may behave differently: some hypervisors(e.g. WHPX, KVM) will only try to inject94/// the last `irq` requested, while some other hypervisors(e.g. HAXM) may try to inject all95/// `irq`s requested.96fn interrupt(&self, irq: u8) -> Result<()>;9798/// Injects a non-maskable interrupt into the VCPU.99fn inject_nmi(&self) -> Result<()>;100101/// Gets the VCPU general purpose registers.102fn get_regs(&self) -> Result<Regs>;103104/// Sets the VCPU general purpose registers.105fn set_regs(&self, regs: &Regs) -> Result<()>;106107/// Gets the VCPU special registers.108fn get_sregs(&self) -> Result<Sregs>;109110/// Sets the VCPU special registers.111fn set_sregs(&self, sregs: &Sregs) -> Result<()>;112113/// Gets the VCPU FPU registers.114fn get_fpu(&self) -> Result<Fpu>;115116/// Sets the VCPU FPU registers.117fn set_fpu(&self, fpu: &Fpu) -> Result<()>;118119/// Gets the VCPU debug registers.120fn get_debugregs(&self) -> Result<DebugRegs>;121122/// Sets the VCPU debug registers.123fn set_debugregs(&self, debugregs: &DebugRegs) -> Result<()>;124125/// Gets the VCPU extended control registers.126fn get_xcrs(&self) -> Result<BTreeMap<u32, u64>>;127128/// Sets a VCPU extended control register.129fn set_xcr(&self, xcr: u32, value: u64) -> Result<()>;130131/// Gets the VCPU x87 FPU, MMX, XMM, YMM and MXCSR registers.132fn get_xsave(&self) -> Result<Xsave>;133134/// Sets the VCPU x87 FPU, MMX, XMM, YMM and MXCSR registers.135fn set_xsave(&self, xsave: &Xsave) -> Result<()>;136137/// Gets hypervisor specific state for this VCPU that must be138/// saved/restored for snapshotting.139/// This state is fetched after VCPUs are frozen and interrupts are flushed.140fn get_hypervisor_specific_state(&self) -> Result<AnySnapshot>;141142/// Sets hypervisor specific state for this VCPU. Only used for143/// snapshotting.144fn set_hypervisor_specific_state(&self, data: AnySnapshot) -> Result<()>;145146/// Gets a single model-specific register's value.147fn get_msr(&self, msr_index: u32) -> Result<u64>;148149/// Gets the model-specific registers. Returns all the MSRs for the VCPU.150fn get_all_msrs(&self) -> Result<BTreeMap<u32, u64>>;151152/// Sets a single model-specific register's value.153fn set_msr(&self, msr_index: u32, value: u64) -> Result<()>;154155/// Sets up the data returned by the CPUID instruction.156fn set_cpuid(&self, cpuid: &CpuId) -> Result<()>;157158/// Sets up debug registers and configure vcpu for handling guest debug events.159fn set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()>;160161/// This function should be called after `Vcpu::run` returns `VcpuExit::Cpuid`, and `entry`162/// should represent the result of emulating the CPUID instruction. The `handle_cpuid` function163/// will then set the appropriate registers on the vcpu.164fn handle_cpuid(&mut self, entry: &CpuIdEntry) -> Result<()>;165166/// Gets the guest->host TSC offset.167///168/// The default implementation uses [`VcpuX86_64::get_msr()`] to read the guest TSC.169fn get_tsc_offset(&self) -> Result<u64> {170// SAFETY:171// Safe because _rdtsc takes no arguments172let host_before_tsc = unsafe { _rdtsc() };173174// get guest TSC value from our hypervisor175let guest_tsc = self.get_msr(crate::MSR_IA32_TSC)?;176177// SAFETY:178// Safe because _rdtsc takes no arguments179let host_after_tsc = unsafe { _rdtsc() };180181// Average the before and after host tsc to get the best value182let host_tsc = ((host_before_tsc as u128 + host_after_tsc as u128) / 2) as u64;183184Ok(guest_tsc.wrapping_sub(host_tsc))185}186187/// Sets the guest->host TSC offset.188///189/// The default implementation uses [`VcpuX86_64::set_tsc_value()`] to set the TSC value.190///191/// It sets TSC_OFFSET (VMCS / CB field) by setting the TSC MSR to the current192/// host TSC value plus the desired offset. We rely on the fact that hypervisors193/// determine the value of TSC_OFFSET by computing TSC_OFFSET = `new_tsc_value - _rdtsc()` =194/// `_rdtsc() + offset - _rdtsc()` ~= `offset`. Note that the ~= is important: this is an195/// approximate operation, because the two _rdtsc() calls196/// are separated by at least a few ticks.197///198/// Note: TSC_OFFSET, host TSC, guest TSC, and TSC MSR are all different199/// concepts.200/// * When a guest executes rdtsc, the value (guest TSC) returned is host_tsc * TSC_MULTIPLIER +201/// TSC_OFFSET + TSC_ADJUST.202/// * The TSC MSR is a special MSR that when written to by the host, will cause TSC_OFFSET to be203/// set accordingly by the hypervisor.204/// * When the guest *writes* to TSC MSR, it actually changes the TSC_ADJUST MSR *for the205/// guest*. Generally this is only happens if the guest is trying to re-zero or synchronize206/// TSCs.207fn set_tsc_offset(&self, offset: u64) -> Result<()> {208// SAFETY: _rdtsc takes no arguments.209let host_tsc = unsafe { _rdtsc() };210self.set_tsc_value(host_tsc.wrapping_add(offset))211}212213/// Sets the guest TSC exactly to the provided value.214///215/// The default implementation sets the guest's TSC by writing the value to the MSR directly.216///217/// See [`VcpuX86_64::set_tsc_offset()`] for an explanation of how this value is actually read218/// by the guest after being set.219fn set_tsc_value(&self, value: u64) -> Result<()> {220self.set_msr(crate::MSR_IA32_TSC, value)221}222223/// Some hypervisors require special handling to restore timekeeping when224/// a snapshot is restored. They are provided with a host TSC reference225/// moment, guaranteed to be the same across all Vcpus, and the Vcpu's TSC226/// offset at the moment it was snapshotted.227fn restore_timekeeping(&self, host_tsc_reference_moment: u64, tsc_offset: u64) -> Result<()>;228229/// Snapshot vCPU state230fn snapshot(&self) -> anyhow::Result<VcpuSnapshot> {231Ok(VcpuSnapshot {232vcpu_id: self.id(),233regs: self.get_regs()?,234sregs: self.get_sregs()?,235debug_regs: self.get_debugregs()?,236xcrs: self.get_xcrs()?,237msrs: self.get_all_msrs()?,238xsave: self.get_xsave()?,239hypervisor_data: self.get_hypervisor_specific_state()?,240tsc_offset: self.get_tsc_offset()?,241})242}243244fn restore(245&mut self,246snapshot: &VcpuSnapshot,247host_tsc_reference_moment: u64,248) -> anyhow::Result<()> {249// List of MSRs that may fail to restore due to lack of support in the host kernel.250// Some hosts are may be running older kernels which do not support all MSRs, but251// get_all_msrs will still fetch the MSRs supported by the CPU. Trying to set those MSRs252// will result in failures, so they will throw a warning instead.253let msr_allowlist = HashSet::from([254MSR_F15H_PERF_CTL0,255MSR_F15H_PERF_CTL1,256MSR_F15H_PERF_CTL2,257MSR_F15H_PERF_CTL3,258MSR_F15H_PERF_CTL4,259MSR_F15H_PERF_CTL5,260MSR_F15H_PERF_CTR0,261MSR_F15H_PERF_CTR1,262MSR_F15H_PERF_CTR2,263MSR_F15H_PERF_CTR3,264MSR_F15H_PERF_CTR4,265MSR_F15H_PERF_CTR5,266MSR_IA32_PERF_CAPABILITIES,267]);268assert_eq!(snapshot.vcpu_id, self.id());269self.set_regs(&snapshot.regs)?;270self.set_sregs(&snapshot.sregs)?;271self.set_debugregs(&snapshot.debug_regs)?;272for (xcr_index, value) in &snapshot.xcrs {273self.set_xcr(*xcr_index, *value)?;274}275276for (msr_index, value) in snapshot.msrs.iter() {277if self.get_msr(*msr_index) == Ok(*value) {278continue; // no need to set MSR since the values are the same.279}280if let Err(e) = self.set_msr(*msr_index, *value) {281if msr_allowlist.contains(msr_index) {282warn!(283"Failed to set MSR. MSR might not be supported in this kernel. Err: {}",284e285);286} else {287return Err(e).context(288"Failed to set MSR. MSR might not be supported by the CPU or by the kernel,289and was not allow-listed.",290);291}292};293}294self.set_xsave(&snapshot.xsave)?;295self.set_hypervisor_specific_state(snapshot.hypervisor_data.clone())?;296self.restore_timekeeping(host_tsc_reference_moment, snapshot.tsc_offset)?;297Ok(())298}299}300301/// x86 specific vCPU snapshot.302#[derive(Clone, Debug, Serialize, Deserialize)]303pub struct VcpuSnapshot {304pub vcpu_id: usize,305regs: Regs,306sregs: Sregs,307debug_regs: DebugRegs,308xcrs: BTreeMap<u32, u64>,309msrs: BTreeMap<u32, u64>,310xsave: Xsave,311hypervisor_data: AnySnapshot,312tsc_offset: u64,313}314315impl_downcast!(VcpuX86_64);316317// TSC MSR318pub const MSR_IA32_TSC: u32 = 0x00000010;319320/// Gets host cpu max physical address bits.321#[cfg(any(unix, feature = "haxm", feature = "whpx"))]322pub(crate) fn host_phys_addr_bits() -> u8 {323// SAFETY: trivially safe324let highest_ext_function = unsafe { __cpuid(0x80000000) };325if highest_ext_function.eax >= 0x80000008 {326// SAFETY: trivially safe327let addr_size = unsafe { __cpuid(0x80000008) };328// Low 8 bits of 0x80000008 leaf: host physical address size in bits.329addr_size.eax as u8330} else {33136332}333}334335/// Initial state for x86_64 VCPUs.336#[derive(Clone, Default)]337pub struct VcpuInitX86_64 {338/// General-purpose registers.339pub regs: Regs,340341/// Special registers.342pub sregs: Sregs,343344/// Floating-point registers.345pub fpu: Fpu,346347/// Machine-specific registers.348pub msrs: BTreeMap<u32, u64>,349}350351/// Hold the CPU feature configurations that are needed to setup a vCPU.352#[derive(Clone, Debug, PartialEq, Eq)]353pub struct CpuConfigX86_64 {354/// whether to force using a calibrated TSC leaf (0x15).355pub force_calibrated_tsc_leaf: bool,356357/// whether enabling host cpu topology.358pub host_cpu_topology: bool,359360/// whether expose HWP feature to the guest.361pub enable_hwp: bool,362363/// Wheter diabling SMT (Simultaneous Multithreading).364pub no_smt: bool,365366/// whether enabling ITMT scheduler367pub itmt: bool,368369/// whether setting hybrid CPU type370pub hybrid_type: Option<CpuHybridType>,371}372373impl CpuConfigX86_64 {374pub fn new(375force_calibrated_tsc_leaf: bool,376host_cpu_topology: bool,377enable_hwp: bool,378no_smt: bool,379itmt: bool,380hybrid_type: Option<CpuHybridType>,381) -> Self {382CpuConfigX86_64 {383force_calibrated_tsc_leaf,384host_cpu_topology,385enable_hwp,386no_smt,387itmt,388hybrid_type,389}390}391}392393/// A CpuId Entry contains supported feature information for the given processor.394/// This can be modified by the hypervisor to pass additional information to the guest kernel395/// about the hypervisor or vm. Information is returned in the eax, ebx, ecx and edx registers396/// by the cpu for a given function and index/subfunction (passed into the cpu via the eax and ecx397/// register respectively).398#[repr(C)]399#[derive(Clone, Copy, Debug, PartialEq, Eq)]400pub struct CpuIdEntry {401pub function: u32,402pub index: u32,403// flags is needed for KVM. We store it on CpuIdEntry to preserve the flags across404// get_supported_cpuids() -> kvm_cpuid2 -> CpuId -> kvm_cpuid2 -> set_cpuid().405pub flags: u32,406pub cpuid: CpuidResult,407}408409/// A container for the list of cpu id entries for the hypervisor and underlying cpu.410pub struct CpuId {411pub cpu_id_entries: Vec<CpuIdEntry>,412}413414impl CpuId {415/// Constructs a new CpuId, with space allocated for `initial_capacity` CpuIdEntries.416pub fn new(initial_capacity: usize) -> Self {417CpuId {418cpu_id_entries: Vec::with_capacity(initial_capacity),419}420}421}422423#[bitfield]424#[derive(Clone, Copy, Debug, PartialEq, Eq)]425pub enum DestinationMode {426Physical = 0,427Logical = 1,428}429430#[bitfield]431#[derive(Clone, Copy, Debug, PartialEq, Eq)]432pub enum TriggerMode {433Edge = 0,434Level = 1,435}436437#[bitfield]438#[derive(Debug, Clone, Copy, PartialEq, Eq)]439pub enum DeliveryMode {440Fixed = 0b000,441Lowest = 0b001,442SMI = 0b010, // System management interrupt443RemoteRead = 0b011, // This is no longer supported by intel.444NMI = 0b100, // Non maskable interrupt445Init = 0b101,446Startup = 0b110,447External = 0b111,448}449450// These MSI structures are for Intel's implementation of MSI. The PCI spec defines most of MSI,451// but the Intel spec defines the format of messages for raising interrupts. The PCI spec defines452// three u32s -- the address, address_high, and data -- but Intel only makes use of the address and453// data. The Intel portion of the specification is in Volume 3 section 10.11.454#[bitfield]455#[derive(Clone, Copy, PartialEq, Eq)]456pub struct MsiAddressMessage {457pub reserved: BitField2,458#[bits = 1]459pub destination_mode: DestinationMode,460pub redirection_hint: BitField1,461pub reserved_2: BitField8,462pub destination_id: BitField8,463// According to Intel's implementation of MSI, these bits must always be 0xfee.464pub always_0xfee: BitField12,465}466467#[bitfield]468#[derive(Clone, Copy, PartialEq, Eq)]469pub struct MsiDataMessage {470pub vector: BitField8,471#[bits = 3]472pub delivery_mode: DeliveryMode,473pub reserved: BitField3,474#[bits = 1]475pub level: Level,476#[bits = 1]477pub trigger: TriggerMode,478pub reserved2: BitField16,479}480481#[bitfield]482#[derive(Debug, Clone, Copy, PartialEq, Eq)]483pub enum DeliveryStatus {484Idle = 0,485Pending = 1,486}487488/// The level of a level-triggered interrupt: asserted or deasserted.489#[bitfield]490#[derive(Debug, Clone, Copy, PartialEq, Eq)]491pub enum Level {492Deassert = 0,493Assert = 1,494}495496/// Represents a IOAPIC redirection table entry.497#[bitfield]498#[derive(Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]499pub struct IoapicRedirectionTableEntry {500vector: BitField8,501#[bits = 3]502delivery_mode: DeliveryMode,503#[bits = 1]504dest_mode: DestinationMode,505#[bits = 1]506delivery_status: DeliveryStatus,507polarity: BitField1,508remote_irr: bool,509#[bits = 1]510trigger_mode: TriggerMode,511interrupt_mask: bool, // true iff interrupts are masked.512reserved: BitField39,513dest_id: BitField8,514}515516/// Number of pins on the standard KVM/IOAPIC.517pub const NUM_IOAPIC_PINS: usize = 24;518519/// Represents the state of the IOAPIC.520#[repr(C)]521#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]522pub struct IoapicState {523/// base_address is the memory base address for this IOAPIC. It cannot be changed.524pub base_address: u64,525/// ioregsel register. Used for selecting which entry of the redirect table to read/write.526pub ioregsel: u8,527/// ioapicid register. Bits 24 - 27 contain the APIC ID for this device.528pub ioapicid: u32,529/// current_interrupt_level_bitmap represents a bitmap of the state of all of the irq lines530pub current_interrupt_level_bitmap: u32,531/// redirect_table contains the irq settings for each irq line532#[serde(533serialize_with = "serialize_arr",534deserialize_with = "deserialize_seq_to_arr"535)]536pub redirect_table: [IoapicRedirectionTableEntry; NUM_IOAPIC_PINS],537}538539impl Default for IoapicState {540fn default() -> IoapicState {541// SAFETY: trivially safe542unsafe { std::mem::zeroed() }543}544}545546#[repr(C)]547#[derive(Debug, Clone, Copy, PartialEq, Eq)]548pub enum PicSelect {549Primary = 0,550Secondary = 1,551}552553#[repr(C)]554#[derive(enumn::N, Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]555pub enum PicInitState {556#[default]557Icw1 = 0,558Icw2 = 1,559Icw3 = 2,560Icw4 = 3,561}562563/// Convenience implementation for converting from a u8564impl From<u8> for PicInitState {565fn from(item: u8) -> Self {566PicInitState::n(item).unwrap_or_else(|| {567error!("Invalid PicInitState {}, setting to 0", item);568PicInitState::Icw1569})570}571}572573/// Represents the state of the PIC.574#[repr(C)]575#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, Serialize, Deserialize)]576pub struct PicState {577/// Edge detection.578pub last_irr: u8,579/// Interrupt Request Register.580pub irr: u8,581/// Interrupt Mask Register.582pub imr: u8,583/// Interrupt Service Register.584pub isr: u8,585/// Highest priority, for priority rotation.586pub priority_add: u8,587pub irq_base: u8,588pub read_reg_select: bool,589pub poll: bool,590pub special_mask: bool,591pub init_state: PicInitState,592pub auto_eoi: bool,593pub rotate_on_auto_eoi: bool,594pub special_fully_nested_mode: bool,595/// PIC takes either 3 or 4 bytes of initialization command word during596/// initialization. use_4_byte_icw is true if 4 bytes of ICW are needed.597pub use_4_byte_icw: bool,598/// "Edge/Level Control Registers", for edge trigger selection.599/// When a particular bit is set, the corresponding IRQ is in level-triggered mode. Otherwise600/// it is in edge-triggered mode.601pub elcr: u8,602pub elcr_mask: u8,603}604605/// The LapicState represents the state of an x86 CPU's Local APIC.606/// The Local APIC consists of 64 128-bit registers, but only the first 32-bits of each register607/// can be used, so this structure only stores the first 32-bits of each register.608#[repr(C)]609#[derive(Clone, Copy, Serialize, Deserialize)]610pub struct LapicState {611#[serde(612serialize_with = "serialize_arr",613deserialize_with = "deserialize_seq_to_arr"614)]615pub regs: [LapicRegister; 64],616}617618pub type LapicRegister = u32;619620// rust arrays longer than 32 need custom implementations of Debug621impl std::fmt::Debug for LapicState {622fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {623self.regs[..].fmt(formatter)624}625}626627// rust arrays longer than 32 need custom implementations of PartialEq628impl PartialEq for LapicState {629fn eq(&self, other: &LapicState) -> bool {630self.regs[..] == other.regs[..]631}632}633634// Lapic equality is reflexive, so we impl Eq635impl Eq for LapicState {}636637/// The PitState represents the state of the PIT (aka the Programmable Interval Timer).638/// The state is simply the state of it's three channels.639#[repr(C)]640#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]641pub struct PitState {642pub channels: [PitChannelState; 3],643/// Hypervisor-specific flags for setting the pit state.644pub flags: u32,645}646647/// The PitRWMode enum represents the access mode of a PIT channel.648/// Reads and writes to the Pit happen over Port-mapped I/O, which happens one byte at a time,649/// but the count values and latch values are two bytes. So the access mode controls which of the650/// two bytes will be read when.651#[repr(C)]652#[derive(enumn::N, Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]653pub enum PitRWMode {654/// None mode means that no access mode has been set.655None = 0,656/// Least mode means all reads/writes will read/write the least significant byte.657Least = 1,658/// Most mode means all reads/writes will read/write the most significant byte.659Most = 2,660/// Both mode means first the least significant byte will be read/written, then the661/// next read/write will read/write the most significant byte.662Both = 3,663}664665/// Convenience implementation for converting from a u8666impl From<u8> for PitRWMode {667fn from(item: u8) -> Self {668PitRWMode::n(item).unwrap_or_else(|| {669error!("Invalid PitRWMode value {}, setting to 0", item);670PitRWMode::None671})672}673}674675/// The PitRWState enum represents the state of reading to or writing from a channel.676/// This is related to the PitRWMode, it mainly gives more detail about the state of the channel677/// with respect to PitRWMode::Both.678#[repr(C)]679#[derive(enumn::N, Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]680pub enum PitRWState {681/// None mode means that no access mode has been set.682None = 0,683/// LSB means that the channel is in PitRWMode::Least access mode.684LSB = 1,685/// MSB means that the channel is in PitRWMode::Most access mode.686MSB = 2,687/// Word0 means that the channel is in PitRWMode::Both mode, and the least sginificant byte688/// has not been read/written yet.689Word0 = 3,690/// Word1 means that the channel is in PitRWMode::Both mode and the least significant byte691/// has already been read/written, and the next byte to be read/written will be the most692/// significant byte.693Word1 = 4,694}695696/// Convenience implementation for converting from a u8697impl From<u8> for PitRWState {698fn from(item: u8) -> Self {699PitRWState::n(item).unwrap_or_else(|| {700error!("Invalid PitRWState value {}, setting to 0", item);701PitRWState::None702})703}704}705706/// The PitChannelState represents the state of one of the PIT's three counters.707#[repr(C)]708#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]709pub struct PitChannelState {710/// The starting value for the counter.711pub count: u32,712/// Stores the channel count from the last time the count was latched.713pub latched_count: u16,714/// Indicates the PitRWState state of reading the latch value.715pub count_latched: PitRWState,716/// Indicates whether ReadBack status has been latched.717pub status_latched: bool,718/// Stores the channel status from the last time the status was latched. The status contains719/// information about the access mode of this channel, but changing those bits in the status720/// will not change the behavior of the pit.721pub status: u8,722/// Indicates the PitRWState state of reading the counter.723pub read_state: PitRWState,724/// Indicates the PitRWState state of writing the counter.725pub write_state: PitRWState,726/// Stores the value with which the counter was initialized. Counters are 16-727/// bit values with an effective range of 1-65536 (65536 represented by 0).728pub reload_value: u16,729/// The command access mode of this channel.730pub rw_mode: PitRWMode,731/// The operation mode of this channel.732pub mode: u8,733/// Whether or not we are in bcd mode. Not supported by KVM or crosvm's PIT implementation.734pub bcd: bool,735/// Value of the gate input pin. This only applies to channel 2.736pub gate: bool,737/// Nanosecond timestamp of when the count value was loaded.738pub count_load_time: u64,739}740741// Convenience constructors for IrqRoutes742impl IrqRoute {743pub fn ioapic_irq_route(irq_num: u32) -> IrqRoute {744IrqRoute {745gsi: irq_num,746source: IrqSource::Irqchip {747chip: IrqSourceChip::Ioapic,748pin: irq_num,749},750}751}752753pub fn pic_irq_route(id: IrqSourceChip, irq_num: u32) -> IrqRoute {754IrqRoute {755gsi: irq_num,756source: IrqSource::Irqchip {757chip: id,758pin: irq_num % 8,759},760}761}762}763764/// State of a VCPU's general purpose registers.765#[repr(C)]766#[derive(Debug, Copy, Clone, Serialize, Deserialize)]767pub struct Regs {768pub rax: u64,769pub rbx: u64,770pub rcx: u64,771pub rdx: u64,772pub rsi: u64,773pub rdi: u64,774pub rsp: u64,775pub rbp: u64,776pub r8: u64,777pub r9: u64,778pub r10: u64,779pub r11: u64,780pub r12: u64,781pub r13: u64,782pub r14: u64,783pub r15: u64,784pub rip: u64,785pub rflags: u64,786}787788impl Default for Regs {789fn default() -> Self {790Regs {791rax: 0,792rbx: 0,793rcx: 0,794rdx: 0,795rsi: 0,796rdi: 0,797rsp: 0,798rbp: 0,799r8: 0,800r9: 0,801r10: 0,802r11: 0,803r12: 0,804r13: 0,805r14: 0,806r15: 0,807rip: 0xfff0, // Reset vector.808rflags: 0x2, // Bit 1 (0x2) is always 1.809}810}811}812813/// State of a memory segment.814#[repr(C)]815#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]816pub struct Segment {817pub base: u64,818/// Limit of the segment - always in bytes, regardless of granularity (`g`) field.819pub limit_bytes: u32,820pub selector: u16,821pub type_: u8,822pub present: u8,823pub dpl: u8,824pub db: u8,825pub s: u8,826pub l: u8,827pub g: u8,828pub avl: u8,829}830831/// State of a global descriptor table or interrupt descriptor table.832#[repr(C)]833#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]834pub struct DescriptorTable {835pub base: u64,836pub limit: u16,837}838839/// State of a VCPU's special registers.840#[repr(C)]841#[derive(Debug, Copy, Clone, Serialize, Deserialize)]842pub struct Sregs {843pub cs: Segment,844pub ds: Segment,845pub es: Segment,846pub fs: Segment,847pub gs: Segment,848pub ss: Segment,849pub tr: Segment,850pub ldt: Segment,851pub gdt: DescriptorTable,852pub idt: DescriptorTable,853pub cr0: u64,854pub cr2: u64,855pub cr3: u64,856pub cr4: u64,857pub cr8: u64,858pub efer: u64,859}860861impl Default for Sregs {862fn default() -> Self {863// Intel SDM Vol. 3A, 3.4.5.1 ("Code- and Data-Segment Descriptor Types")864const SEG_TYPE_DATA: u8 = 0b0000;865const SEG_TYPE_DATA_WRITABLE: u8 = 0b0010;866867const SEG_TYPE_CODE: u8 = 0b1000;868const SEG_TYPE_CODE_READABLE: u8 = 0b0010;869870const SEG_TYPE_ACCESSED: u8 = 0b0001;871872// Intel SDM Vol. 3A, 3.4.5 ("Segment Descriptors")873const SEG_S_SYSTEM: u8 = 0; // System segment.874const SEG_S_CODE_OR_DATA: u8 = 1; // Data/code segment.875876// 16-bit real-mode code segment (reset vector).877let code_seg = Segment {878base: 0xffff0000,879limit_bytes: 0xffff,880selector: 0xf000,881type_: SEG_TYPE_CODE | SEG_TYPE_CODE_READABLE | SEG_TYPE_ACCESSED, // 11882present: 1,883s: SEG_S_CODE_OR_DATA,884..Default::default()885};886887// 16-bit real-mode data segment.888let data_seg = Segment {889base: 0,890limit_bytes: 0xffff,891selector: 0,892type_: SEG_TYPE_DATA | SEG_TYPE_DATA_WRITABLE | SEG_TYPE_ACCESSED, // 3893present: 1,894s: SEG_S_CODE_OR_DATA,895..Default::default()896};897898// 16-bit TSS segment.899let task_seg = Segment {900base: 0,901limit_bytes: 0xffff,902selector: 0,903type_: SEG_TYPE_CODE | SEG_TYPE_CODE_READABLE | SEG_TYPE_ACCESSED, // 11904present: 1,905s: SEG_S_SYSTEM,906..Default::default()907};908909// Local descriptor table.910let ldt = Segment {911base: 0,912limit_bytes: 0xffff,913selector: 0,914type_: SEG_TYPE_DATA | SEG_TYPE_DATA_WRITABLE, // 2915present: 1,916s: SEG_S_SYSTEM,917..Default::default()918};919920// Global descriptor table.921let gdt = DescriptorTable {922base: 0,923limit: 0xffff,924};925926// Interrupt descriptor table.927let idt = DescriptorTable {928base: 0,929limit: 0xffff,930};931932let cr0 = (1 << 4) // CR0.ET (reserved, always 1)933| (1 << 30); // CR0.CD (cache disable)934935Sregs {936cs: code_seg,937ds: data_seg,938es: data_seg,939fs: data_seg,940gs: data_seg,941ss: data_seg,942tr: task_seg,943ldt,944gdt,945idt,946cr0,947cr2: 0,948cr3: 0,949cr4: 0,950cr8: 0,951efer: 0,952}953}954}955956/// x87 80-bit floating point value.957#[repr(C)]958#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]959pub struct FpuReg {960/// 64-bit mantissa.961pub significand: u64,962963/// 15-bit biased exponent and sign bit.964pub sign_exp: u16,965}966967impl FpuReg {968/// Convert an array of 8x16-byte arrays to an array of 8 `FpuReg`.969///970/// Ignores any data in the upper 6 bytes of each element; the values represent 80-bit FPU971/// registers, so the upper 48 bits are unused.972pub fn from_16byte_arrays(byte_arrays: &[[u8; 16]; 8]) -> [FpuReg; 8] {973let mut regs = [FpuReg::default(); 8];974for (dst, src) in regs.iter_mut().zip(byte_arrays.iter()) {975let tbyte: [u8; 10] = src[0..10].try_into().unwrap();976*dst = FpuReg::from(tbyte);977}978regs979}980981/// Convert an array of 8 `FpuReg` into 8x16-byte arrays.982pub fn to_16byte_arrays(regs: &[FpuReg; 8]) -> [[u8; 16]; 8] {983let mut byte_arrays = [[0u8; 16]; 8];984for (dst, src) in byte_arrays.iter_mut().zip(regs.iter()) {985*dst = (*src).into();986}987byte_arrays988}989}990991impl From<[u8; 10]> for FpuReg {992/// Construct a `FpuReg` from an 80-bit representation.993fn from(value: [u8; 10]) -> FpuReg {994// These array sub-slices can't fail, but there's no (safe) way to express that in Rust995// without an `unwrap()`.996let significand_bytes = value[0..8].try_into().unwrap();997let significand = u64::from_le_bytes(significand_bytes);998let sign_exp_bytes = value[8..10].try_into().unwrap();999let sign_exp = u16::from_le_bytes(sign_exp_bytes);1000FpuReg {1001significand,1002sign_exp,1003}1004}1005}10061007impl From<FpuReg> for [u8; 10] {1008/// Convert an `FpuReg` into its 80-bit "TBYTE" representation.1009fn from(value: FpuReg) -> [u8; 10] {1010let mut bytes = [0u8; 10];1011bytes[0..8].copy_from_slice(&value.significand.to_le_bytes());1012bytes[8..10].copy_from_slice(&value.sign_exp.to_le_bytes());1013bytes1014}1015}10161017impl From<FpuReg> for [u8; 16] {1018/// Convert an `FpuReg` into its 80-bit representation plus 6 unused upper bytes.1019/// This is a convenience function for converting to hypervisor types.1020fn from(value: FpuReg) -> [u8; 16] {1021let mut bytes = [0u8; 16];1022bytes[0..8].copy_from_slice(&value.significand.to_le_bytes());1023bytes[8..10].copy_from_slice(&value.sign_exp.to_le_bytes());1024bytes1025}1026}10271028/// State of a VCPU's floating point unit.1029#[repr(C)]1030#[derive(Debug, Copy, Clone, Serialize, Deserialize)]1031pub struct Fpu {1032pub fpr: [FpuReg; 8],1033pub fcw: u16,1034pub fsw: u16,1035pub ftwx: u8,1036pub last_opcode: u16,1037pub last_ip: u64,1038pub last_dp: u64,1039pub xmm: [[u8; 16usize]; 16usize],1040pub mxcsr: u32,1041}10421043impl Default for Fpu {1044fn default() -> Self {1045Fpu {1046fpr: Default::default(),1047fcw: 0x37f, // Intel SDM Vol. 1, 13.61048fsw: 0,1049ftwx: 0,1050last_opcode: 0,1051last_ip: 0,1052last_dp: 0,1053xmm: Default::default(),1054mxcsr: 0x1f80, // Intel SDM Vol. 1, 11.6.41055}1056}1057}10581059/// State of a VCPU's debug registers.1060#[repr(C)]1061#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]1062pub struct DebugRegs {1063pub db: [u64; 4usize],1064pub dr6: u64,1065pub dr7: u64,1066}10671068/// The hybrid type for intel hybrid CPU.1069#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]1070pub enum CpuHybridType {1071/// Intel Atom.1072Atom,1073/// Intel Core.1074Core,1075}10761077/// State of the VCPU's x87 FPU, MMX, XMM, YMM registers.1078/// May contain more state depending on enabled extensions.1079#[derive(Clone, Debug, Serialize, Deserialize)]1080pub struct Xsave {1081data: Vec<u32>,10821083// Actual length in bytes. May be smaller than data if a non-u32 multiple of bytes is1084// requested.1085len: usize,1086}10871088impl Xsave {1089/// Create a new buffer to store Xsave data.1090///1091/// # Argments1092/// * `len` size in bytes.1093pub fn new(len: usize) -> Self {1094Xsave {1095data: vec![0; len.div_ceil(4)],1096len,1097}1098}10991100pub fn as_ptr(&self) -> *const c_void {1101self.data.as_ptr() as *const c_void1102}11031104pub fn as_mut_ptr(&mut self) -> *mut c_void {1105self.data.as_mut_ptr() as *mut c_void1106}11071108/// Length in bytes of the XSAVE data.1109pub fn len(&self) -> usize {1110self.len1111}11121113/// Returns true is length of XSAVE data is zero1114pub fn is_empty(&self) -> bool {1115self.len() == 01116}1117}111811191120