Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
google
GitHub Repository: google/crosvm
Path: blob/main/hypervisor/src/x86_64.rs
5394 views
1
// Copyright 2020 The ChromiumOS Authors
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
use std::arch::x86_64::CpuidResult;
6
#[cfg(any(unix, feature = "haxm", feature = "whpx"))]
7
use std::arch::x86_64::__cpuid;
8
use std::arch::x86_64::_rdtsc;
9
use std::collections::BTreeMap;
10
use std::collections::HashSet;
11
12
use anyhow::Context;
13
use base::custom_serde::deserialize_seq_to_arr;
14
use base::custom_serde::serialize_arr;
15
use base::error;
16
use base::warn;
17
use base::Result;
18
use bit_field::*;
19
use downcast_rs::impl_downcast;
20
use libc::c_void;
21
use serde::Deserialize;
22
use serde::Serialize;
23
use snapshot::AnySnapshot;
24
use vm_memory::GuestAddress;
25
26
use crate::Hypervisor;
27
use crate::IrqRoute;
28
use crate::IrqSource;
29
use crate::IrqSourceChip;
30
use crate::Vcpu;
31
use crate::Vm;
32
33
// AMD Family 15h performance-monitoring MSRs. The event-select (PERF_CTL) and
// counter (PERF_CTR) registers are interleaved in MSR address space: CTLn at
// even offsets, CTRn at the following odd offset.
const MSR_F15H_PERF_CTL0: u32 = 0xc0010200;
const MSR_F15H_PERF_CTL1: u32 = 0xc0010202;
const MSR_F15H_PERF_CTL2: u32 = 0xc0010204;
const MSR_F15H_PERF_CTL3: u32 = 0xc0010206;
const MSR_F15H_PERF_CTL4: u32 = 0xc0010208;
const MSR_F15H_PERF_CTL5: u32 = 0xc001020a;
const MSR_F15H_PERF_CTR0: u32 = 0xc0010201;
const MSR_F15H_PERF_CTR1: u32 = 0xc0010203;
const MSR_F15H_PERF_CTR2: u32 = 0xc0010205;
const MSR_F15H_PERF_CTR3: u32 = 0xc0010207;
const MSR_F15H_PERF_CTR4: u32 = 0xc0010209;
const MSR_F15H_PERF_CTR5: u32 = 0xc001020b;
// Intel performance-monitoring capabilities MSR. Listed here because, like the
// PERF_CTL/CTR MSRs above, it may be unsupported by older host kernels (see the
// allowlist in `VcpuX86_64::restore`).
const MSR_IA32_PERF_CAPABILITIES: u32 = 0x00000345;
46
47
/// A trait for managing cpuids for an x86_64 hypervisor and for checking its capabilities.
pub trait HypervisorX86_64: Hypervisor {
    /// Get the system supported CPUID values.
    fn get_supported_cpuid(&self) -> Result<CpuId>;

    /// Gets the list of supported MSRs.
    ///
    /// Returns the MSR indices the hypervisor supports reading/writing for a VCPU.
    fn get_msr_index_list(&self) -> Result<Vec<u32>>;
}
55
56
/// A wrapper for using a VM on x86_64 and getting/setting its state.
pub trait VmX86_64: Vm {
    /// Gets the `HypervisorX86_64` that created this VM.
    fn get_hypervisor(&self) -> &dyn HypervisorX86_64;

    /// Create a Vcpu with the specified Vcpu ID.
    fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>>;

    /// Sets the address of the three-page region in the VM's address space.
    ///
    /// NOTE(review): presumably the TSS region some hypervisors require (e.g. KVM's
    /// KVM_SET_TSS_ADDR) — confirm against the per-hypervisor implementations.
    fn set_tss_addr(&self, addr: GuestAddress) -> Result<()>;

    /// Sets the address of a one-page region in the VM's address space.
    ///
    /// NOTE(review): presumably the identity-map page — confirm against the implementations.
    fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()>;

    /// Load pVM firmware for the VM, creating a memslot for it as needed.
    ///
    /// Only works on protected VMs (i.e. those with vm_type == KVM_X86_PKVM_PROTECTED_VM).
    fn load_protected_vm_firmware(&mut self, fw_addr: GuestAddress, fw_max_size: u64)
        -> Result<()>;
}
76
77
/// A wrapper around creating and using a VCPU on x86_64.
pub trait VcpuX86_64: Vcpu {
    /// Sets or clears the flag that requests the VCPU to exit when it becomes possible to inject
    /// interrupts into the guest.
    fn set_interrupt_window_requested(&self, requested: bool);

    /// Checks if we can inject an interrupt into the VCPU.
    fn ready_for_interrupt(&self) -> bool;

    /// Injects interrupt vector `irq` into the VCPU.
    ///
    /// This function should only be called when [`Self::ready_for_interrupt`] returns true.
    /// Otherwise the interrupt injection may fail or the next VCPU run may fail. However, if
    /// [`Self::interrupt`] returns [`Ok`], the implementation must guarantee that the interrupt
    /// isn't injected in an uninterruptible window (e.g. right after the mov ss instruction).
    ///
    /// The caller should avoid calling this function more than 1 time for one VMEXIT, because the
    /// hypervisor may behave differently: some hypervisors(e.g. WHPX, KVM) will only try to inject
    /// the last `irq` requested, while some other hypervisors(e.g. HAXM) may try to inject all
    /// `irq`s requested.
    fn interrupt(&self, irq: u8) -> Result<()>;

    /// Injects a non-maskable interrupt into the VCPU.
    fn inject_nmi(&self) -> Result<()>;

    /// Gets the VCPU general purpose registers.
    fn get_regs(&self) -> Result<Regs>;

    /// Sets the VCPU general purpose registers.
    fn set_regs(&self, regs: &Regs) -> Result<()>;

    /// Gets the VCPU special registers.
    fn get_sregs(&self) -> Result<Sregs>;

    /// Sets the VCPU special registers.
    fn set_sregs(&self, sregs: &Sregs) -> Result<()>;

    /// Gets the VCPU FPU registers.
    fn get_fpu(&self) -> Result<Fpu>;

    /// Sets the VCPU FPU registers.
    fn set_fpu(&self, fpu: &Fpu) -> Result<()>;

    /// Gets the VCPU debug registers.
    fn get_debugregs(&self) -> Result<DebugRegs>;

    /// Sets the VCPU debug registers.
    fn set_debugregs(&self, debugregs: &DebugRegs) -> Result<()>;

    /// Gets the VCPU extended control registers.
    fn get_xcrs(&self) -> Result<BTreeMap<u32, u64>>;

    /// Sets a VCPU extended control register.
    fn set_xcr(&self, xcr: u32, value: u64) -> Result<()>;

    /// Gets the VCPU x87 FPU, MMX, XMM, YMM and MXCSR registers.
    fn get_xsave(&self) -> Result<Xsave>;

    /// Sets the VCPU x87 FPU, MMX, XMM, YMM and MXCSR registers.
    fn set_xsave(&self, xsave: &Xsave) -> Result<()>;

    /// Gets hypervisor specific state for this VCPU that must be
    /// saved/restored for snapshotting.
    /// This state is fetched after VCPUs are frozen and interrupts are flushed.
    fn get_hypervisor_specific_state(&self) -> Result<AnySnapshot>;

    /// Sets hypervisor specific state for this VCPU. Only used for
    /// snapshotting.
    fn set_hypervisor_specific_state(&self, data: AnySnapshot) -> Result<()>;

    /// Gets a single model-specific register's value.
    fn get_msr(&self, msr_index: u32) -> Result<u64>;

    /// Gets the model-specific registers. Returns all the MSRs for the VCPU.
    fn get_all_msrs(&self) -> Result<BTreeMap<u32, u64>>;

    /// Sets a single model-specific register's value.
    fn set_msr(&self, msr_index: u32, value: u64) -> Result<()>;

    /// Sets up the data returned by the CPUID instruction.
    fn set_cpuid(&self, cpuid: &CpuId) -> Result<()>;

    /// Sets up debug registers and configure vcpu for handling guest debug events.
    fn set_guest_debug(&self, addrs: &[GuestAddress], enable_singlestep: bool) -> Result<()>;

    /// This function should be called after `Vcpu::run` returns `VcpuExit::Cpuid`, and `entry`
    /// should represent the result of emulating the CPUID instruction. The `handle_cpuid` function
    /// will then set the appropriate registers on the vcpu.
    fn handle_cpuid(&mut self, entry: &CpuIdEntry) -> Result<()>;

    /// Gets the guest->host TSC offset.
    ///
    /// The default implementation uses [`VcpuX86_64::get_msr()`] to read the guest TSC.
    fn get_tsc_offset(&self) -> Result<u64> {
        // SAFETY:
        // Safe because _rdtsc takes no arguments
        let host_before_tsc = unsafe { _rdtsc() };

        // get guest TSC value from our hypervisor
        let guest_tsc = self.get_msr(crate::MSR_IA32_TSC)?;

        // SAFETY:
        // Safe because _rdtsc takes no arguments
        let host_after_tsc = unsafe { _rdtsc() };

        // Average the before and after host tsc to get the best value
        let host_tsc = ((host_before_tsc as u128 + host_after_tsc as u128) / 2) as u64;

        // wrapping_sub: the guest TSC can be behind the host TSC, making the offset
        // conceptually negative; it is represented modulo 2^64.
        Ok(guest_tsc.wrapping_sub(host_tsc))
    }

    /// Sets the guest->host TSC offset.
    ///
    /// The default implementation uses [`VcpuX86_64::set_tsc_value()`] to set the TSC value.
    ///
    /// It sets TSC_OFFSET (VMCS / CB field) by setting the TSC MSR to the current
    /// host TSC value plus the desired offset. We rely on the fact that hypervisors
    /// determine the value of TSC_OFFSET by computing TSC_OFFSET = `new_tsc_value - _rdtsc()` =
    /// `_rdtsc() + offset - _rdtsc()` ~= `offset`. Note that the ~= is important: this is an
    /// approximate operation, because the two _rdtsc() calls
    /// are separated by at least a few ticks.
    ///
    /// Note: TSC_OFFSET, host TSC, guest TSC, and TSC MSR are all different
    /// concepts.
    /// * When a guest executes rdtsc, the value (guest TSC) returned is host_tsc * TSC_MULTIPLIER +
    ///   TSC_OFFSET + TSC_ADJUST.
    /// * The TSC MSR is a special MSR that when written to by the host, will cause TSC_OFFSET to be
    ///   set accordingly by the hypervisor.
    /// * When the guest *writes* to TSC MSR, it actually changes the TSC_ADJUST MSR *for the
    ///   guest*. Generally this only happens if the guest is trying to re-zero or synchronize
    ///   TSCs.
    fn set_tsc_offset(&self, offset: u64) -> Result<()> {
        // SAFETY: _rdtsc takes no arguments.
        let host_tsc = unsafe { _rdtsc() };
        self.set_tsc_value(host_tsc.wrapping_add(offset))
    }

    /// Sets the guest TSC exactly to the provided value.
    ///
    /// The default implementation sets the guest's TSC by writing the value to the MSR directly.
    ///
    /// See [`VcpuX86_64::set_tsc_offset()`] for an explanation of how this value is actually read
    /// by the guest after being set.
    fn set_tsc_value(&self, value: u64) -> Result<()> {
        self.set_msr(crate::MSR_IA32_TSC, value)
    }

    /// Some hypervisors require special handling to restore timekeeping when
    /// a snapshot is restored. They are provided with a host TSC reference
    /// moment, guaranteed to be the same across all Vcpus, and the Vcpu's TSC
    /// offset at the moment it was snapshotted.
    fn restore_timekeeping(&self, host_tsc_reference_moment: u64, tsc_offset: u64) -> Result<()>;

    /// Snapshot vCPU state by reading every register class plus the
    /// hypervisor-specific blob and the current TSC offset.
    fn snapshot(&self) -> anyhow::Result<VcpuSnapshot> {
        Ok(VcpuSnapshot {
            vcpu_id: self.id(),
            regs: self.get_regs()?,
            sregs: self.get_sregs()?,
            debug_regs: self.get_debugregs()?,
            xcrs: self.get_xcrs()?,
            msrs: self.get_all_msrs()?,
            xsave: self.get_xsave()?,
            hypervisor_data: self.get_hypervisor_specific_state()?,
            tsc_offset: self.get_tsc_offset()?,
        })
    }

    /// Restores this vCPU from `snapshot`, then re-establishes timekeeping
    /// relative to `host_tsc_reference_moment`.
    ///
    /// Panics if `snapshot` was taken from a different vCPU id.
    fn restore(
        &mut self,
        snapshot: &VcpuSnapshot,
        host_tsc_reference_moment: u64,
    ) -> anyhow::Result<()> {
        // List of MSRs that may fail to restore due to lack of support in the host kernel.
        // Some hosts may be running older kernels which do not support all MSRs, but
        // get_all_msrs will still fetch the MSRs supported by the CPU. Trying to set those MSRs
        // will result in failures, so they will throw a warning instead.
        let msr_allowlist = HashSet::from([
            MSR_F15H_PERF_CTL0,
            MSR_F15H_PERF_CTL1,
            MSR_F15H_PERF_CTL2,
            MSR_F15H_PERF_CTL3,
            MSR_F15H_PERF_CTL4,
            MSR_F15H_PERF_CTL5,
            MSR_F15H_PERF_CTR0,
            MSR_F15H_PERF_CTR1,
            MSR_F15H_PERF_CTR2,
            MSR_F15H_PERF_CTR3,
            MSR_F15H_PERF_CTR4,
            MSR_F15H_PERF_CTR5,
            MSR_IA32_PERF_CAPABILITIES,
        ]);
        // Restoring a snapshot onto a different vCPU is a caller bug.
        assert_eq!(snapshot.vcpu_id, self.id());
        self.set_regs(&snapshot.regs)?;
        self.set_sregs(&snapshot.sregs)?;
        self.set_debugregs(&snapshot.debug_regs)?;
        for (xcr_index, value) in &snapshot.xcrs {
            self.set_xcr(*xcr_index, *value)?;
        }

        for (msr_index, value) in snapshot.msrs.iter() {
            if self.get_msr(*msr_index) == Ok(*value) {
                continue; // no need to set MSR since the values are the same.
            }
            if let Err(e) = self.set_msr(*msr_index, *value) {
                if msr_allowlist.contains(msr_index) {
                    warn!(
                        "Failed to set MSR. MSR might not be supported in this kernel. Err: {}",
                        e
                    );
                } else {
                    return Err(e).context(
                        "Failed to set MSR. MSR might not be supported by the CPU or by the kernel,
                         and was not allow-listed.",
                    );
                }
            };
        }
        self.set_xsave(&snapshot.xsave)?;
        self.set_hypervisor_specific_state(snapshot.hypervisor_data.clone())?;
        self.restore_timekeeping(host_tsc_reference_moment, snapshot.tsc_offset)?;
        Ok(())
    }
}
301
302
/// x86 specific vCPU snapshot.
///
/// Produced by [`VcpuX86_64::snapshot`] and consumed by [`VcpuX86_64::restore`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct VcpuSnapshot {
    /// ID of the vCPU this snapshot was taken from; restore asserts it matches.
    pub vcpu_id: usize,
    // General purpose registers.
    regs: Regs,
    // Special registers (segments, control registers, descriptor tables).
    sregs: Sregs,
    // Debug registers.
    debug_regs: DebugRegs,
    // Extended control registers, keyed by XCR index.
    xcrs: BTreeMap<u32, u64>,
    // All model-specific registers, keyed by MSR index.
    msrs: BTreeMap<u32, u64>,
    // x87/MMX/XMM/YMM/MXCSR state.
    xsave: Xsave,
    // Opaque hypervisor-specific state blob.
    hypervisor_data: AnySnapshot,
    // Guest->host TSC offset at snapshot time.
    tsc_offset: u64,
}
315
316
// Allow `dyn VcpuX86_64` trait objects to be downcast to concrete implementations.
impl_downcast!(VcpuX86_64);

// TSC MSR
pub const MSR_IA32_TSC: u32 = 0x00000010;
320
321
/// Returns the host CPU's maximum physical address width, in bits.
///
/// Queries extended CPUID leaf 0x8000_0008 when the processor advertises it;
/// otherwise falls back to 36 bits.
#[cfg(any(unix, feature = "haxm", feature = "whpx"))]
pub(crate) fn host_phys_addr_bits() -> u8 {
    // SAFETY: the CPUID intrinsic reads no memory and has no safety requirements.
    let max_ext_leaf = unsafe { __cpuid(0x80000000) }.eax;
    if max_ext_leaf < 0x80000008 {
        // Address-size leaf unavailable; use the 36-bit fallback.
        return 36;
    }
    // SAFETY: the CPUID intrinsic reads no memory and has no safety requirements.
    let addr_size_leaf = unsafe { __cpuid(0x80000008) };
    // EAX[7:0] of leaf 0x8000_0008: physical address size in bits.
    addr_size_leaf.eax as u8
}
335
336
/// Initial state for x86_64 VCPUs.
///
/// `Default` yields the architectural reset state via the `Default` impls of
/// `Regs`/`Sregs`/`Fpu` and an empty MSR map.
#[derive(Clone, Default)]
pub struct VcpuInitX86_64 {
    /// General-purpose registers.
    pub regs: Regs,

    /// Special registers.
    pub sregs: Sregs,

    /// Floating-point registers.
    pub fpu: Fpu,

    /// Machine-specific registers.
    pub msrs: BTreeMap<u32, u64>,
}
351
352
/// Hold the CPU feature configurations that are needed to setup a vCPU.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CpuConfigX86_64 {
    /// whether to force using a calibrated TSC leaf (0x15).
    pub force_calibrated_tsc_leaf: bool,

    /// whether enabling host cpu topology.
    pub host_cpu_topology: bool,

    /// whether expose HWP feature to the guest.
    pub enable_hwp: bool,

    /// Whether disabling SMT (Simultaneous Multithreading).
    pub no_smt: bool,

    /// whether enabling ITMT scheduler
    pub itmt: bool,

    /// whether setting hybrid CPU type
    pub hybrid_type: Option<CpuHybridType>,
}
373
374
impl CpuConfigX86_64 {
375
pub fn new(
376
force_calibrated_tsc_leaf: bool,
377
host_cpu_topology: bool,
378
enable_hwp: bool,
379
no_smt: bool,
380
itmt: bool,
381
hybrid_type: Option<CpuHybridType>,
382
) -> Self {
383
CpuConfigX86_64 {
384
force_calibrated_tsc_leaf,
385
host_cpu_topology,
386
enable_hwp,
387
no_smt,
388
itmt,
389
hybrid_type,
390
}
391
}
392
}
393
394
/// A CpuId Entry contains supported feature information for the given processor.
/// This can be modified by the hypervisor to pass additional information to the guest kernel
/// about the hypervisor or vm. Information is returned in the eax, ebx, ecx and edx registers
/// by the cpu for a given function and index/subfunction (passed into the cpu via the eax and ecx
/// register respectively).
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct CpuIdEntry {
    /// CPUID function (leaf) number, as passed in EAX.
    pub function: u32,
    /// CPUID index (subleaf) number, as passed in ECX.
    pub index: u32,
    // flags is needed for KVM. We store it on CpuIdEntry to preserve the flags across
    // get_supported_cpuids() -> kvm_cpuid2 -> CpuId -> kvm_cpuid2 -> set_cpuid().
    pub flags: u32,
    /// The EAX/EBX/ECX/EDX values returned for this function/index pair.
    pub cpuid: CpuidResult,
}
409
410
/// A container for the list of cpu id entries for the hypervisor and underlying cpu.
pub struct CpuId {
    /// The entries, one per (function, index) pair.
    pub cpu_id_entries: Vec<CpuIdEntry>,
}
414
415
impl CpuId {
416
/// Constructs a new CpuId, with space allocated for `initial_capacity` CpuIdEntries.
417
pub fn new(initial_capacity: usize) -> Self {
418
CpuId {
419
cpu_id_entries: Vec::with_capacity(initial_capacity),
420
}
421
}
422
}
423
424
/// Interrupt destination addressing mode (1-bit field): physical APIC ID vs. logical ID.
#[bitfield]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DestinationMode {
    Physical = 0,
    Logical = 1,
}
430
431
/// Interrupt trigger mode (1-bit field): edge- vs. level-triggered.
#[bitfield]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TriggerMode {
    Edge = 0,
    Level = 1,
}
437
438
/// Interrupt delivery mode (3-bit field) used in MSI data and IOAPIC redirection entries.
#[bitfield]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DeliveryMode {
    Fixed = 0b000,
    Lowest = 0b001,
    SMI = 0b010,        // System management interrupt
    RemoteRead = 0b011, // This is no longer supported by intel.
    NMI = 0b100,        // Non maskable interrupt
    Init = 0b101,
    Startup = 0b110,
    External = 0b111,
}
450
451
// These MSI structures are for Intel's implementation of MSI. The PCI spec defines most of MSI,
// but the Intel spec defines the format of messages for raising interrupts. The PCI spec defines
// three u32s -- the address, address_high, and data -- but Intel only makes use of the address and
// data. The Intel portion of the specification is in Volume 3 section 10.11.

/// The MSI address message (the 32-bit MSI address register).
#[bitfield]
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct MsiAddressMessage {
    pub reserved: BitField2,
    #[bits = 1]
    pub destination_mode: DestinationMode,
    pub redirection_hint: BitField1,
    pub reserved_2: BitField8,
    pub destination_id: BitField8,
    // According to Intel's implementation of MSI, these bits must always be 0xfee.
    pub always_0xfee: BitField12,
}
467
468
/// The MSI data message (the 32-bit MSI data register).
#[bitfield]
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct MsiDataMessage {
    /// Interrupt vector to deliver.
    pub vector: BitField8,
    #[bits = 3]
    pub delivery_mode: DeliveryMode,
    pub reserved: BitField3,
    #[bits = 1]
    pub level: Level,
    #[bits = 1]
    pub trigger: TriggerMode,
    pub reserved2: BitField16,
}
481
482
/// Delivery status of an interrupt (1-bit field): idle or send pending.
#[bitfield]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DeliveryStatus {
    Idle = 0,
    Pending = 1,
}
488
489
/// The level of a level-triggered interrupt: asserted or deasserted.
#[bitfield]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Level {
    Deassert = 0,
    Assert = 1,
}
496
497
/// Represents a IOAPIC redirection table entry.
///
/// One entry per IOAPIC pin; see [`IoapicState::redirect_table`].
#[bitfield]
#[derive(Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct IoapicRedirectionTableEntry {
    // Interrupt vector delivered for this pin.
    vector: BitField8,
    #[bits = 3]
    delivery_mode: DeliveryMode,
    #[bits = 1]
    dest_mode: DestinationMode,
    #[bits = 1]
    delivery_status: DeliveryStatus,
    // Pin polarity.
    polarity: BitField1,
    // Remote IRR: set while a level-triggered interrupt is being serviced.
    remote_irr: bool,
    #[bits = 1]
    trigger_mode: TriggerMode,
    interrupt_mask: bool, // true iff interrupts are masked.
    reserved: BitField39,
    // Destination APIC ID (or logical destination, per dest_mode).
    dest_id: BitField8,
}
516
517
/// Number of pins on the standard KVM/IOAPIC.
/// Sized to match the redirection table length in [`IoapicState`].
pub const NUM_IOAPIC_PINS: usize = 24;
519
520
/// Represents the state of the IOAPIC.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct IoapicState {
    /// base_address is the memory base address for this IOAPIC. It cannot be changed.
    pub base_address: u64,
    /// ioregsel register. Used for selecting which entry of the redirect table to read/write.
    pub ioregsel: u8,
    /// ioapicid register. Bits 24 - 27 contain the APIC ID for this device.
    pub ioapicid: u32,
    /// current_interrupt_level_bitmap represents a bitmap of the state of all of the irq lines
    pub current_interrupt_level_bitmap: u32,
    /// redirect_table contains the irq settings for each irq line
    // Custom (de)serializers because serde does not natively support arrays
    // longer than 32 elements.
    #[serde(
        serialize_with = "serialize_arr",
        deserialize_with = "deserialize_seq_to_arr"
    )]
    pub redirect_table: [IoapicRedirectionTableEntry; NUM_IOAPIC_PINS],
}
539
540
impl Default for IoapicState {
    /// All-zero IOAPIC state.
    fn default() -> IoapicState {
        // SAFETY: trivially safe — IoapicState is a #[repr(C)] plain-data struct
        // (integers and bitfield byte arrays), for which the all-zero bit
        // pattern is a valid value.
        unsafe { std::mem::zeroed() }
    }
}
546
547
/// Selects one of the two cascaded PICs: the primary (master) or secondary (slave).
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PicSelect {
    Primary = 0,
    Secondary = 1,
}
553
554
/// Which initialization command word (ICW1-ICW4) the PIC expects next.
#[repr(C)]
#[derive(enumn::N, Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub enum PicInitState {
    #[default]
    Icw1 = 0,
    Icw2 = 1,
    Icw3 = 2,
    Icw4 = 3,
}
563
564
/// Convenience implementation for converting from a u8
565
impl From<u8> for PicInitState {
566
fn from(item: u8) -> Self {
567
PicInitState::n(item).unwrap_or_else(|| {
568
error!("Invalid PicInitState {}, setting to 0", item);
569
PicInitState::Icw1
570
})
571
}
572
}
573
574
/// Represents the state of the PIC.
#[repr(C)]
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct PicState {
    /// Edge detection.
    pub last_irr: u8,
    /// Interrupt Request Register.
    pub irr: u8,
    /// Interrupt Mask Register.
    pub imr: u8,
    /// Interrupt Service Register.
    pub isr: u8,
    /// Highest priority, for priority rotation.
    pub priority_add: u8,
    /// Base interrupt vector added to the IRQ pin number.
    pub irq_base: u8,
    /// Selects which register a command-port read returns.
    pub read_reg_select: bool,
    /// Poll mode flag.
    pub poll: bool,
    /// Special mask mode flag.
    pub special_mask: bool,
    /// Which initialization command word the PIC expects next.
    pub init_state: PicInitState,
    /// Automatic end-of-interrupt mode.
    pub auto_eoi: bool,
    /// Rotate priorities on automatic EOI.
    pub rotate_on_auto_eoi: bool,
    /// Special fully nested mode.
    pub special_fully_nested_mode: bool,
    /// PIC takes either 3 or 4 bytes of initialization command word during
    /// initialization. use_4_byte_icw is true if 4 bytes of ICW are needed.
    pub use_4_byte_icw: bool,
    /// "Edge/Level Control Registers", for edge trigger selection.
    /// When a particular bit is set, the corresponding IRQ is in level-triggered mode. Otherwise
    /// it is in edge-triggered mode.
    pub elcr: u8,
    /// Mask of which ELCR bits are meaningful/writable for this PIC.
    pub elcr_mask: u8,
}
605
606
/// The LapicState represents the state of an x86 CPU's Local APIC.
/// The Local APIC consists of 64 128-bit registers, but only the first 32-bits of each register
/// can be used, so this structure only stores the first 32-bits of each register.
#[repr(C)]
#[derive(Clone, Copy, Serialize, Deserialize)]
pub struct LapicState {
    // Custom (de)serializers because serde does not natively support arrays
    // longer than 32 elements.
    #[serde(
        serialize_with = "serialize_arr",
        deserialize_with = "deserialize_seq_to_arr"
    )]
    pub regs: [LapicRegister; 64],
}

/// The architecturally-usable low 32 bits of one Local APIC register.
pub type LapicRegister = u32;
620
621
// rust arrays longer than 32 need custom implementations of Debug
622
impl std::fmt::Debug for LapicState {
623
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
624
self.regs[..].fmt(formatter)
625
}
626
}
627
628
// rust arrays longer than 32 need custom implementations of PartialEq
629
impl PartialEq for LapicState {
630
fn eq(&self, other: &LapicState) -> bool {
631
self.regs[..] == other.regs[..]
632
}
633
}
634
635
// Lapic equality is reflexive, so we impl Eq
636
impl Eq for LapicState {}
637
638
/// The PitState represents the state of the PIT (aka the Programmable Interval Timer).
/// The state is simply the state of its three channels.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct PitState {
    /// Per-channel state for the PIT's three counters.
    pub channels: [PitChannelState; 3],
    /// Hypervisor-specific flags for setting the pit state.
    pub flags: u32,
}
647
648
/// The PitRWMode enum represents the access mode of a PIT channel.
/// Reads and writes to the Pit happen over Port-mapped I/O, which happens one byte at a time,
/// but the count values and latch values are two bytes. So the access mode controls which of the
/// two bytes will be read when.
#[repr(C)]
#[derive(enumn::N, Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum PitRWMode {
    /// None mode means that no access mode has been set.
    None = 0,
    /// Least mode means all reads/writes will read/write the least significant byte.
    Least = 1,
    /// Most mode means all reads/writes will read/write the most significant byte.
    Most = 2,
    /// Both mode means first the least significant byte will be read/written, then the
    /// next read/write will read/write the most significant byte.
    Both = 3,
}
665
666
/// Convenience implementation for converting from a u8
667
impl From<u8> for PitRWMode {
668
fn from(item: u8) -> Self {
669
PitRWMode::n(item).unwrap_or_else(|| {
670
error!("Invalid PitRWMode value {}, setting to 0", item);
671
PitRWMode::None
672
})
673
}
674
}
675
676
/// The PitRWState enum represents the state of reading to or writing from a channel.
/// This is related to the PitRWMode, it mainly gives more detail about the state of the channel
/// with respect to PitRWMode::Both.
#[repr(C)]
#[derive(enumn::N, Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum PitRWState {
    /// None mode means that no access mode has been set.
    None = 0,
    /// LSB means that the channel is in PitRWMode::Least access mode.
    LSB = 1,
    /// MSB means that the channel is in PitRWMode::Most access mode.
    MSB = 2,
    /// Word0 means that the channel is in PitRWMode::Both mode, and the least significant byte
    /// has not been read/written yet.
    Word0 = 3,
    /// Word1 means that the channel is in PitRWMode::Both mode and the least significant byte
    /// has already been read/written, and the next byte to be read/written will be the most
    /// significant byte.
    Word1 = 4,
}
696
697
/// Convenience implementation for converting from a u8
698
impl From<u8> for PitRWState {
699
fn from(item: u8) -> Self {
700
PitRWState::n(item).unwrap_or_else(|| {
701
error!("Invalid PitRWState value {}, setting to 0", item);
702
PitRWState::None
703
})
704
}
705
}
706
707
/// The PitChannelState represents the state of one of the PIT's three counters.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct PitChannelState {
    /// The starting value for the counter.
    pub count: u32,
    /// Stores the channel count from the last time the count was latched.
    pub latched_count: u16,
    /// Indicates the PitRWState state of reading the latch value.
    pub count_latched: PitRWState,
    /// Indicates whether ReadBack status has been latched.
    pub status_latched: bool,
    /// Stores the channel status from the last time the status was latched. The status contains
    /// information about the access mode of this channel, but changing those bits in the status
    /// will not change the behavior of the pit.
    pub status: u8,
    /// Indicates the PitRWState state of reading the counter.
    pub read_state: PitRWState,
    /// Indicates the PitRWState state of writing the counter.
    pub write_state: PitRWState,
    /// Stores the value with which the counter was initialized. Counters are 16-
    /// bit values with an effective range of 1-65536 (65536 represented by 0).
    pub reload_value: u16,
    /// The command access mode of this channel.
    pub rw_mode: PitRWMode,
    /// The operation mode of this channel.
    pub mode: u8,
    /// Whether or not we are in bcd mode. Not supported by KVM or crosvm's PIT implementation.
    pub bcd: bool,
    /// Value of the gate input pin. This only applies to channel 2.
    pub gate: bool,
    /// Nanosecond timestamp of when the count value was loaded.
    pub count_load_time: u64,
}
741
742
// Convenience constructors for IrqRoutes
impl IrqRoute {
    /// Creates a route from GSI `irq_num` to the same-numbered IOAPIC pin.
    pub fn ioapic_irq_route(irq_num: u32) -> IrqRoute {
        IrqRoute {
            gsi: irq_num,
            source: IrqSource::Irqchip {
                chip: IrqSourceChip::Ioapic,
                pin: irq_num,
            },
        }
    }

    /// Creates a route from GSI `irq_num` to pin `irq_num % 8` of PIC chip `id`.
    pub fn pic_irq_route(id: IrqSourceChip, irq_num: u32) -> IrqRoute {
        IrqRoute {
            gsi: irq_num,
            source: IrqSource::Irqchip {
                chip: id,
                // Each PIC chip services 8 pins, so reduce the GSI to a pin index.
                pin: irq_num % 8,
            },
        }
    }
}
764
765
/// State of a VCPU's general purpose registers.
#[repr(C)]
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub struct Regs {
    pub rax: u64,
    pub rbx: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub rsi: u64,
    pub rdi: u64,
    pub rsp: u64,
    pub rbp: u64,
    pub r8: u64,
    pub r9: u64,
    pub r10: u64,
    pub r11: u64,
    pub r12: u64,
    pub r13: u64,
    pub r14: u64,
    pub r15: u64,
    /// Instruction pointer.
    pub rip: u64,
    /// Flags register.
    pub rflags: u64,
}
788
789
impl Default for Regs {
    /// Architectural reset state: all general-purpose registers zero, with RIP
    /// at the reset vector offset and the always-set RFLAGS bit.
    fn default() -> Self {
        Regs {
            rax: 0,
            rbx: 0,
            rcx: 0,
            rdx: 0,
            rsi: 0,
            rdi: 0,
            rsp: 0,
            rbp: 0,
            r8: 0,
            r9: 0,
            r10: 0,
            r11: 0,
            r12: 0,
            r13: 0,
            r14: 0,
            r15: 0,
            rip: 0xfff0, // Reset vector.
            rflags: 0x2, // Bit 1 (0x2) is always 1.
        }
    }
}
813
814
/// State of a memory segment.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Segment {
    pub base: u64,
    /// Limit of the segment - always in bytes, regardless of granularity (`g`) field.
    pub limit_bytes: u32,
    pub selector: u16,
    /// Segment type bits (see Intel SDM Vol. 3A, 3.4.5.1).
    pub type_: u8,
    /// Present flag (1 = segment present).
    pub present: u8,
    /// Descriptor privilege level (0-3).
    pub dpl: u8,
    /// Default operation size / big flag.
    pub db: u8,
    /// Descriptor type: 0 = system, 1 = code/data.
    pub s: u8,
    /// 64-bit code segment flag.
    pub l: u8,
    /// Granularity flag.
    pub g: u8,
    /// Available-for-software bit.
    pub avl: u8,
}
831
832
/// State of a global descriptor table or interrupt descriptor table.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct DescriptorTable {
    /// Linear base address of the table.
    pub base: u64,
    /// Table limit (size in bytes minus one).
    pub limit: u16,
}
839
840
/// State of a VCPU's special registers.
#[repr(C)]
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub struct Sregs {
    pub cs: Segment,
    pub ds: Segment,
    pub es: Segment,
    pub fs: Segment,
    pub gs: Segment,
    pub ss: Segment,
    /// Task register.
    pub tr: Segment,
    /// Local descriptor table register.
    pub ldt: Segment,
    pub gdt: DescriptorTable,
    pub idt: DescriptorTable,
    pub cr0: u64,
    pub cr2: u64,
    pub cr3: u64,
    pub cr4: u64,
    pub cr8: u64,
    /// Extended feature enable register.
    pub efer: u64,
}
861
862
impl Default for Sregs {
    /// Architectural reset state: 16-bit real mode with the CS:IP reset vector
    /// (paired with `Regs::default`'s `rip = 0xfff0`).
    fn default() -> Self {
        // Intel SDM Vol. 3A, 3.4.5.1 ("Code- and Data-Segment Descriptor Types")
        const SEG_TYPE_DATA: u8 = 0b0000;
        const SEG_TYPE_DATA_WRITABLE: u8 = 0b0010;

        const SEG_TYPE_CODE: u8 = 0b1000;
        const SEG_TYPE_CODE_READABLE: u8 = 0b0010;

        const SEG_TYPE_ACCESSED: u8 = 0b0001;

        // Intel SDM Vol. 3A, 3.4.5 ("Segment Descriptors")
        const SEG_S_SYSTEM: u8 = 0; // System segment.
        const SEG_S_CODE_OR_DATA: u8 = 1; // Data/code segment.

        // 16-bit real-mode code segment (reset vector).
        let code_seg = Segment {
            base: 0xffff0000,
            limit_bytes: 0xffff,
            selector: 0xf000,
            type_: SEG_TYPE_CODE | SEG_TYPE_CODE_READABLE | SEG_TYPE_ACCESSED, // 11
            present: 1,
            s: SEG_S_CODE_OR_DATA,
            ..Default::default()
        };

        // 16-bit real-mode data segment.
        let data_seg = Segment {
            base: 0,
            limit_bytes: 0xffff,
            selector: 0,
            type_: SEG_TYPE_DATA | SEG_TYPE_DATA_WRITABLE | SEG_TYPE_ACCESSED, // 3
            present: 1,
            s: SEG_S_CODE_OR_DATA,
            ..Default::default()
        };

        // 16-bit TSS segment.
        let task_seg = Segment {
            base: 0,
            limit_bytes: 0xffff,
            selector: 0,
            type_: SEG_TYPE_CODE | SEG_TYPE_CODE_READABLE | SEG_TYPE_ACCESSED, // 11
            present: 1,
            s: SEG_S_SYSTEM,
            ..Default::default()
        };

        // Local descriptor table.
        let ldt = Segment {
            base: 0,
            limit_bytes: 0xffff,
            selector: 0,
            type_: SEG_TYPE_DATA | SEG_TYPE_DATA_WRITABLE, // 2
            present: 1,
            s: SEG_S_SYSTEM,
            ..Default::default()
        };

        // Global descriptor table.
        let gdt = DescriptorTable {
            base: 0,
            limit: 0xffff,
        };

        // Interrupt descriptor table.
        let idt = DescriptorTable {
            base: 0,
            limit: 0xffff,
        };

        let cr0 = (1 << 4) // CR0.ET (reserved, always 1)
            | (1 << 30); // CR0.CD (cache disable)

        Sregs {
            cs: code_seg,
            ds: data_seg,
            es: data_seg,
            fs: data_seg,
            gs: data_seg,
            ss: data_seg,
            tr: task_seg,
            ldt,
            gdt,
            idt,
            cr0,
            cr2: 0,
            cr3: 0,
            cr4: 0,
            cr8: 0,
            efer: 0,
        }
    }
}
956
957
/// x87 80-bit floating point value.
///
/// The 80-bit value is stored little-endian as the 64-bit significand followed
/// by the 16-bit sign/exponent word (see the `From<[u8; 10]>` impl).
#[repr(C)]
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
pub struct FpuReg {
    /// 64-bit mantissa.
    pub significand: u64,

    /// 15-bit biased exponent and sign bit.
    pub sign_exp: u16,
}
967
968
impl FpuReg {
969
/// Convert an array of 8x16-byte arrays to an array of 8 `FpuReg`.
970
///
971
/// Ignores any data in the upper 6 bytes of each element; the values represent 80-bit FPU
972
/// registers, so the upper 48 bits are unused.
973
pub fn from_16byte_arrays(byte_arrays: &[[u8; 16]; 8]) -> [FpuReg; 8] {
974
let mut regs = [FpuReg::default(); 8];
975
for (dst, src) in regs.iter_mut().zip(byte_arrays.iter()) {
976
let tbyte: [u8; 10] = src[0..10].try_into().unwrap();
977
*dst = FpuReg::from(tbyte);
978
}
979
regs
980
}
981
982
/// Convert an array of 8 `FpuReg` into 8x16-byte arrays.
983
pub fn to_16byte_arrays(regs: &[FpuReg; 8]) -> [[u8; 16]; 8] {
984
let mut byte_arrays = [[0u8; 16]; 8];
985
for (dst, src) in byte_arrays.iter_mut().zip(regs.iter()) {
986
*dst = (*src).into();
987
}
988
byte_arrays
989
}
990
}
991
992
impl From<[u8; 10]> for FpuReg {
993
/// Construct a `FpuReg` from an 80-bit representation.
994
fn from(value: [u8; 10]) -> FpuReg {
995
// These array sub-slices can't fail, but there's no (safe) way to express that in Rust
996
// without an `unwrap()`.
997
let significand_bytes = value[0..8].try_into().unwrap();
998
let significand = u64::from_le_bytes(significand_bytes);
999
let sign_exp_bytes = value[8..10].try_into().unwrap();
1000
let sign_exp = u16::from_le_bytes(sign_exp_bytes);
1001
FpuReg {
1002
significand,
1003
sign_exp,
1004
}
1005
}
1006
}
1007
1008
impl From<FpuReg> for [u8; 10] {
1009
/// Convert an `FpuReg` into its 80-bit "TBYTE" representation.
1010
fn from(value: FpuReg) -> [u8; 10] {
1011
let mut bytes = [0u8; 10];
1012
bytes[0..8].copy_from_slice(&value.significand.to_le_bytes());
1013
bytes[8..10].copy_from_slice(&value.sign_exp.to_le_bytes());
1014
bytes
1015
}
1016
}
1017
1018
impl From<FpuReg> for [u8; 16] {
1019
/// Convert an `FpuReg` into its 80-bit representation plus 6 unused upper bytes.
1020
/// This is a convenience function for converting to hypervisor types.
1021
fn from(value: FpuReg) -> [u8; 16] {
1022
let mut bytes = [0u8; 16];
1023
bytes[0..8].copy_from_slice(&value.significand.to_le_bytes());
1024
bytes[8..10].copy_from_slice(&value.sign_exp.to_le_bytes());
1025
bytes
1026
}
1027
}
1028
1029
/// State of a VCPU's floating point unit.
#[repr(C)]
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub struct Fpu {
    /// x87 data registers (80-bit values).
    pub fpr: [FpuReg; 8],
    /// x87 control word (FCW).
    pub fcw: u16,
    /// x87 status word (FSW).
    pub fsw: u16,
    // NOTE(review): presumably the abridged (one bit per register) tag word, as in the
    // FXSAVE layout — confirm against the hypervisor ABI this is copied to/from.
    /// x87 tag word, abridged form.
    pub ftwx: u8,
    /// Opcode of the last executed x87 instruction.
    pub last_opcode: u16,
    /// Instruction pointer of the last executed x87 instruction.
    pub last_ip: u64,
    /// Data (operand) pointer of the last executed x87 instruction.
    pub last_dp: u64,
    /// SSE registers XMM0..XMM15.
    pub xmm: [[u8; 16usize]; 16usize],
    /// SSE control/status register (MXCSR).
    pub mxcsr: u32,
}

impl Default for Fpu {
1045
fn default() -> Self {
1046
Fpu {
1047
fpr: Default::default(),
1048
fcw: 0x37f, // Intel SDM Vol. 1, 13.6
1049
fsw: 0,
1050
ftwx: 0,
1051
last_opcode: 0,
1052
last_ip: 0,
1053
last_dp: 0,
1054
xmm: Default::default(),
1055
mxcsr: 0x1f80, // Intel SDM Vol. 1, 11.6.4
1056
}
1057
}
1058
}
1059
1060
/// State of a VCPU's debug registers.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct DebugRegs {
    /// Breakpoint address registers DR0..DR3.
    pub db: [u64; 4usize],
    /// Debug status register (DR6).
    pub dr6: u64,
    /// Debug control register (DR7).
    pub dr7: u64,
}

/// The core type of a CPU in an Intel hybrid processor.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum CpuHybridType {
    /// Intel Atom.
    Atom,
    /// Intel Core.
    Core,
}

/// State of the VCPU's x87 FPU, MMX, XMM, YMM registers.
/// May contain more state depending on enabled extensions.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Xsave {
    // Backing buffer, stored as `u32` words; sized by rounding the requested byte length up
    // to a whole number of words (see `new`).
    data: Vec<u32>,

    // Actual length in bytes. May be smaller than the buffer's capacity in bytes if a
    // non-u32-multiple length is requested.
    len: usize,
}

impl Xsave {
1090
/// Create a new buffer to store Xsave data.
1091
///
1092
/// # Argments
1093
/// * `len` size in bytes.
1094
pub fn new(len: usize) -> Self {
1095
Xsave {
1096
data: vec![0; len.div_ceil(4)],
1097
len,
1098
}
1099
}
1100
1101
pub fn as_ptr(&self) -> *const c_void {
1102
self.data.as_ptr() as *const c_void
1103
}
1104
1105
pub fn as_mut_ptr(&mut self) -> *mut c_void {
1106
self.data.as_mut_ptr() as *mut c_void
1107
}
1108
1109
/// Length in bytes of the XSAVE data.
1110
pub fn len(&self) -> usize {
1111
self.len
1112
}
1113
1114
/// Returns true is length of XSAVE data is zero
1115
pub fn is_empty(&self) -> bool {
1116
self.len() == 0
1117
}
1118
}
1119
1120