Path: blob/main/devices/src/virtio/vhost_user_frontend/mod.rs
5394 views
// Copyright 2022 The ChromiumOS Authors1// Use of this source code is governed by a BSD-style license that can be2// found in the LICENSE file.34//! VirtioDevice implementation for the VMM side of a vhost-user connection.56mod error;7mod handler;8mod sys;9mod worker;1011use std::cell::RefCell;12use std::collections::BTreeMap;13use std::io::Read;14use std::io::Write;1516use anyhow::bail;17use anyhow::Context;18use base::error;19use base::trace;20use base::AsRawDescriptor;21#[cfg(windows)]22use base::CloseNotifier;23use base::Event;24use base::RawDescriptor;25use base::ReadNotifier;26use base::SafeDescriptor;27use base::SendTube;28use base::WorkerThread;29use snapshot::AnySnapshot;30use vm_memory::GuestMemory;31use vmm_vhost::message::VhostUserConfigFlags;32use vmm_vhost::message::VhostUserMigrationPhase;33use vmm_vhost::message::VhostUserProtocolFeatures;34use vmm_vhost::message::VhostUserTransferDirection;35use vmm_vhost::BackendClient;36use vmm_vhost::VhostUserMemoryRegionInfo;37use vmm_vhost::VringConfigData;38use vmm_vhost::VHOST_USER_F_PROTOCOL_FEATURES;3940use crate::virtio::device_constants::VIRTIO_DEVICE_TYPE_SPECIFIC_FEATURES_MASK;41use crate::virtio::vhost_user_frontend::error::Error;42use crate::virtio::vhost_user_frontend::error::Result;43use crate::virtio::vhost_user_frontend::handler::BackendReqHandler;44use crate::virtio::vhost_user_frontend::handler::BackendReqHandlerImpl;45use crate::virtio::vhost_user_frontend::sys::create_backend_req_handler;46use crate::virtio::vhost_user_frontend::worker::Worker;47use crate::virtio::DeviceType;48use crate::virtio::Interrupt;49use crate::virtio::Queue;50use crate::virtio::SharedMemoryMapper;51use crate::virtio::SharedMemoryRegion;52use crate::virtio::VirtioDevice;53use crate::PciAddress;5455pub struct VhostUserFrontend {56device_type: DeviceType,57worker_thread: Option<WorkerThread<(Option<BackendReqHandler>, SendTube)>>,5859backend_client: BackendClient,60avail_features: u64,61acked_features: u64,62// Last `acked_features` we sent to the backend.63last_acked_features: u64,64protocol_features: VhostUserProtocolFeatures,65// `backend_req_handler` is only present if the backend supports BACKEND_REQ. `worker_thread`66// takes ownership of `backend_req_handler` when it starts. The worker thread will always67// return ownershp of the handler when stopped.68backend_req_handler: Option<BackendReqHandler>,69// Shared memory region info. IPC result from backend is saved with outer Option.70shmem_region: RefCell<Option<Option<SharedMemoryRegion>>>,7172queue_sizes: Vec<u16>,73expose_shmem_descriptors_with_viommu: bool,74pci_address: Option<PciAddress>,75vm_evt_wrtube: SendTube,7677// Queues that have been sent to the backend. Always `Some` when active and not asleep. Saved78// for use in `virtio_sleep`. Since the backend is managing them, the local state of the queue79// is likely stale.80sent_queues: Option<BTreeMap<usize, Queue>>,81}8283// Returns the largest power of two that is less than or equal to `val`.84fn power_of_two_le(val: u16) -> Option<u16> {85if val == 0 {86None87} else if val.is_power_of_two() {88Some(val)89} else {90val.checked_next_power_of_two()91.map(|next_pow_two| next_pow_two / 2)92}93}9495impl VhostUserFrontend {96/// Create a new VirtioDevice for a vhost-user device frontend.97///98/// # Arguments99///100/// - `device_type`: virtio device type101/// - `base_features`: base virtio device features (e.g. `VIRTIO_F_VERSION_1`)102/// - `connection`: connection to the device backend103/// - `max_queue_size`: maximum number of entries in each queue (default: [`Queue::MAX_SIZE`])104pub fn new(105device_type: DeviceType,106mut base_features: u64,107connection: vmm_vhost::Connection,108vm_evt_wrtube: SendTube,109max_queue_size: Option<u16>,110pci_address: Option<PciAddress>,111) -> Result<VhostUserFrontend> {112// Don't allow packed queues even if requested. We don't handle them properly yet at the113// protocol layer.114// TODO: b/331466964 - Remove once packed queue support is added to BackendClient.115if base_features & (1 << virtio_sys::virtio_config::VIRTIO_F_RING_PACKED) != 0 {116base_features &= !(1 << virtio_sys::virtio_config::VIRTIO_F_RING_PACKED);117base::warn!(118"VIRTIO_F_RING_PACKED requested, but not yet supported by vhost-user frontend. \119Automatically disabled."120);121}122123#[cfg(windows)]124let backend_pid = connection.target_pid();125126let mut backend_client = BackendClient::new(connection);127128backend_client.set_owner().map_err(Error::SetOwner)?;129130let allow_features = VIRTIO_DEVICE_TYPE_SPECIFIC_FEATURES_MASK131| base_features132| 1 << VHOST_USER_F_PROTOCOL_FEATURES;133let avail_features =134allow_features & backend_client.get_features().map_err(Error::GetFeatures)?;135let mut acked_features = 0;136137let allow_protocol_features = VhostUserProtocolFeatures::CONFIG138| VhostUserProtocolFeatures::MQ139| VhostUserProtocolFeatures::BACKEND_REQ140| VhostUserProtocolFeatures::DEVICE_STATE141| VhostUserProtocolFeatures::SHMEM_MAP142// NOTE: We advertise REPLY_ACK, but we don't actually set the "need_reply" bit in any143// `BackendClient` requests because there is a theoretical latency penalty and no144// obvious advantage at the moment. Instead, we negotiate it only so that the backend145// can choose to set the "need_reply" in the backend-to-frontend requests (e.g. to146// avoid race conditions when using SHMEM_MAP).147| VhostUserProtocolFeatures::REPLY_ACK;148149let mut protocol_features = VhostUserProtocolFeatures::empty();150if avail_features & 1 << VHOST_USER_F_PROTOCOL_FEATURES != 0 {151// The vhost-user backend supports VHOST_USER_F_PROTOCOL_FEATURES.152// Per the vhost-user protocol, the backend must support153// `VHOST_USER_GET_PROTOCOL_FEATURES` and `VHOST_USER_SET_PROTOCOL_FEATURES` even154// before acknowledging the feature, so we don't need to call `set_features()` yet155// (and doing so before driver feature negotiation may confuse some backends),156// but add it to `acked_features` so it will be included in any future157// `set_features()` calls.158acked_features |= 1 << VHOST_USER_F_PROTOCOL_FEATURES;159160let avail_protocol_features = backend_client161.get_protocol_features()162.map_err(Error::GetProtocolFeatures)?;163protocol_features = allow_protocol_features & avail_protocol_features;164backend_client165.set_protocol_features(protocol_features)166.map_err(Error::SetProtocolFeatures)?;167}168169// if protocol feature `VhostUserProtocolFeatures::BACKEND_REQ` is negotiated.170let backend_req_handler =171if protocol_features.contains(VhostUserProtocolFeatures::BACKEND_REQ) {172let (mut handler, tx_fd) = create_backend_req_handler(173BackendReqHandlerImpl::new(),174#[cfg(windows)]175backend_pid,176)?;177handler.set_reply_ack_flag(178protocol_features.contains(VhostUserProtocolFeatures::REPLY_ACK),179);180backend_client181.set_backend_req_fd(&tx_fd)182.map_err(Error::SetDeviceRequestChannel)?;183Some(handler)184} else {185None186};187188// If the device supports VHOST_USER_PROTOCOL_F_MQ, use VHOST_USER_GET_QUEUE_NUM to189// determine the number of queues supported. Otherwise, use the minimum number of queues190// required by the spec for this device type.191let num_queues = if protocol_features.contains(VhostUserProtocolFeatures::MQ) {192trace!("backend supports VHOST_USER_PROTOCOL_F_MQ");193let num_queues = backend_client.get_queue_num().map_err(Error::GetQueueNum)?;194trace!("VHOST_USER_GET_QUEUE_NUM returned {num_queues}");195num_queues as usize196} else {197trace!("backend does not support VHOST_USER_PROTOCOL_F_MQ");198device_type.min_queues()199};200201// Clamp the maximum queue size to the largest power of 2 <= max_queue_size.202let max_queue_size = max_queue_size203.and_then(power_of_two_le)204.unwrap_or(Queue::MAX_SIZE);205206trace!(207"vhost-user {device_type} frontend with {num_queues} queues x {max_queue_size} entries\208{}",209if let Some(pci_address) = pci_address {210format!(" pci-address {pci_address}")211} else {212"".to_string()213}214);215216let queue_sizes = vec![max_queue_size; num_queues];217218Ok(VhostUserFrontend {219device_type,220worker_thread: None,221backend_client,222avail_features,223acked_features,224last_acked_features: acked_features,225protocol_features,226backend_req_handler,227shmem_region: RefCell::new(None),228queue_sizes,229expose_shmem_descriptors_with_viommu: device_type == DeviceType::Gpu,230pci_address,231vm_evt_wrtube,232sent_queues: None,233})234}235236fn set_mem_table(&mut self, mem: &GuestMemory) -> Result<()> {237let regions: Vec<_> = mem238.regions()239.map(|region| VhostUserMemoryRegionInfo {240guest_phys_addr: region.guest_addr.0,241memory_size: region.size as u64,242userspace_addr: region.host_addr as u64,243mmap_offset: region.shm_offset,244mmap_handle: region.shm.as_raw_descriptor(),245})246.collect();247248self.backend_client249.set_mem_table(regions.as_slice())250.map_err(Error::SetMemTable)?;251252Ok(())253}254255/// Activates a vring for the given `queue`.256fn activate_vring(257&mut self,258mem: &GuestMemory,259queue_index: usize,260queue: &Queue,261irqfd: &Event,262) -> Result<()> {263self.backend_client264.set_vring_num(queue_index, queue.size())265.map_err(Error::SetVringNum)?;266267let config_data = VringConfigData {268queue_size: queue.size(),269flags: 0u32,270desc_table_addr: mem271.get_host_address(queue.desc_table())272.map_err(Error::GetHostAddress)? as u64,273used_ring_addr: mem274.get_host_address(queue.used_ring())275.map_err(Error::GetHostAddress)? as u64,276avail_ring_addr: mem277.get_host_address(queue.avail_ring())278.map_err(Error::GetHostAddress)? as u64,279log_addr: None,280};281self.backend_client282.set_vring_addr(queue_index, &config_data)283.map_err(Error::SetVringAddr)?;284285self.backend_client286.set_vring_base(queue_index, queue.next_avail_to_process())287.map_err(Error::SetVringBase)?;288289self.backend_client290.set_vring_call(queue_index, irqfd)291.map_err(Error::SetVringCall)?;292self.backend_client293.set_vring_kick(queue_index, queue.event())294.map_err(Error::SetVringKick)?;295296// Per protocol documentation, `VHOST_USER_SET_VRING_ENABLE` should be sent only when297// `VHOST_USER_F_PROTOCOL_FEATURES` has been negotiated.298if self.acked_features & 1 << VHOST_USER_F_PROTOCOL_FEATURES != 0 {299self.backend_client300.set_vring_enable(queue_index, true)301.map_err(Error::SetVringEnable)?;302}303304Ok(())305}306307/// Stops the vring for the given `queue`, returning its base index.308fn deactivate_vring(&self, queue_index: usize) -> Result<u16> {309if self.acked_features & 1 << VHOST_USER_F_PROTOCOL_FEATURES != 0 {310self.backend_client311.set_vring_enable(queue_index, false)312.map_err(Error::SetVringEnable)?;313}314315let vring_base = self316.backend_client317.get_vring_base(queue_index)318.map_err(Error::GetVringBase)?;319320vring_base321.try_into()322.map_err(|_| Error::VringBaseTooBig(vring_base))323}324325/// Helper to start up the worker thread that will be used with handling interrupts and requests326/// from the device process.327fn start_worker(&mut self, interrupt: Interrupt, non_msix_evt: Event) {328assert!(329self.worker_thread.is_none(),330"BUG: attempted to start worker twice"331);332333let label = self.debug_label();334335let mut backend_req_handler = self.backend_req_handler.take();336if let Some(handler) = &mut backend_req_handler {337// Using unwrap here to get the mutex protected value338handler.frontend_mut().set_interrupt(interrupt.clone());339}340341let backend_client_read_notifier =342SafeDescriptor::try_from(self.backend_client.get_read_notifier())343.expect("failed to get backend read notifier");344#[cfg(windows)]345let backend_client_close_notifier =346SafeDescriptor::try_from(self.backend_client.get_close_notifier())347.expect("failed to get backend close notifier");348349let vm_evt_wrtube = self350.vm_evt_wrtube351.try_clone()352.expect("failed to clone vm_evt_wrtube");353354self.worker_thread = Some(WorkerThread::start(label.clone(), move |kill_evt| {355let mut worker = Worker {356kill_evt,357non_msix_evt,358backend_req_handler,359backend_client_read_notifier,360#[cfg(windows)]361backend_client_close_notifier,362};363if let Err(e) = worker364.run(interrupt)365.with_context(|| format!("{label}: vhost_user_frontend worker failed"))366{367error!("vhost-user worker thread exited with an error: {:#}", e);368369if let Err(e) = vm_evt_wrtube.send(&base::VmEventType::DeviceCrashed) {370error!("failed to send crash event: {}", e);371}372}373(worker.backend_req_handler, vm_evt_wrtube)374}));375}376}377378impl VirtioDevice for VhostUserFrontend {379// Override the default debug label to differentiate vhost-user devices from virtio.380fn debug_label(&self) -> String {381format!("vu-{}", self.device_type())382}383384fn keep_rds(&self) -> Vec<RawDescriptor> {385Vec::new()386}387388fn device_type(&self) -> DeviceType {389self.device_type390}391392fn queue_max_sizes(&self) -> &[u16] {393&self.queue_sizes394}395396fn features(&self) -> u64 {397self.avail_features398}399400fn ack_features(&mut self, features: u64) {401self.acked_features |= features & self.avail_features;402}403404fn read_config(&self, offset: u64, data: &mut [u8]) {405let Ok(offset) = offset.try_into() else {406error!("failed to read config: invalid config offset is given: {offset}");407return;408};409let Ok(data_len) = data.len().try_into() else {410error!(411"failed to read config: invalid config length is given: {}",412data.len()413);414return;415};416let (_, config) = match self.backend_client.get_config(417offset,418data_len,419VhostUserConfigFlags::WRITABLE,420data,421) {422Ok(x) => x,423Err(e) => {424error!("failed to read config: {}", Error::GetConfig(e));425return;426}427};428data.copy_from_slice(&config);429}430431fn write_config(&mut self, offset: u64, data: &[u8]) {432let Ok(offset) = offset.try_into() else {433error!("failed to write config: invalid config offset is given: {offset}");434return;435};436if let Err(e) = self437.backend_client438.set_config(offset, VhostUserConfigFlags::empty(), data)439.map_err(Error::SetConfig)440{441error!("failed to write config: {}", e);442}443}444445fn activate(446&mut self,447mem: GuestMemory,448interrupt: Interrupt,449queues: BTreeMap<usize, Queue>,450) -> anyhow::Result<()> {451if self.last_acked_features != self.acked_features {452self.backend_client453.set_features(self.acked_features)454.map_err(Error::SetFeatures)?;455self.last_acked_features = self.acked_features;456}457458self.set_mem_table(&mem)?;459460let msix_config_opt = interrupt461.get_msix_config()462.as_ref()463.ok_or(Error::MsixConfigUnavailable)?;464let msix_config = msix_config_opt.lock();465466let non_msix_evt = Event::new().map_err(Error::CreateEvent)?;467for (&queue_index, queue) in queues.iter() {468let irqfd = msix_config469.get_irqfd(queue.vector() as usize)470.unwrap_or(&non_msix_evt);471self.activate_vring(&mem, queue_index, queue, irqfd)?;472}473474self.sent_queues = Some(queues);475476drop(msix_config);477478self.start_worker(interrupt, non_msix_evt);479Ok(())480}481482fn reset(&mut self) -> anyhow::Result<()> {483// TODO: Reset SHMEM_MAP mappings. The vhost-user spec says "mappings are automatically484// unmapped by the front-end across device reset operation".485486if let Some(sent_queues) = self.sent_queues.take() {487for queue_index in sent_queues.into_keys() {488let _vring_base = self489.deactivate_vring(queue_index)490.context("deactivate_vring failed during reset")?;491}492}493494if let Some(w) = self.worker_thread.take() {495let (backend_req_handler, vm_evt_wrtube) = w.stop();496self.backend_req_handler = backend_req_handler;497self.vm_evt_wrtube = vm_evt_wrtube;498}499500Ok(())501}502503fn pci_address(&self) -> Option<PciAddress> {504self.pci_address505}506507fn get_shared_memory_region(&self) -> Option<SharedMemoryRegion> {508if !self509.protocol_features510.contains(VhostUserProtocolFeatures::SHMEM_MAP)511{512return None;513}514if let Some(r) = self.shmem_region.borrow().as_ref() {515return *r;516}517let regions = match self518.backend_client519.get_shmem_config()520.map_err(Error::ShmemRegions)521{522Ok(x) => x,523Err(e) => {524error!("Failed to get shared memory config {}", e);525return None;526}527};528let region = match regions.len() {5290 => None,5301 => Some(regions[0]),531n => {532error!(533"Failed to get shared memory region {}",534Error::TooManyShmemRegions(n)535);536return None;537}538};539*self.shmem_region.borrow_mut() = Some(region);540region541}542543fn set_shared_memory_mapper(&mut self, mapper: Box<dyn SharedMemoryMapper>) {544// Return error if backend request handler is not available. This indicates545// that `VhostUserProtocolFeatures::BACKEND_REQ` is not negotiated.546let Some(backend_req_handler) = self.backend_req_handler.as_mut() else {547error!(548"Error setting shared memory mapper {}",549Error::ProtocolFeatureNotNegoiated(VhostUserProtocolFeatures::BACKEND_REQ)550);551return;552};553554// The virtio framework will only call this if get_shared_memory_region returned a region555let shmid = self556.shmem_region557.borrow()558.flatten()559.expect("missing shmid")560.id;561562backend_req_handler563.frontend_mut()564.set_shared_mapper_state(mapper, shmid);565}566567fn expose_shmem_descriptors_with_viommu(&self) -> bool {568self.expose_shmem_descriptors_with_viommu569}570571fn virtio_sleep(&mut self) -> anyhow::Result<Option<BTreeMap<usize, Queue>>> {572let Some(mut queues) = self.sent_queues.take() else {573return Ok(None);574};575576for (&queue_index, queue) in queues.iter_mut() {577let vring_base = self578.deactivate_vring(queue_index)579.context("deactivate_vring failed during sleep")?;580queue.vhost_user_reclaim(vring_base);581}582583if let Some(w) = self.worker_thread.take() {584let (backend_req_handler, vm_evt_wrtube) = w.stop();585self.backend_req_handler = backend_req_handler;586self.vm_evt_wrtube = vm_evt_wrtube;587}588589Ok(Some(queues))590}591592fn virtio_wake(593&mut self,594queues_state: Option<(GuestMemory, Interrupt, BTreeMap<usize, Queue>)>,595) -> anyhow::Result<()> {596if let Some((mem, interrupt, queues)) = queues_state {597self.activate(mem, interrupt, queues)?;598}599Ok(())600}601602fn virtio_snapshot(&mut self) -> anyhow::Result<AnySnapshot> {603if !self604.protocol_features605.contains(VhostUserProtocolFeatures::DEVICE_STATE)606{607bail!("snapshot requires VHOST_USER_PROTOCOL_F_DEVICE_STATE");608}609// Send the backend an FD to write the device state to. If it gives us an FD back, then610// we need to read from that instead.611let (mut r, w) = new_pipe_pair()?;612let backend_r = self613.backend_client614.set_device_state_fd(615VhostUserTransferDirection::Save,616VhostUserMigrationPhase::Stopped,617&w,618)619.context("failed to negotiate device state fd")?;620// EOF signals end of the device state bytes, so it is important to close our copy of621// the write FD before we start reading.622std::mem::drop(w);623// Read the device state.624let mut snapshot_bytes = Vec::new();625if let Some(mut backend_r) = backend_r {626backend_r.read_to_end(&mut snapshot_bytes)627} else {628r.read_to_end(&mut snapshot_bytes)629}630.context("failed to read device state")?;631// Call `check_device_state` to ensure the data transfer was successful.632self.backend_client633.check_device_state()634.context("failed to transfer device state")?;635Ok(AnySnapshot::to_any(VhostUserDeviceState {636acked_features: self.acked_features,637backend_state: snapshot_bytes,638})639.map_err(Error::SliceToSerdeValue)?)640}641642fn virtio_restore(&mut self, data: AnySnapshot) -> anyhow::Result<()> {643if !self644.protocol_features645.contains(VhostUserProtocolFeatures::DEVICE_STATE)646{647bail!("restore requires VHOST_USER_PROTOCOL_F_DEVICE_STATE");648}649650let device_state: VhostUserDeviceState =651AnySnapshot::from_any(data).map_err(Error::SerdeValueToSlice)?;652653// Restore and negotiate features before restoring backend state.654let missing_features = !self.avail_features & device_state.acked_features;655if missing_features != 0 {656bail!("The destination backend doesn't support all features acknowledged by the source, missing: {}", missing_features);657}658self.acked_features = device_state.acked_features;659if self.last_acked_features != self.acked_features {660self.backend_client661.set_features(self.acked_features)662.map_err(Error::SetFeatures)?;663self.last_acked_features = self.acked_features;664}665666// Send the backend an FD to read the device state from. If it gives us an FD back,667// then we need to write to that instead.668let (r, w) = new_pipe_pair()?;669let backend_w = self670.backend_client671.set_device_state_fd(672VhostUserTransferDirection::Load,673VhostUserMigrationPhase::Stopped,674&r,675)676.context("failed to negotiate device state fd")?;677// Write the device state.678{679// EOF signals the end of the device state bytes, so we need to ensure the write680// objects are dropped before the `check_device_state` call. Done here by moving681// them into this scope.682let backend_w = backend_w;683let mut w = w;684if let Some(mut backend_w) = backend_w {685backend_w.write_all(device_state.backend_state.as_slice())686} else {687w.write_all(device_state.backend_state.as_slice())688}689.context("failed to write device state")?;690}691// Call `check_device_state` to ensure the data transfer was successful.692self.backend_client693.check_device_state()694.context("failed to transfer device state")?;695Ok(())696}697}698699#[derive(serde::Serialize, serde::Deserialize, Debug)]700struct VhostUserDeviceState {701acked_features: u64,702backend_state: Vec<u8>,703}704705#[cfg(unix)]706fn new_pipe_pair() -> anyhow::Result<(impl AsRawDescriptor + Read, impl AsRawDescriptor + Write)> {707base::pipe().context("failed to create pipe")708}709710#[cfg(windows)]711fn new_pipe_pair() -> anyhow::Result<(impl AsRawDescriptor + Read, impl AsRawDescriptor + Write)> {712base::named_pipes::pair(713&base::named_pipes::FramingMode::Byte,714&base::named_pipes::BlockingMode::Wait,715/* timeout= */ 0,716)717.context("failed to create named pipes")718}719720721