Path: blob/main/devices/src/virtio/vhost_user_backend/fs/sys/linux.rs
5394 views
// Copyright 2022 The ChromiumOS Authors1// Use of this source code is governed by a BSD-style license that can be2// found in the LICENSE file.34use std::os::unix::process::ExitStatusExt;5use std::path::Path;6use std::path::PathBuf;78use anyhow::bail;9use anyhow::Context;10use base::error;11use base::linux::max_open_files;12use base::sys::wait_for_pid;13use base::AsRawDescriptor;14use base::AsRawDescriptors;15use base::RawDescriptor;16use cros_async::Executor;17use jail::create_base_minijail;18use jail::create_base_minijail_without_pivot_root;19use minijail::Minijail;2021use crate::virtio::vhost_user_backend::fs::FsBackend;22use crate::virtio::vhost_user_backend::fs::Options;23use crate::virtio::vhost_user_backend::BackendConnection;2425fn default_uidmap() -> String {26// SAFETY: trivially safe27let euid = unsafe { libc::geteuid() };28format!("{euid} {euid} 1")29}3031fn default_gidmap() -> String {32// SAFETY: trivially safe33let egid = unsafe { libc::getegid() };34format!("{egid} {egid} 1")35}3637#[allow(clippy::unnecessary_cast)]38fn jail_and_fork(39mut keep_rds: Vec<RawDescriptor>,40dir_path: PathBuf,41uid: u32,42gid: u32,43uid_map: Option<String>,44gid_map: Option<String>,45disable_sandbox: bool,46pivot_root: bool,47) -> anyhow::Result<i32> {48let limit = max_open_files()49.context("failed to get max open files")?50.rlim_max;51// Create new minijail sandbox52let jail = if disable_sandbox {53if pivot_root {54create_base_minijail(dir_path.as_path(), limit)55} else {56create_base_minijail_without_pivot_root(dir_path.as_path(), limit)57}?58} else {59let mut j: Minijail = Minijail::new()?;6061j.namespace_pids();62j.namespace_user();63j.namespace_user_disable_setgroups();64if uid != 0 {65j.change_uid(uid);66}67if gid != 0 {68j.change_gid(gid);69}70j.uidmap(&uid_map.unwrap_or_else(default_uidmap))?;71j.gidmap(&gid_map.unwrap_or_else(default_gidmap))?;72j.run_as_init();7374j.namespace_vfs();75j.namespace_net();76j.no_new_privs();7778// Only pivot_root if we are not re-using the current root directory.79if dir_path != Path::new("/") {80// It's safe to call `namespace_vfs` multiple times.81j.namespace_vfs();82j.enter_pivot_root(&dir_path)?;83}84j.set_remount_mode(libc::MS_SLAVE);8586j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)?;87// vvu locks around 512k memory. Just give 1M.88j.set_rlimit(libc::RLIMIT_MEMLOCK as i32, 1 << 20, 1 << 20)?;89#[cfg(not(feature = "seccomp_trace"))]90jail::set_embedded_bpf_program(&mut j, "fs_device_vhost_user")?;91j.use_seccomp_filter();92j93};9495// Make sure there are no duplicates in keep_rds96keep_rds.sort_unstable();97keep_rds.dedup();9899// fork on the jail here100// SAFETY: trivially safe101let pid = unsafe { jail.fork(Some(&keep_rds))? };102103if pid > 0 {104// Current FS driver jail does not use seccomp and jail_and_fork() does not have other105// users, so we do nothing here for seccomp_trace106// SAFETY: trivially safe107unsafe { libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) };108}109110if pid < 0 {111bail!("Fork error! {}", std::io::Error::last_os_error());112}113114Ok(pid)115}116117/// Starts a vhost-user fs device.118/// Returns an error if the given `args` is invalid or the device fails to run.119#[allow(unused_mut)]120pub fn start_device(mut opts: Options) -> anyhow::Result<()> {121#[allow(unused_mut)]122let mut is_pivot_root_required = true;123#[cfg(feature = "fs_runtime_ugid_map")]124if let Some(ref mut cfg) = opts.cfg {125if !cfg.ugid_map.is_empty() && (!opts.disable_sandbox || !opts.skip_pivot_root) {126bail!("uid_gid_map can only be set with disable sandbox and skip_pivot_root option");127}128129if cfg.unmap_guest_memory_on_fork && !opts.disable_sandbox {130bail!("unmap_guest_memory_on_fork requires --disable-sandbox");131}132133if opts.skip_pivot_root {134is_pivot_root_required = false;135}136}137let ex = Executor::new().context("Failed to create executor")?;138let fs_device = FsBackend::new(139&opts.tag,140opts.shared_dir141.to_str()142.expect("Failed to convert opts.shared_dir to str()"),143opts.skip_pivot_root,144opts.cfg,145)?;146147let mut keep_rds = fs_device.keep_rds.clone();148keep_rds.append(&mut ex.as_raw_descriptors());149150let conn =151BackendConnection::from_opts(opts.socket.as_deref(), opts.socket_path.as_deref(), opts.fd)?;152keep_rds.push(conn.as_raw_descriptor());153154base::syslog::push_descriptors(&mut keep_rds);155cros_tracing::push_descriptors!(&mut keep_rds);156metrics::push_descriptors(&mut keep_rds);157let pid = jail_and_fork(158keep_rds,159opts.shared_dir,160opts.uid,161opts.gid,162opts.uid_map,163opts.gid_map,164opts.disable_sandbox,165is_pivot_root_required,166)?;167168match pid {1690 => {170// Child process runs the device and exits, not returns.171if let Err(e) = ex.run_until(conn.run_backend(fs_device, &ex)) {172error!("Error in vhost-user-fs device: {:#}", e);173std::process::exit(1);174}175std::process::exit(0);176}177pid if pid < 0 => {178unreachable!("fork error must have been handled in jail_and_fork()");179}180_ => {181// fs_device is not needed in the parent process.182drop(fs_device);183184let (_child_pid, status) =185wait_for_pid(pid, 0).context("failed to wait for child process")?;186if let Some(signal) = status.signal() {187panic!("Child process {pid} was killed by signal {signal}");188}189if let Some(exit_code) = status.code() {190if exit_code != 0 {191bail!("Child process {pid} exited with code {exit_code}");192}193}194}195};196Ok(())197}198199200