#![deny(missing_docs)]
#![allow(dead_code)]
use std::path::Path;
use std::str;
use std::sync::LazyLock;
use anyhow::bail;
use anyhow::Context;
use anyhow::Result;
#[cfg(feature = "seccomp_trace")]
use base::debug;
use base::getegid;
use base::geteuid;
#[cfg(feature = "seccomp_trace")]
use base::warn;
use libc::c_ulong;
use minijail::Minijail;
#[cfg(feature = "seccomp_trace")]
use static_assertions::const_assert;
#[cfg(feature = "seccomp_trace")]
use zerocopy::Immutable;
#[cfg(feature = "seccomp_trace")]
use zerocopy::IntoBytes;
use crate::config::JailConfig;
/// Seccomp BPF programs embedded into the binary, keyed by seccomp policy name.
///
/// The map is produced by including the generated `bpf_includes.in` file found in `OUT_DIR`,
/// and it is only constructed on first access (`LazyLock`).
static EMBEDDED_BPFS: LazyLock<std::collections::HashMap<&str, Vec<u8>>> =
LazyLock::new(|| include!(concat!(env!("OUT_DIR"), "/bpf_includes.in")));
/// Open-file limit (`RLIMIT_NOFILE`) that `simple_jail` requests for an ordinary sandboxed jail.
pub const MAX_OPEN_FILES_DEFAULT: u64 = 4096;
/// Open-file limit (`RLIMIT_NOFILE`) that `create_gpu_minijail` requests for the GPU jail.
const MAX_OPEN_FILES_FOR_GPU: u64 = 32768;
/// Open-file limit presumably meant for the jail warden process.
///
/// NOTE(review): this constant is not referenced in the visible part of this file; confirm its
/// intended use against its callers.
pub const MAX_OPEN_FILES_FOR_JAIL_WARDEN: u64 = 65536;
/// Selects which user and group identity `create_sandbox_minijail` configures for the jail.
pub enum RunAsUser {
/// No explicit identity. The current effective user is added to the jail only when
/// `bind_mounts` is enabled and no `ugid_map` was supplied.
Unspecified,
/// Map and switch to the current effective UID/GID of this process.
CurrentUser,
/// Map UID 0 and GID 0 inside the jail to the current effective UID/GID of this process.
Root,
/// Switch to the given `(uid, gid)`; a value of `0` leaves that ID unchanged.
Specified(u32, u32),
}
/// Settings consumed by `create_sandbox_minijail` when building a sandboxed jail.
pub struct SandboxConfig<'a> {
/// When true, the jail is restricted to an empty capability mask (`use_caps(0)`).
pub limit_caps: bool,
/// When true, seccomp filter failures are logged and the text `.policy` file is parsed even
/// if a precompiled `.bpf` file exists.
log_failures: bool,
/// Directory searched for `<policy name>.bpf` / `<policy name>.policy`. When `None`, the
/// embedded BPF program with the matching name is used instead.
seccomp_policy_dir: Option<&'a Path>,
/// Name of the seccomp policy, without a file extension.
seccomp_policy_name: &'a str,
/// Optional `(uid_map, gid_map)` strings handed to `Minijail::uidmap` / `Minijail::gidmap`.
pub ugid_map: Option<(&'a str, &'a str)>,
/// Optional remount mode handed to `Minijail::set_remount_mode`.
pub remount_mode: Option<c_ulong>,
/// When true, the jail is placed in its own network namespace.
pub namespace_net: bool,
/// When true, a tmpfs is mounted at the jail's `/`; in `RunAsUser::Unspecified` mode this also
/// causes the current user to be added to the jail if no `ugid_map` is set.
pub bind_mounts: bool,
/// User/group identity to configure for the jailed process.
pub run_as: RunAsUser,
}
impl<'a> SandboxConfig<'a> {
    /// Builds the default sandbox configuration for the seccomp policy named `policy`.
    ///
    /// The seccomp logging flag and policy directory are taken from `jail_config`. Every other
    /// setting starts at its default: capabilities limited, a separate network namespace, no
    /// UID/GID map, no remount mode, no bind mounts and an unspecified run-as user.
    pub fn new(jail_config: &'a JailConfig, policy: &'a str) -> Self {
        let log_failures = jail_config.seccomp_log_failures;
        let seccomp_policy_dir: Option<&'a Path> =
            jail_config.seccomp_policy_dir.as_ref().map(Path::new);

        Self {
            // Values derived from the caller's arguments.
            log_failures,
            seccomp_policy_dir,
            seccomp_policy_name: policy,
            // Fixed defaults that callers may override through the public fields.
            limit_caps: true,
            namespace_net: true,
            bind_mounts: false,
            ugid_map: None,
            remount_mode: None,
            run_as: RunAsUser::Unspecified,
        }
    }
}
/// Wrapper around a `Minijail` whose `Drop` implementation calls `Minijail::kill` on it.
pub struct ScopedMinijail(pub Minijail);
impl Drop for ScopedMinijail {
fn drop(&mut self) {
let _ = self.0.kill();
}
}
/// Creates a [`Minijail`] that pivots into `root` and caps the number of open files.
///
/// When `root` is `/` no VFS namespace or pivot root is configured; only the open-file limit is
/// applied.
///
/// # Arguments
///
/// * `root` - Absolute path of an existing directory to use as the jail's root.
/// * `max_open_files` - Value used for both the soft and hard `RLIMIT_NOFILE` limits.
///
/// # Errors
///
/// Fails if `root` is not a directory or not absolute, or if creating or configuring the jail
/// fails.
#[allow(clippy::unnecessary_cast)]
pub fn create_base_minijail(root: &Path, max_open_files: u64) -> Result<Minijail> {
    // The directory check comes first so a relative, non-existent path reports "not a directory".
    if !root.is_dir() {
        bail!("{:?} is not a directory, cannot create jail", root);
    }
    if !root.is_absolute() {
        bail!("{:?} is not absolute path", root);
    }

    let mut jail = Minijail::new().context("failed to jail device")?;

    let pivot_needed = root != Path::new("/");
    if pivot_needed {
        jail.namespace_vfs();
        jail.enter_pivot_root(root)
            .context("failed to pivot root device")?;
    }

    // The same value is used for the soft and hard limit.
    let nofile_resource = libc::RLIMIT_NOFILE as i32;
    jail.set_rlimit(nofile_resource, max_open_files, max_open_files)
        .context("error setting max open files")?;

    Ok(jail)
}
/// Creates a [`Minijail`] that only caps the number of open files, without pivoting into `root`.
///
/// `root` is validated exactly as in `create_base_minijail`, but is not otherwise used to
/// configure the jail.
///
/// # Arguments
///
/// * `root` - Absolute path of an existing directory.
/// * `max_open_files` - Value used for both the soft and hard `RLIMIT_NOFILE` limits.
///
/// # Errors
///
/// Fails if `root` is not a directory or not absolute, or if creating or configuring the jail
/// fails.
#[allow(clippy::unnecessary_cast)]
pub fn create_base_minijail_without_pivot_root(
    root: &Path,
    max_open_files: u64,
) -> Result<Minijail> {
    // Directory check first, then absoluteness, so the reported error matches the pivoting
    // variant for the same bad input.
    if !root.is_dir() {
        bail!("{:?} is not a directory, cannot create jail", root);
    }
    if !root.is_absolute() {
        bail!("{:?} is not absolute path", root);
    }

    let mut minijail = Minijail::new().context("failed to jail device")?;

    let (soft_limit, hard_limit) = (max_open_files, max_open_files);
    minijail
        .set_rlimit(libc::RLIMIT_NOFILE as i32, soft_limit, hard_limit)
        .context("error setting max open files")?;

    Ok(minijail)
}
/// Creates a fully sandboxed [`Minijail`] rooted at `root`.
///
/// On top of `create_base_minijail`, the jail gets PID, user and mount namespaces, an optional
/// network namespace, `no_new_privs`, the user/group identity selected by `config.run_as`, and a
/// seccomp filter.
///
/// The seccomp filter is chosen as follows (when the `seccomp_trace` feature is disabled):
///
/// * With `config.seccomp_policy_dir` set, `<dir>/<policy name>.bpf` is loaded if it exists and
///   failure logging is off; otherwise `<dir>/<policy name>.policy` is parsed.
/// * Without a policy directory, the embedded BPF program named after the policy is used.
///
/// With the `seccomp_trace` feature enabled, a single-instruction filter returning
/// `SECCOMP_RET_LOG` is installed instead. That build is for debugging only.
///
/// # Arguments
///
/// * `root` - Absolute path of an existing directory to use as the jail's root.
/// * `max_open_files` - Value used for both the soft and hard `RLIMIT_NOFILE` limits.
/// * `config` - Sandbox settings; see `SandboxConfig`.
///
/// # Errors
///
/// Fails if the base jail cannot be created, if a UID/GID map or mount cannot be configured, or
/// if the seccomp policy cannot be found or parsed.
pub fn create_sandbox_minijail(
    root: &Path,
    max_open_files: u64,
    config: &SandboxConfig,
) -> Result<Minijail> {
    let mut jail = create_base_minijail(root, max_open_files)?;

    jail.namespace_pids();
    jail.namespace_user();
    jail.namespace_user_disable_setgroups();
    if config.limit_caps {
        // An empty capability mask is requested for the jailed process.
        jail.use_caps(0);
    }
    match config.run_as {
        RunAsUser::Unspecified => {
            // Only add the current user when bind mounts are requested and the caller did not
            // provide an explicit UID/GID map (which is applied further down).
            if config.bind_mounts && config.ugid_map.is_none() {
                add_current_user_to_jail(&mut jail)?;
            }
        }
        RunAsUser::CurrentUser => {
            add_current_user_to_jail(&mut jail)?;
        }
        RunAsUser::Root => {
            // Map ID 0 inside the jail onto the effective IDs of this process.
            let crosvm_uid = geteuid();
            let crosvm_gid = getegid();
            jail.uidmap(&format!("0 {crosvm_uid} 1"))
                .context("error setting UID map")?;
            jail.gidmap(&format!("0 {crosvm_gid} 1"))
                .context("error setting GID map")?;
        }
        RunAsUser::Specified(uid, gid) => {
            // An ID of 0 means "leave unchanged".
            if uid != 0 {
                jail.change_uid(uid)
            }
            if gid != 0 {
                jail.change_gid(gid)
            }
        }
    }
    if config.bind_mounts {
        // Mount a tmpfs at the jail's root; size=67108864 is 64 MiB (64 * 1024 * 1024).
        jail.mount_with_data(
            Path::new("none"),
            Path::new("/"),
            "tmpfs",
            (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
            "size=67108864",
        )?;
    }
    if let Some((uid_map, gid_map)) = config.ugid_map {
        jail.uidmap(uid_map).context("error setting UID map")?;
        jail.gidmap(gid_map).context("error setting GID map")?;
    }
    // Requested unconditionally here, even when `root` is "/" and the base jail skipped it.
    jail.namespace_vfs();
    if config.namespace_net {
        jail.namespace_net();
    }
    jail.no_new_privs();

    #[cfg(feature = "seccomp_trace")]
    {
        // Local mirror of the kernel's BPF instruction layout so the filter can be turned into
        // raw bytes.
        #[repr(C)]
        #[derive(Immutable, IntoBytes)]
        struct sock_filter {
            code: u16,
            jt: u8,
            jf: u8,
            k: u32,
        }
        const SECCOMP_RET_TRACE: u32 = 0x7ff00000;
        const SECCOMP_RET_LOG: u32 = 0x7ffc0000;
        const BPF_RET: u16 = 0x06;
        const BPF_K: u16 = 0x00;
        // A one-instruction program: unconditionally return SECCOMP_RET_LOG.
        const FILTER_RET_LOG_BLOCK: sock_filter = sock_filter {
            code: BPF_RET | BPF_K,
            jt: 0,
            jf: 0,
            k: SECCOMP_RET_LOG,
        };
        warn!("The running crosvm is compiled with seccomp_trace feature, and is strictly used for debugging purpose only. DO NOT USE IN PRODUCTION!!!");
        debug!(
            "seccomp_trace {{\"event\": \"minijail_create\", \"name\": \"{}\", \"jail_addr\": \"0x{:x}\"}}",
            config.seccomp_policy_name,
            read_jail_addr(&jail),
        );
        // Report a rejected filter through the normal error path instead of panicking.
        jail.parse_seccomp_bytes(FILTER_RET_LOG_BLOCK.as_bytes())
            .context("failed to parse seccomp_trace log filter")?;
    }

    #[cfg(not(feature = "seccomp_trace"))]
    if let Some(seccomp_policy_dir) = config.seccomp_policy_dir {
        let seccomp_policy_path = seccomp_policy_dir.join(config.seccomp_policy_name);
        // Prefer the precompiled .bpf program, unless failure logging was requested, in which
        // case the text .policy file is always used.
        let bpf_policy_file = seccomp_policy_path.with_extension("bpf");
        if bpf_policy_file.exists() && !config.log_failures {
            jail.parse_seccomp_program(&bpf_policy_file)
                .with_context(|| {
                    format!(
                        "failed to parse precompiled seccomp policy: {}",
                        bpf_policy_file.display()
                    )
                })?;
        } else {
            // NOTE(review): TSYNC is only enabled on the text-policy path, presumably for its
            // effect on the filter's failure action. Confirm against the minijail docs.
            jail.set_seccomp_filter_tsync();
            if config.log_failures {
                jail.log_seccomp_filter_failures();
            }
            let bpf_policy_file = seccomp_policy_path.with_extension("policy");
            jail.parse_seccomp_filters(&bpf_policy_file)
                .with_context(|| {
                    format!(
                        "failed to parse seccomp policy: {}",
                        bpf_policy_file.display()
                    )
                })?;
        }
    } else {
        // No policy directory configured: fall back to the program embedded in the binary.
        set_embedded_bpf_program(&mut jail, config.seccomp_policy_name)?;
    }

    jail.use_seccomp_filter();
    jail.run_as_init();
    // Apply the caller's remount mode only when one was requested.
    if let Some(mode) = config.remount_mode {
        jail.set_remount_mode(mode);
    }
    Ok(jail)
}
/// Creates a sandboxed jail with default settings for the seccomp policy named `policy`.
///
/// Returns `Ok(None)` when `jail_config` is `None`. Otherwise the jail is rooted at the
/// configuration's `pivot_root` and limited to `MAX_OPEN_FILES_DEFAULT` open files.
///
/// # Errors
///
/// Propagates any failure from `create_sandbox_minijail`.
pub fn simple_jail(jail_config: Option<&JailConfig>, policy: &str) -> Result<Option<Minijail>> {
    let Some(jail_config) = jail_config else {
        return Ok(None);
    };

    let sandbox_config = SandboxConfig::new(jail_config, policy);
    let jail = create_sandbox_minijail(
        &jail_config.pivot_root,
        MAX_OPEN_FILES_DEFAULT,
        &sandbox_config,
    )?;
    Ok(Some(jail))
}
/// Creates a sandboxed jail for the GPU device with the host paths it needs bind-mounted in.
///
/// The jail is built by `create_sandbox_minijail` with the GPU open-file limit. Then sysfs
/// directories, DRM nodes, optional vendor device nodes, library and graphics data directories,
/// `/proc`, and (if present) `/run/perfetto` are mounted into it.
///
/// # Arguments
///
/// * `root` - Absolute path of an existing directory to use as the jail's root.
/// * `config` - Sandbox settings passed through to `create_sandbox_minijail`.
/// * `render_node_only` - When true only `/dev/dri/renderD*` nodes are exposed rather than all
///   of `/dev/dri`.
/// * `snapshot_scratch_directory` - When set, a tmpfs is mounted at this path.
///
/// # Errors
///
/// Fails if the sandbox jail cannot be created or any mount cannot be configured.
pub fn create_gpu_minijail(
    root: &Path,
    config: &SandboxConfig,
    render_node_only: bool,
    snapshot_scratch_directory: Option<&Path>,
) -> Result<Minijail> {
    let mut jail = create_sandbox_minijail(root, MAX_OPEN_FILES_FOR_GPU, config)?;

    // sysfs entries: /sys/dev/char and /sys/devices are mounted unconditionally and read-only,
    // the cpuset cgroup only when the host has it, and writable.
    let sys_dev_char = Path::new("/sys/dev/char");
    jail.mount_bind(sys_dev_char, sys_dev_char, false)?;

    let cpuset_cgroup = Path::new("/sys/fs/cgroup/cpuset");
    if cpuset_cgroup.exists() {
        jail.mount_bind(cpuset_cgroup, cpuset_cgroup, true)?;
    }

    let sys_devices = Path::new("/sys/devices");
    jail.mount_bind(sys_devices, sys_devices, false)?;

    jail_mount_bind_drm(&mut jail, render_node_only)?;

    // Optional device nodes, mounted writable when the host provides them.
    for device in ["/dev/mali0", "/dev/pvr_sync", "/dev/udmabuf"] {
        let device = Path::new(device);
        if device.exists() {
            jail.mount_bind(device, device, true)?;
        }
    }

    // Library directories and graphics data directories, each mounted only if present.
    let optional_dirs = [
        "/usr/lib",
        "/usr/lib64",
        "/lib",
        "/lib64",
        "/usr/share/drirc.d",
        "/usr/share/glvnd",
        "/usr/share/libdrm",
        "/usr/share/vulkan",
    ];
    jail_mount_bind_if_exists(&mut jail, &optional_dirs)?;

    mount_proc(&mut jail)?;

    let perfetto_dir = Path::new("/run/perfetto");
    if perfetto_dir.exists() {
        jail.mount_bind(perfetto_dir, perfetto_dir, true)?;
    }

    if let Some(scratch_dir) = snapshot_scratch_directory {
        let tmpfs_flags = (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize;
        // size=4294967296 is 4 GiB (4 * 1024 * 1024 * 1024).
        jail.mount_with_data(
            Path::new("none"),
            scratch_dir,
            "tmpfs",
            tmpfs_flags,
            "size=4294967296",
        )?;
    }

    Ok(jail)
}
/// Bind-mounts the host's DRM device nodes into `jail`, read-only.
///
/// With `render_node_only` set, `/dev/dri/renderD128` and the following render nodes are mounted
/// one by one, stopping at the first node that does not exist (at most 63 nodes are probed).
/// Otherwise the whole `/dev/dri` directory is mounted if it exists.
///
/// # Errors
///
/// Fails if any bind mount cannot be configured.
pub fn jail_mount_bind_drm(jail: &mut Minijail, render_node_only: bool) -> Result<()> {
    if !render_node_only {
        let dri_dir = Path::new("/dev/dri");
        if dri_dir.exists() {
            jail.mount_bind(dri_dir, dri_dir, false)?;
        }
        return Ok(());
    }

    const DRM_NUM_NODES: u32 = 63;
    const DRM_RENDER_NODE_START: u32 = 128;
    let render_minors = DRM_RENDER_NODE_START..DRM_RENDER_NODE_START + DRM_NUM_NODES;

    for minor in render_minors {
        let node_name = format!("/dev/dri/renderD{}", minor);
        let node = Path::new(&node_name);
        // The first missing node ends the scan; later nodes are not probed.
        if !node.exists() {
            break;
        }
        jail.mount_bind(node, node, false)?;
    }
    Ok(())
}
/// Bind-mounts each path in `dirs` into `jail` at the same location, read-only, skipping paths
/// that do not exist on the host.
///
/// # Errors
///
/// Stops at and returns the first bind mount that cannot be configured.
pub fn jail_mount_bind_if_exists<P: AsRef<std::ffi::OsStr>>(
    jail: &mut Minijail,
    dirs: &[P],
) -> Result<()> {
    // The iterator is lazy, so each path is checked for existence right before it is mounted.
    dirs.iter()
        .map(|dir| Path::new(dir))
        .filter(|candidate| candidate.exists())
        .try_for_each(|present| jail.mount_bind(present, present, false))?;
    Ok(())
}
/// Mounts a `proc` filesystem at `/proc` inside `jail`.
///
/// The mount uses the `MS_NOSUID`, `MS_NODEV`, `MS_NOEXEC` and `MS_RDONLY` flags.
///
/// # Errors
///
/// Fails if the mount cannot be configured.
pub fn mount_proc(jail: &mut Minijail) -> Result<()> {
    let proc_flags =
        (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY) as usize;
    let source = Path::new("proc");
    let target = Path::new("/proc");

    jail.mount(source, target, "proc", proc_flags)?;
    Ok(())
}
/// Returns the first `usize`-sized word stored in `jail`.
///
/// `create_sandbox_minijail` logs this value as `jail_addr` in its `seccomp_trace` output.
/// NOTE(review): this presumably is the address of the underlying C jail object, which relies on
/// the layout of `Minijail` — confirm against the `minijail` crate.
#[cfg(feature = "seccomp_trace")]
pub fn read_jail_addr(jail: &Minijail) -> usize {
    // Compile-time guard: the read below must not extend past the end of `Minijail`.
    const_assert!(std::mem::size_of::<Minijail>() >= std::mem::size_of::<usize>());

    let first_word = (jail as *const Minijail).cast::<usize>();
    // SAFETY: `jail` is a live reference, and the assertion above guarantees that a `usize`
    // read at its start stays within the object.
    // NOTE(review): this also assumes `Minijail` is at least `usize`-aligned — confirm.
    unsafe { first_word.read() }
}
/// Maps the current effective UID and GID one-to-one into `jail` and switches the jail to them.
///
/// The UID or GID switch is skipped when the respective ID is 0.
///
/// # Errors
///
/// Fails if the UID or GID map cannot be set.
fn add_current_user_to_jail(jail: &mut Minijail) -> Result<()> {
    let (uid, gid) = (geteuid(), getegid());

    // Identity maps of length 1: "<inside> <outside> <count>".
    let uid_map = format!("{uid} {uid} 1");
    jail.uidmap(&uid_map).context("error setting UID map")?;
    let gid_map = format!("{gid} {gid} 1");
    jail.gidmap(&gid_map).context("error setting GID map")?;

    if uid != 0 {
        jail.change_uid(uid);
    }
    if gid != 0 {
        jail.change_gid(gid);
    }
    Ok(())
}
/// Loads the embedded seccomp BPF program named `seccomp_policy_name` into `jail`.
///
/// # Errors
///
/// Fails if no embedded program has that name, or if the jail rejects the program bytes.
pub fn set_embedded_bpf_program(jail: &mut Minijail, seccomp_policy_name: &str) -> Result<()> {
    // Error messages are built lazily, only on the failure paths.
    let not_found = || format!("failed to find embedded seccomp policy: {seccomp_policy_name}");
    let not_parsable =
        || format!("failed to parse embedded seccomp policy: {seccomp_policy_name}");

    let program = EMBEDDED_BPFS
        .get(seccomp_policy_name)
        .with_context(not_found)?;
    jail.parse_seccomp_bytes(program)
        .with_context(not_parsable)?;
    Ok(())
}