#[cfg(target_os = "android")]
mod android;
#[cfg(target_os = "android")]
use android as target_os;
#[cfg(target_os = "linux")]
#[allow(clippy::module_inception)]
mod linux;
#[cfg(target_os = "linux")]
use linux as target_os;
use log::warn;
#[macro_use]
pub mod ioctl;
#[macro_use]
pub mod syslog;
mod acpi_event;
mod capabilities;
mod descriptor;
mod event;
mod file;
mod file_traits;
mod mmap;
mod net;
mod netlink;
mod notifiers;
pub mod platform_timer_resolution;
mod poll;
mod priority;
mod sched;
mod shm;
pub mod signal;
mod signalfd;
mod terminal;
mod timer;
pub mod vsock;
mod write_zeroes;
use std::ffi::CString;
use std::fs::remove_file;
use std::fs::File;
use std::fs::OpenOptions;
use std::mem;
use std::mem::MaybeUninit;
use std::ops::Deref;
use std::os::unix::io::FromRawFd;
use std::os::unix::io::RawFd;
use std::os::unix::net::UnixDatagram;
use std::os::unix::net::UnixListener;
use std::os::unix::process::ExitStatusExt;
use std::path::Path;
use std::path::PathBuf;
use std::process::ExitStatus;
use std::ptr;
use std::sync::OnceLock;
use std::time::Duration;
pub use acpi_event::*;
pub use capabilities::drop_capabilities;
pub use event::EventExt;
pub(crate) use event::PlatformEvent;
pub use file::find_next_data;
pub use file::FileDataIterator;
pub(crate) use file_traits::lib::*;
pub use ioctl::*;
use libc::c_int;
use libc::c_long;
use libc::fcntl;
use libc::pipe2;
use libc::prctl;
use libc::syscall;
use libc::waitpid;
use libc::SYS_getpid;
use libc::SYS_getppid;
use libc::SYS_gettid;
use libc::EINVAL;
use libc::O_CLOEXEC;
use libc::PR_SET_NAME;
use libc::SIGKILL;
use libc::WNOHANG;
pub use mmap::*;
pub(in crate::sys) use net::sendmsg_nosignal as sendmsg;
pub(in crate::sys) use net::sockaddr_un;
pub(in crate::sys) use net::sockaddrv4_to_lib_c;
pub(in crate::sys) use net::sockaddrv6_to_lib_c;
pub use netlink::*;
pub use poll::EventContext;
pub use priority::*;
pub use sched::*;
pub use shm::MemfdSeals;
pub use shm::SharedMemoryLinux;
pub use signal::*;
pub use signalfd::Error as SignalFdError;
pub use signalfd::*;
pub use terminal::*;
pub(crate) use write_zeroes::file_punch_hole;
pub(crate) use write_zeroes::file_write_zeroes_at;
use crate::descriptor::FromRawDescriptor;
use crate::descriptor::SafeDescriptor;
pub use crate::errno::Error;
pub use crate::errno::Result;
pub use crate::errno::*;
use crate::number_of_logical_cores;
use crate::round_up_to_page_size;
pub use crate::sys::unix::descriptor::*;
use crate::syscall;
use crate::AsRawDescriptor;
use crate::Pid;
/// User ID type; an alias of `libc::uid_t`.
pub type Uid = libc::uid_t;
/// Group ID type; an alias of `libc::gid_t`.
pub type Gid = libc::gid_t;
/// File mode type; an alias of `libc::mode_t`.
pub type Mode = libc::mode_t;
/// Base directory under which the per-CPU `cpu<N>` sysfs directories are looked up.
const CPU_DIR: &str = "/sys/devices/system/cpu";
/// Sets the name of the calling thread to `name` using `prctl(PR_SET_NAME)`.
///
/// # Errors
///
/// Returns `EINVAL` if `name` contains an interior NUL byte, or the errno left by `prctl` if the
/// call itself fails.
#[inline(always)]
pub fn set_thread_name(name: &str) -> Result<()> {
    let name = CString::new(name).or(Err(Error::new(EINVAL)))?;
    // `prctl` is a C variadic function, so the argument must be a plain C pointer. A Rust
    // `&CStr` reference has no defined C ABI and must not be passed through varargs.
    // SAFETY: `name` is a valid NUL-terminated buffer that outlives the call, and the return
    // value is checked below.
    let ret = unsafe { prctl(PR_SET_NAME, name.as_ptr()) };
    if ret == 0 {
        Ok(())
    } else {
        errno_result()
    }
}
/// Returns the process ID of the calling process.
///
/// Issues the raw `SYS_getpid` system call instead of calling a libc wrapper function.
#[inline(always)]
pub fn getpid() -> Pid {
    // SAFETY: the system call takes no arguments and does not touch caller memory.
    let raw = unsafe { syscall(SYS_getpid as c_long) };
    raw as Pid
}
/// Returns the process ID of the parent of the calling process.
///
/// Issues the raw `SYS_getppid` system call instead of calling a libc wrapper function.
#[inline(always)]
pub fn getppid() -> Pid {
    // SAFETY: the system call takes no arguments and does not touch caller memory.
    let raw = unsafe { syscall(SYS_getppid as c_long) };
    raw as Pid
}
/// Returns the thread ID of the calling thread, obtained with the raw `SYS_gettid` system call.
pub fn gettid() -> Pid {
    // SAFETY: the system call takes no arguments and does not touch caller memory.
    let raw = unsafe { syscall(SYS_gettid as c_long) };
    raw as Pid
}
/// Returns the effective user ID of the calling process (thin wrapper over `libc::geteuid`).
#[inline(always)]
pub fn geteuid() -> Uid {
    // SAFETY: the call takes no arguments and does not touch caller memory.
    let euid: Uid = unsafe { libc::geteuid() };
    euid
}
/// Returns the effective group ID of the calling process (thin wrapper over `libc::getegid`).
#[inline(always)]
pub fn getegid() -> Gid {
    // SAFETY: the call takes no arguments and does not touch caller memory.
    let egid: Gid = unsafe { libc::getegid() };
    egid
}
/// The lock operation requested from [`flock`].
pub enum FlockOperation {
/// Translated to `libc::LOCK_SH` by [`flock`].
LockShared,
/// Translated to `libc::LOCK_EX` by [`flock`].
LockExclusive,
/// Translated to `libc::LOCK_UN` by [`flock`].
Unlock,
}
/// Applies the advisory lock operation `op` to `file` through `libc::flock`.
///
/// When `nonblocking` is true, `LOCK_NB` is OR-ed into the operation passed to the kernel.
///
/// # Errors
///
/// Returns the error produced by the `syscall!` wrapper if `flock` fails.
#[inline(always)]
pub fn flock<F: AsRawDescriptor>(file: &F, op: FlockOperation, nonblocking: bool) -> Result<()> {
    let base = match op {
        FlockOperation::Unlock => libc::LOCK_UN,
        FlockOperation::LockExclusive => libc::LOCK_EX,
        FlockOperation::LockShared => libc::LOCK_SH,
    };
    let operation = if nonblocking {
        base | libc::LOCK_NB
    } else {
        base
    };
    // SAFETY: `flock` only operates on the descriptor; no caller memory is passed in.
    syscall!(unsafe { libc::flock(file.as_raw_descriptor(), operation) })?;
    Ok(())
}
/// The operation to perform with [`fallocate`].
///
/// The `From<FallocateMode> for i32` conversion defines the flag bits each variant maps to.
pub enum FallocateMode {
/// Converted to `FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE`.
PunchHole,
/// Converted to `FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE`.
ZeroRange,
/// Converted to `FALLOC_FL_KEEP_SIZE` alone.
Allocate,
}
impl From<FallocateMode> for i32 {
    /// Converts the mode into `fallocate` flag bits; every mode includes `FALLOC_FL_KEEP_SIZE`.
    fn from(value: FallocateMode) -> Self {
        // Operation-specific bits first; `Allocate` contributes none.
        let op_bits = match value {
            FallocateMode::PunchHole => libc::FALLOC_FL_PUNCH_HOLE,
            FallocateMode::ZeroRange => libc::FALLOC_FL_ZERO_RANGE,
            FallocateMode::Allocate => 0,
        };
        op_bits | libc::FALLOC_FL_KEEP_SIZE
    }
}
impl From<FallocateMode> for u32 {
    /// Produces the same flag bits as the `i32` conversion, reinterpreted as `u32`.
    fn from(value: FallocateMode) -> Self {
        let signed_flags = i32::from(value);
        signed_flags as u32
    }
}
pub fn fallocate<F: AsRawDescriptor>(
file: &F,
mode: FallocateMode,
offset: u64,
len: u64,
) -> Result<()> {
let offset = if offset > libc::off64_t::MAX as u64 {
return Err(Error::new(libc::EINVAL));
} else {
offset as libc::off64_t
};
let len = if len > libc::off64_t::MAX as u64 {
return Err(Error::new(libc::EINVAL));
} else {
len as libc::off64_t
};
syscall!(unsafe { libc::fallocate64(file.as_raw_descriptor(), mode.into(), offset, len) })
.map(|_| ())
}
/// Safe wrapper around `libc::fstat64`: returns the `stat64` record for descriptor `f`.
///
/// # Errors
///
/// Returns the error reported by the `syscall!` wrapper if `fstat64` fails.
pub fn fstat<F: AsRawDescriptor>(f: &F) -> Result<libc::stat64> {
    let mut stat_buf = MaybeUninit::<libc::stat64>::zeroed();
    let fd = f.as_raw_descriptor();

    // SAFETY: `stat_buf` provides valid storage for exactly one `stat64`, and the result is
    // checked before the buffer is read.
    syscall!(unsafe { libc::fstat64(fd, stat_buf.as_mut_ptr()) })?;

    // SAFETY: the buffer was zero-initialized and the call above reported success.
    Ok(unsafe { stat_buf.assume_init() })
}
/// Reports whether `file` is a block device, judged by the file-type bits of its `st_mode`.
///
/// # Errors
///
/// Propagates any error from [`fstat`].
pub fn is_block_file<F: AsRawDescriptor>(file: &F) -> Result<bool> {
    let file_type = fstat(file)?.st_mode & libc::S_IFMT;
    Ok(file_type == libc::S_IFBLK)
}
// ioctl type number used for the block-device request declared below.
const BLOCK_IO_TYPE: u32 = 0x12;
// Declares the `BLKDISCARD` ioctl request with type `BLOCK_IO_TYPE` and number 119.
// NOTE(review): presumably equivalent to the kernel's `_IO(0x12, 119)` - confirm against the
// `ioctl_io_nr!` macro definition.
ioctl_io_nr!(BLKDISCARD, BLOCK_IO_TYPE, 119);
/// Issues the `BLKDISCARD` ioctl on `file`, passing `[offset, len]` as the range argument.
///
/// # Errors
///
/// Returns the error reported by the `syscall!` wrapper if the ioctl fails.
pub fn discard_block<F: AsRawDescriptor>(file: &F, offset: u64, len: u64) -> Result<()> {
    let range: [u64; 2] = [offset, len];
    let fd = file.as_raw_descriptor();
    // SAFETY: `range` is a live two-element `u64` array for the duration of the call, and the
    // return value is checked.
    syscall!(unsafe { libc::ioctl(fd, BLKDISCARD, &range) })?;
    Ok(())
}
/// Types that can be converted into a raw process ID.
pub trait AsRawPid {
/// Returns the raw process ID represented by `self`.
fn as_raw_pid(&self) -> Pid;
}
// A `Pid` is already a raw process ID, so it is returned unchanged.
impl AsRawPid for Pid {
fn as_raw_pid(&self) -> Pid {
*self
}
}
// A spawned child reports its ID through `Child::id`, which is cast to `Pid`.
impl AsRawPid for std::process::Child {
fn as_raw_pid(&self) -> Pid {
self.id() as Pid
}
}
/// Safe wrapper around `libc::waitpid`.
///
/// On success the first element is `Some(pid)` when `waitpid` returned a non-zero pid and `None`
/// when it returned 0. The second element is the `ExitStatus` built from the raw status word,
/// which keeps its initial value of 1 if `waitpid` did not write to it.
///
/// # Errors
///
/// Returns the current errno when `waitpid` returns a negative value.
pub fn wait_for_pid<A: AsRawPid>(pid: A, options: c_int) -> Result<(Option<Pid>, ExitStatus)> {
    let mut raw_status: c_int = 1;
    // SAFETY: `raw_status` is a live local the kernel may write to, and the return value is
    // checked before anything is used.
    let waited = unsafe { libc::waitpid(pid.as_raw_pid(), &mut raw_status, options) };
    if waited < 0 {
        return errno_result();
    }

    let reaped = match waited {
        0 => None,
        child => Some(child),
    };
    Ok((reaped, ExitStatus::from_raw(raw_status)))
}
/// Calls `waitpid(-1, NULL, WNOHANG)` once and returns its result.
///
/// The returned pid is whatever `waitpid` reported, which may be 0.
///
/// # Errors
///
/// Returns the current errno when `waitpid` returns -1.
pub fn reap_child() -> Result<Pid> {
    // SAFETY: a null status pointer is passed, so no caller memory is written.
    match unsafe { waitpid(-1, ptr::null_mut(), WNOHANG) } {
        -1 => errno_result(),
        pid => Ok(pid),
    }
}
/// Sends `SIGKILL` using `kill(0, SIGKILL)`.
///
/// NOTE(review): presumably `kill` wraps kill(2), where pid 0 addresses every process in the
/// caller's process group (including the caller) - confirm against the `signal` module.
///
/// An error from `kill` is propagated to the caller. The success path is marked
/// `unreachable!()`, so this function never returns `Ok`.
pub fn kill_process_group() -> Result<()> {
// The `?` returns early on failure; only a successful send falls through.
unsafe { kill(0, SIGKILL) }?;
// Reaching this line means the signal was reported as sent yet this process is still running.
unreachable!();
}
pub fn pipe() -> Result<(File, File)> {
let mut pipe_fds = [-1; 2];
let ret = unsafe { pipe2(&mut pipe_fds[0], O_CLOEXEC) };
if ret == -1 {
errno_result()
} else {
Ok(unsafe {
(
File::from_raw_fd(pipe_fds[0]),
File::from_raw_fd(pipe_fds[1]),
)
})
}
}
pub fn set_pipe_size(fd: RawFd, size: usize) -> Result<usize> {
syscall!(unsafe { fcntl(fd, libc::F_SETPIPE_SZ, size as c_int) }).map(|ret| ret as usize)
}
/// Creates a pipe and writes zero bytes into it up to the capacity reported by
/// [`set_pipe_size`].
///
/// The requested capacity is `round_up_to_page_size(1)`. The returned tuple is
/// `(read end, write end)`.
///
/// # Errors
///
/// Propagates failures from [`pipe`], [`set_pipe_size`], or the write.
pub fn new_pipe_full() -> Result<(File, File)> {
    use std::io::Write;

    let (rx, mut tx) = pipe()?;
    // Ask for the smallest page-rounded capacity, then fill whatever size was reported back.
    let capacity = set_pipe_size(tx.as_raw_descriptor(), round_up_to_page_size(1))?;
    let filler = vec![0u8; capacity];
    tx.write_all(&filler)?;
    Ok((rx, tx))
}
/// Wrapper around a [`UnixDatagram`] that removes the socket's bound path when dropped.
pub struct UnlinkUnixDatagram(pub UnixDatagram);

impl AsRef<UnixDatagram> for UnlinkUnixDatagram {
    fn as_ref(&self) -> &UnixDatagram {
        let Self(socket) = self;
        socket
    }
}

impl Drop for UnlinkUnixDatagram {
    /// Removes the file at the socket's local pathname, if it has one.
    ///
    /// Nothing happens when the local address cannot be read or is not a pathname; a failed
    /// removal is only logged.
    fn drop(&mut self) {
        let addr = match self.0.local_addr() {
            Ok(addr) => addr,
            Err(_) => return,
        };
        let path = match addr.as_pathname() {
            Some(path) => path,
            None => return,
        };
        if let Err(e) = remove_file(path) {
            warn!("failed to remove control socket file: {}", e);
        }
    }
}
/// Wrapper around a [`UnixListener`] that removes the socket's bound path when dropped.
pub struct UnlinkUnixListener(pub UnixListener);

impl AsRef<UnixListener> for UnlinkUnixListener {
    fn as_ref(&self) -> &UnixListener {
        let Self(listener) = self;
        listener
    }
}

impl Deref for UnlinkUnixListener {
    type Target = UnixListener;

    fn deref(&self) -> &UnixListener {
        let Self(listener) = self;
        listener
    }
}

impl Drop for UnlinkUnixListener {
    /// Removes the file at the listener's local pathname, if it has one.
    ///
    /// Nothing happens when the local address cannot be read or is not a pathname; a failed
    /// removal is only logged.
    fn drop(&mut self) {
        let addr = match self.0.local_addr() {
            Ok(addr) => addr,
            Err(_) => return,
        };
        let path = match addr.as_pathname() {
            Some(path) => path,
            None => return,
        };
        if let Err(e) = remove_file(path) {
            warn!("failed to remove control socket file: {}", e);
        }
    }
}
/// Validates `raw_descriptor` by delegating to [`validate_raw_fd`].
///
/// On success the returned descriptor is the duplicate created by [`validate_raw_fd`], not the
/// value passed in.
pub fn validate_raw_descriptor(raw_descriptor: RawDescriptor) -> Result<RawDescriptor> {
validate_raw_fd(&raw_descriptor)
}
pub fn validate_raw_fd(raw_fd: &RawFd) -> Result<RawFd> {
let flags = unsafe { libc::fcntl(*raw_fd, libc::F_GETFD) };
if flags < 0 || (flags & libc::FD_CLOEXEC) != 0 {
return Err(Error::new(libc::EBADF));
}
let dup_fd = unsafe { libc::fcntl(*raw_fd, libc::F_DUPFD_CLOEXEC, 0) };
if dup_fd < 0 {
return Err(Error::last());
}
Ok(dup_fd as RawFd)
}
/// Polls `fd` once with a zero timeout and reports whether `POLLIN` is set in the result.
///
/// Returns `false` when `poll` itself fails.
pub fn poll_in<F: AsRawDescriptor>(fd: &F) -> bool {
    let mut pfd = libc::pollfd {
        fd: fd.as_raw_descriptor(),
        events: libc::POLLIN,
        revents: 0,
    };
    // SAFETY: `pfd` is a single valid `pollfd`, matching the count of 1 passed to `poll`.
    let rc = unsafe { libc::poll(&mut pfd, 1, 0) };
    rc != -1 && (pfd.revents & libc::POLLIN) != 0
}
/// Returns a `Duration` of `libc::time_t::MAX` seconds plus 999,999,999 nanoseconds.
pub fn max_timeout() -> Duration {
    // Largest sub-second nanosecond count that does not carry into the seconds field.
    const MAX_SUBSEC_NANOS: u32 = 999_999_999;
    let max_secs = libc::time_t::MAX as u64;
    Duration::new(max_secs, MAX_SUBSEC_NANOS)
}
pub fn safe_descriptor_from_path<P: AsRef<Path>>(path: P) -> Result<Option<SafeDescriptor>> {
let path = path.as_ref();
if path.parent() == Some(Path::new("/proc/self/fd")) {
let raw_descriptor = path
.file_name()
.and_then(|fd_osstr| fd_osstr.to_str())
.and_then(|fd_str| fd_str.parse::<RawFd>().ok())
.ok_or_else(|| Error::new(EINVAL))?;
let validated_fd = validate_raw_fd(&raw_descriptor)?;
Ok(Some(
unsafe { SafeDescriptor::from_raw_descriptor(validated_fd) },
))
} else {
Ok(None)
}
}
/// Validates `fd` with [`validate_raw_fd`] and wraps the resulting duplicate in a
/// `SafeDescriptor`.
///
/// # Errors
///
/// Propagates any error from [`validate_raw_fd`].
pub fn safe_descriptor_from_cmdline_fd(fd: &RawFd) -> Result<SafeDescriptor> {
    validate_raw_fd(fd).map(|validated_fd| {
        // SAFETY: `validated_fd` is a fresh duplicate returned by `validate_raw_fd`, so the
        // `SafeDescriptor` takes sole ownership of it.
        unsafe { SafeDescriptor::from_raw_descriptor(validated_fd) }
    })
}
/// Opens `path` with `options`, unless it names a `/proc/self/fd/N` descriptor.
///
/// When [`safe_descriptor_from_path`] yields a descriptor, that duplicate is converted into the
/// returned `File` and `options` is not used.
///
/// # Errors
///
/// Propagates errors from [`safe_descriptor_from_path`] and from `OpenOptions::open`.
pub fn open_file_or_duplicate<P: AsRef<Path>>(path: P, options: &OpenOptions) -> Result<File> {
    let path = path.as_ref();
    match safe_descriptor_from_path(path)? {
        Some(descriptor) => Ok(descriptor.into()),
        None => Ok(options.open(path)?),
    }
}
/// Reads the `RLIMIT_NOFILE` limits with `prlimit64`, using pid 0 and no new limit.
///
/// # Errors
///
/// Returns the current errno when `prlimit64` returns a non-zero value.
pub fn max_open_files() -> Result<libc::rlimit64> {
    let mut limit_buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();

    // SAFETY: the new-limit pointer is null, and `limit_buf` is valid storage for the one
    // `rlimit64` the kernel writes back.
    let rc =
        unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), limit_buf.as_mut_ptr()) };
    if rc != 0 {
        return errno_result();
    }

    // SAFETY: the buffer was zero-initialized and the call above reported success.
    Ok(unsafe { limit_buf.assume_init() })
}
/// Runs `callback` with the soft open-file limit raised to the hard limit.
///
/// The soft limit is raised only when it is below the hard limit, and in that case the original
/// limits are restored after `callback` returns. The callback's own result is returned in the
/// inner `Result`.
///
/// # Errors
///
/// The outer `Result` is an error when reading, raising, or restoring the limit fails. If
/// restoring fails, the callback's result is discarded.
pub fn call_with_extended_max_files<T, E>(
    callback: impl FnOnce() -> std::result::Result<T, E>,
) -> Result<std::result::Result<T, E>> {
    let original = max_open_files()?;
    let needs_extension = original.rlim_cur < original.rlim_max;

    if needs_extension {
        let raised = libc::rlimit64 {
            rlim_cur: original.rlim_max,
            ..original
        };
        set_max_open_files(raised)?;
    }

    let outcome = callback();

    // Put the soft limit back if it was changed above.
    if needs_extension {
        set_max_open_files(original)?;
    }

    Ok(outcome)
}
/// Applies `limit` as the `RLIMIT_NOFILE` limits with `setrlimit64`.
///
/// # Errors
///
/// Returns the current errno when `setrlimit64` returns a non-zero value.
fn set_max_open_files(limit: libc::rlimit64) -> Result<()> {
    // SAFETY: `limit` is a live local that is only read by the call.
    match unsafe { libc::setrlimit64(libc::RLIMIT_NOFILE, &limit) } {
        0 => Ok(()),
        _ => errno_result(),
    }
}
/// Writes the decimal form of `id_to_write` into the file `cgroup_file` inside `cgroup_path`.
///
/// The file is opened with `File::create`.
///
/// # Errors
///
/// Propagates I/O errors from opening or writing the file.
pub fn move_to_cgroup(cgroup_path: PathBuf, id_to_write: Pid, cgroup_file: &str) -> Result<()> {
    use std::io::Write;

    let target = cgroup_path.join(cgroup_file);
    let mut file = File::create(target)?;
    let id_text = id_to_write.to_string();
    file.write_all(id_text.as_bytes())?;
    Ok(())
}
/// Writes `thread_id` into the `tasks` file of the cgroup at `cgroup_path`.
///
/// # Errors
///
/// Propagates any error from [`move_to_cgroup`].
pub fn move_task_to_cgroup(cgroup_path: PathBuf, thread_id: Pid) -> Result<()> {
    const TASKS_FILE: &str = "tasks";
    move_to_cgroup(cgroup_path, thread_id, TASKS_FILE)
}
/// Writes `process_id` into the `cgroup.procs` file of the cgroup at `cgroup_path`.
///
/// # Errors
///
/// Propagates any error from [`move_to_cgroup`].
pub fn move_proc_to_cgroup(cgroup_path: PathBuf, process_id: Pid) -> Result<()> {
    const PROCS_FILE: &str = "cgroup.procs";
    move_to_cgroup(cgroup_path, process_id, PROCS_FILE)
}
fn read_sysfs_cpu_info_in_dir(cpu_dir: &str, cpu_id: usize, property: &str) -> Result<String> {
let path = Path::new(cpu_dir)
.join(format!("cpu{cpu_id}"))
.join(property);
std::fs::read_to_string(path).map_err(|e| e.into())
}
/// Parses `property` of CPU `cpu_id` under `CPU_DIR` as a whitespace-separated list of `u32`s.
///
/// Delegates to [`parse_sysfs_cpu_info_vec_in_dir`].
fn parse_sysfs_cpu_info_vec(cpu_id: usize, property: &str) -> Result<Vec<u32>> {
parse_sysfs_cpu_info_vec_in_dir(CPU_DIR, cpu_id, property)
}
fn parse_sysfs_cpu_info_vec_in_dir(
cpu_dir: &str,
cpu_id: usize,
property: &str,
) -> Result<Vec<u32>> {
read_sysfs_cpu_info_in_dir(cpu_dir, cpu_id, property)?
.split_whitespace()
.map(|x| x.parse().map_err(|_| Error::new(libc::EINVAL)))
.collect()
}
/// Returns the values listed in `cpufreq/scaling_available_frequencies` for CPU `cpu_id`.
///
/// # Errors
///
/// Propagates read and parse errors from [`parse_sysfs_cpu_info_vec`].
pub fn logical_core_frequencies_khz(cpu_id: usize) -> Result<Vec<u32>> {
    const PROPERTY: &str = "cpufreq/scaling_available_frequencies";
    parse_sysfs_cpu_info_vec(cpu_id, PROPERTY)
}
/// Parses `property` of CPU `cpu_id` under `CPU_DIR` as a single `u32`.
///
/// Delegates to [`parse_sysfs_cpu_info_in_dir`].
fn parse_sysfs_cpu_info(cpu_id: usize, property: &str) -> Result<u32> {
parse_sysfs_cpu_info_in_dir(CPU_DIR, cpu_id, property)
}
fn parse_sysfs_cpu_info_in_dir(cpu_dir: &str, cpu_id: usize, property: &str) -> Result<u32> {
read_sysfs_cpu_info_in_dir(cpu_dir, cpu_id, property)?
.trim()
.parse()
.map_err(|_| Error::new(libc::EINVAL))
}
pub fn logical_core_capacity(cpu_id: usize) -> Result<u32> {
static CPU_MAX_FREQS: OnceLock<Option<Vec<u32>>> = OnceLock::new();
let cpu_capacity = parse_sysfs_cpu_info(cpu_id, "cpu_capacity")?;
let cpu_max_freqs = CPU_MAX_FREQS.get_or_init(|| {
(0..number_of_logical_cores().ok()?)
.map(|cpu_id| logical_core_max_freq_khz(cpu_id).ok())
.collect()
});
if let Some(cpu_max_freqs) = cpu_max_freqs {
let largest_max_freq = *cpu_max_freqs.iter().max().ok_or(Error::new(EINVAL))?;
let cpu_max_freq = *cpu_max_freqs.get(cpu_id).ok_or(Error::new(EINVAL))?;
let normalized_cpu_capacity = (u64::from(cpu_capacity) * u64::from(largest_max_freq))
.checked_div(u64::from(cpu_max_freq))
.ok_or(Error::new(EINVAL))?;
normalized_cpu_capacity
.try_into()
.map_err(|_| Error::new(EINVAL))
} else {
Ok(cpu_capacity)
}
}
/// Returns the value of `topology/physical_package_id` for CPU `cpu_id`, used as its cluster id.
///
/// # Errors
///
/// Propagates read and parse errors from [`parse_sysfs_cpu_info`].
pub fn logical_core_cluster_id(cpu_id: usize) -> Result<u32> {
    const PROPERTY: &str = "topology/physical_package_id";
    parse_sysfs_cpu_info(cpu_id, PROPERTY)
}
/// Returns the value of `cpufreq/cpuinfo_max_freq` for CPU `cpu_id`.
///
/// # Errors
///
/// Propagates read and parse errors from [`parse_sysfs_cpu_info`].
pub fn logical_core_max_freq_khz(cpu_id: usize) -> Result<u32> {
    const PROPERTY: &str = "cpufreq/cpuinfo_max_freq";
    parse_sysfs_cpu_info(cpu_id, PROPERTY)
}
/// Reports whether CPU `cpu_id` is online according to its sysfs `online` file.
///
/// A value of 1 means online. If the file does not exist (`ENOENT`), the CPU is treated as
/// online.
///
/// # Errors
///
/// Returns any error other than `ENOENT` from reading or parsing the file.
pub fn is_cpu_online(cpu_id: usize) -> Result<bool> {
    match parse_sysfs_cpu_info(cpu_id, "online") {
        Ok(online) => Ok(online == 1),
        Err(e) if e.errno() == libc::ENOENT => Ok(true),
        Err(e) => Err(e),
    }
}
/// C-layout scheduling attribute record handed to the `sched_setattr` system call.
///
/// NOTE(review): the field set presumably mirrors the kernel's `struct sched_attr` - confirm
/// against sched_setattr(2).
#[repr(C)]
pub struct sched_attr {
    /// Size of this structure in bytes; [`Default`] fills it in.
    pub size: u32,
    pub sched_policy: u32,
    pub sched_flags: u64,
    pub sched_nice: i32,
    pub sched_priority: u32,
    pub sched_runtime: u64,
    pub sched_deadline: u64,
    pub sched_period: u64,
    pub sched_util_min: u32,
    pub sched_util_max: u32,
}

impl Default for sched_attr {
    /// Returns a record with `size` set to the structure's byte size and every other field zero.
    fn default() -> Self {
        let size = std::mem::size_of::<Self>() as u32;
        Self {
            size,
            sched_policy: 0,
            sched_flags: 0,
            sched_nice: 0,
            sched_priority: 0,
            sched_runtime: 0,
            sched_deadline: 0,
            sched_period: 0,
            sched_util_min: 0,
            sched_util_max: 0,
        }
    }
}
pub fn sched_setattr(pid: Pid, attr: &mut sched_attr, flags: u32) -> Result<()> {
let ret = unsafe {
libc::syscall(
libc::SYS_sched_setattr,
pid as usize,
attr as *mut sched_attr as usize,
flags as usize,
)
};
if ret < 0 {
return Err(Error::last());
}
Ok(())
}
#[cfg(test)]
mod tests {
    use std::fs::create_dir_all;
    use std::fs::File;
    use std::io::Write;
    use std::os::fd::AsRawFd;

    use tempfile::TempDir;

    use super::*;
    use crate::unix::add_fd_flags;

    /// Writes `content` to `path`, creating any missing parent directories first.
    fn create_temp_file(path: &Path, content: &str) {
        if let Some(parent) = path.parent() {
            create_dir_all(parent).unwrap();
        }
        File::create(path)
            .unwrap()
            .write_all(content.as_bytes())
            .unwrap();
    }

    /// A pipe from `new_pipe_full` rejects a further non-blocking write.
    #[test]
    fn pipe_size_and_fill() {
        let (_rx, mut tx) = new_pipe_full().expect("Failed to pipe");

        // Without O_NONBLOCK the extra write below would block instead of failing.
        add_fd_flags(tx.as_raw_fd(), libc::O_NONBLOCK).expect("Failed to set tx non blocking");
        tx.write(&[0u8; 8])
            .expect_err("Write after fill didn't fail");
    }

    /// A single-value property file is parsed into one `u32`.
    #[test]
    fn test_parse_sysfs_cpu_info() {
        let tmp = TempDir::new().unwrap();
        let cpu_dir = tmp.path().join("sys/devices/system/cpu");
        create_temp_file(
            &tmp.path()
                .join("sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq"),
            "1000",
        );

        let parsed =
            parse_sysfs_cpu_info_in_dir(cpu_dir.to_str().unwrap(), 0, "cpufreq/cpuinfo_max_freq");
        assert_eq!(parsed.unwrap(), 1000);
    }

    /// A missing property file surfaces as `ENOENT`.
    #[test]
    fn test_parse_sysfs_cpu_info_error() {
        let tmp = TempDir::new().unwrap();
        let cpu_dir = tmp.path().join("sys/devices/system/cpu");

        let parsed =
            parse_sysfs_cpu_info_in_dir(cpu_dir.to_str().unwrap(), 0, "cpufreq/cpuinfo_max_freq");
        assert_eq!(parsed.unwrap_err(), Error::new(libc::ENOENT));
    }

    /// A whitespace-separated property file is parsed into a vector of `u32`s.
    #[test]
    fn test_parse_sysfs_cpu_info_vec() {
        let tmp = TempDir::new().unwrap();
        let cpu_dir = tmp.path().join("sys/devices/system/cpu");
        create_temp_file(
            &tmp.path()
                .join("sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies"),
            "1000 2000",
        );

        let parsed = parse_sysfs_cpu_info_vec_in_dir(
            cpu_dir.to_str().unwrap(),
            0,
            "cpufreq/scaling_available_frequencies",
        );
        assert_eq!(parsed.unwrap(), vec![1000, 2000]);
    }

    /// A missing list-valued property file surfaces as `ENOENT`.
    #[test]
    fn test_parse_sysfs_cpu_info_vec_error() {
        let tmp = TempDir::new().unwrap();
        let cpu_dir = tmp.path().join("sys/devices/system/cpu");

        let parsed = parse_sysfs_cpu_info_vec_in_dir(
            cpu_dir.to_str().unwrap(),
            0,
            "cpufreq/scaling_available_frequencies",
        );
        assert_eq!(parsed.unwrap_err(), Error::new(libc::ENOENT));
    }
}