Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
google
GitHub Repository: google/crosvm
Path: blob/main/base/src/sys/linux/mod.rs
5394 views
1
// Copyright 2017 The ChromiumOS Authors
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
//! Small system utility modules for usage by other modules.
6
7
#[cfg(target_os = "android")]
8
mod android;
9
#[cfg(target_os = "android")]
10
use android as target_os;
11
#[cfg(target_os = "linux")]
12
#[allow(clippy::module_inception)]
13
mod linux;
14
#[cfg(target_os = "linux")]
15
use linux as target_os;
16
use log::warn;
17
#[macro_use]
18
pub mod ioctl;
19
#[macro_use]
20
pub mod syslog;
21
mod acpi_event;
22
mod capabilities;
23
mod descriptor;
24
mod event;
25
mod file;
26
mod file_traits;
27
mod mmap;
28
mod net;
29
mod netlink;
30
mod notifiers;
31
pub mod platform_timer_resolution;
32
mod poll;
33
mod priority;
34
mod sched;
35
mod shm;
36
pub mod signal;
37
mod signalfd;
38
mod terminal;
39
mod timer;
40
pub mod vsock;
41
mod write_zeroes;
42
43
use std::ffi::CString;
44
use std::fs::remove_file;
45
use std::fs::File;
46
use std::fs::OpenOptions;
47
use std::mem;
48
use std::mem::MaybeUninit;
49
use std::ops::Deref;
50
use std::os::unix::io::FromRawFd;
51
use std::os::unix::io::RawFd;
52
use std::os::unix::net::UnixDatagram;
53
use std::os::unix::net::UnixListener;
54
use std::os::unix::process::ExitStatusExt;
55
use std::path::Path;
56
use std::path::PathBuf;
57
use std::process::ExitStatus;
58
use std::ptr;
59
use std::sync::OnceLock;
60
use std::time::Duration;
61
62
pub use acpi_event::*;
63
pub use capabilities::drop_capabilities;
64
pub use event::EventExt;
65
pub(crate) use event::PlatformEvent;
66
pub use file::find_next_data;
67
pub use file::FileDataIterator;
68
pub(crate) use file_traits::lib::*;
69
pub use ioctl::*;
70
use libc::c_int;
71
use libc::c_long;
72
use libc::fcntl;
73
use libc::pipe2;
74
use libc::prctl;
75
use libc::syscall;
76
use libc::waitpid;
77
use libc::SYS_getpid;
78
use libc::SYS_getppid;
79
use libc::SYS_gettid;
80
use libc::EINVAL;
81
use libc::O_CLOEXEC;
82
use libc::PR_SET_NAME;
83
use libc::SIGKILL;
84
use libc::WNOHANG;
85
pub use mmap::*;
86
pub(in crate::sys) use net::sendmsg_nosignal as sendmsg;
87
pub(in crate::sys) use net::sockaddr_un;
88
pub(in crate::sys) use net::sockaddrv4_to_lib_c;
89
pub(in crate::sys) use net::sockaddrv6_to_lib_c;
90
pub use netlink::*;
91
pub use poll::EventContext;
92
pub use priority::*;
93
pub use sched::*;
94
pub use shm::MemfdSeals;
95
pub use shm::SharedMemoryLinux;
96
pub use signal::*;
97
pub use signalfd::Error as SignalFdError;
98
pub use signalfd::*;
99
pub use terminal::*;
100
pub(crate) use write_zeroes::file_punch_hole;
101
pub(crate) use write_zeroes::file_write_zeroes_at;
102
103
use crate::descriptor::FromRawDescriptor;
104
use crate::descriptor::SafeDescriptor;
105
pub use crate::errno::Error;
106
pub use crate::errno::Result;
107
pub use crate::errno::*;
108
use crate::number_of_logical_cores;
109
use crate::round_up_to_page_size;
110
pub use crate::sys::unix::descriptor::*;
111
use crate::syscall;
112
use crate::AsRawDescriptor;
113
use crate::Pid;
114
115
/// Re-exports of the `libc` types that are part of this module's API.
116
pub type Uid = libc::uid_t;
117
pub type Gid = libc::gid_t;
118
pub type Mode = libc::mode_t;
119
120
// Directory that holds the per-CPU sysfs info files.
121
const CPU_DIR: &str = "/sys/devices/system/cpu";
122
123
/// Safe wrapper for PR_SET_NAME(2const)
124
#[inline(always)]
125
pub fn set_thread_name(name: &str) -> Result<()> {
126
let name = CString::new(name).or(Err(Error::new(EINVAL)))?;
127
// SAFETY: prctl copies name and doesn't expect it to outlive this function.
128
let ret = unsafe { prctl(PR_SET_NAME, name.as_c_str()) };
129
if ret == 0 {
130
Ok(())
131
} else {
132
errno_result()
133
}
134
}
135
136
/// This bypasses `libc`'s caching `getpid(2)` wrapper which can be invalid if a raw clone was used
137
/// elsewhere.
138
#[inline(always)]
139
pub fn getpid() -> Pid {
140
// SAFETY:
141
// Safe because this syscall can never fail and we give it a valid syscall number.
142
unsafe { syscall(SYS_getpid as c_long) as Pid }
143
}
144
145
/// Safe wrapper for the geppid Linux systemcall.
146
#[inline(always)]
147
pub fn getppid() -> Pid {
148
// SAFETY:
149
// Safe because this syscall can never fail and we give it a valid syscall number.
150
unsafe { syscall(SYS_getppid as c_long) as Pid }
151
}
152
153
/// Safe wrapper for the gettid Linux systemcall.
154
pub fn gettid() -> Pid {
155
// SAFETY:
156
// Calling the gettid() sycall is always safe.
157
unsafe { syscall(SYS_gettid as c_long) as Pid }
158
}
159
160
/// Safe wrapper for `geteuid(2)`.
161
#[inline(always)]
162
pub fn geteuid() -> Uid {
163
// SAFETY:
164
// trivially safe
165
unsafe { libc::geteuid() }
166
}
167
168
/// Safe wrapper for `getegid(2)`.
169
#[inline(always)]
170
pub fn getegid() -> Gid {
171
// SAFETY:
172
// trivially safe
173
unsafe { libc::getegid() }
174
}
175
176
/// The operation to perform with `flock`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FlockOperation {
    /// Maps to `LOCK_SH`.
    LockShared,
    /// Maps to `LOCK_EX`.
    LockExclusive,
    /// Maps to `LOCK_UN`.
    Unlock,
}
182
183
/// Safe wrapper for flock(2) with the operation `op` and optionally `nonblocking`. The lock will be
184
/// dropped automatically when `file` is dropped.
185
#[inline(always)]
186
pub fn flock<F: AsRawDescriptor>(file: &F, op: FlockOperation, nonblocking: bool) -> Result<()> {
187
let mut operation = match op {
188
FlockOperation::LockShared => libc::LOCK_SH,
189
FlockOperation::LockExclusive => libc::LOCK_EX,
190
FlockOperation::Unlock => libc::LOCK_UN,
191
};
192
193
if nonblocking {
194
operation |= libc::LOCK_NB;
195
}
196
197
// SAFETY:
198
// Safe since we pass in a valid fd and flock operation, and check the return value.
199
syscall!(unsafe { libc::flock(file.as_raw_descriptor(), operation) }).map(|_| ())
200
}
201
202
/// The operation to perform with `fallocate`.
///
/// Each variant is converted to the corresponding `FALLOC_FL_*` flag set by the `From` impls for
/// `i32` and `u32`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FallocateMode {
    /// Maps to `FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE`.
    PunchHole,
    /// Maps to `FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE`.
    ZeroRange,
    /// Maps to `FALLOC_FL_KEEP_SIZE`.
    Allocate,
}
208
209
impl From<FallocateMode> for i32 {
210
fn from(value: FallocateMode) -> Self {
211
match value {
212
FallocateMode::Allocate => libc::FALLOC_FL_KEEP_SIZE,
213
FallocateMode::PunchHole => libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
214
FallocateMode::ZeroRange => libc::FALLOC_FL_ZERO_RANGE | libc::FALLOC_FL_KEEP_SIZE,
215
}
216
}
217
}
218
219
/// Converts a [`FallocateMode`] into its flag bits, reinterpreted as `u32`.
impl From<FallocateMode> for u32 {
    fn from(value: FallocateMode) -> Self {
        let signed_flags = i32::from(value);
        signed_flags as u32
    }
}
224
225
/// Safe wrapper for `fallocate()`.
226
pub fn fallocate<F: AsRawDescriptor>(
227
file: &F,
228
mode: FallocateMode,
229
offset: u64,
230
len: u64,
231
) -> Result<()> {
232
let offset = if offset > libc::off64_t::MAX as u64 {
233
return Err(Error::new(libc::EINVAL));
234
} else {
235
offset as libc::off64_t
236
};
237
238
let len = if len > libc::off64_t::MAX as u64 {
239
return Err(Error::new(libc::EINVAL));
240
} else {
241
len as libc::off64_t
242
};
243
244
// SAFETY:
245
// Safe since we pass in a valid fd and fallocate mode, validate offset and len,
246
// and check the return value.
247
syscall!(unsafe { libc::fallocate64(file.as_raw_descriptor(), mode.into(), offset, len) })
248
.map(|_| ())
249
}
250
251
/// Safe wrapper for `fstat()`.
252
pub fn fstat<F: AsRawDescriptor>(f: &F) -> Result<libc::stat64> {
253
let mut st = MaybeUninit::<libc::stat64>::zeroed();
254
255
// SAFETY:
256
// Safe because the kernel will only write data in `st` and we check the return
257
// value.
258
syscall!(unsafe { libc::fstat64(f.as_raw_descriptor(), st.as_mut_ptr()) })?;
259
260
// SAFETY:
261
// Safe because the kernel guarantees that the struct is now fully initialized.
262
Ok(unsafe { st.assume_init() })
263
}
264
265
/// Checks whether a file is a block device fie or not.
266
pub fn is_block_file<F: AsRawDescriptor>(file: &F) -> Result<bool> {
267
let stat = fstat(file)?;
268
Ok((stat.st_mode & libc::S_IFMT) == libc::S_IFBLK)
269
}
270
271
const BLOCK_IO_TYPE: u32 = 0x12;
272
ioctl_io_nr!(BLKDISCARD, BLOCK_IO_TYPE, 119);
273
274
/// Discards the given range of a block file.
275
pub fn discard_block<F: AsRawDescriptor>(file: &F, offset: u64, len: u64) -> Result<()> {
276
let range: [u64; 2] = [offset, len];
277
// SAFETY:
278
// Safe because
279
// - we check the return value.
280
// - ioctl(BLKDISCARD) does not hold the descriptor after the call.
281
// - ioctl(BLKDISCARD) does not break the file descriptor.
282
// - ioctl(BLKDISCARD) does not modify the given range.
283
syscall!(unsafe { libc::ioctl(file.as_raw_descriptor(), BLKDISCARD, &range) }).map(|_| ())
284
}
285
286
/// A trait used to abstract types that provide a process id that can be operated on.
287
pub trait AsRawPid {
288
fn as_raw_pid(&self) -> Pid;
289
}
290
291
impl AsRawPid for Pid {
292
fn as_raw_pid(&self) -> Pid {
293
*self
294
}
295
}
296
297
impl AsRawPid for std::process::Child {
298
fn as_raw_pid(&self) -> Pid {
299
self.id() as Pid
300
}
301
}
302
303
/// A safe wrapper around waitpid.
304
///
305
/// On success if a process was reaped, it will be returned as the first value.
306
/// The second returned value is the ExitStatus from the libc::waitpid() call.
307
///
308
/// Note: this can block if libc::WNOHANG is not set and EINTR is not handled internally.
309
pub fn wait_for_pid<A: AsRawPid>(pid: A, options: c_int) -> Result<(Option<Pid>, ExitStatus)> {
310
let pid = pid.as_raw_pid();
311
let mut status: c_int = 1;
312
// SAFETY:
313
// Safe because status is owned and the error is checked.
314
let ret = unsafe { libc::waitpid(pid, &mut status, options) };
315
if ret < 0 {
316
return errno_result();
317
}
318
Ok((
319
if ret == 0 { None } else { Some(ret) },
320
ExitStatus::from_raw(status),
321
))
322
}
323
324
/// Reaps a child process that has terminated.
325
///
326
/// Returns `Ok(pid)` where `pid` is the process that was reaped or `Ok(0)` if none of the children
327
/// have terminated. An `Error` is with `errno == ECHILD` if there are no children left to reap.
328
///
329
/// # Examples
330
///
331
/// Reaps all child processes until there are no terminated children to reap.
332
///
333
/// ```
334
/// fn reap_children() {
335
/// loop {
336
/// match base::linux::reap_child() {
337
/// Ok(0) => println!("no children ready to reap"),
338
/// Ok(pid) => {
339
/// println!("reaped {}", pid);
340
/// continue
341
/// },
342
/// Err(e) if e.errno() == libc::ECHILD => println!("no children left"),
343
/// Err(e) => println!("error reaping children: {}", e),
344
/// }
345
/// break
346
/// }
347
/// }
348
/// ```
349
pub fn reap_child() -> Result<Pid> {
350
// SAFETY:
351
// Safe because we pass in no memory, prevent blocking with WNOHANG, and check for error.
352
let ret = unsafe { waitpid(-1, ptr::null_mut(), WNOHANG) };
353
if ret == -1 {
354
errno_result()
355
} else {
356
Ok(ret)
357
}
358
}
359
360
/// Kill all processes in the current process group.
361
///
362
/// On success, this kills all processes in the current process group, including the current
363
/// process, meaning this will not return. This is equivalent to a call to `kill(0, SIGKILL)`.
364
pub fn kill_process_group() -> Result<()> {
365
// SAFETY: Safe because pid is 'self group' and return value doesn't matter.
366
unsafe { kill(0, SIGKILL) }?;
367
// Kill succeeded, so this process never reaches here.
368
unreachable!();
369
}
370
371
/// Spawns a pipe pair where the first pipe is the read end and the second pipe is the write end.
372
///
373
/// The `O_CLOEXEC` flag will be set during pipe creation.
374
pub fn pipe() -> Result<(File, File)> {
375
let mut pipe_fds = [-1; 2];
376
// SAFETY:
377
// Safe because pipe2 will only write 2 element array of i32 to the given pointer, and we check
378
// for error.
379
let ret = unsafe { pipe2(&mut pipe_fds[0], O_CLOEXEC) };
380
if ret == -1 {
381
errno_result()
382
} else {
383
// SAFETY:
384
// Safe because both fds must be valid for pipe2 to have returned sucessfully and we have
385
// exclusive ownership of them.
386
Ok(unsafe {
387
(
388
File::from_raw_fd(pipe_fds[0]),
389
File::from_raw_fd(pipe_fds[1]),
390
)
391
})
392
}
393
}
394
395
/// Sets the pipe signified with fd to `size`.
396
///
397
/// Returns the new size of the pipe or an error if the OS fails to set the pipe size.
398
pub fn set_pipe_size(fd: RawFd, size: usize) -> Result<usize> {
399
// SAFETY:
400
// Safe because fcntl with the `F_SETPIPE_SZ` arg doesn't touch memory.
401
syscall!(unsafe { fcntl(fd, libc::F_SETPIPE_SZ, size as c_int) }).map(|ret| ret as usize)
402
}
403
404
/// Test-only function used to create a pipe that is full. The pipe is created, has its size set to
405
/// the minimum and then has that much data written to it. Use `new_pipe_full` to test handling of
406
/// blocking `write` calls in unit tests.
407
pub fn new_pipe_full() -> Result<(File, File)> {
408
use std::io::Write;
409
410
let (rx, mut tx) = pipe()?;
411
// The smallest allowed size of a pipe is the system page size on linux.
412
let page_size = set_pipe_size(tx.as_raw_descriptor(), round_up_to_page_size(1))?;
413
414
// Fill the pipe with page_size zeros so the next write call will block.
415
let buf = vec![0u8; page_size];
416
tx.write_all(&buf)?;
417
418
Ok((rx, tx))
419
}
420
421
/// Used to attempt to clean up a named pipe after it is no longer used.
422
pub struct UnlinkUnixDatagram(pub UnixDatagram);
423
impl AsRef<UnixDatagram> for UnlinkUnixDatagram {
424
fn as_ref(&self) -> &UnixDatagram {
425
&self.0
426
}
427
}
428
impl Drop for UnlinkUnixDatagram {
429
fn drop(&mut self) {
430
if let Ok(addr) = self.0.local_addr() {
431
if let Some(path) = addr.as_pathname() {
432
if let Err(e) = remove_file(path) {
433
warn!("failed to remove control socket file: {}", e);
434
}
435
}
436
}
437
}
438
}
439
440
/// Used to attempt to clean up a named pipe after it is no longer used.
441
pub struct UnlinkUnixListener(pub UnixListener);
442
443
impl AsRef<UnixListener> for UnlinkUnixListener {
444
fn as_ref(&self) -> &UnixListener {
445
&self.0
446
}
447
}
448
449
impl Deref for UnlinkUnixListener {
450
type Target = UnixListener;
451
452
fn deref(&self) -> &UnixListener {
453
&self.0
454
}
455
}
456
457
impl Drop for UnlinkUnixListener {
458
fn drop(&mut self) {
459
if let Ok(addr) = self.0.local_addr() {
460
if let Some(path) = addr.as_pathname() {
461
if let Err(e) = remove_file(path) {
462
warn!("failed to remove control socket file: {}", e);
463
}
464
}
465
}
466
}
467
}
468
469
/// Verifies that |raw_descriptor| is actually owned by this process and duplicates it
470
/// to ensure that we have a unique handle to it.
471
pub fn validate_raw_descriptor(raw_descriptor: RawDescriptor) -> Result<RawDescriptor> {
472
validate_raw_fd(&raw_descriptor)
473
}
474
475
/// Verifies that |raw_fd| is actually owned by this process and duplicates it to ensure that
476
/// we have a unique handle to it.
477
pub fn validate_raw_fd(raw_fd: &RawFd) -> Result<RawFd> {
478
// Checking that close-on-exec isn't set helps filter out FDs that were opened by
479
// crosvm as all crosvm FDs are close on exec.
480
// SAFETY:
481
// Safe because this doesn't modify any memory and we check the return value.
482
let flags = unsafe { libc::fcntl(*raw_fd, libc::F_GETFD) };
483
if flags < 0 || (flags & libc::FD_CLOEXEC) != 0 {
484
return Err(Error::new(libc::EBADF));
485
}
486
487
// SAFETY:
488
// Duplicate the fd to ensure that we don't accidentally close an fd previously
489
// opened by another subsystem. Safe because this doesn't modify any memory and
490
// we check the return value.
491
let dup_fd = unsafe { libc::fcntl(*raw_fd, libc::F_DUPFD_CLOEXEC, 0) };
492
if dup_fd < 0 {
493
return Err(Error::last());
494
}
495
Ok(dup_fd as RawFd)
496
}
497
498
/// Utility function that returns true if the given FD is readable without blocking.
499
///
500
/// On an error, such as an invalid or incompatible FD, this will return false, which can not be
501
/// distinguished from a non-ready to read FD.
502
pub fn poll_in<F: AsRawDescriptor>(fd: &F) -> bool {
503
let mut fds = libc::pollfd {
504
fd: fd.as_raw_descriptor(),
505
events: libc::POLLIN,
506
revents: 0,
507
};
508
// SAFETY:
509
// Safe because we give a valid pointer to a list (of 1) FD and check the return value.
510
let ret = unsafe { libc::poll(&mut fds, 1, 0) };
511
// An error probably indicates an invalid FD, or an FD that can't be polled. Returning false in
512
// that case is probably correct as such an FD is unlikely to be readable, although there are
513
// probably corner cases in which that is wrong.
514
if ret == -1 {
515
return false;
516
}
517
fds.revents & libc::POLLIN != 0
518
}
519
520
/// Return the maximum Duration that can be used with libc::timespec.
521
pub fn max_timeout() -> Duration {
522
Duration::new(libc::time_t::MAX as u64, 999999999)
523
}
524
525
/// If the given path is of the form /proc/self/fd/N for some N, returns `Ok(Some(N))`. Otherwise
526
/// returns `Ok(None)`.
527
pub fn safe_descriptor_from_path<P: AsRef<Path>>(path: P) -> Result<Option<SafeDescriptor>> {
528
let path = path.as_ref();
529
if path.parent() == Some(Path::new("/proc/self/fd")) {
530
let raw_descriptor = path
531
.file_name()
532
.and_then(|fd_osstr| fd_osstr.to_str())
533
.and_then(|fd_str| fd_str.parse::<RawFd>().ok())
534
.ok_or_else(|| Error::new(EINVAL))?;
535
let validated_fd = validate_raw_fd(&raw_descriptor)?;
536
Ok(Some(
537
// SAFETY:
538
// Safe because nothing else has access to validated_fd after this call.
539
unsafe { SafeDescriptor::from_raw_descriptor(validated_fd) },
540
))
541
} else {
542
Ok(None)
543
}
544
}
545
546
/// Check FD is not opened by crosvm and returns a FD that is freshly DUPFD_CLOEXEC's.
547
/// A SafeDescriptor is created from the duplicated fd. It does not take ownership of
548
/// fd passed by argument.
549
pub fn safe_descriptor_from_cmdline_fd(fd: &RawFd) -> Result<SafeDescriptor> {
550
let validated_fd = validate_raw_fd(fd)?;
551
Ok(
552
// SAFETY:
553
// Safe because nothing else has access to validated_fd after this call.
554
unsafe { SafeDescriptor::from_raw_descriptor(validated_fd) },
555
)
556
}
557
558
/// Open the file with the given path, or if it is of the form `/proc/self/fd/N` then just use the
559
/// file descriptor.
560
///
561
/// Note that this will not work properly if the same `/proc/self/fd/N` path is used twice in
562
/// different places, as the metadata (including the offset) will be shared between both file
563
/// descriptors.
564
pub fn open_file_or_duplicate<P: AsRef<Path>>(path: P, options: &OpenOptions) -> Result<File> {
565
let path = path.as_ref();
566
// Special case '/proc/self/fd/*' paths. The FD is already open, just use it.
567
Ok(if let Some(fd) = safe_descriptor_from_path(path)? {
568
fd.into()
569
} else {
570
options.open(path)?
571
})
572
}
573
574
/// Get the soft and hard limits of max number of open files allowed by the environment.
575
pub fn max_open_files() -> Result<libc::rlimit64> {
576
let mut buf = mem::MaybeUninit::<libc::rlimit64>::zeroed();
577
578
// SAFETY:
579
// Safe because this will only modify `buf` and we check the return value.
580
let res = unsafe { libc::prlimit64(0, libc::RLIMIT_NOFILE, ptr::null(), buf.as_mut_ptr()) };
581
if res == 0 {
582
// SAFETY:
583
// Safe because the kernel guarantees that the struct is fully initialized.
584
let limit = unsafe { buf.assume_init() };
585
Ok(limit)
586
} else {
587
errno_result()
588
}
589
}
590
591
/// Executes the given callback with extended soft limit of max number of open files. After the
592
/// callback executed, restore the limit.
593
pub fn call_with_extended_max_files<T, E>(
594
callback: impl FnOnce() -> std::result::Result<T, E>,
595
) -> Result<std::result::Result<T, E>> {
596
let cur_limit = max_open_files()?;
597
let new_limit = libc::rlimit64 {
598
rlim_cur: cur_limit.rlim_max,
599
..cur_limit
600
};
601
let needs_extension = cur_limit.rlim_cur < new_limit.rlim_cur;
602
if needs_extension {
603
set_max_open_files(new_limit)?;
604
}
605
606
let r = callback();
607
608
// Restore the soft limit.
609
if needs_extension {
610
set_max_open_files(cur_limit)?;
611
}
612
613
Ok(r)
614
}
615
616
/// Set the soft and hard limits of max number of open files to the given value.
617
fn set_max_open_files(limit: libc::rlimit64) -> Result<()> {
618
// SAFETY: RLIMIT_NOFILE is known only to read a buffer of size rlimit64, and we have always
619
// rlimit64 allocated.
620
let res = unsafe { libc::setrlimit64(libc::RLIMIT_NOFILE, &limit) };
621
if res == 0 {
622
Ok(())
623
} else {
624
errno_result()
625
}
626
}
627
628
/// Moves the requested PID/TID to a particular cgroup
629
pub fn move_to_cgroup(cgroup_path: PathBuf, id_to_write: Pid, cgroup_file: &str) -> Result<()> {
630
use std::io::Write;
631
632
let gpu_cgroup_file = cgroup_path.join(cgroup_file);
633
let mut f = File::create(gpu_cgroup_file)?;
634
f.write_all(id_to_write.to_string().as_bytes())?;
635
Ok(())
636
}
637
638
pub fn move_task_to_cgroup(cgroup_path: PathBuf, thread_id: Pid) -> Result<()> {
639
move_to_cgroup(cgroup_path, thread_id, "tasks")
640
}
641
642
pub fn move_proc_to_cgroup(cgroup_path: PathBuf, process_id: Pid) -> Result<()> {
643
move_to_cgroup(cgroup_path, process_id, "cgroup.procs")
644
}
645
646
fn read_sysfs_cpu_info_in_dir(cpu_dir: &str, cpu_id: usize, property: &str) -> Result<String> {
647
let path = Path::new(cpu_dir)
648
.join(format!("cpu{cpu_id}"))
649
.join(property);
650
651
std::fs::read_to_string(path).map_err(|e| e.into())
652
}
653
654
/// Queries the property of a specified CPU sysfs node.
655
fn parse_sysfs_cpu_info_vec(cpu_id: usize, property: &str) -> Result<Vec<u32>> {
656
parse_sysfs_cpu_info_vec_in_dir(CPU_DIR, cpu_id, property)
657
}
658
659
fn parse_sysfs_cpu_info_vec_in_dir(
660
cpu_dir: &str,
661
cpu_id: usize,
662
property: &str,
663
) -> Result<Vec<u32>> {
664
read_sysfs_cpu_info_in_dir(cpu_dir, cpu_id, property)?
665
.split_whitespace()
666
.map(|x| x.parse().map_err(|_| Error::new(libc::EINVAL)))
667
.collect()
668
}
669
670
/// Returns a list of supported frequencies in kHz for a given logical core.
671
pub fn logical_core_frequencies_khz(cpu_id: usize) -> Result<Vec<u32>> {
672
parse_sysfs_cpu_info_vec(cpu_id, "cpufreq/scaling_available_frequencies")
673
}
674
675
/// Queries the property of a specified CPU sysfs node.
676
fn parse_sysfs_cpu_info(cpu_id: usize, property: &str) -> Result<u32> {
677
parse_sysfs_cpu_info_in_dir(CPU_DIR, cpu_id, property)
678
}
679
680
fn parse_sysfs_cpu_info_in_dir(cpu_dir: &str, cpu_id: usize, property: &str) -> Result<u32> {
681
read_sysfs_cpu_info_in_dir(cpu_dir, cpu_id, property)?
682
.trim()
683
.parse()
684
.map_err(|_| Error::new(libc::EINVAL))
685
}
686
687
/// Returns the capacity (measure of performance) of a given logical core.
688
pub fn logical_core_capacity(cpu_id: usize) -> Result<u32> {
689
static CPU_MAX_FREQS: OnceLock<Option<Vec<u32>>> = OnceLock::new();
690
691
let cpu_capacity = parse_sysfs_cpu_info(cpu_id, "cpu_capacity")?;
692
693
// Collect and cache the maximum frequencies of all cores. We need to know
694
// the largest maximum frequency between all cores to reverse normalization,
695
// so collect all the values once on the first call to this function.
696
let cpu_max_freqs = CPU_MAX_FREQS.get_or_init(|| {
697
(0..number_of_logical_cores().ok()?)
698
.map(|cpu_id| logical_core_max_freq_khz(cpu_id).ok())
699
.collect()
700
});
701
702
if let Some(cpu_max_freqs) = cpu_max_freqs {
703
let largest_max_freq = *cpu_max_freqs.iter().max().ok_or(Error::new(EINVAL))?;
704
let cpu_max_freq = *cpu_max_freqs.get(cpu_id).ok_or(Error::new(EINVAL))?;
705
let normalized_cpu_capacity = (u64::from(cpu_capacity) * u64::from(largest_max_freq))
706
.checked_div(u64::from(cpu_max_freq))
707
.ok_or(Error::new(EINVAL))?;
708
normalized_cpu_capacity
709
.try_into()
710
.map_err(|_| Error::new(EINVAL))
711
} else {
712
// cpu-freq is not enabled. Fall back to using the normalized capacity.
713
Ok(cpu_capacity)
714
}
715
}
716
717
/// Returns the cluster ID of a given logical core.
718
pub fn logical_core_cluster_id(cpu_id: usize) -> Result<u32> {
719
parse_sysfs_cpu_info(cpu_id, "topology/physical_package_id")
720
}
721
722
/// Returns the maximum frequency (in kHz) of a given logical core.
723
pub fn logical_core_max_freq_khz(cpu_id: usize) -> Result<u32> {
724
parse_sysfs_cpu_info(cpu_id, "cpufreq/cpuinfo_max_freq")
725
}
726
727
/// Returns a bool if the CPU is online, or an error if there was an issue reading the system
728
/// properties.
729
pub fn is_cpu_online(cpu_id: usize) -> Result<bool> {
730
let result = parse_sysfs_cpu_info(cpu_id, "online");
731
match result {
732
Err(e) => {
733
if e.errno() == libc::ENOENT {
734
// Some systems don't have a file for CPU 0 if the system considers CPU 0 to be
735
// always-online. Or if CONFIG_HOTPLUG_CPU=n, then the "online" property/file will
736
// never be created in drivers/base/cpu.c.
737
Ok(true)
738
} else {
739
Err(e)
740
}
741
}
742
Ok(online) => Ok(online == 1),
743
}
744
}
745
746
/// Scheduling attributes passed by pointer to the `sched_setattr` syscall.
///
/// The layout is `#[repr(C)]` because the struct is handed directly to the kernel.
#[repr(C)]
pub struct sched_attr {
    /// Size of this structure in bytes; filled in by [`Default`].
    pub size: u32,

    pub sched_policy: u32,
    pub sched_flags: u64,
    pub sched_nice: i32,

    pub sched_priority: u32,

    pub sched_runtime: u64,
    pub sched_deadline: u64,
    pub sched_period: u64,

    pub sched_util_min: u32,
    pub sched_util_max: u32,
}

impl Default for sched_attr {
    /// Returns a `sched_attr` with `size` set to the struct's size and every other field zero.
    fn default() -> Self {
        let struct_size = mem::size_of::<Self>() as u32;
        Self {
            size: struct_size,
            sched_policy: 0,
            sched_flags: 0,
            sched_nice: 0,
            sched_priority: 0,
            sched_runtime: 0,
            sched_deadline: 0,
            sched_period: 0,
            sched_util_min: 0,
            sched_util_max: 0,
        }
    }
}
780
781
pub fn sched_setattr(pid: Pid, attr: &mut sched_attr, flags: u32) -> Result<()> {
782
// SAFETY: Safe becuase all the args are valid and the return valud is checked.
783
let ret = unsafe {
784
libc::syscall(
785
libc::SYS_sched_setattr,
786
pid as usize,
787
attr as *mut sched_attr as usize,
788
flags as usize,
789
)
790
};
791
792
if ret < 0 {
793
return Err(Error::last());
794
}
795
Ok(())
796
}
797
798
#[cfg(test)]
mod tests {
    use std::fs::create_dir_all;
    use std::fs::File;
    use std::io::Write;
    use std::os::fd::AsRawFd;

    use tempfile::TempDir;

    use super::*;
    use crate::unix::add_fd_flags;

    /// Writes `content` to `path`, creating any missing parent directories first.
    fn create_temp_file(path: &Path, content: &str) {
        if let Some(parent_dir) = path.parent() {
            create_dir_all(parent_dir).unwrap();
        }
        File::create(path)
            .unwrap()
            .write_all(content.as_bytes())
            .unwrap();
    }

    /// A pipe returned by `new_pipe_full` must reject further non-blocking writes.
    #[test]
    fn pipe_size_and_fill() {
        let (_rx, mut tx) = new_pipe_full().expect("Failed to pipe");

        // To check that setting the size worked, set the descriptor to non blocking and check that
        // write returns an error.
        add_fd_flags(tx.as_raw_fd(), libc::O_NONBLOCK).expect("Failed to set tx non blocking");
        let extra_bytes = [0u8; 8];
        tx.write(&extra_bytes)
            .expect_err("Write after fill didn't fail");
    }

    /// A single-valued property file is parsed into its `u32` value.
    #[test]
    fn test_parse_sysfs_cpu_info() {
        let temp_dir = TempDir::new().unwrap();
        let cpu_dir = temp_dir.path().join("sys/devices/system/cpu");
        let property = "cpufreq/cpuinfo_max_freq";
        create_temp_file(&cpu_dir.join("cpu0").join(property), "1000");

        let parsed = parse_sysfs_cpu_info_in_dir(cpu_dir.to_str().unwrap(), 0, property).unwrap();
        assert_eq!(parsed, 1000);
    }

    /// A missing single-valued property file surfaces as `ENOENT`.
    #[test]
    fn test_parse_sysfs_cpu_info_error() {
        let temp_dir = TempDir::new().unwrap();
        let cpu_dir = temp_dir.path().join("sys/devices/system/cpu");
        let property = "cpufreq/cpuinfo_max_freq";

        // Not creating the sysinfo file should result in an error trying to read from it.
        let err = parse_sysfs_cpu_info_in_dir(cpu_dir.to_str().unwrap(), 0, property).unwrap_err();
        assert_eq!(err, Error::new(libc::ENOENT));
    }

    /// A whitespace-separated property file is parsed into a vector of `u32` values.
    #[test]
    fn test_parse_sysfs_cpu_info_vec() {
        let temp_dir = TempDir::new().unwrap();
        let cpu_dir = temp_dir.path().join("sys/devices/system/cpu");
        let property = "cpufreq/scaling_available_frequencies";
        create_temp_file(&cpu_dir.join("cpu0").join(property), "1000 2000");

        let parsed =
            parse_sysfs_cpu_info_vec_in_dir(cpu_dir.to_str().unwrap(), 0, property).unwrap();
        assert_eq!(parsed, vec![1000, 2000]);
    }

    /// A missing list-valued property file surfaces as `ENOENT`.
    #[test]
    fn test_parse_sysfs_cpu_info_vec_error() {
        let temp_dir = TempDir::new().unwrap();
        let cpu_dir = temp_dir.path().join("sys/devices/system/cpu");
        let property = "cpufreq/scaling_available_frequencies";

        // Not creating the sysinfo file should result in an error trying to read from it.
        let err =
            parse_sysfs_cpu_info_vec_in_dir(cpu_dir.to_str().unwrap(), 0, property).unwrap_err();
        assert_eq!(err, Error::new(libc::ENOENT));
    }
}
893
894