Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
google
GitHub Repository: google/crosvm
Path: blob/main/devices/src/virtio/fs/passthrough.rs
5394 views
1
// Copyright 2019 The ChromiumOS Authors
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
use std::borrow::Cow;
6
use std::cell::RefCell;
7
use std::cmp;
8
use std::collections::btree_map;
9
use std::collections::BTreeMap;
10
use std::ffi::CStr;
11
use std::ffi::CString;
12
#[cfg(feature = "fs_runtime_ugid_map")]
13
use std::ffi::OsStr;
14
use std::fs::File;
15
use std::io;
16
use std::mem;
17
use std::mem::size_of;
18
use std::mem::MaybeUninit;
19
use std::os::raw::c_int;
20
use std::os::raw::c_long;
21
#[cfg(feature = "fs_runtime_ugid_map")]
22
use std::os::unix::ffi::OsStrExt;
23
#[cfg(feature = "fs_runtime_ugid_map")]
24
use std::path::Path;
25
use std::ptr;
26
use std::ptr::addr_of;
27
use std::ptr::addr_of_mut;
28
use std::sync::atomic::AtomicBool;
29
use std::sync::atomic::AtomicU64;
30
use std::sync::atomic::Ordering;
31
use std::sync::Arc;
32
use std::sync::MutexGuard;
33
#[cfg(feature = "fs_permission_translation")]
34
use std::sync::RwLock;
35
use std::time::Duration;
36
37
#[cfg(feature = "arc_quota")]
38
use base::debug;
39
use base::error;
40
use base::ioctl_ior_nr;
41
use base::ioctl_iow_nr;
42
use base::ioctl_iowr_nr;
43
use base::ioctl_with_mut_ptr;
44
use base::ioctl_with_ptr;
45
use base::syscall;
46
use base::unix::FileFlags;
47
use base::warn;
48
use base::AsRawDescriptor;
49
use base::FromRawDescriptor;
50
use base::IntoRawDescriptor;
51
use base::IoctlNr;
52
use base::Protection;
53
use base::RawDescriptor;
54
use fuse::filesystem::Context;
55
use fuse::filesystem::DirectoryIterator;
56
use fuse::filesystem::Entry;
57
use fuse::filesystem::FileSystem;
58
use fuse::filesystem::FsOptions;
59
use fuse::filesystem::GetxattrReply;
60
use fuse::filesystem::IoctlFlags;
61
use fuse::filesystem::IoctlReply;
62
use fuse::filesystem::ListxattrReply;
63
use fuse::filesystem::OpenOptions;
64
use fuse::filesystem::RemoveMappingOne;
65
use fuse::filesystem::SetattrValid;
66
use fuse::filesystem::ZeroCopyReader;
67
use fuse::filesystem::ZeroCopyWriter;
68
use fuse::filesystem::ROOT_ID;
69
use fuse::sys::WRITE_KILL_PRIV;
70
use fuse::Mapper;
71
#[cfg(feature = "arc_quota")]
72
use protobuf::Message;
73
use sync::Mutex;
74
#[cfg(feature = "arc_quota")]
75
use system_api::client::OrgChromiumSpaced;
76
#[cfg(feature = "arc_quota")]
77
use system_api::spaced::SetProjectIdReply;
78
#[cfg(feature = "arc_quota")]
79
use system_api::spaced::SetProjectInheritanceFlagReply;
80
use zerocopy::FromBytes;
81
use zerocopy::FromZeros;
82
use zerocopy::Immutable;
83
use zerocopy::IntoBytes;
84
use zerocopy::KnownLayout;
85
86
#[cfg(feature = "arc_quota")]
87
use crate::virtio::fs::arc_ioctl::FsPathXattrDataBuffer;
88
#[cfg(feature = "arc_quota")]
89
use crate::virtio::fs::arc_ioctl::FsPermissionDataBuffer;
90
#[cfg(feature = "arc_quota")]
91
use crate::virtio::fs::arc_ioctl::XattrData;
92
use crate::virtio::fs::caps::Capability;
93
use crate::virtio::fs::caps::Caps;
94
use crate::virtio::fs::caps::Set as CapSet;
95
use crate::virtio::fs::caps::Value as CapValue;
96
use crate::virtio::fs::config::CachePolicy;
97
use crate::virtio::fs::config::Config;
98
#[cfg(feature = "fs_permission_translation")]
99
use crate::virtio::fs::config::PermissionData;
100
use crate::virtio::fs::expiring_map::ExpiringMap;
101
use crate::virtio::fs::multikey::MultikeyBTreeMap;
102
use crate::virtio::fs::read_dir::ReadDir;
103
104
// C string literals used in this file.
const EMPTY_CSTR: &CStr = c"";
const PROC_CSTR: &CStr = c"/proc";
const UNLABELED_CSTR: &CStr = c"unlabeled";

// Extended attribute names. `rewrite_xattr_name` prepends `USER_VIRTIOFS_XATTR` to names that
// start with `SECURITY_XATTR`, leaving `SELINUX_XATTR` untouched.
const USER_VIRTIOFS_XATTR: &[u8] = b"user.virtiofs.";
const SECURITY_XATTR: &[u8] = b"security.";
const SELINUX_XATTR: &[u8] = b"security.selinux";

// Byte lengths of the key fields in `fscrypt_policy_v1` and `fscrypt_policy_v2` respectively.
const FSCRYPT_KEY_DESCRIPTOR_SIZE: usize = 8;
const FSCRYPT_KEY_IDENTIFIER_SIZE: usize = 16;

// NOTE(review): presumably mirrors the kernel's `FS_PROJINHERIT_FL` inode flag — confirm against
// the Linux UAPI headers.
#[cfg(feature = "arc_quota")]
const FS_PROJINHERIT_FL: c_int = 0x2000_0000;

// 25 seconds is the default timeout for dbus-send.
#[cfg(feature = "arc_quota")]
const DEFAULT_DBUS_TIMEOUT: Duration = Duration::from_secs(25);
121
122
/// Thin wrapper around `cros_tracing::trace_event!()` for VirtioFS calls.
///
/// Callers pass the virtio-fs tag first and the event name second; the macro forwards them to
/// `trace_event!` as `(VirtioFs, name, tag, args...)`. At least one trailing argument is
/// required.
macro_rules! fs_trace {
    ($fs_tag:expr, $event:expr, $($extra:expr),+) => {
        cros_tracing::trace_event!(VirtioFs, $event, $fs_tag, $($extra),*)
    };
}
128
129
#[repr(C)]
130
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
131
struct fscrypt_policy_v1 {
132
_version: u8,
133
_contents_encryption_mode: u8,
134
_filenames_encryption_mode: u8,
135
_flags: u8,
136
_master_key_descriptor: [u8; FSCRYPT_KEY_DESCRIPTOR_SIZE],
137
}
138
139
#[repr(C)]
140
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
141
struct fscrypt_policy_v2 {
142
_version: u8,
143
_contents_encryption_mode: u8,
144
_filenames_encryption_mode: u8,
145
_flags: u8,
146
__reserved: [u8; 4],
147
master_key_identifier: [u8; FSCRYPT_KEY_IDENTIFIER_SIZE],
148
}
149
150
#[repr(C)]
151
#[derive(Copy, Clone, FromBytes, Immutable, KnownLayout)]
152
union fscrypt_policy {
153
_version: u8,
154
_v1: fscrypt_policy_v1,
155
_v2: fscrypt_policy_v2,
156
}
157
158
#[repr(C)]
159
#[derive(Copy, Clone, FromBytes, Immutable, KnownLayout)]
160
struct fscrypt_get_policy_ex_arg {
161
policy_size: u64, /* input/output */
162
policy: fscrypt_policy, /* output */
163
}
164
165
impl From<&fscrypt_get_policy_ex_arg> for &[u8] {
166
fn from(value: &fscrypt_get_policy_ex_arg) -> Self {
167
assert!(value.policy_size <= size_of::<fscrypt_policy>() as u64);
168
let data_raw: *const fscrypt_get_policy_ex_arg = value;
169
// SAFETY: the length of the output slice is asserted to be within the struct it points to
170
unsafe {
171
std::slice::from_raw_parts(
172
data_raw.cast(),
173
value.policy_size as usize + size_of::<u64>(),
174
)
175
}
176
}
177
}
178
179
// Request number for the extended "get encryption policy" ioctl: declared with
// `ioctl_iowr_nr!` using type 'f', number 22 and a `[u8; 9]` size argument.
ioctl_iowr_nr!(FS_IOC_GET_ENCRYPTION_POLICY_EX, 'f' as u32, 22, [u8; 9]);
180
181
#[repr(C)]
182
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
183
struct fsxattr {
184
fsx_xflags: u32, /* xflags field value (get/set) */
185
fsx_extsize: u32, /* extsize field value (get/set) */
186
fsx_nextents: u32, /* nextents field value (get) */
187
fsx_projid: u32, /* project identifier (get/set) */
188
fsx_cowextsize: u32, /* CoW extsize field value (get/set) */
189
fsx_pad: [u8; 8],
190
}
191
192
ioctl_ior_nr!(FS_IOC_FSGETXATTR, 'X' as u32, 31, fsxattr);
193
ioctl_iow_nr!(FS_IOC_FSSETXATTR, 'X' as u32, 32, fsxattr);
194
195
ioctl_ior_nr!(FS_IOC_GETFLAGS, 'f' as u32, 1, c_long);
196
ioctl_iow_nr!(FS_IOC_SETFLAGS, 'f' as u32, 2, c_long);
197
198
ioctl_ior_nr!(FS_IOC32_GETFLAGS, 'f' as u32, 1, u32);
199
ioctl_iow_nr!(FS_IOC32_SETFLAGS, 'f' as u32, 2, u32);
200
201
ioctl_ior_nr!(FS_IOC64_GETFLAGS, 'f' as u32, 1, u64);
202
ioctl_iow_nr!(FS_IOC64_SETFLAGS, 'f' as u32, 2, u64);
203
204
#[cfg(feature = "arc_quota")]
205
ioctl_iow_nr!(FS_IOC_SETPERMISSION, 'f' as u32, 1, FsPermissionDataBuffer);
206
#[cfg(feature = "arc_quota")]
207
ioctl_iow_nr!(FS_IOC_SETPATHXATTR, 'f' as u32, 1, FsPathXattrDataBuffer);
208
209
#[repr(C)]
210
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
211
struct fsverity_enable_arg {
212
_version: u32,
213
_hash_algorithm: u32,
214
_block_size: u32,
215
salt_size: u32,
216
salt_ptr: u64,
217
sig_size: u32,
218
__reserved1: u32,
219
sig_ptr: u64,
220
__reserved2: [u64; 11],
221
}
222
223
#[repr(C)]
224
#[derive(Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)]
225
struct fsverity_digest {
226
_digest_algorithm: u16,
227
digest_size: u16,
228
// __u8 digest[];
229
}
230
231
ioctl_iow_nr!(FS_IOC_ENABLE_VERITY, 'f' as u32, 133, fsverity_enable_arg);
232
ioctl_iowr_nr!(FS_IOC_MEASURE_VERITY, 'f' as u32, 134, fsverity_digest);
233
234
/// Identifier used as the key of `PassthroughFs::inodes`.
pub type Inode = u64;
/// Identifier used as the key of `PassthroughFs::handles`.
type Handle = u64;
236
237
#[derive(Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq)]
238
struct InodeAltKey {
239
ino: libc::ino64_t,
240
dev: libc::dev_t,
241
}
242
243
/// Coarse classification of a file: regular file, directory, or anything else.
#[derive(Debug, PartialEq, Eq)]
enum FileType {
    Regular,
    Directory,
    Other,
}
249
250
impl From<libc::mode_t> for FileType {
251
fn from(mode: libc::mode_t) -> Self {
252
match mode & libc::S_IFMT {
253
libc::S_IFREG => FileType::Regular,
254
libc::S_IFDIR => FileType::Directory,
255
_ => FileType::Other,
256
}
257
}
258
}
259
260
#[derive(Debug)]
261
struct OpenedFile {
262
file: Option<File>,
263
open_flags: libc::c_int,
264
}
265
266
impl AsRawDescriptor for OpenedFile {
267
fn as_raw_descriptor(&self) -> RawDescriptor {
268
self.file().as_raw_descriptor()
269
}
270
}
271
272
impl OpenedFile {
273
fn new(file: File, open_flags: libc::c_int) -> Self {
274
OpenedFile {
275
file: Some(file),
276
open_flags,
277
}
278
}
279
280
fn file(&self) -> &File {
281
self.file.as_ref().expect("must have a file")
282
}
283
284
fn file_mut(&mut self) -> &mut File {
285
self.file.as_mut().expect("must have a file")
286
}
287
288
/// Leaks the file descriptor and makes the struct unusable.
289
///
290
/// This is an optimization to speed up dropping `OpenedFile` instances, which is useful
291
/// during an abrupt shutdown. Instead of properly closing the file descriptor, which
292
/// involves a syscall, this function effectively forgets the file descriptor, relying on the
293
/// OS to clean it up when the process terminates.
294
fn leak_fd(&mut self) {
295
let f = self.file.take().expect("must have a file");
296
let _ = f.into_raw_descriptor();
297
}
298
}
299
300
#[derive(Debug)]
301
struct InodeData {
302
inode: Inode,
303
// (File, open_flags)
304
file: Mutex<OpenedFile>,
305
refcount: AtomicU64,
306
filetype: FileType,
307
path: String,
308
// This needs to be atomic because we need to set it through a shared reference.
309
unsafe_leak_fd: AtomicBool,
310
}
311
312
impl AsRawDescriptor for InodeData {
313
fn as_raw_descriptor(&self) -> RawDescriptor {
314
self.file.lock().as_raw_descriptor()
315
}
316
}
317
318
impl Drop for InodeData {
319
/// If `unsafe_leak_fd` is set, this `drop` implementation will "leak" the file descriptor.
320
/// This is an optimization to speed up the cleanup process, based on the
321
/// assumption that the OS will handle the cleanup of file descriptors after the process
322
/// terminates. This is only okay if the process is guaranteed to terminate immediately
323
/// after the `PassthroughFs` instance is dropped.
324
fn drop(&mut self) {
325
if self.unsafe_leak_fd.load(Ordering::Relaxed) {
326
self.file.get_mut().leak_fd();
327
}
328
}
329
}
330
331
impl InodeData {
332
fn set_unsafe_leak_fd(&self) {
333
self.unsafe_leak_fd.store(true, Ordering::Relaxed);
334
}
335
}
336
337
#[derive(Debug)]
338
struct HandleData {
339
inode: Inode,
340
file: Mutex<OpenedFile>,
341
342
unsafe_leak_fd: AtomicBool,
343
}
344
345
impl AsRawDescriptor for HandleData {
346
fn as_raw_descriptor(&self) -> RawDescriptor {
347
self.file.lock().as_raw_descriptor()
348
}
349
}
350
351
impl Drop for HandleData {
352
/// If `unsafe_leak_fd` is set, this `drop` implementation will "leak" the file descriptor by
353
/// forgetting it. This is an optimization to speed up the cleanup process, based on the
354
/// assumption that the OS will handle the cleanup of file descriptors after the process
355
// terminates. This is only safe if the process is guaranteed to terminate immediately
356
/// after the `PassthroughFs` instance is dropped.
357
fn drop(&mut self) {
358
if self.unsafe_leak_fd.load(Ordering::Relaxed) {
359
self.file.get_mut().leak_fd();
360
}
361
}
362
}
363
364
impl HandleData {
365
fn set_unsafe_leak_fd(&self) {
366
self.unsafe_leak_fd.store(true, Ordering::Relaxed);
367
}
368
}
369
370
/// Generates a guard type `$name` that switches one credential of the current thread.
///
/// `$ty` is the credential's integer type and `$syscall_nr` the raw syscall used to change it.
/// The syscall is invoked as `syscall(nr, -1, value, -1)`, i.e. only the middle id is modified.
macro_rules! scoped_cred {
    ($name:ident, $ty:ty, $syscall_nr:expr) => {
        #[derive(Debug)]
        struct $name {
            old: $ty,
        }

        impl $name {
            // Changes the effective uid/gid of the current thread to `val`. Changes the thread's
            // credentials back to `old` when the returned struct is dropped.
            fn new(val: $ty, old: $ty) -> io::Result<Option<$name>> {
                if val == old {
                    // Nothing to do since we already have the correct value.
                    return Ok(None);
                }

                // We want credential changes to be per-thread because otherwise
                // we might interfere with operations being carried out on other
                // threads with different uids/gids. However, posix requires that
                // all threads in a process share the same credentials. To do this
                // libc uses signals to ensure that when one thread changes its
                // credentials the other threads do the same thing.
                //
                // So instead we invoke the syscall directly in order to get around
                // this limitation. Another option is to use the setfsuid and
                // setfsgid systems calls. However since those calls have no way to
                // return an error, it's preferable to do this instead.

                // SAFETY: this call is safe because it doesn't modify any memory and we
                // check the return value.
                let rc = unsafe { libc::syscall($syscall_nr, -1, val, -1) };
                if rc != 0 {
                    return Err(io::Error::last_os_error());
                }
                Ok(Some($name { old }))
            }
        }

        impl Drop for $name {
            fn drop(&mut self) {
                // SAFETY: trivially safe
                let rc = unsafe { libc::syscall($syscall_nr, -1, self.old, -1) };
                if rc < 0 {
                    // There is no way to report the failure from `drop`, so only log it.
                    error!(
                        "failed to change credentials back to {}: {}",
                        self.old,
                        io::Error::last_os_error(),
                    );
                }
            }
        }
    };
}
424
scoped_cred!(ScopedUid, libc::uid_t, libc::SYS_setresuid);
425
scoped_cred!(ScopedGid, libc::gid_t, libc::SYS_setresgid);
426
427
const SYS_GETEUID: libc::c_long = libc::SYS_geteuid;
428
const SYS_GETEGID: libc::c_long = libc::SYS_getegid;
429
430
thread_local! {
431
// SAFETY: both calls take no parameters and only return an integer value. The kernel also
432
// guarantees that they can never fail.
433
static THREAD_EUID: libc::uid_t = unsafe { libc::syscall(SYS_GETEUID) as libc::uid_t };
434
// SAFETY: both calls take no parameters and only return an integer value. The kernel also
435
// guarantees that they can never fail.
436
static THREAD_EGID: libc::gid_t = unsafe { libc::syscall(SYS_GETEGID) as libc::gid_t };
437
}
438
439
fn set_creds(
440
uid: libc::uid_t,
441
gid: libc::gid_t,
442
) -> io::Result<(Option<ScopedUid>, Option<ScopedGid>)> {
443
let olduid = THREAD_EUID.with(|uid| *uid);
444
let oldgid = THREAD_EGID.with(|gid| *gid);
445
446
// We have to change the gid before we change the uid because if we change the uid first then we
447
// lose the capability to change the gid. However changing back can happen in any order.
448
ScopedGid::new(gid, oldgid).and_then(|gid| Ok((ScopedUid::new(uid, olduid)?, gid)))
449
}
450
451
// Per-thread slot for the `/proc/thread-self/attr/fscreate` file. It starts out as `None` and is
// filled in by `ScopedSecurityContext::new` (through `open_fscreate`) on first use.
thread_local!(static THREAD_FSCREATE: RefCell<Option<File>> = const { RefCell::new(None) });
452
453
// Opens and returns a write-only handle to /proc/thread-self/attr/fscreate. Panics if it fails to
454
// open the file.
455
fn open_fscreate(proc: &File) -> File {
456
let fscreate = c"thread-self/attr/fscreate";
457
458
// SAFETY: this doesn't modify any memory and we check the return value.
459
let raw_descriptor = unsafe {
460
libc::openat(
461
proc.as_raw_descriptor(),
462
fscreate.as_ptr(),
463
libc::O_CLOEXEC | libc::O_WRONLY,
464
)
465
};
466
467
// We don't expect this to fail and we're not in a position to return an error here so just
468
// panic.
469
if raw_descriptor < 0 {
470
panic!(
471
"Failed to open /proc/thread-self/attr/fscreate: {}",
472
io::Error::last_os_error()
473
);
474
}
475
476
// SAFETY: safe because we just opened this descriptor.
477
unsafe { File::from_raw_descriptor(raw_descriptor) }
478
}
479
480
/// Guard returned by `ScopedSecurityContext::new` after it has written a security context to the
/// calling thread's `fscreate` file. Dropping the guard issues a zero-length write to that file.
struct ScopedSecurityContext;
481
482
impl ScopedSecurityContext {
483
fn new(proc: &File, ctx: &CStr) -> io::Result<ScopedSecurityContext> {
484
THREAD_FSCREATE.with(|thread_fscreate| {
485
let mut fscreate = thread_fscreate.borrow_mut();
486
let file = fscreate.get_or_insert_with(|| open_fscreate(proc));
487
// SAFETY: this doesn't modify any memory and we check the return value.
488
let ret = unsafe {
489
libc::write(
490
file.as_raw_descriptor(),
491
ctx.as_ptr() as *const libc::c_void,
492
ctx.to_bytes_with_nul().len(),
493
)
494
};
495
if ret < 0 {
496
Err(io::Error::last_os_error())
497
} else {
498
Ok(ScopedSecurityContext)
499
}
500
})
501
}
502
}
503
504
impl Drop for ScopedSecurityContext {
505
fn drop(&mut self) {
506
THREAD_FSCREATE.with(|thread_fscreate| {
507
// expect is safe here because the thread local would have been initialized by the call
508
// to `new` above.
509
let fscreate = thread_fscreate.borrow();
510
let file = fscreate
511
.as_ref()
512
.expect("Uninitialized thread-local when dropping ScopedSecurityContext");
513
514
// SAFETY: this doesn't modify any memory and we check the return value.
515
let ret = unsafe { libc::write(file.as_raw_descriptor(), ptr::null(), 0) };
516
517
if ret < 0 {
518
warn!(
519
"Failed to restore security context: {}",
520
io::Error::last_os_error()
521
);
522
}
523
})
524
}
525
}
526
527
struct ScopedUmask {
528
old: libc::mode_t,
529
mask: libc::mode_t,
530
}
531
532
impl ScopedUmask {
533
fn new(mask: libc::mode_t) -> ScopedUmask {
534
ScopedUmask {
535
// SAFETY: this doesn't modify any memory and always succeeds.
536
old: unsafe { libc::umask(mask) },
537
mask,
538
}
539
}
540
}
541
542
impl Drop for ScopedUmask {
543
fn drop(&mut self) {
544
// SAFETY: this doesn't modify any memory and always succeeds.
545
let previous = unsafe { libc::umask(self.old) };
546
debug_assert_eq!(
547
previous, self.mask,
548
"umask changed while holding ScopedUmask"
549
);
550
}
551
}
552
553
/// Guard created by `drop_cap_fsetid`. It owns the thread's `Caps` and, when dropped, tries to set
/// `Capability::Fsetid` in the effective set again.
struct ScopedFsetid(Caps);
554
impl Drop for ScopedFsetid {
555
fn drop(&mut self) {
556
if let Err(e) = raise_cap_fsetid(&mut self.0) {
557
error!(
558
"Failed to restore CAP_FSETID: {}. Some operations may be broken.",
559
e
560
)
561
}
562
}
563
}
564
565
fn raise_cap_fsetid(c: &mut Caps) -> io::Result<()> {
566
c.update(&[Capability::Fsetid], CapSet::Effective, CapValue::Set)?;
567
c.apply()
568
}
569
570
// Drops CAP_FSETID from the effective set for the current thread and returns an RAII guard that
571
// adds the capability back when it is dropped.
572
fn drop_cap_fsetid() -> io::Result<ScopedFsetid> {
573
let mut caps = Caps::for_current_thread()?;
574
caps.update(&[Capability::Fsetid], CapSet::Effective, CapValue::Clear)?;
575
caps.apply()?;
576
Ok(ScopedFsetid(caps))
577
}
578
579
fn ebadf() -> io::Error {
580
io::Error::from_raw_os_error(libc::EBADF)
581
}
582
583
fn eexist() -> io::Error {
584
io::Error::from_raw_os_error(libc::EEXIST)
585
}
586
587
// Returns the `stat64` of the file that `f` itself refers to (empty path, symlinks not followed).
fn stat<F: AsRawDescriptor + ?Sized>(f: &F) -> io::Result<libc::stat64> {
    let mut out = MaybeUninit::<libc::stat64>::zeroed();
    let at_flags = libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW;

    // SAFETY: the kernel will only write data in `out` and we check the return value.
    syscall!(unsafe {
        libc::fstatat64(
            f.as_raw_descriptor(),
            EMPTY_CSTR.as_ptr(),
            out.as_mut_ptr(),
            at_flags,
        )
    })?;

    // SAFETY: the kernel guarantees that the struct is now fully initialized.
    Ok(unsafe { out.assume_init() })
}
603
604
// Returns the `stat64` of the entry `name` inside directory `dir`, without following a symlink
// in the final path component.
fn statat<D: AsRawDescriptor>(dir: &D, name: &CStr) -> io::Result<libc::stat64> {
    let mut out = MaybeUninit::<libc::stat64>::zeroed();
    let at_flags = libc::AT_SYMLINK_NOFOLLOW;

    // SAFETY: the kernel will only write data in `out` and we check the return value.
    syscall!(unsafe {
        libc::fstatat64(
            dir.as_raw_descriptor(),
            name.as_ptr(),
            out.as_mut_ptr(),
            at_flags,
        )
    })?;

    // SAFETY: the kernel guarantees that the struct is now fully initialized.
    Ok(unsafe { out.assume_init() })
}
620
621
#[cfg(feature = "arc_quota")]
622
fn is_android_project_id(project_id: u32) -> bool {
    // The following constants define the valid ranges of project IDs used by
    // Android and are taken from android_filesystem_config.h in the Android
    // codebase.
    //
    // Project IDs reserved for Android files on external storage. Total 100 IDs
    // from PROJECT_ID_EXT_DEFAULT (1000) are reserved.
    const ANDROID_FILES_FIRST: u32 = 1000;
    const ANDROID_FILES_LAST: u32 = 1099;
    // Project IDs reserved for Android apps.
    // The lower limit of the range is PROJECT_ID_EXT_DATA_START.
    // The upper limit of the range differs before and after T. Here we use that
    // of T (PROJECT_ID_APP_CACHE_END) as it is larger.
    const ANDROID_APPS_FIRST: u32 = 20000;
    const ANDROID_APPS_LAST: u32 = 69999;

    matches!(
        project_id,
        ANDROID_FILES_FIRST..=ANDROID_FILES_LAST | ANDROID_APPS_FIRST..=ANDROID_APPS_LAST
    )
}
639
640
/// Per-directory cache for `PassthroughFs::ascii_casefold_lookup()`.
///
/// The key of the underlying `BTreeMap` is a lower-cased file name in the directory.
/// The value is the case-sensitive file name stored in the host file system.
/// We assume that if PassthroughFs has exclusive access to the filesystem, this cache exhaustively
/// covers all file names that exist within the directory.
/// So every `PassthroughFs` handler that adds or removes files in the directory is expected to
/// update this cache.
struct CasefoldCache(BTreeMap<Vec<u8>, CString>);
649
650
impl CasefoldCache {
651
fn new(dir: &InodeData) -> io::Result<Self> {
652
let mut mp = BTreeMap::new();
653
654
let mut buf = [0u8; 1024];
655
let mut offset = 0;
656
loop {
657
let mut read_dir = ReadDir::new(dir, offset, &mut buf[..])?;
658
if read_dir.remaining() == 0 {
659
break;
660
}
661
662
while let Some(entry) = read_dir.next() {
663
offset = entry.offset as libc::off64_t;
664
let entry_name = entry.name;
665
mp.insert(
666
entry_name.to_bytes().to_ascii_lowercase(),
667
entry_name.to_owned(),
668
);
669
}
670
}
671
Ok(Self(mp))
672
}
673
674
fn insert(&mut self, name: &CStr) {
675
let lower_case = name.to_bytes().to_ascii_lowercase();
676
self.0.insert(lower_case, name.into());
677
}
678
679
fn lookup(&self, name: &[u8]) -> Option<CString> {
680
let lower = name.to_ascii_lowercase();
681
self.0.get(&lower).cloned()
682
}
683
684
fn remove(&mut self, name: &CStr) {
685
let lower_case = name.to_bytes().to_ascii_lowercase();
686
self.0.remove(&lower_case);
687
}
688
}
689
690
/// Time expiring mapping from an inode of a directory to `CasefoldCache` for the directory.
691
/// Each entry will be expired after `timeout`.
692
/// When ascii_casefold is disabled, this struct does nothing.
693
struct ExpiringCasefoldLookupCaches {
694
inner: ExpiringMap<Inode, CasefoldCache>,
695
}
696
697
impl ExpiringCasefoldLookupCaches {
698
fn new(timeout: Duration) -> Self {
699
Self {
700
inner: ExpiringMap::new(timeout),
701
}
702
}
703
704
fn insert(&mut self, parent: Inode, name: &CStr) {
705
if let Some(dir_cache) = self.inner.get_mut(&parent) {
706
dir_cache.insert(name);
707
}
708
}
709
710
fn remove(&mut self, parent: Inode, name: &CStr) {
711
if let Some(dir_cache) = self.inner.get_mut(&parent) {
712
dir_cache.remove(name);
713
}
714
}
715
716
fn forget(&mut self, parent: Inode) {
717
self.inner.remove(&parent);
718
}
719
720
/// Get `CasefoldCache` for the given directory.
721
/// If the cache doesn't exist, generate it by fetching directory information with
722
/// `getdents64()`.
723
fn get(&mut self, parent: &InodeData) -> io::Result<&CasefoldCache> {
724
self.inner
725
.get_or_insert_with(&parent.inode, || CasefoldCache::new(parent))
726
}
727
728
#[cfg(test)]
729
fn exists_in_cache(&mut self, parent: Inode, name: &CStr) -> bool {
730
if let Some(dir_cache) = self.inner.get(&parent) {
731
dir_cache.lookup(name.to_bytes()).is_some()
732
} else {
733
false
734
}
735
}
736
}
737
738
#[cfg(feature = "fs_permission_translation")]
impl PermissionData {
    /// Returns true when `path` starts with this entry's `perm_path`.
    ///
    /// NOTE(review): this is a plain string-prefix test, so it does not stop at path component
    /// boundaries — confirm that this is what callers expect.
    pub(crate) fn need_set_permission(&self, path: &str) -> bool {
        let prefix = &self.perm_path;
        path.starts_with(prefix)
    }
}
744
745
/// A file system that simply "passes through" all requests it receives to the underlying file
746
/// system. To keep the implementation simple it servers the contents of its root directory. Users
747
/// that wish to serve only a specific directory should set up the environment so that that
748
/// directory ends up as the root of the file system process. One way to accomplish this is via a
749
/// combination of mount namespaces and the pivot_root system call.
750
///
751
/// # Safety
752
///
753
/// The `Drop` implementation for this struct intentionally leaks all open file
754
/// descriptors. It is **critical** that an instance of `PassthroughFs` is
755
/// only dropped immediately prior to process termination. Failure to uphold
756
/// this invariant **will** result in resource leaks. This is a deliberate
757
/// performance optimization for abrupt shutdowns, where we let the OS
758
/// handle resource cleanup.
759
pub struct PassthroughFs {
760
// Mutex that must be acquired before executing a process-wide operation such as fchdir.
761
process_lock: Mutex<()>,
762
// virtio-fs tag that the guest uses when mounting. This is only used for debugging
763
// when tracing is enabled.
764
tag: String,
765
766
// File descriptors for various points in the file system tree.
767
inodes: Mutex<MultikeyBTreeMap<Inode, InodeAltKey, Arc<InodeData>>>,
768
next_inode: AtomicU64,
769
770
// File descriptors for open files and directories. Unlike the fds in `inodes`, these _can_ be
771
// used for reading and writing data.
772
handles: Mutex<BTreeMap<Handle, Arc<HandleData>>>,
773
next_handle: AtomicU64,
774
775
// File descriptor pointing to the `/proc` directory. This is used to convert an fd from
776
// `inodes` into one that can go into `handles`. This is accomplished by reading the
777
// `self/fd/{}` symlink. We keep an open fd here in case the file system tree that we are meant
778
// to be serving doesn't have access to `/proc`.
779
proc: File,
780
781
// Whether writeback caching is enabled for this directory. This will only be true when
782
// `cfg.writeback` is true and `init` was called with `FsOptions::WRITEBACK_CACHE`.
783
writeback: AtomicBool,
784
785
// Whether zero message opens are supported by the kernel driver.
786
zero_message_open: AtomicBool,
787
788
// Whether zero message opendir is supported by the kernel driver.
789
zero_message_opendir: AtomicBool,
790
791
// Used to communicate with other processes using D-Bus.
792
#[cfg(feature = "arc_quota")]
793
dbus_connection: Option<Mutex<dbus::blocking::Connection>>,
794
#[cfg(feature = "arc_quota")]
795
dbus_fd: Option<std::os::unix::io::RawFd>,
796
797
// Time-expiring cache for `ascii_casefold_lookup()`.
798
// The key is an inode of a directory, and the value is a cache for the directory.
799
// Each value will be expired `cfg.timeout` after it's created.
800
//
801
// TODO(b/267748212): Instead of per-device Mutex, we might want to have per-directory Mutex
802
// if we use PassthroughFs in multi-threaded environments.
803
expiring_casefold_lookup_caches: Option<Mutex<ExpiringCasefoldLookupCaches>>,
804
805
// paths and coresponding permission setting set by `crosvm_client_fs_permission_set` API
806
#[cfg(feature = "fs_permission_translation")]
807
permission_paths: RwLock<Vec<PermissionData>>,
808
809
// paths and coresponding xattr setting set by `crosvm_client_fs_xattr_set` API
810
#[cfg(feature = "arc_quota")]
811
xattr_paths: RwLock<Vec<XattrData>>,
812
813
cfg: Config,
814
815
// Set the root directory when pivot root isn't enabled for jailed process.
816
//
817
// virtio-fs typically uses mount namespaces and pivot_root for file system isolation,
818
// making the jailed process's root directory "/".
819
//
820
// However, Android's security model prevents crosvm from having the necessary SYS_ADMIN
821
// capability for mount namespaces and pivot_root. This lack of isolation means that
822
// root_dir defaults to the path provided via "--shared-dir".
823
root_dir: String,
824
}
825
826
impl std::fmt::Debug for PassthroughFs {
827
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
828
f.debug_struct("PassthroughFs")
829
.field("tag", &self.tag)
830
.field("next_inode", &self.next_inode)
831
.field("next_handle", &self.next_handle)
832
.field("proc", &self.proc)
833
.field("writeback", &self.writeback)
834
.field("zero_message_open", &self.zero_message_open)
835
.field("zero_message_opendir", &self.zero_message_opendir)
836
.field("cfg", &self.cfg)
837
.finish()
838
}
839
}
840
841
impl PassthroughFs {
842
pub fn new(tag: &str, cfg: Config) -> io::Result<PassthroughFs> {
843
// SAFETY: this doesn't modify any memory and we check the return value.
844
let raw_descriptor = syscall!(unsafe {
845
libc::openat64(
846
libc::AT_FDCWD,
847
PROC_CSTR.as_ptr(),
848
libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC,
849
)
850
})?;
851
852
// Privileged UIDs can use D-Bus to perform some operations.
853
#[cfg(feature = "arc_quota")]
854
let (dbus_connection, dbus_fd) = if cfg.privileged_quota_uids.is_empty() {
855
(None, None)
856
} else {
857
let mut channel = dbus::channel::Channel::get_private(dbus::channel::BusType::System)
858
.map_err(io::Error::other)?;
859
channel.set_watch_enabled(true);
860
let dbus_fd = channel.watch().fd;
861
channel.set_watch_enabled(false);
862
(
863
Some(Mutex::new(dbus::blocking::Connection::from(channel))),
864
Some(dbus_fd),
865
)
866
};
867
868
// SAFETY: safe because we just opened this descriptor.
869
let proc = unsafe { File::from_raw_descriptor(raw_descriptor) };
870
871
let expiring_casefold_lookup_caches = if cfg.ascii_casefold {
872
Some(Mutex::new(ExpiringCasefoldLookupCaches::new(cfg.timeout)))
873
} else {
874
None
875
};
876
877
#[allow(unused_mut)]
878
let mut passthroughfs = PassthroughFs {
879
process_lock: Mutex::new(()),
880
tag: tag.to_string(),
881
inodes: Mutex::new(MultikeyBTreeMap::new()),
882
next_inode: AtomicU64::new(ROOT_ID + 1),
883
884
handles: Mutex::new(BTreeMap::new()),
885
next_handle: AtomicU64::new(1),
886
887
proc,
888
889
writeback: AtomicBool::new(false),
890
zero_message_open: AtomicBool::new(false),
891
zero_message_opendir: AtomicBool::new(false),
892
893
#[cfg(feature = "arc_quota")]
894
dbus_connection,
895
#[cfg(feature = "arc_quota")]
896
dbus_fd,
897
expiring_casefold_lookup_caches,
898
#[cfg(feature = "fs_permission_translation")]
899
permission_paths: RwLock::new(Vec::new()),
900
#[cfg(feature = "arc_quota")]
901
xattr_paths: RwLock::new(Vec::new()),
902
cfg,
903
root_dir: "/".to_string(),
904
};
905
906
#[cfg(feature = "fs_runtime_ugid_map")]
907
passthroughfs.set_permission_path();
908
909
cros_tracing::trace_simple_print!(
910
VirtioFs,
911
"New PassthroughFS initialized: {:?}",
912
passthroughfs
913
);
914
Ok(passthroughfs)
915
}
916
917
#[cfg(feature = "fs_runtime_ugid_map")]
/// Replaces `permission_paths` with a copy of `cfg.ugid_map`, unless that map is empty.
///
/// Panics if the `permission_paths` lock is poisoned.
fn set_permission_path(&mut self) {
    if self.cfg.ugid_map.is_empty() {
        // Nothing configured: leave `permission_paths` untouched.
        return;
    }
    let mut paths = self
        .permission_paths
        .write()
        .expect("Failed to acquire write lock on permission_paths");
    *paths = self.cfg.ugid_map.clone();
}
927
928
#[cfg(feature = "fs_runtime_ugid_map")]
/// Sets `root_dir` to the canonicalized form of `shared_dir`.
///
/// The canonical path is converted lossily, so non-UTF-8 bytes are replaced.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidInput` if `shared_dir` cannot be canonicalized.
pub fn set_root_dir(&mut self, shared_dir: String) -> io::Result<()> {
    let canonical = std::fs::canonicalize(shared_dir).map_err(|e| {
        io::Error::new(
            io::ErrorKind::InvalidInput,
            format!("Failed to canonicalize root_dir: {e}"),
        )
    })?;
    self.root_dir = canonical.to_string_lossy().into_owned();
    Ok(())
}
942
943
pub fn cfg(&self) -> &Config {
944
&self.cfg
945
}
946
947
pub fn keep_rds(&self) -> Vec<RawDescriptor> {
948
#[cfg_attr(not(feature = "arc_quota"), allow(unused_mut))]
949
let mut keep_rds = vec![self.proc.as_raw_descriptor()];
950
#[cfg(feature = "arc_quota")]
951
if let Some(fd) = self.dbus_fd {
952
keep_rds.push(fd);
953
}
954
keep_rds
955
}
956
957
fn rewrite_xattr_name<'xattr>(&self, name: &'xattr CStr) -> Cow<'xattr, CStr> {
958
if !self.cfg.rewrite_security_xattrs {
959
return Cow::Borrowed(name);
960
}
961
962
// Does not include nul-terminator.
963
let buf = name.to_bytes();
964
if !buf.starts_with(SECURITY_XATTR) || buf == SELINUX_XATTR {
965
return Cow::Borrowed(name);
966
}
967
968
let mut newname = USER_VIRTIOFS_XATTR.to_vec();
969
newname.extend_from_slice(buf);
970
971
// The unwrap is safe here because the prefix doesn't contain any interior nul-bytes and the
972
// to_bytes() call above will not return a byte slice with any interior nul-bytes either.
973
Cow::Owned(CString::new(newname).expect("Failed to re-write xattr name"))
974
}
975
976
fn find_inode(&self, inode: Inode) -> io::Result<Arc<InodeData>> {
977
self.inodes.lock().get(&inode).cloned().ok_or_else(ebadf)
978
}
979
980
fn find_handle(&self, handle: Handle, inode: Inode) -> io::Result<Arc<HandleData>> {
981
self.handles
982
.lock()
983
.get(&handle)
984
.filter(|hd| hd.inode == inode)
985
.cloned()
986
.ok_or_else(ebadf)
987
}
988
989
fn open_fd(&self, fd: RawDescriptor, flags: i32) -> io::Result<File> {
990
let pathname = CString::new(format!("self/fd/{fd}"))
991
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
992
993
// SAFETY: this doesn't modify any memory and we check the return value. We don't really
994
// check `flags` because if the kernel can't handle poorly specified flags then we have
995
// much bigger problems. Also, clear the `O_NOFOLLOW` flag if it is set since we need
996
// to follow the `/proc/self/fd` symlink to get the file.
997
let raw_descriptor = syscall!(unsafe {
998
libc::openat64(
999
self.proc.as_raw_descriptor(),
1000
pathname.as_ptr(),
1001
(flags | libc::O_CLOEXEC) & !(libc::O_NOFOLLOW | libc::O_DIRECT),
1002
)
1003
})?;
1004
1005
// SAFETY: safe because we just opened this descriptor.
1006
Ok(unsafe { File::from_raw_descriptor(raw_descriptor) })
1007
}
1008
1009
/// Modifies the provided open flags based on the writeback caching configuration.
1010
/// Return the updated open flags.
1011
fn update_open_flags(&self, mut flags: i32) -> i32 {
1012
// When writeback caching is enabled, the kernel may send read requests even if the
1013
// userspace program opened the file write-only. So we need to ensure that we have opened
1014
// the file for reading as well as writing.
1015
let writeback = self.writeback.load(Ordering::Relaxed);
1016
if writeback && flags & libc::O_ACCMODE == libc::O_WRONLY {
1017
flags &= !libc::O_ACCMODE;
1018
flags |= libc::O_RDWR;
1019
}
1020
1021
// When writeback caching is enabled the kernel is responsible for handling `O_APPEND`.
1022
// However, this breaks atomicity as the file may have changed on disk, invalidating the
1023
// cached copy of the data in the kernel and the offset that the kernel thinks is the end of
1024
// the file. Just allow this for now as it is the user's responsibility to enable writeback
1025
// caching only for directories that are not shared. It also means that we need to clear the
1026
// `O_APPEND` flag.
1027
if writeback && flags & libc::O_APPEND != 0 {
1028
flags &= !libc::O_APPEND;
1029
}
1030
1031
flags
1032
}
1033
1034
fn open_inode(&self, inode: &InodeData, mut flags: i32) -> io::Result<File> {
1035
// handle writeback caching cases
1036
flags = self.update_open_flags(flags);
1037
1038
self.open_fd(inode.as_raw_descriptor(), flags)
1039
}
1040
1041
// Increases the inode refcount and returns the inode.
1042
fn increase_inode_refcount(&self, inode_data: &InodeData) -> Inode {
1043
// Matches with the release store in `forget`.
1044
inode_data.refcount.fetch_add(1, Ordering::Acquire);
1045
inode_data.inode
1046
}
1047
1048
// Creates a new entry for `f` or increases the refcount of the existing entry for `f`.
1049
// The inodes mutex lock must not be already taken by the same thread otherwise this
1050
// will deadlock.
1051
fn add_entry(
1052
&self,
1053
f: File,
1054
#[cfg_attr(not(feature = "fs_permission_translation"), allow(unused_mut))]
1055
mut st: libc::stat64,
1056
open_flags: libc::c_int,
1057
path: String,
1058
) -> Entry {
1059
#[cfg(feature = "arc_quota")]
1060
self.set_permission(&mut st, &path);
1061
#[cfg(feature = "fs_runtime_ugid_map")]
1062
self.set_ugid_permission(&mut st, &path);
1063
let mut inodes = self.inodes.lock();
1064
1065
let altkey = InodeAltKey {
1066
ino: st.st_ino,
1067
dev: st.st_dev,
1068
};
1069
1070
let inode = if let Some(data) = inodes.get_alt(&altkey) {
1071
self.increase_inode_refcount(data)
1072
} else {
1073
let inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
1074
inodes.insert(
1075
inode,
1076
altkey,
1077
Arc::new(InodeData {
1078
inode,
1079
file: Mutex::new(OpenedFile::new(f, open_flags)),
1080
refcount: AtomicU64::new(1),
1081
filetype: st.st_mode.into(),
1082
path,
1083
unsafe_leak_fd: AtomicBool::new(false),
1084
}),
1085
);
1086
1087
inode
1088
};
1089
1090
Entry {
1091
inode,
1092
generation: 0,
1093
attr: st,
1094
// We use the same timeout for the attribute and the entry.
1095
attr_timeout: self.cfg.timeout,
1096
entry_timeout: self.cfg.timeout,
1097
}
1098
}
1099
1100
/// Acquires lock of `expiring_casefold_lookup_caches` if `ascii_casefold` is enabled.
1101
fn lock_casefold_lookup_caches(&self) -> Option<MutexGuard<'_, ExpiringCasefoldLookupCaches>> {
1102
self.expiring_casefold_lookup_caches
1103
.as_ref()
1104
.map(|c| c.lock())
1105
}
1106
1107
// Returns an actual case-sensitive file name that matches with the given `name`.
1108
// Returns `Ok(None)` if no file matches with the give `name`.
1109
// This function will panic if casefold is not enabled.
1110
fn get_case_unfolded_name(
1111
&self,
1112
parent: &InodeData,
1113
name: &[u8],
1114
) -> io::Result<Option<CString>> {
1115
let mut caches = self
1116
.lock_casefold_lookup_caches()
1117
.expect("casefold must be enabled");
1118
let dir_cache = caches.get(parent)?;
1119
Ok(dir_cache.lookup(name))
1120
}
1121
1122
// Performs an ascii case insensitive lookup.
1123
fn ascii_casefold_lookup(&self, parent: &InodeData, name: &[u8]) -> io::Result<Entry> {
1124
match self.get_case_unfolded_name(parent, name)? {
1125
None => Err(io::Error::from_raw_os_error(libc::ENOENT)),
1126
Some(actual_name) => self.do_lookup(parent, &actual_name),
1127
}
1128
}
1129
1130
// Test-only helper: asks the casefold cache whether `name` exists under `parent`.
// Panics if casefold is not enabled.
#[cfg(test)]
fn exists_in_casefold_cache(&self, parent: Inode, name: &CStr) -> bool {
    self.lock_casefold_lookup_caches()
        .expect("casefold must be enabled")
        .exists_in_cache(parent, name)
}
1137
1138
fn do_lookup(&self, parent: &InodeData, name: &CStr) -> io::Result<Entry> {
1139
#[cfg_attr(not(feature = "fs_permission_translation"), allow(unused_mut))]
1140
let mut st = statat(parent, name)?;
1141
1142
let altkey = InodeAltKey {
1143
ino: st.st_ino,
1144
dev: st.st_dev,
1145
};
1146
1147
let path = format!(
1148
"{}/{}",
1149
parent.path.clone(),
1150
name.to_str().unwrap_or("<non UTF-8 str>")
1151
);
1152
1153
// Check if we already have an entry before opening a new file.
1154
if let Some(data) = self.inodes.lock().get_alt(&altkey) {
1155
// Return the same inode with the reference counter increased.
1156
#[cfg(feature = "arc_quota")]
1157
self.set_permission(&mut st, &path);
1158
#[cfg(feature = "fs_runtime_ugid_map")]
1159
self.set_ugid_permission(&mut st, &path);
1160
return Ok(Entry {
1161
inode: self.increase_inode_refcount(data),
1162
generation: 0,
1163
attr: st,
1164
// We use the same timeout for the attribute and the entry.
1165
attr_timeout: self.cfg.timeout,
1166
entry_timeout: self.cfg.timeout,
1167
});
1168
}
1169
1170
// Open a regular file with O_RDONLY to store in `InodeData` so explicit open requests can
1171
// be skipped later if the ZERO_MESSAGE_{OPEN,OPENDIR} features are enabled.
1172
// If the crosvm process doesn't have a read permission, fall back to O_PATH below.
1173
let mut flags = libc::O_RDONLY | libc::O_NOFOLLOW | libc::O_CLOEXEC;
1174
match FileType::from(st.st_mode) {
1175
FileType::Regular => {}
1176
FileType::Directory => flags |= libc::O_DIRECTORY,
1177
FileType::Other => flags |= libc::O_PATH,
1178
};
1179
1180
// SAFETY: this doesn't modify any memory and we check the return value.
1181
let fd = match unsafe {
1182
syscall!(libc::openat64(
1183
parent.as_raw_descriptor(),
1184
name.as_ptr(),
1185
flags
1186
))
1187
} {
1188
Ok(fd) => fd,
1189
Err(e) if e.errno() == libc::EACCES => {
1190
// If O_RDONLY is unavailable, fall back to O_PATH to get an FD to store in
1191
// `InodeData`.
1192
// Note that some operations which should be allowed without read permissions
1193
// require syscalls that don't support O_PATH fds. For those syscalls, we will
1194
// need to fall back to their path-based equivalents with /self/fd/${FD}.
1195
// e.g. `fgetxattr()` for an O_PATH FD fails while `getxaattr()` for /self/fd/${FD}
1196
// works.
1197
flags |= libc::O_PATH;
1198
// SAFETY: this doesn't modify any memory and we check the return value.
1199
unsafe {
1200
syscall!(libc::openat64(
1201
parent.as_raw_descriptor(),
1202
name.as_ptr(),
1203
flags
1204
))
1205
}?
1206
}
1207
Err(e) => {
1208
return Err(e.into());
1209
}
1210
};
1211
1212
// SAFETY: safe because we own the fd.
1213
let f = unsafe { File::from_raw_descriptor(fd) };
1214
// We made sure the lock acquired for `self.inodes` is released automatically when
1215
// the if block above is exited, so a call to `self.add_entry()` should not cause a deadlock
1216
// here. This would not be the case if this were executed in an else block instead.
1217
Ok(self.add_entry(f, st, flags, path))
1218
}
1219
1220
fn get_cache_open_options(&self, flags: u32) -> OpenOptions {
1221
let mut opts = OpenOptions::empty();
1222
match self.cfg.cache_policy {
1223
// We only set the direct I/O option on files.
1224
CachePolicy::Never => opts.set(
1225
OpenOptions::DIRECT_IO,
1226
flags & (libc::O_DIRECTORY as u32) == 0,
1227
),
1228
CachePolicy::Always => {
1229
opts |= if flags & (libc::O_DIRECTORY as u32) == 0 {
1230
OpenOptions::KEEP_CACHE
1231
} else {
1232
OpenOptions::CACHE_DIR
1233
}
1234
}
1235
_ => {}
1236
};
1237
opts
1238
}
1239
1240
// Performs lookup using original name first, if it fails and ascii_casefold is enabled,
1241
// it tries to unfold the name and do lookup again.
1242
fn do_lookup_with_casefold_fallback(
1243
&self,
1244
parent: &InodeData,
1245
name: &CStr,
1246
) -> io::Result<Entry> {
1247
let mut res = self.do_lookup(parent, name);
1248
// If `ascii_casefold` is enabled, fallback to `ascii_casefold_lookup()`.
1249
if res.is_err() && self.cfg.ascii_casefold {
1250
res = self.ascii_casefold_lookup(parent, name.to_bytes());
1251
}
1252
res
1253
}
1254
1255
fn do_open(&self, inode: Inode, flags: u32) -> io::Result<(Option<Handle>, OpenOptions)> {
1256
let inode_data = self.find_inode(inode)?;
1257
1258
let file = self.open_inode(&inode_data, flags as i32)?;
1259
1260
let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
1261
let data = HandleData {
1262
inode,
1263
file: Mutex::new(OpenedFile::new(file, flags as i32)),
1264
unsafe_leak_fd: AtomicBool::new(false),
1265
};
1266
1267
self.handles.lock().insert(handle, Arc::new(data));
1268
1269
let opts = self.get_cache_open_options(flags);
1270
1271
Ok((Some(handle), opts))
1272
}
1273
1274
fn do_open_at(
1275
&self,
1276
parent_data: Arc<InodeData>,
1277
name: &CStr,
1278
inode: Inode,
1279
flags: u32,
1280
) -> io::Result<(Option<Handle>, OpenOptions)> {
1281
let open_flags = self.update_open_flags(flags as i32);
1282
1283
let fd_open = syscall!(
1284
// SAFETY: return value is checked.
1285
unsafe {
1286
libc::openat64(
1287
parent_data.as_raw_descriptor(),
1288
name.as_ptr(),
1289
(open_flags | libc::O_CLOEXEC) & !(libc::O_NOFOLLOW | libc::O_DIRECT),
1290
)
1291
}
1292
)?;
1293
1294
// SAFETY: fd_open is valid
1295
let file_open = unsafe { File::from_raw_descriptor(fd_open) };
1296
let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
1297
let data = HandleData {
1298
inode,
1299
file: Mutex::new(OpenedFile::new(file_open, open_flags)),
1300
unsafe_leak_fd: AtomicBool::new(false),
1301
};
1302
1303
self.handles.lock().insert(handle, Arc::new(data));
1304
1305
let opts = self.get_cache_open_options(open_flags as u32);
1306
Ok((Some(handle), opts))
1307
}
1308
1309
fn do_release(&self, inode: Inode, handle: Handle) -> io::Result<()> {
1310
let mut handles = self.handles.lock();
1311
1312
if let btree_map::Entry::Occupied(e) = handles.entry(handle) {
1313
if e.get().inode == inode {
1314
// We don't need to close the file here because that will happen automatically when
1315
// the last `Arc` is dropped.
1316
e.remove();
1317
return Ok(());
1318
}
1319
}
1320
1321
Err(ebadf())
1322
}
1323
1324
// Stats `inode` and returns the attributes together with the configured timeout.
//
// With the `arc_quota` / `fs_runtime_ugid_map` features the attributes are first passed
// through the corresponding permission helpers using the inode's path.
fn do_getattr(&self, inode: &InodeData) -> io::Result<(libc::stat64, Duration)> {
    #[allow(unused_mut)]
    let mut attrs = stat(inode)?;

    #[cfg(feature = "arc_quota")]
    self.set_permission(&mut attrs, &inode.path);
    #[cfg(feature = "fs_runtime_ugid_map")]
    self.set_ugid_permission(&mut attrs, &inode.path);

    let timeout = self.cfg.timeout;
    Ok((attrs, timeout))
}
1334
1335
fn do_unlink(&self, parent: &InodeData, name: &CStr, flags: libc::c_int) -> io::Result<()> {
1336
// SAFETY: this doesn't modify any memory and we check the return value.
1337
syscall!(unsafe { libc::unlinkat(parent.as_raw_descriptor(), name.as_ptr(), flags) })?;
1338
Ok(())
1339
}
1340
1341
// Flushes `file` with `fdatasync` when `datasync` is set and with `fsync` otherwise.
fn do_fsync<F: AsRawDescriptor>(&self, file: &F, datasync: bool) -> io::Result<()> {
    let fd = file.as_raw_descriptor();

    // SAFETY: this doesn't modify any memory and we check the return value.
    syscall!(unsafe {
        match datasync {
            true => libc::fdatasync(fd),
            false => libc::fsync(fd),
        }
    })?;

    Ok(())
}
1353
1354
// Changes the CWD to `self.proc`, runs `f`, and then changes the CWD back to the root
1355
// directory. This effectively emulates an *at syscall starting at /proc, which is useful when
1356
// there is no *at syscall available. Panics if any of the fchdir calls fail or if there is no
1357
// root inode.
1358
//
1359
// NOTE: this method acquires an `self`-wide lock. If any locks are acquired in `f`, care must
1360
// be taken to avoid the risk of deadlocks.
1361
fn with_proc_chdir<F, T>(&self, f: F) -> T
1362
where
1363
F: FnOnce() -> T,
1364
{
1365
let root = self.find_inode(ROOT_ID).expect("failed to find root inode");
1366
1367
// Acquire a lock for `fchdir`.
1368
let _proc_lock = self.process_lock.lock();
1369
// SAFETY: this doesn't modify any memory and we check the return value. Since the
1370
// fchdir should never fail we just use debug_asserts.
1371
let proc_cwd = unsafe { libc::fchdir(self.proc.as_raw_descriptor()) };
1372
debug_assert_eq!(
1373
proc_cwd,
1374
0,
1375
"failed to fchdir to /proc: {}",
1376
io::Error::last_os_error()
1377
);
1378
1379
let res = f();
1380
1381
// SAFETY: this doesn't modify any memory and we check the return value. Since the
1382
// fchdir should never fail we just use debug_asserts.
1383
let root_cwd = unsafe { libc::fchdir(root.as_raw_descriptor()) };
1384
debug_assert_eq!(
1385
root_cwd,
1386
0,
1387
"failed to fchdir back to root directory: {}",
1388
io::Error::last_os_error()
1389
);
1390
1391
res
1392
}
1393
1394
fn do_getxattr(&self, inode: &InodeData, name: &CStr, value: &mut [u8]) -> io::Result<usize> {
1395
let file = inode.file.lock();
1396
let o_path_file = (file.open_flags & libc::O_PATH) != 0;
1397
let res = if o_path_file {
1398
// For FDs opened with `O_PATH`, we cannot call `fgetxattr` normally. Instead we
1399
// emulate an _at syscall by changing the CWD to /proc, running the path based syscall,
1400
// and then setting the CWD back to the root directory.
1401
let path = CString::new(format!("self/fd/{}", file.as_raw_descriptor()))
1402
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
1403
1404
// SAFETY: this will only modify `value` and we check the return value.
1405
self.with_proc_chdir(|| unsafe {
1406
libc::getxattr(
1407
path.as_ptr(),
1408
name.as_ptr(),
1409
value.as_mut_ptr() as *mut libc::c_void,
1410
value.len() as libc::size_t,
1411
)
1412
})
1413
} else {
1414
// For regular files and directories, we can just use fgetxattr.
1415
// SAFETY: this will only write to `value` and we check the return value.
1416
unsafe {
1417
libc::fgetxattr(
1418
file.as_raw_descriptor(),
1419
name.as_ptr(),
1420
value.as_mut_ptr() as *mut libc::c_void,
1421
value.len() as libc::size_t,
1422
)
1423
}
1424
};
1425
1426
if res < 0 {
1427
Err(io::Error::last_os_error())
1428
} else {
1429
Ok(res as usize)
1430
}
1431
}
1432
1433
fn get_encryption_policy_ex<R: io::Read>(
1434
&self,
1435
inode: Inode,
1436
handle: Handle,
1437
mut r: R,
1438
) -> io::Result<IoctlReply> {
1439
let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1440
self.find_inode(inode)?
1441
} else {
1442
self.find_handle(handle, inode)?
1443
};
1444
1445
// SAFETY: this struct only has integer fields and any value is valid.
1446
let mut arg = unsafe { MaybeUninit::<fscrypt_get_policy_ex_arg>::zeroed().assume_init() };
1447
r.read_exact(arg.policy_size.as_mut_bytes())?;
1448
1449
let policy_size = cmp::min(arg.policy_size, size_of::<fscrypt_policy>() as u64);
1450
arg.policy_size = policy_size;
1451
1452
let res =
1453
// SAFETY: the kernel will only write to `arg` and we check the return value.
1454
unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_GET_ENCRYPTION_POLICY_EX, &mut arg) };
1455
if res < 0 {
1456
Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1457
} else {
1458
let len = size_of::<u64>() + arg.policy_size as usize;
1459
Ok(IoctlReply::Done(Ok(<&[u8]>::from(&arg)[..len].to_vec())))
1460
}
1461
}
1462
1463
fn get_fsxattr(&self, inode: Inode, handle: Handle) -> io::Result<IoctlReply> {
1464
let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1465
self.find_inode(inode)?
1466
} else {
1467
self.find_handle(handle, inode)?
1468
};
1469
1470
let mut buf = MaybeUninit::<fsxattr>::zeroed();
1471
1472
// SAFETY: the kernel will only write to `buf` and we check the return value.
1473
let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_FSGETXATTR, buf.as_mut_ptr()) };
1474
if res < 0 {
1475
Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1476
} else {
1477
// SAFETY: the kernel guarantees that the policy is now initialized.
1478
let xattr = unsafe { buf.assume_init() };
1479
Ok(IoctlReply::Done(Ok(xattr.as_bytes().to_vec())))
1480
}
1481
}
1482
1483
fn set_fsxattr<R: io::Read>(
1484
&self,
1485
#[cfg_attr(not(feature = "arc_quota"), allow(unused_variables))] ctx: Context,
1486
inode: Inode,
1487
handle: Handle,
1488
mut r: R,
1489
) -> io::Result<IoctlReply> {
1490
let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1491
self.find_inode(inode)?
1492
} else {
1493
self.find_handle(handle, inode)?
1494
};
1495
1496
let mut in_attr = fsxattr::new_zeroed();
1497
r.read_exact(in_attr.as_mut_bytes())?;
1498
1499
#[cfg(feature = "arc_quota")]
1500
let st = stat(&*data)?;
1501
1502
#[cfg(feature = "arc_quota")]
1503
let ctx_uid = self.lookup_host_uid(&ctx, inode);
1504
1505
// Changing quota project ID requires CAP_FOWNER or being file owner.
1506
// Here we use privileged_quota_uids because we cannot perform a CAP_FOWNER check.
1507
#[cfg(feature = "arc_quota")]
1508
if ctx_uid == st.st_uid || self.cfg.privileged_quota_uids.contains(&ctx_uid) {
1509
// Get the current fsxattr.
1510
let mut buf = MaybeUninit::<fsxattr>::zeroed();
1511
// SAFETY: the kernel will only write to `buf` and we check the return value.
1512
let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_FSGETXATTR, buf.as_mut_ptr()) };
1513
if res < 0 {
1514
return Ok(IoctlReply::Done(Err(io::Error::last_os_error())));
1515
}
1516
// SAFETY: the kernel guarantees that the policy is now initialized.
1517
let current_attr = unsafe { buf.assume_init() };
1518
1519
// Project ID cannot be changed inside a user namespace.
1520
// Use Spaced to avoid this restriction.
1521
if current_attr.fsx_projid != in_attr.fsx_projid {
1522
let connection = self.dbus_connection.as_ref().unwrap().lock();
1523
let proxy = connection.with_proxy(
1524
"org.chromium.Spaced",
1525
"/org/chromium/Spaced",
1526
DEFAULT_DBUS_TIMEOUT,
1527
);
1528
let project_id = in_attr.fsx_projid;
1529
if !is_android_project_id(project_id) {
1530
return Err(io::Error::from_raw_os_error(libc::EINVAL));
1531
}
1532
let file_clone = base::SafeDescriptor::try_from(&*data)?;
1533
match proxy.set_project_id(file_clone.into(), project_id) {
1534
Ok(r) => {
1535
let r = SetProjectIdReply::parse_from_bytes(&r)
1536
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
1537
if !r.success {
1538
return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1539
r.error,
1540
))));
1541
}
1542
}
1543
Err(e) => {
1544
return Err(io::Error::other(e));
1545
}
1546
};
1547
}
1548
}
1549
1550
// SAFETY: this doesn't modify any memory and we check the return value.
1551
let res = unsafe { ioctl_with_ptr(&*data, FS_IOC_FSSETXATTR, &in_attr) };
1552
if res < 0 {
1553
Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1554
} else {
1555
Ok(IoctlReply::Done(Ok(Vec::new())))
1556
}
1557
}
1558
1559
fn get_flags(&self, inode: Inode, handle: Handle) -> io::Result<IoctlReply> {
1560
let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1561
self.find_inode(inode)?
1562
} else {
1563
self.find_handle(handle, inode)?
1564
};
1565
1566
// The ioctl encoding is a long but the parameter is actually an int.
1567
let mut flags: c_int = 0;
1568
1569
// SAFETY: the kernel will only write to `flags` and we check the return value.
1570
let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_GETFLAGS, &mut flags) };
1571
if res < 0 {
1572
Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1573
} else {
1574
Ok(IoctlReply::Done(Ok(flags.to_ne_bytes().to_vec())))
1575
}
1576
}
1577
1578
fn set_flags<R: io::Read>(
1579
&self,
1580
#[cfg_attr(not(feature = "arc_quota"), allow(unused_variables))] ctx: Context,
1581
inode: Inode,
1582
handle: Handle,
1583
mut r: R,
1584
) -> io::Result<IoctlReply> {
1585
let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1586
self.find_inode(inode)?
1587
} else {
1588
self.find_handle(handle, inode)?
1589
};
1590
1591
// The ioctl encoding is a long but the parameter is actually an int.
1592
let mut in_flags: c_int = 0;
1593
r.read_exact(in_flags.as_mut_bytes())?;
1594
1595
#[cfg(feature = "arc_quota")]
1596
let st = stat(&*data)?;
1597
1598
#[cfg(feature = "arc_quota")]
1599
let ctx_uid = self.lookup_host_uid(&ctx, inode);
1600
1601
// Only privleged uid can perform FS_IOC_SETFLAGS through cryptohome.
1602
#[cfg(feature = "arc_quota")]
1603
if ctx_uid == st.st_uid || self.cfg.privileged_quota_uids.contains(&ctx_uid) {
1604
// Get the current flag.
1605
let mut buf = MaybeUninit::<c_int>::zeroed();
1606
// SAFETY: the kernel will only write to `buf` and we check the return value.
1607
let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_GETFLAGS, buf.as_mut_ptr()) };
1608
if res < 0 {
1609
return Ok(IoctlReply::Done(Err(io::Error::last_os_error())));
1610
}
1611
// SAFETY: the kernel guarantees that the policy is now initialized.
1612
let current_flags = unsafe { buf.assume_init() };
1613
1614
// Project inheritance flag cannot be changed inside a user namespace.
1615
// Use Spaced to avoid this restriction.
1616
if (in_flags & FS_PROJINHERIT_FL) != (current_flags & FS_PROJINHERIT_FL) {
1617
let connection = self.dbus_connection.as_ref().unwrap().lock();
1618
let proxy = connection.with_proxy(
1619
"org.chromium.Spaced",
1620
"/org/chromium/Spaced",
1621
DEFAULT_DBUS_TIMEOUT,
1622
);
1623
// If the input flags contain FS_PROJINHERIT_FL, then it is a set. Otherwise it is a
1624
// reset.
1625
let enable = (in_flags & FS_PROJINHERIT_FL) == FS_PROJINHERIT_FL;
1626
let file_clone = base::SafeDescriptor::try_from(&*data)?;
1627
match proxy.set_project_inheritance_flag(file_clone.into(), enable) {
1628
Ok(r) => {
1629
let r = SetProjectInheritanceFlagReply::parse_from_bytes(&r)
1630
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
1631
if !r.success {
1632
return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1633
r.error,
1634
))));
1635
}
1636
}
1637
Err(e) => {
1638
return Err(io::Error::other(e));
1639
}
1640
};
1641
}
1642
}
1643
1644
// SAFETY: this doesn't modify any memory and we check the return value.
1645
let res = unsafe { ioctl_with_ptr(&*data, FS_IOC_SETFLAGS, &in_flags) };
1646
if res < 0 {
1647
Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1648
} else {
1649
Ok(IoctlReply::Done(Ok(Vec::new())))
1650
}
1651
}
1652
1653
fn enable_verity<R: io::Read>(
1654
&self,
1655
inode: Inode,
1656
handle: Handle,
1657
mut r: R,
1658
) -> io::Result<IoctlReply> {
1659
let inode_data = self.find_inode(inode)?;
1660
1661
// These match the return codes from `fsverity_ioctl_enable` in the kernel.
1662
match inode_data.filetype {
1663
FileType::Regular => {}
1664
FileType::Directory => return Err(io::Error::from_raw_os_error(libc::EISDIR)),
1665
FileType::Other => return Err(io::Error::from_raw_os_error(libc::EINVAL)),
1666
}
1667
1668
{
1669
// We cannot enable verity while holding a writable fd so get a new one, if necessary.
1670
let mut file = inode_data.file.lock();
1671
let mut flags = file.open_flags;
1672
match flags & libc::O_ACCMODE {
1673
libc::O_WRONLY | libc::O_RDWR => {
1674
flags &= !libc::O_ACCMODE;
1675
flags |= libc::O_RDONLY;
1676
1677
// We need to get a read-only handle for this file.
1678
let newfile = self.open_fd(file.as_raw_descriptor(), libc::O_RDONLY)?;
1679
*file = OpenedFile::new(newfile, flags);
1680
}
1681
libc::O_RDONLY => {}
1682
_ => panic!("Unexpected flags: {flags:#x}"),
1683
}
1684
}
1685
1686
let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1687
inode_data
1688
} else {
1689
let data = self.find_handle(handle, inode)?;
1690
1691
{
1692
// We can't enable verity while holding a writable fd. We don't know whether the
1693
// file was opened for writing so check it here. We don't expect
1694
// this to be a frequent operation so the extra latency should be
1695
// fine.
1696
let mut file = data.file.lock();
1697
let flags = FileFlags::from_file(&*file).map_err(io::Error::from)?;
1698
match flags {
1699
FileFlags::ReadWrite | FileFlags::Write => {
1700
// We need to get a read-only handle for this file.
1701
*file = OpenedFile::new(
1702
self.open_fd(file.as_raw_descriptor(), libc::O_RDONLY)?,
1703
libc::O_RDONLY,
1704
);
1705
}
1706
FileFlags::Read => {}
1707
}
1708
}
1709
1710
data
1711
};
1712
1713
let mut arg = fsverity_enable_arg::new_zeroed();
1714
r.read_exact(arg.as_mut_bytes())?;
1715
1716
let mut salt;
1717
if arg.salt_size > 0 {
1718
if arg.salt_size > self.max_buffer_size() {
1719
return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1720
libc::ENOMEM,
1721
))));
1722
}
1723
salt = vec![0; arg.salt_size as usize];
1724
r.read_exact(&mut salt)?;
1725
arg.salt_ptr = salt.as_ptr() as usize as u64;
1726
} else {
1727
arg.salt_ptr = 0;
1728
}
1729
1730
let mut sig;
1731
if arg.sig_size > 0 {
1732
if arg.sig_size > self.max_buffer_size() {
1733
return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1734
libc::ENOMEM,
1735
))));
1736
}
1737
sig = vec![0; arg.sig_size as usize];
1738
r.read_exact(&mut sig)?;
1739
arg.sig_ptr = sig.as_ptr() as usize as u64;
1740
} else {
1741
arg.sig_ptr = 0;
1742
}
1743
1744
// SAFETY: this doesn't modify any memory and we check the return value.
1745
let res = unsafe { ioctl_with_ptr(&*data, FS_IOC_ENABLE_VERITY, &arg) };
1746
if res < 0 {
1747
Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1748
} else {
1749
Ok(IoctlReply::Done(Ok(Vec::new())))
1750
}
1751
}
1752
1753
fn measure_verity<R: io::Read>(
1754
&self,
1755
inode: Inode,
1756
handle: Handle,
1757
mut r: R,
1758
out_size: u32,
1759
) -> io::Result<IoctlReply> {
1760
let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1761
self.find_inode(inode)?
1762
} else {
1763
self.find_handle(handle, inode)?
1764
};
1765
1766
let mut digest = fsverity_digest::new_zeroed();
1767
r.read_exact(digest.as_mut_bytes())?;
1768
1769
// Taken from fs/verity/fsverity_private.h.
1770
const FS_VERITY_MAX_DIGEST_SIZE: u16 = 64;
1771
1772
// This digest size is what the fsverity command line utility uses.
1773
const DIGEST_SIZE: u16 = FS_VERITY_MAX_DIGEST_SIZE * 2 + 1;
1774
const BUFLEN: usize = size_of::<fsverity_digest>() + DIGEST_SIZE as usize;
1775
const ROUNDED_LEN: usize = BUFLEN.div_ceil(size_of::<fsverity_digest>());
1776
1777
// Make sure we get a properly aligned allocation.
1778
let mut buf = [MaybeUninit::<fsverity_digest>::uninit(); ROUNDED_LEN];
1779
1780
// SAFETY: we are only writing data and not reading uninitialized memory.
1781
unsafe {
1782
// TODO: Replace with `MaybeUninit::slice_as_mut_ptr` once it is stabilized.
1783
addr_of_mut!((*(buf.as_mut_ptr() as *mut fsverity_digest)).digest_size)
1784
.write(DIGEST_SIZE)
1785
};
1786
1787
// SAFETY: this will only modify `buf` and we check the return value.
1788
let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_MEASURE_VERITY, buf.as_mut_ptr()) };
1789
if res < 0 {
1790
Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1791
} else {
1792
let digest_size =
1793
// SAFETY: this value was initialized by us already and then overwritten by the kernel.
1794
// TODO: Replace with `MaybeUninit::slice_as_ptr` once it is stabilized.
1795
unsafe { addr_of!((*(buf.as_ptr() as *const fsverity_digest)).digest_size).read() };
1796
let outlen = size_of::<fsverity_digest>() as u32 + u32::from(digest_size);
1797
1798
// The kernel guarantees this but it doesn't hurt to be paranoid.
1799
debug_assert!(outlen <= (ROUNDED_LEN * size_of::<fsverity_digest>()) as u32);
1800
if digest.digest_size < digest_size || out_size < outlen {
1801
return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1802
libc::EOVERFLOW,
1803
))));
1804
}
1805
1806
let buf: [MaybeUninit<u8>; ROUNDED_LEN * size_of::<fsverity_digest>()] =
1807
// SAFETY: any bit pattern is valid for `MaybeUninit<u8>` and `fsverity_digest`
1808
// doesn't contain any references.
1809
unsafe { mem::transmute(buf) };
1810
1811
let buf =
1812
// SAFETY: Casting to `*const [u8]` is safe because the kernel guarantees that the
1813
// first `outlen` bytes of `buf` are initialized and `MaybeUninit<u8>` is guaranteed
1814
// to have the same layout as `u8`.
1815
// TODO: Replace with `MaybeUninit::slice_assume_init_ref` once it is stabilized.
1816
unsafe { &*(&buf[..outlen as usize] as *const [MaybeUninit<u8>] as *const [u8]) };
1817
Ok(IoctlReply::Done(Ok(buf.to_vec())))
1818
}
1819
}
1820
}
1821
1822
/// Runtime UID/GID mapping helpers driven by the entries in `permission_paths`.
#[cfg(feature = "fs_runtime_ugid_map")]
impl PassthroughFs {
    /// Returns whether `perm_data` is the mapping entry selected for `path`.
    ///
    /// For the root path only the entry whose `perm_path` is "/" applies. For any other path
    /// only non-"/" entries for which `need_set_permission(path)` is true apply.
    fn ugid_entry_applies(perm_data: &PermissionData, path: &str, is_root_path: bool) -> bool {
        if is_root_path {
            perm_data.perm_path == "/"
        } else {
            perm_data.perm_path != "/" && perm_data.need_set_permission(path)
        }
    }

    /// Applies the first mapping entry that matches `path` to `st`.
    ///
    /// Returns `true` if an entry matched and `st` was updated, `false` otherwise.
    fn find_and_set_ugid_permission(
        &self,
        st: &mut libc::stat64,
        path: &str,
        is_root_path: bool,
    ) -> bool {
        let perm_paths = self
            .permission_paths
            .read()
            .expect("acquire permission_paths read lock");

        match perm_paths
            .iter()
            .find(|pd| Self::ugid_entry_applies(pd, path, is_root_path))
        {
            Some(perm_data) => {
                self.set_permission_from_data(st, perm_data);
                true
            }
            None => false,
        }
    }

    /// Overwrites the owner, group and permission bits of `st` from `perm_data`.
    ///
    /// The file type bits of `st_mode` are kept; the permission bits become
    /// `0o777 & !umask`.
    fn set_permission_from_data(&self, st: &mut libc::stat64, perm_data: &PermissionData) {
        st.st_uid = perm_data.guest_uid;
        st.st_gid = perm_data.guest_gid;
        st.st_mode = (st.st_mode & libc::S_IFMT) | (0o777 & !perm_data.umask);
    }

    /// Set permission according to path.
    ///
    /// An empty `path` is treated as the root path. If no entry matches `path`, the entry
    /// whose `perm_path` is "/" is applied when one exists; otherwise `st` is left untouched.
    fn set_ugid_permission(&self, st: &mut libc::stat64, path: &str) {
        let is_root_path = path.is_empty();

        if self.find_and_set_ugid_permission(st, path, is_root_path) {
            return;
        }

        let perm_paths = self
            .permission_paths
            .read()
            .expect("acquire permission_paths read lock");
        if let Some(perm_data) = perm_paths.iter().find(|pd| pd.perm_path == "/") {
            self.set_permission_from_data(st, perm_data);
        }
    }

    /// Set host uid/gid to configured value according to path.
    ///
    /// Returns the host credentials of the entry matching `{parent path}/{name}`, falling back
    /// to the "/" entry and finally to the credentials in `ctx`.
    fn change_ugid_creds(&self, ctx: &Context, parent_data: &InodeData, name: &CStr) -> (u32, u32) {
        // `format!` only borrows the parent path, so no clone is needed.
        let path = format!(
            "{}/{}",
            parent_data.path,
            name.to_str().unwrap_or("<non UTF-8 str>")
        );

        // The formatted path always contains a '/', so this is never true here; it is kept so
        // the lookup below sees the same arguments as before.
        let is_root_path = path.is_empty();

        // Look the path up exactly once. Checking `is_some()` and then calling the lookup a
        // second time to `unwrap()` takes the read lock twice and could panic if the map is
        // rewritten in between.
        if let Some(creds) = self.find_ugid_creds_for_path(&path, is_root_path) {
            return creds;
        }

        let perm_paths = self
            .permission_paths
            .read()
            .expect("acquire permission_paths read lock");
        match perm_paths.iter().find(|pd| pd.perm_path == "/") {
            Some(perm_data) => (perm_data.host_uid, perm_data.host_gid),
            None => (ctx.uid, ctx.gid),
        }
    }

    /// Returns the host `(uid, gid)` of the first mapping entry that matches `path`, if any.
    fn find_ugid_creds_for_path(&self, path: &str, is_root_path: bool) -> Option<(u32, u32)> {
        let perm_paths = self
            .permission_paths
            .read()
            .expect("acquire permission_paths read lock");

        perm_paths
            .iter()
            .find(|pd| Self::ugid_entry_applies(pd, path, is_root_path))
            .map(|perm_data| (perm_data.host_uid, perm_data.host_gid))
    }
}
1918
1919
#[cfg(feature = "arc_quota")]
1920
impl PassthroughFs {
1921
/// Convert u8 slice to string
1922
fn string_from_u8_slice(&self, buf: &[u8]) -> io::Result<String> {
1923
match CStr::from_bytes_until_nul(buf).map(|s| s.to_string_lossy().to_string()) {
1924
Ok(s) => Ok(s),
1925
Err(e) => {
1926
error!("fail to convert u8 slice to string: {}", e);
1927
Err(io::Error::from_raw_os_error(libc::EINVAL))
1928
}
1929
}
1930
}
1931
1932
/// Set permission according to path
1933
fn set_permission(&self, st: &mut libc::stat64, path: &str) {
1934
for perm_data in self
1935
.permission_paths
1936
.read()
1937
.expect("acquire permission_paths read lock")
1938
.iter()
1939
{
1940
if perm_data.need_set_permission(path) {
1941
st.st_uid = perm_data.guest_uid;
1942
st.st_gid = perm_data.guest_gid;
1943
st.st_mode = (st.st_mode & libc::S_IFMT) | (0o777 & !perm_data.umask);
1944
}
1945
}
1946
}
1947
1948
/// Set host uid/gid to configured value according to path
1949
fn change_creds(&self, ctx: &Context, parent_data: &InodeData, name: &CStr) -> (u32, u32) {
1950
let path = format!(
1951
"{}/{}",
1952
parent_data.path.clone(),
1953
name.to_str().unwrap_or("<non UTF-8 str>")
1954
);
1955
1956
for perm_data in self
1957
.permission_paths
1958
.read()
1959
.expect("acquire permission_paths read lock")
1960
.iter()
1961
{
1962
if perm_data.need_set_permission(&path) {
1963
return (perm_data.host_uid, perm_data.host_gid);
1964
}
1965
}
1966
1967
(ctx.uid, ctx.gid)
1968
}
1969
1970
fn read_permission_data<R: io::Read>(&self, mut r: R) -> io::Result<PermissionData> {
1971
let mut fs_permission_data = FsPermissionDataBuffer::new_zeroed();
1972
r.read_exact(fs_permission_data.as_mut_bytes())?;
1973
1974
let perm_path = self.string_from_u8_slice(&fs_permission_data.perm_path)?;
1975
if !perm_path.starts_with('/') {
1976
error!("FS_IOC_SETPERMISSION: perm path must start with '/'");
1977
return Err(io::Error::from_raw_os_error(libc::EINVAL));
1978
}
1979
Ok(PermissionData {
1980
guest_uid: fs_permission_data.guest_uid,
1981
guest_gid: fs_permission_data.guest_gid,
1982
host_uid: fs_permission_data.host_uid,
1983
host_gid: fs_permission_data.host_gid,
1984
umask: fs_permission_data.umask,
1985
perm_path,
1986
})
1987
}
1988
1989
/// Sets uid/gid/umask for all files and directories under a specific path.
1990
///
1991
/// This ioctl does not correspond to any upstream FUSE feature. It is used for arcvm
1992
/// It associates the specified path with the provide uid, gid, and umask values within the
1993
/// filesystem metadata.
1994
///
1995
/// During subsequent lookup operations, the stored uid/gid/umask values are retrieved and
1996
/// applied to all files and directories found under the registered path. Before sending
1997
/// file stat information to the client, the uid and gid are substituted by `guest_uid` and
1998
/// `guest_gid` if the file falls under the registered path. The file mode is masked by the
1999
/// umask.
2000
///
2001
/// When the guest creates a file within the specified path, the file gid/uid stat in host
2002
/// will be overwritten to `host_uid` and `host_gid` values.
2003
///
2004
/// This functionality enables dynamic configuration of ownership and permissions for a
2005
/// specific directory hierarchy within the filesystem.
2006
///
2007
/// # Notes
2008
/// - This method affects all existing and future files under the registered path.
2009
/// - The original file ownership and permissions are overridden by the provided values.
2010
/// - The registered path should not be renamed
2011
/// - Refer go/remove-mount-passthrough-fuse for more design details
2012
fn set_permission_by_path<R: io::Read>(&self, r: R) -> IoctlReply {
2013
if self
2014
.permission_paths
2015
.read()
2016
.expect("acquire permission_paths read lock")
2017
.len()
2018
>= self.cfg.max_dynamic_perm
2019
{
2020
error!(
2021
"FS_IOC_SETPERMISSION exceeds limits of max_dynamic_perm: {}",
2022
self.cfg.max_dynamic_perm
2023
);
2024
return IoctlReply::Done(Err(io::Error::from_raw_os_error(libc::EPERM)));
2025
}
2026
2027
let perm_data = match self.read_permission_data(r) {
2028
Ok(data) => data,
2029
Err(e) => {
2030
error!("fail to read permission data: {}", e);
2031
return IoctlReply::Done(Err(e));
2032
}
2033
};
2034
2035
self.permission_paths
2036
.write()
2037
.expect("acquire permission_paths write lock")
2038
.push(perm_data);
2039
2040
IoctlReply::Done(Ok(Vec::new()))
2041
}
2042
2043
// Get xattr value according to path and name
2044
fn get_xattr_by_path(&self, path: &str, name: &str) -> Option<String> {
2045
self.xattr_paths
2046
.read()
2047
.expect("acquire permission_paths read lock")
2048
.iter()
2049
.find(|data| data.need_set_guest_xattr(path, name))
2050
.map(|data| data.xattr_value.clone())
2051
}
2052
2053
fn skip_host_set_xattr(&self, path: &str, name: &str) -> bool {
2054
self.get_xattr_by_path(path, name).is_some()
2055
}
2056
2057
fn read_xattr_data<R: io::Read>(&self, mut r: R) -> io::Result<XattrData> {
2058
let mut fs_path_xattr_data = FsPathXattrDataBuffer::new_zeroed();
2059
r.read_exact(fs_path_xattr_data.as_mut_bytes())?;
2060
2061
let xattr_path = self.string_from_u8_slice(&fs_path_xattr_data.path)?;
2062
if !xattr_path.starts_with('/') {
2063
error!("FS_IOC_SETPATHXATTR: perm path must start with '/'");
2064
return Err(io::Error::from_raw_os_error(libc::EINVAL));
2065
}
2066
let xattr_name = self.string_from_u8_slice(&fs_path_xattr_data.xattr_name)?;
2067
let xattr_value = self.string_from_u8_slice(&fs_path_xattr_data.xattr_value)?;
2068
2069
Ok(XattrData {
2070
xattr_path,
2071
xattr_name,
2072
xattr_value,
2073
})
2074
}
2075
2076
/// Sets xattr value for all files and directories under a specific path.
2077
///
2078
/// This ioctl does not correspond to any upstream FUSE feature. It is used for arcvm.
2079
/// It associates the specified path and xattr name with a value.
2080
///
2081
/// When the getxattr is called for the specified path and name, the predefined
2082
/// value is returned.
2083
///
2084
/// # Notes
2085
/// - This method affects all existing and future files under the registered path.
2086
/// - The SECURITY_CONTEXT feature will be disabled if this ioctl is enabled.
2087
/// - The registered path should not be renamed
2088
/// - Refer go/remove-mount-passthrough-fuse for more design details
2089
fn set_xattr_by_path<R: io::Read>(&self, r: R) -> IoctlReply {
2090
if self
2091
.xattr_paths
2092
.read()
2093
.expect("acquire xattr_paths read lock")
2094
.len()
2095
>= self.cfg.max_dynamic_xattr
2096
{
2097
error!(
2098
"FS_IOC_SETPATHXATTR exceeds limits of max_dynamic_xattr: {}",
2099
self.cfg.max_dynamic_xattr
2100
);
2101
return IoctlReply::Done(Err(io::Error::from_raw_os_error(libc::EPERM)));
2102
}
2103
2104
let xattr_data = match self.read_xattr_data(r) {
2105
Ok(data) => data,
2106
Err(e) => {
2107
error!("fail to read xattr data: {}", e);
2108
return IoctlReply::Done(Err(e));
2109
}
2110
};
2111
2112
self.xattr_paths
2113
.write()
2114
.expect("acquire xattr_paths write lock")
2115
.push(xattr_data);
2116
2117
IoctlReply::Done(Ok(Vec::new()))
2118
}
2119
2120
fn do_getxattr_with_filter(
2121
&self,
2122
data: Arc<InodeData>,
2123
name: Cow<CStr>,
2124
buf: &mut [u8],
2125
) -> io::Result<usize> {
2126
let res: usize = match self.get_xattr_by_path(&data.path, &name.to_string_lossy()) {
2127
Some(predifined_xattr) => {
2128
let x = predifined_xattr.into_bytes();
2129
if x.len() > buf.len() {
2130
return Err(io::Error::from_raw_os_error(libc::ERANGE));
2131
}
2132
buf[..x.len()].copy_from_slice(&x);
2133
x.len()
2134
}
2135
None => self.do_getxattr(&data, &name, &mut buf[..])?,
2136
};
2137
Ok(res)
2138
}
2139
2140
/// Looks up the host uid according to the path of file that inode is referring to.
2141
fn lookup_host_uid(&self, ctx: &Context, inode: Inode) -> u32 {
2142
if let Ok(inode_data) = self.find_inode(inode) {
2143
let path = &inode_data.path;
2144
for perm_data in self
2145
.permission_paths
2146
.read()
2147
.expect("acquire permission_paths read lock")
2148
.iter()
2149
{
2150
if perm_data.need_set_permission(path) {
2151
return perm_data.host_uid;
2152
}
2153
}
2154
}
2155
ctx.uid
2156
}
2157
}
2158
2159
/// Decrements the refcount of the inode.
2160
/// Returns `true` if the refcount became 0.
2161
fn forget_one(
2162
inodes: &mut MultikeyBTreeMap<Inode, InodeAltKey, Arc<InodeData>>,
2163
inode: Inode,
2164
count: u64,
2165
) -> bool {
2166
if let Some(data) = inodes.get(&inode) {
2167
// Acquiring the write lock on the inode map prevents new lookups from incrementing the
2168
// refcount but there is the possibility that a previous lookup already acquired a
2169
// reference to the inode data and is in the process of updating the refcount so we need
2170
// to loop here until we can decrement successfully.
2171
loop {
2172
let refcount = data.refcount.load(Ordering::Relaxed);
2173
2174
// Saturating sub because it doesn't make sense for a refcount to go below zero and
2175
// we don't want misbehaving clients to cause integer overflow.
2176
let new_count = refcount.saturating_sub(count);
2177
2178
// Synchronizes with the acquire load in `do_lookup`.
2179
if data
2180
.refcount
2181
.compare_exchange_weak(refcount, new_count, Ordering::Release, Ordering::Relaxed)
2182
.is_ok()
2183
{
2184
if new_count == 0 {
2185
// We just removed the last refcount for this inode. There's no need for an
2186
// acquire fence here because we hold a write lock on the inode map and any
2187
// thread that is waiting to do a forget on the same inode will have to wait
2188
// until we release the lock. So there's is no other release store for us to
2189
// synchronize with before deleting the entry.
2190
inodes.remove(&inode);
2191
return true;
2192
}
2193
break;
2194
}
2195
}
2196
}
2197
false
2198
}
2199
2200
// Strips any `user.virtiofs.` prefix from `buf`. If buf contains one or more nul-bytes, each
2201
// nul-byte-separated slice is treated as a C string and the prefix is stripped from each one.
2202
fn strip_xattr_prefix(buf: &mut Vec<u8>) {
2203
fn next_cstr(b: &[u8], start: usize) -> Option<&[u8]> {
2204
if start >= b.len() {
2205
return None;
2206
}
2207
2208
let end = b[start..]
2209
.iter()
2210
.position(|&c| c == b'\0')
2211
.map(|p| start + p + 1)
2212
.unwrap_or(b.len());
2213
2214
Some(&b[start..end])
2215
}
2216
2217
let mut pos = 0;
2218
while let Some(name) = next_cstr(buf, pos) {
2219
if !name.starts_with(USER_VIRTIOFS_XATTR) {
2220
pos += name.len();
2221
continue;
2222
}
2223
2224
let newlen = name.len() - USER_VIRTIOFS_XATTR.len();
2225
buf.drain(pos..pos + USER_VIRTIOFS_XATTR.len());
2226
pos += newlen;
2227
}
2228
}
2229
2230
impl Drop for PassthroughFs {
2231
/// The `Drop` implementation for this struct intentionally leaks all open file descriptors.
2232
/// It sets the `unsafe_leak_fd` flag on all `InodeData` and `HandleData` instances, which
2233
/// causes their `drop` implementations to forget the underlying `File` objects.
2234
///
2235
/// This is a deliberate performance optimization for abrupt shutdowns. It relies on the
2236
/// operating system to clean up the file descriptors when the process terminates. It is
2237
/// **critical** that an instance of `PassthroughFs` is only dropped immediately prior to
2238
/// process termination.
2239
fn drop(&mut self) {
2240
let inodes = self.inodes.lock();
2241
inodes.apply(|v| {
2242
v.set_unsafe_leak_fd();
2243
});
2244
let handles = self.handles.lock();
2245
handles.values().for_each(|v| v.set_unsafe_leak_fd());
2246
}
2247
}
2248
2249
impl FileSystem for PassthroughFs {
2250
type Inode = Inode;
2251
type Handle = Handle;
2252
type DirIter = ReadDir<Box<[u8]>>;
2253
2254
fn init(&self, capable: FsOptions) -> io::Result<FsOptions> {
2255
let root = CString::new(self.root_dir.clone())
2256
.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
2257
2258
let flags = libc::O_DIRECTORY | libc::O_NOFOLLOW | libc::O_CLOEXEC;
2259
// SAFETY: this doesn't modify any memory and we check the return value.
2260
let raw_descriptor = unsafe { libc::openat64(libc::AT_FDCWD, root.as_ptr(), flags) };
2261
if raw_descriptor < 0 {
2262
return Err(io::Error::last_os_error());
2263
}
2264
2265
// SAFETY: safe because we just opened this descriptor above.
2266
let f = unsafe { File::from_raw_descriptor(raw_descriptor) };
2267
2268
let st = stat(&f)?;
2269
2270
// SAFETY: this doesn't modify any memory and there is no need to check the return
2271
// value because this system call always succeeds. We need to clear the umask here because
2272
// we want the client to be able to set all the bits in the mode.
2273
unsafe { libc::umask(0o000) };
2274
2275
let mut inodes = self.inodes.lock();
2276
2277
// Not sure why the root inode gets a refcount of 2 but that's what libfuse does.
2278
inodes.insert(
2279
ROOT_ID,
2280
InodeAltKey {
2281
ino: st.st_ino,
2282
dev: st.st_dev,
2283
},
2284
Arc::new(InodeData {
2285
inode: ROOT_ID,
2286
file: Mutex::new(OpenedFile::new(f, flags)),
2287
refcount: AtomicU64::new(2),
2288
filetype: st.st_mode.into(),
2289
path: "".to_string(),
2290
unsafe_leak_fd: AtomicBool::new(false),
2291
}),
2292
);
2293
2294
let mut opts = FsOptions::DO_READDIRPLUS
2295
| FsOptions::READDIRPLUS_AUTO
2296
| FsOptions::EXPORT_SUPPORT
2297
| FsOptions::DONT_MASK
2298
| FsOptions::CACHE_SYMLINKS;
2299
2300
// Device using dynamic xattr feature will have different security context in
2301
// host and guests. The SECURITY_CONTEXT feature should not be enabled in the
2302
// device.
2303
if self.cfg.max_dynamic_xattr == 0 && self.cfg.security_ctx {
2304
opts |= FsOptions::SECURITY_CONTEXT;
2305
}
2306
2307
if self.cfg.posix_acl {
2308
opts |= FsOptions::POSIX_ACL;
2309
}
2310
if self.cfg.writeback && capable.contains(FsOptions::WRITEBACK_CACHE) {
2311
opts |= FsOptions::WRITEBACK_CACHE;
2312
self.writeback.store(true, Ordering::Relaxed);
2313
}
2314
if self.cfg.cache_policy == CachePolicy::Always {
2315
if capable.contains(FsOptions::ZERO_MESSAGE_OPEN) {
2316
opts |= FsOptions::ZERO_MESSAGE_OPEN;
2317
self.zero_message_open.store(true, Ordering::Relaxed);
2318
}
2319
if capable.contains(FsOptions::ZERO_MESSAGE_OPENDIR) {
2320
opts |= FsOptions::ZERO_MESSAGE_OPENDIR;
2321
self.zero_message_opendir.store(true, Ordering::Relaxed);
2322
}
2323
}
2324
Ok(opts)
2325
}
2326
2327
fn destroy(&self) {
2328
cros_tracing::trace_simple_print!(VirtioFs, "{:?}: destroy", self);
2329
self.handles.lock().clear();
2330
self.inodes.lock().clear();
2331
}
2332
2333
/// Returns the filesystem statistics reported by `fstatvfs64` for `inode`.
///
/// # Errors
/// Fails if `inode` is unknown or the system call fails.
fn statfs(&self, _ctx: Context, inode: Inode) -> io::Result<libc::statvfs64> {
    let _trace = fs_trace!(self.tag, "statfs", inode);
    let inode_data = self.find_inode(inode)?;

    let mut stats = MaybeUninit::<libc::statvfs64>::zeroed();

    // SAFETY: this will only modify `stats` and we check the return value.
    syscall!(unsafe { libc::fstatvfs64(inode_data.as_raw_descriptor(), stats.as_mut_ptr()) })?;

    // SAFETY: the kernel guarantees that `stats` has been initialized.
    let stats = unsafe { stats.assume_init() };
    Ok(stats)
}
2345
2346
fn lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<Entry> {
2347
let data = self.find_inode(parent)?;
2348
#[allow(unused_variables)]
2349
let path = format!(
2350
"{}/{}",
2351
data.path,
2352
name.to_str().unwrap_or("<non UTF-8 path>")
2353
);
2354
let _trace = fs_trace!(self.tag, "lookup", parent, path);
2355
2356
let mut res = self.do_lookup_with_casefold_fallback(&data, name);
2357
2358
// FUSE takes a inode=0 as a request to do negative dentry cache.
2359
// So, if `negative_timeout` is set, return success with the timeout value and inode=0 as a
2360
// response.
2361
if let Err(e) = &res {
2362
if e.kind() == std::io::ErrorKind::NotFound && !self.cfg.negative_timeout.is_zero() {
2363
res = Ok(Entry::new_negative(self.cfg.negative_timeout));
2364
}
2365
}
2366
2367
res
2368
}
2369
2370
fn forget(&self, _ctx: Context, inode: Inode, count: u64) {
2371
let _trace = fs_trace!(self.tag, "forget", inode, count);
2372
let mut inodes = self.inodes.lock();
2373
let caches = self.lock_casefold_lookup_caches();
2374
if forget_one(&mut inodes, inode, count) {
2375
if let Some(mut c) = caches {
2376
c.forget(inode);
2377
}
2378
}
2379
}
2380
2381
fn batch_forget(&self, _ctx: Context, requests: Vec<(Inode, u64)>) {
2382
let mut inodes = self.inodes.lock();
2383
let mut caches = self.lock_casefold_lookup_caches();
2384
for (inode, count) in requests {
2385
if forget_one(&mut inodes, inode, count) {
2386
if let Some(c) = caches.as_mut() {
2387
c.forget(inode);
2388
}
2389
}
2390
}
2391
}
2392
2393
fn opendir(
2394
&self,
2395
_ctx: Context,
2396
inode: Inode,
2397
flags: u32,
2398
) -> io::Result<(Option<Handle>, OpenOptions)> {
2399
let _trace = fs_trace!(self.tag, "opendir", inode, flags);
2400
if self.zero_message_opendir.load(Ordering::Relaxed) {
2401
Err(io::Error::from_raw_os_error(libc::ENOSYS))
2402
} else {
2403
self.do_open(inode, flags | (libc::O_DIRECTORY as u32))
2404
}
2405
}
2406
2407
fn releasedir(
2408
&self,
2409
_ctx: Context,
2410
inode: Inode,
2411
_flags: u32,
2412
handle: Handle,
2413
) -> io::Result<()> {
2414
let _trace = fs_trace!(self.tag, "releasedir", inode, handle);
2415
if self.zero_message_opendir.load(Ordering::Relaxed) {
2416
Ok(())
2417
} else {
2418
self.do_release(inode, handle)
2419
}
2420
}
2421
2422
fn mkdir(
2423
&self,
2424
ctx: Context,
2425
parent: Inode,
2426
name: &CStr,
2427
mode: u32,
2428
umask: u32,
2429
security_ctx: Option<&CStr>,
2430
) -> io::Result<Entry> {
2431
let _trace = fs_trace!(self.tag, "mkdir", parent, name, mode, umask, security_ctx);
2432
let data = self.find_inode(parent)?;
2433
2434
let _ctx = security_ctx
2435
.filter(|ctx| *ctx != UNLABELED_CSTR)
2436
.map(|ctx| ScopedSecurityContext::new(&self.proc, ctx))
2437
.transpose()?;
2438
2439
#[allow(unused_variables)]
2440
#[cfg(feature = "arc_quota")]
2441
let (uid, gid) = self.change_creds(&ctx, &data, name);
2442
#[cfg(feature = "fs_runtime_ugid_map")]
2443
let (uid, gid) = self.change_ugid_creds(&ctx, &data, name);
2444
#[cfg(not(feature = "fs_permission_translation"))]
2445
let (uid, gid) = (ctx.uid, ctx.gid);
2446
2447
let (_uid, _gid) = set_creds(uid, gid)?;
2448
{
2449
let casefold_cache = self.lock_casefold_lookup_caches();
2450
let _scoped_umask = ScopedUmask::new(umask);
2451
2452
// SAFETY: this doesn't modify any memory and we check the return value.
2453
syscall!(unsafe { libc::mkdirat(data.as_raw_descriptor(), name.as_ptr(), mode) })?;
2454
if let Some(mut c) = casefold_cache {
2455
c.insert(data.inode, name);
2456
}
2457
}
2458
self.do_lookup(&data, name)
2459
}
2460
2461
fn rmdir(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> {
2462
let _trace = fs_trace!(self.tag, "rmdir", parent, name);
2463
let data = self.find_inode(parent)?;
2464
let casefold_cache = self.lock_casefold_lookup_caches();
2465
// TODO(b/278691962): If ascii_casefold is enabled, we need to call
2466
// `get_case_unfolded_name()` to get the actual name to be unlinked.
2467
self.do_unlink(&data, name, libc::AT_REMOVEDIR)?;
2468
if let Some(mut c) = casefold_cache {
2469
c.remove(data.inode, name);
2470
}
2471
Ok(())
2472
}
2473
2474
fn readdir(
2475
&self,
2476
_ctx: Context,
2477
inode: Inode,
2478
handle: Handle,
2479
size: u32,
2480
offset: u64,
2481
) -> io::Result<Self::DirIter> {
2482
let _trace = fs_trace!(self.tag, "readdir", inode, handle, size, offset);
2483
let buf = vec![0; size as usize].into_boxed_slice();
2484
2485
if self.zero_message_opendir.load(Ordering::Relaxed) {
2486
let data = self.find_inode(inode)?;
2487
ReadDir::new(&*data, offset as libc::off64_t, buf)
2488
} else {
2489
let data = self.find_handle(handle, inode)?;
2490
2491
let dir = data.file.lock();
2492
2493
ReadDir::new(&*dir, offset as libc::off64_t, buf)
2494
}
2495
}
2496
2497
fn open(
2498
&self,
2499
_ctx: Context,
2500
inode: Inode,
2501
flags: u32,
2502
) -> io::Result<(Option<Handle>, OpenOptions)> {
2503
if self.zero_message_open.load(Ordering::Relaxed) {
2504
let _trace = fs_trace!(self.tag, "open (zero-message)", inode, flags);
2505
Err(io::Error::from_raw_os_error(libc::ENOSYS))
2506
} else {
2507
let _trace = fs_trace!(self.tag, "open", inode, flags);
2508
self.do_open(inode, flags)
2509
}
2510
}
2511
2512
fn release(
2513
&self,
2514
_ctx: Context,
2515
inode: Inode,
2516
_flags: u32,
2517
handle: Handle,
2518
_flush: bool,
2519
_flock_release: bool,
2520
_lock_owner: Option<u64>,
2521
) -> io::Result<()> {
2522
if self.zero_message_open.load(Ordering::Relaxed) {
2523
let _trace = fs_trace!(self.tag, "release (zero-message)", inode, handle);
2524
Ok(())
2525
} else {
2526
let _trace = fs_trace!(self.tag, "release", inode, handle);
2527
self.do_release(inode, handle)
2528
}
2529
}
2530
2531
fn chromeos_tmpfile(
2532
&self,
2533
ctx: Context,
2534
parent: Self::Inode,
2535
mode: u32,
2536
umask: u32,
2537
security_ctx: Option<&CStr>,
2538
) -> io::Result<Entry> {
2539
let _trace = fs_trace!(
2540
self.tag,
2541
"chromeos_tempfile",
2542
parent,
2543
mode,
2544
umask,
2545
security_ctx
2546
);
2547
let data = self.find_inode(parent)?;
2548
2549
let _ctx = security_ctx
2550
.filter(|ctx| *ctx != UNLABELED_CSTR)
2551
.map(|ctx| ScopedSecurityContext::new(&self.proc, ctx))
2552
.transpose()?;
2553
2554
let tmpflags = libc::O_RDWR | libc::O_TMPFILE | libc::O_CLOEXEC | libc::O_NOFOLLOW;
2555
2556
let current_dir = c".";
2557
2558
#[allow(unused_variables)]
2559
#[cfg(feature = "arc_quota")]
2560
let (uid, gid) = self.change_creds(&ctx, &data, current_dir);
2561
#[cfg(feature = "fs_runtime_ugid_map")]
2562
let (uid, gid) = self.change_ugid_creds(&ctx, &data, current_dir);
2563
#[cfg(not(feature = "fs_permission_translation"))]
2564
let (uid, gid) = (ctx.uid, ctx.gid);
2565
2566
let (_uid, _gid) = set_creds(uid, gid)?;
2567
2568
let fd = {
2569
let _scoped_umask = ScopedUmask::new(umask);
2570
2571
// SAFETY: this doesn't modify any memory and we check the return value.
2572
syscall!(unsafe {
2573
libc::openat64(
2574
data.as_raw_descriptor(),
2575
current_dir.as_ptr(),
2576
tmpflags,
2577
mode,
2578
)
2579
})?
2580
};
2581
// No need to add casefold_cache becuase we created an anonymous file.
2582
2583
// SAFETY: safe because we just opened this fd.
2584
let tmpfile = unsafe { File::from_raw_descriptor(fd) };
2585
let st = stat(&tmpfile)?;
2586
let path = format!(
2587
"{}/{}",
2588
data.path.clone(),
2589
current_dir.to_str().unwrap_or("<non UTF-8 str>")
2590
);
2591
Ok(self.add_entry(tmpfile, st, tmpflags, path))
2592
}
2593
2594
fn create(
2595
&self,
2596
ctx: Context,
2597
parent: Inode,
2598
name: &CStr,
2599
mode: u32,
2600
flags: u32,
2601
umask: u32,
2602
security_ctx: Option<&CStr>,
2603
) -> io::Result<(Entry, Option<Handle>, OpenOptions)> {
2604
let _trace = fs_trace!(
2605
self.tag,
2606
"create",
2607
parent,
2608
name,
2609
mode,
2610
flags,
2611
umask,
2612
security_ctx
2613
);
2614
let data = self.find_inode(parent)?;
2615
2616
let _ctx = security_ctx
2617
.filter(|ctx| *ctx != UNLABELED_CSTR)
2618
.map(|ctx| ScopedSecurityContext::new(&self.proc, ctx))
2619
.transpose()?;
2620
2621
#[allow(unused_variables)]
2622
#[cfg(feature = "arc_quota")]
2623
let (uid, gid) = self.change_creds(&ctx, &data, name);
2624
#[cfg(feature = "fs_runtime_ugid_map")]
2625
let (uid, gid) = self.change_ugid_creds(&ctx, &data, name);
2626
#[cfg(not(feature = "fs_permission_translation"))]
2627
let (uid, gid) = (ctx.uid, ctx.gid);
2628
2629
let (_uid, _gid) = set_creds(uid, gid)?;
2630
2631
let flags = self.update_open_flags(flags as i32);
2632
let create_flags =
2633
(flags | libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW) & !libc::O_DIRECT;
2634
2635
let fd = {
2636
let _scoped_umask = ScopedUmask::new(umask);
2637
let casefold_cache = self.lock_casefold_lookup_caches();
2638
2639
// SAFETY: this doesn't modify any memory and we check the return value. We don't really
2640
// check `flags` because if the kernel can't handle poorly specified flags then we have
2641
// much bigger problems.
2642
// TODO(b/278691962): If ascii_casefold is enabled, we need to call
2643
// `get_case_unfolded_name()` to get the actual name to be created.
2644
let fd = syscall!(unsafe {
2645
libc::openat64(data.as_raw_descriptor(), name.as_ptr(), create_flags, mode)
2646
})?;
2647
if let Some(mut c) = casefold_cache {
2648
c.insert(parent, name);
2649
}
2650
fd
2651
};
2652
2653
// SAFETY: safe because we just opened this fd.
2654
let file = unsafe { File::from_raw_descriptor(fd) };
2655
2656
let st = stat(&file)?;
2657
let path = format!(
2658
"{}/{}",
2659
data.path.clone(),
2660
name.to_str().unwrap_or("<non UTF-8 str>")
2661
);
2662
let entry = self.add_entry(file, st, create_flags, path);
2663
2664
let (handle, opts) = if self.zero_message_open.load(Ordering::Relaxed) {
2665
(None, OpenOptions::KEEP_CACHE)
2666
} else {
2667
self.do_open_at(
2668
data,
2669
name,
2670
entry.inode,
2671
flags as u32 & !((libc::O_CREAT | libc::O_EXCL | libc::O_NOCTTY) as u32),
2672
)
2673
.inspect_err(|_e| {
2674
// Don't leak the entry.
2675
self.forget(ctx, entry.inode, 1);
2676
})?
2677
};
2678
Ok((entry, handle, opts))
2679
}
2680
2681
fn unlink(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> {
2682
let _trace = fs_trace!(self.tag, "unlink", parent, name);
2683
let data = self.find_inode(parent)?;
2684
let casefold_cache = self.lock_casefold_lookup_caches();
2685
// TODO(b/278691962): If ascii_casefold is enabled, we need to call
2686
// `get_case_unfolded_name()` to get the actual name to be unlinked.
2687
self.do_unlink(&data, name, 0)?;
2688
if let Some(mut c) = casefold_cache {
2689
c.remove(data.inode, name);
2690
}
2691
Ok(())
2692
}
2693
2694
fn read<W: io::Write + ZeroCopyWriter>(
2695
&self,
2696
_ctx: Context,
2697
inode: Inode,
2698
handle: Handle,
2699
mut w: W,
2700
size: u32,
2701
offset: u64,
2702
_lock_owner: Option<u64>,
2703
_flags: u32,
2704
) -> io::Result<usize> {
2705
if self.zero_message_open.load(Ordering::Relaxed) {
2706
let _trace = fs_trace!(self.tag, "read (zero-message)", inode, handle, size, offset);
2707
let data = self.find_inode(inode)?;
2708
2709
let mut file = data.file.lock();
2710
let mut flags = file.open_flags;
2711
match flags & libc::O_ACCMODE {
2712
libc::O_WRONLY => {
2713
flags &= !libc::O_WRONLY;
2714
flags |= libc::O_RDWR;
2715
2716
// We need to get a readable handle for this file.
2717
let newfile = self.open_fd(file.as_raw_descriptor(), libc::O_RDWR)?;
2718
*file = OpenedFile::new(newfile, flags);
2719
}
2720
libc::O_RDONLY | libc::O_RDWR => {}
2721
_ => panic!("Unexpected flags: {flags:#x}"),
2722
}
2723
2724
w.write_from(file.file_mut(), size as usize, offset)
2725
} else {
2726
let _trace = fs_trace!(self.tag, "read", inode, handle, size, offset);
2727
let data = self.find_handle(handle, inode)?;
2728
2729
let mut f = data.file.lock();
2730
w.write_from(f.file_mut(), size as usize, offset)
2731
}
2732
}
2733
2734
fn write<R: io::Read + ZeroCopyReader>(
2735
&self,
2736
_ctx: Context,
2737
inode: Inode,
2738
handle: Handle,
2739
mut r: R,
2740
size: u32,
2741
offset: u64,
2742
_lock_owner: Option<u64>,
2743
_delayed_write: bool,
2744
flags: u32,
2745
) -> io::Result<usize> {
2746
// When the WRITE_KILL_PRIV flag is set, drop CAP_FSETID so that the kernel will
2747
// automatically clear the setuid and setgid bits for us.
2748
let _fsetid = if flags & WRITE_KILL_PRIV != 0 {
2749
Some(drop_cap_fsetid()?)
2750
} else {
2751
None
2752
};
2753
2754
if self.zero_message_open.load(Ordering::Relaxed) {
2755
let _trace = fs_trace!(
2756
self.tag,
2757
"write (zero-message)",
2758
inode,
2759
handle,
2760
size,
2761
offset
2762
);
2763
2764
let data = self.find_inode(inode)?;
2765
2766
let mut file = data.file.lock();
2767
let mut flags = file.open_flags;
2768
match flags & libc::O_ACCMODE {
2769
libc::O_RDONLY => {
2770
flags &= !libc::O_RDONLY;
2771
flags |= libc::O_RDWR;
2772
2773
// We need to get a writable handle for this file.
2774
let newfile = self.open_fd(file.as_raw_descriptor(), libc::O_RDWR)?;
2775
*file = OpenedFile::new(newfile, flags);
2776
}
2777
libc::O_WRONLY | libc::O_RDWR => {}
2778
_ => panic!("Unexpected flags: {flags:#x}"),
2779
}
2780
2781
r.read_to(file.file_mut(), size as usize, offset)
2782
} else {
2783
let _trace = fs_trace!(self.tag, "write", inode, handle, size, offset);
2784
2785
let data = self.find_handle(handle, inode)?;
2786
2787
let mut f = data.file.lock();
2788
r.read_to(f.file_mut(), size as usize, offset)
2789
}
2790
}
2791
2792
fn getattr(
2793
&self,
2794
_ctx: Context,
2795
inode: Inode,
2796
_handle: Option<Handle>,
2797
) -> io::Result<(libc::stat64, Duration)> {
2798
let _trace = fs_trace!(self.tag, "getattr", inode, _handle);
2799
2800
let data = self.find_inode(inode)?;
2801
self.do_getattr(&data)
2802
}
2803
2804
fn setattr(
2805
&self,
2806
_ctx: Context,
2807
inode: Inode,
2808
attr: libc::stat64,
2809
handle: Option<Handle>,
2810
valid: SetattrValid,
2811
) -> io::Result<(libc::stat64, Duration)> {
2812
let _trace = fs_trace!(self.tag, "setattr", inode, handle);
2813
let inode_data = self.find_inode(inode)?;
2814
2815
enum Data<'a> {
2816
Handle(MutexGuard<'a, OpenedFile>),
2817
ProcPath(CString),
2818
}
2819
2820
// If we have a handle then use it otherwise get a new fd from the inode.
2821
let hd;
2822
let data = if let Some(handle) = handle.filter(|&h| h != 0) {
2823
hd = self.find_handle(handle, inode)?;
2824
Data::Handle(hd.file.lock())
2825
} else {
2826
let pathname = CString::new(format!("self/fd/{}", inode_data.as_raw_descriptor()))
2827
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
2828
Data::ProcPath(pathname)
2829
};
2830
2831
if valid.contains(SetattrValid::MODE) {
2832
// SAFETY: this doesn't modify any memory and we check the return value.
2833
syscall!(unsafe {
2834
match data {
2835
Data::Handle(ref fd) => libc::fchmod(fd.as_raw_descriptor(), attr.st_mode),
2836
Data::ProcPath(ref p) => {
2837
libc::fchmodat(self.proc.as_raw_descriptor(), p.as_ptr(), attr.st_mode, 0)
2838
}
2839
}
2840
})?;
2841
}
2842
2843
if valid.intersects(SetattrValid::UID | SetattrValid::GID) {
2844
let uid = if valid.contains(SetattrValid::UID) {
2845
attr.st_uid
2846
} else {
2847
// Cannot use -1 here because these are unsigned values.
2848
u32::MAX
2849
};
2850
let gid = if valid.contains(SetattrValid::GID) {
2851
attr.st_gid
2852
} else {
2853
// Cannot use -1 here because these are unsigned values.
2854
u32::MAX
2855
};
2856
2857
// SAFETY: this doesn't modify any memory and we check the return value.
2858
syscall!(unsafe {
2859
libc::fchownat(
2860
inode_data.as_raw_descriptor(),
2861
EMPTY_CSTR.as_ptr(),
2862
uid,
2863
gid,
2864
libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW,
2865
)
2866
})?;
2867
}
2868
2869
if valid.contains(SetattrValid::SIZE) {
2870
syscall!(match data {
2871
Data::Handle(ref fd) => {
2872
// SAFETY: this doesn't modify any memory and we check the return value.
2873
unsafe { libc::ftruncate64(fd.as_raw_descriptor(), attr.st_size) }
2874
}
2875
_ => {
2876
// There is no `ftruncateat` so we need to get a new fd and truncate it.
2877
let f = self.open_inode(&inode_data, libc::O_NONBLOCK | libc::O_RDWR)?;
2878
// SAFETY: this doesn't modify any memory and we check the return value.
2879
unsafe { libc::ftruncate64(f.as_raw_descriptor(), attr.st_size) }
2880
}
2881
})?;
2882
}
2883
2884
if valid.intersects(SetattrValid::ATIME | SetattrValid::MTIME) {
2885
let mut tvs = [
2886
libc::timespec {
2887
tv_sec: 0,
2888
tv_nsec: libc::UTIME_OMIT,
2889
},
2890
libc::timespec {
2891
tv_sec: 0,
2892
tv_nsec: libc::UTIME_OMIT,
2893
},
2894
];
2895
2896
if valid.contains(SetattrValid::ATIME_NOW) {
2897
tvs[0].tv_nsec = libc::UTIME_NOW;
2898
} else if valid.contains(SetattrValid::ATIME) {
2899
tvs[0].tv_sec = attr.st_atime;
2900
tvs[0].tv_nsec = attr.st_atime_nsec;
2901
}
2902
2903
if valid.contains(SetattrValid::MTIME_NOW) {
2904
tvs[1].tv_nsec = libc::UTIME_NOW;
2905
} else if valid.contains(SetattrValid::MTIME) {
2906
tvs[1].tv_sec = attr.st_mtime;
2907
tvs[1].tv_nsec = attr.st_mtime_nsec;
2908
}
2909
2910
// SAFETY: this doesn't modify any memory and we check the return value.
2911
syscall!(unsafe {
2912
match data {
2913
Data::Handle(ref fd) => libc::futimens(fd.as_raw_descriptor(), tvs.as_ptr()),
2914
Data::ProcPath(ref p) => {
2915
libc::utimensat(self.proc.as_raw_descriptor(), p.as_ptr(), tvs.as_ptr(), 0)
2916
}
2917
}
2918
})?;
2919
}
2920
2921
self.do_getattr(&inode_data)
2922
}
2923
2924
fn rename(
2925
&self,
2926
_ctx: Context,
2927
olddir: Inode,
2928
oldname: &CStr,
2929
newdir: Inode,
2930
newname: &CStr,
2931
flags: u32,
2932
) -> io::Result<()> {
2933
let _trace = fs_trace!(self.tag, "rename", olddir, oldname, newdir, newname, flags);
2934
2935
let old_inode = self.find_inode(olddir)?;
2936
let new_inode = self.find_inode(newdir)?;
2937
{
2938
let casefold_cache = self.lock_casefold_lookup_caches();
2939
2940
// SAFETY: this doesn't modify any memory and we check the return value.
2941
// TODO: Switch to libc::renameat2 once https://github.com/rust-lang/libc/pull/1508 lands
2942
// and we have glibc 2.28.
2943
syscall!(unsafe {
2944
libc::syscall(
2945
libc::SYS_renameat2,
2946
old_inode.as_raw_descriptor(),
2947
oldname.as_ptr(),
2948
new_inode.as_raw_descriptor(),
2949
newname.as_ptr(),
2950
flags,
2951
)
2952
})?;
2953
if let Some(mut c) = casefold_cache {
2954
c.remove(olddir, oldname);
2955
c.insert(newdir, newname);
2956
}
2957
}
2958
2959
Ok(())
2960
}
2961
2962
fn mknod(
2963
&self,
2964
ctx: Context,
2965
parent: Inode,
2966
name: &CStr,
2967
mode: u32,
2968
rdev: u32,
2969
umask: u32,
2970
security_ctx: Option<&CStr>,
2971
) -> io::Result<Entry> {
2972
let _trace = fs_trace!(
2973
self.tag,
2974
"mknod",
2975
parent,
2976
name,
2977
mode,
2978
rdev,
2979
umask,
2980
security_ctx
2981
);
2982
let data = self.find_inode(parent)?;
2983
2984
let _ctx = security_ctx
2985
.filter(|ctx| *ctx != UNLABELED_CSTR)
2986
.map(|ctx| ScopedSecurityContext::new(&self.proc, ctx))
2987
.transpose()?;
2988
2989
#[allow(unused_variables)]
2990
#[cfg(feature = "arc_quota")]
2991
let (uid, gid) = self.change_creds(&ctx, &data, name);
2992
#[cfg(feature = "fs_runtime_ugid_map")]
2993
let (uid, gid) = self.change_ugid_creds(&ctx, &data, name);
2994
#[cfg(not(feature = "fs_permission_translation"))]
2995
let (uid, gid) = (ctx.uid, ctx.gid);
2996
2997
let (_uid, _gid) = set_creds(uid, gid)?;
2998
{
2999
let _scoped_umask = ScopedUmask::new(umask);
3000
let casefold_cache = self.lock_casefold_lookup_caches();
3001
3002
// SAFETY: this doesn't modify any memory and we check the return value.
3003
syscall!(unsafe {
3004
libc::mknodat(
3005
data.as_raw_descriptor(),
3006
name.as_ptr(),
3007
mode as libc::mode_t,
3008
rdev as libc::dev_t,
3009
)
3010
})?;
3011
if let Some(mut c) = casefold_cache {
3012
c.insert(parent, name);
3013
}
3014
}
3015
3016
self.do_lookup(&data, name)
3017
}
3018
3019
fn link(
3020
&self,
3021
_ctx: Context,
3022
inode: Inode,
3023
newparent: Inode,
3024
newname: &CStr,
3025
) -> io::Result<Entry> {
3026
let _trace = fs_trace!(self.tag, "link", inode, newparent, newname);
3027
let data = self.find_inode(inode)?;
3028
let new_inode = self.find_inode(newparent)?;
3029
3030
let path = CString::new(format!("self/fd/{}", data.as_raw_descriptor()))
3031
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
3032
3033
{
3034
let casefold_cache = self.lock_casefold_lookup_caches();
3035
// SAFETY: this doesn't modify any memory and we check the return value.
3036
syscall!(unsafe {
3037
libc::linkat(
3038
self.proc.as_raw_descriptor(),
3039
path.as_ptr(),
3040
new_inode.as_raw_descriptor(),
3041
newname.as_ptr(),
3042
libc::AT_SYMLINK_FOLLOW,
3043
)
3044
})?;
3045
if let Some(mut c) = casefold_cache {
3046
c.insert(newparent, newname);
3047
}
3048
}
3049
3050
self.do_lookup(&new_inode, newname)
3051
}
3052
3053
fn symlink(
3054
&self,
3055
ctx: Context,
3056
linkname: &CStr,
3057
parent: Inode,
3058
name: &CStr,
3059
security_ctx: Option<&CStr>,
3060
) -> io::Result<Entry> {
3061
let _trace = fs_trace!(self.tag, "symlink", parent, linkname, name, security_ctx);
3062
let data = self.find_inode(parent)?;
3063
3064
let _ctx = security_ctx
3065
.filter(|ctx| *ctx != UNLABELED_CSTR)
3066
.map(|ctx| ScopedSecurityContext::new(&self.proc, ctx))
3067
.transpose()?;
3068
3069
#[allow(unused_variables)]
3070
#[cfg(feature = "arc_quota")]
3071
let (uid, gid) = self.change_creds(&ctx, &data, name);
3072
#[cfg(feature = "fs_runtime_ugid_map")]
3073
let (uid, gid) = self.change_ugid_creds(&ctx, &data, name);
3074
#[cfg(not(feature = "fs_permission_translation"))]
3075
let (uid, gid) = (ctx.uid, ctx.gid);
3076
3077
let (_uid, _gid) = set_creds(uid, gid)?;
3078
{
3079
let casefold_cache = self.lock_casefold_lookup_caches();
3080
// SAFETY: this doesn't modify any memory and we check the return value.
3081
syscall!(unsafe {
3082
libc::symlinkat(linkname.as_ptr(), data.as_raw_descriptor(), name.as_ptr())
3083
})?;
3084
if let Some(mut c) = casefold_cache {
3085
c.insert(parent, name);
3086
}
3087
}
3088
3089
self.do_lookup(&data, name)
3090
}
3091
3092
fn readlink(&self, _ctx: Context, inode: Inode) -> io::Result<Vec<u8>> {
3093
let _trace = fs_trace!(self.tag, "readlink", inode);
3094
let data = self.find_inode(inode)?;
3095
3096
let mut buf = vec![0; libc::PATH_MAX as usize];
3097
3098
// SAFETY: this will only modify the contents of `buf` and we check the return value.
3099
let res = syscall!(unsafe {
3100
libc::readlinkat(
3101
data.as_raw_descriptor(),
3102
EMPTY_CSTR.as_ptr(),
3103
buf.as_mut_ptr() as *mut libc::c_char,
3104
buf.len(),
3105
)
3106
})?;
3107
3108
buf.resize(res as usize, 0);
3109
3110
#[cfg(feature = "fs_runtime_ugid_map")]
3111
{
3112
let link_target = Path::new(OsStr::from_bytes(&buf[..res as usize]));
3113
if !link_target.starts_with(&self.root_dir) {
3114
return Err(io::Error::new(
3115
io::ErrorKind::InvalidInput,
3116
"Symbolic link points outside of root_dir",
3117
));
3118
}
3119
}
3120
Ok(buf)
3121
}
3122
3123
fn flush(
3124
&self,
3125
_ctx: Context,
3126
inode: Inode,
3127
handle: Handle,
3128
_lock_owner: u64,
3129
) -> io::Result<()> {
3130
let _trace = fs_trace!(self.tag, "flush", inode, handle);
3131
let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
3132
self.find_inode(inode)?
3133
} else {
3134
self.find_handle(handle, inode)?
3135
};
3136
3137
// SAFETY:
3138
// Since this method is called whenever an fd is closed in the client, we can emulate that
3139
// behavior by doing the same thing (dup-ing the fd and then immediately closing it). Safe
3140
// because this doesn't modify any memory and we check the return values.
3141
unsafe {
3142
let newfd = syscall!(libc::fcntl(
3143
data.as_raw_descriptor(),
3144
libc::F_DUPFD_CLOEXEC,
3145
0
3146
))?;
3147
3148
syscall!(libc::close(newfd))?;
3149
}
3150
Ok(())
3151
}
3152
3153
fn fsync(&self, _ctx: Context, inode: Inode, datasync: bool, handle: Handle) -> io::Result<()> {
3154
if self.zero_message_open.load(Ordering::Relaxed) {
3155
let _trace = fs_trace!(self.tag, "fsync (zero-message)", inode, datasync, handle);
3156
let data = self.find_inode(inode)?;
3157
self.do_fsync(&*data, datasync)
3158
} else {
3159
let _trace = fs_trace!(self.tag, "fsync", inode, datasync, handle);
3160
let data = self.find_handle(handle, inode)?;
3161
3162
let file = data.file.lock();
3163
self.do_fsync(&*file, datasync)
3164
}
3165
}
3166
3167
fn fsyncdir(
3168
&self,
3169
_ctx: Context,
3170
inode: Inode,
3171
datasync: bool,
3172
handle: Handle,
3173
) -> io::Result<()> {
3174
if self.zero_message_opendir.load(Ordering::Relaxed) {
3175
let _trace = fs_trace!(self.tag, "fsyncdir (zero-message)", inode, datasync, handle);
3176
let data = self.find_inode(inode)?;
3177
self.do_fsync(&*data, datasync)
3178
} else {
3179
let _trace = fs_trace!(self.tag, "fsyncdir", inode, datasync, handle);
3180
let data = self.find_handle(handle, inode)?;
3181
3182
let file = data.file.lock();
3183
self.do_fsync(&*file, datasync)
3184
}
3185
}
3186
3187
fn access(&self, ctx: Context, inode: Inode, mask: u32) -> io::Result<()> {
3188
let _trace = fs_trace!(self.tag, "access", inode, mask);
3189
let data = self.find_inode(inode)?;
3190
3191
let st = stat(&*data)?;
3192
let mode = mask as i32 & (libc::R_OK | libc::W_OK | libc::X_OK);
3193
3194
if mode == libc::F_OK {
3195
// The file exists since we were able to call `stat(2)` on it.
3196
return Ok(());
3197
}
3198
3199
if (mode & libc::R_OK) != 0 {
3200
if ctx.uid != 0
3201
&& (st.st_uid != ctx.uid || st.st_mode & 0o400 == 0)
3202
&& (st.st_gid != ctx.gid || st.st_mode & 0o040 == 0)
3203
&& st.st_mode & 0o004 == 0
3204
{
3205
return Err(io::Error::from_raw_os_error(libc::EACCES));
3206
}
3207
}
3208
3209
if (mode & libc::W_OK) != 0 {
3210
if ctx.uid != 0
3211
&& (st.st_uid != ctx.uid || st.st_mode & 0o200 == 0)
3212
&& (st.st_gid != ctx.gid || st.st_mode & 0o020 == 0)
3213
&& st.st_mode & 0o002 == 0
3214
{
3215
return Err(io::Error::from_raw_os_error(libc::EACCES));
3216
}
3217
}
3218
3219
// root can only execute something if it is executable by one of the owner, the group, or
3220
// everyone.
3221
if (mode & libc::X_OK) != 0 {
3222
if (ctx.uid != 0 || st.st_mode & 0o111 == 0)
3223
&& (st.st_uid != ctx.uid || st.st_mode & 0o100 == 0)
3224
&& (st.st_gid != ctx.gid || st.st_mode & 0o010 == 0)
3225
&& st.st_mode & 0o001 == 0
3226
{
3227
return Err(io::Error::from_raw_os_error(libc::EACCES));
3228
}
3229
}
3230
3231
Ok(())
3232
}
3233
3234
fn setxattr(
3235
&self,
3236
_ctx: Context,
3237
inode: Inode,
3238
name: &CStr,
3239
value: &[u8],
3240
flags: u32,
3241
) -> io::Result<()> {
3242
let _trace = fs_trace!(self.tag, "setxattr", inode, name, flags);
3243
// We can't allow the VM to set this xattr because an unprivileged process may use it to set
3244
// a privileged xattr.
3245
if self.cfg.rewrite_security_xattrs && name.to_bytes().starts_with(USER_VIRTIOFS_XATTR) {
3246
return Err(io::Error::from_raw_os_error(libc::EPERM));
3247
}
3248
3249
let data = self.find_inode(inode)?;
3250
let name = self.rewrite_xattr_name(name);
3251
3252
#[cfg(feature = "arc_quota")]
3253
if self.skip_host_set_xattr(&data.path, &name.to_string_lossy()) {
3254
debug!(
3255
"ignore setxattr for path:{} xattr_name:{}",
3256
&data.path,
3257
&name.to_string_lossy()
3258
);
3259
return Ok(());
3260
}
3261
3262
let file = data.file.lock();
3263
let o_path_file = (file.open_flags & libc::O_PATH) != 0;
3264
if o_path_file {
3265
// For FDs opened with `O_PATH`, we cannot call `fsetxattr` normally. Instead we emulate
3266
// an _at syscall by changing the CWD to /proc, running the path based syscall, and then
3267
// setting the CWD back to the root directory.
3268
let path = CString::new(format!("self/fd/{}", file.as_raw_descriptor()))
3269
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
3270
3271
syscall!(self.with_proc_chdir(|| {
3272
// SAFETY: this doesn't modify any memory and we check the return value.
3273
unsafe {
3274
libc::setxattr(
3275
path.as_ptr(),
3276
name.as_ptr(),
3277
value.as_ptr() as *const libc::c_void,
3278
value.len() as libc::size_t,
3279
flags as c_int,
3280
)
3281
}
3282
}))?;
3283
} else {
3284
syscall!(
3285
// For regular files and directories, we can just use fsetxattr.
3286
// SAFETY: this doesn't modify any memory and we check the return value.
3287
unsafe {
3288
libc::fsetxattr(
3289
file.as_raw_descriptor(),
3290
name.as_ptr(),
3291
value.as_ptr() as *const libc::c_void,
3292
value.len() as libc::size_t,
3293
flags as c_int,
3294
)
3295
}
3296
)?;
3297
}
3298
3299
Ok(())
3300
}
3301
3302
fn getxattr(
3303
&self,
3304
_ctx: Context,
3305
inode: Inode,
3306
name: &CStr,
3307
size: u32,
3308
) -> io::Result<GetxattrReply> {
3309
let _trace = fs_trace!(self.tag, "getxattr", inode, name, size);
3310
// We don't allow the VM to set this xattr so we also pretend there is no value associated
3311
// with it.
3312
if self.cfg.rewrite_security_xattrs && name.to_bytes().starts_with(USER_VIRTIOFS_XATTR) {
3313
return Err(io::Error::from_raw_os_error(libc::ENODATA));
3314
}
3315
3316
let data = self.find_inode(inode)?;
3317
let name = self.rewrite_xattr_name(name);
3318
let mut buf = vec![0u8; size as usize];
3319
3320
#[cfg(feature = "arc_quota")]
3321
let res = self.do_getxattr_with_filter(data, name, &mut buf)?;
3322
3323
#[cfg(not(feature = "arc_quota"))]
3324
let res = self.do_getxattr(&data, &name, &mut buf[..])?;
3325
3326
if size == 0 {
3327
Ok(GetxattrReply::Count(res as u32))
3328
} else {
3329
buf.truncate(res);
3330
Ok(GetxattrReply::Value(buf))
3331
}
3332
}
3333
3334
fn listxattr(&self, _ctx: Context, inode: Inode, size: u32) -> io::Result<ListxattrReply> {
3335
let _trace = fs_trace!(self.tag, "listxattr", inode, size);
3336
let data = self.find_inode(inode)?;
3337
3338
let mut buf = vec![0u8; size as usize];
3339
3340
let file = data.file.lock();
3341
let o_path_file = (file.open_flags & libc::O_PATH) != 0;
3342
let res = if o_path_file {
3343
// For FDs opened with `O_PATH`, we cannot call `flistxattr` normally. Instead we
3344
// emulate an _at syscall by changing the CWD to /proc, running the path based syscall,
3345
// and then setting the CWD back to the root directory.
3346
let path = CString::new(format!("self/fd/{}", file.as_raw_descriptor()))
3347
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
3348
3349
// SAFETY: this will only modify `buf` and we check the return value.
3350
syscall!(self.with_proc_chdir(|| unsafe {
3351
libc::listxattr(
3352
path.as_ptr(),
3353
buf.as_mut_ptr() as *mut libc::c_char,
3354
buf.len() as libc::size_t,
3355
)
3356
}))?
3357
} else {
3358
// For regular files and directories, we can just flistxattr.
3359
// SAFETY: this will only write to `buf` and we check the return value.
3360
syscall!(unsafe {
3361
libc::flistxattr(
3362
file.as_raw_descriptor(),
3363
buf.as_mut_ptr() as *mut libc::c_char,
3364
buf.len() as libc::size_t,
3365
)
3366
})?
3367
};
3368
3369
if size == 0 {
3370
Ok(ListxattrReply::Count(res as u32))
3371
} else {
3372
buf.truncate(res as usize);
3373
3374
if self.cfg.rewrite_security_xattrs {
3375
strip_xattr_prefix(&mut buf);
3376
}
3377
Ok(ListxattrReply::Names(buf))
3378
}
3379
}
3380
3381
fn removexattr(&self, _ctx: Context, inode: Inode, name: &CStr) -> io::Result<()> {
3382
let _trace = fs_trace!(self.tag, "removexattr", inode, name);
3383
// We don't allow the VM to set this xattr so we also pretend there is no value associated
3384
// with it.
3385
if self.cfg.rewrite_security_xattrs && name.to_bytes().starts_with(USER_VIRTIOFS_XATTR) {
3386
return Err(io::Error::from_raw_os_error(libc::ENODATA));
3387
}
3388
3389
let data = self.find_inode(inode)?;
3390
let name = self.rewrite_xattr_name(name);
3391
3392
let file = data.file.lock();
3393
let o_path_file = (file.open_flags & libc::O_PATH) != 0;
3394
if o_path_file {
3395
// For files opened with `O_PATH`, we cannot call `fremovexattr` normally. Instead we
3396
// emulate an _at syscall by changing the CWD to /proc, running the path based syscall,
3397
// and then setting the CWD back to the root directory.
3398
let path = CString::new(format!("self/fd/{}", file.as_raw_descriptor()))
3399
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
3400
3401
syscall!(self.with_proc_chdir(||
3402
// SAFETY: this doesn't modify any memory and we check the return value.
3403
unsafe { libc::removexattr(path.as_ptr(), name.as_ptr()) }))?;
3404
} else {
3405
// For regular files and directories, we can just use fremovexattr.
3406
syscall!(
3407
// SAFETY: this doesn't modify any memory and we check the return value.
3408
unsafe { libc::fremovexattr(file.as_raw_descriptor(), name.as_ptr()) }
3409
)?;
3410
}
3411
3412
Ok(())
3413
}
3414
3415
fn fallocate(
3416
&self,
3417
_ctx: Context,
3418
inode: Inode,
3419
handle: Handle,
3420
mode: u32,
3421
offset: u64,
3422
length: u64,
3423
) -> io::Result<()> {
3424
let _trace = fs_trace!(self.tag, "fallocate", inode, handle, mode, offset, length);
3425
3426
let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
3427
let data = self.find_inode(inode)?;
3428
3429
{
3430
// fallocate needs a writable fd
3431
let mut file = data.file.lock();
3432
let mut flags = file.open_flags;
3433
match flags & libc::O_ACCMODE {
3434
libc::O_RDONLY => {
3435
flags &= !libc::O_RDONLY;
3436
flags |= libc::O_RDWR;
3437
3438
// We need to get a writable handle for this file.
3439
let newfile = self.open_fd(file.as_raw_descriptor(), libc::O_RDWR)?;
3440
*file = OpenedFile::new(newfile, flags);
3441
}
3442
libc::O_WRONLY | libc::O_RDWR => {}
3443
_ => panic!("Unexpected flags: {flags:#x}"),
3444
}
3445
}
3446
3447
data
3448
} else {
3449
self.find_handle(handle, inode)?
3450
};
3451
3452
let fd = data.as_raw_descriptor();
3453
// SAFETY: this doesn't modify any memory and we check the return value.
3454
syscall!(unsafe {
3455
libc::fallocate64(
3456
fd,
3457
mode as libc::c_int,
3458
offset as libc::off64_t,
3459
length as libc::off64_t,
3460
)
3461
})?;
3462
3463
Ok(())
3464
}
3465
3466
#[allow(clippy::unnecessary_cast)]
3467
fn ioctl<R: io::Read>(
3468
&self,
3469
ctx: Context,
3470
inode: Inode,
3471
handle: Handle,
3472
_flags: IoctlFlags,
3473
cmd: u32,
3474
_arg: u64,
3475
in_size: u32,
3476
out_size: u32,
3477
r: R,
3478
) -> io::Result<IoctlReply> {
3479
let _trace = fs_trace!(self.tag, "ioctl", inode, handle, cmd, in_size, out_size);
3480
3481
match cmd as IoctlNr {
3482
FS_IOC_GET_ENCRYPTION_POLICY_EX => self.get_encryption_policy_ex(inode, handle, r),
3483
FS_IOC_FSGETXATTR => {
3484
if out_size < size_of::<fsxattr>() as u32 {
3485
Err(io::Error::from_raw_os_error(libc::ENOMEM))
3486
} else {
3487
self.get_fsxattr(inode, handle)
3488
}
3489
}
3490
FS_IOC_FSSETXATTR => {
3491
if in_size < size_of::<fsxattr>() as u32 {
3492
Err(io::Error::from_raw_os_error(libc::EINVAL))
3493
} else {
3494
self.set_fsxattr(ctx, inode, handle, r)
3495
}
3496
}
3497
FS_IOC32_GETFLAGS | FS_IOC64_GETFLAGS => {
3498
if out_size < size_of::<c_int>() as u32 {
3499
Err(io::Error::from_raw_os_error(libc::ENOMEM))
3500
} else {
3501
self.get_flags(inode, handle)
3502
}
3503
}
3504
FS_IOC32_SETFLAGS | FS_IOC64_SETFLAGS => {
3505
if in_size < size_of::<c_int>() as u32 {
3506
Err(io::Error::from_raw_os_error(libc::ENOMEM))
3507
} else {
3508
self.set_flags(ctx, inode, handle, r)
3509
}
3510
}
3511
FS_IOC_ENABLE_VERITY => {
3512
if in_size < size_of::<fsverity_enable_arg>() as u32 {
3513
Err(io::Error::from_raw_os_error(libc::ENOMEM))
3514
} else {
3515
self.enable_verity(inode, handle, r)
3516
}
3517
}
3518
FS_IOC_MEASURE_VERITY => {
3519
if in_size < size_of::<fsverity_digest>() as u32
3520
|| out_size < size_of::<fsverity_digest>() as u32
3521
{
3522
Err(io::Error::from_raw_os_error(libc::ENOMEM))
3523
} else {
3524
self.measure_verity(inode, handle, r, out_size)
3525
}
3526
}
3527
// The following is ARCVM-specific ioctl
3528
// Refer go/remove-mount-passthrough-fuse for more design details
3529
#[cfg(feature = "arc_quota")]
3530
FS_IOC_SETPERMISSION => {
3531
if in_size != size_of::<FsPermissionDataBuffer>() as u32 {
3532
Err(io::Error::from_raw_os_error(libc::EINVAL))
3533
} else {
3534
Ok(self.set_permission_by_path(r))
3535
}
3536
}
3537
#[cfg(feature = "arc_quota")]
3538
FS_IOC_SETPATHXATTR => {
3539
if in_size != size_of::<FsPathXattrDataBuffer>() as u32 {
3540
Err(io::Error::from_raw_os_error(libc::EINVAL))
3541
} else {
3542
Ok(self.set_xattr_by_path(r))
3543
}
3544
}
3545
_ => Err(io::Error::from_raw_os_error(libc::ENOTTY)),
3546
}
3547
}
3548
3549
fn copy_file_range(
3550
&self,
3551
ctx: Context,
3552
inode_src: Inode,
3553
handle_src: Handle,
3554
offset_src: u64,
3555
inode_dst: Inode,
3556
handle_dst: Handle,
3557
offset_dst: u64,
3558
length: u64,
3559
flags: u64,
3560
) -> io::Result<usize> {
3561
let _trace = fs_trace!(
3562
self.tag,
3563
"copy_file_range",
3564
inode_src,
3565
handle_src,
3566
offset_src,
3567
inode_dst,
3568
handle_dst,
3569
offset_dst,
3570
length,
3571
flags
3572
);
3573
// We need to change credentials during a write so that the kernel will remove setuid or
3574
// setgid bits from the file if it was written to by someone other than the owner.
3575
let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
3576
let (src_data, dst_data): (Arc<dyn AsRawDescriptor>, Arc<dyn AsRawDescriptor>) =
3577
if self.zero_message_open.load(Ordering::Relaxed) {
3578
(self.find_inode(inode_src)?, self.find_inode(inode_dst)?)
3579
} else {
3580
(
3581
self.find_handle(handle_src, inode_src)?,
3582
self.find_handle(handle_dst, inode_dst)?,
3583
)
3584
};
3585
3586
let src = src_data.as_raw_descriptor();
3587
let dst = dst_data.as_raw_descriptor();
3588
3589
Ok(syscall!(
3590
// SAFETY: this call is safe because it doesn't modify any memory and we
3591
// check the return value.
3592
unsafe {
3593
libc::syscall(
3594
libc::SYS_copy_file_range,
3595
src,
3596
&offset_src,
3597
dst,
3598
&offset_dst,
3599
length,
3600
flags,
3601
)
3602
}
3603
)? as usize)
3604
}
3605
3606
fn set_up_mapping<M: Mapper>(
3607
&self,
3608
_ctx: Context,
3609
inode: Self::Inode,
3610
_handle: Self::Handle,
3611
file_offset: u64,
3612
mem_offset: u64,
3613
size: usize,
3614
prot: u32,
3615
mapper: M,
3616
) -> io::Result<()> {
3617
let _trace = fs_trace!(
3618
self.tag,
3619
"set_up_mapping",
3620
inode,
3621
file_offset,
3622
mem_offset,
3623
size,
3624
prot
3625
);
3626
if !self.cfg.use_dax {
3627
return Err(io::Error::from_raw_os_error(libc::ENOSYS));
3628
}
3629
3630
let read = prot & libc::PROT_READ as u32 != 0;
3631
let write = prot & libc::PROT_WRITE as u32 != 0;
3632
let (mmap_flags, prot) = match (read, write) {
3633
(true, true) => (libc::O_RDWR, Protection::read_write()),
3634
(true, false) => (libc::O_RDONLY, Protection::read()),
3635
// Write-only is mapped to O_RDWR since mmap always requires an fd opened for reading.
3636
(false, true) => (libc::O_RDWR, Protection::write()),
3637
(false, false) => return Err(io::Error::from_raw_os_error(libc::EINVAL)),
3638
};
3639
3640
let data = self.find_inode(inode)?;
3641
3642
if self.zero_message_open.load(Ordering::Relaxed) {
3643
let mut file = data.file.lock();
3644
let mut open_flags = file.open_flags;
3645
match (mmap_flags, open_flags & libc::O_ACCMODE) {
3646
(libc::O_RDONLY, libc::O_WRONLY)
3647
| (libc::O_RDWR, libc::O_RDONLY)
3648
| (libc::O_RDWR, libc::O_WRONLY) => {
3649
// We have a read-only or write-only fd and we need to upgrade it.
3650
open_flags &= !libc::O_ACCMODE;
3651
open_flags |= libc::O_RDWR;
3652
3653
let newfile = self.open_fd(file.as_raw_descriptor(), libc::O_RDWR)?;
3654
*file = OpenedFile::new(newfile, open_flags);
3655
}
3656
(libc::O_RDONLY, libc::O_RDONLY)
3657
| (libc::O_RDONLY, libc::O_RDWR)
3658
| (libc::O_RDWR, libc::O_RDWR) => {}
3659
(m, o) => panic!("Unexpected combination of access flags: ({m:#x}, {o:#x})"),
3660
}
3661
mapper.map(mem_offset, size, file.file(), file_offset, prot)
3662
} else {
3663
let file = self.open_inode(&data, mmap_flags | libc::O_NONBLOCK)?;
3664
mapper.map(mem_offset, size, &file, file_offset, prot)
3665
}
3666
}
3667
3668
/// Unmaps every region described in `msgs` through `mapper`, stopping at the first failure.
///
/// Fails with `ENOSYS` when DAX is disabled.
fn remove_mapping<M: Mapper>(&self, msgs: &[RemoveMappingOne], mapper: M) -> io::Result<()> {
    let _trace = fs_trace!(self.tag, "remove_mapping", msgs);
    if !self.cfg.use_dax {
        return Err(io::Error::from_raw_os_error(libc::ENOSYS));
    }

    for region in msgs {
        mapper.unmap(region.moffset, region.len)?;
    }

    Ok(())
}
3679
3680
fn atomic_open(
3681
&self,
3682
ctx: Context,
3683
parent: Self::Inode,
3684
name: &CStr,
3685
mode: u32,
3686
flags: u32,
3687
umask: u32,
3688
security_ctx: Option<&CStr>,
3689
) -> io::Result<(Entry, Option<Self::Handle>, OpenOptions)> {
3690
let _trace = fs_trace!(
3691
self.tag,
3692
"atomic_open",
3693
parent,
3694
name,
3695
mode,
3696
flags,
3697
umask,
3698
security_ctx
3699
);
3700
// Perform lookup but not create negative dentry
3701
let data = self.find_inode(parent)?;
3702
3703
#[allow(unused_variables)]
3704
#[cfg(feature = "arc_quota")]
3705
let (uid, gid) = self.change_creds(&ctx, &data, name);
3706
#[cfg(feature = "fs_runtime_ugid_map")]
3707
let (uid, gid) = self.change_ugid_creds(&ctx, &data, name);
3708
#[cfg(not(feature = "fs_permission_translation"))]
3709
let (uid, gid) = (ctx.uid, ctx.gid);
3710
3711
let (_uid, _gid) = set_creds(uid, gid)?;
3712
3713
// This lookup serves two purposes:
3714
// 1. If the O_CREATE flag is not set, it retrieves the d_entry for the file.
3715
// 2. If the O_CREATE flag is set, it checks whether the file exists.
3716
let res = self.do_lookup_with_casefold_fallback(&data, name);
3717
3718
if let Err(e) = res {
3719
if e.kind() == std::io::ErrorKind::NotFound && (flags as i32 & libc::O_CREAT) != 0 {
3720
// If the file did not exist & O_CREAT is set,
3721
// create file & set FILE_CREATED bits in open options
3722
let (entry, handler, mut opts) =
3723
self.create(ctx, parent, name, mode, flags, umask, security_ctx)?;
3724
opts |= OpenOptions::FILE_CREATED;
3725
return Ok((entry, handler, opts));
3726
} else if e.kind() == std::io::ErrorKind::NotFound
3727
&& !self.cfg.negative_timeout.is_zero()
3728
{
3729
return Ok((
3730
Entry::new_negative(self.cfg.negative_timeout),
3731
None,
3732
OpenOptions::empty(),
3733
));
3734
}
3735
return Err(e);
3736
}
3737
3738
// SAFETY: checked res is not error before
3739
let entry = res.unwrap();
3740
3741
if entry.attr.st_mode & libc::S_IFMT == libc::S_IFLNK {
3742
return Ok((entry, None, OpenOptions::empty()));
3743
}
3744
3745
if (flags as i32 & (libc::O_CREAT | libc::O_EXCL)) == (libc::O_CREAT | libc::O_EXCL) {
3746
return Err(eexist());
3747
}
3748
3749
let (handler, opts) = if self.zero_message_open.load(Ordering::Relaxed) {
3750
(None, OpenOptions::KEEP_CACHE)
3751
} else {
3752
let (handler, opts) = self.do_open(entry.inode, flags)?;
3753
(handler, opts)
3754
};
3755
Ok((entry, handler, opts))
3756
}
3757
}
3758
3759
#[cfg(test)]
3760
mod tests {
3761
use std::path::Path;
3762
3763
use named_lock::NamedLock;
3764
use tempfile::TempDir;
3765
3766
use super::*;
3767
#[cfg(feature = "arc_quota")]
3768
use crate::virtio::fs::arc_ioctl::FS_IOCTL_PATH_MAX_LEN;
3769
#[cfg(feature = "arc_quota")]
3770
use crate::virtio::fs::arc_ioctl::FS_IOCTL_XATTR_NAME_MAX_LEN;
3771
#[cfg(feature = "arc_quota")]
3772
use crate::virtio::fs::arc_ioctl::FS_IOCTL_XATTR_VALUE_MAX_LEN;
3773
3774
const UNITTEST_LOCK_NAME: &str = "passthroughfs_unittest_lock";
3775
3776
// Create an instance of `Context` with valid uid, gid, and pid.
3777
// The correct ids are necessary for test cases where new files are created.
3778
fn get_context() -> Context {
3779
// SAFETY: both calls take no parameters and only return an integer value. The kernel also
3780
// guarantees that they can never fail.
3781
let uid = unsafe { libc::syscall(SYS_GETEUID) as libc::uid_t };
3782
// SAFETY: both calls take no parameters and only return an integer value. The kernel also
3783
// guarantees that they can never fail.
3784
let gid = unsafe { libc::syscall(SYS_GETEGID) as libc::gid_t };
3785
let pid = std::process::id() as libc::pid_t;
3786
Context { uid, gid, pid }
3787
}
3788
3789
/// Creates the given directories and files under `temp_dir`.
3790
fn create_test_data(temp_dir: &TempDir, dirs: &[&str], files: &[&str]) {
3791
let path = temp_dir.path();
3792
3793
for d in dirs {
3794
std::fs::create_dir_all(path.join(d)).unwrap();
3795
}
3796
3797
for f in files {
3798
File::create(path.join(f)).unwrap();
3799
}
3800
}
3801
3802
/// Looks up the given `path` in `fs`.
3803
fn lookup(fs: &PassthroughFs, path: &Path) -> io::Result<Inode> {
3804
let mut inode = 1;
3805
let ctx = get_context();
3806
for name in path.iter() {
3807
let name = CString::new(name.to_str().unwrap()).unwrap();
3808
let ent = match fs.lookup(ctx, inode, &name) {
3809
Ok(ent) => ent,
3810
Err(e) => {
3811
return Err(e);
3812
}
3813
};
3814
inode = ent.inode;
3815
}
3816
Ok(inode)
3817
}
3818
3819
/// Looks up the given `path` in `fs`, one component at a time starting from inode 1, and
/// returns the `Entry` of the last component.
///
/// An empty `path` yields a negative entry with a 10 second timeout. The first failing lookup
/// is returned as the error. Panics if a component is not valid UTF-8 or contains a NUL byte.
#[cfg(feature = "arc_quota")]
fn lookup_ent(fs: &PassthroughFs, path: &Path) -> io::Result<Entry> {
    let ctx = get_context();
    let mut inode = 1;
    let mut entry = Entry::new_negative(Duration::from_secs(10));
    for name in path.iter() {
        let name = CString::new(name.to_str().unwrap()).unwrap();
        entry = fs.lookup(ctx, inode, &name)?;
        inode = entry.inode;
    }
    Ok(entry)
}
3837
3838
/// Creates a file at the given `path`.
3839
fn create(fs: &PassthroughFs, path: &Path) -> io::Result<Entry> {
3840
let parent = path.parent().unwrap();
3841
let filename = CString::new(path.file_name().unwrap().to_str().unwrap()).unwrap();
3842
let parent_inode = lookup(fs, parent)?;
3843
let ctx = get_context();
3844
let security_ctx = None;
3845
fs.create(
3846
ctx,
3847
parent_inode,
3848
&filename,
3849
0o666,
3850
libc::O_RDWR as u32,
3851
0,
3852
security_ctx,
3853
)
3854
.map(|(entry, _, _)| entry)
3855
}
3856
3857
/// Removes a file at the given `path`.
3858
fn unlink(fs: &PassthroughFs, path: &Path) -> io::Result<()> {
3859
let parent = path.parent().unwrap();
3860
let filename = CString::new(path.file_name().unwrap().to_str().unwrap()).unwrap();
3861
let parent_inode = lookup(fs, parent)?;
3862
let ctx = get_context();
3863
fs.unlink(ctx, parent_inode, &filename)
3864
}
3865
3866
/// Forgets cache.
3867
fn forget(fs: &PassthroughFs, path: &Path) -> io::Result<()> {
3868
let ctx = get_context();
3869
let inode = lookup(fs, path)?;
3870
// Pass `u64::MAX` to ensure that the refcount goes to 0 and we forget inode.
3871
fs.forget(ctx, inode, u64::MAX);
3872
Ok(())
3873
}
3874
3875
/// Looks up and open the given `path` in `fs`.
3876
fn atomic_open(
3877
fs: &PassthroughFs,
3878
path: &Path,
3879
mode: u32,
3880
flags: u32,
3881
umask: u32,
3882
security_ctx: Option<&CStr>,
3883
) -> io::Result<(Entry, Option<Handle>, OpenOptions)> {
3884
let mut inode = 1;
3885
let ctx = get_context();
3886
3887
let path_vec: Vec<_> = path.iter().collect();
3888
let vec_len = path_vec.len();
3889
3890
// Do lookup before util (vec_len-1)-th pathname, this operation is to simulate
3891
// the behavior of VFS, since when VFS call atomic_open only at last look up.
3892
for name in &path_vec[0..vec_len - 1] {
3893
let name = CString::new(name.to_str().unwrap()).unwrap();
3894
let ent = fs.lookup(ctx, inode, &name)?;
3895
inode = ent.inode;
3896
}
3897
3898
let name = CString::new(path_vec[vec_len - 1].to_str().unwrap()).unwrap();
3899
3900
fs.atomic_open(ctx, inode, &name, mode, flags, umask, security_ctx)
3901
}
3902
3903
fn symlink(
3904
fs: &PassthroughFs,
3905
linkname: &Path,
3906
name: &Path,
3907
security_ctx: Option<&CStr>,
3908
) -> io::Result<Entry> {
3909
let inode = 1;
3910
let ctx = get_context();
3911
let name = CString::new(name.to_str().unwrap()).unwrap();
3912
let linkname = CString::new(linkname.to_str().unwrap()).unwrap();
3913
fs.symlink(ctx, &linkname, inode, &name, security_ctx)
3914
}
3915
3916
/// Issues the `FS_IOC_SETPERMISSION` ioctl on `fs`, forwarding `in_size` and the reader `r`.
///
/// The inode, handle, flags, arg and out_size arguments are irrelevant for this ioctl, so they
/// are passed as zero/empty values.
#[cfg(feature = "arc_quota")]
fn fs_ioc_setpermission<R>(fs: &PassthroughFs, in_size: u32, r: R) -> io::Result<IoctlReply>
where
    R: io::Read,
{
    let cmd = FS_IOC_SETPERMISSION as u32;
    fs.ioctl(
        get_context(),
        0,
        0,
        IoctlFlags::empty(),
        cmd,
        0,
        in_size,
        0,
        r,
    )
}
3936
3937
/// Issues the `FS_IOC_SETPATHXATTR` ioctl on `fs`, forwarding `in_size` and the reader `r`.
///
/// The inode, handle, flags, arg and out_size arguments are irrelevant for this ioctl, so they
/// are passed as zero/empty values.
#[cfg(feature = "arc_quota")]
fn fs_ioc_setpathxattr<R>(fs: &PassthroughFs, in_size: u32, r: R) -> io::Result<IoctlReply>
where
    R: io::Read,
{
    let cmd = FS_IOC_SETPATHXATTR as u32;
    fs.ioctl(
        get_context(),
        0,
        0,
        IoctlFlags::empty(),
        cmd,
        0,
        in_size,
        0,
        r,
    )
}
3957
3958
/// With `rewrite_security_xattrs` enabled, only `security.sehash` is expected to be rewritten.
#[test]
fn rewrite_xattr_names() {
    // PassthroughFs may perform process-wide operations such as `fchdir`, so every unit test
    // that creates a `PassthroughFs` instance serializes on a `NamedLock`.
    let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
    let _guard = lock.lock().expect("acquire named lock");

    let p = PassthroughFs::new(
        "tag",
        Config {
            rewrite_security_xattrs: true,
            ..Default::default()
        },
    )
    .expect("Failed to create PassthroughFs");

    // `security.selinux` must not be overwritten, and names in the user, trusted and system
    // namespaces must pass through unchanged as well.
    let untouched = [
        c"security.selinux",
        c"user.foobar",
        c"trusted.foobar",
        c"system.foobar",
    ];
    for name in untouched {
        assert_eq!(p.rewrite_xattr_name(name).to_bytes(), name.to_bytes());
    }

    // `security.sehash` is the one name that gets rewritten.
    assert_eq!(
        p.rewrite_xattr_name(c"security.sehash").to_bytes(),
        b"user.virtiofs.security.sehash"
    );
}
3991
3992
/// Exercises `strip_xattr_prefix` on NUL-only, NUL-free, empty, and mixed name lists.
#[test]
fn strip_xattr_names() {
    // Runs `strip_xattr_prefix` on a copy of `input` and compares the result with `expected`.
    let check = |input: &[u8], expected: &[u8]| {
        let mut actual = input.to_vec();
        strip_xattr_prefix(&mut actual);
        assert_eq!(&actual[..], expected);
    };

    // Inputs that must come back unchanged.
    check(b"\0\0\0\0\0", b"\0\0\0\0\0");
    check(b"security.sehashuser.virtiofs", b"security.sehashuser.virtiofs");
    check(b"", b"");
    check(
        b"security.selinux\0user.foobar\0system.test\0",
        b"security.selinux\0user.foobar\0system.test\0",
    );

    // Every name carries the strippable prefix.
    check(
        b"user.virtiofs.security.sehash\0user.virtiofs.security.wat\0",
        b"security.sehash\0security.wat\0",
    );

    // Prefixed and unprefixed names interleaved.
    check(
        b"user.virtiofs.security.sehash\0security.selinux\0user.virtiofs.security.wat\0user.foobar\0",
        b"security.sehash\0security.selinux\0security.wat\0user.foobar\0",
    );

    // A single prefixed name without a trailing NUL.
    check(b"user.virtiofs.security.sehash", b"security.sehash");
}
4030
4031
/// Existing files and directories can be looked up; missing or differently-cased names cannot.
#[test]
fn lookup_files() {
    // PassthroughFs may perform process-wide operations such as `fchdir`, so every unit test
    // that creates a `PassthroughFs` instance serializes on a `NamedLock`.
    let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
    let _guard = lock.lock().expect("acquire named lock");

    let temp_dir = TempDir::new().unwrap();
    create_test_data(&temp_dir, &["dir"], &["a.txt", "dir/b.txt"]);

    let fs = PassthroughFs::new("tag", Config::default()).unwrap();
    fs.init(FsOptions::empty()).unwrap();

    let root = temp_dir.path();
    for present in ["a.txt", "dir", "dir/b.txt"] {
        assert!(lookup(&fs, &root.join(present)).is_ok());
    }

    // Without casefolding, "A.txt" is a different name from "a.txt" and must be missing too.
    for absent in ["nonexistent-file", "A.txt"] {
        assert_eq!(
            lookup(&fs, &root.join(absent))
                .expect_err("file must not exist")
                .kind(),
            io::ErrorKind::NotFound
        );
    }
}
4065
4066
/// With `ascii_casefold` enabled, names that differ only in ASCII case resolve to one inode.
#[test]
fn lookup_files_ascii_casefold() {
    // PassthroughFs may perform process-wide operations such as `fchdir`, so every unit test
    // that creates a `PassthroughFs` instance serializes on a `NamedLock`.
    let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
    let _guard = lock.lock().expect("acquire named lock");

    let temp_dir = TempDir::new().unwrap();
    create_test_data(&temp_dir, &["dir"], &["a.txt", "dir/b.txt"]);

    let fs = PassthroughFs::new(
        "tag",
        Config {
            ascii_casefold: true,
            ..Default::default()
        },
    )
    .unwrap();
    fs.init(FsOptions::empty()).unwrap();

    let root = temp_dir.path();

    // (exact name, failure message, case variant, failure message)
    let cases = [
        ("a.txt", "a.txt must be found", "A.txt", "A.txt must exist"),
        ("dir", "dir must be found", "DiR", "DiR must exist"),
        (
            "dir/b.txt",
            "dir/b.txt must be found",
            "dIr/B.TxT",
            "dIr/B.TxT must exist",
        ),
    ];
    for (exact, exact_msg, variant, variant_msg) in cases {
        let exact_inode = lookup(&fs, &root.join(exact)).expect(exact_msg);
        assert_eq!(
            lookup(&fs, &root.join(variant)).expect(variant_msg),
            exact_inode
        );
    }

    // Casefolding must not make a missing file appear.
    assert_eq!(
        lookup(&fs, &root.join("nonexistent-file"))
            .expect_err("file must not exist")
            .kind(),
        io::ErrorKind::NotFound
    );
}
4112
4113
fn test_create_and_remove(ascii_casefold: bool) {
4114
// Since PassthroughFs may executes process-wide operations such as `fchdir`, acquire
4115
// `NamedLock` before starting each unit test creating a `PassthroughFs` instance.
4116
let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
4117
let _guard = lock.lock().expect("acquire named lock");
4118
4119
let temp_dir = TempDir::new().unwrap();
4120
let timeout = Duration::from_millis(10);
4121
let cfg = Config {
4122
timeout,
4123
cache_policy: CachePolicy::Auto,
4124
ascii_casefold,
4125
..Default::default()
4126
};
4127
let fs = PassthroughFs::new("tag", cfg).unwrap();
4128
4129
let capable = FsOptions::empty();
4130
fs.init(capable).unwrap();
4131
4132
// Create a.txt and b.txt.
4133
let a_path = temp_dir.path().join("a.txt");
4134
let b_path = temp_dir.path().join("b.txt");
4135
let a_entry = create(&fs, &a_path).expect("create a.txt");
4136
let b_entry = create(&fs, &b_path).expect("create b.txt");
4137
assert_eq!(
4138
a_entry.inode,
4139
lookup(&fs, &a_path).expect("lookup a.txt"),
4140
"Created file 'a.txt' must be looked up"
4141
);
4142
assert_eq!(
4143
b_entry.inode,
4144
lookup(&fs, &b_path).expect("lookup b.txt"),
4145
"Created file 'b.txt' must be looked up"
4146
);
4147
4148
// Remove a.txt only
4149
unlink(&fs, &a_path).expect("Remove");
4150
assert_eq!(
4151
lookup(&fs, &a_path)
4152
.expect_err("file must not exist")
4153
.kind(),
4154
io::ErrorKind::NotFound,
4155
"a.txt must be removed"
4156
);
4157
// "A.TXT" must not be found regardless of whether casefold is enabled or not.
4158
let upper_a_path = temp_dir.path().join("A.TXT");
4159
assert_eq!(
4160
lookup(&fs, &upper_a_path)
4161
.expect_err("file must not exist")
4162
.kind(),
4163
io::ErrorKind::NotFound,
4164
"A.txt must be removed"
4165
);
4166
4167
// Check if the host file system doesn't have a.txt but does b.txt.
4168
assert!(!a_path.exists(), "a.txt must be removed");
4169
assert!(b_path.exists(), "b.txt must exist");
4170
}
4171
4172
/// Runs the create/remove scenario with ASCII casefolding disabled.
#[test]
fn create_and_remove() {
    let ascii_casefold = false;
    test_create_and_remove(ascii_casefold);
}
4176
4177
/// Runs the create/remove scenario with ASCII casefolding enabled.
#[test]
fn create_and_remove_casefold() {
    let ascii_casefold = true;
    test_create_and_remove(ascii_casefold);
}
4181
4182
fn test_create_and_forget(ascii_casefold: bool) {
4183
// Since PassthroughFs may executes process-wide operations such as `fchdir`, acquire
4184
// `NamedLock` before starting each unit test creating a `PassthroughFs` instance.
4185
let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
4186
let _guard = lock.lock().expect("acquire named lock");
4187
4188
let temp_dir = TempDir::new().unwrap();
4189
let timeout = Duration::from_millis(10);
4190
let cfg = Config {
4191
timeout,
4192
cache_policy: CachePolicy::Auto,
4193
ascii_casefold,
4194
..Default::default()
4195
};
4196
let fs = PassthroughFs::new("tag", cfg).unwrap();
4197
4198
let capable = FsOptions::empty();
4199
fs.init(capable).unwrap();
4200
4201
// Create a.txt.
4202
let a_path = temp_dir.path().join("a.txt");
4203
let a_entry = create(&fs, &a_path).expect("create a.txt");
4204
assert_eq!(
4205
a_entry.inode,
4206
lookup(&fs, &a_path).expect("lookup a.txt"),
4207
"Created file 'a.txt' must be looked up"
4208
);
4209
4210
// Forget a.txt's inode from PassthroughFs's internal cache.
4211
forget(&fs, &a_path).expect("forget a.txt");
4212
4213
if ascii_casefold {
4214
let upper_a_path = temp_dir.path().join("A.TXT");
4215
let new_a_inode = lookup(&fs, &upper_a_path).expect("lookup a.txt");
4216
assert_ne!(
4217
a_entry.inode, new_a_inode,
4218
"inode must be changed after forget()"
4219
);
4220
assert_eq!(
4221
new_a_inode,
4222
lookup(&fs, &a_path).expect("lookup a.txt"),
4223
"inode must be same for a.txt and A.TXT"
4224
);
4225
} else {
4226
assert_ne!(
4227
a_entry.inode,
4228
lookup(&fs, &a_path).expect("lookup a.txt"),
4229
"inode must be changed after forget()"
4230
);
4231
}
4232
}
4233
4234
/// Runs the create/forget scenario with ASCII casefolding disabled.
#[test]
fn create_and_forget() {
    let ascii_casefold = false;
    test_create_and_forget(ascii_casefold);
}
4238
4239
/// Runs the create/forget scenario with ASCII casefolding enabled.
#[test]
fn create_and_forget_casefold() {
    let ascii_casefold = true;
    test_create_and_forget(ascii_casefold);
}
4243
4244
/// Checks that the casefold cache gains entries on lookup/create and loses them on unlink.
#[test]
fn casefold_lookup_cache() {
    // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
    // `NamedLock` before starting each unit test creating a `PassthroughFs` instance.
    let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
    let _guard = lock.lock().expect("acquire named lock");

    let temp_dir = TempDir::new().unwrap();
    // Prepare `a.txt` before starting the test.
    create_test_data(&temp_dir, &[], &["a.txt"]);

    let cfg = Config {
        ascii_casefold: true,
        ..Default::default()
    };
    let fs = PassthroughFs::new("tag", cfg).unwrap();

    let capable = FsOptions::empty();
    fs.init(capable).unwrap();

    let parent = lookup(&fs, temp_dir.path()).expect("lookup temp_dir");

    // Since `a.txt` exists, "A.TXT" must exist.
    let large_a_path = temp_dir.path().join("A.TXT");
    // Looking up "A.TXT" must create a CasefoldCache entry.
    lookup(&fs, &large_a_path).expect("A.TXT must exist");
    assert!(fs.exists_in_casefold_cache(parent, &CString::new("A.TXT").unwrap()));

    // Create b.txt.
    let b_path = temp_dir.path().join("b.txt");
    create(&fs, &b_path).expect("create b.txt");
    // Then, b.txt must exist in the cache.
    assert!(fs.exists_in_casefold_cache(parent, &CString::new("B.TXT").unwrap()));
    // When removing b.txt, it must be removed from the cache as well.
    unlink(&fs, &b_path).expect("remove b.txt");
    assert!(!fs.exists_in_casefold_cache(parent, &CString::new("B.TXT").unwrap()));
}
4276
4277
/// With `negative_timeout` set, looking up a missing file succeeds with inode 0 instead of
/// failing, both before the file is created and after it is removed again.
#[test]
fn lookup_negative_cache() {
    // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
    // `NamedLock` before starting each unit test creating a `PassthroughFs` instance.
    let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
    let _guard = lock.lock().expect("acquire named lock");

    let temp_dir = TempDir::new().unwrap();
    // Start with no directories or files: `a.txt` must not exist yet.
    create_test_data(&temp_dir, &[], &[]);

    let cfg = Config {
        negative_timeout: Duration::from_secs(5),
        ..Default::default()
    };
    let fs = PassthroughFs::new("tag", cfg).unwrap();

    let capable = FsOptions::empty();
    fs.init(capable).unwrap();

    let a_path = temp_dir.path().join("a.txt");
    // a.txt does not exist yet.
    // Since negative_timeout is enabled, success with inode=0 is expected.
    assert_eq!(
        0,
        lookup(&fs, &a_path).expect("lookup a.txt"),
        "Entry with inode=0 is expected for non-existing file 'a.txt'"
    );
    // Create a.txt
    let a_entry = create(&fs, &a_path).expect("create a.txt");
    assert_eq!(
        a_entry.inode,
        lookup(&fs, &a_path).expect("lookup a.txt"),
        "Created file 'a.txt' must be looked up"
    );
    // Remove a.txt
    unlink(&fs, &a_path).expect("Remove");
    assert_eq!(
        0,
        lookup(&fs, &a_path).expect("lookup a.txt"),
        "Entry with inode=0 is expected for the removed file 'a.txt'"
    );
}
4315
/// Runs the existing-file `atomic_open` scenario with `zero_message_open` set to `false`.
#[test]
fn test_atomic_open_existing_file() {
    let zero_message_open = false;
    atomic_open_existing_file(zero_message_open);
}
4319
4320
/// Runs the existing-file `atomic_open` scenario with `zero_message_open` set to `true`.
#[test]
fn test_atomic_open_existing_file_zero_message() {
    let zero_message_open = true;
    atomic_open_existing_file(zero_message_open);
}
4324
4325
fn atomic_open_existing_file(zero_message_open: bool) {
4326
// Since PassthroughFs may executes process-wide operations such as `fchdir`, acquire
4327
// `NamedLock` before starting each unit test creating a `PassthroughFs` instance.
4328
let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
4329
let _guard = lock.lock().expect("acquire named lock");
4330
4331
let temp_dir = TempDir::new().unwrap();
4332
create_test_data(&temp_dir, &["dir"], &["a.txt", "dir/b.txt", "dir/c.txt"]);
4333
4334
let cache_policy = match zero_message_open {
4335
true => CachePolicy::Always,
4336
false => CachePolicy::Auto,
4337
};
4338
4339
let cfg = Config {
4340
cache_policy,
4341
..Default::default()
4342
};
4343
let fs = PassthroughFs::new("tag", cfg).unwrap();
4344
4345
let capable = FsOptions::ZERO_MESSAGE_OPEN;
4346
fs.init(capable).unwrap();
4347
4348
// atomic_open with flag O_RDWR, should return positive dentry and file handler
4349
let res = atomic_open(
4350
&fs,
4351
&temp_dir.path().join("a.txt"),
4352
0o666,
4353
libc::O_RDWR as u32,
4354
0,
4355
None,
4356
);
4357
assert!(res.is_ok());
4358
let (entry, handler, open_options) = res.unwrap();
4359
assert_ne!(entry.inode, 0);
4360
4361
if zero_message_open {
4362
assert!(handler.is_none());
4363
assert_eq!(open_options, OpenOptions::KEEP_CACHE);
4364
} else {
4365
assert!(handler.is_some());
4366
assert_ne!(
4367
open_options & OpenOptions::FILE_CREATED,
4368
OpenOptions::FILE_CREATED
4369
);
4370
}
4371
4372
// atomic_open with flag O_RDWR | O_CREATE, should return positive dentry and file handler
4373
let res = atomic_open(
4374
&fs,
4375
&temp_dir.path().join("dir/b.txt"),
4376
0o666,
4377
(libc::O_RDWR | libc::O_CREAT) as u32,
4378
0,
4379
None,
4380
);
4381
assert!(res.is_ok());
4382
let (entry, handler, open_options) = res.unwrap();
4383
assert_ne!(entry.inode, 0);
4384
4385
if zero_message_open {
4386
assert!(handler.is_none());
4387
assert_eq!(open_options, OpenOptions::KEEP_CACHE);
4388
} else {
4389
assert!(handler.is_some());
4390
assert_ne!(
4391
open_options & OpenOptions::FILE_CREATED,
4392
OpenOptions::FILE_CREATED
4393
);
4394
}
4395
4396
// atomic_open with flag O_RDWR | O_CREATE | O_EXCL, should return positive dentry and file
4397
// handler
4398
let res = atomic_open(
4399
&fs,
4400
&temp_dir.path().join("dir/c.txt"),
4401
0o666,
4402
(libc::O_RDWR | libc::O_CREAT | libc::O_EXCL) as u32,
4403
0,
4404
None,
4405
);
4406
assert!(res.is_err());
4407
let err_kind = res.unwrap_err().kind();
4408
assert_eq!(err_kind, io::ErrorKind::AlreadyExists);
4409
}
4410
4411
/// Runs the missing-file `atomic_open` scenario with `zero_message_open` set to `false`.
#[test]
fn test_atomic_open_non_existing_file() {
    let zero_message_open = false;
    atomic_open_non_existing_file(zero_message_open);
}
4415
4416
/// Runs the missing-file `atomic_open` scenario with `zero_message_open` set to `true`.
#[test]
fn test_atomic_open_non_existing_file_zero_message() {
    let zero_message_open = true;
    atomic_open_non_existing_file(zero_message_open);
}
4420
4421
fn atomic_open_non_existing_file(zero_message_open: bool) {
4422
// Since PassthroughFs may executes process-wide operations such as `fchdir`, acquire
4423
// `NamedLock` before starting each unit test creating a `PassthroughFs` instance.
4424
let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
4425
let _guard = lock.lock().expect("acquire named lock");
4426
4427
let temp_dir = TempDir::new().unwrap();
4428
4429
let cache_policy = match zero_message_open {
4430
true => CachePolicy::Always,
4431
false => CachePolicy::Auto,
4432
};
4433
4434
let cfg = Config {
4435
cache_policy,
4436
..Default::default()
4437
};
4438
let fs = PassthroughFs::new("tag", cfg).unwrap();
4439
4440
let capable = FsOptions::ZERO_MESSAGE_OPEN;
4441
fs.init(capable).unwrap();
4442
4443
// atomic_open with flag O_RDWR, should return NO_EXIST error
4444
let res = atomic_open(
4445
&fs,
4446
&temp_dir.path().join("a.txt"),
4447
0o666,
4448
libc::O_RDWR as u32,
4449
0,
4450
None,
4451
);
4452
assert!(res.is_err());
4453
let err_kind = res.unwrap_err().kind();
4454
assert_eq!(err_kind, io::ErrorKind::NotFound);
4455
4456
// atomic_open with flag O_RDWR | O_CREATE, should return positive dentry and file handler
4457
let res = atomic_open(
4458
&fs,
4459
&temp_dir.path().join("b.txt"),
4460
0o666,
4461
(libc::O_RDWR | libc::O_CREAT) as u32,
4462
0,
4463
None,
4464
);
4465
assert!(res.is_ok());
4466
let (entry, handler, open_options) = res.unwrap();
4467
assert_ne!(entry.inode, 0);
4468
4469
if zero_message_open {
4470
assert!(handler.is_none());
4471
assert_eq!(
4472
open_options & OpenOptions::KEEP_CACHE,
4473
OpenOptions::KEEP_CACHE
4474
);
4475
} else {
4476
assert!(handler.is_some());
4477
}
4478
assert_eq!(
4479
open_options & OpenOptions::FILE_CREATED,
4480
OpenOptions::FILE_CREATED
4481
);
4482
}
4483
4484
/// `atomic_open` on a symbolic link must return the link's own dentry without a handle, both
/// while the link target exists and after the target has been removed.
#[test]
fn atomic_open_symbol_link() {
    // PassthroughFs may perform process-wide operations such as `fchdir`, so every unit test
    // that creates a `PassthroughFs` instance serializes on a `NamedLock`.
    let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
    let _guard = lock.lock().expect("acquire named lock");

    let temp_dir = TempDir::new().unwrap();
    create_test_data(&temp_dir, &["dir"], &["a.txt"]);

    let fs = PassthroughFs::new("tag", Config::default()).unwrap();
    fs.init(FsOptions::empty()).unwrap();

    let target_path = temp_dir.path().join("a.txt");
    let link_path = temp_dir.path().join("blink");

    // Opening the link destination directly yields a real dentry and a handle.
    let res_dst = atomic_open(&fs, &target_path, 0o666, libc::O_RDWR as u32, 0, None);
    assert!(res_dst.is_ok());
    let (entry_dst, handler_dst, _) = res_dst.unwrap();
    assert_ne!(entry_dst.inode, 0);
    assert!(handler_dst.is_some());

    // Create a depth-1 symbolic link that points at a.txt.
    let sym1_res = symlink(&fs, &target_path, &link_path, None);
    assert!(sym1_res.is_ok());
    let sym1_entry = sym1_res.unwrap();
    assert_ne!(sym1_entry.inode, 0);

    // Opens the link itself and checks that its dentry comes back with no handle and no
    // open options.
    let open_link_and_check = || {
        let res = atomic_open(&fs, &link_path, 0o666, libc::O_RDWR as u32, 0, None);
        assert!(res.is_ok());
        let (entry, handler, open_options) = res.unwrap();
        assert_eq!(entry.inode, sym1_entry.inode);
        assert!(handler.is_none());
        assert_eq!(open_options, OpenOptions::empty());
    };

    open_link_and_check();

    // Delete the link destination.
    unlink(&fs, &target_path).expect("Remove");
    assert_eq!(
        lookup(&fs, &target_path)
            .expect_err("file must not exist")
            .kind(),
        io::ErrorKind::NotFound,
        "a.txt must be removed"
    );

    // Even with the destination gone, the link itself must still resolve to a valid dentry.
    open_link_and_check();
}
4565
4566
/// A valid `FS_IOC_SETPERMISSION` request is recorded in `permission_paths`, and a second
/// request is rejected with `EPERM` once `max_dynamic_perm` (1 here) has been reached.
#[test]
#[cfg(feature = "arc_quota")]
fn set_permission_ioctl_valid_data() {
    // PassthroughFs may perform process-wide operations such as `fchdir`, so every unit test
    // that creates a `PassthroughFs` instance serializes on a `NamedLock`.
    let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
    let _guard = lock.lock().expect("acquire named lock");

    let p = PassthroughFs::new(
        "tag",
        Config {
            max_dynamic_perm: 1,
            ..Default::default()
        },
    )
    .expect("Failed to create PassthroughFs");

    // Build the request buffer: the path is copied into a zero-filled fixed-size array.
    let perm_path_string = String::from("/test");
    let mut perm_path = [0u8; FS_IOCTL_PATH_MAX_LEN];
    perm_path[..perm_path_string.len()].copy_from_slice(perm_path_string.as_bytes());
    let fs_permission_data_buffer = FsPermissionDataBuffer {
        guest_uid: 1,
        guest_gid: 2,
        host_uid: 3,
        host_gid: 4,
        umask: 5,
        pad: 0,
        perm_path,
    };
    let in_size = mem::size_of_val(&fs_permission_data_buffer) as u32;
    let r = std::io::Cursor::new(fs_permission_data_buffer.as_bytes());

    let res =
        fs_ioc_setpermission(&p, in_size, r.clone()).expect("valid input should get IoctlReply");
    assert!(matches!(res, IoctlReply::Done(Ok(data)) if data.is_empty()));

    let read_guard = p
        .permission_paths
        .read()
        .expect("read permission_paths failed");
    let permission_data = read_guard
        .first()
        .expect("permission path should not be empty");

    // The request must have been stored in permission_paths field by field.
    let expected_data = PermissionData {
        guest_uid: 1,
        guest_gid: 2,
        host_uid: 3,
        host_gid: 4,
        umask: 5,
        perm_path: perm_path_string,
    };
    assert_eq!(*permission_data, expected_data);

    // A second ioctl must be refused, since max_dynamic_perm is set to 1.
    let res =
        fs_ioc_setpermission(&p, in_size, r.clone()).expect("valid input should get IoctlReply");
    assert!(matches!(
        res,
        IoctlReply::Done(Err(err)) if err.raw_os_error() == Some(libc::EPERM)
    ));
}
4636
4637
/// Invalid `FS_IOC_SETPERMISSION` requests: a path without a leading `/` yields an `EINVAL`
/// reply, and a buffer of the wrong size makes the ioctl call itself fail with `EINVAL`.
#[test]
#[cfg(feature = "arc_quota")]
fn set_permission_ioctl_invalid_data() {
    // PassthroughFs may perform process-wide operations such as `fchdir`, so every unit test
    // that creates a `PassthroughFs` instance serializes on a `NamedLock`.
    let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
    let _guard = lock.lock().expect("acquire named lock");

    let p = PassthroughFs::new(
        "tag",
        Config {
            max_dynamic_perm: 1,
            ..Default::default()
        },
    )
    .expect("Failed to create PassthroughFs");

    // This perm_path is not valid because it does not start with `/`.
    let perm_path_string = String::from("test");
    let mut perm_path = [0u8; FS_IOCTL_PATH_MAX_LEN];
    perm_path[..perm_path_string.len()].copy_from_slice(perm_path_string.as_bytes());
    let fs_permission_data_buffer = FsPermissionDataBuffer {
        guest_uid: 1,
        guest_gid: 2,
        host_uid: 3,
        host_gid: 4,
        umask: 5,
        pad: 0,
        perm_path,
    };

    let r = std::io::Cursor::new(fs_permission_data_buffer.as_bytes());
    // The call itself succeeds, but the reply must carry EINVAL because perm_path is invalid.
    let res = fs_ioc_setpermission(&p, mem::size_of_val(&fs_permission_data_buffer) as u32, r)
        .expect("invalid perm_path should get IoctlReply");
    assert!(matches!(
        res,
        IoctlReply::Done(Err(err)) if err.raw_os_error() == Some(libc::EINVAL)
    ));

    let fake_data_buffer: [u8; 128] = [0; 128];
    let r = std::io::Cursor::new(fake_data_buffer.as_bytes());

    // Here the call itself must fail with EINVAL, because in_size is not the size of
    // struct FsPermissionDataBuffer.
    let err = fs_ioc_setpermission(&p, mem::size_of_val(&fake_data_buffer) as u32, r)
        .expect_err("invalid in_size should get Error");
    assert_eq!(err.raw_os_error(), Some(libc::EINVAL));
}
4689
4690
/// Checks that a `PermissionData` entry registered for `dir` rewrites uid/gid/mode for files
/// under `dir` (both pre-existing and newly created) and leaves files outside of it alone.
#[test]
#[cfg(feature = "arc_quota")]
fn permission_data_path_matching() {
    // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
    // `NamedLock` before starting each unit test creating a `PassthroughFs` instance.
    let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
    let _guard = lock.lock().expect("acquire named lock");

    let ctx = get_context();
    let temp_dir = TempDir::new().unwrap();
    // Prepare `a.txt` and `dir/a.txt` before starting the test.
    create_test_data(&temp_dir, &["dir"], &["a.txt", "dir/a.txt"]);

    let cfg = Config {
        max_dynamic_perm: 1,
        ..Default::default()
    };
    let fs = PassthroughFs::new("tag", cfg).unwrap();

    let capable = FsOptions::empty();
    fs.init(capable).unwrap();

    const BY_PATH_UID: u32 = 655360;
    const BY_PATH_GID: u32 = 655361;
    const BY_PATH_UMASK: u32 = 0o007;

    let dir_path = temp_dir.path().join("dir");
    let permission_data = PermissionData {
        guest_uid: BY_PATH_UID,
        guest_gid: BY_PATH_GID,
        host_uid: ctx.uid,
        host_gid: ctx.gid,
        umask: BY_PATH_UMASK,
        perm_path: dir_path.to_string_lossy().into_owned(),
    };
    fs.permission_paths
        .write()
        .expect("permission_path lock must be acquired")
        .push(permission_data);

    // a_path lies outside the directory that has a by-path permission.
    let a_path = temp_dir.path().join("a.txt");
    let in_dir_a_path = dir_path.join("a.txt");

    // a.txt should not be set with guest_uid/guest_gid/umask by path
    let a_entry = lookup_ent(&fs, &a_path).expect("a.txt must exist");
    assert_ne!(a_entry.attr.st_uid, BY_PATH_UID);
    assert_ne!(a_entry.attr.st_gid, BY_PATH_GID);

    // a.txt in dir should be set guest_uid/guest_gid/umask by path
    let in_dir_a_entry = lookup_ent(&fs, &in_dir_a_path).expect("dir/a.txt must exist");
    assert_eq!(in_dir_a_entry.attr.st_uid, BY_PATH_UID);
    assert_eq!(in_dir_a_entry.attr.st_gid, BY_PATH_GID);
    assert_eq!(in_dir_a_entry.attr.st_mode & 0o777, !BY_PATH_UMASK & 0o777);

    // Create dir/b.txt.
    let in_dir_b_path = dir_path.join("b.txt");
    create(&fs, &in_dir_b_path).expect("create b.txt");

    // newly created b.txt in dir should be set guest_uid/guest_gid/umask by path.
    // Look up b.txt itself; checking a.txt a second time would not cover the new file.
    let in_dir_b_entry = lookup_ent(&fs, &in_dir_b_path).expect("dir/b.txt must exist");
    assert_eq!(in_dir_b_entry.attr.st_uid, BY_PATH_UID);
    assert_eq!(in_dir_b_entry.attr.st_gid, BY_PATH_GID);
    assert_eq!(in_dir_b_entry.attr.st_mode & 0o777, !BY_PATH_UMASK & 0o777);
}
4750
4751
/// A valid `FS_IOC_SETPATHXATTR` request is recorded in `xattr_paths`, and a second request is
/// rejected with `EPERM` once `max_dynamic_xattr` (1 here) has been reached.
#[test]
#[cfg(feature = "arc_quota")]
fn set_path_xattr_ioctl_valid_data() {
    // Copies `text` into the front of a zero-filled array of length `N`.
    fn padded<const N: usize>(text: &str) -> [u8; N] {
        let mut buf = [0u8; N];
        buf[..text.len()].copy_from_slice(text.as_bytes());
        buf
    }

    // PassthroughFs may perform process-wide operations such as `fchdir`, so every unit test
    // that creates a `PassthroughFs` instance serializes on a `NamedLock`.
    let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
    let _guard = lock.lock().expect("acquire named lock");

    let p = PassthroughFs::new(
        "tag",
        Config {
            max_dynamic_xattr: 1,
            ..Default::default()
        },
    )
    .expect("Failed to create PassthroughFs");

    let path_string = String::from("/test");
    let xattr_name_string = String::from("test_name");
    let xattr_value_string = String::from("test_value");
    let fs_path_xattr_data_buffer = FsPathXattrDataBuffer {
        path: padded::<FS_IOCTL_PATH_MAX_LEN>(&path_string),
        xattr_name: padded::<FS_IOCTL_XATTR_NAME_MAX_LEN>(&xattr_name_string),
        xattr_value: padded::<FS_IOCTL_XATTR_VALUE_MAX_LEN>(&xattr_value_string),
    };
    let in_size = mem::size_of_val(&fs_path_xattr_data_buffer) as u32;
    let r = std::io::Cursor::new(fs_path_xattr_data_buffer.as_bytes());

    let res =
        fs_ioc_setpathxattr(&p, in_size, r.clone()).expect("valid input should get IoctlReply");
    assert!(matches!(res, IoctlReply::Done(Ok(data)) if data.is_empty()));

    let read_guard = p.xattr_paths.read().expect("read xattr_paths failed");
    let xattr_data = read_guard.first().expect("xattr_paths should not be empty");

    // The request must have been stored in xattr_paths field by field.
    let expected_data = XattrData {
        xattr_path: path_string,
        xattr_name: xattr_name_string,
        xattr_value: xattr_value_string,
    };
    assert_eq!(*xattr_data, expected_data);

    // A second ioctl must be refused, since max_dynamic_xattr is set to 1.
    let res =
        fs_ioc_setpathxattr(&p, in_size, r.clone()).expect("valid input should get IoctlReply");
    assert!(matches!(
        res,
        IoctlReply::Done(Err(err)) if err.raw_os_error() == Some(libc::EPERM)
    ));
}
4822
/// Invalid `FS_IOC_SETPATHXATTR` requests: a request whose path is "test" yields an `EINVAL`
/// reply, and a buffer of the wrong size makes the ioctl call itself fail with `EINVAL`.
#[test]
#[cfg(feature = "arc_quota")]
fn set_path_xattr_ioctl_invalid_data() {
    // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
    // `NamedLock` before starting each unit test creating a `PassthroughFs` instance.
    let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
    let _guard = lock.lock().expect("acquire named lock");

    let cfg: Config = Config {
        max_dynamic_xattr: 1,
        ..Default::default()
    };
    let p = PassthroughFs::new("tag", cfg).expect("Failed to create PassthroughFs");

    // NOTE(review): presumably rejected because the path lacks a leading `/`, mirroring the
    // permission ioctl test — confirm against the ioctl implementation.
    let path_string = String::from("test");
    let xattr_name_string = String::from("test_name");
    let xattr_value_string = String::from("test_value");
    let fs_path_xattr_data_buffer = FsPathXattrDataBuffer {
        path: {
            let mut path: [u8; FS_IOCTL_PATH_MAX_LEN] = [0; FS_IOCTL_PATH_MAX_LEN];
            path[..path_string.len()].copy_from_slice(path_string.as_bytes());
            path
        },
        xattr_name: {
            let mut xattr_name: [u8; FS_IOCTL_XATTR_NAME_MAX_LEN] =
                [0; FS_IOCTL_XATTR_NAME_MAX_LEN];
            xattr_name[..xattr_name_string.len()].copy_from_slice(xattr_name_string.as_bytes());
            xattr_name
        },
        xattr_value: {
            let mut xattr_value: [u8; FS_IOCTL_XATTR_VALUE_MAX_LEN] =
                [0; FS_IOCTL_XATTR_VALUE_MAX_LEN];
            xattr_value[..xattr_value_string.len()]
                .copy_from_slice(xattr_value_string.as_bytes());
            xattr_value
        },
    };
    let r = std::io::Cursor::new(fs_path_xattr_data_buffer.as_bytes());

    // The call itself succeeds, but the reply must carry EINVAL because the path is invalid.
    let res = fs_ioc_setpathxattr(
        &p,
        mem::size_of_val(&fs_path_xattr_data_buffer) as u32,
        r.clone(),
    )
    .expect("invalid path should get IoctlReply");
    assert!(
        matches!(res, IoctlReply::Done(Err(err)) if err.raw_os_error().is_some_and(|errno| {
            errno == libc::EINVAL
        }))
    );

    let fake_data_buffer: [u8; 128] = [0; 128];
    let r = std::io::Cursor::new(fake_data_buffer.as_bytes());
    // Here the call itself must fail with EINVAL, because in_size is not the size of
    // struct FsPathXattrDataBuffer.
    let res = fs_ioc_setpathxattr(&p, mem::size_of_val(&fake_data_buffer) as u32, r)
        .expect_err("invalid in_size should get Error");
    assert!(res
        .raw_os_error()
        .is_some_and(|errno| { errno == libc::EINVAL }));
}
4884
4885
/// Verifies that an xattr registered by path in `xattr_paths` is reported only for entries
/// located under that path.
///
/// The test registers an xattr for `<tmp>/dir` and then checks that:
/// * `<tmp>/a.txt` (outside the directory) does not expose the xattr;
/// * `<tmp>/dir/a.txt` (pre-existing, inside the directory) exposes it;
/// * `<tmp>/dir/b.txt` (created after registration) exposes it as well.
#[test]
#[cfg(feature = "arc_quota")]
fn xattr_data_path_matching() {
    // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
    // `NamedLock` before starting each unit test creating a `PassthroughFs` instance.
    let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
    let _guard = lock.lock().expect("acquire named lock");

    let ctx = get_context();
    let temp_dir = TempDir::new().unwrap();
    // Prepare `a.txt` and `dir/a.txt` before starting the test.
    create_test_data(&temp_dir, &["dir"], &["a.txt", "dir/a.txt"]);

    let cfg = Config {
        max_dynamic_xattr: 1,
        ..Default::default()
    };
    let fs = PassthroughFs::new("tag", cfg).unwrap();

    let capable = FsOptions::empty();
    fs.init(capable).unwrap();

    let dir_path = temp_dir.path().join("dir");
    let xattr_name_string = String::from("test_name");
    let xattr_name_cstring = CString::new(xattr_name_string.clone()).expect("create c string");
    let xattr_value_string = String::from("test_value");
    let xattr_value_bytes = xattr_value_string.clone().into_bytes();

    // Register the xattr directly in the table instead of going through the ioctl.
    let xattr_data = XattrData {
        xattr_name: xattr_name_string,
        xattr_value: xattr_value_string,
        xattr_path: dir_path.to_string_lossy().into_owned(),
    };
    fs.xattr_paths
        .write()
        .expect("xattr_paths lock must be acquired")
        .push(xattr_data);

    // `a_path` lies outside the directory that has the xattr set by path.
    let a_path: std::path::PathBuf = temp_dir.path().join("a.txt");
    let in_dir_a_path = dir_path.join("a.txt");

    let a_node = lookup(&fs, a_path.as_path()).expect("lookup a node");
    // a.txt should not be set with xattr by path
    assert!(fs
        .getxattr(
            ctx,
            a_node,
            &xattr_name_cstring,
            xattr_value_bytes.len() as u32
        )
        .is_err());

    let in_dir_a_node = lookup(&fs, in_dir_a_path.as_path()).expect("lookup in dir a node");
    // a.txt in dir should be set xattr by path
    let in_dir_a_reply = fs
        .getxattr(
            ctx,
            in_dir_a_node,
            &xattr_name_cstring,
            xattr_value_bytes.len() as u32,
        )
        .expect("Getxattr should success");
    assert!(matches!(in_dir_a_reply, GetxattrReply::Value(v) if v == xattr_value_bytes));

    // Create dir/b.txt.
    let in_dir_b_path = dir_path.join("b.txt");
    create(&fs, &in_dir_b_path).expect("create b.txt");

    // Newly created b.txt in dir should be set xattr by path. Look up b.txt itself so that the
    // assertion below really covers the new file rather than re-checking a.txt.
    let in_dir_b_node = lookup(&fs, in_dir_b_path.as_path()).expect("lookup in dir b node");
    let in_dir_b_reply = fs
        .getxattr(
            ctx,
            in_dir_b_node,
            &xattr_name_cstring,
            xattr_value_bytes.len() as u32,
        )
        .expect("Getxattr should success");
    assert!(matches!(in_dir_b_reply, GetxattrReply::Value(v) if v == xattr_value_bytes));
}
4960
4961
/// Creates and open a new file by atomic_open with O_APPEND flag.
4962
/// We check O_APPEND is properly handled, depending on writeback cache is enabled or not.
4963
fn atomic_open_create_o_append(writeback: bool) {
4964
// Since PassthroughFs may executes process-wide operations such as `fchdir`, acquire
4965
// `NamedLock` before starting each unit test creating a `PassthroughFs` instance.
4966
let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
4967
let _guard = lock.lock().expect("acquire named lock");
4968
4969
let temp_dir = TempDir::new().unwrap();
4970
4971
let cfg = Config {
4972
cache_policy: CachePolicy::Always,
4973
writeback,
4974
..Default::default()
4975
};
4976
let fs = PassthroughFs::new("tag", cfg).unwrap();
4977
4978
let capable = FsOptions::ZERO_MESSAGE_OPEN | FsOptions::WRITEBACK_CACHE;
4979
fs.init(capable).unwrap();
4980
4981
let (entry, _, _) = atomic_open(
4982
&fs,
4983
&temp_dir.path().join("a.txt"),
4984
0o666,
4985
(libc::O_RDWR | libc::O_CREAT | libc::O_APPEND) as u32,
4986
0,
4987
None,
4988
)
4989
.expect("atomic_open");
4990
assert_ne!(entry.inode, 0);
4991
4992
let inodes = fs.inodes.lock();
4993
let data = inodes.get(&entry.inode).unwrap();
4994
let flags = data.file.lock().open_flags;
4995
if writeback {
4996
// When writeback is enabled, O_APPEND must be handled by the guest kernel.
4997
// So, it must be cleared.
4998
assert_eq!(flags & libc::O_APPEND, 0);
4999
} else {
5000
// Without writeback cache, O_APPEND must not be cleared.
5001
assert_eq!(flags & libc::O_APPEND, libc::O_APPEND);
5002
}
5003
}
5004
5005
/// Runs the `O_APPEND` atomic-open check with the writeback cache disabled.
#[test]
fn test_atomic_open_create_o_append_no_writeback() {
    let writeback = false;
    atomic_open_create_o_append(writeback);
}
5009
5010
/// Runs the `O_APPEND` atomic-open check with the writeback cache enabled.
#[test]
fn test_atomic_open_create_o_append_writeback() {
    let writeback = true;
    atomic_open_create_o_append(writeback);
}
5014
}
5015
5016