Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
google
GitHub Repository: google/crosvm
Path: blob/main/disk/src/disk.rs
5394 views
1
// Copyright 2019 The ChromiumOS Authors
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
//! VM disk image file format I/O.
6
7
use std::cmp::min;
8
use std::fmt::Debug;
9
use std::fs::File;
10
use std::io;
11
use std::io::Seek;
12
use std::io::SeekFrom;
13
use std::path::PathBuf;
14
use std::sync::Arc;
15
16
use async_trait::async_trait;
17
use base::info;
18
use base::AsRawDescriptors;
19
use base::FileAllocate;
20
use base::FileReadWriteAtVolatile;
21
use base::FileSetLen;
22
use cros_async::BackingMemory;
23
use cros_async::Executor;
24
use cros_async::IoSource;
25
use cros_async::MemRegionIter;
26
use thiserror::Error as ThisError;
27
28
mod asynchronous;
29
#[allow(unused)]
30
pub(crate) use asynchronous::AsyncDiskFileWrapper;
31
#[cfg(feature = "qcow")]
32
mod qcow;
33
#[cfg(feature = "qcow")]
34
pub use qcow::QcowFile;
35
#[cfg(feature = "qcow")]
36
pub use qcow::QCOW_MAGIC;
37
mod sys;
38
39
#[cfg(feature = "composite-disk")]
40
mod composite;
41
#[cfg(feature = "composite-disk")]
42
use composite::CompositeDiskFile;
43
#[cfg(feature = "composite-disk")]
44
use composite::CDISK_MAGIC;
45
#[cfg(feature = "composite-disk")]
46
mod gpt;
47
#[cfg(feature = "composite-disk")]
48
pub use composite::create_composite_disk;
49
#[cfg(feature = "composite-disk")]
50
pub use composite::create_zero_filler;
51
#[cfg(feature = "composite-disk")]
52
pub use composite::Error as CompositeError;
53
#[cfg(feature = "composite-disk")]
54
pub use composite::ImagePartitionType;
55
#[cfg(feature = "composite-disk")]
56
pub use composite::PartitionInfo;
57
#[cfg(feature = "composite-disk")]
58
pub use gpt::Error as GptError;
59
60
#[cfg(feature = "android-sparse")]
61
mod android_sparse;
62
#[cfg(feature = "android-sparse")]
63
use android_sparse::AndroidSparse;
64
#[cfg(feature = "android-sparse")]
65
use android_sparse::SPARSE_HEADER_MAGIC;
66
use sys::read_from_disk;
67
68
#[cfg(feature = "zstd")]
69
mod zstd;
70
#[cfg(feature = "zstd")]
71
use zstd::ZstdDisk;
72
#[cfg(feature = "zstd")]
73
use zstd::ZSTD_FRAME_MAGIC;
74
#[cfg(feature = "zstd")]
75
use zstd::ZSTD_SKIPPABLE_MAGIC_HIGH;
76
#[cfg(feature = "zstd")]
77
use zstd::ZSTD_SKIPPABLE_MAGIC_LOW;
78
79
/// Nesting depth limit for disk formats that can open other disk files.
80
const MAX_NESTING_DEPTH: u32 = 10;
81
82
#[derive(ThisError, Debug)]
83
pub enum Error {
84
#[error("failed to create block device: {0}")]
85
BlockDeviceNew(base::Error),
86
#[error("requested file conversion not supported")]
87
ConversionNotSupported,
88
#[cfg(feature = "android-sparse")]
89
#[error("failure in android sparse disk: {0}")]
90
CreateAndroidSparseDisk(android_sparse::Error),
91
#[cfg(feature = "composite-disk")]
92
#[error("failure in composite disk: {0}")]
93
CreateCompositeDisk(composite::Error),
94
#[cfg(feature = "zstd")]
95
#[error("failure in zstd disk: {0}")]
96
CreateZstdDisk(anyhow::Error),
97
#[error("failure creating single file disk: {0}")]
98
CreateSingleFileDisk(cros_async::AsyncError),
99
#[error("failed to set O_DIRECT on disk image: {0}")]
100
DirectFailed(base::Error),
101
#[error("failure with fdatasync: {0}")]
102
Fdatasync(cros_async::AsyncError),
103
#[error("failure with fsync: {0}")]
104
Fsync(cros_async::AsyncError),
105
#[error("failed to lock file: {0}")]
106
LockFileFailure(base::Error),
107
#[error("failure with fdatasync: {0}")]
108
IoFdatasync(io::Error),
109
#[error("failure with flush: {0}")]
110
IoFlush(io::Error),
111
#[error("failure with fsync: {0}")]
112
IoFsync(io::Error),
113
#[error("failure to punch hole: {0}")]
114
IoPunchHole(io::Error),
115
#[error("checking host fs type: {0}")]
116
HostFsType(base::Error),
117
#[error("maximum disk nesting depth exceeded")]
118
MaxNestingDepthExceeded,
119
#[error("failed to open disk file \"{0}\": {1}")]
120
OpenFile(String, base::Error),
121
#[error("failure to punch hole: {0}")]
122
PunchHole(cros_async::AsyncError),
123
#[error("failure to punch hole for block device file: {0}")]
124
PunchHoleBlockDeviceFile(base::Error),
125
#[cfg(feature = "qcow")]
126
#[error("failure in qcow: {0}")]
127
QcowError(qcow::Error),
128
#[error("failed to read data: {0}")]
129
ReadingData(io::Error),
130
#[error("failed to read header: {0}")]
131
ReadingHeader(io::Error),
132
#[error("failed to read to memory: {0}")]
133
ReadToMem(cros_async::AsyncError),
134
#[error("failed to seek file: {0}")]
135
SeekingFile(io::Error),
136
#[error("failed to set file size: {0}")]
137
SettingFileSize(io::Error),
138
#[error("unknown disk type")]
139
UnknownType,
140
#[error("failed to write from memory: {0}")]
141
WriteFromMem(cros_async::AsyncError),
142
#[error("failed to write from vec: {0}")]
143
WriteFromVec(cros_async::AsyncError),
144
#[error("failed to write zeroes: {0}")]
145
WriteZeroes(io::Error),
146
#[error("failed to write data: {0}")]
147
WritingData(io::Error),
148
#[error("failed to convert to async: {0}")]
149
ToAsync(cros_async::AsyncError),
150
#[cfg(windows)]
151
#[error("failed to set disk file sparse: {0}")]
152
SetSparseFailure(io::Error),
153
#[error("failure with guest memory access: {0}")]
154
GuestMemory(cros_async::mem::Error),
155
#[error("unsupported operation")]
156
UnsupportedOperation,
157
}
158
159
pub type Result<T> = std::result::Result<T, Error>;
160
161
/// A trait for getting the length of a disk image or raw block device.
pub trait DiskGetLen {
    /// Returns the current length of the disk in bytes.
    fn get_len(&self) -> io::Result<u64>;
}

impl DiskGetLen for File {
    /// Measures the file by seeking to its end, then puts the cursor back where it was.
    ///
    /// Any seek failure is returned to the caller unchanged.
    fn get_len(&self) -> io::Result<u64> {
        // `Seek` is implemented for `&File`, so a mutable binding of the shared reference is all
        // that is needed to move the cursor.
        let mut handle = self;
        let saved_pos = handle.stream_position()?;
        let len = handle.seek(SeekFrom::End(0))?;
        handle.seek(SeekFrom::Start(saved_pos))?;
        Ok(len)
    }
}
176
177
/// The prerequisites necessary to support a block device.
178
pub trait DiskFile:
179
FileSetLen + DiskGetLen + FileReadWriteAtVolatile + ToAsyncDisk + Send + AsRawDescriptors + Debug
180
{
181
/// Creates a new DiskFile instance that shares the same underlying disk file image. IO
182
/// operations to a DiskFile should affect all DiskFile instances with the same underlying disk
183
/// file image.
184
///
185
/// `try_clone()` returns [`io::ErrorKind::Unsupported`] Error if a DiskFile does not support
186
/// creating an instance with the same underlying disk file image.
187
fn try_clone(&self) -> io::Result<Box<dyn DiskFile>> {
188
Err(io::Error::new(
189
io::ErrorKind::Unsupported,
190
"unsupported operation",
191
))
192
}
193
}
194
195
/// A `DiskFile` that can be converted for asychronous access.
196
pub trait ToAsyncDisk: AsRawDescriptors + DiskGetLen + Send {
197
/// Convert a boxed self in to a box-wrapped implementaiton of AsyncDisk.
198
/// Used to convert a standard disk image to an async disk image. This conversion and the
199
/// inverse are needed so that the `Send` DiskImage can be given to the block thread where it is
200
/// converted to a non-`Send` AsyncDisk. The AsyncDisk can then be converted back and returned
201
/// to the main device thread if the block device is destroyed or reset.
202
fn to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>>;
203
}
204
205
impl ToAsyncDisk for File {
206
fn to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>> {
207
Ok(Box::new(SingleFileDisk::new(*self, ex)?))
208
}
209
}
210
211
/// The variants of image files on the host that can be used as virtual disks.
#[derive(Debug, PartialEq, Eq)]
pub enum ImageType {
    /// No recognized header; the file contents are used as-is.
    Raw,
    /// The header starts with the qcow magic.
    Qcow2,
    /// The header starts with the composite disk magic.
    CompositeDisk,
    /// The header starts with the Android sparse image magic.
    AndroidSparse,
    /// The header starts with a zstd frame magic or a zstd skippable-frame magic.
    Zstd,
}
220
221
/// Detect the type of an image file by checking for a valid header of the supported formats.
222
pub fn detect_image_type(file: &File, overlapped_mode: bool) -> Result<ImageType> {
223
let mut f = file;
224
let disk_size = f.get_len().map_err(Error::SeekingFile)?;
225
let orig_seek = f.stream_position().map_err(Error::SeekingFile)?;
226
227
info!("disk size {}", disk_size);
228
229
// Try to read the disk in a nicely-aligned block size unless the whole file is smaller.
230
const MAGIC_BLOCK_SIZE: usize = 4096;
231
#[repr(align(4096))]
232
struct BlockAlignedBuffer {
233
data: [u8; MAGIC_BLOCK_SIZE],
234
}
235
let mut magic = BlockAlignedBuffer {
236
data: [0u8; MAGIC_BLOCK_SIZE],
237
};
238
let magic_read_len = if disk_size > MAGIC_BLOCK_SIZE as u64 {
239
MAGIC_BLOCK_SIZE
240
} else {
241
// This cast is safe since we know disk_size is less than MAGIC_BLOCK_SIZE (4096) and
242
// therefore is representable in usize.
243
disk_size as usize
244
};
245
246
read_from_disk(f, 0, &mut magic.data[0..magic_read_len], overlapped_mode)?;
247
f.seek(SeekFrom::Start(orig_seek))
248
.map_err(Error::SeekingFile)?;
249
250
#[cfg(feature = "composite-disk")]
251
if let Some(cdisk_magic) = magic.data.get(0..CDISK_MAGIC.len()) {
252
if cdisk_magic == CDISK_MAGIC.as_bytes() {
253
return Ok(ImageType::CompositeDisk);
254
}
255
}
256
257
#[allow(unused_variables)] // magic4 is only used with the qcow/android-sparse/zstd features.
258
if let Some(magic4) = magic
259
.data
260
.get(0..4)
261
.and_then(|v| <&[u8] as std::convert::TryInto<[u8; 4]>>::try_into(v).ok())
262
{
263
#[cfg(feature = "qcow")]
264
if magic4 == QCOW_MAGIC.to_be_bytes() {
265
return Ok(ImageType::Qcow2);
266
}
267
#[cfg(feature = "android-sparse")]
268
if magic4 == SPARSE_HEADER_MAGIC.to_le_bytes() {
269
return Ok(ImageType::AndroidSparse);
270
}
271
#[cfg(feature = "zstd")]
272
if u32::from_le_bytes(magic4) == ZSTD_FRAME_MAGIC
273
|| (u32::from_le_bytes(magic4) >= ZSTD_SKIPPABLE_MAGIC_LOW
274
&& u32::from_le_bytes(magic4) <= ZSTD_SKIPPABLE_MAGIC_HIGH)
275
{
276
return Ok(ImageType::Zstd);
277
}
278
}
279
280
Ok(ImageType::Raw)
281
}
282
283
impl DiskFile for File {
284
fn try_clone(&self) -> io::Result<Box<dyn DiskFile>> {
285
Ok(Box::new(self.try_clone()?))
286
}
287
}
288
289
/// Options describing how a disk image file should be opened.
pub struct DiskFileParams {
    /// Path of the disk image file.
    pub path: PathBuf,
    /// Whether the file is read only.
    pub is_read_only: bool,
    /// Whether to call `base::set_sparse_file` on the file. Currently only affects Windows and
    /// is irrelevant for read only files.
    pub is_sparse_file: bool,
    /// Whether to open the file in overlapped mode. Only affects Windows.
    pub is_overlapped: bool,
    /// Whether to disable OS page caches / buffering.
    pub is_direct: bool,
    /// Whether to lock the file.
    pub lock: bool,
    /// The nesting depth of the file. Used to avoid infinite recursion. Users outside the disk
    /// crate should set this to zero.
    pub depth: u32,
}
305
306
/// Inspect the image file type and create an appropriate disk file to match it.
307
pub fn open_disk_file(params: DiskFileParams) -> Result<Box<dyn DiskFile>> {
308
if params.depth > MAX_NESTING_DEPTH {
309
return Err(Error::MaxNestingDepthExceeded);
310
}
311
312
let raw_image = sys::open_raw_disk_image(&params)?;
313
let image_type = detect_image_type(&raw_image, params.is_overlapped)?;
314
Ok(match image_type {
315
ImageType::Raw => {
316
sys::apply_raw_disk_file_options(&raw_image, params.is_sparse_file)?;
317
Box::new(raw_image) as Box<dyn DiskFile>
318
}
319
#[cfg(feature = "qcow")]
320
ImageType::Qcow2 => Box::new(QcowFile::from(raw_image, params).map_err(Error::QcowError)?)
321
as Box<dyn DiskFile>,
322
#[cfg(feature = "composite-disk")]
323
ImageType::CompositeDisk => {
324
// Valid composite disk header present
325
Box::new(
326
CompositeDiskFile::from_file(raw_image, params)
327
.map_err(Error::CreateCompositeDisk)?,
328
) as Box<dyn DiskFile>
329
}
330
#[cfg(feature = "android-sparse")]
331
ImageType::AndroidSparse => {
332
Box::new(AndroidSparse::from_file(raw_image).map_err(Error::CreateAndroidSparseDisk)?)
333
as Box<dyn DiskFile>
334
}
335
#[cfg(feature = "zstd")]
336
ImageType::Zstd => Box::new(ZstdDisk::from_file(raw_image).map_err(Error::CreateZstdDisk)?)
337
as Box<dyn DiskFile>,
338
#[allow(unreachable_patterns)]
339
_ => return Err(Error::UnknownType),
340
})
341
}
342
343
/// An asynchronously accessible disk.
344
#[async_trait(?Send)]
345
pub trait AsyncDisk: DiskGetLen + FileSetLen + FileAllocate {
346
/// Flush intermediary buffers and/or dirty state to file. fsync not required.
347
async fn flush(&self) -> Result<()>;
348
349
/// Asynchronously fsyncs any completed operations to the disk.
350
async fn fsync(&self) -> Result<()>;
351
352
/// Asynchronously fdatasyncs any completed operations to the disk.
353
/// Note that an implementation may simply call fsync for fdatasync.
354
async fn fdatasync(&self) -> Result<()>;
355
356
/// Reads from the file at 'file_offset' into memory `mem` at `mem_offsets`.
357
/// `mem_offsets` is similar to an iovec except relative to the start of `mem`.
358
async fn read_to_mem<'a>(
359
&'a self,
360
file_offset: u64,
361
mem: Arc<dyn BackingMemory + Send + Sync>,
362
mem_offsets: cros_async::MemRegionIter<'a>,
363
) -> Result<usize>;
364
365
/// Writes to the file at 'file_offset' from memory `mem` at `mem_offsets`.
366
async fn write_from_mem<'a>(
367
&'a self,
368
file_offset: u64,
369
mem: Arc<dyn BackingMemory + Send + Sync>,
370
mem_offsets: cros_async::MemRegionIter<'a>,
371
) -> Result<usize>;
372
373
/// Replaces a range of bytes with a hole.
374
async fn punch_hole(&self, file_offset: u64, length: u64) -> Result<()>;
375
376
/// Writes up to `length` bytes of zeroes to the stream, returning how many bytes were written.
377
async fn write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()>;
378
379
/// Reads from the file at 'file_offset' into `buf`.
380
///
381
/// Less efficient than `read_to_mem` because of extra copies and allocations.
382
async fn read_double_buffered(&self, file_offset: u64, buf: &mut [u8]) -> Result<usize> {
383
let backing_mem = Arc::new(cros_async::VecIoWrapper::from(vec![0u8; buf.len()]));
384
let region = cros_async::MemRegion {
385
offset: 0,
386
len: buf.len(),
387
};
388
let n = self
389
.read_to_mem(
390
file_offset,
391
backing_mem.clone(),
392
MemRegionIter::new(&[region]),
393
)
394
.await?;
395
backing_mem
396
.get_volatile_slice(region)
397
.expect("BUG: the VecIoWrapper shrank?")
398
.sub_slice(0, n)
399
.expect("BUG: read_to_mem return value too large?")
400
.copy_to(buf);
401
Ok(n)
402
}
403
404
/// Writes to the file at 'file_offset' from `buf`.
405
///
406
/// Less efficient than `write_from_mem` because of extra copies and allocations.
407
async fn write_double_buffered(&self, file_offset: u64, buf: &[u8]) -> Result<usize> {
408
let backing_mem = Arc::new(cros_async::VecIoWrapper::from(buf.to_vec()));
409
let region = cros_async::MemRegion {
410
offset: 0,
411
len: buf.len(),
412
};
413
self.write_from_mem(
414
file_offset,
415
backing_mem,
416
cros_async::MemRegionIter::new(&[region]),
417
)
418
.await
419
}
420
}
421
422
/// A disk backed by a single file that implements `AsyncDisk` for access.
423
pub struct SingleFileDisk {
424
inner: IoSource<File>,
425
// Whether the backed file is a block device since the punch-hole needs different operation.
426
#[cfg(any(target_os = "android", target_os = "linux"))]
427
is_block_device_file: bool,
428
}
429
430
impl DiskGetLen for SingleFileDisk {
431
fn get_len(&self) -> io::Result<u64> {
432
self.inner.as_source().get_len()
433
}
434
}
435
436
impl FileSetLen for SingleFileDisk {
437
fn set_len(&self, len: u64) -> io::Result<()> {
438
self.inner.as_source().set_len(len)
439
}
440
}
441
442
impl FileAllocate for SingleFileDisk {
443
fn allocate(&self, offset: u64, len: u64) -> io::Result<()> {
444
self.inner.as_source().allocate(offset, len)
445
}
446
}
447
448
#[async_trait(?Send)]
449
impl AsyncDisk for SingleFileDisk {
450
async fn flush(&self) -> Result<()> {
451
// Nothing to flush, all file mutations are immediately sent to the OS.
452
Ok(())
453
}
454
455
async fn fsync(&self) -> Result<()> {
456
self.inner.fsync().await.map_err(Error::Fsync)
457
}
458
459
async fn fdatasync(&self) -> Result<()> {
460
self.inner.fdatasync().await.map_err(Error::Fdatasync)
461
}
462
463
async fn read_to_mem<'a>(
464
&'a self,
465
file_offset: u64,
466
mem: Arc<dyn BackingMemory + Send + Sync>,
467
mem_offsets: cros_async::MemRegionIter<'a>,
468
) -> Result<usize> {
469
self.inner
470
.read_to_mem(Some(file_offset), mem, mem_offsets)
471
.await
472
.map_err(Error::ReadToMem)
473
}
474
475
async fn write_from_mem<'a>(
476
&'a self,
477
file_offset: u64,
478
mem: Arc<dyn BackingMemory + Send + Sync>,
479
mem_offsets: cros_async::MemRegionIter<'a>,
480
) -> Result<usize> {
481
self.inner
482
.write_from_mem(Some(file_offset), mem, mem_offsets)
483
.await
484
.map_err(Error::WriteFromMem)
485
}
486
487
async fn punch_hole(&self, file_offset: u64, length: u64) -> Result<()> {
488
#[cfg(any(target_os = "android", target_os = "linux"))]
489
if self.is_block_device_file {
490
return base::linux::discard_block(self.inner.as_source(), file_offset, length)
491
.map_err(Error::PunchHoleBlockDeviceFile);
492
}
493
self.inner
494
.punch_hole(file_offset, length)
495
.await
496
.map_err(Error::PunchHole)
497
}
498
499
async fn write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()> {
500
if self
501
.inner
502
.write_zeroes_at(file_offset, length)
503
.await
504
.is_ok()
505
{
506
return Ok(());
507
}
508
509
// Fall back to filling zeros if more efficient write_zeroes_at doesn't work.
510
let buf_size = min(length, 0x10000);
511
let mut nwritten = 0;
512
while nwritten < length {
513
let remaining = length - nwritten;
514
let write_size = min(remaining, buf_size) as usize;
515
let buf = vec![0u8; write_size];
516
nwritten += self
517
.inner
518
.write_from_vec(Some(file_offset + nwritten), buf)
519
.await
520
.map(|(n, _)| n as u64)
521
.map_err(Error::WriteFromVec)?;
522
}
523
Ok(())
524
}
525
}
526
527