Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
google
GitHub Repository: google/crosvm
Path: blob/main/devices/src/virtio/pmem.rs
5394 views
1
// Copyright 2019 The ChromiumOS Authors
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file.
4
5
use std::collections::BTreeMap;
6
use std::fs::File;
7
use std::io;
8
use std::mem::size_of;
9
use std::time::Duration;
10
11
use anyhow::anyhow;
12
use anyhow::Context;
13
use base::error;
14
use base::AsRawDescriptor;
15
use base::Error as SysError;
16
use base::Event;
17
use base::RawDescriptor;
18
use base::Result as SysResult;
19
use base::Timer;
20
use base::Tube;
21
use base::TubeError;
22
use base::WorkerThread;
23
use cros_async::select2;
24
use cros_async::select3;
25
use cros_async::AsyncError;
26
use cros_async::EventAsync;
27
use cros_async::Executor;
28
use cros_async::TimerAsync;
29
use data_model::Le32;
30
use data_model::Le64;
31
use futures::pin_mut;
32
use remain::sorted;
33
use snapshot::AnySnapshot;
34
use thiserror::Error;
35
use vm_control::MemSlot;
36
use vm_control::VmMemoryMappingRequest;
37
use vm_control::VmMemoryMappingResponse;
38
use vm_memory::GuestAddress;
39
use vm_memory::GuestMemory;
40
use zerocopy::FromBytes;
41
use zerocopy::Immutable;
42
use zerocopy::IntoBytes;
43
use zerocopy::KnownLayout;
44
45
use super::async_utils;
46
use super::copy_config;
47
use super::DescriptorChain;
48
use super::DeviceType;
49
use super::Interrupt;
50
use super::Queue;
51
use super::VirtioDevice;
52
53
// The device exposes a single virtqueue of up to 256 descriptors.
const QUEUE_SIZE: u16 = 256;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

/* Feature bits */
// Feature bit index advertised only when the mapping is writable (see
// `Pmem::new`); guests may then issue discard (range) requests.
// NOTE(review): bit 63 looks like a crosvm-specific extension rather than a
// bit from the virtio spec — confirm before relying on it elsewhere.
const VIRTIO_PMEM_F_DISCARD: u32 = 63;

// Request type codes read from the guest's request structure.
const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0;
const VIRTIO_PMEM_REQ_TYPE_DISCARD: u32 = u32::MAX;
// Status codes written back to the guest in `virtio_pmem_resp`.
const VIRTIO_PMEM_RESP_TYPE_OK: u32 = 0;
const VIRTIO_PMEM_RESP_TYPE_EIO: u32 = 1;
64
// Virtio config space contents: guest physical address and size of the
// mapped region. Exposed to the guest by `read_config`.
#[derive(Copy, Clone, Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
struct virtio_pmem_config {
    start_address: Le64,
    size: Le64,
}

// Response written back for every request; `status_code` is one of the
// VIRTIO_PMEM_RESP_TYPE_* values.
#[derive(Copy, Clone, Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
struct virtio_pmem_resp {
    status_code: Le32,
}

// Short request form carrying only a request type; `handle_request` selects
// this form when the readable payload is exactly this struct's size.
#[derive(Copy, Clone, Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
struct virtio_pmem_req {
    type_: Le32,
}

// Long request form: request type plus a target range (used by discard).
#[derive(Copy, Clone, Debug, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[repr(C)]
struct virtio_pmem_range_req {
    type_: Le32,
    padding_: Le32,
    start_address: Le64,
    size: Le64,
}
91
92
/// Errors produced by the pmem worker and pageout tasks.
#[sorted]
#[derive(Error, Debug)]
enum Error {
    /// Failed to get value from pageout timer.
    #[error("failed to get value from pageout timer: {0}")]
    PageoutTimer(AsyncError),
    /// Failed to read from virtqueue.
    #[error("failed to read from virtqueue: {0}")]
    ReadQueue(io::Error),
    /// Failed to receive tube response.
    #[error("failed to receive tube response: {0}")]
    ReceiveResponse(TubeError),
    /// Failed to send tube request.
    #[error("failed to send tube request: {0}")]
    SendingRequest(TubeError),
    /// Failed to write to virtqueue.
    #[error("failed to write to virtqueue: {0}")]
    WriteQueue(io::Error),
}

/// Local result alias for the pmem worker code paths.
type Result<T> = ::std::result::Result<T, Error>;
113
114
async fn pageout(
115
ex: &Executor,
116
swap_interval: Duration,
117
pmem_device_tube: &Tube,
118
mapping_arena_slot: u32,
119
mapping_size: usize,
120
) -> Result<()> {
121
let timer = Timer::new().expect("Failed to create a timer");
122
let mut pageout_timer =
123
TimerAsync::new(timer, ex).expect("Failed to create an async pageout timer");
124
pageout_timer
125
.reset_repeating(swap_interval)
126
.expect("Failed to reset pageout timer");
127
128
loop {
129
pageout_timer.wait().await.map_err(Error::PageoutTimer)?;
130
let request = VmMemoryMappingRequest::MadvisePageout {
131
slot: mapping_arena_slot,
132
offset: 0,
133
size: mapping_size,
134
};
135
136
pmem_device_tube
137
.send(&request)
138
.map_err(Error::SendingRequest)?;
139
match pmem_device_tube
140
.recv::<VmMemoryMappingResponse>()
141
.map_err(Error::ReceiveResponse)?
142
{
143
VmMemoryMappingResponse::Ok => {}
144
VmMemoryMappingResponse::Err(e) => {
145
error!("failed to page out the memory mapping: {}", e);
146
}
147
};
148
}
149
}
150
151
fn execute_request(
152
request_type: u32,
153
start_address: u64,
154
size: u64,
155
pmem_device_tube: &Tube,
156
mapping_arena_slot: u32,
157
mapping_size: usize,
158
) -> u32 {
159
match request_type {
160
VIRTIO_PMEM_REQ_TYPE_FLUSH => {
161
let request = VmMemoryMappingRequest::MsyncArena {
162
slot: mapping_arena_slot,
163
offset: 0, // The pmem backing file is always at offset 0 in the arena.
164
size: mapping_size,
165
};
166
167
if let Err(e) = pmem_device_tube.send(&request) {
168
error!("failed to send request: {}", e);
169
return VIRTIO_PMEM_RESP_TYPE_EIO;
170
}
171
172
match pmem_device_tube.recv() {
173
Ok(response) => match response {
174
VmMemoryMappingResponse::Ok => VIRTIO_PMEM_RESP_TYPE_OK,
175
VmMemoryMappingResponse::Err(e) => {
176
error!("failed flushing disk image: {}", e);
177
VIRTIO_PMEM_RESP_TYPE_EIO
178
}
179
},
180
Err(e) => {
181
error!("failed to receive data: {}", e);
182
VIRTIO_PMEM_RESP_TYPE_EIO
183
}
184
}
185
}
186
187
VIRTIO_PMEM_REQ_TYPE_DISCARD => {
188
let request = VmMemoryMappingRequest::MadviseRemove {
189
slot: mapping_arena_slot,
190
offset: usize::try_from(start_address).unwrap(),
191
size: usize::try_from(size).unwrap(),
192
};
193
194
if let Err(e) = pmem_device_tube.send(&request) {
195
error!("failed to send request: {}", e);
196
return VIRTIO_PMEM_RESP_TYPE_EIO;
197
}
198
199
match pmem_device_tube.recv() {
200
Ok(response) => match response {
201
VmMemoryMappingResponse::Ok => VIRTIO_PMEM_RESP_TYPE_OK,
202
VmMemoryMappingResponse::Err(e) => {
203
error!("failed to discard memory range: {}", e);
204
VIRTIO_PMEM_RESP_TYPE_EIO
205
}
206
},
207
Err(e) => {
208
error!("failed to receive data: {}", e);
209
VIRTIO_PMEM_RESP_TYPE_EIO
210
}
211
}
212
}
213
214
_ => {
215
error!("unknown request type: {}", request_type);
216
VIRTIO_PMEM_RESP_TYPE_EIO
217
}
218
}
219
}
220
221
fn handle_request(
222
avail_desc: &mut DescriptorChain,
223
pmem_device_tube: &Tube,
224
mapping_arena_slot: u32,
225
mapping_size: usize,
226
) -> Result<usize> {
227
let (request_type, start_address, size) =
228
if avail_desc.reader.available_bytes() == size_of::<virtio_pmem_req>() {
229
let request = avail_desc
230
.reader
231
.read_obj::<virtio_pmem_req>()
232
.map_err(Error::ReadQueue)?;
233
(request.type_.to_native(), 0, 0)
234
} else {
235
let request = avail_desc
236
.reader
237
.read_obj::<virtio_pmem_range_req>()
238
.map_err(Error::ReadQueue)?;
239
(
240
request.type_.to_native(),
241
request.start_address.to_native(),
242
request.size.to_native(),
243
)
244
};
245
let status_code = execute_request(
246
request_type,
247
start_address,
248
size,
249
pmem_device_tube,
250
mapping_arena_slot,
251
mapping_size,
252
);
253
254
let response = virtio_pmem_resp {
255
status_code: status_code.into(),
256
};
257
258
avail_desc
259
.writer
260
.write_obj(response)
261
.map_err(Error::WriteQueue)?;
262
263
Ok(avail_desc.writer.bytes_written())
264
}
265
266
async fn handle_queue(
267
queue: &mut Queue,
268
mut queue_event: EventAsync,
269
pmem_device_tube: &Tube,
270
mapping_arena_slot: u32,
271
mapping_size: usize,
272
) {
273
loop {
274
let mut avail_desc = match queue.next_async(&mut queue_event).await {
275
Err(e) => {
276
error!("Failed to read descriptor {}", e);
277
return;
278
}
279
Ok(d) => d,
280
};
281
282
let written = match handle_request(
283
&mut avail_desc,
284
pmem_device_tube,
285
mapping_arena_slot,
286
mapping_size,
287
) {
288
Ok(n) => n,
289
Err(e) => {
290
error!("pmem: failed to handle request: {}", e);
291
0
292
}
293
};
294
queue.add_used_with_bytes_written(avail_desc, written as u32);
295
queue.trigger_interrupt();
296
}
297
}
298
299
/// Body of the device's worker thread.
///
/// Builds a local async executor and drives the queue-handling future (plus
/// the optional periodic pageout future) until `kill_evt` is signaled or one
/// of the futures fails.
fn run_worker(
    queue: &mut Queue,
    pmem_device_tube: &Tube,
    kill_evt: Event,
    mapping_arena_slot: u32,
    mapping_size: usize,
    swap_interval: Option<Duration>,
) {
    let ex = Executor::new().unwrap();

    let queue_evt = queue
        .event()
        .try_clone()
        .expect("failed to clone queue event");
    let queue_evt = EventAsync::new(queue_evt, &ex).expect("failed to set up the queue event");

    // Process requests from the virtio queue.
    let queue_fut = handle_queue(
        queue,
        queue_evt,
        pmem_device_tube,
        mapping_arena_slot,
        mapping_size,
    );
    pin_mut!(queue_fut);

    // Exit if the kill event is triggered.
    let kill = async_utils::await_and_exit(&ex, kill_evt);
    pin_mut!(kill);

    // `None` and `Some(Duration::ZERO)` both mean periodic pageout is
    // disabled, so only two futures are run in that case.
    let interval = swap_interval.unwrap_or(Duration::ZERO);
    if interval.is_zero() {
        if let Err(e) = ex.run_until(select2(queue_fut, kill)) {
            error!("error happened in executor: {}", e);
        }
    } else {
        // Pageout enabled: run it under the same executor alongside the
        // queue and kill futures.
        let pageout_fut = pageout(
            &ex,
            interval,
            pmem_device_tube,
            mapping_arena_slot,
            mapping_size,
        );
        pin_mut!(pageout_fut);
        if let Err(e) = ex.run_until(select3(queue_fut, kill, pageout_fut)) {
            error!("error happened in executor: {}", e);
        }
    }
}
348
349
/// Specifies how the device's memory slot is initialized.
pub enum MemSlotConfig {
    /// The memory region has already been mapped to the guest.
    MemSlot {
        /// Index of the guest-mapped memory regions.
        idx: MemSlot,
    },
    /// The memory region that is not initialized yet and whose slot index will be provided via
    /// `Tube` later. e.g. pmem-ext2 device, where fs construction will be done in the main
    /// process. `activate` blocks on this tube to receive the slot index.
    LazyInit { tube: Tube },
}
361
362
/// Virtio persistent-memory device state.
pub struct Pmem {
    // Running worker thread; yields back the queue and device tube on stop.
    worker_thread: Option<WorkerThread<(Queue, Tube)>>,
    // Virtio feature bits advertised to the guest.
    features: u64,
    // Backing file for the mapping, if any; retained so `keep_rds` can keep
    // its descriptor open in the sandboxed process.
    disk_image: Option<File>,
    // Guest physical address where the region is mapped.
    mapping_address: GuestAddress,
    // How the memory slot index for the mapping is obtained.
    mem_slot: MemSlotConfig,
    // Size of the mapped region in bytes (checked to fit a usize in `new`).
    mapping_size: u64,
    // Channel to the main process for memory requests; taken by `activate`
    // and put back by `reset`/`virtio_sleep`.
    pmem_device_tube: Option<Tube>,
    // Interval for periodic pageout of the mapping; `None` (or zero)
    // disables pageout.
    swap_interval: Option<Duration>,
}
372
373
/// Device identity captured by `virtio_snapshot` and checked by
/// `virtio_restore` to ensure the snapshot matches this configuration.
#[derive(serde::Serialize, serde::Deserialize)]
struct PmemSnapshot {
    mapping_address: GuestAddress,
    mapping_size: u64,
}
378
379
/// Configuration of a virtio-pmem device.
pub struct PmemConfig {
    /// Disk image exposed to the guest.
    /// If the memory region is not backed by a file, this should be `None`.
    pub disk_image: Option<File>,
    /// Guest physical address where the memory will be mapped.
    pub mapping_address: GuestAddress,
    /// How the memory slot for the mapping is provided.
    pub mem_slot: MemSlotConfig,
    /// The size of the mapped region.
    pub mapping_size: u64,
    /// A communication channel to the main process to send memory requests.
    pub pmem_device_tube: Tube,
    /// Interval for periodic swap out of memory mapping.
    pub swap_interval: Option<Duration>,
    /// Whether the region is writable or not.
    pub mapping_writable: bool,
}
396
397
impl Pmem {
398
pub fn new(base_features: u64, cfg: PmemConfig) -> SysResult<Pmem> {
399
if cfg.mapping_size > usize::MAX as u64 {
400
return Err(SysError::new(libc::EOVERFLOW));
401
}
402
403
let mut avail_features = base_features;
404
if cfg.mapping_writable {
405
if let MemSlotConfig::LazyInit { .. } = cfg.mem_slot {
406
error!("pmem-ext2 must be a read-only device");
407
return Err(SysError::new(libc::EINVAL));
408
}
409
410
avail_features |= 1 << VIRTIO_PMEM_F_DISCARD;
411
}
412
413
Ok(Pmem {
414
worker_thread: None,
415
features: avail_features,
416
disk_image: cfg.disk_image,
417
mapping_address: cfg.mapping_address,
418
mem_slot: cfg.mem_slot,
419
mapping_size: cfg.mapping_size,
420
pmem_device_tube: Some(cfg.pmem_device_tube),
421
swap_interval: cfg.swap_interval,
422
})
423
}
424
}
425
426
impl VirtioDevice for Pmem {
    // Descriptors that must stay open in the (possibly sandboxed) device
    // process: the backing file, the device tube, and the lazy-init tube.
    fn keep_rds(&self) -> Vec<RawDescriptor> {
        let mut keep_rds = Vec::new();
        if let Some(disk_image) = &self.disk_image {
            keep_rds.push(disk_image.as_raw_descriptor());
        }

        if let Some(ref pmem_device_tube) = self.pmem_device_tube {
            keep_rds.push(pmem_device_tube.as_raw_descriptor());
        }

        if let MemSlotConfig::LazyInit { tube } = &self.mem_slot {
            keep_rds.push(tube.as_raw_descriptor());
        }

        keep_rds
    }

    fn device_type(&self) -> DeviceType {
        DeviceType::Pmem
    }

    fn queue_max_sizes(&self) -> &[u16] {
        QUEUE_SIZES
    }

    fn features(&self) -> u64 {
        self.features
    }

    // Serves reads of the device config space: the mapping's guest physical
    // address and size (see `virtio_pmem_config`).
    fn read_config(&self, offset: u64, data: &mut [u8]) {
        let config = virtio_pmem_config {
            start_address: Le64::from(self.mapping_address.offset()),
            size: Le64::from(self.mapping_size),
        };
        copy_config(data, 0, config.as_bytes(), offset);
    }

    // Takes ownership of the single queue and the device tube, resolves the
    // memory slot (receiving it over the lazy-init tube if needed), and
    // spawns the worker thread running `run_worker`.
    fn activate(
        &mut self,
        _memory: GuestMemory,
        _interrupt: Interrupt,
        mut queues: BTreeMap<usize, Queue>,
    ) -> anyhow::Result<()> {
        if queues.len() != 1 {
            return Err(anyhow!("expected 1 queue, got {}", queues.len()));
        }

        let mut queue = queues.remove(&0).unwrap();

        // We checked that this fits in a usize in `Pmem::new`.
        let mapping_size = self.mapping_size as usize;

        // The tube is moved into the worker; `reset`/`virtio_sleep` put it
        // back, so a second activate without a stop in between would fail here.
        let pmem_device_tube = self
            .pmem_device_tube
            .take()
            .context("missing pmem device tube")?;

        let swap_interval = self.swap_interval;

        // NOTE(review): the LazyInit branch blocks until the main process
        // sends the slot index — confirm this is acceptable during activate.
        let mapping_arena_slot = match &self.mem_slot {
            MemSlotConfig::MemSlot { idx } => *idx,
            MemSlotConfig::LazyInit { tube } => tube
                .recv::<u32>()
                .context("failed to receive memory slot for ext2 pmem device")?,
        };

        self.worker_thread = Some(WorkerThread::start("v_pmem", move |kill_event| {
            run_worker(
                &mut queue,
                &pmem_device_tube,
                kill_event,
                mapping_arena_slot,
                mapping_size,
                swap_interval,
            );
            // Return the queue and tube so stop() can hand them back.
            (queue, pmem_device_tube)
        }));

        Ok(())
    }

    // Stops the worker (if running) and reclaims the device tube so the
    // device can be activated again.
    fn reset(&mut self) -> anyhow::Result<()> {
        if let Some(worker_thread) = self.worker_thread.take() {
            let (_queue, pmem_device_tube) = worker_thread.stop();
            self.pmem_device_tube = Some(pmem_device_tube);
        }
        Ok(())
    }

    // Like `reset`, but also returns the queue so it can be restored on wake.
    fn virtio_sleep(&mut self) -> anyhow::Result<Option<BTreeMap<usize, Queue>>> {
        if let Some(worker_thread) = self.worker_thread.take() {
            let (queue, pmem_device_tube) = worker_thread.stop();
            self.pmem_device_tube = Some(pmem_device_tube);
            return Ok(Some(BTreeMap::from([(0, queue)])));
        }
        Ok(None)
    }

    // Re-activates the device with the queues captured by `virtio_sleep`;
    // a `None` state means the device was not active and nothing is done.
    fn virtio_wake(
        &mut self,
        queues_state: Option<(GuestMemory, Interrupt, BTreeMap<usize, Queue>)>,
    ) -> anyhow::Result<()> {
        if let Some((mem, interrupt, queues)) = queues_state {
            self.activate(mem, interrupt, queues)?;
        }
        Ok(())
    }

    // The only state worth snapshotting is the mapping identity; the mapping
    // contents live outside this device.
    fn virtio_snapshot(&mut self) -> anyhow::Result<AnySnapshot> {
        AnySnapshot::to_any(PmemSnapshot {
            mapping_address: self.mapping_address,
            mapping_size: self.mapping_size,
        })
        .context("failed to serialize pmem snapshot")
    }

    // Restore only validates that the snapshot was taken from an identically
    // configured device; there is no mutable state to reinstate.
    fn virtio_restore(&mut self, data: AnySnapshot) -> anyhow::Result<()> {
        let snapshot: PmemSnapshot =
            AnySnapshot::from_any(data).context("failed to deserialize pmem snapshot")?;
        anyhow::ensure!(
            snapshot.mapping_address == self.mapping_address
                && snapshot.mapping_size == self.mapping_size,
            "pmem snapshot doesn't match config: expected {:?}, got {:?}",
            (self.mapping_address, self.mapping_size),
            (snapshot.mapping_address, snapshot.mapping_size),
        );
        Ok(())
    }
}
556
557