// Book a Demo!
// CoCalc Logo Icon
// Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
// bytecodealliance
// GitHub Repository: bytecodealliance/wasmtime
// Path: blob/main/crates/wizer/src/lib.rs
// 2458 views
1
//! Wizer: the WebAssembly pre-initializer!
2
//!
3
//! See the [`Wizer`] struct for details.
4
5
#![deny(missing_docs)]
6
#![cfg_attr(docsrs, feature(doc_cfg))]
7
8
mod info;
9
mod instrument;
10
mod parse;
11
mod rewrite;
12
mod snapshot;
13
14
#[cfg(feature = "wasmtime")]
15
mod wasmtime;
16
#[cfg(feature = "wasmtime")]
17
pub use wasmtime::*;
18
#[cfg(feature = "component-model")]
19
mod component;
20
#[cfg(feature = "component-model")]
21
pub use component::*;
22
23
pub use crate::info::ModuleContext;
24
pub use crate::snapshot::SnapshotVal;
25
use anyhow::Context;
26
use std::collections::{HashMap, HashSet};
27
28
/// Fallback answer to "should the initialization function stay exported?",
/// used when the `keep_init_func` option was never specified.
const DEFAULT_KEEP_INIT_FUNC: bool = false;
29
30
/// Wizer: the WebAssembly pre-initializer!
///
/// Don't wait for your Wasm module to initialize itself, pre-initialize it!
/// Wizer instantiates your WebAssembly module, executes its initialization
/// function, and then serializes the instance's initialized state out into a
/// new WebAssembly module. Now you can use this new, pre-initialized
/// WebAssembly module to hit the ground running, without making your users wait
/// for that first-time set up code to complete.
///
/// ## Caveats
///
/// * The initialization function may not call any imported functions. Doing so
/// will trigger a trap and `wizer` will exit.
///
/// * The Wasm module may not import globals, tables, or memories.
///
/// * Reference types are not supported yet. This is tricky because it would
/// allow the Wasm module to mutate tables, and we would need to be able to
/// snapshot the new table state, but funcrefs and externrefs don't have
/// identity and aren't comparable in the Wasm spec, which makes snapshotting
/// difficult.
//
// NOTE: with the `clap` feature enabled the `///` comments on this struct and
// its fields are turned into command-line help text, so maintainer-only notes
// below are deliberately written as plain `//` comments.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "clap", derive(clap::Parser))]
pub struct Wizer {
    /// The Wasm export name of the function that should be executed to
    /// initialize the Wasm module.
    //
    // The CLI default below duplicates the value used by `Wizer::new`; keep
    // the two in sync.
    #[cfg_attr(
        feature = "clap",
        arg(short = 'f', long, default_value = "wizer-initialize")
    )]
    init_func: String,

    /// Any function renamings to perform.
    ///
    /// A renaming specification `dst=src` renames a function export `src` to
    /// `dst`, overwriting any previous `dst` export.
    ///
    /// Multiple renamings can be specified. It is an error to specify more than
    /// one source to rename to a destination name, or to specify more than one
    /// renaming destination for one source.
    ///
    /// This option can be used, for example, to replace a `_start` entry point
    /// in an initialized module with an alternate entry point.
    ///
    /// When module linking is enabled, these renames are only applied to the
    /// outermost module.
    //
    // Each entry is stored as `(dst, src)`, i.e. in the same order as the
    // `dst=src` command-line syntax.
    #[cfg_attr(
        feature = "clap",
        arg(
            short = 'r',
            long = "rename-func",
            alias = "func-rename",
            value_name = "dst=src",
            value_parser = parse_rename,
        ),
    )]
    func_renames: Vec<(String, String)>,

    /// After initialization, should the Wasm module still export the
    /// initialization function?
    ///
    /// This is `false` by default, meaning that the initialization function is
    /// no longer exported from the Wasm module.
    //
    // Tri-state, resolved by `get_keep_init_func`:
    //   * `None`          - option never given; `DEFAULT_KEEP_INIT_FUNC` applies.
    //   * `Some(None)`    - option given without a value; treated as `true`.
    //   * `Some(Some(v))` - option given with the explicit value `v`.
    #[cfg_attr(
        feature = "clap",
        arg(long, require_equals = true, value_name = "true|false")
    )]
    keep_init_func: Option<Option<bool>>,
}
99
100
/// Parses one `dst=src` rename specification given on the command line.
///
/// The input is split at the *first* `=`: the text before it becomes the new
/// export name (`dst`) and everything after it becomes the existing export
/// name (`src`). Any further `=` characters therefore end up inside `src`.
///
/// # Errors
///
/// Returns an error when `s` contains no `=` at all.
#[cfg(feature = "clap")]
fn parse_rename(s: &str) -> anyhow::Result<(String, String)> {
    // `split_once` splits at the first separator without allocating an
    // intermediate `Vec`, and makes the "no separator" case explicit.
    let Some((dst, src)) = s.split_once('=') else {
        anyhow::bail!("must contain exactly one equals character ('=')");
    };
    Ok((dst.into(), src.into()))
}
108
109
#[derive(Default)]
110
struct FuncRenames {
111
/// For a given export name that we encounter in the original module, a map
112
/// to a new name, if any, to emit in the output module.
113
rename_src_to_dst: HashMap<String, String>,
114
/// A set of export names that we ignore in the original module (because
115
/// they are overwritten by renamings).
116
rename_dsts: HashSet<String>,
117
}
118
119
impl FuncRenames {
120
fn parse(renames: &[(String, String)]) -> anyhow::Result<FuncRenames> {
121
let mut ret = FuncRenames {
122
rename_src_to_dst: HashMap::new(),
123
rename_dsts: HashSet::new(),
124
};
125
if renames.is_empty() {
126
return Ok(ret);
127
}
128
129
for (dst, src) in renames {
130
if ret.rename_dsts.contains(dst) {
131
anyhow::bail!("Duplicated function rename dst {dst}");
132
}
133
if ret.rename_src_to_dst.contains_key(src) {
134
anyhow::bail!("Duplicated function rename src {src}");
135
}
136
ret.rename_dsts.insert(dst.clone());
137
ret.rename_src_to_dst.insert(src.clone(), dst.clone());
138
}
139
140
Ok(ret)
141
}
142
}
143
144
impl Wizer {
145
/// Construct a new `Wizer` builder.
146
pub fn new() -> Self {
147
Wizer {
148
init_func: "wizer-initialize".to_string(),
149
func_renames: vec![],
150
keep_init_func: None,
151
}
152
}
153
154
/// The export name of the initializer function.
155
///
156
/// Defaults to `"wizer-initialize"`.
157
pub fn init_func(&mut self, init_func: impl Into<String>) -> &mut Self {
158
self.init_func = init_func.into();
159
self
160
}
161
162
/// Returns the initialization function that will be run for wizer.
163
pub fn get_init_func(&self) -> &str {
164
&self.init_func
165
}
166
167
/// Add a function rename to perform.
168
pub fn func_rename(&mut self, new_name: &str, old_name: &str) -> &mut Self {
169
self.func_renames
170
.push((new_name.to_string(), old_name.to_string()));
171
self
172
}
173
174
/// After initialization, should the Wasm module still export the
175
/// initialization function?
176
///
177
/// This is `false` by default, meaning that the initialization function is
178
/// no longer exported from the Wasm module.
179
pub fn keep_init_func(&mut self, keep: bool) -> &mut Self {
180
self.keep_init_func = Some(Some(keep));
181
self
182
}
183
184
/// First half of [`Self::run`] which instruments the provided `wasm` and
185
/// produces a new wasm module which should be run by a runtime.
186
///
187
/// After the returned wasm is executed the context returned here and the
188
/// state of the instance should be passed to [`Self::snapshot`].
189
pub fn instrument<'a>(&self, wasm: &'a [u8]) -> anyhow::Result<(ModuleContext<'a>, Vec<u8>)> {
190
// Make sure we're given valid Wasm from the get go.
191
self.wasm_validate(&wasm)?;
192
193
let mut cx = parse::parse(wasm)?;
194
195
// When wizening core modules directly some imports aren't supported,
196
// so check for those here.
197
for import in cx.imports() {
198
match import.ty {
199
wasmparser::TypeRef::Global(_) => {
200
anyhow::bail!("imported globals are not supported")
201
}
202
wasmparser::TypeRef::Table(_) => {
203
anyhow::bail!("imported tables are not supported")
204
}
205
wasmparser::TypeRef::Memory(_) => {
206
anyhow::bail!("imported memories are not supported")
207
}
208
wasmparser::TypeRef::Func(_) => {}
209
wasmparser::TypeRef::FuncExact(_) => {}
210
wasmparser::TypeRef::Tag(_) => {}
211
}
212
}
213
214
let instrumented_wasm = instrument::instrument(&mut cx);
215
self.debug_assert_valid_wasm(&instrumented_wasm);
216
217
Ok((cx, instrumented_wasm))
218
}
219
220
/// Second half of [`Self::run`] which takes the [`ModuleContext`] returned
221
/// by [`Self::instrument`] and the state of the `instance` after it has
222
/// possibly executed its initialization function.
223
///
224
/// This returns a new WebAssembly binary which has all state
225
/// pre-initialized.
226
pub async fn snapshot(
227
&self,
228
mut cx: ModuleContext<'_>,
229
instance: &mut impl InstanceState,
230
) -> anyhow::Result<Vec<u8>> {
231
// Parse rename spec.
232
let renames = FuncRenames::parse(&self.func_renames)?;
233
234
let snapshot = snapshot::snapshot(&cx, instance).await;
235
let rewritten_wasm = self.rewrite(&mut cx, &snapshot, &renames);
236
237
self.debug_assert_valid_wasm(&rewritten_wasm);
238
239
Ok(rewritten_wasm)
240
}
241
242
fn debug_assert_valid_wasm(&self, wasm: &[u8]) {
243
if !cfg!(debug_assertions) {
244
return;
245
}
246
if let Err(error) = self.wasm_validate(&wasm) {
247
#[cfg(feature = "wasmprinter")]
248
let wat = wasmprinter::print_bytes(&wasm)
249
.unwrap_or_else(|e| format!("Disassembling to WAT failed: {}", e));
250
#[cfg(not(feature = "wasmprinter"))]
251
let wat = "`wasmprinter` cargo feature is not enabled".to_string();
252
panic!("instrumented Wasm is not valid: {error:?}\n\nWAT:\n{wat}");
253
}
254
}
255
256
fn wasm_validate(&self, wasm: &[u8]) -> anyhow::Result<()> {
257
log::debug!("Validating input Wasm");
258
259
wasmparser::Validator::new_with_features(wasmparser::WasmFeatures::all())
260
.validate_all(wasm)
261
.context("wasm validation failed")?;
262
263
for payload in wasmparser::Parser::new(0).parse_all(wasm) {
264
match payload? {
265
wasmparser::Payload::CodeSectionEntry(code) => {
266
let mut ops = code.get_operators_reader()?;
267
while !ops.eof() {
268
match ops.read()? {
269
// Table mutations aren't allowed as wizer has no
270
// way to record a snapshot of a table at this time.
271
// The only table mutations allowed are those from
272
// active element segments which can be
273
// deterministically replayed, so disallow all other
274
// forms of mutating a table.
275
//
276
// Ideally Wizer could take a snapshot of a table
277
// post-instantiation and then ensure that after
278
// running initialization the table didn't get
279
// mutated, allowing these instructions, but that's
280
// also not possible at this time.
281
wasmparser::Operator::TableCopy { .. } => {
282
anyhow::bail!("unsupported `table.copy` instruction")
283
}
284
wasmparser::Operator::TableInit { .. } => {
285
anyhow::bail!("unsupported `table.init` instruction")
286
}
287
wasmparser::Operator::TableSet { .. } => {
288
anyhow::bail!("unsupported `table.set` instruction")
289
}
290
wasmparser::Operator::TableGrow { .. } => {
291
anyhow::bail!("unsupported `table.grow` instruction")
292
}
293
wasmparser::Operator::TableFill { .. } => {
294
anyhow::bail!("unsupported `table.fill` instruction")
295
}
296
297
// Wizer has no way of dynamically determining which
298
// element or data segments were dropped during
299
// execution so instead disallow these instructions
300
// entirely. Like above it'd be nice to allow them
301
// but just forbid their execution during the
302
// initialization function, but that can't be done
303
// easily at this time.
304
wasmparser::Operator::ElemDrop { .. } => {
305
anyhow::bail!("unsupported `elem.drop` instruction")
306
}
307
wasmparser::Operator::DataDrop { .. } => {
308
anyhow::bail!("unsupported `data.drop` instruction")
309
}
310
311
// Wizer can't snapshot GC references, so disallow
312
// any mutation of GC references. This prevents, for
313
// example, reading something from a table and then
314
// mutating it.
315
wasmparser::Operator::StructSet { .. } => {
316
anyhow::bail!("unsupported `struct.set` instruction")
317
}
318
wasmparser::Operator::ArraySet { .. } => {
319
anyhow::bail!("unsupported `array.set` instruction")
320
}
321
wasmparser::Operator::ArrayFill { .. } => {
322
anyhow::bail!("unsupported `array.fill` instruction")
323
}
324
wasmparser::Operator::ArrayCopy { .. } => {
325
anyhow::bail!("unsupported `array.copy` instruction")
326
}
327
wasmparser::Operator::ArrayInitData { .. } => {
328
anyhow::bail!("unsupported `array.init_data` instruction")
329
}
330
wasmparser::Operator::ArrayInitElem { .. } => {
331
anyhow::bail!("unsupported `array.init_elem` instruction")
332
}
333
334
_ => continue,
335
}
336
}
337
}
338
wasmparser::Payload::GlobalSection(globals) => {
339
for g in globals {
340
let g = g?.ty;
341
if !g.mutable {
342
continue;
343
}
344
match g.content_type {
345
wasmparser::ValType::I32
346
| wasmparser::ValType::I64
347
| wasmparser::ValType::F32
348
| wasmparser::ValType::F64
349
| wasmparser::ValType::V128 => {}
350
wasmparser::ValType::Ref(_) => {
351
anyhow::bail!(
352
"unsupported mutable global containing a reference type"
353
)
354
}
355
}
356
}
357
}
358
_ => {}
359
}
360
}
361
362
Ok(())
363
}
364
365
fn get_keep_init_func(&self) -> bool {
366
match self.keep_init_func {
367
Some(keep) => keep.unwrap_or(true),
368
None => DEFAULT_KEEP_INIT_FUNC,
369
}
370
}
371
}
372
373
/// Abstract ability to load state from a WebAssembly instance after it's been
374
/// instantiated and some exports have run.
375
pub trait InstanceState {
376
/// Loads the global specified by `name`, returning a `SnapshotVal`.
377
///
378
/// # Panics
379
///
380
/// This function panics if `name` isn't an exported global or if the type
381
/// of the global doesn't fit in `SnapshotVal`.
382
fn global_get(&mut self, name: &str) -> impl Future<Output = SnapshotVal> + Send;
383
384
/// Loads the contents of the memory specified by `name`, returning the
385
/// entier contents as a `Vec<u8>`.
386
///
387
/// # Panics
388
///
389
/// This function panics if `name` isn't an exported memory.
390
fn memory_contents(
391
&mut self,
392
name: &str,
393
contents: impl FnOnce(&[u8]) + Send,
394
) -> impl Future<Output = ()> + Send;
395
}
396
397