Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
bytecodealliance
GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/crates/wizer/src/lib.rs
3050 views
1
//! Wizer: the WebAssembly pre-initializer!
2
//!
3
//! See the [`Wizer`] struct for details.
4
5
#![deny(missing_docs)]
6
#![cfg_attr(docsrs, feature(doc_cfg))]
7
8
mod info;
9
mod instrument;
10
mod parse;
11
mod rewrite;
12
mod snapshot;
13
14
#[cfg(feature = "wasmtime")]
15
mod wasmtime;
16
#[cfg(feature = "wasmtime")]
17
pub use wasmtime::*;
18
#[cfg(feature = "component-model")]
19
mod component;
20
#[cfg(feature = "component-model")]
21
pub use component::*;
22
#[cfg(not(feature = "rayon"))]
23
mod rayoff;
24
25
pub use crate::info::ModuleContext;
26
pub use crate::snapshot::SnapshotVal;
27
use ::wasmtime::{Result, bail, error::Context as _};
28
use std::collections::{HashMap, HashSet};
29
pub use wasmparser::ValType;
30
31
/// Whether the initialization function remains exported when no explicit
/// `keep_init_func` preference has been configured.
const DEFAULT_KEEP_INIT_FUNC: bool = false;
32
33
/// Wizer: the WebAssembly pre-initializer!
///
/// Don't wait for your Wasm module to initialize itself, pre-initialize it!
/// Wizer instantiates your WebAssembly module, executes its initialization
/// function, and then serializes the instance's initialized state out into a
/// new WebAssembly module. Now you can use this new, pre-initialized
/// WebAssembly module to hit the ground running, without making your users wait
/// for that first-time set up code to complete.
///
/// ## Caveats
///
/// * The initialization function may not call any imported functions. Doing so
/// will trigger a trap and `wizer` will exit.
///
/// * The Wasm module may not import globals, tables, or memories.
///
/// * Reference types are not supported yet. This is tricky because it would
/// allow the Wasm module to mutate tables, and we would need to be able to
/// snapshot the new table state, but funcrefs and externrefs don't have
/// identity and aren't comparable in the Wasm spec, which makes snapshotting
/// difficult.
// NOTE(review): the reference-types caveat above may be out of date. The
// validation in this file enables every wasmparser feature and only rejects
// specific table/GC-mutating instructions and mutable reference-typed
// globals -- confirm and update the user-facing text if so.
//
// NOTE: with the `clap` feature enabled this struct is also the command-line
// parser, and the `///` comments here are what clap shows as help text, so
// wording changes to them are user-visible.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "clap", derive(clap::Parser))]
pub struct Wizer {
    /// The Wasm export name of the function that should be executed to
    /// initialize the Wasm module.
    #[cfg_attr(
        feature = "clap",
        arg(short = 'f', long, default_value = "wizer-initialize")
    )]
    init_func: String,

    // Each entry is stored as `(dst, src)`: that is the order `parse_rename`
    // returns, the order `func_rename` pushes, and the order
    // `FuncRenames::parse` destructures.
    /// Any function renamings to perform.
    ///
    /// A renaming specification `dst=src` renames a function export `src` to
    /// `dst`, overwriting any previous `dst` export.
    ///
    /// Multiple renamings can be specified. It is an error to specify more than
    /// one source to rename to a destination name, or to specify more than one
    /// renaming destination for one source.
    ///
    /// This option can be used, for example, to replace a `_start` entry point
    /// in an initialized module with an alternate entry point.
    ///
    /// When module linking is enabled, these renames are only applied to the
    /// outermost module.
    #[cfg_attr(
        feature = "clap",
        arg(
            short = 'r',
            long = "rename-func",
            alias = "func-rename",
            value_name = "dst=src",
            value_parser = parse_rename,
        ),
    )]
    func_renames: Vec<(String, String)>,

    // Three states, as interpreted by `get_keep_init_func`:
    //   * `None`          -> no preference; `DEFAULT_KEEP_INIT_FUNC` applies.
    //   * `Some(None)`    -> treated as `true` (presumably the CLI flag given
    //                        without a value -- confirm against clap's handling
    //                        of `Option<Option<T>>`).
    //   * `Some(Some(b))` -> the explicit value `b`.
    /// After initialization, should the Wasm module still export the
    /// initialization function?
    ///
    /// This is `false` by default, meaning that the initialization function is
    /// no longer exported from the Wasm module.
    #[cfg_attr(
        feature = "clap",
        arg(long, require_equals = true, value_name = "true|false")
    )]
    keep_init_func: Option<Option<bool>>,
}
102
103
/// Parses one rename specification of the form `dst=src` into `(dst, src)`.
///
/// The input is split at the *first* `=` only; anything after it, including
/// further `=` characters, becomes part of the source name.
///
/// # Errors
///
/// Fails if `s` contains no `=` character at all.
#[cfg(feature = "clap")]
fn parse_rename(s: &str) -> Result<(String, String)> {
    // `split_once` yields both halves directly, so no intermediate `Vec` or
    // indexing is needed.
    let Some((dst, src)) = s.split_once('=') else {
        bail!("must contain exactly one equals character ('=')");
    };
    Ok((dst.into(), src.into()))
}
111
112
/// Lookup tables describing the function-export renames requested by the
/// user, built from `(dst, src)` pairs by `FuncRenames::parse`.
#[derive(Default)]
struct FuncRenames {
    /// For a given export name that we encounter in the original module, a map
    /// to a new name, if any, to emit in the output module.
    rename_src_to_dst: HashMap<String, String>,
    /// A set of export names that we ignore in the original module (because
    /// they are overwritten by renamings).
    rename_dsts: HashSet<String>,
}
121
122
impl FuncRenames {
123
fn parse(renames: &[(String, String)]) -> Result<FuncRenames> {
124
let mut ret = FuncRenames {
125
rename_src_to_dst: HashMap::new(),
126
rename_dsts: HashSet::new(),
127
};
128
if renames.is_empty() {
129
return Ok(ret);
130
}
131
132
for (dst, src) in renames {
133
if ret.rename_dsts.contains(dst) {
134
bail!("Duplicated function rename dst {dst}");
135
}
136
if ret.rename_src_to_dst.contains_key(src) {
137
bail!("Duplicated function rename src {src}");
138
}
139
ret.rename_dsts.insert(dst.clone());
140
ret.rename_src_to_dst.insert(src.clone(), dst.clone());
141
}
142
143
Ok(ret)
144
}
145
}
146
147
impl Wizer {
148
/// Construct a new `Wizer` builder.
149
pub fn new() -> Self {
150
Wizer {
151
init_func: "wizer-initialize".to_string(),
152
func_renames: vec![],
153
keep_init_func: None,
154
}
155
}
156
157
/// The export name of the initializer function.
158
///
159
/// Defaults to `"wizer-initialize"`.
160
pub fn init_func(&mut self, init_func: impl Into<String>) -> &mut Self {
161
self.init_func = init_func.into();
162
self
163
}
164
165
/// Returns the initialization function that will be run for wizer.
166
pub fn get_init_func(&self) -> &str {
167
&self.init_func
168
}
169
170
/// Add a function rename to perform.
171
pub fn func_rename(&mut self, new_name: &str, old_name: &str) -> &mut Self {
172
self.func_renames
173
.push((new_name.to_string(), old_name.to_string()));
174
self
175
}
176
177
/// After initialization, should the Wasm module still export the
178
/// initialization function?
179
///
180
/// This is `false` by default, meaning that the initialization function is
181
/// no longer exported from the Wasm module.
182
pub fn keep_init_func(&mut self, keep: bool) -> &mut Self {
183
self.keep_init_func = Some(Some(keep));
184
self
185
}
186
187
/// First half of [`Self::run`] which instruments the provided `wasm` and
188
/// produces a new wasm module which should be run by a runtime.
189
///
190
/// After the returned wasm is executed the context returned here and the
191
/// state of the instance should be passed to [`Self::snapshot`].
192
pub fn instrument<'a>(&self, wasm: &'a [u8]) -> Result<(ModuleContext<'a>, Vec<u8>)> {
193
// Make sure we're given valid Wasm from the get go.
194
self.wasm_validate(&wasm)?;
195
196
let mut cx = parse::parse(wasm)?;
197
198
// When wizening core modules directly some imports aren't supported,
199
// so check for those here.
200
for import in cx.imports() {
201
match import.ty {
202
wasmparser::TypeRef::Global(_) => {
203
bail!("imported globals are not supported")
204
}
205
wasmparser::TypeRef::Table(_) => {
206
bail!("imported tables are not supported")
207
}
208
wasmparser::TypeRef::Memory(_) => {
209
bail!("imported memories are not supported")
210
}
211
wasmparser::TypeRef::Func(_) => {}
212
wasmparser::TypeRef::FuncExact(_) => {}
213
wasmparser::TypeRef::Tag(_) => {}
214
}
215
}
216
217
let instrumented_wasm = instrument::instrument(&mut cx);
218
self.debug_assert_valid_wasm(&instrumented_wasm);
219
220
Ok((cx, instrumented_wasm))
221
}
222
223
/// Second half of [`Self::run`] which takes the [`ModuleContext`] returned
224
/// by [`Self::instrument`] and the state of the `instance` after it has
225
/// possibly executed its initialization function.
226
///
227
/// This returns a new WebAssembly binary which has all state
228
/// pre-initialized.
229
pub async fn snapshot(
230
&self,
231
mut cx: ModuleContext<'_>,
232
instance: &mut impl InstanceState,
233
) -> Result<Vec<u8>> {
234
// Parse rename spec.
235
let renames = FuncRenames::parse(&self.func_renames)?;
236
237
let snapshot = snapshot::snapshot(&cx, instance).await;
238
let rewritten_wasm = self.rewrite(&mut cx, &snapshot, &renames);
239
240
self.debug_assert_valid_wasm(&rewritten_wasm);
241
242
Ok(rewritten_wasm)
243
}
244
245
fn debug_assert_valid_wasm(&self, wasm: &[u8]) {
246
if !cfg!(debug_assertions) {
247
return;
248
}
249
if let Err(error) = self.wasm_validate(&wasm) {
250
#[cfg(feature = "wasmprinter")]
251
let wat = wasmprinter::print_bytes(&wasm)
252
.unwrap_or_else(|e| format!("Disassembling to WAT failed: {}", e));
253
#[cfg(not(feature = "wasmprinter"))]
254
let wat = "`wasmprinter` cargo feature is not enabled".to_string();
255
panic!("instrumented Wasm is not valid: {error:?}\n\nWAT:\n{wat}");
256
}
257
}
258
259
fn wasm_validate(&self, wasm: &[u8]) -> Result<()> {
260
log::debug!("Validating input Wasm");
261
262
wasmparser::Validator::new_with_features(wasmparser::WasmFeatures::all())
263
.validate_all(wasm)
264
.context("wasm validation failed")?;
265
266
for payload in wasmparser::Parser::new(0).parse_all(wasm) {
267
match payload? {
268
wasmparser::Payload::CodeSectionEntry(code) => {
269
let mut ops = code.get_operators_reader()?;
270
while !ops.eof() {
271
match ops.read()? {
272
// Table mutations aren't allowed as wizer has no
273
// way to record a snapshot of a table at this time.
274
// The only table mutations allowed are those from
275
// active element segments which can be
276
// deterministically replayed, so disallow all other
277
// forms of mutating a table.
278
//
279
// Ideally Wizer could take a snapshot of a table
280
// post-instantiation and then ensure that after
281
// running initialization the table didn't get
282
// mutated, allowing these instructions, but that's
283
// also not possible at this time.
284
wasmparser::Operator::TableCopy { .. } => {
285
bail!("unsupported `table.copy` instruction")
286
}
287
wasmparser::Operator::TableInit { .. } => {
288
bail!("unsupported `table.init` instruction")
289
}
290
wasmparser::Operator::TableSet { .. } => {
291
bail!("unsupported `table.set` instruction")
292
}
293
wasmparser::Operator::TableGrow { .. } => {
294
bail!("unsupported `table.grow` instruction")
295
}
296
wasmparser::Operator::TableFill { .. } => {
297
bail!("unsupported `table.fill` instruction")
298
}
299
300
// Wizer has no way of dynamically determining which
301
// element or data segments were dropped during
302
// execution so instead disallow these instructions
303
// entirely. Like above it'd be nice to allow them
304
// but just forbid their execution during the
305
// initialization function, but that can't be done
306
// easily at this time.
307
wasmparser::Operator::ElemDrop { .. } => {
308
bail!("unsupported `elem.drop` instruction")
309
}
310
wasmparser::Operator::DataDrop { .. } => {
311
bail!("unsupported `data.drop` instruction")
312
}
313
314
// Wizer can't snapshot GC references, so disallow
315
// any mutation of GC references. This prevents, for
316
// example, reading something from a table and then
317
// mutating it.
318
wasmparser::Operator::StructSet { .. } => {
319
bail!("unsupported `struct.set` instruction")
320
}
321
wasmparser::Operator::ArraySet { .. } => {
322
bail!("unsupported `array.set` instruction")
323
}
324
wasmparser::Operator::ArrayFill { .. } => {
325
bail!("unsupported `array.fill` instruction")
326
}
327
wasmparser::Operator::ArrayCopy { .. } => {
328
bail!("unsupported `array.copy` instruction")
329
}
330
wasmparser::Operator::ArrayInitData { .. } => {
331
bail!("unsupported `array.init_data` instruction")
332
}
333
wasmparser::Operator::ArrayInitElem { .. } => {
334
bail!("unsupported `array.init_elem` instruction")
335
}
336
337
_ => continue,
338
}
339
}
340
}
341
wasmparser::Payload::GlobalSection(globals) => {
342
for g in globals {
343
let g = g?.ty;
344
if !g.mutable {
345
continue;
346
}
347
match g.content_type {
348
wasmparser::ValType::I32
349
| wasmparser::ValType::I64
350
| wasmparser::ValType::F32
351
| wasmparser::ValType::F64
352
| wasmparser::ValType::V128 => {}
353
wasmparser::ValType::Ref(_) => {
354
bail!("unsupported mutable global containing a reference type")
355
}
356
}
357
}
358
}
359
_ => {}
360
}
361
}
362
363
Ok(())
364
}
365
366
fn get_keep_init_func(&self) -> bool {
367
match self.keep_init_func {
368
Some(keep) => keep.unwrap_or(true),
369
None => DEFAULT_KEEP_INIT_FUNC,
370
}
371
}
372
}
373
374
/// Abstract ability to load state from a WebAssembly instance after it's been
/// instantiated and some exports have run.
pub trait InstanceState {
    /// Loads the global specified by `name`, returning a `SnapshotVal`.
    ///
    /// # Panics
    ///
    /// This function panics if `name` isn't an exported global or if the type
    /// of the global doesn't fit in `SnapshotVal`.
    // NOTE(review): `type_hint` is not described above; presumably it is the
    // value type the global is declared with -- confirm against implementations.
    fn global_get(
        &mut self,
        name: &str,
        type_hint: ValType,
    ) -> impl Future<Output = SnapshotVal> + Send;

    /// Loads the contents of the memory specified by `name` and passes the
    /// entire contents to the `contents` callback as a byte slice.
    ///
    /// # Panics
    ///
    /// This function panics if `name` isn't an exported memory.
    fn memory_contents(
        &mut self,
        name: &str,
        contents: impl FnOnce(&[u8]) + Send,
    ) -> impl Future<Output = ()> + Send;
}
401
402