Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
bytecodealliance
GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/crates/environ/src/compile/module_environ.rs
3050 views
1
use crate::error::{Result, bail};
2
use crate::module::{
3
FuncRefIndex, Initializer, MemoryInitialization, MemoryInitializer, Module, TableSegment,
4
TableSegmentElements,
5
};
6
use crate::prelude::*;
7
use crate::{
8
ConstExpr, ConstOp, DataIndex, DefinedFuncIndex, ElemIndex, EngineOrModuleTypeIndex,
9
EntityIndex, EntityType, FuncIndex, FuncKey, GlobalIndex, IndexType, InitMemory, MemoryIndex,
10
ModuleInternedTypeIndex, ModuleTypesBuilder, PrimaryMap, SizeOverflow, StaticMemoryInitializer,
11
StaticModuleIndex, TableIndex, TableInitialValue, Tag, TagIndex, Tunables, TypeConvert,
12
TypeIndex, WasmError, WasmHeapTopType, WasmHeapType, WasmResult, WasmValType,
13
WasmparserTypeConverter,
14
};
15
use cranelift_entity::SecondaryMap;
16
use cranelift_entity::packed_option::ReservedValue;
17
use std::borrow::Cow;
18
use std::collections::HashMap;
19
use std::mem;
20
use std::path::PathBuf;
21
use std::sync::Arc;
22
use wasmparser::{
23
CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind,
24
FuncToValidate, FunctionBody, KnownCustom, NameSectionReader, Naming, Parser, Payload, TypeRef,
25
Validator, ValidatorResources, types::Types,
26
};
27
28
/// Object containing the standalone environment information.
pub struct ModuleEnvironment<'a, 'data> {
    /// The current module being translated.
    result: ModuleTranslation<'data>,

    /// Intern'd types for this entire translation, shared by all modules.
    types: &'a mut ModuleTypesBuilder,

    // Various bits and pieces of configuration.
    //
    // `validator` is driven section-by-section as payloads are translated;
    // `tunables` gates optional work such as debuginfo collection.
    validator: &'a mut Validator,
    tunables: &'a Tunables,
}
40
41
/// The result of translating via `ModuleEnvironment`.
///
/// Function bodies are not yet translated, and data initializers have not yet
/// been copied out of the original buffer.
pub struct ModuleTranslation<'data> {
    /// Module information.
    pub module: Module,

    /// The input wasm binary.
    ///
    /// This can be useful, for example, when modules are parsed from a
    /// component and the embedder wants access to the raw wasm modules
    /// themselves.
    pub wasm: &'data [u8],

    /// References to the function bodies.
    pub function_body_inputs: PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,

    /// For each imported function, the single statically-known function that
    /// always satisfies that import, if any.
    ///
    /// This is used to turn what would otherwise be indirect calls through the
    /// imports table into direct calls, when possible.
    ///
    /// When filled in, this only ever contains
    /// `FuncKey::DefinedWasmFunction(..)`s and `FuncKey::Intrinsic(..)`s.
    pub known_imported_functions: SecondaryMap<FuncIndex, Option<FuncKey>>,

    /// A list of type signatures which are considered exported from this
    /// module, or those that can possibly be called. This list is sorted, and
    /// trampolines for each of these signatures are required.
    pub exported_signatures: Vec<ModuleInternedTypeIndex>,

    /// DWARF debug information, if enabled, parsed from the module.
    pub debuginfo: DebugInfoData<'data>,

    /// Set if debuginfo was found but it was not parsed due to `Tunables`
    /// configuration.
    pub has_unparsed_debuginfo: bool,

    /// List of data segments found in this module which should be concatenated
    /// together for the final compiled artifact.
    ///
    /// These data segments, when concatenated, are indexed by the
    /// `MemoryInitializer` type.
    pub data: Vec<Cow<'data, [u8]>>,

    /// The desired alignment of `data` in the final data section of the object
    /// file that we'll emit.
    ///
    /// Note that this is 1 by default but `MemoryInitialization::Static` might
    /// switch this to a higher alignment to facilitate mmap-ing data from
    /// an object file into a linear memory.
    pub data_align: Option<u64>,

    /// Total size of all data pushed onto `data` so far.
    //
    // Kept as a `u32` so that data offsets can be represented compactly;
    // overflow past 4 GiB is reported as an error during translation.
    total_data: u32,

    /// List of passive element segments found in this module which will get
    /// concatenated for the final artifact.
    pub passive_data: Vec<&'data [u8]>,

    /// Total size of all passive data pushed into `passive_data` so far.
    total_passive_data: u32,

    /// When we're parsing the code section this will be incremented so we know
    /// which function is currently being defined.
    code_index: u32,

    /// The type information of the current module made available at the end of the
    /// validation process.
    //
    // `None` until `Payload::End` has been processed; see `get_types`.
    types: Option<Types>,
}
114
115
impl<'data> ModuleTranslation<'data> {
116
/// Create a new translation for the module with the given index.
117
pub fn new(module_index: StaticModuleIndex) -> Self {
118
Self {
119
module: Module::new(module_index),
120
wasm: &[],
121
function_body_inputs: PrimaryMap::default(),
122
known_imported_functions: SecondaryMap::default(),
123
exported_signatures: Vec::default(),
124
debuginfo: DebugInfoData::default(),
125
has_unparsed_debuginfo: false,
126
data: Vec::default(),
127
data_align: None,
128
total_data: 0,
129
passive_data: Vec::default(),
130
total_passive_data: 0,
131
code_index: 0,
132
types: None,
133
}
134
}
135
136
/// Returns a reference to the type information of the current module.
137
pub fn get_types(&self) -> &Types {
138
self.types
139
.as_ref()
140
.expect("module type information to be available")
141
}
142
143
/// Get this translation's module's index.
144
pub fn module_index(&self) -> StaticModuleIndex {
145
self.module.module_index
146
}
147
}
148
149
/// Contains function data: byte code and its offset in the module.
pub struct FunctionBodyData<'a> {
    /// The body of the function, containing code and locals.
    pub body: FunctionBody<'a>,
    /// Validator for the function body.
    //
    // Paired with `body` so the function can be validated lazily when it is
    // actually compiled, rather than during section parsing.
    pub validator: FuncToValidate<ValidatorResources>,
}
156
157
// Aggregates all DWARF-related state parsed out of a module's custom
// sections; populated by `ModuleEnvironment::dwarf_section`.
#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct DebugInfoData<'a> {
    pub dwarf: Dwarf<'a>,
    pub name_section: NameSection<'a>,
    pub wasm_file: WasmFileInfo,
    // Sections below are kept outside `dwarf` because `gimli::Dwarf` does not
    // own them directly; they are recombined into `dwarf.ranges` /
    // `dwarf.locations` after each section is registered.
    pub debug_loc: gimli::DebugLoc<Reader<'a>>,
    pub debug_loclists: gimli::DebugLocLists<Reader<'a>>,
    pub debug_ranges: gimli::DebugRanges<Reader<'a>>,
    pub debug_rnglists: gimli::DebugRngLists<Reader<'a>>,
    // DWARF package (.dwp) index sections.
    pub debug_cu_index: gimli::DebugCuIndex<Reader<'a>>,
    pub debug_tu_index: gimli::DebugTuIndex<Reader<'a>>,
}
170
171
#[expect(missing_docs, reason = "self-describing")]
pub type Dwarf<'input> = gimli::Dwarf<Reader<'input>>;

// All DWARF data in a wasm binary is little-endian and borrowed directly from
// the input module, hence the `EndianSlice` reader.
type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>;
175
176
// Parsed contents of the wasm custom `name` section; filled in by
// `ModuleEnvironment::name_section`.
#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct NameSection<'a> {
    pub module_name: Option<&'a str>,
    pub func_names: HashMap<FuncIndex, &'a str>,
    // Maps function index -> (local index -> name); only populated when
    // native debuginfo is enabled.
    pub locals_names: HashMap<FuncIndex, HashMap<u32, &'a str>>,
}
183
184
// Per-file metadata used when emitting debuginfo for the compiled module.
#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct WasmFileInfo {
    pub path: Option<PathBuf>,
    // Byte offset of the code section within the original wasm binary.
    pub code_section_offset: u64,
    pub imported_func_count: u32,
    // One entry per defined function, pushed while parsing the code section
    // (only when native debuginfo is enabled).
    pub funcs: Vec<FunctionMetadata>,
}
192
193
// Signature and local-variable type information for one defined function,
// recorded for debuginfo purposes.
#[derive(Debug)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct FunctionMetadata {
    pub params: Box<[WasmValType]>,
    // Each entry is a (count, type) run, mirroring the wasm locals encoding.
    pub locals: Box<[(u32, WasmValType)]>,
}
199
200
impl<'a, 'data> ModuleEnvironment<'a, 'data> {
201
/// Allocates the environment data structures.
202
pub fn new(
203
tunables: &'a Tunables,
204
validator: &'a mut Validator,
205
types: &'a mut ModuleTypesBuilder,
206
module_index: StaticModuleIndex,
207
) -> Self {
208
Self {
209
result: ModuleTranslation::new(module_index),
210
types,
211
tunables,
212
validator,
213
}
214
}
215
216
/// Translate a wasm module using this environment.
217
///
218
/// This function will translate the `data` provided with `parser`,
219
/// validating everything along the way with this environment's validator.
220
///
221
/// The result of translation, [`ModuleTranslation`], contains everything
222
/// necessary to compile functions afterwards as well as learn type
223
/// information about the module at runtime.
224
pub fn translate(
225
mut self,
226
parser: Parser,
227
data: &'data [u8],
228
) -> Result<ModuleTranslation<'data>> {
229
self.result.wasm = data;
230
231
for payload in parser.parse_all(data) {
232
self.translate_payload(payload?)?;
233
}
234
235
Ok(self.result)
236
}
237
238
/// Translates a single `wasmparser` payload (one section, or one
/// code-section entry) into this environment's `ModuleTranslation`.
///
/// Every arm first feeds the payload to `self.validator` before recording
/// anything, so translation state is only updated for valid input. Payloads
/// must arrive in the order produced by `Parser::parse_all`.
fn translate_payload(&mut self, payload: Payload<'data>) -> Result<()> {
    match payload {
        Payload::Version {
            num,
            encoding,
            range,
        } => {
            self.validator.version(num, encoding, &range)?;
            match encoding {
                Encoding::Module => {}
                Encoding::Component => {
                    bail!("expected a WebAssembly module but was given a WebAssembly component")
                }
            }
        }

        Payload::End(offset) => {
            // Finishing validation yields the module's full type
            // information, which is stashed for later queries.
            self.result.types = Some(self.validator.end(offset)?);

            // With the `escaped_funcs` set of functions finished
            // we can calculate the set of signatures that are exported as
            // the set of exported functions' signatures.
            self.result.exported_signatures = self
                .result
                .module
                .functions
                .iter()
                .filter_map(|(_, func)| {
                    if func.is_escaping() {
                        Some(func.signature.unwrap_module_type_index())
                    } else {
                        None
                    }
                })
                .collect();
            self.result.exported_signatures.sort_unstable();
            self.result.exported_signatures.dedup();
        }

        Payload::TypeSection(types) => {
            self.validator.type_section(&types)?;

            let count = self.validator.types(0).unwrap().core_type_count_in_module();
            log::trace!("interning {count} Wasm types");

            let capacity = usize::try_from(count).unwrap();
            self.result.module.types.reserve(capacity);
            self.types.reserve_wasm_signatures(capacity);

            // Iterate over each *rec group* -- not type -- defined in the
            // types section. Rec groups are the unit of canonicalization
            // and therefore the unit at which we need to process at a
            // time. `wasmparser` has already done the hard work of
            // de-duplicating and canonicalizing the rec groups within the
            // module for us, we just need to translate them into our data
            // structures. Note that, if the Wasm defines duplicate rec
            // groups, we need copy the duplicates over (shallowly) as well,
            // so that our types index space doesn't have holes.
            let mut type_index = 0;
            while type_index < count {
                let validator_types = self.validator.types(0).unwrap();

                // Get the rec group for the current type index, which is
                // always the first type defined in a rec group.
                log::trace!("looking up wasmparser type for index {type_index}");
                let core_type_id = validator_types.core_type_at_in_module(type_index);
                log::trace!(
                    " --> {core_type_id:?} = {:?}",
                    validator_types[core_type_id],
                );
                let rec_group_id = validator_types.rec_group_id_of(core_type_id);
                debug_assert_eq!(
                    validator_types
                        .rec_group_elements(rec_group_id)
                        .position(|id| id == core_type_id),
                    Some(0)
                );

                // Intern the rec group and then fill in this module's types
                // index space.
                let interned = self.types.intern_rec_group(validator_types, rec_group_id)?;
                let elems = self.types.rec_group_elements(interned);
                let len = elems.len();
                self.result.module.types.reserve(len);
                for ty in elems {
                    self.result.module.types.push(ty.into());
                }

                // Advance `type_index` to the start of the next rec group.
                type_index += u32::try_from(len).unwrap();
            }
        }

        Payload::ImportSection(imports) => {
            self.validator.import_section(&imports)?;

            let cnt = usize::try_from(imports.count()).unwrap();
            self.result.module.initializers.reserve(cnt);

            for entry in imports.into_imports() {
                let import = entry?;
                // Convert the wasmparser import type into this crate's
                // `EntityType`, bumping the per-kind imported-entity
                // counters as we go.
                let ty = match import.ty {
                    TypeRef::Func(index) => {
                        let index = TypeIndex::from_u32(index);
                        let interned_index = self.result.module.types[index];
                        self.result.module.num_imported_funcs += 1;
                        self.result.debuginfo.wasm_file.imported_func_count += 1;
                        EntityType::Function(interned_index)
                    }
                    TypeRef::Memory(ty) => {
                        self.result.module.num_imported_memories += 1;
                        EntityType::Memory(ty.into())
                    }
                    TypeRef::Global(ty) => {
                        self.result.module.num_imported_globals += 1;
                        EntityType::Global(self.convert_global_type(&ty)?)
                    }
                    TypeRef::Table(ty) => {
                        self.result.module.num_imported_tables += 1;
                        EntityType::Table(self.convert_table_type(&ty)?)
                    }
                    TypeRef::Tag(ty) => {
                        let index = TypeIndex::from_u32(ty.func_type_idx);
                        let signature = self.result.module.types[index];
                        let exception = self.types.define_exception_type_for_tag(
                            signature.unwrap_module_type_index(),
                        );
                        let tag = Tag {
                            signature,
                            exception: EngineOrModuleTypeIndex::Module(exception),
                        };
                        self.result.module.num_imported_tags += 1;
                        EntityType::Tag(tag)
                    }
                    TypeRef::FuncExact(_) => {
                        bail!("custom-descriptors proposal not implemented yet");
                    }
                };
                self.declare_import(import.module, import.name, ty);
            }
        }

        Payload::FunctionSection(functions) => {
            self.validator.function_section(&functions)?;

            let cnt = usize::try_from(functions.count()).unwrap();
            self.result.module.functions.reserve_exact(cnt);

            for entry in functions {
                let sigindex = entry?;
                let ty = TypeIndex::from_u32(sigindex);
                let interned_index = self.result.module.types[ty];
                self.result.module.push_function(interned_index);
            }
        }

        Payload::TableSection(tables) => {
            self.validator.table_section(&tables)?;
            let cnt = usize::try_from(tables.count()).unwrap();
            self.result.module.tables.reserve_exact(cnt);

            for entry in tables {
                let wasmparser::Table { ty, init } = entry?;
                let table = self.convert_table_type(&ty)?;
                self.result.module.needs_gc_heap |= table.ref_type.is_vmgcref_type();
                self.result.module.tables.push(table);
                let init = match init {
                    wasmparser::TableInit::RefNull => TableInitialValue::Null {
                        precomputed: Vec::new(),
                    },
                    wasmparser::TableInit::Expr(expr) => {
                        // Any functions referenced by the init expression
                        // escape the module via the table.
                        let (init, escaped) = ConstExpr::from_wasmparser(self, expr)?;
                        for f in escaped {
                            self.flag_func_escaped(f);
                        }
                        TableInitialValue::Expr(init)
                    }
                };
                self.result
                    .module
                    .table_initialization
                    .initial_values
                    .push(init);
            }
        }

        Payload::MemorySection(memories) => {
            self.validator.memory_section(&memories)?;

            let cnt = usize::try_from(memories.count()).unwrap();
            self.result.module.memories.reserve_exact(cnt);

            for entry in memories {
                let memory = entry?;
                self.result.module.memories.push(memory.into());
            }
        }

        Payload::TagSection(tags) => {
            self.validator.tag_section(&tags)?;

            for entry in tags {
                let sigindex = entry?.func_type_idx;
                let ty = TypeIndex::from_u32(sigindex);
                let interned_index = self.result.module.types[ty];
                let exception = self
                    .types
                    .define_exception_type_for_tag(interned_index.unwrap_module_type_index());
                self.result.module.push_tag(interned_index, exception);
            }
        }

        Payload::GlobalSection(globals) => {
            self.validator.global_section(&globals)?;

            let cnt = usize::try_from(globals.count()).unwrap();
            self.result.module.globals.reserve_exact(cnt);

            for entry in globals {
                let wasmparser::Global { ty, init_expr } = entry?;
                let (initializer, escaped) = ConstExpr::from_wasmparser(self, init_expr)?;
                for f in escaped {
                    self.flag_func_escaped(f);
                }
                let ty = self.convert_global_type(&ty)?;
                self.result.module.globals.push(ty);
                self.result.module.global_initializers.push(initializer);
            }
        }

        Payload::ExportSection(exports) => {
            self.validator.export_section(&exports)?;

            let cnt = usize::try_from(exports.count()).unwrap();
            self.result.module.exports.reserve(cnt);

            for entry in exports {
                let wasmparser::Export { name, kind, index } = entry?;
                let entity = match kind {
                    ExternalKind::Func | ExternalKind::FuncExact => {
                        let index = FuncIndex::from_u32(index);
                        // Exported functions escape by definition.
                        self.flag_func_escaped(index);
                        EntityIndex::Function(index)
                    }
                    ExternalKind::Table => EntityIndex::Table(TableIndex::from_u32(index)),
                    ExternalKind::Memory => EntityIndex::Memory(MemoryIndex::from_u32(index)),
                    ExternalKind::Global => EntityIndex::Global(GlobalIndex::from_u32(index)),
                    ExternalKind::Tag => EntityIndex::Tag(TagIndex::from_u32(index)),
                };
                self.result
                    .module
                    .exports
                    .insert(String::from(name), entity);
            }
        }

        Payload::StartSection { func, range } => {
            self.validator.start_section(func, &range)?;

            let func_index = FuncIndex::from_u32(func);
            self.flag_func_escaped(func_index);
            debug_assert!(self.result.module.start_func.is_none());
            self.result.module.start_func = Some(func_index);
        }

        Payload::ElementSection(elements) => {
            self.validator.element_section(&elements)?;

            for (index, entry) in elements.into_iter().enumerate() {
                let wasmparser::Element {
                    kind,
                    items,
                    range: _,
                } = entry?;

                // Build up a list of `FuncIndex` corresponding to all the
                // entries listed in this segment. Note that it's not
                // possible to create anything other than a `ref.null
                // extern` for externref segments, so those just get
                // translated to the reserved value of `FuncIndex`.
                let elements = match items {
                    ElementItems::Functions(funcs) => {
                        let mut elems =
                            Vec::with_capacity(usize::try_from(funcs.count()).unwrap());
                        for func in funcs {
                            let func = FuncIndex::from_u32(func?);
                            self.flag_func_escaped(func);
                            elems.push(func);
                        }
                        TableSegmentElements::Functions(elems.into())
                    }
                    ElementItems::Expressions(_ty, items) => {
                        let mut exprs =
                            Vec::with_capacity(usize::try_from(items.count()).unwrap());
                        for expr in items {
                            let (expr, escaped) = ConstExpr::from_wasmparser(self, expr?)?;
                            exprs.push(expr);
                            for func in escaped {
                                self.flag_func_escaped(func);
                            }
                        }
                        TableSegmentElements::Expressions(exprs.into())
                    }
                };

                match kind {
                    ElementKind::Active {
                        table_index,
                        offset_expr,
                    } => {
                        let table_index = TableIndex::from_u32(table_index.unwrap_or(0));
                        let (offset, escaped) = ConstExpr::from_wasmparser(self, offset_expr)?;
                        debug_assert!(escaped.is_empty());

                        self.result
                            .module
                            .table_initialization
                            .segments
                            .push(TableSegment {
                                table_index,
                                offset,
                                elements,
                            });
                    }

                    ElementKind::Passive => {
                        let elem_index = ElemIndex::from_u32(index as u32);
                        let index = self.result.module.passive_elements.len();
                        self.result.module.passive_elements.push(elements);
                        self.result
                            .module
                            .passive_elements_map
                            .insert(elem_index, index);
                    }

                    ElementKind::Declared => {}
                }
            }
        }

        Payload::CodeSectionStart { count, range, .. } => {
            self.validator.code_section_start(&range)?;
            let cnt = usize::try_from(count).unwrap();
            self.result.function_body_inputs.reserve_exact(cnt);
            self.result.debuginfo.wasm_file.code_section_offset = range.start as u64;
        }

        Payload::CodeSectionEntry(body) => {
            let validator = self.validator.code_section_entry(&body)?;
            // `code_index` counts defined functions; offset by the number
            // of imports to get the module-wide function index.
            let func_index =
                self.result.code_index + self.result.module.num_imported_funcs as u32;
            let func_index = FuncIndex::from_u32(func_index);

            if self.tunables.debug_native {
                let sig_index = self.result.module.functions[func_index]
                    .signature
                    .unwrap_module_type_index();
                let sig = self.types[sig_index].unwrap_func();
                let mut locals = Vec::new();
                for pair in body.get_locals_reader()? {
                    let (cnt, ty) = pair?;
                    let ty = self.convert_valtype(ty)?;
                    locals.push((cnt, ty));
                }
                self.result
                    .debuginfo
                    .wasm_file
                    .funcs
                    .push(FunctionMetadata {
                        locals: locals.into_boxed_slice(),
                        params: sig.params().into(),
                    });
            }
            if self.tunables.debug_guest {
                // All functions are potentially reachable and
                // callable by the guest debugger, so they must
                // all be flagged as escaping.
                self.flag_func_escaped(func_index);
            }
            self.result
                .function_body_inputs
                .push(FunctionBodyData { validator, body });
            self.result.code_index += 1;
        }

        Payload::DataSection(data) => {
            self.validator.data_section(&data)?;

            let initializers = match &mut self.result.module.memory_initialization {
                MemoryInitialization::Segmented(i) => i,
                _ => unreachable!(),
            };

            let cnt = usize::try_from(data.count()).unwrap();
            initializers.reserve_exact(cnt);
            self.result.data.reserve_exact(cnt);

            for (index, entry) in data.into_iter().enumerate() {
                let wasmparser::Data {
                    kind,
                    data,
                    range: _,
                } = entry?;
                // Allocates the next `start..end` range for this segment
                // within the running `total`, erroring on u32 overflow
                // (i.e. more than 4 GiB of accumulated data).
                let mk_range = |total: &mut u32| -> Result<_, WasmError> {
                    let range = u32::try_from(data.len())
                        .ok()
                        .and_then(|size| {
                            let start = *total;
                            let end = start.checked_add(size)?;
                            Some(start..end)
                        })
                        .ok_or_else(|| {
                            WasmError::Unsupported(format!(
                                "more than 4 gigabytes of data in wasm module",
                            ))
                        })?;
                    *total += range.end - range.start;
                    Ok(range)
                };
                match kind {
                    DataKind::Active {
                        memory_index,
                        offset_expr,
                    } => {
                        let range = mk_range(&mut self.result.total_data)?;
                        let memory_index = MemoryIndex::from_u32(memory_index);
                        let (offset, escaped) = ConstExpr::from_wasmparser(self, offset_expr)?;
                        debug_assert!(escaped.is_empty());

                        // Re-borrow `initializers`; the outer borrow ended
                        // at the closure above.
                        let initializers = match &mut self.result.module.memory_initialization {
                            MemoryInitialization::Segmented(i) => i,
                            _ => unreachable!(),
                        };
                        initializers.push(MemoryInitializer {
                            memory_index,
                            offset,
                            data: range,
                        });
                        self.result.data.push(data.into());
                    }
                    DataKind::Passive => {
                        let data_index = DataIndex::from_u32(index as u32);
                        let range = mk_range(&mut self.result.total_passive_data)?;
                        self.result.passive_data.push(data);
                        self.result
                            .module
                            .passive_data_map
                            .insert(data_index, range);
                    }
                }
            }
        }

        Payload::DataCountSection { count, range } => {
            self.validator.data_count_section(count, &range)?;

            // Note: the count passed in here is the *total* segment count
            // There is no way to reserve for just the passive segments as
            // they are discovered when iterating the data section entries
            // Given that the total segment count might be much larger than
            // the passive count, do not reserve anything here.
        }

        Payload::CustomSection(s)
            if s.name() == "webidl-bindings" || s.name() == "wasm-interface-types" =>
        {
            bail!(
                "\
Support for interface types has temporarily been removed from `wasmtime`.

For more information about this temporary change you can read on the issue online:

https://github.com/bytecodealliance/wasmtime/issues/1271

and for re-adding support for interface types you can see this issue:

https://github.com/bytecodealliance/wasmtime/issues/677
"
            )
        }

        Payload::CustomSection(s) => {
            self.register_custom_section(&s);
        }

        // It's expected that validation will probably reject other
        // payloads such as `UnknownSection` or those related to the
        // component model. If, however, something gets past validation then
        // that's a bug in Wasmtime as we forgot to implement something.
        other => {
            self.validator.payload(&other)?;
            panic!("unimplemented section in wasm file {other:?}");
        }
    }
    Ok(())
}
734
735
fn register_custom_section(&mut self, section: &CustomSectionReader<'data>) {
736
match section.as_known() {
737
KnownCustom::Name(name) => {
738
let result = self.name_section(name);
739
if let Err(e) = result {
740
log::warn!("failed to parse name section {e:?}");
741
}
742
}
743
_ => {
744
let name = section.name().trim_end_matches(".dwo");
745
if name.starts_with(".debug_") {
746
self.dwarf_section(name, section);
747
}
748
}
749
}
750
}
751
752
/// Records the contents of one DWARF debug custom section (already stripped
/// of any `.dwo` suffix) into `self.result.debuginfo`.
///
/// If neither native debugging nor debuginfo parsing is enabled via
/// `Tunables`, the section is skipped and `has_unparsed_debuginfo` is set
/// instead.
fn dwarf_section(&mut self, name: &str, section: &CustomSectionReader<'data>) {
    if !self.tunables.debug_native && !self.tunables.parse_wasm_debuginfo {
        self.result.has_unparsed_debuginfo = true;
        return;
    }
    let info = &mut self.result.debuginfo;
    let dwarf = &mut info.dwarf;
    // DWARF embedded in wasm is little-endian and borrowed straight out of
    // the input module.
    let endian = gimli::LittleEndian;
    let data = section.data();
    let slice = gimli::EndianSlice::new(data, endian);

    match name {
        // `gimli::Dwarf` fields.
        ".debug_abbrev" => dwarf.debug_abbrev = gimli::DebugAbbrev::new(data, endian),
        ".debug_addr" => dwarf.debug_addr = gimli::DebugAddr::from(slice),
        ".debug_info" => {
            dwarf.debug_info = gimli::DebugInfo::new(data, endian);
        }
        ".debug_line" => dwarf.debug_line = gimli::DebugLine::new(data, endian),
        ".debug_line_str" => dwarf.debug_line_str = gimli::DebugLineStr::from(slice),
        ".debug_str" => dwarf.debug_str = gimli::DebugStr::new(data, endian),
        ".debug_str_offsets" => dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(slice),
        ".debug_str_sup" => {
            // Supplementary string data lives in its own `Dwarf` object.
            let mut dwarf_sup: Dwarf<'data> = Default::default();
            dwarf_sup.debug_str = gimli::DebugStr::from(slice);
            dwarf.sup = Some(Arc::new(dwarf_sup));
        }
        ".debug_types" => dwarf.debug_types = gimli::DebugTypes::from(slice),

        // Additional fields.
        ".debug_loc" => info.debug_loc = gimli::DebugLoc::from(slice),
        ".debug_loclists" => info.debug_loclists = gimli::DebugLocLists::from(slice),
        ".debug_ranges" => info.debug_ranges = gimli::DebugRanges::new(data, endian),
        ".debug_rnglists" => info.debug_rnglists = gimli::DebugRngLists::new(data, endian),

        // DWARF package fields
        ".debug_cu_index" => info.debug_cu_index = gimli::DebugCuIndex::new(data, endian),
        ".debug_tu_index" => info.debug_tu_index = gimli::DebugTuIndex::new(data, endian),

        // We don't use these at the moment.
        ".debug_aranges" | ".debug_pubnames" | ".debug_pubtypes" => return,
        other => {
            log::warn!("unknown debug section `{other}`");
            return;
        }
    }

    // Rebuild the combined range/location lists so they reflect whichever
    // constituent section was just registered.
    dwarf.ranges = gimli::RangeLists::new(info.debug_ranges, info.debug_rnglists);
    dwarf.locations = gimli::LocationLists::new(info.debug_loc, info.debug_loclists);
}
802
803
/// Declares a new import with the `module` and `field` names, importing the
804
/// `ty` specified.
805
///
806
/// Note that this method is somewhat tricky due to the implementation of
807
/// the module linking proposal. In the module linking proposal two-level
808
/// imports are recast as single-level imports of instances. That recasting
809
/// happens here by recording an import of an instance for the first time
810
/// we see a two-level import.
811
///
812
/// When the module linking proposal is disabled, however, disregard this
813
/// logic and instead work directly with two-level imports since no
814
/// instances are defined.
815
fn declare_import(&mut self, module: &'data str, field: &'data str, ty: EntityType) {
816
let index = self.push_type(ty);
817
self.result.module.initializers.push(Initializer::Import {
818
name: module.to_owned(),
819
field: field.to_owned(),
820
index,
821
});
822
}
823
824
fn push_type(&mut self, ty: EntityType) -> EntityIndex {
825
match ty {
826
EntityType::Function(ty) => EntityIndex::Function({
827
let func_index = self
828
.result
829
.module
830
.push_function(ty.unwrap_module_type_index());
831
// Imported functions can escape; in fact, they've already done
832
// so to get here.
833
self.flag_func_escaped(func_index);
834
func_index
835
}),
836
EntityType::Table(ty) => EntityIndex::Table(self.result.module.tables.push(ty)),
837
EntityType::Memory(ty) => EntityIndex::Memory(self.result.module.memories.push(ty)),
838
EntityType::Global(ty) => EntityIndex::Global(self.result.module.globals.push(ty)),
839
EntityType::Tag(ty) => EntityIndex::Tag(self.result.module.tags.push(ty)),
840
}
841
}
842
843
fn flag_func_escaped(&mut self, func: FuncIndex) {
844
let ty = &mut self.result.module.functions[func];
845
// If this was already assigned a funcref index no need to re-assign it.
846
if ty.is_escaping() {
847
return;
848
}
849
let index = self.result.module.num_escaped_funcs as u32;
850
ty.func_ref = FuncRefIndex::from_u32(index);
851
self.result.module.num_escaped_funcs += 1;
852
}
853
854
/// Parses the Name section of the wasm module.
855
fn name_section(&mut self, names: NameSectionReader<'data>) -> WasmResult<()> {
856
for subsection in names {
857
match subsection? {
858
wasmparser::Name::Function(names) => {
859
for name in names {
860
let Naming { index, name } = name?;
861
// Skip this naming if it's naming a function that
862
// doesn't actually exist.
863
if (index as usize) >= self.result.module.functions.len() {
864
continue;
865
}
866
867
// Store the name unconditionally, regardless of
868
// whether we're parsing debuginfo, since function
869
// names are almost always present in the
870
// final compilation artifact.
871
let index = FuncIndex::from_u32(index);
872
self.result
873
.debuginfo
874
.name_section
875
.func_names
876
.insert(index, name);
877
}
878
}
879
wasmparser::Name::Module { name, .. } => {
880
self.result.module.name = Some(name.to_string());
881
if self.tunables.debug_native {
882
self.result.debuginfo.name_section.module_name = Some(name);
883
}
884
}
885
wasmparser::Name::Local(reader) => {
886
if !self.tunables.debug_native {
887
continue;
888
}
889
for f in reader {
890
let f = f?;
891
// Skip this naming if it's naming a function that
892
// doesn't actually exist.
893
if (f.index as usize) >= self.result.module.functions.len() {
894
continue;
895
}
896
for name in f.names {
897
let Naming { index, name } = name?;
898
899
self.result
900
.debuginfo
901
.name_section
902
.locals_names
903
.entry(FuncIndex::from_u32(f.index))
904
.or_insert(HashMap::new())
905
.insert(index, name);
906
}
907
}
908
}
909
wasmparser::Name::Label(_)
910
| wasmparser::Name::Type(_)
911
| wasmparser::Name::Table(_)
912
| wasmparser::Name::Global(_)
913
| wasmparser::Name::Memory(_)
914
| wasmparser::Name::Element(_)
915
| wasmparser::Name::Data(_)
916
| wasmparser::Name::Tag(_)
917
| wasmparser::Name::Field(_)
918
| wasmparser::Name::Unknown { .. } => {}
919
}
920
}
921
Ok(())
922
}
923
}
924
925
// Type conversion hooks used while translating const expressions and
// section contents: module-local type indices are resolved through this
// module's `types` index space into interned engine types.
impl TypeConvert for ModuleEnvironment<'_, '_> {
    fn lookup_heap_type(&self, index: wasmparser::UnpackedIndex) -> WasmHeapType {
        WasmparserTypeConverter::new(&self.types, |idx| {
            self.result.module.types[idx].unwrap_module_type_index()
        })
        .lookup_heap_type(index)
    }

    fn lookup_type_index(&self, index: wasmparser::UnpackedIndex) -> EngineOrModuleTypeIndex {
        WasmparserTypeConverter::new(&self.types, |idx| {
            self.result.module.types[idx].unwrap_module_type_index()
        })
        .lookup_type_index(index)
    }
}
940
941
impl ModuleTranslation<'_> {
942
/// Attempts to convert segmented memory initialization into static
943
/// initialization for the module that this translation represents.
944
///
945
/// If this module's memory initialization is not compatible with paged
946
/// initialization then this won't change anything. Otherwise if it is
947
/// compatible then the `memory_initialization` field will be updated.
948
///
949
/// Takes a `page_size` argument in order to ensure that all
950
/// initialization is page-aligned for mmap-ability, and
951
/// `max_image_size_always_allowed` to control how we decide
952
/// whether to use static init.
953
///
954
/// We will try to avoid generating very sparse images, which are
955
/// possible if e.g. a module has an initializer at offset 0 and a
956
/// very high offset (say, 1 GiB). To avoid this, we use a dual
957
/// condition: we always allow images less than
958
/// `max_image_size_always_allowed`, and the embedder of Wasmtime
959
/// can set this if desired to ensure that static init should
960
/// always be done if the size of the module or its heaps is
961
/// otherwise bounded by the system. We also allow images with
962
/// static init data bigger than that, but only if it is "dense",
963
/// defined as having at least half (50%) of its pages with some
964
/// data.
965
///
966
/// We could do something slightly better by building a dense part
967
/// and keeping a sparse list of outlier/leftover segments (see
968
/// issue #3820). This would also allow mostly-static init of
969
/// modules that have some dynamically-placed data segments. But,
970
/// for now, this is sufficient to allow a system that "knows what
971
/// it's doing" to always get static init.
972
pub fn try_static_init(&mut self, page_size: u64, max_image_size_always_allowed: u64) {
973
// This method only attempts to transform a `Segmented` memory init
974
// into a `Static` one, no other state.
975
if !self.module.memory_initialization.is_segmented() {
976
return;
977
}
978
979
// First a dry run of memory initialization is performed. This
980
// collects information about the extent of memory initialized for each
981
// memory as well as the size of all data segments being copied in.
982
struct Memory {
983
data_size: u64,
984
min_addr: u64,
985
max_addr: u64,
986
// The `usize` here is a pointer into `self.data` which is the list
987
// of data segments corresponding to what was found in the original
988
// wasm module.
989
segments: Vec<(usize, StaticMemoryInitializer)>,
990
}
991
let mut info = PrimaryMap::with_capacity(self.module.memories.len());
992
for _ in 0..self.module.memories.len() {
993
info.push(Memory {
994
data_size: 0,
995
min_addr: u64::MAX,
996
max_addr: 0,
997
segments: Vec::new(),
998
});
999
}
1000
1001
struct InitMemoryAtCompileTime<'a> {
1002
module: &'a Module,
1003
info: &'a mut PrimaryMap<MemoryIndex, Memory>,
1004
idx: usize,
1005
}
1006
impl InitMemory for InitMemoryAtCompileTime<'_> {
1007
fn memory_size_in_bytes(
1008
&mut self,
1009
memory_index: MemoryIndex,
1010
) -> Result<u64, SizeOverflow> {
1011
self.module.memories[memory_index].minimum_byte_size()
1012
}
1013
1014
fn eval_offset(&mut self, memory_index: MemoryIndex, expr: &ConstExpr) -> Option<u64> {
1015
match (expr.ops(), self.module.memories[memory_index].idx_type) {
1016
(&[ConstOp::I32Const(offset)], IndexType::I32) => {
1017
Some(offset.cast_unsigned().into())
1018
}
1019
(&[ConstOp::I64Const(offset)], IndexType::I64) => Some(offset.cast_unsigned()),
1020
_ => None,
1021
}
1022
}
1023
1024
fn write(&mut self, memory: MemoryIndex, init: &StaticMemoryInitializer) -> bool {
1025
// Currently `Static` only applies to locally-defined memories,
1026
// so if a data segment references an imported memory then
1027
// transitioning to a `Static` memory initializer is not
1028
// possible.
1029
if self.module.defined_memory_index(memory).is_none() {
1030
return false;
1031
};
1032
let info = &mut self.info[memory];
1033
let data_len = u64::from(init.data.end - init.data.start);
1034
if data_len > 0 {
1035
info.data_size += data_len;
1036
info.min_addr = info.min_addr.min(init.offset);
1037
info.max_addr = info.max_addr.max(init.offset + data_len);
1038
info.segments.push((self.idx, init.clone()));
1039
}
1040
self.idx += 1;
1041
true
1042
}
1043
}
1044
let ok = self
1045
.module
1046
.memory_initialization
1047
.init_memory(&mut InitMemoryAtCompileTime {
1048
idx: 0,
1049
module: &self.module,
1050
info: &mut info,
1051
});
1052
if !ok {
1053
return;
1054
}
1055
1056
// Validate that the memory information collected is indeed valid for
1057
// static memory initialization.
1058
for (i, info) in info.iter().filter(|(_, info)| info.data_size > 0) {
1059
let image_size = info.max_addr - info.min_addr;
1060
1061
// Simplify things for now by bailing out entirely if any memory has
1062
// a page size smaller than the host's page size. This fixes a case
1063
// where currently initializers are created in host-page-size units
1064
// of length which means that a larger-than-the-entire-memory
1065
// initializer can be created. This can be handled technically but
1066
// would require some more changes to help fix the assert elsewhere
1067
// that this protects against.
1068
if self.module.memories[i].page_size() < page_size {
1069
return;
1070
}
1071
1072
// If the range of memory being initialized is less than twice the
1073
// total size of the data itself then it's assumed that static
1074
// initialization is ok. This means we'll at most double memory
1075
// consumption during the memory image creation process, which is
1076
// currently assumed to "probably be ok" but this will likely need
1077
// tweaks over time.
1078
if image_size < info.data_size.saturating_mul(2) {
1079
continue;
1080
}
1081
1082
// If the memory initialization image is larger than the size of all
1083
// data, then we still allow memory initialization if the image will
1084
// be of a relatively modest size, such as 1MB here.
1085
if image_size < max_image_size_always_allowed {
1086
continue;
1087
}
1088
1089
// At this point memory initialization is concluded to be too
1090
// expensive to do at compile time so it's entirely deferred to
1091
// happen at runtime.
1092
return;
1093
}
1094
1095
// Here's where we've now committed to changing to static memory. The
1096
// memory initialization image is built here from the page data and then
1097
// it's converted to a single initializer.
1098
let data = mem::replace(&mut self.data, Vec::new());
1099
let mut map = PrimaryMap::with_capacity(info.len());
1100
let mut module_data_size = 0u32;
1101
for (memory, info) in info.iter() {
1102
// Create the in-memory `image` which is the initialized contents of
1103
// this linear memory.
1104
let extent = if info.segments.len() > 0 {
1105
(info.max_addr - info.min_addr) as usize
1106
} else {
1107
0
1108
};
1109
let mut image = Vec::with_capacity(extent);
1110
for (idx, init) in info.segments.iter() {
1111
let data = &data[*idx];
1112
assert_eq!(data.len(), init.data.len());
1113
let offset = usize::try_from(init.offset - info.min_addr).unwrap();
1114
if image.len() < offset {
1115
image.resize(offset, 0u8);
1116
image.extend_from_slice(data);
1117
} else {
1118
image.splice(
1119
offset..(offset + data.len()).min(image.len()),
1120
data.iter().copied(),
1121
);
1122
}
1123
}
1124
assert_eq!(image.len(), extent);
1125
assert_eq!(image.capacity(), extent);
1126
let mut offset = if info.segments.len() > 0 {
1127
info.min_addr
1128
} else {
1129
0
1130
};
1131
1132
// Chop off trailing zeros from the image as memory is already
1133
// zero-initialized. Note that `i` is the position of a nonzero
1134
// entry here, so to not lose it we truncate to `i + 1`.
1135
if let Some(i) = image.iter().rposition(|i| *i != 0) {
1136
image.truncate(i + 1);
1137
}
1138
1139
// Also chop off leading zeros, if any.
1140
if let Some(i) = image.iter().position(|i| *i != 0) {
1141
offset += i as u64;
1142
image.drain(..i);
1143
}
1144
let mut len = u64::try_from(image.len()).unwrap();
1145
1146
// The goal is to enable mapping this image directly into memory, so
1147
// the offset into linear memory must be a multiple of the page
1148
// size. If that's not already the case then the image is padded at
1149
// the front and back with extra zeros as necessary
1150
if offset % page_size != 0 {
1151
let zero_padding = offset % page_size;
1152
self.data.push(vec![0; zero_padding as usize].into());
1153
offset -= zero_padding;
1154
len += zero_padding;
1155
}
1156
self.data.push(image.into());
1157
if len % page_size != 0 {
1158
let zero_padding = page_size - (len % page_size);
1159
self.data.push(vec![0; zero_padding as usize].into());
1160
len += zero_padding;
1161
}
1162
1163
// Offset/length should now always be page-aligned.
1164
assert!(offset % page_size == 0);
1165
assert!(len % page_size == 0);
1166
1167
// Create the `StaticMemoryInitializer` which describes this image,
1168
// only needed if the image is actually present and has a nonzero
1169
// length. The `offset` has been calculates above, originally
1170
// sourced from `info.min_addr`. The `data` field is the extent
1171
// within the final data segment we'll emit to an ELF image, which
1172
// is the concatenation of `self.data`, so here it's the size of
1173
// the section-so-far plus the current segment we're appending.
1174
let len = u32::try_from(len).unwrap();
1175
let init = if len > 0 {
1176
Some(StaticMemoryInitializer {
1177
offset,
1178
data: module_data_size..module_data_size + len,
1179
})
1180
} else {
1181
None
1182
};
1183
let idx = map.push(init);
1184
assert_eq!(idx, memory);
1185
module_data_size += len;
1186
}
1187
self.data_align = Some(page_size);
1188
self.module.memory_initialization = MemoryInitialization::Static { map };
1189
}
1190
1191
/// Attempts to convert the module's table initializers to
1192
/// FuncTable form where possible. This enables lazy table
1193
/// initialization later by providing a one-to-one map of initial
1194
/// table values, without having to parse all segments.
1195
pub fn try_func_table_init(&mut self) {
1196
// This should be large enough to support very large Wasm
1197
// modules with huge funcref tables, but small enough to avoid
1198
// OOMs or DoS on truly sparse tables.
1199
const MAX_FUNC_TABLE_SIZE: u64 = 1024 * 1024;
1200
1201
// First convert any element-initialized tables to images of just that
1202
// single function if the minimum size of the table allows doing so.
1203
for ((_, init), (_, table)) in self
1204
.module
1205
.table_initialization
1206
.initial_values
1207
.iter_mut()
1208
.zip(
1209
self.module
1210
.tables
1211
.iter()
1212
.skip(self.module.num_imported_tables),
1213
)
1214
{
1215
let table_size = table.limits.min;
1216
if table_size > MAX_FUNC_TABLE_SIZE {
1217
continue;
1218
}
1219
if let TableInitialValue::Expr(expr) = init {
1220
if let [ConstOp::RefFunc(f)] = expr.ops() {
1221
*init = TableInitialValue::Null {
1222
precomputed: vec![*f; table_size as usize],
1223
};
1224
}
1225
}
1226
}
1227
1228
let mut segments = mem::take(&mut self.module.table_initialization.segments)
1229
.into_iter()
1230
.peekable();
1231
1232
// The goal of this loop is to interpret a table segment and apply it
1233
// "statically" to a local table. This will iterate over segments and
1234
// apply them one-by-one to each table.
1235
//
1236
// If any segment can't be applied, however, then this loop exits and
1237
// all remaining segments are placed back into the segment list. This is
1238
// because segments are supposed to be initialized one-at-a-time which
1239
// means that intermediate state is visible with respect to traps. If
1240
// anything isn't statically known to not trap it's pessimistically
1241
// assumed to trap meaning all further segment initializers must be
1242
// applied manually at instantiation time.
1243
while let Some(segment) = segments.peek() {
1244
let defined_index = match self.module.defined_table_index(segment.table_index) {
1245
Some(index) => index,
1246
// Skip imported tables: we can't provide a preconstructed
1247
// table for them, because their values depend on the
1248
// imported table overlaid with whatever segments we have.
1249
None => break,
1250
};
1251
1252
// If the base of this segment is dynamic, then we can't
1253
// include it in the statically-built array of initial
1254
// contents.
1255
let offset = match segment.offset.ops() {
1256
&[ConstOp::I32Const(offset)] => u64::from(offset.cast_unsigned()),
1257
&[ConstOp::I64Const(offset)] => offset.cast_unsigned(),
1258
_ => break,
1259
};
1260
1261
// Get the end of this segment. If out-of-bounds, or too
1262
// large for our dense table representation, then skip the
1263
// segment.
1264
let top = match offset.checked_add(segment.elements.len()) {
1265
Some(top) => top,
1266
None => break,
1267
};
1268
let table_size = self.module.tables[segment.table_index].limits.min;
1269
if top > table_size || top > MAX_FUNC_TABLE_SIZE {
1270
break;
1271
}
1272
1273
match self.module.tables[segment.table_index]
1274
.ref_type
1275
.heap_type
1276
.top()
1277
{
1278
WasmHeapTopType::Func => {}
1279
// If this is not a funcref table, then we can't support a
1280
// pre-computed table of function indices. Technically this
1281
// initializer won't trap so we could continue processing
1282
// segments, but that's left as a future optimization if
1283
// necessary.
1284
WasmHeapTopType::Any
1285
| WasmHeapTopType::Extern
1286
| WasmHeapTopType::Cont
1287
| WasmHeapTopType::Exn => break,
1288
}
1289
1290
// Function indices can be optimized here, but fully general
1291
// expressions are deferred to get evaluated at runtime.
1292
let function_elements = match &segment.elements {
1293
TableSegmentElements::Functions(indices) => indices,
1294
TableSegmentElements::Expressions(_) => break,
1295
};
1296
1297
let precomputed =
1298
match &mut self.module.table_initialization.initial_values[defined_index] {
1299
TableInitialValue::Null { precomputed } => precomputed,
1300
1301
// If this table is still listed as an initial value here
1302
// then that means the initial size of the table doesn't
1303
// support a precomputed function list, so skip this.
1304
// Technically this won't trap so it's possible to process
1305
// further initializers, but that's left as a future
1306
// optimization.
1307
TableInitialValue::Expr(_) => break,
1308
};
1309
1310
// At this point we're committing to pre-initializing the table
1311
// with the `segment` that's being iterated over. This segment is
1312
// applied to the `precomputed` list for the table by ensuring
1313
// it's large enough to hold the segment and then copying the
1314
// segment into the precomputed list.
1315
if precomputed.len() < top as usize {
1316
precomputed.resize(top as usize, FuncIndex::reserved_value());
1317
}
1318
let dst = &mut precomputed[offset as usize..top as usize];
1319
dst.copy_from_slice(&function_elements);
1320
1321
// advance the iterator to see the next segment
1322
let _ = segments.next();
1323
}
1324
self.module.table_initialization.segments = segments.collect();
1325
}
1326
}
1327
1328