GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/crates/environ/src/compile/module_environ.rs
use crate::module::{
    FuncRefIndex, Initializer, MemoryInitialization, MemoryInitializer, Module, TableSegment,
    TableSegmentElements,
};
use crate::{
    ConstExpr, ConstOp, DataIndex, DefinedFuncIndex, ElemIndex, EngineOrModuleTypeIndex,
    EntityIndex, EntityType, FuncIndex, GlobalIndex, IndexType, InitMemory, MemoryIndex,
    ModuleInternedTypeIndex, ModuleTypesBuilder, PrimaryMap, SizeOverflow, StaticMemoryInitializer,
    TableIndex, TableInitialValue, Tag, TagIndex, Tunables, TypeConvert, TypeIndex, WasmError,
    WasmHeapTopType, WasmHeapType, WasmResult, WasmValType, WasmparserTypeConverter,
};
use crate::{StaticModuleIndex, prelude::*};
use anyhow::{Result, bail};
use cranelift_entity::SecondaryMap;
use cranelift_entity::packed_option::ReservedValue;
use std::borrow::Cow;
use std::collections::HashMap;
use std::mem;
use std::path::PathBuf;
use std::sync::Arc;
use wasmparser::{
    CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind,
    FuncToValidate, FunctionBody, KnownCustom, NameSectionReader, Naming, Parser, Payload, TypeRef,
    Validator, ValidatorResources, types::Types,
};

/// Object containing the standalone environment information.
pub struct ModuleEnvironment<'a, 'data> {
    /// The current module being translated
    result: ModuleTranslation<'data>,

    /// Intern'd types for this entire translation, shared by all modules.
    types: &'a mut ModuleTypesBuilder,

    // Various bits and pieces of configuration
    validator: &'a mut Validator,
    tunables: &'a Tunables,
}

/// The result of translating via `ModuleEnvironment`.
///
/// Function bodies are not yet translated, and data initializers have not yet
/// been copied out of the original buffer.
pub struct ModuleTranslation<'data> {
    /// Module information.
    pub module: Module,

    /// This module's index.
    pub module_index: StaticModuleIndex,

    /// The input wasm binary.
    ///
    /// This can be useful, for example, when modules are parsed from a
    /// component and the embedder wants access to the raw wasm modules
    /// themselves.
    pub wasm: &'data [u8],

    /// References to the function bodies.
    pub function_body_inputs: PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,

    /// For each imported function, the single statically-known defined function
    /// that satisfies that import, if any. This is used to turn what would
    /// otherwise be indirect calls through the imports table into direct calls,
    /// when possible.
    pub known_imported_functions:
        SecondaryMap<FuncIndex, Option<(StaticModuleIndex, DefinedFuncIndex)>>,

    /// A list of type signatures which are considered exported from this
    /// module, or those that can possibly be called. This list is sorted, and
    /// trampolines for each of these signatures are required.
    pub exported_signatures: Vec<ModuleInternedTypeIndex>,

    /// DWARF debug information, if enabled, parsed from the module.
    pub debuginfo: DebugInfoData<'data>,

    /// Set if debuginfo was found but it was not parsed due to `Tunables`
    /// configuration.
    pub has_unparsed_debuginfo: bool,

    /// List of data segments found in this module which should be concatenated
    /// together for the final compiled artifact.
    ///
    /// These data segments, when concatenated, are indexed by the
    /// `MemoryInitializer` type.
    pub data: Vec<Cow<'data, [u8]>>,

    /// The desired alignment of `data` in the final data section of the object
    /// file that we'll emit.
    ///
    /// Note that this is 1 by default but `MemoryInitialization::Static` might
    /// switch this to a higher alignment to facilitate mmap-ing data from
    /// an object file into a linear memory.
    pub data_align: Option<u64>,

    /// Total size of all data pushed onto `data` so far.
    total_data: u32,

    /// List of passive element segments found in this module which will get
    /// concatenated for the final artifact.
    pub passive_data: Vec<&'data [u8]>,

    /// Total size of all passive data pushed into `passive_data` so far.
    total_passive_data: u32,

    /// When we're parsing the code section this will be incremented so we know
    /// which function is currently being defined.
    code_index: u32,

    /// The type information of the current module made available at the end of the
    /// validation process.
    types: Option<Types>,
}

impl<'data> ModuleTranslation<'data> {
    /// Create a new translation for the module with the given index.
    pub fn new(module_index: StaticModuleIndex) -> Self {
        Self {
            module_index,
            module: Module::default(),
            wasm: &[],
            function_body_inputs: PrimaryMap::default(),
            known_imported_functions: SecondaryMap::default(),
            exported_signatures: Vec::default(),
            debuginfo: DebugInfoData::default(),
            has_unparsed_debuginfo: false,
            data: Vec::default(),
            data_align: None,
            total_data: 0,
            passive_data: Vec::default(),
            total_passive_data: 0,
            code_index: 0,
            types: None,
        }
    }

    /// Returns a reference to the type information of the current module.
    pub fn get_types(&self) -> &Types {
        self.types
            .as_ref()
            .expect("module type information to be available")
    }
}

/// Contains function data: byte code and its offset in the module.
pub struct FunctionBodyData<'a> {
    /// The body of the function, containing code and locals.
    pub body: FunctionBody<'a>,
    /// Validator for the function body
    pub validator: FuncToValidate<ValidatorResources>,
}

#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct DebugInfoData<'a> {
    pub dwarf: Dwarf<'a>,
    pub name_section: NameSection<'a>,
    pub wasm_file: WasmFileInfo,
    pub debug_loc: gimli::DebugLoc<Reader<'a>>,
    pub debug_loclists: gimli::DebugLocLists<Reader<'a>>,
    pub debug_ranges: gimli::DebugRanges<Reader<'a>>,
    pub debug_rnglists: gimli::DebugRngLists<Reader<'a>>,
    pub debug_cu_index: gimli::DebugCuIndex<Reader<'a>>,
    pub debug_tu_index: gimli::DebugTuIndex<Reader<'a>>,
}

#[expect(missing_docs, reason = "self-describing")]
pub type Dwarf<'input> = gimli::Dwarf<Reader<'input>>;

type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>;

#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct NameSection<'a> {
    pub module_name: Option<&'a str>,
    pub func_names: HashMap<FuncIndex, &'a str>,
    pub locals_names: HashMap<FuncIndex, HashMap<u32, &'a str>>,
}

#[derive(Debug, Default)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct WasmFileInfo {
    pub path: Option<PathBuf>,
    pub code_section_offset: u64,
    pub imported_func_count: u32,
    pub funcs: Vec<FunctionMetadata>,
}

#[derive(Debug)]
#[expect(missing_docs, reason = "self-describing fields")]
pub struct FunctionMetadata {
    pub params: Box<[WasmValType]>,
    pub locals: Box<[(u32, WasmValType)]>,
}

impl<'a, 'data> ModuleEnvironment<'a, 'data> {
    /// Allocates the environment data structures.
    pub fn new(
        tunables: &'a Tunables,
        validator: &'a mut Validator,
        types: &'a mut ModuleTypesBuilder,
        module_index: StaticModuleIndex,
    ) -> Self {
        Self {
            result: ModuleTranslation::new(module_index),
            types,
            tunables,
            validator,
        }
    }

    /// Translate a wasm module using this environment.
    ///
    /// This function will translate the `data` provided with `parser`,
    /// validating everything along the way with this environment's validator.
    ///
    /// The result of translation, [`ModuleTranslation`], contains everything
    /// necessary to compile functions afterwards as well as learn type
    /// information about the module at runtime.
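    ///
    /// A minimal usage sketch (hypothetical; the surrounding bindings
    /// `tunables`, `validator`, `types`, and `wasm` are assumptions, not
    /// items defined in this file):
    ///
    /// ```ignore
    /// let env = ModuleEnvironment::new(&tunables, &mut validator, &mut types,
    ///     StaticModuleIndex::from_u32(0));
    /// let translation = env.translate(Parser::new(0), &wasm)?;
    /// let types = translation.get_types();
    /// ```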
    pub fn translate(
        mut self,
        parser: Parser,
        data: &'data [u8],
    ) -> Result<ModuleTranslation<'data>> {
        self.result.wasm = data;

        for payload in parser.parse_all(data) {
            self.translate_payload(payload?)?;
        }

        Ok(self.result)
    }

    fn translate_payload(&mut self, payload: Payload<'data>) -> Result<()> {
        match payload {
            Payload::Version {
                num,
                encoding,
                range,
            } => {
                self.validator.version(num, encoding, &range)?;
                match encoding {
                    Encoding::Module => {}
                    Encoding::Component => {
                        bail!("expected a WebAssembly module but was given a WebAssembly component")
                    }
                }
            }

            Payload::End(offset) => {
                self.result.types = Some(self.validator.end(offset)?);

                // With the `escaped_funcs` set of functions finished
                // we can calculate the set of signatures that are exported as
                // the set of exported functions' signatures.
                self.result.exported_signatures = self
                    .result
                    .module
                    .functions
                    .iter()
                    .filter_map(|(_, func)| {
                        if func.is_escaping() {
                            Some(func.signature.unwrap_module_type_index())
                        } else {
                            None
                        }
                    })
                    .collect();
                self.result.exported_signatures.sort_unstable();
                self.result.exported_signatures.dedup();
            }

            Payload::TypeSection(types) => {
                self.validator.type_section(&types)?;

                let count = self.validator.types(0).unwrap().core_type_count_in_module();
                log::trace!("interning {count} Wasm types");

                let capacity = usize::try_from(count).unwrap();
                self.result.module.types.reserve(capacity);
                self.types.reserve_wasm_signatures(capacity);

                // Iterate over each *rec group* -- not type -- defined in the
                // types section. Rec groups are the unit of canonicalization
                // and therefore the unit at which we need to process at a
                // time. `wasmparser` has already done the hard work of
                // de-duplicating and canonicalizing the rec groups within the
                // module for us; we just need to translate them into our data
                // structures. Note that, if the Wasm defines duplicate rec
                // groups, we need to copy the duplicates over (shallowly) as well,
                // so that our types index space doesn't have holes.
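                //
                // For example (illustrative WAT, not from this file), a
                // module that defines the same rec group twice:
                //
                //     (rec (type $f0 (func)))
                //     (rec (type $f1 (func)))
                //
                // canonicalizes both groups to one interned representative,
                // but the module's type index space still has two entries,
                // so the second entry is a shallow copy of the first.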
                let mut type_index = 0;
                while type_index < count {
                    let validator_types = self.validator.types(0).unwrap();

                    // Get the rec group for the current type index, which is
                    // always the first type defined in a rec group.
                    log::trace!("looking up wasmparser type for index {type_index}");
                    let core_type_id = validator_types.core_type_at_in_module(type_index);
                    log::trace!(
                        " --> {core_type_id:?} = {:?}",
                        validator_types[core_type_id],
                    );
                    let rec_group_id = validator_types.rec_group_id_of(core_type_id);
                    debug_assert_eq!(
                        validator_types
                            .rec_group_elements(rec_group_id)
                            .position(|id| id == core_type_id),
                        Some(0)
                    );

                    // Intern the rec group and then fill in this module's types
                    // index space.
                    let interned = self.types.intern_rec_group(validator_types, rec_group_id)?;
                    let elems = self.types.rec_group_elements(interned);
                    let len = elems.len();
                    self.result.module.types.reserve(len);
                    for ty in elems {
                        self.result.module.types.push(ty.into());
                    }

                    // Advance `type_index` to the start of the next rec group.
                    type_index += u32::try_from(len).unwrap();
                }
            }

            Payload::ImportSection(imports) => {
                self.validator.import_section(&imports)?;

                let cnt = usize::try_from(imports.count()).unwrap();
                self.result.module.initializers.reserve(cnt);

                for entry in imports {
                    let import = entry?;
                    let ty = match import.ty {
                        TypeRef::Func(index) => {
                            let index = TypeIndex::from_u32(index);
                            let interned_index = self.result.module.types[index];
                            self.result.module.num_imported_funcs += 1;
                            self.result.debuginfo.wasm_file.imported_func_count += 1;
                            EntityType::Function(interned_index)
                        }
                        TypeRef::Memory(ty) => {
                            self.result.module.num_imported_memories += 1;
                            EntityType::Memory(ty.into())
                        }
                        TypeRef::Global(ty) => {
                            self.result.module.num_imported_globals += 1;
                            EntityType::Global(self.convert_global_type(&ty)?)
                        }
                        TypeRef::Table(ty) => {
                            self.result.module.num_imported_tables += 1;
                            EntityType::Table(self.convert_table_type(&ty)?)
                        }
                        TypeRef::Tag(ty) => {
                            let index = TypeIndex::from_u32(ty.func_type_idx);
                            let signature = self.result.module.types[index];
                            let exception = self.types.define_exception_type_for_tag(
                                signature.unwrap_module_type_index(),
                            );
                            let tag = Tag {
                                signature,
                                exception: EngineOrModuleTypeIndex::Module(exception),
                            };
                            self.result.module.num_imported_tags += 1;
                            EntityType::Tag(tag)
                        }
                    };
                    self.declare_import(import.module, import.name, ty);
                }
            }

            Payload::FunctionSection(functions) => {
                self.validator.function_section(&functions)?;

                let cnt = usize::try_from(functions.count()).unwrap();
                self.result.module.functions.reserve_exact(cnt);

                for entry in functions {
                    let sigindex = entry?;
                    let ty = TypeIndex::from_u32(sigindex);
                    let interned_index = self.result.module.types[ty];
                    self.result.module.push_function(interned_index);
                }
            }

            Payload::TableSection(tables) => {
                self.validator.table_section(&tables)?;
                let cnt = usize::try_from(tables.count()).unwrap();
                self.result.module.tables.reserve_exact(cnt);

                for entry in tables {
                    let wasmparser::Table { ty, init } = entry?;
                    let table = self.convert_table_type(&ty)?;
                    self.result.module.needs_gc_heap |= table.ref_type.is_vmgcref_type();
                    self.result.module.tables.push(table);
                    let init = match init {
                        wasmparser::TableInit::RefNull => TableInitialValue::Null {
                            precomputed: Vec::new(),
                        },
                        wasmparser::TableInit::Expr(expr) => {
                            let (init, escaped) = ConstExpr::from_wasmparser(self, expr)?;
                            for f in escaped {
                                self.flag_func_escaped(f);
                            }
                            TableInitialValue::Expr(init)
                        }
                    };
                    self.result
                        .module
                        .table_initialization
                        .initial_values
                        .push(init);
                }
            }

            Payload::MemorySection(memories) => {
                self.validator.memory_section(&memories)?;

                let cnt = usize::try_from(memories.count()).unwrap();
                self.result.module.memories.reserve_exact(cnt);

                for entry in memories {
                    let memory = entry?;
                    self.result.module.memories.push(memory.into());
                }
            }

            Payload::TagSection(tags) => {
                self.validator.tag_section(&tags)?;

                for entry in tags {
                    let sigindex = entry?.func_type_idx;
                    let ty = TypeIndex::from_u32(sigindex);
                    let interned_index = self.result.module.types[ty];
                    let exception = self
                        .types
                        .define_exception_type_for_tag(interned_index.unwrap_module_type_index());
                    self.result.module.push_tag(interned_index, exception);
                }
            }

            Payload::GlobalSection(globals) => {
                self.validator.global_section(&globals)?;

                let cnt = usize::try_from(globals.count()).unwrap();
                self.result.module.globals.reserve_exact(cnt);

                for entry in globals {
                    let wasmparser::Global { ty, init_expr } = entry?;
                    let (initializer, escaped) = ConstExpr::from_wasmparser(self, init_expr)?;
                    for f in escaped {
                        self.flag_func_escaped(f);
                    }
                    let ty = self.convert_global_type(&ty)?;
                    self.result.module.globals.push(ty);
                    self.result.module.global_initializers.push(initializer);
                }
            }

            Payload::ExportSection(exports) => {
                self.validator.export_section(&exports)?;

                let cnt = usize::try_from(exports.count()).unwrap();
                self.result.module.exports.reserve(cnt);

                for entry in exports {
                    let wasmparser::Export { name, kind, index } = entry?;
                    let entity = match kind {
                        ExternalKind::Func => {
                            let index = FuncIndex::from_u32(index);
                            self.flag_func_escaped(index);
                            EntityIndex::Function(index)
                        }
                        ExternalKind::Table => EntityIndex::Table(TableIndex::from_u32(index)),
                        ExternalKind::Memory => EntityIndex::Memory(MemoryIndex::from_u32(index)),
                        ExternalKind::Global => EntityIndex::Global(GlobalIndex::from_u32(index)),
                        ExternalKind::Tag => EntityIndex::Tag(TagIndex::from_u32(index)),
                    };
                    self.result
                        .module
                        .exports
                        .insert(String::from(name), entity);
                }
            }

            Payload::StartSection { func, range } => {
                self.validator.start_section(func, &range)?;

                let func_index = FuncIndex::from_u32(func);
                self.flag_func_escaped(func_index);
                debug_assert!(self.result.module.start_func.is_none());
                self.result.module.start_func = Some(func_index);
            }

            Payload::ElementSection(elements) => {
                self.validator.element_section(&elements)?;

                for (index, entry) in elements.into_iter().enumerate() {
                    let wasmparser::Element {
                        kind,
                        items,
                        range: _,
                    } = entry?;

                    // Build up a list of `FuncIndex` corresponding to all the
                    // entries listed in this segment. Note that it's not
                    // possible to create anything other than a `ref.null
                    // extern` for externref segments, so those just get
                    // translated to the reserved value of `FuncIndex`.
                    let elements = match items {
                        ElementItems::Functions(funcs) => {
                            let mut elems =
                                Vec::with_capacity(usize::try_from(funcs.count()).unwrap());
                            for func in funcs {
                                let func = FuncIndex::from_u32(func?);
                                self.flag_func_escaped(func);
                                elems.push(func);
                            }
                            TableSegmentElements::Functions(elems.into())
                        }
                        ElementItems::Expressions(_ty, items) => {
                            let mut exprs =
                                Vec::with_capacity(usize::try_from(items.count()).unwrap());
                            for expr in items {
                                let (expr, escaped) = ConstExpr::from_wasmparser(self, expr?)?;
                                exprs.push(expr);
                                for func in escaped {
                                    self.flag_func_escaped(func);
                                }
                            }
                            TableSegmentElements::Expressions(exprs.into())
                        }
                    };

                    match kind {
                        ElementKind::Active {
                            table_index,
                            offset_expr,
                        } => {
                            let table_index = TableIndex::from_u32(table_index.unwrap_or(0));
                            let (offset, escaped) = ConstExpr::from_wasmparser(self, offset_expr)?;
                            debug_assert!(escaped.is_empty());

                            self.result
                                .module
                                .table_initialization
                                .segments
                                .push(TableSegment {
                                    table_index,
                                    offset,
                                    elements,
                                });
                        }

                        ElementKind::Passive => {
                            let elem_index = ElemIndex::from_u32(index as u32);
                            let index = self.result.module.passive_elements.len();
                            self.result.module.passive_elements.push(elements);
                            self.result
                                .module
                                .passive_elements_map
                                .insert(elem_index, index);
                        }

                        ElementKind::Declared => {}
                    }
                }
            }

            Payload::CodeSectionStart { count, range, .. } => {
                self.validator.code_section_start(&range)?;
                let cnt = usize::try_from(count).unwrap();
                self.result.function_body_inputs.reserve_exact(cnt);
                self.result.debuginfo.wasm_file.code_section_offset = range.start as u64;
            }

            Payload::CodeSectionEntry(body) => {
                let validator = self.validator.code_section_entry(&body)?;
                let func_index =
                    self.result.code_index + self.result.module.num_imported_funcs as u32;
                let func_index = FuncIndex::from_u32(func_index);

                if self.tunables.generate_native_debuginfo {
                    let sig_index = self.result.module.functions[func_index]
                        .signature
                        .unwrap_module_type_index();
                    let sig = self.types[sig_index].unwrap_func();
                    let mut locals = Vec::new();
                    for pair in body.get_locals_reader()? {
                        let (cnt, ty) = pair?;
                        let ty = self.convert_valtype(ty)?;
                        locals.push((cnt, ty));
                    }
                    self.result
                        .debuginfo
                        .wasm_file
                        .funcs
                        .push(FunctionMetadata {
                            locals: locals.into_boxed_slice(),
                            params: sig.params().into(),
                        });
                }
                self.result
                    .function_body_inputs
                    .push(FunctionBodyData { validator, body });
                self.result.code_index += 1;
            }

            Payload::DataSection(data) => {
                self.validator.data_section(&data)?;

                let initializers = match &mut self.result.module.memory_initialization {
                    MemoryInitialization::Segmented(i) => i,
                    _ => unreachable!(),
                };

                let cnt = usize::try_from(data.count()).unwrap();
                initializers.reserve_exact(cnt);
                self.result.data.reserve_exact(cnt);

                for (index, entry) in data.into_iter().enumerate() {
                    let wasmparser::Data {
                        kind,
                        data,
                        range: _,
                    } = entry?;
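                    // Computes this segment's `start..end` range within the
                    // concatenated data (or passive data) buffer, bumping the
                    // running total and rejecting modules whose total data
                    // would overflow the 4 GiB `u32` range.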
                    let mk_range = |total: &mut u32| -> Result<_, WasmError> {
                        let range = u32::try_from(data.len())
                            .ok()
                            .and_then(|size| {
                                let start = *total;
                                let end = start.checked_add(size)?;
                                Some(start..end)
                            })
                            .ok_or_else(|| {
                                WasmError::Unsupported(format!(
                                    "more than 4 gigabytes of data in wasm module",
                                ))
                            })?;
                        *total += range.end - range.start;
                        Ok(range)
                    };
                    match kind {
                        DataKind::Active {
                            memory_index,
                            offset_expr,
                        } => {
                            let range = mk_range(&mut self.result.total_data)?;
                            let memory_index = MemoryIndex::from_u32(memory_index);
                            let (offset, escaped) = ConstExpr::from_wasmparser(self, offset_expr)?;
                            debug_assert!(escaped.is_empty());

                            let initializers = match &mut self.result.module.memory_initialization {
                                MemoryInitialization::Segmented(i) => i,
                                _ => unreachable!(),
                            };
                            initializers.push(MemoryInitializer {
                                memory_index,
                                offset,
                                data: range,
                            });
                            self.result.data.push(data.into());
                        }
                        DataKind::Passive => {
                            let data_index = DataIndex::from_u32(index as u32);
                            let range = mk_range(&mut self.result.total_passive_data)?;
                            self.result.passive_data.push(data);
                            self.result
                                .module
                                .passive_data_map
                                .insert(data_index, range);
                        }
                    }
                }
            }

            Payload::DataCountSection { count, range } => {
                self.validator.data_count_section(count, &range)?;

                // Note: the count passed in here is the *total* segment count.
                // There is no way to reserve for just the passive segments, as
                // they are discovered when iterating the data section entries.
                // Given that the total segment count might be much larger than
                // the passive count, do not reserve anything here.
            }

            Payload::CustomSection(s)
                if s.name() == "webidl-bindings" || s.name() == "wasm-interface-types" =>
            {
                bail!(
                    "\
Support for interface types has temporarily been removed from `wasmtime`.

For more information about this temporary change you can read on the issue online:

https://github.com/bytecodealliance/wasmtime/issues/1271

and for re-adding support for interface types you can see this issue:

https://github.com/bytecodealliance/wasmtime/issues/677
"
                )
            }

            Payload::CustomSection(s) => {
                self.register_custom_section(&s);
            }

            // It's expected that validation will probably reject other
            // payloads such as `UnknownSection` or those related to the
            // component model. If, however, something gets past validation then
            // that's a bug in Wasmtime as we forgot to implement something.
            other => {
                self.validator.payload(&other)?;
                panic!("unimplemented section in wasm file {other:?}");
            }
        }
        Ok(())
    }

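    /// Records a custom section: `name` sections are parsed for module,
    /// function, and local names, and `.debug_*` sections are forwarded to
    /// `dwarf_section`; any other custom section is ignored.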
    fn register_custom_section(&mut self, section: &CustomSectionReader<'data>) {
        match section.as_known() {
            KnownCustom::Name(name) => {
                let result = self.name_section(name);
                if let Err(e) = result {
                    log::warn!("failed to parse name section {e:?}");
                }
            }
            _ => {
                let name = section.name().trim_end_matches(".dwo");
                if name.starts_with(".debug_") {
                    self.dwarf_section(name, section);
                }
            }
        }
    }

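    /// Stashes the raw bytes of the DWARF section `name` into
    /// `self.result.debuginfo`, unless parsing of wasm debuginfo is disabled
    /// in the `Tunables` configuration.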
    fn dwarf_section(&mut self, name: &str, section: &CustomSectionReader<'data>) {
        if !self.tunables.generate_native_debuginfo && !self.tunables.parse_wasm_debuginfo {
            self.result.has_unparsed_debuginfo = true;
            return;
        }
        let info = &mut self.result.debuginfo;
        let dwarf = &mut info.dwarf;
        let endian = gimli::LittleEndian;
        let data = section.data();
        let slice = gimli::EndianSlice::new(data, endian);

        match name {
            // `gimli::Dwarf` fields.
            ".debug_abbrev" => dwarf.debug_abbrev = gimli::DebugAbbrev::new(data, endian),
            ".debug_addr" => dwarf.debug_addr = gimli::DebugAddr::from(slice),
            ".debug_info" => {
                dwarf.debug_info = gimli::DebugInfo::new(data, endian);
            }
            ".debug_line" => dwarf.debug_line = gimli::DebugLine::new(data, endian),
            ".debug_line_str" => dwarf.debug_line_str = gimli::DebugLineStr::from(slice),
            ".debug_str" => dwarf.debug_str = gimli::DebugStr::new(data, endian),
            ".debug_str_offsets" => dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(slice),
            ".debug_str_sup" => {
                let mut dwarf_sup: Dwarf<'data> = Default::default();
                dwarf_sup.debug_str = gimli::DebugStr::from(slice);
                dwarf.sup = Some(Arc::new(dwarf_sup));
            }
            ".debug_types" => dwarf.debug_types = gimli::DebugTypes::from(slice),

            // Additional fields.
            ".debug_loc" => info.debug_loc = gimli::DebugLoc::from(slice),
            ".debug_loclists" => info.debug_loclists = gimli::DebugLocLists::from(slice),
            ".debug_ranges" => info.debug_ranges = gimli::DebugRanges::new(data, endian),
            ".debug_rnglists" => info.debug_rnglists = gimli::DebugRngLists::new(data, endian),

            // DWARF package fields
            ".debug_cu_index" => info.debug_cu_index = gimli::DebugCuIndex::new(data, endian),
            ".debug_tu_index" => info.debug_tu_index = gimli::DebugTuIndex::new(data, endian),

            // We don't use these at the moment.
            ".debug_aranges" | ".debug_pubnames" | ".debug_pubtypes" => return,
            other => {
                log::warn!("unknown debug section `{other}`");
                return;
            }
        }

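        // Rebuild the combined range/location lists, since one of their
        // component sections may have just been replaced above.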
        dwarf.ranges = gimli::RangeLists::new(info.debug_ranges, info.debug_rnglists);
        dwarf.locations = gimli::LocationLists::new(info.debug_loc, info.debug_loclists);
    }

    /// Declares a new import with the `module` and `field` names, importing the
    /// `ty` specified.
    ///
    /// Note that this method is somewhat tricky due to the implementation of
    /// the module linking proposal. In the module linking proposal two-level
    /// imports are recast as single-level imports of instances. That recasting
    /// happens here by recording an import of an instance for the first time
    /// we see a two-level import.
    ///
    /// When the module linking proposal is disabled, however, disregard this
    /// logic and instead work directly with two-level imports since no
    /// instances are defined.
    fn declare_import(&mut self, module: &'data str, field: &'data str, ty: EntityType) {
        let index = self.push_type(ty);
        self.result.module.initializers.push(Initializer::Import {
            name: module.to_owned(),
            field: field.to_owned(),
            index,
        });
    }

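    /// Pushes the entity described by `ty` onto the matching index space of
    /// the module (function, table, memory, global, or tag) and returns the
    /// resulting `EntityIndex`.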
    fn push_type(&mut self, ty: EntityType) -> EntityIndex {
        match ty {
            EntityType::Function(ty) => EntityIndex::Function({
                let func_index = self
                    .result
                    .module
                    .push_function(ty.unwrap_module_type_index());
                // Imported functions can escape; in fact, they've already done
                // so to get here.
                self.flag_func_escaped(func_index);
                func_index
            }),
            EntityType::Table(ty) => EntityIndex::Table(self.result.module.tables.push(ty)),
            EntityType::Memory(ty) => EntityIndex::Memory(self.result.module.memories.push(ty)),
            EntityType::Global(ty) => EntityIndex::Global(self.result.module.globals.push(ty)),
            EntityType::Tag(ty) => EntityIndex::Tag(self.result.module.tags.push(ty)),
        }
    }

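    /// Marks `func` as "escaping": a `funcref` to it may be created at runtime
    /// (for example via an export, a table initializer, or `ref.func`), so it
    /// lazily gets assigned the next `FuncRefIndex`.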
    fn flag_func_escaped(&mut self, func: FuncIndex) {
        let ty = &mut self.result.module.functions[func];
        // If this was already assigned a funcref index no need to re-assign it.
        if ty.is_escaping() {
            return;
        }
        let index = self.result.module.num_escaped_funcs as u32;
        ty.func_ref = FuncRefIndex::from_u32(index);
        self.result.module.num_escaped_funcs += 1;
    }

    /// Parses the Name section of the wasm module.
    fn name_section(&mut self, names: NameSectionReader<'data>) -> WasmResult<()> {
        for subsection in names {
            match subsection? {
                wasmparser::Name::Function(names) => {
                    for name in names {
                        let Naming { index, name } = name?;
                        // Skip this naming if it's naming a function that
                        // doesn't actually exist.
                        if (index as usize) >= self.result.module.functions.len() {
                            continue;
                        }

                        // Store the name unconditionally, regardless of
                        // whether we're parsing debuginfo, since function
                        // names are almost always present in the
                        // final compilation artifact.
                        let index = FuncIndex::from_u32(index);
                        self.result
                            .debuginfo
                            .name_section
                            .func_names
                            .insert(index, name);
                    }
                }
                wasmparser::Name::Module { name, .. } => {
                    self.result.module.name = Some(name.to_string());
                    if self.tunables.generate_native_debuginfo {
                        self.result.debuginfo.name_section.module_name = Some(name);
                    }
                }
                wasmparser::Name::Local(reader) => {
                    if !self.tunables.generate_native_debuginfo {
                        continue;
                    }
                    for f in reader {
                        let f = f?;
                        // Skip this naming if it's naming a function that
                        // doesn't actually exist.
                        if (f.index as usize) >= self.result.module.functions.len() {
                            continue;
                        }
                        for name in f.names {
                            let Naming { index, name } = name?;

                            self.result
                                .debuginfo
                                .name_section
                                .locals_names
                                .entry(FuncIndex::from_u32(f.index))
                                .or_insert(HashMap::new())
                                .insert(index, name);
                        }
                    }
                }
                wasmparser::Name::Label(_)
                | wasmparser::Name::Type(_)
                | wasmparser::Name::Table(_)
                | wasmparser::Name::Global(_)
                | wasmparser::Name::Memory(_)
                | wasmparser::Name::Element(_)
                | wasmparser::Name::Data(_)
                | wasmparser::Name::Tag(_)
                | wasmparser::Name::Field(_)
                | wasmparser::Name::Unknown { .. } => {}
            }
        }
        Ok(())
    }
}

impl TypeConvert for ModuleEnvironment<'_, '_> {
    fn lookup_heap_type(&self, index: wasmparser::UnpackedIndex) -> WasmHeapType {
        WasmparserTypeConverter::new(&self.types, |idx| {
            self.result.module.types[idx].unwrap_module_type_index()
        })
        .lookup_heap_type(index)
    }

    fn lookup_type_index(&self, index: wasmparser::UnpackedIndex) -> EngineOrModuleTypeIndex {
        WasmparserTypeConverter::new(&self.types, |idx| {
            self.result.module.types[idx].unwrap_module_type_index()
        })
        .lookup_type_index(index)
    }
}

impl ModuleTranslation<'_> {
    /// Attempts to convert segmented memory initialization into static
    /// initialization for the module that this translation represents.
    ///
    /// If this module's memory initialization is not compatible with paged
    /// initialization then this won't change anything. Otherwise if it is
    /// compatible then the `memory_initialization` field will be updated.
    ///
    /// Takes a `page_size` argument in order to ensure that all
    /// initialization is page-aligned for mmap-ability, and
    /// `max_image_size_always_allowed` to control how we decide
    /// whether to use static init.
    ///
    /// We will try to avoid generating very sparse images, which are
    /// possible if e.g. a module has an initializer at offset 0 and a
    /// very high offset (say, 1 GiB). To avoid this, we use a dual
    /// condition: we always allow images less than
    /// `max_image_size_always_allowed`, and the embedder of Wasmtime
    /// can set this if desired to ensure that static init should
    /// always be done if the size of the module or its heaps is
    /// otherwise bounded by the system. We also allow images with
    /// static init data bigger than that, but only if it is "dense",
    /// defined as having at least half (50%) of its pages with some
    /// data.
    ///
    /// We could do something slightly better by building a dense part
    /// and keeping a sparse list of outlier/leftover segments (see
    /// issue #3820). This would also allow mostly-static init of
    /// modules that have some dynamically-placed data segments. But,
    /// for now, this is sufficient to allow a system that "knows what
    /// it's doing" to always get static init.
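    ///
    /// As a worked example of the dual condition (the numbers are
    /// illustrative, not defaults): a module with 64 KiB of data split
    /// between offset 0 and offset 1 GiB has an image extent of roughly
    /// 1 GiB. That is neither smaller than twice the 64 KiB of data
    /// (dense) nor smaller than any modest
    /// `max_image_size_always_allowed`, so initialization stays
    /// segmented and runs at instantiation time.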
    pub fn try_static_init(&mut self, page_size: u64, max_image_size_always_allowed: u64) {
        // This method only attempts to transform a `Segmented` memory init
        // into a `Static` one, no other state.
        if !self.module.memory_initialization.is_segmented() {
            return;
        }

        // First a dry run of memory initialization is performed. This
        // collects information about the extent of memory initialized for each
        // memory as well as the size of all data segments being copied in.
        struct Memory {
            data_size: u64,
            min_addr: u64,
            max_addr: u64,
            // The `usize` here is a pointer into `self.data` which is the list
            // of data segments corresponding to what was found in the original
            // wasm module.
            segments: Vec<(usize, StaticMemoryInitializer)>,
        }
        let mut info = PrimaryMap::with_capacity(self.module.memories.len());
        for _ in 0..self.module.memories.len() {
            info.push(Memory {
                data_size: 0,
                min_addr: u64::MAX,
                max_addr: 0,
                segments: Vec::new(),
            });
        }

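        // A compile-time "interpreter" for memory initialization: instead of
        // writing any bytes it records, per memory, the bounds and the list
        // of segments that initialization would touch.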
        struct InitMemoryAtCompileTime<'a> {
            module: &'a Module,
            info: &'a mut PrimaryMap<MemoryIndex, Memory>,
            idx: usize,
        }
        impl InitMemory for InitMemoryAtCompileTime<'_> {
            fn memory_size_in_bytes(
                &mut self,
                memory_index: MemoryIndex,
            ) -> Result<u64, SizeOverflow> {
                self.module.memories[memory_index].minimum_byte_size()
            }

            fn eval_offset(&mut self, memory_index: MemoryIndex, expr: &ConstExpr) -> Option<u64> {
                match (expr.ops(), self.module.memories[memory_index].idx_type) {
                    (&[ConstOp::I32Const(offset)], IndexType::I32) => {
                        Some(offset.cast_unsigned().into())
                    }
                    (&[ConstOp::I64Const(offset)], IndexType::I64) => Some(offset.cast_unsigned()),
                    _ => None,
                }
            }

            fn write(&mut self, memory: MemoryIndex, init: &StaticMemoryInitializer) -> bool {
                // Currently `Static` only applies to locally-defined memories,
                // so if a data segment references an imported memory then
                // transitioning to a `Static` memory initializer is not
                // possible.
                if self.module.defined_memory_index(memory).is_none() {
                    return false;
                };
                let info = &mut self.info[memory];
                let data_len = u64::from(init.data.end - init.data.start);
                if data_len > 0 {
                    info.data_size += data_len;
                    info.min_addr = info.min_addr.min(init.offset);
                    info.max_addr = info.max_addr.max(init.offset + data_len);
                    info.segments.push((self.idx, init.clone()));
                }
                self.idx += 1;
                true
            }
        }
        let ok = self
            .module
            .memory_initialization
            .init_memory(&mut InitMemoryAtCompileTime {
                idx: 0,
                module: &self.module,
                info: &mut info,
            });
        if !ok {
            return;
        }

        // Validate that the memory information collected is indeed valid for
        // static memory initialization.
        for (i, info) in info.iter().filter(|(_, info)| info.data_size > 0) {
            let image_size = info.max_addr - info.min_addr;

            // Simplify things for now by bailing out entirely if any memory has
            // a page size smaller than the host's page size. This fixes a case
            // where currently initializers are created in host-page-size units
            // of length which means that a larger-than-the-entire-memory
            // initializer can be created. This can be handled technically but
            // would require some more changes to help fix the assert elsewhere
            // that this protects against.
            if self.module.memories[i].page_size() < page_size {
                return;
            }

            // If the range of memory being initialized is less than twice the
            // total size of the data itself then it's assumed that static
            // initialization is ok. This means we'll at most double memory
            // consumption during the memory image creation process, which is
            // currently assumed to "probably be ok" but this will likely need
            // tweaks over time.
            if image_size < info.data_size.saturating_mul(2) {
                continue;
            }

            // If the memory initialization image is larger than the size of all
            // data, then we still allow memory initialization if the image will
            // be of a relatively modest size, such as 1MB here.
            if image_size < max_image_size_always_allowed {
                continue;
            }

            // At this point memory initialization is concluded to be too
            // expensive to do at compile time so it's entirely deferred to
            // happen at runtime.
            return;
        }

        // Here's where we've now committed to changing to static memory. The
        // memory initialization image is built here from the page data and then
        // it's converted to a single initializer.
        let data = mem::replace(&mut self.data, Vec::new());
        let mut map = PrimaryMap::with_capacity(info.len());
        let mut module_data_size = 0u32;
        for (memory, info) in info.iter() {
            // Create the in-memory `image` which is the initialized contents of
            // this linear memory.
            let extent = if info.segments.len() > 0 {
                (info.max_addr - info.min_addr) as usize
            } else {
                0
            };
            let mut image = Vec::with_capacity(extent);
            for (idx, init) in info.segments.iter() {
                let data = &data[*idx];
                assert_eq!(data.len(), init.data.len());
                let offset = usize::try_from(init.offset - info.min_addr).unwrap();
                if image.len() < offset {
                    image.resize(offset, 0u8);
                    image.extend_from_slice(data);
                } else {
                    image.splice(
                        offset..(offset + data.len()).min(image.len()),
                        data.iter().copied(),
                    );
                }
            }
            assert_eq!(image.len(), extent);
            assert_eq!(image.capacity(), extent);
            let mut offset = if info.segments.len() > 0 {
                info.min_addr
            } else {
                0
            };

            // Chop off trailing zeros from the image as memory is already
            // zero-initialized. Note that `i` is the position of a nonzero
            // entry here, so to not lose it we truncate to `i + 1`.
            if let Some(i) = image.iter().rposition(|i| *i != 0) {
                image.truncate(i + 1);
            }

            // Also chop off leading zeros, if any.
            if let Some(i) = image.iter().position(|i| *i != 0) {
                offset += i as u64;
                image.drain(..i);
            }
            let mut len = u64::try_from(image.len()).unwrap();

            // The goal is to enable mapping this image directly into memory, so
            // the offset into linear memory must be a multiple of the page
            // size. If that's not already the case then the image is padded at
            // the front and back with extra zeros as necessary.
            if offset % page_size != 0 {
                let zero_padding = offset % page_size;
                self.data.push(vec![0; zero_padding as usize].into());
                offset -= zero_padding;
                len += zero_padding;
            }
            self.data.push(image.into());
            if len % page_size != 0 {
                let zero_padding = page_size - (len % page_size);
                self.data.push(vec![0; zero_padding as usize].into());
                len += zero_padding;
            }
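            // For example, with `page_size == 4096`, an image of length 100 at
            // `offset == 5000` gets 5000 % 4096 == 904 zero bytes prepended
            // (moving `offset` back to 4096 and growing `len` to 1004) and
            // 4096 - (1004 % 4096) == 3092 zero bytes appended, yielding a
            // page-aligned extent of 4096..8192.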

            // Offset/length should now always be page-aligned.
            assert!(offset % page_size == 0);
            assert!(len % page_size == 0);

            // Create the `StaticMemoryInitializer` which describes this image,
            // only needed if the image is actually present and has a nonzero
            // length. The `offset` has been calculated above, originally
            // sourced from `info.min_addr`. The `data` field is the extent
            // within the final data segment we'll emit to an ELF image, which
            // is the concatenation of `self.data`, so here it's the size of
            // the section-so-far plus the current segment we're appending.
            let len = u32::try_from(len).unwrap();
            let init = if len > 0 {
                Some(StaticMemoryInitializer {
                    offset,
                    data: module_data_size..module_data_size + len,
                })
            } else {
                None
            };
            let idx = map.push(init);
            assert_eq!(idx, memory);
            module_data_size += len;
        }
        self.data_align = Some(page_size);
        self.module.memory_initialization = MemoryInitialization::Static { map };
    }

    /// Attempts to convert the module's table initializers to
    /// FuncTable form where possible. This enables lazy table
    /// initialization later by providing a one-to-one map of initial
    /// table values, without having to parse all segments.
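    ///
    /// For example (illustrative WAT, not from this file), a table declared
    /// as `(table 4 funcref)` with one active segment
    /// `(elem (i32.const 1) func $f $g)` becomes, conceptually, the
    /// precomputed initial-value list `[null, $f, $g, null]`, leaving no
    /// segment to interpret at instantiation time.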
    pub fn try_func_table_init(&mut self) {
        // This should be large enough to support very large Wasm
        // modules with huge funcref tables, but small enough to avoid
        // OOMs or DoS on truly sparse tables.
        const MAX_FUNC_TABLE_SIZE: u64 = 1024 * 1024;

        // First convert any element-initialized tables to images of just that
        // single function if the minimum size of the table allows doing so.
        for ((_, init), (_, table)) in self
            .module
            .table_initialization
            .initial_values
            .iter_mut()
            .zip(
                self.module
                    .tables
                    .iter()
                    .skip(self.module.num_imported_tables),
            )
        {
            let table_size = table.limits.min;
            if table_size > MAX_FUNC_TABLE_SIZE {
                continue;
            }
            if let TableInitialValue::Expr(expr) = init {
                if let [ConstOp::RefFunc(f)] = expr.ops() {
                    *init = TableInitialValue::Null {
                        precomputed: vec![*f; table_size as usize],
                    };
                }
            }
        }

        let mut segments = mem::take(&mut self.module.table_initialization.segments)
            .into_iter()
            .peekable();

        // The goal of this loop is to interpret a table segment and apply it
        // "statically" to a local table. This will iterate over segments and
        // apply them one-by-one to each table.
        //
        // If any segment can't be applied, however, then this loop exits and
        // all remaining segments are placed back into the segment list. This is
        // because segments are supposed to be initialized one-at-a-time which
        // means that intermediate state is visible with respect to traps. If
        // anything isn't statically known to not trap it's pessimistically
        // assumed to trap meaning all further segment initializers must be
        // applied manually at instantiation time.
        while let Some(segment) = segments.peek() {
            let defined_index = match self.module.defined_table_index(segment.table_index) {
                Some(index) => index,
                // Skip imported tables: we can't provide a preconstructed
                // table for them, because their values depend on the
                // imported table overlaid with whatever segments we have.
                None => break,
            };

            // If the base of this segment is dynamic, then we can't
            // include it in the statically-built array of initial
            // contents.
            let offset = match segment.offset.ops() {
                &[ConstOp::I32Const(offset)] => u64::from(offset.cast_unsigned()),
                &[ConstOp::I64Const(offset)] => offset.cast_unsigned(),
                _ => break,
            };

            // Get the end of this segment. If out-of-bounds, or too
            // large for our dense table representation, then skip the
            // segment.
            let top = match offset.checked_add(segment.elements.len()) {
                Some(top) => top,
                None => break,
            };
            let table_size = self.module.tables[segment.table_index].limits.min;
            if top > table_size || top > MAX_FUNC_TABLE_SIZE {
                break;
            }

            match self.module.tables[segment.table_index]
                .ref_type
                .heap_type
                .top()
            {
                WasmHeapTopType::Func => {}
                // If this is not a funcref table, then we can't support a
                // pre-computed table of function indices. Technically this
                // initializer won't trap so we could continue processing
                // segments, but that's left as a future optimization if
                // necessary.
                WasmHeapTopType::Any
                | WasmHeapTopType::Extern
                | WasmHeapTopType::Cont
                | WasmHeapTopType::Exn => break,
            }

            // Function indices can be optimized here, but fully general
            // expressions are deferred to get evaluated at runtime.
            let function_elements = match &segment.elements {
                TableSegmentElements::Functions(indices) => indices,
                TableSegmentElements::Expressions(_) => break,
            };

            let precomputed =
                match &mut self.module.table_initialization.initial_values[defined_index] {
                    TableInitialValue::Null { precomputed } => precomputed,

                    // If this table is still listed as an initial value here
                    // then that means the initial size of the table doesn't
                    // support a precomputed function list, so skip this.
                    // Technically this won't trap so it's possible to process
                    // further initializers, but that's left as a future
                    // optimization.
                    TableInitialValue::Expr(_) => break,
                };

            // At this point we're committing to pre-initializing the table
            // with the `segment` that's being iterated over. This segment is
            // applied to the `precomputed` list for the table by ensuring
            // it's large enough to hold the segment and then copying the
            // segment into the precomputed list.
            if precomputed.len() < top as usize {
                precomputed.resize(top as usize, FuncIndex::reserved_value());
            }
            let dst = &mut precomputed[offset as usize..top as usize];
            dst.copy_from_slice(&function_elements);

            // advance the iterator to see the next segment
            let _ = segments.next();
        }
        self.module.table_initialization.segments = segments.collect();
    }
}