Path: blob/main/crates/environ/src/compile/module_environ.rs
1693 views
use crate::module::{1FuncRefIndex, Initializer, MemoryInitialization, MemoryInitializer, Module, TableSegment,2TableSegmentElements,3};4use crate::{5ConstExpr, ConstOp, DataIndex, DefinedFuncIndex, ElemIndex, EngineOrModuleTypeIndex,6EntityIndex, EntityType, FuncIndex, GlobalIndex, IndexType, InitMemory, MemoryIndex,7ModuleInternedTypeIndex, ModuleTypesBuilder, PrimaryMap, SizeOverflow, StaticMemoryInitializer,8TableIndex, TableInitialValue, Tag, TagIndex, Tunables, TypeConvert, TypeIndex, WasmError,9WasmHeapTopType, WasmHeapType, WasmResult, WasmValType, WasmparserTypeConverter,10};11use crate::{StaticModuleIndex, prelude::*};12use anyhow::{Result, bail};13use cranelift_entity::SecondaryMap;14use cranelift_entity::packed_option::ReservedValue;15use std::borrow::Cow;16use std::collections::HashMap;17use std::mem;18use std::path::PathBuf;19use std::sync::Arc;20use wasmparser::{21CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind,22FuncToValidate, FunctionBody, KnownCustom, NameSectionReader, Naming, Parser, Payload, TypeRef,23Validator, ValidatorResources, types::Types,24};2526/// Object containing the standalone environment information.27pub struct ModuleEnvironment<'a, 'data> {28/// The current module being translated29result: ModuleTranslation<'data>,3031/// Intern'd types for this entire translation, shared by all modules.32types: &'a mut ModuleTypesBuilder,3334// Various bits and pieces of configuration35validator: &'a mut Validator,36tunables: &'a Tunables,37}3839/// The result of translating via `ModuleEnvironment`.40///41/// Function bodies are not yet translated, and data initializers have not yet42/// been copied out of the original buffer.43pub struct ModuleTranslation<'data> {44/// Module information.45pub module: Module,4647/// This module's index.48pub module_index: StaticModuleIndex,4950/// The input wasm binary.51///52/// This can be useful, for example, when modules are parsed from a53/// component and the embedder 
wants access to the raw wasm modules54/// themselves.55pub wasm: &'data [u8],5657/// References to the function bodies.58pub function_body_inputs: PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,5960/// For each imported function, the single statically-known defined function61/// that satisfies that import, if any. This is used to turn what would62/// otherwise be indirect calls through the imports table into direct calls,63/// when possible.64pub known_imported_functions:65SecondaryMap<FuncIndex, Option<(StaticModuleIndex, DefinedFuncIndex)>>,6667/// A list of type signatures which are considered exported from this68/// module, or those that can possibly be called. This list is sorted, and69/// trampolines for each of these signatures are required.70pub exported_signatures: Vec<ModuleInternedTypeIndex>,7172/// DWARF debug information, if enabled, parsed from the module.73pub debuginfo: DebugInfoData<'data>,7475/// Set if debuginfo was found but it was not parsed due to `Tunables`76/// configuration.77pub has_unparsed_debuginfo: bool,7879/// List of data segments found in this module which should be concatenated80/// together for the final compiled artifact.81///82/// These data segments, when concatenated, are indexed by the83/// `MemoryInitializer` type.84pub data: Vec<Cow<'data, [u8]>>,8586/// The desired alignment of `data` in the final data section of the object87/// file that we'll emit.88///89/// Note that this is 1 by default but `MemoryInitialization::Static` might90/// switch this to a higher alignment to facilitate mmap-ing data from91/// an object file into a linear memory.92pub data_align: Option<u64>,9394/// Total size of all data pushed onto `data` so far.95total_data: u32,9697/// List of passive element segments found in this module which will get98/// concatenated for the final artifact.99pub passive_data: Vec<&'data [u8]>,100101/// Total size of all passive data pushed into `passive_data` so far.102total_passive_data: u32,103104/// When 
we're parsing the code section this will be incremented so we know105/// which function is currently being defined.106code_index: u32,107108/// The type information of the current module made available at the end of the109/// validation process.110types: Option<Types>,111}112113impl<'data> ModuleTranslation<'data> {114/// Create a new translation for the module with the given index.115pub fn new(module_index: StaticModuleIndex) -> Self {116Self {117module_index,118module: Module::default(),119wasm: &[],120function_body_inputs: PrimaryMap::default(),121known_imported_functions: SecondaryMap::default(),122exported_signatures: Vec::default(),123debuginfo: DebugInfoData::default(),124has_unparsed_debuginfo: false,125data: Vec::default(),126data_align: None,127total_data: 0,128passive_data: Vec::default(),129total_passive_data: 0,130code_index: 0,131types: None,132}133}134135/// Returns a reference to the type information of the current module.136pub fn get_types(&self) -> &Types {137self.types138.as_ref()139.expect("module type information to be available")140}141}142143/// Contains function data: byte code and its offset in the module.144pub struct FunctionBodyData<'a> {145/// The body of the function, containing code and locals.146pub body: FunctionBody<'a>,147/// Validator for the function body148pub validator: FuncToValidate<ValidatorResources>,149}150151#[derive(Debug, Default)]152#[expect(missing_docs, reason = "self-describing fields")]153pub struct DebugInfoData<'a> {154pub dwarf: Dwarf<'a>,155pub name_section: NameSection<'a>,156pub wasm_file: WasmFileInfo,157pub debug_loc: gimli::DebugLoc<Reader<'a>>,158pub debug_loclists: gimli::DebugLocLists<Reader<'a>>,159pub debug_ranges: gimli::DebugRanges<Reader<'a>>,160pub debug_rnglists: gimli::DebugRngLists<Reader<'a>>,161pub debug_cu_index: gimli::DebugCuIndex<Reader<'a>>,162pub debug_tu_index: gimli::DebugTuIndex<Reader<'a>>,163}164165#[expect(missing_docs, reason = "self-describing")]166pub type Dwarf<'input> = 
gimli::Dwarf<Reader<'input>>;167168type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>;169170#[derive(Debug, Default)]171#[expect(missing_docs, reason = "self-describing fields")]172pub struct NameSection<'a> {173pub module_name: Option<&'a str>,174pub func_names: HashMap<FuncIndex, &'a str>,175pub locals_names: HashMap<FuncIndex, HashMap<u32, &'a str>>,176}177178#[derive(Debug, Default)]179#[expect(missing_docs, reason = "self-describing fields")]180pub struct WasmFileInfo {181pub path: Option<PathBuf>,182pub code_section_offset: u64,183pub imported_func_count: u32,184pub funcs: Vec<FunctionMetadata>,185}186187#[derive(Debug)]188#[expect(missing_docs, reason = "self-describing fields")]189pub struct FunctionMetadata {190pub params: Box<[WasmValType]>,191pub locals: Box<[(u32, WasmValType)]>,192}193194impl<'a, 'data> ModuleEnvironment<'a, 'data> {195/// Allocates the environment data structures.196pub fn new(197tunables: &'a Tunables,198validator: &'a mut Validator,199types: &'a mut ModuleTypesBuilder,200module_index: StaticModuleIndex,201) -> Self {202Self {203result: ModuleTranslation::new(module_index),204types,205tunables,206validator,207}208}209210/// Translate a wasm module using this environment.211///212/// This function will translate the `data` provided with `parser`,213/// validating everything along the way with this environment's validator.214///215/// The result of translation, [`ModuleTranslation`], contains everything216/// necessary to compile functions afterwards as well as learn type217/// information about the module at runtime.218pub fn translate(219mut self,220parser: Parser,221data: &'data [u8],222) -> Result<ModuleTranslation<'data>> {223self.result.wasm = data;224225for payload in parser.parse_all(data) {226self.translate_payload(payload?)?;227}228229Ok(self.result)230}231232fn translate_payload(&mut self, payload: Payload<'data>) -> Result<()> {233match payload {234Payload::Version {235num,236encoding,237range,238} => 
{239self.validator.version(num, encoding, &range)?;240match encoding {241Encoding::Module => {}242Encoding::Component => {243bail!("expected a WebAssembly module but was given a WebAssembly component")244}245}246}247248Payload::End(offset) => {249self.result.types = Some(self.validator.end(offset)?);250251// With the `escaped_funcs` set of functions finished252// we can calculate the set of signatures that are exported as253// the set of exported functions' signatures.254self.result.exported_signatures = self255.result256.module257.functions258.iter()259.filter_map(|(_, func)| {260if func.is_escaping() {261Some(func.signature.unwrap_module_type_index())262} else {263None264}265})266.collect();267self.result.exported_signatures.sort_unstable();268self.result.exported_signatures.dedup();269}270271Payload::TypeSection(types) => {272self.validator.type_section(&types)?;273274let count = self.validator.types(0).unwrap().core_type_count_in_module();275log::trace!("interning {count} Wasm types");276277let capacity = usize::try_from(count).unwrap();278self.result.module.types.reserve(capacity);279self.types.reserve_wasm_signatures(capacity);280281// Iterate over each *rec group* -- not type -- defined in the282// types section. Rec groups are the unit of canonicalization283// and therefore the unit at which we need to process at a284// time. `wasmparser` has already done the hard work of285// de-duplicating and canonicalizing the rec groups within the286// module for us, we just need to translate them into our data287// structures. 
Note that, if the Wasm defines duplicate rec288// groups, we need copy the duplicates over (shallowly) as well,289// so that our types index space doesn't have holes.290let mut type_index = 0;291while type_index < count {292let validator_types = self.validator.types(0).unwrap();293294// Get the rec group for the current type index, which is295// always the first type defined in a rec group.296log::trace!("looking up wasmparser type for index {type_index}");297let core_type_id = validator_types.core_type_at_in_module(type_index);298log::trace!(299" --> {core_type_id:?} = {:?}",300validator_types[core_type_id],301);302let rec_group_id = validator_types.rec_group_id_of(core_type_id);303debug_assert_eq!(304validator_types305.rec_group_elements(rec_group_id)306.position(|id| id == core_type_id),307Some(0)308);309310// Intern the rec group and then fill in this module's types311// index space.312let interned = self.types.intern_rec_group(validator_types, rec_group_id)?;313let elems = self.types.rec_group_elements(interned);314let len = elems.len();315self.result.module.types.reserve(len);316for ty in elems {317self.result.module.types.push(ty.into());318}319320// Advance `type_index` to the start of the next rec group.321type_index += u32::try_from(len).unwrap();322}323}324325Payload::ImportSection(imports) => {326self.validator.import_section(&imports)?;327328let cnt = usize::try_from(imports.count()).unwrap();329self.result.module.initializers.reserve(cnt);330331for entry in imports {332let import = entry?;333let ty = match import.ty {334TypeRef::Func(index) => {335let index = TypeIndex::from_u32(index);336let interned_index = self.result.module.types[index];337self.result.module.num_imported_funcs += 1;338self.result.debuginfo.wasm_file.imported_func_count += 1;339EntityType::Function(interned_index)340}341TypeRef::Memory(ty) => {342self.result.module.num_imported_memories += 1;343EntityType::Memory(ty.into())344}345TypeRef::Global(ty) => 
{346self.result.module.num_imported_globals += 1;347EntityType::Global(self.convert_global_type(&ty)?)348}349TypeRef::Table(ty) => {350self.result.module.num_imported_tables += 1;351EntityType::Table(self.convert_table_type(&ty)?)352}353TypeRef::Tag(ty) => {354let index = TypeIndex::from_u32(ty.func_type_idx);355let signature = self.result.module.types[index];356let exception = self.types.define_exception_type_for_tag(357signature.unwrap_module_type_index(),358);359let tag = Tag {360signature,361exception: EngineOrModuleTypeIndex::Module(exception),362};363self.result.module.num_imported_tags += 1;364EntityType::Tag(tag)365}366};367self.declare_import(import.module, import.name, ty);368}369}370371Payload::FunctionSection(functions) => {372self.validator.function_section(&functions)?;373374let cnt = usize::try_from(functions.count()).unwrap();375self.result.module.functions.reserve_exact(cnt);376377for entry in functions {378let sigindex = entry?;379let ty = TypeIndex::from_u32(sigindex);380let interned_index = self.result.module.types[ty];381self.result.module.push_function(interned_index);382}383}384385Payload::TableSection(tables) => {386self.validator.table_section(&tables)?;387let cnt = usize::try_from(tables.count()).unwrap();388self.result.module.tables.reserve_exact(cnt);389390for entry in tables {391let wasmparser::Table { ty, init } = entry?;392let table = self.convert_table_type(&ty)?;393self.result.module.needs_gc_heap |= table.ref_type.is_vmgcref_type();394self.result.module.tables.push(table);395let init = match init {396wasmparser::TableInit::RefNull => TableInitialValue::Null {397precomputed: Vec::new(),398},399wasmparser::TableInit::Expr(expr) => {400let (init, escaped) = ConstExpr::from_wasmparser(self, expr)?;401for f in escaped {402self.flag_func_escaped(f);403}404TableInitialValue::Expr(init)405}406};407self.result408.module409.table_initialization410.initial_values411.push(init);412}413}414415Payload::MemorySection(memories) => 
{416self.validator.memory_section(&memories)?;417418let cnt = usize::try_from(memories.count()).unwrap();419self.result.module.memories.reserve_exact(cnt);420421for entry in memories {422let memory = entry?;423self.result.module.memories.push(memory.into());424}425}426427Payload::TagSection(tags) => {428self.validator.tag_section(&tags)?;429430for entry in tags {431let sigindex = entry?.func_type_idx;432let ty = TypeIndex::from_u32(sigindex);433let interned_index = self.result.module.types[ty];434let exception = self435.types436.define_exception_type_for_tag(interned_index.unwrap_module_type_index());437self.result.module.push_tag(interned_index, exception);438}439}440441Payload::GlobalSection(globals) => {442self.validator.global_section(&globals)?;443444let cnt = usize::try_from(globals.count()).unwrap();445self.result.module.globals.reserve_exact(cnt);446447for entry in globals {448let wasmparser::Global { ty, init_expr } = entry?;449let (initializer, escaped) = ConstExpr::from_wasmparser(self, init_expr)?;450for f in escaped {451self.flag_func_escaped(f);452}453let ty = self.convert_global_type(&ty)?;454self.result.module.globals.push(ty);455self.result.module.global_initializers.push(initializer);456}457}458459Payload::ExportSection(exports) => {460self.validator.export_section(&exports)?;461462let cnt = usize::try_from(exports.count()).unwrap();463self.result.module.exports.reserve(cnt);464465for entry in exports {466let wasmparser::Export { name, kind, index } = entry?;467let entity = match kind {468ExternalKind::Func => {469let index = FuncIndex::from_u32(index);470self.flag_func_escaped(index);471EntityIndex::Function(index)472}473ExternalKind::Table => EntityIndex::Table(TableIndex::from_u32(index)),474ExternalKind::Memory => EntityIndex::Memory(MemoryIndex::from_u32(index)),475ExternalKind::Global => EntityIndex::Global(GlobalIndex::from_u32(index)),476ExternalKind::Tag => 
EntityIndex::Tag(TagIndex::from_u32(index)),477};478self.result479.module480.exports481.insert(String::from(name), entity);482}483}484485Payload::StartSection { func, range } => {486self.validator.start_section(func, &range)?;487488let func_index = FuncIndex::from_u32(func);489self.flag_func_escaped(func_index);490debug_assert!(self.result.module.start_func.is_none());491self.result.module.start_func = Some(func_index);492}493494Payload::ElementSection(elements) => {495self.validator.element_section(&elements)?;496497for (index, entry) in elements.into_iter().enumerate() {498let wasmparser::Element {499kind,500items,501range: _,502} = entry?;503504// Build up a list of `FuncIndex` corresponding to all the505// entries listed in this segment. Note that it's not506// possible to create anything other than a `ref.null507// extern` for externref segments, so those just get508// translated to the reserved value of `FuncIndex`.509let elements = match items {510ElementItems::Functions(funcs) => {511let mut elems =512Vec::with_capacity(usize::try_from(funcs.count()).unwrap());513for func in funcs {514let func = FuncIndex::from_u32(func?);515self.flag_func_escaped(func);516elems.push(func);517}518TableSegmentElements::Functions(elems.into())519}520ElementItems::Expressions(_ty, items) => {521let mut exprs =522Vec::with_capacity(usize::try_from(items.count()).unwrap());523for expr in items {524let (expr, escaped) = ConstExpr::from_wasmparser(self, expr?)?;525exprs.push(expr);526for func in escaped {527self.flag_func_escaped(func);528}529}530TableSegmentElements::Expressions(exprs.into())531}532};533534match kind {535ElementKind::Active {536table_index,537offset_expr,538} => {539let table_index = TableIndex::from_u32(table_index.unwrap_or(0));540let (offset, escaped) = ConstExpr::from_wasmparser(self, offset_expr)?;541debug_assert!(escaped.is_empty());542543self.result544.module545.table_initialization546.segments547.push(TableSegment 
{548table_index,549offset,550elements,551});552}553554ElementKind::Passive => {555let elem_index = ElemIndex::from_u32(index as u32);556let index = self.result.module.passive_elements.len();557self.result.module.passive_elements.push(elements);558self.result559.module560.passive_elements_map561.insert(elem_index, index);562}563564ElementKind::Declared => {}565}566}567}568569Payload::CodeSectionStart { count, range, .. } => {570self.validator.code_section_start(&range)?;571let cnt = usize::try_from(count).unwrap();572self.result.function_body_inputs.reserve_exact(cnt);573self.result.debuginfo.wasm_file.code_section_offset = range.start as u64;574}575576Payload::CodeSectionEntry(body) => {577let validator = self.validator.code_section_entry(&body)?;578let func_index =579self.result.code_index + self.result.module.num_imported_funcs as u32;580let func_index = FuncIndex::from_u32(func_index);581582if self.tunables.generate_native_debuginfo {583let sig_index = self.result.module.functions[func_index]584.signature585.unwrap_module_type_index();586let sig = self.types[sig_index].unwrap_func();587let mut locals = Vec::new();588for pair in body.get_locals_reader()? 
{589let (cnt, ty) = pair?;590let ty = self.convert_valtype(ty)?;591locals.push((cnt, ty));592}593self.result594.debuginfo595.wasm_file596.funcs597.push(FunctionMetadata {598locals: locals.into_boxed_slice(),599params: sig.params().into(),600});601}602self.result603.function_body_inputs604.push(FunctionBodyData { validator, body });605self.result.code_index += 1;606}607608Payload::DataSection(data) => {609self.validator.data_section(&data)?;610611let initializers = match &mut self.result.module.memory_initialization {612MemoryInitialization::Segmented(i) => i,613_ => unreachable!(),614};615616let cnt = usize::try_from(data.count()).unwrap();617initializers.reserve_exact(cnt);618self.result.data.reserve_exact(cnt);619620for (index, entry) in data.into_iter().enumerate() {621let wasmparser::Data {622kind,623data,624range: _,625} = entry?;626let mk_range = |total: &mut u32| -> Result<_, WasmError> {627let range = u32::try_from(data.len())628.ok()629.and_then(|size| {630let start = *total;631let end = start.checked_add(size)?;632Some(start..end)633})634.ok_or_else(|| {635WasmError::Unsupported(format!(636"more than 4 gigabytes of data in wasm module",637))638})?;639*total += range.end - range.start;640Ok(range)641};642match kind {643DataKind::Active {644memory_index,645offset_expr,646} => {647let range = mk_range(&mut self.result.total_data)?;648let memory_index = MemoryIndex::from_u32(memory_index);649let (offset, escaped) = ConstExpr::from_wasmparser(self, offset_expr)?;650debug_assert!(escaped.is_empty());651652let initializers = match &mut self.result.module.memory_initialization {653MemoryInitialization::Segmented(i) => i,654_ => unreachable!(),655};656initializers.push(MemoryInitializer {657memory_index,658offset,659data: range,660});661self.result.data.push(data.into());662}663DataKind::Passive => {664let data_index = DataIndex::from_u32(index as u32);665let range = mk_range(&mut 
self.result.total_passive_data)?;666self.result.passive_data.push(data);667self.result668.module669.passive_data_map670.insert(data_index, range);671}672}673}674}675676Payload::DataCountSection { count, range } => {677self.validator.data_count_section(count, &range)?;678679// Note: the count passed in here is the *total* segment count680// There is no way to reserve for just the passive segments as681// they are discovered when iterating the data section entries682// Given that the total segment count might be much larger than683// the passive count, do not reserve anything here.684}685686Payload::CustomSection(s)687if s.name() == "webidl-bindings" || s.name() == "wasm-interface-types" =>688{689bail!(690"\691Support for interface types has temporarily been removed from `wasmtime`.692693For more information about this temporary change you can read on the issue online:694695https://github.com/bytecodealliance/wasmtime/issues/1271696697and for re-adding support for interface types you can see this issue:698699https://github.com/bytecodealliance/wasmtime/issues/677700"701)702}703704Payload::CustomSection(s) => {705self.register_custom_section(&s);706}707708// It's expected that validation will probably reject other709// payloads such as `UnknownSection` or those related to the710// component model. 
If, however, something gets past validation then711// that's a bug in Wasmtime as we forgot to implement something.712other => {713self.validator.payload(&other)?;714panic!("unimplemented section in wasm file {other:?}");715}716}717Ok(())718}719720fn register_custom_section(&mut self, section: &CustomSectionReader<'data>) {721match section.as_known() {722KnownCustom::Name(name) => {723let result = self.name_section(name);724if let Err(e) = result {725log::warn!("failed to parse name section {e:?}");726}727}728_ => {729let name = section.name().trim_end_matches(".dwo");730if name.starts_with(".debug_") {731self.dwarf_section(name, section);732}733}734}735}736737fn dwarf_section(&mut self, name: &str, section: &CustomSectionReader<'data>) {738if !self.tunables.generate_native_debuginfo && !self.tunables.parse_wasm_debuginfo {739self.result.has_unparsed_debuginfo = true;740return;741}742let info = &mut self.result.debuginfo;743let dwarf = &mut info.dwarf;744let endian = gimli::LittleEndian;745let data = section.data();746let slice = gimli::EndianSlice::new(data, endian);747748match name {749// `gimli::Dwarf` fields.750".debug_abbrev" => dwarf.debug_abbrev = gimli::DebugAbbrev::new(data, endian),751".debug_addr" => dwarf.debug_addr = gimli::DebugAddr::from(slice),752".debug_info" => {753dwarf.debug_info = gimli::DebugInfo::new(data, endian);754}755".debug_line" => dwarf.debug_line = gimli::DebugLine::new(data, endian),756".debug_line_str" => dwarf.debug_line_str = gimli::DebugLineStr::from(slice),757".debug_str" => dwarf.debug_str = gimli::DebugStr::new(data, endian),758".debug_str_offsets" => dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(slice),759".debug_str_sup" => {760let mut dwarf_sup: Dwarf<'data> = Default::default();761dwarf_sup.debug_str = gimli::DebugStr::from(slice);762dwarf.sup = Some(Arc::new(dwarf_sup));763}764".debug_types" => dwarf.debug_types = gimli::DebugTypes::from(slice),765766// Additional fields.767".debug_loc" => info.debug_loc = 
gimli::DebugLoc::from(slice),768".debug_loclists" => info.debug_loclists = gimli::DebugLocLists::from(slice),769".debug_ranges" => info.debug_ranges = gimli::DebugRanges::new(data, endian),770".debug_rnglists" => info.debug_rnglists = gimli::DebugRngLists::new(data, endian),771772// DWARF package fields773".debug_cu_index" => info.debug_cu_index = gimli::DebugCuIndex::new(data, endian),774".debug_tu_index" => info.debug_tu_index = gimli::DebugTuIndex::new(data, endian),775776// We don't use these at the moment.777".debug_aranges" | ".debug_pubnames" | ".debug_pubtypes" => return,778other => {779log::warn!("unknown debug section `{other}`");780return;781}782}783784dwarf.ranges = gimli::RangeLists::new(info.debug_ranges, info.debug_rnglists);785dwarf.locations = gimli::LocationLists::new(info.debug_loc, info.debug_loclists);786}787788/// Declares a new import with the `module` and `field` names, importing the789/// `ty` specified.790///791/// Note that this method is somewhat tricky due to the implementation of792/// the module linking proposal. In the module linking proposal two-level793/// imports are recast as single-level imports of instances. 
That recasting794/// happens here by recording an import of an instance for the first time795/// we see a two-level import.796///797/// When the module linking proposal is disabled, however, disregard this798/// logic and instead work directly with two-level imports since no799/// instances are defined.800fn declare_import(&mut self, module: &'data str, field: &'data str, ty: EntityType) {801let index = self.push_type(ty);802self.result.module.initializers.push(Initializer::Import {803name: module.to_owned(),804field: field.to_owned(),805index,806});807}808809fn push_type(&mut self, ty: EntityType) -> EntityIndex {810match ty {811EntityType::Function(ty) => EntityIndex::Function({812let func_index = self813.result814.module815.push_function(ty.unwrap_module_type_index());816// Imported functions can escape; in fact, they've already done817// so to get here.818self.flag_func_escaped(func_index);819func_index820}),821EntityType::Table(ty) => EntityIndex::Table(self.result.module.tables.push(ty)),822EntityType::Memory(ty) => EntityIndex::Memory(self.result.module.memories.push(ty)),823EntityType::Global(ty) => EntityIndex::Global(self.result.module.globals.push(ty)),824EntityType::Tag(ty) => EntityIndex::Tag(self.result.module.tags.push(ty)),825}826}827828fn flag_func_escaped(&mut self, func: FuncIndex) {829let ty = &mut self.result.module.functions[func];830// If this was already assigned a funcref index no need to re-assign it.831if ty.is_escaping() {832return;833}834let index = self.result.module.num_escaped_funcs as u32;835ty.func_ref = FuncRefIndex::from_u32(index);836self.result.module.num_escaped_funcs += 1;837}838839/// Parses the Name section of the wasm module.840fn name_section(&mut self, names: NameSectionReader<'data>) -> WasmResult<()> {841for subsection in names {842match subsection? 
{843wasmparser::Name::Function(names) => {844for name in names {845let Naming { index, name } = name?;846// Skip this naming if it's naming a function that847// doesn't actually exist.848if (index as usize) >= self.result.module.functions.len() {849continue;850}851852// Store the name unconditionally, regardless of853// whether we're parsing debuginfo, since function854// names are almost always present in the855// final compilation artifact.856let index = FuncIndex::from_u32(index);857self.result858.debuginfo859.name_section860.func_names861.insert(index, name);862}863}864wasmparser::Name::Module { name, .. } => {865self.result.module.name = Some(name.to_string());866if self.tunables.generate_native_debuginfo {867self.result.debuginfo.name_section.module_name = Some(name);868}869}870wasmparser::Name::Local(reader) => {871if !self.tunables.generate_native_debuginfo {872continue;873}874for f in reader {875let f = f?;876// Skip this naming if it's naming a function that877// doesn't actually exist.878if (f.index as usize) >= self.result.module.functions.len() {879continue;880}881for name in f.names {882let Naming { index, name } = name?;883884self.result885.debuginfo886.name_section887.locals_names888.entry(FuncIndex::from_u32(f.index))889.or_insert(HashMap::new())890.insert(index, name);891}892}893}894wasmparser::Name::Label(_)895| wasmparser::Name::Type(_)896| wasmparser::Name::Table(_)897| wasmparser::Name::Global(_)898| wasmparser::Name::Memory(_)899| wasmparser::Name::Element(_)900| wasmparser::Name::Data(_)901| wasmparser::Name::Tag(_)902| wasmparser::Name::Field(_)903| wasmparser::Name::Unknown { .. 
} => {}
            }
        }
        Ok(())
    }
}

impl TypeConvert for ModuleEnvironment<'_, '_> {
    /// Resolves `index` to a heap type.
    ///
    /// Delegates to a `WasmparserTypeConverter` built over this
    /// environment's interned types; module-level type indices are mapped
    /// through the `types` table of the module being translated.
    fn lookup_heap_type(&self, index: wasmparser::UnpackedIndex) -> WasmHeapType {
        WasmparserTypeConverter::new(&self.types, |idx| {
            self.result.module.types[idx].unwrap_module_type_index()
        })
        .lookup_heap_type(index)
    }

    /// Resolves `index` to an engine-or-module type index.
    ///
    /// Uses the same `WasmparserTypeConverter` setup as `lookup_heap_type`.
    fn lookup_type_index(&self, index: wasmparser::UnpackedIndex) -> EngineOrModuleTypeIndex {
        WasmparserTypeConverter::new(&self.types, |idx| {
            self.result.module.types[idx].unwrap_module_type_index()
        })
        .lookup_type_index(index)
    }
}

impl ModuleTranslation<'_> {
    /// Attempts to convert segmented memory initialization into static
    /// initialization for the module that this translation represents.
    ///
    /// If this module's memory initialization is not compatible with paged
    /// initialization then this won't change anything. Otherwise if it is
    /// compatible then the `memory_initialization` field will be updated.
    ///
    /// Takes a `page_size` argument in order to ensure that all
    /// initialization is page-aligned for mmap-ability, and
    /// `max_image_size_always_allowed` to control how we decide
    /// whether to use static init.
    ///
    /// We will try to avoid generating very sparse images, which are
    /// possible if e.g. a module has an initializer at offset 0 and a
    /// very high offset (say, 1 GiB). To avoid this, we use a dual
    /// condition: we always allow images less than
    /// `max_image_size_always_allowed`, and the embedder of Wasmtime
    /// can set this if desired to ensure that static init should
    /// always be done if the size of the module or its heaps is
    /// otherwise bounded by the system. We also allow images with
    /// static init data bigger than that, but only if it is "dense",
    /// defined as having at least half (50%) of its pages with some
    /// data.
    ///
    /// We could do something slightly better by building a dense part
    /// and keeping a sparse list of outlier/leftover segments (see
    /// issue #3820). This would also allow mostly-static init of
    /// modules that have some dynamically-placed data segments. But,
    /// for now, this is sufficient to allow a system that "knows what
    /// it's doing" to always get static init.
    ///
    /// # Panics
    ///
    /// Panics if an internal invariant is violated while the image is being
    /// assembled: a recorded segment's length disagrees with its data, the
    /// assembled image does not match the computed extent, or a size does
    /// not fit in `usize`/`u32`.
    pub fn try_static_init(&mut self, page_size: u64, max_image_size_always_allowed: u64) {
        // This method only attempts to transform a `Segmented` memory init
        // into a `Static` one, no other state.
        if !self.module.memory_initialization.is_segmented() {
            return;
        }

        // First a dry run of memory initialization is performed. This
        // collects information about the extent of memory initialized for each
        // memory as well as the size of all data segments being copied in.
        struct Memory {
            data_size: u64,
            min_addr: u64,
            max_addr: u64,
            // The `usize` here is a pointer into `self.data` which is the list
            // of data segments corresponding to what was found in the original
            // wasm module.
            segments: Vec<(usize, StaticMemoryInitializer)>,
        }
        let mut info = PrimaryMap::with_capacity(self.module.memories.len());
        for _ in 0..self.module.memories.len() {
            info.push(Memory {
                data_size: 0,
                // Start with an "empty" range so the first segment written
                // establishes both bounds via `min`/`max`.
                min_addr: u64::MAX,
                max_addr: 0,
                segments: Vec::new(),
            });
        }

        struct InitMemoryAtCompileTime<'a> {
            module: &'a Module,
            info: &'a mut PrimaryMap<MemoryIndex, Memory>,
            // Index of the data segment currently being visited; advanced on
            // every successful `write`, including zero-length segments.
            idx: usize,
        }
        impl InitMemory for InitMemoryAtCompileTime<'_> {
            fn memory_size_in_bytes(
                &mut self,
                memory_index: MemoryIndex,
            ) -> Result<u64, SizeOverflow> {
                self.module.memories[memory_index].minimum_byte_size()
            }

            fn eval_offset(&mut self, memory_index: MemoryIndex, expr: &ConstExpr) -> Option<u64> {
                // Only a single constant whose width matches the memory's
                // index type can be evaluated here; anything else yields
                // `None`.
                match (expr.ops(), self.module.memories[memory_index].idx_type) {
                    (&[ConstOp::I32Const(offset)], IndexType::I32) => {
                        Some(offset.cast_unsigned().into())
                    }
                    (&[ConstOp::I64Const(offset)], IndexType::I64) => Some(offset.cast_unsigned()),
                    _ => None,
                }
            }

            fn write(&mut self, memory: MemoryIndex, init: &StaticMemoryInitializer) -> bool {
                // Currently `Static` only applies to locally-defined memories,
                // so if a data segment references an imported memory then
                // transitioning to a `Static` memory initializer is not
                // possible.
                if self.module.defined_memory_index(memory).is_none() {
                    return false;
                }
                let info = &mut self.info[memory];
                let data_len = u64::from(init.data.end - init.data.start);
                if data_len > 0 {
                    info.data_size += data_len;
                    info.min_addr = info.min_addr.min(init.offset);
                    info.max_addr = info.max_addr.max(init.offset + data_len);
                    info.segments.push((self.idx, init.clone()));
                }
                self.idx += 1;
                true
            }
        }
        let ok = self
            .module
            .memory_initialization
            .init_memory(&mut InitMemoryAtCompileTime {
                idx: 0,
                module: &self.module,
                info: &mut info,
            });
        if !ok {
            return;
        }

        // Validate that the memory information collected is indeed valid for
        // static memory initialization.
        for (i, info) in info.iter().filter(|(_, info)| info.data_size > 0) {
            let image_size = info.max_addr - info.min_addr;

            // Simplify things for now by bailing out entirely if any memory has
            // a page size smaller than the host's page size. This fixes a case
            // where currently initializers are created in host-page-size units
            // of length which means that a larger-than-the-entire-memory
            // initializer can be created. This can be handled technically but
            // would require some more changes to help fix the assert elsewhere
            // that this protects against.
            if self.module.memories[i].page_size() < page_size {
                return;
            }

            // If the range of memory being initialized is less than twice the
            // total size of the data itself then it's assumed that static
            // initialization is ok. This means we'll at most double memory
            // consumption during the memory image creation process, which is
            // currently assumed to "probably be ok" but this will likely need
            // tweaks over time.
            if image_size < info.data_size.saturating_mul(2) {
                continue;
            }

            // If the memory initialization image is larger than the size of all
            // data, then we still allow memory initialization if the image will
            // be of a relatively modest size, such as 1MB here.
            if image_size < max_image_size_always_allowed {
                continue;
            }

            // At this point memory initialization is concluded to be too
            // expensive to do at compile time so it's entirely deferred to
            // happen at runtime.
            return;
        }

        // Here's where we've now committed to changing to static memory. The
        // memory initialization image is built here from the page data and then
        // it's converted to a single initializer.
        let data = mem::take(&mut self.data);
        let mut map = PrimaryMap::with_capacity(info.len());
        let mut module_data_size = 0u32;
        for (memory, info) in info.iter() {
            // A memory without segments still has the sentinel bounds
            // (`min_addr == u64::MAX`, `max_addr == 0`), so the bounds must
            // only be consulted when at least one segment was recorded.
            let has_segments = !info.segments.is_empty();

            // Create the in-memory `image` which is the initialized contents of
            // this linear memory.
            let extent = if has_segments {
                usize::try_from(info.max_addr - info.min_addr).unwrap()
            } else {
                0
            };
            let mut image = Vec::with_capacity(extent);
            for (idx, init) in info.segments.iter() {
                let data = &data[*idx];
                assert_eq!(data.len(), init.data.len());
                let offset = usize::try_from(init.offset - info.min_addr).unwrap();
                if image.len() < offset {
                    // The segment starts past the current end: zero-fill the
                    // gap and append.
                    image.resize(offset, 0u8);
                    image.extend_from_slice(data);
                } else {
                    // The segment starts within (or exactly at the end of)
                    // the image: overwrite the overlapping part and let
                    // `splice` append whatever extends past the end.
                    image.splice(
                        offset..(offset + data.len()).min(image.len()),
                        data.iter().copied(),
                    );
                }
            }
            assert_eq!(image.len(), extent);
            assert_eq!(image.capacity(), extent);
            let mut offset = if has_segments { info.min_addr } else { 0 };

            // Chop off trailing zeros from the image as memory is already
            // zero-initialized. Note that `i` is the position of a nonzero
            // entry here, so to not lose it we truncate to `i + 1`. An image
            // with no nonzero byte has no such position and is left as-is.
            if let Some(i) = image.iter().rposition(|byte| *byte != 0) {
                image.truncate(i + 1);
            }

            // Also chop off leading zeros, if any.
            if let Some(i) = image.iter().position(|byte| *byte != 0) {
                offset += i as u64;
                image.drain(..i);
            }
            let mut len = u64::try_from(image.len()).unwrap();

            // The goal is to enable mapping this image directly into memory, so
            // the offset into linear memory must be a multiple of the page
            // size. If that's not already the case then the image is padded at
            // the front and back with extra zeros as necessary
            if offset % page_size != 0 {
                let zero_padding = offset % page_size;
                self.data.push(vec![0; zero_padding as usize].into());
                offset -= zero_padding;
                len += zero_padding;
            }
            self.data.push(image.into());
            if len % page_size != 0 {
                let zero_padding = page_size - (len % page_size);
                self.data.push(vec![0; zero_padding as usize].into());
                len += zero_padding;
            }

            // Offset/length should now always be page-aligned.
            assert!(offset % page_size == 0);
            assert!(len % page_size == 0);

            // Create the `StaticMemoryInitializer` which describes this image,
            // only needed if the image is actually present and has a nonzero
            // length. The `offset` has been calculated above, originally
            // sourced from `info.min_addr`. The `data` field is the extent
            // within the final data segment we'll emit to an ELF image, which
            // is the concatenation of `self.data`, so here it's the size of
            // the section-so-far plus the current segment we're appending.
            let len = u32::try_from(len).unwrap();
            let init = if len > 0 {
                Some(StaticMemoryInitializer {
                    offset,
                    data: module_data_size..module_data_size + len,
                })
            } else {
                None
            };
            let idx = map.push(init);
            assert_eq!(idx, memory);
            module_data_size += len;
        }
        self.data_align = Some(page_size);
        self.module.memory_initialization = MemoryInitialization::Static { map };
    }

    /// Attempts to convert the module's table initializers to
    /// FuncTable form where possible. This enables lazy table
    /// initialization later by providing a one-to-one map of initial
    /// table values, without having to parse all segments.
    ///
    /// Segments are applied in order; processing stops at the first segment
    /// that cannot be applied statically, and that segment plus all later
    /// ones are left in the module's segment list.
    pub fn try_func_table_init(&mut self) {
        // This should be large enough to support very large Wasm
        // modules with huge funcref tables, but small enough to avoid
        // OOMs or DoS on truly sparse tables.
        const MAX_FUNC_TABLE_SIZE: u64 = 1024 * 1024;

        // First convert any element-initialized tables to images of just that
        // single function if the minimum size of the table allows doing so.
        for ((_, init), (_, table)) in self
            .module
            .table_initialization
            .initial_values
            .iter_mut()
            .zip(
                self.module
                    .tables
                    .iter()
                    .skip(self.module.num_imported_tables),
            )
        {
            let table_size = table.limits.min;
            if table_size > MAX_FUNC_TABLE_SIZE {
                continue;
            }
            if let TableInitialValue::Expr(expr) = init {
                if let [ConstOp::RefFunc(f)] = expr.ops() {
                    *init = TableInitialValue::Null {
                        precomputed: vec![*f; table_size as usize],
                    };
                }
            }
        }

        let mut segments = mem::take(&mut self.module.table_initialization.segments)
            .into_iter()
            .peekable();

        // The goal of this loop is to interpret a table segment and apply it
        // "statically" to a local table. This will iterate over segments and
        // apply them one-by-one to each table.
        //
        // If any segment can't be applied, however, then this loop exits and
        // all remaining segments are placed back into the segment list. This is
        // because segments are supposed to be initialized one-at-a-time which
        // means that intermediate state is visible with respect to traps. If
        // anything isn't statically known to not trap it's pessimistically
        // assumed to trap meaning all further segment initializers must be
        // applied manually at instantiation time.
        while let Some(segment) = segments.peek() {
            // Skip imported tables: we can't provide a preconstructed
            // table for them, because their values depend on the
            // imported table overlaid with whatever segments we have.
            let Some(defined_index) = self.module.defined_table_index(segment.table_index) else {
                break;
            };

            // If the base of this segment is dynamic, then we can't
            // include it in the statically-built array of initial
            // contents.
            let offset = match segment.offset.ops() {
                &[ConstOp::I32Const(offset)] => u64::from(offset.cast_unsigned()),
                &[ConstOp::I64Const(offset)] => offset.cast_unsigned(),
                _ => break,
            };

            // Get the end of this segment. If out-of-bounds, or too
            // large for our dense table representation, then skip the
            // segment.
            let Some(top) = offset.checked_add(segment.elements.len()) else {
                break;
            };
            let table_size = self.module.tables[segment.table_index].limits.min;
            if top > table_size || top > MAX_FUNC_TABLE_SIZE {
                break;
            }

            match self.module.tables[segment.table_index]
                .ref_type
                .heap_type
                .top()
            {
                WasmHeapTopType::Func => {}
                // If this is not a funcref table, then we can't support a
                // pre-computed table of function indices. Technically this
                // initializer won't trap so we could continue processing
                // segments, but that's left as a future optimization if
                // necessary.
                WasmHeapTopType::Any
                | WasmHeapTopType::Extern
                | WasmHeapTopType::Cont
                | WasmHeapTopType::Exn => break,
            }

            // Function indices can be optimized here, but fully general
            // expressions are deferred to get evaluated at runtime.
            let function_elements = match &segment.elements {
                TableSegmentElements::Functions(indices) => indices,
                TableSegmentElements::Expressions(_) => break,
            };

            let precomputed =
                match &mut self.module.table_initialization.initial_values[defined_index] {
                    TableInitialValue::Null { precomputed } => precomputed,

                    // If this table is still listed as an initial value here
                    // then that means the initial size of the table doesn't
                    // support a precomputed function list, so skip this.
                    // Technically this won't trap so it's possible to process
                    // further initializers, but that's left as a future
                    // optimization.
                    TableInitialValue::Expr(_) => break,
                };

            // At this point we're committing to pre-initializing the table
            // with the `segment` that's being iterated over. This segment is
            // applied to the `precomputed` list for the table by ensuring
            // it's large enough to hold the segment and then copying the
            // segment into the precomputed list.
            //
            // `top` was bounded by `MAX_FUNC_TABLE_SIZE` above, so the
            // `as usize` conversions below cannot truncate.
            if precomputed.len() < top as usize {
                precomputed.resize(top as usize, FuncIndex::reserved_value());
            }
            let dst = &mut precomputed[offset as usize..top as usize];
            dst.copy_from_slice(function_elements);

            // advance the iterator to see the next segment
            let _ = segments.next();
        }
        self.module.table_initialization.segments = segments.collect();
    }
}