use crate::InstanceState;
use crate::info::ModuleContext;
use rayon::iter::{IntoParallelIterator, ParallelExtend, ParallelIterator};
use std::convert::TryFrom;
use std::ops::Range;

/// The maximum number of data segments that we will emit. Most
/// engines support more than this, but we want to leave some
/// headroom.
const MAX_DATA_SEGMENTS: usize = 10_000;

/// A "snapshot" of Wasm state: how it differs from its default values after
/// initialization has run.
pub struct Snapshot {
    /// Maps global index to its initialized value.
    ///
    /// Note that this only tracks defined mutable globals, not all globals.
    pub globals: Vec<(u32, SnapshotVal)>,

    /// A new minimum size for each memory (in units of pages).
    pub memory_mins: Vec<u64>,

    /// Segments of non-zero memory.
    pub data_segments: Vec<DataSegment>,
}

/// A value from a snapshot, currently a subset of Wasm types that aren't
/// reference types.
#[expect(missing_docs, reason = "self-describing variants")]
pub enum SnapshotVal {
    I32(i32),
    I64(i64),
    F32(u32),
    F64(u64),
    V128(u128),
}

/// A data segment initializer for a memory.
#[derive(Clone)]
pub struct DataSegment {
    /// The index of this data segment's memory.
    pub memory_index: u32,

    /// This segment's data, copied out of the initialized memory it
    /// originated from.
    pub data: Vec<u8>,

    /// The offset within the memory that `data` should be copied to.
    pub offset: u64,

    /// Whether or not `memory_index` is a 64-bit memory.
    pub is64: bool,
}

/// Snapshot the given instance's globals and memories, recording how they
/// differ from the Wasm defaults.
//
// TODO: when we support reference types, we will have to snapshot tables.
pub async fn snapshot(module: &ModuleContext<'_>, ctx: &mut impl InstanceState) -> Snapshot {
    log::debug!("Snapshotting the initialized state");

    let globals = snapshot_globals(module, ctx).await;
    let (memory_mins, data_segments) = snapshot_memories(module, ctx).await;

    Snapshot {
        globals,
        memory_mins,
        data_segments,
    }
}

/// Get the initialized values of all globals.
async fn snapshot_globals(
    module: &ModuleContext<'_>,
    ctx: &mut impl InstanceState,
) -> Vec<(u32, SnapshotVal)> {
    log::debug!("Snapshotting global values");

    let mut ret = Vec::new();
    for (i, name) in module.defined_global_exports.as_ref().unwrap().iter() {
        let val = ctx.global_get(&name).await;
        ret.push((*i, val));
    }
    ret
}

#[derive(Clone)]
struct DataSegmentRange {
    memory_index: u32,
    range: Range<usize>,
}

impl DataSegmentRange {
    /// What is the gap between two consecutive data segments?
    ///
    /// `self` must be in front of `other` and they must not overlap with each
    /// other.
    fn gap(&self, other: &Self) -> usize {
        debug_assert_eq!(self.memory_index, other.memory_index);
        debug_assert!(self.range.end <= other.range.start);
        other.range.start - self.range.end
    }

    /// Merge two consecutive data segments.
    ///
    /// `self` must be in front of `other` and they must not overlap with each
    /// other.
    fn merge(&mut self, other: &Self) {
        debug_assert_eq!(self.memory_index, other.memory_index);
        debug_assert!(self.range.end <= other.range.start);
        self.range.end = other.range.end;
    }
}

/// Find the initialized minimum size (in pages) of each memory, as well as
/// all regions of non-zero memory.
async fn snapshot_memories(
    module: &ModuleContext<'_>,
    instance: &mut impl InstanceState,
) -> (Vec<u64>, Vec<DataSegment>) {
    log::debug!("Snapshotting memories");

    // Find and record non-zero regions of memory (in parallel).
    let mut memory_mins = vec![];
    let mut data_segments = vec![];
    let iter = module
        .defined_memories()
        .zip(module.defined_memory_exports.as_ref().unwrap());
    for ((memory_index, ty), name) in iter {
        instance
            .memory_contents(&name, |memory| {
                let page_size = 1 << ty.page_size_log2.unwrap_or(16);
                let num_wasm_pages = memory.len() / page_size;
                memory_mins.push(num_wasm_pages as u64);

                let memory_data = &memory[..];

                // Consider each Wasm page in parallel. Create data segments for each
                // region of non-zero memory.
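                // For example (illustrative bytes, not from the original): a
                // first page beginning with [0, 0, 7, 8, 0, 5, 0, ...] yields
                // the ranges 2..4 and 5..6.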
                data_segments.par_extend((0..num_wasm_pages).into_par_iter().flat_map(|i| {
                    let page_end = (i + 1) * page_size;
                    let mut start = i * page_size;
                    let mut segments = vec![];
                    while start < page_end {
                        let nonzero = match memory_data[start..page_end]
                            .iter()
                            .position(|byte| *byte != 0)
                        {
                            None => break,
                            Some(i) => i,
                        };
                        start += nonzero;
                        let end = memory_data[start..page_end]
                            .iter()
                            .position(|byte| *byte == 0)
                            .map_or(page_end, |zero| start + zero);
                        segments.push(DataSegmentRange {
                            memory_index,
                            range: start..end,
                        });
                        start = end;
                    }
                    segments
                }));
            })
            .await;
    }

    if data_segments.is_empty() {
        return (memory_mins, Vec::new());
    }

    // Sort data segments to enforce determinism in the face of the
    // parallelism above.
    data_segments.sort_by_key(|s| (s.memory_index, s.range.start));

    // Merge any contiguous segments (caused by spanning a Wasm page boundary,
    // and therefore created in separate logical threads above) or segments
    // that are within four bytes of each other. Four because this is the
    // minimum overhead of defining a new active data segment: one byte for
    // the memory index LEB, two for the memory offset init expression (one
    // for the `i32.const` opcode and another for the constant immediate LEB),
    // and finally one for the data length LEB.
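    //
    // For example (illustrative offsets): segments covering 0..4 and 6..10
    // have a two-byte gap. Merging them emits those two zero bytes as data
    // but saves at least the four bytes of overhead a separate segment would
    // cost, so merging is a net win.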
    const MIN_ACTIVE_SEGMENT_OVERHEAD: usize = 4;
    let mut merged_data_segments = Vec::with_capacity(data_segments.len());
    merged_data_segments.push(data_segments[0].clone());
    for b in &data_segments[1..] {
        let a = merged_data_segments.last_mut().unwrap();

        // Only merge segments for the same memory.
        if a.memory_index != b.memory_index {
            merged_data_segments.push(b.clone());
            continue;
        }

        // Only merge segments if they are contiguous or if it is definitely
        // more size-efficient than leaving them apart.
        let gap = a.gap(b);
        if gap > MIN_ACTIVE_SEGMENT_OVERHEAD {
            merged_data_segments.push(b.clone());
            continue;
        }

        // Okay, merge them together into `a` (so that the next segment can,
        // in turn, be merged into it) and then omit `b`!
        a.merge(b);
    }

    remove_excess_segments(&mut merged_data_segments);

    // With the final set of data segments now extract the actual data of each
    // memory, copying it into a `DataSegment`, to return the final list of
    // segments.
    //
    // Here the memories are iterated over again and, in tandem, the
    // `merged_data_segments` list is traversed to extract a `DataSegment` for
    // each range that `merged_data_segments` indicates. This relies on
    // `merged_data_segments` being sorted by `memory_index`, at least.
    let mut final_data_segments = Vec::with_capacity(merged_data_segments.len());
    let mut merged = merged_data_segments.iter().peekable();
    let iter = module
        .defined_memories()
        .zip(module.defined_memory_exports.as_ref().unwrap());
    for ((memory_index, ty), name) in iter {
        instance
            .memory_contents(&name, |memory| {
                while let Some(segment) = merged.next_if(|s| s.memory_index == memory_index) {
                    final_data_segments.push(DataSegment {
                        memory_index,
                        data: memory[segment.range.clone()].to_vec(),
                        offset: segment.range.start.try_into().unwrap(),
                        is64: ty.memory64,
                    });
                }
            })
            .await;
    }
    assert!(merged.next().is_none());

    (memory_mins, final_data_segments)
}

/// Engines apply a limit on how many segments a module may contain, and Wizer
/// can run afoul of it. When that happens, we need to merge data segments
/// together until our number of data segments fits within the limit.
fn remove_excess_segments(merged_data_segments: &mut Vec<DataSegmentRange>) {
    if merged_data_segments.len() < MAX_DATA_SEGMENTS {
        return;
    }

    // We need to remove `excess` number of data segments.
    let excess = merged_data_segments.len() - MAX_DATA_SEGMENTS;

    #[derive(Clone, Copy, PartialEq, Eq)]
    struct GapIndex {
        gap: u32,
        // Use a `u32` instead of `usize` to fit `GapIndex` within a word on
        // 64-bit systems, using less memory.
        index: u32,
    }

    // Find the gap between the end of one segment and the start of the next
    // (when they are both in the same memory). We will merge the `excess`
    // segments with the smallest gaps together. Because they are the smallest
    // gaps, this will bloat the size of our data segments the least.
    let mut smallest_gaps = Vec::with_capacity(merged_data_segments.len() - 1);
    for (index, w) in merged_data_segments.windows(2).enumerate() {
        if w[0].memory_index != w[1].memory_index {
            continue;
        }
        let gap = match u32::try_from(w[0].gap(&w[1])) {
            Ok(gap) => gap,
            // If the gap is larger than 4G, don't consider these two data
            // segments for merging; assume there are enough other data
            // segments close enough together to merge to get under the limit.
            Err(_) => continue,
        };
        let index = u32::try_from(index).unwrap();
        smallest_gaps.push(GapIndex { gap, index });
    }
    smallest_gaps.sort_unstable_by_key(|g| g.gap);
    smallest_gaps.truncate(excess);

    // Now merge the chosen segments together in reverse index order so that
    // merging two segments doesn't invalidate the indices of the segments we
    // have yet to merge.
    smallest_gaps.sort_unstable_by(|a, b| a.index.cmp(&b.index).reverse());
    for GapIndex { index, .. } in smallest_gaps {
        let index = usize::try_from(index).unwrap();
        let [a, b] = merged_data_segments
            .get_disjoint_mut([index, index + 1])
            .unwrap();
        a.merge(b);

        // Okay to use `swap_remove` here because, even though it makes
        // `merged_data_segments` unsorted, the segments are still sorted
        // within the range `0..index`, and future iterations will only
        // operate within that subregion because we are iterating from largest
        // to smallest indices.
        merged_data_segments.swap_remove(index + 1);
    }

    // Finally, sort the data segments again so that our output is
    // deterministic.
    merged_data_segments.sort_by_key(|s| (s.memory_index, s.range.start));
}
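
// Illustrative unit tests, added here as a sketch rather than taken from the
// original source. They exercise the private `DataSegmentRange` helpers and
// `remove_excess_segments` directly; the byte offsets used are arbitrary.
#[cfg(test)]
mod tests {
    use super::*;

    fn seg(memory_index: u32, range: Range<usize>) -> DataSegmentRange {
        DataSegmentRange {
            memory_index,
            range,
        }
    }

    #[test]
    fn gap_and_merge() {
        let mut a = seg(0, 0..4);
        let b = seg(0, 6..10);
        // Two zero bytes sit between `a` and `b`.
        assert_eq!(a.gap(&b), 2);
        // Merging extends `a` across the gap and over `b`.
        a.merge(&b);
        assert_eq!(a.range, 0..10);
    }

    #[test]
    fn remove_excess_segments_merges_smallest_gap() {
        // One segment more than the limit, each one byte wide. Segment `i`
        // covers `3 * i .. 3 * i + 1`, so every gap is two bytes, except that
        // segment 1 is shifted to leave a one-byte gap after segment 0,
        // making that pair the cheapest to merge.
        let mut segments: Vec<_> = (0..=MAX_DATA_SEGMENTS)
            .map(|i| seg(0, (3 * i)..(3 * i + 1)))
            .collect();
        segments[1].range = 2..3;

        remove_excess_segments(&mut segments);

        // Exactly one merge happened, and it was the smallest-gap pair.
        assert_eq!(segments.len(), MAX_DATA_SEGMENTS);
        assert_eq!(segments[0].range, 0..3);
        // The output is sorted again after merging.
        assert!(segments
            .windows(2)
            .all(|w| w[0].range.start < w[1].range.start));
    }
}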