Path: blob/main/crates/environ/src/compile/stack_maps.rs
1693 views
use crate::obj::ELF_WASMTIME_STACK_MAP;1use crate::prelude::*;2use cranelift_bitset::CompoundBitSet;3use object::write::{Object, StandardSegment};4use object::{LittleEndian, SectionKind, U32Bytes};56/// Builder for the `ELF_WASMTIME_STACK_MAP` section in compiled executables.7///8/// This format is parsed by `crate::stack_map`.9///10/// The current layout of the format is:11///12/// ```text13/// ┌─────────────────────┬───── 0x00 (relative, not necessarily aligned)14/// │ count: 4-byte LE │15/// ├─────────────────────┼───── 0x0416/// │ pc1: 4-byte LE │17/// │ pc2: 4-byte LE │18/// │ ... │19/// │ pcN: 4-byte LE │20/// ├─────────────────────┼───── 0x04 + 4 * count21/// │ offset1: 4-byte LE │22/// │ offset1: 4-byte LE │23/// │ ... │24/// │ offsetN: 4-byte LE │25/// ├─────────────────────┼───── 0x04 + 8 * count26/// │ data[0]: 4-byte LE │27/// │ data[1]: 4-byte LE │28/// │ ... │29/// │ data[M]: 4-byte LE │30/// └─────────────────────┴───── 0x04 + 8 * count + 4 * M31/// ```32///33/// Here `count` is the size of the `pcN` and `offsetN` arrays. The two arrays34/// are the same size and have corresponding entries in one another. When35/// looking up a stack map for a particular program counter:36///37/// * A binary search is performed on the `pcN` array.38/// * The corresponding `offsetM` value is looked up once the `pcM` entry,39/// matching the lookup pc, is found.40/// * The `offsetM` value is used to access `data[offsetM]` which is an array of41/// 4-byte entries located after the `offset*` array. This stack map is then42/// encoded as below.43///44/// This encoding scheme is chosen so parsing this data structure effectively45/// isn't required. It's usable at-rest from a compiled artifact in a section of46/// an executable. Notably having offsets into the data array means that a stack47/// map is just a slice into the data array, and the entire data structure can48/// be "parsed" by reading `count` and otherwise just making sure various49/// offsets are in-bounds.50///51/// A stack map located at `data[offsetM]` is encoded as:52///53/// ```text54/// ┌───────────────────────────────────────────────────────┐55/// │ data[offsetM + 0]: frame_size: 4-byte LE │56/// ├───────────────────────────────────────────────────────┤57/// │ data[offsetM + 1]: count: 4-byte LE │58/// ├───────────────────────────────────────────────────────┤59/// │ data[offsetM + 2 + 0]: bitmap: 4-byte LE │60/// │ data[offsetM + 2 + 1]: bitmap: 4-byte LE │61/// │ ... │62/// │ data[offsetM + 2 + count - 1]: bitmap: 4-byte LE │63/// └───────────────────────────────────────────────────────┘64/// ```65///66/// Here `frame_size` and `count` are always greater than 0. Entries in the bit67/// map represent `stack_slot / 4` so must be multiplied by 4 to get the actual68/// stack offset entry. This is because all stack slots are aligned at 4 bytes69/// so by dividing them all by 4 we're able to compress the bit map that much70/// more.71#[derive(Default)]72pub struct StackMapSection {73pcs: Vec<U32Bytes<LittleEndian>>,74pointers_to_stack_map: Vec<U32Bytes<LittleEndian>>,75stack_map_data: Vec<U32Bytes<LittleEndian>>,76last_offset: u32,77}7879impl StackMapSection {80/// Appends stack map information for `code_offset` which has the specified81/// `frame_size` and `frame_offsets` are the active GC references.82pub fn push(83&mut self,84code_offset: u64,85frame_size: u32,86frame_offsets: impl ExactSizeIterator<Item = u32>,87) {88// NB: for now this only supports <=4GB text sections in object files.89// Alternative schemes will need to be created for >32-bit offsets to90// avoid making this section overly large.91let code_offset = u32::try_from(code_offset).unwrap();9293// Sanity-check to ensure that functions are pushed in-order, otherwise94// the `pcs` array won't be sorted which is our goal.95assert!(code_offset >= self.last_offset);96self.last_offset = code_offset;9798// Skip encoding information for this code offset if there's not99// actually anything in the stack map.100if frame_offsets.len() == 0 {101return;102}103104// Record parallel entries in `pcs`/`pointers_to_stack_map`.105self.pcs.push(U32Bytes::new(LittleEndian, code_offset));106self.pointers_to_stack_map.push(U32Bytes::new(107LittleEndian,108u32::try_from(self.stack_map_data.len()).unwrap(),109));110111// The frame data starts with the frame size and is then followed by112// `offsets` represented as a bit set.113self.stack_map_data114.push(U32Bytes::new(LittleEndian, frame_size));115116let mut bits = CompoundBitSet::<u32>::default();117for offset in frame_offsets {118assert!(offset % 4 == 0);119bits.insert((offset / 4) as usize);120}121let count = bits.iter_scalars().count();122self.stack_map_data123.push(U32Bytes::new(LittleEndian, count as u32));124for scalar in bits.iter_scalars() {125self.stack_map_data126.push(U32Bytes::new(LittleEndian, scalar.0));127}128}129130/// Finishes encoding this section into the `Object` provided.131pub fn append_to(self, obj: &mut Object) {132// Don't append anything for this section if there weren't any actual133// stack maps present, no need to waste space!134if self.pcs.is_empty() {135return;136}137let section = obj.add_section(138obj.segment_name(StandardSegment::Data).to_vec(),139ELF_WASMTIME_STACK_MAP.as_bytes().to_vec(),140SectionKind::ReadOnlyData,141);142143// NB: this matches the encoding expected by `lookup` in the144// `crate::stack_maps` module.145let amt = u32::try_from(self.pcs.len()).unwrap();146obj.append_section_data(section, &amt.to_le_bytes(), 1);147obj.append_section_data(section, object::bytes_of_slice(&self.pcs), 1);148obj.append_section_data(149section,150object::bytes_of_slice(&self.pointers_to_stack_map),1511,152);153obj.append_section_data(section, object::bytes_of_slice(&self.stack_map_data), 1);154}155}156157#[cfg(test)]158mod tests {159use super::*;160use crate::stack_map::StackMap;161use object::{Object, ObjectSection};162163fn roundtrip(maps: &[(u64, u32, &[u32])]) {164let mut section = StackMapSection::default();165for (pc, frame, offsets) in maps {166println!("append {pc}");167section.push(*pc, *frame, offsets.iter().copied());168}169let mut object = object::write::Object::new(170object::BinaryFormat::Elf,171object::Architecture::X86_64,172object::Endianness::Little,173);174section.append_to(&mut object);175let elf = object.write().unwrap();176177let image = object::File::parse(&elf[..]).unwrap();178let data = image179.sections()180.find(|s| s.name().ok() == Some(ELF_WASMTIME_STACK_MAP))181.unwrap()182.data()183.unwrap();184185for (pc, frame, offsets) in maps {186println!("lookup {pc}");187let map = match StackMap::lookup(*pc as u32, data) {188Some(map) => map,189None => {190assert!(offsets.is_empty());191continue;192}193};194assert_eq!(map.frame_size(), *frame);195196let map_offsets = map.offsets().collect::<Vec<_>>();197assert_eq!(map_offsets, *offsets);198}199200let mut expected = maps.iter();201'outer: for (pc, map) in StackMap::iter(data).unwrap() {202while let Some((expected_pc, expected_frame, expected_offsets)) = expected.next() {203if expected_offsets.is_empty() {204continue;205}206assert_eq!(*expected_pc, u64::from(pc));207assert_eq!(*expected_frame, map.frame_size());208let offsets = map.offsets().collect::<Vec<_>>();209assert_eq!(offsets, *expected_offsets);210continue 'outer;211}212panic!("didn't find {pc:#x} in expected list");213}214assert!(expected.next().is_none());215}216217#[test]218fn roundtrip_many() {219roundtrip(&[(0, 4, &[0])]);220roundtrip(&[221(0, 4, &[0]),222(4, 200, &[0, 4, 20, 180]),223(200, 20, &[12]),224(600, 0, &[]),225(800, 20, &[0, 4, 8, 12, 16]),226(1200, 2000, &[1800, 1804, 1808, 1900]),227]);228}229}230231232