// cranelift/codegen/src/alias_analysis.rs
//! Alias analysis, consisting of a "last store" pass and a "memory
//! values" pass. These two passes operate as one fused pass, and so
//! are implemented together here.
//!
//! We partition memory state into several *disjoint pieces* of
//! "abstract state". There are a finite number of such pieces:
//! currently, we call them "heap", "table", "vmctx", and "other". Any
//! given address in memory belongs to exactly one disjoint piece.
//!
//! One never tracks which piece a concrete address belongs to at
//! runtime; this is a purely static concept. Instead, all
//! memory-accessing instructions (loads and stores) are labeled with
//! one of these four categories in the `MemFlags`. It is forbidden
//! for a load or store to access memory under one category and a
//! later load or store to access the same memory under a different
//! category. This is ensured to be true by construction during
//! frontend translation into CLIF and during legalization.
//!
//! Given that this non-aliasing property is ensured by the producer
//! of CLIF, we can compute a *may-alias* property: one load or store
//! may-alias another load or store if both access the same category
//! of abstract state.
//!
//! The "last store" pass helps to compute this aliasing: it scans the
//! code, finding at each program point the last instruction that
//! *might have* written to a given part of abstract state.
//!
//! We can't say for sure that the "last store" *did* actually write
//! that state, but we know for sure that no instruction *later* than
//! it (up to the current instruction) did. However, we can get a
//! must-alias property from this: if at a given load or store, we
//! look backward to the "last store", *AND* we find that it has
//! exactly the same address expression and type, then we know that
//! the current instruction's access *must* be to the same memory
//! location.
//!
//! To get this must-alias property, we compute a sparse table of
//! "memory values": these are known equivalences between SSA `Value`s
//! and particular locations in memory. The memory-values table is a
//! mapping from (last store, address expression, type) to SSA
//! value. At a store, we can insert into this table directly. At a
//! load, we can also insert, if we don't already have a value (from
//! the store that produced the load's value).
//!
//! Then we can do two optimizations at once given this table. If a
//! load accesses a location identified by a (last store, address,
//! type) key already in the table, we replace it with the SSA value
//! for that memory location. This is usually known as "redundant load
//! elimination" if the value came from an earlier load of the same
//! location, or "store-to-load forwarding" if the value came from an
//! earlier store to the same location.
//!
//! In theory we could also do *dead-store elimination*, where if a
//! store overwrites a key in the table, *and* if no other load/store
//! to the abstract state category occurred, *and* no other trapping
//! instruction occurred (at which point we need an up-to-date memory
//! state because post-trap-termination memory state can be observed),
//! *and* we can prove the original store could not have trapped, then
//! we can eliminate the original store. Because this is so complex,
//! and the conditions for doing it correctly when post-trap state
//! must be correct likely reduce the potential benefit, we don't yet
//! do this.

use crate::{
    cursor::{Cursor, FuncCursor},
    dominator_tree::DominatorTree,
    inst_predicates::{
        has_memory_fence_semantics, inst_addr_offset_type, inst_store_data, visit_block_succs,
    },
    ir::{AliasRegion, Block, Function, Inst, Opcode, Type, Value, immediates::Offset32},
    trace,
};
use cranelift_entity::{EntityRef, packed_option::PackedOption};
use rustc_hash::{FxHashMap, FxHashSet};

/// For a given program point, the vector of last-store instruction
/// indices for each disjoint category of abstract state.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct LastStores {
    heap: PackedOption<Inst>,
    table: PackedOption<Inst>,
    vmctx: PackedOption<Inst>,
    other: PackedOption<Inst>,
}

impl LastStores {
    /// Incorporate the effect of `inst` into this last-store state.
    ///
    /// A fence-semantics instruction clobbers every category; a store
    /// clobbers only the category named by its `MemFlags` alias
    /// region, or — conservatively — every category if the
    /// instruction carries no `MemFlags`.
    fn update(&mut self, func: &Function, inst: Inst) {
        let opcode = func.dfg.insts[inst].opcode();
        if has_memory_fence_semantics(opcode) {
            self.heap = inst.into();
            self.table = inst.into();
            self.vmctx = inst.into();
            self.other = inst.into();
        } else if opcode.can_store() {
            if let Some(memflags) = func.dfg.insts[inst].memflags() {
                match memflags.alias_region() {
                    None => self.other = inst.into(),
                    Some(AliasRegion::Heap) => self.heap = inst.into(),
                    Some(AliasRegion::Table) => self.table = inst.into(),
                    Some(AliasRegion::Vmctx) => self.vmctx = inst.into(),
                }
            } else {
                self.heap = inst.into();
                self.table = inst.into();
                self.vmctx = inst.into();
                self.other = inst.into();
            }
        }
    }

    /// Look up the last store relevant to `inst`'s memory access,
    /// selected by the alias region in its `MemFlags`. A load/store
    /// without `MemFlags` conservatively acts as its own last store;
    /// a non-memory instruction has none.
    fn get_last_store(&self, func: &Function, inst: Inst) -> PackedOption<Inst> {
        if let Some(memflags) = func.dfg.insts[inst].memflags() {
            match memflags.alias_region() {
                None => self.other,
                Some(AliasRegion::Heap) => self.heap,
                Some(AliasRegion::Table) => self.table,
                Some(AliasRegion::Vmctx) => self.vmctx,
            }
        } else if func.dfg.insts[inst].opcode().can_load()
            || func.dfg.insts[inst].opcode().can_store()
        {
            inst.into()
        } else {
            PackedOption::default()
        }
    }

    /// Dataflow meet with the state arriving from another
    /// predecessor: per category, keep the last-store value when both
    /// sides agree; where they disagree, `loc` (the merge block's
    /// first instruction) stands in as a fresh, conservative "last
    /// store" version.
    fn meet_from(&mut self, other: &LastStores, loc: Inst) {
        // NOTE(review): the `.into()` on the scrutinee appears to
        // resolve (via `impl From<T> for Option<T>`) to
        // `Option<PackedOption<Inst>>`, since the `=> a` / `=> b` arm
        // bodies must have the closure's `PackedOption<Inst>` return
        // type. If so, both tuple elements are always `Some(..)` and
        // the three `None`-pattern arms are unreachable; the
        // effective (still sound) behavior is "equal => keep,
        // different => loc". TODO confirm against upstream intent.
        let meet = |a: PackedOption<Inst>, b: PackedOption<Inst>| -> PackedOption<Inst> {
            match (a.into(), b.into()) {
                (None, None) => None.into(),
                (Some(a), None) => a,
                (None, Some(b)) => b,
                (Some(a), Some(b)) if a == b => a,
                _ => loc.into(),
            }
        };

        self.heap = meet(self.heap, other.heap);
        self.table = meet(self.table, other.table);
        self.vmctx = meet(self.vmctx, other.vmctx);
        self.other = meet(self.other, other.other);
    }
}

/// A key identifying a unique memory location.
///
/// For the result of a load to be equivalent to the result of another
/// load, or the store data from a store, we need for (i) the
/// "version" of memory (here ensured by having the same last store
/// instruction to touch the disjoint category of abstract state we're
/// accessing); (ii) the address must be the same (here ensured by
/// having the same SSA value, which doesn't change after computed);
/// (iii) the offset must be the same; and (iv) the accessed type and
/// extension mode (e.g., 8-to-32, signed) must be the same.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct MemoryLoc {
    last_store: PackedOption<Inst>,
    address: Value,
    offset: Offset32,
    ty: Type,
    /// We keep the *opcode* of the instruction that produced the
    /// value we record at this key if the opcode is anything other
    /// than an ordinary load or store. This is needed when we
    /// consider loads that extend the value: e.g., an 8-to-32
    /// sign-extending load will produce a 32-bit value from an 8-bit
    /// value in memory, so we can only reuse that (as part of RLE)
    /// for another load with the same extending opcode.
    ///
    /// We could improve the transform to insert explicit extend ops
    /// in place of extending loads when we know the memory value, but
    /// we haven't yet done this.
    extending_opcode: Option<Opcode>,
}

/// An alias-analysis pass.
pub struct AliasAnalysis<'a> {
    /// The domtree for the function.
    domtree: &'a DominatorTree,

    /// Input state to a basic block.
    block_input: FxHashMap<Block, LastStores>,

    /// Known memory-value equivalences. This is the result of the
    /// analysis. This is a mapping from (last store, address
    /// expression, offset, type) to SSA `Value`.
    ///
    /// We keep the defining inst around for quick dominance checks.
    mem_values: FxHashMap<MemoryLoc, (Inst, Value)>,
}

impl<'a> AliasAnalysis<'a> {
    /// Perform an alias analysis pass.
    pub fn new(func: &Function, domtree: &'a DominatorTree) -> AliasAnalysis<'a> {
        trace!("alias analysis: input is:\n{:?}", func);
        let mut analysis = AliasAnalysis {
            domtree,
            block_input: FxHashMap::default(),
            mem_values: FxHashMap::default(),
        };

        analysis.compute_block_input_states(func);
        analysis
    }

    /// Compute the per-block input `LastStores` states by a fixpoint
    /// worklist iteration over the CFG, starting from the entry
    /// block: propagate each block's output state into its
    /// successors, re-queueing a successor whenever its input state
    /// changes.
    fn compute_block_input_states(&mut self, func: &Function) {
        let mut queue = vec![];
        // Membership set mirroring `queue`, to avoid duplicate
        // worklist entries.
        let mut queue_set = FxHashSet::default();
        let entry = func.layout.entry_block().unwrap();
        queue.push(entry);
        queue_set.insert(entry);

        while let Some(block) = queue.pop() {
            queue_set.remove(&block);
            let mut state = *self
                .block_input
                .entry(block)
                .or_insert_with(|| LastStores::default());

            trace!(
                "alias analysis: input to block{} is {:?}",
                block.index(),
                state
            );

            // Walk the block to compute its output state.
            for inst in func.layout.block_insts(block) {
                state.update(func, inst);
                trace!("after inst{}: state is {:?}", inst.index(), state);
            }

            visit_block_succs(func, block, |_inst, succ, _from_table| {
                // The successor's first instruction serves as the
                // conservative merge point for `meet_from`.
                let succ_first_inst = func.layout.block_insts(succ).next().unwrap();
                let updated = match self.block_input.get_mut(&succ) {
                    Some(succ_state) => {
                        let old = *succ_state;
                        succ_state.meet_from(&state, succ_first_inst);
                        *succ_state != old
                    }
                    None => {
                        self.block_input.insert(succ, state);
                        true
                    }
                };

                if updated && queue_set.insert(succ) {
                    queue.push(succ);
                }
            });
        }
    }

    /// Get the starting state for a block.
    pub fn block_starting_state(&self, block: Block) -> LastStores {
        self.block_input
            .get(&block)
            .cloned()
            .unwrap_or_else(|| LastStores::default())
    }

    /// Process one instruction. Meant to be invoked in program order
    /// within a block, and ideally in RPO or at least some domtree
    /// preorder for maximal reuse.
    ///
    /// Returns the already-known `Value` that this (load)
    /// instruction's result can be aliased to, if any. (The original
    /// comment said "Returns `true` if instruction was removed" —
    /// stale: removal is performed by the caller,
    /// `compute_and_update_aliases`.)
    pub fn process_inst(
        &mut self,
        func: &mut Function,
        state: &mut LastStores,
        inst: Inst,
    ) -> Option<Value> {
        trace!(
            "alias analysis: scanning at inst{} with state {:?} ({:?})",
            inst.index(),
            state,
            func.dfg.insts[inst],
        );

        let replacing_value = if let Some((address, offset, ty)) = inst_addr_offset_type(func, inst)
        {
            let address = func.dfg.resolve_aliases(address);
            let opcode = func.dfg.insts[inst].opcode();

            if opcode.can_store() {
                // A store defines a new memory value keyed by itself
                // as the "last store".
                let store_data = inst_store_data(func, inst).unwrap();
                let store_data = func.dfg.resolve_aliases(store_data);
                let mem_loc = MemoryLoc {
                    last_store: inst.into(),
                    address,
                    offset,
                    ty,
                    extending_opcode: get_ext_opcode(opcode),
                };
                trace!(
                    "alias analysis: at inst{}: store with data v{} at loc {:?}",
                    inst.index(),
                    store_data.index(),
                    mem_loc
                );
                self.mem_values.insert(mem_loc, (inst, store_data));

                None
            } else if opcode.can_load() {
                let last_store = state.get_last_store(func, inst);
                let load_result = func.dfg.inst_results(inst)[0];
                let mem_loc = MemoryLoc {
                    last_store,
                    address,
                    offset,
                    ty,
                    extending_opcode: get_ext_opcode(opcode),
                };
                trace!(
                    "alias analysis: at inst{}: load with last_store inst{} at loc {:?}",
                    inst.index(),
                    last_store.map(|inst| inst.index()).unwrap_or(usize::MAX),
                    mem_loc
                );

                // Is there a Value already known to be stored
                // at this specific memory location? If so,
                // we can alias the load result to this
                // already-known Value.
                //
                // Check if the definition dominates this
                // location; it might not, if it comes from a
                // load (stores will always dominate though if
                // their `last_store` survives through
                // meet-points to this use-site).
                let aliased =
                    if let Some((def_inst, value)) = self.mem_values.get(&mem_loc).cloned() {
                        trace!(
                            " -> sees known value v{} from inst{}",
                            value.index(),
                            def_inst.index()
                        );
                        if self.domtree.dominates(def_inst, inst, &func.layout) {
                            trace!(
                                " -> dominates; value equiv from v{} to v{} inserted",
                                load_result.index(),
                                value.index()
                            );
                            Some(value)
                        } else {
                            None
                        }
                    } else {
                        None
                    };

                // Otherwise, we can keep *this* load around
                // as a new equivalent value.
                if aliased.is_none() {
                    trace!(
                        " -> inserting load result v{} at loc {:?}",
                        load_result.index(),
                        mem_loc
                    );
                    self.mem_values.insert(mem_loc, (inst, load_result));
                }

                aliased
            } else {
                None
            }
        } else {
            None
        };

        state.update(func, inst);

        replacing_value
    }

    /// Make a pass and update known-redundant loads to aliased
    /// values. We interleave the updates with the memory-location
    /// tracking because resolving some aliases may expose others
    /// (e.g. in cases of double-indirection with two separate chains
    /// of loads).
    pub fn compute_and_update_aliases(&mut self, func: &mut Function) {
        let mut pos = FuncCursor::new(func);

        while let Some(block) = pos.next_block() {
            let mut state = self.block_starting_state(block);
            while let Some(inst) = pos.next_inst() {
                if let Some(replaced_result) = self.process_inst(pos.func, &mut state, inst) {
                    // Rewrite the load's result as an alias of the
                    // known value, then delete the load itself.
                    let result = pos.func.dfg.inst_results(inst)[0];
                    pos.func.dfg.clear_results(inst);
                    pos.func.dfg.change_to_alias(result, replaced_result);
                    pos.remove_inst_and_step_back();
                }
            }
        }
    }
}

/// Classify a load/store opcode for `MemoryLoc::extending_opcode`:
/// plain `Load`/`Store` yield `None`; any other load/store opcode
/// (e.g. an extending load) is recorded so that memory-value keys
/// distinguish extension modes.
fn get_ext_opcode(op: Opcode) -> Option<Opcode> {
    debug_assert!(op.can_load() || op.can_store());
    match op {
        Opcode::Load | Opcode::Store => None,
        _ => Some(op),
    }
}