Path: blob/main/cranelift/codegen/src/remove_constant_phis.rs
1693 views
//! A Constant-Phi-Node removal pass.12use crate::dominator_tree::DominatorTree;3use crate::ir;4use crate::ir::Function;5use crate::ir::{Block, BlockArg, BlockCall, Inst, Value};6use crate::timing;7use bumpalo::Bump;8use cranelift_entity::SecondaryMap;9use rustc_hash::{FxHashMap, FxHashSet};10use smallvec::SmallVec;1112// A note on notation. For the sake of clarity, this file uses the phrase13// "formal parameters" to mean the `Value`s listed in the block head, and14// "actual parameters" to mean the `Value`s passed in a branch or a jump:15//16// block4(v16: i32, v18: i32): <-- formal parameters17// ...18// brif v27, block7(v22, v24), block6 <-- actual parameters1920// This transformation pass (conceptually) partitions all values in the21// function into two groups:22//23// * Group A: values defined by block formal parameters, except for the entry block.24//25// * Group B: All other values: that is, values defined by instructions,26// and the formals of the entry block.27//28// For each value in Group A, it attempts to establish whether it will have29// the value of exactly one member of Group B. If so, the formal parameter is30// deleted, all corresponding actual parameters (in jumps/branches to the31// defining block) are deleted, and a rename is inserted.32//33// The entry block is special-cased because (1) we don't know what values flow34// to its formals and (2) in any case we can't change its formals.35//36// Work proceeds in three phases.37//38// * Phase 1: examine all instructions. For each block, make up a useful39// grab-bag of information, `BlockSummary`, that summarises the block's40// formals and jump/branch instruction. This is used by Phases 2 and 3.41//42// * Phase 2: for each value in Group A, try to find a single Group B value43// that flows to it. This is done using a classical iterative forward44// dataflow analysis over a simple constant-propagation style lattice. It45// converges quickly in practice -- I have seen at most 4 iterations. This46// is relatively cheap because the iteration is done over the47// `BlockSummary`s, and does not visit each instruction. The resulting48// fixed point is stored in a `SolverState`.49//50// * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to51// remove redundant formals and actuals, and to insert suitable renames.52//53// Note that the effectiveness of the analysis depends on on the fact that54// there are no copy instructions in Cranelift's IR. If there were, the55// computation of `actual_absval` in Phase 2 would have to be extended to56// chase through such copies.57//58// For large functions, the analysis cost using the new AArch64 backend is about59// 0.6% of the non-optimising compile time, as measured by instruction counts.60// This transformation usually pays for itself several times over, though, by61// reducing the isel/regalloc cost downstream. Gains of up to 7% have been62// seen for large functions.6364/// The `Value`s (Group B) that can flow to a formal parameter (Group A).65#[derive(Clone, Copy, Debug, PartialEq)]66enum AbstractValue {67/// Two or more values flow to this formal.68Many,6970/// Exactly one value, as stated, flows to this formal. The `Value`s that71/// can appear here are exactly: `Value`s defined by `Inst`s, plus the72/// `Value`s defined by the formals of the entry block. Note that this is73/// exactly the set of `Value`s that are *not* tracked in the solver below74/// (see `SolverState`).75One(Value /*Group B*/),7677/// No value flows to this formal.78None,79}8081impl AbstractValue {82fn join(self, other: AbstractValue) -> AbstractValue {83match (self, other) {84// Joining with `None` has no effect85(AbstractValue::None, p2) => p2,86(p1, AbstractValue::None) => p1,87// Joining with `Many` produces `Many`88(AbstractValue::Many, _p2) => AbstractValue::Many,89(_p1, AbstractValue::Many) => AbstractValue::Many,90// The only interesting case91(AbstractValue::One(v1), AbstractValue::One(v2)) => {92if v1 == v2 {93AbstractValue::One(v1)94} else {95AbstractValue::Many96}97}98}99}100101fn is_one(self) -> bool {102matches!(self, AbstractValue::One(_))103}104}105106#[derive(Clone, Copy, Debug)]107struct OutEdge<'a> {108/// An instruction that transfers control.109inst: Inst,110/// The index into branch_destinations for this instruction that corresponds111/// to this edge.112branch_index: u32,113/// The block that control is transferred to.114block: Block,115/// The arguments to that block.116///117/// These values can be from both groups A and B.118args: &'a [BlockArg],119}120121impl<'a> OutEdge<'a> {122/// Construct a new `OutEdge` for the given instruction.123///124/// Returns `None` if this is an edge without any block arguments, which125/// means we can ignore it for this analysis's purposes.126#[inline]127fn new(128bump: &'a Bump,129dfg: &ir::DataFlowGraph,130inst: Inst,131branch_index: usize,132block: BlockCall,133) -> Option<Self> {134let inst_var_args = block.args(&dfg.value_lists);135136// Skip edges without params.137if inst_var_args.len() == 0 {138return None;139}140141Some(OutEdge {142inst,143branch_index: branch_index as u32,144block: block.block(&dfg.value_lists),145args: bump.alloc_slice_fill_iter(146inst_var_args.map(|arg| arg.map_value(|value| dfg.resolve_aliases(value))),147),148})149}150}151152/// For some block, a useful bundle of info. The `Block` itself is not stored153/// here since it will be the key in the associated `FxHashMap` -- see154/// `summaries` below. For the `SmallVec` tuning params: most blocks have155/// few parameters, hence `4`. And almost all blocks have either one or two156/// successors, hence `2`.157#[derive(Clone, Debug, Default)]158struct BlockSummary<'a> {159/// Formal parameters for this `Block`.160///161/// These values are from group A.162formals: &'a [Value],163164/// Each outgoing edge from this block.165///166/// We don't bother to include transfers that pass zero parameters167/// since that makes more work for the solver for no purpose.168///169/// We optimize for the case where a branch instruction has up to two170/// outgoing edges, as unconditional jumps and conditional branches are171/// more prominent than br_table.172dests: SmallVec<[OutEdge<'a>; 2]>,173}174175impl<'a> BlockSummary<'a> {176/// Construct a new `BlockSummary`, using `values` as its backing storage.177#[inline]178fn new(bump: &'a Bump, formals: &[Value]) -> Self {179Self {180formals: bump.alloc_slice_copy(formals),181dests: Default::default(),182}183}184}185186/// Solver state. This holds a AbstractValue for each formal parameter, except187/// for those from the entry block.188struct SolverState {189absvals: FxHashMap<Value /*Group A*/, AbstractValue>,190}191192impl SolverState {193fn new() -> Self {194Self {195absvals: FxHashMap::default(),196}197}198199fn get(&self, actual: Value) -> AbstractValue {200*self201.absvals202.get(&actual)203.unwrap_or_else(|| panic!("SolverState::get: formal param {actual:?} is untracked?!"))204}205206fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> {207self.absvals.get(&actual)208}209210fn set(&mut self, actual: Value, lp: AbstractValue) {211match self.absvals.insert(actual, lp) {212Some(_old_lp) => {}213None => panic!("SolverState::set: formal param {actual:?} is untracked?!"),214}215}216}217218/// Detect phis in `func` that will only ever produce one value, using a219/// classic forward dataflow analysis. Then remove them.220#[inline(never)]221pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) {222let _tt = timing::remove_constant_phis();223debug_assert!(domtree.is_valid());224225// Phase 1 of 3: for each block, make a summary containing all relevant226// info. The solver will iterate over the summaries, rather than having227// to inspect each instruction in each block.228let bump =229Bump::with_capacity(domtree.cfg_postorder().len() * 4 * std::mem::size_of::<Value>());230let mut summaries =231SecondaryMap::<Block, BlockSummary>::with_capacity(domtree.cfg_postorder().len());232233for b in domtree.cfg_rpo().copied() {234let formals = func.dfg.block_params(b);235let mut summary = BlockSummary::new(&bump, formals);236237for inst in func.layout.block_insts(b) {238for (ix, dest) in func.dfg.insts[inst]239.branch_destination(&func.dfg.jump_tables, &func.dfg.exception_tables)240.iter()241.enumerate()242{243if let Some(edge) = OutEdge::new(&bump, &func.dfg, inst, ix, *dest) {244summary.dests.push(edge);245}246}247}248249// Ensure the invariant that all blocks (except for the entry) appear250// in the summary, *unless* they have neither formals nor any251// param-carrying branches/jumps.252if formals.len() > 0 || summary.dests.len() > 0 {253summaries[b] = summary;254}255}256257// Phase 2 of 3: iterate over the summaries in reverse postorder,258// computing new `AbstractValue`s for each tracked `Value`. The set of259// tracked `Value`s is exactly Group A as described above.260261let entry_block = func262.layout263.entry_block()264.expect("remove_constant_phis: entry block unknown");265266// Set up initial solver state267let mut state = SolverState::new();268269for b in domtree.cfg_rpo().copied() {270// For each block, get the formals271if b == entry_block {272continue;273}274let formals = func.dfg.block_params(b);275for formal in formals {276let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None);277assert!(mb_old_absval.is_none());278}279}280281// Solve: repeatedly traverse the blocks in reverse postorder, until there282// are no changes.283let mut iter_no = 0;284loop {285iter_no += 1;286let mut changed = false;287288for src in domtree.cfg_rpo().copied() {289let src_summary = &summaries[src];290for edge in &src_summary.dests {291assert!(edge.block != entry_block);292// By contrast, the dst block must have a summary. Phase 1293// will have only included an entry in `src_summary.dests` if294// that branch/jump carried at least one parameter. So the295// dst block does take parameters, so it must have a summary.296let dst_summary = &summaries[edge.block];297let dst_formals = &dst_summary.formals;298assert_eq!(edge.args.len(), dst_formals.len());299for (formal, actual) in dst_formals.iter().zip(edge.args) {300// Find the abstract value for `actual`. If it is a block301// formal parameter then the most recent abstract value is302// to be found in the solver state. If not, then it's a303// real value defining point (not a phi), in which case304// return it itself.305let actual_absval = match actual {306BlockArg::Value(actual) => match state.maybe_get(*actual) {307Some(pt) => *pt,308None => AbstractValue::One(*actual),309},310_ => AbstractValue::Many,311};312313// And `join` the new value with the old.314let formal_absval_old = state.get(*formal);315let formal_absval_new = formal_absval_old.join(actual_absval);316if formal_absval_new != formal_absval_old {317changed = true;318state.set(*formal, formal_absval_new);319}320}321}322}323324if !changed {325break;326}327}328329let mut n_consts = 0;330for absval in state.absvals.values() {331if absval.is_one() {332n_consts += 1;333}334}335336// Phase 3 of 3: edit the function to remove constant formals, using the337// summaries and the final solver state as a guide.338339// Make up a set of blocks that need editing.340let mut need_editing = FxHashSet::<Block>::default();341for (block, summary) in summaries.iter() {342if block == entry_block {343continue;344}345for formal in summary.formals {346let formal_absval = state.get(*formal);347if formal_absval.is_one() {348need_editing.insert(block);349break;350}351}352}353354// Firstly, deal with the formals. For each formal which is redundant,355// remove it, and also add a reroute from it to the constant value which356// it we know it to be.357for b in &need_editing {358let mut del_these = SmallVec::<[(Value, Value); 32]>::new();359let formals: &[Value] = func.dfg.block_params(*b);360for formal in formals {361// The state must give an absval for `formal`.362if let AbstractValue::One(replacement_val) = state.get(*formal) {363del_these.push((*formal, replacement_val));364}365}366// We can delete the formals in any order. However,367// `remove_block_param` works by sliding backwards all arguments to368// the right of the value it is asked to delete. Hence when removing more369// than one formal, it is significantly more efficient to ask it to370// remove the rightmost formal first, and hence this `rev()`.371for (redundant_formal, replacement_val) in del_these.into_iter().rev() {372func.dfg.remove_block_param(redundant_formal);373func.dfg.change_to_alias(redundant_formal, replacement_val);374}375}376377// Secondly, visit all branch insns. If the destination has had its378// formals changed, change the actuals accordingly. Don't scan all insns,379// rather just visit those as listed in the summaries we prepared earlier.380let mut old_actuals = alloc::vec::Vec::new();381for summary in summaries.values() {382for edge in &summary.dests {383if !need_editing.contains(&edge.block) {384continue;385}386387let dfg = &mut func.dfg;388let dests = dfg.insts[edge.inst]389.branch_destination_mut(&mut dfg.jump_tables, &mut dfg.exception_tables);390let block = &mut dests[edge.branch_index as usize];391392old_actuals.extend(block.args(&dfg.value_lists));393394// Check that the numbers of arguments make sense.395let formals = &summaries[edge.block].formals;396assert_eq!(formals.len(), old_actuals.len());397398// Filter out redundant block arguments.399let mut formals = formals.iter();400old_actuals.retain(|_| {401let formal_i = formals.next().unwrap();402!state.get(*formal_i).is_one()403});404405// Replace the block with a new one that only includes the non-redundant arguments.406// This leaks the value list from the old block,407// https://github.com/bytecodealliance/wasmtime/issues/5451 for more information.408let destination = block.block(&dfg.value_lists);409*block = BlockCall::new(destination, old_actuals.drain(..), &mut dfg.value_lists);410}411}412413log::debug!(414"do_remove_constant_phis: done, {} iters. {} formals, of which {} const.",415iter_no,416state.absvals.len(),417n_consts418);419}420421422