Path: blob/main/cranelift/codegen/src/egraph/elaborate.rs
//! Elaboration phase: lowers EGraph back to sequences of operations
//! in CFG nodes.

use super::Stats;
use super::cost::Cost;
use crate::ctxhash::NullCtx;
use crate::dominator_tree::DominatorTree;
use crate::hash_map::Entry as HashEntry;
use crate::inst_predicates::is_pure_for_egraph;
use crate::ir::{Block, Function, Inst, Value, ValueDef};
use crate::loop_analysis::{Loop, LoopAnalysis};
use crate::scoped_hash_map::ScopedHashMap;
use crate::trace;
use crate::{FxHashMap, FxHashSet};
use alloc::vec::Vec;
use cranelift_control::ControlPlane;
use cranelift_entity::{EntitySet, SecondaryMap, packed_option::ReservedValue};
use smallvec::{SmallVec, smallvec};

pub(crate) struct Elaborator<'a> {
    func: &'a mut Function,
    domtree: &'a DominatorTree,
    loop_analysis: &'a LoopAnalysis,
    /// Map from Value that is produced by a pure Inst (and was thus
    /// not in the side-effecting skeleton) to the value produced by
    /// an elaborated inst (placed in the layout) to whose results we
    /// refer in the final code.
    ///
    /// The first time we use some result of an instruction during
    /// elaboration, we can place it and insert an identity map (inst
    /// results to that same inst's results) in this scoped
    /// map. Within that block and its dom-tree children, that mapping
    /// is visible and we can continue to use it. This allows us to
    /// avoid cloning the instruction. However, if we pop that scope
    /// and use it somewhere else as well, we will need to
    /// duplicate. We detect this case by checking, when a value that
    /// we want is not present in this map, whether the producing inst
    /// is already placed in the Layout. If so, we duplicate, and
    /// insert non-identity mappings from the original inst's results
    /// to the cloned inst's results.
    ///
    /// Note that as values may refer to unions that represent a subset
    /// of a larger eclass, it's not valid to walk towards the root of a
    /// union tree: doing so would potentially equate values that fall
    /// on different branches of the dominator tree.
    value_to_elaborated_value: ScopedHashMap<Value, ElaboratedValue>,
    /// Map from Value to the best (lowest-cost) Value in its eclass
    /// (tree of union value-nodes).
    value_to_best_value: SecondaryMap<Value, BestEntry>,
    /// Stack of blocks and loops in current elaboration path.
    loop_stack: SmallVec<[LoopStackEntry; 8]>,
    /// The current block into which we are elaborating.
    cur_block: Block,
    /// Values that opt rules have indicated should be rematerialized
    /// in every block they are used (e.g., immediates or other
    /// "cheap-to-compute" ops).
    remat_values: &'a FxHashSet<Value>,
    /// Explicitly-unrolled value elaboration stack.
    elab_stack: Vec<ElabStackEntry>,
    /// Results from the elab stack.
    elab_result_stack: Vec<ElaboratedValue>,
    /// Explicitly-unrolled block elaboration stack.
    block_stack: Vec<BlockStackEntry>,
    /// Copies of values that have been rematerialized.
    remat_copies: FxHashMap<(Block, Value), Value>,
    /// Stats for various events during egraph processing, to help
    /// with optimization of this infrastructure.
    stats: &'a mut Stats,
    /// Chaos-mode control-plane so we can test that we still get
    /// correct results when our heuristics make bad decisions.
    ctrl_plane: &'a mut ControlPlane,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct BestEntry(Cost, Value);

impl PartialOrd for BestEntry {
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for BestEntry {
    #[inline]
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        self.0.cmp(&other.0).then_with(|| {
            // Note that this comparison is reversed. When costs are equal,
            // prefer the value with the bigger index. This is a heuristic that
            // prefers results of rewrites to the original value, since we
            // expect that our rewrites are generally improvements.
            self.1.cmp(&other.1).reverse()
        })
    }
}
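
// For example, if an original value `v3` and its rewritten form `v7` end up
// with the same cost `c`, then `BestEntry(c, v7) < BestEntry(c, v3)`, so the
// `min` taken over a union in `compute_best_values` below selects the
// rewritten `v7`.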

#[derive(Clone, Copy, Debug)]
struct ElaboratedValue {
    in_block: Block,
    value: Value,
}

#[derive(Clone, Debug)]
struct LoopStackEntry {
    /// The loop identifier.
    lp: Loop,
    /// The hoist point: a block that immediately dominates this
    /// loop. May not be an immediate predecessor, but will be a valid
    /// point to place all loop-invariant ops: they must depend only
    /// on inputs that dominate the loop, so are available at (the end
    /// of) this block.
    hoist_block: Block,
    /// The depth in the scope map.
    scope_depth: u32,
}

#[derive(Clone, Debug)]
enum ElabStackEntry {
    /// Next action is to resolve this value into an elaborated inst
    /// (placed into the layout) that produces the value, and
    /// recursively elaborate the insts that produce its args.
    ///
    /// Any inserted ops should be inserted before `before`, which is
    /// the instruction demanding this value.
    Start { value: Value, before: Inst },
    /// Args have been pushed; waiting for results.
    PendingInst {
        inst: Inst,
        result_idx: usize,
        num_args: usize,
        before: Inst,
    },
}

#[derive(Clone, Debug)]
enum BlockStackEntry {
    Elaborate { block: Block, idom: Option<Block> },
    Pop,
}
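
// These two enums drive the explicitly-unrolled recursions mentioned in the
// `Elaborator` fields above, so elaboration need not recurse on the native
// call stack for deep expression trees or deep dominator trees:
//
//   - `ElabStackEntry::Start` corresponds to a recursive call on a value, and
//     `PendingInst` is the continuation that runs once all of that
//     instruction's arguments have been elaborated and pushed onto
//     `elab_result_stack`.
//   - `BlockStackEntry::Elaborate` visits a block in dominator-tree preorder,
//     and the matching `Pop` closes the scope that was opened for that block
//     in `value_to_elaborated_value`.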

impl<'a> Elaborator<'a> {
    pub(crate) fn new(
        func: &'a mut Function,
        domtree: &'a DominatorTree,
        loop_analysis: &'a LoopAnalysis,
        remat_values: &'a FxHashSet<Value>,
        stats: &'a mut Stats,
        ctrl_plane: &'a mut ControlPlane,
    ) -> Self {
        let num_values = func.dfg.num_values();
        let mut value_to_best_value =
            SecondaryMap::with_default(BestEntry(Cost::infinity(), Value::reserved_value()));
        value_to_best_value.resize(num_values);
        Self {
            func,
            domtree,
            loop_analysis,
            value_to_elaborated_value: ScopedHashMap::with_capacity(num_values),
            value_to_best_value,
            loop_stack: smallvec![],
            cur_block: Block::reserved_value(),
            remat_values,
            elab_stack: vec![],
            elab_result_stack: vec![],
            block_stack: vec![],
            remat_copies: FxHashMap::default(),
            stats,
            ctrl_plane,
        }
    }

    fn start_block(&mut self, idom: Option<Block>, block: Block) {
        trace!(
            "start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}",
            block,
            idom,
            self.loop_stack.len(),
            self.value_to_elaborated_value.depth()
        );

        // Pop any loop levels we're no longer in.
        while let Some(inner_loop) = self.loop_stack.last() {
            if self.loop_analysis.is_in_loop(block, inner_loop.lp) {
                break;
            }
            self.loop_stack.pop();
        }

        // Note that if the *entry* block is a loop header, we will
        // not make note of the loop here because it will not have an
        // immediate dominator. We must disallow this case because we
        // will skip adding the `LoopStackEntry` here but our
        // `LoopAnalysis` will otherwise still make note of this loop
        // and loop depths will not match.
        if let Some(idom) = idom {
            if let Some(lp) = self.loop_analysis.is_loop_header(block) {
                self.loop_stack.push(LoopStackEntry {
                    lp,
                    // Any code hoisted out of this loop will have code
                    // placed in `idom`, and will have def mappings
                    // inserted into the scoped hashmap at that block's
                    // level.
                    hoist_block: idom,
                    scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32,
                });
                trace!(
                    " -> loop header, pushing; depth now {}",
                    self.loop_stack.len()
                );
            }
        } else {
            debug_assert!(
                self.loop_analysis.is_loop_header(block).is_none(),
                "Entry block (domtree root) cannot be a loop header!"
            );
        }

        trace!("block {}: loop stack is {:?}", block, self.loop_stack);

        self.cur_block = block;
    }
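
    // Note on `scope_depth` above: `elaborate_domtree` opens a new scope in
    // `value_to_elaborated_value` for each block before `start_block` runs,
    // so `depth() - 1` is the scope associated with `idom`. Mappings for
    // instructions hoisted out of the loop are inserted at that depth so that
    // they stay visible for the whole loop body.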

    fn topo_sorted_values(&self) -> Vec<Value> {
        #[derive(Debug)]
        enum Event {
            Enter,
            Exit,
        }
        let mut stack = Vec::<(Event, Value)>::new();

        // Traverse the CFG in pre-order so that, when we look at the
        // instructions and operands inside each block, we see value defs before
        // uses.
        for block in crate::traversals::Dfs::new().pre_order_iter(&self.func) {
            for inst in self.func.layout.block_insts(block) {
                stack.extend(self.func.dfg.inst_values(inst).map(|v| (Event::Enter, v)));
            }
        }

        // We pushed in the desired order, so popping would implicitly reverse
        // that. Avoid that by reversing the initial stack before we start
        // traversing the DFG.
        stack.reverse();

        let mut sorted = Vec::with_capacity(self.func.dfg.values().len());
        let mut seen = EntitySet::<Value>::with_capacity(self.func.dfg.values().len());

        // Post-order traversal of the DFG, visiting value defs before uses.
        while let Some((event, value)) = stack.pop() {
            match event {
                Event::Enter => {
                    if seen.insert(value) {
                        stack.push((Event::Exit, value));
                        match self.func.dfg.value_def(value) {
                            ValueDef::Result(inst, _) => {
                                stack.extend(
                                    self.func
                                        .dfg
                                        .inst_values(inst)
                                        .rev()
                                        .filter(|v| !seen.contains(*v))
                                        .map(|v| (Event::Enter, v)),
                                );
                            }
                            ValueDef::Union(a, b) => {
                                if !seen.contains(b) {
                                    stack.push((Event::Enter, b));
                                }
                                if !seen.contains(a) {
                                    stack.push((Event::Enter, a));
                                }
                            }
                            ValueDef::Param(..) => {}
                        }
                    }
                }
                Event::Exit => {
                    sorted.push(value);
                }
            }
        }

        sorted
    }
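
    // The Enter/Exit pair above is the usual iterative formulation of a
    // post-order DFS: `Enter` expands a value's operands (or union arms)
    // onto the stack, and the matching `Exit` fires only after all of them
    // have been emitted, so every value's dependencies precede it in
    // `sorted`. `compute_best_values` relies on exactly that ordering.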

    fn compute_best_values(&mut self) {
        let sorted_values = self.topo_sorted_values();

        let best = &mut self.value_to_best_value;

        // We can't make random decisions inside the fixpoint loop below because
        // that could cause values to change on every iteration of the loop,
        // which would make the loop never terminate. So in chaos testing
        // mode we need a form of making suboptimal decisions that is fully
        // deterministic. We choose to simply make the worst decision we know
        // how to do instead of the best.
        let use_worst = self.ctrl_plane.get_decision();

        trace!(
            "Computing the {} values for each eclass",
            if use_worst {
                "worst (chaos mode)"
            } else {
                "best"
            }
        );

        // Because the values are topologically sorted, we know that we will see
        // defs before uses, so an instruction's operands' costs will already be
        // computed by the time we are computing the cost for the current value
        // and its instruction.
        for value in sorted_values.iter().copied() {
            let def = self.func.dfg.value_def(value);
            trace!("computing best for value {:?} def {:?}", value, def);

            match def {
                // Pick the best of the two options based on min-cost. This
                // works because each element of `best` is a `(cost, value)`
                // tuple; `cost` comes first so the natural comparison works
                // based on cost, and breaks ties based on value number.
                ValueDef::Union(x, y) => {
                    debug_assert!(!best[x].1.is_reserved_value());
                    debug_assert!(!best[y].1.is_reserved_value());
                    best[value] = if use_worst {
                        core::cmp::max(best[x], best[y])
                    } else {
                        core::cmp::min(best[x], best[y])
                    };
                    trace!(
                        " -> best of union({:?}, {:?}) = {:?}",
                        best[x], best[y], best[value]
                    );
                }

                ValueDef::Param(_, _) => {
                    best[value] = BestEntry(Cost::zero(), value);
                }

                // If the Inst is inserted into the layout (which is,
                // at this point, only the side-effecting skeleton),
                // then it must be computed and thus we give it zero
                // cost.
                ValueDef::Result(inst, _) => {
                    if let Some(_) = self.func.layout.inst_block(inst) {
                        best[value] = BestEntry(Cost::zero(), value);
                    } else {
                        let inst_data = &self.func.dfg.insts[inst];
                        // N.B.: at this point we know that the opcode is
                        // pure, so `pure_op_cost`'s precondition is
                        // satisfied.
                        let cost = Cost::of_pure_op(
                            inst_data.opcode(),
                            self.func.dfg.inst_values(inst).map(|value| {
                                debug_assert!(!best[value].1.is_reserved_value());
                                best[value].0
                            }),
                        );
                        best[value] = BestEntry(cost, value);
                        trace!(" -> cost of value {} = {:?}", value, cost);
                    }
                }
            };

            // You might be expecting an assert that the best cost we just
            // computed is not infinity, however infinite cost *can* happen in
            // practice. First, note that our cost function doesn't know about
            // any shared structure in the dataflow graph, it only sums operand
            // costs. (And trying to avoid that by deduping a single operation's
            // operands is a losing game because you can always just add one
            // indirection and go from `add(x, x)` to `add(foo(x), bar(x))` to
            // hide the shared structure.) Given that blindness to sharing, we
            // can make cost grow exponentially with a linear sequence of
            // operations:
            //
            //     v0 = iconst.i32 1    ;; cost = 1
            //     v1 = iadd v0, v0     ;; cost = 3 + 1 + 1
            //     v2 = iadd v1, v1     ;; cost = 3 + 5 + 5
            //     v3 = iadd v2, v2     ;; cost = 3 + 13 + 13
            //     v4 = iadd v3, v3     ;; cost = 3 + 29 + 29
            //     v5 = iadd v4, v4     ;; cost = 3 + 61 + 61
            //     v6 = iadd v5, v5     ;; cost = 3 + 125 + 125
            //     ;; etc...
            //
            // Such a chain can cause cost to saturate to infinity. How do we
            // choose which e-node is best when there are multiple that have
            // saturated to infinity? It doesn't matter. As long as invariant
            // (2) for optimization rules is upheld by our rule set (see
            // `cranelift/codegen/src/opts/README.md`) it is safe to choose
            // *any* e-node in the e-class. At worst we will produce suboptimal
            // code, but never incorrect code.
        }
    }

    /// Elaborate use of an eclass, inserting any needed new
    /// instructions before the given inst `before`. Should only be
    /// given values corresponding to results of instructions or
    /// blockparams.
    fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue {
        debug_assert_ne!(value, Value::reserved_value());

        // Kick off the process by requesting this result
        // value.
        self.elab_stack
            .push(ElabStackEntry::Start { value, before });

        // Now run the explicit-stack recursion until we reach
        // the root.
        self.process_elab_stack();
        debug_assert_eq!(self.elab_result_stack.len(), 1);
        self.elab_result_stack.pop().unwrap()
    }

    /// Possibly rematerialize the instruction producing the value in
    /// `arg` and rewrite `arg` to refer to it, if needed. Returns
    /// `true` if a rewrite occurred.
    fn maybe_remat_arg(
        remat_values: &FxHashSet<Value>,
        func: &mut Function,
        remat_copies: &mut FxHashMap<(Block, Value), Value>,
        insert_block: Block,
        before: Inst,
        arg: &mut ElaboratedValue,
        stats: &mut Stats,
    ) -> bool {
        // TODO (#7313): we may want to consider recursive
        // rematerialization as well. We could process the arguments of
        // the rematerialized instruction up to a certain depth. This
        // would affect, e.g., adds-with-one-constant-arg, which are
        // currently rematerialized. Right now we don't do this, to
        // avoid the need for another fixpoint loop here.
        if arg.in_block != insert_block && remat_values.contains(&arg.value) {
            let new_value = match remat_copies.entry((insert_block, arg.value)) {
                HashEntry::Occupied(o) => *o.get(),
                HashEntry::Vacant(v) => {
                    let inst = func.dfg.value_def(arg.value).inst().unwrap();
                    debug_assert_eq!(func.dfg.inst_results(inst).len(), 1);
                    let new_inst = func.dfg.clone_inst(inst);
                    func.layout.insert_inst(new_inst, before);
                    let new_result = func.dfg.inst_results(new_inst)[0];
                    *v.insert(new_result)
                }
            };
            trace!("rematerialized {} as {}", arg.value, new_value);
            arg.value = new_value;
            stats.elaborate_remat += 1;
            true
        } else {
            false
        }
    }
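
    // Note that rematerialization above only fires when the arg's defining
    // block differs from the block we are inserting into, and the clone is
    // memoized per `(insert_block, value)` in `remat_copies`, so each block
    // gets at most one copy of a given remat'd value (e.g. an `iconst`) no
    // matter how many uses appear in that block.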

    fn process_elab_stack(&mut self) {
        while let Some(entry) = self.elab_stack.pop() {
            match entry {
                ElabStackEntry::Start { value, before } => {
                    debug_assert!(self.func.dfg.value_is_real(value));

                    self.stats.elaborate_visit_node += 1;

                    // Get the best option; we use `value` (latest
                    // value) here so we have a full view of the
                    // eclass.
                    trace!("looking up best value for {}", value);
                    let BestEntry(_, best_value) = self.value_to_best_value[value];
                    trace!("elaborate: value {} -> best {}", value, best_value);
                    debug_assert_ne!(best_value, Value::reserved_value());

                    if let Some(elab_val) =
                        self.value_to_elaborated_value.get(&NullCtx, &best_value)
                    {
                        // Value is available; use it.
                        trace!("elaborate: value {} -> {:?}", value, elab_val);
                        self.stats.elaborate_memoize_hit += 1;
                        self.elab_result_stack.push(*elab_val);
                        continue;
                    }

                    self.stats.elaborate_memoize_miss += 1;

                    // Now resolve the value to its definition to see
                    // how we can compute it.
                    let (inst, result_idx) = match self.func.dfg.value_def(best_value) {
                        ValueDef::Result(inst, result_idx) => {
                            trace!(
                                " -> value {} is result {} of {}",
                                best_value, result_idx, inst
                            );
                            (inst, result_idx)
                        }
                        ValueDef::Param(in_block, _) => {
                            // We don't need to do anything to compute
                            // this value; just push its result on the
                            // result stack (blockparams are already
                            // available).
                            trace!(" -> value {} is a blockparam", best_value);
                            self.elab_result_stack.push(ElaboratedValue {
                                in_block,
                                value: best_value,
                            });
                            continue;
                        }
                        ValueDef::Union(_, _) => {
                            panic!("Should never have a Union value as the best value");
                        }
                    };

                    trace!(
                        " -> result {} of inst {:?}",
                        result_idx, self.func.dfg.insts[inst]
                    );

                    // We're going to need to use this instruction
                    // result, placing the instruction into the
                    // layout. First, enqueue all args to be
                    // elaborated. Push state to receive the results
                    // and later elab this inst.
                    let num_args = self.func.dfg.inst_values(inst).count();
                    self.elab_stack.push(ElabStackEntry::PendingInst {
                        inst,
                        result_idx,
                        num_args,
                        before,
                    });

                    // Push args in reverse order so we process the
                    // first arg first.
                    for arg in self.func.dfg.inst_values(inst).rev() {
                        debug_assert_ne!(arg, Value::reserved_value());
                        self.elab_stack
                            .push(ElabStackEntry::Start { value: arg, before });
                    }
                }

                ElabStackEntry::PendingInst {
                    inst,
                    result_idx,
                    num_args,
                    before,
                } => {
                    trace!(
                        "PendingInst: {} result {} args {} before {}",
                        inst, result_idx, num_args, before
                    );

                    // We should have all args resolved at this
                    // point. Grab them and drain them out, removing
                    // them.
                    let arg_idx = self.elab_result_stack.len() - num_args;
                    let arg_values = &mut self.elab_result_stack[arg_idx..];

                    // Compute max loop depth.
                    //
                    // Note that if there are no arguments then this instruction
                    // is allowed to get hoisted up one loop. This is not
                    // usually used since no-argument values are things like
                    // constants which are typically rematerialized, but for the
                    // `vconst` instruction 128-bit constants aren't as easily
                    // rematerialized. They're hoisted out of inner loops but
                    // not to the function entry which may run the risk of
                    // placing too much register pressure on the entire
                    // function. This is modeled with the `.saturating_sub(1)`
                    // as the default if there's otherwise no maximum.
                    let loop_hoist_level = arg_values
                        .iter()
                        .map(|&value| {
                            // Find the outermost loop level at which
                            // the value's defining block *is not* a
                            // member. This is the loop-nest level
                            // whose hoist-block we hoist to.
                            let hoist_level = self
                                .loop_stack
                                .iter()
                                .position(|loop_entry| {
                                    !self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp)
                                })
                                .unwrap_or(self.loop_stack.len());
                            trace!(
                                " -> arg: elab_value {:?} hoist level {:?}",
                                value, hoist_level
                            );
                            hoist_level
                        })
                        .max()
                        .unwrap_or(self.loop_stack.len().saturating_sub(1));
                    trace!(
                        " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}",
                        loop_hoist_level,
                        self.loop_stack.len(),
                        self.loop_stack,
                    );
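
                    // A concrete reading of the computation above: with
                    // `loop_stack = [outer, inner]` (len 2), an argument
                    // defined outside both loops maps to hoist level 0, one
                    // defined in `outer` but not `inner` maps to 1, and one
                    // defined inside `inner` maps to 2. Taking the max over
                    // all arguments gives the index of the outermost loop
                    // that contains none of the argument definitions; a level
                    // equal to `loop_stack.len()` means the instruction is
                    // not loop-invariant and cannot be hoisted.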

                    // We know that this is a pure inst, because
                    // non-pure roots have already been placed in the
                    // value-to-elab'd-value map, so they will not
                    // reach this stage of processing.
                    //
                    // We now must determine the location at which we
                    // place the instruction. This is the current
                    // block *unless* we hoist above a loop when all
                    // args are loop-invariant (and this op is pure).
                    let (scope_depth, before, insert_block) = if loop_hoist_level
                        == self.loop_stack.len()
                    {
                        // Depends on some value at the current
                        // loop depth, or remat forces it here:
                        // place it at the current location.
                        (
                            self.value_to_elaborated_value.depth(),
                            before,
                            self.func.layout.inst_block(before).unwrap(),
                        )
                    } else {
                        // Does not depend on any args at current
                        // loop depth: hoist out of loop.
                        self.stats.elaborate_licm_hoist += 1;
                        let data = &self.loop_stack[loop_hoist_level];
                        // `data.hoist_block` should dominate `before`'s block.
                        let before_block = self.func.layout.inst_block(before).unwrap();
                        debug_assert!(self.domtree.block_dominates(data.hoist_block, before_block));
                        // Determine the instruction at which we
                        // insert in `data.hoist_block`.
                        let before = self.func.layout.last_inst(data.hoist_block).unwrap();
                        (data.scope_depth as usize, before, data.hoist_block)
                    };

                    trace!(
                        " -> decided to place: before {} insert_block {}",
                        before, insert_block
                    );

                    // Now that we have the location for the
                    // instruction, check if any of its args are remat
                    // values. If so, and if we don't have a copy of
                    // the rematerializing instruction for this block
                    // yet, create one.
                    let mut remat_arg = false;
                    for arg_value in arg_values.iter_mut() {
                        if Self::maybe_remat_arg(
                            &self.remat_values,
                            &mut self.func,
                            &mut self.remat_copies,
                            insert_block,
                            before,
                            arg_value,
                            &mut self.stats,
                        ) {
                            remat_arg = true;
                        }
                    }

                    // Now we need to place `inst` at the computed
                    // location (just before `before`). Note that
                    // `inst` may already have been placed somewhere
                    // else, because a pure node may be elaborated at
                    // more than one place. In this case, we need to
                    // duplicate the instruction (and return the
                    // `Value`s for that duplicated instance instead).
                    //
                    // Also clone if we rematerialized, because we
                    // don't want to rewrite the args in the original
                    // copy.
                    trace!("need inst {} before {}", inst, before);
                    let inst = if self.func.layout.inst_block(inst).is_some() || remat_arg {
                        // Clone the inst!
                        let new_inst = self.func.dfg.clone_inst(inst);
                        trace!(
                            " -> inst {} already has a location; cloned to {}",
                            inst, new_inst
                        );
                        // Create mappings in the
                        // value-to-elab'd-value map from original
                        // results to cloned results.
                        for (&result, &new_result) in self
                            .func
                            .dfg
                            .inst_results(inst)
                            .iter()
                            .zip(self.func.dfg.inst_results(new_inst).iter())
                        {
                            let elab_value = ElaboratedValue {
                                value: new_result,
                                in_block: insert_block,
                            };
                            let best_result = self.value_to_best_value[result];
                            self.value_to_elaborated_value.insert_if_absent_with_depth(
                                &NullCtx,
                                best_result.1,
                                elab_value,
                                scope_depth,
                            );

                            self.value_to_best_value[new_result] = best_result;

                            trace!(
                                " -> cloned inst has new result {} for orig {}",
                                new_result, result
                            );
                        }
                        new_inst
                    } else {
                        trace!(" -> no location; using original inst");
                        // Create identity mappings from result values
                        // to themselves in this scope, since we're
                        // using the original inst.
                        for &result in self.func.dfg.inst_results(inst) {
                            let elab_value = ElaboratedValue {
                                value: result,
                                in_block: insert_block,
                            };
                            let best_result = self.value_to_best_value[result];
                            self.value_to_elaborated_value.insert_if_absent_with_depth(
                                &NullCtx,
                                best_result.1,
                                elab_value,
                                scope_depth,
                            );
                            trace!(" -> inserting identity mapping for {}", result);
                        }
                        inst
                    };

                    // Place the inst just before `before`.
                    assert!(
                        is_pure_for_egraph(self.func, inst),
                        "something has gone very wrong if we are elaborating effectful \
                         instructions, they should have remained in the skeleton"
                    );
                    self.func.layout.insert_inst(inst, before);

                    // Update the inst's arguments.
                    self.func
                        .dfg
                        .overwrite_inst_values(inst, arg_values.into_iter().map(|ev| ev.value));

                    // Now that we've consumed the arg values, pop
                    // them off the stack.
                    self.elab_result_stack.truncate(arg_idx);

                    // Push the requested result index of the
                    // instruction onto the elab-results stack.
                    self.elab_result_stack.push(ElaboratedValue {
                        in_block: insert_block,
                        value: self.func.dfg.inst_results(inst)[result_idx],
                    });
                }
            }
        }
    }

    fn elaborate_block(&mut self, elab_values: &mut Vec<Value>, idom: Option<Block>, block: Block) {
        trace!("elaborate_block: block {}", block);
        self.start_block(idom, block);

        // Iterate over the side-effecting skeleton using the linked
        // list in Layout. We will insert instructions that are
        // elaborated *before* `inst`, so we can always use its
        // next-link to continue the iteration.
        let mut next_inst = self.func.layout.first_inst(block);
        let mut first_branch = None;
        while let Some(inst) = next_inst {
            trace!(
                "elaborating inst {} with results {:?}",
                inst,
                self.func.dfg.inst_results(inst)
            );
            // Record the first branch we see in the block; all
            // elaboration for args of *any* branch must be inserted
            // before the *first* branch, because the branch group
            // must remain contiguous at the end of the block.
            if self.func.dfg.insts[inst].opcode().is_branch() && first_branch == None {
                first_branch = Some(inst);
            }

            // Determine where elaboration inserts insts.
            let before = first_branch.unwrap_or(inst);
            trace!(" -> inserting before {}", before);

            elab_values.extend(self.func.dfg.inst_values(inst));
            for arg in elab_values.iter_mut() {
                trace!(" -> arg {}", *arg);
                // Elaborate the arg, placing any newly-inserted insts
                // before `before`. Get the updated value, which may
                // be different than the original.
                let mut new_arg = self.elaborate_eclass_use(*arg, before);
                Self::maybe_remat_arg(
                    &self.remat_values,
                    &mut self.func,
                    &mut self.remat_copies,
                    block,
                    inst,
                    &mut new_arg,
                    &mut self.stats,
                );
                trace!(" -> rewrote arg to {:?}", new_arg);
                *arg = new_arg.value;
            }
            self.func
                .dfg
                .overwrite_inst_values(inst, elab_values.drain(..));

            // We need to put the results of this instruction in the
            // map now.
            for &result in self.func.dfg.inst_results(inst) {
                trace!(" -> result {}", result);
                let best_result = self.value_to_best_value[result];
                self.value_to_elaborated_value.insert_if_absent(
                    &NullCtx,
                    best_result.1,
                    ElaboratedValue {
                        in_block: block,
                        value: result,
                    },
                );
            }

            next_inst = self.func.layout.next_inst(inst);
        }
    }

    fn elaborate_domtree(&mut self, domtree: &DominatorTree) {
        self.block_stack.push(BlockStackEntry::Elaborate {
            block: self.func.layout.entry_block().unwrap(),
            idom: None,
        });

        // A temporary workspace for elaborate_block, allocated here so that
        // the allocation is reused across blocks.
        let mut elab_values = Vec::new();

        while let Some(top) = self.block_stack.pop() {
            match top {
                BlockStackEntry::Elaborate { block, idom } => {
                    self.block_stack.push(BlockStackEntry::Pop);
                    self.value_to_elaborated_value.increment_depth();

                    self.elaborate_block(&mut elab_values, idom, block);

                    // Push children. We are doing a preorder
                    // traversal so we do this after processing this
                    // block above.
                    let block_stack_end = self.block_stack.len();
                    for child in self.ctrl_plane.shuffled(domtree.children(block)) {
                        self.block_stack.push(BlockStackEntry::Elaborate {
                            block: child,
                            idom: Some(block),
                        });
                    }
                    // Reverse what we just pushed so we elaborate in
                    // original block order. (The domtree iter is a
                    // single-ended iter over a singly-linked list so
                    // we can't `.rev()` above.)
                    self.block_stack[block_stack_end..].reverse();
                }
                BlockStackEntry::Pop => {
                    self.value_to_elaborated_value.decrement_depth();
                }
            }
        }
    }

    pub(crate) fn elaborate(&mut self) {
        self.stats.elaborate_func += 1;
        self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64;
        self.compute_best_values();
        self.elaborate_domtree(&self.domtree);
        self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64;
    }
}
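
// A rough sketch of how this type is driven (see the parent `egraph` module
// for the actual call site; the surrounding code is not shown here): after
// the egraph pass has rewritten pure instructions into eclasses and collected
// `remat_values`, elaboration runs once per function, roughly:
//
//     Elaborator::new(func, domtree, loop_analysis, &remat_values, &mut stats, ctrl_plane)
//         .elaborate();
//
// `elaborate()` first picks the lowest-cost representative of every eclass
// (`compute_best_values`) and then walks the dominator tree, re-inserting
// pure instructions into the layout next to their uses or at hoist points
// above the loops they are invariant in (`elaborate_domtree`).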