Path: blob/main/cranelift/codegen/src/egraph/elaborate.rs
1693 views
//! Elaboration phase: lowers EGraph back to sequences of operations1//! in CFG nodes.23use super::Stats;4use super::cost::Cost;5use crate::ctxhash::NullCtx;6use crate::dominator_tree::DominatorTree;7use crate::hash_map::Entry as HashEntry;8use crate::inst_predicates::is_pure_for_egraph;9use crate::ir::{Block, Function, Inst, Value, ValueDef};10use crate::loop_analysis::{Loop, LoopAnalysis};11use crate::scoped_hash_map::ScopedHashMap;12use crate::trace;13use alloc::vec::Vec;14use cranelift_control::ControlPlane;15use cranelift_entity::{SecondaryMap, packed_option::ReservedValue};16use rustc_hash::{FxHashMap, FxHashSet};17use smallvec::{SmallVec, smallvec};1819pub(crate) struct Elaborator<'a> {20func: &'a mut Function,21domtree: &'a DominatorTree,22loop_analysis: &'a LoopAnalysis,23/// Map from Value that is produced by a pure Inst (and was thus24/// not in the side-effecting skeleton) to the value produced by25/// an elaborated inst (placed in the layout) to whose results we26/// refer in the final code.27///28/// The first time we use some result of an instruction during29/// elaboration, we can place it and insert an identity map (inst30/// results to that same inst's results) in this scoped31/// map. Within that block and its dom-tree children, that mapping32/// is visible and we can continue to use it. This allows us to33/// avoid cloning the instruction. However, if we pop that scope34/// and use it somewhere else as well, we will need to35/// duplicate. We detect this case by checking, when a value that36/// we want is not present in this map, whether the producing inst37/// is already placed in the Layout. If so, we duplicate, and38/// insert non-identity mappings from the original inst's results39/// to the cloned inst's results.40///41/// Note that as values may refer to unions that represent a subset42/// of a larger eclass, it's not valid to walk towards the root of a43/// union tree: doing so would potentially equate values that fall44/// on different branches of the dominator tree.45value_to_elaborated_value: ScopedHashMap<Value, ElaboratedValue>,46/// Map from Value to the best (lowest-cost) Value in its eclass47/// (tree of union value-nodes).48value_to_best_value: SecondaryMap<Value, BestEntry>,49/// Stack of blocks and loops in current elaboration path.50loop_stack: SmallVec<[LoopStackEntry; 8]>,51/// The current block into which we are elaborating.52cur_block: Block,53/// Values that opt rules have indicated should be rematerialized54/// in every block they are used (e.g., immediates or other55/// "cheap-to-compute" ops).56remat_values: &'a FxHashSet<Value>,57/// Explicitly-unrolled value elaboration stack.58elab_stack: Vec<ElabStackEntry>,59/// Results from the elab stack.60elab_result_stack: Vec<ElaboratedValue>,61/// Explicitly-unrolled block elaboration stack.62block_stack: Vec<BlockStackEntry>,63/// Copies of values that have been rematerialized.64remat_copies: FxHashMap<(Block, Value), Value>,65/// Stats for various events during egraph processing, to help66/// with optimization of this infrastructure.67stats: &'a mut Stats,68/// Chaos-mode control-plane so we can test that we still get69/// correct results when our heuristics make bad decisions.70ctrl_plane: &'a mut ControlPlane,71}7273#[derive(Clone, Copy, Debug, PartialEq, Eq)]74struct BestEntry(Cost, Value);7576impl PartialOrd for BestEntry {77fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {78Some(self.cmp(other))79}80}8182impl Ord for BestEntry {83#[inline]84fn cmp(&self, other: &Self) -> std::cmp::Ordering {85self.0.cmp(&other.0).then_with(|| {86// Note that this comparison is reversed. When costs are equal,87// prefer the value with the bigger index. This is a heuristic that88// prefers results of rewrites to the original value, since we89// expect that our rewrites are generally improvements.90self.1.cmp(&other.1).reverse()91})92}93}9495#[derive(Clone, Copy, Debug)]96struct ElaboratedValue {97in_block: Block,98value: Value,99}100101#[derive(Clone, Debug)]102struct LoopStackEntry {103/// The loop identifier.104lp: Loop,105/// The hoist point: a block that immediately dominates this106/// loop. May not be an immediate predecessor, but will be a valid107/// point to place all loop-invariant ops: they must depend only108/// on inputs that dominate the loop, so are available at (the end109/// of) this block.110hoist_block: Block,111/// The depth in the scope map.112scope_depth: u32,113}114115#[derive(Clone, Debug)]116enum ElabStackEntry {117/// Next action is to resolve this value into an elaborated inst118/// (placed into the layout) that produces the value, and119/// recursively elaborate the insts that produce its args.120///121/// Any inserted ops should be inserted before `before`, which is122/// the instruction demanding this value.123Start { value: Value, before: Inst },124/// Args have been pushed; waiting for results.125PendingInst {126inst: Inst,127result_idx: usize,128num_args: usize,129before: Inst,130},131}132133#[derive(Clone, Debug)]134enum BlockStackEntry {135Elaborate { block: Block, idom: Option<Block> },136Pop,137}138139impl<'a> Elaborator<'a> {140pub(crate) fn new(141func: &'a mut Function,142domtree: &'a DominatorTree,143loop_analysis: &'a LoopAnalysis,144remat_values: &'a FxHashSet<Value>,145stats: &'a mut Stats,146ctrl_plane: &'a mut ControlPlane,147) -> Self {148let num_values = func.dfg.num_values();149let mut value_to_best_value =150SecondaryMap::with_default(BestEntry(Cost::infinity(), Value::reserved_value()));151value_to_best_value.resize(num_values);152Self {153func,154domtree,155loop_analysis,156value_to_elaborated_value: ScopedHashMap::with_capacity(num_values),157value_to_best_value,158loop_stack: smallvec![],159cur_block: Block::reserved_value(),160remat_values,161elab_stack: vec![],162elab_result_stack: vec![],163block_stack: vec![],164remat_copies: FxHashMap::default(),165stats,166ctrl_plane,167}168}169170fn start_block(&mut self, idom: Option<Block>, block: Block) {171trace!(172"start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}",173block,174idom,175self.loop_stack.len(),176self.value_to_elaborated_value.depth()177);178179// Pop any loop levels we're no longer in.180while let Some(inner_loop) = self.loop_stack.last() {181if self.loop_analysis.is_in_loop(block, inner_loop.lp) {182break;183}184self.loop_stack.pop();185}186187// Note that if the *entry* block is a loop header, we will188// not make note of the loop here because it will not have an189// immediate dominator. We must disallow this case because we190// will skip adding the `LoopStackEntry` here but our191// `LoopAnalysis` will otherwise still make note of this loop192// and loop depths will not match.193if let Some(idom) = idom {194if let Some(lp) = self.loop_analysis.is_loop_header(block) {195self.loop_stack.push(LoopStackEntry {196lp,197// Any code hoisted out of this loop will have code198// placed in `idom`, and will have def mappings199// inserted in to the scoped hashmap at that block's200// level.201hoist_block: idom,202scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32,203});204trace!(205" -> loop header, pushing; depth now {}",206self.loop_stack.len()207);208}209} else {210debug_assert!(211self.loop_analysis.is_loop_header(block).is_none(),212"Entry block (domtree root) cannot be a loop header!"213);214}215216trace!("block {}: loop stack is {:?}", block, self.loop_stack);217218self.cur_block = block;219}220221fn compute_best_values(&mut self) {222let best = &mut self.value_to_best_value;223224// We can't make random decisions inside the fixpoint loop below because225// that could cause values to change on every iteration of the loop,226// which would make the loop never terminate. So in chaos testing227// mode we need a form of making suboptimal decisions that is fully228// deterministic. We choose to simply make the worst decision we know229// how to do instead of the best.230let use_worst = self.ctrl_plane.get_decision();231232// Do a fixpoint loop to compute the best value for each eclass.233//234// The maximum number of iterations is the length of the longest chain235// of `vNN -> vMM` edges in the dataflow graph where `NN < MM`, so this236// is *technically* quadratic, but `cranelift-frontend` won't construct237// any such edges. NaN canonicalization will introduce some of these238// edges, but they are chains of only two or three edges. So in239// practice, we *never* do more than a handful of iterations here unless240// (a) we parsed the CLIF from text and the text was funkily numbered,241// which we don't really care about, or (b) the CLIF producer did242// something weird, in which case it is their responsibility to stop243// doing that.244trace!(245"Entering fixpoint loop to compute the {} values for each eclass",246if use_worst {247"worst (chaos mode)"248} else {249"best"250}251);252let mut keep_going = true;253while keep_going {254keep_going = false;255trace!(256"fixpoint iteration {}",257self.stats.elaborate_best_cost_fixpoint_iters258);259self.stats.elaborate_best_cost_fixpoint_iters += 1;260261for (value, def) in self.func.dfg.values_and_defs() {262trace!("computing best for value {:?} def {:?}", value, def);263let orig_best_value = best[value];264265match def {266ValueDef::Union(x, y) => {267// Pick the best of the two options based on268// min-cost. This works because each element of `best`269// is a `(cost, value)` tuple; `cost` comes first so270// the natural comparison works based on cost, and271// breaks ties based on value number.272best[value] = if use_worst {273if best[x].1.is_reserved_value() {274best[y]275} else if best[y].1.is_reserved_value() {276best[x]277} else {278std::cmp::max(best[x], best[y])279}280} else {281std::cmp::min(best[x], best[y])282};283trace!(284" -> best of union({:?}, {:?}) = {:?}",285best[x], best[y], best[value]286);287}288ValueDef::Param(_, _) => {289best[value] = BestEntry(Cost::zero(), value);290}291// If the Inst is inserted into the layout (which is,292// at this point, only the side-effecting skeleton),293// then it must be computed and thus we give it zero294// cost.295ValueDef::Result(inst, _) => {296if let Some(_) = self.func.layout.inst_block(inst) {297best[value] = BestEntry(Cost::zero(), value);298} else {299let inst_data = &self.func.dfg.insts[inst];300// N.B.: at this point we know that the opcode is301// pure, so `pure_op_cost`'s precondition is302// satisfied.303let cost = Cost::of_pure_op(304inst_data.opcode(),305self.func.dfg.inst_values(inst).map(|value| best[value].0),306);307best[value] = BestEntry(cost, value);308trace!(" -> cost of value {} = {:?}", value, cost);309}310}311};312313// Keep on iterating the fixpoint loop while we are finding new314// best values.315keep_going |= orig_best_value != best[value];316}317}318319if cfg!(any(feature = "trace-log", debug_assertions)) {320trace!("finished fixpoint loop to compute best value for each eclass");321for value in self.func.dfg.values() {322trace!("-> best for eclass {:?}: {:?}", value, best[value]);323debug_assert_ne!(best[value].1, Value::reserved_value());324// You might additionally be expecting an assert that the best325// cost is not infinity, however infinite cost *can* happen in326// practice. First, note that our cost function doesn't know327// about any shared structure in the dataflow graph, it only328// sums operand costs. (And trying to avoid that by deduping a329// single operation's operands is a losing game because you can330// always just add one indirection and go from `add(x, x)` to331// `add(foo(x), bar(x))` to hide the shared structure.) Given332// that blindness to sharing, we can make cost grow333// exponentially with a linear sequence of operations:334//335// v0 = iconst.i32 1 ;; cost = 1336// v1 = iadd v0, v0 ;; cost = 3 + 1 + 1337// v2 = iadd v1, v1 ;; cost = 3 + 5 + 5338// v3 = iadd v2, v2 ;; cost = 3 + 13 + 13339// v4 = iadd v3, v3 ;; cost = 3 + 29 + 29340// v5 = iadd v4, v4 ;; cost = 3 + 61 + 61341// v6 = iadd v5, v5 ;; cost = 3 + 125 + 125342// ;; etc...343//344// Such a chain can cause cost to saturate to infinity. How do345// we choose which e-node is best when there are multiple that346// have saturated to infinity? It doesn't matter. As long as347// invariant (2) for optimization rules is upheld by our rule348// set (see `cranelift/codegen/src/opts/README.md`) it is safe349// to choose *any* e-node in the e-class. At worst we will350// produce suboptimal code, but never an incorrectness.351}352}353}354355/// Elaborate use of an eclass, inserting any needed new356/// instructions before the given inst `before`. Should only be357/// given values corresponding to results of instructions or358/// blockparams.359fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue {360debug_assert_ne!(value, Value::reserved_value());361362// Kick off the process by requesting this result363// value.364self.elab_stack365.push(ElabStackEntry::Start { value, before });366367// Now run the explicit-stack recursion until we reach368// the root.369self.process_elab_stack();370debug_assert_eq!(self.elab_result_stack.len(), 1);371self.elab_result_stack.pop().unwrap()372}373374/// Possibly rematerialize the instruction producing the value in375/// `arg` and rewrite `arg` to refer to it, if needed. Returns376/// `true` if a rewrite occurred.377fn maybe_remat_arg(378remat_values: &FxHashSet<Value>,379func: &mut Function,380remat_copies: &mut FxHashMap<(Block, Value), Value>,381insert_block: Block,382before: Inst,383arg: &mut ElaboratedValue,384stats: &mut Stats,385) -> bool {386// TODO (#7313): we may want to consider recursive387// rematerialization as well. We could process the arguments of388// the rematerialized instruction up to a certain depth. This389// would affect, e.g., adds-with-one-constant-arg, which are390// currently rematerialized. Right now we don't do this, to391// avoid the need for another fixpoint loop here.392if arg.in_block != insert_block && remat_values.contains(&arg.value) {393let new_value = match remat_copies.entry((insert_block, arg.value)) {394HashEntry::Occupied(o) => *o.get(),395HashEntry::Vacant(v) => {396let inst = func.dfg.value_def(arg.value).inst().unwrap();397debug_assert_eq!(func.dfg.inst_results(inst).len(), 1);398let new_inst = func.dfg.clone_inst(inst);399func.layout.insert_inst(new_inst, before);400let new_result = func.dfg.inst_results(new_inst)[0];401*v.insert(new_result)402}403};404trace!("rematerialized {} as {}", arg.value, new_value);405arg.value = new_value;406stats.elaborate_remat += 1;407true408} else {409false410}411}412413fn process_elab_stack(&mut self) {414while let Some(entry) = self.elab_stack.pop() {415match entry {416ElabStackEntry::Start { value, before } => {417debug_assert!(self.func.dfg.value_is_real(value));418419self.stats.elaborate_visit_node += 1;420421// Get the best option; we use `value` (latest422// value) here so we have a full view of the423// eclass.424trace!("looking up best value for {}", value);425let BestEntry(_, best_value) = self.value_to_best_value[value];426trace!("elaborate: value {} -> best {}", value, best_value);427debug_assert_ne!(best_value, Value::reserved_value());428429if let Some(elab_val) =430self.value_to_elaborated_value.get(&NullCtx, &best_value)431{432// Value is available; use it.433trace!("elaborate: value {} -> {:?}", value, elab_val);434self.stats.elaborate_memoize_hit += 1;435self.elab_result_stack.push(*elab_val);436continue;437}438439self.stats.elaborate_memoize_miss += 1;440441// Now resolve the value to its definition to see442// how we can compute it.443let (inst, result_idx) = match self.func.dfg.value_def(best_value) {444ValueDef::Result(inst, result_idx) => {445trace!(446" -> value {} is result {} of {}",447best_value, result_idx, inst448);449(inst, result_idx)450}451ValueDef::Param(in_block, _) => {452// We don't need to do anything to compute453// this value; just push its result on the454// result stack (blockparams are already455// available).456trace!(" -> value {} is a blockparam", best_value);457self.elab_result_stack.push(ElaboratedValue {458in_block,459value: best_value,460});461continue;462}463ValueDef::Union(_, _) => {464panic!("Should never have a Union value as the best value");465}466};467468trace!(469" -> result {} of inst {:?}",470result_idx, self.func.dfg.insts[inst]471);472473// We're going to need to use this instruction474// result, placing the instruction into the475// layout. First, enqueue all args to be476// elaborated. Push state to receive the results477// and later elab this inst.478let num_args = self.func.dfg.inst_values(inst).count();479self.elab_stack.push(ElabStackEntry::PendingInst {480inst,481result_idx,482num_args,483before,484});485486// Push args in reverse order so we process the487// first arg first.488for arg in self.func.dfg.inst_values(inst).rev() {489debug_assert_ne!(arg, Value::reserved_value());490self.elab_stack491.push(ElabStackEntry::Start { value: arg, before });492}493}494495ElabStackEntry::PendingInst {496inst,497result_idx,498num_args,499before,500} => {501trace!(502"PendingInst: {} result {} args {} before {}",503inst, result_idx, num_args, before504);505506// We should have all args resolved at this507// point. Grab them and drain them out, removing508// them.509let arg_idx = self.elab_result_stack.len() - num_args;510let arg_values = &mut self.elab_result_stack[arg_idx..];511512// Compute max loop depth.513//514// Note that if there are no arguments then this instruction515// is allowed to get hoisted up one loop. This is not516// usually used since no-argument values are things like517// constants which are typically rematerialized, but for the518// `vconst` instruction 128-bit constants aren't as easily519// rematerialized. They're hoisted out of inner loops but520// not to the function entry which may run the risk of521// placing too much register pressure on the entire522// function. This is modeled with the `.saturating_sub(1)`523// as the default if there's otherwise no maximum.524let loop_hoist_level = arg_values525.iter()526.map(|&value| {527// Find the outermost loop level at which528// the value's defining block *is not* a529// member. This is the loop-nest level530// whose hoist-block we hoist to.531let hoist_level = self532.loop_stack533.iter()534.position(|loop_entry| {535!self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp)536})537.unwrap_or(self.loop_stack.len());538trace!(539" -> arg: elab_value {:?} hoist level {:?}",540value, hoist_level541);542hoist_level543})544.max()545.unwrap_or(self.loop_stack.len().saturating_sub(1));546trace!(547" -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}",548loop_hoist_level,549self.loop_stack.len(),550self.loop_stack,551);552553// We know that this is a pure inst, because554// non-pure roots have already been placed in the555// value-to-elab'd-value map, so they will not556// reach this stage of processing.557//558// We now must determine the location at which we559// place the instruction. This is the current560// block *unless* we hoist above a loop when all561// args are loop-invariant (and this op is pure).562let (scope_depth, before, insert_block) = if loop_hoist_level563== self.loop_stack.len()564{565// Depends on some value at the current566// loop depth, or remat forces it here:567// place it at the current location.568(569self.value_to_elaborated_value.depth(),570before,571self.func.layout.inst_block(before).unwrap(),572)573} else {574// Does not depend on any args at current575// loop depth: hoist out of loop.576self.stats.elaborate_licm_hoist += 1;577let data = &self.loop_stack[loop_hoist_level];578// `data.hoist_block` should dominate `before`'s block.579let before_block = self.func.layout.inst_block(before).unwrap();580debug_assert!(self.domtree.block_dominates(data.hoist_block, before_block));581// Determine the instruction at which we582// insert in `data.hoist_block`.583let before = self.func.layout.last_inst(data.hoist_block).unwrap();584(data.scope_depth as usize, before, data.hoist_block)585};586587trace!(588" -> decided to place: before {} insert_block {}",589before, insert_block590);591592// Now that we have the location for the593// instruction, check if any of its args are remat594// values. If so, and if we don't have a copy of595// the rematerializing instruction for this block596// yet, create one.597let mut remat_arg = false;598for arg_value in arg_values.iter_mut() {599if Self::maybe_remat_arg(600&self.remat_values,601&mut self.func,602&mut self.remat_copies,603insert_block,604before,605arg_value,606&mut self.stats,607) {608remat_arg = true;609}610}611612// Now we need to place `inst` at the computed613// location (just before `before`). Note that614// `inst` may already have been placed somewhere615// else, because a pure node may be elaborated at616// more than one place. In this case, we need to617// duplicate the instruction (and return the618// `Value`s for that duplicated instance instead).619//620// Also clone if we rematerialized, because we621// don't want to rewrite the args in the original622// copy.623trace!("need inst {} before {}", inst, before);624let inst = if self.func.layout.inst_block(inst).is_some() || remat_arg {625// Clone the inst!626let new_inst = self.func.dfg.clone_inst(inst);627trace!(628" -> inst {} already has a location; cloned to {}",629inst, new_inst630);631// Create mappings in the632// value-to-elab'd-value map from original633// results to cloned results.634for (&result, &new_result) in self635.func636.dfg637.inst_results(inst)638.iter()639.zip(self.func.dfg.inst_results(new_inst).iter())640{641let elab_value = ElaboratedValue {642value: new_result,643in_block: insert_block,644};645let best_result = self.value_to_best_value[result];646self.value_to_elaborated_value.insert_if_absent_with_depth(647&NullCtx,648best_result.1,649elab_value,650scope_depth,651);652653self.value_to_best_value[new_result] = best_result;654655trace!(656" -> cloned inst has new result {} for orig {}",657new_result, result658);659}660new_inst661} else {662trace!(" -> no location; using original inst");663// Create identity mappings from result values664// to themselves in this scope, since we're665// using the original inst.666for &result in self.func.dfg.inst_results(inst) {667let elab_value = ElaboratedValue {668value: result,669in_block: insert_block,670};671let best_result = self.value_to_best_value[result];672self.value_to_elaborated_value.insert_if_absent_with_depth(673&NullCtx,674best_result.1,675elab_value,676scope_depth,677);678trace!(" -> inserting identity mapping for {}", result);679}680inst681};682683// Place the inst just before `before`.684assert!(685is_pure_for_egraph(self.func, inst),686"something has gone very wrong if we are elaborating effectful \687instructions, they should have remained in the skeleton"688);689self.func.layout.insert_inst(inst, before);690691// Update the inst's arguments.692self.func693.dfg694.overwrite_inst_values(inst, arg_values.into_iter().map(|ev| ev.value));695696// Now that we've consumed the arg values, pop697// them off the stack.698self.elab_result_stack.truncate(arg_idx);699700// Push the requested result index of the701// instruction onto the elab-results stack.702self.elab_result_stack.push(ElaboratedValue {703in_block: insert_block,704value: self.func.dfg.inst_results(inst)[result_idx],705});706}707}708}709}710711fn elaborate_block(&mut self, elab_values: &mut Vec<Value>, idom: Option<Block>, block: Block) {712trace!("elaborate_block: block {}", block);713self.start_block(idom, block);714715// Iterate over the side-effecting skeleton using the linked716// list in Layout. We will insert instructions that are717// elaborated *before* `inst`, so we can always use its718// next-link to continue the iteration.719let mut next_inst = self.func.layout.first_inst(block);720let mut first_branch = None;721while let Some(inst) = next_inst {722trace!(723"elaborating inst {} with results {:?}",724inst,725self.func.dfg.inst_results(inst)726);727// Record the first branch we see in the block; all728// elaboration for args of *any* branch must be inserted729// before the *first* branch, because the branch group730// must remain contiguous at the end of the block.731if self.func.dfg.insts[inst].opcode().is_branch() && first_branch == None {732first_branch = Some(inst);733}734735// Determine where elaboration inserts insts.736let before = first_branch.unwrap_or(inst);737trace!(" -> inserting before {}", before);738739elab_values.extend(self.func.dfg.inst_values(inst));740for arg in elab_values.iter_mut() {741trace!(" -> arg {}", *arg);742// Elaborate the arg, placing any newly-inserted insts743// before `before`. Get the updated value, which may744// be different than the original.745let mut new_arg = self.elaborate_eclass_use(*arg, before);746Self::maybe_remat_arg(747&self.remat_values,748&mut self.func,749&mut self.remat_copies,750block,751inst,752&mut new_arg,753&mut self.stats,754);755trace!(" -> rewrote arg to {:?}", new_arg);756*arg = new_arg.value;757}758self.func759.dfg760.overwrite_inst_values(inst, elab_values.drain(..));761762// We need to put the results of this instruction in the763// map now.764for &result in self.func.dfg.inst_results(inst) {765trace!(" -> result {}", result);766let best_result = self.value_to_best_value[result];767self.value_to_elaborated_value.insert_if_absent(768&NullCtx,769best_result.1,770ElaboratedValue {771in_block: block,772value: result,773},774);775}776777next_inst = self.func.layout.next_inst(inst);778}779}780781fn elaborate_domtree(&mut self, domtree: &DominatorTree) {782self.block_stack.push(BlockStackEntry::Elaborate {783block: self.func.layout.entry_block().unwrap(),784idom: None,785});786787// A temporary workspace for elaborate_block, allocated here to maximize the use of the788// allocation.789let mut elab_values = Vec::new();790791while let Some(top) = self.block_stack.pop() {792match top {793BlockStackEntry::Elaborate { block, idom } => {794self.block_stack.push(BlockStackEntry::Pop);795self.value_to_elaborated_value.increment_depth();796797self.elaborate_block(&mut elab_values, idom, block);798799// Push children. We are doing a preorder800// traversal so we do this after processing this801// block above.802let block_stack_end = self.block_stack.len();803for child in self.ctrl_plane.shuffled(domtree.children(block)) {804self.block_stack.push(BlockStackEntry::Elaborate {805block: child,806idom: Some(block),807});808}809// Reverse what we just pushed so we elaborate in810// original block order. (The domtree iter is a811// single-ended iter over a singly-linked list so812// we can't `.rev()` above.)813self.block_stack[block_stack_end..].reverse();814}815BlockStackEntry::Pop => {816self.value_to_elaborated_value.decrement_depth();817}818}819}820}821822pub(crate) fn elaborate(&mut self) {823self.stats.elaborate_func += 1;824self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64;825self.compute_best_values();826self.elaborate_domtree(&self.domtree);827self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64;828}829}830831832