Path: blob/main/cranelift/codegen/src/egraph.rs
//! Support for egraphs represented in the DataFlowGraph.

use crate::alias_analysis::{AliasAnalysis, LastStores};
use crate::ctxhash::{CtxEq, CtxHash, NullCtx};
use crate::cursor::{Cursor, CursorPosition, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::egraph::elaborate::Elaborator;
use crate::inst_predicates::{is_mergeable_for_egraph, is_pure_for_egraph};
use crate::ir::pcc::Fact;
use crate::ir::{
    Block, DataFlowGraph, Function, Inst, InstructionData, Opcode, Type, Value, ValueDef,
    ValueListPool,
};
use crate::loop_analysis::LoopAnalysis;
use crate::opts::IsleContext;
use crate::opts::generated_code::SkeletonInstSimplification;
use crate::scoped_hash_map::{Entry as ScopedEntry, ScopedHashMap};
use crate::settings::Flags;
use crate::take_and_replace::TakeAndReplace;
use crate::trace;
use alloc::vec::Vec;
use core::cmp::Ordering;
use core::hash::Hasher;
use cranelift_control::ControlPlane;
use cranelift_entity::SecondaryMap;
use cranelift_entity::packed_option::ReservedValue;
use rustc_hash::FxHashSet;
use smallvec::SmallVec;

mod cost;
mod elaborate;

/// Pass over a Function that does the whole aegraph thing.
///
/// - Removes non-skeleton nodes from the Layout.
/// - Performs a GVN-and-rule-application pass over all Values
///   reachable from the skeleton, potentially creating new Union
///   nodes (i.e., an aegraph) so that some values have multiple
///   representations.
/// - Does "extraction" on the aegraph: selects the best value out of
///   the tree-of-Union nodes for each used value.
/// - Does "scoped elaboration" on the aegraph: chooses one or more
///   locations for pure nodes to become instructions again in the
///   layout, as forced by the skeleton.
///
/// At the beginning and end of this pass, the CLIF should be in a
/// state that passes the verifier and, additionally, has no Union
/// nodes. During the pass, Union nodes may exist, and instructions in
/// the layout may refer to results of instructions that are not
/// placed in the layout.
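///
/// As a rough illustration (schematic only, not verbatim CLIF), a
/// mid-end rewrite of
///
///     v2 = iadd v1, v0   ;; where v0 is known to be `iconst 0`
///
/// to plain `v1` leaves that program value with two representations,
/// tied together by a Union node (conceptually `union(v2, v1)`).
/// Extraction then picks the cheaper form, and elaboration decides
/// where, if anywhere, the `iadd` is re-inserted into the layout.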
pub struct EgraphPass<'a> {
    /// The function we're operating on.
    func: &'a mut Function,
    /// Dominator tree for the CFG, used to visit blocks in pre-order
    /// so we see value definitions before their uses, and also used for
    /// O(1) dominance checks.
    domtree: &'a DominatorTree,
    /// Alias analysis, used during optimization.
    alias_analysis: &'a mut AliasAnalysis<'a>,
    /// Loop analysis results, used for built-in LICM during
    /// elaboration.
    loop_analysis: &'a LoopAnalysis,
    /// Compiler flags.
    flags: &'a Flags,
    /// Chaos-mode control-plane so we can test that we still get
    /// correct results when our heuristics make bad decisions.
    ctrl_plane: &'a mut ControlPlane,
    /// Which Values do we want to rematerialize in each block where
    /// they're used?
    remat_values: FxHashSet<Value>,
    /// Stats collected while we run this pass.
    pub(crate) stats: Stats,
}

/// The maximum number of rewrites we will take from a single call into ISLE.
const MATCHES_LIMIT: usize = 5;

/// The maximum number of enodes in any given eclass.
const ECLASS_ENODE_LIMIT: usize = 5;

/// Context passed through node insertion and optimization.
pub(crate) struct OptimizeCtx<'opt, 'analysis>
where
    'analysis: 'opt,
{
    // Borrowed from EgraphPass:
    pub(crate) func: &'opt mut Function,
    pub(crate) value_to_opt_value: &'opt mut SecondaryMap<Value, Value>,
    available_block: &'opt mut SecondaryMap<Value, Block>,
    eclass_size: &'opt mut SecondaryMap<Value, u8>,
    pub(crate) gvn_map: &'opt mut ScopedHashMap<(Type, InstructionData), Option<Value>>,
    pub(crate) gvn_map_blocks: &'opt Vec<Block>,
    pub(crate) remat_values: &'opt mut FxHashSet<Value>,
    pub(crate) stats: &'opt mut Stats,
    domtree: &'opt DominatorTree,
    pub(crate) alias_analysis: &'opt mut AliasAnalysis<'analysis>,
    pub(crate) alias_analysis_state: &'opt mut LastStores,
    flags: &'opt Flags,
    ctrl_plane: &'opt mut ControlPlane,
    // Held locally during optimization of one node (recursively):
    pub(crate) rewrite_depth: usize,
    pub(crate) subsume_values: FxHashSet<Value>,
    optimized_values: SmallVec<[Value; MATCHES_LIMIT]>,
    optimized_insts: SmallVec<[SkeletonInstSimplification; MATCHES_LIMIT]>,
}

/// For passing to `insert_pure_enode`. Sometimes the enode already
/// exists as an Inst (from the original CLIF), and sometimes we're in
/// the middle of creating it and want to avoid inserting it if
/// possible until we know we need it.
pub(crate) enum NewOrExistingInst {
    New(InstructionData, Type),
    Existing(Inst),
}

impl NewOrExistingInst {
    fn get_inst_key<'a>(&'a self, dfg: &'a DataFlowGraph) -> (Type, InstructionData) {
        match self {
            NewOrExistingInst::New(data, ty) => (*ty, *data),
            NewOrExistingInst::Existing(inst) => {
                let ty = dfg.ctrl_typevar(*inst);
                (ty, dfg.insts[*inst])
            }
        }
    }
}

impl<'opt, 'analysis> OptimizeCtx<'opt, 'analysis>
where
    'analysis: 'opt,
{
    /// Optimization of a single instruction.
    ///
    /// This does a few things:
    /// - Looks up the instruction in the GVN deduplication map. If we
    ///   already have the same instruction somewhere else, with the
    ///   same args, then we can alias the original instruction's
    ///   results and omit this instruction entirely.
    /// - If the instruction is "new" (not deduplicated), then apply
    ///   optimization rules:
    ///   - All of the mid-end rules written in ISLE.
    ///   - Store-to-load forwarding.
    /// - Update the value-to-opt-value map, and update the eclass
    ///   union-find, if we rewrote the value to different form(s).
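    ///
    /// For example (an illustrative sketch): if a pure `iadd v1, v2` is
    /// inserted and an identical instruction with the same controlling
    /// type is already present in the GVN map with result `v3`, we simply
    /// map this instruction's result to `v3` rather than keeping a
    /// duplicate instruction.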
    pub(crate) fn insert_pure_enode(&mut self, inst: NewOrExistingInst) -> Value {
        // Create the external context for looking up and updating the
        // GVN map. This is necessary so that instructions themselves
        // do not have to carry all the references or data for a full
        // `Eq` or `Hash` impl.
        let gvn_context = GVNContext {
            value_lists: &self.func.dfg.value_lists,
        };

        self.stats.pure_inst += 1;
        if let NewOrExistingInst::New(..) = inst {
            self.stats.new_inst += 1;
        }

        // Does this instruction already exist? If so, add entries to
        // the value-map to rewrite uses of its results to the results
        // of the original (existing) instruction. If not, optimize
        // the new instruction.
        if let Some(&Some(orig_result)) = self
            .gvn_map
            .get(&gvn_context, &inst.get_inst_key(&self.func.dfg))
        {
            self.stats.pure_inst_deduped += 1;
            if let NewOrExistingInst::Existing(inst) = inst {
                debug_assert_eq!(self.func.dfg.inst_results(inst).len(), 1);
                let result = self.func.dfg.first_result(inst);
                self.value_to_opt_value[result] = orig_result;
                self.available_block[result] = self.available_block[orig_result];
                self.func.dfg.merge_facts(result, orig_result);
            }
            orig_result
        } else {
            // Now actually insert the InstructionData and attach
            // result value (exactly one).
            let (inst, result, ty) = match inst {
                NewOrExistingInst::New(data, typevar) => {
                    self.stats.pure_inst_insert_new += 1;
                    let inst = self.func.dfg.make_inst(data);
                    // TODO: reuse return value?
                    self.func.dfg.make_inst_results(inst, typevar);
                    let result = self.func.dfg.first_result(inst);
                    // New inst. We need to do the analysis of its result.
                    (inst, result, typevar)
                }
                NewOrExistingInst::Existing(inst) => {
                    self.stats.pure_inst_insert_orig += 1;
                    let result = self.func.dfg.first_result(inst);
                    let ty = self.func.dfg.ctrl_typevar(inst);
                    (inst, result, ty)
                }
            };

            self.attach_constant_fact(inst, result, ty);

            self.available_block[result] = self.get_available_block(inst);
            let opt_value = self.optimize_pure_enode(inst);
            log::trace!("optimizing inst {inst} orig result {result} gave {opt_value}");

            let gvn_context = GVNContext {
                value_lists: &self.func.dfg.value_lists,
            };
            // Insert at level implied by args. This enables merging
            // in LICM cases like:
            //
            //     while (...) {
            //         if (...) {
            //             let x = loop_invariant_expr;
            //         }
            //         if (...) {
            //             let x = loop_invariant_expr;
            //         }
            //     }
            //
            // where the two instances of the expression otherwise
            // wouldn't merge because each would be in a separate
            // subscope of the scoped hashmap during traversal.
            log::trace!(
                "value {} is available at {}",
                opt_value,
                self.available_block[opt_value]
            );
            let depth = self.depth_of_block_in_gvn_map(self.available_block[opt_value]);
            self.gvn_map.insert_with_depth(
                &gvn_context,
                (ty, self.func.dfg.insts[inst]),
                Some(opt_value),
                depth,
            );
            self.value_to_opt_value[result] = opt_value;
            opt_value
        }
    }

    /// Find the block where a pure instruction first becomes available,
    /// defined as the block that is closest to the root where all of
    /// its arguments are available. In the unusual case where a pure
    /// instruction has no arguments (e.g. get_return_address), we can
    /// place it anywhere, so it is available in the entry block.
    ///
    /// This function does not compute available blocks recursively.
    /// All of the instruction's arguments must have had their available
    /// blocks assigned already.
    fn get_available_block(&self, inst: Inst) -> Block {
        // Side-effecting instructions have different rules for where
        // they become available, so this function does not apply.
        debug_assert!(is_pure_for_egraph(self.func, inst));

        // Note that the def-points of all arguments to an instruction
        // in SSA lie on a line of direct ancestors in the domtree, and
        // so do their available-blocks. This means that for any pair of
        // arguments, their available blocks are either the same or one
        // strictly dominates the other. We just need to find any argument
        // whose available block is deepest in the domtree.
        self.func.dfg.insts[inst]
            .arguments(&self.func.dfg.value_lists)
            .iter()
            .map(|&v| {
                let block = self.available_block[v];
                debug_assert!(!block.is_reserved_value());
                block
            })
            .max_by(|&x, &y| {
                if self.domtree.block_dominates(x, y) {
                    Ordering::Less
                } else {
                    debug_assert!(self.domtree.block_dominates(y, x));
                    Ordering::Greater
                }
            })
            .unwrap_or(self.func.layout.entry_block().unwrap())
    }

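    /// Find the depth (scope level) in the current GVN-map scope stack at
    /// which `block` appears. `gvn_map_blocks` mirrors the domtree-preorder
    /// traversal stack, so this is the level at which a GVN entry must be
    /// inserted for it to remain visible throughout `block`'s subtree of
    /// the domtree.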
    fn depth_of_block_in_gvn_map(&self, block: Block) -> usize {
        log::trace!(
            "finding depth of available block {} in domtree stack: {:?}",
            block,
            self.gvn_map_blocks
        );
        self.gvn_map_blocks
            .iter()
            .enumerate()
            .rev()
            .find(|&(_, b)| *b == block)
            .unwrap()
            .0
    }

    /// Optimizes an enode by applying any matching mid-end rewrite
    /// rules (or store-to-load forwarding, which is a special case),
    /// unioning together all possible optimized (or rewritten) forms
    /// of this expression into an eclass and returning the `Value`
    /// that represents that eclass.
    fn optimize_pure_enode(&mut self, inst: Inst) -> Value {
        // A pure node always has exactly one result.
        let orig_value = self.func.dfg.first_result(inst);

        let mut guard = TakeAndReplace::new(self, |x| &mut x.optimized_values);
        let (ctx, optimized_values) = guard.get();

        // Limit rewrite depth. When we apply optimization rules, they
        // may create new nodes (values) and those are, recursively,
        // optimized eagerly as soon as they are created. So we may
        // have more than one ISLE invocation on the stack. (This is
        // necessary so that as the toplevel builds the
        // right-hand-side expression bottom-up, it uses the "latest"
        // optimized values for all the constituent parts.) To avoid
        // infinite or problematic recursion, we bound the rewrite
        // depth to a small constant here.
        const REWRITE_LIMIT: usize = 5;
        if ctx.rewrite_depth > REWRITE_LIMIT {
            ctx.stats.rewrite_depth_limit += 1;
            return orig_value;
        }
        ctx.rewrite_depth += 1;
        trace!("Incrementing rewrite depth; now {}", ctx.rewrite_depth);

        // Invoke the ISLE toplevel constructor, getting all new
        // values produced as equivalents to this value.
        trace!("Calling into ISLE with original value {}", orig_value);
        ctx.stats.rewrite_rule_invoked += 1;
        debug_assert!(optimized_values.is_empty());
        crate::opts::generated_code::constructor_simplify(
            &mut IsleContext { ctx },
            orig_value,
            optimized_values,
        );

        ctx.stats.rewrite_rule_results += optimized_values.len() as u64;

        // It's not supposed to matter what order `simplify` returns values in.
        ctx.ctrl_plane.shuffle(optimized_values);

        let num_matches = optimized_values.len();
        if num_matches > MATCHES_LIMIT {
            trace!(
                "Reached maximum matches limit; too many optimized values \
                 ({num_matches} > {MATCHES_LIMIT}); ignoring rest.",
            );
            optimized_values.truncate(MATCHES_LIMIT);
        }

        // Sort and deduplicate optimized values, in case multiple
        // rules produced the same simplification.
        optimized_values.sort_unstable();
        optimized_values.dedup();

        trace!(" -> returned from ISLE: {orig_value} -> {optimized_values:?}");

        // Construct a union-node tree representing the new eclass
        // that results from rewriting. If any returned value was
        // marked "subsume", take only that value. Otherwise,
        // sequentially build the chain over the original value and
        // all returned values.
        let result_value = if let Some(&subsuming_value) = optimized_values
            .iter()
            .find(|&value| ctx.subsume_values.contains(value))
        {
            optimized_values.clear();
            ctx.stats.pure_inst_subsume += 1;
            subsuming_value
        } else {
            let mut union_value = orig_value;
            let mut eclass_size = ctx.eclass_size[orig_value] + 1;
            for optimized_value in optimized_values.drain(..) {
                trace!(
                    "Returned from ISLE for {}, got {:?}",
                    orig_value, optimized_value
                );
                if optimized_value == orig_value {
                    trace!(" -> same as orig value; skipping");
                    ctx.stats.pure_inst_rewrite_to_self += 1;
                    continue;
                }
                let rhs_eclass_size = ctx.eclass_size[optimized_value] + 1;
                if usize::from(eclass_size) + usize::from(rhs_eclass_size) > ECLASS_ENODE_LIMIT {
                    trace!(" -> reached eclass size limit");
                    ctx.stats.eclass_size_limit += 1;
                    break;
                }
                let old_union_value = union_value;
                union_value = ctx.func.dfg.union(old_union_value, optimized_value);
                eclass_size += rhs_eclass_size;
                ctx.eclass_size[union_value] = eclass_size - 1;
                ctx.stats.union += 1;
                trace!(" -> union: now {}", union_value);
                ctx.func.dfg.merge_facts(old_union_value, optimized_value);
                ctx.available_block[union_value] =
                    ctx.merge_availability(old_union_value, optimized_value);
            }
            union_value
        };

        ctx.rewrite_depth -= 1;
        trace!("Decrementing rewrite depth; now {}", ctx.rewrite_depth);
        if ctx.rewrite_depth == 0 {
            ctx.subsume_values.clear();
        }

        debug_assert!(ctx.optimized_values.is_empty());
        result_value
    }

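    /// Compute the availability of a union of two values: the
    /// domtree-higher (dominating) of the two operands' available blocks,
    /// i.e. the meet, dual to the join used in `get_available_block` for
    /// an instruction's arguments.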
    fn merge_availability(&self, a: Value, b: Value) -> Block {
        let a = self.available_block[a];
        let b = self.available_block[b];
        if self.domtree.block_dominates(a, b) {
            a
        } else {
            b
        }
    }

    /// Optimize a "skeleton" instruction.
    ///
    /// Returns an optional command of how to continue processing the optimized
    /// instruction (e.g. removing it or replacing it with a new instruction).
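    ///
    /// Roughly speaking: an idempotent side-effecting instruction that is
    /// dominated by an identical instance hits the scoped GVN map and
    /// yields `Some(SkeletonInstSimplification::Remove)`, as does a load
    /// whose value alias analysis forwards from an earlier store. Because
    /// the GVN map is scoped by domtree level, an identical instruction in
    /// a sibling (non-dominating) block does not merge.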
    fn optimize_skeleton_inst(
        &mut self,
        inst: Inst,
        block: Block,
    ) -> Option<SkeletonInstSimplification> {
        self.stats.skeleton_inst += 1;

        // If we have a rewrite rule for this instruction, do that first, so
        // that GVN and alias analysis only see simplified skeleton
        // instructions.
        if let Some(cmd) = self.simplify_skeleton_inst(inst) {
            self.stats.skeleton_inst_simplified += 1;
            return Some(cmd);
        }

        // First, can we try to deduplicate? We need to keep some copy
        // of the instruction around because it's side-effecting, but
        // we may be able to reuse an earlier instance of it.
        if is_mergeable_for_egraph(self.func, inst) {
            let result = self.func.dfg.inst_results(inst).get(0).copied();
            trace!(" -> mergeable side-effecting op {}", inst);

            // Does this instruction already exist? If so, add entries to
            // the value-map to rewrite uses of its results to the results
            // of the original (existing) instruction. If not, optimize
            // the new instruction.
            //
            // Note that the GVN map is scoped, which is important
            // here: because effectful ops are not removed from the
            // skeleton (`Layout`), we need to be mindful of whether
            // our current position is dominated by an instance of the
            // instruction. (See #5796 for details.)
            let ty = self.func.dfg.ctrl_typevar(inst);
            match self
                .gvn_map
                .entry(&NullCtx, (ty, self.func.dfg.insts[inst]))
            {
                ScopedEntry::Occupied(o) => {
                    let orig_result = *o.get();
                    match (result, orig_result) {
                        (Some(result), Some(orig_result)) => {
                            // Hit in GVN map -- reuse value.
                            self.stats.skeleton_inst_gvn += 1;
                            self.value_to_opt_value[result] = orig_result;
                            self.available_block[result] = self.available_block[orig_result];
                            trace!(" -> merges result {} to {}", result, orig_result);
                        }
                        (None, None) => {
                            // Hit in the GVN map, but the instruction doesn't
                            // produce results, only side effects. Nothing else
                            // to do here.
                            self.stats.skeleton_inst_gvn += 1;
                            trace!(" -> merges with dominating instruction");
                        }
                        (_, _) => unreachable!(),
                    }
                    Some(SkeletonInstSimplification::Remove)
                }
                ScopedEntry::Vacant(v) => {
                    // Otherwise, insert it into the value-map.
                    if let Some(result) = result {
                        self.value_to_opt_value[result] = result;
                        self.available_block[result] = block;
                    }
                    v.insert(result);
                    trace!(" -> inserts as new (no GVN)");
                    None
                }
            }
        }
        // Otherwise, if a load or store, process it with the alias
        // analysis to see if we can optimize it (rewrite in terms of
        // an earlier load or stored value).
        else if let Some(new_result) =
            self.alias_analysis
                .process_inst(self.func, self.alias_analysis_state, inst)
        {
            self.stats.alias_analysis_removed += 1;
            let result = self.func.dfg.first_result(inst);
            trace!(
                " -> inst {} has result {} replaced with {}",
                inst, result, new_result
            );
            self.value_to_opt_value[result] = new_result;
            self.available_block[result] = self.available_block[new_result];
            self.func.dfg.merge_facts(result, new_result);
            Some(SkeletonInstSimplification::Remove)
        }
        // Otherwise, generic side-effecting op -- always keep it, and
        // set its results to identity-map to original values.
        else {
            // Set all results to identity-map to themselves
            // in the value-to-opt-value map.
            for &result in self.func.dfg.inst_results(inst) {
                self.value_to_opt_value[result] = result;
                self.available_block[result] = block;
            }
            None
        }
    }

    /// Find the best simplification of the given skeleton instruction, if any,
    /// by consulting our `simplify_skeleton` ISLE rules.
    fn simplify_skeleton_inst(&mut self, inst: Inst) -> Option<SkeletonInstSimplification> {
        // We cannot currently simplify terminators, or simplify into
        // terminators. Anything that could change the control-flow graph is off
        // limits.
        //
        // Consider the following CLIF snippet:
        //
        //     block0(v0: i64):
        //         v1 = iconst.i32 0
        //         trapz v1, user42
        //         v2 = load.i32 v0
        //         brif v1, block1, block2
        //     block1:
        //         return v2
        //     block2:
        //         v3 = iconst.i32 1
        //         v4 = iadd v2, v3
        //         return v4
        //
        // We would ideally like to perform simplifications like replacing the
        // `trapz` with an unconditional `trap` and the conditional `brif`
        // branch with an unconditional `jump`. Note, however, that blocks
        // `block1` and `block2` are dominated by `block0` and therefore can and
        // do use values defined in `block0`. This presents challenges:
        //
        // * If we replace the `brif` with a `jump`, then we've mutated the
        //   control-flow graph and removed that domination property. The uses
        //   of `v2` and `v3` in those blocks become invalid.
        //
        // * Even worse, if we turn the `trapz` into a `trap`, we are
        //   introducing a terminator into the middle of the block, which leaves
        //   us with two choices to fix up the IR so that there aren't any
        //   instructions following the terminator in the block:
        //
        //   1. We can split the unreachable instructions off into a new
        //      block. However, there is no control-flow edge from the current
        //      block to this new block and so, again, the new block isn't
        //      dominated by the current block, and therefore they can't use
        //      values defined in this block or any block dominating it. The
        //      `load` instruction uses `v0` but is not dominated by `v0`'s
        //      definition.
        //
        //   2. Alternatively, we can simply delete the trailing instructions,
        //      since they are unreachable. But then not only are the old
        //      instructions' uses no longer dominated by their definitions, but
        //      the definitions do not exist at all anymore!
        //
        // Whatever approach we would take, we would invalidate value uses, and
        // would need to track and fix them up.
        if self.func.dfg.insts[inst].opcode().is_branch() {
            return None;
        }

        let mut guard = TakeAndReplace::new(self, |x| &mut x.optimized_insts);
        let (ctx, optimized_insts) = guard.get();

        crate::opts::generated_code::constructor_simplify_skeleton(
            &mut IsleContext { ctx },
            inst,
            optimized_insts,
        );

        let simplifications_len = optimized_insts.len();
        log::trace!(" -> simplify_skeleton: yielded {simplifications_len} simplification(s)");
        if simplifications_len > MATCHES_LIMIT {
            log::trace!(" too many candidate simplifications; truncating to {MATCHES_LIMIT}");
            optimized_insts.truncate(MATCHES_LIMIT);
        }

        // Find the best simplification, if any, from our candidates.
        //
        // Unlike simplifying pure values, we do not add side-effectful
        // instructions to the egraph, nor do we extract the best version via
        // dynamic programming and considering the costs of operands. Instead,
        // we greedily choose the best simplification. This is because there is
        // an impedance mismatch: the egraph and our pure rewrites are centered
        // around *values*, but we don't represent side-effects with values, we
        // represent them implicitly in their *instructions*.
        //
        // The initial best choice is "no simplification, just use the original
        // instruction" which has the original instruction's cost.
        let mut best = None;
        let mut best_cost = cost::Cost::of_skeleton_op(
            ctx.func.dfg.insts[inst].opcode(),
            ctx.func.dfg.inst_args(inst).len(),
        );

        while let Some(simplification) = optimized_insts.pop() {
            let (new_inst, new_val) = match simplification {
                // We can't do better than completely removing the skeleton
                // instruction, so short-circuit the loop and eagerly return the
                // `Remove*` simplifications.
                SkeletonInstSimplification::Remove => {
                    log::trace!(" -> simplify_skeleton: remove inst");
                    debug_assert!(ctx.func.dfg.inst_results(inst).is_empty());
                    return Some(simplification);
                }
                SkeletonInstSimplification::RemoveWithVal { val } => {
                    log::trace!(" -> simplify_skeleton: remove inst and use {val} as its result");
                    if cfg!(debug_assertions) {
                        let results = ctx.func.dfg.inst_results(inst);
                        debug_assert_eq!(results.len(), 1);
                        debug_assert_eq!(
                            ctx.func.dfg.value_type(results[0]),
                            ctx.func.dfg.value_type(val),
                        );
                    }
                    return Some(simplification);
                }

                // For instruction replacement simplification, we want to check
                // that the replacements define the same number and types of
                // values as the original instruction, and also determine
                // whether they are actually an improvement over (i.e. have
                // lower cost than) the original instruction.
                SkeletonInstSimplification::Replace { inst } => {
                    log::trace!(
                        " -> simplify_skeleton: replace inst with {inst}: {}",
                        ctx.func.dfg.display_inst(inst)
                    );
                    (inst, None)
                }
                SkeletonInstSimplification::ReplaceWithVal { inst, val } => {
                    log::trace!(
                        " -> simplify_skeleton: replace inst with {val} and {inst}: {}",
                        ctx.func.dfg.display_inst(inst)
                    );
                    (inst, Some(val))
                }
            };

            if cfg!(debug_assertions) {
                let opcode = ctx.func.dfg.insts[inst].opcode();
                debug_assert!(
                    !(opcode.is_terminator() || opcode.is_branch()),
                    "simplifying control-flow instructions and terminators is not yet supported",
                );

                let old_vals = ctx.func.dfg.inst_results(inst);
                let new_vals = if let Some(val) = new_val.as_ref() {
                    std::slice::from_ref(val)
                } else {
                    ctx.func.dfg.inst_results(new_inst)
                };
                debug_assert_eq!(
                    old_vals.len(),
                    new_vals.len(),
                    "skeleton simplification should result in the same number of result values",
                );

                for (old_val, new_val) in old_vals.iter().zip(new_vals) {
                    let old_ty = ctx.func.dfg.value_type(*old_val);
                    let new_ty = ctx.func.dfg.value_type(*new_val);
                    debug_assert_eq!(
                        old_ty, new_ty,
                        "skeleton simplification should result in values of the correct type",
                    );
                }
            }

            // Our best simplification is the one with the least cost. Update
            // `best` if necessary.
            let cost = cost::Cost::of_skeleton_op(
                ctx.func.dfg.insts[new_inst].opcode(),
                ctx.func.dfg.inst_args(new_inst).len(),
            );
            if cost < best_cost {
                best = Some(simplification);
                best_cost = cost;
            }
        }

        // Return the best simplification!
        best
    }

    /// Helper to propagate facts on constant values: if PCC is
    /// enabled, then unconditionally add a fact attesting to the
    /// Value's concrete value.
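    ///
    /// For example, with PCC enabled, an `iconst.i32 7` gets
    /// `Fact::constant(32, 7)` attached to its result value.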
    fn attach_constant_fact(&mut self, inst: Inst, value: Value, ty: Type) {
        if self.flags.enable_pcc() {
            if let InstructionData::UnaryImm {
                opcode: Opcode::Iconst,
                imm,
            } = self.func.dfg.insts[inst]
            {
                let imm: i64 = imm.into();
                self.func.dfg.facts[value] =
                    Some(Fact::constant(ty.bits().try_into().unwrap(), imm as u64));
            }
        }
    }
}

impl<'a> EgraphPass<'a> {
    /// Create a new EgraphPass.
    pub fn new(
        func: &'a mut Function,
        domtree: &'a DominatorTree,
        loop_analysis: &'a LoopAnalysis,
        alias_analysis: &'a mut AliasAnalysis<'a>,
        flags: &'a Flags,
        ctrl_plane: &'a mut ControlPlane,
    ) -> Self {
        Self {
            func,
            domtree,
            loop_analysis,
            alias_analysis,
            flags,
            ctrl_plane,
            stats: Stats::default(),
            remat_values: FxHashSet::default(),
        }
    }

    /// Run the process.
    pub fn run(&mut self) {
        self.remove_pure_and_optimize();

        trace!("egraph built:\n{}\n", self.func.display());
        if cfg!(feature = "trace-log") {
            for (value, def) in self.func.dfg.values_and_defs() {
                trace!(" -> {} = {:?}", value, def);
                match def {
                    ValueDef::Result(i, 0) => {
                        trace!(" -> {} = {:?}", i, self.func.dfg.insts[i]);
                    }
                    _ => {}
                }
            }
        }

        self.elaborate();

        log::trace!("stats: {:#?}", self.stats);
    }

    /// Remove pure nodes from the `Layout` of the function, ensuring
    /// that only the "side-effect skeleton" remains, and also
    /// optimize the pure nodes. This is the first step of
    /// egraph-based processing and turns the pure CFG-based CLIF into
    /// a CFG skeleton with a sea of (optimized) nodes tying it
    /// together.
    ///
    /// As we walk through the code, we eagerly apply optimization
    /// rules; at any given point we have a "latest version" of an
    /// eclass of possible representations for a `Value` in the
    /// original program, which is itself a `Value` at the root of a
    /// union-tree. We keep a map from the original values to these
    /// optimized values. When we encounter any instruction (pure or
    /// side-effecting skeleton) we rewrite its arguments to capture
    /// the "latest" optimized forms of these values. (We need to do
    /// this as part of this pass, and not later using a finished map,
    /// because the eclass can continue to be updated and we need to
    /// only refer to its subset that exists at this stage, to
    /// maintain acyclicity.)
    fn remove_pure_and_optimize(&mut self) {
        let mut cursor = FuncCursor::new(self.func);
        let mut value_to_opt_value: SecondaryMap<Value, Value> =
            SecondaryMap::with_default(Value::reserved_value());

        // Map from instruction to value for hash-consing of pure ops
        // into the egraph. This can be a standard (non-scoped)
        // hashmap because pure ops have no location: they are
        // "outside of" control flow.
        //
        // Note also that we keep the controlling typevar (the `Type`
        // in the tuple below) because it may disambiguate
        // instructions that are identical except for type.
        //
        // We store both skeleton and non-skeleton instructions in the
        // GVN map; for skeleton instructions, we only store those
        // that are idempotent, i.e., still eligible to GVN. Note that
        // some skeleton instructions are idempotent but do not
        // produce a value: e.g., traps on a given condition. To allow
        // for both cases, we store an `Option<Value>` as the value in
        // this map.
        let mut gvn_map: ScopedHashMap<(Type, InstructionData), Option<Value>> =
            ScopedHashMap::with_capacity(cursor.func.dfg.num_values());

        // The block in the domtree preorder traversal at each level
        // of the GVN map.
        let mut gvn_map_blocks: Vec<Block> = vec![];

        // To get the best possible merging and canonicalization, we
        // track where a value is "available" at: this is the
        // domtree-nearest-ancestor join of all args if the value
        // itself is pure, otherwise the block where the value is
        // defined. (And for union nodes, the
        // domtree-highest-ancestor, i.e., the meet or the dual to the
        // above join.)
        let mut available_block: SecondaryMap<Value, Block> =
            SecondaryMap::with_default(Block::reserved_value());

        // To avoid blowing up eclasses too much, we track the size of
        // each eclass reachable by a tree of union nodes from a given
        // value ID, and we avoid union'ing additional values into an
        // eclass when it reaches `ECLASS_ENODE_LIMIT`.
        //
        // For efficiency, this encodes size minus one: so a value of
        // zero (which is cheap to bulk-initialize) means a singleton
        // eclass of size one. This also allows us to avoid explicitly
        // writing the size for any values that are not union nodes.
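        //
        // For example: a plain (non-union) value has a stored size of
        // zero, i.e. a singleton eclass of one enode, and union'ing two
        // singletons produces a union node whose stored size is one,
        // i.e. two enodes.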
        let mut eclass_size: SecondaryMap<Value, u8> = SecondaryMap::with_default(0);

        // This is an initial guess at the size we'll need, but we add
        // more values as we build simplified alternative expressions so
        // this is likely to realloc again later.
        available_block.resize(cursor.func.dfg.num_values());

        // In domtree preorder, visit blocks. (TODO: factor out an
        // iterator from this and elaborator.)
        let root = cursor.layout().entry_block().unwrap();
        enum StackEntry {
            Visit(Block),
            Pop,
        }
        let mut block_stack = vec![StackEntry::Visit(root)];
        while let Some(entry) = block_stack.pop() {
            match entry {
                StackEntry::Visit(block) => {
                    // We popped this block; push children
                    // immediately, then process this block.
                    block_stack.push(StackEntry::Pop);
                    block_stack.extend(
                        self.ctrl_plane
                            .shuffled(self.domtree.children(block))
                            .map(StackEntry::Visit),
                    );
                    gvn_map.increment_depth();
                    gvn_map_blocks.push(block);

                    trace!("Processing block {}", block);
                    cursor.set_position(CursorPosition::Before(block));

                    let mut alias_analysis_state = self.alias_analysis.block_starting_state(block);

                    for &param in cursor.func.dfg.block_params(block) {
                        trace!("creating initial singleton eclass for blockparam {}", param);
                        value_to_opt_value[param] = param;
                        available_block[param] = block;
                    }
                    while let Some(inst) = cursor.next_inst() {
                        trace!(
                            "Processing inst {inst}: {}",
                            cursor.func.dfg.display_inst(inst),
                        );

                        // Rewrite args of *all* instructions using the
                        // value-to-opt-value map.
                        cursor.func.dfg.map_inst_values(inst, |arg| {
                            let new_value = value_to_opt_value[arg];
                            trace!("rewriting arg {} of inst {} to {}", arg, inst, new_value);
                            debug_assert_ne!(new_value, Value::reserved_value());
                            new_value
                        });

                        // Build a context for optimization, with borrows of
                        // state. We can't invoke a method on `self` because
                        // we've borrowed `self.func` mutably (as
                        // `cursor.func`) so we pull apart the pieces instead
                        // here.
                        let mut ctx = OptimizeCtx {
                            func: cursor.func,
                            value_to_opt_value: &mut value_to_opt_value,
                            gvn_map: &mut gvn_map,
                            gvn_map_blocks: &mut gvn_map_blocks,
                            available_block: &mut available_block,
                            eclass_size: &mut eclass_size,
                            rewrite_depth: 0,
                            subsume_values: FxHashSet::default(),
                            remat_values: &mut self.remat_values,
                            stats: &mut self.stats,
                            domtree: &self.domtree,
                            alias_analysis: self.alias_analysis,
                            alias_analysis_state: &mut alias_analysis_state,
                            flags: self.flags,
                            ctrl_plane: self.ctrl_plane,
                            optimized_values: Default::default(),
                            optimized_insts: Default::default(),
                        };

                        if is_pure_for_egraph(ctx.func, inst) {
                            // Insert into GVN map and optimize any new nodes
                            // inserted (recursively performing this work for
                            // any nodes the optimization rules produce).
                            let inst = NewOrExistingInst::Existing(inst);
                            ctx.insert_pure_enode(inst);
                            // We've now rewritten all uses, or will when we
                            // see them, and the instruction exists as a pure
                            // enode in the eclass, so we can remove it.
                            cursor.remove_inst_and_step_back();
                        } else {
                            if let Some(cmd) = ctx.optimize_skeleton_inst(inst, block) {
                                Self::execute_skeleton_inst_simplification(
                                    cmd,
                                    &mut cursor,
                                    &mut value_to_opt_value,
                                    inst,
                                );
                            }
                        }
                    }
                }
                StackEntry::Pop => {
                    gvn_map.decrement_depth();
                    gvn_map_blocks.pop();
                }
            }
        }
    }

    /// Execute a simplification of an instruction in the side-effectful
    /// skeleton.
    fn execute_skeleton_inst_simplification(
        simplification: SkeletonInstSimplification,
        cursor: &mut FuncCursor,
        value_to_opt_value: &mut SecondaryMap<Value, Value>,
        old_inst: Inst,
    ) {
        let mut forward_val = |cursor: &mut FuncCursor, old_val, new_val| {
            cursor.func.dfg.change_to_alias(old_val, new_val);
            value_to_opt_value[old_val] = new_val;
        };

        let (new_inst, new_val) = match simplification {
            SkeletonInstSimplification::Remove => {
                cursor.remove_inst_and_step_back();
                return;
            }
            SkeletonInstSimplification::RemoveWithVal { val } => {
                cursor.remove_inst_and_step_back();
                let old_val = cursor.func.dfg.first_result(old_inst);
                cursor.func.dfg.detach_inst_results(old_inst);
                forward_val(cursor, old_val, val);
                return;
            }
            SkeletonInstSimplification::Replace { inst } => (inst, None),
            SkeletonInstSimplification::ReplaceWithVal { inst, val } => (inst, Some(val)),
        };

        // Replace the old instruction with the new one.
        cursor.replace_inst(new_inst);
        debug_assert!(!cursor.func.dfg.insts[new_inst].opcode().is_terminator());

        // Redirect the old instruction's result values to our new instruction's
        // result values.
        let mut i = 0;
        let mut next_new_val = |dfg: &crate::ir::DataFlowGraph| -> Value {
            if let Some(val) = new_val {
                val
            } else {
                let val = dfg.inst_results(new_inst)[i];
                i += 1;
                val
            }
        };
        for i in 0..cursor.func.dfg.inst_results(old_inst).len() {
            let old_val = cursor.func.dfg.inst_results(old_inst)[i];
            let new_val = next_new_val(&cursor.func.dfg);
            forward_val(cursor, old_val, new_val);
        }

        // Back up so that the next iteration of the outer egraph loop will
        // process the new instruction.
        cursor.goto_inst(new_inst);
        cursor.prev_inst();
    }

    /// Scoped elaboration: compute a final ordering of op computation
    /// for each block and update the given Func body. After this
    /// runs, the function body is back into the state where every
    /// Inst with a used result is placed in the layout (possibly
    /// duplicated, if our code-motion logic decides this is the best
    /// option).
    ///
    /// This works in concert with the domtree. We do a preorder
    /// traversal of the domtree, tracking a scoped map from Id to
    /// (new) Value. The map's scopes correspond to levels in the
    /// domtree.
    ///
    /// At each block, we iterate forward over the side-effecting
    /// eclasses, and recursively generate their arg eclasses, then
    /// emit the ops themselves.
    ///
    /// To use an eclass in a given block, we first look it up in the
    /// scoped map, and get the Value if already present. If not, we
    /// need to generate it. We emit the extracted enode for this
    /// eclass after recursively generating its args. Eclasses are
    /// thus computed "as late as possible", but then memoized into
    /// the Id-to-Value map and available to all dominated blocks and
    /// for the rest of this block. (This subsumes GVN.)
    fn elaborate(&mut self) {
        let mut elaborator = Elaborator::new(
            self.func,
            &self.domtree,
            self.loop_analysis,
            &self.remat_values,
            &mut self.stats,
            self.ctrl_plane,
        );
        elaborator.elaborate();

        self.check_post_egraph();
    }

    #[cfg(debug_assertions)]
    fn check_post_egraph(&self) {
        // Verify that no union nodes are reachable from inst args,
        // and that all inst args' defining instructions are in the
        // layout.
        for block in self.func.layout.blocks() {
            for inst in self.func.layout.block_insts(block) {
                self.func
                    .dfg
                    .inst_values(inst)
                    .for_each(|arg| match self.func.dfg.value_def(arg) {
                        ValueDef::Result(i, _) => {
                            debug_assert!(self.func.layout.inst_block(i).is_some());
                        }
                        ValueDef::Union(..) => {
                            panic!("egraph union node {arg} still reachable at {inst}!");
                        }
                        _ => {}
                    })
            }
        }
    }

    #[cfg(not(debug_assertions))]
    fn check_post_egraph(&self) {}
}

/// Implementation of external-context equality and hashing on
/// InstructionData. This allows us to deduplicate instructions given
/// some context that lets us see its value lists, so we don't need to
/// store arguments inline in the `InstructionData` (or alongside it in
/// some newly-defined key type) in all cases.
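///
/// Equality and hashing resolve instruction arguments through the
/// value-list pool, so two otherwise-identical instructions whose
/// `InstructionData` hold different value-list handles but the same
/// argument values still compare (and hash) as the same GVN key.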
struct GVNContext<'a> {
    value_lists: &'a ValueListPool,
}

impl<'a> CtxEq<(Type, InstructionData), (Type, InstructionData)> for GVNContext<'a> {
    fn ctx_eq(
        &self,
        (a_ty, a_inst): &(Type, InstructionData),
        (b_ty, b_inst): &(Type, InstructionData),
    ) -> bool {
        a_ty == b_ty && a_inst.eq(b_inst, self.value_lists)
    }
}

impl<'a> CtxHash<(Type, InstructionData)> for GVNContext<'a> {
    fn ctx_hash<H: Hasher>(&self, state: &mut H, (ty, inst): &(Type, InstructionData)) {
        std::hash::Hash::hash(&ty, state);
        inst.hash(state, self.value_lists);
    }
}

/// Statistics collected during egraph-based processing.
#[derive(Clone, Debug, Default)]
pub(crate) struct Stats {
    pub(crate) pure_inst: u64,
    pub(crate) pure_inst_deduped: u64,
    pub(crate) pure_inst_subsume: u64,
    pub(crate) pure_inst_rewrite_to_self: u64,
    pub(crate) pure_inst_insert_orig: u64,
    pub(crate) pure_inst_insert_new: u64,
    pub(crate) skeleton_inst: u64,
    pub(crate) skeleton_inst_simplified: u64,
    pub(crate) skeleton_inst_gvn: u64,
    pub(crate) alias_analysis_removed: u64,
    pub(crate) new_inst: u64,
    pub(crate) union: u64,
    pub(crate) subsume: u64,
    pub(crate) remat: u64,
    pub(crate) rewrite_rule_invoked: u64,
    pub(crate) rewrite_rule_results: u64,
    pub(crate) rewrite_depth_limit: u64,
    pub(crate) elaborate_visit_node: u64,
    pub(crate) elaborate_memoize_hit: u64,
    pub(crate) elaborate_memoize_miss: u64,
    pub(crate) elaborate_remat: u64,
    pub(crate) elaborate_licm_hoist: u64,
    pub(crate) elaborate_func: u64,
    pub(crate) elaborate_func_pre_insts: u64,
    pub(crate) elaborate_func_post_insts: u64,
    pub(crate) elaborate_best_cost_fixpoint_iters: u64,
    pub(crate) eclass_size_limit: u64,
}