Path: blob/main/cranelift/codegen/src/machinst/lower.rs

//! This module implements lowering (instruction selection) from Cranelift IR
//! to machine instructions with virtual registers. This is *almost* the final
//! machine code, except for register allocation.

// TODO: separate the IR-query core of `Lower` from the lowering logic built on
// top of it, e.g. the side-effect/coloring analysis and the scan support.

use crate::entity::SecondaryMap;
use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit};
use crate::ir::pcc::{Fact, FactContext, PccError, PccResult};
use crate::ir::{
    ArgumentPurpose, Block, BlockArg, Constant, ConstantData, DataFlowGraph, ExternalName,
    Function, GlobalValue, GlobalValueData, Immediate, Inst, InstructionData, MemFlags,
    RelSourceLoc, SigRef, Signature, Type, Value, ValueDef, ValueLabelAssignments, ValueLabelStart,
};
use crate::machinst::valueregs::InvalidSentinel;
use crate::machinst::{
    ABIMachineSpec, BackwardsInsnIndex, BlockIndex, BlockLoweringOrder, CallArgList, CallInfo,
    CallRetList, Callee, InsnIndex, LoweredBlock, MachLabel, Reg, Sig, SigSet, TryCallInfo, VCode,
    VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants, VCodeInst, ValueRegs, Writable,
    writable_value_regs,
};
use crate::settings::Flags;
use crate::{CodegenError, CodegenResult, trace};
use alloc::vec::Vec;
use cranelift_control::ControlPlane;
use rustc_hash::{FxHashMap, FxHashSet};
use smallvec::{SmallVec, smallvec};
use std::fmt::Debug;

use super::{VCodeBuildDirection, VRegAllocator};

/// A vector of ValueRegs, used to represent the outputs of an instruction.
pub type InstOutput = SmallVec<[ValueRegs<Reg>; 2]>;

/// An "instruction color" partitions CLIF instructions by side-effecting ops.
/// All instructions with the same "color" are guaranteed not to be separated by
/// any side-effecting op (for this purpose, loads are also considered
/// side-effecting, to avoid subtle questions w.r.t. the memory model), and
/// furthermore, it is guaranteed that for any two instructions A and B such
/// that color(A) == color(B), either A dominates B and B postdominates A, or
/// vice-versa. (For now, in practice, only ops in the same basic block can ever
/// have the same color, trivially providing the second condition.) Intuitively,
/// this means that the ops of the same color must always execute "together", as
/// part of one atomic contiguous section of the dynamic execution trace, and
/// they can be freely permuted (modulo true dataflow dependencies) without
/// affecting program behavior.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct InstColor(u32);
impl InstColor {
    fn new(n: u32) -> InstColor {
        InstColor(n)
    }

    /// Get an arbitrary index representing this color. The index is unique
    /// *within a single function compilation*, but indices may be reused across
    /// functions.
    pub fn get(self) -> u32 {
        self.0
    }
}
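
// For illustration, a hypothetical coloring (this example is not part of the
// original source; colors follow the scheme computed in `Lower::new` below,
// where loads and stores count as side-effecting):
//
//   v1 = iconst.i64 16   ; pure: color 1
//   v2 = load.i64 v0     ; entry color 1, exit color 2
//   v3 = iadd v2, v1     ; pure: color 2
//   store v3, v0         ; entry color 2, exit color 3
//   v4 = load.i64 v0     ; entry color 3
//
// While the backward scan is at the `store` (entry color 2), the first load
// (entry color 1) may sink there if `v2` is uniquely used, since the load's
// exit color matches the scan color; it may not sink past the store to the
// second load's position (entry color 3).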

/// A representation of all of the ways in which a value is available, aside
/// from as a direct register.
///
/// - An instruction, if it would be allowed to occur at the current location
///   instead (see [Lower::get_input_as_source_or_const()] for more details).
///
/// - A constant, if the value is known to be a constant.
#[derive(Clone, Copy, Debug)]
pub struct NonRegInput {
    /// An instruction produces this value (as the given output), and its
    /// computation (and side-effect if applicable) could occur at the
    /// current instruction's location instead.
    ///
    /// If this instruction's operation is merged into the current instruction,
    /// the backend must call [Lower::sink_inst()].
    ///
    /// This enum indicates whether this use of the source instruction
    /// is unique or not.
    pub inst: InputSourceInst,
    /// The value is a known constant.
    pub constant: Option<u64>,
}

/// When examining an input to an instruction, this enum provides one
/// of several options: there is or isn't a single instruction (that
/// we can see and merge with) that produces that input's value, and
/// we are or aren't the single user of that instruction.
#[derive(Clone, Copy, Debug)]
pub enum InputSourceInst {
    /// The input in question is the single, unique use of the given
    /// instruction and output index, and it can be sunk to the
    /// location of this input.
    UniqueUse(Inst, usize),
    /// The input in question is one of multiple uses of the given
    /// instruction. It can still be sunk to the location of this
    /// input.
    Use(Inst, usize),
    /// We cannot determine which instruction produced the input, or
    /// it is one of several instructions (e.g., due to a control-flow
    /// merge and blockparam), or the source instruction cannot be
    /// allowed to sink to the current location due to side-effects.
    None,
}

impl InputSourceInst {
    /// Get the instruction and output index for this source, whether
    /// we are its single or one of many users.
    pub fn as_inst(&self) -> Option<(Inst, usize)> {
        match self {
            &InputSourceInst::UniqueUse(inst, output_idx)
            | &InputSourceInst::Use(inst, output_idx) => Some((inst, output_idx)),
            &InputSourceInst::None => None,
        }
    }
}
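
// A hypothetical backend-side sketch (commented out; not part of this file)
// of how these types are typically consumed when lowering, say, an `iadd`.
// `emit_add_imm`, `emit_add_reg`, and `try_merge` are assumed stand-ins for
// real backend helpers:
//
//     let input = ctx.get_input_as_source_or_const(iadd, 1);
//     if let Some(imm) = input.constant {
//         // The operand is a known constant: use a reg+imm form.
//         emit_add_imm(ctx, iadd, imm);
//     } else if let InputSourceInst::UniqueUse(src, _) = input.inst {
//         // Unique producer: it may be merged into this lowering (and, if it
//         // has a side-effect such as a load, `ctx.sink_inst(src)` must then
//         // be called).
//         try_merge(ctx, iadd, src);
//     } else {
//         // Fall back to using the value in a register.
//         let reg = ctx.put_input_in_regs(iadd, 1).only_reg().unwrap();
//         emit_add_reg(ctx, iadd, reg);
//     }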

/// A machine backend.
pub trait LowerBackend {
    /// The machine instruction type.
    type MInst: VCodeInst;

    /// Lower a single instruction.
    ///
    /// For a branch, this function should not generate the actual branch
    /// instruction. However, it must force any values it needs for the branch
    /// edge (block-param actuals) into registers, because the actual branch
    /// generation (`lower_branch()`) happens *after* any possible merged
    /// out-edge.
    ///
    /// Returns `None` if no lowering for the instruction was found.
    fn lower(&self, ctx: &mut Lower<Self::MInst>, inst: Inst) -> Option<InstOutput>;

    /// Lower a block-terminating group of branches (which together can be seen
    /// as one N-way branch), given a vcode MachLabel for each target.
    ///
    /// Returns `None` if no lowering for the branch was found.
    fn lower_branch(
        &self,
        ctx: &mut Lower<Self::MInst>,
        inst: Inst,
        targets: &[MachLabel],
    ) -> Option<()>;

    /// A bit of a hack: give a fixed register that always holds the result of a
    /// `get_pinned_reg` instruction, if known. This allows elision of moves
    /// into the associated vreg, instead using the real reg directly.
    fn maybe_pinned_reg(&self) -> Option<Reg> {
        None
    }

    /// The type of state carried between `check_fact` invocations.
    type FactFlowState: Default + Clone + Debug;

    /// Check any facts about an instruction, given VCode with facts
    /// on VRegs. Takes mutable `VCode` so that it can propagate some
    /// kinds of facts automatically.
    fn check_fact(
        &self,
        _ctx: &FactContext<'_>,
        _vcode: &mut VCode<Self::MInst>,
        _inst: InsnIndex,
        _state: &mut Self::FactFlowState,
    ) -> PccResult<()> {
        Err(PccError::UnimplementedBackend)
    }
}
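
// A minimal, hypothetical skeleton of an implementor (commented out; not part
// of this file). `MyBackend` and `MyInst` are assumed placeholder types, shown
// only to make the shape of the contract concrete:
//
//     struct MyBackend;
//
//     impl LowerBackend for MyBackend {
//         type MInst = MyInst;
//         type FactFlowState = ();
//
//         fn lower(&self, ctx: &mut Lower<MyInst>, inst: Inst) -> Option<InstOutput> {
//             // Match on `ctx.data(inst).opcode()`, emit machine insts via
//             // `ctx.emit(..)`, and return the output registers; `None`
//             // reports an unimplemented lowering.
//             None
//         }
//
//         fn lower_branch(
//             &self,
//             ctx: &mut Lower<MyInst>,
//             inst: Inst,
//             targets: &[MachLabel],
//         ) -> Option<()> {
//             // Emit the actual N-way branch to `targets` here.
//             None
//         }
//     }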

/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
/// from original Inst to MachInsts.
pub struct Lower<'func, I: VCodeInst> {
    /// The function to lower.
    pub(crate) f: &'func Function,

    /// Lowered machine instructions.
    vcode: VCodeBuilder<I>,

    /// VReg allocation context, given to the vcode field at build time to finalize the vcode.
    vregs: VRegAllocator<I>,

    /// Mapping from `Value` (SSA value in IR) to virtual register.
    value_regs: SecondaryMap<Value, ValueRegs<Reg>>,

    /// sret registers, if needed.
    sret_reg: Option<ValueRegs<Reg>>,

    /// Instruction colors at block exits. From this map, we can recover all
    /// instruction colors by scanning backward from the block end and
    /// decrementing on any color-changing (side-effecting) instruction.
    block_end_colors: SecondaryMap<Block, InstColor>,

    /// Instruction colors at side-effecting ops. This is the *entry* color,
    /// i.e., the version of global state that exists before an instruction
    /// executes. For each side-effecting instruction, the *exit* color is its
    /// entry color plus one.
    side_effect_inst_entry_colors: FxHashMap<Inst, InstColor>,

    /// Current color as we scan during lowering. While we are lowering an
    /// instruction, this is equal to the color *at entry to* the instruction.
    cur_scan_entry_color: Option<InstColor>,

    /// Current instruction as we scan during lowering.
    cur_inst: Option<Inst>,

    /// Instruction constant values, if known.
    inst_constants: FxHashMap<Inst, u64>,

    /// Use-counts per SSA value, as counted in the input IR. These
    /// are "coarsened", in the abstract-interpretation sense: we only
    /// care about "0, 1, many" states, as this is all we need and
    /// this lets us do an efficient fixpoint analysis.
    ///
    /// See doc comment on `ValueUseState` for more details.
    value_ir_uses: SecondaryMap<Value, ValueUseState>,

    /// Actual uses of each SSA value so far, incremented while lowering.
    value_lowered_uses: SecondaryMap<Value, u32>,

    /// Effectful instructions that have been sunk; they are not codegen'd at
    /// their original locations.
    inst_sunk: FxHashSet<Inst>,

    /// Instructions collected for the CLIF inst in progress, in forward order.
    ir_insts: Vec<I>,

    /// Try-call block arg normal-return values, indexed by instruction.
    try_call_rets: FxHashMap<Inst, SmallVec<[ValueRegs<Writable<Reg>>; 2]>>,

    /// Try-call block arg exceptional-return payloads, indexed by
    /// instruction. Payloads are carried in registers per the ABI and
    /// can only be one register each.
    try_call_payloads: FxHashMap<Inst, SmallVec<[Writable<Reg>; 2]>>,

    /// The register to use for GetPinnedReg, if any, on this architecture.
    pinned_reg: Option<Reg>,

    /// Compilation flags.
    flags: Flags,
}

/// How is a value used in the IR?
///
/// This can be seen as a coarsening of an integer count. We only need
/// distinct states for zero, one, or many.
///
/// This analysis deserves further explanation. The basic idea is that
/// we want to allow instruction lowering to know whether a value that
/// an instruction references is *only* referenced by that one use, or
/// by others as well. This is necessary to know when we might want to
/// move a side-effect: we cannot, for example, duplicate a load, so
/// we cannot let instruction lowering match a load as part of a
/// subpattern and potentially incorporate it.
///
/// Note that a lot of subtlety comes into play once we have
/// *indirect* uses. The classical example of this in our development
/// history was the x86 compare instruction, which is incorporated
/// into flags users (e.g. `selectif`, `trueif`, branches) and can
/// subsequently incorporate loads, or at least we would like it
/// to. However, danger awaits: the compare might be the only user of
/// a load, so we might think we can just move the load (and nothing
/// is duplicated -- success!), except that the compare itself is
/// codegen'd in multiple places, where it is incorporated as a
/// subpattern itself.
///
/// So we really want a notion of "unique all the way along the
/// matching path". Rust's `&T` and `&mut T` offer a partial analogy
/// to the semantics that we want here: we want to know when we've
/// matched a unique use of an instruction, and that instruction's
/// unique use of another instruction, etc, just as `&mut T` can only
/// be obtained by going through a chain of `&mut T`. If one has a
/// `&T` to a struct containing `&mut T` (one of several uses of an
/// instruction that itself has a unique use of an instruction), one
/// can only get a `&T` (one can only get a "I am one of several users
/// of this instruction" result).
///
/// We could track these paths, either dynamically as one "looks up the operand
/// tree" or precomputed. But the former requires state and means that the
/// `Lower` API carries that state implicitly, which we'd like to avoid if we
/// can. And the latter implies O(n^2) storage: it is an all-pairs property (is
/// inst `i` unique from the point of view of `j`).
///
/// To make matters even a little more complex still, a value that is
/// not uniquely used when initially viewing the IR can *become*
/// uniquely used, at least as a root allowing further unique uses of
/// e.g. loads to merge, if no other instruction actually merges
/// it. To be more concrete, if we have `v1 := load; v2 := op v1; v3
/// := op v2; v4 := op v2` then `v2` is non-uniquely used, so from the
/// point of view of lowering `v4` or `v3`, we cannot merge the load
/// at `v1`. But if we decide just to use the assigned register for
/// `v2` at both `v3` and `v4`, then we only actually codegen `v2`
/// once, so it *is* a unique root at that point and we *can* merge
/// the load.
///
/// Note also that the color scheme is not sufficient to give us this
/// information, for various reasons: reasoning about side-effects
/// does not tell us about potential duplication of uses through pure
/// ops.
///
/// To keep things simple and avoid error-prone lowering APIs that
/// would extract more information about whether instruction merging
/// happens or not (we don't have that info now, and it would be
/// difficult to refactor to get it and make that refactor 100%
/// correct), we give up on the above "can become unique if not
/// actually merged" point. Instead, we compute a
/// transitive-uniqueness. That is what this enum represents.
///
/// There is one final caveat as well to the result of this analysis. Notably,
/// we define some instructions to be "root" instructions, which means that we
/// assume they will always be codegen'd at the root of a matching tree, and not
/// matched. (This comes with the caveat that we actually enforce this property
/// by making them "opaque" to subtree matching in
/// `get_value_as_source_or_const`). Because they will always be codegen'd once,
/// they in some sense "reset" multiplicity: these root instructions can be used
/// many times, but because their result(s) are only computed once, they only
/// use their inputs once.
///
/// We currently define all multi-result instructions to be "root" instructions,
/// because it is too complex to reason about matching through them, and they
/// cause too-coarse-grained approximation of multiplicity otherwise: the
/// analysis would have to assume (as it used to!) that they are always
/// multiply-used, simply because they have multiple outputs even if those
/// outputs are used only once.
///
/// In the future we could define other instructions to be "root" instructions
/// as well, if we make the corresponding change to
/// `get_value_as_source_or_const`.
///
/// To define `ValueUseState` more plainly: a value is `Unused` if no references
/// exist to it; `Once` if only one other op refers to it, *and* that other op
/// is `Unused` or `Once`; and `Multiple` otherwise. In other words, `Multiple`
/// is contagious (except through root instructions): even if an op's result
/// value is directly used only once in the CLIF, that value is `Multiple` if
/// the op that uses it is itself used multiple times (hence could be codegen'd
/// multiple times). In brief, this analysis tells us whether, if every op
/// merged all of its operand tree, a given op could be codegen'd in more than
/// one place.
///
/// To compute this, we first consider direct uses. At this point
/// `Unused` answers are correct, `Multiple` answers are correct, but
/// some `Once`s may change to `Multiple`s. Then we propagate
/// `Multiple` transitively using a workqueue/fixpoint algorithm.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ValueUseState {
    /// Not used at all.
    Unused,
    /// Used exactly once.
    Once,
    /// Used multiple times.
    Multiple,
}

impl ValueUseState {
    /// Add one use.
    fn inc(&mut self) {
        let new = match self {
            Self::Unused => Self::Once,
            Self::Once | Self::Multiple => Self::Multiple,
        };
        *self = new;
    }
}
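
// An illustrative example of the "contagion" described above (hypothetical
// CLIF, not from the original source):
//
//   v2 = iadd v0, v1
//   v3 = imul v2, v2    ; v2 is used twice -> Multiple
//   v4 = ineg v3
//   v5 = iadd v4, v4    ; v4 is used twice -> Multiple
//
// `v3` is referenced exactly once (by `v4`), but because `v4` is `Multiple`
// and could therefore be codegen'd in more than one place if merged, `v3` is
// `Multiple` too: the state propagates transitively up the operand tree.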

/// Notion of "relocation distance". This gives an estimate of how far away a symbol will be from a
/// reference.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RelocDistance {
    /// Target of relocation is "nearby". The threshold for this is fuzzy but should be interpreted
    /// as approximately "within the compiled output of one module"; e.g., within AArch64's +/-
    /// 128MB offset. If unsure, use `Far` instead.
    Near,
    /// Target of relocation could be anywhere in the address space.
    Far,
}

impl<'func, I: VCodeInst> Lower<'func, I> {
    /// Prepare a new lowering context for the given IR function.
    pub fn new(
        f: &'func Function,
        abi: Callee<I::ABIMachineSpec>,
        emit_info: I::Info,
        block_order: BlockLoweringOrder,
        sigs: SigSet,
        flags: Flags,
    ) -> CodegenResult<Self> {
        let constants = VCodeConstants::with_capacity(f.dfg.constants.len());
        let vcode = VCodeBuilder::new(
            sigs,
            abi,
            emit_info,
            block_order,
            constants,
            VCodeBuildDirection::Backward,
            flags.log2_min_function_alignment(),
        );

        // We usually need two VRegs per instruction result, plus extras for
        // various temporaries, but two per Value is a good starting point.
        let mut vregs = VRegAllocator::with_capacity(f.dfg.num_values() * 2);

        let mut value_regs = SecondaryMap::with_default(ValueRegs::invalid());
        let mut try_call_rets = FxHashMap::default();
        let mut try_call_payloads = FxHashMap::default();

        // Assign a vreg to each block param, each inst result, and
        // each edge-defined block-call arg.
        for bb in f.layout.blocks() {
            for &param in f.dfg.block_params(bb) {
                let ty = f.dfg.value_type(param);
                if value_regs[param].is_invalid() {
                    let regs = vregs.alloc_with_maybe_fact(ty, f.dfg.facts[param].clone())?;
                    value_regs[param] = regs;
                    trace!("bb {} param {}: regs {:?}", bb, param, regs);
                }
            }
            for inst in f.layout.block_insts(bb) {
                for &result in f.dfg.inst_results(inst) {
                    let ty = f.dfg.value_type(result);
                    if value_regs[result].is_invalid() && !ty.is_invalid() {
                        let regs = vregs.alloc_with_maybe_fact(ty, f.dfg.facts[result].clone())?;
                        value_regs[result] = regs;
                        trace!(
                            "bb {} inst {} ({:?}): result {} regs {:?}",
                            bb, inst, f.dfg.insts[inst], result, regs,
                        );
                    }
                }

                if let Some(et) = f.dfg.insts[inst].exception_table() {
                    let exdata = &f.dfg.exception_tables[et];
                    let sig = &f.dfg.signatures[exdata.signature()];

                    let mut rets = smallvec![];
                    for ty in sig.returns.iter().map(|ret| ret.value_type) {
                        rets.push(vregs.alloc(ty)?.map(|r| Writable::from_reg(r)));
                    }
                    try_call_rets.insert(inst, rets);

                    let mut payloads = smallvec![];
                    // Note that this is intentionally using the calling
                    // convention of the callee to determine what payload
                    // types are available. The callee defines that, not the
                    // calling convention of the caller.
                    for &ty in sig
                        .call_conv
                        .exception_payload_types(I::ABIMachineSpec::word_type())
                    {
                        payloads.push(Writable::from_reg(vregs.alloc(ty)?.only_reg().unwrap()));
                    }
                    try_call_payloads.insert(inst, payloads);
                }
            }
        }

        // Find the sret register, if it's used.
        let mut sret_param = None;
        for ret in vcode.abi().signature().returns.iter() {
            if ret.purpose == ArgumentPurpose::StructReturn {
                let entry_bb = f.stencil.layout.entry_block().unwrap();
                for (&param, sig_param) in f
                    .dfg
                    .block_params(entry_bb)
                    .iter()
                    .zip(vcode.abi().signature().params.iter())
                {
                    if sig_param.purpose == ArgumentPurpose::StructReturn {
                        assert!(sret_param.is_none());
                        sret_param = Some(param);
                    }
                }

                assert!(sret_param.is_some());
            }
        }

        let sret_reg = sret_param.map(|param| {
            let regs = value_regs[param];
            assert!(regs.len() == 1);
            regs
        });

        // Compute instruction colors, find constant instructions, and find instructions with
        // side-effects, in one combined pass.
        let mut cur_color = 0;
        let mut block_end_colors = SecondaryMap::with_default(InstColor::new(0));
        let mut side_effect_inst_entry_colors = FxHashMap::default();
        let mut inst_constants = FxHashMap::default();
        for bb in f.layout.blocks() {
            cur_color += 1;
            for inst in f.layout.block_insts(bb) {
                let side_effect = has_lowering_side_effect(f, inst);

                trace!("bb {} inst {} has color {}", bb, inst, cur_color);
                if side_effect {
                    side_effect_inst_entry_colors.insert(inst, InstColor::new(cur_color));
                    trace!(" -> side-effecting; incrementing color for next inst");
                    cur_color += 1;
                }

                // Determine if this is a constant; if so, add to the table.
                if let Some(c) = is_constant_64bit(f, inst) {
                    trace!(" -> constant: {}", c);
                    inst_constants.insert(inst, c);
                }
            }

            block_end_colors[bb] = InstColor::new(cur_color);
        }

        let value_ir_uses = compute_use_states(f, sret_param);

        Ok(Lower {
            f,
            vcode,
            vregs,
            value_regs,
            sret_reg,
            block_end_colors,
            side_effect_inst_entry_colors,
            inst_constants,
            value_ir_uses,
            value_lowered_uses: SecondaryMap::default(),
            inst_sunk: FxHashSet::default(),
            cur_scan_entry_color: None,
            cur_inst: None,
            ir_insts: vec![],
            try_call_rets,
            try_call_payloads,
            pinned_reg: None,
            flags,
        })
    }

    pub fn sigs(&self) -> &SigSet {
        self.vcode.sigs()
    }

    pub fn sigs_mut(&mut self) -> &mut SigSet {
        self.vcode.sigs_mut()
    }

    pub fn vregs_mut(&mut self) -> &mut VRegAllocator<I> {
        &mut self.vregs
    }

    fn gen_arg_setup(&mut self) {
        if let Some(entry_bb) = self.f.layout.entry_block() {
            trace!(
                "gen_arg_setup: entry BB {} args are:\n{:?}",
                entry_bb,
                self.f.dfg.block_params(entry_bb)
            );

            for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() {
                if self.value_ir_uses[*param] == ValueUseState::Unused {
                    continue;
                }
                let regs = writable_value_regs(self.value_regs[*param]);
                for insn in self
                    .vcode
                    .vcode
                    .abi
                    .gen_copy_arg_to_regs(&self.vcode.vcode.sigs, i, regs, &mut self.vregs)
                    .into_iter()
                {
                    self.emit(insn);
                }
            }
            if let Some(insn) = self
                .vcode
                .vcode
                .abi
                .gen_retval_area_setup(&self.vcode.vcode.sigs, &mut self.vregs)
            {
                self.emit(insn);
            }

            // The `args` instruction below must come first. Finish
            // the current "IR inst" (with a default source location,
            // as for other special instructions inserted during
            // lowering) and continue the scan backward.
            self.finish_ir_inst(Default::default());

            if let Some(insn) = self.vcode.vcode.abi.take_args() {
                self.emit(insn);
            }
        }
    }

    /// Generate the return instruction.
    pub fn gen_return(&mut self, rets: &[ValueRegs<Reg>]) {
        let mut out_rets = vec![];

        let mut rets = rets.into_iter();
        for (i, ret) in self
            .abi()
            .signature()
            .returns
            .clone()
            .into_iter()
            .enumerate()
        {
            let regs = if ret.purpose == ArgumentPurpose::StructReturn {
                self.sret_reg.unwrap()
            } else {
                *rets.next().unwrap()
            };

            let (regs, insns) = self.vcode.abi().gen_copy_regs_to_retval(
                self.vcode.sigs(),
                i,
                regs,
                &mut self.vregs,
            );
            out_rets.extend(regs);
            for insn in insns {
                self.emit(insn);
            }
        }

        // Hack: generate a virtual instruction that uses vmctx in
        // order to keep it alive for the duration of the function,
        // for the benefit of debuginfo.
        if self.f.dfg.values_labels.is_some() {
            if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
                if self.value_ir_uses[vmctx_val] != ValueUseState::Unused {
                    let vmctx_reg = self.value_regs[vmctx_val].only_reg().unwrap();
                    self.emit(I::gen_dummy_use(vmctx_reg));
                }
            }
        }

        let inst = self.abi().gen_rets(out_rets);
        self.emit(inst);
    }

    /// Generate a list of registers to hold the output of a call with
    /// signature `sig`.
    pub fn gen_call_output(&mut self, sig: &Signature) -> InstOutput {
        let mut rets = smallvec![];
        for ty in sig.returns.iter().map(|ret| ret.value_type) {
            rets.push(self.vregs.alloc_with_deferred_error(ty));
        }
        rets
    }

    /// Likewise, but for a `SigRef` instead.
    pub fn gen_call_output_from_sig_ref(&mut self, sig_ref: SigRef) -> InstOutput {
        self.gen_call_output(&self.f.dfg.signatures[sig_ref])
    }

    /// Set up argument values `args` for a call with signature `sig`.
    pub fn gen_call_args(&mut self, sig: Sig, args: &[ValueRegs<Reg>]) -> CallArgList {
        let (uses, insts) = self.vcode.abi().gen_call_args(
            self.vcode.sigs(),
            sig,
            args,
            /* is_tail_call */ false,
            &self.flags,
            &mut self.vregs,
        );
        for insn in insts {
            self.emit(insn);
        }
        uses
    }

    /// Likewise, but for a `return_call`.
    pub fn gen_return_call_args(&mut self, sig: Sig, args: &[ValueRegs<Reg>]) -> CallArgList {
        let (uses, insts) = self.vcode.abi().gen_call_args(
            self.vcode.sigs(),
            sig,
            args,
            /* is_tail_call */ true,
            &self.flags,
            &mut self.vregs,
        );
        for insn in insts {
            self.emit(insn);
        }
        uses
    }

    /// Set up return values `outputs` for a call with signature `sig`.
    pub fn gen_call_rets(&mut self, sig: Sig, outputs: &[ValueRegs<Reg>]) -> CallRetList {
        self.vcode
            .abi()
            .gen_call_rets(self.vcode.sigs(), sig, outputs, None, &mut self.vregs)
    }

    /// Likewise, but for a `try_call`.
    pub fn gen_try_call_rets(&mut self, sig: Sig) -> CallRetList {
        let ir_inst = self.cur_inst.unwrap();
        let mut outputs: SmallVec<[ValueRegs<Reg>; 2]> = smallvec![];
        for return_def in self.try_call_rets.get(&ir_inst).unwrap() {
            outputs.push(return_def.map(|r| r.to_reg()));
        }
        let payloads = Some(&self.try_call_payloads.get(&ir_inst).unwrap()[..]);

        self.vcode
            .abi()
            .gen_call_rets(self.vcode.sigs(), sig, &outputs, payloads, &mut self.vregs)
    }

    /// Populate a `CallInfo` for a call with signature `sig`.
    pub fn gen_call_info<T>(
        &mut self,
        sig: Sig,
        dest: T,
        uses: CallArgList,
        defs: CallRetList,
        try_call_info: Option<TryCallInfo>,
    ) -> CallInfo<T> {
        self.vcode
            .abi()
            .gen_call_info(self.vcode.sigs(), sig, dest, uses, defs, try_call_info)
    }

    /// Has this instruction been sunk to a use-site (i.e., away from its
    /// original location)?
    fn is_inst_sunk(&self, inst: Inst) -> bool {
        self.inst_sunk.contains(&inst)
    }

    // Is any result of this instruction needed?
    fn is_any_inst_result_needed(&self, inst: Inst) -> bool {
        self.f
            .dfg
            .inst_results(inst)
            .iter()
            .any(|&result| self.value_lowered_uses[result] > 0)
    }

    fn lower_clif_block<B: LowerBackend<MInst = I>>(
        &mut self,
        backend: &B,
        block: Block,
        ctrl_plane: &mut ControlPlane,
    ) -> CodegenResult<()> {
        self.cur_scan_entry_color = Some(self.block_end_colors[block]);
        // Lowering loop:
        // - For each non-branch instruction, in reverse order:
        //   - If side-effecting (load, store, branch/call/return,
        //     possible trap), or if used outside of this block, or if
        //     demanded by another inst, then lower.
        //
        // That's it! Lowering of side-effecting ops will force all *needed*
        // (live) non-side-effecting ops to be lowered at the right places, via
        // the `use_input_reg()` callback on the `Lower` (that's us). That's
        // because `use_input_reg()` sets the eager/demand bit for any insts
        // whose result registers are used.
        //
        // We set the VCodeBuilder to "backward" mode, so we emit
        // blocks in reverse order wrt the BlockIndex sequence, and
        // emit instructions in reverse order within blocks. Because
        // the machine backend calls `ctx.emit()` in forward order, we
        // collect per-IR-inst lowered instructions in `ir_insts`,
        // then reverse these and append to the VCode at the end of
        // each IR instruction.
        for inst in self.f.layout.block_insts(block).rev() {
            let data = &self.f.dfg.insts[inst];
            let has_side_effect = has_lowering_side_effect(self.f, inst);
            // If inst has been sunk to another location, skip it.
            if self.is_inst_sunk(inst) {
                continue;
            }
            // Are any outputs used at least once?
            let value_needed = self.is_any_inst_result_needed(inst);
            trace!(
                "lower_clif_block: block {} inst {} ({:?}) is_branch {} side_effect {} value_needed {}",
                block,
                inst,
                data,
                data.opcode().is_branch(),
                has_side_effect,
                value_needed,
            );

            // Update scan state to color prior to this inst (as we are scanning
            // backward).
            self.cur_inst = Some(inst);
            if has_side_effect {
                let entry_color = *self
                    .side_effect_inst_entry_colors
                    .get(&inst)
                    .expect("every side-effecting inst should have a color-map entry");
                self.cur_scan_entry_color = Some(entry_color);
            }

            // Skip lowering branches; these are handled separately
            // (see `lower_clif_branches()` below).
            if self.f.dfg.insts[inst].opcode().is_branch() {
                continue;
            }

            // Value defined by "inst" becomes live after it in normal
            // order, and therefore **before** in reversed order.
            self.emit_value_label_live_range_start_for_inst(inst);

            // Normal instruction: codegen if the instruction is side-effecting
            // or any of its outputs is used.
            if has_side_effect || value_needed {
                trace!("lowering: inst {}: {}", inst, self.f.dfg.display_inst(inst));
                let temp_regs = match backend.lower(self, inst) {
                    Some(regs) => regs,
                    None => {
                        let ty = if self.num_outputs(inst) > 0 {
                            Some(self.output_ty(inst, 0))
                        } else {
                            None
                        };
                        return Err(CodegenError::Unsupported(format!(
                            "should be implemented in ISLE: inst = `{}`, type = `{:?}`",
                            self.f.dfg.display_inst(inst),
                            ty
                        )));
                    }
                };

                // The ISLE generated code emits its own registers to define the
                // instruction's lowered values in. However, other instructions
                // that use this SSA value will be lowered assuming that the value
                // is generated into a pre-assigned, different, register.
                //
                // To connect the two, we set up "aliases" in the VCodeBuilder
                // that apply when it is building the Operand table for the
                // regalloc to use. These aliases effectively rewrite any use of
                // the pre-assigned register to the register that was returned by
                // the ISLE lowering logic.
                let results = self.f.dfg.inst_results(inst);
                debug_assert_eq!(temp_regs.len(), results.len());
                for (regs, &result) in temp_regs.iter().zip(results) {
                    let dsts = self.value_regs[result];
                    let mut regs = regs.regs().iter();
                    for &dst in dsts.regs().iter() {
                        let temp = regs.next().copied().unwrap_or(Reg::invalid_sentinel());
                        trace!("set vreg alias: {result:?} = {dst:?}, lowering = {temp:?}");
                        self.vregs.set_vreg_alias(dst, temp);
                    }
                }
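
                // An illustrative trace (hypothetical vreg numbers, not from
                // the original source): if result `v7` was pre-assigned vreg
                // `v200` but the ISLE lowering produced it in `v215`, the
                // alias map records `v200 -> v215`, and the Operand table
                // handed to regalloc later refers to `v215` directly.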
            }

            let start = self.vcode.vcode.num_insts();
            let loc = self.srcloc(inst);
            self.finish_ir_inst(loc);

            // If the instruction had a user stack map, forward it from the CLIF
            // to the vcode.
            if let Some(entries) = self.f.dfg.user_stack_map_entries(inst) {
                let end = self.vcode.vcode.num_insts();
                debug_assert!(end > start);
                debug_assert_eq!(
                    (start..end)
                        .filter(|i| self.vcode.vcode[InsnIndex::new(*i)].is_safepoint())
                        .count(),
                    1
                );
                for i in start..end {
                    let iix = InsnIndex::new(i);
                    if self.vcode.vcode[iix].is_safepoint() {
                        trace!(
                            "Adding user stack map from clif\n\n\
                             {inst:?} `{}`\n\n\
                             to vcode\n\n\
                             {iix:?} `{}`",
                            self.f.dfg.display_inst(inst),
                            &self.vcode.vcode[iix].pretty_print_inst(&mut Default::default()),
                        );
                        self.vcode
                            .add_user_stack_map(BackwardsInsnIndex::new(iix.index()), entries);
                        break;
                    }
                }
            }

            // maybe insert random instruction
            if ctrl_plane.get_decision() {
                if ctrl_plane.get_decision() {
                    let imm: u64 = ctrl_plane.get_arbitrary();
                    let reg = self.alloc_tmp(crate::ir::types::I64).regs()[0];
                    I::gen_imm_u64(imm, reg).map(|inst| self.emit(inst));
                } else {
                    let imm: f64 = ctrl_plane.get_arbitrary();
                    let tmp = self.alloc_tmp(crate::ir::types::I64).regs()[0];
                    let reg = self.alloc_tmp(crate::ir::types::F64).regs()[0];
                    for inst in I::gen_imm_f64(imm, tmp, reg) {
                        self.emit(inst);
                    }
                }
            }
        }

        // Add the block params to this block.
        self.add_block_params(block)?;

        self.cur_scan_entry_color = None;
        Ok(())
    }

    fn add_block_params(&mut self, block: Block) -> CodegenResult<()> {
        for &param in self.f.dfg.block_params(block) {
            for &reg in self.value_regs[param].regs() {
                let vreg = reg.to_virtual_reg().unwrap();
                self.vcode.add_block_param(vreg);
            }
        }
        Ok(())
    }

    fn get_value_labels<'a>(&'a self, val: Value, depth: usize) -> Option<&'a [ValueLabelStart]> {
        if let Some(ref values_labels) = self.f.dfg.values_labels {
            debug_assert!(self.f.dfg.value_is_real(val));
            trace!(
                "get_value_labels: val {} -> {:?}",
                val,
                values_labels.get(&val)
            );
            match values_labels.get(&val) {
                Some(&ValueLabelAssignments::Starts(ref list)) => Some(&list[..]),
                Some(&ValueLabelAssignments::Alias { value, .. }) if depth < 10 => {
                    self.get_value_labels(value, depth + 1)
                }
                _ => None,
            }
        } else {
            None
        }
    }

    fn emit_value_label_marks_for_value(&mut self, val: Value) {
        let regs = self.value_regs[val];
        if regs.len() > 1 {
            return;
        }
        let reg = regs.only_reg().unwrap();

        if let Some(label_starts) = self.get_value_labels(val, 0) {
            let labels = label_starts
                .iter()
                .map(|&ValueLabelStart { label, .. }| label)
                .collect::<FxHashSet<_>>();
            for label in labels {
                trace!(
                    "value labeling: defines val {:?} -> reg {:?} -> label {:?}",
                    val, reg, label,
                );
                self.vcode.add_value_label(reg, label);
            }
        }
    }

    fn emit_value_label_live_range_start_for_inst(&mut self, inst: Inst) {
        if self.f.dfg.values_labels.is_none() {
            return;
        }

        trace!(
            "value labeling: srcloc {}: inst {}",
            self.srcloc(inst),
            inst
        );
        for &val in self.f.dfg.inst_results(inst) {
            self.emit_value_label_marks_for_value(val);
        }
    }

    fn emit_value_label_live_range_start_for_block_args(&mut self, block: Block) {
        if self.f.dfg.values_labels.is_none() {
            return;
        }

        trace!("value labeling: block {}", block);
        for &arg in self.f.dfg.block_params(block) {
            self.emit_value_label_marks_for_value(arg);
        }
        self.finish_ir_inst(Default::default());
    }

    fn finish_ir_inst(&mut self, loc: RelSourceLoc) {
        // The VCodeBuilder builds in reverse order (and reverses at
        // the end), but `ir_insts` is in forward order, so reverse
        // it.
        for inst in self.ir_insts.drain(..).rev() {
            self.vcode.push(inst, loc);
        }
    }

    fn finish_bb(&mut self) {
        self.vcode.end_bb();
    }

    fn lower_clif_branch<B: LowerBackend<MInst = I>>(
        &mut self,
        backend: &B,
        // Lowered block index:
        bindex: BlockIndex,
        // Original CLIF block:
        block: Block,
        branch: Inst,
        targets: &[MachLabel],
    ) -> CodegenResult<()> {
        trace!(
            "lower_clif_branch: block {} branch {:?} targets {:?}",
            block, branch, targets,
        );
        // When considering code-motion opportunities, consider the current
        // program point to be this branch.
        self.cur_inst = Some(branch);

        // Lower the branch in ISLE.
        backend
            .lower_branch(self, branch, targets)
            .unwrap_or_else(|| {
                panic!(
                    "should be implemented in ISLE: branch = `{}`",
                    self.f.dfg.display_inst(branch),
                )
            });
        let loc = self.srcloc(branch);
        self.finish_ir_inst(loc);
        // Add block param outputs for current block.
        self.lower_branch_blockparam_args(bindex);
        Ok(())
    }

    fn lower_branch_blockparam_args(&mut self, block: BlockIndex) {
        let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];

        // TODO: why not make `block_order` public?
        for succ_idx in 0..self.vcode.block_order().succ_indices(block).1.len() {
            branch_arg_vregs.clear();
            let (succ, args) = self.collect_block_call(block, succ_idx, &mut branch_arg_vregs);
            self.vcode.add_succ(succ, args);
        }
    }

    fn collect_branch_and_targets(
        &self,
        bindex: BlockIndex,
        _bb: Block,
        targets: &mut SmallVec<[MachLabel; 2]>,
    ) -> Option<Inst> {
        targets.clear();
        let (opt_inst, succs) = self.vcode.block_order().succ_indices(bindex);
        targets.extend(succs.iter().map(|succ| MachLabel::from_block(*succ)));
        opt_inst
    }

    /// Collect the outgoing block-call arguments for a given edge out
    /// of a lowered block.
    fn collect_block_call<'a>(
        &mut self,
        block: BlockIndex,
        succ_idx: usize,
        buffer: &'a mut SmallVec<[Reg; 16]>,
    ) -> (BlockIndex, &'a [Reg]) {
        let block_order = self.vcode.block_order();
        let (_, succs) = block_order.succ_indices(block);
        let succ = succs[succ_idx];
        let this_lb = block_order.lowered_order()[block.index()];
        let succ_lb = block_order.lowered_order()[succ.index()];

        let (branch_inst, succ_idx) = match (this_lb, succ_lb) {
            (_, LoweredBlock::CriticalEdge { .. }) => {
                // The successor is a split-critical-edge block. In this
                // case, this block-call has no arguments, and the
                // arguments go on the critical edge block's unconditional
                // branch instead.
                return (succ, &[]);
            }
            (LoweredBlock::CriticalEdge { pred, succ_idx, .. }, _) => {
                // This is a split-critical-edge block. In this case, our
                // block-call has the arguments that in the CLIF appear in
                // the predecessor's branch to this edge.
                let branch_inst = self.f.layout.last_inst(pred).unwrap();
                (branch_inst, succ_idx as usize)
            }

            (this, _) => {
                let block = this.orig_block().unwrap();
                // Ordinary block, with an ordinary block as
                // successor. Take the arguments from the branch.
                let branch_inst = self.f.layout.last_inst(block).unwrap();
                (branch_inst, succ_idx)
            }
        };

        let block_call = self.f.dfg.insts[branch_inst]
            .branch_destination(&self.f.dfg.jump_tables, &self.f.dfg.exception_tables)[succ_idx];
        for arg in block_call.args(&self.f.dfg.value_lists) {
            match arg {
                BlockArg::Value(arg) => {
                    debug_assert!(self.f.dfg.value_is_real(arg));
                    let regs = self.put_value_in_regs(arg);
                    buffer.extend_from_slice(regs.regs());
                }
                BlockArg::TryCallRet(i) => {
                    let regs = self.try_call_rets.get(&branch_inst).unwrap()[i as usize]
                        .map(|r| r.to_reg());
                    buffer.extend_from_slice(regs.regs());
                }
                BlockArg::TryCallExn(i) => {
                    let reg =
                        self.try_call_payloads.get(&branch_inst).unwrap()[i as usize].to_reg();
                    buffer.push(reg);
                }
            }
        }
        (succ, &buffer[..])
    }
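
    // A hypothetical CFG sketch of the two critical-edge cases above (not
    // from the original source): if CLIF block A ends in `brif v0, B(v1), C`
    // and the edge A->B is critical, the lowered order inserts an edge block
    // E between A and B. A's block-call to E then carries no arguments (first
    // match arm), while E's unconditional jump to B carries `v1`, collected
    // from A's original branch via the `CriticalEdge { pred, succ_idx, .. }`
    // arm.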

    /// Lower the function.
    pub fn lower<B: LowerBackend<MInst = I>>(
        mut self,
        backend: &B,
        ctrl_plane: &mut ControlPlane,
    ) -> CodegenResult<VCode<I>> {
        trace!("about to lower function: {:?}", self.f);

        self.vcode.init_retval_area(&mut self.vregs)?;

        // Get the pinned reg here (we only parameterize this function on `B`,
        // not the whole `Lower` impl).
        self.pinned_reg = backend.maybe_pinned_reg();

        self.vcode.set_entry(BlockIndex::new(0));

        // Reused vectors for branch lowering.
        let mut targets: SmallVec<[MachLabel; 2]> = SmallVec::new();

        // get a copy of the lowered order; we hold this separately because we
        // need a mut ref to the vcode to mutate it below.
        let lowered_order: SmallVec<[LoweredBlock; 64]> = self
            .vcode
            .block_order()
            .lowered_order()
            .iter()
            .cloned()
            .collect();

        // Main lowering loop over lowered blocks.
        for (bindex, lb) in lowered_order.iter().enumerate().rev() {
            let bindex = BlockIndex::new(bindex);

            // Lower the block body in reverse order (see comment in
            // `lower_clif_block()` for rationale).

            // End branch.
            if let Some(bb) = lb.orig_block() {
                if let Some(branch) = self.collect_branch_and_targets(bindex, bb, &mut targets) {
                    self.lower_clif_branch(backend, bindex, bb, branch, &targets)?;
                    self.finish_ir_inst(self.srcloc(branch));
                }
            } else {
                // If no orig block, this must be a pure edge block;
                // get the successor and emit a jump. This block has
                // no block params; and this jump's block-call args
                // will be filled in by
                // `lower_branch_blockparam_args`.
                let succ = self.vcode.block_order().succ_indices(bindex).1[0];
                self.emit(I::gen_jump(MachLabel::from_block(succ)));
                self.finish_ir_inst(Default::default());
                self.lower_branch_blockparam_args(bindex);
            }

            // Original block body.
            if let Some(bb) = lb.orig_block() {
                self.lower_clif_block(backend, bb, ctrl_plane)?;
                self.emit_value_label_live_range_start_for_block_args(bb);
            }

            if bindex.index() == 0 {
                // Set up the function with arg vreg inits.
                self.gen_arg_setup();
                self.finish_ir_inst(Default::default());
            }

            self.finish_bb();

            // Check for any deferred vreg-temp allocation errors, and
            // bubble one up at this time if it exists.
            if let Some(e) = self.vregs.take_deferred_error() {
                return Err(e);
            }
        }

        // Now that we've emitted all instructions into the
        // VCodeBuilder, let's build the VCode.
        trace!(
            "built vcode:\n{:?}Backwards {:?}",
            &self.vregs, &self.vcode.vcode
        );
        let vcode = self.vcode.build(self.vregs);

        Ok(vcode)
    }

    pub fn value_is_unused(&self, val: Value) -> bool {
        match self.value_ir_uses[val] {
            ValueUseState::Unused => true,
            _ => false,
        }
    }

    pub fn block_successor_label(&self, block: Block, succ: usize) -> MachLabel {
        trace!("block_successor_label: block {block} succ {succ}");
        let lowered = self
            .vcode
            .block_order()
            .lowered_index_for_block(block)
            .expect("Unreachable block");
        trace!(" -> lowered block {lowered:?}");
        let (_, succs) = self.vcode.block_order().succ_indices(lowered);
        trace!(" -> succs {succs:?}");
        let succ_block = *succs.get(succ).expect("Successor index out of range");
        MachLabel::from_block(succ_block)
    }
}

/// Pre-analysis: compute `value_ir_uses`. See comment on
/// `ValueUseState` for a description of what this analysis
/// computes.
fn compute_use_states(
    f: &Function,
    sret_param: Option<Value>,
) -> SecondaryMap<Value, ValueUseState> {
    // We perform the analysis without recursion, so we don't
    // overflow the stack on long chains of ops in the input.
    //
    // This is sort of a hybrid of a "shallow use-count" pass and
    // a DFS. We iterate over all instructions and mark their args
    // as used. However when we increment a use-count to
    // "Multiple" we push its args onto the stack and do a DFS,
    // immediately marking the whole dependency tree as
    // Multiple. Doing both (shallow use-counting over all insts,
    // and deep Multiple propagation) lets us trim both
    // traversals, stopping recursion when a node is already at
    // the appropriate state.
    //
    // In particular, note that the *coarsening* into {Unused,
    // Once, Multiple} is part of what makes this pass more
    // efficient than a full indirect-use-counting pass.

    let mut value_ir_uses = SecondaryMap::with_default(ValueUseState::Unused);

    if let Some(sret_param) = sret_param {
        // There's an implicit use of the struct-return parameter in each
        // copy of the function epilogue, which we count here.
        value_ir_uses[sret_param] = ValueUseState::Multiple;
    }

    // Stack of iterators over Values as we do DFS to mark
    // Multiple-state subtrees. The iterator type is whatever is
    // returned by `uses` below.
    let mut stack: SmallVec<[_; 16]> = smallvec![];

    // Find the args for the inst corresponding to the given value.
    //
    // Note that "root" instructions are skipped here. This means that multiple
    // uses of any result of a multi-result instruction are not considered
    // multiple uses of the operands of a multi-result instruction. This
    // requires tight coupling with `get_value_as_source_or_const` below, which
    // is the consumer of the map that this function is producing.
    let uses = |value| {
        trace!(" -> pushing args for {} onto stack", value);
        if let ValueDef::Result(src_inst, _) = f.dfg.value_def(value) {
            if is_value_use_root(f, src_inst) {
                None
            } else {
                Some(f.dfg.inst_values(src_inst))
            }
        } else {
            None
        }
    };

    // Do a DFS through `value_ir_uses` to mark a subtree as
    // Multiple.
    for inst in f
        .layout
        .blocks()
        .flat_map(|block| f.layout.block_insts(block))
    {
        // Iterate over all values used by all instructions, noting an
        // additional use on each operand.
        for arg in f.dfg.inst_values(inst) {
            debug_assert!(f.dfg.value_is_real(arg));
            let old = value_ir_uses[arg];
            value_ir_uses[arg].inc();
            let new = value_ir_uses[arg];
            trace!("arg {} used, old state {:?}, new {:?}", arg, old, new);

            // On transition to Multiple, do DFS.
            if old == ValueUseState::Multiple || new != ValueUseState::Multiple {
                continue;
            }
            if let Some(iter) = uses(arg) {
                stack.push(iter);
            }
            while let Some(iter) = stack.last_mut() {
                if let Some(value) = iter.next() {
                    debug_assert!(f.dfg.value_is_real(value));
                    trace!(" -> DFS reaches {}", value);
                    if value_ir_uses[value] == ValueUseState::Multiple {
                        // Truncate DFS here: no need to go further,
                        // as whole subtree must already be Multiple.
                        // With debug asserts, check one level of
                        // that invariant at least.
                        debug_assert!(uses(value).into_iter().flatten().all(|arg| {
                            debug_assert!(f.dfg.value_is_real(arg));
                            value_ir_uses[arg] == ValueUseState::Multiple
                        }));
                        continue;
                    }
                    value_ir_uses[value] = ValueUseState::Multiple;
                    trace!(" -> became Multiple");
                    if let Some(iter) = uses(value) {
                        stack.push(iter);
                    }
                } else {
                    // Empty iterator, discard.
                    stack.pop();
                }
            }
        }
    }

    value_ir_uses
}
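
// A worked trace of the pass above on the doc comment's running example
// (hypothetical CLIF, not from the original source): for
// `v1 := load; v2 := op v1; v3 := op v2; v4 := op v2`, the shallow counting
// leaves v1 at Once and moves v2 to Multiple when its second use (at v4) is
// seen. That transition triggers the DFS, which walks v2's operand tree and
// promotes v1 to Multiple as well, so lowering of v3 or v4 will not merge
// (and thereby duplicate) the load.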

/// Definition of a "root" instruction for the calculation of `ValueUseState`.
///
/// This function calculates whether `inst` is considered a "root" for value-use
/// information. This concept is used to forcibly prevent looking-through the
/// instruction during `get_value_as_source_or_const` as it additionally
/// prevents propagating `Multiple`-used results of the `inst` here to the
/// operands of the instruction.
///
/// Currently this is defined as multi-result instructions. That means that
/// lowerings are never allowed to look through a multi-result instruction to
/// generate patterns. Note that this isn't possible in ISLE today anyway so
/// this isn't currently much of a loss.
///
/// The main purpose of this function is to prevent the operands of a
/// multi-result instruction from being forcibly considered `Multiple`-used
/// regardless of circumstances.
fn is_value_use_root(f: &Function, inst: Inst) -> bool {
    f.dfg.inst_results(inst).len() > 1
}

/// Function-level queries.
impl<'func, I: VCodeInst> Lower<'func, I> {
    pub fn dfg(&self) -> &DataFlowGraph {
        &self.f.dfg
    }

    /// Get the `Callee`.
    pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
        self.vcode.abi()
    }

    /// Get the `Callee`.
    pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
        self.vcode.abi_mut()
    }
}

/// Instruction input/output queries.
impl<'func, I: VCodeInst> Lower<'func, I> {
    /// Get the instdata for a given IR instruction.
    pub fn data(&self, ir_inst: Inst) -> &InstructionData {
        &self.f.dfg.insts[ir_inst]
    }

    /// Likewise, but starting with a GlobalValue identifier.
    pub fn symbol_value_data<'b>(
        &'b self,
        global_value: GlobalValue,
    ) -> Option<(&'b ExternalName, RelocDistance, i64)> {
        let gvdata = &self.f.global_values[global_value];
        match gvdata {
            &GlobalValueData::Symbol {
                ref name,
                ref offset,
                colocated,
                ..
            } => {
                let offset = offset.bits();
                let dist = if colocated {
                    RelocDistance::Near
                } else {
                    RelocDistance::Far
                };
                Some((name, dist, offset))
            }
            _ => None,
        }
    }

    /// Returns the memory flags of a given memory access.
    pub fn memflags(&self, ir_inst: Inst) -> Option<MemFlags> {
        match &self.f.dfg.insts[ir_inst] {
            &InstructionData::AtomicCas { flags, .. } => Some(flags),
            &InstructionData::AtomicRmw { flags, .. } => Some(flags),
            &InstructionData::Load { flags, .. }
            | &InstructionData::LoadNoOffset { flags, .. }
            | &InstructionData::Store { flags, .. } => Some(flags),
            &InstructionData::StoreNoOffset { flags, .. } => Some(flags),
            _ => None,
        }
    }

    /// Get the source location for a given instruction.
    pub fn srcloc(&self, ir_inst: Inst) -> RelSourceLoc {
        self.f.rel_srclocs()[ir_inst]
    }

    /// Get the number of inputs to the given IR instruction. This is a count only of the Value
    /// arguments to the instruction: block arguments will not be included in this count.
    pub fn num_inputs(&self, ir_inst: Inst) -> usize {
        self.f.dfg.inst_args(ir_inst).len()
    }

    /// Get the number of outputs to the given IR instruction.
    pub fn num_outputs(&self, ir_inst: Inst) -> usize {
        self.f.dfg.inst_results(ir_inst).len()
    }

    /// Get the type for an instruction's input.
    pub fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type {
        self.value_ty(self.input_as_value(ir_inst, idx))
    }

    /// Get the type for a value.
    pub fn value_ty(&self, val: Value) -> Type {
        self.f.dfg.value_type(val)
    }

    /// Get the type for an instruction's output.
    pub fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type {
        self.f.dfg.value_type(self.f.dfg.inst_results(ir_inst)[idx])
    }

    /// Get the value of a constant instruction (`iconst`, etc.) as a 64-bit
    /// value, if possible.
    pub fn get_constant(&self, ir_inst: Inst) -> Option<u64> {
        self.inst_constants.get(&ir_inst).map(|&c| {
            // The upper bits must be zero, enforced during legalization and by
            // the CLIF verifier.
            debug_assert_eq!(c, {
                let input_size = self.output_ty(ir_inst, 0).bits() as u64;
                let shift = 64 - input_size;
                (c << shift) >> shift
            });
            c
        })
    }

    /// Get the input as one of two options other than a direct register:
    ///
    /// - An instruction, given that it is effect-free or able to sink its
    ///   effect to the current instruction being lowered, and given it has only
    ///   one output, and if effect-ful, given that this is the only use;
    /// - A constant, if the value is a constant.
    ///
    /// The instruction input may be available in either of these forms. It may
    /// be available in neither form, if the conditions are not met; if so, use
    /// `put_input_in_regs()` instead to get it in a register.
    ///
    /// If the backend merges the effect of a side-effecting instruction, it
    /// must call `sink_inst()`. When this is called, it indicates that the
    /// effect has been sunk to the current scan location. The sunk
    /// instruction's result(s) must have *no* uses remaining, because it will
    /// not be codegen'd (it has been integrated into the current instruction).
    pub fn input_as_value(&self, ir_inst: Inst, idx: usize) -> Value {
        let val = self.f.dfg.inst_args(ir_inst)[idx];
        debug_assert!(self.f.dfg.value_is_real(val));
        val
    }

    /// Resolves a particular input of an instruction to the `Value` that it is
    /// represented with.
    ///
    /// For more information see [`Lower::get_value_as_source_or_const`].
    pub fn get_input_as_source_or_const(&self, ir_inst: Inst, idx: usize) -> NonRegInput {
        let val = self.input_as_value(ir_inst, idx);
        self.get_value_as_source_or_const(val)
    }

    /// Resolves a `Value` definition to the source instruction it came from
    /// plus whether it's a unique-use of that instruction.
    ///
    /// This function is the workhorse of pattern-matching in ISLE which enables
    /// combining multiple instructions together. This is used implicitly in
    /// patterns such as `(iadd x (iconst y))` where this function is used to
    /// extract the `(iconst y)` operand.
    ///
    /// At its core this function is a wrapper around
    /// [`DataFlowGraph::value_def`]. This function applies a filter on top of
    /// that, however, to determine when it is actually safe to "look through"
    /// the `val` definition here and view the underlying instruction. This
    /// protects against duplicating side effects, such as loads, for example.
    ///
    /// Internally this uses the data computed from `compute_use_states` along
    /// with other instruction properties to know what to return.
    pub fn get_value_as_source_or_const(&self, val: Value) -> NonRegInput {
        trace!(
            "get_input_for_val: val {} at cur_inst {:?} cur_scan_entry_color {:?}",
            val, self.cur_inst, self.cur_scan_entry_color,
        );
        let inst = match self.f.dfg.value_def(val) {
            // OK to merge source instruction if we have a source
            // instruction, and one of these two conditions holds:
            //
            // - It has no side-effects and this instruction is not a "value-use
            //   root" instruction. Instructions which are considered "roots"
            //   for value-use calculations do not have accurate information
            //   known about the `ValueUseState` of their operands. This is
            //   currently done for multi-result instructions to prevent a use
            //   of each result from forcing all operands of the multi-result
            //   instruction to also be `Multiple`. This in turn means that the
            //   `ValueUseState` for operands of a "root" instruction would be a
            //   lie if pattern matching were to look through the multi-result
            //   instruction. As a result the "look through this instruction"
            //   logic only succeeds if it's not a root instruction.
            //
            // - It has a side-effect, has one output value, that one
            //   output has only one use, directly or indirectly (so
            //   cannot be duplicated -- see comment on
            //   `ValueUseState`), and the instruction's color is *one
            //   less than* the current scan color.
            //
            //   This latter set of conditions is testing whether a
            //   side-effecting instruction can sink to the current scan
            //   location; this is possible if the in-color of this inst is
            //   equal to the out-color of the producing inst, so no other
            //   side-effecting ops occur between them (which will only be true
            //   if they are in the same BB, because color increments at each BB
            //   start).
            //
            // If it is actually sunk, then in `merge_inst()`, we update the
            // scan color so that as we scan over the range past which the
            // instruction was sunk, we allow other instructions (that came
            // prior to the sunk instruction) to sink.
            ValueDef::Result(src_inst, result_idx) => {
                let src_side_effect = has_lowering_side_effect(self.f, src_inst);
                trace!(" -> src inst {}", self.f.dfg.display_inst(src_inst));
                trace!(" -> has lowering side effect: {}", src_side_effect);
                if is_value_use_root(self.f, src_inst) {
                    // If this instruction is a "root instruction" then it's
                    // required that we can't look through it to see the
                    // definition. This means that the `ValueUseState` for the
                    // operands of this result assume that this instruction is
                    // generated exactly once, which might get violated were we
                    // to allow looking through it.
                    trace!(" -> is a root instruction");
                    InputSourceInst::None
                } else if !src_side_effect {
                    // Otherwise if this instruction has no side effects and the
                    // value is used only once then we can look through it with
                    // a "unique" tag. A non-unique `Use` can be shown for other
                    // values ensuring consumers know how it's computed but that
                    // it's not available to omit.
                    if self.value_ir_uses[val] == ValueUseState::Once {
                        InputSourceInst::UniqueUse(src_inst, result_idx)
                    } else {
                        InputSourceInst::Use(src_inst, result_idx)
                    }
                } else {
                    // Side-effect: test whether this is the only use of the
                    // only result of the instruction, and whether colors allow
                    // the code-motion.
                    trace!(
                        " -> side-effecting op {} for val {}: use state {:?}",
                        src_inst, val, self.value_ir_uses[val]
                    );
                    if self.cur_scan_entry_color.is_some()
                        && self.value_ir_uses[val] == ValueUseState::Once
                        && self.num_outputs(src_inst) == 1
                        && self
                            .side_effect_inst_entry_colors
                            .get(&src_inst)
                            .unwrap()
                            .get()
                            + 1
                            == self.cur_scan_entry_color.unwrap().get()
                    {
                        InputSourceInst::UniqueUse(src_inst, 0)
                    } else {
                        InputSourceInst::None
                    }
                }
            }
            _ => InputSourceInst::None,
        };
        let constant = inst.as_inst().and_then(|(inst, _)| self.get_constant(inst));

        NonRegInput { inst, constant }
    }

    /// Increment the reference count for the Value, ensuring that it gets lowered.
    pub fn increment_lowered_uses(&mut self, val: Value) {
        self.value_lowered_uses[val] += 1
    }

    /// Put the `idx`th input into register(s) and return the assigned register.
    pub fn put_input_in_regs(&mut self, ir_inst: Inst, idx: usize) -> ValueRegs<Reg> {
        let val = self.f.dfg.inst_args(ir_inst)[idx];
        self.put_value_in_regs(val)
    }

    /// Put the given value into register(s) and return the assigned register.
    pub fn put_value_in_regs(&mut self, val: Value) -> ValueRegs<Reg> {
        debug_assert!(self.f.dfg.value_is_real(val));
        trace!("put_value_in_regs: val {}", val);

        if let Some(inst) = self.f.dfg.value_def(val).inst() {
            assert!(!self.inst_sunk.contains(&inst));
        }

        let regs = self.value_regs[val];
        trace!(" -> regs {:?}", regs);
        assert!(regs.is_valid());

        self.value_lowered_uses[val] += 1;

        regs
    }

    /// Get the ValueRegs for the edge-defined values for special
    /// try-call-return block arguments.
    pub fn try_call_return_defs(&mut self, ir_inst: Inst) -> &[ValueRegs<Writable<Reg>>] {
        &self.try_call_rets.get(&ir_inst).unwrap()[..]
    }

    /// Get the Regs for the edge-defined values for special
    /// try-call-return exception payload arguments.
    pub fn try_call_exception_defs(&mut self, ir_inst: Inst) -> &[Writable<Reg>] {
        &self.try_call_payloads.get(&ir_inst).unwrap()[..]
    }
}

/// Codegen primitives: allocate temps, emit instructions, set result registers,
/// ask for an input to be gen'd into a register.
impl<'func, I: VCodeInst> Lower<'func, I> {
    /// Get a new temp.
    pub fn alloc_tmp(&mut self, ty: Type) -> ValueRegs<Writable<Reg>> {
        writable_value_regs(self.vregs.alloc_with_deferred_error(ty))
    }

    /// Get the current root instruction that we are lowering.
    pub fn cur_inst(&self) -> Inst {
        self.cur_inst.unwrap()
    }

    /// Emit a machine instruction.
    pub fn emit(&mut self, mach_inst: I) {
        trace!("emit: {:?}", mach_inst);
        self.ir_insts.push(mach_inst);
    }

    /// Indicate that the side-effect of an instruction has been sunk to the
    /// current scan location. This should only be done when the instruction's
    /// original results are not used (i.e., `put_input_in_regs` is not invoked
    /// for the input produced by the sunk instruction), otherwise the
    /// side-effect will occur twice.
    pub fn sink_inst(&mut self, ir_inst: Inst) {
        assert!(has_lowering_side_effect(self.f, ir_inst));
        assert!(self.cur_scan_entry_color.is_some());

        for result in self.dfg().inst_results(ir_inst) {
            assert!(self.value_lowered_uses[*result] == 0);
        }

        let sunk_inst_entry_color = self
            .side_effect_inst_entry_colors
            .get(&ir_inst)
            .cloned()
            .unwrap();
        let sunk_inst_exit_color = InstColor::new(sunk_inst_entry_color.get() + 1);
        assert!(sunk_inst_exit_color == self.cur_scan_entry_color.unwrap());
        self.cur_scan_entry_color = Some(sunk_inst_entry_color);
        self.inst_sunk.insert(ir_inst);
    }
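
    // A hypothetical merge of a load into a compute op (commented sketch, not
    // part of this file): after matching `(iadd x (load addr))`, where the
    // load is reported as a `UniqueUse` with a compatible color, a backend
    // would emit one fused load-add machine instruction and then mark the
    // load as consumed. `emit_fused_add_from_memory` is a stand-in for a real
    // backend helper:
    //
    //     if let InputSourceInst::UniqueUse(load_inst, 0) = input.inst {
    //         emit_fused_add_from_memory(ctx, iadd, load_inst);
    //         // The load will not be codegen'd at its original location:
    //         ctx.sink_inst(load_inst);
    //     }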

    /// Retrieve immediate data given a handle.
    pub fn get_immediate_data(&self, imm: Immediate) -> &ConstantData {
        self.f.dfg.immediates.get(imm).unwrap()
    }

    /// Retrieve constant data given a handle.
    pub fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData {
        self.f.dfg.constants.get(constant_handle)
    }

    /// Indicate that a constant should be emitted.
    pub fn use_constant(&mut self, constant: VCodeConstantData) -> VCodeConstant {
        self.vcode.constants().insert(constant)
    }

    /// Cause the value in `reg` to be in a virtual reg, by copying it into a
    /// new virtual reg if `reg` is a real reg. `ty` describes the type of the
    /// value in `reg`.
    pub fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg {
        if reg.to_virtual_reg().is_some() {
            reg
        } else {
            let new_reg = self.alloc_tmp(ty).only_reg().unwrap();
            self.emit(I::gen_move(new_reg, reg, ty));
            new_reg.to_reg()
        }
    }

    /// Add a range fact to a register, if no other fact is present.
    pub fn add_range_fact(&mut self, reg: Reg, bit_width: u16, min: u64, max: u64) {
        if self.flags.enable_pcc() {
            self.vregs.set_fact_if_missing(
                reg.to_virtual_reg().unwrap(),
                Fact::Range {
                    bit_width,
                    min,
                    max,
                },
            );
        }
    }
}

#[cfg(test)]
mod tests {
    use super::ValueUseState;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::ir::types;
    use crate::ir::{Function, InstBuilder};

    #[test]
    fn multi_result_use_once() {
        let mut func = Function::new();
        let block0 = func.dfg.make_block();
        let mut pos = FuncCursor::new(&mut func);
        pos.insert_block(block0);
        let v1 = pos.ins().iconst(types::I64, 0);
        let v2 = pos.ins().iconst(types::I64, 1);
        let v3 = pos.ins().iconcat(v1, v2);
        let (v4, v5) = pos.ins().isplit(v3);
        pos.ins().return_(&[v4, v5]);
        let func = pos.func;

        let uses = super::compute_use_states(&func, None);
        assert_eq!(uses[v1], ValueUseState::Once);
        assert_eq!(uses[v2], ValueUseState::Once);
        assert_eq!(uses[v3], ValueUseState::Once);
        assert_eq!(uses[v4], ValueUseState::Once);
        assert_eq!(uses[v5], ValueUseState::Once);
    }

    #[test]
    fn results_used_twice_but_not_operands() {
        let mut func = Function::new();
        let block0 = func.dfg.make_block();
        let mut pos = FuncCursor::new(&mut func);
        pos.insert_block(block0);
        let v1 = pos.ins().iconst(types::I64, 0);
        let v2 = pos.ins().iconst(types::I64, 1);
        let v3 = pos.ins().iconcat(v1, v2);
        let (v4, v5) = pos.ins().isplit(v3);
        pos.ins().return_(&[v4, v4]);
        let func = pos.func;

        let uses = super::compute_use_states(&func, None);
        assert_eq!(uses[v1], ValueUseState::Once);
        assert_eq!(uses[v2], ValueUseState::Once);
        assert_eq!(uses[v3], ValueUseState::Once);
        assert_eq!(uses[v4], ValueUseState::Multiple);
        assert_eq!(uses[v5], ValueUseState::Unused);
    }
}