Path: blob/main/cranelift/codegen/src/machinst/lower.rs

//! This module implements lowering (instruction selection) from Cranelift IR
//! to machine instructions with virtual registers. This is *almost* the final
//! machine code, except for register allocation.

// TODO: separate the IR-query core of `Lower` from the lowering logic built on
// top of it, e.g. the side-effect/coloring analysis and the scan support.

use crate::entity::SecondaryMap;
use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit};
use crate::ir::pcc::{Fact, FactContext, PccError, PccResult};
use crate::ir::{
    ArgumentPurpose, Block, BlockArg, Constant, ConstantData, DataFlowGraph, ExternalName,
    Function, GlobalValue, GlobalValueData, Immediate, Inst, InstructionData, MemFlags,
    RelSourceLoc, SigRef, Signature, Type, Value, ValueDef, ValueLabelAssignments, ValueLabelStart,
};
use crate::machinst::valueregs::InvalidSentinel;
use crate::machinst::{
    ABIMachineSpec, BackwardsInsnIndex, BlockIndex, BlockLoweringOrder, CallArgList, CallInfo,
    CallRetList, Callee, InsnIndex, LoweredBlock, MachLabel, Reg, Sig, SigSet, TryCallInfo, VCode,
    VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants, VCodeInst, ValueRegs, Writable,
    writable_value_regs,
};
use crate::settings::Flags;
use crate::{CodegenError, CodegenResult, trace};
use crate::{FxHashMap, FxHashSet};
use alloc::vec::Vec;
use core::fmt::Debug;
use cranelift_control::ControlPlane;
use smallvec::{SmallVec, smallvec};

use super::{VCodeBuildDirection, VRegAllocator};

/// A vector of ValueRegs, used to represent the outputs of an instruction.
pub type InstOutput = SmallVec<[ValueRegs<Reg>; 2]>;

/// An "instruction color" partitions CLIF instructions by side-effecting ops.
/// All instructions with the same "color" are guaranteed not to be separated by
/// any side-effecting op (for this purpose, loads are also considered
/// side-effecting, to avoid subtle questions w.r.t. the memory model), and
/// furthermore, it is guaranteed that for any two instructions A and B such
/// that color(A) == color(B), either A dominates B and B postdominates A, or
/// vice-versa. (For now, in practice, only ops in the same basic block can ever
/// have the same color, trivially providing the second condition.) Intuitively,
/// this means that the ops of the same color must always execute "together", as
/// part of one atomic contiguous section of the dynamic execution trace, and
/// they can be freely permuted (modulo true dataflow dependencies) without
/// affecting program behavior.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct InstColor(u32);
impl InstColor {
    fn new(n: u32) -> InstColor {
        InstColor(n)
    }

    /// Get an arbitrary index representing this color. The index is unique
    /// *within a single function compilation*, but indices may be reused across
    /// functions.
    pub fn get(self) -> u32 {
        self.0
    }
}
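
// Illustrative sketch (not from the source): how colors partition a block.
// Assume the load and store below are the only side-effecting ops; the CLIF
// and the color values are made up for illustration.
//
//     v1 = iconst.i64 1      ; color 1
//     v2 = load.i64 v0       ; side-effecting: entry color 1, exit color 2
//     v3 = iadd.i64 v2, v1   ; color 2
//     store.i64 v3, v0       ; side-effecting: entry color 2, exit color 3
//
// The pure `iadd` shares a color with the `store`'s entry, so no side-effect
// separates them and the `iadd` may be merged into the `store`'s lowering.
// The `load` can sink to that point because its entry color plus one equals
// the scan color there (see `get_value_as_source_or_const` below).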

/// A representation of all of the ways in which a value is available, aside
/// from as a direct register.
///
/// - An instruction, if it would be allowed to occur at the current location
///   instead (see [Lower::get_input_as_source_or_const()] for more details).
///
/// - A constant, if the value is known to be a constant.
#[derive(Clone, Copy, Debug)]
pub struct NonRegInput {
    /// An instruction produces this value (as the given output), and its
    /// computation (and side-effect if applicable) could occur at the
    /// current instruction's location instead.
    ///
    /// If this instruction's operation is merged into the current instruction,
    /// the backend must call [Lower::sink_inst()].
    ///
    /// This enum indicates whether this use of the source instruction
    /// is unique or not.
    pub inst: InputSourceInst,
    /// The value is a known constant.
    pub constant: Option<u64>,
}

/// When examining an input to an instruction, this enum provides one
/// of several options: there is or isn't a single instruction (that
/// we can see and merge with) that produces that input's value, and
/// we are or aren't the single user of that instruction.
#[derive(Clone, Copy, Debug)]
pub enum InputSourceInst {
    /// The input in question is the single, unique use of the given
    /// instruction and output index, and it can be sunk to the
    /// location of this input.
    UniqueUse(Inst, usize),
    /// The input in question is one of multiple uses of the given
    /// instruction. It can still be sunk to the location of this
    /// input.
    Use(Inst, usize),
    /// We cannot determine which instruction produced the input, or
    /// it is one of several instructions (e.g., due to a control-flow
    /// merge and blockparam), or the source instruction cannot be
    /// allowed to sink to the current location due to side-effects.
    None,
}

impl InputSourceInst {
    /// Get the instruction and output index for this source, whether
    /// we are its single or one of many users.
    pub fn as_inst(&self) -> Option<(Inst, usize)> {
        match self {
            &InputSourceInst::UniqueUse(inst, output_idx)
            | &InputSourceInst::Use(inst, output_idx) => Some((inst, output_idx)),
            &InputSourceInst::None => None,
        }
    }
}

/// A machine backend.
pub trait LowerBackend {
    /// The machine instruction type.
    type MInst: VCodeInst;

    /// Lower a single instruction.
    ///
    /// For a branch, this function should not generate the actual branch
    /// instruction. However, it must force any values it needs for the branch
    /// edge (block-param actuals) into registers, because the actual branch
    /// generation (`lower_branch()`) happens *after* any possible merged
    /// out-edge.
    ///
    /// Returns `None` if no lowering for the instruction was found.
    fn lower(&self, ctx: &mut Lower<Self::MInst>, inst: Inst) -> Option<InstOutput>;

    /// Lower a block-terminating group of branches (which together can be seen
    /// as one N-way branch), given a vcode MachLabel for each target.
    ///
    /// Returns `None` if no lowering for the branch was found.
    fn lower_branch(
        &self,
        ctx: &mut Lower<Self::MInst>,
        inst: Inst,
        targets: &[MachLabel],
    ) -> Option<()>;
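
    // An implementation typically dispatches on the opcode and emits vcode
    // instructions through the context. A minimal sketch, with a hypothetical
    // `MyInst` backend (dispatch shown schematically, not verbatim from any
    // real backend):
    //
    //     fn lower(&self, ctx: &mut Lower<MyInst>, inst: Inst) -> Option<InstOutput> {
    //         match ctx.data(inst).opcode() {
    //             Opcode::Iadd => {
    //                 let lhs = ctx.put_input_in_regs(inst, 0).only_reg()?;
    //                 let rhs = ctx.put_input_in_regs(inst, 1).only_reg()?;
    //                 let dst = ctx.alloc_tmp(ctx.output_ty(inst, 0));
    //                 // ... ctx.emit(MyInst::add(dst, lhs, rhs)) ...
    //                 Some(smallvec![dst.map(|r| r.to_reg())])
    //             }
    //             _ => None, // no lowering found; caller reports unsupported
    //         }
    //     }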

    /// A bit of a hack: give a fixed register that always holds the result of a
    /// `get_pinned_reg` instruction, if known. This allows elision of moves
    /// into the associated vreg, instead using the real reg directly.
    fn maybe_pinned_reg(&self) -> Option<Reg> {
        None
    }

    /// The type of state carried between `check_fact` invocations.
    type FactFlowState: Default + Clone + Debug;

    /// Check any facts about an instruction, given VCode with facts
    /// on VRegs. Takes mutable `VCode` so that it can propagate some
    /// kinds of facts automatically.
    fn check_fact(
        &self,
        _ctx: &FactContext<'_>,
        _vcode: &mut VCode<Self::MInst>,
        _inst: InsnIndex,
        _state: &mut Self::FactFlowState,
    ) -> PccResult<()> {
        Err(PccError::UnimplementedBackend)
    }
}

/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
/// from original Inst to MachInsts.
pub struct Lower<'func, I: VCodeInst> {
    /// The function to lower.
    pub(crate) f: &'func Function,

    /// Lowered machine instructions.
    vcode: VCodeBuilder<I>,

    /// VReg allocation context, given to the vcode field at build time to finalize the vcode.
    vregs: VRegAllocator<I>,

    /// Mapping from `Value` (SSA value in IR) to virtual register.
    value_regs: SecondaryMap<Value, ValueRegs<Reg>>,

    /// sret registers, if needed.
    sret_reg: Option<ValueRegs<Reg>>,

    /// Instruction colors at block exits. From this map, we can recover all
    /// instruction colors by scanning backward from the block end and
    /// decrementing on any color-changing (side-effecting) instruction.
    block_end_colors: SecondaryMap<Block, InstColor>,

    /// Instruction colors at side-effecting ops. This is the *entry* color,
    /// i.e., the version of global state that exists before an instruction
    /// executes. For each side-effecting instruction, the *exit* color is its
    /// entry color plus one.
    side_effect_inst_entry_colors: FxHashMap<Inst, InstColor>,

    /// Current color as we scan during lowering. While we are lowering an
    /// instruction, this is equal to the color *at entry to* the instruction.
    cur_scan_entry_color: Option<InstColor>,

    /// Current instruction as we scan during lowering.
    cur_inst: Option<Inst>,

    /// Instruction constant values, if known.
    inst_constants: FxHashMap<Inst, u64>,

    /// Use-counts per SSA value, as counted in the input IR. These
    /// are "coarsened", in the abstract-interpretation sense: we only
    /// care about "0, 1, many" states, as this is all we need and
    /// this lets us do an efficient fixpoint analysis.
    ///
    /// See doc comment on `ValueUseState` for more details.
    value_ir_uses: SecondaryMap<Value, ValueUseState>,

    /// Actual uses of each SSA value so far, incremented while lowering.
    value_lowered_uses: SecondaryMap<Value, u32>,

    /// Effectful instructions that have been sunk; they are not codegen'd at
    /// their original locations.
    inst_sunk: FxHashSet<Inst>,

    /// Instructions collected for the CLIF inst in progress, in forward order.
    ir_insts: Vec<I>,

    /// Try-call block arg normal-return values, indexed by instruction.
    try_call_rets: FxHashMap<Inst, SmallVec<[ValueRegs<Writable<Reg>>; 2]>>,

    /// Try-call block arg exceptional-return payloads, indexed by
    /// instruction. Payloads are carried in registers per the ABI and
    /// can only be one register each.
    try_call_payloads: FxHashMap<Inst, SmallVec<[Writable<Reg>; 2]>>,

    /// The register to use for GetPinnedReg, if any, on this architecture.
    pinned_reg: Option<Reg>,

    /// Compilation flags.
    flags: Flags,
}
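
// A `ValueRegs` may span more than one virtual register for a wide value:
// e.g., on a 64-bit target an `i128` SSA value is typically assigned two
// vregs. A backend reads them as in this sketch (`ctx` is a `&mut Lower<_>`,
// `v` a hypothetical 128-bit `Value`):
//
//     let vals = ctx.put_value_in_regs(v); // also marks a lowered use of `v`
//     let lo = vals.regs()[0];
//     let hi = vals.regs()[1];             // present only for multi-reg values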

/// How is a value used in the IR?
///
/// This can be seen as a coarsening of an integer count. We only need
/// distinct states for zero, one, or many.
///
/// This analysis deserves further explanation. The basic idea is that
/// we want to allow instruction lowering to know whether a value that
/// an instruction references is *only* referenced by that one use, or
/// by others as well. This is necessary to know when we might want to
/// move a side-effect: we cannot, for example, duplicate a load, so
/// we cannot let instruction lowering match a load as part of a
/// subpattern and potentially incorporate it.
///
/// Note that a lot of subtlety comes into play once we have
/// *indirect* uses. The classical example of this in our development
/// history was the x86 compare instruction, which is incorporated
/// into flags users (e.g. `selectif`, `trueif`, branches) and can
/// subsequently incorporate loads, or at least we would like it
/// to. However, danger awaits: the compare might be the only user of
/// a load, so we might think we can just move the load (and nothing
/// is duplicated -- success!), except that the compare itself is
/// codegen'd in multiple places, where it is incorporated as a
/// subpattern itself.
///
/// So we really want a notion of "unique all the way along the
/// matching path". Rust's `&T` and `&mut T` offer a partial analogy
/// to the semantics that we want here: we want to know when we've
/// matched a unique use of an instruction, and that instruction's
/// unique use of another instruction, etc, just as `&mut T` can only
/// be obtained by going through a chain of `&mut T`. If one has a
/// `&T` to a struct containing `&mut T` (one of several uses of an
/// instruction that itself has a unique use of an instruction), one
/// can only get a `&T` (one can only get a "I am one of several users
/// of this instruction" result).
///
/// We could track these paths, either dynamically as one "looks up the operand
/// tree" or precomputed. But the former requires state and means that the
/// `Lower` API carries that state implicitly, which we'd like to avoid if we
/// can. And the latter implies O(n^2) storage: it is an all-pairs property (is
/// inst `i` unique from the point of view of `j`).
///
/// To make matters even a little more complex still, a value that is
/// not uniquely used when initially viewing the IR can *become*
/// uniquely used, at least as a root allowing further unique uses of
/// e.g. loads to merge, if no other instruction actually merges
/// it. To be more concrete, if we have `v1 := load; v2 := op v1; v3
/// := op v2; v4 := op v2` then `v2` is non-uniquely used, so from the
/// point of view of lowering `v4` or `v3`, we cannot merge the load
/// at `v1`. But if we decide just to use the assigned register for
/// `v2` at both `v3` and `v4`, then we only actually codegen `v2`
/// once, so it *is* a unique root at that point and we *can* merge
/// the load.
///
/// Note also that the color scheme is not sufficient to give us this
/// information, for various reasons: reasoning about side-effects
/// does not tell us about potential duplication of uses through pure
/// ops.
///
/// To keep things simple and avoid error-prone lowering APIs that
/// would extract more information about whether instruction merging
/// happens or not (we don't have that info now, and it would be
/// difficult to refactor to get it and make that refactor 100%
/// correct), we give up on the above "can become unique if not
/// actually merged" point. Instead, we compute a
/// transitive-uniqueness. That is what this enum represents.
///
/// There is one final caveat as well to the result of this analysis. Notably,
/// we define some instructions to be "root" instructions, which means that we
/// assume they will always be codegen'd at the root of a matching tree, and not
/// matched. (This comes with the caveat that we actually enforce this property
/// by making them "opaque" to subtree matching in
/// `get_value_as_source_or_const`). Because they will always be codegen'd once,
/// they in some sense "reset" multiplicity: these root instructions can be used
/// many times, but because their result(s) are only computed once, they only
/// use their inputs once.
///
/// We currently define all multi-result instructions to be "root" instructions,
/// because it is too complex to reason about matching through them, and they
/// cause too-coarse-grained approximation of multiplicity otherwise: the
/// analysis would have to assume (as it used to!) that they are always
/// multiply-used, simply because they have multiple outputs even if those
/// outputs are used only once.
///
/// In the future we could define other instructions to be "root" instructions
/// as well, if we make the corresponding change to get_value_as_source_or_const
/// as well.
///
/// To define `ValueUseState` more plainly: a value is `Unused` if no references
/// exist to it; `Once` if only one other op refers to it, *and* that other op
/// is `Unused` or `Once`; and `Multiple` otherwise. In other words, `Multiple`
/// is contagious (except through root instructions): even if an op's result
/// value is directly used only once in the CLIF, that value is `Multiple` if
/// the op that uses it is itself used multiple times (hence could be codegen'd
/// multiple times). In brief, this analysis tells us whether, if every op
/// merged all of its operand tree, a given op could be codegen'd in more than
/// one place.
///
/// To compute this, we first consider direct uses. At this point
/// `Unused` answers are correct, `Multiple` answers are correct, but
/// some `Once`s may change to `Multiple`s. Then we propagate
/// `Multiple` transitively using a workqueue/fixpoint algorithm.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ValueUseState {
    /// Not used at all.
    Unused,
    /// Used exactly once.
    Once,
    /// Used multiple times.
    Multiple,
}

impl ValueUseState {
    /// Add one use.
    fn inc(&mut self) {
        let new = match self {
            Self::Unused => Self::Once,
            Self::Once | Self::Multiple => Self::Multiple,
        };
        *self = new;
    }
}
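
// Restating the doc comment's example concretely: in
//
//     v1 := load; v2 := op v1; v3 := op v2; v4 := op v2
//
// `v2` has two direct uses, so it is `Multiple`, and `Multiple` propagates
// transitively to `v1`: if both `v3` and `v4` merged their `op v2` subtree,
// `v2` (and any load merged into it) would be codegen'd twice. A `Once`
// answer therefore guarantees a value sits on a unique matching path.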

/// Notion of "relocation distance". This gives an estimate of how far away a symbol will be from a
/// reference.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RelocDistance {
    /// Target of relocation is "nearby". The threshold for this is fuzzy but should be interpreted
    /// as approximately "within the compiled output of one module"; e.g., within AArch64's +/-
    /// 128MB offset. If unsure, use `Far` instead.
    Near,
    /// Target of relocation could be anywhere in the address space.
    Far,
}

impl<'func, I: VCodeInst> Lower<'func, I> {
    /// Prepare a new lowering context for the given IR function.
    pub fn new(
        f: &'func Function,
        abi: Callee<I::ABIMachineSpec>,
        emit_info: I::Info,
        block_order: BlockLoweringOrder,
        sigs: SigSet,
        flags: Flags,
    ) -> CodegenResult<Self> {
        let constants = VCodeConstants::with_capacity(f.dfg.constants.len());
        let vcode = VCodeBuilder::new(
            sigs,
            abi,
            emit_info,
            block_order,
            constants,
            VCodeBuildDirection::Backward,
            flags.log2_min_function_alignment(),
        );

        // We usually need two VRegs per instruction result, plus extras for
        // various temporaries, but two per Value is a good starting point.
        let mut vregs = VRegAllocator::with_capacity(f.dfg.num_values() * 2);

        let mut value_regs = SecondaryMap::with_default(ValueRegs::invalid());
        let mut try_call_rets = FxHashMap::default();
        let mut try_call_payloads = FxHashMap::default();

        // Assign a vreg to each block param, each inst result, and
        // each edge-defined block-call arg.
        for bb in f.layout.blocks() {
            for &param in f.dfg.block_params(bb) {
                let ty = f.dfg.value_type(param);
                if value_regs[param].is_invalid() {
                    let regs = vregs.alloc_with_maybe_fact(ty, f.dfg.facts[param].clone())?;
                    value_regs[param] = regs;
                    trace!("bb {} param {}: regs {:?}", bb, param, regs);
                }
            }
            for inst in f.layout.block_insts(bb) {
                for &result in f.dfg.inst_results(inst) {
                    let ty = f.dfg.value_type(result);
                    if value_regs[result].is_invalid() && !ty.is_invalid() {
                        let regs = vregs.alloc_with_maybe_fact(ty, f.dfg.facts[result].clone())?;
                        value_regs[result] = regs;
                        trace!(
                            "bb {} inst {} ({:?}): result {} regs {:?}",
                            bb, inst, f.dfg.insts[inst], result, regs,
                        );
                    }
                }

                if let Some(et) = f.dfg.insts[inst].exception_table() {
                    let exdata = &f.dfg.exception_tables[et];
                    let sig = &f.dfg.signatures[exdata.signature()];

                    let mut rets = smallvec![];
                    for ty in sig.returns.iter().map(|ret| ret.value_type) {
                        rets.push(vregs.alloc(ty)?.map(|r| Writable::from_reg(r)));
                    }
                    try_call_rets.insert(inst, rets);

                    let mut payloads = smallvec![];
                    // Note that this is intentionally using the calling
                    // convention of the callee to determine what payload types
                    // are available. The callee defines that, not the calling
                    // convention of the caller.
                    for &ty in sig
                        .call_conv
                        .exception_payload_types(I::ABIMachineSpec::word_type())
                    {
                        payloads.push(Writable::from_reg(vregs.alloc(ty)?.only_reg().unwrap()));
                    }
                    try_call_payloads.insert(inst, payloads);
                }
            }
        }

        // Find the sret register, if it's used.
        let mut sret_param = None;
        for ret in vcode.abi().signature().returns.iter() {
            if ret.purpose == ArgumentPurpose::StructReturn {
                let entry_bb = f.stencil.layout.entry_block().unwrap();
                for (&param, sig_param) in f
                    .dfg
                    .block_params(entry_bb)
                    .iter()
                    .zip(vcode.abi().signature().params.iter())
                {
                    if sig_param.purpose == ArgumentPurpose::StructReturn {
                        assert!(sret_param.is_none());
                        sret_param = Some(param);
                    }
                }

                assert!(sret_param.is_some());
            }
        }

        let sret_reg = sret_param.map(|param| {
            let regs = value_regs[param];
            assert!(regs.len() == 1);
            regs
        });

        // Compute instruction colors, find constant instructions, and find instructions with
        // side-effects, in one combined pass.
        let mut cur_color = 0;
        let mut block_end_colors = SecondaryMap::with_default(InstColor::new(0));
        let mut side_effect_inst_entry_colors = FxHashMap::default();
        let mut inst_constants = FxHashMap::default();
        for bb in f.layout.blocks() {
            cur_color += 1;
            for inst in f.layout.block_insts(bb) {
                let side_effect = has_lowering_side_effect(f, inst);

                trace!("bb {} inst {} has color {}", bb, inst, cur_color);
                if side_effect {
                    side_effect_inst_entry_colors.insert(inst, InstColor::new(cur_color));
                    trace!(" -> side-effecting; incrementing color for next inst");
                    cur_color += 1;
                }

                // Determine if this is a constant; if so, add to the table.
                if let Some(c) = is_constant_64bit(f, inst) {
                    trace!(" -> constant: {}", c);
                    inst_constants.insert(inst, c);
                }
            }

            block_end_colors[bb] = InstColor::new(cur_color);
        }

        let value_ir_uses = compute_use_states(f, sret_param);

        Ok(Lower {
            f,
            vcode,
            vregs,
            value_regs,
            sret_reg,
            block_end_colors,
            side_effect_inst_entry_colors,
            inst_constants,
            value_ir_uses,
            value_lowered_uses: SecondaryMap::default(),
            inst_sunk: FxHashSet::default(),
            cur_scan_entry_color: None,
            cur_inst: None,
            ir_insts: vec![],
            try_call_rets,
            try_call_payloads,
            pinned_reg: None,
            flags,
        })
    }

    pub fn sigs(&self) -> &SigSet {
        self.vcode.sigs()
    }

    pub fn sigs_mut(&mut self) -> &mut SigSet {
        self.vcode.sigs_mut()
    }

    pub fn vregs_mut(&mut self) -> &mut VRegAllocator<I> {
        &mut self.vregs
    }

    fn gen_arg_setup(&mut self) {
        if let Some(entry_bb) = self.f.layout.entry_block() {
            trace!(
                "gen_arg_setup: entry BB {} args are:\n{:?}",
                entry_bb,
                self.f.dfg.block_params(entry_bb)
            );

            for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() {
                if self.value_ir_uses[*param] == ValueUseState::Unused {
                    continue;
                }
                let regs = writable_value_regs(self.value_regs[*param]);
                for insn in self
                    .vcode
                    .vcode
                    .abi
                    .gen_copy_arg_to_regs(&self.vcode.vcode.sigs, i, regs, &mut self.vregs)
                    .into_iter()
                {
                    self.emit(insn);
                }
            }
            if let Some(insn) = self
                .vcode
                .vcode
                .abi
                .gen_retval_area_setup(&self.vcode.vcode.sigs, &mut self.vregs)
            {
                self.emit(insn);
            }

            // The `args` instruction below must come first. Finish
            // the current "IR inst" (with a default source location,
            // as for other special instructions inserted during
            // lowering) and continue the scan backward.
            self.finish_ir_inst(Default::default());

            if let Some(insn) = self.vcode.vcode.abi.take_args() {
                self.emit(insn);
            }
        }
    }

    /// Generate the return instruction.
    pub fn gen_return(&mut self, rets: &[ValueRegs<Reg>]) {
        let mut out_rets = vec![];

        let mut rets = rets.into_iter();
        for (i, ret) in self
            .abi()
            .signature()
            .returns
            .clone()
            .into_iter()
            .enumerate()
        {
            let regs = if ret.purpose == ArgumentPurpose::StructReturn {
                self.sret_reg.unwrap()
            } else {
                *rets.next().unwrap()
            };

            let (regs, insns) = self.vcode.abi().gen_copy_regs_to_retval(
                self.vcode.sigs(),
                i,
                regs,
                &mut self.vregs,
            );
            out_rets.extend(regs);
            for insn in insns {
                self.emit(insn);
            }
        }

        // Hack: generate a virtual instruction that uses vmctx in
        // order to keep it alive for the duration of the function,
        // for the benefit of debuginfo.
        if self.f.dfg.values_labels.is_some() {
            if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
                if self.value_ir_uses[vmctx_val] != ValueUseState::Unused {
                    let vmctx_reg = self.value_regs[vmctx_val].only_reg().unwrap();
                    self.emit(I::gen_dummy_use(vmctx_reg));
                }
            }
        }

        let inst = self.abi().gen_rets(out_rets);
        self.emit(inst);
    }

    /// Generate list of registers to hold the output of a call with
    /// signature `sig`.
    pub fn gen_call_output(&mut self, sig: &Signature) -> InstOutput {
        let mut rets = smallvec![];
        for ty in sig.returns.iter().map(|ret| ret.value_type) {
            rets.push(self.vregs.alloc_with_deferred_error(ty));
        }
        rets
    }

    /// Likewise, but for a `SigRef` instead.
    pub fn gen_call_output_from_sig_ref(&mut self, sig_ref: SigRef) -> InstOutput {
        self.gen_call_output(&self.f.dfg.signatures[sig_ref])
    }

    /// Set up argument values `args` for a call with signature `sig`.
    pub fn gen_call_args(&mut self, sig: Sig, args: &[ValueRegs<Reg>]) -> CallArgList {
        let (uses, insts) = self.vcode.abi().gen_call_args(
            self.vcode.sigs(),
            sig,
            args,
            /* is_tail_call */ false,
            &self.flags,
            &mut self.vregs,
        );
        for insn in insts {
            self.emit(insn);
        }
        uses
    }

    /// Likewise, but for a `return_call`.
    pub fn gen_return_call_args(&mut self, sig: Sig, args: &[ValueRegs<Reg>]) -> CallArgList {
        let (uses, insts) = self.vcode.abi().gen_call_args(
            self.vcode.sigs(),
            sig,
            args,
            /* is_tail_call */ true,
            &self.flags,
            &mut self.vregs,
        );
        for insn in insts {
            self.emit(insn);
        }
        uses
    }

    /// Set up return values `outputs` for a call with signature `sig`.
    pub fn gen_call_rets(&mut self, sig: Sig, outputs: &[ValueRegs<Reg>]) -> CallRetList {
        self.vcode
            .abi()
            .gen_call_rets(self.vcode.sigs(), sig, outputs, None, &mut self.vregs)
    }

    /// Likewise, but for a `try_call`.
    pub fn gen_try_call_rets(&mut self, sig: Sig) -> CallRetList {
        let ir_inst = self.cur_inst.unwrap();
        let mut outputs: SmallVec<[ValueRegs<Reg>; 2]> = smallvec![];
        for return_def in self.try_call_rets.get(&ir_inst).unwrap() {
            outputs.push(return_def.map(|r| r.to_reg()));
        }
        let payloads = Some(&self.try_call_payloads.get(&ir_inst).unwrap()[..]);

        self.vcode
            .abi()
            .gen_call_rets(self.vcode.sigs(), sig, &outputs, payloads, &mut self.vregs)
    }
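
    // How these helpers compose in a backend's call lowering (sketch only;
    // `args`, `dest`, and `MyInst::Call` are hypothetical, and the concrete
    // sequence varies by backend):
    //
    //     let uses = ctx.gen_call_args(sig, &args);      // move args into ABI locations
    //     let outputs = ctx.gen_call_output(&signature); // fresh vregs for results
    //     let defs = ctx.gen_call_rets(sig, &outputs);   // map results out of ABI locations
    //     let info = ctx.gen_call_info(sig, dest, uses, defs, None, false);
    //     ctx.emit(MyInst::Call { info: Box::new(info) });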

    /// Populate a `CallInfo` for a call with signature `sig`.
    pub fn gen_call_info<T>(
        &mut self,
        sig: Sig,
        dest: T,
        uses: CallArgList,
        defs: CallRetList,
        try_call_info: Option<TryCallInfo>,
        patchable: bool,
    ) -> CallInfo<T> {
        self.vcode.abi().gen_call_info(
            self.vcode.sigs(),
            sig,
            dest,
            uses,
            defs,
            try_call_info,
            patchable,
        )
    }

    /// Has this instruction been sunk to a use-site (i.e., away from its
    /// original location)?
    fn is_inst_sunk(&self, inst: Inst) -> bool {
        self.inst_sunk.contains(&inst)
    }

    // Is any result of this instruction needed?
    fn is_any_inst_result_needed(&self, inst: Inst) -> bool {
        self.f
            .dfg
            .inst_results(inst)
            .iter()
            .any(|&result| self.value_lowered_uses[result] > 0)
    }

    fn lower_clif_block<B: LowerBackend<MInst = I>>(
        &mut self,
        backend: &B,
        block: Block,
        ctrl_plane: &mut ControlPlane,
    ) -> CodegenResult<()> {
        self.cur_scan_entry_color = Some(self.block_end_colors[block]);
        // Lowering loop:
        // - For each non-branch instruction, in reverse order:
        //   - If side-effecting (load, store, branch/call/return,
        //     possible trap), or if used outside of this block, or if
        //     demanded by another inst, then lower.
        //
        // That's it! Lowering of side-effecting ops will force all *needed*
        // (live) non-side-effecting ops to be lowered at the right places, via
        // the `put_input_in_regs()` callback on the `Lower` (that's us). That's
        // because `put_input_in_regs()` bumps the lowered-use count for any
        // insts whose result registers are used.
        //
        // We set the VCodeBuilder to "backward" mode, so we emit
        // blocks in reverse order wrt the BlockIndex sequence, and
        // emit instructions in reverse order within blocks. Because
        // the machine backend calls `ctx.emit()` in forward order, we
        // collect per-IR-inst lowered instructions in `ir_insts`,
        // then reverse these and append to the VCode at the end of
        // each IR instruction.
        for inst in self.f.layout.block_insts(block).rev() {
            let data = &self.f.dfg.insts[inst];
            let has_side_effect = has_lowering_side_effect(self.f, inst);
            // If inst has been sunk to another location, skip it.
            if self.is_inst_sunk(inst) {
                continue;
            }
            // Are any outputs used at least once?
            let value_needed = self.is_any_inst_result_needed(inst);
            trace!(
                "lower_clif_block: block {} inst {} ({:?}) is_branch {} side_effect {} value_needed {}",
                block,
                inst,
                data,
                data.opcode().is_branch(),
                has_side_effect,
                value_needed,
            );

            // Update scan state to color prior to this inst (as we are scanning
            // backward).
            self.cur_inst = Some(inst);
            if has_side_effect {
                let entry_color = *self
                    .side_effect_inst_entry_colors
                    .get(&inst)
                    .expect("every side-effecting inst should have a color-map entry");
                self.cur_scan_entry_color = Some(entry_color);
            }

            // Skip lowering branches; these are handled separately
            // (see `lower_clif_branch()` below).
            if self.f.dfg.insts[inst].opcode().is_branch() {
                continue;
            }

            // Value defined by "inst" becomes live after it in normal
            // order, and therefore **before** in reversed order.
            self.emit_value_label_live_range_start_for_inst(inst);

            // Normal instruction: codegen if the instruction is side-effecting
            // or any of its outputs is used.
            if has_side_effect || value_needed {
                trace!("lowering: inst {}: {}", inst, self.f.dfg.display_inst(inst));
                let temp_regs = match backend.lower(self, inst) {
                    Some(regs) => regs,
                    None => {
                        let ty = if self.num_outputs(inst) > 0 {
                            Some(self.output_ty(inst, 0))
                        } else {
                            None
                        };
                        return Err(CodegenError::Unsupported(format!(
                            "should be implemented in ISLE: inst = `{}`, type = `{:?}`",
                            self.f.dfg.display_inst(inst),
                            ty
                        )));
                    }
                };

                // The ISLE generated code emits its own registers to define the
                // instruction's lowered values in. However, other instructions
                // that use this SSA value will be lowered assuming that the value
                // is generated into a pre-assigned, different, register.
                //
                // To connect the two, we set up "aliases" in the VCodeBuilder
                // that apply when it is building the Operand table for the
                // regalloc to use. These aliases effectively rewrite any use of
                // the pre-assigned register to the register that was returned by
                // the ISLE lowering logic.
                let results = self.f.dfg.inst_results(inst);
                debug_assert_eq!(temp_regs.len(), results.len());
                for (regs, &result) in temp_regs.iter().zip(results) {
                    let dsts = self.value_regs[result];
                    let mut regs = regs.regs().iter();
                    for &dst in dsts.regs().iter() {
                        let temp = regs.next().copied().unwrap_or(Reg::invalid_sentinel());
                        trace!("set vreg alias: {result:?} = {dst:?}, lowering = {temp:?}");
                        self.vregs.set_vreg_alias(dst, temp);
                    }
                }
            }

            let start = self.vcode.vcode.num_insts();
            let loc = self.srcloc(inst);
            self.finish_ir_inst(loc);

            // If the instruction had a user stack map, forward it from the CLIF
            // to the vcode.
            if let Some(entries) = self.f.dfg.user_stack_map_entries(inst) {
                let end = self.vcode.vcode.num_insts();
                debug_assert!(end > start);
                debug_assert_eq!(
                    (start..end)
                        .filter(|i| self.vcode.vcode[InsnIndex::new(*i)].is_safepoint())
                        .count(),
                    1
                );
                for i in start..end {
                    let iix = InsnIndex::new(i);
                    if self.vcode.vcode[iix].is_safepoint() {
                        trace!(
                            "Adding user stack map from clif\n\n\
                             {inst:?} `{}`\n\n\
                             to vcode\n\n\
                             {iix:?} `{}`",
                            self.f.dfg.display_inst(inst),
                            &self.vcode.vcode[iix].pretty_print_inst(&mut Default::default()),
                        );
                        self.vcode
                            .add_user_stack_map(BackwardsInsnIndex::new(iix.index()), entries);
                        break;
                    }
                }
            }

            // If the CLIF instruction had debug tags, copy them to
            // the VCode. Place on all VCode instructions lowered from
            // this CLIF instruction.
            let debug_tags = self.f.debug_tags.get(inst);
            if !debug_tags.is_empty() && self.vcode.vcode.num_insts() > 0 {
                let end = self.vcode.vcode.num_insts();
                for i in start..end {
                    let backwards_index = BackwardsInsnIndex::new(i);
                    log::trace!(
                        "debug tags on {inst}; associating {debug_tags:?} with {backwards_index:?}"
                    );
                    self.vcode.add_debug_tags(backwards_index, debug_tags);
                }
            }

            // maybe insert random instruction
            if ctrl_plane.get_decision() {
                if ctrl_plane.get_decision() {
                    let imm: u64 = ctrl_plane.get_arbitrary();
                    let reg = self.alloc_tmp(crate::ir::types::I64).regs()[0];
                    I::gen_imm_u64(imm, reg).map(|inst| self.emit(inst));
                } else {
                    let imm: f64 = ctrl_plane.get_arbitrary();
                    let tmp = self.alloc_tmp(crate::ir::types::I64).regs()[0];
                    let reg = self.alloc_tmp(crate::ir::types::F64).regs()[0];
                    for inst in I::gen_imm_f64(imm, tmp, reg) {
                        self.emit(inst);
                    }
                }
            }
        }

        // Add the block params to this block.
        self.add_block_params(block)?;

        self.cur_scan_entry_color = None;
        Ok(())
    }

    fn add_block_params(&mut self, block: Block) -> CodegenResult<()> {
        for &param in self.f.dfg.block_params(block) {
            for &reg in self.value_regs[param].regs() {
                let vreg = reg.to_virtual_reg().unwrap();
                self.vcode.add_block_param(vreg);
            }
        }
        Ok(())
    }

    fn get_value_labels<'a>(&'a self, val: Value, depth: usize) -> Option<&'a [ValueLabelStart]> {
        if let Some(ref values_labels) = self.f.dfg.values_labels {
            debug_assert!(self.f.dfg.value_is_real(val));
            trace!(
                "get_value_labels: val {} -> {:?}",
                val,
                values_labels.get(&val)
            );
            match values_labels.get(&val) {
                Some(&ValueLabelAssignments::Starts(ref list)) => Some(&list[..]),
                Some(&ValueLabelAssignments::Alias { value, .. }) if depth < 10 => {
                    self.get_value_labels(value, depth + 1)
                }
                _ => None,
            }
        } else {
            None
        }
    }

    fn emit_value_label_marks_for_value(&mut self, val: Value) {
        let regs = self.value_regs[val];
        if regs.len() > 1 {
            return;
        }
        let reg = regs.only_reg().unwrap();

        if let Some(label_starts) = self.get_value_labels(val, 0) {
            let labels = label_starts
                .iter()
                .map(|&ValueLabelStart { label, .. }| label)
                .collect::<FxHashSet<_>>();
            for label in labels {
                trace!(
                    "value labeling: defines val {:?} -> reg {:?} -> label {:?}",
                    val, reg, label,
                );
                self.vcode.add_value_label(reg, label);
            }
        }
    }

    fn emit_value_label_live_range_start_for_inst(&mut self, inst: Inst) {
        if self.f.dfg.values_labels.is_none() {
            return;
        }

        trace!(
            "value labeling: srcloc {}: inst {}",
            self.srcloc(inst),
            inst
        );
        for &val in self.f.dfg.inst_results(inst) {
            self.emit_value_label_marks_for_value(val);
        }
    }

    fn emit_value_label_live_range_start_for_block_args(&mut self, block: Block) {
        if self.f.dfg.values_labels.is_none() {
            return;
        }

        trace!("value labeling: block {}", block);
        for &arg in self.f.dfg.block_params(block) {
            self.emit_value_label_marks_for_value(arg);
        }
        self.finish_ir_inst(Default::default());
    }

    fn finish_ir_inst(&mut self, loc: RelSourceLoc) {
        // The VCodeBuilder builds in reverse order (and reverses at
        // the end), but `ir_insts` is in forward order, so reverse
        // it.
        for inst in self.ir_insts.drain(..).rev() {
            self.vcode.push(inst, loc);
        }
    }

    fn finish_bb(&mut self) {
        self.vcode.end_bb();
    }

    fn lower_clif_branch<B: LowerBackend<MInst = I>>(
        &mut self,
        backend: &B,
        // Lowered block index:
        bindex: BlockIndex,
        // Original CLIF block:
        block: Block,
        branch: Inst,
        targets: &[MachLabel],
    ) -> CodegenResult<()> {
        trace!(
            "lower_clif_branch: block {} branch {:?} targets {:?}",
            block, branch, targets,
        );
        // When considering code-motion opportunities, consider the current
        // program point to be this branch.
        self.cur_inst = Some(branch);

        // Lower the branch in ISLE.
        backend
            .lower_branch(self, branch, targets)
            .unwrap_or_else(|| {
                panic!(
                    "should be implemented in ISLE: branch = `{}`",
                    self.f.dfg.display_inst(branch),
                )
            });
        let loc = self.srcloc(branch);
        self.finish_ir_inst(loc);
        // Add block param outputs for current block.
        self.lower_branch_blockparam_args(bindex);
        Ok(())
    }

    fn lower_branch_blockparam_args(&mut self, block: BlockIndex) {
        let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];

        // TODO: why not make `block_order` public?
        for succ_idx in 0..self.vcode.block_order().succ_indices(block).1.len() {
            branch_arg_vregs.clear();
            let (succ, args) = self.collect_block_call(block, succ_idx, &mut branch_arg_vregs);
            self.vcode.add_succ(succ, args);
        }
    }

    fn collect_branch_and_targets(
        &self,
        bindex: BlockIndex,
        _bb: Block,
        targets: &mut SmallVec<[MachLabel; 2]>,
    ) -> Option<Inst> {
        targets.clear();
        let (opt_inst, succs) = self.vcode.block_order().succ_indices(bindex);
        targets.extend(succs.iter().map(|succ| MachLabel::from_block(*succ)));
        opt_inst
    }

    /// Collect the outgoing block-call arguments for a given edge out
    /// of a lowered block.
    fn collect_block_call<'a>(
        &mut self,
        block: BlockIndex,
        succ_idx: usize,
        buffer: &'a mut SmallVec<[Reg; 16]>,
    ) -> (BlockIndex, &'a [Reg]) {
        let block_order = self.vcode.block_order();
        let (_, succs) = block_order.succ_indices(block);
        let succ = succs[succ_idx];
        let this_lb = block_order.lowered_order()[block.index()];
        let succ_lb = block_order.lowered_order()[succ.index()];

        let (branch_inst, succ_idx) = match (this_lb, succ_lb) {
            (_, LoweredBlock::CriticalEdge { .. }) => {
                // The successor is a split-critical-edge block. In this
                // case, this block-call has no arguments, and the
                // arguments go on the critical edge block's unconditional
                // branch instead.
                return (succ, &[]);
            }
            (LoweredBlock::CriticalEdge { pred, succ_idx, .. }, _) => {
                // This is a split-critical-edge block. In this case, our
                // block-call has the arguments that in the CLIF appear in
                // the predecessor's branch to this edge.
                let branch_inst = self.f.layout.last_inst(pred).unwrap();
                (branch_inst, succ_idx as usize)
            }

            (this, _) => {
                let block = this.orig_block().unwrap();
                // Ordinary block, with an ordinary block as
                // successor. Take the arguments from the branch.
                let branch_inst = self.f.layout.last_inst(block).unwrap();
                (branch_inst, succ_idx)
            }
        };

        let block_call = self.f.dfg.insts[branch_inst]
            .branch_destination(&self.f.dfg.jump_tables, &self.f.dfg.exception_tables)[succ_idx];
        for arg in block_call.args(&self.f.dfg.value_lists) {
            match arg {
                BlockArg::Value(arg) => {
                    debug_assert!(self.f.dfg.value_is_real(arg));
                    let regs = self.put_value_in_regs(arg);
                    buffer.extend_from_slice(regs.regs());
                }
                BlockArg::TryCallRet(i) => {
                    let regs = self.try_call_rets.get(&branch_inst).unwrap()[i as usize]
                        .map(|r| r.to_reg());
                    buffer.extend_from_slice(regs.regs());
                }
                BlockArg::TryCallExn(i) => {
                    let reg =
                        self.try_call_payloads.get(&branch_inst).unwrap()[i as usize].to_reg();
                    buffer.push(reg);
                }
            }
        }
        (succ, &buffer[..])
    }

    /// Lower the function.
    pub fn lower<B: LowerBackend<MInst = I>>(
        mut self,
        backend: &B,
        ctrl_plane: &mut ControlPlane,
    ) -> CodegenResult<VCode<I>> {
        trace!("about to lower function: {:?}", self.f);

        self.vcode.init_retval_area(&mut self.vregs)?;

        // Get the pinned reg here (we only parameterize this function on `B`,
        // not the whole `Lower` impl).
        self.pinned_reg = backend.maybe_pinned_reg();

        self.vcode.set_entry(BlockIndex::new(0));

        // Reused vectors for branch lowering.
        let mut targets: SmallVec<[MachLabel; 2]> = SmallVec::new();

        // get a copy of the lowered order; we hold this separately because we
        // need a mut ref to the vcode to mutate it below.
        let lowered_order: SmallVec<[LoweredBlock; 64]> = self
            .vcode
            .block_order()
            .lowered_order()
            .iter()
            .cloned()
            .collect();

        // Main lowering loop over lowered blocks.
        for (bindex, lb) in lowered_order.iter().enumerate().rev() {
            let bindex = BlockIndex::new(bindex);

            // Lower the block body in reverse order (see comment in
            // `lower_clif_block()` for rationale).

            // End branch.
            if let Some(bb) = lb.orig_block() {
                if let Some(branch) = self.collect_branch_and_targets(bindex, bb, &mut targets) {
                    self.lower_clif_branch(backend, bindex, bb, branch, &targets)?;
                    self.finish_ir_inst(self.srcloc(branch));
                }
            } else {
                // If no orig block, this must be a pure edge block;
                // get the successor and emit a jump. This block has
                // no block params; and this jump's block-call args
                // will be filled in by
                // `lower_branch_blockparam_args`.
                let succ = self.vcode.block_order().succ_indices(bindex).1[0];
                self.emit(I::gen_jump(MachLabel::from_block(succ)));
                self.finish_ir_inst(Default::default());
                self.lower_branch_blockparam_args(bindex);
            }

            // Original block body.
            if let Some(bb) = lb.orig_block() {
                self.lower_clif_block(backend, bb, ctrl_plane)?;
                self.emit_value_label_live_range_start_for_block_args(bb);
            }

            if bindex.index() == 0 {
                // Set up the function with arg vreg inits.
                self.gen_arg_setup();
                self.finish_ir_inst(Default::default());
            }

            self.finish_bb();

            // Check for any deferred vreg-temp allocation errors, and
            // bubble one up at this time if it exists.
            if let Some(e) = self.vregs.take_deferred_error() {
                return Err(e);
            }
        }

        // Now that we've emitted all instructions into the
        // VCodeBuilder, let's build the VCode.
        trace!(
            "built vcode:\n{:?}Backwards {:?}",
            &self.vregs, &self.vcode.vcode
        );
        let vcode = self.vcode.build(self.vregs);

        Ok(vcode)
    }

    pub fn value_is_unused(&self, val: Value) -> bool {
        match self.value_ir_uses[val] {
            ValueUseState::Unused => true,
            _ => false,
        }
    }

    pub fn block_successor_label(&self, block: Block, succ: usize) -> MachLabel {
        trace!("block_successor_label: block {block} succ {succ}");
        let lowered = self
            .vcode
            .block_order()
            .lowered_index_for_block(block)
            .expect("Unreachable block");
        trace!(" -> lowered block {lowered:?}");
        let (_, succs) = self.vcode.block_order().succ_indices(lowered);
        trace!(" -> succs {succs:?}");
        let succ_block = *succs.get(succ).expect("Successor index out of range");
        MachLabel::from_block(succ_block)
    }
}
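
// A worked example of the coarsened analysis below (mirroring the unit tests
// at the bottom of this file): for `v1 = iconst; v2 = iadd v1, v1`, the
// shallow counting pass sees two direct uses of `v1`, so `v1` becomes
// `Multiple`, and the DFS would mark its operand tree (empty here) `Multiple`
// as well. Had `v1` been produced by a load, that load could then not be
// merged when lowering `v2`.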

/// Pre-analysis: compute `value_ir_uses`. See comment on
/// `ValueUseState` for a description of what this analysis
/// computes.
fn compute_use_states(
    f: &Function,
    sret_param: Option<Value>,
) -> SecondaryMap<Value, ValueUseState> {
    // We perform the analysis without recursion, so we don't
    // overflow the stack on long chains of ops in the input.
    //
    // This is sort of a hybrid of a "shallow use-count" pass and
    // a DFS. We iterate over all instructions and mark their args
    // as used. However when we increment a use-count to
    // "Multiple" we push its args onto the stack and do a DFS,
    // immediately marking the whole dependency tree as
    // Multiple. Doing both (shallow use-counting over all insts,
    // and deep Multiple propagation) lets us trim both
    // traversals, stopping recursion when a node is already at
    // the appropriate state.
    //
    // In particular, note that the *coarsening* into {Unused,
    // Once, Multiple} is part of what makes this pass more
    // efficient than a full indirect-use-counting pass.

    let mut value_ir_uses = SecondaryMap::with_default(ValueUseState::Unused);

    if let Some(sret_param) = sret_param {
        // There's an implicit use of the struct-return parameter in each
        // copy of the function epilogue, which we count here.
        value_ir_uses[sret_param] = ValueUseState::Multiple;
    }

    // Stack of iterators over Values as we do DFS to mark
    // Multiple-state subtrees. The iterator type is whatever is
    // returned by `uses` below.
    let mut stack: SmallVec<[_; 16]> = smallvec![];

    // Find the args for the inst corresponding to the given value.
    //
    // Note that "root" instructions are skipped here. This means that multiple
    // uses of any result of a multi-result instruction are not considered
    // multiple uses of the operands of a multi-result instruction. This
    // requires tight coupling with `get_value_as_source_or_const` above which
    // is the consumer of the map that this function is producing.
    let uses = |value| {
        trace!(" -> pushing args for {} onto stack", value);
        if let ValueDef::Result(src_inst, _) = f.dfg.value_def(value) {
            if is_value_use_root(f, src_inst) {
                None
            } else {
                Some(f.dfg.inst_values(src_inst))
            }
        } else {
            None
        }
    };

    // Do a DFS through `value_ir_uses` to mark a subtree as
    // Multiple.
    for inst in f
        .layout
        .blocks()
        .flat_map(|block| f.layout.block_insts(block))
    {
        // Iterate over all values used by all instructions, noting an
        // additional use on each operand.
        for arg in f.dfg.inst_values(inst) {
            debug_assert!(f.dfg.value_is_real(arg));
            let old = value_ir_uses[arg];
            value_ir_uses[arg].inc();
            let new = value_ir_uses[arg];
            trace!("arg {} used, old state {:?}, new {:?}", arg, old, new);

            // On transition to Multiple, do DFS.
            if old == ValueUseState::Multiple || new != ValueUseState::Multiple {
                continue;
            }
            if let Some(iter) = uses(arg) {
                stack.push(iter);
            }
            while let Some(iter) = stack.last_mut() {
                if let Some(value) = iter.next() {
                    debug_assert!(f.dfg.value_is_real(value));
                    trace!(" -> DFS reaches {}", value);
                    if value_ir_uses[value] == ValueUseState::Multiple {
                        // Truncate DFS here: no need to go further,
                        // as whole subtree must already be Multiple.
                        // With debug asserts, check one level of
                        // that invariant at least.
                        debug_assert!(uses(value).into_iter().flatten().all(|arg| {
                            debug_assert!(f.dfg.value_is_real(arg));
                            value_ir_uses[arg] == ValueUseState::Multiple
                        }));
                        continue;
                    }
                    value_ir_uses[value] = ValueUseState::Multiple;
                    trace!(" -> became Multiple");
                    if let Some(iter) = uses(value) {
                        stack.push(iter);
                    }
                } else {
                    // Empty iterator, discard.
                    stack.pop();
                }
            }
        }
    }

    value_ir_uses
}

/// Definition of a "root" instruction for the calculation of `ValueUseState`.
///
/// This function calculates whether `inst` is considered a "root" for value-use
/// information. This concept is used to forcibly prevent looking-through the
/// instruction during `get_value_as_source_or_const` as it additionally
/// prevents propagating `Multiple`-used results of the `inst` here to the
/// operands of the instruction.
///
/// Currently this is defined as multi-result instructions. That means that
/// lowerings are never allowed to look through a multi-result instruction to
/// generate patterns. Note that this isn't possible in ISLE today anyway so
/// this isn't currently much of a loss.
///
/// The main purpose of this function is to prevent the operands of a
/// multi-result instruction from being forcibly considered `Multiple`-used
/// regardless of circumstances.
fn is_value_use_root(f: &Function, inst: Inst) -> bool {
    f.dfg.inst_results(inst).len() > 1
}

/// Function-level queries.
impl<'func, I: VCodeInst> Lower<'func, I> {
    pub fn dfg(&self) -> &DataFlowGraph {
        &self.f.dfg
    }

    /// Get the `Callee`.
    pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
        self.vcode.abi()
    }

    /// Get the `Callee`.
    pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
        self.vcode.abi_mut()
    }
}

/// Instruction input/output queries.
impl<'func, I: VCodeInst> Lower<'func, I> {
    /// Get the instdata for a given IR instruction.
    pub fn data(&self, ir_inst: Inst) -> &InstructionData {
        &self.f.dfg.insts[ir_inst]
    }

    /// Get the symbol name, relocation distance estimate, and offset for a
    /// `GlobalValue`, if it refers to a symbol.
    pub fn symbol_value_data<'b>(
        &'b self,
        global_value: GlobalValue,
    ) -> Option<(&'b ExternalName, RelocDistance, i64)> {
        let gvdata = &self.f.global_values[global_value];
        match gvdata {
            &GlobalValueData::Symbol {
                ref name,
                ref offset,
                colocated,
                ..
            } => {
                let offset = offset.bits();
                let dist = if colocated {
                    RelocDistance::Near
                } else {
                    RelocDistance::Far
                };
                Some((name, dist, offset))
            }
            _ => None,
        }
    }

    /// Returns the memory flags of a given memory access.
    pub fn memflags(&self, ir_inst: Inst) -> Option<MemFlags> {
        match &self.f.dfg.insts[ir_inst] {
            &InstructionData::AtomicCas { flags, .. } => Some(flags),
            &InstructionData::AtomicRmw { flags, .. } => Some(flags),
            &InstructionData::Load { flags, .. }
            | &InstructionData::LoadNoOffset { flags, .. }
            | &InstructionData::Store { flags, .. } => Some(flags),
            &InstructionData::StoreNoOffset { flags, .. } => Some(flags),
            _ => None,
        }
    }

    /// Get the source location for a given instruction.
    pub fn srcloc(&self, ir_inst: Inst) -> RelSourceLoc {
        self.f.rel_srclocs()[ir_inst]
    }

    /// Get the number of inputs to the given IR instruction. This is a count only of the Value
    /// arguments to the instruction: block arguments will not be included in this count.
    pub fn num_inputs(&self, ir_inst: Inst) -> usize {
        self.f.dfg.inst_args(ir_inst).len()
    }

    /// Get the number of outputs to the given IR instruction.
    pub fn num_outputs(&self, ir_inst: Inst) -> usize {
        self.f.dfg.inst_results(ir_inst).len()
    }

    /// Get the type for an instruction's input.
    pub fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type {
        self.value_ty(self.input_as_value(ir_inst, idx))
    }

    /// Get the type for a value.
    pub fn value_ty(&self, val: Value) -> Type {
        self.f.dfg.value_type(val)
    }

    /// Get the type for an instruction's output.
    pub fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type {
        self.f.dfg.value_type(self.f.dfg.inst_results(ir_inst)[idx])
    }

    /// Get the value of a constant instruction (`iconst`, etc.) as a 64-bit
    /// value, if possible.
    pub fn get_constant(&self, ir_inst: Inst) -> Option<u64> {
        self.inst_constants.get(&ir_inst).map(|&c| {
            // The upper bits must be zero, enforced during legalization and by
            // the CLIF verifier.
            debug_assert_eq!(c, {
                let input_size = self.output_ty(ir_inst, 0).bits() as u64;
                let shift = 64 - input_size;
                (c << shift) >> shift
            });
            c
        })
    }

    /// Get the input as one of two options other than a direct register:
    ///
    /// - An instruction, given that it is effect-free or able to sink its
    ///   effect to the current instruction being lowered, and given it has only
    ///   one output, and if effect-ful, given that this is the only use;
    /// - A constant, if the value is a constant.
    ///
    /// The instruction input may be available in either of these forms. It may
    /// be available in neither form, if the conditions are not met; if so, use
    /// `put_input_in_regs()` instead to get it in a register.
    ///
    /// If the backend merges the effect of a side-effecting instruction, it
    /// must call `sink_inst()`. When this is called, it indicates that the
    /// effect has been sunk to the current scan location. The sunk
    /// instruction's result(s) must have *no* uses remaining, because it will
    /// not be codegen'd (it has been integrated into the current instruction).
    pub fn input_as_value(&self, ir_inst: Inst, idx: usize) -> Value {
        let val = self.f.dfg.inst_args(ir_inst)[idx];
        debug_assert!(self.f.dfg.value_is_real(val));
        val
    }

    /// Resolves a particular input of an instruction to the `Value` that it is
    /// represented with.
    ///
    /// For more information see [`Lower::get_value_as_source_or_const`].
    pub fn get_input_as_source_or_const(&self, ir_inst: Inst, idx: usize) -> NonRegInput {
        let val = self.input_as_value(ir_inst, idx);
        self.get_value_as_source_or_const(val)
    }

    /// Resolves a `Value` definition to the source instruction it came from
    /// plus whether it's a unique-use of that instruction.
    ///
    /// This function is the workhorse of pattern-matching in ISLE which enables
    /// combining multiple instructions together. This is used implicitly in
    /// patterns such as `(iadd x (iconst y))` where this function is used to
    /// extract the `(iconst y)` operand.
    ///
    /// At its core this function is a wrapper around
    /// [`DataFlowGraph::value_def`]. This function applies a filter on top of
    /// that, however, to determine when it is actually safe to "look through"
    /// the `val` definition here and view the underlying instruction. This
    /// protects against duplicating side effects, such as loads, for example.
    ///
    /// Internally this uses the data computed from `compute_use_states` along
    /// with other instruction properties to know what to return.
    pub fn get_value_as_source_or_const(&self, val: Value) -> NonRegInput {
        trace!(
            "get_input_for_val: val {} at cur_inst {:?} cur_scan_entry_color {:?}",
            val, self.cur_inst, self.cur_scan_entry_color,
        );
        let inst = match self.f.dfg.value_def(val) {
            // OK to merge source instruction if we have a source
            // instruction, and one of these two conditions hold:
            //
            // - It has no side-effects and this instruction is not a "value-use
            //   root" instruction. Instructions which are considered "roots"
            //   for value-use calculations do not have accurate information
            //   known about the `ValueUseState` of their operands. This is
            //   currently done for multi-result instructions to prevent a use
            //   of each result from forcing all operands of the multi-result
            //   instruction to also be `Multiple`. This in turn means that the
            //   `ValueUseState` for operands of a "root" instruction would be a
            //   lie if pattern matching were to look through the multi-result
            //   instruction. As a result the "look through this instruction"
            //   logic only succeeds if it's not a root instruction.
            //
            // - It has a side-effect, has one output value, that one
            //   output has only one use, directly or indirectly (so
            //   cannot be duplicated -- see comment on
            //   `ValueUseState`), and the instruction's color is *one
            //   less than* the current scan color.
            //
            //   This latter set of conditions is testing whether a
            //   side-effecting instruction can sink to the current scan
            //   location; this is possible if the in-color of this inst is
            //   equal to the out-color of the producing inst, so no other
            //   side-effecting ops occur between them (which will only be true
            //   if they are in the same BB, because color increments at each BB
            //   start).
            //
            //   If it is actually sunk, then in `merge_inst()`, we update the
            //   scan color so that as we scan over the range past which the
            //   instruction was sunk, we allow other instructions (that came
            //   prior to the sunk instruction) to sink.
            ValueDef::Result(src_inst, result_idx) => {
                let src_side_effect = has_lowering_side_effect(self.f, src_inst);
                trace!(" -> src inst {}", self.f.dfg.display_inst(src_inst));
                trace!(" -> has lowering side effect: {}", src_side_effect);
                if is_value_use_root(self.f, src_inst) {
                    // If this instruction is a "root instruction" then it's
                    // required that we can't look through it to see the
                    // definition. This means that the `ValueUseState` for the
                    // operands of this result assume that this instruction is
                    // generated exactly once which might get violated were we
                    // to allow looking through it.
                    trace!(" -> is a root instruction");
                    InputSourceInst::None
                } else if !src_side_effect {
                    // Otherwise if this instruction has no side effects and the
                    // value is used only once then we can look through it with
                    // a "unique" tag. A non-unique `Use` is returned for other
                    // values, so consumers can still see how the value is
                    // computed but know that its computation cannot be elided.
                    if self.value_ir_uses[val] == ValueUseState::Once {
                        InputSourceInst::UniqueUse(src_inst, result_idx)
                    } else {
                        InputSourceInst::Use(src_inst, result_idx)
                    }
                } else {
                    // Side-effect: test whether this is the only use of the
                    // only result of the instruction, and whether colors allow
                    // the code-motion.
                    trace!(
                        " -> side-effecting op {} for val {}: use state {:?}",
                        src_inst, val, self.value_ir_uses[val]
                    );
                    if self.cur_scan_entry_color.is_some()
                        && self.value_ir_uses[val] == ValueUseState::Once
                        && self.num_outputs(src_inst) == 1
                        && self
                            .side_effect_inst_entry_colors
                            .get(&src_inst)
                            .unwrap()
                            .get()
                            + 1
                            == self.cur_scan_entry_color.unwrap().get()
                    {
                        InputSourceInst::UniqueUse(src_inst, 0)
                    } else {
                        InputSourceInst::None
                    }
                }
            }
            _ => InputSourceInst::None,
        };
        let constant = inst.as_inst().and_then(|(inst, _)| self.get_constant(inst));

        NonRegInput { inst, constant }
    }

    /// Increment the reference count for the Value, ensuring that it gets lowered.
    pub fn increment_lowered_uses(&mut self, val: Value) {
        self.value_lowered_uses[val] += 1
    }

    /// Put the `idx`th input into register(s) and return the assigned register.
    pub fn put_input_in_regs(&mut self, ir_inst: Inst, idx: usize) -> ValueRegs<Reg> {
        let val = self.f.dfg.inst_args(ir_inst)[idx];
        self.put_value_in_regs(val)
    }

    /// Put the given value into register(s) and return the assigned register.
    pub fn put_value_in_regs(&mut self, val: Value) -> ValueRegs<Reg> {
        debug_assert!(self.f.dfg.value_is_real(val));
        trace!("put_value_in_regs: val {}", val);

        if let Some(inst) = self.f.dfg.value_def(val).inst() {
            assert!(!self.inst_sunk.contains(&inst));
        }

        let regs = self.value_regs[val];
        trace!(" -> regs {:?}", regs);
        assert!(regs.is_valid());

        self.value_lowered_uses[val] += 1;

        regs
    }

    /// Get the ValueRegs for the edge-defined values for special
    /// try-call-return block arguments.
    pub fn try_call_return_defs(&mut self, ir_inst: Inst) -> &[ValueRegs<Writable<Reg>>] {
        &self.try_call_rets.get(&ir_inst).unwrap()[..]
    }

    /// Get the Regs for the edge-defined values for special
    /// try-call-return exception payload arguments.
    pub fn try_call_exception_defs(&mut self, ir_inst: Inst) -> &[Writable<Reg>] {
        &self.try_call_payloads.get(&ir_inst).unwrap()[..]
    }
}

/// Codegen primitives: allocate temps, emit instructions, set result registers,
/// ask for an input to be gen'd into a register.
impl<'func, I: VCodeInst> Lower<'func, I> {
    /// Get a new temp.
    pub fn alloc_tmp(&mut self, ty: Type) -> ValueRegs<Writable<Reg>> {
        writable_value_regs(self.vregs.alloc_with_deferred_error(ty))
    }

    /// Get the current root instruction that we are lowering.
    pub fn cur_inst(&self) -> Inst {
        self.cur_inst.unwrap()
    }

    /// Emit a machine instruction.
    pub fn emit(&mut self, mach_inst: I) {
        trace!("emit: {:?}", mach_inst);
        self.ir_insts.push(mach_inst);
    }
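
    // Typical load-merging pattern in a backend (sketch only; the load
    // matching itself is backend-specific and elided; `add_inst` is a
    // hypothetical instruction being lowered):
    //
    //     let input = ctx.get_input_as_source_or_const(add_inst, 0);
    //     if let InputSourceInst::UniqueUse(src, 0) = input.inst {
    //         // ... if `src` is a load, fold its address into this op ...
    //         ctx.sink_inst(src); // the load's effect now happens here
    //     }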

    /// Indicate that the side-effect of an instruction has been sunk to the
    /// current scan location. This should only be done when the instruction's
    /// original results are not used (i.e., `put_input_in_regs` is not invoked
    /// for the input produced by the sunk instruction), otherwise the
    /// side-effect will occur twice.
    pub fn sink_inst(&mut self, ir_inst: Inst) {
        assert!(has_lowering_side_effect(self.f, ir_inst));
        assert!(self.cur_scan_entry_color.is_some());

        for result in self.dfg().inst_results(ir_inst) {
            assert!(self.value_lowered_uses[*result] == 0);
        }

        let sunk_inst_entry_color = self
            .side_effect_inst_entry_colors
            .get(&ir_inst)
            .cloned()
            .unwrap();
        let sunk_inst_exit_color = InstColor::new(sunk_inst_entry_color.get() + 1);
        assert!(sunk_inst_exit_color == self.cur_scan_entry_color.unwrap());
        self.cur_scan_entry_color = Some(sunk_inst_entry_color);
        self.inst_sunk.insert(ir_inst);
    }

    /// Retrieve immediate data given a handle.
    pub fn get_immediate_data(&self, imm: Immediate) -> &ConstantData {
        self.f.dfg.immediates.get(imm).unwrap()
    }

    /// Retrieve constant data given a handle.
    pub fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData {
        self.f.dfg.constants.get(constant_handle)
    }

    /// Indicate that a constant should be emitted.
    pub fn use_constant(&mut self, constant: VCodeConstantData) -> VCodeConstant {
        self.vcode.constants().insert(constant)
    }

    /// Cause the value in `reg` to be in a virtual reg, by copying it into a
    /// new virtual reg if `reg` is a real reg. `ty` describes the type of the
    /// value in `reg`.
    pub fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg {
        if reg.to_virtual_reg().is_some() {
            reg
        } else {
            let new_reg = self.alloc_tmp(ty).only_reg().unwrap();
            self.emit(I::gen_move(new_reg, reg, ty));
            new_reg.to_reg()
        }
    }

    /// Add a range fact to a register, if no other fact is present.
    pub fn add_range_fact(&mut self, reg: Reg, bit_width: u16, min: u64, max: u64) {
        if self.flags.enable_pcc() {
            self.vregs.set_fact_if_missing(
                reg.to_virtual_reg().unwrap(),
                Fact::Range {
                    bit_width,
                    min,
                    max,
                },
            );
        }
    }
}
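
// Example use of `add_range_fact` (sketch): a backend that has just emitted
// an 8-bit zero-extend into a hypothetical `dst` register could record the
// implied range for proof-carrying-code checking (a no-op unless PCC is
// enabled in the flags):
//
//     ctx.add_range_fact(dst.to_reg(), 64, 0, 0xff);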

#[cfg(test)]
mod tests {
    use super::ValueUseState;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::ir::types;
    use crate::ir::{Function, InstBuilder};

    #[test]
    fn multi_result_use_once() {
        let mut func = Function::new();
        let block0 = func.dfg.make_block();
        let mut pos = FuncCursor::new(&mut func);
        pos.insert_block(block0);
        let v1 = pos.ins().iconst(types::I64, 0);
        let v2 = pos.ins().iconst(types::I64, 1);
        let v3 = pos.ins().iconcat(v1, v2);
        let (v4, v5) = pos.ins().isplit(v3);
        pos.ins().return_(&[v4, v5]);
        let func = pos.func;

        let uses = super::compute_use_states(&func, None);
        assert_eq!(uses[v1], ValueUseState::Once);
        assert_eq!(uses[v2], ValueUseState::Once);
        assert_eq!(uses[v3], ValueUseState::Once);
        assert_eq!(uses[v4], ValueUseState::Once);
        assert_eq!(uses[v5], ValueUseState::Once);
    }

    #[test]
    fn results_used_twice_but_not_operands() {
        let mut func = Function::new();
        let block0 = func.dfg.make_block();
        let mut pos = FuncCursor::new(&mut func);
        pos.insert_block(block0);
        let v1 = pos.ins().iconst(types::I64, 0);
        let v2 = pos.ins().iconst(types::I64, 1);
        let v3 = pos.ins().iconcat(v1, v2);
        let (v4, v5) = pos.ins().isplit(v3);
        pos.ins().return_(&[v4, v4]);
        let func = pos.func;

        let uses = super::compute_use_states(&func, None);
        assert_eq!(uses[v1], ValueUseState::Once);
        assert_eq!(uses[v2], ValueUseState::Once);
        assert_eq!(uses[v3], ValueUseState::Once);
        assert_eq!(uses[v4], ValueUseState::Multiple);
        assert_eq!(uses[v5], ValueUseState::Unused);
    }
}