Path: blob/main/cranelift/codegen/src/machinst/vcode.rs
//! This implements the VCode container: a CFG of Insts that have been lowered.
//!
//! VCode is virtual-register code. An instruction in VCode is almost a machine
//! instruction; however, its register slots can refer to virtual registers in
//! addition to real machine registers.
//!
//! VCode is structured with traditional basic blocks, and
//! each block must be terminated by an unconditional branch (one target), a
//! conditional branch (two targets), or a return (no targets). Note that this
//! slightly differs from the machine code of most ISAs: in most ISAs, a
//! conditional branch has one target (and the not-taken case falls through).
//! However, we expect that machine backends will elide branches to the following
//! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
//! branch-uncond pair if *both* targets are not fallthrough. This allows us to
//! play with layout prior to final binary emission, as well, if we want.
//!
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.

use crate::CodegenError;
use crate::FxHashMap;
use crate::ir::pcc::*;
use crate::ir::{self, Constant, ConstantData, ValueLabel, types};
use crate::ranges::Ranges;
use crate::timing;
use crate::trace;
use crate::{LabelValueLoc, ValueLocRange};
use crate::{machinst::*, trace_log_enabled};
use regalloc2::{
    Edit, Function as RegallocFunction, InstOrEdit, InstPosition, InstRange, Operand,
    OperandConstraint, OperandKind, PRegSet, ProgPoint, RegClass,
};

use crate::HashMap;
use crate::hash_map::Entry;
use core::cmp::Ordering;
use core::fmt::{self, Write};
use core::mem::take;
use core::ops::Range;
use cranelift_entity::{Keys, entity_impl};

/// Index referring to an instruction in VCode.
pub type InsnIndex = regalloc2::Inst;

/// Extension trait for `InsnIndex` to allow conversion to a
/// `BackwardsInsnIndex`.
trait ToBackwardsInsnIndex {
    fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex;
}

impl ToBackwardsInsnIndex for InsnIndex {
    fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex {
        BackwardsInsnIndex::new(num_insts - self.index() - 1)
    }
}

/// An index referring to an instruction in the VCode when it is backwards,
/// during VCode construction.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(
    feature = "enable-serde",
    derive(::serde::Serialize, ::serde::Deserialize)
)]
pub struct BackwardsInsnIndex(InsnIndex);

impl BackwardsInsnIndex {
    pub fn new(i: usize) -> Self {
        BackwardsInsnIndex(InsnIndex::new(i))
    }
}

/// Index referring to a basic block in VCode.
pub type BlockIndex = regalloc2::Block;

/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
/// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
pub trait VCodeInst: MachInst + MachInstEmit {}
impl<I: MachInst + MachInstEmit> VCodeInst for I {}

/// A function in "VCode" (virtualized-register code) form, after
/// lowering. This is essentially a standard CFG of basic blocks,
/// where each basic block consists of lowered instructions produced
/// by the machine-specific backend.
///
/// Note that the VCode is immutable once produced, and is not
/// modified by register allocation in particular. Rather, register
/// allocation on the `VCode` produces a separate `regalloc2::Output`
/// struct, and this can be passed to `emit`.
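///
/// A rough sketch of that flow, for orientation only (hypothetical
/// driver code; the real wiring lives in the backend drivers, `Inst`
/// stands for a backend's MachInst type, and `run_regalloc` merely
/// stands in for the regalloc2 invocation):
///
/// ```ignore
/// // Finish building: consumes the VRegAllocator, yields an immutable VCode.
/// let vcode: VCode<Inst> = builder.build(vregs);
/// // Register allocation produces a separate regalloc2::Output...
/// let ra_output = run_regalloc(&vcode);
/// // ...which emission reads; `emit` consumes the VCode itself.
/// let result: EmitResult =
///     vcode.emit(&ra_output, /* want_disasm */ false, &flags, &mut ctrl_plane);
/// ```
///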
/// `emit` in turn does not modify the vcode, but produces an
/// `EmitResult`, which contains the machine code itself, and the
/// associated disassembly and/or metadata as requested.
pub struct VCode<I: VCodeInst> {
    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// Lowered machine instructions in order corresponding to the original IR.
    insts: Vec<I>,

    /// A map from backwards instruction index to the user stack map for that
    /// instruction.
    ///
    /// This is a sparse side table that only has entries for instructions that
    /// are safepoints, and only for a subset of those that have an associated
    /// user stack map.
    user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>,

    /// A map from backwards instruction index to the debug tags for
    /// that instruction. Each entry indexes a range in the
    /// `debug_tag_pool`.
    debug_tags: FxHashMap<BackwardsInsnIndex, Range<u32>>,

    /// Pooled storage for sequences of debug tags; indexed by entries
    /// in `debug_tags`.
    debug_tag_pool: Vec<ir::DebugTag>,

    /// Operands: pre-regalloc references to virtual registers with
    /// constraints, in one flattened array. This allows the regalloc
    /// to efficiently access all operands without requiring expensive
    /// matches or method invocations on insts.
    operands: Vec<Operand>,

    /// Operand index ranges: for each instruction in `insts`, there
    /// is a tuple here providing the range in `operands` for that
    /// instruction's operands.
    operand_ranges: Ranges,

    /// Clobbers: a sparse map from instruction indices to clobber masks.
    clobbers: FxHashMap<InsnIndex, PRegSet>,

    /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
    /// reasonable to keep one of these per instruction.)
    srclocs: Vec<RelSourceLoc>,

    /// Entry block.
    entry: BlockIndex,

    /// Block instruction indices.
    block_ranges: Ranges,

    /// Block successors: index range in the `block_succs` list.
    block_succ_range: Ranges,

    /// Block successor lists, concatenated into one vec. The
    /// `block_succ_range` list of tuples above gives (start, end)
    /// ranges within this list that correspond to each basic block's
    /// successors.
    block_succs: Vec<regalloc2::Block>,

    /// Block predecessors: index range in the `block_preds` list.
    block_pred_range: Ranges,

    /// Block predecessor lists, concatenated into one vec. The
    /// `block_pred_range` list of tuples above gives (start, end)
    /// ranges within this list that correspond to each basic block's
    /// predecessors.
    block_preds: Vec<regalloc2::Block>,

    /// Block parameters: index range in `block_params` below.
    block_params_range: Ranges,

    /// Block parameter lists, concatenated into one vec. The
    /// `block_params_range` list of tuples above gives (start, end)
    /// ranges within this list that correspond to each basic block's
    /// blockparam vregs.
    block_params: Vec<regalloc2::VReg>,

    /// Outgoing block arguments on branch instructions, concatenated
    /// into one list.
    ///
    /// Note that this is conceptually a 3D array: we have a VReg list
    /// per block, per successor.
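    /// (Illustrative example with made-up indices: if block 2 has two
    /// successors and its branch passes `[v8, v9]` to the first and
    /// `[v10]` to the second, that is three vregs spread across two
    /// per-successor lists for this one block.)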
    /// We flatten those three dimensions into this 1D vec, then store
    /// index ranges in two levels of indirection.
    ///
    /// Indexed by the indices in `branch_block_arg_range`.
    branch_block_args: Vec<regalloc2::VReg>,

    /// Array of sequences of (start, end) tuples in
    /// `branch_block_args`, one for each successor; these sequences
    /// for each block are concatenated.
    ///
    /// Indexed by the indices in `branch_block_arg_succ_range`.
    branch_block_arg_range: Ranges,

    /// For a given block, indices in `branch_block_arg_range`
    /// corresponding to all of its successors.
    branch_block_arg_succ_range: Ranges,

    /// Block-order information.
    block_order: BlockLoweringOrder,

    /// ABI object.
    pub(crate) abi: Callee<I::ABIMachineSpec>,

    /// Constant information used during code emission. This should be
    /// immutable across function compilations within the same module.
    emit_info: I::Info,

    /// Constants.
    pub(crate) constants: VCodeConstants,

    /// Value labels for debuginfo attached to vregs.
    debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,

    pub(crate) sigs: SigSet,

    /// Facts on VRegs, for proof-carrying code verification.
    facts: Vec<Option<Fact>>,

    log2_min_function_alignment: u8,
}

/// The result of `VCode::emit`. Contains all information computed
/// during emission: actual machine code, optionally a disassembly,
/// and optionally metadata about the code layout.
pub struct EmitResult {
    /// The MachBuffer containing the machine code.
    pub buffer: MachBufferFinalized<Stencil>,

    /// Offset of each basic block, recorded during emission. Computed
    /// only if `machine_code_cfg_info` is enabled.
    pub bb_offsets: Vec<CodeOffset>,

    /// Final basic-block edges, in terms of code offsets of
    /// bb-starts. Computed only if `machine_code_cfg_info` is enabled.
    pub bb_edges: Vec<(CodeOffset, CodeOffset)>,

    /// The pretty-printed disassembly, if any. This uses the same
    /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
    /// implementation, but additionally includes the prologue and
    /// epilogue(s), and makes use of the regalloc results.
    pub disasm: Option<String>,

    /// Value-labels information (debug metadata).
    pub value_labels_ranges: ValueLabelsRanges,
}

/// A builder for a VCode function body.
///
/// This builder has the ability to accept instructions in either
/// forward or reverse order, depending on the pass direction that
/// produces the VCode. The lowering from CLIF to VCode<MachInst>
/// ordinarily occurs in reverse order (in order to allow instructions
/// to be lowered only if used, and not merged) so a reversal will
/// occur at the end of lowering to ensure the VCode is in machine
/// order.
///
/// If built in reverse, block and instruction indices used once the
/// VCode is built are relative to the final (reversed) order, not the
/// order of construction. Note that this means we do not know the
/// final block or instruction indices when building, so we do not
/// hand them out. (The user is assumed to know them when appending
/// terminator instructions with successor blocks.)
pub struct VCodeBuilder<I: VCodeInst> {
    /// In-progress VCode.
    pub(crate) vcode: VCode<I>,

    /// In what direction is the build occurring?
    direction: VCodeBuildDirection,

    /// Debug-value label in-progress map, keyed by label. For each
    /// label, we keep disjoint ranges mapping to vregs.
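    /// (For instance, with hypothetical indices, a label might map to
    /// `[(i0, i4, v3), (i4, i9, v7)]`, meaning `v3` carries the value
    /// over insts `[i0, i4)` and `v7` over `[i4, i9)`.)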
    /// We'll flatten this into (vreg, range, label) tuples when done.
    debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
}

/// Direction in which a VCodeBuilder builds VCode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VCodeBuildDirection {
    // TODO: add `Forward` once we need it and can test it adequately.
    /// Backward-build pass: we expect the producer to call `emit()`
    /// with instructions in reverse program order within each block.
    Backward,
}

impl<I: VCodeInst> VCodeBuilder<I> {
    /// Create a new VCodeBuilder.
    pub fn new(
        sigs: SigSet,
        abi: Callee<I::ABIMachineSpec>,
        emit_info: I::Info,
        block_order: BlockLoweringOrder,
        constants: VCodeConstants,
        direction: VCodeBuildDirection,
        log2_min_function_alignment: u8,
    ) -> Self {
        let vcode = VCode::new(
            sigs,
            abi,
            emit_info,
            block_order,
            constants,
            log2_min_function_alignment,
        );

        VCodeBuilder {
            vcode,
            direction,
            debug_info: FxHashMap::default(),
        }
    }

    pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> {
        self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs)
    }

    /// Access the ABI object.
    pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
        &self.vcode.abi
    }

    /// Access the ABI object.
    pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
        &mut self.vcode.abi
    }

    pub fn sigs(&self) -> &SigSet {
        &self.vcode.sigs
    }

    pub fn sigs_mut(&mut self) -> &mut SigSet {
        &mut self.vcode.sigs
    }

    /// Access to the BlockLoweringOrder object.
    pub fn block_order(&self) -> &BlockLoweringOrder {
        &self.vcode.block_order
    }

    /// Set the current block as the entry block.
    pub fn set_entry(&mut self, block: BlockIndex) {
        self.vcode.entry = block;
    }

    /// End the current basic block.
    /// Must be called after emitting vcode insts for IR insts and prior to
    /// ending the function (building the VCode).
    pub fn end_bb(&mut self) {
        let end_idx = self.vcode.insts.len();
        // Add the instruction index range to the list of blocks.
        self.vcode.block_ranges.push_end(end_idx);
        // End the successors list.
        let succ_end = self.vcode.block_succs.len();
        self.vcode.block_succ_range.push_end(succ_end);
        // End the blockparams list.
        let block_params_end = self.vcode.block_params.len();
        self.vcode.block_params_range.push_end(block_params_end);
        // End the branch blockparam args list.
        let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
        self.vcode
            .branch_block_arg_succ_range
            .push_end(branch_block_arg_succ_end);
    }

    pub fn add_block_param(&mut self, param: VirtualReg) {
        self.vcode.block_params.push(param.into());
    }

    fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
        self.vcode
            .branch_block_args
            .extend(args.iter().map(|&arg| VReg::from(arg)));
        let end = self.vcode.branch_block_args.len();
        self.vcode.branch_block_arg_range.push_end(end);
    }

    /// Push an instruction for the current BB and current IR inst
    /// within the BB.
    pub fn push(&mut self, insn: I, loc: RelSourceLoc) {
        assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode.
        self.vcode.insts.push(insn);
        self.vcode.srclocs.push(loc);
    }

    /// Add a successor block with branch args.
    pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
        self.vcode.block_succs.push(block);
        self.add_branch_args_for_succ(args);
    }

    /// Add a debug value label to a register.
    pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
        // 1) In the reversed order, we consider the instructions
        //    that define ranges in the "debug_info" array to refer
        //    to the IP **after** them (when reversed):
        //      IP[2]__| Inst 3 |
        //      IP[1]__| Inst 2 |
        //      IP[0]__| Inst 1 |
        //             | Inst 0 |
        //    This is so that we can represent IP[<function start>],
        //    done at the cost of not being able to represent
        //    IP[<function end>], which is OK since no values will be
        //    live at that point.
        // 2) The live range for "reg" begins at the current IP
        //    and continues until the next, in execution order,
        //    VReg that defines "label". Since the ranges are open
        //    at the end, the subtraction of 1 cancels out:
        //      [last..current IP] <=>
        //      [last..last emitted inst index] <=>
        //      [last..next_inst_index - 1] <=>
        //      [last..next_inst_index)
        //
        let next_inst_index = self.vcode.insts.len();
        if next_inst_index == 0 {
            // This would produce a defective [0..0) range.
            return;
        }
        let next_inst = InsnIndex::new(next_inst_index);
        let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
        let last = labels
            .last()
            .map(|(_start, end, _vreg)| *end)
            .unwrap_or(InsnIndex::new(0));
        labels.push((last, next_inst, reg.into()));
    }

    /// Access the constants.
    pub fn constants(&mut self) -> &mut VCodeConstants {
        &mut self.vcode.constants
    }

    fn compute_preds_from_succs(&mut self) {
        // Do a linear-time counting sort: first determine how many
        // times each block appears as a successor.
        let mut starts = vec![0u32; self.vcode.num_blocks()];
        for succ in &self.vcode.block_succs {
            starts[succ.index()] += 1;
        }

        // Determine for each block the starting index where that
        // block's predecessors should go.
This is equivalent to the429// ranges we need to store in block_pred_range.430self.vcode.block_pred_range.reserve(starts.len());431let mut end = 0;432for count in starts.iter_mut() {433let start = end;434end += *count;435*count = start;436self.vcode.block_pred_range.push_end(end as usize);437}438let end = end as usize;439debug_assert_eq!(end, self.vcode.block_succs.len());440441// Walk over the successors again, this time grouped by442// predecessor, and push the predecessor at the current443// starting position of each of its successors. We build444// each group of predecessors in whatever order Ranges::iter445// returns them; regalloc2 doesn't care.446self.vcode.block_preds.resize(end, BlockIndex::invalid());447for (pred, range) in self.vcode.block_succ_range.iter() {448let pred = BlockIndex::new(pred);449for succ in &self.vcode.block_succs[range] {450let pos = &mut starts[succ.index()];451self.vcode.block_preds[*pos as usize] = pred;452*pos += 1;453}454}455debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid()));456}457458/// Called once, when a build in Backward order is complete, to459/// perform the overall reversal (into final forward order) and460/// finalize metadata accordingly.461fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) {462let n_insts = self.vcode.insts.len();463if n_insts == 0 {464return;465}466467// Reverse the per-block and per-inst sequences.468self.vcode.block_ranges.reverse_index();469self.vcode.block_ranges.reverse_target(n_insts);470// block_params_range is indexed by block (and blocks were471// traversed in reverse) so we reverse it; but block-param472// sequences in the concatenated vec can remain in reverse473// order (it is effectively an arena of arbitrarily-placed474// referenced sequences).475self.vcode.block_params_range.reverse_index();476// Likewise, we reverse block_succ_range, but the block_succ477// concatenated array can remain as-is.478self.vcode.block_succ_range.reverse_index();479self.vcode.insts.reverse();480self.vcode.srclocs.reverse();481// Likewise, branch_block_arg_succ_range is indexed by block482// so must be reversed.483self.vcode.branch_block_arg_succ_range.reverse_index();484485// To translate an instruction index *endpoint* in reversed486// order to forward order, compute `n_insts - i`.487//488// Why not `n_insts - 1 - i`? That would be correct to489// translate an individual instruction index (for ten insts 0490// to 9 inclusive, inst 0 becomes 9, and inst 9 becomes491// 0). But for the usual inclusive-start, exclusive-end range492// idiom, inclusive starts become exclusive ends and493// vice-versa, so e.g. 
an (inclusive) start of 0 becomes an494// (exclusive) end of 10.495let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());496497// Generate debug-value labels based on per-label maps.498for (label, tuples) in &self.debug_info {499for &(start, end, vreg) in tuples {500let vreg = vregs.resolve_vreg_alias(vreg);501let fwd_start = translate(end);502let fwd_end = translate(start);503self.vcode504.debug_value_labels505.push((vreg, fwd_start, fwd_end, label.as_u32()));506}507}508509// Now sort debug value labels by VReg, as required510// by regalloc2.511self.vcode512.debug_value_labels513.sort_unstable_by_key(|(vreg, _, _, _)| *vreg);514}515516fn collect_operands(&mut self, vregs: &VRegAllocator<I>) {517let allocatable = PRegSet::from(self.vcode.abi.machine_env());518for (i, insn) in self.vcode.insts.iter_mut().enumerate() {519// Push operands from the instruction onto the operand list.520//521// We rename through the vreg alias table as we collect522// the operands. This is better than a separate post-pass523// over operands, because it has more cache locality:524// operands only need to pass through L1 once. This is525// also better than renaming instructions'526// operands/registers while lowering, because here we only527// need to do the `match` over the instruction to visit528// its register fields (which is slow, branchy code) once.529530let mut op_collector =531OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {532vregs.resolve_vreg_alias(vreg)533});534insn.get_operands(&mut op_collector);535let (ops, clobbers) = op_collector.finish();536self.vcode.operand_ranges.push_end(ops);537538if clobbers != PRegSet::default() {539self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);540}541542if let Some((dst, src)) = insn.is_move() {543// We should never see non-virtual registers present in move544// instructions.545assert!(546src.is_virtual(),547"the real register {src:?} was used as the source of a move instruction"548);549assert!(550dst.to_reg().is_virtual(),551"the real register {:?} was used as the destination of a move instruction",552dst.to_reg()553);554}555}556557// Translate blockparam args via the vreg aliases table as well.558for arg in &mut self.vcode.branch_block_args {559let new_arg = vregs.resolve_vreg_alias(*arg);560trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);561*arg = new_arg;562}563}564565/// Build the final VCode.566pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> {567self.vcode.vreg_types = take(&mut vregs.vreg_types);568self.vcode.facts = take(&mut vregs.facts);569570if self.direction == VCodeBuildDirection::Backward {571self.reverse_and_finalize(&vregs);572}573self.collect_operands(&vregs);574575self.compute_preds_from_succs();576self.vcode.debug_value_labels.sort_unstable();577578// At this point, nothing in the vcode should mention any579// VReg which has been aliased. All the appropriate rewriting580// should have happened above. 
Just to be sure, let's581// double-check each field which has vregs.582// Note: can't easily check vcode.insts, resolved in collect_operands.583// Operands are resolved in collect_operands.584vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg()));585// Currently block params are never aliased to another vreg.586vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied());587// Branch block args are resolved in collect_operands.588vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied());589// Debug value labels are resolved in reverse_and_finalize.590vregs.debug_assert_no_vreg_aliases(591self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg),592);593// Facts are resolved eagerly during set_vreg_alias.594vregs.debug_assert_no_vreg_aliases(595self.vcode596.facts597.iter()598.zip(&vregs.vreg_types)599.enumerate()600.filter(|(_, (fact, _))| fact.is_some())601.map(|(vreg, (_, &ty))| {602let (regclasses, _) = I::rc_for_type(ty).unwrap();603VReg::new(vreg, regclasses[0])604}),605);606607self.vcode608}609610/// Add a user stack map for the associated instruction.611pub fn add_user_stack_map(612&mut self,613inst: BackwardsInsnIndex,614entries: &[ir::UserStackMapEntry],615) {616let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets());617let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map);618debug_assert!(old_entry.is_none());619}620621/// Add debug tags for the associated instruction.622pub fn add_debug_tags(&mut self, inst: BackwardsInsnIndex, entries: &[ir::DebugTag]) {623let start = u32::try_from(self.vcode.debug_tag_pool.len()).unwrap();624self.vcode.debug_tag_pool.extend(entries.iter().cloned());625let end = u32::try_from(self.vcode.debug_tag_pool.len()).unwrap();626self.vcode.debug_tags.insert(inst, start..end);627}628}629630const NO_INST_OFFSET: CodeOffset = u32::MAX;631632impl<I: VCodeInst> VCode<I> {633/// New empty VCode.634fn new(635sigs: SigSet,636abi: Callee<I::ABIMachineSpec>,637emit_info: I::Info,638block_order: BlockLoweringOrder,639constants: VCodeConstants,640log2_min_function_alignment: u8,641) -> Self {642let n_blocks = block_order.lowered_order().len();643VCode {644sigs,645vreg_types: vec![],646insts: Vec::with_capacity(10 * n_blocks),647user_stack_maps: FxHashMap::default(),648debug_tags: FxHashMap::default(),649debug_tag_pool: vec![],650operands: Vec::with_capacity(30 * n_blocks),651operand_ranges: Ranges::with_capacity(10 * n_blocks),652clobbers: FxHashMap::default(),653srclocs: Vec::with_capacity(10 * n_blocks),654entry: BlockIndex::new(0),655block_ranges: Ranges::with_capacity(n_blocks),656block_succ_range: Ranges::with_capacity(n_blocks),657block_succs: Vec::with_capacity(n_blocks),658block_pred_range: Ranges::default(),659block_preds: Vec::new(),660block_params_range: Ranges::with_capacity(n_blocks),661block_params: Vec::with_capacity(5 * n_blocks),662branch_block_args: Vec::with_capacity(10 * n_blocks),663branch_block_arg_range: Ranges::with_capacity(2 * n_blocks),664branch_block_arg_succ_range: Ranges::with_capacity(n_blocks),665block_order,666abi,667emit_info,668constants,669debug_value_labels: vec![],670facts: vec![],671log2_min_function_alignment,672}673}674675/// Get the number of blocks. 
Block indices will be in the range `0 ..676/// (self.num_blocks() - 1)`.677pub fn num_blocks(&self) -> usize {678self.block_ranges.len()679}680681/// The number of lowered instructions.682pub fn num_insts(&self) -> usize {683self.insts.len()684}685686fn compute_clobbers_and_function_calls(687&self,688regalloc: ®alloc2::Output,689) -> (Vec<Writable<RealReg>>, FunctionCalls) {690let mut clobbered = PRegSet::default();691let mut function_calls = FunctionCalls::None;692693// All moves are included in clobbers.694for (_, Edit::Move { to, .. }) in ®alloc.edits {695if let Some(preg) = to.as_reg() {696clobbered.add(preg);697}698}699700for (i, range) in self.operand_ranges.iter() {701let operands = &self.operands[range.clone()];702let allocs = ®alloc.allocs[range];703for (operand, alloc) in operands.iter().zip(allocs.iter()) {704if operand.kind() == OperandKind::Def {705if let Some(preg) = alloc.as_reg() {706clobbered.add(preg);707}708}709}710711function_calls.update(self.insts[i].call_type());712713// Also add explicitly-clobbered registers.714//715// Skip merging this instruction's clobber list if not716// "included in clobbers" as per the MachInst. (Some717// backends use this to implement ABI specifics; e.g.,718// excluding calls of the same ABI as the current function719// from clobbers, because by definition everything720// clobbered by the call can be clobbered by this function721// without saving as well.722//723// This is important for a particular optimization: when724// some registers are "half-clobbered", e.g. vector/float725// registers on aarch64, we want them to be seen as726// clobbered by regalloc so it avoids carrying values727// across calls in these registers but not seen as728// clobbered by prologue generation here (because the729// actual half-clobber implied by the clobber list fits730// within the clobbers that we allow without731// clobber-saves).732if self.insts[i].is_included_in_clobbers() {733if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) {734clobbered.union_from(inst_clobbered);735}736}737}738739let clobbered_regs = clobbered740.into_iter()741.map(|preg| Writable::from_reg(RealReg::from(preg)))742.collect();743744(clobbered_regs, function_calls)745}746747/// Emit the instructions to a `MachBuffer`, containing fixed-up748/// code and external reloc/trap/etc. records ready for use. Takes749/// the regalloc results as well.750///751/// Returns the machine code itself, and optionally metadata752/// and/or a disassembly, as an `EmitResult`. 
The `VCode` itself753/// is consumed by the emission process.754pub fn emit(755mut self,756regalloc: ®alloc2::Output,757want_disasm: bool,758flags: &settings::Flags,759ctrl_plane: &mut ControlPlane,760) -> EmitResult761where762I: VCodeInst,763{764let _tt = timing::vcode_emit();765let mut buffer = MachBuffer::new();766buffer.set_log2_min_function_alignment(self.log2_min_function_alignment);767let mut bb_starts: Vec<Option<CodeOffset>> = vec![];768769// The first M MachLabels are reserved for block indices.770buffer.reserve_labels_for_blocks(self.num_blocks());771772// Register all allocated constants with the `MachBuffer` to ensure that773// any references to the constants during instructions can be handled774// correctly.775buffer.register_constants(&self.constants);776777// Construct the final order we emit code in: cold blocks at the end.778let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];779let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];780for block in 0..self.num_blocks() {781let block = BlockIndex::new(block);782if self.block_order.is_cold(block) {783cold_blocks.push(block);784} else {785final_order.push(block);786}787}788final_order.extend(cold_blocks.clone());789790// Compute/save info we need for the prologue: clobbers and791// number of spillslots.792//793// We clone `abi` here because we will mutate it as we794// generate the prologue and set other info, but we can't795// mutate `VCode`. The info it usually carries prior to796// setting clobbers is fairly minimal so this should be797// relatively cheap.798let (clobbers, function_calls) = self.compute_clobbers_and_function_calls(regalloc);799self.abi.compute_frame_layout(800&self.sigs,801regalloc.num_spillslots,802clobbers,803function_calls,804);805806// Emit blocks.807let mut cur_srcloc = None;808let mut last_offset = None;809let mut inst_offsets = vec![];810let mut state = I::State::new(&self.abi, core::mem::take(ctrl_plane));811812let mut disasm = String::new();813814if !self.debug_value_labels.is_empty() {815inst_offsets.resize(self.insts.len(), NO_INST_OFFSET);816}817818// Count edits per block ahead of time; this is needed for819// lookahead island emission. 
(We could derive it per-block820// with binary search in the edit list, but it's more821// efficient to do it in one pass here.)822let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];823let mut edit_idx = 0;824for block in 0..self.num_blocks() {825let end_inst = InsnIndex::new(self.block_ranges.get(block).end);826let start_edit_idx = edit_idx;827while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {828edit_idx += 1;829}830let end_edit_idx = edit_idx;831ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);832}833834let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();835let mut bb_padding = match flags.bb_padding_log2_minus_one() {8360 => Vec::new(),837n => vec![0; 1 << (n - 1)],838};839let mut total_bb_padding = 0;840841for (block_order_idx, &block) in final_order.iter().enumerate() {842trace!("emitting block {:?}", block);843844// Call the new block hook for state845state.on_new_block();846847// Emit NOPs to align the block.848let new_offset = I::align_basic_block(buffer.cur_offset());849while new_offset > buffer.cur_offset() {850// Pad with NOPs up to the aligned block offset.851let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);852nop.emit(&mut buffer, &self.emit_info, &mut Default::default());853}854assert_eq!(buffer.cur_offset(), new_offset);855856let do_emit = |inst: &I,857disasm: &mut String,858buffer: &mut MachBuffer<I>,859state: &mut I::State| {860if want_disasm && !inst.is_args() {861let mut s = state.clone();862writeln!(disasm, " {}", inst.pretty_print_inst(&mut s)).unwrap();863}864inst.emit(buffer, &self.emit_info, state);865};866867// Is this the first block? Emit the prologue directly if so.868if block == self.entry {869trace!(" -> entry block");870buffer.start_srcloc(Default::default());871for inst in &self.abi.gen_prologue() {872do_emit(&inst, &mut disasm, &mut buffer, &mut state);873}874buffer.end_srcloc();875}876877// Now emit the regular block body.878879buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());880881if want_disasm {882writeln!(&mut disasm, "block{}:", block.index()).unwrap();883}884885if flags.machine_code_cfg_info() {886// Track BB starts. If we have backed up due to MachBuffer887// branch opts, note that the removed blocks were removed.888let cur_offset = buffer.cur_offset();889if last_offset.is_some() && cur_offset <= last_offset.unwrap() {890for i in (0..bb_starts.len()).rev() {891if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {892break;893}894bb_starts[i] = None;895}896}897bb_starts.push(Some(cur_offset));898last_offset = Some(cur_offset);899}900901if let Some(block_start) = I::gen_block_start(902self.block_order.is_indirect_branch_target(block),903is_forward_edge_cfi_enabled,904) {905do_emit(&block_start, &mut disasm, &mut buffer, &mut state);906}907908for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {909match inst_or_edit {910InstOrEdit::Inst(iix) => {911if !self.debug_value_labels.is_empty() {912// If we need to produce debug info,913// record the offset of each instruction914// so that we can translate value-label915// ranges to machine-code offsets.916917// Cold blocks violate monotonicity918// assumptions elsewhere (that919// instructions in inst-index order are in920// order in machine code), so we omit921// their offsets here. 
Value-label range922// generation below will skip empty ranges923// and ranges with to-offsets of zero.924if !self.block_order.is_cold(block) {925inst_offsets[iix.index()] = buffer.cur_offset();926}927}928929// Update the srcloc at this point in the buffer.930let srcloc = self.srclocs[iix.index()];931if cur_srcloc != Some(srcloc) {932if cur_srcloc.is_some() {933buffer.end_srcloc();934}935buffer.start_srcloc(srcloc);936cur_srcloc = Some(srcloc);937}938939// If this is a safepoint, compute a stack map940// and pass it to the emit state.941let stack_map_disasm = if self.insts[iix.index()].is_safepoint() {942let (user_stack_map, user_stack_map_disasm) = {943// The `user_stack_maps` is keyed by reverse944// instruction index, so we must flip the945// index. We can't put this into a helper method946// due to borrowck issues because parts of947// `self` are borrowed mutably elsewhere in this948// function.949let index = iix.to_backwards_insn_index(self.num_insts());950let user_stack_map = self.user_stack_maps.remove(&index);951let user_stack_map_disasm = if want_disasm {952user_stack_map.as_ref().map(|m| format!(" ; {m:?}"))953} else {954None955};956(user_stack_map, user_stack_map_disasm)957};958959state.pre_safepoint(user_stack_map);960961user_stack_map_disasm962} else {963None964};965966// Place debug tags in the emission buffer967// either at the offset prior to the968// instruction or after the instruction,969// depending on whether this is a call. See970// the documentation on [`MachDebugTagPos`]971// for details on why.972let mut debug_tag_disasm = None;973let mut place_debug_tags =974|this: &VCode<I>, pos: MachDebugTagPos, buffer: &mut MachBuffer<I>| {975// As above, translate the forward instruction976// index to a backward index for the lookup.977let debug_tag_range = {978let index = iix.to_backwards_insn_index(this.num_insts());979this.debug_tags.get(&index)980};981if let Some(range) = debug_tag_range {982let start = usize::try_from(range.start).unwrap();983let end = usize::try_from(range.end).unwrap();984let tags = &this.debug_tag_pool[start..end];985986if want_disasm {987debug_tag_disasm =988Some(format!(" ; ^-- debug @ {pos:?}: {tags:?}"));989}990buffer.push_debug_tags(pos, tags);991}992};993let debug_tag_pos =994if self.insts[iix.index()].call_type() == CallType::Regular {995MachDebugTagPos::Post996} else {997MachDebugTagPos::Pre998};9991000if debug_tag_pos == MachDebugTagPos::Pre {1001place_debug_tags(&self, debug_tag_pos, &mut buffer);1002}10031004// If the instruction we are about to emit is1005// a return, place an epilogue at this point1006// (and don't emit the return; the actual1007// epilogue will contain it).1008if self.insts[iix.index()].is_term() == MachTerminator::Ret {1009log::trace!("emitting epilogue");1010for inst in self.abi.gen_epilogue() {1011do_emit(&inst, &mut disasm, &mut buffer, &mut state);1012}1013} else {1014// Update the operands for this inst using the1015// allocations from the regalloc result.1016let mut allocs = regalloc.inst_allocs(iix).iter();1017self.insts[iix.index()].get_operands(1018&mut |reg: &mut Reg, constraint, _kind, _pos| {1019let alloc =1020allocs.next().expect("enough allocations for all operands");10211022if let Some(alloc) = alloc.as_reg() {1023let alloc: Reg = alloc.into();1024if let OperandConstraint::FixedReg(rreg) = constraint {1025debug_assert_eq!(Reg::from(rreg), alloc);1026}1027*reg = alloc;1028} else if let Some(alloc) = alloc.as_stack() {1029let alloc: Reg = alloc.into();1030*reg = 
alloc;1031}1032},1033);1034debug_assert!(allocs.next().is_none());10351036log::trace!("emitting: {:?}", self.insts[iix.index()]);10371038// Emit the instruction!1039do_emit(1040&self.insts[iix.index()],1041&mut disasm,1042&mut buffer,1043&mut state,1044);10451046if debug_tag_pos == MachDebugTagPos::Post {1047place_debug_tags(&self, debug_tag_pos, &mut buffer);1048}10491050if let Some(stack_map_disasm) = stack_map_disasm {1051disasm.push_str(&stack_map_disasm);1052disasm.push('\n');1053}1054if let Some(debug_tag_disasm) = debug_tag_disasm {1055disasm.push_str(&debug_tag_disasm);1056disasm.push('\n');1057}1058}1059}10601061InstOrEdit::Edit(Edit::Move { from, to }) => {1062// Create a move/spill/reload instruction and1063// immediately emit it.1064match (from.as_reg(), to.as_reg()) {1065(Some(from), Some(to)) => {1066// Reg-to-reg move.1067let from_rreg = Reg::from(from);1068let to_rreg = Writable::from_reg(Reg::from(to));1069debug_assert_eq!(from.class(), to.class());1070let ty = I::canonical_type_for_rc(from.class());1071let mv = I::gen_move(to_rreg, from_rreg, ty);1072do_emit(&mv, &mut disasm, &mut buffer, &mut state);1073}1074(Some(from), None) => {1075// Spill from register to spillslot.1076let to = to.as_stack().unwrap();1077let from_rreg = RealReg::from(from);1078let spill = self.abi.gen_spill(to, from_rreg);1079do_emit(&spill, &mut disasm, &mut buffer, &mut state);1080}1081(None, Some(to)) => {1082// Load from spillslot to register.1083let from = from.as_stack().unwrap();1084let to_rreg = Writable::from_reg(RealReg::from(to));1085let reload = self.abi.gen_reload(to_rreg, from);1086do_emit(&reload, &mut disasm, &mut buffer, &mut state);1087}1088(None, None) => {1089panic!("regalloc2 should have eliminated stack-to-stack moves!");1090}1091}1092}1093}1094}10951096if cur_srcloc.is_some() {1097buffer.end_srcloc();1098cur_srcloc = None;1099}11001101// Do we need an island? Get the worst-case size of the next BB, add1102// it to the optional padding behind the block, and pass this to the1103// `MachBuffer` to determine if an island is necessary.1104let worst_case_next_bb = if block_order_idx < final_order.len() - 1 {1105let next_block = final_order[block_order_idx + 1];1106let next_block_range = self.block_ranges.get(next_block.index());1107let next_block_size = next_block_range.len() as u32;1108let next_block_ra_insertions = ra_edits_per_block[next_block.index()];1109I::worst_case_size() * (next_block_size + next_block_ra_insertions)1110} else {111101112};1113let padding = if bb_padding.is_empty() {111401115} else {1116bb_padding.len() as u32 + I::LabelUse::ALIGN - 11117};1118if buffer.island_needed(padding + worst_case_next_bb) {1119buffer.emit_island(padding + worst_case_next_bb, ctrl_plane);1120}11211122// Insert padding, if configured, to stress the `MachBuffer`'s1123// relocation and island calculations.1124//1125// Padding can get quite large during fuzzing though so place a1126// total cap on it where when a per-function threshold is exceeded1127// the padding is turned back down to zero. 
This avoids a small-ish1128// test case generating a GB+ memory footprint in Cranelift for1129// example.1130if !bb_padding.is_empty() {1131buffer.put_data(&bb_padding);1132buffer.align_to(I::LabelUse::ALIGN);1133total_bb_padding += bb_padding.len();1134if total_bb_padding > (150 << 20) {1135bb_padding = Vec::new();1136}1137}1138}11391140debug_assert!(1141self.user_stack_maps.is_empty(),1142"any stack maps should have been consumed by instruction emission, still have: {:#?}",1143self.user_stack_maps,1144);11451146// Do any optimizations on branches at tail of buffer, as if we had1147// bound one last label.1148buffer.optimize_branches(ctrl_plane);11491150// emission state is not needed anymore, move control plane back out1151*ctrl_plane = state.take_ctrl_plane();11521153let func_body_len = buffer.cur_offset();11541155// Create `bb_edges` and final (filtered) `bb_starts`.1156let mut bb_edges = vec![];1157let mut bb_offsets = vec![];1158if flags.machine_code_cfg_info() {1159for block in 0..self.num_blocks() {1160if bb_starts[block].is_none() {1161// Block was deleted by MachBuffer; skip.1162continue;1163}1164let from = bb_starts[block].unwrap();11651166bb_offsets.push(from);1167// Resolve each `succ` label and add edges.1168let succs = self.block_succs(BlockIndex::new(block));1169for &succ in succs.iter() {1170let to = buffer.resolve_label_offset(MachLabel::from_block(succ));1171bb_edges.push((from, to));1172}1173}1174}11751176self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len);1177let value_labels_ranges =1178self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);11791180// Store metadata about frame layout in the MachBuffer.1181buffer.set_frame_layout(self.abi.frame_slot_metadata());11821183EmitResult {1184buffer: buffer.finish(&self.constants, ctrl_plane),1185bb_offsets,1186bb_edges,1187disasm: if want_disasm { Some(disasm) } else { None },1188value_labels_ranges,1189}1190}11911192fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) {1193if self.debug_value_labels.is_empty() {1194return;1195}11961197// During emission, branch removal can make offsets of instructions incorrect.1198// Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj]1199// It will be recorded as (say): [30] [34] [38] [42] [<would be 46>]1200// When the jumps get removed we are left with (in "inst_offsets"):1201// [insi][jmp0][jmp1][jmp2][insj][...]1202// [30] [34] [38] [42] [34]1203// Which violates the monotonicity invariant. 
This method sets offsets of these1204// removed instructions such as to make them appear zero-sized:1205// [insi][jmp0][jmp1][jmp2][insj][...]1206// [30] [34] [34] [34] [34]1207//1208let mut next_offset = func_body_len;1209for inst_index in (0..(inst_offsets.len() - 1)).rev() {1210let inst_offset = inst_offsets[inst_index];12111212// Not all instructions get their offsets recorded.1213if inst_offset == NO_INST_OFFSET {1214continue;1215}12161217if inst_offset > next_offset {1218trace!(1219"Fixing code offset of the removed Inst {}: {} -> {}",1220inst_index, inst_offset, next_offset1221);1222inst_offsets[inst_index] = next_offset;1223continue;1224}12251226next_offset = inst_offset;1227}1228}12291230fn compute_value_labels_ranges(1231&self,1232regalloc: ®alloc2::Output,1233inst_offsets: &[CodeOffset],1234func_body_len: u32,1235) -> ValueLabelsRanges {1236if self.debug_value_labels.is_empty() {1237return ValueLabelsRanges::default();1238}12391240if trace_log_enabled!() {1241self.log_value_labels_ranges(regalloc, inst_offsets);1242}12431244let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();1245for &(label, from, to, alloc) in ®alloc.debug_locations {1246let label = ValueLabel::from_u32(label);1247let ranges = value_labels_ranges.entry(label).or_insert_with(|| vec![]);1248let prog_point_to_inst = |prog_point: ProgPoint| {1249let mut inst = prog_point.inst();1250if prog_point.pos() == InstPosition::After {1251inst = inst.next();1252}1253inst.index()1254};1255let inst_to_offset = |inst_index: usize| {1256// Skip over cold blocks.1257for offset in &inst_offsets[inst_index..] {1258if *offset != NO_INST_OFFSET {1259return *offset;1260}1261}1262func_body_len1263};1264let from_inst_index = prog_point_to_inst(from);1265let to_inst_index = prog_point_to_inst(to);1266let from_offset = inst_to_offset(from_inst_index);1267let to_offset = inst_to_offset(to_inst_index);12681269// Empty ranges or unavailable offsets can happen1270// due to cold blocks and branch removal (see above).1271if from_offset == to_offset {1272continue;1273}12741275let loc = if let Some(preg) = alloc.as_reg() {1276LabelValueLoc::Reg(Reg::from(preg))1277} else {1278#[cfg(not(feature = "unwind"))]1279continue;12801281#[cfg(feature = "unwind")]1282{1283let slot = alloc.as_stack().unwrap();1284let slot_offset = self.abi.get_spillslot_offset(slot);1285let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset();1286let caller_sp_to_cfa_offset =1287crate::isa::unwind::systemv::caller_sp_to_cfa_offset();1288// NOTE: this is a negative offset because it's relative to the caller's SP1289let cfa_to_sp_offset =1290-((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64);1291LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset)1292}1293};12941295// Coalesce adjacent ranges that for the same location1296// to minimize output size here and for the consumers.1297if let Some(last_loc_range) = ranges.last_mut() {1298if last_loc_range.loc == loc && last_loc_range.end == from_offset {1299trace!(1300"Extending debug range for {:?} in {:?} to Inst {} ({})",1301label, loc, to_inst_index, to_offset1302);1303last_loc_range.end = to_offset;1304continue;1305}1306}13071308trace!(1309"Recording debug range for {:?} in {:?}: [Inst {}..Inst {}) [{}..{})",1310label, loc, from_inst_index, to_inst_index, from_offset, to_offset1311);13121313ranges.push(ValueLocRange {1314loc,1315start: from_offset,1316end: to_offset,1317});1318}13191320value_labels_ranges1321}13221323fn log_value_labels_ranges(&self, regalloc: ®alloc2::Output, 
inst_offsets: &[CodeOffset]) {1324debug_assert!(trace_log_enabled!());13251326// What debug labels do we have? Note we'll skip those that have not been1327// allocated any location at all. They will show up as numeric gaps in the table.1328let mut labels = vec![];1329for &(label, _, _, _) in ®alloc.debug_locations {1330if Some(&label) == labels.last() {1331continue;1332}1333labels.push(label);1334}13351336// Reformat the data on what VRegs were the VLs assigned to by lowering, since1337// the array we have is sorted by VReg, and we want it sorted by VL for easy1338// access in the loop below.1339let mut vregs = vec![];1340for &(vreg, start, end, label) in &self.debug_value_labels {1341if matches!(labels.binary_search(&label), Ok(_)) {1342vregs.push((label, start, end, vreg));1343}1344}1345vregs.sort_unstable_by(1346|(l_label, l_start, _, _), (r_label, r_start, _, _)| match l_label.cmp(r_label) {1347Ordering::Equal => l_start.cmp(r_start),1348cmp => cmp,1349},1350);13511352#[derive(PartialEq)]1353enum Mode {1354Measure,1355Emit,1356}1357#[derive(PartialEq)]1358enum Row {1359Head,1360Line,1361Inst(usize, usize),1362}13631364let mut widths = vec![0; 3 + 2 * labels.len()];1365let mut row = String::new();1366let mut output_row = |row_kind: Row, mode: Mode| {1367let mut column_index = 0;1368row.clear();13691370macro_rules! output_cell_impl {1371($fill:literal, $span:literal, $($cell_fmt:tt)*) => {1372let column_start = row.len();1373{1374row.push('|');1375write!(row, $($cell_fmt)*).unwrap();1376}13771378let next_column_index = column_index + $span;1379let expected_width: usize = widths[column_index..next_column_index].iter().sum();1380if mode == Mode::Measure {1381let actual_width = row.len() - column_start;1382if actual_width > expected_width {1383widths[next_column_index - 1] += actual_width - expected_width;1384}1385} else {1386let column_end = column_start + expected_width;1387while row.len() != column_end {1388row.push($fill);1389}1390}1391column_index = next_column_index;1392};1393}1394macro_rules! 
output_cell {1395($($cell_fmt:tt)*) => {1396output_cell_impl!(' ', 1, $($cell_fmt)*);1397};1398}13991400match row_kind {1401Row::Head => {1402output_cell!("BB");1403output_cell!("Inst");1404output_cell!("IP");1405for label in &labels {1406output_cell_impl!(' ', 2, "{:?}", ValueLabel::from_u32(*label));1407}1408}1409Row::Line => {1410debug_assert!(mode == Mode::Emit);1411for _ in 0..3 {1412output_cell_impl!('-', 1, "");1413}1414for _ in &labels {1415output_cell_impl!('-', 2, "");1416}1417}1418Row::Inst(block_index, inst_index) => {1419debug_assert!(inst_index < self.num_insts());1420if self.block_ranges.get(block_index).start == inst_index {1421output_cell!("B{}", block_index);1422} else {1423output_cell!("");1424}1425output_cell!("Inst {inst_index} ");1426output_cell!("{} ", inst_offsets[inst_index]);14271428for label in &labels {1429// First, the VReg.1430use regalloc2::Inst;1431let vreg_cmp = |inst: usize,1432vreg_label: &u32,1433range_start: &Inst,1434range_end: &Inst| {1435match vreg_label.cmp(&label) {1436Ordering::Equal => {1437if range_end.index() <= inst {1438Ordering::Less1439} else if range_start.index() > inst {1440Ordering::Greater1441} else {1442Ordering::Equal1443}1444}1445cmp => cmp,1446}1447};1448let vreg_index =1449vregs.binary_search_by(|(l, s, e, _)| vreg_cmp(inst_index, l, s, e));1450if let Ok(vreg_index) = vreg_index {1451let mut prev_vreg = None;1452if inst_index > 0 {1453let prev_vreg_index = vregs.binary_search_by(|(l, s, e, _)| {1454vreg_cmp(inst_index - 1, l, s, e)1455});1456if let Ok(prev_vreg_index) = prev_vreg_index {1457prev_vreg = Some(vregs[prev_vreg_index].3);1458}1459}14601461let vreg = vregs[vreg_index].3;1462if Some(vreg) == prev_vreg {1463output_cell!("*");1464} else {1465output_cell!("{}", vreg);1466}1467} else {1468output_cell!("");1469}14701471// Second, the allocated location.1472let inst_prog_point = ProgPoint::before(Inst::new(inst_index));1473let range_index = regalloc.debug_locations.binary_search_by(1474|(range_label, range_start, range_end, _)| match range_label.cmp(label)1475{1476Ordering::Equal => {1477if *range_end <= inst_prog_point {1478Ordering::Less1479} else if *range_start > inst_prog_point {1480Ordering::Greater1481} else {1482Ordering::Equal1483}1484}1485cmp => cmp,1486},1487);1488if let Ok(range_index) = range_index {1489// Live at this instruction, print the location.1490if let Some(reg) = regalloc.debug_locations[range_index].3.as_reg() {1491output_cell!("{:?}", Reg::from(reg));1492} else {1493output_cell!("Stk");1494}1495} else {1496// Not live at this instruction.1497output_cell!("");1498}1499}1500}1501}1502row.push('|');15031504if mode == Mode::Emit {1505trace!("{}", row.as_str());1506}1507};15081509for block_index in 0..self.num_blocks() {1510for inst_index in self.block_ranges.get(block_index) {1511output_row(Row::Inst(block_index, inst_index), Mode::Measure);1512}1513}1514output_row(Row::Head, Mode::Measure);15151516output_row(Row::Head, Mode::Emit);1517output_row(Row::Line, Mode::Emit);1518for block_index in 0..self.num_blocks() {1519for inst_index in self.block_ranges.get(block_index) {1520output_row(Row::Inst(block_index, inst_index), Mode::Emit);1521}1522}1523}15241525/// Get the IR block for a BlockIndex, if one exists.1526pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {1527self.block_order.lowered_order()[block.index()].orig_block()1528}15291530/// Get the type of a VReg.1531pub fn vreg_type(&self, vreg: VReg) -> Type {1532self.vreg_types[vreg.vreg()]1533}15341535/// Get the fact, if any, for a 
given VReg.1536pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> {1537self.facts[vreg.vreg()].as_ref()1538}15391540/// Set the fact for a given VReg.1541pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) {1542trace!("set fact on {}: {:?}", vreg, fact);1543self.facts[vreg.vreg()] = Some(fact);1544}15451546/// Does a given instruction define any facts?1547pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool {1548self.inst_operands(inst)1549.iter()1550.filter(|o| o.kind() == OperandKind::Def)1551.map(|o| o.vreg())1552.any(|vreg| self.facts[vreg.vreg()].is_some())1553}15541555/// Get the user stack map associated with the given forward instruction index.1556pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> {1557let index = inst.to_backwards_insn_index(self.num_insts());1558self.user_stack_maps.get(&index)1559}1560}15611562impl<I: VCodeInst> core::ops::Index<InsnIndex> for VCode<I> {1563type Output = I;1564fn index(&self, idx: InsnIndex) -> &Self::Output {1565&self.insts[idx.index()]1566}1567}15681569impl<I: VCodeInst> RegallocFunction for VCode<I> {1570fn num_insts(&self) -> usize {1571self.insts.len()1572}15731574fn num_blocks(&self) -> usize {1575self.block_ranges.len()1576}15771578fn entry_block(&self) -> BlockIndex {1579self.entry1580}15811582fn block_insns(&self, block: BlockIndex) -> InstRange {1583let range = self.block_ranges.get(block.index());1584InstRange::new(InsnIndex::new(range.start), InsnIndex::new(range.end))1585}15861587fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {1588let range = self.block_succ_range.get(block.index());1589&self.block_succs[range]1590}15911592fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {1593let range = self.block_pred_range.get(block.index());1594&self.block_preds[range]1595}15961597fn block_params(&self, block: BlockIndex) -> &[VReg] {1598// As a special case we don't return block params for the entry block, as all the arguments1599// will be defined by the `Inst::Args` instruction.1600if block == self.entry {1601return &[];1602}16031604let range = self.block_params_range.get(block.index());1605&self.block_params[range]1606}16071608fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {1609let succ_range = self.branch_block_arg_succ_range.get(block.index());1610debug_assert!(succ_idx < succ_range.len());1611let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx);1612&self.branch_block_args[branch_block_args]1613}16141615fn is_ret(&self, insn: InsnIndex) -> bool {1616match self.insts[insn.index()].is_term() {1617// We treat blocks terminated by an unconditional trap like a return for regalloc.1618MachTerminator::None => self.insts[insn.index()].is_trap(),1619MachTerminator::Ret | MachTerminator::RetCall => true,1620MachTerminator::Branch => false,1621}1622}16231624fn is_branch(&self, insn: InsnIndex) -> bool {1625match self.insts[insn.index()].is_term() {1626MachTerminator::Branch => true,1627_ => false,1628}1629}16301631fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {1632let range = self.operand_ranges.get(insn.index());1633&self.operands[range]1634}16351636fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {1637self.clobbers.get(&insn).cloned().unwrap_or_default()1638}16391640fn num_vregs(&self) -> usize {1641self.vreg_types.len()1642}16431644fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {1645&self.debug_value_labels1646}16471648fn spillslot_size(&self, regclass: RegClass) -> usize 
{1649self.abi.get_spillslot_size(regclass) as usize1650}16511652fn allow_multiple_vreg_defs(&self) -> bool {1653// At least the s390x backend requires this, because the1654// `Loop` pseudo-instruction aggregates all Operands so pinned1655// vregs (RealRegs) may occur more than once.1656true1657}1658}16591660impl<I: VCodeInst> Debug for VRegAllocator<I> {1661fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {1662writeln!(f, "VRegAllocator {{")?;16631664let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();1665alias_keys.sort_unstable();1666for key in alias_keys {1667let dest = self.vreg_aliases.get(&key).unwrap();1668writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;1669}16701671for (vreg, fact) in self.facts.iter().enumerate() {1672if let Some(fact) = fact {1673writeln!(f, " v{vreg} ! {fact}")?;1674}1675}16761677writeln!(f, "}}")1678}1679}16801681impl<I: VCodeInst> fmt::Debug for VCode<I> {1682fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {1683writeln!(f, "VCode {{")?;1684writeln!(f, " Entry block: {}", self.entry.index())?;16851686let mut state = Default::default();16871688for block in 0..self.num_blocks() {1689let block = BlockIndex::new(block);1690writeln!(1691f,1692"Block {}({:?}):",1693block.index(),1694self.block_params(block)1695)?;1696if let Some(bb) = self.bindex_to_bb(block) {1697writeln!(f, " (original IR block: {bb})")?;1698}1699for (succ_idx, succ) in self.block_succs(block).iter().enumerate() {1700writeln!(1701f,1702" (successor: Block {}({:?}))",1703succ.index(),1704self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx)1705)?;1706}1707for inst in self.block_ranges.get(block.index()) {1708writeln!(1709f,1710" Inst {}: {}",1711inst,1712self.insts[inst].pretty_print_inst(&mut state)1713)?;1714if !self.operands.is_empty() {1715for operand in self.inst_operands(InsnIndex::new(inst)) {1716if operand.kind() == OperandKind::Def {1717if let Some(fact) = &self.facts[operand.vreg().vreg()] {1718writeln!(f, " v{} ! {}", operand.vreg().vreg(), fact)?;1719}1720}1721}1722}1723if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) {1724writeln!(f, " {user_stack_map:?}")?;1725}1726}1727}17281729writeln!(f, "}}")?;1730Ok(())1731}1732}17331734/// This structure manages VReg allocation during the lifetime of the VCodeBuilder.1735pub struct VRegAllocator<I> {1736/// VReg IR-level types.1737vreg_types: Vec<Type>,17381739/// VReg aliases. When the final VCode is built we rewrite all1740/// uses of the keys in this table to their replacement values.1741///1742/// We use these aliases to rename an instruction's expected1743/// result vregs to the returned vregs from lowering, which are1744/// usually freshly-allocated temps.1745vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,17461747/// A deferred error, to be bubbled up to the top level of the1748/// lowering algorithm. 

/// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
pub struct VRegAllocator<I> {
    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// VReg aliases. When the final VCode is built we rewrite all
    /// uses of the keys in this table to their replacement values.
    ///
    /// We use these aliases to rename an instruction's expected
    /// result vregs to the returned vregs from lowering, which are
    /// usually freshly-allocated temps.
    vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,

    /// A deferred error, to be bubbled up to the top level of the
    /// lowering algorithm. We take this approach because we cannot
    /// currently propagate a `Result` upward through ISLE code (the
    /// lowering rules) or some ABI code.
    deferred_error: Option<CodegenError>,

    /// Facts on VRegs, for proof-carrying code.
    facts: Vec<Option<Fact>>,

    /// The type of instruction that this allocator makes registers for.
    _inst: core::marker::PhantomData<I>,
}

impl<I: VCodeInst> VRegAllocator<I> {
    /// Make a new VRegAllocator.
    pub fn with_capacity(capacity: usize) -> Self {
        let capacity = first_user_vreg_index() + capacity;
        let mut vreg_types = Vec::with_capacity(capacity);
        vreg_types.resize(first_user_vreg_index(), types::INVALID);
        Self {
            vreg_types,
            vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
            deferred_error: None,
            facts: Vec::with_capacity(capacity),
            _inst: core::marker::PhantomData::default(),
        }
    }

    /// Allocate a fresh ValueRegs.
    pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
        if self.deferred_error.is_some() {
            return Err(CodegenError::CodeTooLarge);
        }
        let v = self.vreg_types.len();
        let (regclasses, tys) = I::rc_for_type(ty)?;
        if v + regclasses.len() >= VReg::MAX {
            return Err(CodegenError::CodeTooLarge);
        }

        let regs: ValueRegs<Reg> = match regclasses {
            &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
            &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
            // We can extend this if/when we support 32-bit targets; e.g.,
            // an i128 on a 32-bit machine will need up to four machine regs
            // for a `Value`.
            _ => panic!("Value must reside in 1 or 2 registers"),
        };
        for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
            let vreg = reg.to_virtual_reg().unwrap();
            debug_assert_eq!(self.vreg_types.len(), vreg.index());
            self.vreg_types.push(reg_ty);
        }

        // Create empty facts for each allocated vreg.
        self.facts.resize(self.vreg_types.len(), None);

        Ok(regs)
    }

    /// Allocate a fresh ValueRegs, deferring any out-of-vregs
    /// errors. This is useful in places where we cannot bubble a
    /// `CodegenResult` upward easily, and which are known to be
    /// invoked from within the lowering loop that checks the deferred
    /// error status below.
    pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> {
        match self.alloc(ty) {
            Ok(x) => x,
            Err(e) => {
                self.deferred_error = Some(e);
                self.bogus_for_deferred_error(ty)
            }
        }
    }

    /// Take any deferred error that was accumulated by `alloc_with_deferred_error`.
    pub fn take_deferred_error(&mut self) -> Option<CodegenError> {
        self.deferred_error.take()
    }
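
    // A minimal sketch of the deferred-error pattern above (illustrative only;
    // the surrounding lowering-loop code is a stand-in, not an actual function
    // in this file):
    //
    //     let regs = vregs.alloc_with_deferred_error(ty); // never returns Err
    //     // ... keep lowering with the possibly-bogus `regs` ...
    //     if let Some(err) = vregs.take_deferred_error() {
    //         return Err(err); // surfaced once we are back in code that can return a Result
    //     }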

    /// Produce a bogus VReg placeholder with the proper number of
    /// registers for the given type. This is meant to be used with
    /// deferred allocation errors (see `Lower::alloc_tmp()`).
    fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> {
        let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type");
        match regclasses {
            &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()),
            &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()),
            _ => panic!("Value must reside in 1 or 2 registers"),
        }
    }

    /// Rewrite any mention of `from` into `to`.
    pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
        let from = from.into();
        let resolved_to = self.resolve_vreg_alias(to.into());
        // Disallow cycles (see below).
        assert_ne!(resolved_to, from);

        // Maintain the invariant that PCC facts only exist on vregs
        // which aren't aliases. We want to preserve whatever was
        // stated about the vreg before its producer was lowered.
        if let Some(fact) = self.facts[from.vreg()].take() {
            self.set_fact(resolved_to, fact);
        }

        let old_alias = self.vreg_aliases.insert(from, resolved_to);
        debug_assert_eq!(old_alias, None);
    }

    fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg {
        // We prevent cycles from existing by resolving targets of
        // aliases eagerly before setting them. If the target resolves
        // to the origin of the alias, then a cycle would be created
        // and the alias is disallowed. Because of the structure of
        // SSA code (one instruction can refer to another's defs but
        // not vice-versa, except indirectly through
        // phis/blockparams), cycles should not occur as we use
        // aliases to redirect vregs to the temps that actually define
        // them.
        while let Some(to) = self.vreg_aliases.get(&vreg) {
            vreg = *to;
        }
        vreg
    }

    #[inline]
    fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) {
        debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg)));
    }

    /// Set the proof-carrying code fact on a given virtual register.
    ///
    /// Returns the old fact, if any (only one fact can be stored).
    fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> {
        trace!("vreg {:?} has fact: {:?}", vreg, fact);
        debug_assert!(!self.vreg_aliases.contains_key(&vreg));
        self.facts[vreg.vreg()].replace(fact)
    }

    /// Set a fact only if one doesn't already exist.
    pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) {
        let vreg = self.resolve_vreg_alias(vreg.into());
        if self.facts[vreg.vreg()].is_none() {
            self.set_fact(vreg, fact);
        }
    }

    /// Allocate a fresh ValueRegs, with a given fact to apply if
    /// the value fits in one VReg.
    pub fn alloc_with_maybe_fact(
        &mut self,
        ty: Type,
        fact: Option<Fact>,
    ) -> CodegenResult<ValueRegs<Reg>> {
        let result = self.alloc(ty)?;

        // Ensure that we don't lose a fact on a value that splits
        // into multiple VRegs.
        assert!(result.len() == 1 || fact.is_none());
        if let Some(fact) = fact {
            self.set_fact(result.regs()[0].into(), fact);
        }

        Ok(result)
    }
}
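
// A worked example of the aliasing rules above (illustrative only, with made-up
// vreg numbers): if lowering records `v10 -> v20` and later `v20 -> v30` via
// `set_vreg_alias`, then `resolve_vreg_alias(v10)` walks the chain and yields
// `v30`. Because the target of a new alias is resolved eagerly before it is
// inserted, a later attempt to record `v30 -> v10` resolves `v10` back to `v30`
// and trips the `assert_ne!` cycle check in `set_vreg_alias`.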

/// This structure tracks the large constants used in VCode that will be emitted separately by the
/// [MachBuffer].
///
/// First, during the lowering phase, constants are inserted using
/// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are
/// used in this phase. Some deduplication is performed, when possible, as constant
/// values are inserted.
///
/// Second, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
/// constants so that instructions can refer to the value's memory location. The [MachBuffer]
/// then writes the constant values to the buffer.
#[derive(Default)]
pub struct VCodeConstants {
    constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
    pool_uses: HashMap<Constant, VCodeConstant>,
    well_known_uses: HashMap<*const [u8], VCodeConstant>,
    u64s: HashMap<[u8; 8], VCodeConstant>,
}

impl VCodeConstants {
    /// Initialize the structure with the expected number of constants.
    pub fn with_capacity(expected_num_constants: usize) -> Self {
        Self {
            constants: PrimaryMap::with_capacity(expected_num_constants),
            pool_uses: HashMap::with_capacity(expected_num_constants),
            well_known_uses: HashMap::new(),
            u64s: HashMap::new(),
        }
    }

    /// Insert a constant; using this method indicates that a constant value will be used and thus
    /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
    /// that are [VCodeConstantData::Pool], [VCodeConstantData::WellKnown], or
    /// [VCodeConstantData::U64], but not [VCodeConstantData::Generated].
    pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
        match data {
            VCodeConstantData::Generated(_) => self.constants.push(data),
            VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
                None => {
                    let vcode_constant = self.constants.push(data);
                    self.pool_uses.insert(constant, vcode_constant);
                    vcode_constant
                }
                Some(&vcode_constant) => vcode_constant,
            },
            VCodeConstantData::WellKnown(data_ref) => {
                match self.well_known_uses.entry(data_ref as *const [u8]) {
                    Entry::Vacant(v) => {
                        let vcode_constant = self.constants.push(data);
                        v.insert(vcode_constant);
                        vcode_constant
                    }
                    Entry::Occupied(o) => *o.get(),
                }
            }
            VCodeConstantData::U64(value) => match self.u64s.entry(value) {
                Entry::Vacant(v) => {
                    let vcode_constant = self.constants.push(data);
                    v.insert(vcode_constant);
                    vcode_constant
                }
                Entry::Occupied(o) => *o.get(),
            },
        }
    }

    /// Return the number of constants inserted.
    pub fn len(&self) -> usize {
        self.constants.len()
    }

    /// Iterate over the `VCodeConstant` keys inserted in this structure.
    pub fn keys(&self) -> Keys<VCodeConstant> {
        self.constants.keys()
    }

    /// Iterate over the `VCodeConstant` keys and the data (as a byte slice) inserted in this
    /// structure.
    pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
        self.constants.iter()
    }

    /// Returns the data associated with the specified constant.
    pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData {
        &self.constants[c]
    }

    /// Checks if the given [VCodeConstantData] is registered as
    /// used by the pool.
    pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool {
        match constant {
            VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c),
            _ => false,
        }
    }
}

/// A use of a constant by one or more VCode instructions; see [VCodeConstants].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct VCodeConstant(u32);
entity_impl!(VCodeConstant);
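
// Deduplication in `VCodeConstants::insert`, sketched (illustrative only):
// re-inserting the same IR `Constant` as `VCodeConstantData::Pool`, the same
// `&'static` slice as `WellKnown` (keyed by pointer identity), or the same
// 8-byte value as `U64` returns the previously allocated `VCodeConstant`
// handle; every `Generated` insertion always allocates a fresh handle.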

/// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
/// these separately instead of as raw byte buffers allows us to avoid some duplication.
pub enum VCodeConstantData {
    /// A constant already present in the Cranelift IR
    /// [ConstantPool](crate::ir::constant::ConstantPool).
    Pool(Constant, ConstantData),
    /// A reference to a well-known constant value that is statically encoded within the compiler.
    WellKnown(&'static [u8]),
    /// A constant value generated during lowering; the value may depend on the instruction context
    /// which makes it difficult to de-duplicate--if possible, use other variants.
    Generated(ConstantData),
    /// A constant of at most 64 bits. These are deduplicated as
    /// well. Stored as a fixed-size array of `u8` so that we do not
    /// encounter endianness problems when cross-compiling.
    U64([u8; 8]),
}

impl VCodeConstantData {
    /// Retrieve the constant data as a byte slice.
    pub fn as_slice(&self) -> &[u8] {
        match self {
            VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
            VCodeConstantData::WellKnown(d) => d,
            VCodeConstantData::U64(value) => &value[..],
        }
    }

    /// Calculate the alignment of the constant data.
    pub fn alignment(&self) -> u32 {
        if self.as_slice().len() <= 8 { 8 } else { 16 }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use core::mem::size_of;

    #[test]
    fn size_of_constant_structs() {
        assert_eq!(size_of::<Constant>(), 4);
        assert_eq!(size_of::<VCodeConstant>(), 4);
        assert_eq!(size_of::<ConstantData>(), 3 * size_of::<usize>());
        assert_eq!(size_of::<VCodeConstantData>(), 4 * size_of::<usize>());
        assert_eq!(
            size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
            3 * size_of::<usize>()
        );
        // TODO The VCodeConstants structure's memory size could be further optimized.
        // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
        // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
    }
}
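
// The module below is an illustrative addition, not part of the original file:
// a small test sketch exercising the deduplication rules documented on
// `VCodeConstants::insert` and the `VCodeConstantData::alignment` policy,
// using only APIs defined above.
#[cfg(test)]
mod constant_dedup_example {
    use super::*;

    #[test]
    fn insert_deduplicates_well_known_and_u64_constants() {
        static WELL_KNOWN: [u8; 4] = [0xde, 0xad, 0xbe, 0xef];
        static LARGE: [u8; 16] = [0xab; 16];

        let mut constants = VCodeConstants::with_capacity(2);

        // The same `&'static` slice is keyed by pointer identity, so both
        // insertions map to a single `VCodeConstant` handle.
        let a = constants.insert(VCodeConstantData::WellKnown(&WELL_KNOWN[..]));
        let b = constants.insert(VCodeConstantData::WellKnown(&WELL_KNOWN[..]));
        assert_eq!(a, b);

        // `U64` constants are keyed by their byte value.
        let c = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
        let d = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
        assert_eq!(c, d);

        // Two distinct constants survive after deduplication.
        assert_eq!(constants.len(), 2);

        // Constants of at most 8 bytes get 8-byte alignment; larger ones get 16.
        assert_eq!(VCodeConstantData::U64(42u64.to_le_bytes()).alignment(), 8);
        assert_eq!(VCodeConstantData::WellKnown(&LARGE[..]).alignment(), 16);
    }
}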