Path: blob/main/cranelift/codegen/src/machinst/mod.rs
3050 views
//! This module exposes the machine-specific backend definition pieces.1//!2//! The MachInst infrastructure is the compiler backend, from CLIF3//! (ir::Function) to machine code. The purpose of this infrastructure is, at a4//! high level, to do instruction selection/lowering (to machine instructions),5//! register allocation, and then perform all the fixups to branches, constant6//! data references, etc., needed to actually generate machine code.7//!8//! The container for machine instructions, at various stages of construction,9//! is the `VCode` struct. We refer to a sequence of machine instructions organized10//! into basic blocks as "vcode". This is short for "virtual-register code".11//!12//! The compilation pipeline, from an `ir::Function` (already optimized as much as13//! you like by machine-independent optimization passes) onward, is as follows.14//!15//! ```plain16//!17//! ir::Function (SSA IR, machine-independent opcodes)18//! |19//! | [lower]20//! |21//! VCode<arch_backend::Inst> (machine instructions:22//! | - mostly virtual registers.23//! | - cond branches in two-target form.24//! | - branch targets are block indices.25//! | - in-memory constants held by insns,26//! | with unknown offsets.27//! | - critical edges (actually all edges)28//! | are split.)29//! |30//! | [regalloc --> `regalloc2::Output`; VCode is unchanged]31//! |32//! | [binary emission via MachBuffer]33//! |34//! Vec<u8> (machine code:35//! | - two-dest branches resolved via36//! | streaming branch resolution/simplification.37//! | - regalloc `Allocation` results used directly38//! | by instruction emission code.39//! | - prologue and epilogue(s) built and emitted40//! | directly during emission.41//! | - SP-relative offsets resolved by tracking42//! | EmitState.)43//!44//! ```4546use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc};47use crate::ir::{48self, DynamicStackSlot, RelSourceLoc, StackSlot, Type, function::FunctionParameters,49};50use crate::isa::FunctionAlignment;51use crate::result::CodegenResult;52use crate::settings;53use crate::settings::Flags;54use crate::value_label::ValueLabelsRanges;55use alloc::string::String;56use alloc::vec::Vec;57use core::fmt::Debug;58use cranelift_control::ControlPlane;59use cranelift_entity::PrimaryMap;60use regalloc2::VReg;61use smallvec::{SmallVec, smallvec};6263#[cfg(feature = "enable-serde")]64use serde_derive::{Deserialize, Serialize};6566#[macro_use]67pub mod isle;6869pub mod lower;70pub use lower::*;71pub mod vcode;72pub use vcode::*;73pub mod compile;74pub use compile::*;75pub mod blockorder;76pub use blockorder::*;77pub mod abi;78pub use abi::*;79pub mod buffer;80pub use buffer::*;81pub mod helpers;82pub use helpers::*;83pub mod valueregs;84pub use reg::*;85pub use valueregs::*;86pub mod pcc;87pub mod reg;8889/// A machine instruction.90pub trait MachInst: Clone + Debug {91/// The ABI machine spec for this `MachInst`.92type ABIMachineSpec: ABIMachineSpec<I = Self>;9394/// Return the registers referenced by this machine instruction along with95/// the modes of reference (use, def, modify).96fn get_operands(&mut self, collector: &mut impl OperandVisitor);9798/// If this is a simple move, return the (source, destination) tuple of registers.99fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;100101/// Is this a terminator (branch or ret)? If so, return its type102/// (ret/uncond/cond) and target if applicable.103fn is_term(&self) -> MachTerminator;104105/// Is this an unconditional trap?106fn is_trap(&self) -> bool;107108/// Is this an "args" pseudoinst?109fn is_args(&self) -> bool;110111/// Classify the type of call instruction this is.112///113/// This enables more granular function type analysis and optimization.114/// Returns `CallType::None` for non-call instructions, `CallType::Regular`115/// for normal calls that return to the caller, and `CallType::TailCall`116/// for tail calls that don't return to the caller.117fn call_type(&self) -> CallType;118119/// Should this instruction's clobber-list be included in the120/// clobber-set?121fn is_included_in_clobbers(&self) -> bool;122123/// Does this instruction access memory?124fn is_mem_access(&self) -> bool;125126/// Generate a move.127fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;128129/// Generate a dummy instruction that will keep a value alive but130/// has no other purpose.131fn gen_dummy_use(reg: Reg) -> Self;132133/// Determine register class(es) to store the given Cranelift type, and the134/// Cranelift type actually stored in the underlying register(s). May return135/// an error if the type isn't supported by this backend.136///137/// If the type requires multiple registers, then the list of registers is138/// returned in little-endian order.139///140/// Note that the type actually stored in the register(s) may differ in the141/// case that a value is split across registers: for example, on a 32-bit142/// target, an I64 may be stored in two registers, each of which holds an143/// I32. The actually-stored types are used only to inform the backend when144/// generating spills and reloads for individual registers.145fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])>;146147/// Get an appropriate type that can fully hold a value in a given148/// register class. This may not be the only type that maps to149/// that class, but when used with `gen_move()` or the ABI trait's150/// load/spill constructors, it should produce instruction(s) that151/// move the entire register contents.152fn canonical_type_for_rc(rc: RegClass) -> Type;153154/// Generate a jump to another target. Used during lowering of155/// control flow.156fn gen_jump(target: MachLabel) -> Self;157158/// Generate a store of an immediate 64-bit integer to a register. Used by159/// the control plane to generate random instructions.160fn gen_imm_u64(_value: u64, _dst: Writable<Reg>) -> Option<Self> {161None162}163164/// Generate a store of an immediate 64-bit integer to a register. Used by165/// the control plane to generate random instructions. The tmp register may166/// be used by architectures which don't support writing immediate values to167/// floating point registers directly.168fn gen_imm_f64(_value: f64, _tmp: Writable<Reg>, _dst: Writable<Reg>) -> SmallVec<[Self; 2]> {169SmallVec::new()170}171172/// Generate a NOP. The `preferred_size` parameter allows the caller to173/// request a NOP of that size, or as close to it as possible. The machine174/// backend may return a NOP whose binary encoding is smaller than the175/// preferred size, but must not return a NOP that is larger. However,176/// the instruction must have a nonzero size if preferred_size is nonzero.177fn gen_nop(preferred_size: usize) -> Self;178179/// The various kinds of NOP, with size, sorted in ascending-size180/// order.181fn gen_nop_units() -> Vec<Vec<u8>>;182183/// Align a basic block offset (from start of function). By default, no184/// alignment occurs.185fn align_basic_block(offset: CodeOffset) -> CodeOffset {186offset187}188189/// What is the worst-case instruction size emitted by this instruction type?190fn worst_case_size() -> CodeOffset;191192/// What is the register class used for reference types (GC-observable pointers)? Can193/// be dependent on compilation flags.194fn ref_type_regclass(_flags: &Flags) -> RegClass;195196/// Is this a safepoint?197fn is_safepoint(&self) -> bool;198199/// Generate an instruction that must appear at the beginning of a basic200/// block, if any. Note that the return value must not be subject to201/// register allocation.202fn gen_block_start(203_is_indirect_branch_target: bool,204_is_forward_edge_cfi_enabled: bool,205) -> Option<Self> {206None207}208209/// Returns a description of the alignment required for functions for this210/// architecture.211fn function_alignment() -> FunctionAlignment;212213/// Is this a low-level, one-way branch, not meant for use in a214/// VCode body? These instructions are meant to be used only when215/// directly emitted, i.e. when `MachInst` is used as an assembler216/// library.217fn is_low_level_branch(&self) -> bool {218false219}220221/// A label-use kind: a type that describes the types of label references that222/// can occur in an instruction.223type LabelUse: MachInstLabelUse;224225/// Byte representation of a trap opcode which is inserted by `MachBuffer`226/// during its `defer_trap` method.227const TRAP_OPCODE: &'static [u8];228}229230/// A descriptor of a label reference (use) in an instruction set.231pub trait MachInstLabelUse: Clone + Copy + Debug + Eq {232/// Required alignment for any veneer. Usually the required instruction233/// alignment (e.g., 4 for a RISC with 32-bit instructions, or 1 for x86).234const ALIGN: CodeOffset;235236/// What is the maximum PC-relative range (positive)? E.g., if `1024`, a237/// label-reference fixup at offset `x` is valid if the label resolves to `x238/// + 1024`.239fn max_pos_range(self) -> CodeOffset;240/// What is the maximum PC-relative range (negative)? This is the absolute241/// value; i.e., if `1024`, then a label-reference fixup at offset `x` is242/// valid if the label resolves to `x - 1024`.243fn max_neg_range(self) -> CodeOffset;244/// What is the size of code-buffer slice this label-use needs to patch in245/// the label's value?246fn patch_size(self) -> CodeOffset;247/// Perform a code-patch, given the offset into the buffer of this label use248/// and the offset into the buffer of the label's definition.249/// It is guaranteed that, given `delta = offset - label_offset`, we will250/// have `offset >= -self.max_neg_range()` and `offset <=251/// self.max_pos_range()`.252fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset);253/// Can the label-use be patched to a veneer that supports a longer range?254/// Usually valid for jumps (a short-range jump can jump to a longer-range255/// jump), but not for e.g. constant pool references, because the constant256/// load would require different code (one more level of indirection).257fn supports_veneer(self) -> bool;258/// How many bytes are needed for a veneer?259fn veneer_size(self) -> CodeOffset;260/// What's the largest possible veneer that may be generated?261fn worst_case_veneer_size() -> CodeOffset;262/// Generate a veneer. The given code-buffer slice is `self.veneer_size()`263/// bytes long at offset `veneer_offset` in the buffer. The original264/// label-use will be patched to refer to this veneer's offset. A new265/// (offset, LabelUse) is returned that allows the veneer to use the actual266/// label. For veneers to work properly, it is expected that the new veneer267/// has a larger range; on most platforms this probably means either a268/// "long-range jump" (e.g., on ARM, the 26-bit form), or if already at that269/// stage, a jump that supports a full 32-bit range, for example.270fn generate_veneer(self, buffer: &mut [u8], veneer_offset: CodeOffset) -> (CodeOffset, Self);271272/// Returns the corresponding label-use for the relocation specified.273///274/// This returns `None` if the relocation doesn't have a corresponding275/// representation for the target architecture.276fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self>;277}278279/// Classification of call instruction types for granular analysis.280#[derive(Clone, Copy, Debug, PartialEq, Eq)]281pub enum CallType {282/// Not a call instruction.283None,284/// Regular call that returns to the caller.285Regular,286/// Tail call that doesn't return to the caller.287TailCall,288}289290/// Function classification based on call patterns.291///292/// This enum classifies functions based on their calling behavior to enable293/// targeted optimizations. Functions are categorized as:294/// - `None`: No calls at all (can use simplified calling conventions)295/// - `TailOnly`: Only tail calls (may skip frame setup in some cases)296/// - `Regular`: Has regular calls (requires full calling convention support)297#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]298pub enum FunctionCalls {299/// Function makes no calls at all.300#[default]301None,302/// Function only makes tail calls (no regular calls).303TailOnly,304/// Function makes at least one regular call (may also have tail calls).305Regular,306}307308impl FunctionCalls {309/// Update the function classification based on a new call instruction.310///311/// This method implements the merge logic for accumulating call patterns:312/// - Any regular call makes the function Regular313/// - Tail calls upgrade None to TailOnly314/// - Regular always stays Regular315pub fn update(&mut self, call_type: CallType) {316*self = match (*self, call_type) {317// No call instruction - state unchanged318(current, CallType::None) => current,319// Regular call always results in Regular classification320(_, CallType::Regular) => FunctionCalls::Regular,321// Tail call: None becomes TailOnly, others unchanged322(FunctionCalls::None, CallType::TailCall) => FunctionCalls::TailOnly,323(current, CallType::TailCall) => current,324};325}326}327328/// Describes a block terminator (not call) in the VCode.329///330/// Actual targets are not included: the single-source-of-truth for331/// those is the VCode itself, which holds, for each block, successors332/// and outgoing branch args per successor.333#[derive(Clone, Debug, PartialEq, Eq)]334pub enum MachTerminator {335/// Not a terminator.336None,337/// A return instruction.338Ret,339/// A tail call.340RetCall,341/// A branch.342Branch,343}344345/// A trait describing the ability to encode a MachInst into binary machine code.346pub trait MachInstEmit: MachInst {347/// Persistent state carried across `emit` invocations.348type State: MachInstEmitState<Self>;349350/// Constant information used in `emit` invocations.351type Info;352353/// Emit the instruction.354fn emit(&self, code: &mut MachBuffer<Self>, info: &Self::Info, state: &mut Self::State);355356/// Pretty-print the instruction.357fn pretty_print_inst(&self, state: &mut Self::State) -> String;358}359360/// A trait describing the emission state carried between MachInsts when361/// emitting a function body.362pub trait MachInstEmitState<I: VCodeInst>: Default + Clone + Debug {363/// Create a new emission state given the ABI object.364fn new(abi: &Callee<I::ABIMachineSpec>, ctrl_plane: ControlPlane) -> Self;365366/// Update the emission state before emitting an instruction that is a367/// safepoint.368fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>);369370/// The emission state holds ownership of a control plane, so it doesn't371/// have to be passed around explicitly too much. `ctrl_plane_mut` may372/// be used if temporary access to the control plane is needed by some373/// other function that doesn't have access to the emission state.374fn ctrl_plane_mut(&mut self) -> &mut ControlPlane;375376/// Used to continue using a control plane after the emission state is377/// not needed anymore.378fn take_ctrl_plane(self) -> ControlPlane;379380/// A hook that triggers when first emitting a new block.381/// It is guaranteed to be called before any instructions are emitted.382fn on_new_block(&mut self) {}383384/// The [`FrameLayout`] for the function currently being compiled.385fn frame_layout(&self) -> &FrameLayout;386}387388/// The result of a `MachBackend::compile_function()` call. Contains machine389/// code (as bytes) and a disassembly, if requested.390#[derive(PartialEq, Debug, Clone)]391#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]392pub struct CompiledCodeBase<T: CompilePhase> {393/// Machine code.394pub buffer: MachBufferFinalized<T>,395/// Disassembly, if requested.396pub vcode: Option<String>,397/// Debug info: value labels to registers/stackslots at code offsets.398pub value_labels_ranges: ValueLabelsRanges,399/// Basic-block layout info: block start offsets.400///401/// This info is generated only if the `machine_code_cfg_info`402/// flag is set.403pub bb_starts: Vec<CodeOffset>,404/// Basic-block layout info: block edges. Each edge is `(from,405/// to)`, where `from` and `to` are basic-block start offsets of406/// the respective blocks.407///408/// This info is generated only if the `machine_code_cfg_info`409/// flag is set.410pub bb_edges: Vec<(CodeOffset, CodeOffset)>,411}412413impl CompiledCodeStencil {414/// Apply function parameters to finalize a stencil into its final form.415pub fn apply_params(self, params: &FunctionParameters) -> CompiledCode {416CompiledCode {417buffer: self.buffer.apply_base_srcloc(params.base_srcloc()),418vcode: self.vcode,419value_labels_ranges: self.value_labels_ranges,420bb_starts: self.bb_starts,421bb_edges: self.bb_edges,422}423}424}425426impl<T: CompilePhase> CompiledCodeBase<T> {427/// Get a `CodeInfo` describing section sizes from this compilation result.428pub fn code_info(&self) -> CodeInfo {429CodeInfo {430total_size: self.buffer.total_size(),431}432}433434/// Returns a reference to the machine code generated for this function compilation.435pub fn code_buffer(&self) -> &[u8] {436self.buffer.data()437}438439/// Get the disassembly of the buffer, using the given capstone context.440#[cfg(feature = "disas")]441pub fn disassemble(442&self,443params: Option<&crate::ir::function::FunctionParameters>,444cs: &capstone::Capstone,445) -> Result<String, anyhow::Error> {446use core::fmt::Write;447448let mut buf = String::new();449450let relocs = self.buffer.relocs();451let traps = self.buffer.traps();452let mut patchables = self.buffer.patchable_call_sites().peekable();453454// Normalize the block starts to include an initial block of offset 0.455let mut block_starts = Vec::new();456if self.bb_starts.first().copied() != Some(0) {457block_starts.push(0);458}459block_starts.extend_from_slice(&self.bb_starts);460block_starts.push(self.buffer.data().len() as u32);461462// Iterate over block regions, to ensure that we always produce block labels463for (n, (&start, &end)) in block_starts464.iter()465.zip(block_starts.iter().skip(1))466.enumerate()467{468writeln!(buf, "block{n}: ; offset 0x{start:x}")?;469470let buffer = &self.buffer.data()[start as usize..end as usize];471let insns = cs.disasm_all(buffer, start as u64).map_err(map_caperr)?;472for i in insns.iter() {473write!(buf, " ")?;474475let op_str = i.op_str().unwrap_or("");476if let Some(s) = i.mnemonic() {477write!(buf, "{s}")?;478if !op_str.is_empty() {479write!(buf, " ")?;480}481}482483write!(buf, "{op_str}")?;484485let end = i.address() + i.bytes().len() as u64;486let contains = |off| i.address() <= off && off < end;487488for reloc in relocs.iter().filter(|reloc| contains(reloc.offset as u64)) {489write!(490buf,491" ; reloc_external {} {} {}",492reloc.kind,493reloc.target.display(params),494reloc.addend,495)?;496}497498if let Some(trap) = traps.iter().find(|trap| contains(trap.offset as u64)) {499write!(buf, " ; trap: {}", trap.code)?;500}501502if let Some(patchable) = patchables.peek()503&& patchable.ret_addr == end as u32504{505write!(506buf,507" ; patchable call: NOP out last {} bytes",508patchable.len509)?;510patchables.next();511}512513writeln!(buf)?;514}515}516517return Ok(buf);518519fn map_caperr(err: capstone::Error) -> anyhow::Error {520anyhow::format_err!("{err}")521}522}523}524525/// Result of compiling a `FunctionStencil`, before applying `FunctionParameters` onto it.526///527/// Only used internally, in a transient manner, for the incremental compilation cache.528pub type CompiledCodeStencil = CompiledCodeBase<Stencil>;529530/// `CompiledCode` in its final form (i.e. after `FunctionParameters` have been applied), ready for531/// consumption.532pub type CompiledCode = CompiledCodeBase<Final>;533534impl CompiledCode {535/// If available, return information about the code layout in the536/// final machine code: the offsets (in bytes) of each basic-block537/// start, and all basic-block edges.538pub fn get_code_bb_layout(&self) -> (Vec<usize>, Vec<(usize, usize)>) {539(540self.bb_starts.iter().map(|&off| off as usize).collect(),541self.bb_edges542.iter()543.map(|&(from, to)| (from as usize, to as usize))544.collect(),545)546}547548/// Creates unwind information for the function.549///550/// Returns `None` if the function has no unwind information.551#[cfg(feature = "unwind")]552pub fn create_unwind_info(553&self,554isa: &dyn crate::isa::TargetIsa,555) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {556use crate::isa::unwind::UnwindInfoKind;557let unwind_info_kind = match isa.triple().operating_system {558target_lexicon::OperatingSystem::Windows => UnwindInfoKind::Windows,559_ => UnwindInfoKind::SystemV,560};561self.create_unwind_info_of_kind(isa, unwind_info_kind)562}563564/// Creates unwind information for the function using the supplied565/// "kind". Supports cross-OS (but not cross-arch) generation.566///567/// Returns `None` if the function has no unwind information.568#[cfg(feature = "unwind")]569pub fn create_unwind_info_of_kind(570&self,571isa: &dyn crate::isa::TargetIsa,572unwind_info_kind: crate::isa::unwind::UnwindInfoKind,573) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {574isa.emit_unwind_info(self, unwind_info_kind)575}576}577578/// An object that can be used to create the text section of an executable.579///580/// This primarily handles resolving relative relocations at581/// text-section-assembly time rather than at load/link time. This582/// architecture-specific logic is sort of like a linker, but only for one583/// object file at a time.584pub trait TextSectionBuilder {585/// Appends `data` to the text section with the `align` specified.586///587/// If `labeled` is `true` then this also binds the appended data to the588/// `n`th label for how many times this has been called with `labeled:589/// true`. The label target can be passed as the `target` argument to590/// `resolve_reloc`.591///592/// This function returns the offset at which the data was placed in the593/// text section.594fn append(595&mut self,596labeled: bool,597data: &[u8],598align: u32,599ctrl_plane: &mut ControlPlane,600) -> u64;601602/// Attempts to resolve a relocation for this function.603///604/// The `offset` is the offset of the relocation, within the text section.605/// The `reloc` is the kind of relocation.606/// The `addend` is the value to add to the relocation.607/// The `target` is the labeled function that is the target of this608/// relocation.609///610/// Labeled functions are created with the `append` function above by611/// setting the `labeled` parameter to `true`.612///613/// If this builder does not know how to handle `reloc` then this function614/// will return `false`. Otherwise this function will return `true` and this615/// relocation will be resolved in the final bytes returned by `finish`.616fn resolve_reloc(&mut self, offset: u64, reloc: Reloc, addend: Addend, target: usize) -> bool;617618/// A debug-only option which is used to for619fn force_veneers(&mut self);620621/// Write the `data` provided at `offset`, for example when resolving a622/// relocation.623fn write(&mut self, offset: u64, data: &[u8]);624625/// Completes this text section, filling out any final details, and returns626/// the bytes of the text section.627fn finish(&mut self, ctrl_plane: &mut ControlPlane) -> Vec<u8>;628}629630631