// Path: blob/main/cranelift/codegen/src/machinst/mod.rs
// 1693 views
//! This module exposes the machine-specific backend definition pieces.1//!2//! The MachInst infrastructure is the compiler backend, from CLIF3//! (ir::Function) to machine code. The purpose of this infrastructure is, at a4//! high level, to do instruction selection/lowering (to machine instructions),5//! register allocation, and then perform all the fixups to branches, constant6//! data references, etc., needed to actually generate machine code.7//!8//! The container for machine instructions, at various stages of construction,9//! is the `VCode` struct. We refer to a sequence of machine instructions organized10//! into basic blocks as "vcode". This is short for "virtual-register code".11//!12//! The compilation pipeline, from an `ir::Function` (already optimized as much as13//! you like by machine-independent optimization passes) onward, is as follows.14//!15//! ```plain16//!17//! ir::Function (SSA IR, machine-independent opcodes)18//! |19//! | [lower]20//! |21//! VCode<arch_backend::Inst> (machine instructions:22//! | - mostly virtual registers.23//! | - cond branches in two-target form.24//! | - branch targets are block indices.25//! | - in-memory constants held by insns,26//! | with unknown offsets.27//! | - critical edges (actually all edges)28//! | are split.)29//! |30//! | [regalloc --> `regalloc2::Output`; VCode is unchanged]31//! |32//! | [binary emission via MachBuffer]33//! |34//! Vec<u8> (machine code:35//! | - two-dest branches resolved via36//! | streaming branch resolution/simplification.37//! | - regalloc `Allocation` results used directly38//! | by instruction emission code.39//! | - prologue and epilogue(s) built and emitted40//! | directly during emission.41//! | - SP-relative offsets resolved by tracking42//! | EmitState.)43//!44//! 
```4546use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc};47use crate::ir::{48self, DynamicStackSlot, RelSourceLoc, StackSlot, Type, function::FunctionParameters,49};50use crate::isa::FunctionAlignment;51use crate::result::CodegenResult;52use crate::settings;53use crate::settings::Flags;54use crate::value_label::ValueLabelsRanges;55use alloc::vec::Vec;56use core::fmt::Debug;57use cranelift_control::ControlPlane;58use cranelift_entity::PrimaryMap;59use regalloc2::VReg;60use smallvec::{SmallVec, smallvec};61use std::string::String;6263#[cfg(feature = "enable-serde")]64use serde_derive::{Deserialize, Serialize};6566#[macro_use]67pub mod isle;6869pub mod lower;70pub use lower::*;71pub mod vcode;72pub use vcode::*;73pub mod compile;74pub use compile::*;75pub mod blockorder;76pub use blockorder::*;77pub mod abi;78pub use abi::*;79pub mod buffer;80pub use buffer::*;81pub mod helpers;82pub use helpers::*;83pub mod valueregs;84pub use reg::*;85pub use valueregs::*;86pub mod pcc;87pub mod reg;8889/// A machine instruction.90pub trait MachInst: Clone + Debug {91/// The ABI machine spec for this `MachInst`.92type ABIMachineSpec: ABIMachineSpec<I = Self>;9394/// Return the registers referenced by this machine instruction along with95/// the modes of reference (use, def, modify).96fn get_operands(&mut self, collector: &mut impl OperandVisitor);9798/// If this is a simple move, return the (source, destination) tuple of registers.99fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;100101/// Is this a terminator (branch or ret)? 
If so, return its type102/// (ret/uncond/cond) and target if applicable.103fn is_term(&self) -> MachTerminator;104105/// Is this an unconditional trap?106fn is_trap(&self) -> bool;107108/// Is this an "args" pseudoinst?109fn is_args(&self) -> bool;110111/// Classify the type of call instruction this is.112///113/// This enables more granular function type analysis and optimization.114/// Returns `CallType::None` for non-call instructions, `CallType::Regular`115/// for normal calls that return to the caller, and `CallType::TailCall`116/// for tail calls that don't return to the caller.117fn call_type(&self) -> CallType;118119/// Should this instruction's clobber-list be included in the120/// clobber-set?121fn is_included_in_clobbers(&self) -> bool;122123/// Does this instruction access memory?124fn is_mem_access(&self) -> bool;125126/// Generate a move.127fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;128129/// Generate a dummy instruction that will keep a value alive but130/// has no other purpose.131fn gen_dummy_use(reg: Reg) -> Self;132133/// Determine register class(es) to store the given Cranelift type, and the134/// Cranelift type actually stored in the underlying register(s). May return135/// an error if the type isn't supported by this backend.136///137/// If the type requires multiple registers, then the list of registers is138/// returned in little-endian order.139///140/// Note that the type actually stored in the register(s) may differ in the141/// case that a value is split across registers: for example, on a 32-bit142/// target, an I64 may be stored in two registers, each of which holds an143/// I32. The actually-stored types are used only to inform the backend when144/// generating spills and reloads for individual registers.145fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])>;146147/// Get an appropriate type that can fully hold a value in a given148/// register class. 
This may not be the only type that maps to149/// that class, but when used with `gen_move()` or the ABI trait's150/// load/spill constructors, it should produce instruction(s) that151/// move the entire register contents.152fn canonical_type_for_rc(rc: RegClass) -> Type;153154/// Generate a jump to another target. Used during lowering of155/// control flow.156fn gen_jump(target: MachLabel) -> Self;157158/// Generate a store of an immediate 64-bit integer to a register. Used by159/// the control plane to generate random instructions.160fn gen_imm_u64(_value: u64, _dst: Writable<Reg>) -> Option<Self> {161None162}163164/// Generate a store of an immediate 64-bit integer to a register. Used by165/// the control plane to generate random instructions. The tmp register may166/// be used by architectures which don't support writing immediate values to167/// floating point registers directly.168fn gen_imm_f64(_value: f64, _tmp: Writable<Reg>, _dst: Writable<Reg>) -> SmallVec<[Self; 2]> {169SmallVec::new()170}171172/// Generate a NOP. The `preferred_size` parameter allows the caller to173/// request a NOP of that size, or as close to it as possible. The machine174/// backend may return a NOP whose binary encoding is smaller than the175/// preferred size, but must not return a NOP that is larger. However,176/// the instruction must have a nonzero size if preferred_size is nonzero.177fn gen_nop(preferred_size: usize) -> Self;178179/// Align a basic block offset (from start of function). By default, no180/// alignment occurs.181fn align_basic_block(offset: CodeOffset) -> CodeOffset {182offset183}184185/// What is the worst-case instruction size emitted by this instruction type?186fn worst_case_size() -> CodeOffset;187188/// What is the register class used for reference types (GC-observable pointers)? 
Can189/// be dependent on compilation flags.190fn ref_type_regclass(_flags: &Flags) -> RegClass;191192/// Is this a safepoint?193fn is_safepoint(&self) -> bool;194195/// Generate an instruction that must appear at the beginning of a basic196/// block, if any. Note that the return value must not be subject to197/// register allocation.198fn gen_block_start(199_is_indirect_branch_target: bool,200_is_forward_edge_cfi_enabled: bool,201) -> Option<Self> {202None203}204205/// Returns a description of the alignment required for functions for this206/// architecture.207fn function_alignment() -> FunctionAlignment;208209/// Is this a low-level, one-way branch, not meant for use in a210/// VCode body? These instructions are meant to be used only when211/// directly emitted, i.e. when `MachInst` is used as an assembler212/// library.213fn is_low_level_branch(&self) -> bool {214false215}216217/// A label-use kind: a type that describes the types of label references that218/// can occur in an instruction.219type LabelUse: MachInstLabelUse;220221/// Byte representation of a trap opcode which is inserted by `MachBuffer`222/// during its `defer_trap` method.223const TRAP_OPCODE: &'static [u8];224}225226/// A descriptor of a label reference (use) in an instruction set.227pub trait MachInstLabelUse: Clone + Copy + Debug + Eq {228/// Required alignment for any veneer. Usually the required instruction229/// alignment (e.g., 4 for a RISC with 32-bit instructions, or 1 for x86).230const ALIGN: CodeOffset;231232/// What is the maximum PC-relative range (positive)? E.g., if `1024`, a233/// label-reference fixup at offset `x` is valid if the label resolves to `x234/// + 1024`.235fn max_pos_range(self) -> CodeOffset;236/// What is the maximum PC-relative range (negative)? 
This is the absolute237/// value; i.e., if `1024`, then a label-reference fixup at offset `x` is238/// valid if the label resolves to `x - 1024`.239fn max_neg_range(self) -> CodeOffset;240/// What is the size of code-buffer slice this label-use needs to patch in241/// the label's value?242fn patch_size(self) -> CodeOffset;243/// Perform a code-patch, given the offset into the buffer of this label use244/// and the offset into the buffer of the label's definition.245/// It is guaranteed that, given `delta = offset - label_offset`, we will246/// have `offset >= -self.max_neg_range()` and `offset <=247/// self.max_pos_range()`.248fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset);249/// Can the label-use be patched to a veneer that supports a longer range?250/// Usually valid for jumps (a short-range jump can jump to a longer-range251/// jump), but not for e.g. constant pool references, because the constant252/// load would require different code (one more level of indirection).253fn supports_veneer(self) -> bool;254/// How many bytes are needed for a veneer?255fn veneer_size(self) -> CodeOffset;256/// What's the largest possible veneer that may be generated?257fn worst_case_veneer_size() -> CodeOffset;258/// Generate a veneer. The given code-buffer slice is `self.veneer_size()`259/// bytes long at offset `veneer_offset` in the buffer. The original260/// label-use will be patched to refer to this veneer's offset. A new261/// (offset, LabelUse) is returned that allows the veneer to use the actual262/// label. 
For veneers to work properly, it is expected that the new veneer263/// has a larger range; on most platforms this probably means either a264/// "long-range jump" (e.g., on ARM, the 26-bit form), or if already at that265/// stage, a jump that supports a full 32-bit range, for example.266fn generate_veneer(self, buffer: &mut [u8], veneer_offset: CodeOffset) -> (CodeOffset, Self);267268/// Returns the corresponding label-use for the relocation specified.269///270/// This returns `None` if the relocation doesn't have a corresponding271/// representation for the target architecture.272fn from_reloc(reloc: Reloc, addend: Addend) -> Option<Self>;273}274275/// Classification of call instruction types for granular analysis.276#[derive(Clone, Copy, Debug, PartialEq, Eq)]277pub enum CallType {278/// Not a call instruction.279None,280/// Regular call that returns to the caller.281Regular,282/// Tail call that doesn't return to the caller.283TailCall,284}285286/// Function classification based on call patterns.287///288/// This enum classifies functions based on their calling behavior to enable289/// targeted optimizations. 
Functions are categorized as:290/// - `None`: No calls at all (can use simplified calling conventions)291/// - `TailOnly`: Only tail calls (may skip frame setup in some cases)292/// - `Regular`: Has regular calls (requires full calling convention support)293#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]294pub enum FunctionCalls {295/// Function makes no calls at all.296#[default]297None,298/// Function only makes tail calls (no regular calls).299TailOnly,300/// Function makes at least one regular call (may also have tail calls).301Regular,302}303304impl FunctionCalls {305/// Update the function classification based on a new call instruction.306///307/// This method implements the merge logic for accumulating call patterns:308/// - Any regular call makes the function Regular309/// - Tail calls upgrade None to TailOnly310/// - Regular always stays Regular311pub fn update(&mut self, call_type: CallType) {312*self = match (*self, call_type) {313// No call instruction - state unchanged314(current, CallType::None) => current,315// Regular call always results in Regular classification316(_, CallType::Regular) => FunctionCalls::Regular,317// Tail call: None becomes TailOnly, others unchanged318(FunctionCalls::None, CallType::TailCall) => FunctionCalls::TailOnly,319(current, CallType::TailCall) => current,320};321}322}323324/// Describes a block terminator (not call) in the VCode.325///326/// Actual targets are not included: the single-source-of-truth for327/// those is the VCode itself, which holds, for each block, successors328/// and outgoing branch args per successor.329#[derive(Clone, Debug, PartialEq, Eq)]330pub enum MachTerminator {331/// Not a terminator.332None,333/// A return instruction.334Ret,335/// A tail call.336RetCall,337/// A branch.338Branch,339}340341/// A trait describing the ability to encode a MachInst into binary machine code.342pub trait MachInstEmit: MachInst {343/// Persistent state carried across `emit` invocations.344type State: 
MachInstEmitState<Self>;345346/// Constant information used in `emit` invocations.347type Info;348349/// Emit the instruction.350fn emit(&self, code: &mut MachBuffer<Self>, info: &Self::Info, state: &mut Self::State);351352/// Pretty-print the instruction.353fn pretty_print_inst(&self, state: &mut Self::State) -> String;354}355356/// A trait describing the emission state carried between MachInsts when357/// emitting a function body.358pub trait MachInstEmitState<I: VCodeInst>: Default + Clone + Debug {359/// Create a new emission state given the ABI object.360fn new(abi: &Callee<I::ABIMachineSpec>, ctrl_plane: ControlPlane) -> Self;361362/// Update the emission state before emitting an instruction that is a363/// safepoint.364fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>);365366/// The emission state holds ownership of a control plane, so it doesn't367/// have to be passed around explicitly too much. `ctrl_plane_mut` may368/// be used if temporary access to the control plane is needed by some369/// other function that doesn't have access to the emission state.370fn ctrl_plane_mut(&mut self) -> &mut ControlPlane;371372/// Used to continue using a control plane after the emission state is373/// not needed anymore.374fn take_ctrl_plane(self) -> ControlPlane;375376/// A hook that triggers when first emitting a new block.377/// It is guaranteed to be called before any instructions are emitted.378fn on_new_block(&mut self) {}379380/// The [`FrameLayout`] for the function currently being compiled.381fn frame_layout(&self) -> &FrameLayout;382}383384/// The result of a `MachBackend::compile_function()` call. 
Contains machine385/// code (as bytes) and a disassembly, if requested.386#[derive(PartialEq, Debug, Clone)]387#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]388pub struct CompiledCodeBase<T: CompilePhase> {389/// Machine code.390pub buffer: MachBufferFinalized<T>,391/// Size of stack frame, in bytes.392pub frame_size: u32,393/// Disassembly, if requested.394pub vcode: Option<String>,395/// Debug info: value labels to registers/stackslots at code offsets.396pub value_labels_ranges: ValueLabelsRanges,397/// Debug info: stackslots to stack pointer offsets.398pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,399/// Debug info: stackslots to stack pointer offsets.400pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,401/// Basic-block layout info: block start offsets.402///403/// This info is generated only if the `machine_code_cfg_info`404/// flag is set.405pub bb_starts: Vec<CodeOffset>,406/// Basic-block layout info: block edges. Each edge is `(from,407/// to)`, where `from` and `to` are basic-block start offsets of408/// the respective blocks.409///410/// This info is generated only if the `machine_code_cfg_info`411/// flag is set.412pub bb_edges: Vec<(CodeOffset, CodeOffset)>,413}414415impl CompiledCodeStencil {416/// Apply function parameters to finalize a stencil into its final form.417pub fn apply_params(self, params: &FunctionParameters) -> CompiledCode {418CompiledCode {419buffer: self.buffer.apply_base_srcloc(params.base_srcloc()),420frame_size: self.frame_size,421vcode: self.vcode,422value_labels_ranges: self.value_labels_ranges,423sized_stackslot_offsets: self.sized_stackslot_offsets,424dynamic_stackslot_offsets: self.dynamic_stackslot_offsets,425bb_starts: self.bb_starts,426bb_edges: self.bb_edges,427}428}429}430431impl<T: CompilePhase> CompiledCodeBase<T> {432/// Get a `CodeInfo` describing section sizes from this compilation result.433pub fn code_info(&self) -> CodeInfo {434CodeInfo {435total_size: 
self.buffer.total_size(),436}437}438439/// Returns a reference to the machine code generated for this function compilation.440pub fn code_buffer(&self) -> &[u8] {441self.buffer.data()442}443444/// Get the disassembly of the buffer, using the given capstone context.445#[cfg(feature = "disas")]446pub fn disassemble(447&self,448params: Option<&crate::ir::function::FunctionParameters>,449cs: &capstone::Capstone,450) -> Result<String, anyhow::Error> {451use std::fmt::Write;452453let mut buf = String::new();454455let relocs = self.buffer.relocs();456let traps = self.buffer.traps();457458// Normalize the block starts to include an initial block of offset 0.459let mut block_starts = Vec::new();460if self.bb_starts.first().copied() != Some(0) {461block_starts.push(0);462}463block_starts.extend_from_slice(&self.bb_starts);464block_starts.push(self.buffer.data().len() as u32);465466// Iterate over block regions, to ensure that we always produce block labels467for (n, (&start, &end)) in block_starts468.iter()469.zip(block_starts.iter().skip(1))470.enumerate()471{472writeln!(buf, "block{n}: ; offset 0x{start:x}")?;473474let buffer = &self.buffer.data()[start as usize..end as usize];475let insns = cs.disasm_all(buffer, start as u64).map_err(map_caperr)?;476for i in insns.iter() {477write!(buf, " ")?;478479let op_str = i.op_str().unwrap_or("");480if let Some(s) = i.mnemonic() {481write!(buf, "{s}")?;482if !op_str.is_empty() {483write!(buf, " ")?;484}485}486487write!(buf, "{op_str}")?;488489let end = i.address() + i.bytes().len() as u64;490let contains = |off| i.address() <= off && off < end;491492for reloc in relocs.iter().filter(|reloc| contains(reloc.offset as u64)) {493write!(494buf,495" ; reloc_external {} {} {}",496reloc.kind,497reloc.target.display(params),498reloc.addend,499)?;500}501502if let Some(trap) = traps.iter().find(|trap| contains(trap.offset as u64)) {503write!(buf, " ; trap: {}", trap.code)?;504}505506writeln!(buf)?;507}508}509510return Ok(buf);511512fn 
map_caperr(err: capstone::Error) -> anyhow::Error {513anyhow::format_err!("{}", err)514}515}516}517518/// Result of compiling a `FunctionStencil`, before applying `FunctionParameters` onto it.519///520/// Only used internally, in a transient manner, for the incremental compilation cache.521pub type CompiledCodeStencil = CompiledCodeBase<Stencil>;522523/// `CompiledCode` in its final form (i.e. after `FunctionParameters` have been applied), ready for524/// consumption.525pub type CompiledCode = CompiledCodeBase<Final>;526527impl CompiledCode {528/// If available, return information about the code layout in the529/// final machine code: the offsets (in bytes) of each basic-block530/// start, and all basic-block edges.531pub fn get_code_bb_layout(&self) -> (Vec<usize>, Vec<(usize, usize)>) {532(533self.bb_starts.iter().map(|&off| off as usize).collect(),534self.bb_edges535.iter()536.map(|&(from, to)| (from as usize, to as usize))537.collect(),538)539}540541/// Creates unwind information for the function.542///543/// Returns `None` if the function has no unwind information.544#[cfg(feature = "unwind")]545pub fn create_unwind_info(546&self,547isa: &dyn crate::isa::TargetIsa,548) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {549use crate::isa::unwind::UnwindInfoKind;550let unwind_info_kind = match isa.triple().operating_system {551target_lexicon::OperatingSystem::Windows => UnwindInfoKind::Windows,552_ => UnwindInfoKind::SystemV,553};554self.create_unwind_info_of_kind(isa, unwind_info_kind)555}556557/// Creates unwind information for the function using the supplied558/// "kind". 
Supports cross-OS (but not cross-arch) generation.559///560/// Returns `None` if the function has no unwind information.561#[cfg(feature = "unwind")]562pub fn create_unwind_info_of_kind(563&self,564isa: &dyn crate::isa::TargetIsa,565unwind_info_kind: crate::isa::unwind::UnwindInfoKind,566) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {567isa.emit_unwind_info(self, unwind_info_kind)568}569}570571/// An object that can be used to create the text section of an executable.572///573/// This primarily handles resolving relative relocations at574/// text-section-assembly time rather than at load/link time. This575/// architecture-specific logic is sort of like a linker, but only for one576/// object file at a time.577pub trait TextSectionBuilder {578/// Appends `data` to the text section with the `align` specified.579///580/// If `labeled` is `true` then this also binds the appended data to the581/// `n`th label for how many times this has been called with `labeled:582/// true`. The label target can be passed as the `target` argument to583/// `resolve_reloc`.584///585/// This function returns the offset at which the data was placed in the586/// text section.587fn append(588&mut self,589labeled: bool,590data: &[u8],591align: u32,592ctrl_plane: &mut ControlPlane,593) -> u64;594595/// Attempts to resolve a relocation for this function.596///597/// The `offset` is the offset of the relocation, within the text section.598/// The `reloc` is the kind of relocation.599/// The `addend` is the value to add to the relocation.600/// The `target` is the labeled function that is the target of this601/// relocation.602///603/// Labeled functions are created with the `append` function above by604/// setting the `labeled` parameter to `true`.605///606/// If this builder does not know how to handle `reloc` then this function607/// will return `false`. 
Otherwise this function will return `true` and this608/// relocation will be resolved in the final bytes returned by `finish`.609fn resolve_reloc(&mut self, offset: u64, reloc: Reloc, addend: Addend, target: usize) -> bool;610611/// A debug-only option which is used to for612fn force_veneers(&mut self);613614/// Write the `data` provided at `offset`, for example when resolving a615/// relocation.616fn write(&mut self, offset: u64, data: &[u8]);617618/// Completes this text section, filling out any final details, and returns619/// the bytes of the text section.620fn finish(&mut self, ctrl_plane: &mut ControlPlane) -> Vec<u8>;621}622623624