Path: blob/main/cranelift/codegen/src/isa/x64/abi.rs
//! Implementation of the standard x64 ABI.

use crate::CodegenResult;
use crate::ir::{self, LibCall, MemFlags, Signature, TrapCode, types};
use crate::ir::{ExternalName, types::*};
use crate::isa;
use crate::isa::winch;
use crate::isa::{CallConv, unwind::UnwindInst, x64::inst::*, x64::settings as x64_settings};
use crate::machinst::abi::*;
use crate::machinst::*;
use crate::settings;
use alloc::borrow::ToOwned;
use alloc::boxed::Box;
use alloc::vec::Vec;
use args::*;
use cranelift_assembler_x64 as asm;
use regalloc2::{MachineEnv, PReg, PRegSet};
use smallvec::{SmallVec, smallvec};
use std::sync::OnceLock;

/// Support for the x64 ABI from the callee side (within a function body).
pub(crate) type X64Callee = Callee<X64ABIMachineSpec>;

/// Implementation of ABI primitives for x64.
pub struct X64ABIMachineSpec;

impl X64ABIMachineSpec {
    fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
        insts.reserve(probe_count as usize);
        for _ in 0..probe_count {
            // "Allocate" stack space for the probe by decrementing the stack pointer before
            // the write. This is required to make valgrind happy.
            // See: https://github.com/bytecodealliance/wasmtime/issues/7454
            insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));

            // Touch the current page by storing an immediate zero.
            // mov [rsp], 0
            insts.push(Inst::External {
                inst: asm::inst::movl_mi::new(Amode::imm_reg(0, regs::rsp()), 0i32.cast_unsigned())
                    .into(),
            });
        }

        // Restore the stack pointer to its original value
        insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
    }

    fn gen_probestack_loop(
        insts: &mut SmallInstVec<Inst>,
        _call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // We have to use a caller-saved register since clobbering only
        // happens after stack probing.
        // `r11` is caller saved on both Fastcall and SystemV, and not used
        // for argument passing, so it's pretty much free. It is also not
        // used by the stacklimit mechanism.
        let tmp = regs::r11();
        debug_assert!({
            let real_reg = tmp.to_real_reg().unwrap();
            !is_callee_save_systemv(real_reg, false) && !is_callee_save_fastcall(real_reg, false)
        });

        insts.push(Inst::StackProbeLoop {
            tmp: Writable::from_reg(tmp),
            frame_size,
            guard_size,
        });
    }
}
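
// Illustrative sketch (not in the original source): for `guard_size = 4096`
// and `probe_count = 3`, `gen_probestack_unroll` above emits the equivalent
// of:
//
//   sub  rsp, 4096
//   mov  dword [rsp], 0
//   sub  rsp, 4096
//   mov  dword [rsp], 0
//   sub  rsp, 4096
//   mov  dword [rsp], 0
//   add  rsp, 12288        ; 3 * 4096, restoring RSP
//
// while `gen_probestack_loop` instead emits a single `Inst::StackProbeLoop`
// pseudo-instruction that expands to a compact probe loop using `r11` as a
// scratch register.
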
impl IsaFlags for x64_settings::Flags {}

impl ABIMachineSpec for X64ABIMachineSpec {
    type I = Inst;

    type F = x64_settings::Flags;

    /// This is the limit for the size of argument and return-value areas on the
    /// stack. We place a reasonable limit here to avoid integer overflow issues
    /// with 32-bit arithmetic: for now, 128 MB.
    const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

    fn word_bits() -> u32 {
        64
    }

    /// Return required stack alignment in bytes.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }

    fn compute_arg_locs(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        mut args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)> {
        let is_fastcall = call_conv == CallConv::WindowsFastcall;
        let is_tail = call_conv == CallConv::Tail;

        let mut next_gpr = 0;
        let mut next_vreg = 0;
        let mut next_stack: u32 = 0;
        let mut next_param_idx = 0; // Fastcall cares about overall param index

        if args_or_rets == ArgsOrRets::Args && is_fastcall {
            // Fastcall always reserves 32 bytes of shadow space corresponding to
            // the four initial in-arg parameters.
            //
            // (See:
            // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170)
            next_stack = 32;
        }

        let ret_area_ptr = if add_ret_area_ptr {
            debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
            next_gpr += 1;
            next_param_idx += 1;
            // In the SystemV and WindowsFastcall ABIs, the return area pointer is the first
            // argument. For the Tail and Winch ABIs we do the same, for simplicity's sake.
            Some(ABIArg::reg(
                get_intreg_for_arg(call_conv, 0, 0)
                    .unwrap()
                    .to_real_reg()
                    .unwrap(),
                types::I64,
                ir::ArgumentExtension::None,
                ir::ArgumentPurpose::Normal,
            ))
        } else {
            None
        };
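
        // Illustrative note (not in the original source): since the return
        // area pointer is assigned via `get_intreg_for_arg(call_conv, 0, 0)`,
        // it lands in the first integer argument register of the convention:
        // `rdi` for SystemV/Tail/Winch and `rcx` for WindowsFastcall.
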
        // If any param uses extension, the winch calling convention will not pack its results
        // on the stack and will instead align them to 8-byte boundaries the same way that all the
        // other calling conventions do. This isn't consistent with Winch itself, but is fine as
        // Winch only uses this calling convention via trampolines, and those trampolines don't add
        // extension annotations. Additionally, handling extension attributes this way allows clif
        // functions that use them with the Winch calling convention to interact successfully with
        // testing infrastructure.
        // The results are also not packed if any of the types are `f16`. This is to simplify the
        // implementation of `Inst::load`/`Inst::store` (which would otherwise require multiple
        // instructions), and doesn't affect Winch itself as Winch doesn't support `f16` at all.
        let uses_extension = params.iter().any(|p| {
            p.extension != ir::ArgumentExtension::None
                || p.value_type == types::F16
                || p.value_type == types::I8X2
        });

        for (ix, param) in params.iter().enumerate() {
            let last_param = ix == params.len() - 1;

            if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
                let offset = next_stack as i64;
                let size = size;
                assert!(size % 8 == 0, "StructArgument size is not properly aligned");
                next_stack += size;
                args.push(ABIArg::StructArg {
                    offset,
                    size: size as u64,
                    purpose: param.purpose,
                });
                continue;
            }

            // Find regclass(es) of the register(s) used to store a value of this type.
            let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;

            // Now assign ABIArgSlots for each register-sized part.
            //
            // Note that the handling of `i128` values is unique here:
            //
            // - If `enable_llvm_abi_extensions` is set in the flags, each
            //   `i128` is split into two `i64`s and assigned exactly as if it
            //   were two consecutive 64-bit args, except that if one of the
            //   two halves is forced onto the stack, the other half is too.
            //   This is consistent with LLVM's behavior, and is needed for
            //   some uses of Cranelift (e.g., the rustc backend).
            //
            // - Otherwise, if the calling convention is Tail, we behave as in
            //   the previous case, even if `enable_llvm_abi_extensions` is not
            //   set in the flags: this is a custom calling convention defined
            //   by Cranelift; LLVM doesn't know about it.
            //
            // - Otherwise, both SysV and Fastcall specify behavior (use of
            //   vector register, a register pair, or passing by reference
            //   depending on the case), but for simplicity, we will just panic if
            //   an i128 type appears in a signature and the LLVM extensions flag
            //   is not set.
            //
            // For examples of how rustc compiles i128 args and return values on
            // both SysV and Fastcall platforms, see:
            // https://godbolt.org/z/PhG3ob

            if param.value_type.bits() > 64
                && !(param.value_type.is_vector() || param.value_type.is_float())
                && !flags.enable_llvm_abi_extensions()
                && !is_tail
            {
                panic!(
                    "i128 args/return values not supported unless LLVM ABI extensions are enabled"
                );
            }
            // As MSVC doesn't support f16/f128, there is no standard way to pass/return them
            // with the Windows ABI. LLVM passes/returns them in XMM registers.
            if matches!(param.value_type, types::F16 | types::F128)
                && is_fastcall
                && !flags.enable_llvm_abi_extensions()
            {
                panic!(
                    "f16/f128 args/return values not supported for windows_fastcall unless LLVM ABI extensions are enabled"
                );
            }

            // Windows fastcall dictates that `__m128i` and `f128` parameters to
            // a function are passed indirectly as pointers, so handle that as a
            // special case before the loop below.
            if (param.value_type.is_vector() || param.value_type.is_float())
                && param.value_type.bits() >= 128
                && args_or_rets == ArgsOrRets::Args
                && is_fastcall
            {
                let pointer = match get_intreg_for_arg(call_conv, next_gpr, next_param_idx) {
                    Some(reg) => {
                        next_gpr += 1;
                        ABIArgSlot::Reg {
                            reg: reg.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        }
                    }

                    None => {
                        next_stack = align_to(next_stack, 8) + 8;
                        ABIArgSlot::Stack {
                            offset: (next_stack - 8) as i64,
                            ty: ir::types::I64,
                            extension: param.extension,
                        }
                    }
                };
                next_param_idx += 1;
                args.push(ABIArg::ImplicitPtrArg {
                    // NB: this is filled in after this loop
                    offset: 0,
                    pointer,
                    ty: param.value_type,
                    purpose: param.purpose,
                });
                continue;
            }
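
            // Illustrative note (not in the original source): under
            // WindowsFastcall a parameter such as `i8x16` or `f128` therefore
            // consumes one integer parameter slot (`rcx`/`rdx`/`r8`/`r9`, or
            // a stack slot once those run out) holding a pointer to the
            // 16-byte value; the pointed-to storage is reserved after the
            // parameter loop, where each `ImplicitPtrArg`'s `offset` is
            // filled in.
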
            // SystemV dictates that 128-bit int parameters are always either
            // passed in two registers or on the stack, so handle that as a
            // special case before the loop below.
            if param.value_type == types::I128
                && args_or_rets == ArgsOrRets::Args
                && call_conv == CallConv::SystemV
            {
                let mut slots = ABIArgSlotVec::new();
                match (
                    get_intreg_for_arg(CallConv::SystemV, next_gpr, next_param_idx),
                    get_intreg_for_arg(CallConv::SystemV, next_gpr + 1, next_param_idx + 1),
                ) {
                    (Some(reg1), Some(reg2)) => {
                        slots.push(ABIArgSlot::Reg {
                            reg: reg1.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        });
                        slots.push(ABIArgSlot::Reg {
                            reg: reg2.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        });
                    }
                    _ => {
                        let size = 16;

                        // Align.
                        next_stack = align_to(next_stack, size);

                        slots.push(ABIArgSlot::Stack {
                            offset: next_stack as i64,
                            ty: ir::types::I64,
                            extension: param.extension,
                        });
                        slots.push(ABIArgSlot::Stack {
                            offset: next_stack as i64 + 8,
                            ty: ir::types::I64,
                            extension: param.extension,
                        });
                        next_stack += size;
                    }
                };
                // Unconditionally increment next_gpr, even when storing the
                // argument on the stack, to prevent reusing a possibly
                // remaining register for the next argument.
                next_gpr += 2;
                next_param_idx += 2;

                args.push(ABIArg::Slots {
                    slots,
                    purpose: param.purpose,
                });
                continue;
            }
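
            // Illustrative note (not in the original source): given the SysV
            // integer argument order rdi, rsi, rdx, rcx, r8, r9, an `i128` as
            // the first parameter is split across the (rdi, rsi) pair; once
            // fewer than two integer registers remain, both halves are placed
            // in a 16-byte-aligned stack pair at `next_stack` and
            // `next_stack + 8`.
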
            let mut slots = ABIArgSlotVec::new();
            for (ix, (rc, reg_ty)) in rcs.iter().zip(reg_tys.iter()).enumerate() {
                let last_slot = last_param && ix == rcs.len() - 1;

                let intreg = *rc == RegClass::Int;
                let nextreg = if intreg {
                    match args_or_rets {
                        ArgsOrRets::Args => get_intreg_for_arg(call_conv, next_gpr, next_param_idx),
                        ArgsOrRets::Rets => {
                            get_intreg_for_retval(call_conv, flags, next_gpr, last_slot)
                        }
                    }
                } else {
                    match args_or_rets {
                        ArgsOrRets::Args => {
                            get_fltreg_for_arg(call_conv, next_vreg, next_param_idx)
                        }
                        ArgsOrRets::Rets => get_fltreg_for_retval(call_conv, next_vreg, last_slot),
                    }
                };
                next_param_idx += 1;
                if let Some(reg) = nextreg {
                    if intreg {
                        next_gpr += 1;
                    } else {
                        next_vreg += 1;
                    }
                    slots.push(ABIArgSlot::Reg {
                        reg: reg.to_real_reg().unwrap(),
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                } else {
                    if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
                        return Err(crate::CodegenError::Unsupported(
                            "Too many return values to fit in registers. \
                            Use a StructReturn argument instead. (#9510)"
                                .to_owned(),
                        ));
                    }

                    let size = reg_ty.bytes();
                    let size = if call_conv == CallConv::Winch
                        && args_or_rets == ArgsOrRets::Rets
                        && !uses_extension
                    {
                        size
                    } else {
                        let size = core::cmp::max(size, 8);

                        // Align.
                        debug_assert!(size.is_power_of_two());
                        next_stack = align_to(next_stack, size);
                        size
                    };

                    slots.push(ABIArgSlot::Stack {
                        offset: next_stack as i64,
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                    next_stack += size;
                }
            }

            args.push(ABIArg::Slots {
                slots,
                purpose: param.purpose,
            });
        }

        // Fastcall's indirect 128+ bit vector arguments are all located on the
        // stack, and stack space is reserved after all parameters are passed,
        // so allocate from the space now.
        if args_or_rets == ArgsOrRets::Args && is_fastcall {
            for arg in args.args_mut() {
                if let ABIArg::ImplicitPtrArg { offset, .. } = arg {
                    assert_eq!(*offset, 0);
                    next_stack = align_to(next_stack, 16);
                    *offset = next_stack as i64;
                    next_stack += 16;
                }
            }
        }
        let extra_arg_idx = if let Some(ret_area_ptr) = ret_area_ptr {
            args.push_non_formal(ret_area_ptr);
            Some(args.args().len() - 1)
        } else {
            None
        };

        // Winch writes the first result to the highest offset, so we need to iterate through the
        // args and adjust the offsets down.
        if call_conv == CallConv::Winch && args_or_rets == ArgsOrRets::Rets {
            winch::reverse_stack(args, next_stack, uses_extension);
        }

        next_stack = align_to(next_stack, 16);

        Ok((next_stack, extra_arg_idx))
    }
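
    // Illustrative note (not in the original source): for a SystemV signature
    // `fn(i64, f64, i64, f64)`, `compute_arg_locs` assigns registers per
    // class (rdi, xmm0, rsi, xmm1) and leaves `next_stack = 0`. Under
    // WindowsFastcall the absolute parameter index is used instead, giving
    // rcx, xmm1, r8, xmm3, with the 32-byte shadow space already counted in
    // `next_stack`.
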
    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
        // For integer-typed values, we always load a full 64 bits (and we always spill a full 64
        // bits as well -- see `Inst::store()`).
        let ty = match ty {
            types::I8 | types::I16 | types::I32 => types::I64,
            // Stack slots are always at least 8 bytes, so it's fine to load 4 bytes instead of
            // only two.
            types::F16 | types::I8X2 => types::F32,
            _ => ty,
        };
        Inst::load(ty, mem, into_reg, ExtKind::None)
    }

    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
        let ty = match ty {
            // See `gen_load_stack`.
            types::F16 | types::I8X2 => types::F32,
            _ => ty,
        };
        Inst::store(ty, from_reg, mem)
    }

    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
        Inst::gen_move(to_reg, from_reg, ty)
    }

    /// Generate an integer-extend operation.
    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        is_signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Self::I {
        let ext_mode = ExtMode::new(from_bits as u16, to_bits as u16)
            .unwrap_or_else(|| panic!("invalid extension: {from_bits} -> {to_bits}"));
        if is_signed {
            Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
        } else {
            Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
        }
    }
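
    // Illustrative note (not in the original source): for example, an
    // unsigned extend of an `i8` value to 64 bits maps to
    // `ExtMode::new(8, 64)` and lowers to a single `movzx` (and the signed
    // variant to a single `movsx`).
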
    fn gen_args(args: Vec<ArgPair>) -> Inst {
        Inst::Args { args }
    }

    fn gen_rets(rets: Vec<RetPair>) -> Inst {
        Inst::Rets { rets }
    }

    fn gen_add_imm(
        _call_conv: isa::CallConv,
        into_reg: Writable<Reg>,
        from_reg: Reg,
        imm: u32,
    ) -> SmallInstVec<Self::I> {
        let mut ret = SmallVec::new();
        if from_reg != into_reg.to_reg() {
            ret.push(Inst::gen_move(into_reg, from_reg, I64));
        }
        let imm = i32::try_from(imm).expect("`imm` is too large to fit in a 32-bit immediate");
        ret.push(Inst::addq_mi(into_reg, imm));
        ret
    }

    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I> {
        smallvec![
            Inst::External {
                inst: asm::inst::cmpq_rm::new(Gpr::unwrap_new(limit_reg), Gpr::RSP).into(),
            },
            Inst::TrapIf {
                // NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp.
                cc: CC::NBE,
                trap_code: TrapCode::STACK_OVERFLOW,
            },
        ]
    }

    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Self::I {
        let mem: SyntheticAmode = mem.into();
        Inst::External {
            inst: asm::inst::leaq_rm::new(into_reg, mem).into(),
        }
    }

    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
        // As per comment on trait definition, we must return a caller-save
        // register that is not used as an argument here.
        debug_assert!(!is_callee_save_systemv(
            regs::r10().to_real_reg().unwrap(),
            false
        ));
        regs::r10()
    }

    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
        // Only ever used for I64s, F128s and vectors; if that changes, see if
        // the ExtKind below needs to be changed.
        assert!(ty == I64 || ty.is_vector() || ty == F128);
        let mem = Amode::imm_reg(offset, base);
        Inst::load(ty, mem, into_reg, ExtKind::None)
    }

    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
        let ty = match ty {
            // See `gen_load_stack`.
            types::F16 | types::I8X2 => types::F32,
            _ => ty,
        };
        let mem = Amode::imm_reg(offset, base);
        Inst::store(ty, from_reg, mem)
    }

    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
        let rsp = Writable::from_reg(regs::rsp());
        let inst = if amount >= 0 {
            Inst::addq_mi(rsp, amount)
        } else {
            Inst::subq_mi(rsp, -amount)
        };
        smallvec![inst]
    }
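
    // Illustrative note (not in the original source): `gen_sp_reg_adjust(-64)`
    // yields `sub rsp, 64` and `gen_sp_reg_adjust(64)` yields `add rsp, 64`;
    // the stack-probe code above builds its allocate/restore pairs from
    // exactly these two forms.
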
    fn gen_prologue_frame_setup(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        _isa_flags: &x64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        let r_rsp = Gpr::RSP;
        let r_rbp = Gpr::RBP;
        let w_rbp = Writable::from_reg(r_rbp);
        let mut insts = SmallVec::new();
        // `push %rbp`
        // RSP before the call will be 0 % 16. So here, it is 8 % 16.
        insts.push(Inst::External {
            inst: asm::inst::pushq_o::new(r_rbp).into(),
        });

        if flags.unwind_info() {
            insts.push(Inst::Unwind {
                inst: UnwindInst::PushFrameRegs {
                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
                },
            });
        }

        // `mov %rsp, %rbp`
        // RSP is now 0 % 16
        insts.push(Inst::External {
            inst: asm::inst::movq_mr::new(w_rbp, r_rsp).into(),
        });

        insts
    }
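
    // Illustrative note (not in the original source): the prologue above is
    // the conventional x64 frame setup,
    //
    //   push rbp          ; RSP was 8 % 16 at entry; now 0 % 16
    //   mov  rbp, rsp     ; establish the frame pointer
    //
    // and `gen_epilogue_frame_restore` below mirrors it with
    // `mov rsp, rbp; pop rbp`.
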
    fn gen_epilogue_frame_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _isa_flags: &x64_settings::Flags,
        _frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        let rbp = Gpr::RBP;
        let rsp = Gpr::RSP;

        let mut insts = SmallVec::new();
        // `mov %rbp, %rsp`
        insts.push(Inst::External {
            inst: asm::inst::movq_mr::new(Writable::from_reg(rsp), rbp).into(),
        });
        // `pop %rbp`
        insts.push(Inst::External {
            inst: asm::inst::popq_o::new(Writable::from_reg(rbp)).into(),
        });
        insts
    }

    fn gen_return(
        call_conv: CallConv,
        _isa_flags: &x64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        // Emit return instruction.
        let stack_bytes_to_pop = if call_conv == CallConv::Tail {
            frame_layout.tail_args_size
        } else {
            0
        };
        let inst = if stack_bytes_to_pop == 0 {
            asm::inst::retq_zo::new().into()
        } else {
            let stack_bytes_to_pop = u16::try_from(stack_bytes_to_pop).unwrap();
            asm::inst::retq_i::new(stack_bytes_to_pop).into()
        };
        smallvec![Inst::External { inst }]
    }
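
    // Illustrative note (not in the original source): under the Tail
    // convention the callee pops its own stack arguments, so a function with
    // a 32-byte tail-call argument area returns via `ret 32` (the `retq_i`
    // form); all other conventions return with a plain `ret`.
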
    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
        insts.push(Inst::imm(
            OperandSize::Size32,
            frame_size as u64,
            Writable::from_reg(regs::rax()),
        ));
        insts.push(Inst::CallKnown {
            // No need to include arg here: we are post-regalloc
            // so no constraints will be seen anyway.
            info: Box::new(CallInfo::empty(
                ExternalName::LibCall(LibCall::Probestack),
                CallConv::Probestack,
            )),
        });
    }

    fn gen_inline_probestack(
        insts: &mut SmallInstVec<Self::I>,
        call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // Unroll at most n consecutive probes, before falling back to using a loop.
        //
        // This number was picked because the loop version is 38 bytes long. We can fit
        // 4 inline probes in that space, so unroll if it's beneficial in terms of code size.
        const PROBE_MAX_UNROLL: u32 = 4;

        // Calculate how many probes we need to perform. Round down, as we only
        // need to probe whole guard_size regions we'd otherwise skip over.
        let probe_count = frame_size / guard_size;
        if probe_count == 0 {
            // No probe necessary
        } else if probe_count <= PROBE_MAX_UNROLL {
            Self::gen_probestack_unroll(insts, guard_size, probe_count)
        } else {
            Self::gen_probestack_loop(insts, call_conv, frame_size, guard_size)
        }
    }
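
    // Illustrative note (not in the original source): with a 4 KiB guard,
    // `frame_size = 12 KiB` gives `probe_count = 3`, within
    // `PROBE_MAX_UNROLL`, so three inline probes are emitted;
    // `frame_size = 64 KiB` gives `probe_count = 16` and falls back to the
    // `StackProbeLoop` form.
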
    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        let mut insts = SmallVec::new();

        // When a return_call within this function required more stack arguments than we have
        // present, resize the incoming argument area of the frame to accommodate those arguments.
        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
        if incoming_args_diff > 0 {
            // Decrement the stack pointer to make space for the new arguments.
            let rsp = Writable::from_reg(regs::rsp());
            insts.push(Inst::subq_mi(
                rsp,
                i32::try_from(incoming_args_diff)
                    .expect("`incoming_args_diff` is too large to fit in a 32-bit immediate"),
            ));

            // Make sure to keep the frame pointer and stack pointer in sync at
            // this point.
            let rbp = Gpr::RBP;
            let rsp = Gpr::RSP;
            insts.push(Inst::External {
                inst: asm::inst::movq_mr::new(Writable::from_reg(rbp), rsp).into(),
            });

            let incoming_args_diff = i32::try_from(incoming_args_diff).unwrap();

            // Move the saved frame pointer down by `incoming_args_diff`.
            let addr = Amode::imm_reg(incoming_args_diff, regs::rsp());
            let r11 = Writable::from_reg(Gpr::R11);
            let inst = asm::inst::movq_rm::new(r11, addr).into();
            insts.push(Inst::External { inst });
            let inst = asm::inst::movq_mr::new(Amode::imm_reg(0, regs::rsp()), r11.to_reg()).into();
            insts.push(Inst::External { inst });

            // Move the saved return address down by `incoming_args_diff`.
            let addr = Amode::imm_reg(incoming_args_diff + 8, regs::rsp());
            let inst = asm::inst::movq_rm::new(r11, addr).into();
            insts.push(Inst::External { inst });
            let inst = asm::inst::movq_mr::new(Amode::imm_reg(8, regs::rsp()), r11.to_reg()).into();
            insts.push(Inst::External { inst });
        }

        // We need to factor `incoming_args_diff` into the offset upward here, as we have grown
        // the argument area -- `setup_area_size` alone will not be the correct offset up to the
        // original caller's SP.
        let offset_upward_to_caller_sp = frame_layout.setup_area_size + incoming_args_diff;
        if flags.unwind_info() && offset_upward_to_caller_sp > 0 {
            // Emit unwind info: start the frame. The frame (from unwind
            // consumers' point of view) starts at the clobbers, just below
            // the FP and return address. Spill slots and stack slots are
            // part of our actual frame but do not concern the unwinder.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: frame_layout.clobber_size,
                    offset_upward_to_caller_sp,
                },
            });
        }

        // Adjust the stack pointer downward for clobbers and the function fixed
        // frame (spillslots, storage slots, and argument area).
        let stack_size = frame_layout.fixed_frame_storage_size
            + frame_layout.clobber_size
            + frame_layout.outgoing_args_size;
        if stack_size > 0 {
            let rsp = Writable::from_reg(regs::rsp());
            let stack_size = i32::try_from(stack_size)
                .expect("`stack_size` is too large to fit in a 32-bit immediate");
            insts.push(Inst::subq_mi(rsp, stack_size));
        }

        // Store each clobbered register in order at offsets from RSP,
        // placing them above the fixed frame slots.
        let clobber_offset =
            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        let mut cur_offset = 0;
        for reg in &frame_layout.clobbered_callee_saves {
            let r_reg = reg.to_reg();
            let ty = match r_reg.class() {
                RegClass::Int => types::I64,
                RegClass::Float => types::I8X16,
                RegClass::Vector => unreachable!(),
            };

            // Align to 8 or 16 bytes as required by the storage type of the clobber.
            cur_offset = align_to(cur_offset, ty.bytes());
            let off = cur_offset;
            cur_offset += ty.bytes();

            insts.push(Inst::store(
                ty,
                r_reg.into(),
                Amode::imm_reg(i32::try_from(off + clobber_offset).unwrap(), regs::rsp()),
            ));

            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: off,
                        reg: r_reg,
                    },
                });
            }
        }

        insts
    }
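
    // Illustrative sketch (not in the original source) of the frame after
    // `gen_clobber_save`, with higher addresses at the top:
    //
    //   caller frame / incoming + tail args
    //   return address
    //   saved RBP                    <- RBP
    //   clobbered callee-saves       <- RSP + clobber_offset
    //   spill slots / stack slots
    //   outgoing argument area       <- RSP
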
    fn gen_clobber_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        let mut insts = SmallVec::new();

        // Restore regs by loading from offsets of RSP. We compute the offset from
        // the same base as above in clobber_save, as RSP won't change between the
        // prologue and epilogue.
        let mut cur_offset =
            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        for reg in &frame_layout.clobbered_callee_saves {
            let rreg = reg.to_reg();
            let ty = match rreg.class() {
                RegClass::Int => types::I64,
                RegClass::Float => types::I8X16,
                RegClass::Vector => unreachable!(),
            };

            // Align to 8 or 16 bytes as required by the storage type of the clobber.
            cur_offset = align_to(cur_offset, ty.bytes());

            insts.push(Inst::load(
                ty,
                Amode::imm_reg(cur_offset.try_into().unwrap(), regs::rsp()),
                Writable::from_reg(rreg.into()),
                ExtKind::None,
            ));

            cur_offset += ty.bytes();
        }

        let stack_size = frame_layout.fixed_frame_storage_size
            + frame_layout.clobber_size
            + frame_layout.outgoing_args_size;

        // Adjust RSP back upward.
        if stack_size > 0 {
            let rsp = Writable::from_reg(regs::rsp());
            let stack_size = i32::try_from(stack_size)
                .expect("`stack_size` is too large to fit in a 32-bit immediate");
            insts.push(Inst::addq_mi(rsp, stack_size));
        }

        insts
    }

    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]> {
        let mut insts = SmallVec::new();
        let arg0 = get_intreg_for_arg(call_conv, 0, 0).unwrap();
        let arg1 = get_intreg_for_arg(call_conv, 1, 1).unwrap();
        let arg2 = get_intreg_for_arg(call_conv, 2, 2).unwrap();
        let temp = alloc_tmp(Self::word_type());
        let temp2 = alloc_tmp(Self::word_type());
        insts.push(Inst::imm(OperandSize::Size64, size as u64, temp));
        // We use an indirect call and a full LoadExtName because we do not have
        // information about the libcall `RelocDistance` here, so we
        // conservatively use the more flexible calling sequence.
        insts.push(Inst::LoadExtName {
            dst: temp2.map(Gpr::unwrap_new),
            name: Box::new(ExternalName::LibCall(LibCall::Memcpy)),
            offset: 0,
            distance: RelocDistance::Far,
        });
        let callee_pop_size = 0;
        insts.push(Inst::call_unknown(Box::new(CallInfo {
            dest: RegMem::reg(temp2.to_reg()),
            uses: smallvec![
                CallArgPair {
                    vreg: dst,
                    preg: arg0
                },
                CallArgPair {
                    vreg: src,
                    preg: arg1
                },
                CallArgPair {
                    vreg: temp.to_reg(),
                    preg: arg2
                },
            ],
            defs: smallvec![],
            clobbers: Self::get_regs_clobbered_by_call(call_conv, false),
            callee_pop_size,
            callee_conv: call_conv,
            caller_conv: call_conv,
            try_call_info: None,
            patchable: false,
        })));
        insts
    }
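
    // Illustrative note (not in the original source): for a SystemV caller,
    // the sequence above materializes `size` in a temporary, loads the
    // address of the `memcpy` libcall into a second temporary, and emits an
    // indirect call with (dst, src, size) pinned to rdi, rsi, and rdx -- the
    // usual `memcpy(dst, src, size)` argument order.
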
    fn get_number_of_spillslots_for_value(
        rc: RegClass,
        vector_scale: u32,
        _isa_flags: &Self::F,
    ) -> u32 {
        // We allocate in terms of 8-byte slots.
        match rc {
            RegClass::Int => 1,
            RegClass::Float => vector_scale / 8,
            RegClass::Vector => unreachable!(),
        }
    }

    fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
        if flags.enable_pinned_reg() {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env_systemv(true))
        } else {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env_systemv(false))
        }
    }

    fn get_regs_clobbered_by_call(
        call_conv_of_callee: isa::CallConv,
        is_exception: bool,
    ) -> PRegSet {
        match (call_conv_of_callee, is_exception) {
            (isa::CallConv::Tail, true) => ALL_CLOBBERS,
            // Note that "PreserveAll" actually preserves nothing at
            // the callsite if used for a `try_call`, because the
            // unwinder ABI for `try_call`s is still "no clobbered
            // register restores" for this ABI (so as to work with
            // Wasmtime).
            (isa::CallConv::PreserveAll, true) => ALL_CLOBBERS,
            (isa::CallConv::Winch, _) => ALL_CLOBBERS,
            (isa::CallConv::SystemV, _) => SYSV_CLOBBERS,
            (isa::CallConv::WindowsFastcall, false) => WINDOWS_CLOBBERS,
            (isa::CallConv::PreserveAll, _) => NO_CLOBBERS,
            (_, false) => SYSV_CLOBBERS,
            (call_conv, true) => panic!("unimplemented clobbers for exn abi of {call_conv:?}"),
        }
    }

    fn get_ext_mode(
        _call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        specified
    }

    fn compute_frame_layout(
        call_conv: CallConv,
        flags: &settings::Flags,
        _sig: &Signature,
        regs: &[Writable<RealReg>],
        function_calls: FunctionCalls,
        incoming_args_size: u32,
        tail_args_size: u32,
        stackslots_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout {
        debug_assert!(tail_args_size >= incoming_args_size);

        let mut regs: Vec<Writable<RealReg>> = match call_conv {
            // The `winch` calling convention doesn't have any callee-save
            // registers.
            CallConv::Winch => vec![],
            CallConv::Fast | CallConv::SystemV | CallConv::Tail => regs
                .iter()
                .cloned()
                .filter(|r| is_callee_save_systemv(r.to_reg(), flags.enable_pinned_reg()))
                .collect(),
            CallConv::WindowsFastcall => regs
                .iter()
                .cloned()
                .filter(|r| is_callee_save_fastcall(r.to_reg(), flags.enable_pinned_reg()))
                .collect(),
            // The `preserve_all` calling convention makes every reg a callee-save reg.
            CallConv::PreserveAll => regs.iter().cloned().collect(),
            CallConv::Probestack => todo!("probestack?"),
            CallConv::AppleAarch64 => unreachable!(),
        };
        // Sort registers for deterministic code output. We can do an unstable sort because the
        // registers will be unique (there are no dups).
        regs.sort_unstable();

        // Compute clobber size.
        let clobber_size = compute_clobber_size(&regs);

        // Compute setup area size.
        let setup_area_size = 16; // RBP, return address

        // Return FrameLayout structure.
        FrameLayout {
            word_bytes: 8,
            incoming_args_size,
            tail_args_size: align_to(tail_args_size, 16),
            setup_area_size,
            clobber_size,
            fixed_frame_storage_size,
            stackslots_size,
            outgoing_args_size,
            clobbered_callee_saves: regs,
            function_calls,
        }
    }

    fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
        // Use r11 as a temp: clobbered anyway, and
        // not otherwise used as a return value in any of our
        // supported calling conventions.
        Writable::from_reg(regs::r11())
    }

    fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
        const PAYLOAD_REGS: &'static [Reg] = &[regs::rax(), regs::rdx()];
        match call_conv {
            isa::CallConv::SystemV | isa::CallConv::Tail | isa::CallConv::PreserveAll => {
                PAYLOAD_REGS
            }
            _ => &[],
        }
    }
}
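
// Illustrative note (not in the original source): for a SystemV function that
// clobbers rbx, r12, and xmm8, `compute_frame_layout` above records
// `setup_area_size = 16` (saved RBP plus return address), the sorted
// callee-save list [rbx, r12, xmm8], and `clobber_size = 32`: 8 + 8 bytes of
// GPR saves, aligned up to 16, plus a 16-byte XMM save (see
// `compute_clobber_size` below).
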
impl From<StackAMode> for SyntheticAmode {
    fn from(amode: StackAMode) -> Self {
        // We enforce a 128 MB stack-frame size limit above, so these
        // `expect()`s should never fail.
        match amode {
            StackAMode::IncomingArg(off, stack_args_size) => {
                let offset = u32::try_from(off).expect(
                    "Offset in IncomingArg is greater than 4GB; should hit impl limit first",
                );
                SyntheticAmode::IncomingArg {
                    offset: stack_args_size - offset,
                }
            }
            StackAMode::Slot(off) => {
                let off = i32::try_from(off)
                    .expect("Offset in Slot is greater than 2GB; should hit impl limit first");
                SyntheticAmode::slot_offset(off)
            }
            StackAMode::OutgoingArg(off) => {
                let off = i32::try_from(off).expect(
                    "Offset in OutgoingArg is greater than 2GB; should hit impl limit first",
                );
                SyntheticAmode::Real(Amode::ImmReg {
                    simm32: off,
                    base: regs::rsp(),
                    flags: MemFlags::trusted(),
                })
            }
        }
    }
}

fn get_intreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
    let is_fastcall = call_conv == CallConv::WindowsFastcall;

    // Fastcall counts by absolute argument number; SysV counts by argument of
    // this (integer) class.
    let i = if is_fastcall { arg_idx } else { idx };
    match (i, is_fastcall) {
        (0, false) => Some(regs::rdi()),
        (1, false) => Some(regs::rsi()),
        (2, false) => Some(regs::rdx()),
        (3, false) => Some(regs::rcx()),
        (4, false) => Some(regs::r8()),
        (5, false) => Some(regs::r9()),
        (0, true) => Some(regs::rcx()),
        (1, true) => Some(regs::rdx()),
        (2, true) => Some(regs::r8()),
        (3, true) => Some(regs::r9()),
        _ => None,
    }
}

fn get_fltreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
    let is_fastcall = call_conv == CallConv::WindowsFastcall;

    // Fastcall counts by absolute argument number; SysV counts by argument of
    // this (floating-point) class.
    let i = if is_fastcall { arg_idx } else { idx };
    match (i, is_fastcall) {
        (0, false) => Some(regs::xmm0()),
        (1, false) => Some(regs::xmm1()),
        (2, false) => Some(regs::xmm2()),
        (3, false) => Some(regs::xmm3()),
        (4, false) => Some(regs::xmm4()),
        (5, false) => Some(regs::xmm5()),
        (6, false) => Some(regs::xmm6()),
        (7, false) => Some(regs::xmm7()),
        (0, true) => Some(regs::xmm0()),
        (1, true) => Some(regs::xmm1()),
        (2, true) => Some(regs::xmm2()),
        (3, true) => Some(regs::xmm3()),
        _ => None,
    }
}
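
// Illustrative note (not in the original source): the `idx` vs `arg_idx`
// distinction matters for mixed signatures. For `fn(f64, i64)`, SysV assigns
// per class (f64 -> xmm0, i64 -> rdi), while fastcall assigns by absolute
// parameter position (f64 -> xmm0 as param 0, i64 -> rdx as param 1),
// leaving rcx and xmm1 unused.
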
fn get_intreg_for_retval(
    call_conv: CallConv,
    flags: &settings::Flags,
    intreg_idx: usize,
    is_last: bool,
) -> Option<Reg> {
    match call_conv {
        CallConv::Tail => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rcx()),
            2 => Some(regs::rdx()),
            3 => Some(regs::rsi()),
            4 => Some(regs::rdi()),
            5 => Some(regs::r8()),
            6 => Some(regs::r9()),
            7 => Some(regs::r10()),
            // NB: `r11` is reserved as a scratch register that is
            // also part of the clobber set.
            // NB: `r15` is reserved as a scratch register.
            _ => None,
        },
        CallConv::Fast | CallConv::SystemV | CallConv::PreserveAll => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rdx()),
            2 if flags.enable_llvm_abi_extensions() => Some(regs::rcx()),
            _ => None,
        },
        CallConv::WindowsFastcall => match intreg_idx {
            0 => Some(regs::rax()),
            1 => Some(regs::rdx()), // The Rust ABI for i128s needs this.
            _ => None,
        },

        CallConv::Winch => is_last.then(|| regs::rax()),
        CallConv::Probestack => todo!(),
        CallConv::AppleAarch64 => unreachable!(),
    }
}

fn get_fltreg_for_retval(call_conv: CallConv, fltreg_idx: usize, is_last: bool) -> Option<Reg> {
    match call_conv {
        CallConv::Tail => match fltreg_idx {
            0 => Some(regs::xmm0()),
            1 => Some(regs::xmm1()),
            2 => Some(regs::xmm2()),
            3 => Some(regs::xmm3()),
            4 => Some(regs::xmm4()),
            5 => Some(regs::xmm5()),
            6 => Some(regs::xmm6()),
            7 => Some(regs::xmm7()),
            _ => None,
        },
        CallConv::Fast | CallConv::SystemV | CallConv::PreserveAll => match fltreg_idx {
            0 => Some(regs::xmm0()),
            1 => Some(regs::xmm1()),
            _ => None,
        },
        CallConv::WindowsFastcall => match fltreg_idx {
            0 => Some(regs::xmm0()),
            _ => None,
        },
        CallConv::Winch => is_last.then(|| regs::xmm0()),
        CallConv::Probestack => todo!(),
        CallConv::AppleAarch64 => unreachable!(),
    }
}

fn is_callee_save_systemv(r: RealReg, enable_pinned_reg: bool) -> bool {
    use asm::gpr::enc::*;

    match r.class() {
        RegClass::Int => match r.hw_enc() {
            RBX | RBP | R12 | R13 | R14 => true,
            // R15 is the pinned register; if we're using it that way,
            // it is effectively globally-allocated, and is not
            // callee-saved.
            R15 => !enable_pinned_reg,
            _ => false,
        },
        RegClass::Float => false,
        RegClass::Vector => unreachable!(),
    }
}

fn is_callee_save_fastcall(r: RealReg, enable_pinned_reg: bool) -> bool {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    match r.class() {
        RegClass::Int => match r.hw_enc() {
            RBX | RBP | RSI | RDI | R12 | R13 | R14 => true,
            // See above for SysV: we must treat the pinned reg specially.
            R15 => !enable_pinned_reg,
            _ => false,
        },
        RegClass::Float => match r.hw_enc() {
            XMM6 | XMM7 | XMM8 | XMM9 | XMM10 | XMM11 | XMM12 | XMM13 | XMM14 | XMM15 => true,
            _ => false,
        },
        RegClass::Vector => unreachable!(),
    }
}

fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
    let mut clobbered_size = 0;
    for reg in clobbers {
        match reg.to_reg().class() {
            RegClass::Int => {
                clobbered_size += 8;
            }
            RegClass::Float => {
                clobbered_size = align_to(clobbered_size, 16);
                clobbered_size += 16;
            }
            RegClass::Vector => unreachable!(),
        }
    }
    align_to(clobbered_size, 16)
}

const WINDOWS_CLOBBERS: PRegSet = windows_clobbers();
const SYSV_CLOBBERS: PRegSet = sysv_clobbers();
pub(crate) const ALL_CLOBBERS: PRegSet = all_clobbers();
const NO_CLOBBERS: PRegSet = PRegSet::empty();

const fn windows_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
}

const fn sysv_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(RSI))
        .with(regs::gpr_preg(RDI))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
        .with(regs::fpr_preg(XMM6))
        .with(regs::fpr_preg(XMM7))
        .with(regs::fpr_preg(XMM8))
        .with(regs::fpr_preg(XMM9))
        .with(regs::fpr_preg(XMM10))
        .with(regs::fpr_preg(XMM11))
        .with(regs::fpr_preg(XMM12))
        .with(regs::fpr_preg(XMM13))
        .with(regs::fpr_preg(XMM14))
        .with(regs::fpr_preg(XMM15))
}

/// For calling conventions that clobber all registers.
const fn all_clobbers() -> PRegSet {
    use asm::gpr::enc::*;
    use asm::xmm::enc::*;

    PRegSet::empty()
        .with(regs::gpr_preg(RAX))
        .with(regs::gpr_preg(RCX))
        .with(regs::gpr_preg(RDX))
        .with(regs::gpr_preg(RBX))
        .with(regs::gpr_preg(RSI))
        .with(regs::gpr_preg(RDI))
        .with(regs::gpr_preg(R8))
        .with(regs::gpr_preg(R9))
        .with(regs::gpr_preg(R10))
        .with(regs::gpr_preg(R11))
        .with(regs::gpr_preg(R12))
        .with(regs::gpr_preg(R13))
        .with(regs::gpr_preg(R14))
        .with(regs::gpr_preg(R15))
        .with(regs::fpr_preg(XMM0))
        .with(regs::fpr_preg(XMM1))
        .with(regs::fpr_preg(XMM2))
        .with(regs::fpr_preg(XMM3))
        .with(regs::fpr_preg(XMM4))
        .with(regs::fpr_preg(XMM5))
        .with(regs::fpr_preg(XMM6))
        .with(regs::fpr_preg(XMM7))
        .with(regs::fpr_preg(XMM8))
        .with(regs::fpr_preg(XMM9))
        .with(regs::fpr_preg(XMM10))
        .with(regs::fpr_preg(XMM11))
        .with(regs::fpr_preg(XMM12))
        .with(regs::fpr_preg(XMM13))
        .with(regs::fpr_preg(XMM14))
        .with(regs::fpr_preg(XMM15))
}

fn create_reg_env_systemv(enable_pinned_reg: bool) -> MachineEnv {
    fn preg(r: Reg) -> PReg {
        r.to_real_reg().unwrap().into()
    }

    let mut env = MachineEnv {
        preferred_regs_by_class: [
            // Preferred GPRs: caller-saved in the SysV ABI.
            vec![
                preg(regs::rsi()),
                preg(regs::rdi()),
                preg(regs::rax()),
                preg(regs::rcx()),
                preg(regs::rdx()),
                preg(regs::r8()),
                preg(regs::r9()),
                preg(regs::r10()),
                preg(regs::r11()),
            ],
            // Preferred XMMs: the first 8, which can have smaller encodings
            // with AVX instructions.
            vec![
                preg(regs::xmm0()),
                preg(regs::xmm1()),
                preg(regs::xmm2()),
                preg(regs::xmm3()),
                preg(regs::xmm4()),
                preg(regs::xmm5()),
                preg(regs::xmm6()),
                preg(regs::xmm7()),
            ],
            // The Vector Regclass is unused
            vec![],
        ],
        non_preferred_regs_by_class: [
            // Non-preferred GPRs: callee-saved in the SysV ABI.
            vec![
                preg(regs::rbx()),
                preg(regs::r12()),
                preg(regs::r13()),
                preg(regs::r14()),
            ],
            // Non-preferred XMMs: the last 8 registers, which can have larger
            // encodings with AVX instructions.
            vec![
                preg(regs::xmm8()),
                preg(regs::xmm9()),
                preg(regs::xmm10()),
                preg(regs::xmm11()),
                preg(regs::xmm12()),
                preg(regs::xmm13()),
                preg(regs::xmm14()),
                preg(regs::xmm15()),
            ],
            // The Vector Regclass is unused
            vec![],
        ],
        fixed_stack_slots: vec![],
        scratch_by_class: [None, None, None],
    };

    debug_assert_eq!(regs::r15(), regs::pinned_reg());
    if !enable_pinned_reg {
        env.non_preferred_regs_by_class[0].push(preg(regs::r15()));
    }

    env
}
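
// Illustrative note (not in the original source): when the pinned register is
// enabled, `r15` is left out of the allocatable pools above entirely, so
// regalloc2 never assigns it and it can hold the pinned value across the
// whole function; otherwise it joins rbx and r12-r14 in the callee-saved,
// non-preferred GPR pool.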