// Path: blob/main/cranelift/codegen/src/isa/x64/abi.rs
// (source mirror header; view count removed)
//! Implementation of the standard x64 ABI.

use crate::CodegenResult;
use crate::ir::{self, LibCall, MemFlags, Signature, TrapCode, types};
use crate::ir::{ExternalName, types::*};
use crate::isa;
use crate::isa::winch;
use crate::isa::{CallConv, unwind::UnwindInst, x64::inst::*, x64::settings as x64_settings};
use crate::machinst::abi::*;
use crate::machinst::*;
use crate::settings;
use alloc::boxed::Box;
use alloc::vec::Vec;
use args::*;
use cranelift_assembler_x64 as asm;
use regalloc2::{MachineEnv, PReg, PRegSet};
use smallvec::{SmallVec, smallvec};
use std::borrow::ToOwned;
use std::sync::OnceLock;

/// Support for the x64 ABI from the callee side (within a function body).
pub(crate) type X64Callee = Callee<X64ABIMachineSpec>;

/// Implementation of ABI primitives for x64.
pub struct X64ABIMachineSpec;

impl X64ABIMachineSpec {
    /// Emit `probe_count` inline stack probes, one `guard_size` apart, then
    /// restore RSP to its value on entry. Used when the probe count is small
    /// enough that unrolling beats the loop form (see `gen_inline_probestack`).
    fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
        insts.reserve(probe_count as usize);
        for _ in 0..probe_count {
            // "Allocate" stack space for the probe by decrementing the stack pointer before
            // the write. This is required to make valgrind happy.
            // See: https://github.com/bytecodealliance/wasmtime/issues/7454
            insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));

            // TODO: It would be nice if we could store the imm 0, but we don't have insts for those
            // so store the stack pointer. Any register will do, since the stack is undefined at this point
            insts.push(Inst::store(
                I32,
                regs::rsp(),
                Amode::imm_reg(0, regs::rsp()),
            ));
        }

        // Restore the stack pointer to its original value
        insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
    }

    /// Emit a single `StackProbeLoop` pseudo-instruction that probes
    /// `frame_size` bytes of stack, one `guard_size` page at a time.
    fn gen_probestack_loop(
        insts: &mut SmallInstVec<Inst>,
        _call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // We have to use a caller-saved register since clobbering only
        // happens after stack probing.
        // `r11` is caller saved on both Fastcall and SystemV, and not used
        // for argument passing, so it's pretty much free. It is also not
        // used by the stacklimit mechanism.
        let tmp = regs::r11();
        debug_assert!({
            let real_reg = tmp.to_real_reg().unwrap();
            !is_callee_save_systemv(real_reg, false) && !is_callee_save_fastcall(real_reg, false)
        });

        insts.push(Inst::StackProbeLoop {
            tmp: Writable::from_reg(tmp),
            frame_size,
            guard_size,
        });
    }
}

impl IsaFlags for x64_settings::Flags {}

impl ABIMachineSpec for X64ABIMachineSpec {
    type I = Inst;

    type F = x64_settings::Flags;

    /// This is the limit for the size of argument and return-value areas on the
    /// stack.
    /// We place a reasonable limit here to avoid integer overflow issues
    /// with 32-bit arithmetic: for now, 128 MB.
    const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

    fn word_bits() -> u32 {
        64
    }

    /// Return required stack alignment in bytes.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }

    /// Assign a location (register or stack slot) to each parameter or return
    /// value in `params`, per `call_conv`. Returns the total stack-area size
    /// used and, if `add_ret_area_ptr`, the index of the appended
    /// return-area-pointer arg.
    fn compute_arg_locs(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        mut args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)> {
        let is_fastcall = call_conv == CallConv::WindowsFastcall;
        let is_tail = call_conv == CallConv::Tail;

        // Running counters: next free integer reg, next free vector reg, next
        // free stack offset, and the overall parameter index (fastcall counts
        // registers by absolute parameter position, not by class).
        let mut next_gpr = 0;
        let mut next_vreg = 0;
        let mut next_stack: u32 = 0;
        let mut next_param_idx = 0; // Fastcall cares about overall param index

        if args_or_rets == ArgsOrRets::Args && is_fastcall {
            // Fastcall always reserves 32 bytes of shadow space corresponding to
            // the four initial in-arg parameters.
            //
            // (See:
            // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170)
            next_stack = 32;
        }

        let ret_area_ptr = if add_ret_area_ptr {
            debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
            next_gpr += 1;
            next_param_idx += 1;
            // In the SystemV and WindowsFastcall ABIs, the return area pointer is the first
            // argument. For the Tail and Winch ABIs we do the same for simplicity sake.
            Some(ABIArg::reg(
                get_intreg_for_arg(call_conv, 0, 0)
                    .unwrap()
                    .to_real_reg()
                    .unwrap(),
                types::I64,
                ir::ArgumentExtension::None,
                ir::ArgumentPurpose::Normal,
            ))
        } else {
            None
        };

        // If any param uses extension, the winch calling convention will not pack its results
        // on the stack and will instead align them to 8-byte boundaries the same way that all the
        // other calling conventions do. This isn't consistent with Winch itself, but is fine as
        // Winch only uses this calling convention via trampolines, and those trampolines don't add
        // extension annotations. Additionally, handling extension attributes this way allows clif
        // functions that use them with the Winch calling convention to interact successfully with
        // testing infrastructure.
        // The results are also not packed if any of the types are `f16`. This is to simplify the
        // implementation of `Inst::load`/`Inst::store` (which would otherwise require multiple
        // instructions), and doesn't affect Winch itself as Winch doesn't support `f16` at all.
        let uses_extension = params.iter().any(|p| {
            p.extension != ir::ArgumentExtension::None
                || p.value_type == types::F16
                || p.value_type == types::I8X2
        });

        for (ix, param) in params.iter().enumerate() {
            let last_param = ix == params.len() - 1;

            // StructArguments are passed as a block of stack memory.
            if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
                let offset = next_stack as i64;
                let size = size;
                assert!(size % 8 == 0, "StructArgument size is not properly aligned");
                next_stack += size;
                args.push(ABIArg::StructArg {
                    offset,
                    size: size as u64,
                    purpose: param.purpose,
                });
                continue;
            }

            // Find regclass(es) of the register(s) used to store a value of this type.
            let (rcs, reg_tys) = Inst::rc_for_type(param.value_type)?;

            // Now assign ABIArgSlots for each register-sized part.
            //
            // Note that the handling of `i128` values is unique here:
            //
            // - If `enable_llvm_abi_extensions` is set in the flags, each
            //   `i128` is split into two `i64`s and assigned exactly as if it
            //   were two consecutive 64-bit args, except that if one of the
            //   two halves is forced onto the stack, the other half is too.
            //   This is consistent with LLVM's behavior, and is needed for
            //   some uses of Cranelift (e.g., the rustc backend).
            //
            // - Otherwise, if the calling convention is Tail, we behave as in
            //   the previous case, even if `enable_llvm_abi_extensions` is not
            //   set in the flags: This is a custom calling convention defined
            //   by Cranelift, LLVM doesn't know about it.
            //
            // - Otherwise, both SysV and Fastcall specify behavior (use of
            //   vector register, a register pair, or passing by reference
            //   depending on the case), but for simplicity, we will just panic if
            //   an i128 type appears in a signature and the LLVM extensions flag
            //   is not set.
            //
            // For examples of how rustc compiles i128 args and return values on
            // both SysV and Fastcall platforms, see:
            // https://godbolt.org/z/PhG3ob

            if param.value_type.bits() > 64
                && !(param.value_type.is_vector() || param.value_type.is_float())
                && !flags.enable_llvm_abi_extensions()
                && !is_tail
            {
                panic!(
                    "i128 args/return values not supported unless LLVM ABI extensions are enabled"
                );
            }
            // As MSVC doesn't support f16/f128 there is no standard way to pass/return them with
            // the Windows ABI. LLVM passes/returns them in XMM registers.
            if matches!(param.value_type, types::F16 | types::F128)
                && is_fastcall
                && !flags.enable_llvm_abi_extensions()
            {
                panic!(
                    "f16/f128 args/return values not supported for windows_fastcall unless LLVM ABI extensions are enabled"
                );
            }

            // Windows fastcall dictates that `__m128i` and `f128` parameters to
            // a function are passed indirectly as pointers, so handle that as a
            // special case before the loop below.
            if (param.value_type.is_vector() || param.value_type.is_float())
                && param.value_type.bits() >= 128
                && args_or_rets == ArgsOrRets::Args
                && is_fastcall
            {
                let pointer = match get_intreg_for_arg(call_conv, next_gpr, next_param_idx) {
                    Some(reg) => {
                        next_gpr += 1;
                        ABIArgSlot::Reg {
                            reg: reg.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        }
                    }

                    None => {
                        next_stack = align_to(next_stack, 8) + 8;
                        ABIArgSlot::Stack {
                            offset: (next_stack - 8) as i64,
                            ty: ir::types::I64,
                            extension: param.extension,
                        }
                    }
                };
                next_param_idx += 1;
                args.push(ABIArg::ImplicitPtrArg {
                    // NB: this is filled in after this loop
                    offset: 0,
                    pointer,
                    ty: param.value_type,
                    purpose: param.purpose,
                });
                continue;
            }

            // SystemV dictates that 128bit int parameters are always either
            // passed in two registers or on the stack, so handle that as a
            // special case before the loop below.
            if param.value_type == types::I128
                && args_or_rets == ArgsOrRets::Args
                && call_conv == CallConv::SystemV
            {
                let mut slots = ABIArgSlotVec::new();
                match (
                    get_intreg_for_arg(CallConv::SystemV, next_gpr, next_param_idx),
                    get_intreg_for_arg(CallConv::SystemV, next_gpr + 1, next_param_idx + 1),
                ) {
                    (Some(reg1), Some(reg2)) => {
                        slots.push(ABIArgSlot::Reg {
                            reg: reg1.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        });
                        slots.push(ABIArgSlot::Reg {
                            reg: reg2.to_real_reg().unwrap(),
                            ty: ir::types::I64,
                            extension: ir::ArgumentExtension::None,
                        });
                    }
                    _ => {
                        let size = 16;

                        // Align.
                        next_stack = align_to(next_stack, size);

                        slots.push(ABIArgSlot::Stack {
                            offset: next_stack as i64,
                            ty: ir::types::I64,
                            extension: param.extension,
                        });
                        slots.push(ABIArgSlot::Stack {
                            offset: next_stack as i64 + 8,
                            ty: ir::types::I64,
                            extension: param.extension,
                        });
                        next_stack += size;
                    }
                };
                // Unconditionally increment next_gpr even when storing the
                // argument on the stack to prevent reusing a possibly
                // remaining register for the next argument.
                next_gpr += 2;
                next_param_idx += 2;

                args.push(ABIArg::Slots {
                    slots,
                    purpose: param.purpose,
                });
                continue;
            }

            // General case: one slot per register-sized part of the value.
            let mut slots = ABIArgSlotVec::new();
            for (ix, (rc, reg_ty)) in rcs.iter().zip(reg_tys.iter()).enumerate() {
                let last_slot = last_param && ix == rcs.len() - 1;

                let intreg = *rc == RegClass::Int;
                let nextreg = if intreg {
                    match args_or_rets {
                        ArgsOrRets::Args => get_intreg_for_arg(call_conv, next_gpr, next_param_idx),
                        ArgsOrRets::Rets => {
                            get_intreg_for_retval(call_conv, flags, next_gpr, last_slot)
                        }
                    }
                } else {
                    match args_or_rets {
                        ArgsOrRets::Args => {
                            get_fltreg_for_arg(call_conv, next_vreg, next_param_idx)
                        }
                        ArgsOrRets::Rets => get_fltreg_for_retval(call_conv, next_vreg, last_slot),
                    }
                };
                next_param_idx += 1;
                if let Some(reg) = nextreg {
                    if intreg {
                        next_gpr += 1;
                    } else {
                        next_vreg += 1;
                    }
                    slots.push(ABIArgSlot::Reg {
                        reg: reg.to_real_reg().unwrap(),
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                } else {
                    if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
                        return Err(crate::CodegenError::Unsupported(
                            "Too many return values to fit in registers. \
                            Use a StructReturn argument instead. (#9510)"
                                .to_owned(),
                        ));
                    }

                    // Compute the spill size: Winch packs return values on the
                    // stack (no 8-byte rounding) unless extension/f16 forces
                    // alignment; every other convention uses 8-byte-aligned slots.
                    let size = reg_ty.bytes();
                    let size = if call_conv == CallConv::Winch
                        && args_or_rets == ArgsOrRets::Rets
                        && !uses_extension
                    {
                        size
                    } else {
                        let size = std::cmp::max(size, 8);

                        // Align.
                        debug_assert!(size.is_power_of_two());
                        next_stack = align_to(next_stack, size);
                        size
                    };

                    slots.push(ABIArgSlot::Stack {
                        offset: next_stack as i64,
                        ty: *reg_ty,
                        extension: param.extension,
                    });
                    next_stack += size;
                }
            }

            args.push(ABIArg::Slots {
                slots,
                purpose: param.purpose,
            });
        }

        // Fastcall's indirect 128+ bit vector arguments are all located on the
        // stack, and stack space is reserved after all parameters are passed,
        // so allocate from the space now.
        if args_or_rets == ArgsOrRets::Args && is_fastcall {
            for arg in args.args_mut() {
                if let ABIArg::ImplicitPtrArg { offset, .. } = arg {
                    assert_eq!(*offset, 0);
                    next_stack = align_to(next_stack, 16);
                    *offset = next_stack as i64;
                    next_stack += 16;
                }
            }
        }
        let extra_arg_idx = if let Some(ret_area_ptr) = ret_area_ptr {
            args.push_non_formal(ret_area_ptr);
            Some(args.args().len() - 1)
        } else {
            None
        };

        // Winch writes the first result to the highest offset, so we need to iterate through the
        // args and adjust the offsets down.
        if call_conv == CallConv::Winch && args_or_rets == ArgsOrRets::Rets {
            winch::reverse_stack(args, next_stack, uses_extension);
        }

        next_stack = align_to(next_stack, 16);

        Ok((next_stack, extra_arg_idx))
    }

    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
        // For integer-typed values, we always load a full 64 bits (and we always spill a full 64
        // bits as well -- see `Inst::store()`).
        let ty = match ty {
            types::I8 | types::I16 | types::I32 => types::I64,
            // Stack slots are always at least 8 bytes, so it's fine to load 4 bytes instead of only
            // two.
            types::F16 | types::I8X2 => types::F32,
            _ => ty,
        };
        Inst::load(ty, mem, into_reg, ExtKind::None)
    }

    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
        let ty = match ty {
            // See `gen_load_stack`.
            types::F16 | types::I8X2 => types::F32,
            _ => ty,
        };
        Inst::store(ty, from_reg, mem)
    }

    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
        Inst::gen_move(to_reg, from_reg, ty)
    }

    /// Generate an integer-extend operation.
    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        is_signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Self::I {
        let ext_mode = ExtMode::new(from_bits as u16, to_bits as u16)
            .unwrap_or_else(|| panic!("invalid extension: {from_bits} -> {to_bits}"));
        if is_signed {
            Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
        } else {
            Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
        }
    }

    /// Produce the pseudo-instruction that defines this function's register arguments.
    fn gen_args(args:
    Vec<ArgPair>) -> Inst {
        Inst::Args { args }
    }

    /// Produce the pseudo-instruction that uses this function's return-value registers.
    fn gen_rets(rets: Vec<RetPair>) -> Inst {
        Inst::Rets { rets }
    }

    /// Generate `into_reg = from_reg + imm` (with a move first when the
    /// destination differs from the source).
    fn gen_add_imm(
        _call_conv: isa::CallConv,
        into_reg: Writable<Reg>,
        from_reg: Reg,
        imm: u32,
    ) -> SmallInstVec<Self::I> {
        let mut ret = SmallVec::new();
        if from_reg != into_reg.to_reg() {
            ret.push(Inst::gen_move(into_reg, from_reg, I64));
        }
        let imm = i32::try_from(imm).expect("`imm` is too large to fit in a 32-bit immediate");
        ret.push(Inst::addq_mi(into_reg, imm));
        ret
    }

    /// Compare RSP against `limit_reg` and trap with STACK_OVERFLOW if the
    /// limit exceeds RSP.
    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I> {
        smallvec![
            Inst::External {
                inst: asm::inst::cmpq_rm::new(Gpr::unwrap_new(limit_reg), Gpr::RSP,).into(),
            },
            Inst::TrapIf {
                // NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp.
                cc: CC::NBE,
                trap_code: TrapCode::STACK_OVERFLOW,
            },
        ]
    }

    /// Materialize the address of a stack slot into `into_reg` via `lea`.
    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Self::I {
        let mem: SyntheticAmode = mem.into();
        Inst::External {
            inst: asm::inst::leaq_rm::new(into_reg, mem).into(),
        }
    }

    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
        // As per comment on trait definition, we must return a caller-save
        // register that is not used as an argument here.
        debug_assert!(!is_callee_save_systemv(
            regs::r10().to_real_reg().unwrap(),
            false
        ));
        regs::r10()
    }

    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
        // Only ever used for I64s, F128s and vectors; if that changes, see if
        // the ExtKind below needs to be changed.
        assert!(ty == I64 || ty.is_vector() || ty == F128);
        let mem = Amode::imm_reg(offset, base);
        Inst::load(ty, mem, into_reg, ExtKind::None)
    }

    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
        let ty = match ty {
            // See `gen_load_stack`.
            types::F16 | types::I8X2 => types::F32,
            _ => ty,
        };
        let mem = Amode::imm_reg(offset, base);
        Inst::store(ty, from_reg, mem)
    }

    /// Adjust RSP by `amount` bytes (positive grows toward caller, i.e. `add`;
    /// negative allocates, i.e. `sub`).
    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I> {
        let rsp = Writable::from_reg(regs::rsp());
        let inst = if amount >= 0 {
            Inst::addq_mi(rsp, amount)
        } else {
            Inst::subq_mi(rsp, -amount)
        };
        smallvec![inst]
    }

    /// Emit the standard frame prologue: `push %rbp; mov %rsp, %rbp`, plus
    /// unwind metadata when enabled.
    fn gen_prologue_frame_setup(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        _isa_flags: &x64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        let r_rsp = Gpr::RSP;
        let r_rbp = Gpr::RBP;
        let w_rbp = Writable::from_reg(r_rbp);
        let mut insts = SmallVec::new();
        // `push %rbp`
        // RSP before the call will be 0 % 16. So here, it is 8 % 16.
        insts.push(Inst::External {
            inst: asm::inst::pushq_o::new(r_rbp).into(),
        });

        if flags.unwind_info() {
            insts.push(Inst::Unwind {
                inst: UnwindInst::PushFrameRegs {
                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
                },
            });
        }

        // `mov %rsp, %rbp`
        // RSP is now 0 % 16
        insts.push(Inst::External {
            inst: asm::inst::movq_mr::new(w_rbp, r_rsp).into(),
        });

        insts
    }

    /// Emit the matching epilogue: `mov %rbp, %rsp; pop %rbp`.
    fn gen_epilogue_frame_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _isa_flags: &x64_settings::Flags,
        _frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        let rbp = Gpr::RBP;
        let rsp = Gpr::RSP;

        let mut insts = SmallVec::new();
        // `mov %rbp, %rsp`
        insts.push(Inst::External {
            inst: asm::inst::movq_mr::new(Writable::from_reg(rsp), rbp).into(),
        });
        // `pop %rbp`
        insts.push(Inst::External {
            inst: asm::inst::popq_o::new(Writable::from_reg(rbp)).into(),
        });
        insts
    }

    /// Emit the return: plain `ret`, or `ret imm16` for the Tail convention,
    /// where the callee pops its stack arguments.
    fn gen_return(
        call_conv: CallConv,
        _isa_flags: &x64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I> {
        // Emit return instruction.
        let stack_bytes_to_pop = if call_conv == CallConv::Tail {
            frame_layout.tail_args_size
        } else {
            0
        };
        let inst = if stack_bytes_to_pop == 0 {
            asm::inst::retq_zo::new().into()
        } else {
            let stack_bytes_to_pop = u16::try_from(stack_bytes_to_pop).unwrap();
            asm::inst::retq_i::new(stack_bytes_to_pop).into()
        };
        smallvec![Inst::External { inst }]
    }

    /// Emit a call to the `Probestack` libcall with the frame size in `%rax`.
    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32) {
        insts.push(Inst::imm(
            OperandSize::Size32,
            frame_size as u64,
            Writable::from_reg(regs::rax()),
        ));
        insts.push(Inst::CallKnown {
            // No need to include arg here: we are post-regalloc
            // so no constraints will be seen anyway.
            info: Box::new(CallInfo::empty(
                ExternalName::LibCall(LibCall::Probestack),
                CallConv::Probestack,
            )),
        });
    }

    /// Emit inline stack probes, choosing between an unrolled sequence and a
    /// loop depending on how many guard pages the frame spans.
    fn gen_inline_probestack(
        insts: &mut SmallInstVec<Self::I>,
        call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // Unroll at most n consecutive probes, before falling back to using a loop
        //
        // This number was picked because the loop version is 38 bytes long. We can fit
        // 4 inline probes in that space, so unroll if its beneficial in terms of code size.
        const PROBE_MAX_UNROLL: u32 = 4;

        // Calculate how many probes we need to perform.
        // Round down, as we only
        // need to probe whole guard_size regions we'd otherwise skip over.
        let probe_count = frame_size / guard_size;
        if probe_count == 0 {
            // No probe necessary
        } else if probe_count <= PROBE_MAX_UNROLL {
            Self::gen_probestack_unroll(insts, guard_size, probe_count)
        } else {
            Self::gen_probestack_loop(insts, call_conv, frame_size, guard_size)
        }
    }

    /// Emit the body of the prologue after frame setup: grow the incoming-arg
    /// area for return_calls if needed, allocate the fixed frame, and store
    /// all clobbered callee-save registers (with unwind records).
    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        let mut insts = SmallVec::new();

        // When a return_call within this function required more stack arguments than we have
        // present, resize the incoming argument area of the frame to accommodate those arguments.
        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
        if incoming_args_diff > 0 {
            // Decrement the stack pointer to make space for the new arguments.
            let rsp = Writable::from_reg(regs::rsp());
            insts.push(Inst::subq_mi(
                rsp,
                i32::try_from(incoming_args_diff)
                    .expect("`incoming_args_diff` is too large to fit in a 32-bit immediate"),
            ));

            // Make sure to keep the frame pointer and stack pointer in sync at
            // this point.
            let rbp = Gpr::RBP;
            let rsp = Gpr::RSP;
            insts.push(Inst::External {
                inst: asm::inst::movq_mr::new(Writable::from_reg(rbp), rsp).into(),
            });

            let incoming_args_diff = i32::try_from(incoming_args_diff).unwrap();

            // Move the saved frame pointer down by `incoming_args_diff`.
            let addr = Amode::imm_reg(incoming_args_diff, regs::rsp());
            let r11 = Writable::from_reg(Gpr::R11);
            let inst = asm::inst::movq_rm::new(r11, addr).into();
            insts.push(Inst::External { inst });
            let inst = asm::inst::movq_mr::new(Amode::imm_reg(0, regs::rsp()), r11.to_reg()).into();
            insts.push(Inst::External { inst });

            // Move the saved return address down by `incoming_args_diff`.
            let addr = Amode::imm_reg(incoming_args_diff + 8, regs::rsp());
            let inst = asm::inst::movq_rm::new(r11, addr).into();
            insts.push(Inst::External { inst });
            let inst = asm::inst::movq_mr::new(Amode::imm_reg(8, regs::rsp()), r11.to_reg()).into();
            insts.push(Inst::External { inst });
        }

        // We need to factor `incoming_args_diff` into the offset upward here, as we have grown
        // the argument area -- `setup_area_size` alone will not be the correct offset up to the
        // original caller's SP.
        let offset_upward_to_caller_sp = frame_layout.setup_area_size + incoming_args_diff;
        if flags.unwind_info() && offset_upward_to_caller_sp > 0 {
            // Emit unwind info: start the frame. The frame (from unwind
            // consumers' point of view) starts at clobbers, just below
            // the FP and return address. Spill slots and stack slots are
            // part of our actual frame but do not concern the unwinder.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: frame_layout.clobber_size,
                    offset_upward_to_caller_sp,
                },
            });
        }

        // Adjust the stack pointer downward for clobbers and the function fixed
        // frame (spillslots, storage slots, and argument area).
        let stack_size = frame_layout.fixed_frame_storage_size
            + frame_layout.clobber_size
            + frame_layout.outgoing_args_size;
        if stack_size > 0 {
            let rsp = Writable::from_reg(regs::rsp());
            let stack_size = i32::try_from(stack_size)
                .expect("`stack_size` is too large to fit in a 32-bit immediate");
            insts.push(Inst::subq_mi(rsp, stack_size));
        }

        // Store each clobbered register in order at offsets from RSP,
        // placing them above the fixed frame slots.
        let clobber_offset =
            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        let mut cur_offset = 0;
        for reg in &frame_layout.clobbered_callee_saves {
            let r_reg = reg.to_reg();
            let ty = match r_reg.class() {
                RegClass::Int => types::I64,
                RegClass::Float => types::I8X16,
                RegClass::Vector => unreachable!(),
            };

            // Align to 8 or 16 bytes as required by the storage type of the clobber.
            cur_offset = align_to(cur_offset, ty.bytes());
            let off = cur_offset;
            cur_offset += ty.bytes();

            insts.push(Inst::store(
                ty,
                r_reg.into(),
                Amode::imm_reg(i32::try_from(off + clobber_offset).unwrap(), regs::rsp()),
            ));

            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: off,
                        reg: r_reg,
                    },
                });
            }
        }

        insts
    }

    /// Reverse of `gen_clobber_save`: reload clobbered callee-saves from the
    /// same RSP-relative offsets, then free the fixed frame.
    fn gen_clobber_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]> {
        let mut insts = SmallVec::new();

        // Restore regs by loading from offsets of RSP. We compute the offset from
        // the same base as above in clobber_save, as RSP won't change between the
        // prologue and epilogue.
        let mut cur_offset =
            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        for reg in &frame_layout.clobbered_callee_saves {
            let rreg = reg.to_reg();
            let ty = match rreg.class() {
                RegClass::Int => types::I64,
                RegClass::Float => types::I8X16,
                RegClass::Vector => unreachable!(),
            };

            // Align to 8 or 16 bytes as required by the storage type of the clobber.
            cur_offset = align_to(cur_offset, ty.bytes());

            insts.push(Inst::load(
                ty,
                Amode::imm_reg(cur_offset.try_into().unwrap(), regs::rsp()),
                Writable::from_reg(rreg.into()),
                ExtKind::None,
            ));

            cur_offset += ty.bytes();
        }

        let stack_size = frame_layout.fixed_frame_storage_size
            + frame_layout.clobber_size
            + frame_layout.outgoing_args_size;

        // Adjust RSP back upward.
        if stack_size > 0 {
            let rsp = Writable::from_reg(regs::rsp());
            let stack_size = i32::try_from(stack_size)
                .expect("`stack_size` is too large to fit in a 32-bit immediate");
            insts.push(Inst::addq_mi(rsp, stack_size));
        }

        insts
    }

    /// Emit an indirect call to the `Memcpy` libcall copying `size` bytes from
    /// `src` to `dst`; `alloc_tmp` provides scratch vregs for the size and the
    /// callee address.
    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Self::I;
    8]> {
        let mut insts = SmallVec::new();
        let arg0 = get_intreg_for_arg(call_conv, 0, 0).unwrap();
        let arg1 = get_intreg_for_arg(call_conv, 1, 1).unwrap();
        let arg2 = get_intreg_for_arg(call_conv, 2, 2).unwrap();
        let temp = alloc_tmp(Self::word_type());
        let temp2 = alloc_tmp(Self::word_type());
        insts.push(Inst::imm(OperandSize::Size64, size as u64, temp));
        // We use an indirect call and a full LoadExtName because we do not have
        // information about the libcall `RelocDistance` here, so we
        // conservatively use the more flexible calling sequence.
        insts.push(Inst::LoadExtName {
            dst: temp2.map(Gpr::unwrap_new),
            name: Box::new(ExternalName::LibCall(LibCall::Memcpy)),
            offset: 0,
            distance: RelocDistance::Far,
        });
        let callee_pop_size = 0;
        insts.push(Inst::call_unknown(Box::new(CallInfo {
            dest: RegMem::reg(temp2.to_reg()),
            uses: smallvec![
                CallArgPair {
                    vreg: dst,
                    preg: arg0
                },
                CallArgPair {
                    vreg: src,
                    preg: arg1
                },
                CallArgPair {
                    vreg: temp.to_reg(),
                    preg: arg2
                },
            ],
            defs: smallvec![],
            clobbers: Self::get_regs_clobbered_by_call(call_conv, false),
            callee_pop_size,
            callee_conv: call_conv,
            caller_conv: call_conv,
            try_call_info: None,
        })));
        insts
    }

    fn get_number_of_spillslots_for_value(
        rc: RegClass,
        vector_scale: u32,
        _isa_flags: &Self::F,
    ) -> u32 {
        // We allocate in terms of 8-byte slots.
        match rc {
            RegClass::Int => 1,
            RegClass::Float => vector_scale / 8,
            RegClass::Vector => unreachable!(),
        }
    }

    /// Return the (lazily-initialized, process-global) register environment,
    /// which differs only in whether R15 is reserved as the pinned register.
    fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
        if flags.enable_pinned_reg() {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env_systemv(true))
        } else {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env_systemv(false))
        }
    }

    fn get_regs_clobbered_by_call(
        call_conv_of_callee: isa::CallConv,
        is_exception: bool,
    ) -> PRegSet {
        match call_conv_of_callee {
            CallConv::Winch => ALL_CLOBBERS,
            CallConv::WindowsFastcall => WINDOWS_CLOBBERS,
            // Unwinding into a Tail-convention handler may leave any register
            // in an arbitrary state.
            CallConv::Tail if is_exception => ALL_CLOBBERS,
            _ => SYSV_CLOBBERS,
        }
    }

    fn get_ext_mode(
        _call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        specified
    }

    /// Compute the final frame layout: filter `regs` down to this convention's
    /// callee-saves, size the clobber area, and assemble the `FrameLayout`.
    fn compute_frame_layout(
        call_conv: CallConv,
        flags: &settings::Flags,
        _sig: &Signature,
        regs: &[Writable<RealReg>],
        function_calls: FunctionCalls,
        incoming_args_size: u32,
        tail_args_size: u32,
        stackslots_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout {
        debug_assert!(tail_args_size >= incoming_args_size);

        let mut regs: Vec<Writable<RealReg>> = match call_conv {
            // The `winch` calling convention doesn't have any callee-save
            // registers.
            CallConv::Winch => vec![],
            CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::Tail => regs
                .iter()
                .cloned()
                .filter(|r| is_callee_save_systemv(r.to_reg(), flags.enable_pinned_reg()))
                .collect(),
            CallConv::WindowsFastcall => regs
                .iter()
                .cloned()
                .filter(|r| is_callee_save_fastcall(r.to_reg(), flags.enable_pinned_reg()))
                .collect(),
            CallConv::Probestack => todo!("probestack?"),
            CallConv::AppleAarch64 => unreachable!(),
        };
        // Sort registers for deterministic code output. We can do an unstable sort because the
        // registers will be unique (there are no dups).
        regs.sort_unstable();

        // Compute clobber size.
        let clobber_size = compute_clobber_size(&regs);

        // Compute setup area size.
        let setup_area_size = 16; // RBP, return address

        // Return FrameLayout structure.
        FrameLayout {
            word_bytes: 8,
            incoming_args_size,
            tail_args_size: align_to(tail_args_size, 16),
            setup_area_size,
            clobber_size,
            fixed_frame_storage_size,
            stackslots_size,
            outgoing_args_size,
            clobbered_callee_saves: regs,
            function_calls,
        }
    }

    fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
        // Use r11 as a temp: clobbered anyway, and
        // not otherwise used as a return value in any of our
        // supported calling conventions.
        Writable::from_reg(regs::r11())
    }

    fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
        const PAYLOAD_REGS: &'static [Reg] = &[regs::rax(), regs::rdx()];
        match call_conv {
            isa::CallConv::SystemV | isa::CallConv::Tail => PAYLOAD_REGS,
            _ => &[],
        }
    }
}

impl From<StackAMode> for SyntheticAmode {
    fn from(amode: StackAMode) -> Self {
        // We enforce a 128 MB stack-frame size limit above, so these
        // `expect()`s should never fail.
        match amode {
            StackAMode::IncomingArg(off, stack_args_size) => {
                let offset = u32::try_from(off).expect(
                    "Offset in IncomingArg is greater than 4GB; should hit impl limit first",
                );
                SyntheticAmode::IncomingArg {
                    offset: stack_args_size - offset,
                }
            }
            StackAMode::Slot(off) => {
                let off = i32::try_from(off)
                    .expect("Offset in Slot is greater than 2GB; should hit impl limit first");
                SyntheticAmode::slot_offset(off)
            }
            StackAMode::OutgoingArg(off) => {
                let off = i32::try_from(off).expect(
                    "Offset in OutgoingArg is greater than 2GB; should hit impl limit first",
                );
                SyntheticAmode::Real(Amode::ImmReg {
                    simm32: off,
                    base: regs::rsp(),
                    flags:
MemFlags::trusted(),1002})1003}1004}1005}1006}10071008fn get_intreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {1009let is_fastcall = call_conv == CallConv::WindowsFastcall;10101011// Fastcall counts by absolute argument number; SysV counts by argument of1012// this (integer) class.1013let i = if is_fastcall { arg_idx } else { idx };1014match (i, is_fastcall) {1015(0, false) => Some(regs::rdi()),1016(1, false) => Some(regs::rsi()),1017(2, false) => Some(regs::rdx()),1018(3, false) => Some(regs::rcx()),1019(4, false) => Some(regs::r8()),1020(5, false) => Some(regs::r9()),1021(0, true) => Some(regs::rcx()),1022(1, true) => Some(regs::rdx()),1023(2, true) => Some(regs::r8()),1024(3, true) => Some(regs::r9()),1025_ => None,1026}1027}10281029fn get_fltreg_for_arg(call_conv: CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {1030let is_fastcall = call_conv == CallConv::WindowsFastcall;10311032// Fastcall counts by absolute argument number; SysV counts by argument of1033// this (floating-point) class.1034let i = if is_fastcall { arg_idx } else { idx };1035match (i, is_fastcall) {1036(0, false) => Some(regs::xmm0()),1037(1, false) => Some(regs::xmm1()),1038(2, false) => Some(regs::xmm2()),1039(3, false) => Some(regs::xmm3()),1040(4, false) => Some(regs::xmm4()),1041(5, false) => Some(regs::xmm5()),1042(6, false) => Some(regs::xmm6()),1043(7, false) => Some(regs::xmm7()),1044(0, true) => Some(regs::xmm0()),1045(1, true) => Some(regs::xmm1()),1046(2, true) => Some(regs::xmm2()),1047(3, true) => Some(regs::xmm3()),1048_ => None,1049}1050}10511052fn get_intreg_for_retval(1053call_conv: CallConv,1054flags: &settings::Flags,1055intreg_idx: usize,1056is_last: bool,1057) -> Option<Reg> {1058match call_conv {1059CallConv::Tail => match intreg_idx {10600 => Some(regs::rax()),10611 => Some(regs::rcx()),10622 => Some(regs::rdx()),10633 => Some(regs::rsi()),10644 => Some(regs::rdi()),10655 => Some(regs::r8()),10666 => Some(regs::r9()),10677 => 
Some(regs::r10()),1068// NB: `r11` is reserved as a scratch register that is1069// also part of the clobber set.1070// NB: `r15` is reserved as a scratch register.1071_ => None,1072},1073CallConv::Fast | CallConv::Cold | CallConv::SystemV => match intreg_idx {10740 => Some(regs::rax()),10751 => Some(regs::rdx()),10762 if flags.enable_llvm_abi_extensions() => Some(regs::rcx()),1077_ => None,1078},1079CallConv::WindowsFastcall => match intreg_idx {10800 => Some(regs::rax()),10811 => Some(regs::rdx()), // The Rust ABI for i128s needs this.1082_ => None,1083},10841085CallConv::Winch => is_last.then(|| regs::rax()),1086CallConv::Probestack => todo!(),1087CallConv::AppleAarch64 => unreachable!(),1088}1089}10901091fn get_fltreg_for_retval(call_conv: CallConv, fltreg_idx: usize, is_last: bool) -> Option<Reg> {1092match call_conv {1093CallConv::Tail => match fltreg_idx {10940 => Some(regs::xmm0()),10951 => Some(regs::xmm1()),10962 => Some(regs::xmm2()),10973 => Some(regs::xmm3()),10984 => Some(regs::xmm4()),10995 => Some(regs::xmm5()),11006 => Some(regs::xmm6()),11017 => Some(regs::xmm7()),1102_ => None,1103},1104CallConv::Fast | CallConv::Cold | CallConv::SystemV => match fltreg_idx {11050 => Some(regs::xmm0()),11061 => Some(regs::xmm1()),1107_ => None,1108},1109CallConv::WindowsFastcall => match fltreg_idx {11100 => Some(regs::xmm0()),1111_ => None,1112},1113CallConv::Winch => is_last.then(|| regs::xmm0()),1114CallConv::Probestack => todo!(),1115CallConv::AppleAarch64 => unreachable!(),1116}1117}11181119fn is_callee_save_systemv(r: RealReg, enable_pinned_reg: bool) -> bool {1120use asm::gpr::enc::*;11211122match r.class() {1123RegClass::Int => match r.hw_enc() {1124RBX | RBP | R12 | R13 | R14 => true,1125// R15 is the pinned register; if we're using it that way,1126// it is effectively globally-allocated, and is not1127// callee-saved.1128R15 => !enable_pinned_reg,1129_ => false,1130},1131RegClass::Float => false,1132RegClass::Vector => unreachable!(),1133}1134}11351136fn 
is_callee_save_fastcall(r: RealReg, enable_pinned_reg: bool) -> bool {1137use asm::gpr::enc::*;1138use asm::xmm::enc::*;11391140match r.class() {1141RegClass::Int => match r.hw_enc() {1142RBX | RBP | RSI | RDI | R12 | R13 | R14 => true,1143// See above for SysV: we must treat the pinned reg specially.1144R15 => !enable_pinned_reg,1145_ => false,1146},1147RegClass::Float => match r.hw_enc() {1148XMM6 | XMM7 | XMM8 | XMM9 | XMM10 | XMM11 | XMM12 | XMM13 | XMM14 | XMM15 => true,1149_ => false,1150},1151RegClass::Vector => unreachable!(),1152}1153}11541155fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {1156let mut clobbered_size = 0;1157for reg in clobbers {1158match reg.to_reg().class() {1159RegClass::Int => {1160clobbered_size += 8;1161}1162RegClass::Float => {1163clobbered_size = align_to(clobbered_size, 16);1164clobbered_size += 16;1165}1166RegClass::Vector => unreachable!(),1167}1168}1169align_to(clobbered_size, 16)1170}11711172const WINDOWS_CLOBBERS: PRegSet = windows_clobbers();1173const SYSV_CLOBBERS: PRegSet = sysv_clobbers();1174pub(crate) const ALL_CLOBBERS: PRegSet = all_clobbers();11751176const fn windows_clobbers() -> PRegSet {1177use asm::gpr::enc::*;1178use asm::xmm::enc::*;11791180PRegSet::empty()1181.with(regs::gpr_preg(RAX))1182.with(regs::gpr_preg(RCX))1183.with(regs::gpr_preg(RDX))1184.with(regs::gpr_preg(R8))1185.with(regs::gpr_preg(R9))1186.with(regs::gpr_preg(R10))1187.with(regs::gpr_preg(R11))1188.with(regs::fpr_preg(XMM0))1189.with(regs::fpr_preg(XMM1))1190.with(regs::fpr_preg(XMM2))1191.with(regs::fpr_preg(XMM3))1192.with(regs::fpr_preg(XMM4))1193.with(regs::fpr_preg(XMM5))1194}11951196const fn sysv_clobbers() -> PRegSet {1197use asm::gpr::enc::*;1198use 
asm::xmm::enc::*;11991200PRegSet::empty()1201.with(regs::gpr_preg(RAX))1202.with(regs::gpr_preg(RCX))1203.with(regs::gpr_preg(RDX))1204.with(regs::gpr_preg(RSI))1205.with(regs::gpr_preg(RDI))1206.with(regs::gpr_preg(R8))1207.with(regs::gpr_preg(R9))1208.with(regs::gpr_preg(R10))1209.with(regs::gpr_preg(R11))1210.with(regs::fpr_preg(XMM0))1211.with(regs::fpr_preg(XMM1))1212.with(regs::fpr_preg(XMM2))1213.with(regs::fpr_preg(XMM3))1214.with(regs::fpr_preg(XMM4))1215.with(regs::fpr_preg(XMM5))1216.with(regs::fpr_preg(XMM6))1217.with(regs::fpr_preg(XMM7))1218.with(regs::fpr_preg(XMM8))1219.with(regs::fpr_preg(XMM9))1220.with(regs::fpr_preg(XMM10))1221.with(regs::fpr_preg(XMM11))1222.with(regs::fpr_preg(XMM12))1223.with(regs::fpr_preg(XMM13))1224.with(regs::fpr_preg(XMM14))1225.with(regs::fpr_preg(XMM15))1226}12271228/// For calling conventions that clobber all registers.1229const fn all_clobbers() -> PRegSet {1230use asm::gpr::enc::*;1231use asm::xmm::enc::*;12321233PRegSet::empty()1234.with(regs::gpr_preg(RAX))1235.with(regs::gpr_preg(RCX))1236.with(regs::gpr_preg(RDX))1237.with(regs::gpr_preg(RBX))1238.with(regs::gpr_preg(RSI))1239.with(regs::gpr_preg(RDI))1240.with(regs::gpr_preg(R8))1241.with(regs::gpr_preg(R9))1242.with(regs::gpr_preg(R10))1243.with(regs::gpr_preg(R11))1244.with(regs::gpr_preg(R12))1245.with(regs::gpr_preg(R13))1246.with(regs::gpr_preg(R14))1247.with(regs::gpr_preg(R15))1248.with(regs::fpr_preg(XMM0))1249.with(regs::fpr_preg(XMM1))1250.with(regs::fpr_preg(XMM2))1251.with(regs::fpr_preg(XMM3))1252.with(regs::fpr_preg(XMM4))1253.with(regs::fpr_preg(XMM5))1254.with(regs::fpr_preg(XMM6))1255.with(regs::fpr_preg(XMM7))1256.with(regs::fpr_preg(XMM8))1257.with(regs::fpr_preg(XMM9))1258.with(regs::fpr_preg(XMM10))1259.with(regs::fpr_preg(XMM11))1260.with(regs::fpr_preg(XMM12))1261.with(regs::fpr_preg(XMM13))1262.with(regs::fpr_preg(XMM14))1263.with(regs::fpr_preg(XMM15))1264}12651266fn create_reg_env_systemv(enable_pinned_reg: bool) -> MachineEnv {1267fn 
preg(r: Reg) -> PReg {1268r.to_real_reg().unwrap().into()1269}12701271let mut env = MachineEnv {1272preferred_regs_by_class: [1273// Preferred GPRs: caller-saved in the SysV ABI.1274vec![1275preg(regs::rsi()),1276preg(regs::rdi()),1277preg(regs::rax()),1278preg(regs::rcx()),1279preg(regs::rdx()),1280preg(regs::r8()),1281preg(regs::r9()),1282preg(regs::r10()),1283preg(regs::r11()),1284],1285// Preferred XMMs: the first 8, which can have smaller encodings1286// with AVX instructions.1287vec![1288preg(regs::xmm0()),1289preg(regs::xmm1()),1290preg(regs::xmm2()),1291preg(regs::xmm3()),1292preg(regs::xmm4()),1293preg(regs::xmm5()),1294preg(regs::xmm6()),1295preg(regs::xmm7()),1296],1297// The Vector Regclass is unused1298vec![],1299],1300non_preferred_regs_by_class: [1301// Non-preferred GPRs: callee-saved in the SysV ABI.1302vec![1303preg(regs::rbx()),1304preg(regs::r12()),1305preg(regs::r13()),1306preg(regs::r14()),1307],1308// Non-preferred XMMs: the last 8 registers, which can have larger1309// encodings with AVX instructions.1310vec![1311preg(regs::xmm8()),1312preg(regs::xmm9()),1313preg(regs::xmm10()),1314preg(regs::xmm11()),1315preg(regs::xmm12()),1316preg(regs::xmm13()),1317preg(regs::xmm14()),1318preg(regs::xmm15()),1319],1320// The Vector Regclass is unused1321vec![],1322],1323fixed_stack_slots: vec![],1324scratch_by_class: [None, None, None],1325};13261327debug_assert_eq!(regs::r15(), regs::pinned_reg());1328if !enable_pinned_reg {1329env.non_preferred_regs_by_class[0].push(preg(regs::r15()));1330}13311332env1333}133413351336