Path: blob/main/cranelift/codegen/src/isa/s390x/abi.rs
1693 views
//! Implementation of a standard S390x ABI.1//!2//! This machine uses the "vanilla" ABI implementation from abi.rs,3//! however a few details are different from the description there:4//!5//! - On s390x, the caller must provide a "register save area" of 1606//! bytes to any function it calls. The called function is free to use7//! this space for any purpose; usually to save callee-saved GPRs.8//! (Note that while this area is allocated by the caller, it is counted9//! as part of the callee's stack frame; in particular, the callee's CFA10//! is the top of the register save area, not the incoming SP value.)11//!12//! - Overflow arguments are passed on the stack starting immediately13//! above the register save area. On s390x, this space is allocated14//! only once directly in the prologue, using a size large enough to15//! hold overflow arguments for every call in the function.16//!17//! - On s390x we do not use a frame pointer register; instead, every18//! element of the stack frame is addressed via (constant) offsets19//! from the stack pointer. Note that due to the above (and because20//! there are no variable-sized stack allocations in cranelift), the21//! value of the stack pointer register never changes after the22//! initial allocation in the function prologue.23//!24//! - If we are asked to "preserve frame pointers" to enable stack25//! unwinding, we use the stack backchain feature instead, which26//! is documented by the s390x ELF ABI, but marked as optional.27//! This ensures that at all times during execution of a function,28//! the lowest word on the stack (part of the register save area)29//! holds a copy of the stack pointer at function entry.30//!31//! Overall, the stack frame layout on s390x is as follows:32//!33//! ```plain34//! (high address)35//!36//! +---------------------------+37//! | ... |38//! CFA -----> | stack args |39//! +---------------------------+40//! | ... |41//! | 160 bytes reg save area |42//! | (used to save GPRs) |43//! SP at function entry -----> | (incl. caller's backchain)|44//! +---------------------------+45//! | ... |46//! | clobbered callee-saves |47//! | (used to save FPRs) |48//! unwind-frame base ----> | (alloc'd by prologue) |49//! +---------------------------+50//! | ... |51//! | spill slots |52//! | (accessed via SP) |53//! | ... |54//! | stack slots |55//! | (accessed via SP) |56//! | (alloc'd by prologue) |57//! +---------------------------+58//! | ... |59//! | args for call |60//! | outgoing reg save area |61//! | (alloc'd by prologue) |62//! SP during function ------> | (incl. callee's backchain)|63//! +---------------------------+64//!65//! (low address)66//! ```67//!68//!69//! The tail-call ABI has the following changes to the system ABI:70//!71//! - %r6 and %r7 are both non-callee-saved argument registers.72//!73//! - The argument save area for outgoing (non-tail) calls to74//! a tail-call ABI function is placed *below* the caller's75//! stack frame. This means the caller temporarily allocates76//! a part of the callee's frame, including temporary space77//! for a register save area holding a copy of the backchain.78//!79//! - For tail calls, the caller puts outgoing arguments at the80//! very top of its stack frame, overlapping the incoming81//! argument area. This is extended by the prolog if needed.82//!83//! Overall, the tail-call stack frame layout on s390x is as follows:84//!85//! ```plain86//! (high address)87//!88//! +---------------------------+89//! | ... |90//! CFA -----> | (caller's frame) |91//! +---------------------------+92//! | ... |93//! | 160 bytes reg save area |94//! | (used to save GPRs) |95//! SP at function return-----> | (incl. caller's backchain)|96//! +---------------------------+97//! | ... |98//! | incoming stack args |99//! SP at function entry -----> | (incl. backchain copy) |100//! +---------------------------+101//! | ... |102//! | outgoing tail call args |103//! | (overlaps incoming args) |104//! | (incl. backchain copy) |105//! SP at tail cail ----> | (alloc'd by prologue) |106//! +---------------------------+107//! | ... |108//! | clobbered callee-saves |109//! | (used to save FPRs) |110//! unwind-frame base ----> | (alloc'd by prologue) |111//! +---------------------------+112//! | ... |113//! | spill slots |114//! | (accessed via SP) |115//! | ... |116//! | stack slots |117//! | (accessed via SP) |118//! | (alloc'd by prologue) |119//! +---------------------------+120//! | ... |121//! | outgoing calls return buf |122//! | outgoing reg save area |123//! | (alloc'd by prologue) |124//! SP during function ------> | (incl. callee's backchain)|125//! +---------------------------+126//! | ... |127//! | outgoing stack args |128//! | (alloc'd by call sequence)|129//! SP at non-tail call -----> | (incl. backchain copy) |130//! +---------------------------+131//! (low address)132//! ```133134use crate::CodegenResult;135use crate::ir;136use crate::ir::MemFlags;137use crate::ir::Signature;138use crate::ir::Type;139use crate::ir::condcodes::IntCC;140use crate::ir::types;141use crate::isa;142use crate::isa::s390x::{inst::*, settings as s390x_settings};143use crate::isa::unwind::UnwindInst;144use crate::machinst::*;145use crate::settings;146use alloc::vec::Vec;147use regalloc2::{MachineEnv, PRegSet};148use smallvec::{SmallVec, smallvec};149use std::borrow::ToOwned;150use std::sync::OnceLock;151152// We use a generic implementation that factors out ABI commonalities.153154/// Support for the S390x ABI from the callee side (within a function body).155pub type S390xCallee = Callee<S390xMachineDeps>;156157/// ABI Register usage158159fn in_int_reg(ty: Type) -> bool {160match ty {161types::I8 | types::I16 | types::I32 | types::I64 => true,162_ => false,163}164}165166fn in_flt_reg(ty: Type) -> bool {167match ty {168types::F16 | types::F32 | types::F64 => true,169_ => false,170}171}172173fn in_vec_reg(ty: Type) -> bool {174ty.is_vector() && ty.bits() == 128175}176177fn get_intreg_for_arg(call_conv: isa::CallConv, idx: usize) -> Option<Reg> {178match idx {1790 => Some(regs::gpr(2)),1801 => Some(regs::gpr(3)),1812 => Some(regs::gpr(4)),1823 => Some(regs::gpr(5)),1834 => Some(regs::gpr(6)),1845 if call_conv == isa::CallConv::Tail => Some(regs::gpr(7)),185_ => None,186}187}188189fn get_fltreg_for_arg(idx: usize) -> Option<Reg> {190match idx {1910 => Some(regs::vr(0)),1921 => Some(regs::vr(2)),1932 => Some(regs::vr(4)),1943 => Some(regs::vr(6)),195_ => None,196}197}198199fn get_vecreg_for_arg(idx: usize) -> Option<Reg> {200match idx {2010 => Some(regs::vr(24)),2021 => Some(regs::vr(25)),2032 => Some(regs::vr(26)),2043 => Some(regs::vr(27)),2054 => Some(regs::vr(28)),2065 => Some(regs::vr(29)),2076 => Some(regs::vr(30)),2087 => Some(regs::vr(31)),209_ => None,210}211}212213fn get_intreg_for_ret(call_conv: isa::CallConv, idx: usize) -> Option<Reg> {214match idx {2150 => Some(regs::gpr(2)),216// ABI extension to support multi-value returns:2171 => Some(regs::gpr(3)),2182 => Some(regs::gpr(4)),2193 => Some(regs::gpr(5)),2204 if call_conv == isa::CallConv::Tail => Some(regs::gpr(6)),2215 if call_conv == isa::CallConv::Tail => Some(regs::gpr(7)),222_ => None,223}224}225226fn get_fltreg_for_ret(idx: usize) -> Option<Reg> {227match idx {2280 => Some(regs::vr(0)),229// ABI extension to support multi-value returns:2301 => Some(regs::vr(2)),2312 => Some(regs::vr(4)),2323 => Some(regs::vr(6)),233_ => None,234}235}236237fn get_vecreg_for_ret(idx: usize) -> Option<Reg> {238match idx {2390 => Some(regs::vr(24)),240// ABI extension to support multi-value returns:2411 => Some(regs::vr(25)),2422 => Some(regs::vr(26)),2433 => Some(regs::vr(27)),2444 => Some(regs::vr(28)),2455 => Some(regs::vr(29)),2466 => Some(regs::vr(30)),2477 => Some(regs::vr(31)),248_ => None,249}250}251252/// The size of the register save area253pub static REG_SAVE_AREA_SIZE: u32 = 160;254255impl From<StackAMode> for MemArg {256fn from(stack: StackAMode) -> MemArg {257match stack {258StackAMode::IncomingArg(off, stack_args_size) => MemArg::IncomingArgOffset {259off: off - stack_args_size as i64,260},261StackAMode::Slot(off) => MemArg::SlotOffset { off },262StackAMode::OutgoingArg(off) => MemArg::OutgoingArgOffset { off },263}264}265}266267/// Lane order to be used for a given calling convention.268impl From<isa::CallConv> for LaneOrder {269fn from(call_conv: isa::CallConv) -> Self {270match call_conv {271isa::CallConv::Tail => LaneOrder::LittleEndian,272_ => LaneOrder::BigEndian,273}274}275}276277/// S390x-specific ABI behavior. This struct just serves as an implementation278/// point for the trait; it is never actually instantiated.279pub struct S390xMachineDeps;280281impl IsaFlags for s390x_settings::Flags {}282283impl ABIMachineSpec for S390xMachineDeps {284type I = Inst;285286type F = s390x_settings::Flags;287288/// This is the limit for the size of argument and return-value areas on the289/// stack. We place a reasonable limit here to avoid integer overflow issues290/// with 32-bit arithmetic: for now, 128 MB.291const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;292293fn word_bits() -> u32 {29464295}296297/// Return required stack alignment in bytes.298fn stack_align(_call_conv: isa::CallConv) -> u32 {2998300}301302fn compute_arg_locs(303call_conv: isa::CallConv,304flags: &settings::Flags,305params: &[ir::AbiParam],306args_or_rets: ArgsOrRets,307add_ret_area_ptr: bool,308mut args: ArgsAccumulator,309) -> CodegenResult<(u32, Option<usize>)> {310assert_ne!(311call_conv,312isa::CallConv::Winch,313"s390x does not support the 'winch' calling convention yet"314);315316let mut next_gpr = 0;317let mut next_fpr = 0;318let mut next_vr = 0;319let mut next_stack: u32 = 0;320321let ret_area_ptr = if add_ret_area_ptr {322debug_assert_eq!(args_or_rets, ArgsOrRets::Args);323next_gpr += 1;324Some(ABIArg::reg(325get_intreg_for_arg(call_conv, 0)326.unwrap()327.to_real_reg()328.unwrap(),329types::I64,330ir::ArgumentExtension::None,331ir::ArgumentPurpose::Normal,332))333} else {334None335};336337for mut param in params.into_iter().copied() {338if let ir::ArgumentPurpose::StructArgument(_) = param.purpose {339panic!(340"StructArgument parameters are not supported on s390x. \341Use regular pointer arguments instead."342);343}344345let intreg = in_int_reg(param.value_type);346let fltreg = in_flt_reg(param.value_type);347let vecreg = in_vec_reg(param.value_type);348debug_assert!(intreg as i32 + fltreg as i32 + vecreg as i32 <= 1);349350let (next_reg, candidate, implicit_ref) = if intreg {351let candidate = match args_or_rets {352ArgsOrRets::Args => get_intreg_for_arg(call_conv, next_gpr),353ArgsOrRets::Rets => get_intreg_for_ret(call_conv, next_gpr),354};355(&mut next_gpr, candidate, None)356} else if fltreg {357let candidate = match args_or_rets {358ArgsOrRets::Args => get_fltreg_for_arg(next_fpr),359ArgsOrRets::Rets => get_fltreg_for_ret(next_fpr),360};361(&mut next_fpr, candidate, None)362} else if vecreg {363let candidate = match args_or_rets {364ArgsOrRets::Args => get_vecreg_for_arg(next_vr),365ArgsOrRets::Rets => get_vecreg_for_ret(next_vr),366};367(&mut next_vr, candidate, None)368} else {369// We must pass this by implicit reference.370if args_or_rets == ArgsOrRets::Rets {371// For return values, just force them to memory.372(&mut next_gpr, None, None)373} else {374// For arguments, implicitly convert to pointer type.375let implicit_ref = Some(param.value_type);376param = ir::AbiParam::new(types::I64);377let candidate = get_intreg_for_arg(call_conv, next_gpr);378(&mut next_gpr, candidate, implicit_ref)379}380};381382let slot = if let Some(reg) = candidate {383*next_reg += 1;384ABIArgSlot::Reg {385reg: reg.to_real_reg().unwrap(),386ty: param.value_type,387extension: param.extension,388}389} else {390if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {391return Err(crate::CodegenError::Unsupported(392"Too many return values to fit in registers. \393Use a StructReturn argument instead. (#9510)"394.to_owned(),395));396}397398// Compute size. Every argument or return value takes a slot of399// at least 8 bytes.400let size = (ty_bits(param.value_type) / 8) as u32;401let slot_size = std::cmp::max(size, 8);402403// Align the stack slot.404debug_assert!(slot_size.is_power_of_two());405let slot_align = std::cmp::min(slot_size, 8);406next_stack = align_to(next_stack, slot_align);407408// If the type is actually of smaller size (and the argument409// was not extended), it is passed right-aligned.410let offset = if size < slot_size && param.extension == ir::ArgumentExtension::None {411slot_size - size412} else {4130414};415let offset = (next_stack + offset) as i64;416next_stack += slot_size;417ABIArgSlot::Stack {418offset,419ty: param.value_type,420extension: param.extension,421}422};423424if let Some(ty) = implicit_ref {425assert!(426(ty_bits(ty) / 8) % 8 == 0,427"implicit argument size is not properly aligned"428);429args.push(ABIArg::ImplicitPtrArg {430pointer: slot,431offset: 0, // Will be filled in later432ty,433purpose: param.purpose,434});435} else {436args.push(ABIArg::Slots {437slots: smallvec![slot],438purpose: param.purpose,439});440}441}442443next_stack = align_to(next_stack, 8);444445let extra_arg = if let Some(ret_area_ptr) = ret_area_ptr {446args.push_non_formal(ret_area_ptr);447Some(args.args().len() - 1)448} else {449None450};451452// After all arguments are in their well-defined location,453// allocate buffers for all ImplicitPtrArg arguments.454for arg in args.args_mut() {455match arg {456ABIArg::StructArg { .. } => unreachable!(),457ABIArg::ImplicitPtrArg { offset, ty, .. } => {458*offset = next_stack as i64;459next_stack += (ty_bits(*ty) / 8) as u32;460}461_ => {}462}463}464465// With the tail-call convention, arguments are passed in the *callee*'s466// frame instead of the caller's frame. This means that the register save467// area will lie between the incoming arguments and the return buffer.468// Include the size of the register area in the argument area size to469// match common code expectation that the return buffer resides immediately470// above the argument area.471if call_conv == isa::CallConv::Tail && args_or_rets == ArgsOrRets::Args && next_stack != 0 {472next_stack += REG_SAVE_AREA_SIZE;473}474475Ok((next_stack, extra_arg))476}477478fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {479Inst::gen_load(into_reg, mem.into(), ty)480}481482fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {483Inst::gen_store(mem.into(), from_reg, ty)484}485486fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {487Inst::gen_move(to_reg, from_reg, ty)488}489490fn gen_extend(491to_reg: Writable<Reg>,492from_reg: Reg,493signed: bool,494from_bits: u8,495to_bits: u8,496) -> Inst {497assert!(from_bits < to_bits);498Inst::Extend {499rd: to_reg,500rn: from_reg,501signed,502from_bits,503to_bits,504}505}506507fn gen_args(args: Vec<ArgPair>) -> Inst {508Inst::Args { args }509}510511fn gen_rets(rets: Vec<RetPair>) -> Inst {512Inst::Rets { rets }513}514515fn gen_add_imm(516_call_conv: isa::CallConv,517into_reg: Writable<Reg>,518from_reg: Reg,519imm: u32,520) -> SmallInstVec<Inst> {521let mut insts = SmallVec::new();522if let Some(imm) = UImm12::maybe_from_u64(imm as u64) {523insts.push(Inst::LoadAddr {524rd: into_reg,525mem: MemArg::BXD12 {526base: from_reg,527index: zero_reg(),528disp: imm,529flags: MemFlags::trusted(),530},531});532} else if let Some(imm) = SImm20::maybe_from_i64(imm as i64) {533insts.push(Inst::LoadAddr {534rd: into_reg,535mem: MemArg::BXD20 {536base: from_reg,537index: zero_reg(),538disp: imm,539flags: MemFlags::trusted(),540},541});542} else {543if from_reg != into_reg.to_reg() {544insts.push(Inst::mov64(into_reg, from_reg));545}546insts.push(Inst::AluRUImm32 {547alu_op: ALUOp::AddLogical64,548rd: into_reg,549ri: into_reg.to_reg(),550imm,551});552}553insts554}555556fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {557let mut insts = SmallVec::new();558insts.push(Inst::CmpTrapRR {559op: CmpOp::CmpL64,560rn: stack_reg(),561rm: limit_reg,562cond: Cond::from_intcc(IntCC::UnsignedLessThanOrEqual),563trap_code: ir::TrapCode::STACK_OVERFLOW,564});565insts566}567568fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Inst {569let mem = mem.into();570Inst::LoadAddr { rd: into_reg, mem }571}572573fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {574spilltmp_reg()575}576577fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {578let mem = MemArg::reg_plus_off(base, offset.into(), MemFlags::trusted());579Inst::gen_load(into_reg, mem, ty)580}581582fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {583let mem = MemArg::reg_plus_off(base, offset.into(), MemFlags::trusted());584Inst::gen_store(mem, from_reg, ty)585}586587fn gen_sp_reg_adjust(imm: i32) -> SmallInstVec<Inst> {588if imm == 0 {589return SmallVec::new();590}591592let mut insts = SmallVec::new();593if let Ok(imm) = i16::try_from(imm) {594insts.push(Inst::AluRSImm16 {595alu_op: ALUOp::Add64,596rd: writable_stack_reg(),597ri: stack_reg(),598imm,599});600} else {601insts.push(Inst::AluRSImm32 {602alu_op: ALUOp::Add64,603rd: writable_stack_reg(),604ri: stack_reg(),605imm,606});607}608insts609}610611fn gen_prologue_frame_setup(612_call_conv: isa::CallConv,613_flags: &settings::Flags,614_isa_flags: &s390x_settings::Flags,615_frame_layout: &FrameLayout,616) -> SmallInstVec<Inst> {617SmallVec::new()618}619620fn gen_epilogue_frame_restore(621_call_conv: isa::CallConv,622_flags: &settings::Flags,623_isa_flags: &s390x_settings::Flags,624_frame_layout: &FrameLayout,625) -> SmallInstVec<Inst> {626SmallVec::new()627}628629fn gen_return(630_call_conv: isa::CallConv,631_isa_flags: &s390x_settings::Flags,632_frame_layout: &FrameLayout,633) -> SmallInstVec<Inst> {634smallvec![Inst::Ret { link: gpr(14) }]635}636637fn gen_probestack(_insts: &mut SmallInstVec<Self::I>, _: u32) {638// TODO: implement if we ever require stack probes on an s390x host639// (unlikely unless Lucet is ported)640unimplemented!("Stack probing is unimplemented on S390x");641}642643fn gen_inline_probestack(644insts: &mut SmallInstVec<Self::I>,645_call_conv: isa::CallConv,646frame_size: u32,647guard_size: u32,648) {649// The stack probe loop currently takes 4 instructions and each unrolled650// probe takes 2. Set this to 2 to keep the max size to 4 instructions.651const PROBE_MAX_UNROLL: u32 = 2;652653// Calculate how many probes we need to perform. Round down, as we only654// need to probe whole guard_size regions we'd otherwise skip over.655let probe_count = frame_size / guard_size;656if probe_count == 0 {657// No probe necessary658} else if probe_count <= PROBE_MAX_UNROLL {659// Unrolled probe loop.660for _ in 0..probe_count {661insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));662663insts.push(Inst::StoreImm8 {664imm: 0,665mem: MemArg::reg(stack_reg(), MemFlags::trusted()),666});667}668} else {669// Explicit probe loop.670671// Load the number of probes into a register used as loop counter.672// `gen_inline_probestack` is called after regalloc2, so we can673// use the nonallocatable spilltmp register for this purpose.674let probe_count_reg = writable_spilltmp_reg();675if let Ok(probe_count) = i16::try_from(probe_count) {676insts.push(Inst::Mov32SImm16 {677rd: probe_count_reg,678imm: probe_count,679});680} else {681insts.push(Inst::Mov32Imm {682rd: probe_count_reg,683imm: probe_count,684});685}686687// Emit probe loop. The guard size is assumed to fit in 16 bits.688insts.push(Inst::StackProbeLoop {689probe_count: probe_count_reg,690guard_size: i16::try_from(guard_size).unwrap(),691});692}693694// Restore the stack pointer to its original position.695insts.extend(Self::gen_sp_reg_adjust((probe_count * guard_size) as i32));696}697698fn gen_clobber_save(699call_conv: isa::CallConv,700flags: &settings::Flags,701frame_layout: &FrameLayout,702) -> SmallVec<[Inst; 16]> {703let mut insts = SmallVec::new();704705// With the tail call convention, the caller already allocated the706// part of our stack frame that contains incoming arguments.707let incoming_tail_args_size = if call_conv == isa::CallConv::Tail {708frame_layout.incoming_args_size709} else {7100711};712713// Define unwind stack frame.714if flags.unwind_info() {715insts.push(Inst::Unwind {716inst: UnwindInst::DefineNewFrame {717offset_upward_to_caller_sp: REG_SAVE_AREA_SIZE + incoming_tail_args_size,718offset_downward_to_clobbers: frame_layout.clobber_size719- incoming_tail_args_size,720},721});722}723724// Use STMG to save clobbered GPRs into save area.725// Note that we always save SP (%r15) here if anything is saved.726if let Some((first_clobbered_gpr, _)) = get_clobbered_gprs(frame_layout) {727let mut last_clobbered_gpr = 15;728let offset = 8 * first_clobbered_gpr as i64 + incoming_tail_args_size as i64;729insts.push(Inst::StoreMultiple64 {730rt: gpr(first_clobbered_gpr),731rt2: gpr(last_clobbered_gpr),732mem: MemArg::reg_plus_off(stack_reg(), offset, MemFlags::trusted()),733});734if flags.unwind_info() {735// Normally, we instruct the unwinder to restore the stack pointer736// from its slot in the save area. However, if we have incoming737// tail-call arguments, the value saved in that slot is incorrect.738// In that case, we instead instruct the unwinder to compute the739// unwound SP relative to the current CFA, as CFA == SP + 160.740if incoming_tail_args_size != 0 {741insts.push(Inst::Unwind {742inst: UnwindInst::RegStackOffset {743clobber_offset: frame_layout.clobber_size,744reg: gpr(last_clobbered_gpr).to_real_reg().unwrap(),745},746});747last_clobbered_gpr = last_clobbered_gpr - 1;748}749for i in first_clobbered_gpr..(last_clobbered_gpr + 1) {750insts.push(Inst::Unwind {751inst: UnwindInst::SaveReg {752clobber_offset: frame_layout.clobber_size + (i * 8) as u32,753reg: gpr(i).to_real_reg().unwrap(),754},755});756}757}758}759760// Save current stack pointer value if we need to write the backchain.761if flags.preserve_frame_pointers() {762if incoming_tail_args_size == 0 {763insts.push(Inst::mov64(writable_gpr(1), stack_reg()));764} else {765insts.extend(Self::gen_add_imm(766call_conv,767writable_gpr(1),768stack_reg(),769incoming_tail_args_size,770));771}772}773774// Decrement stack pointer.775let stack_size = frame_layout.outgoing_args_size as i32776+ frame_layout.clobber_size as i32777+ frame_layout.fixed_frame_storage_size as i32778- incoming_tail_args_size as i32;779insts.extend(Self::gen_sp_reg_adjust(-stack_size));780if flags.unwind_info() {781insts.push(Inst::Unwind {782inst: UnwindInst::StackAlloc {783size: stack_size as u32,784},785});786}787788// Write the stack backchain if requested, using the value saved above.789if flags.preserve_frame_pointers() {790insts.push(Inst::Store64 {791rd: gpr(1),792mem: MemArg::reg_plus_off(stack_reg(), 0, MemFlags::trusted()),793});794}795796// Save FPRs.797for (i, reg) in get_clobbered_fprs(frame_layout).iter().enumerate() {798insts.push(Inst::VecStoreLane {799size: 64,800rd: reg.to_reg().into(),801mem: MemArg::reg_plus_off(802stack_reg(),803(i * 8) as i64804+ frame_layout.outgoing_args_size as i64805+ frame_layout.fixed_frame_storage_size as i64,806MemFlags::trusted(),807),808lane_imm: 0,809});810if flags.unwind_info() {811insts.push(Inst::Unwind {812inst: UnwindInst::SaveReg {813clobber_offset: (i * 8) as u32,814reg: reg.to_reg(),815},816});817}818}819820insts821}822823fn gen_clobber_restore(824call_conv: isa::CallConv,825_flags: &settings::Flags,826frame_layout: &FrameLayout,827) -> SmallVec<[Inst; 16]> {828let mut insts = SmallVec::new();829830// Restore FPRs.831insts.extend(gen_restore_fprs(frame_layout));832833// Restore GPRs (including SP).834insts.extend(gen_restore_gprs(call_conv, frame_layout, 0));835836insts837}838839fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(840_call_conv: isa::CallConv,841_dst: Reg,842_src: Reg,843_size: usize,844_alloc: F,845) -> SmallVec<[Self::I; 8]> {846unimplemented!("StructArgs not implemented for S390X yet");847}848849fn get_number_of_spillslots_for_value(850rc: RegClass,851_vector_scale: u32,852_isa_flags: &Self::F,853) -> u32 {854// We allocate in terms of 8-byte slots.855match rc {856RegClass::Int => 1,857RegClass::Float => 2,858RegClass::Vector => unreachable!(),859}860}861862fn get_machine_env(_flags: &settings::Flags, call_conv: isa::CallConv) -> &MachineEnv {863match call_conv {864isa::CallConv::Tail => {865static TAIL_MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();866TAIL_MACHINE_ENV.get_or_init(tail_create_machine_env)867}868_ => {869static SYSV_MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();870SYSV_MACHINE_ENV.get_or_init(sysv_create_machine_env)871}872}873}874875fn get_regs_clobbered_by_call(876call_conv_of_callee: isa::CallConv,877is_exception: bool,878) -> PRegSet {879match call_conv_of_callee {880isa::CallConv::Tail if is_exception => ALL_CLOBBERS,881isa::CallConv::Tail => TAIL_CLOBBERS,882_ => SYSV_CLOBBERS,883}884}885886fn get_ext_mode(887_call_conv: isa::CallConv,888specified: ir::ArgumentExtension,889) -> ir::ArgumentExtension {890specified891}892893fn compute_frame_layout(894call_conv: isa::CallConv,895flags: &settings::Flags,896_sig: &Signature,897regs: &[Writable<RealReg>],898function_calls: FunctionCalls,899incoming_args_size: u32,900tail_args_size: u32,901stackslots_size: u32,902fixed_frame_storage_size: u32,903mut outgoing_args_size: u32,904) -> FrameLayout {905assert!(906!flags.enable_pinned_reg(),907"Pinned register not supported on s390x"908);909910let mut regs: Vec<Writable<RealReg>> = regs911.iter()912.cloned()913.filter(|r| is_reg_saved_in_prologue(call_conv, r.to_reg()))914.collect();915916// If the front end asks to preserve frame pointers (which we do not917// really have in the s390x ABI), we use the stack backchain instead.918// For this to work in all cases, we must allocate a stack frame with919// at least the outgoing register save area even in leaf functions.920// Update our caller's outgoing_args_size to reflect this.921if flags.preserve_frame_pointers() {922if outgoing_args_size < REG_SAVE_AREA_SIZE {923outgoing_args_size = REG_SAVE_AREA_SIZE;924}925}926927// We need to save/restore the link register in non-leaf functions.928// This is not included in the clobber list because we have excluded929// call instructions via the is_included_in_clobbers callback.930// We also want to enforce saving the link register in leaf functions931// for stack unwinding, if we're asked to preserve frame pointers.932if outgoing_args_size > 0 {933let link_reg = Writable::from_reg(RealReg::from(gpr_preg(14)));934if !regs.contains(&link_reg) {935regs.push(link_reg);936}937}938939// Sort registers for deterministic code output. We can do an unstable940// sort because the registers will be unique (there are no dups).941regs.sort_unstable();942943// Compute clobber size. We only need to count FPR save slots.944let mut clobber_size = 0;945for reg in ®s {946match reg.to_reg().class() {947RegClass::Int => {}948RegClass::Float => {949clobber_size += 8;950}951RegClass::Vector => unreachable!(),952}953}954955// Common code assumes that tail-call arguments are part of the caller's956// frame. This is not correct for our tail-call convention. To ensure957// common code still gets the total size of this stack frame correct,958// we add the (incoming and outgoing) taill-call argument size to the959// "clobber" size.960if call_conv == isa::CallConv::Tail {961clobber_size += tail_args_size;962}963964// Return FrameLayout structure.965FrameLayout {966word_bytes: 8,967incoming_args_size,968// We already accounted for tail-call arguments above, so reset969// this value to its default.970tail_args_size: incoming_args_size,971setup_area_size: 0,972clobber_size,973fixed_frame_storage_size,974stackslots_size,975outgoing_args_size,976clobbered_callee_saves: regs,977function_calls,978}979}980981fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {982panic!("Should not be called");983}984985fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {986const PAYLOAD_REGS: &'static [Reg] = &[gpr(6), gpr(7)];987match call_conv {988isa::CallConv::SystemV | isa::CallConv::Tail => PAYLOAD_REGS,989_ => &[],990}991}992}993994impl S390xMachineDeps {995pub fn gen_tail_epilogue(996frame_layout: &FrameLayout,997callee_pop_size: u32,998dest: &CallInstDest,999) -> (SmallVec<[Inst; 16]>, Option<Reg>) {1000let mut insts = SmallVec::new();1001let call_conv = isa::CallConv::Tail;10021003// Restore FPRs.1004insts.extend(gen_restore_fprs(frame_layout));10051006// If the tail call target is in a callee-saved GPR, we need to move it1007// to %r1 (as the only available temp register) before restoring GPRs1008// (but after restoring FPRs, which might clobber %r1).1009let temp_dest = match dest {1010CallInstDest::Indirect { reg }1011if reg.to_real_reg().is_some()1012&& is_reg_saved_in_prologue(call_conv, reg.to_real_reg().unwrap()) =>1013{1014insts.push(Inst::Mov64 {1015rd: writable_gpr(1),1016rm: *reg,1017});1018Some(gpr(1))1019}1020_ => None,1021};10221023// Restore GPRs (including SP).1024insts.extend(gen_restore_gprs(call_conv, frame_layout, callee_pop_size));10251026(insts, temp_dest)1027}10281029/// Emit loads for any stack-carried return values using the call1030/// info and allocations. In addition, emit lane swaps for all1031/// vector-types return values if needed.1032pub fn gen_retval_loads(info: &CallInfo<CallInstDest>) -> SmallInstVec<Inst> {1033let mut insts = SmallVec::new();10341035// Helper routine to lane-swap a register if needed.1036let lane_swap_if_needed = |insts: &mut SmallInstVec<Inst>, vreg, ty: Type| {1037if LaneOrder::from(info.caller_conv) != LaneOrder::from(info.callee_conv) {1038if ty.is_vector() && ty.lane_count() >= 2 {1039insts.push(Inst::VecEltRev {1040lane_count: ty.lane_count(),1041rd: vreg,1042rn: vreg.to_reg(),1043});1044}1045}1046};10471048// Helper routine to allocate a temp register for ty.1049let temp_reg = |ty| match Inst::rc_for_type(ty).unwrap() {1050(&[RegClass::Int], _) => writable_gpr(0),1051(&[RegClass::Float], _) => writable_vr(1),1052_ => unreachable!(),1053};10541055// Do a first pass over the return locations to handle copies that1056// need temp registers. These need to be done before regular stack1057// loads in case the destination of a load happens to be our temp1058// register. (The temp registers by choice are distinct from all1059// real return registers, which we verify here again.)1060for CallRetPair { vreg, location } in &info.defs {1061match location {1062RetLocation::Reg(preg, ty) => {1063debug_assert!(*preg != temp_reg(*ty).to_reg());1064}1065RetLocation::Stack(amode, ty) => {1066if let Some(spillslot) = vreg.to_reg().to_spillslot() {1067let temp = temp_reg(*ty);1068insts.push(Inst::gen_load(temp, (*amode).into(), *ty));1069lane_swap_if_needed(&mut insts, temp, *ty);1070insts.push(Inst::gen_store(1071MemArg::SpillOffset {1072off: 8 * (spillslot.index() as i64),1073},1074temp.to_reg(),1075Inst::canonical_type_for_rc(temp.to_reg().class()),1076));1077}1078}1079}1080}1081// Now handle all remaining return locations.1082for CallRetPair { vreg, location } in &info.defs {1083match location {1084RetLocation::Reg(preg, ty) => {1085lane_swap_if_needed(&mut insts, Writable::from_reg(*preg), *ty);1086}1087RetLocation::Stack(amode, ty) => {1088if vreg.to_reg().to_spillslot().is_none() {1089insts.push(Inst::gen_load(*vreg, (*amode).into(), *ty));1090lane_swap_if_needed(&mut insts, *vreg, *ty);1091}1092}1093}1094}1095insts1096}1097}10981099fn is_reg_saved_in_prologue(call_conv: isa::CallConv, r: RealReg) -> bool {1100match (call_conv, r.class()) {1101(isa::CallConv::Tail, RegClass::Int) => {1102// r8 - r15 inclusive are callee-saves.1103r.hw_enc() >= 8 && r.hw_enc() <= 151104}1105(_, RegClass::Int) => {1106// r6 - r15 inclusive are callee-saves.1107r.hw_enc() >= 6 && r.hw_enc() <= 151108}1109(_, RegClass::Float) => {1110// f8 - f15 inclusive are callee-saves.1111r.hw_enc() >= 8 && r.hw_enc() <= 151112}1113(_, RegClass::Vector) => unreachable!(),1114}1115}11161117fn get_clobbered_gprs(frame_layout: &FrameLayout) -> Option<(u8, u8)> {1118// Collect clobbered GPRs. Note we save/restore GPR always as1119// a block of registers using LOAD MULTIPLE / STORE MULTIPLE, starting1120// with the clobbered GPR with the lowest number up to the clobbered GPR1121// with the highest number.1122let (clobbered_gpr, _) = frame_layout.clobbered_callee_saves_by_class();1123if clobbered_gpr.is_empty() {1124return None;1125}11261127let first = clobbered_gpr.first().unwrap().to_reg().hw_enc();1128let last = clobbered_gpr.last().unwrap().to_reg().hw_enc();1129debug_assert!(clobbered_gpr.iter().all(|r| r.to_reg().hw_enc() >= first));1130debug_assert!(clobbered_gpr.iter().all(|r| r.to_reg().hw_enc() <= last));1131Some((first, last))1132}11331134fn get_clobbered_fprs(frame_layout: &FrameLayout) -> &[Writable<RealReg>] {1135// Collect clobbered floating-point registers.1136let (_, clobbered_fpr) = frame_layout.clobbered_callee_saves_by_class();1137clobbered_fpr1138}11391140// Restore GPRs (including SP) from the register save area.1141// This must not clobber any register, specifically including %r1.1142fn gen_restore_gprs(1143call_conv: isa::CallConv,1144frame_layout: &FrameLayout,1145callee_pop_size: u32,1146) -> SmallVec<[Inst; 16]> {1147let mut insts = SmallVec::new();11481149// Determine GPRs to be restored.1150let clobbered_gpr = get_clobbered_gprs(frame_layout);11511152// Increment stack pointer unless it will be restored implicitly.1153// Note that implicit stack pointer restoration cannot be done in the1154// presence of either incoming or outgoing tail call arguments.1155let stack_size = frame_layout.outgoing_args_size as i321156+ frame_layout.clobber_size as i321157+ frame_layout.fixed_frame_storage_size as i32;1158let implicit_sp_restore = callee_pop_size == 01159&& (call_conv != isa::CallConv::Tail || frame_layout.incoming_args_size == 0)1160&& clobbered_gpr.map_or(false, |(first, _)| {1161SImm20::maybe_from_i64(8 * first as i64 + stack_size as i64).is_some()1162});1163if !implicit_sp_restore {1164insts.extend(S390xMachineDeps::gen_sp_reg_adjust(1165stack_size - callee_pop_size as i32,1166));1167}11681169// Use LMG to restore clobbered GPRs from save area.1170if let Some((first, mut last)) = clobbered_gpr {1171// Attempt to restore via SP, taking implicit restoration into account.1172let mut reg = stack_reg();1173let mut offset = callee_pop_size as i64 + 8 * first as i64;1174if implicit_sp_restore {1175offset += stack_size as i64 - callee_pop_size as i64;1176last = 15;1177}1178// If the offset still overflows, use the first restored GPR1179// as temporary holding the address, as we cannot use %r1.1180if SImm20::maybe_from_i64(offset).is_none() {1181insts.extend(S390xMachineDeps::gen_add_imm(1182call_conv,1183writable_gpr(first),1184stack_reg(),1185offset as u32,1186));1187reg = gpr(first);1188offset = 0;1189}1190// Now this LMG will always have an in-range offset.1191insts.push(Inst::LoadMultiple64 {1192rt: writable_gpr(first),1193rt2: writable_gpr(last),1194mem: MemArg::reg_plus_off(reg, offset, MemFlags::trusted()),1195});1196}11971198insts1199}12001201// Restore FPRs from the clobber area.1202fn gen_restore_fprs(frame_layout: &FrameLayout) -> SmallVec<[Inst; 16]> {1203let mut insts = SmallVec::new();12041205// Determine FPRs to be restored.1206let clobbered_fpr = get_clobbered_fprs(frame_layout);12071208// Restore FPRs.1209for (i, reg) in clobbered_fpr.iter().enumerate() {1210insts.push(Inst::VecLoadLaneUndef {1211size: 64,1212rd: Writable::from_reg(reg.to_reg().into()),1213mem: MemArg::reg_plus_off(1214stack_reg(),1215(i * 8) as i641216+ frame_layout.outgoing_args_size as i641217+ frame_layout.fixed_frame_storage_size as i64,1218MemFlags::trusted(),1219),1220lane_imm: 0,1221});1222}12231224insts1225}12261227const fn sysv_clobbers() -> PRegSet {1228PRegSet::empty()1229.with(gpr_preg(0))1230.with(gpr_preg(1))1231.with(gpr_preg(2))1232.with(gpr_preg(3))1233.with(gpr_preg(4))1234.with(gpr_preg(5))1235// v0 - v7 inclusive and v16 - v31 inclusive are1236// caller-saves. The upper 64 bits of v8 - v15 inclusive are1237// also caller-saves. However, because we cannot currently1238// represent partial registers to regalloc2, we indicate here1239// that every vector register is caller-save. Because this1240// function is used at *callsites*, approximating in this1241// direction (save more than necessary) is conservative and1242// thus safe.1243//1244// Note that we exclude clobbers from a call instruction when1245// a call instruction's callee has the same ABI as the caller1246// (the current function body); this is safe (anything1247// clobbered by callee can be clobbered by caller as well) and1248// avoids unnecessary saves of v8-v15 in the prologue even1249// though we include them as defs here.1250.with(vr_preg(0))1251.with(vr_preg(1))1252.with(vr_preg(2))1253.with(vr_preg(3))1254.with(vr_preg(4))1255.with(vr_preg(5))1256.with(vr_preg(6))1257.with(vr_preg(7))1258.with(vr_preg(8))1259.with(vr_preg(9))1260.with(vr_preg(10))1261.with(vr_preg(11))1262.with(vr_preg(12))1263.with(vr_preg(13))1264.with(vr_preg(14))1265.with(vr_preg(15))1266.with(vr_preg(16))1267.with(vr_preg(17))1268.with(vr_preg(18))1269.with(vr_preg(19))1270.with(vr_preg(20))1271.with(vr_preg(21))1272.with(vr_preg(22))1273.with(vr_preg(23))1274.with(vr_preg(24))1275.with(vr_preg(25))1276.with(vr_preg(26))1277.with(vr_preg(27))1278.with(vr_preg(28))1279.with(vr_preg(29))1280.with(vr_preg(30))1281.with(vr_preg(31))1282}1283const SYSV_CLOBBERS: PRegSet = sysv_clobbers();12841285const fn tail_clobbers() -> PRegSet {1286// Same as the SystemV ABI, except that %r6 and %r7 are clobbered.1287PRegSet::empty()1288.with(gpr_preg(0))1289.with(gpr_preg(1))1290.with(gpr_preg(2))1291.with(gpr_preg(3))1292.with(gpr_preg(4))1293.with(gpr_preg(5))1294.with(gpr_preg(6))1295.with(gpr_preg(7))1296.with(vr_preg(0))1297.with(vr_preg(1))1298.with(vr_preg(2))1299.with(vr_preg(3))1300.with(vr_preg(4))1301.with(vr_preg(5))1302.with(vr_preg(6))1303.with(vr_preg(7))1304.with(vr_preg(8))1305.with(vr_preg(9))1306.with(vr_preg(10))1307.with(vr_preg(11))1308.with(vr_preg(12))1309.with(vr_preg(13))1310.with(vr_preg(14))1311.with(vr_preg(15))1312.with(vr_preg(16))1313.with(vr_preg(17))1314.with(vr_preg(18))1315.with(vr_preg(19))1316.with(vr_preg(20))1317.with(vr_preg(21))1318.with(vr_preg(22))1319.with(vr_preg(23))1320.with(vr_preg(24))1321.with(vr_preg(25))1322.with(vr_preg(26))1323.with(vr_preg(27))1324.with(vr_preg(28))1325.with(vr_preg(29))1326.with(vr_preg(30))1327.with(vr_preg(31))1328}1329const TAIL_CLOBBERS: PRegSet = tail_clobbers();13301331const fn all_clobbers() -> PRegSet {1332PRegSet::empty()1333.with(gpr_preg(0))1334.with(gpr_preg(1))1335.with(gpr_preg(2))1336.with(gpr_preg(3))1337.with(gpr_preg(4))1338.with(gpr_preg(5))1339.with(gpr_preg(6))1340.with(gpr_preg(7))1341.with(gpr_preg(8))1342.with(gpr_preg(9))1343.with(gpr_preg(10))1344.with(gpr_preg(11))1345.with(gpr_preg(12))1346.with(gpr_preg(13))1347.with(gpr_preg(14))1348.with(gpr_preg(15))1349.with(vr_preg(0))1350.with(vr_preg(1))1351.with(vr_preg(2))1352.with(vr_preg(3))1353.with(vr_preg(4))1354.with(vr_preg(5))1355.with(vr_preg(6))1356.with(vr_preg(7))1357.with(vr_preg(8))1358.with(vr_preg(9))1359.with(vr_preg(10))1360.with(vr_preg(11))1361.with(vr_preg(12))1362.with(vr_preg(13))1363.with(vr_preg(14))1364.with(vr_preg(15))1365.with(vr_preg(16))1366.with(vr_preg(17))1367.with(vr_preg(18))1368.with(vr_preg(19))1369.with(vr_preg(20))1370.with(vr_preg(21))1371.with(vr_preg(22))1372.with(vr_preg(23))1373.with(vr_preg(24))1374.with(vr_preg(25))1375.with(vr_preg(26))1376.with(vr_preg(27))1377.with(vr_preg(28))1378.with(vr_preg(29))1379.with(vr_preg(30))1380.with(vr_preg(31))1381}1382const ALL_CLOBBERS: PRegSet = all_clobbers();13831384fn sysv_create_machine_env() -> MachineEnv {1385MachineEnv {1386preferred_regs_by_class: [1387vec![1388// no r0; can't use for addressing?1389// no r1; it is our spilltmp.1390gpr_preg(2),1391gpr_preg(3),1392gpr_preg(4),1393gpr_preg(5),1394],1395vec![1396vr_preg(0),1397vr_preg(1),1398vr_preg(2),1399vr_preg(3),1400vr_preg(4),1401vr_preg(5),1402vr_preg(6),1403vr_preg(7),1404vr_preg(16),1405vr_preg(17),1406vr_preg(18),1407vr_preg(19),1408vr_preg(20),1409vr_preg(21),1410vr_preg(22),1411vr_preg(23),1412vr_preg(24),1413vr_preg(25),1414vr_preg(26),1415vr_preg(27),1416vr_preg(28),1417vr_preg(29),1418vr_preg(30),1419vr_preg(31),1420],1421// Vector Regclass is unused1422vec![],1423],1424non_preferred_regs_by_class: [1425vec![1426gpr_preg(6),1427gpr_preg(7),1428gpr_preg(8),1429gpr_preg(9),1430gpr_preg(10),1431gpr_preg(11),1432gpr_preg(12),1433gpr_preg(13),1434gpr_preg(14),1435// no r15; it is the stack pointer.1436],1437vec![1438vr_preg(8),1439vr_preg(9),1440vr_preg(10),1441vr_preg(11),1442vr_preg(12),1443vr_preg(13),1444vr_preg(14),1445vr_preg(15),1446],1447// Vector Regclass is unused1448vec![],1449],1450fixed_stack_slots: vec![],1451scratch_by_class: [None, None, None],1452}1453}14541455fn tail_create_machine_env() -> MachineEnv {1456// Same as the SystemV ABI, except that %r6 and %r7 are preferred.1457MachineEnv {1458preferred_regs_by_class: [1459vec![1460// no r0; can't use for addressing?1461// no r1; it is our spilltmp.1462gpr_preg(2),1463gpr_preg(3),1464gpr_preg(4),1465gpr_preg(5),1466gpr_preg(6),1467gpr_preg(7),1468],1469vec![1470vr_preg(0),1471vr_preg(1),1472vr_preg(2),1473vr_preg(3),1474vr_preg(4),1475vr_preg(5),1476vr_preg(6),1477vr_preg(7),1478vr_preg(16),1479vr_preg(17),1480vr_preg(18),1481vr_preg(19),1482vr_preg(20),1483vr_preg(21),1484vr_preg(22),1485vr_preg(23),1486vr_preg(24),1487vr_preg(25),1488vr_preg(26),1489vr_preg(27),1490vr_preg(28),1491vr_preg(29),1492vr_preg(30),1493vr_preg(31),1494],1495// Vector Regclass is unused1496vec![],1497],1498non_preferred_regs_by_class: [1499vec![1500gpr_preg(8),1501gpr_preg(9),1502gpr_preg(10),1503gpr_preg(11),1504gpr_preg(12),1505gpr_preg(13),1506gpr_preg(14),1507// no r15; it is the stack pointer.1508],1509vec![1510vr_preg(8),1511vr_preg(9),1512vr_preg(10),1513vr_preg(11),1514vr_preg(12),1515vr_preg(13),1516vr_preg(14),1517vr_preg(15),1518],1519// Vector Regclass is unused1520vec![],1521],1522fixed_stack_slots: vec![],1523scratch_by_class: [None, None, None],1524}1525}152615271528