Path: blob/main/cranelift/codegen/src/isa/aarch64/abi.rs
//! Implementation of a standard AArch64 ABI.

use crate::CodegenResult;
use crate::ir;
use crate::ir::MemFlags;
use crate::ir::types;
use crate::ir::types::*;
use crate::ir::{ExternalName, LibCall, Signature, dynamic_to_fixed};
use crate::isa;
use crate::isa::aarch64::{inst::*, settings as aarch64_settings};
use crate::isa::unwind::UnwindInst;
use crate::isa::winch;
use crate::machinst::*;
use crate::settings;
use alloc::borrow::ToOwned;
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc2::{MachineEnv, PReg, PRegSet};
use smallvec::{SmallVec, smallvec};
use std::sync::OnceLock;

// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
// these ABIs are very similar.

/// Support for the AArch64 ABI from the callee side (within a function body).
pub(crate) type AArch64Callee = Callee<AArch64MachineDeps>;

impl From<StackAMode> for AMode {
    fn from(stack: StackAMode) -> AMode {
        match stack {
            StackAMode::IncomingArg(off, stack_args_size) => AMode::IncomingArg {
                off: i64::from(stack_args_size) - off,
            },
            StackAMode::Slot(off) => AMode::SlotOffset { off },
            StackAMode::OutgoingArg(off) => AMode::SPOffset { off },
        }
    }
}

// Returns the size of stack space needed to store the
// `clobbered_callee_saves` registers.
fn compute_clobber_size(clobbered_callee_saves: &[Writable<RealReg>]) -> u32 {
    let mut int_regs = 0;
    let mut vec_regs = 0;
    for &reg in clobbered_callee_saves {
        match reg.to_reg().class() {
            RegClass::Int => {
                int_regs += 1;
            }
            RegClass::Float => {
                vec_regs += 1;
            }
            RegClass::Vector => unreachable!(),
        }
    }

    // Round up to multiple of 2, to keep 16-byte stack alignment.
    let int_save_bytes = (int_regs + (int_regs & 1)) * 8;
    // The Procedure Call Standard for the Arm 64-bit Architecture
    // (AAPCS64, including several related ABIs such as the one used by
    // Windows) mandates saving only the bottom 8 bytes of the vector
    // registers, so we round up the number of registers to ensure
    // proper stack alignment (similarly to the situation with
    // `int_reg`).
    let vec_reg_size = 8;
    let vec_save_padding = vec_regs & 1;
    // FIXME: SVE: ABI is different to Neon, so do we treat all vec regs as Z-regs?
    let vec_save_bytes = (vec_regs + vec_save_padding) * vec_reg_size;

    int_save_bytes + vec_save_bytes
}

/// AArch64-specific ABI behavior. This struct just serves as an implementation
/// point for the trait; it is never actually instantiated.
pub struct AArch64MachineDeps;

impl IsaFlags for aarch64_settings::Flags {
    fn is_forward_edge_cfi_enabled(&self) -> bool {
        self.use_bti()
    }
}

impl ABIMachineSpec for AArch64MachineDeps {
    type I = Inst;

    type F = aarch64_settings::Flags;

    /// This is the limit for the size of argument and return-value areas on the
    /// stack. We place a reasonable limit here to avoid integer overflow issues
    /// with 32-bit arithmetic: for now, 128 MB.
    const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

    fn word_bits() -> u32 {
        64
    }

    /// Return required stack alignment in bytes.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }

    fn compute_arg_locs(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        mut args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)> {
        let is_apple_cc = call_conv == isa::CallConv::AppleAarch64;
        let is_winch_return = call_conv == isa::CallConv::Winch && args_or_rets == ArgsOrRets::Rets;

        // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), sections 6.4.
        //
        // MacOS aarch64 is slightly different, see also
        // https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
        // We are diverging from the MacOS aarch64 implementation in the
        // following ways:
        // - sign- and zero- extensions of data types less than 32 bits are not
        //   implemented yet.
        // - we align the arguments stack space to a 16-bytes boundary, while
        //   the MacOS allows aligning only on 8 bytes. In practice it means we're
        //   slightly overallocating when calling, which is fine, and doesn't
        //   break our other invariants that the stack is always allocated in
        //   16-bytes chunks.

        let mut next_xreg = if call_conv == isa::CallConv::Tail {
            // We reserve `x0` for the return area pointer. For simplicity, we
            // reserve it even when there is no return area pointer needed. This
            // also means that identity functions don't have to shuffle arguments to
            // different return registers because we shifted all argument register
            // numbers down by one to make space for the return area pointer.
            //
            // Also, we cannot use all allocatable GPRs as arguments because we need
            // at least one allocatable register for holding the callee address in
            // indirect calls. So skip `x1` also, reserving it for that role.
            2
        } else {
            0
        };
        let mut next_vreg = 0;
        let mut next_stack: u32 = 0;

        // Note on return values: on the regular ABI, we may return values
        // in 8 registers for V128 and I64 registers independently of the
        // number of register values returned in the other class. That is,
        // we can return values in up to 8 integer and
        // 8 vector registers at once.
        let max_per_class_reg_vals = 8; // x0-x7 and v0-v7
        let mut remaining_reg_vals = 16;

        let ret_area_ptr = if add_ret_area_ptr {
            debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
            if call_conv != isa::CallConv::Winch {
                // In the AAPCS64 calling convention the return area pointer is
                // stored in x8.
                Some(ABIArg::reg(
                    xreg(8).to_real_reg().unwrap(),
                    I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                ))
            } else {
                // Use x0 for the return area pointer in the Winch calling convention
                // to simplify the ABI handling code in Winch by avoiding an AArch64
                // special case to assign it to x8.
                next_xreg += 1;
                Some(ABIArg::reg(
                    xreg(0).to_real_reg().unwrap(),
                    I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                ))
            }
        } else {
            None
        };

        for (i, param) in params.into_iter().enumerate() {
            if is_apple_cc && param.value_type == types::F128 && !flags.enable_llvm_abi_extensions()
            {
                panic!(
                    "f128 args/return values not supported for apple_aarch64 unless LLVM ABI extensions are enabled"
                );
            }

            let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?;

            if let ir::ArgumentPurpose::StructReturn = param.purpose {
                assert!(
                    call_conv != isa::CallConv::Tail,
                    "support for StructReturn parameters is not implemented for the `tail` \
                     calling convention yet",
                );
            }

            if let ir::ArgumentPurpose::StructArgument(_) = param.purpose {
                panic!(
                    "StructArgument parameters are not supported on arm64. \
                     Use regular pointer arguments instead."
                );
            }

            if let ir::ArgumentPurpose::StructReturn = param.purpose {
                // FIXME add assert_eq!(args_or_rets, ArgsOrRets::Args); once
                // ensure_struct_return_ptr_is_returned is gone.
                assert!(
                    param.value_type == types::I64,
                    "StructReturn must be a pointer sized integer"
                );
                args.push(ABIArg::Slots {
                    slots: smallvec![ABIArgSlot::Reg {
                        reg: xreg(8).to_real_reg().unwrap(),
                        ty: types::I64,
                        extension: param.extension,
                    },],
                    purpose: ir::ArgumentPurpose::StructReturn,
                });
                continue;
            }

            // Handle multi register params
            //
            // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#642parameter-passing-rules), (Section 6.4.2 Stage C).
            //
            // For arguments with alignment of 16 we round up the register number
            // to the next even value. So we can never allocate for example an i128
            // to X1 and X2, we have to skip one register and do X2, X3
            // (Stage C.8)
            // Note: The Apple ABI deviates a bit here. They don't respect Stage C.8
            // and will happily allocate an i128 to X1 and X2
            //
            // For integer types with alignment of 16 we also have the additional
            // restriction of passing the lower half in Xn and the upper half in Xn+1
            // (Stage C.9)
            //
            // For examples of how LLVM handles this: https://godbolt.org/z/bhd3vvEfh
            //
            // On the Apple ABI it is unspecified if we can spill half the value into the stack
            // i.e. load the lower half into x7 and the upper half into the stack
            // LLVM does not seem to do this, so we are going to replicate that behaviour
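            //
            // For example, if an i64 argument has already taken x0, a following
            // i128 argument is not split across x1/x2: Stage C.8 rounds the next
            // register number up to x2, so its low half goes in x2 and its high
            // half in x3 (the Apple ABI would use x1/x2 instead).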
            let is_multi_reg = rcs.len() >= 2;
            if is_multi_reg {
                assert!(
                    rcs.len() == 2,
                    "Unable to handle multi reg params with more than 2 regs"
                );
                assert!(
                    rcs == &[RegClass::Int, RegClass::Int],
                    "Unable to handle non i64 regs"
                );

                let reg_class_space = max_per_class_reg_vals - next_xreg;
                let reg_space = remaining_reg_vals;

                if reg_space >= 2 && reg_class_space >= 2 {
                    // The aarch64 ABI does not allow us to start a split argument
                    // at an odd numbered register. So we need to skip one register
                    //
                    // TODO: The Fast ABI should probably not skip the register
                    if !is_apple_cc && next_xreg % 2 != 0 {
                        next_xreg += 1;
                    }

                    let lower_reg = xreg(next_xreg);
                    let upper_reg = xreg(next_xreg + 1);

                    args.push(ABIArg::Slots {
                        slots: smallvec![
                            ABIArgSlot::Reg {
                                reg: lower_reg.to_real_reg().unwrap(),
                                ty: reg_types[0],
                                extension: param.extension,
                            },
                            ABIArgSlot::Reg {
                                reg: upper_reg.to_real_reg().unwrap(),
                                ty: reg_types[1],
                                extension: param.extension,
                            },
                        ],
                        purpose: param.purpose,
                    });

                    next_xreg += 2;
                    remaining_reg_vals -= 2;
                    continue;
                }
            } else {
                // Single Register parameters
                let rc = rcs[0];
                let next_reg = match rc {
                    RegClass::Int => &mut next_xreg,
                    RegClass::Float => &mut next_vreg,
                    RegClass::Vector => unreachable!(),
                };

                let push_to_reg = if is_winch_return {
                    // Winch uses the first register to return the last result
                    i == params.len() - 1
                } else {
                    // Use max_per_class_reg_vals & remaining_reg_vals otherwise
                    *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0
                };

                if push_to_reg {
                    let reg = match rc {
                        RegClass::Int => xreg(*next_reg),
                        RegClass::Float => vreg(*next_reg),
                        RegClass::Vector => unreachable!(),
                    };
                    // Overlay Z-regs on V-regs for parameter passing.
                    let ty = if param.value_type.is_dynamic_vector() {
                        dynamic_to_fixed(param.value_type)
                    } else {
                        param.value_type
                    };
                    args.push(ABIArg::reg(
                        reg.to_real_reg().unwrap(),
                        ty,
                        param.extension,
                        param.purpose,
                    ));
                    *next_reg += 1;
                    remaining_reg_vals -= 1;
                    continue;
                }
            }

            // Spill to the stack

            if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
                return Err(crate::CodegenError::Unsupported(
                    "Too many return values to fit in registers. \
                     Use a StructReturn argument instead. (#9510)"
                        .to_owned(),
                ));
            }

            // Compute the stack slot's size.
            let size = (ty_bits(param.value_type) / 8) as u32;

            let size = if is_apple_cc || is_winch_return {
                // MacOS and Winch aarch64 allows stack slots with
                // sizes less than 8 bytes. They still need to be
                // properly aligned on their natural data alignment,
                // though.
                size
            } else {
                // Every arg takes a minimum slot of 8 bytes. (16-byte stack
                // alignment happens separately after all args.)
                core::cmp::max(size, 8)
            };
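
            // For example, an i8 argument spilled to the stack takes a full
            // 8-byte slot under the default rules above, but only a single,
            // naturally aligned byte under the Apple and Winch conventions.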
            if !is_winch_return {
                // Align the stack slot.
                debug_assert!(size.is_power_of_two());
                next_stack = align_to(next_stack, size);
            }

            let slots = reg_types
                .iter()
                .copied()
                // Build the stack locations from each slot
                .scan(next_stack, |next_stack, ty| {
                    let slot_offset = *next_stack as i64;
                    *next_stack += (ty_bits(ty) / 8) as u32;

                    Some((ty, slot_offset))
                })
                .map(|(ty, offset)| ABIArgSlot::Stack {
                    offset,
                    ty,
                    extension: param.extension,
                })
                .collect();

            args.push(ABIArg::Slots {
                slots,
                purpose: param.purpose,
            });

            next_stack += size;
        }

        let extra_arg = if let Some(ret_area_ptr) = ret_area_ptr {
            args.push_non_formal(ret_area_ptr);
            Some(args.args().len() - 1)
        } else {
            None
        };

        if is_winch_return {
            winch::reverse_stack(args, next_stack, false);
        }

        next_stack = align_to(next_stack, 16);

        Ok((next_stack, extra_arg))
    }

    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
        Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
    }

    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
        Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
    }

    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
        Inst::gen_move(to_reg, from_reg, ty)
    }

    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Inst {
        assert!(from_bits < to_bits);
        Inst::Extend {
            rd: to_reg,
            rn: from_reg,
            signed,
            from_bits,
            to_bits,
        }
    }

    fn gen_args(args: Vec<ArgPair>) -> Inst {
        Inst::Args { args }
    }

    fn gen_rets(rets: Vec<RetPair>) -> Inst {
        Inst::Rets { rets }
    }

    fn gen_add_imm(
        _call_conv: isa::CallConv,
        into_reg: Writable<Reg>,
        from_reg: Reg,
        imm: u32,
    ) -> SmallInstVec<Inst> {
        let imm = imm as u64;
        let mut insts = SmallVec::new();
        if let Some(imm12) = Imm12::maybe_from_u64(imm) {
            insts.push(Inst::AluRRImm12 {
                alu_op: ALUOp::Add,
                size: OperandSize::Size64,
                rd: into_reg,
                rn: from_reg,
                imm12,
            });
        } else {
            let scratch2 = writable_tmp2_reg();
            assert_ne!(scratch2.to_reg(), from_reg);
            // `gen_add_imm` is only ever called after register allocation has taken place, and as a
            // result it's ok to reuse the scratch2 register here. If that changes, we'll need to
            // plumb through a way to allocate temporary virtual registers
            insts.extend(Inst::load_constant(scratch2, imm));
            insts.push(Inst::AluRRRExtend {
                alu_op: ALUOp::Add,
                size: OperandSize::Size64,
                rd: into_reg,
                rn: from_reg,
                rm: scratch2.to_reg(),
                extendop: ExtendOp::UXTX,
            });
        }
        insts
    }

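    // Emit a stack-limit check: the flags-setting subtract below compares SP
    // against `limit_reg`, and the conditional trap fires with STACK_OVERFLOW
    // when SP is below the limit (unsigned less-than).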
    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
        let mut insts = SmallVec::new();
        insts.push(Inst::AluRRRExtend {
            alu_op: ALUOp::SubS,
            size: OperandSize::Size64,
            rd: writable_zero_reg(),
            rn: stack_reg(),
            rm: limit_reg,
            extendop: ExtendOp::UXTX,
        });
        insts.push(Inst::TrapIf {
            trap_code: ir::TrapCode::STACK_OVERFLOW,
            // Here `Lo` == "less than" when interpreting the two
            // operands as unsigned integers.
            kind: CondBrKind::Cond(Cond::Lo),
        });
        insts
    }

    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Inst {
        // FIXME: Do something different for dynamic types?
        let mem = mem.into();
        Inst::LoadAddr { rd: into_reg, mem }
    }

    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
        spilltmp_reg()
    }

    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
        let mem = AMode::RegOffset {
            rn: base,
            off: offset as i64,
        };
        Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
    }

    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
        let mem = AMode::RegOffset {
            rn: base,
            off: offset as i64,
        };
        Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
    }

    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
        if amount == 0 {
            return SmallVec::new();
        }

        let (amount, is_sub) = if amount > 0 {
            (amount as u64, false)
        } else {
            (-amount as u64, true)
        };

        let alu_op = if is_sub { ALUOp::Sub } else { ALUOp::Add };

        let mut ret = SmallVec::new();
        if let Some(imm12) = Imm12::maybe_from_u64(amount) {
            let adj_inst = Inst::AluRRImm12 {
                alu_op,
                size: OperandSize::Size64,
                rd: writable_stack_reg(),
                rn: stack_reg(),
                imm12,
            };
            ret.push(adj_inst);
        } else {
            let tmp = writable_spilltmp_reg();
            // `gen_sp_reg_adjust` is called after regalloc2, so it's acceptable to reuse `tmp` for
            // intermediates in `load_constant`.
            let const_inst = Inst::load_constant(tmp, amount);
            let adj_inst = Inst::AluRRRExtend {
                alu_op,
                size: OperandSize::Size64,
                rd: writable_stack_reg(),
                rn: stack_reg(),
                rm: tmp.to_reg(),
                extendop: ExtendOp::UXTX,
            };
            ret.extend(const_inst);
            ret.push(adj_inst);
        }
        ret
    }

    fn gen_prologue_frame_setup(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        isa_flags: &aarch64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        let setup_frame = frame_layout.setup_area_size > 0;
        let mut insts = SmallVec::new();

        match Self::select_api_key(isa_flags, call_conv, setup_frame) {
            Some(key) => {
                insts.push(Inst::Paci { key });
                if flags.unwind_info() {
                    insts.push(Inst::Unwind {
                        inst: UnwindInst::Aarch64SetPointerAuth {
                            return_addresses: true,
                        },
                    });
                }
            }
            None => {
                if isa_flags.use_bti() {
                    insts.push(Inst::Bti {
                        targets: BranchTargetType::C,
                    });
                }

                if flags.unwind_info() && call_conv == isa::CallConv::AppleAarch64 {
                    // The macOS unwinder seems to require this.
                    insts.push(Inst::Unwind {
                        inst: UnwindInst::Aarch64SetPointerAuth {
                            return_addresses: false,
                        },
                    });
                }
            }
        }

        if setup_frame {
            // stp fp (x29), lr (x30), [sp, #-16]!
            insts.push(Inst::StoreP64 {
                rt: fp_reg(),
                rt2: link_reg(),
                mem: PairAMode::SPPreIndexed {
                    simm7: SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
                },
                flags: MemFlags::trusted(),
            });

            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::PushFrameRegs {
                        offset_upward_to_caller_sp: frame_layout.setup_area_size,
                    },
                });
            }

            // mov fp (x29), sp. This uses the ADDI rd, rs, 0 form of `MOV` because
            // the usual encoding (`ORR`) does not work with SP.
            insts.push(Inst::AluRRImm12 {
                alu_op: ALUOp::Add,
                size: OperandSize::Size64,
                rd: writable_fp_reg(),
                rn: stack_reg(),
                imm12: Imm12 {
                    bits: 0,
                    shift12: false,
                },
            });
        }

        insts
    }

    fn gen_epilogue_frame_restore(
        call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _isa_flags: &aarch64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        let setup_frame = frame_layout.setup_area_size > 0;
        let mut insts = SmallVec::new();

        if setup_frame {
            // N.B.: sp is already adjusted to the appropriate place by the
            // clobber-restore code (which also frees the fixed frame). Hence, there
            // is no need for the usual `mov sp, fp` here.

            // `ldp fp, lr, [sp], #16`
            insts.push(Inst::LoadP64 {
                rt: writable_fp_reg(),
                rt2: writable_link_reg(),
                mem: PairAMode::SPPostIndexed {
                    simm7: SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
                },
                flags: MemFlags::trusted(),
            });
        }

        if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(
                frame_layout.tail_args_size.try_into().unwrap(),
            ));
        }

        insts
    }

    fn gen_return(
        call_conv: isa::CallConv,
        isa_flags: &aarch64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        let setup_frame = frame_layout.setup_area_size > 0;

        match Self::select_api_key(isa_flags, call_conv, setup_frame) {
            Some(key) => {
                smallvec![Inst::AuthenticatedRet {
                    key,
                    is_hint: !isa_flags.has_pauth(),
                }]
            }
            None => {
                smallvec![Inst::Ret {}]
            }
        }
    }

    fn gen_probestack(_insts: &mut SmallInstVec<Self::I>, _: u32) {
        // TODO: implement if we ever require stack probes on an AArch64 host
        // (unlikely unless Lucet is ported)
        unimplemented!("Stack probing is unimplemented on AArch64");
    }

    fn gen_inline_probestack(
        insts: &mut SmallInstVec<Self::I>,
        _call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // The stack probe loop currently takes 6 instructions and each inline
        // probe takes 2 (ish, these numbers sort of depend on the constants).
        // Set this to 3 to keep the max size of the probe to 6 instructions.
        const PROBE_MAX_UNROLL: u32 = 3;

        // Calculate how many probes we need to perform. Round down, as we only
        // need to probe whole guard_size regions we'd otherwise skip over.
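        //
        // For example, with a 4 KiB guard region a 10 KiB frame needs
        // 10240 / 4096 = 2 probes and gets unrolled, while a 64 KiB frame
        // (16 probes) falls back to the loop form below.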
        let probe_count = frame_size / guard_size;
        if probe_count == 0 {
            // No probe necessary
        } else if probe_count <= PROBE_MAX_UNROLL {
            Self::gen_probestack_unroll(insts, guard_size, probe_count)
        } else {
            Self::gen_probestack_loop(insts, frame_size, guard_size)
        }
    }

    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Inst; 16]> {
        let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class();

        let mut insts = SmallVec::new();
        let setup_frame = frame_layout.setup_area_size > 0;

        // When a return_call within this function required more stack arguments than we have
        // present, resize the incoming argument area of the frame to accommodate those arguments.
        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
        if incoming_args_diff > 0 {
            // Decrement SP to account for the additional space required by a tail call.
            insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32)));
            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::StackAlloc {
                        size: incoming_args_diff,
                    },
                });
            }

            // Move fp and lr down.
            if setup_frame {
                // Reload the frame pointer from the stack.
                insts.push(Inst::ULoad64 {
                    rd: regs::writable_fp_reg(),
                    mem: AMode::SPOffset {
                        off: i64::from(incoming_args_diff),
                    },
                    flags: MemFlags::trusted(),
                });

                // Store the frame pointer and link register again at the new SP
                insts.push(Inst::StoreP64 {
                    rt: fp_reg(),
                    rt2: link_reg(),
                    mem: PairAMode::SignedOffset {
                        reg: regs::stack_reg(),
                        simm7: SImm7Scaled::maybe_from_i64(0, types::I64).unwrap(),
                    },
                    flags: MemFlags::trusted(),
                });

                // Keep the frame pointer in sync
                insts.push(Self::gen_move(
                    regs::writable_fp_reg(),
                    regs::stack_reg(),
                    types::I64,
                ));
            }
        }

        if flags.unwind_info() && setup_frame {
            // The *unwind* frame (but not the actual frame) starts at the
            // clobbers, just below the saved FP/LR pair.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: frame_layout.clobber_size,
                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
                },
            });
        }

        // We use pre-indexed addressing modes here, rather than the possibly
        // more efficient "subtract sp once then use fixed offsets" scheme,
        // because (i) we cannot necessarily guarantee that the offset of a
        // clobber-save slot will be within a SImm7Scaled (+504-byte) offset
        // range of the whole frame including other slots, (ii) it is more
        // complex to conditionally generate a two-stage SP adjustment
        // (clobbers then fixed frame) otherwise, and (iii) generally we just
        // want to maintain simplicity here for maintainability. Because
        // clobbers are at the top of the frame, just below FP, all that is
        // necessary is to use the pre-indexed "push" `[sp, #-16]!` addressing
        // mode.
        //
        // `frame_offset` tracks offset above start-of-clobbers for unwind-info
        // purposes.
        let mut clobber_offset = frame_layout.clobber_size;
        let clobber_offset_change = 16;
        let iter = clobbered_int.chunks_exact(2);

        if let [rd] = iter.remainder() {
            let rd: Reg = rd.to_reg().into();

            debug_assert_eq!(rd.class(), RegClass::Int);
            // str rd, [sp, #-16]!
            insts.push(Inst::Store64 {
                rd,
                mem: AMode::SPPreIndexed {
                    simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
                },
                flags: MemFlags::trusted(),
            });

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change as u32;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rd.to_real_reg().unwrap(),
                    },
                });
            }
        }

        let mut iter = iter.rev();

        while let Some([rt, rt2]) = iter.next() {
            // .to_reg().into(): Writable<RealReg> --> RealReg --> Reg
            let rt: Reg = rt.to_reg().into();
            let rt2: Reg = rt2.to_reg().into();

            debug_assert!(rt.class() == RegClass::Int);
            debug_assert!(rt2.class() == RegClass::Int);

            // stp rt, rt2, [sp, #-16]!
            insts.push(Inst::StoreP64 {
                rt,
                rt2,
                mem: PairAMode::SPPreIndexed {
                    simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(),
                },
                flags: MemFlags::trusted(),
            });

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change as u32;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rt.to_real_reg().unwrap(),
                    },
                });
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32,
                        reg: rt2.to_real_reg().unwrap(),
                    },
                });
            }
        }

        let store_vec_reg = |rd| Inst::FpuStore64 {
            rd,
            mem: AMode::SPPreIndexed {
                simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
            },
            flags: MemFlags::trusted(),
        };
        let iter = clobbered_vec.chunks_exact(2);

        if let [rd] = iter.remainder() {
            let rd: Reg = rd.to_reg().into();

            debug_assert_eq!(rd.class(), RegClass::Float);
            insts.push(store_vec_reg(rd));

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change as u32;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rd.to_real_reg().unwrap(),
                    },
                });
            }
        }

        let store_vec_reg_pair = |rt, rt2| {
            let clobber_offset_change = 16;

            (
                Inst::FpuStoreP64 {
                    rt,
                    rt2,
                    mem: PairAMode::SPPreIndexed {
                        simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(),
                    },
                    flags: MemFlags::trusted(),
                },
                clobber_offset_change as u32,
            )
        };
        let mut iter = iter.rev();

        while let Some([rt, rt2]) = iter.next() {
            let rt: Reg = rt.to_reg().into();
            let rt2: Reg = rt2.to_reg().into();

            debug_assert_eq!(rt.class(), RegClass::Float);
            debug_assert_eq!(rt2.class(), RegClass::Float);

            let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2);

            insts.push(inst);

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rt.to_real_reg().unwrap(),
                    },
                });
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: clobber_offset + clobber_offset_change / 2,
                        reg: rt2.to_real_reg().unwrap(),
                    },
                });
            }
        }

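        // At this point the prologue has, from higher to lower addresses: the
        // incoming (possibly tail-call-extended) argument area, the saved FP/LR
        // pair, and the clobber-save area pushed above. The fixed frame and the
        // outgoing-argument area are allocated next, below the clobbers.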
        // Allocate the fixed frame below the clobbers if necessary.
        let stack_size =
            frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        if stack_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(-(stack_size as i32)));
            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::StackAlloc { size: stack_size },
                });
            }
        }

        insts
    }

    fn gen_clobber_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Inst; 16]> {
        let mut insts = SmallVec::new();
        let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class();

        // Free the fixed frame if necessary.
        let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        if stack_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(stack_size as i32));
        }

        let load_vec_reg = |rd| Inst::FpuLoad64 {
            rd,
            mem: AMode::SPPostIndexed {
                simm9: SImm9::maybe_from_i64(16).unwrap(),
            },
            flags: MemFlags::trusted(),
        };
        let load_vec_reg_pair = |rt, rt2| Inst::FpuLoadP64 {
            rt,
            rt2,
            mem: PairAMode::SPPostIndexed {
                simm7: SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
            },
            flags: MemFlags::trusted(),
        };

        let mut iter = clobbered_vec.chunks_exact(2);

        while let Some([rt, rt2]) = iter.next() {
            let rt: Writable<Reg> = rt.map(|r| r.into());
            let rt2: Writable<Reg> = rt2.map(|r| r.into());

            debug_assert_eq!(rt.to_reg().class(), RegClass::Float);
            debug_assert_eq!(rt2.to_reg().class(), RegClass::Float);
            insts.push(load_vec_reg_pair(rt, rt2));
        }

        debug_assert!(iter.remainder().len() <= 1);

        if let [rd] = iter.remainder() {
            let rd: Writable<Reg> = rd.map(|r| r.into());

            debug_assert_eq!(rd.to_reg().class(), RegClass::Float);
            insts.push(load_vec_reg(rd));
        }

        let mut iter = clobbered_int.chunks_exact(2);

        while let Some([rt, rt2]) = iter.next() {
            let rt: Writable<Reg> = rt.map(|r| r.into());
            let rt2: Writable<Reg> = rt2.map(|r| r.into());

            debug_assert_eq!(rt.to_reg().class(), RegClass::Int);
            debug_assert_eq!(rt2.to_reg().class(), RegClass::Int);
            // ldp rt, rt2, [sp], #16
            insts.push(Inst::LoadP64 {
                rt,
                rt2,
                mem: PairAMode::SPPostIndexed {
                    simm7: SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
                },
                flags: MemFlags::trusted(),
            });
        }

        debug_assert!(iter.remainder().len() <= 1);

        if let [rd] = iter.remainder() {
            let rd: Writable<Reg> = rd.map(|r| r.into());

            debug_assert_eq!(rd.to_reg().class(), RegClass::Int);
            // ldr rd, [sp], #16
            insts.push(Inst::ULoad64 {
                rd,
                mem: AMode::SPPostIndexed {
                    simm9: SImm9::maybe_from_i64(16).unwrap(),
                },
                flags: MemFlags::trusted(),
            });
        }

        insts
    }

    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]> {
        let mut insts = SmallVec::new();
        let arg0 = writable_xreg(0);
        let arg1 = writable_xreg(1);
        let arg2 = writable_xreg(2);
        let tmp = alloc_tmp(Self::word_type());
        insts.extend(Inst::load_constant(tmp, size as u64));
        insts.push(Inst::Call {
            info: Box::new(CallInfo {
                dest: ExternalName::LibCall(LibCall::Memcpy),
                uses: smallvec![
                    CallArgPair {
                        vreg: dst,
                        preg: arg0.to_reg()
                    },
                    CallArgPair {
                        vreg: src,
                        preg: arg1.to_reg()
                    },
                    CallArgPair {
                        vreg: tmp.to_reg(),
                        preg: arg2.to_reg()
                    }
                ],
                defs: smallvec![],
                clobbers: Self::get_regs_clobbered_by_call(call_conv, false),
                caller_conv: call_conv,
                callee_conv: call_conv,
                callee_pop_size: 0,
                try_call_info: None,
                patchable: false,
            }),
        });
        insts
    }

    fn get_number_of_spillslots_for_value(
        rc: RegClass,
        vector_size: u32,
        _isa_flags: &Self::F,
    ) -> u32 {
        assert_eq!(vector_size % 8, 0);
        // We allocate in terms of 8-byte slots.
        match rc {
            RegClass::Int => 1,
            RegClass::Float => vector_size / 8,
            RegClass::Vector => unreachable!(),
        }
    }

    fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
        if flags.enable_pinned_reg() {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env(true))
        } else {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env(false))
        }
    }

    fn get_regs_clobbered_by_call(call_conv: isa::CallConv, is_exception: bool) -> PRegSet {
        match (call_conv, is_exception) {
            (isa::CallConv::Tail, true) => ALL_CLOBBERS,
            (isa::CallConv::Winch, true) => ALL_CLOBBERS,
            (isa::CallConv::Winch, false) => WINCH_CLOBBERS,
            // Note that "PreserveAll" actually preserves nothing at
            // the callsite if used for a `try_call`, because the
            // unwinder ABI for `try_call`s is still "no clobbered
            // register restores" for this ABI (so as to work with
            // Wasmtime).
            (isa::CallConv::PreserveAll, true) => ALL_CLOBBERS,
            (isa::CallConv::SystemV, _) => DEFAULT_AAPCS_CLOBBERS,
            (isa::CallConv::PreserveAll, _) => NO_CLOBBERS,
            (_, false) => DEFAULT_AAPCS_CLOBBERS,
            (_, true) => panic!("unimplemented clobbers for exn abi of {call_conv:?}"),
        }
    }

    fn get_ext_mode(
        call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        if call_conv == isa::CallConv::AppleAarch64 {
            specified
        } else {
            ir::ArgumentExtension::None
        }
    }

    fn compute_frame_layout(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        sig: &Signature,
        regs: &[Writable<RealReg>],
        function_calls: FunctionCalls,
        incoming_args_size: u32,
        tail_args_size: u32,
        stackslots_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout {
        let mut regs: Vec<Writable<RealReg>> = regs
            .iter()
            .cloned()
            .filter(|r| {
                is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, r.to_reg())
            })
            .collect();

        // Sort registers for deterministic code output. We can do an unstable
        // sort because the registers will be unique (there are no dups).
        regs.sort_unstable();

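        // Note that `compute_clobber_size` below rounds each register class's
        // save area up to a multiple of 16 bytes, so e.g. three clobbered
        // callee-saved x-registers cost 32 bytes of clobber space, not 24.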
        // Compute clobber size.
        let clobber_size = compute_clobber_size(&regs);

        // Compute linkage frame size.
        let setup_area_size = if flags.preserve_frame_pointers()
            || function_calls != FunctionCalls::None
            // The function arguments that are passed on the stack are addressed
            // relative to the Frame Pointer.
            || incoming_args_size > 0
            || clobber_size > 0
            || fixed_frame_storage_size > 0
        {
            16 // FP, LR
        } else {
            0
        };

        // Return FrameLayout structure.
        FrameLayout {
            word_bytes: 8,
            incoming_args_size,
            tail_args_size,
            setup_area_size,
            clobber_size,
            fixed_frame_storage_size,
            stackslots_size,
            outgoing_args_size,
            clobbered_callee_saves: regs,
            function_calls,
        }
    }

    fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
        // Use x9 as a temp if needed: clobbered, not a
        // retval.
        regs::writable_xreg(9)
    }

    fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
        const PAYLOAD_REGS: &'static [Reg] = &[regs::xreg(0), regs::xreg(1)];
        match call_conv {
            isa::CallConv::SystemV | isa::CallConv::Tail | isa::CallConv::PreserveAll => {
                PAYLOAD_REGS
            }
            _ => &[],
        }
    }
}

impl AArch64MachineDeps {
    fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
        // When manually unrolling adjust the stack pointer and then write a zero
        // to the stack at that offset. This generates something like
        // `sub sp, sp, #1, lsl #12` followed by `stur wzr, [sp]`.
        //
        // We do this because valgrind expects us to never write beyond the stack
        // pointer and associated redzone.
        // See: https://github.com/bytecodealliance/wasmtime/issues/7454
        for _ in 0..probe_count {
            insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));

            insts.push(Inst::gen_store(
                AMode::SPOffset { off: 0 },
                zero_reg(),
                I32,
                MemFlags::trusted(),
            ));
        }

        // Restore the stack pointer to its original value
        insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
    }

    fn gen_probestack_loop(insts: &mut SmallInstVec<Inst>, frame_size: u32, guard_size: u32) {
        // The non-unrolled version uses two temporary registers. The
        // `start` contains the current offset from sp and counts downwards
        // during the loop by increments of `guard_size`. The `end` is
        // the size of the frame and where we stop.
        //
        // Note that this emission is all post-regalloc so it should be ok
        // to use the temporary registers here as input/output as the loop
        // itself is not allowed to use the registers.
        let start = writable_spilltmp_reg();
        let end = writable_tmp2_reg();
        // `gen_inline_probestack` is called after regalloc2, so it's acceptable to reuse
        // `start` and `end` as temporaries in load_constant.
        insts.extend(Inst::load_constant(start, 0));
        insts.extend(Inst::load_constant(end, frame_size.into()));
        insts.push(Inst::StackProbeLoop {
            start,
            end: end.to_reg(),
            step: Imm12::maybe_from_u64(guard_size.into()).unwrap(),
        });
    }

    pub fn select_api_key(
        isa_flags: &aarch64_settings::Flags,
        call_conv: isa::CallConv,
        setup_frame: bool,
    ) -> Option<APIKey> {
        if isa_flags.sign_return_address() && (setup_frame || isa_flags.sign_return_address_all()) {
            // The `tail` calling convention uses a zero modifier rather than SP
            // because tail calls may happen with a different stack pointer than
            // when the function was entered, meaning that it won't be the same when
            // the return address is decrypted.
            Some(if isa_flags.sign_return_address_with_bkey() {
                match call_conv {
                    isa::CallConv::Tail => APIKey::BZ,
                    _ => APIKey::BSP,
                }
            } else {
                match call_conv {
                    isa::CallConv::Tail => APIKey::AZ,
                    _ => APIKey::ASP,
                }
            })
        } else {
            None
        }
    }
}

/// Is the given register saved in the prologue if clobbered, i.e., is it a
/// callee-save?
fn is_reg_saved_in_prologue(
    call_conv: isa::CallConv,
    enable_pinned_reg: bool,
    sig: &Signature,
    r: RealReg,
) -> bool {
    if call_conv == isa::CallConv::PreserveAll {
        return true;
    }

    // FIXME: We need to inspect whether a function is returning Z or P regs too.
    let save_z_regs = sig
        .params
        .iter()
        .filter(|p| p.value_type.is_dynamic_vector())
        .count()
        != 0;

    match r.class() {
        RegClass::Int => {
            // x19 - x28 inclusive are callee-saves.
            // However, x21 is the pinned reg if `enable_pinned_reg`
            // is set, and is implicitly globally-allocated, hence not
            // callee-saved in prologues.
            if enable_pinned_reg && r.hw_enc() == PINNED_REG {
                false
            } else {
                r.hw_enc() >= 19 && r.hw_enc() <= 28
            }
        }
        RegClass::Float => {
            // If a subroutine takes at least one argument in scalable vector registers
            // or scalable predicate registers, or if it is a function that returns
            // results in such registers, it must ensure that the entire contents of
            // z8-z23 are preserved across the call. In other cases it need only
            // preserve the low 64 bits of z8-z15.
            if save_z_regs {
                r.hw_enc() >= 8 && r.hw_enc() <= 23
            } else {
                // v8 - v15 inclusive are callee-saves.
                r.hw_enc() >= 8 && r.hw_enc() <= 15
            }
        }
        RegClass::Vector => unreachable!(),
    }
}

const fn default_aapcs_clobbers() -> PRegSet {
    PRegSet::empty()
        // x0 - x17 inclusive are caller-saves.
        .with(xreg_preg(0))
        .with(xreg_preg(1))
        .with(xreg_preg(2))
        .with(xreg_preg(3))
        .with(xreg_preg(4))
        .with(xreg_preg(5))
        .with(xreg_preg(6))
        .with(xreg_preg(7))
        .with(xreg_preg(8))
        .with(xreg_preg(9))
        .with(xreg_preg(10))
        .with(xreg_preg(11))
        .with(xreg_preg(12))
        .with(xreg_preg(13))
        .with(xreg_preg(14))
        .with(xreg_preg(15))
        .with(xreg_preg(16))
        .with(xreg_preg(17))
        // v0 - v7 inclusive and v16 - v31 inclusive are
        // caller-saves. The upper 64 bits of v8 - v15 inclusive are
        // also caller-saves. However, because we cannot currently
        // represent partial registers to regalloc2, we indicate here
        // that every vector register is caller-save. Because this
        // function is used at *callsites*, approximating in this
        // direction (save more than necessary) is conservative and
        // thus safe.
        //
        // Note that we exclude clobbers from a call instruction when
        // a call instruction's callee has the same ABI as the caller
        // (the current function body); this is safe (anything
        // clobbered by callee can be clobbered by caller as well) and
        // avoids unnecessary saves of v8-v15 in the prologue even
        // though we include them as defs here.
        .with(vreg_preg(0))
        .with(vreg_preg(1))
        .with(vreg_preg(2))
        .with(vreg_preg(3))
        .with(vreg_preg(4))
        .with(vreg_preg(5))
        .with(vreg_preg(6))
        .with(vreg_preg(7))
        .with(vreg_preg(8))
        .with(vreg_preg(9))
        .with(vreg_preg(10))
        .with(vreg_preg(11))
        .with(vreg_preg(12))
        .with(vreg_preg(13))
        .with(vreg_preg(14))
        .with(vreg_preg(15))
        .with(vreg_preg(16))
        .with(vreg_preg(17))
        .with(vreg_preg(18))
        .with(vreg_preg(19))
        .with(vreg_preg(20))
        .with(vreg_preg(21))
        .with(vreg_preg(22))
        .with(vreg_preg(23))
        .with(vreg_preg(24))
        .with(vreg_preg(25))
        .with(vreg_preg(26))
        .with(vreg_preg(27))
        .with(vreg_preg(28))
        .with(vreg_preg(29))
        .with(vreg_preg(30))
        .with(vreg_preg(31))
}

const fn winch_clobbers() -> PRegSet {
    PRegSet::empty()
        .with(xreg_preg(0))
        .with(xreg_preg(1))
        .with(xreg_preg(2))
        .with(xreg_preg(3))
        .with(xreg_preg(4))
        .with(xreg_preg(5))
        .with(xreg_preg(6))
        .with(xreg_preg(7))
        .with(xreg_preg(8))
        .with(xreg_preg(9))
        .with(xreg_preg(10))
        .with(xreg_preg(11))
        .with(xreg_preg(12))
        .with(xreg_preg(13))
        .with(xreg_preg(14))
        .with(xreg_preg(15))
        .with(xreg_preg(16))
        .with(xreg_preg(17))
        // x18 is used to carry platform state and is not allocatable by Winch.
        //
        // x19 - x27 are considered caller-saved in Winch's calling convention.
        .with(xreg_preg(19))
        .with(xreg_preg(20))
        .with(xreg_preg(21))
        .with(xreg_preg(22))
        .with(xreg_preg(23))
        .with(xreg_preg(24))
        .with(xreg_preg(25))
        .with(xreg_preg(26))
        .with(xreg_preg(27))
        // x28 is used as the shadow stack pointer and is considered
        // callee-saved.
        //
        // All vregs are considered caller-saved.
        .with(vreg_preg(0))
        .with(vreg_preg(1))
        .with(vreg_preg(2))
        .with(vreg_preg(3))
        .with(vreg_preg(4))
        .with(vreg_preg(5))
        .with(vreg_preg(6))
        .with(vreg_preg(7))
        .with(vreg_preg(8))
        .with(vreg_preg(9))
        .with(vreg_preg(10))
        .with(vreg_preg(11))
        .with(vreg_preg(12))
        .with(vreg_preg(13))
        .with(vreg_preg(14))
        .with(vreg_preg(15))
        .with(vreg_preg(16))
        .with(vreg_preg(17))
        .with(vreg_preg(18))
        .with(vreg_preg(19))
        .with(vreg_preg(20))
        .with(vreg_preg(21))
        .with(vreg_preg(22))
        .with(vreg_preg(23))
        .with(vreg_preg(24))
        .with(vreg_preg(25))
        .with(vreg_preg(26))
        .with(vreg_preg(27))
        .with(vreg_preg(28))
        .with(vreg_preg(29))
        .with(vreg_preg(30))
        .with(vreg_preg(31))
}

const fn all_clobbers() -> PRegSet {
    PRegSet::empty()
        // integer registers: x0 to x28 inclusive. (x29 is FP, x30 is
        // LR, x31 is SP/ZR.)
        .with(xreg_preg(0))
        .with(xreg_preg(1))
        .with(xreg_preg(2))
        .with(xreg_preg(3))
        .with(xreg_preg(4))
        .with(xreg_preg(5))
        .with(xreg_preg(6))
        .with(xreg_preg(7))
        .with(xreg_preg(8))
        .with(xreg_preg(9))
        .with(xreg_preg(10))
        .with(xreg_preg(11))
        .with(xreg_preg(12))
        .with(xreg_preg(13))
        .with(xreg_preg(14))
        .with(xreg_preg(15))
        .with(xreg_preg(16))
        .with(xreg_preg(17))
        .with(xreg_preg(18))
        .with(xreg_preg(19))
        .with(xreg_preg(20))
        .with(xreg_preg(21))
        .with(xreg_preg(22))
        .with(xreg_preg(23))
        .with(xreg_preg(24))
        .with(xreg_preg(25))
        .with(xreg_preg(26))
        .with(xreg_preg(27))
        .with(xreg_preg(28))
        // vector registers: v0 to v31 inclusive.
        .with(vreg_preg(0))
        .with(vreg_preg(1))
        .with(vreg_preg(2))
        .with(vreg_preg(3))
        .with(vreg_preg(4))
        .with(vreg_preg(5))
        .with(vreg_preg(6))
        .with(vreg_preg(7))
        .with(vreg_preg(8))
        .with(vreg_preg(9))
        .with(vreg_preg(10))
        .with(vreg_preg(11))
        .with(vreg_preg(12))
        .with(vreg_preg(13))
        .with(vreg_preg(14))
        .with(vreg_preg(15))
        .with(vreg_preg(16))
        .with(vreg_preg(17))
        .with(vreg_preg(18))
        .with(vreg_preg(19))
        .with(vreg_preg(20))
        .with(vreg_preg(21))
        .with(vreg_preg(22))
        .with(vreg_preg(23))
        .with(vreg_preg(24))
        .with(vreg_preg(25))
        .with(vreg_preg(26))
        .with(vreg_preg(27))
        .with(vreg_preg(28))
        .with(vreg_preg(29))
        .with(vreg_preg(30))
        .with(vreg_preg(31))
}

const DEFAULT_AAPCS_CLOBBERS: PRegSet = default_aapcs_clobbers();
const WINCH_CLOBBERS: PRegSet = winch_clobbers();
const ALL_CLOBBERS: PRegSet = all_clobbers();
const NO_CLOBBERS: PRegSet = PRegSet::empty();

fn create_reg_env(enable_pinned_reg: bool) -> MachineEnv {
    fn preg(r: Reg) -> PReg {
        r.to_real_reg().unwrap().into()
    }

    let mut env = MachineEnv {
        preferred_regs_by_class: [
            vec![
                preg(xreg(0)),
                preg(xreg(1)),
                preg(xreg(2)),
                preg(xreg(3)),
                preg(xreg(4)),
                preg(xreg(5)),
                preg(xreg(6)),
                preg(xreg(7)),
                preg(xreg(8)),
                preg(xreg(9)),
                preg(xreg(10)),
                preg(xreg(11)),
                preg(xreg(12)),
                preg(xreg(13)),
                preg(xreg(14)),
                preg(xreg(15)),
                // x16 and x17 are spilltmp and tmp2 (see above).
                // x18 could be used by the platform to carry inter-procedural state;
                // conservatively assume so and make it not allocatable.
                // x19-28 are callee-saved and so not preferred.
                // x21 is the pinned register (if enabled) and not allocatable if so.
                // x29 is FP, x30 is LR, x31 is SP/ZR.
            ],
            vec![
                preg(vreg(0)),
                preg(vreg(1)),
                preg(vreg(2)),
                preg(vreg(3)),
                preg(vreg(4)),
                preg(vreg(5)),
                preg(vreg(6)),
                preg(vreg(7)),
                // v8-15 are callee-saved and so not preferred.
                preg(vreg(16)),
                preg(vreg(17)),
                preg(vreg(18)),
                preg(vreg(19)),
                preg(vreg(20)),
                preg(vreg(21)),
                preg(vreg(22)),
                preg(vreg(23)),
                preg(vreg(24)),
                preg(vreg(25)),
                preg(vreg(26)),
                preg(vreg(27)),
                preg(vreg(28)),
                preg(vreg(29)),
                preg(vreg(30)),
                preg(vreg(31)),
            ],
            // Vector Regclass is unused
            vec![],
        ],
        non_preferred_regs_by_class: [
            vec![
                preg(xreg(19)),
                preg(xreg(20)),
                // x21 is pinned reg if enabled; we add to this list below if not.
                preg(xreg(22)),
                preg(xreg(23)),
                preg(xreg(24)),
                preg(xreg(25)),
                preg(xreg(26)),
                preg(xreg(27)),
                preg(xreg(28)),
            ],
            vec![
                preg(vreg(8)),
                preg(vreg(9)),
                preg(vreg(10)),
                preg(vreg(11)),
                preg(vreg(12)),
                preg(vreg(13)),
                preg(vreg(14)),
                preg(vreg(15)),
            ],
            // Vector Regclass is unused
            vec![],
        ],
        fixed_stack_slots: vec![],
        scratch_by_class: [None, None, None],
    };

    if !enable_pinned_reg {
        debug_assert_eq!(PINNED_REG, 21); // We assumed this above in hardcoded reg list.
        env.non_preferred_regs_by_class[0].push(preg(xreg(PINNED_REG)));
    }

    env
}