Path: blob/main/cranelift/codegen/src/isa/aarch64/abi.rs
//! Implementation of a standard AArch64 ABI.

use crate::CodegenResult;
use crate::ir;
use crate::ir::MemFlags;
use crate::ir::types;
use crate::ir::types::*;
use crate::ir::{ExternalName, LibCall, Signature, dynamic_to_fixed};
use crate::isa;
use crate::isa::aarch64::{inst::*, settings as aarch64_settings};
use crate::isa::unwind::UnwindInst;
use crate::isa::winch;
use crate::machinst::*;
use crate::settings;
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc2::{MachineEnv, PReg, PRegSet};
use smallvec::{SmallVec, smallvec};
use std::borrow::ToOwned;
use std::sync::OnceLock;

// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
// these ABIs are very similar.

/// Support for the AArch64 ABI from the callee side (within a function body).
pub(crate) type AArch64Callee = Callee<AArch64MachineDeps>;

impl From<StackAMode> for AMode {
    fn from(stack: StackAMode) -> AMode {
        match stack {
            StackAMode::IncomingArg(off, stack_args_size) => AMode::IncomingArg {
                off: i64::from(stack_args_size) - off,
            },
            StackAMode::Slot(off) => AMode::SlotOffset { off },
            StackAMode::OutgoingArg(off) => AMode::SPOffset { off },
        }
    }
}

// Returns the size of stack space needed to store the
// `clobbered_callee_saves` registers.
fn compute_clobber_size(clobbered_callee_saves: &[Writable<RealReg>]) -> u32 {
    let mut int_regs = 0;
    let mut vec_regs = 0;
    for &reg in clobbered_callee_saves {
        match reg.to_reg().class() {
            RegClass::Int => {
                int_regs += 1;
            }
            RegClass::Float => {
                vec_regs += 1;
            }
            RegClass::Vector => unreachable!(),
        }
    }

    // Round up to multiple of 2, to keep 16-byte stack alignment.
    let int_save_bytes = (int_regs + (int_regs & 1)) * 8;
    // The Procedure Call Standard for the Arm 64-bit Architecture
    // (AAPCS64, including several related ABIs such as the one used by
    // Windows) mandates saving only the bottom 8 bytes of the vector
    // registers, so we round up the number of registers to ensure
    // proper stack alignment (similarly to the situation with
    // `int_reg`).
    let vec_reg_size = 8;
    let vec_save_padding = vec_regs & 1;
    // FIXME: SVE: ABI is different to Neon, so do we treat all vec regs as Z-regs?
    let vec_save_bytes = (vec_regs + vec_save_padding) * vec_reg_size;

    int_save_bytes + vec_save_bytes
}
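
// Illustrative worked example (not from the original source): with clobbered
// callee-saves {x19, x20, x21} and {v8}, `int_regs` is 3 and `vec_regs` is 1, so
// `int_save_bytes` = (3 + 1) * 8 = 32 and `vec_save_bytes` = (1 + 1) * 8 = 16,
// keeping the 48-byte total a multiple of 16.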

/// AArch64-specific ABI behavior. This struct just serves as an implementation
/// point for the trait; it is never actually instantiated.
pub struct AArch64MachineDeps;

impl IsaFlags for aarch64_settings::Flags {
    fn is_forward_edge_cfi_enabled(&self) -> bool {
        self.use_bti()
    }
}

impl ABIMachineSpec for AArch64MachineDeps {
    type I = Inst;

    type F = aarch64_settings::Flags;

    /// This is the limit for the size of argument and return-value areas on the
    /// stack. We place a reasonable limit here to avoid integer overflow issues
    /// with 32-bit arithmetic: for now, 128 MB.
    const STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

    fn word_bits() -> u32 {
        64
    }

    /// Return required stack alignment in bytes.
    fn stack_align(_call_conv: isa::CallConv) -> u32 {
        16
    }

    fn compute_arg_locs(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        mut args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)> {
        let is_apple_cc = call_conv == isa::CallConv::AppleAarch64;
        let is_winch_return = call_conv == isa::CallConv::Winch && args_or_rets == ArgsOrRets::Rets;

        // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#64parameter-passing), section 6.4.
        //
        // macOS aarch64 is slightly different, see also
        // https://developer.apple.com/documentation/xcode/writing_arm64_code_for_apple_platforms.
        // We are diverging from the macOS aarch64 implementation in the
        // following ways:
        // - sign- and zero-extensions of data types less than 32 bits are not
        //   implemented yet.
        // - we align the arguments stack space to a 16-byte boundary, while
        //   macOS allows aligning only on 8 bytes. In practice it means we're
        //   slightly overallocating when calling, which is fine, and doesn't
        //   break our other invariant that the stack is always allocated in
        //   16-byte chunks.

        let mut next_xreg = if call_conv == isa::CallConv::Tail {
            // We reserve `x0` for the return area pointer. For simplicity, we
            // reserve it even when there is no return area pointer needed. This
            // also means that identity functions don't have to shuffle arguments to
            // different return registers because we shifted all argument register
            // numbers down by one to make space for the return area pointer.
            //
            // Also, we cannot use all allocatable GPRs as arguments because we need
            // at least one allocatable register for holding the callee address in
            // indirect calls. So skip `x1` also, reserving it for that role.
            2
        } else {
            0
        };
        let mut next_vreg = 0;
        let mut next_stack: u32 = 0;
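
        // Hypothetical example (not from the original comments): for a non-tail
        // signature `(i64, i64, f32)`, the loop below assigns x0 and x1 to the integer
        // arguments and v0 to the float argument, leaving `next_stack` at 0.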

        // Note on return values: on the regular ABI, we may return values
        // in 8 registers for V128 and I64 registers independently of the
        // number of register values returned in the other class. That is,
        // we can return values in up to 8 integer and
        // 8 vector registers at once.
        let max_per_class_reg_vals = 8; // x0-x7 and v0-v7
        let mut remaining_reg_vals = 16;

        let ret_area_ptr = if add_ret_area_ptr {
            debug_assert_eq!(args_or_rets, ArgsOrRets::Args);
            if call_conv != isa::CallConv::Winch {
                // In the AAPCS64 calling convention the return area pointer is
                // stored in x8.
                Some(ABIArg::reg(
                    xreg(8).to_real_reg().unwrap(),
                    I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                ))
            } else {
                // Use x0 for the return area pointer in the Winch calling convention
                // to simplify the ABI handling code in Winch by avoiding an AArch64
                // special case to assign it to x8.
                next_xreg += 1;
                Some(ABIArg::reg(
                    xreg(0).to_real_reg().unwrap(),
                    I64,
                    ir::ArgumentExtension::None,
                    ir::ArgumentPurpose::Normal,
                ))
            }
        } else {
            None
        };

        for (i, param) in params.into_iter().enumerate() {
            if is_apple_cc && param.value_type == types::F128 && !flags.enable_llvm_abi_extensions()
            {
                panic!(
                    "f128 args/return values not supported for apple_aarch64 unless LLVM ABI extensions are enabled"
                );
            }

            let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?;

            if let ir::ArgumentPurpose::StructReturn = param.purpose {
                assert!(
                    call_conv != isa::CallConv::Tail,
                    "support for StructReturn parameters is not implemented for the `tail` \
                     calling convention yet",
                );
            }

            if let ir::ArgumentPurpose::StructArgument(_) = param.purpose {
                panic!(
                    "StructArgument parameters are not supported on arm64. \
                     Use regular pointer arguments instead."
                );
            }

            if let ir::ArgumentPurpose::StructReturn = param.purpose {
                // FIXME add assert_eq!(args_or_rets, ArgsOrRets::Args); once
                // ensure_struct_return_ptr_is_returned is gone.
                assert!(
                    param.value_type == types::I64,
                    "StructReturn must be a pointer sized integer"
                );
                args.push(ABIArg::Slots {
                    slots: smallvec![ABIArgSlot::Reg {
                        reg: xreg(8).to_real_reg().unwrap(),
                        ty: types::I64,
                        extension: param.extension,
                    },],
                    purpose: ir::ArgumentPurpose::StructReturn,
                });
                continue;
            }
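
            // Hypothetical example (not from the original comments): for a signature
            // `(i64, i128)` under the default convention, the i64 goes in x0; stage C.8
            // handled below rounds the i128 up to an even register, so it is passed in
            // (x2, x3) and x1 is left unused, whereas the Apple ABI would use (x1, x2).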

            // Handle multi-register params
            //
            // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#642parameter-passing-rules), (Section 6.4.2 Stage C).
            //
            // For arguments with an alignment of 16 we round up the register number
            // to the next even value. So we can never allocate, for example, an i128
            // to X1 and X2; we have to skip one register and do X2, X3
            // (Stage C.8).
            // Note: The Apple ABI deviates a bit here. It doesn't respect Stage C.8
            // and will happily allocate an i128 to X1 and X2.
            //
            // For integer types with an alignment of 16 we also have the additional
            // restriction of passing the lower half in Xn and the upper half in Xn+1
            // (Stage C.9).
            //
            // For examples of how LLVM handles this: https://godbolt.org/z/bhd3vvEfh
            //
            // On the Apple ABI it is unspecified whether we can spill half the value onto
            // the stack, i.e. load the lower half into x7 and the upper half onto the stack.
            // LLVM does not seem to do this, so we are going to replicate that behaviour.
            let is_multi_reg = rcs.len() >= 2;
            if is_multi_reg {
                assert!(
                    rcs.len() == 2,
                    "Unable to handle multi reg params with more than 2 regs"
                );
                assert!(
                    rcs == &[RegClass::Int, RegClass::Int],
                    "Unable to handle non i64 regs"
                );

                let reg_class_space = max_per_class_reg_vals - next_xreg;
                let reg_space = remaining_reg_vals;

                if reg_space >= 2 && reg_class_space >= 2 {
                    // The aarch64 ABI does not allow us to start a split argument
                    // at an odd-numbered register, so we need to skip one register.
                    //
                    // TODO: The Fast ABI should probably not skip the register.
                    if !is_apple_cc && next_xreg % 2 != 0 {
                        next_xreg += 1;
                    }

                    let lower_reg = xreg(next_xreg);
                    let upper_reg = xreg(next_xreg + 1);

                    args.push(ABIArg::Slots {
                        slots: smallvec![
                            ABIArgSlot::Reg {
                                reg: lower_reg.to_real_reg().unwrap(),
                                ty: reg_types[0],
                                extension: param.extension,
                            },
                            ABIArgSlot::Reg {
                                reg: upper_reg.to_real_reg().unwrap(),
                                ty: reg_types[1],
                                extension: param.extension,
                            },
                        ],
                        purpose: param.purpose,
                    });

                    next_xreg += 2;
                    remaining_reg_vals -= 2;
                    continue;
                }
            } else {
                // Single-register parameters.
                let rc = rcs[0];
                let next_reg = match rc {
                    RegClass::Int => &mut next_xreg,
                    RegClass::Float => &mut next_vreg,
                    RegClass::Vector => unreachable!(),
                };

                let push_to_reg = if is_winch_return {
                    // Winch uses the first register to return the last result.
                    i == params.len() - 1
                } else {
                    // Use max_per_class_reg_vals & remaining_reg_vals otherwise.
                    *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0
                };

                if push_to_reg {
                    let reg = match rc {
                        RegClass::Int => xreg(*next_reg),
                        RegClass::Float => vreg(*next_reg),
                        RegClass::Vector => unreachable!(),
                    };
                    // Overlay Z-regs on V-regs for parameter passing.
                    let ty = if param.value_type.is_dynamic_vector() {
                        dynamic_to_fixed(param.value_type)
                    } else {
                        param.value_type
                    };
                    args.push(ABIArg::reg(
                        reg.to_real_reg().unwrap(),
                        ty,
                        param.extension,
                        param.purpose,
                    ));
                    *next_reg += 1;
                    remaining_reg_vals -= 1;
                    continue;
                }
            }

            // Spill to the stack.

            if args_or_rets == ArgsOrRets::Rets && !flags.enable_multi_ret_implicit_sret() {
                return Err(crate::CodegenError::Unsupported(
                    "Too many return values to fit in registers. \
                     Use a StructReturn argument instead. (#9510)"
                        .to_owned(),
                ));
            }

            // Compute the stack slot's size.
            let size = (ty_bits(param.value_type) / 8) as u32;
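
            // Illustrative example (not from the original comments): an i8 argument
            // that spills to the stack occupies a full 8-byte slot under the default
            // convention, but only a single naturally-aligned byte under the Apple and
            // Winch conventions handled just below.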

            let size = if is_apple_cc || is_winch_return {
                // MacOS and Winch aarch64 allow stack slots with
                // sizes less than 8 bytes. They still need to be
                // properly aligned on their natural data alignment,
                // though.
                size
            } else {
                // Every arg takes a minimum slot of 8 bytes. (16-byte stack
                // alignment happens separately after all args.)
                std::cmp::max(size, 8)
            };

            if !is_winch_return {
                // Align the stack slot.
                debug_assert!(size.is_power_of_two());
                next_stack = align_to(next_stack, size);
            }

            let slots = reg_types
                .iter()
                .copied()
                // Build the stack locations from each slot.
                .scan(next_stack, |next_stack, ty| {
                    let slot_offset = *next_stack as i64;
                    *next_stack += (ty_bits(ty) / 8) as u32;

                    Some((ty, slot_offset))
                })
                .map(|(ty, offset)| ABIArgSlot::Stack {
                    offset,
                    ty,
                    extension: param.extension,
                })
                .collect();

            args.push(ABIArg::Slots {
                slots,
                purpose: param.purpose,
            });

            next_stack += size;
        }

        let extra_arg = if let Some(ret_area_ptr) = ret_area_ptr {
            args.push_non_formal(ret_area_ptr);
            Some(args.args().len() - 1)
        } else {
            None
        };

        if is_winch_return {
            winch::reverse_stack(args, next_stack, false);
        }

        next_stack = align_to(next_stack, 16);

        Ok((next_stack, extra_arg))
    }

    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
        Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
    }

    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
        Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
    }

    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
        Inst::gen_move(to_reg, from_reg, ty)
    }

    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Inst {
        assert!(from_bits < to_bits);
        Inst::Extend {
            rd: to_reg,
            rn: from_reg,
            signed,
            from_bits,
            to_bits,
        }
    }
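
    // Illustrative example (not from the original comments): `gen_extend` for a
    // zero-extension of an i8 value to i64 produces a single `Inst::Extend` with
    // `signed == false`, `from_bits == 8`, and `to_bits == 64`.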

    fn gen_args(args: Vec<ArgPair>) -> Inst {
        Inst::Args { args }
    }

    fn gen_rets(rets: Vec<RetPair>) -> Inst {
        Inst::Rets { rets }
    }

    fn gen_add_imm(
        _call_conv: isa::CallConv,
        into_reg: Writable<Reg>,
        from_reg: Reg,
        imm: u32,
    ) -> SmallInstVec<Inst> {
        let imm = imm as u64;
        let mut insts = SmallVec::new();
        if let Some(imm12) = Imm12::maybe_from_u64(imm) {
            insts.push(Inst::AluRRImm12 {
                alu_op: ALUOp::Add,
                size: OperandSize::Size64,
                rd: into_reg,
                rn: from_reg,
                imm12,
            });
        } else {
            let scratch2 = writable_tmp2_reg();
            assert_ne!(scratch2.to_reg(), from_reg);
            // `gen_add_imm` is only ever called after register allocation has taken place, and as a
            // result it's ok to reuse the scratch2 register here. If that changes, we'll need to
            // plumb through a way to allocate temporary virtual registers.
            insts.extend(Inst::load_constant(scratch2, imm));
            insts.push(Inst::AluRRRExtend {
                alu_op: ALUOp::Add,
                size: OperandSize::Size64,
                rd: into_reg,
                rn: from_reg,
                rm: scratch2.to_reg(),
                extendop: ExtendOp::UXTX,
            });
        }
        insts
    }

    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
        let mut insts = SmallVec::new();
        insts.push(Inst::AluRRRExtend {
            alu_op: ALUOp::SubS,
            size: OperandSize::Size64,
            rd: writable_zero_reg(),
            rn: stack_reg(),
            rm: limit_reg,
            extendop: ExtendOp::UXTX,
        });
        insts.push(Inst::TrapIf {
            trap_code: ir::TrapCode::STACK_OVERFLOW,
            // Here `Lo` == "less than" when interpreting the two
            // operands as unsigned integers.
            kind: CondBrKind::Cond(Cond::Lo),
        });
        insts
    }

    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Inst {
        // FIXME: Do something different for dynamic types?
        let mem = mem.into();
        Inst::LoadAddr { rd: into_reg, mem }
    }

    fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
        spilltmp_reg()
    }

    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
        let mem = AMode::RegOffset {
            rn: base,
            off: offset as i64,
        };
        Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
    }

    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
        let mem = AMode::RegOffset {
            rn: base,
            off: offset as i64,
        };
        Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
    }

    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Inst> {
        if amount == 0 {
            return SmallVec::new();
        }

        let (amount, is_sub) = if amount > 0 {
            (amount as u64, false)
        } else {
            (-amount as u64, true)
        };

        let alu_op = if is_sub { ALUOp::Sub } else { ALUOp::Add };

        let mut ret = SmallVec::new();
        if let Some(imm12) = Imm12::maybe_from_u64(amount) {
            let adj_inst = Inst::AluRRImm12 {
                alu_op,
                size: OperandSize::Size64,
                rd: writable_stack_reg(),
                rn: stack_reg(),
                imm12,
            };
            ret.push(adj_inst);
        } else {
            let tmp = writable_spilltmp_reg();
            // `gen_sp_reg_adjust` is called after regalloc2, so it's acceptable to reuse `tmp` for
            // intermediates in `load_constant`.
            let const_inst = Inst::load_constant(tmp, amount);
            let adj_inst = Inst::AluRRRExtend {
                alu_op,
                size: OperandSize::Size64,
                rd: writable_stack_reg(),
                rn: stack_reg(),
                rm: tmp.to_reg(),
                extendop: ExtendOp::UXTX,
            };
            ret.extend(const_inst);
            ret.push(adj_inst);
        }
        ret
    }
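
    // Illustrative example (not from the original comments): `gen_sp_reg_adjust(-32)`
    // emits a single `sub sp, sp, #32`, while an adjustment such as 0x12345 does not
    // fit in an Imm12 and is first materialized into the spill-temp register and then
    // applied to SP with an extended ALU op.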

    fn gen_prologue_frame_setup(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        isa_flags: &aarch64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        let setup_frame = frame_layout.setup_area_size > 0;
        let mut insts = SmallVec::new();

        match Self::select_api_key(isa_flags, call_conv, setup_frame) {
            Some(key) => {
                insts.push(Inst::Paci { key });
                if flags.unwind_info() {
                    insts.push(Inst::Unwind {
                        inst: UnwindInst::Aarch64SetPointerAuth {
                            return_addresses: true,
                        },
                    });
                }
            }
            None => {
                if isa_flags.use_bti() {
                    insts.push(Inst::Bti {
                        targets: BranchTargetType::C,
                    });
                }

                if flags.unwind_info() && call_conv == isa::CallConv::AppleAarch64 {
                    // The macOS unwinder seems to require this.
                    insts.push(Inst::Unwind {
                        inst: UnwindInst::Aarch64SetPointerAuth {
                            return_addresses: false,
                        },
                    });
                }
            }
        }

        if setup_frame {
            // stp fp (x29), lr (x30), [sp, #-16]!
            insts.push(Inst::StoreP64 {
                rt: fp_reg(),
                rt2: link_reg(),
                mem: PairAMode::SPPreIndexed {
                    simm7: SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
                },
                flags: MemFlags::trusted(),
            });

            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::PushFrameRegs {
                        offset_upward_to_caller_sp: frame_layout.setup_area_size,
                    },
                });
            }

            // mov fp (x29), sp. This uses the `ADD rd, rn, #0` form of `MOV` because
            // the usual encoding (`ORR`) does not work with SP.
            insts.push(Inst::AluRRImm12 {
                alu_op: ALUOp::Add,
                size: OperandSize::Size64,
                rd: writable_fp_reg(),
                rn: stack_reg(),
                imm12: Imm12 {
                    bits: 0,
                    shift12: false,
                },
            });
        }

        insts
    }

    fn gen_epilogue_frame_restore(
        call_conv: isa::CallConv,
        _flags: &settings::Flags,
        _isa_flags: &aarch64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        let setup_frame = frame_layout.setup_area_size > 0;
        let mut insts = SmallVec::new();

        if setup_frame {
            // N.B.: sp is already adjusted to the appropriate place by the
            // clobber-restore code (which also frees the fixed frame). Hence, there
            // is no need for the usual `mov sp, fp` here.

            // `ldp fp, lr, [sp], #16`
            insts.push(Inst::LoadP64 {
                rt: writable_fp_reg(),
                rt2: writable_link_reg(),
                mem: PairAMode::SPPostIndexed {
                    simm7: SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
                },
                flags: MemFlags::trusted(),
            });
        }

        if call_conv == isa::CallConv::Tail && frame_layout.tail_args_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(
                frame_layout.tail_args_size.try_into().unwrap(),
            ));
        }

        insts
    }
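
    // Illustrative shape of the generated code for a frame that needs setup (not from
    // the original comments):
    //
    //   stp x29, x30, [sp, #-16]!   // save FP/LR
    //   mov x29, sp                 // establish the frame pointer
    //   ...                         // clobber saves, frame allocation, body
    //   ldp x29, x30, [sp], #16     // restore FP/LR
    //   ret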

    fn gen_return(
        call_conv: isa::CallConv,
        isa_flags: &aarch64_settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Inst> {
        let setup_frame = frame_layout.setup_area_size > 0;

        match Self::select_api_key(isa_flags, call_conv, setup_frame) {
            Some(key) => {
                smallvec![Inst::AuthenticatedRet {
                    key,
                    is_hint: !isa_flags.has_pauth(),
                }]
            }
            None => {
                smallvec![Inst::Ret {}]
            }
        }
    }

    fn gen_probestack(_insts: &mut SmallInstVec<Self::I>, _: u32) {
        // TODO: implement if we ever require stack probes on an AArch64 host
        // (unlikely unless Lucet is ported).
        unimplemented!("Stack probing is unimplemented on AArch64");
    }

    fn gen_inline_probestack(
        insts: &mut SmallInstVec<Self::I>,
        _call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    ) {
        // The stack probe loop currently takes 6 instructions and each inline
        // probe takes 2 (ish, these numbers sort of depend on the constants).
        // Set this to 3 to keep the max size of the probe to 6 instructions.
        const PROBE_MAX_UNROLL: u32 = 3;

        // Calculate how many probes we need to perform. Round down, as we only
        // need to probe whole guard_size regions we'd otherwise skip over.
        let probe_count = frame_size / guard_size;
        if probe_count == 0 {
            // No probe necessary.
        } else if probe_count <= PROBE_MAX_UNROLL {
            Self::gen_probestack_unroll(insts, guard_size, probe_count)
        } else {
            Self::gen_probestack_loop(insts, frame_size, guard_size)
        }
    }
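
    // Illustrative example (not from the original comments): with a 64 KiB guard size,
    // a 128 KiB frame needs two probes and is unrolled inline, while a 1 MiB frame
    // (16 probes) falls back to the `StackProbeLoop` sequence.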

    fn gen_clobber_save(
        _call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Inst; 16]> {
        let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class();

        let mut insts = SmallVec::new();
        let setup_frame = frame_layout.setup_area_size > 0;

        // When a return_call within this function required more stack arguments than we have
        // present, resize the incoming argument area of the frame to accommodate those arguments.
        let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size;
        if incoming_args_diff > 0 {
            // Decrement SP to account for the additional space required by a tail call.
            insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32)));
            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::StackAlloc {
                        size: incoming_args_diff,
                    },
                });
            }

            // Move fp and lr down.
            if setup_frame {
                // Reload the frame pointer from the stack.
                insts.push(Inst::ULoad64 {
                    rd: regs::writable_fp_reg(),
                    mem: AMode::SPOffset {
                        off: i64::from(incoming_args_diff),
                    },
                    flags: MemFlags::trusted(),
                });

                // Store the frame pointer and link register again at the new SP.
                insts.push(Inst::StoreP64 {
                    rt: fp_reg(),
                    rt2: link_reg(),
                    mem: PairAMode::SignedOffset {
                        reg: regs::stack_reg(),
                        simm7: SImm7Scaled::maybe_from_i64(0, types::I64).unwrap(),
                    },
                    flags: MemFlags::trusted(),
                });

                // Keep the frame pointer in sync.
                insts.push(Self::gen_move(
                    regs::writable_fp_reg(),
                    regs::stack_reg(),
                    types::I64,
                ));
            }
        }

        if flags.unwind_info() && setup_frame {
            // The *unwind* frame (but not the actual frame) starts at the
            // clobbers, just below the saved FP/LR pair.
            insts.push(Inst::Unwind {
                inst: UnwindInst::DefineNewFrame {
                    offset_downward_to_clobbers: frame_layout.clobber_size,
                    offset_upward_to_caller_sp: frame_layout.setup_area_size,
                },
            });
        }

        // We use pre-indexed addressing modes here, rather than the possibly
        // more efficient "subtract sp once then use fixed offsets" scheme,
        // because (i) we cannot necessarily guarantee that the offset of a
        // clobber-save slot will be within a SImm7Scaled (+504-byte) offset
        // range of the whole frame including other slots, (ii) it is more complex to
        // conditionally generate a two-stage SP adjustment (clobbers then fixed
        // frame) otherwise, and (iii) generally we just want to maintain simplicity
        // here for maintainability. Because clobbers are at the top of the
        // frame, just below FP, all that is necessary is to use the pre-indexed
        // "push" `[sp, #-16]!` addressing mode.
        //
        // `clobber_offset` tracks the offset above start-of-clobbers for unwind-info
        // purposes.
        let mut clobber_offset = frame_layout.clobber_size;
        let clobber_offset_change = 16;
        let iter = clobbered_int.chunks_exact(2);

        if let [rd] = iter.remainder() {
            let rd: Reg = rd.to_reg().into();

            debug_assert_eq!(rd.class(), RegClass::Int);
            // str rd, [sp, #-16]!
            insts.push(Inst::Store64 {
                rd,
                mem: AMode::SPPreIndexed {
                    simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
                },
                flags: MemFlags::trusted(),
            });

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change as u32;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rd.to_real_reg().unwrap(),
                    },
                });
            }
        }

        let mut iter = iter.rev();

        while let Some([rt, rt2]) = iter.next() {
            // .to_reg().into(): Writable<RealReg> --> RealReg --> Reg
            let rt: Reg = rt.to_reg().into();
            let rt2: Reg = rt2.to_reg().into();

            debug_assert!(rt.class() == RegClass::Int);
            debug_assert!(rt2.class() == RegClass::Int);

            // stp rt, rt2, [sp, #-16]!
            insts.push(Inst::StoreP64 {
                rt,
                rt2,
                mem: PairAMode::SPPreIndexed {
                    simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, types::I64).unwrap(),
                },
                flags: MemFlags::trusted(),
            });

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change as u32;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rt.to_real_reg().unwrap(),
                    },
                });
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32,
                        reg: rt2.to_real_reg().unwrap(),
                    },
                });
            }
        }

        let store_vec_reg = |rd| Inst::FpuStore64 {
            rd,
            mem: AMode::SPPreIndexed {
                simm9: SImm9::maybe_from_i64(-clobber_offset_change).unwrap(),
            },
            flags: MemFlags::trusted(),
        };
        let iter = clobbered_vec.chunks_exact(2);

        if let [rd] = iter.remainder() {
            let rd: Reg = rd.to_reg().into();

            debug_assert_eq!(rd.class(), RegClass::Float);
            insts.push(store_vec_reg(rd));

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change as u32;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rd.to_real_reg().unwrap(),
                    },
                });
            }
        }

        let store_vec_reg_pair = |rt, rt2| {
            let clobber_offset_change = 16;

            (
                Inst::FpuStoreP64 {
                    rt,
                    rt2,
                    mem: PairAMode::SPPreIndexed {
                        simm7: SImm7Scaled::maybe_from_i64(-clobber_offset_change, F64).unwrap(),
                    },
                    flags: MemFlags::trusted(),
                },
                clobber_offset_change as u32,
            )
        };
        let mut iter = iter.rev();

        while let Some([rt, rt2]) = iter.next() {
            let rt: Reg = rt.to_reg().into();
            let rt2: Reg = rt2.to_reg().into();

            debug_assert_eq!(rt.class(), RegClass::Float);
            debug_assert_eq!(rt2.class(), RegClass::Float);

            let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2);

            insts.push(inst);

            if flags.unwind_info() {
                clobber_offset -= clobber_offset_change;
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset,
                        reg: rt.to_real_reg().unwrap(),
                    },
                });
                insts.push(Inst::Unwind {
                    inst: UnwindInst::SaveReg {
                        clobber_offset: clobber_offset + clobber_offset_change / 2,
                        reg: rt2.to_real_reg().unwrap(),
                    },
                });
            }
        }
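
        // Illustrative example (not from the original comments): with clobbered
        // callee-saves {x19, x20, x21}, the code above emits
        //   str x21, [sp, #-16]!
        //   stp x19, x20, [sp, #-16]!
        // so the odd register is pushed first in its own 16-byte slot and the pair
        // follows, keeping SP 16-byte aligned throughout.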

        // Allocate the fixed frame below the clobbers if necessary.
        let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        if stack_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(-(stack_size as i32)));
            if flags.unwind_info() {
                insts.push(Inst::Unwind {
                    inst: UnwindInst::StackAlloc { size: stack_size },
                });
            }
        }

        insts
    }

    fn gen_clobber_restore(
        _call_conv: isa::CallConv,
        _flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Inst; 16]> {
        let mut insts = SmallVec::new();
        let (clobbered_int, clobbered_vec) = frame_layout.clobbered_callee_saves_by_class();

        // Free the fixed frame if necessary.
        let stack_size = frame_layout.fixed_frame_storage_size + frame_layout.outgoing_args_size;
        if stack_size > 0 {
            insts.extend(Self::gen_sp_reg_adjust(stack_size as i32));
        }

        let load_vec_reg = |rd| Inst::FpuLoad64 {
            rd,
            mem: AMode::SPPostIndexed {
                simm9: SImm9::maybe_from_i64(16).unwrap(),
            },
            flags: MemFlags::trusted(),
        };
        let load_vec_reg_pair = |rt, rt2| Inst::FpuLoadP64 {
            rt,
            rt2,
            mem: PairAMode::SPPostIndexed {
                simm7: SImm7Scaled::maybe_from_i64(16, F64).unwrap(),
            },
            flags: MemFlags::trusted(),
        };

        let mut iter = clobbered_vec.chunks_exact(2);

        while let Some([rt, rt2]) = iter.next() {
            let rt: Writable<Reg> = rt.map(|r| r.into());
            let rt2: Writable<Reg> = rt2.map(|r| r.into());

            debug_assert_eq!(rt.to_reg().class(), RegClass::Float);
            debug_assert_eq!(rt2.to_reg().class(), RegClass::Float);
            insts.push(load_vec_reg_pair(rt, rt2));
        }

        debug_assert!(iter.remainder().len() <= 1);

        if let [rd] = iter.remainder() {
            let rd: Writable<Reg> = rd.map(|r| r.into());

            debug_assert_eq!(rd.to_reg().class(), RegClass::Float);
            insts.push(load_vec_reg(rd));
        }

        let mut iter = clobbered_int.chunks_exact(2);

        while let Some([rt, rt2]) = iter.next() {
            let rt: Writable<Reg> = rt.map(|r| r.into());
            let rt2: Writable<Reg> = rt2.map(|r| r.into());

            debug_assert_eq!(rt.to_reg().class(), RegClass::Int);
            debug_assert_eq!(rt2.to_reg().class(), RegClass::Int);
            // ldp rt, rt2, [sp], #16
            insts.push(Inst::LoadP64 {
                rt,
                rt2,
                mem: PairAMode::SPPostIndexed {
                    simm7: SImm7Scaled::maybe_from_i64(16, I64).unwrap(),
                },
                flags: MemFlags::trusted(),
            });
        }

        debug_assert!(iter.remainder().len() <= 1);

        if let [rd] = iter.remainder() {
            let rd: Writable<Reg> = rd.map(|r| r.into());

            debug_assert_eq!(rd.to_reg().class(), RegClass::Int);
            // ldr rd, [sp], #16
            insts.push(Inst::ULoad64 {
                rd,
                mem: AMode::SPPostIndexed {
                    simm9: SImm9::maybe_from_i64(16).unwrap(),
                },
                flags: MemFlags::trusted(),
            });
        }

        insts
    }
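
    // Illustrative note (not from the original comments): the restore sequence mirrors
    // the saves in `gen_clobber_save` in reverse; for clobbered {x19, x20, x21} it
    // emits `ldp x19, x20, [sp], #16` followed by `ldr x21, [sp], #16`.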

    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        mut alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]> {
        let mut insts = SmallVec::new();
        let arg0 = writable_xreg(0);
        let arg1 = writable_xreg(1);
        let arg2 = writable_xreg(2);
        let tmp = alloc_tmp(Self::word_type());
        insts.extend(Inst::load_constant(tmp, size as u64));
        insts.push(Inst::Call {
            info: Box::new(CallInfo {
                dest: ExternalName::LibCall(LibCall::Memcpy),
                uses: smallvec![
                    CallArgPair {
                        vreg: dst,
                        preg: arg0.to_reg()
                    },
                    CallArgPair {
                        vreg: src,
                        preg: arg1.to_reg()
                    },
                    CallArgPair {
                        vreg: tmp.to_reg(),
                        preg: arg2.to_reg()
                    }
                ],
                defs: smallvec![],
                clobbers: Self::get_regs_clobbered_by_call(call_conv, false),
                caller_conv: call_conv,
                callee_conv: call_conv,
                callee_pop_size: 0,
                try_call_info: None,
            }),
        });
        insts
    }

    fn get_number_of_spillslots_for_value(
        rc: RegClass,
        vector_size: u32,
        _isa_flags: &Self::F,
    ) -> u32 {
        assert_eq!(vector_size % 8, 0);
        // We allocate in terms of 8-byte slots.
        match rc {
            RegClass::Int => 1,
            RegClass::Float => vector_size / 8,
            RegClass::Vector => unreachable!(),
        }
    }

    fn get_machine_env(flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv {
        if flags.enable_pinned_reg() {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env(true))
        } else {
            static MACHINE_ENV: OnceLock<MachineEnv> = OnceLock::new();
            MACHINE_ENV.get_or_init(|| create_reg_env(false))
        }
    }

    fn get_regs_clobbered_by_call(call_conv: isa::CallConv, is_exception: bool) -> PRegSet {
        match call_conv {
            isa::CallConv::Winch => WINCH_CLOBBERS,
            isa::CallConv::Tail if is_exception => ALL_CLOBBERS,
            _ => DEFAULT_AAPCS_CLOBBERS,
        }
    }

    fn get_ext_mode(
        call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension {
        if call_conv == isa::CallConv::AppleAarch64 {
            specified
        } else {
            ir::ArgumentExtension::None
        }
    }

    fn compute_frame_layout(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        sig: &Signature,
        regs: &[Writable<RealReg>],
        function_calls: FunctionCalls,
        incoming_args_size: u32,
        tail_args_size: u32,
        stackslots_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout {
        let mut regs: Vec<Writable<RealReg>> = regs
            .iter()
            .cloned()
            .filter(|r| {
                is_reg_saved_in_prologue(call_conv, flags.enable_pinned_reg(), sig, r.to_reg())
            })
            .collect();

        // Sort registers for deterministic code output. We can do an unstable
        // sort because the registers will be unique (there are no dups).
        regs.sort_unstable();

        // Compute clobber size.
        let clobber_size = compute_clobber_size(&regs);

        // Compute linkage frame size.
        let setup_area_size = if flags.preserve_frame_pointers()
            || function_calls != FunctionCalls::None
            // The function arguments that are passed on the stack are addressed
            // relative to the Frame Pointer.
            || incoming_args_size > 0
            || clobber_size > 0
            || fixed_frame_storage_size > 0
        {
            16 // FP, LR
        } else {
            0
        };

        // Return the FrameLayout structure.
        FrameLayout {
            word_bytes: 8,
            incoming_args_size,
            tail_args_size,
            setup_area_size,
            clobber_size,
            fixed_frame_storage_size,
            stackslots_size,
            outgoing_args_size,
            clobbered_callee_saves: regs,
            function_calls,
        }
    }
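
    // Illustrative frame layout implied by the computation above (not from the
    // original comments), from higher to lower addresses: incoming/tail argument
    // area, the 16-byte FP/LR setup area (when present), callee-save clobbers, fixed
    // frame storage (spill slots and stack slots), and the outgoing argument area
    // at SP.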

    fn retval_temp_reg(_call_conv_of_callee: isa::CallConv) -> Writable<Reg> {
        // Use x9 as a temp if needed: clobbered, not a
        // retval.
        regs::writable_xreg(9)
    }

    fn exception_payload_regs(call_conv: isa::CallConv) -> &'static [Reg] {
        const PAYLOAD_REGS: &'static [Reg] = &[regs::xreg(0), regs::xreg(1)];
        match call_conv {
            isa::CallConv::SystemV | isa::CallConv::Tail => PAYLOAD_REGS,
            _ => &[],
        }
    }
}

impl AArch64MachineDeps {
    fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
        // When manually unrolling, adjust the stack pointer and then write a zero
        // to the stack at that offset. This generates something like
        // `sub sp, sp, #1, lsl #12` followed by `stur wzr, [sp]`.
        //
        // We do this because valgrind expects us to never write beyond the stack
        // pointer and associated redzone.
        // See: https://github.com/bytecodealliance/wasmtime/issues/7454
        for _ in 0..probe_count {
            insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));

            insts.push(Inst::gen_store(
                AMode::SPOffset { off: 0 },
                zero_reg(),
                I32,
                MemFlags::trusted(),
            ));
        }

        // Restore the stack pointer to its original value.
        insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
    }

    fn gen_probestack_loop(insts: &mut SmallInstVec<Inst>, frame_size: u32, guard_size: u32) {
        // The non-unrolled version uses two temporary registers. The
        // `start` contains the current offset from sp and counts downwards
        // during the loop by increments of `guard_size`. The `end` is
        // the size of the frame and where we stop.
        //
        // Note that this emission is all post-regalloc so it should be ok
        // to use the temporary registers here as input/output as the loop
        // itself is not allowed to use the registers.
        let start = writable_spilltmp_reg();
        let end = writable_tmp2_reg();
        // `gen_inline_probestack` is called after regalloc2, so it's acceptable to reuse
        // `start` and `end` as temporaries in load_constant.
        insts.extend(Inst::load_constant(start, 0));
        insts.extend(Inst::load_constant(end, frame_size.into()));
        insts.push(Inst::StackProbeLoop {
            start,
            end: end.to_reg(),
            step: Imm12::maybe_from_u64(guard_size.into()).unwrap(),
        });
    }

    pub fn select_api_key(
        isa_flags: &aarch64_settings::Flags,
        call_conv: isa::CallConv,
        setup_frame: bool,
    ) -> Option<APIKey> {
        if isa_flags.sign_return_address() && (setup_frame || isa_flags.sign_return_address_all()) {
            // The `tail` calling convention uses a zero modifier rather than SP
            // because tail calls may happen with a different stack pointer than
            // when the function was entered, meaning that it won't be the same when
            // the return address is decrypted.
            Some(if isa_flags.sign_return_address_with_bkey() {
                match call_conv {
                    isa::CallConv::Tail => APIKey::BZ,
                    _ => APIKey::BSP,
                }
            } else {
                match call_conv {
                    isa::CallConv::Tail => APIKey::AZ,
                    _ => APIKey::ASP,
                }
            })
        } else {
            None
        }
    }
}
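
// Illustrative example (not from the original comments): with `sign_return_address`
// enabled and the A key selected, a function that sets up a frame gets an SP-modified
// signing instruction (e.g. `paciasp`) from `gen_prologue_frame_setup` and a matching
// authenticated return from `gen_return`; the `tail` convention uses the
// zero-modifier variants instead, for the reason described in `select_api_key`.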

/// Is the given register saved in the prologue if clobbered, i.e., is it a
/// callee-save?
fn is_reg_saved_in_prologue(
    _call_conv: isa::CallConv,
    enable_pinned_reg: bool,
    sig: &Signature,
    r: RealReg,
) -> bool {
    // FIXME: We need to inspect whether a function is returning Z or P regs too.
    let save_z_regs = sig
        .params
        .iter()
        .filter(|p| p.value_type.is_dynamic_vector())
        .count()
        != 0;

    match r.class() {
        RegClass::Int => {
            // x19 - x28 inclusive are callee-saves.
            // However, x21 is the pinned reg if `enable_pinned_reg`
            // is set, and is implicitly globally-allocated, hence not
            // callee-saved in prologues.
            if enable_pinned_reg && r.hw_enc() == PINNED_REG {
                false
            } else {
                r.hw_enc() >= 19 && r.hw_enc() <= 28
            }
        }
        RegClass::Float => {
            // If a subroutine takes at least one argument in scalable vector registers
            // or scalable predicate registers, or if it is a function that returns
            // results in such registers, it must ensure that the entire contents of
            // z8-z23 are preserved across the call. In other cases it need only
            // preserve the low 64 bits of z8-z15.
            if save_z_regs {
                r.hw_enc() >= 8 && r.hw_enc() <= 23
            } else {
                // v8 - v15 inclusive are callee-saves.
                r.hw_enc() >= 8 && r.hw_enc() <= 15
            }
        }
        RegClass::Vector => unreachable!(),
    }
}
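
// Illustrative examples (not from the original comments): x20 is reported as
// callee-saved; x21 is not when the pinned register is enabled; and v9 is reported as
// callee-saved (its low 64 bits per the ABI) unless scalable vector values force the
// wider z8-z23 treatment described above.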

const fn default_aapcs_clobbers() -> PRegSet {
    PRegSet::empty()
        // x0 - x17 inclusive are caller-saves.
        .with(xreg_preg(0))
        .with(xreg_preg(1))
        .with(xreg_preg(2))
        .with(xreg_preg(3))
        .with(xreg_preg(4))
        .with(xreg_preg(5))
        .with(xreg_preg(6))
        .with(xreg_preg(7))
        .with(xreg_preg(8))
        .with(xreg_preg(9))
        .with(xreg_preg(10))
        .with(xreg_preg(11))
        .with(xreg_preg(12))
        .with(xreg_preg(13))
        .with(xreg_preg(14))
        .with(xreg_preg(15))
        .with(xreg_preg(16))
        .with(xreg_preg(17))
        // v0 - v7 inclusive and v16 - v31 inclusive are
        // caller-saves. The upper 64 bits of v8 - v15 inclusive are
        // also caller-saves. However, because we cannot currently
        // represent partial registers to regalloc2, we indicate here
        // that every vector register is caller-save. Because this
        // function is used at *callsites*, approximating in this
        // direction (save more than necessary) is conservative and
        // thus safe.
        //
        // Note that we exclude clobbers from a call instruction when
        // a call instruction's callee has the same ABI as the caller
        // (the current function body); this is safe (anything
        // clobbered by callee can be clobbered by caller as well) and
        // avoids unnecessary saves of v8-v15 in the prologue even
        // though we include them as defs here.
        .with(vreg_preg(0))
        .with(vreg_preg(1))
        .with(vreg_preg(2))
        .with(vreg_preg(3))
        .with(vreg_preg(4))
        .with(vreg_preg(5))
        .with(vreg_preg(6))
        .with(vreg_preg(7))
        .with(vreg_preg(8))
        .with(vreg_preg(9))
        .with(vreg_preg(10))
        .with(vreg_preg(11))
        .with(vreg_preg(12))
        .with(vreg_preg(13))
        .with(vreg_preg(14))
        .with(vreg_preg(15))
        .with(vreg_preg(16))
        .with(vreg_preg(17))
        .with(vreg_preg(18))
        .with(vreg_preg(19))
        .with(vreg_preg(20))
        .with(vreg_preg(21))
        .with(vreg_preg(22))
        .with(vreg_preg(23))
        .with(vreg_preg(24))
        .with(vreg_preg(25))
        .with(vreg_preg(26))
        .with(vreg_preg(27))
        .with(vreg_preg(28))
        .with(vreg_preg(29))
        .with(vreg_preg(30))
        .with(vreg_preg(31))
}

const fn winch_clobbers() -> PRegSet {
    PRegSet::empty()
        .with(xreg_preg(0))
        .with(xreg_preg(1))
        .with(xreg_preg(2))
        .with(xreg_preg(3))
        .with(xreg_preg(4))
        .with(xreg_preg(5))
        .with(xreg_preg(6))
        .with(xreg_preg(7))
        .with(xreg_preg(8))
        .with(xreg_preg(9))
        .with(xreg_preg(10))
        .with(xreg_preg(11))
        .with(xreg_preg(12))
        .with(xreg_preg(13))
        .with(xreg_preg(14))
        .with(xreg_preg(15))
        .with(xreg_preg(16))
        .with(xreg_preg(17))
        // x18 is used to carry platform state and is not allocatable by Winch.
        //
        // x19 - x27 are considered caller-saved in Winch's calling convention.
        .with(xreg_preg(19))
        .with(xreg_preg(20))
        .with(xreg_preg(21))
        .with(xreg_preg(22))
        .with(xreg_preg(23))
        .with(xreg_preg(24))
        .with(xreg_preg(25))
        .with(xreg_preg(26))
        .with(xreg_preg(27))
        // x28 is used as the shadow stack pointer and is considered
        // callee-saved.
        //
        // All vregs are considered caller-saved.
        .with(vreg_preg(0))
        .with(vreg_preg(1))
        .with(vreg_preg(2))
        .with(vreg_preg(3))
        .with(vreg_preg(4))
        .with(vreg_preg(5))
        .with(vreg_preg(6))
        .with(vreg_preg(7))
        .with(vreg_preg(8))
        .with(vreg_preg(9))
        .with(vreg_preg(10))
        .with(vreg_preg(11))
        .with(vreg_preg(12))
        .with(vreg_preg(13))
        .with(vreg_preg(14))
        .with(vreg_preg(15))
        .with(vreg_preg(16))
        .with(vreg_preg(17))
        .with(vreg_preg(18))
        .with(vreg_preg(19))
        .with(vreg_preg(20))
        .with(vreg_preg(21))
        .with(vreg_preg(22))
        .with(vreg_preg(23))
        .with(vreg_preg(24))
        .with(vreg_preg(25))
        .with(vreg_preg(26))
        .with(vreg_preg(27))
        .with(vreg_preg(28))
        .with(vreg_preg(29))
        .with(vreg_preg(30))
        .with(vreg_preg(31))
}

const fn all_clobbers() -> PRegSet {
    PRegSet::empty()
        // integer registers: x0 to x28 inclusive. (x29 is FP, x30 is
        // LR, x31 is SP/ZR.)
        .with(xreg_preg(0))
        .with(xreg_preg(1))
        .with(xreg_preg(2))
        .with(xreg_preg(3))
        .with(xreg_preg(4))
        .with(xreg_preg(5))
        .with(xreg_preg(6))
        .with(xreg_preg(7))
        .with(xreg_preg(8))
        .with(xreg_preg(9))
        .with(xreg_preg(10))
        .with(xreg_preg(11))
        .with(xreg_preg(12))
        .with(xreg_preg(13))
        .with(xreg_preg(14))
        .with(xreg_preg(15))
        .with(xreg_preg(16))
        .with(xreg_preg(17))
        .with(xreg_preg(18))
        .with(xreg_preg(19))
        .with(xreg_preg(20))
        .with(xreg_preg(21))
        .with(xreg_preg(22))
        .with(xreg_preg(23))
        .with(xreg_preg(24))
        .with(xreg_preg(25))
        .with(xreg_preg(26))
        .with(xreg_preg(27))
        .with(xreg_preg(28))
        // vector registers: v0 to v31 inclusive.
        .with(vreg_preg(0))
        .with(vreg_preg(1))
        .with(vreg_preg(2))
        .with(vreg_preg(3))
        .with(vreg_preg(4))
        .with(vreg_preg(5))
        .with(vreg_preg(6))
        .with(vreg_preg(7))
        .with(vreg_preg(8))
        .with(vreg_preg(9))
        .with(vreg_preg(10))
        .with(vreg_preg(11))
        .with(vreg_preg(12))
        .with(vreg_preg(13))
        .with(vreg_preg(14))
        .with(vreg_preg(15))
        .with(vreg_preg(16))
        .with(vreg_preg(17))
        .with(vreg_preg(18))
        .with(vreg_preg(19))
        .with(vreg_preg(20))
        .with(vreg_preg(21))
        .with(vreg_preg(22))
        .with(vreg_preg(23))
        .with(vreg_preg(24))
        .with(vreg_preg(25))
        .with(vreg_preg(26))
        .with(vreg_preg(27))
        .with(vreg_preg(28))
        .with(vreg_preg(29))
        .with(vreg_preg(30))
        .with(vreg_preg(31))
}

const DEFAULT_AAPCS_CLOBBERS: PRegSet = default_aapcs_clobbers();
const WINCH_CLOBBERS: PRegSet = winch_clobbers();
const ALL_CLOBBERS: PRegSet = all_clobbers();
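
// Illustrative note (not from the original comments): a call under the default
// convention reports x0-x17 and all of v0-v31 as clobbered (DEFAULT_AAPCS_CLOBBERS),
// over-approximating the AAPCS64 rule that only the upper halves of v8-v15 are
// caller-saved, as explained inside `default_aapcs_clobbers` above.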

fn create_reg_env(enable_pinned_reg: bool) -> MachineEnv {
    fn preg(r: Reg) -> PReg {
        r.to_real_reg().unwrap().into()
    }

    let mut env = MachineEnv {
        preferred_regs_by_class: [
            vec![
                preg(xreg(0)),
                preg(xreg(1)),
                preg(xreg(2)),
                preg(xreg(3)),
                preg(xreg(4)),
                preg(xreg(5)),
                preg(xreg(6)),
                preg(xreg(7)),
                preg(xreg(8)),
                preg(xreg(9)),
                preg(xreg(10)),
                preg(xreg(11)),
                preg(xreg(12)),
                preg(xreg(13)),
                preg(xreg(14)),
                preg(xreg(15)),
                // x16 and x17 are spilltmp and tmp2 (see above).
                // x18 could be used by the platform to carry inter-procedural state;
                // conservatively assume so and make it not allocatable.
                // x19-28 are callee-saved and so not preferred.
                // x21 is the pinned register (if enabled) and not allocatable if so.
                // x29 is FP, x30 is LR, x31 is SP/ZR.
            ],
            vec![
                preg(vreg(0)),
                preg(vreg(1)),
                preg(vreg(2)),
                preg(vreg(3)),
                preg(vreg(4)),
                preg(vreg(5)),
                preg(vreg(6)),
                preg(vreg(7)),
                // v8-15 are callee-saved and so not preferred.
                preg(vreg(16)),
                preg(vreg(17)),
                preg(vreg(18)),
                preg(vreg(19)),
                preg(vreg(20)),
                preg(vreg(21)),
                preg(vreg(22)),
                preg(vreg(23)),
                preg(vreg(24)),
                preg(vreg(25)),
                preg(vreg(26)),
                preg(vreg(27)),
                preg(vreg(28)),
                preg(vreg(29)),
                preg(vreg(30)),
                preg(vreg(31)),
            ],
            // Vector Regclass is unused
            vec![],
        ],
        non_preferred_regs_by_class: [
            vec![
                preg(xreg(19)),
                preg(xreg(20)),
                // x21 is pinned reg if enabled; we add to this list below if not.
                preg(xreg(22)),
                preg(xreg(23)),
                preg(xreg(24)),
                preg(xreg(25)),
                preg(xreg(26)),
                preg(xreg(27)),
                preg(xreg(28)),
            ],
            vec![
                preg(vreg(8)),
                preg(vreg(9)),
                preg(vreg(10)),
                preg(vreg(11)),
                preg(vreg(12)),
                preg(vreg(13)),
                preg(vreg(14)),
                preg(vreg(15)),
            ],
            // Vector Regclass is unused
            vec![],
        ],
        fixed_stack_slots: vec![],
        scratch_by_class: [None, None, None],
    };

    if !enable_pinned_reg {
        debug_assert_eq!(PINNED_REG, 21); // We assumed this above in hardcoded reg list.
        env.non_preferred_regs_by_class[0].push(preg(xreg(PINNED_REG)));
    }

    env
}
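
// Illustrative note (not from the original comments): when `enable_pinned_reg` is
// set, x21 is omitted from the allocatable environment entirely, so regalloc2 never
// hands it out; otherwise it is appended to the non-preferred integer registers above.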