// Path: cranelift/codegen/src/isa/aarch64/inst/mod.rs
//! This module defines aarch64-specific machine instruction types.

use crate::binemit::{Addend, CodeOffset, Reloc};
use crate::ir::types::{F16, F32, F64, F128, I8, I8X16, I16, I32, I64, I128};
use crate::ir::{MemFlags, Type, types};
use crate::isa::{CallConv, FunctionAlignment};
use crate::machinst::*;
use crate::{CodegenError, CodegenResult, settings};

use crate::machinst::{PrettyPrint, Reg, RegClass, Writable};

use alloc::string::{String, ToString};
use alloc::vec::Vec;
use core::fmt::Write;
use core::slice;
use smallvec::{SmallVec, smallvec};

pub(crate) mod regs;
pub(crate) use self::regs::*;
pub mod imms;
pub use self::imms::*;
pub mod args;
pub use self::args::*;
pub mod emit;
pub(crate) use self::emit::*;
use crate::isa::aarch64::abi::AArch64MachineDeps;

pub(crate) mod unwind;

#[cfg(test)]
mod emit_tests;

//=============================================================================
// Instructions (top level): definition

pub use crate::isa::aarch64::lower::isle::generated_code::{
    ALUOp, ALUOp3, AMode, APIKey, AtomicRMWLoopOp, AtomicRMWOp, BitOp, BranchTargetType, FPUOp1,
    FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, MoveWideOp, VecALUModOp,
    VecALUOp, VecExtendOp, VecLanesOp, VecMisc2, VecPairOp, VecRRLongOp, VecRRNarrowOp,
    VecRRPairLongOp, VecRRRLongModOp, VecRRRLongOp, VecShiftImmModOp, VecShiftImmOp,
};

/// A floating-point unit (FPU) operation with two args, a register and an immediate.
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRI {
    /// Unsigned right shift. Rd = Rn >> #imm
    UShr32(FPURightShiftImm),
    /// Unsigned right shift. Rd = Rn >> #imm
    UShr64(FPURightShiftImm),
}

/// A floating-point unit (FPU) operation with two args, a register and
/// an immediate that modifies its dest (so takes that input value as a
/// separate virtual register).
#[derive(Copy, Clone, Debug)]
pub enum FPUOpRIMod {
    /// Shift left and insert. Rd |= Rn << #imm
    Sli32(FPULeftShiftImm),
    /// Shift left and insert. Rd |= Rn << #imm
    Sli64(FPULeftShiftImm),
}

impl BitOp {
    /// Get the assembly mnemonic for this opcode.
    pub fn op_str(&self) -> &'static str {
        match self {
            BitOp::RBit => "rbit",
            BitOp::Clz => "clz",
            BitOp::Cls => "cls",
            BitOp::Rev16 => "rev16",
            BitOp::Rev32 => "rev32",
            BitOp::Rev64 => "rev64",
        }
    }
}

/// Additional information for `return_call[_ind]` instructions, left out of
/// line to lower the size of the `Inst` enum.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
    /// Where this call is going to
    pub dest: T,
    /// Arguments to the call instruction.
    pub uses: CallArgList,
    /// The size of the new stack frame's stack arguments. This is necessary
    /// for copying the frame over our current frame. It must already be
    /// allocated on the stack.
    pub new_stack_arg_size: u32,
    /// API key to use to restore the return address, if any.
    pub key: Option<APIKey>,
}

/// Count how many of the low `num_half_words` 16-bit half-words of `value`
/// are zero. Used to decide whether MOVZ or MOVN needs fewer instructions.
fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
    let mut count = 0;
    for _ in 0..num_half_words {
        if value & 0xffff == 0 {
            count += 1;
        }
        value >>= 16;
    }

    count
}

impl Inst {
    /// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
    /// logical immediate, or constant pool).
    pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
        // NB: this is duplicated in `lower/isle.rs` and `inst.isle` right now,
        // if modifications are made here before this is deleted after moving to
        // ISLE then those locations should be updated as well.

        if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
            smallvec![Inst::MovWide {
                op: MoveWideOp::MovZ,
                rd,
                imm,
                size: OperandSize::Size64
            }]
        } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
            smallvec![Inst::MovWide {
                op: MoveWideOp::MovN,
                rd,
                imm,
                size: OperandSize::Size64
            }]
        } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
            // Weird logical-instruction immediate in ORI using zero register
            smallvec![Inst::AluRRImmLogic {
                alu_op: ALUOp::Orr,
                size: OperandSize::Size64,
                rd,
                rn: zero_reg(),
                imml,
            }]
        } else {
            let mut insts = smallvec![];

            // If the top 32 bits are zero, use 32-bit `mov` operations.
            let (num_half_words, size, negated) = if value >> 32 == 0 {
                (2, OperandSize::Size32, (!value << 32) >> 32)
            } else {
                (4, OperandSize::Size64, !value)
            };

            // If the number of 0xffff half words is greater than the number of 0x0000 half words
            // it is more efficient to use `movn` for the first instruction.
            let first_is_inverted = count_zero_half_words(negated, num_half_words)
                > count_zero_half_words(value, num_half_words);

            // Either 0xffff or 0x0000 half words can be skipped, depending on the first
            // instruction used.
            let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };

            let halfwords: SmallVec<[_; 4]> = (0..num_half_words)
                .filter_map(|i| {
                    let imm16 = (value >> (16 * i)) & 0xffff;
                    if imm16 == ignored_halfword {
                        None
                    } else {
                        Some((i, imm16))
                    }
                })
                .collect();

            let mut prev_result = None;
            for (i, imm16) in halfwords {
                let shift = i * 16;

                if let Some(rn) = prev_result {
                    // Subsequent half-words are merged in with MOVK.
                    let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
                    insts.push(Inst::MovK { rd, rn, imm, size });
                } else {
                    if first_is_inverted {
                        let imm =
                            MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, shift)
                                .unwrap();
                        insts.push(Inst::MovWide {
                            op: MoveWideOp::MovN,
                            rd,
                            imm,
                            size,
                        });
                    } else {
                        let imm = MoveWideConst::maybe_with_shift(imm16 as u16, shift).unwrap();
                        insts.push(Inst::MovWide {
                            op: MoveWideOp::MovZ,
                            rd,
                            imm,
                            size,
                        });
                    }
                }

                prev_result = Some(rd.to_reg());
            }

            assert!(prev_result.is_some());

            insts
        }
    }

    /// Generic constructor for a load (zero-extending where appropriate).
    pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
        match ty {
            I8 => Inst::ULoad8 {
                rd: into_reg,
                mem,
                flags,
            },
            I16 => Inst::ULoad16 {
                rd: into_reg,
                mem,
                flags,
            },
            I32 => Inst::ULoad32 {
                rd: into_reg,
                mem,
                flags,
            },
            I64 => Inst::ULoad64 {
                rd: into_reg,
                mem,
                flags,
            },
            _ => {
                if ty.is_vector() || ty.is_float() {
                    let bits = ty_bits(ty);
                    let rd = into_reg;

                    match bits {
                        128 => Inst::FpuLoad128 { rd, mem, flags },
                        64 => Inst::FpuLoad64 { rd, mem, flags },
                        32 => Inst::FpuLoad32 { rd, mem, flags },
                        16 => Inst::FpuLoad16 { rd, mem, flags },
                        _ => unimplemented!("gen_load({})", ty),
                    }
                } else {
                    unimplemented!("gen_load({})", ty);
                }
            }
        }
    }

    /// Generic constructor for a store.
    pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
        match ty {
            I8 => Inst::Store8 {
                rd: from_reg,
                mem,
                flags,
            },
            I16 => Inst::Store16 {
                rd: from_reg,
                mem,
                flags,
            },
            I32 => Inst::Store32 {
                rd: from_reg,
                mem,
                flags,
            },
            I64 => Inst::Store64 {
                rd: from_reg,
                mem,
                flags,
            },
            _ => {
                if ty.is_vector() || ty.is_float() {
                    let bits = ty_bits(ty);
                    let rd = from_reg;

                    match bits {
                        128 => Inst::FpuStore128 { rd, mem, flags },
                        64 => Inst::FpuStore64 { rd, mem, flags },
                        32 => Inst::FpuStore32 { rd, mem, flags },
                        16 => Inst::FpuStore16 { rd, mem, flags },
                        _ => unimplemented!("gen_store({})", ty),
                    }
                } else {
                    unimplemented!("gen_store({})", ty);
                }
            }
        }
    }

    /// What type does this load or store instruction access in memory? When
    /// uimm12 encoding is used, the size of this type is the amount that
    /// immediate offsets are scaled by.
    pub fn mem_type(&self) -> Option<Type> {
        match self {
            Inst::ULoad8 { .. } => Some(I8),
            Inst::SLoad8 { .. } => Some(I8),
            Inst::ULoad16 { .. } => Some(I16),
            Inst::SLoad16 { .. } => Some(I16),
            Inst::ULoad32 { .. } => Some(I32),
            Inst::SLoad32 { .. } => Some(I32),
            Inst::ULoad64 { .. } => Some(I64),
            Inst::FpuLoad16 { .. } => Some(F16),
            Inst::FpuLoad32 { .. } => Some(F32),
            Inst::FpuLoad64 { .. } => Some(F64),
            Inst::FpuLoad128 { .. } => Some(I8X16),
            Inst::Store8 { .. } => Some(I8),
            Inst::Store16 { .. } => Some(I16),
            Inst::Store32 { .. } => Some(I32),
            Inst::Store64 { .. } => Some(I64),
            Inst::FpuStore16 { .. } => Some(F16),
            Inst::FpuStore32 { .. } => Some(F32),
            Inst::FpuStore64 { .. } => Some(F64),
            Inst::FpuStore128 { .. } => Some(I8X16),
            _ => None,
        }
    }
}

//=============================================================================
// Instructions: get_regs

/// Report the registers read by an addressing mode to the operand collector.
fn memarg_operands(memarg: &mut AMode, collector: &mut impl OperandVisitor) {
    match memarg {
        AMode::Unscaled { rn, .. } | AMode::UnsignedOffset { rn, .. } => {
            collector.reg_use(rn);
        }
        AMode::RegReg { rn, rm, .. }
        | AMode::RegScaled { rn, rm, .. }
        | AMode::RegScaledExtended { rn, rm, .. }
        | AMode::RegExtended { rn, rm, .. } => {
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        AMode::Label { .. } => {}
        AMode::SPPreIndexed { .. } | AMode::SPPostIndexed { .. } => {}
        AMode::FPOffset { .. } | AMode::IncomingArg { .. } => {}
        AMode::SPOffset { .. } | AMode::SlotOffset { .. } => {}
        AMode::RegOffset { rn, .. } => {
            collector.reg_use(rn);
        }
        AMode::Const { .. } => {}
    }
}

/// Report the registers read by a pair-addressing mode to the operand collector.
fn pairmemarg_operands(pairmemarg: &mut PairAMode, collector: &mut impl OperandVisitor) {
    match pairmemarg {
        PairAMode::SignedOffset { reg, .. } => {
            collector.reg_use(reg);
        }
        PairAMode::SPPreIndexed { .. } | PairAMode::SPPostIndexed { .. } => {}
    }
}

/// Visit every register operand of `inst` (defs, uses, fixed-register
/// constraints, reuse constraints, and clobbers) for register allocation.
fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
    match inst {
        Inst::AluRRR { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::AluRRRR { rd, rn, rm, ra, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
            collector.reg_use(ra);
        }
        Inst::AluRRImm12 { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::AluRRImmLogic { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::AluRRImmShift { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::AluRRRShift { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::AluRRRExtend { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::BitRR { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::ULoad8 { rd, mem, .. }
        | Inst::SLoad8 { rd, mem, .. }
        | Inst::ULoad16 { rd, mem, .. }
        | Inst::SLoad16 { rd, mem, .. }
        | Inst::ULoad32 { rd, mem, .. }
        | Inst::SLoad32 { rd, mem, .. }
        | Inst::ULoad64 { rd, mem, .. } => {
            collector.reg_def(rd);
            memarg_operands(mem, collector);
        }
        Inst::Store8 { rd, mem, .. }
        | Inst::Store16 { rd, mem, .. }
        | Inst::Store32 { rd, mem, .. }
        | Inst::Store64 { rd, mem, .. } => {
            collector.reg_use(rd);
            memarg_operands(mem, collector);
        }
        Inst::StoreP64 { rt, rt2, mem, .. } => {
            collector.reg_use(rt);
            collector.reg_use(rt2);
            pairmemarg_operands(mem, collector);
        }
        Inst::LoadP64 { rt, rt2, mem, .. } => {
            collector.reg_def(rt);
            collector.reg_def(rt2);
            pairmemarg_operands(mem, collector);
        }
        Inst::Mov { rd, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rm);
        }
        Inst::MovFromPReg { rd, rm } => {
            debug_assert!(rd.to_reg().is_virtual());
            collector.reg_def(rd);
            collector.reg_fixed_nonallocatable(*rm);
        }
        Inst::MovToPReg { rd, rm } => {
            debug_assert!(rm.is_virtual());
            collector.reg_fixed_nonallocatable(*rd);
            collector.reg_use(rm);
        }
        Inst::MovK { rd, rn, .. } => {
            collector.reg_use(rn);
            collector.reg_reuse_def(rd, 0); // `rn` == `rd`.
        }
        Inst::MovWide { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::CSel { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::CSNeg { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::CSet { rd, .. } | Inst::CSetm { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::CCmp { rn, rm, .. } => {
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::CCmpImm { rn, .. } => {
            collector.reg_use(rn);
        }
        Inst::AtomicRMWLoop {
            op,
            addr,
            operand,
            oldval,
            scratch1,
            scratch2,
            ..
        } => {
            collector.reg_fixed_use(addr, xreg(25));
            collector.reg_fixed_use(operand, xreg(26));
            collector.reg_fixed_def(oldval, xreg(27));
            collector.reg_fixed_def(scratch1, xreg(24));
            if *op != AtomicRMWLoopOp::Xchg {
                collector.reg_fixed_def(scratch2, xreg(28));
            }
        }
        Inst::AtomicRMW { rs, rt, rn, .. } => {
            collector.reg_use(rs);
            collector.reg_def(rt);
            collector.reg_use(rn);
        }
        Inst::AtomicCAS { rd, rs, rt, rn, .. } => {
            collector.reg_reuse_def(rd, 1); // reuse `rs`.
            collector.reg_use(rs);
            collector.reg_use(rt);
            collector.reg_use(rn);
        }
        Inst::AtomicCASLoop {
            addr,
            expected,
            replacement,
            oldval,
            scratch,
            ..
        } => {
            collector.reg_fixed_use(addr, xreg(25));
            collector.reg_fixed_use(expected, xreg(26));
            collector.reg_fixed_use(replacement, xreg(28));
            collector.reg_fixed_def(oldval, xreg(27));
            collector.reg_fixed_def(scratch, xreg(24));
        }
        Inst::LoadAcquire { rt, rn, .. } => {
            collector.reg_use(rn);
            collector.reg_def(rt);
        }
        Inst::StoreRelease { rt, rn, .. } => {
            collector.reg_use(rn);
            collector.reg_use(rt);
        }
        Inst::Fence {} | Inst::Csdb {} => {}
        Inst::FpuMove32 { rd, rn } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuMove64 { rd, rn } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuMove128 { rd, rn } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuMoveFromVec { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuExtend { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuRR { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuRRR { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::FpuRRI { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuRRIMod { rd, ri, rn, .. } => {
            collector.reg_reuse_def(rd, 1); // reuse `ri`.
            collector.reg_use(ri);
            collector.reg_use(rn);
        }
        Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
            collector.reg_use(ra);
        }
        Inst::VecMisc { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }

        Inst::VecLanes { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecShiftImm { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecShiftImmMod { rd, ri, rn, .. } => {
            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
            collector.reg_use(ri);
            collector.reg_use(rn);
        }
        Inst::VecExtract { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::VecTbl { rd, rn, rm } => {
            collector.reg_use(rn);
            collector.reg_use(rm);
            collector.reg_def(rd);
        }
        Inst::VecTblExt { rd, ri, rn, rm } => {
            collector.reg_use(rn);
            collector.reg_use(rm);
            collector.reg_reuse_def(rd, 3); // `rd` == `ri`.
            collector.reg_use(ri);
        }

        Inst::VecTbl2 { rd, rn, rn2, rm } => {
            // Constrain to v30 / v31 so that we satisfy the "adjacent
            // registers" constraint without use of pinned vregs in
            // lowering.
            collector.reg_fixed_use(rn, vreg(30));
            collector.reg_fixed_use(rn2, vreg(31));
            collector.reg_use(rm);
            collector.reg_def(rd);
        }
        Inst::VecTbl2Ext {
            rd,
            ri,
            rn,
            rn2,
            rm,
        } => {
            // Constrain to v30 / v31 so that we satisfy the "adjacent
            // registers" constraint without use of pinned vregs in
            // lowering.
            collector.reg_fixed_use(rn, vreg(30));
            collector.reg_fixed_use(rn2, vreg(31));
            collector.reg_use(rm);
            collector.reg_reuse_def(rd, 4); // `rd` == `ri`.
            collector.reg_use(ri);
        }
        Inst::VecLoadReplicate { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecCSel { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::FpuCmp { rn, rm, .. } => {
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::FpuLoad16 { rd, mem, .. } => {
            collector.reg_def(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuLoad32 { rd, mem, .. } => {
            collector.reg_def(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuLoad64 { rd, mem, .. } => {
            collector.reg_def(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuLoad128 { rd, mem, .. } => {
            collector.reg_def(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuStore16 { rd, mem, .. } => {
            collector.reg_use(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuStore32 { rd, mem, .. } => {
            collector.reg_use(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuStore64 { rd, mem, .. } => {
            collector.reg_use(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuStore128 { rd, mem, .. } => {
            collector.reg_use(rd);
            memarg_operands(mem, collector);
        }
        Inst::FpuLoadP64 { rt, rt2, mem, .. } => {
            collector.reg_def(rt);
            collector.reg_def(rt2);
            pairmemarg_operands(mem, collector);
        }
        Inst::FpuStoreP64 { rt, rt2, mem, .. } => {
            collector.reg_use(rt);
            collector.reg_use(rt2);
            pairmemarg_operands(mem, collector);
        }
        Inst::FpuLoadP128 { rt, rt2, mem, .. } => {
            collector.reg_def(rt);
            collector.reg_def(rt2);
            pairmemarg_operands(mem, collector);
        }
        Inst::FpuStoreP128 { rt, rt2, mem, .. } => {
            collector.reg_use(rt);
            collector.reg_use(rt2);
            pairmemarg_operands(mem, collector);
        }
        Inst::FpuToInt { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::IntToFpu { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuCSel16 { rd, rn, rm, .. }
        | Inst::FpuCSel32 { rd, rn, rm, .. }
        | Inst::FpuCSel64 { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::FpuRound { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::MovToFpu { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::FpuMoveFPImm { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::MovToVec { rd, ri, rn, .. } => {
            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
            collector.reg_use(ri);
            collector.reg_use(rn);
        }
        Inst::MovFromVec { rd, rn, .. } | Inst::MovFromVecSigned { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecDup { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecDupFromFpu { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecDupFPImm { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::VecDupImm { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::VecExtend { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecMovElement { rd, ri, rn, .. } => {
            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
            collector.reg_use(ri);
            collector.reg_use(rn);
        }
        Inst::VecRRLong { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecRRNarrowLow { rd, rn, .. } => {
            collector.reg_use(rn);
            collector.reg_def(rd);
        }
        Inst::VecRRNarrowHigh { rd, ri, rn, .. } => {
            collector.reg_use(rn);
            collector.reg_reuse_def(rd, 2); // `rd` == `ri`.
            collector.reg_use(ri);
        }
        Inst::VecRRPair { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecRRRLong { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::VecRRRLongMod { rd, ri, rn, rm, .. } => {
            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
            collector.reg_use(ri);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::VecRRPairLong { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::VecRRR { rd, rn, rm, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::VecRRRMod { rd, ri, rn, rm, .. } | Inst::VecFmlaElem { rd, ri, rn, rm, .. } => {
            collector.reg_reuse_def(rd, 1); // `rd` == `ri`.
            collector.reg_use(ri);
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
        Inst::MovToNZCV { rn } => {
            collector.reg_use(rn);
        }
        Inst::MovFromNZCV { rd } => {
            collector.reg_def(rd);
        }
        Inst::Extend { rd, rn, .. } => {
            collector.reg_def(rd);
            collector.reg_use(rn);
        }
        Inst::Args { args } => {
            for ArgPair { vreg, preg } in args {
                collector.reg_fixed_def(vreg, *preg);
            }
        }
        Inst::Rets { rets } => {
            for RetPair { vreg, preg } in rets {
                collector.reg_fixed_use(vreg, *preg);
            }
        }
        Inst::Ret { .. } | Inst::AuthenticatedRet { .. } => {}
        Inst::Jump { .. } => {}
        Inst::Call { info, .. } => {
            let CallInfo { uses, defs, .. } = &mut **info;
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(info.clobbers);
            if let Some(try_call_info) = &mut info.try_call_info {
                try_call_info.collect_operands(collector);
            }
        }
        Inst::CallInd { info, .. } => {
            let CallInfo {
                dest, uses, defs, ..
            } = &mut **info;
            collector.reg_use(dest);
            for CallArgPair { vreg, preg } in uses {
                collector.reg_fixed_use(vreg, *preg);
            }
            for CallRetPair { vreg, location } in defs {
                match location {
                    RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
                    RetLocation::Stack(..) => collector.any_def(vreg),
                }
            }
            collector.reg_clobbers(info.clobbers);
            if let Some(try_call_info) = &mut info.try_call_info {
                try_call_info.collect_operands(collector);
            }
        }
        Inst::ReturnCall { info } => {
            for CallArgPair { vreg, preg } in &mut info.uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }
        Inst::ReturnCallInd { info } => {
            // TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
            // This shouldn't be a fixed register constraint, but it's not clear how to pick a
            // register that won't be clobbered by the callee-save restore code emitted with a
            // return_call_indirect.
            collector.reg_fixed_use(&mut info.dest, xreg(1));
            for CallArgPair { vreg, preg } in &mut info.uses {
                collector.reg_fixed_use(vreg, *preg);
            }
        }
        Inst::CondBr { kind, .. } => match kind {
            CondBrKind::Zero(rt, _) | CondBrKind::NotZero(rt, _) => collector.reg_use(rt),
            CondBrKind::Cond(_) => {}
        },
        Inst::TestBitAndBranch { rn, .. } => {
            collector.reg_use(rn);
        }
        Inst::IndirectBr { rn, .. } => {
            collector.reg_use(rn);
        }
        Inst::Nop0 | Inst::Nop4 => {}
        Inst::Brk => {}
        Inst::Udf { .. } => {}
        Inst::TrapIf { kind, .. } => match kind {
            CondBrKind::Zero(rt, _) | CondBrKind::NotZero(rt, _) => collector.reg_use(rt),
            CondBrKind::Cond(_) => {}
        },
        Inst::Adr { rd, .. } | Inst::Adrp { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::Word4 { .. } | Inst::Word8 { .. } => {}
        Inst::JTSequence {
            ridx, rtmp1, rtmp2, ..
        } => {
            collector.reg_use(ridx);
            collector.reg_early_def(rtmp1);
            collector.reg_early_def(rtmp2);
        }
        Inst::LoadExtNameGot { rd, .. }
        | Inst::LoadExtNameNear { rd, .. }
        | Inst::LoadExtNameFar { rd, .. } => {
            collector.reg_def(rd);
        }
        Inst::LoadAddr { rd, mem } => {
            collector.reg_def(rd);
            memarg_operands(mem, collector);
        }
        Inst::Paci { .. } | Inst::Xpaclri => {
            // Neither LR nor SP is an allocatable register, so there is no need
            // to do anything.
        }
        Inst::Bti { .. } => {}

        Inst::ElfTlsGetAddr { rd, tmp, .. } => {
            // TLSDESC has a very neat calling convention. It is required to preserve
            // all registers except x0 and x30. X30 is non allocatable in cranelift since
            // its the link register.
            //
            // Additionally we need a second register as a temporary register for the
            // TLSDESC sequence. This register can be any register other than x0 (and x30).
            collector.reg_fixed_def(rd, regs::xreg(0));
            collector.reg_early_def(tmp);
        }
        Inst::MachOTlsGetAddr { rd, .. } => {
            collector.reg_fixed_def(rd, regs::xreg(0));
            let mut clobbers =
                AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::AppleAarch64, false);
            clobbers.remove(regs::xreg_preg(0));
            collector.reg_clobbers(clobbers);
        }
        Inst::Unwind { .. } => {}
        Inst::EmitIsland { .. } => {}
        Inst::DummyUse { reg } => {
            collector.reg_use(reg);
        }
        Inst::LabelAddress { dst, .. } => {
            collector.reg_def(dst);
        }
        Inst::SequencePoint { .. } => {}
        Inst::StackProbeLoop { start, end, .. } => {
            collector.reg_early_def(start);
            collector.reg_use(end);
        }
    }
}

//=============================================================================
// Instructions: misc functions and external interface

impl MachInst for Inst {
    type ABIMachineSpec = AArch64MachineDeps;
    type LabelUse = LabelUse;

    // "CLIF" in hex, to make the trap recognizable during
    // debugging.
    const TRAP_OPCODE: &'static [u8] = &0xc11f_u32.to_le_bytes();

    fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
        aarch64_get_operands(self, collector);
    }

    fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
        match self {
            &Inst::Mov {
                size: OperandSize::Size64,
                rd,
                rm,
            } => Some((rd, rm)),
            &Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
            &Inst::FpuMove128 { rd, rn } => Some((rd, rn)),
            _ => None,
        }
    }

    fn is_included_in_clobbers(&self) -> bool {
        let (caller, callee, is_exception) = match self {
            Inst::Args { .. } => return false,
            Inst::Call { info } => (
                info.caller_conv,
                info.callee_conv,
                info.try_call_info.is_some(),
            ),
            Inst::CallInd { info } => (
                info.caller_conv,
                info.callee_conv,
                info.try_call_info.is_some(),
            ),
            _ => return true,
        };

        // We exclude call instructions from the clobber-set when they are calls
        // from caller to callee that both clobber the same register (such as
        // using the same or similar ABIs). Such calls cannot possibly force any
        // new registers to be saved in the prologue, because anything that the
        // callee clobbers, the caller is also allowed to clobber. This both
        // saves work and enables us to more precisely follow the
        // half-caller-save, half-callee-save SysV ABI for some vector
        // registers.
        //
        // See the note in [crate::isa::aarch64::abi::is_caller_save_reg] for
        // more information on this ABI-implementation hack.
        let caller_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(caller, false);
        let callee_clobbers = AArch64MachineDeps::get_regs_clobbered_by_call(callee, is_exception);

        let mut all_clobbers = caller_clobbers;
        all_clobbers.union_from(callee_clobbers);
        all_clobbers != caller_clobbers
    }

    fn is_trap(&self) -> bool {
        match self {
            Self::Udf { .. } => true,
            _ => false,
        }
    }

    fn is_args(&self) -> bool {
        match self {
            Self::Args { .. } => true,
            _ => false,
        }
    }

    fn call_type(&self) -> CallType {
        match self {
            Inst::Call { .. }
            | Inst::CallInd { .. }
            | Inst::ElfTlsGetAddr { .. }
            | Inst::MachOTlsGetAddr { .. } => CallType::Regular,

            Inst::ReturnCall { .. } | Inst::ReturnCallInd { .. } => CallType::TailCall,

            _ => CallType::None,
        }
    }

    fn is_term(&self) -> MachTerminator {
        match self {
            &Inst::Rets { .. } => MachTerminator::Ret,
            &Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall,
            &Inst::Jump { .. } => MachTerminator::Branch,
            &Inst::CondBr { .. } => MachTerminator::Branch,
            &Inst::TestBitAndBranch { .. } => MachTerminator::Branch,
            &Inst::IndirectBr { .. } => MachTerminator::Branch,
            &Inst::JTSequence { .. } => MachTerminator::Branch,
            &Inst::Call { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
            &Inst::CallInd { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,
            _ => MachTerminator::None,
        }
    }

    fn is_mem_access(&self) -> bool {
        match self {
            &Inst::ULoad8 { .. }
            | &Inst::SLoad8 { .. }
            | &Inst::ULoad16 { .. }
            | &Inst::SLoad16 { .. }
            | &Inst::ULoad32 { .. }
            | &Inst::SLoad32 { .. }
            | &Inst::ULoad64 { .. }
            | &Inst::LoadP64 { .. }
            | &Inst::FpuLoad16 { .. }
            | &Inst::FpuLoad32 { .. }
            | &Inst::FpuLoad64 { .. }
            | &Inst::FpuLoad128 { .. }
            | &Inst::FpuLoadP64 { .. }
            | &Inst::FpuLoadP128 { .. }
            | &Inst::Store8 { .. }
            | &Inst::Store16 { .. }
            | &Inst::Store32 { .. }
            | &Inst::Store64 { .. }
            | &Inst::StoreP64 { .. }
            | &Inst::FpuStore16 { .. }
            | &Inst::FpuStore32 { .. }
            | &Inst::FpuStore64 { .. }
            | &Inst::FpuStore128 { .. } => true,
            // TODO: verify this carefully
            _ => false,
        }
    }

    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
        let bits = ty.bits();

        assert!(bits <= 128);
        assert!(to_reg.to_reg().class() == from_reg.class());
        match from_reg.class() {
            RegClass::Int => Inst::Mov {
                size: OperandSize::Size64,
                rd: to_reg,
                rm: from_reg,
            },
            RegClass::Float => {
                if bits > 64 {
                    Inst::FpuMove128 {
                        rd: to_reg,
                        rn: from_reg,
                    }
                } else {
                    Inst::FpuMove64 {
                        rd: to_reg,
                        rn: from_reg,
                    }
                }
            }
            RegClass::Vector => unreachable!(),
        }
    }

    fn is_safepoint(&self) -> bool {
        match self {
            Inst::Call { .. } | Inst::CallInd { .. } => true,
            _ => false,
        }
    }

    fn gen_dummy_use(reg: Reg) -> Inst {
        Inst::DummyUse { reg }
    }

    fn gen_nop(preferred_size: usize) -> Inst {
        if preferred_size == 0 {
            return Inst::Nop0;
        }
        // We can't give a NOP (or any insn) < 4 bytes.
        assert!(preferred_size >= 4);
        Inst::Nop4
    }

    fn gen_nop_units() -> Vec<Vec<u8>> {
        vec![vec![0x1f, 0x20, 0x03, 0xd5]]
    }

    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
        match ty {
            I8 => Ok((&[RegClass::Int], &[I8])),
            I16 => Ok((&[RegClass::Int], &[I16])),
            I32 => Ok((&[RegClass::Int], &[I32])),
            I64 => Ok((&[RegClass::Int], &[I64])),
            F16 => Ok((&[RegClass::Float], &[F16])),
            F32 => Ok((&[RegClass::Float], &[F32])),
            F64 => Ok((&[RegClass::Float], &[F64])),
            F128 => Ok((&[RegClass::Float], &[F128])),
            I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
            _ if ty.is_vector() && ty.bits() <= 128 => {
                let types = &[types::I8X2, types::I8X4, types::I8X8, types::I8X16];
                Ok((
                    &[RegClass::Float],
                    slice::from_ref(&types[ty.bytes().ilog2() as usize - 1]),
                ))
            }
            _ if ty.is_dynamic_vector() => Ok((&[RegClass::Float], &[I8X16])),
            _ => Err(CodegenError::Unsupported(format!(
                "Unexpected SSA-value type: {ty}"
            ))),
        }
    }

    fn canonical_type_for_rc(rc: RegClass) -> Type {
        match rc {
            RegClass::Float => types::I8X16,
            RegClass::Int => types::I64,
            RegClass::Vector => unreachable!(),
        }
    }

    fn gen_jump(target: MachLabel) -> Inst {
        Inst::Jump {
            dest: BranchTarget::Label(target),
        }
    }

    fn worst_case_size() -> CodeOffset {
        // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of
        // an 8-instruction sequence (saturating int-to-float conversions) with three embedded
        // 64-bit f64 constants.
        //
        // Note that inline jump-tables handle island/pool insertion separately, so we do not need
        // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not
        // feasible for other reasons).
        44
    }

    fn ref_type_regclass(_: &settings::Flags) -> RegClass {
        RegClass::Int
    }

    fn gen_block_start(
        is_indirect_branch_target: bool,
        is_forward_edge_cfi_enabled: bool,
    ) -> Option<Self> {
        if is_indirect_branch_target && is_forward_edge_cfi_enabled {
            Some(Inst::Bti {
                targets: BranchTargetType::J,
            })
        } else {
            None
        }
    }

    fn function_alignment() -> FunctionAlignment {
        // We use 32-byte alignment for performance reasons, but for correctness
        // we would only need 4-byte alignment.
        FunctionAlignment {
            minimum: 4,
            preferred: 32,
        }
    }
}

//=============================================================================
// Pretty-printing of instructions.

/// Finalize `mem` for display: returns the pretty-printed preamble
/// instructions (if any) and the finalized addressing-mode string.
fn mem_finalize_for_show(mem: &AMode, access_ty: Type, state: &EmitState) -> (String, String) {
    let (mem_insts, mem) = mem_finalize(None, mem, access_ty, state);
    let mut mem_str = mem_insts
        .into_iter()
        .map(|inst| inst.print_with_state(&mut EmitState::default()))
        .collect::<Vec<_>>()
        .join(" ; ");
    if !mem_str.is_empty() {
        mem_str += " ; ";
    }

    let mem = mem.pretty_print(access_ty.bytes() as u8);
    (mem_str, mem)
}

/// Render the try-call continuation/catch suffix appended to `call` text.
fn pretty_print_try_call(info: &TryCallInfo) -> String {
    format!(
        "; b {:?}; catch [{}]",
        info.continuation,
        info.pretty_print_dests()
    )
}

impl Inst {
    /// Pretty-print this instruction as assembly-like text, using `state`
    /// for address-mode finalization.
    fn print_with_state(&self, state: &mut EmitState) -> String {
        // Assembly mnemonic for a two-/three-register integer ALU opcode.
        fn op_name(alu_op: ALUOp) -> &'static str {
            match alu_op {
                ALUOp::Add => "add",
                ALUOp::Sub => "sub",
                ALUOp::Orr => "orr",
                ALUOp::And => "and",
                ALUOp::AndS => "ands",
                ALUOp::Eor => "eor",
                ALUOp::AddS => "adds",
                ALUOp::SubS => "subs",
                ALUOp::SMulH => "smulh",
                ALUOp::UMulH => "umulh",
                ALUOp::SDiv => "sdiv",
                ALUOp::UDiv => "udiv",
                ALUOp::AndNot => "bic",
                ALUOp::OrrNot => "orn",
                ALUOp::EorNot => "eon",
                ALUOp::Extr => "extr",
                ALUOp::Lsr => "lsr",
                ALUOp::Asr => "asr",
                ALUOp::Lsl => "lsl",
                ALUOp::Adc => "adc",
                ALUOp::AdcS => "adcs",
                ALUOp::Sbc => "sbc",
                ALUOp::SbcS => "sbcs",
            }
        }

        match self {
            &Inst::Nop0 => "nop-zero-len".to_string(),
            &Inst::Nop4 => "nop".to_string(),
            &Inst::AluRRR {
                alu_op,
                size,
                rd,
                rn,
                rm,
            } => {
                let op = op_name(alu_op);
                let rd = pretty_print_ireg(rd.to_reg(), size);
                let rn = pretty_print_ireg(rn, size);
                let rm = pretty_print_ireg(rm, size);
                format!("{op} {rd}, {rn}, {rm}")
            }
            &Inst::AluRRRR {
                alu_op,
                size,
                rd,
                rn,
                rm,
                ra,
            } => {
                let (op, da_size) = match alu_op {
                    ALUOp3::MAdd => ("madd", size),
                    ALUOp3::MSub => ("msub", size),
                    ALUOp3::UMAddL => ("umaddl", OperandSize::Size64),
                    ALUOp3::SMAddL => ("smaddl", OperandSize::Size64),
                };
                let rd = pretty_print_ireg(rd.to_reg(), da_size);
                let rn = pretty_print_ireg(rn, size);
                let rm = pretty_print_ireg(rm, size);
                let ra = pretty_print_ireg(ra, da_size);

                format!("{op} {rd}, {rn}, {rm}, {ra}")
            }
            &Inst::AluRRImm12 {
                alu_op,
                size,
                rd,
                rn,
                ref imm12,
            } => {
                let op = op_name(alu_op);
                let rd = pretty_print_ireg(rd.to_reg(), size);
                let rn = pretty_print_ireg(rn, size);

                if imm12.bits == 0 && alu_op == ALUOp::Add && size.is64() {
                    // special-case MOV (used for moving into SP).
                    format!("mov {rd}, {rn}")
                } else {
                    let imm12 = imm12.pretty_print(0);
                    format!("{op} {rd}, {rn}, {imm12}")
                }
            }
            &Inst::AluRRImmLogic {
                alu_op,
                size,
                rd,
                rn,
                ref imml,
            } => {
                let op = op_name(alu_op);
                let rd = pretty_print_ireg(rd.to_reg(), size);
                let rn = pretty_print_ireg(rn, size);
                let imml = imml.pretty_print(0);
                format!("{op} {rd}, {rn}, {imml}")
            }
            &Inst::AluRRImmShift {
                alu_op,
                size,
                rd,
                rn,
                ref immshift,
            } => {
                let op = op_name(alu_op);
                let rd = pretty_print_ireg(rd.to_reg(), size);
                let rn = pretty_print_ireg(rn, size);
                let immshift = immshift.pretty_print(0);
                format!("{op} {rd}, {rn}, {immshift}")
            }
            &Inst::AluRRRShift {
                alu_op,
                size,
                rd,
                rn,
                rm,
                ref shiftop,
            } => {
                let op = op_name(alu_op);
                let rd = pretty_print_ireg(rd.to_reg(), size);
                let rn = pretty_print_ireg(rn, size);
                let rm = pretty_print_ireg(rm, size);
                let shiftop = shiftop.pretty_print(0);
                format!("{op} {rd}, {rn}, {rm}, {shiftop}")
            }
            &Inst::AluRRRExtend {
                alu_op,
                size,
                rd,
                rn,
                rm,
                ref extendop,
            } => {
                let op = op_name(alu_op);
                let rd = pretty_print_ireg(rd.to_reg(), size);
                let rn = pretty_print_ireg(rn, size);
                let rm = pretty_print_ireg(rm, size);
                let extendop = extendop.pretty_print(0);
                format!("{op} {rd}, {rn}, {rm}, {extendop}")
            }
            &Inst::BitRR { op, size, rd, rn } => {
                let op = op.op_str();
                let rd = pretty_print_ireg(rd.to_reg(), size);
                let rn = pretty_print_ireg(rn, size);
                format!("{op} {rd}, {rn}")
            }
            &Inst::ULoad8 { rd, ref mem, .. }
            | &Inst::SLoad8 { rd, ref mem, .. }
            | &Inst::ULoad16 { rd, ref mem, .. }
            | &Inst::SLoad16 { rd, ref mem, .. }
            | &Inst::ULoad32 { rd, ref mem, .. }
            | &Inst::SLoad32 { rd, ref mem, .. }
            | &Inst::ULoad64 { rd, ref mem, .. } => {
                let is_unscaled = match &mem {
                    &AMode::Unscaled { .. } => true,
                    _ => false,
                };
                let (op, size) = match (self, is_unscaled) {
                    (&Inst::ULoad8 { .. }, false) => ("ldrb", OperandSize::Size32),
                    (&Inst::ULoad8 { .. }, true) => ("ldurb", OperandSize::Size32),
                    (&Inst::SLoad8 { .. }, false) => ("ldrsb", OperandSize::Size64),
                    (&Inst::SLoad8 { ..
}, true) => ("ldursb", OperandSize::Size64),1383(&Inst::ULoad16 { .. }, false) => ("ldrh", OperandSize::Size32),1384(&Inst::ULoad16 { .. }, true) => ("ldurh", OperandSize::Size32),1385(&Inst::SLoad16 { .. }, false) => ("ldrsh", OperandSize::Size64),1386(&Inst::SLoad16 { .. }, true) => ("ldursh", OperandSize::Size64),1387(&Inst::ULoad32 { .. }, false) => ("ldr", OperandSize::Size32),1388(&Inst::ULoad32 { .. }, true) => ("ldur", OperandSize::Size32),1389(&Inst::SLoad32 { .. }, false) => ("ldrsw", OperandSize::Size64),1390(&Inst::SLoad32 { .. }, true) => ("ldursw", OperandSize::Size64),1391(&Inst::ULoad64 { .. }, false) => ("ldr", OperandSize::Size64),1392(&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64),1393_ => unreachable!(),1394};13951396let rd = pretty_print_ireg(rd.to_reg(), size);1397let mem = mem.clone();1398let access_ty = self.mem_type().unwrap();1399let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);14001401format!("{mem_str}{op} {rd}, {mem}")1402}1403&Inst::Store8 { rd, ref mem, .. }1404| &Inst::Store16 { rd, ref mem, .. }1405| &Inst::Store32 { rd, ref mem, .. }1406| &Inst::Store64 { rd, ref mem, .. } => {1407let is_unscaled = match &mem {1408&AMode::Unscaled { .. } => true,1409_ => false,1410};1411let (op, size) = match (self, is_unscaled) {1412(&Inst::Store8 { .. }, false) => ("strb", OperandSize::Size32),1413(&Inst::Store8 { .. }, true) => ("sturb", OperandSize::Size32),1414(&Inst::Store16 { .. }, false) => ("strh", OperandSize::Size32),1415(&Inst::Store16 { .. }, true) => ("sturh", OperandSize::Size32),1416(&Inst::Store32 { .. }, false) => ("str", OperandSize::Size32),1417(&Inst::Store32 { .. }, true) => ("stur", OperandSize::Size32),1418(&Inst::Store64 { .. }, false) => ("str", OperandSize::Size64),1419(&Inst::Store64 { .. 
}, true) => ("stur", OperandSize::Size64),1420_ => unreachable!(),1421};14221423let rd = pretty_print_ireg(rd, size);1424let mem = mem.clone();1425let access_ty = self.mem_type().unwrap();1426let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);14271428format!("{mem_str}{op} {rd}, {mem}")1429}1430&Inst::StoreP64 {1431rt, rt2, ref mem, ..1432} => {1433let rt = pretty_print_ireg(rt, OperandSize::Size64);1434let rt2 = pretty_print_ireg(rt2, OperandSize::Size64);1435let mem = mem.clone();1436let mem = mem.pretty_print_default();1437format!("stp {rt}, {rt2}, {mem}")1438}1439&Inst::LoadP64 {1440rt, rt2, ref mem, ..1441} => {1442let rt = pretty_print_ireg(rt.to_reg(), OperandSize::Size64);1443let rt2 = pretty_print_ireg(rt2.to_reg(), OperandSize::Size64);1444let mem = mem.clone();1445let mem = mem.pretty_print_default();1446format!("ldp {rt}, {rt2}, {mem}")1447}1448&Inst::Mov { size, rd, rm } => {1449let rd = pretty_print_ireg(rd.to_reg(), size);1450let rm = pretty_print_ireg(rm, size);1451format!("mov {rd}, {rm}")1452}1453&Inst::MovFromPReg { rd, rm } => {1454let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);1455let rm = show_ireg_sized(rm.into(), OperandSize::Size64);1456format!("mov {rd}, {rm}")1457}1458&Inst::MovToPReg { rd, rm } => {1459let rd = show_ireg_sized(rd.into(), OperandSize::Size64);1460let rm = pretty_print_ireg(rm, OperandSize::Size64);1461format!("mov {rd}, {rm}")1462}1463&Inst::MovWide {1464op,1465rd,1466ref imm,1467size,1468} => {1469let op_str = match op {1470MoveWideOp::MovZ => "movz",1471MoveWideOp::MovN => "movn",1472};1473let rd = pretty_print_ireg(rd.to_reg(), size);1474let imm = imm.pretty_print(0);1475format!("{op_str} {rd}, {imm}")1476}1477&Inst::MovK {1478rd,1479rn,1480ref imm,1481size,1482} => {1483let rn = pretty_print_ireg(rn, size);1484let rd = pretty_print_ireg(rd.to_reg(), size);1485let imm = imm.pretty_print(0);1486format!("movk {rd}, {rn}, {imm}")1487}1488&Inst::CSel { rd, rn, rm, cond } => {1489let rd = 
pretty_print_ireg(rd.to_reg(), OperandSize::Size64);1490let rn = pretty_print_ireg(rn, OperandSize::Size64);1491let rm = pretty_print_ireg(rm, OperandSize::Size64);1492let cond = cond.pretty_print(0);1493format!("csel {rd}, {rn}, {rm}, {cond}")1494}1495&Inst::CSNeg { rd, rn, rm, cond } => {1496let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);1497let rn = pretty_print_ireg(rn, OperandSize::Size64);1498let rm = pretty_print_ireg(rm, OperandSize::Size64);1499let cond = cond.pretty_print(0);1500format!("csneg {rd}, {rn}, {rm}, {cond}")1501}1502&Inst::CSet { rd, cond } => {1503let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);1504let cond = cond.pretty_print(0);1505format!("cset {rd}, {cond}")1506}1507&Inst::CSetm { rd, cond } => {1508let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64);1509let cond = cond.pretty_print(0);1510format!("csetm {rd}, {cond}")1511}1512&Inst::CCmp {1513size,1514rn,1515rm,1516nzcv,1517cond,1518} => {1519let rn = pretty_print_ireg(rn, size);1520let rm = pretty_print_ireg(rm, size);1521let nzcv = nzcv.pretty_print(0);1522let cond = cond.pretty_print(0);1523format!("ccmp {rn}, {rm}, {nzcv}, {cond}")1524}1525&Inst::CCmpImm {1526size,1527rn,1528imm,1529nzcv,1530cond,1531} => {1532let rn = pretty_print_ireg(rn, size);1533let imm = imm.pretty_print(0);1534let nzcv = nzcv.pretty_print(0);1535let cond = cond.pretty_print(0);1536format!("ccmp {rn}, {imm}, {nzcv}, {cond}")1537}1538&Inst::AtomicRMW {1539rs, rt, rn, ty, op, ..1540} => {1541let op = match op {1542AtomicRMWOp::Add => "ldaddal",1543AtomicRMWOp::Clr => "ldclral",1544AtomicRMWOp::Eor => "ldeoral",1545AtomicRMWOp::Set => "ldsetal",1546AtomicRMWOp::Smax => "ldsmaxal",1547AtomicRMWOp::Umax => "ldumaxal",1548AtomicRMWOp::Smin => "ldsminal",1549AtomicRMWOp::Umin => "lduminal",1550AtomicRMWOp::Swp => "swpal",1551};15521553let size = OperandSize::from_ty(ty);1554let rs = pretty_print_ireg(rs, size);1555let rt = pretty_print_ireg(rt.to_reg(), size);1556let rn = 
pretty_print_ireg(rn, OperandSize::Size64);15571558let ty_suffix = match ty {1559I8 => "b",1560I16 => "h",1561_ => "",1562};1563format!("{op}{ty_suffix} {rs}, {rt}, [{rn}]")1564}1565&Inst::AtomicRMWLoop {1566ty,1567op,1568addr,1569operand,1570oldval,1571scratch1,1572scratch2,1573..1574} => {1575let op = match op {1576AtomicRMWLoopOp::Add => "add",1577AtomicRMWLoopOp::Sub => "sub",1578AtomicRMWLoopOp::Eor => "eor",1579AtomicRMWLoopOp::Orr => "orr",1580AtomicRMWLoopOp::And => "and",1581AtomicRMWLoopOp::Nand => "nand",1582AtomicRMWLoopOp::Smin => "smin",1583AtomicRMWLoopOp::Smax => "smax",1584AtomicRMWLoopOp::Umin => "umin",1585AtomicRMWLoopOp::Umax => "umax",1586AtomicRMWLoopOp::Xchg => "xchg",1587};1588let addr = pretty_print_ireg(addr, OperandSize::Size64);1589let operand = pretty_print_ireg(operand, OperandSize::Size64);1590let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64);1591let scratch1 = pretty_print_ireg(scratch1.to_reg(), OperandSize::Size64);1592let scratch2 = pretty_print_ireg(scratch2.to_reg(), OperandSize::Size64);1593format!(1594"atomic_rmw_loop_{}_{} addr={} operand={} oldval={} scratch1={} scratch2={}",1595op,1596ty.bits(),1597addr,1598operand,1599oldval,1600scratch1,1601scratch2,1602)1603}1604&Inst::AtomicCAS {1605rd, rs, rt, rn, ty, ..1606} => {1607let op = match ty {1608I8 => "casalb",1609I16 => "casalh",1610I32 | I64 => "casal",1611_ => panic!("Unsupported type: {ty}"),1612};1613let size = OperandSize::from_ty(ty);1614let rd = pretty_print_ireg(rd.to_reg(), size);1615let rs = pretty_print_ireg(rs, size);1616let rt = pretty_print_ireg(rt, size);1617let rn = pretty_print_ireg(rn, OperandSize::Size64);16181619format!("{op} {rd}, {rs}, {rt}, [{rn}]")1620}1621&Inst::AtomicCASLoop {1622ty,1623addr,1624expected,1625replacement,1626oldval,1627scratch,1628..1629} => {1630let addr = pretty_print_ireg(addr, OperandSize::Size64);1631let expected = pretty_print_ireg(expected, OperandSize::Size64);1632let replacement = 
pretty_print_ireg(replacement, OperandSize::Size64);1633let oldval = pretty_print_ireg(oldval.to_reg(), OperandSize::Size64);1634let scratch = pretty_print_ireg(scratch.to_reg(), OperandSize::Size64);1635format!(1636"atomic_cas_loop_{} addr={}, expect={}, replacement={}, oldval={}, scratch={}",1637ty.bits(),1638addr,1639expected,1640replacement,1641oldval,1642scratch,1643)1644}1645&Inst::LoadAcquire {1646access_ty, rt, rn, ..1647} => {1648let (op, ty) = match access_ty {1649I8 => ("ldarb", I32),1650I16 => ("ldarh", I32),1651I32 => ("ldar", I32),1652I64 => ("ldar", I64),1653_ => panic!("Unsupported type: {access_ty}"),1654};1655let size = OperandSize::from_ty(ty);1656let rn = pretty_print_ireg(rn, OperandSize::Size64);1657let rt = pretty_print_ireg(rt.to_reg(), size);1658format!("{op} {rt}, [{rn}]")1659}1660&Inst::StoreRelease {1661access_ty, rt, rn, ..1662} => {1663let (op, ty) = match access_ty {1664I8 => ("stlrb", I32),1665I16 => ("stlrh", I32),1666I32 => ("stlr", I32),1667I64 => ("stlr", I64),1668_ => panic!("Unsupported type: {access_ty}"),1669};1670let size = OperandSize::from_ty(ty);1671let rn = pretty_print_ireg(rn, OperandSize::Size64);1672let rt = pretty_print_ireg(rt, size);1673format!("{op} {rt}, [{rn}]")1674}1675&Inst::Fence {} => {1676format!("dmb ish")1677}1678&Inst::Csdb {} => {1679format!("csdb")1680}1681&Inst::FpuMove32 { rd, rn } => {1682let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);1683let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);1684format!("fmov {rd}, {rn}")1685}1686&Inst::FpuMove64 { rd, rn } => {1687let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);1688let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64);1689format!("fmov {rd}, {rn}")1690}1691&Inst::FpuMove128 { rd, rn } => {1692let rd = pretty_print_reg(rd.to_reg());1693let rn = pretty_print_reg(rn);1694format!("mov {rd}.16b, {rn}.16b")1695}1696&Inst::FpuMoveFromVec { rd, rn, idx, size } => {1697let rd = 
pretty_print_vreg_scalar(rd.to_reg(), size.lane_size());1698let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size());1699format!("mov {rd}, {rn}")1700}1701&Inst::FpuExtend { rd, rn, size } => {1702let rd = pretty_print_vreg_scalar(rd.to_reg(), size);1703let rn = pretty_print_vreg_scalar(rn, size);1704format!("fmov {rd}, {rn}")1705}1706&Inst::FpuRR {1707fpu_op,1708size,1709rd,1710rn,1711} => {1712let op = match fpu_op {1713FPUOp1::Abs => "fabs",1714FPUOp1::Neg => "fneg",1715FPUOp1::Sqrt => "fsqrt",1716FPUOp1::Cvt32To64 | FPUOp1::Cvt64To32 => "fcvt",1717};1718let dst_size = match fpu_op {1719FPUOp1::Cvt32To64 => ScalarSize::Size64,1720FPUOp1::Cvt64To32 => ScalarSize::Size32,1721_ => size,1722};1723let rd = pretty_print_vreg_scalar(rd.to_reg(), dst_size);1724let rn = pretty_print_vreg_scalar(rn, size);1725format!("{op} {rd}, {rn}")1726}1727&Inst::FpuRRR {1728fpu_op,1729size,1730rd,1731rn,1732rm,1733} => {1734let op = match fpu_op {1735FPUOp2::Add => "fadd",1736FPUOp2::Sub => "fsub",1737FPUOp2::Mul => "fmul",1738FPUOp2::Div => "fdiv",1739FPUOp2::Max => "fmax",1740FPUOp2::Min => "fmin",1741};1742let rd = pretty_print_vreg_scalar(rd.to_reg(), size);1743let rn = pretty_print_vreg_scalar(rn, size);1744let rm = pretty_print_vreg_scalar(rm, size);1745format!("{op} {rd}, {rn}, {rm}")1746}1747&Inst::FpuRRI { fpu_op, rd, rn } => {1748let (op, imm, vector) = match fpu_op {1749FPUOpRI::UShr32(imm) => ("ushr", imm.pretty_print(0), true),1750FPUOpRI::UShr64(imm) => ("ushr", imm.pretty_print(0), false),1751};17521753let (rd, rn) = if vector {1754(1755pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2),1756pretty_print_vreg_vector(rn, VectorSize::Size32x2),1757)1758} else {1759(1760pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64),1761pretty_print_vreg_scalar(rn, ScalarSize::Size64),1762)1763};1764format!("{op} {rd}, {rn}, {imm}")1765}1766&Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {1767let (op, imm, vector) = match fpu_op {1768FPUOpRIMod::Sli32(imm) => 
("sli", imm.pretty_print(0), true),1769FPUOpRIMod::Sli64(imm) => ("sli", imm.pretty_print(0), false),1770};17711772let (rd, ri, rn) = if vector {1773(1774pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2),1775pretty_print_vreg_vector(ri, VectorSize::Size32x2),1776pretty_print_vreg_vector(rn, VectorSize::Size32x2),1777)1778} else {1779(1780pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64),1781pretty_print_vreg_scalar(ri, ScalarSize::Size64),1782pretty_print_vreg_scalar(rn, ScalarSize::Size64),1783)1784};1785format!("{op} {rd}, {ri}, {rn}, {imm}")1786}1787&Inst::FpuRRRR {1788fpu_op,1789size,1790rd,1791rn,1792rm,1793ra,1794} => {1795let op = match fpu_op {1796FPUOp3::MAdd => "fmadd",1797FPUOp3::MSub => "fmsub",1798FPUOp3::NMAdd => "fnmadd",1799FPUOp3::NMSub => "fnmsub",1800};1801let rd = pretty_print_vreg_scalar(rd.to_reg(), size);1802let rn = pretty_print_vreg_scalar(rn, size);1803let rm = pretty_print_vreg_scalar(rm, size);1804let ra = pretty_print_vreg_scalar(ra, size);1805format!("{op} {rd}, {rn}, {rm}, {ra}")1806}1807&Inst::FpuCmp { size, rn, rm } => {1808let rn = pretty_print_vreg_scalar(rn, size);1809let rm = pretty_print_vreg_scalar(rm, size);1810format!("fcmp {rn}, {rm}")1811}1812&Inst::FpuLoad16 { rd, ref mem, .. } => {1813let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);1814let mem = mem.clone();1815let access_ty = self.mem_type().unwrap();1816let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);1817format!("{mem_str}ldr {rd}, {mem}")1818}1819&Inst::FpuLoad32 { rd, ref mem, .. } => {1820let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);1821let mem = mem.clone();1822let access_ty = self.mem_type().unwrap();1823let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);1824format!("{mem_str}ldr {rd}, {mem}")1825}1826&Inst::FpuLoad64 { rd, ref mem, .. 
} => {1827let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);1828let mem = mem.clone();1829let access_ty = self.mem_type().unwrap();1830let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);1831format!("{mem_str}ldr {rd}, {mem}")1832}1833&Inst::FpuLoad128 { rd, ref mem, .. } => {1834let rd = pretty_print_reg(rd.to_reg());1835let rd = "q".to_string() + &rd[1..];1836let mem = mem.clone();1837let access_ty = self.mem_type().unwrap();1838let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);1839format!("{mem_str}ldr {rd}, {mem}")1840}1841&Inst::FpuStore16 { rd, ref mem, .. } => {1842let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size16);1843let mem = mem.clone();1844let access_ty = self.mem_type().unwrap();1845let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);1846format!("{mem_str}str {rd}, {mem}")1847}1848&Inst::FpuStore32 { rd, ref mem, .. } => {1849let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size32);1850let mem = mem.clone();1851let access_ty = self.mem_type().unwrap();1852let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);1853format!("{mem_str}str {rd}, {mem}")1854}1855&Inst::FpuStore64 { rd, ref mem, .. } => {1856let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size64);1857let mem = mem.clone();1858let access_ty = self.mem_type().unwrap();1859let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);1860format!("{mem_str}str {rd}, {mem}")1861}1862&Inst::FpuStore128 { rd, ref mem, .. 
} => {1863let rd = pretty_print_reg(rd);1864let rd = "q".to_string() + &rd[1..];1865let mem = mem.clone();1866let access_ty = self.mem_type().unwrap();1867let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);1868format!("{mem_str}str {rd}, {mem}")1869}1870&Inst::FpuLoadP64 {1871rt, rt2, ref mem, ..1872} => {1873let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size64);1874let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size64);1875let mem = mem.clone();1876let mem = mem.pretty_print_default();18771878format!("ldp {rt}, {rt2}, {mem}")1879}1880&Inst::FpuStoreP64 {1881rt, rt2, ref mem, ..1882} => {1883let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size64);1884let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size64);1885let mem = mem.clone();1886let mem = mem.pretty_print_default();18871888format!("stp {rt}, {rt2}, {mem}")1889}1890&Inst::FpuLoadP128 {1891rt, rt2, ref mem, ..1892} => {1893let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size128);1894let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size128);1895let mem = mem.clone();1896let mem = mem.pretty_print_default();18971898format!("ldp {rt}, {rt2}, {mem}")1899}1900&Inst::FpuStoreP128 {1901rt, rt2, ref mem, ..1902} => {1903let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size128);1904let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size128);1905let mem = mem.clone();1906let mem = mem.pretty_print_default();19071908format!("stp {rt}, {rt2}, {mem}")1909}1910&Inst::FpuToInt { op, rd, rn } => {1911let (op, sizesrc, sizedest) = match op {1912FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32),1913FpuToIntOp::F32ToU32 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size32),1914FpuToIntOp::F32ToI64 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size64),1915FpuToIntOp::F32ToU64 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size64),1916FpuToIntOp::F64ToI32 => ("fcvtzs", ScalarSize::Size64, 
OperandSize::Size32),1917FpuToIntOp::F64ToU32 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size32),1918FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64),1919FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64),1920};1921let rd = pretty_print_ireg(rd.to_reg(), sizedest);1922let rn = pretty_print_vreg_scalar(rn, sizesrc);1923format!("{op} {rd}, {rn}")1924}1925&Inst::IntToFpu { op, rd, rn } => {1926let (op, sizesrc, sizedest) = match op {1927IntToFpuOp::I32ToF32 => ("scvtf", OperandSize::Size32, ScalarSize::Size32),1928IntToFpuOp::U32ToF32 => ("ucvtf", OperandSize::Size32, ScalarSize::Size32),1929IntToFpuOp::I64ToF32 => ("scvtf", OperandSize::Size64, ScalarSize::Size32),1930IntToFpuOp::U64ToF32 => ("ucvtf", OperandSize::Size64, ScalarSize::Size32),1931IntToFpuOp::I32ToF64 => ("scvtf", OperandSize::Size32, ScalarSize::Size64),1932IntToFpuOp::U32ToF64 => ("ucvtf", OperandSize::Size32, ScalarSize::Size64),1933IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64),1934IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64),1935};1936let rd = pretty_print_vreg_scalar(rd.to_reg(), sizedest);1937let rn = pretty_print_ireg(rn, sizesrc);1938format!("{op} {rd}, {rn}")1939}1940&Inst::FpuCSel16 { rd, rn, rm, cond } => {1941let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);1942let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size16);1943let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size16);1944let cond = cond.pretty_print(0);1945format!("fcsel {rd}, {rn}, {rm}, {cond}")1946}1947&Inst::FpuCSel32 { rd, rn, rm, cond } => {1948let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);1949let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);1950let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size32);1951let cond = cond.pretty_print(0);1952format!("fcsel {rd}, {rn}, {rm}, {cond}")1953}1954&Inst::FpuCSel64 { rd, rn, rm, cond } => {1955let rd = 
pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);1956let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64);1957let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size64);1958let cond = cond.pretty_print(0);1959format!("fcsel {rd}, {rn}, {rm}, {cond}")1960}1961&Inst::FpuRound { op, rd, rn } => {1962let (inst, size) = match op {1963FpuRoundMode::Minus32 => ("frintm", ScalarSize::Size32),1964FpuRoundMode::Minus64 => ("frintm", ScalarSize::Size64),1965FpuRoundMode::Plus32 => ("frintp", ScalarSize::Size32),1966FpuRoundMode::Plus64 => ("frintp", ScalarSize::Size64),1967FpuRoundMode::Zero32 => ("frintz", ScalarSize::Size32),1968FpuRoundMode::Zero64 => ("frintz", ScalarSize::Size64),1969FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32),1970FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64),1971};1972let rd = pretty_print_vreg_scalar(rd.to_reg(), size);1973let rn = pretty_print_vreg_scalar(rn, size);1974format!("{inst} {rd}, {rn}")1975}1976&Inst::MovToFpu { rd, rn, size } => {1977let operand_size = size.operand_size();1978let rd = pretty_print_vreg_scalar(rd.to_reg(), size);1979let rn = pretty_print_ireg(rn, operand_size);1980format!("fmov {rd}, {rn}")1981}1982&Inst::FpuMoveFPImm { rd, imm, size } => {1983let imm = imm.pretty_print(0);1984let rd = pretty_print_vreg_scalar(rd.to_reg(), size);19851986format!("fmov {rd}, {imm}")1987}1988&Inst::MovToVec {1989rd,1990ri,1991rn,1992idx,1993size,1994} => {1995let rd = pretty_print_vreg_element(rd.to_reg(), idx as usize, size.lane_size());1996let ri = pretty_print_vreg_element(ri, idx as usize, size.lane_size());1997let rn = pretty_print_ireg(rn, size.operand_size());1998format!("mov {rd}, {ri}, {rn}")1999}2000&Inst::MovFromVec { rd, rn, idx, size } => {2001let op = match size {2002ScalarSize::Size8 => "umov",2003ScalarSize::Size16 => "umov",2004ScalarSize::Size32 => "mov",2005ScalarSize::Size64 => "mov",2006_ => unimplemented!(),2007};2008let rd = pretty_print_ireg(rd.to_reg(), 
size.operand_size());2009let rn = pretty_print_vreg_element(rn, idx as usize, size);2010format!("{op} {rd}, {rn}")2011}2012&Inst::MovFromVecSigned {2013rd,2014rn,2015idx,2016size,2017scalar_size,2018} => {2019let rd = pretty_print_ireg(rd.to_reg(), scalar_size);2020let rn = pretty_print_vreg_element(rn, idx as usize, size.lane_size());2021format!("smov {rd}, {rn}")2022}2023&Inst::VecDup { rd, rn, size } => {2024let rd = pretty_print_vreg_vector(rd.to_reg(), size);2025let rn = pretty_print_ireg(rn, size.operand_size());2026format!("dup {rd}, {rn}")2027}2028&Inst::VecDupFromFpu { rd, rn, size, lane } => {2029let rd = pretty_print_vreg_vector(rd.to_reg(), size);2030let rn = pretty_print_vreg_element(rn, lane.into(), size.lane_size());2031format!("dup {rd}, {rn}")2032}2033&Inst::VecDupFPImm { rd, imm, size } => {2034let imm = imm.pretty_print(0);2035let rd = pretty_print_vreg_vector(rd.to_reg(), size);20362037format!("fmov {rd}, {imm}")2038}2039&Inst::VecDupImm {2040rd,2041imm,2042invert,2043size,2044} => {2045let imm = imm.pretty_print(0);2046let op = if invert { "mvni" } else { "movi" };2047let rd = pretty_print_vreg_vector(rd.to_reg(), size);20482049format!("{op} {rd}, {imm}")2050}2051&Inst::VecExtend {2052t,2053rd,2054rn,2055high_half,2056lane_size,2057} => {2058let vec64 = VectorSize::from_lane_size(lane_size.narrow(), false);2059let vec128 = VectorSize::from_lane_size(lane_size.narrow(), true);2060let rd_size = VectorSize::from_lane_size(lane_size, true);2061let (op, rn_size) = match (t, high_half) {2062(VecExtendOp::Sxtl, false) => ("sxtl", vec64),2063(VecExtendOp::Sxtl, true) => ("sxtl2", vec128),2064(VecExtendOp::Uxtl, false) => ("uxtl", vec64),2065(VecExtendOp::Uxtl, true) => ("uxtl2", vec128),2066};2067let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);2068let rn = pretty_print_vreg_vector(rn, rn_size);2069format!("{op} {rd}, {rn}")2070}2071&Inst::VecMovElement {2072rd,2073ri,2074rn,2075dest_idx,2076src_idx,2077size,2078} => {2079let rd 
=2080pretty_print_vreg_element(rd.to_reg(), dest_idx as usize, size.lane_size());2081let ri = pretty_print_vreg_element(ri, dest_idx as usize, size.lane_size());2082let rn = pretty_print_vreg_element(rn, src_idx as usize, size.lane_size());2083format!("mov {rd}, {ri}, {rn}")2084}2085&Inst::VecRRLong {2086op,2087rd,2088rn,2089high_half,2090} => {2091let (op, rd_size, size, suffix) = match (op, high_half) {2092(VecRRLongOp::Fcvtl16, false) => {2093("fcvtl", VectorSize::Size32x4, VectorSize::Size16x4, "")2094}2095(VecRRLongOp::Fcvtl16, true) => {2096("fcvtl2", VectorSize::Size32x4, VectorSize::Size16x8, "")2097}2098(VecRRLongOp::Fcvtl32, false) => {2099("fcvtl", VectorSize::Size64x2, VectorSize::Size32x2, "")2100}2101(VecRRLongOp::Fcvtl32, true) => {2102("fcvtl2", VectorSize::Size64x2, VectorSize::Size32x4, "")2103}2104(VecRRLongOp::Shll8, false) => {2105("shll", VectorSize::Size16x8, VectorSize::Size8x8, ", #8")2106}2107(VecRRLongOp::Shll8, true) => {2108("shll2", VectorSize::Size16x8, VectorSize::Size8x16, ", #8")2109}2110(VecRRLongOp::Shll16, false) => {2111("shll", VectorSize::Size32x4, VectorSize::Size16x4, ", #16")2112}2113(VecRRLongOp::Shll16, true) => {2114("shll2", VectorSize::Size32x4, VectorSize::Size16x8, ", #16")2115}2116(VecRRLongOp::Shll32, false) => {2117("shll", VectorSize::Size64x2, VectorSize::Size32x2, ", #32")2118}2119(VecRRLongOp::Shll32, true) => {2120("shll2", VectorSize::Size64x2, VectorSize::Size32x4, ", #32")2121}2122};2123let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);2124let rn = pretty_print_vreg_vector(rn, size);21252126format!("{op} {rd}, {rn}{suffix}")2127}2128&Inst::VecRRNarrowLow {2129op,2130rd,2131rn,2132lane_size,2133..2134}2135| &Inst::VecRRNarrowHigh {2136op,2137rd,2138rn,2139lane_size,2140..2141} => {2142let vec64 = VectorSize::from_lane_size(lane_size, false);2143let vec128 = VectorSize::from_lane_size(lane_size, true);2144let rn_size = VectorSize::from_lane_size(lane_size.widen(), true);2145let high_half = match self 
{2146&Inst::VecRRNarrowLow { .. } => false,2147&Inst::VecRRNarrowHigh { .. } => true,2148_ => unreachable!(),2149};2150let (op, rd_size) = match (op, high_half) {2151(VecRRNarrowOp::Xtn, false) => ("xtn", vec64),2152(VecRRNarrowOp::Xtn, true) => ("xtn2", vec128),2153(VecRRNarrowOp::Sqxtn, false) => ("sqxtn", vec64),2154(VecRRNarrowOp::Sqxtn, true) => ("sqxtn2", vec128),2155(VecRRNarrowOp::Sqxtun, false) => ("sqxtun", vec64),2156(VecRRNarrowOp::Sqxtun, true) => ("sqxtun2", vec128),2157(VecRRNarrowOp::Uqxtn, false) => ("uqxtn", vec64),2158(VecRRNarrowOp::Uqxtn, true) => ("uqxtn2", vec128),2159(VecRRNarrowOp::Fcvtn, false) => ("fcvtn", vec64),2160(VecRRNarrowOp::Fcvtn, true) => ("fcvtn2", vec128),2161};2162let rn = pretty_print_vreg_vector(rn, rn_size);2163let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size);2164let ri = match self {2165&Inst::VecRRNarrowLow { .. } => "".to_string(),2166&Inst::VecRRNarrowHigh { ri, .. } => {2167format!("{}, ", pretty_print_vreg_vector(ri, rd_size))2168}2169_ => unreachable!(),2170};21712172format!("{op} {rd}, {ri}{rn}")2173}2174&Inst::VecRRPair { op, rd, rn } => {2175let op = match op {2176VecPairOp::Addp => "addp",2177};2178let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64);2179let rn = pretty_print_vreg_vector(rn, VectorSize::Size64x2);21802181format!("{op} {rd}, {rn}")2182}2183&Inst::VecRRPairLong { op, rd, rn } => {2184let (op, dest, src) = match op {2185VecRRPairLongOp::Saddlp8 => {2186("saddlp", VectorSize::Size16x8, VectorSize::Size8x16)2187}2188VecRRPairLongOp::Saddlp16 => {2189("saddlp", VectorSize::Size32x4, VectorSize::Size16x8)2190}2191VecRRPairLongOp::Uaddlp8 => {2192("uaddlp", VectorSize::Size16x8, VectorSize::Size8x16)2193}2194VecRRPairLongOp::Uaddlp16 => {2195("uaddlp", VectorSize::Size32x4, VectorSize::Size16x8)2196}2197};2198let rd = pretty_print_vreg_vector(rd.to_reg(), dest);2199let rn = pretty_print_vreg_vector(rn, src);22002201format!("{op} {rd}, {rn}")2202}2203&Inst::VecRRR 
{2204rd,2205rn,2206rm,2207alu_op,2208size,2209} => {2210let (op, size) = match alu_op {2211VecALUOp::Sqadd => ("sqadd", size),2212VecALUOp::Uqadd => ("uqadd", size),2213VecALUOp::Sqsub => ("sqsub", size),2214VecALUOp::Uqsub => ("uqsub", size),2215VecALUOp::Cmeq => ("cmeq", size),2216VecALUOp::Cmge => ("cmge", size),2217VecALUOp::Cmgt => ("cmgt", size),2218VecALUOp::Cmhs => ("cmhs", size),2219VecALUOp::Cmhi => ("cmhi", size),2220VecALUOp::Fcmeq => ("fcmeq", size),2221VecALUOp::Fcmgt => ("fcmgt", size),2222VecALUOp::Fcmge => ("fcmge", size),2223VecALUOp::Umaxp => ("umaxp", size),2224VecALUOp::Add => ("add", size),2225VecALUOp::Sub => ("sub", size),2226VecALUOp::Mul => ("mul", size),2227VecALUOp::Sshl => ("sshl", size),2228VecALUOp::Ushl => ("ushl", size),2229VecALUOp::Umin => ("umin", size),2230VecALUOp::Smin => ("smin", size),2231VecALUOp::Umax => ("umax", size),2232VecALUOp::Smax => ("smax", size),2233VecALUOp::Urhadd => ("urhadd", size),2234VecALUOp::Fadd => ("fadd", size),2235VecALUOp::Fsub => ("fsub", size),2236VecALUOp::Fdiv => ("fdiv", size),2237VecALUOp::Fmax => ("fmax", size),2238VecALUOp::Fmin => ("fmin", size),2239VecALUOp::Fmul => ("fmul", size),2240VecALUOp::Addp => ("addp", size),2241VecALUOp::Zip1 => ("zip1", size),2242VecALUOp::Zip2 => ("zip2", size),2243VecALUOp::Sqrdmulh => ("sqrdmulh", size),2244VecALUOp::Uzp1 => ("uzp1", size),2245VecALUOp::Uzp2 => ("uzp2", size),2246VecALUOp::Trn1 => ("trn1", size),2247VecALUOp::Trn2 => ("trn2", size),22482249// Lane division does not affect bitwise operations.2250// However, when printing, use 8-bit lane division to conform to ARM formatting.2251VecALUOp::And => ("and", size.as_scalar8_vector()),2252VecALUOp::Bic => ("bic", size.as_scalar8_vector()),2253VecALUOp::Orr => ("orr", size.as_scalar8_vector()),2254VecALUOp::Orn => ("orn", size.as_scalar8_vector()),2255VecALUOp::Eor => ("eor", size.as_scalar8_vector()),2256};2257let rd = pretty_print_vreg_vector(rd.to_reg(), size);2258let rn = 
pretty_print_vreg_vector(rn, size);2259let rm = pretty_print_vreg_vector(rm, size);2260format!("{op} {rd}, {rn}, {rm}")2261}2262&Inst::VecRRRMod {2263rd,2264ri,2265rn,2266rm,2267alu_op,2268size,2269} => {2270let (op, size) = match alu_op {2271VecALUModOp::Bsl => ("bsl", VectorSize::Size8x16),2272VecALUModOp::Fmla => ("fmla", size),2273VecALUModOp::Fmls => ("fmls", size),2274};2275let rd = pretty_print_vreg_vector(rd.to_reg(), size);2276let ri = pretty_print_vreg_vector(ri, size);2277let rn = pretty_print_vreg_vector(rn, size);2278let rm = pretty_print_vreg_vector(rm, size);2279format!("{op} {rd}, {ri}, {rn}, {rm}")2280}2281&Inst::VecFmlaElem {2282rd,2283ri,2284rn,2285rm,2286alu_op,2287size,2288idx,2289} => {2290let (op, size) = match alu_op {2291VecALUModOp::Fmla => ("fmla", size),2292VecALUModOp::Fmls => ("fmls", size),2293_ => unreachable!(),2294};2295let rd = pretty_print_vreg_vector(rd.to_reg(), size);2296let ri = pretty_print_vreg_vector(ri, size);2297let rn = pretty_print_vreg_vector(rn, size);2298let rm = pretty_print_vreg_element(rm, idx.into(), size.lane_size());2299format!("{op} {rd}, {ri}, {rn}, {rm}")2300}2301&Inst::VecRRRLong {2302rd,2303rn,2304rm,2305alu_op,2306high_half,2307} => {2308let (op, dest_size, src_size) = match (alu_op, high_half) {2309(VecRRRLongOp::Smull8, false) => {2310("smull", VectorSize::Size16x8, VectorSize::Size8x8)2311}2312(VecRRRLongOp::Smull8, true) => {2313("smull2", VectorSize::Size16x8, VectorSize::Size8x16)2314}2315(VecRRRLongOp::Smull16, false) => {2316("smull", VectorSize::Size32x4, VectorSize::Size16x4)2317}2318(VecRRRLongOp::Smull16, true) => {2319("smull2", VectorSize::Size32x4, VectorSize::Size16x8)2320}2321(VecRRRLongOp::Smull32, false) => {2322("smull", VectorSize::Size64x2, VectorSize::Size32x2)2323}2324(VecRRRLongOp::Smull32, true) => {2325("smull2", VectorSize::Size64x2, VectorSize::Size32x4)2326}2327(VecRRRLongOp::Umull8, false) => {2328("umull", VectorSize::Size16x8, 
VectorSize::Size8x8)2329}2330(VecRRRLongOp::Umull8, true) => {2331("umull2", VectorSize::Size16x8, VectorSize::Size8x16)2332}2333(VecRRRLongOp::Umull16, false) => {2334("umull", VectorSize::Size32x4, VectorSize::Size16x4)2335}2336(VecRRRLongOp::Umull16, true) => {2337("umull2", VectorSize::Size32x4, VectorSize::Size16x8)2338}2339(VecRRRLongOp::Umull32, false) => {2340("umull", VectorSize::Size64x2, VectorSize::Size32x2)2341}2342(VecRRRLongOp::Umull32, true) => {2343("umull2", VectorSize::Size64x2, VectorSize::Size32x4)2344}2345};2346let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size);2347let rn = pretty_print_vreg_vector(rn, src_size);2348let rm = pretty_print_vreg_vector(rm, src_size);2349format!("{op} {rd}, {rn}, {rm}")2350}2351&Inst::VecRRRLongMod {2352rd,2353ri,2354rn,2355rm,2356alu_op,2357high_half,2358} => {2359let (op, dest_size, src_size) = match (alu_op, high_half) {2360(VecRRRLongModOp::Umlal8, false) => {2361("umlal", VectorSize::Size16x8, VectorSize::Size8x8)2362}2363(VecRRRLongModOp::Umlal8, true) => {2364("umlal2", VectorSize::Size16x8, VectorSize::Size8x16)2365}2366(VecRRRLongModOp::Umlal16, false) => {2367("umlal", VectorSize::Size32x4, VectorSize::Size16x4)2368}2369(VecRRRLongModOp::Umlal16, true) => {2370("umlal2", VectorSize::Size32x4, VectorSize::Size16x8)2371}2372(VecRRRLongModOp::Umlal32, false) => {2373("umlal", VectorSize::Size64x2, VectorSize::Size32x2)2374}2375(VecRRRLongModOp::Umlal32, true) => {2376("umlal2", VectorSize::Size64x2, VectorSize::Size32x4)2377}2378};2379let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size);2380let ri = pretty_print_vreg_vector(ri, dest_size);2381let rn = pretty_print_vreg_vector(rn, src_size);2382let rm = pretty_print_vreg_vector(rm, src_size);2383format!("{op} {rd}, {ri}, {rn}, {rm}")2384}2385&Inst::VecMisc { op, rd, rn, size } => {2386let (op, size, suffix) = match op {2387VecMisc2::Neg => ("neg", size, ""),2388VecMisc2::Abs => ("abs", size, ""),2389VecMisc2::Fabs => ("fabs", size, 
""),2390VecMisc2::Fneg => ("fneg", size, ""),2391VecMisc2::Fsqrt => ("fsqrt", size, ""),2392VecMisc2::Rev16 => ("rev16", size, ""),2393VecMisc2::Rev32 => ("rev32", size, ""),2394VecMisc2::Rev64 => ("rev64", size, ""),2395VecMisc2::Fcvtzs => ("fcvtzs", size, ""),2396VecMisc2::Fcvtzu => ("fcvtzu", size, ""),2397VecMisc2::Scvtf => ("scvtf", size, ""),2398VecMisc2::Ucvtf => ("ucvtf", size, ""),2399VecMisc2::Frintn => ("frintn", size, ""),2400VecMisc2::Frintz => ("frintz", size, ""),2401VecMisc2::Frintm => ("frintm", size, ""),2402VecMisc2::Frintp => ("frintp", size, ""),2403VecMisc2::Cnt => ("cnt", size, ""),2404VecMisc2::Cmeq0 => ("cmeq", size, ", #0"),2405VecMisc2::Cmge0 => ("cmge", size, ", #0"),2406VecMisc2::Cmgt0 => ("cmgt", size, ", #0"),2407VecMisc2::Cmle0 => ("cmle", size, ", #0"),2408VecMisc2::Cmlt0 => ("cmlt", size, ", #0"),2409VecMisc2::Fcmeq0 => ("fcmeq", size, ", #0.0"),2410VecMisc2::Fcmge0 => ("fcmge", size, ", #0.0"),2411VecMisc2::Fcmgt0 => ("fcmgt", size, ", #0.0"),2412VecMisc2::Fcmle0 => ("fcmle", size, ", #0.0"),2413VecMisc2::Fcmlt0 => ("fcmlt", size, ", #0.0"),24142415// Lane division does not affect bitwise operations.2416// However, when printing, use 8-bit lane division to conform to ARM formatting.2417VecMisc2::Not => ("mvn", size.as_scalar8_vector(), ""),2418};2419let rd = pretty_print_vreg_vector(rd.to_reg(), size);2420let rn = pretty_print_vreg_vector(rn, size);2421format!("{op} {rd}, {rn}{suffix}")2422}2423&Inst::VecLanes { op, rd, rn, size } => {2424let op = match op {2425VecLanesOp::Uminv => "uminv",2426VecLanesOp::Addv => "addv",2427};2428let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size());2429let rn = pretty_print_vreg_vector(rn, size);2430format!("{op} {rd}, {rn}")2431}2432&Inst::VecShiftImm {2433op,2434rd,2435rn,2436size,2437imm,2438} => {2439let op = match op {2440VecShiftImmOp::Shl => "shl",2441VecShiftImmOp::Ushr => "ushr",2442VecShiftImmOp::Sshr => "sshr",2443};2444let rd = pretty_print_vreg_vector(rd.to_reg(), 
size);2445let rn = pretty_print_vreg_vector(rn, size);2446format!("{op} {rd}, {rn}, #{imm}")2447}2448&Inst::VecShiftImmMod {2449op,2450rd,2451ri,2452rn,2453size,2454imm,2455} => {2456let op = match op {2457VecShiftImmModOp::Sli => "sli",2458};2459let rd = pretty_print_vreg_vector(rd.to_reg(), size);2460let ri = pretty_print_vreg_vector(ri, size);2461let rn = pretty_print_vreg_vector(rn, size);2462format!("{op} {rd}, {ri}, {rn}, #{imm}")2463}2464&Inst::VecExtract { rd, rn, rm, imm4 } => {2465let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);2466let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);2467let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);2468format!("ext {rd}, {rn}, {rm}, #{imm4}")2469}2470&Inst::VecTbl { rd, rn, rm } => {2471let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);2472let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);2473let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);2474format!("tbl {rd}, {{ {rn} }}, {rm}")2475}2476&Inst::VecTblExt { rd, ri, rn, rm } => {2477let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);2478let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);2479let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);2480let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16);2481format!("tbx {rd}, {ri}, {{ {rn} }}, {rm}")2482}2483&Inst::VecTbl2 { rd, rn, rn2, rm } => {2484let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);2485let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16);2486let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);2487let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);2488format!("tbl {rd}, {{ {rn}, {rn2} }}, {rm}")2489}2490&Inst::VecTbl2Ext {2491rd,2492ri,2493rn,2494rn2,2495rm,2496} => {2497let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);2498let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16);2499let rm = pretty_print_vreg_vector(rm, 
VectorSize::Size8x16);2500let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);2501let ri = pretty_print_vreg_vector(ri, VectorSize::Size8x16);2502format!("tbx {rd}, {ri}, {{ {rn}, {rn2} }}, {rm}")2503}2504&Inst::VecLoadReplicate { rd, rn, size, .. } => {2505let rd = pretty_print_vreg_vector(rd.to_reg(), size);2506let rn = pretty_print_reg(rn);25072508format!("ld1r {{ {rd} }}, [{rn}]")2509}2510&Inst::VecCSel { rd, rn, rm, cond } => {2511let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16);2512let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16);2513let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16);2514let cond = cond.pretty_print(0);2515format!("vcsel {rd}, {rn}, {rm}, {cond} (if-then-else diamond)")2516}2517&Inst::MovToNZCV { rn } => {2518let rn = pretty_print_reg(rn);2519format!("msr nzcv, {rn}")2520}2521&Inst::MovFromNZCV { rd } => {2522let rd = pretty_print_reg(rd.to_reg());2523format!("mrs {rd}, nzcv")2524}2525&Inst::Extend {2526rd,2527rn,2528signed: false,2529from_bits: 1,2530..2531} => {2532let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32);2533let rn = pretty_print_ireg(rn, OperandSize::Size32);2534format!("and {rd}, {rn}, #1")2535}2536&Inst::Extend {2537rd,2538rn,2539signed: false,2540from_bits: 32,2541to_bits: 64,2542} => {2543// The case of a zero extension from 32 to 64 bits, is implemented2544// with a "mov" to a 32-bit (W-reg) dest, because this zeroes2545// the top 32 bits.2546let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32);2547let rn = pretty_print_ireg(rn, OperandSize::Size32);2548format!("mov {rd}, {rn}")2549}2550&Inst::Extend {2551rd,2552rn,2553signed,2554from_bits,2555to_bits,2556} => {2557assert!(from_bits <= to_bits);2558let op = match (signed, from_bits) {2559(false, 8) => "uxtb",2560(true, 8) => "sxtb",2561(false, 16) => "uxth",2562(true, 16) => "sxth",2563(true, 32) => "sxtw",2564(true, _) => "sbfx",2565(false, _) => "ubfx",2566};2567if op == "sbfx" || op == "ubfx" 
{2568let dest_size = OperandSize::from_bits(to_bits);2569let rd = pretty_print_ireg(rd.to_reg(), dest_size);2570let rn = pretty_print_ireg(rn, dest_size);2571format!("{op} {rd}, {rn}, #0, #{from_bits}")2572} else {2573let dest_size = if signed {2574OperandSize::from_bits(to_bits)2575} else {2576OperandSize::Size322577};2578let rd = pretty_print_ireg(rd.to_reg(), dest_size);2579let rn = pretty_print_ireg(rn, OperandSize::from_bits(from_bits));2580format!("{op} {rd}, {rn}")2581}2582}2583&Inst::Call { ref info } => {2584let try_call = info2585.try_call_info2586.as_ref()2587.map(|tci| pretty_print_try_call(tci))2588.unwrap_or_default();2589format!("bl 0{try_call}")2590}2591&Inst::CallInd { ref info } => {2592let rn = pretty_print_reg(info.dest);2593let try_call = info2594.try_call_info2595.as_ref()2596.map(|tci| pretty_print_try_call(tci))2597.unwrap_or_default();2598format!("blr {rn}{try_call}")2599}2600&Inst::ReturnCall { ref info } => {2601let mut s = format!(2602"return_call {:?} new_stack_arg_size:{}",2603info.dest, info.new_stack_arg_size2604);2605for ret in &info.uses {2606let preg = pretty_print_reg(ret.preg);2607let vreg = pretty_print_reg(ret.vreg);2608write!(&mut s, " {vreg}={preg}").unwrap();2609}2610s2611}2612&Inst::ReturnCallInd { ref info } => {2613let callee = pretty_print_reg(info.dest);2614let mut s = format!(2615"return_call_ind {callee} new_stack_arg_size:{}",2616info.new_stack_arg_size2617);2618for ret in &info.uses {2619let preg = pretty_print_reg(ret.preg);2620let vreg = pretty_print_reg(ret.vreg);2621write!(&mut s, " {vreg}={preg}").unwrap();2622}2623s2624}2625&Inst::Args { ref args } => {2626let mut s = "args".to_string();2627for arg in args {2628let preg = pretty_print_reg(arg.preg);2629let def = pretty_print_reg(arg.vreg.to_reg());2630write!(&mut s, " {def}={preg}").unwrap();2631}2632s2633}2634&Inst::Rets { ref rets } => {2635let mut s = "rets".to_string();2636for ret in rets {2637let preg = pretty_print_reg(ret.preg);2638let vreg = 
pretty_print_reg(ret.vreg);2639write!(&mut s, " {vreg}={preg}").unwrap();2640}2641s2642}2643&Inst::Ret {} => "ret".to_string(),2644&Inst::AuthenticatedRet { key, is_hint } => {2645let key = match key {2646APIKey::AZ => "az",2647APIKey::BZ => "bz",2648APIKey::ASP => "asp",2649APIKey::BSP => "bsp",2650};2651match is_hint {2652false => format!("reta{key}"),2653true => format!("auti{key} ; ret"),2654}2655}2656&Inst::Jump { ref dest } => {2657let dest = dest.pretty_print(0);2658format!("b {dest}")2659}2660&Inst::CondBr {2661ref taken,2662ref not_taken,2663ref kind,2664} => {2665let taken = taken.pretty_print(0);2666let not_taken = not_taken.pretty_print(0);2667match kind {2668&CondBrKind::Zero(reg, size) => {2669let reg = pretty_print_reg_sized(reg, size);2670format!("cbz {reg}, {taken} ; b {not_taken}")2671}2672&CondBrKind::NotZero(reg, size) => {2673let reg = pretty_print_reg_sized(reg, size);2674format!("cbnz {reg}, {taken} ; b {not_taken}")2675}2676&CondBrKind::Cond(c) => {2677let c = c.pretty_print(0);2678format!("b.{c} {taken} ; b {not_taken}")2679}2680}2681}2682&Inst::TestBitAndBranch {2683kind,2684ref taken,2685ref not_taken,2686rn,2687bit,2688} => {2689let cond = match kind {2690TestBitAndBranchKind::Z => "z",2691TestBitAndBranchKind::NZ => "nz",2692};2693let taken = taken.pretty_print(0);2694let not_taken = not_taken.pretty_print(0);2695let rn = pretty_print_reg(rn);2696format!("tb{cond} {rn}, #{bit}, {taken} ; b {not_taken}")2697}2698&Inst::IndirectBr { rn, .. } => {2699let rn = pretty_print_reg(rn);2700format!("br {rn}")2701}2702&Inst::Brk => "brk #0xf000".to_string(),2703&Inst::Udf { .. 
} => "udf #0xc11f".to_string(),2704&Inst::TrapIf {2705ref kind,2706trap_code,2707} => match kind {2708&CondBrKind::Zero(reg, size) => {2709let reg = pretty_print_reg_sized(reg, size);2710format!("cbz {reg}, #trap={trap_code}")2711}2712&CondBrKind::NotZero(reg, size) => {2713let reg = pretty_print_reg_sized(reg, size);2714format!("cbnz {reg}, #trap={trap_code}")2715}2716&CondBrKind::Cond(c) => {2717let c = c.pretty_print(0);2718format!("b.{c} #trap={trap_code}")2719}2720},2721&Inst::Adr { rd, off } => {2722let rd = pretty_print_reg(rd.to_reg());2723format!("adr {rd}, pc+{off}")2724}2725&Inst::Adrp { rd, off } => {2726let rd = pretty_print_reg(rd.to_reg());2727// This instruction addresses 4KiB pages, so multiply it by the page size.2728let byte_offset = off * 4096;2729format!("adrp {rd}, pc+{byte_offset}")2730}2731&Inst::Word4 { data } => format!("data.i32 {data}"),2732&Inst::Word8 { data } => format!("data.i64 {data}"),2733&Inst::JTSequence {2734default,2735ref targets,2736ridx,2737rtmp1,2738rtmp2,2739..2740} => {2741let ridx = pretty_print_reg(ridx);2742let rtmp1 = pretty_print_reg(rtmp1.to_reg());2743let rtmp2 = pretty_print_reg(rtmp2.to_reg());2744let default_target = BranchTarget::Label(default).pretty_print(0);2745format!(2746concat!(2747"b.hs {} ; ",2748"csel {}, xzr, {}, hs ; ",2749"csdb ; ",2750"adr {}, pc+16 ; ",2751"ldrsw {}, [{}, {}, uxtw #2] ; ",2752"add {}, {}, {} ; ",2753"br {} ; ",2754"jt_entries {:?}"2755),2756default_target,2757rtmp2,2758ridx,2759rtmp1,2760rtmp2,2761rtmp1,2762rtmp2,2763rtmp1,2764rtmp1,2765rtmp2,2766rtmp1,2767targets2768)2769}2770&Inst::LoadExtNameGot { rd, ref name } => {2771let rd = pretty_print_reg(rd.to_reg());2772format!("load_ext_name_got {rd}, {name:?}")2773}2774&Inst::LoadExtNameNear {2775rd,2776ref name,2777offset,2778} => {2779let rd = pretty_print_reg(rd.to_reg());2780format!("load_ext_name_near {rd}, {name:?}+{offset}")2781}2782&Inst::LoadExtNameFar {2783rd,2784ref name,2785offset,2786} => {2787let rd = 
pretty_print_reg(rd.to_reg());2788format!("load_ext_name_far {rd}, {name:?}+{offset}")2789}2790&Inst::LoadAddr { rd, ref mem } => {2791// TODO: we really should find a better way to avoid duplication of2792// this logic between `emit()` and `show_rru()` -- a separate 1-to-N2793// expansion stage (i.e., legalization, but without the slow edit-in-place2794// of the existing legalization framework).2795let mem = mem.clone();2796let (mem_insts, mem) = mem_finalize(None, &mem, I8, state);2797let mut ret = String::new();2798for inst in mem_insts.into_iter() {2799ret.push_str(&inst.print_with_state(&mut EmitState::default()));2800}2801let (reg, index_reg, offset) = match mem {2802AMode::RegExtended { rn, rm, extendop } => (rn, Some((rm, extendop)), 0),2803AMode::Unscaled { rn, simm9 } => (rn, None, simm9.value()),2804AMode::UnsignedOffset { rn, uimm12 } => (rn, None, uimm12.value() as i32),2805_ => panic!("Unsupported case for LoadAddr: {mem:?}"),2806};2807let abs_offset = if offset < 0 {2808-offset as u642809} else {2810offset as u642811};2812let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };28132814if let Some((idx, extendop)) = index_reg {2815let add = Inst::AluRRRExtend {2816alu_op: ALUOp::Add,2817size: OperandSize::Size64,2818rd,2819rn: reg,2820rm: idx,2821extendop,2822};28232824ret.push_str(&add.print_with_state(&mut EmitState::default()));2825} else if offset == 0 {2826let mov = Inst::gen_move(rd, reg, I64);2827ret.push_str(&mov.print_with_state(&mut EmitState::default()));2828} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {2829let add = Inst::AluRRImm12 {2830alu_op,2831size: OperandSize::Size64,2832rd,2833rn: reg,2834imm12,2835};2836ret.push_str(&add.print_with_state(&mut EmitState::default()));2837} else {2838let tmp = writable_spilltmp_reg();2839for inst in Inst::load_constant(tmp, abs_offset).into_iter() {2840ret.push_str(&inst.print_with_state(&mut EmitState::default()));2841}2842let add = Inst::AluRRR {2843alu_op,2844size: 
OperandSize::Size64,2845rd,2846rn: reg,2847rm: tmp.to_reg(),2848};2849ret.push_str(&add.print_with_state(&mut EmitState::default()));2850}2851ret2852}2853&Inst::Paci { key } => {2854let key = match key {2855APIKey::AZ => "az",2856APIKey::BZ => "bz",2857APIKey::ASP => "asp",2858APIKey::BSP => "bsp",2859};28602861"paci".to_string() + key2862}2863&Inst::Xpaclri => "xpaclri".to_string(),2864&Inst::Bti { targets } => {2865let targets = match targets {2866BranchTargetType::None => "",2867BranchTargetType::C => " c",2868BranchTargetType::J => " j",2869BranchTargetType::JC => " jc",2870};28712872"bti".to_string() + targets2873}2874&Inst::EmitIsland { needed_space } => format!("emit_island {needed_space}"),28752876&Inst::ElfTlsGetAddr {2877ref symbol,2878rd,2879tmp,2880} => {2881let rd = pretty_print_reg(rd.to_reg());2882let tmp = pretty_print_reg(tmp.to_reg());2883format!("elf_tls_get_addr {}, {}, {}", rd, tmp, symbol.display(None))2884}2885&Inst::MachOTlsGetAddr { ref symbol, rd } => {2886let rd = pretty_print_reg(rd.to_reg());2887format!("macho_tls_get_addr {}, {}", rd, symbol.display(None))2888}2889&Inst::Unwind { ref inst } => {2890format!("unwind {inst:?}")2891}2892&Inst::DummyUse { reg } => {2893let reg = pretty_print_reg(reg);2894format!("dummy_use {reg}")2895}2896&Inst::LabelAddress { dst, label } => {2897let dst = pretty_print_reg(dst.to_reg());2898format!("label_address {dst}, {label:?}")2899}2900&Inst::SequencePoint {} => {2901format!("sequence_point")2902}2903&Inst::StackProbeLoop { start, end, step } => {2904let start = pretty_print_reg(start.to_reg());2905let end = pretty_print_reg(end);2906let step = step.pretty_print(0);2907format!("stack_probe_loop {start}, {end}, {step}")2908}2909}2910}2911}29122913//=============================================================================2914// Label fixups and jump veneers.29152916/// Different forms of label references for different instruction formats.2917#[derive(Clone, Copy, Debug, PartialEq, Eq)]2918pub enum 
LabelUse {
    /// 14-bit branch offset (conditional branches). PC-rel, offset is imm <<
    /// 2. Immediate is 14 signed bits, in bits 18:5. Used by tbz and tbnz.
    Branch14,
    /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19
    /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond.
    Branch19,
    /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26
    /// signed bits, in bits 25:0. Used by b, bl.
    Branch26,
    /// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits,
    /// in bits 23:5.
    Ldr19,
    /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
    /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
    Adr21,
    /// 32-bit PC relative constant offset (from address of constant itself),
    /// signed. Used in jump tables.
    PCRel32,
}

impl MachInstLabelUse for LabelUse {
    /// Alignment for veneer code. Every AArch64 instruction must be 4-byte-aligned.
    const ALIGN: CodeOffset = 4;

    /// Maximum PC-relative range (positive), inclusive.
    fn max_pos_range(self) -> CodeOffset {
        match self {
            // N-bit immediate, left-shifted by 2, for (N+2) bits of total
            // range. Signed, so +2^(N+1) from zero. Likewise for two other
            // shifted cases below.
            LabelUse::Branch14 => (1 << 15) - 1,
            LabelUse::Branch19 => (1 << 20) - 1,
            LabelUse::Branch26 => (1 << 27) - 1,
            LabelUse::Ldr19 => (1 << 20) - 1,
            // Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total
            // range.
            LabelUse::Adr21 => (1 << 20) - 1,
            LabelUse::PCRel32 => 0x7fffffff,
        }
    }

    /// Maximum PC-relative range (negative).
    fn max_neg_range(self) -> CodeOffset {
        // All forms are twos-complement signed offsets, so negative limit is one more than
        // positive limit.
        self.max_pos_range() + 1
    }

    /// Size of window into code needed to do the patch.
    fn patch_size(self) -> CodeOffset {
        // Patch is on one instruction (4 bytes) only for all of these label reference types.
        4
    }

    /// Perform the patch: splice the PC-relative offset into the 32-bit
    /// instruction word at `buffer[0..4]` (little-endian), according to this
    /// label-use kind's bitfield layout.
    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        let pc_rel = (label_offset as i64) - (use_offset as i64);
        debug_assert!(pc_rel <= self.max_pos_range() as i64);
        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
        let pc_rel = pc_rel as u32;
        let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
        // Bits of the instruction word occupied by the immediate for each form.
        let mask = match self {
            LabelUse::Branch14 => 0x0007ffe0, // bits 18..5 inclusive
            LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive
            LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive
            LabelUse::Ldr19 => 0x00ffffe0,    // bits 23..5 inclusive
            LabelUse::Adr21 => 0x60ffffe0,    // bits 30..29 and 23..5 inclusive
            LabelUse::PCRel32 => 0xffffffff,
        };
        // All branch/load-literal forms encode the offset divided by 4; Adr21
        // and PCRel32 use the byte offset directly.
        let pc_rel_shifted = match self {
            LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel,
            _ => {
                debug_assert!(pc_rel & 3 == 0);
                pc_rel >> 2
            }
        };
        let pc_rel_inserted = match self {
            LabelUse::Branch14 => (pc_rel_shifted & 0x3fff) << 5,
            LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5,
            LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff,
            // Note: the *low* two bits of offset are put in the
            // *high* bits (30, 29).
            LabelUse::Adr21 => (pc_rel_shifted & 0x1ffffc) << 3 | (pc_rel_shifted & 3) << 29,
            LabelUse::PCRel32 => pc_rel_shifted,
        };
        // PCRel32 words hold a plain 32-bit value (jump-table entry), so the
        // offset is added to the existing word rather than masked in.
        let is_add = match self {
            LabelUse::PCRel32 => true,
            _ => false,
        };
        let insn_word = if is_add {
            insn_word.wrapping_add(pc_rel_inserted)
        } else {
            (insn_word & !mask) | pc_rel_inserted
        };
        buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
    }

    /// Is a veneer supported for this label reference type?
    fn supports_veneer(self) -> bool {
        match self {
            LabelUse::Branch14 | LabelUse::Branch19 => true, // veneer is a Branch26
            LabelUse::Branch26 => true,                      // veneer is a PCRel32
            _ => false,
        }
    }

    /// How large is the veneer, if supported?
    fn veneer_size(self) -> CodeOffset {
        match self {
            LabelUse::Branch14 | LabelUse::Branch19 => 4,
            LabelUse::Branch26 => 20,
            _ => unreachable!(),
        }
    }

    fn worst_case_veneer_size() -> CodeOffset {
        // The largest veneer emitted by `generate_veneer` is the 20-byte
        // Branch26 -> PCRel32 sequence below.
        20
    }

    /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
    /// an offset and label-use for the veneer's use of the original label.
    fn generate_veneer(
        self,
        buffer: &mut [u8],
        veneer_offset: CodeOffset,
    ) -> (CodeOffset, LabelUse) {
        match self {
            LabelUse::Branch14 | LabelUse::Branch19 => {
                // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't
                // bother with constructing an Inst.
                let insn_word = 0b000101 << 26;
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
                (veneer_offset, LabelUse::Branch26)
            }

            // This is promoting a 26-bit call/jump to a 32-bit call/jump to
            // get a further range. This jump translates to a jump to a
            // relative location based on the address of the constant loaded
            // from here.
            //
            // If this path is taken from a call instruction then caller-saved
            // registers are available (minus arguments), so x16/x17 are
            // available. Otherwise for intra-function jumps we also reserve
            // x16/x17 as spill-style registers. In both cases these are
            // available for us to use.
            LabelUse::Branch26 => {
                let tmp1 = regs::spilltmp_reg();
                let tmp1_w = regs::writable_spilltmp_reg();
                let tmp2 = regs::tmp2_reg();
                let tmp2_w = regs::writable_tmp2_reg();
                // ldrsw x16, 16
                let ldr = emit::enc_ldst_imm19(0b1001_1000, 16 / 4, tmp1);
                // adr x17, 12
                let adr = emit::enc_adr(12, tmp2_w);
                // add x16, x16, x17
                let add = emit::enc_arith_rrr(0b10001011_000, 0, tmp1_w, tmp1, tmp2);
                // br x16
                let br = emit::enc_br(tmp1);
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(ldr));
                buffer[4..8].clone_from_slice(&u32::to_le_bytes(adr));
                buffer[8..12].clone_from_slice(&u32::to_le_bytes(add));
                buffer[12..16].clone_from_slice(&u32::to_le_bytes(br));
                // the 4-byte signed immediate we'll load is after these
                // instructions, 16-bytes in.
                (veneer_offset + 16, LabelUse::PCRel32)
            }

            _ => panic!("Unsupported label-reference type for veneer generation!"),
        }
    }

    /// Map an external relocation to the label-use kind that implements it;
    /// only `Arm64Call` with a zero addend is supported (it is a Branch26).
    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> {
        match (reloc, addend) {
            (Reloc::Arm64Call, 0) => Some(LabelUse::Branch26),
            _ => None,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn inst_size_test() {
        // This test will help with unintentionally growing the size
        // of the Inst enum.
        let expected = if cfg!(target_pointer_width = "32") && !cfg!(target_arch = "arm") {
            28
        } else {
            32
        };
        assert_eq!(expected, core::mem::size_of::<Inst>());
    }
}