Path: blob/main/cranelift/codegen/src/isa/aarch64/inst/emit.rs
1693 views
//! AArch64 ISA: binary code emission.12use cranelift_control::ControlPlane;34use crate::ir::{self, types::*};5use crate::isa::aarch64::inst::*;6use crate::trace;78/// Memory addressing mode finalization: convert "special" modes (e.g.,9/// generic arbitrary stack offset) into real addressing modes, possibly by10/// emitting some helper instructions that come immediately before the use11/// of this amode.12pub fn mem_finalize(13sink: Option<&mut MachBuffer<Inst>>,14mem: &AMode,15access_ty: Type,16state: &EmitState,17) -> (SmallVec<[Inst; 4]>, AMode) {18match mem {19&AMode::RegOffset { off, .. }20| &AMode::SPOffset { off }21| &AMode::FPOffset { off }22| &AMode::IncomingArg { off }23| &AMode::SlotOffset { off } => {24let basereg = match mem {25&AMode::RegOffset { rn, .. } => rn,26&AMode::SPOffset { .. }27| &AMode::SlotOffset { .. }28| &AMode::IncomingArg { .. } => stack_reg(),29&AMode::FPOffset { .. } => fp_reg(),30_ => unreachable!(),31};32let off = match mem {33&AMode::IncomingArg { .. } => {34let frame_layout = state.frame_layout();35i64::from(36frame_layout.setup_area_size37+ frame_layout.tail_args_size38+ frame_layout.clobber_size39+ frame_layout.fixed_frame_storage_size40+ frame_layout.outgoing_args_size,41) - off42}43&AMode::SlotOffset { .. 
} => {44let adj = i64::from(state.frame_layout().outgoing_args_size);45trace!(46"mem_finalize: slot offset {} + adj {} -> {}",47off,48adj,49off + adj50);51off + adj52}53_ => off,54};5556if let Some(simm9) = SImm9::maybe_from_i64(off) {57let mem = AMode::Unscaled { rn: basereg, simm9 };58(smallvec![], mem)59} else if let Some(uimm12) = UImm12Scaled::maybe_from_i64(off, access_ty) {60let mem = AMode::UnsignedOffset {61rn: basereg,62uimm12,63};64(smallvec![], mem)65} else {66let tmp = writable_spilltmp_reg();67(68Inst::load_constant(tmp, off as u64),69AMode::RegExtended {70rn: basereg,71rm: tmp.to_reg(),72extendop: ExtendOp::SXTX,73},74)75}76}7778AMode::Const { addr } => {79let sink = match sink {80Some(sink) => sink,81None => return (smallvec![], mem.clone()),82};83let label = sink.get_label_for_constant(*addr);84let label = MemLabel::Mach(label);85(smallvec![], AMode::Label { label })86}8788_ => (smallvec![], mem.clone()),89}90}9192//=============================================================================93// Instructions and subcomponents: emission9495pub(crate) fn machreg_to_gpr(m: Reg) -> u32 {96assert_eq!(m.class(), RegClass::Int);97u32::from(m.to_real_reg().unwrap().hw_enc() & 31)98}99100pub(crate) fn machreg_to_vec(m: Reg) -> u32 {101assert_eq!(m.class(), RegClass::Float);102u32::from(m.to_real_reg().unwrap().hw_enc())103}104105fn machreg_to_gpr_or_vec(m: Reg) -> u32 {106u32::from(m.to_real_reg().unwrap().hw_enc() & 31)107}108109/// Encode a 3-register aeithmeric instruction.110pub fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {111(bits_31_21 << 21)112| (bits_15_10 << 10)113| machreg_to_gpr(rd.to_reg())114| (machreg_to_gpr(rn) << 5)115| (machreg_to_gpr(rm) << 16)116}117118fn enc_arith_rr_imm12(119bits_31_24: u32,120immshift: u32,121imm12: u32,122rn: Reg,123rd: Writable<Reg>,124) -> u32 {125(bits_31_24 << 24)126| (immshift << 22)127| (imm12 << 10)128| (machreg_to_gpr(rn) << 5)129| 
machreg_to_gpr(rd.to_reg())130}131132fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {133(bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())134}135136fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {137(top11 << 21)138| (machreg_to_gpr(rm) << 16)139| (bit15 << 15)140| (machreg_to_gpr(ra) << 10)141| (machreg_to_gpr(rn) << 5)142| machreg_to_gpr(rd.to_reg())143}144145fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {146assert!(off_26_0 < (1 << 26));147(op_31_26 << 26) | off_26_0148}149150fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {151assert!(off_18_0 < (1 << 19));152(op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)153}154155fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {156assert!(off_18_0 < (1 << 19));157assert!(cond < (1 << 4));158(op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond159}160161/// Set the size bit of an instruction.162fn enc_op_size(op: u32, size: OperandSize) -> u32 {163(op & !(1 << 31)) | (size.sf_bit() << 31)164}165166fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {167match kind {168CondBrKind::Zero(reg, size) => enc_op_size(169enc_cmpbr(0b0_011010_0, taken.as_offset19_or_zero(), reg),170size,171),172CondBrKind::NotZero(reg, size) => enc_op_size(173enc_cmpbr(0b0_011010_1, taken.as_offset19_or_zero(), reg),174size,175),176CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),177}178}179180fn enc_test_bit_and_branch(181kind: TestBitAndBranchKind,182taken: BranchTarget,183reg: Reg,184bit: u8,185) -> u32 {186assert!(bit < 64);187let op_31 = u32::from(bit >> 5);188let op_23_19 = u32::from(bit & 0b11111);189let op_30_24 = 0b0110110190| match kind {191TestBitAndBranchKind::Z => 0,192TestBitAndBranchKind::NZ => 1,193};194(op_31 << 31)195| (op_30_24 << 24)196| (op_23_19 << 19)197| (taken.as_offset14_or_zero() << 5)198| 
machreg_to_gpr(reg)199}200201/// Encode a move-wide instruction.202pub fn enc_move_wide(203op: MoveWideOp,204rd: Writable<Reg>,205imm: MoveWideConst,206size: OperandSize,207) -> u32 {208assert!(imm.shift <= 0b11);209let op = match op {210MoveWideOp::MovN => 0b00,211MoveWideOp::MovZ => 0b10,212};2130x12800000214| size.sf_bit() << 31215| op << 29216| u32::from(imm.shift) << 21217| u32::from(imm.bits) << 5218| machreg_to_gpr(rd.to_reg())219}220221/// Encode a move-keep immediate instruction.222pub fn enc_movk(rd: Writable<Reg>, imm: MoveWideConst, size: OperandSize) -> u32 {223assert!(imm.shift <= 0b11);2240x72800000225| size.sf_bit() << 31226| u32::from(imm.shift) << 21227| u32::from(imm.bits) << 5228| machreg_to_gpr(rd.to_reg())229}230231fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {232(op_31_22 << 22)233| (simm7.bits() << 15)234| (machreg_to_gpr(rt2) << 10)235| (machreg_to_gpr(rn) << 5)236| machreg_to_gpr(rt)237}238239fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {240(op_31_22 << 22)241| (simm9.bits() << 12)242| (op_11_10 << 10)243| (machreg_to_gpr(rn) << 5)244| machreg_to_gpr_or_vec(rd)245}246247fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {248(op_31_22 << 22)249| (0b1 << 24)250| (uimm12.bits() << 10)251| (machreg_to_gpr(rn) << 5)252| machreg_to_gpr_or_vec(rd)253}254255fn enc_ldst_reg(256op_31_22: u32,257rn: Reg,258rm: Reg,259s_bit: bool,260extendop: Option<ExtendOp>,261rd: Reg,262) -> u32 {263let s_bit = if s_bit { 1 } else { 0 };264let extend_bits = match extendop {265Some(ExtendOp::UXTW) => 0b010,266Some(ExtendOp::SXTW) => 0b110,267Some(ExtendOp::SXTX) => 0b111,268None => 0b011, // LSL269_ => panic!("bad extend mode for ld/st AMode"),270};271(op_31_22 << 22)272| (1 << 21)273| (machreg_to_gpr(rm) << 16)274| (extend_bits << 13)275| (s_bit << 12)276| (0b10 << 10)277| (machreg_to_gpr(rn) << 5)278| machreg_to_gpr_or_vec(rd)279}280281pub(crate) fn 
enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {282(op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)283}284285fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {286debug_assert_eq!(q & 0b1, q);287debug_assert_eq!(size & 0b11, size);2880b0_0_0011010_10_00000_110_0_00_00000_00000289| q << 30290| size << 10291| machreg_to_gpr(rn) << 5292| machreg_to_vec(rt.to_reg())293}294295fn enc_ldst_vec_pair(296opc: u32,297amode: u32,298is_load: bool,299simm7: SImm7Scaled,300rn: Reg,301rt: Reg,302rt2: Reg,303) -> u32 {304debug_assert_eq!(opc & 0b11, opc);305debug_assert_eq!(amode & 0b11, amode);3063070b00_10110_00_0_0000000_00000_00000_00000308| opc << 30309| amode << 23310| (is_load as u32) << 22311| simm7.bits() << 15312| machreg_to_vec(rt2) << 10313| machreg_to_gpr(rn) << 5314| machreg_to_vec(rt)315}316317fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {318(top11 << 21)319| (machreg_to_vec(rm) << 16)320| (bit15_10 << 10)321| (machreg_to_vec(rn) << 5)322| machreg_to_vec(rd.to_reg())323}324325fn enc_vec_rrr_long(326q: u32,327u: u32,328size: u32,329bit14: u32,330rm: Reg,331rn: Reg,332rd: Writable<Reg>,333) -> u32 {334debug_assert_eq!(q & 0b1, q);335debug_assert_eq!(u & 0b1, u);336debug_assert_eq!(size & 0b11, size);337debug_assert_eq!(bit14 & 0b1, bit14);3383390b0_0_0_01110_00_1_00000_100000_00000_00000340| q << 30341| u << 29342| size << 22343| bit14 << 14344| (machreg_to_vec(rm) << 16)345| (machreg_to_vec(rn) << 5)346| machreg_to_vec(rd.to_reg())347}348349fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {350(0b01011010110 << 21)351| size << 31352| opcode2 << 16353| opcode1 << 10354| machreg_to_gpr(rn) << 5355| machreg_to_gpr(rd.to_reg())356}357358pub(crate) fn enc_br(rn: Reg) -> u32 {3590b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)360}361362pub(crate) fn enc_adr_inst(opcode: u32, off: i32, rd: Writable<Reg>) -> u32 {363let off = 
u32::try_from(off).unwrap();364let immlo = off & 3;365let immhi = (off >> 2) & ((1 << 19) - 1);366opcode | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())367}368369pub(crate) fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {370let opcode = 0b00010000 << 24;371enc_adr_inst(opcode, off, rd)372}373374pub(crate) fn enc_adrp(off: i32, rd: Writable<Reg>) -> u32 {375let opcode = 0b10010000 << 24;376enc_adr_inst(opcode, off, rd)377}378379fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, op: u32, o2: u32) -> u32 {380debug_assert_eq!(op & 0b1, op);381debug_assert_eq!(o2 & 0b1, o2);3820b100_11010100_00000_0000_00_00000_00000383| (op << 30)384| (machreg_to_gpr(rm) << 16)385| (cond.bits() << 12)386| (o2 << 10)387| (machreg_to_gpr(rn) << 5)388| machreg_to_gpr(rd.to_reg())389}390391fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {3920b000_11110_00_1_00000_0000_11_00000_00000393| (size.ftype() << 22)394| (machreg_to_vec(rm) << 16)395| (machreg_to_vec(rn) << 5)396| machreg_to_vec(rd.to_reg())397| (cond.bits() << 12)398}399400fn enc_ccmp(size: OperandSize, rn: Reg, rm: Reg, nzcv: NZCV, cond: Cond) -> u32 {4010b0_1_1_11010010_00000_0000_00_00000_0_0000402| size.sf_bit() << 31403| machreg_to_gpr(rm) << 16404| cond.bits() << 12405| machreg_to_gpr(rn) << 5406| nzcv.bits()407}408409fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {4100b0_1_1_11010010_00000_0000_10_00000_0_0000411| size.sf_bit() << 31412| imm.bits() << 16413| cond.bits() << 12414| machreg_to_gpr(rn) << 5415| nzcv.bits()416}417418fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {419match size {420OperandSize::Size64 => {421debug_assert!(immr <= 63);422debug_assert!(imms <= 63);423}424OperandSize::Size32 => {425debug_assert!(immr <= 31);426debug_assert!(imms <= 31);427}428}429debug_assert_eq!(opc & 0b11, opc);430let n_bit = 
size.sf_bit();4310b0_00_100110_0_000000_000000_00000_00000432| size.sf_bit() << 31433| u32::from(opc) << 29434| n_bit << 22435| u32::from(immr) << 16436| u32::from(imms) << 10437| machreg_to_gpr(rn) << 5438| machreg_to_gpr(rd.to_reg())439}440441fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {4420b00001110_101_00000_00011_1_00000_00000443| ((is_16b as u32) << 30)444| machreg_to_vec(rd.to_reg())445| (machreg_to_vec(rn) << 16)446| (machreg_to_vec(rn) << 5)447}448449fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {450(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())451}452453fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {454(top22 << 10)455| (machreg_to_vec(rm) << 16)456| (machreg_to_vec(rn) << 5)457| machreg_to_vec(rd.to_reg())458}459460fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {461(top17 << 15)462| (machreg_to_vec(rm) << 16)463| (machreg_to_vec(ra) << 10)464| (machreg_to_vec(rn) << 5)465| machreg_to_vec(rd.to_reg())466}467468fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {4690b000_11110_00_1_00000_00_1000_00000_00000470| (size.ftype() << 22)471| (machreg_to_vec(rm) << 16)472| (machreg_to_vec(rn) << 5)473}474475fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {476(top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())477}478479fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {480(top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())481}482483fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {484(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())485}486487fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {488debug_assert_eq!(qu & 0b11, qu);489debug_assert_eq!(size & 0b11, size);490debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);491let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;492bits | qu << 29493| size << 
22494| bits_12_16 << 12495| machreg_to_vec(rn) << 5496| machreg_to_vec(rd.to_reg())497}498499fn enc_vec_rr_pair(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {500debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);5015020b010_11110_11_11000_11011_10_00000_00000503| bits_12_16 << 12504| machreg_to_vec(rn) << 5505| machreg_to_vec(rd.to_reg())506}507508fn enc_vec_rr_pair_long(u: u32, enc_size: u32, rd: Writable<Reg>, rn: Reg) -> u32 {509debug_assert_eq!(u & 0b1, u);510debug_assert_eq!(enc_size & 0b1, enc_size);5115120b0_1_0_01110_00_10000_00_0_10_10_00000_00000513| u << 29514| enc_size << 22515| machreg_to_vec(rn) << 5516| machreg_to_vec(rd.to_reg())517}518519fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {520debug_assert_eq!(q & 0b1, q);521debug_assert_eq!(u & 0b1, u);522debug_assert_eq!(size & 0b11, size);523debug_assert_eq!(opcode & 0b11111, opcode);5240b0_0_0_01110_00_11000_0_0000_10_00000_00000525| q << 30526| u << 29527| size << 22528| opcode << 12529| machreg_to_vec(rn) << 5530| machreg_to_vec(rd.to_reg())531}532533fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {534debug_assert_eq!(len & 0b11, len);5350b0_1_001110_000_00000_0_00_0_00_00000_00000536| (machreg_to_vec(rm) << 16)537| len << 13538| (is_extension as u32) << 12539| (machreg_to_vec(rn) << 5)540| machreg_to_vec(rd.to_reg())541}542543fn enc_dmb_ish() -> u32 {5440xD5033BBF545}546547fn enc_acq_rel(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable<Reg>, rn: Reg) -> u32 {548assert!(machreg_to_gpr(rt.to_reg()) != 31);549let sz = match ty {550I64 => 0b11,551I32 => 0b10,552I16 => 0b01,553I8 => 0b00,554_ => unreachable!(),555};556let bit15 = match op {557AtomicRMWOp::Swp => 0b1,558_ => 0b0,559};560let op = match op {561AtomicRMWOp::Add => 0b000,562AtomicRMWOp::Clr => 0b001,563AtomicRMWOp::Eor => 0b010,564AtomicRMWOp::Set => 0b011,565AtomicRMWOp::Smax => 0b100,566AtomicRMWOp::Smin => 0b101,567AtomicRMWOp::Umax => 
0b110,568AtomicRMWOp::Umin => 0b111,569AtomicRMWOp::Swp => 0b000,570};5710b00_111_000_111_00000_0_000_00_00000_00000572| (sz << 30)573| (machreg_to_gpr(rs) << 16)574| bit15 << 15575| (op << 12)576| (machreg_to_gpr(rn) << 5)577| machreg_to_gpr(rt.to_reg())578}579580fn enc_ldar(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {581let sz = match ty {582I64 => 0b11,583I32 => 0b10,584I16 => 0b01,585I8 => 0b00,586_ => unreachable!(),587};5880b00_001000_1_1_0_11111_1_11111_00000_00000589| (sz << 30)590| (machreg_to_gpr(rn) << 5)591| machreg_to_gpr(rt.to_reg())592}593594fn enc_stlr(ty: Type, rt: Reg, rn: Reg) -> u32 {595let sz = match ty {596I64 => 0b11,597I32 => 0b10,598I16 => 0b01,599I8 => 0b00,600_ => unreachable!(),601};6020b00_001000_100_11111_1_11111_00000_00000603| (sz << 30)604| (machreg_to_gpr(rn) << 5)605| machreg_to_gpr(rt)606}607608fn enc_ldaxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {609let sz = match ty {610I64 => 0b11,611I32 => 0b10,612I16 => 0b01,613I8 => 0b00,614_ => unreachable!(),615};6160b00_001000_0_1_0_11111_1_11111_00000_00000617| (sz << 30)618| (machreg_to_gpr(rn) << 5)619| machreg_to_gpr(rt.to_reg())620}621622fn enc_stlxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {623let sz = match ty {624I64 => 0b11,625I32 => 0b10,626I16 => 0b01,627I8 => 0b00,628_ => unreachable!(),629};6300b00_001000_000_00000_1_11111_00000_00000631| (sz << 30)632| (machreg_to_gpr(rs.to_reg()) << 16)633| (machreg_to_gpr(rn) << 5)634| machreg_to_gpr(rt)635}636637fn enc_cas(size: u32, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {638debug_assert_eq!(size & 0b11, size);6396400b00_0010001_1_1_00000_1_11111_00000_00000641| size << 30642| machreg_to_gpr(rs.to_reg()) << 16643| machreg_to_gpr(rn) << 5644| machreg_to_gpr(rt)645}646647fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {648let abc = (imm >> 5) as u32;649let defgh = (imm & 0b11111) as u32;650651debug_assert_eq!(cmode & 0b1111, cmode);652debug_assert_eq!(q_op & 0b11, 
q_op);6536540b0_0_0_0111100000_000_0000_01_00000_00000655| (q_op << 29)656| (abc << 16)657| (cmode << 12)658| (defgh << 5)659| machreg_to_vec(rd.to_reg())660}661662/// State carried between emissions of a sequence of instructions.663#[derive(Default, Clone, Debug)]664pub struct EmitState {665/// The user stack map for the upcoming instruction, as provided to666/// `pre_safepoint()`.667user_stack_map: Option<ir::UserStackMap>,668669/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and670/// optimized away at compiletime. See [cranelift_control].671ctrl_plane: ControlPlane,672673frame_layout: FrameLayout,674}675676impl MachInstEmitState<Inst> for EmitState {677fn new(abi: &Callee<AArch64MachineDeps>, ctrl_plane: ControlPlane) -> Self {678EmitState {679user_stack_map: None,680ctrl_plane,681frame_layout: abi.frame_layout().clone(),682}683}684685fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {686self.user_stack_map = user_stack_map;687}688689fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {690&mut self.ctrl_plane691}692693fn take_ctrl_plane(self) -> ControlPlane {694self.ctrl_plane695}696697fn frame_layout(&self) -> &FrameLayout {698&self.frame_layout699}700}701702impl EmitState {703fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {704self.user_stack_map.take()705}706707fn clear_post_insn(&mut self) {708self.user_stack_map = None;709}710}711712/// Constant state used during function compilation.713pub struct EmitInfo(settings::Flags);714715impl EmitInfo {716/// Create a constant state for emission of instructions.717pub fn new(flags: settings::Flags) -> Self {718Self(flags)719}720}721722impl MachInstEmit for Inst {723type State = EmitState;724type Info = EmitInfo;725726fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {727// N.B.: we *must* not exceed the "worst-case size" used to compute728// where to insert islands, except when islands are explicitly triggered729// (with an 
`EmitIsland`). We check this in debug builds. This is `mut`730// to allow disabling the check for `JTSequence`, which is always731// emitted following an `EmitIsland`.732let mut start_off = sink.cur_offset();733734match self {735&Inst::AluRRR {736alu_op,737size,738rd,739rn,740rm,741} => {742debug_assert!(match alu_op {743ALUOp::SMulH | ALUOp::UMulH => size == OperandSize::Size64,744_ => true,745});746let top11 = match alu_op {747ALUOp::Add => 0b00001011_000,748ALUOp::Adc => 0b00011010_000,749ALUOp::AdcS => 0b00111010_000,750ALUOp::Sub => 0b01001011_000,751ALUOp::Sbc => 0b01011010_000,752ALUOp::SbcS => 0b01111010_000,753ALUOp::Orr => 0b00101010_000,754ALUOp::And => 0b00001010_000,755ALUOp::AndS => 0b01101010_000,756ALUOp::Eor => 0b01001010_000,757ALUOp::OrrNot => 0b00101010_001,758ALUOp::AndNot => 0b00001010_001,759ALUOp::EorNot => 0b01001010_001,760ALUOp::AddS => 0b00101011_000,761ALUOp::SubS => 0b01101011_000,762ALUOp::SDiv | ALUOp::UDiv => 0b00011010_110,763ALUOp::Extr | ALUOp::Lsr | ALUOp::Asr | ALUOp::Lsl => 0b00011010_110,764ALUOp::SMulH => 0b10011011_010,765ALUOp::UMulH => 0b10011011_110,766};767768let top11 = top11 | size.sf_bit() << 10;769let bit15_10 = match alu_op {770ALUOp::SDiv => 0b000011,771ALUOp::UDiv => 0b000010,772ALUOp::Extr => 0b001011,773ALUOp::Lsr => 0b001001,774ALUOp::Asr => 0b001010,775ALUOp::Lsl => 0b001000,776ALUOp::SMulH | ALUOp::UMulH => 0b011111,777_ => 0b000000,778};779debug_assert_ne!(writable_stack_reg(), rd);780// The stack pointer is the zero register in this context, so this might be an781// indication that something is wrong.782debug_assert_ne!(stack_reg(), rn);783debug_assert_ne!(stack_reg(), rm);784sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));785}786&Inst::AluRRRR {787alu_op,788size,789rd,790rm,791rn,792ra,793} => {794let (top11, bit15) = match alu_op {795ALUOp3::MAdd => (0b0_00_11011_000, 0),796ALUOp3::MSub => (0b0_00_11011_000, 1),797ALUOp3::UMAddL => {798debug_assert!(size == 
OperandSize::Size32);799(0b1_00_11011_1_01, 0)800}801ALUOp3::SMAddL => {802debug_assert!(size == OperandSize::Size32);803(0b1_00_11011_0_01, 0)804}805};806let top11 = top11 | size.sf_bit() << 10;807sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));808}809&Inst::AluRRImm12 {810alu_op,811size,812rd,813rn,814ref imm12,815} => {816let top8 = match alu_op {817ALUOp::Add => 0b000_10001,818ALUOp::Sub => 0b010_10001,819ALUOp::AddS => 0b001_10001,820ALUOp::SubS => 0b011_10001,821_ => unimplemented!("{:?}", alu_op),822};823let top8 = top8 | size.sf_bit() << 7;824sink.put4(enc_arith_rr_imm12(825top8,826imm12.shift_bits(),827imm12.imm_bits(),828rn,829rd,830));831}832&Inst::AluRRImmLogic {833alu_op,834size,835rd,836rn,837ref imml,838} => {839let (top9, inv) = match alu_op {840ALUOp::Orr => (0b001_100100, false),841ALUOp::And => (0b000_100100, false),842ALUOp::AndS => (0b011_100100, false),843ALUOp::Eor => (0b010_100100, false),844ALUOp::OrrNot => (0b001_100100, true),845ALUOp::AndNot => (0b000_100100, true),846ALUOp::EorNot => (0b010_100100, true),847_ => unimplemented!("{:?}", alu_op),848};849let top9 = top9 | size.sf_bit() << 8;850let imml = if inv { imml.invert() } else { *imml };851sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));852}853854&Inst::AluRRImmShift {855alu_op,856size,857rd,858rn,859ref immshift,860} => {861let amt = immshift.value();862let (top10, immr, imms) = match alu_op {863ALUOp::Extr => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),864ALUOp::Lsr => (0b0101001100, u32::from(amt), 0b011111),865ALUOp::Asr => (0b0001001100, u32::from(amt), 0b011111),866ALUOp::Lsl => {867let bits = if size.is64() { 64 } else { 32 };868(8690b0101001100,870u32::from((bits - amt) % bits),871u32::from(bits - 1 - amt),872)873}874_ => unimplemented!("{:?}", alu_op),875};876let top10 = top10 | size.sf_bit() << 9 | size.sf_bit();877let imms = match alu_op {878ALUOp::Lsr | ALUOp::Asr => imms | size.sf_bit() << 5,879_ => imms,880};881sink.put4(882(top10 << 22)883| 
(immr << 16)884| (imms << 10)885| (machreg_to_gpr(rn) << 5)886| machreg_to_gpr(rd.to_reg()),887);888}889890&Inst::AluRRRShift {891alu_op,892size,893rd,894rn,895rm,896ref shiftop,897} => {898let top11: u32 = match alu_op {899ALUOp::Add => 0b000_01011000,900ALUOp::AddS => 0b001_01011000,901ALUOp::Sub => 0b010_01011000,902ALUOp::SubS => 0b011_01011000,903ALUOp::Orr => 0b001_01010000,904ALUOp::And => 0b000_01010000,905ALUOp::AndS => 0b011_01010000,906ALUOp::Eor => 0b010_01010000,907ALUOp::OrrNot => 0b001_01010001,908ALUOp::EorNot => 0b010_01010001,909ALUOp::AndNot => 0b000_01010001,910ALUOp::Extr => 0b000_10011100,911_ => unimplemented!("{:?}", alu_op),912};913let top11 = top11 | size.sf_bit() << 10;914let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);915let bits_15_10 = u32::from(shiftop.amt().value());916sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));917}918919&Inst::AluRRRExtend {920alu_op,921size,922rd,923rn,924rm,925extendop,926} => {927let top11: u32 = match alu_op {928ALUOp::Add => 0b00001011001,929ALUOp::Sub => 0b01001011001,930ALUOp::AddS => 0b00101011001,931ALUOp::SubS => 0b01101011001,932_ => unimplemented!("{:?}", alu_op),933};934let top11 = top11 | size.sf_bit() << 10;935let bits_15_10 = u32::from(extendop.bits()) << 3;936sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));937}938939&Inst::BitRR {940op, size, rd, rn, ..941} => {942let (op1, op2) = match op {943BitOp::RBit => (0b00000, 0b000000),944BitOp::Clz => (0b00000, 0b000100),945BitOp::Cls => (0b00000, 0b000101),946BitOp::Rev16 => (0b00000, 0b000001),947BitOp::Rev32 => (0b00000, 0b000010),948BitOp::Rev64 => (0b00000, 0b000011),949};950sink.put4(enc_bit_rr(size.sf_bit(), op1, op2, rn, rd))951}952953&Inst::ULoad8 { rd, ref mem, flags }954| &Inst::SLoad8 { rd, ref mem, flags }955| &Inst::ULoad16 { rd, ref mem, flags }956| &Inst::SLoad16 { rd, ref mem, flags }957| &Inst::ULoad32 { rd, ref mem, flags }958| &Inst::SLoad32 { rd, ref mem, flags }959| &Inst::ULoad64 {960rd, ref mem, flags, 
..961}962| &Inst::FpuLoad16 { rd, ref mem, flags }963| &Inst::FpuLoad32 { rd, ref mem, flags }964| &Inst::FpuLoad64 { rd, ref mem, flags }965| &Inst::FpuLoad128 { rd, ref mem, flags } => {966let mem = mem.clone();967let access_ty = self.mem_type().unwrap();968let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);969970for inst in mem_insts.into_iter() {971inst.emit(sink, emit_info, state);972}973974// ldst encoding helpers take Reg, not Writable<Reg>.975let rd = rd.to_reg();976977// This is the base opcode (top 10 bits) for the "unscaled978// immediate" form (Unscaled). Other addressing modes will OR in979// other values for bits 24/25 (bits 1/2 of this constant).980let op = match self {981Inst::ULoad8 { .. } => 0b0011100001,982Inst::SLoad8 { .. } => 0b0011100010,983Inst::ULoad16 { .. } => 0b0111100001,984Inst::SLoad16 { .. } => 0b0111100010,985Inst::ULoad32 { .. } => 0b1011100001,986Inst::SLoad32 { .. } => 0b1011100010,987Inst::ULoad64 { .. } => 0b1111100001,988Inst::FpuLoad16 { .. } => 0b0111110001,989Inst::FpuLoad32 { .. } => 0b1011110001,990Inst::FpuLoad64 { .. } => 0b1111110001,991Inst::FpuLoad128 { .. } => 0b0011110011,992_ => unreachable!(),993};994995if let Some(trap_code) = flags.trap_code() {996// Register the offset at which the actual load instruction starts.997sink.add_trap(trap_code);998}9991000match &mem {1001&AMode::Unscaled { rn, simm9 } => {1002let reg = rn;1003sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));1004}1005&AMode::UnsignedOffset { rn, uimm12 } => {1006let reg = rn;1007sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));1008}1009&AMode::RegReg { rn, rm } => {1010let r1 = rn;1011let r2 = rm;1012sink.put4(enc_ldst_reg(1013op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,1014));1015}1016&AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {1017let r1 = rn;1018let r2 = rm;1019let extendop = match &mem {1020&AMode::RegScaled { .. } => None,1021&AMode::RegScaledExtended { extendop, .. 
} => Some(extendop),1022_ => unreachable!(),1023};1024sink.put4(enc_ldst_reg(1025op, r1, r2, /* scaled = */ true, extendop, rd,1026));1027}1028&AMode::RegExtended { rn, rm, extendop } => {1029let r1 = rn;1030let r2 = rm;1031sink.put4(enc_ldst_reg(1032op,1033r1,1034r2,1035/* scaled = */ false,1036Some(extendop),1037rd,1038));1039}1040&AMode::Label { ref label } => {1041let offset = match label {1042// cast i32 to u32 (two's-complement)1043MemLabel::PCRel(off) => *off as u32,1044// Emit a relocation into the `MachBuffer`1045// for the label that's being loaded from and1046// encode an address of 0 in its place which will1047// get filled in by relocation resolution later on.1048MemLabel::Mach(label) => {1049sink.use_label_at_offset(1050sink.cur_offset(),1051*label,1052LabelUse::Ldr19,1053);105401055}1056} / 4;1057assert!(offset < (1 << 19));1058match self {1059&Inst::ULoad32 { .. } => {1060sink.put4(enc_ldst_imm19(0b00011000, offset, rd));1061}1062&Inst::SLoad32 { .. } => {1063sink.put4(enc_ldst_imm19(0b10011000, offset, rd));1064}1065&Inst::FpuLoad32 { .. } => {1066sink.put4(enc_ldst_imm19(0b00011100, offset, rd));1067}1068&Inst::ULoad64 { .. } => {1069sink.put4(enc_ldst_imm19(0b01011000, offset, rd));1070}1071&Inst::FpuLoad64 { .. } => {1072sink.put4(enc_ldst_imm19(0b01011100, offset, rd));1073}1074&Inst::FpuLoad128 { .. } => {1075sink.put4(enc_ldst_imm19(0b10011100, offset, rd));1076}1077_ => panic!("Unsupported size for LDR from constant pool!"),1078}1079}1080&AMode::SPPreIndexed { simm9 } => {1081let reg = stack_reg();1082sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));1083}1084&AMode::SPPostIndexed { simm9 } => {1085let reg = stack_reg();1086sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));1087}1088// Eliminated by `mem_finalize()` above.1089&AMode::SPOffset { .. }1090| &AMode::FPOffset { .. }1091| &AMode::IncomingArg { .. }1092| &AMode::SlotOffset { .. }1093| &AMode::Const { .. }1094| &AMode::RegOffset { .. 
} => {1095panic!("Should not see {mem:?} here!")1096}1097}1098}10991100&Inst::Store8 { rd, ref mem, flags }1101| &Inst::Store16 { rd, ref mem, flags }1102| &Inst::Store32 { rd, ref mem, flags }1103| &Inst::Store64 { rd, ref mem, flags }1104| &Inst::FpuStore16 { rd, ref mem, flags }1105| &Inst::FpuStore32 { rd, ref mem, flags }1106| &Inst::FpuStore64 { rd, ref mem, flags }1107| &Inst::FpuStore128 { rd, ref mem, flags } => {1108let mem = mem.clone();1109let access_ty = self.mem_type().unwrap();1110let (mem_insts, mem) = mem_finalize(Some(sink), &mem, access_ty, state);11111112for inst in mem_insts.into_iter() {1113inst.emit(sink, emit_info, state);1114}11151116let op = match self {1117Inst::Store8 { .. } => 0b0011100000,1118Inst::Store16 { .. } => 0b0111100000,1119Inst::Store32 { .. } => 0b1011100000,1120Inst::Store64 { .. } => 0b1111100000,1121Inst::FpuStore16 { .. } => 0b0111110000,1122Inst::FpuStore32 { .. } => 0b1011110000,1123Inst::FpuStore64 { .. } => 0b1111110000,1124Inst::FpuStore128 { .. } => 0b0011110010,1125_ => unreachable!(),1126};11271128if let Some(trap_code) = flags.trap_code() {1129// Register the offset at which the actual store instruction starts.1130sink.add_trap(trap_code);1131}11321133match &mem {1134&AMode::Unscaled { rn, simm9 } => {1135let reg = rn;1136sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));1137}1138&AMode::UnsignedOffset { rn, uimm12 } => {1139let reg = rn;1140sink.put4(enc_ldst_uimm12(op, uimm12, reg, rd));1141}1142&AMode::RegReg { rn, rm } => {1143let r1 = rn;1144let r2 = rm;1145sink.put4(enc_ldst_reg(1146op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,1147));1148}1149&AMode::RegScaled { rn, rm } | &AMode::RegScaledExtended { rn, rm, .. } => {1150let r1 = rn;1151let r2 = rm;1152let extendop = match &mem {1153&AMode::RegScaled { .. } => None,1154&AMode::RegScaledExtended { extendop, .. 
} => Some(extendop),1155_ => unreachable!(),1156};1157sink.put4(enc_ldst_reg(1158op, r1, r2, /* scaled = */ true, extendop, rd,1159));1160}1161&AMode::RegExtended { rn, rm, extendop } => {1162let r1 = rn;1163let r2 = rm;1164sink.put4(enc_ldst_reg(1165op,1166r1,1167r2,1168/* scaled = */ false,1169Some(extendop),1170rd,1171));1172}1173&AMode::Label { .. } => {1174panic!("Store to a MemLabel not implemented!");1175}1176&AMode::SPPreIndexed { simm9 } => {1177let reg = stack_reg();1178sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd));1179}1180&AMode::SPPostIndexed { simm9 } => {1181let reg = stack_reg();1182sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd));1183}1184// Eliminated by `mem_finalize()` above.1185&AMode::SPOffset { .. }1186| &AMode::FPOffset { .. }1187| &AMode::IncomingArg { .. }1188| &AMode::SlotOffset { .. }1189| &AMode::Const { .. }1190| &AMode::RegOffset { .. } => {1191panic!("Should not see {mem:?} here!")1192}1193}1194}11951196&Inst::StoreP64 {1197rt,1198rt2,1199ref mem,1200flags,1201} => {1202let mem = mem.clone();1203if let Some(trap_code) = flags.trap_code() {1204// Register the offset at which the actual store instruction starts.1205sink.add_trap(trap_code);1206}1207match &mem {1208&PairAMode::SignedOffset { reg, simm7 } => {1209assert_eq!(simm7.scale_ty, I64);1210sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));1211}1212&PairAMode::SPPreIndexed { simm7 } => {1213assert_eq!(simm7.scale_ty, I64);1214let reg = stack_reg();1215sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2));1216}1217&PairAMode::SPPostIndexed { simm7 } => {1218assert_eq!(simm7.scale_ty, I64);1219let reg = stack_reg();1220sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2));1221}1222}1223}1224&Inst::LoadP64 {1225rt,1226rt2,1227ref mem,1228flags,1229} => {1230let rt = rt.to_reg();1231let rt2 = rt2.to_reg();1232let mem = mem.clone();1233if let Some(trap_code) = flags.trap_code() {1234// Register the offset at which the actual load instruction 
starts.1235sink.add_trap(trap_code);1236}12371238match &mem {1239&PairAMode::SignedOffset { reg, simm7 } => {1240assert_eq!(simm7.scale_ty, I64);1241sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));1242}1243&PairAMode::SPPreIndexed { simm7 } => {1244assert_eq!(simm7.scale_ty, I64);1245let reg = stack_reg();1246sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2));1247}1248&PairAMode::SPPostIndexed { simm7 } => {1249assert_eq!(simm7.scale_ty, I64);1250let reg = stack_reg();1251sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2));1252}1253}1254}1255&Inst::FpuLoadP64 {1256rt,1257rt2,1258ref mem,1259flags,1260}1261| &Inst::FpuLoadP128 {1262rt,1263rt2,1264ref mem,1265flags,1266} => {1267let rt = rt.to_reg();1268let rt2 = rt2.to_reg();1269let mem = mem.clone();12701271if let Some(trap_code) = flags.trap_code() {1272// Register the offset at which the actual load instruction starts.1273sink.add_trap(trap_code);1274}12751276let opc = match self {1277&Inst::FpuLoadP64 { .. } => 0b01,1278&Inst::FpuLoadP128 { .. 
} => 0b10,1279_ => unreachable!(),1280};12811282match &mem {1283&PairAMode::SignedOffset { reg, simm7 } => {1284assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);1285sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2));1286}1287&PairAMode::SPPreIndexed { simm7 } => {1288assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);1289let reg = stack_reg();1290sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2));1291}1292&PairAMode::SPPostIndexed { simm7 } => {1293assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);1294let reg = stack_reg();1295sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2));1296}1297}1298}1299&Inst::FpuStoreP64 {1300rt,1301rt2,1302ref mem,1303flags,1304}1305| &Inst::FpuStoreP128 {1306rt,1307rt2,1308ref mem,1309flags,1310} => {1311let mem = mem.clone();13121313if let Some(trap_code) = flags.trap_code() {1314// Register the offset at which the actual store instruction starts.1315sink.add_trap(trap_code);1316}13171318let opc = match self {1319&Inst::FpuStoreP64 { .. } => 0b01,1320&Inst::FpuStoreP128 { .. } => 0b10,1321_ => unreachable!(),1322};13231324match &mem {1325&PairAMode::SignedOffset { reg, simm7 } => {1326assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);1327sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2));1328}1329&PairAMode::SPPreIndexed { simm7 } => {1330assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);1331let reg = stack_reg();1332sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2));1333}1334&PairAMode::SPPostIndexed { simm7 } => {1335assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16);1336let reg = stack_reg();1337sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2));1338}1339}1340}1341&Inst::Mov { size, rd, rm } => {1342assert!(rd.to_reg().class() == rm.class());1343assert!(rm.class() == RegClass::Int);13441345match size {1346OperandSize::Size64 => {1347// MOV to SP is interpreted as MOV to XZR instead. 
And our codegen1348// should never MOV to XZR.1349assert!(rd.to_reg() != stack_reg());13501351if rm == stack_reg() {1352// We can't use ORR here, so use an `add rd, sp, #0` instead.1353let imm12 = Imm12::maybe_from_u64(0).unwrap();1354sink.put4(enc_arith_rr_imm12(13550b100_10001,1356imm12.shift_bits(),1357imm12.imm_bits(),1358rm,1359rd,1360));1361} else {1362// Encoded as ORR rd, rm, zero.1363sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));1364}1365}1366OperandSize::Size32 => {1367// MOV to SP is interpreted as MOV to XZR instead. And our codegen1368// should never MOV to XZR.1369assert!(machreg_to_gpr(rd.to_reg()) != 31);1370// Encoded as ORR rd, rm, zero.1371sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));1372}1373}1374}1375&Inst::MovFromPReg { rd, rm } => {1376let rm: Reg = rm.into();1377debug_assert!(1378[1379regs::fp_reg(),1380regs::stack_reg(),1381regs::link_reg(),1382regs::pinned_reg()1383]1384.contains(&rm)1385);1386assert!(rm.class() == RegClass::Int);1387assert!(rd.to_reg().class() == rm.class());1388let size = OperandSize::Size64;1389Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);1390}1391&Inst::MovToPReg { rd, rm } => {1392let rd: Writable<Reg> = Writable::from_reg(rd.into());1393debug_assert!(1394[1395regs::fp_reg(),1396regs::stack_reg(),1397regs::link_reg(),1398regs::pinned_reg()1399]1400.contains(&rd.to_reg())1401);1402assert!(rd.to_reg().class() == RegClass::Int);1403assert!(rm.class() == rd.to_reg().class());1404let size = OperandSize::Size64;1405Inst::Mov { size, rd, rm }.emit(sink, emit_info, state);1406}1407&Inst::MovWide { op, rd, imm, size } => {1408sink.put4(enc_move_wide(op, rd, imm, size));1409}1410&Inst::MovK { rd, rn, imm, size } => {1411debug_assert_eq!(rn, rd.to_reg());1412sink.put4(enc_movk(rd, imm, size));1413}1414&Inst::CSel { rd, rn, rm, cond } => {1415sink.put4(enc_csel(rd, rn, rm, cond, 0, 0));1416}1417&Inst::CSNeg { rd, rn, rm, cond } => {1418sink.put4(enc_csel(rd, rn, rm, 
cond, 1, 1));1419}1420&Inst::CSet { rd, cond } => {1421sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 0, 1));1422}1423&Inst::CSetm { rd, cond } => {1424sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0));1425}1426&Inst::CCmp {1427size,1428rn,1429rm,1430nzcv,1431cond,1432} => {1433sink.put4(enc_ccmp(size, rn, rm, nzcv, cond));1434}1435&Inst::CCmpImm {1436size,1437rn,1438imm,1439nzcv,1440cond,1441} => {1442sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));1443}1444&Inst::AtomicRMW {1445ty,1446op,1447rs,1448rt,1449rn,1450flags,1451} => {1452if let Some(trap_code) = flags.trap_code() {1453sink.add_trap(trap_code);1454}14551456sink.put4(enc_acq_rel(ty, op, rs, rt, rn));1457}1458&Inst::AtomicRMWLoop { ty, op, flags, .. } => {1459/* Emit this:1460again:1461ldaxr{,b,h} x/w27, [x25]1462// maybe sign extend1463op x28, x27, x26 // op is add,sub,and,orr,eor1464stlxr{,b,h} w24, x/w28, [x25]1465cbnz x24, again14661467Operand conventions:1468IN: x25 (addr), x26 (2nd arg for op)1469OUT: x27 (old value), x24 (trashed), x28 (trashed)14701471It is unfortunate that, per the ARM documentation, x28 cannot be used for1472both the store-data and success-flag operands of stlxr. 
This causes the1473instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x241474instead for the success-flag.1475*/1476// TODO: We should not hardcode registers here, a better idea would be to1477// pass some scratch registers in the AtomicRMWLoop pseudo-instruction, and use those1478let xzr = zero_reg();1479let x24 = xreg(24);1480let x25 = xreg(25);1481let x26 = xreg(26);1482let x27 = xreg(27);1483let x28 = xreg(28);1484let x24wr = writable_xreg(24);1485let x27wr = writable_xreg(27);1486let x28wr = writable_xreg(28);1487let again_label = sink.get_label();14881489// again:1490sink.bind_label(again_label, &mut state.ctrl_plane);14911492if let Some(trap_code) = flags.trap_code() {1493sink.add_trap(trap_code);1494}14951496sink.put4(enc_ldaxr(ty, x27wr, x25)); // ldaxr x27, [x25]1497let size = OperandSize::from_ty(ty);1498let sign_ext = match op {1499AtomicRMWLoopOp::Smin | AtomicRMWLoopOp::Smax => match ty {1500I16 => Some((ExtendOp::SXTH, 16)),1501I8 => Some((ExtendOp::SXTB, 8)),1502_ => None,1503},1504_ => None,1505};15061507// sxt{b|h} the loaded result if necessary.1508if sign_ext.is_some() {1509let (_, from_bits) = sign_ext.unwrap();1510Inst::Extend {1511rd: x27wr,1512rn: x27,1513signed: true,1514from_bits,1515to_bits: size.bits(),1516}1517.emit(sink, emit_info, state);1518}15191520match op {1521AtomicRMWLoopOp::Xchg => {} // do nothing1522AtomicRMWLoopOp::Nand => {1523// and x28, x27, x261524// mvn x28, x2815251526Inst::AluRRR {1527alu_op: ALUOp::And,1528size,1529rd: x28wr,1530rn: x27,1531rm: x26,1532}1533.emit(sink, emit_info, state);15341535Inst::AluRRR {1536alu_op: ALUOp::OrrNot,1537size,1538rd: x28wr,1539rn: xzr,1540rm: x28,1541}1542.emit(sink, emit_info, state);1543}1544AtomicRMWLoopOp::Umin1545| AtomicRMWLoopOp::Umax1546| AtomicRMWLoopOp::Smin1547| AtomicRMWLoopOp::Smax => {1548// cmp x27, x26 {?sxt}1549// csel.op x28, x27, x2615501551let cond = match op {1552AtomicRMWLoopOp::Umin => Cond::Lo,1553AtomicRMWLoopOp::Umax => 
Cond::Hi,1554AtomicRMWLoopOp::Smin => Cond::Lt,1555AtomicRMWLoopOp::Smax => Cond::Gt,1556_ => unreachable!(),1557};15581559if sign_ext.is_some() {1560let (extendop, _) = sign_ext.unwrap();1561Inst::AluRRRExtend {1562alu_op: ALUOp::SubS,1563size,1564rd: writable_zero_reg(),1565rn: x27,1566rm: x26,1567extendop,1568}1569.emit(sink, emit_info, state);1570} else {1571Inst::AluRRR {1572alu_op: ALUOp::SubS,1573size,1574rd: writable_zero_reg(),1575rn: x27,1576rm: x26,1577}1578.emit(sink, emit_info, state);1579}15801581Inst::CSel {1582cond,1583rd: x28wr,1584rn: x27,1585rm: x26,1586}1587.emit(sink, emit_info, state);1588}1589_ => {1590// add/sub/and/orr/eor x28, x27, x261591let alu_op = match op {1592AtomicRMWLoopOp::Add => ALUOp::Add,1593AtomicRMWLoopOp::Sub => ALUOp::Sub,1594AtomicRMWLoopOp::And => ALUOp::And,1595AtomicRMWLoopOp::Orr => ALUOp::Orr,1596AtomicRMWLoopOp::Eor => ALUOp::Eor,1597AtomicRMWLoopOp::Nand1598| AtomicRMWLoopOp::Umin1599| AtomicRMWLoopOp::Umax1600| AtomicRMWLoopOp::Smin1601| AtomicRMWLoopOp::Smax1602| AtomicRMWLoopOp::Xchg => unreachable!(),1603};16041605Inst::AluRRR {1606alu_op,1607size,1608rd: x28wr,1609rn: x27,1610rm: x26,1611}1612.emit(sink, emit_info, state);1613}1614}16151616if let Some(trap_code) = flags.trap_code() {1617sink.add_trap(trap_code);1618}1619if op == AtomicRMWLoopOp::Xchg {1620sink.put4(enc_stlxr(ty, x24wr, x26, x25)); // stlxr w24, x26, [x25]1621} else {1622sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]1623}16241625// cbnz w24, again1626// Note, we're actually testing x24, and relying on the default zero-high-half1627// rule in the assignment that `stlxr` does.1628let br_offset = sink.cur_offset();1629sink.put4(enc_conditional_br(1630BranchTarget::Label(again_label),1631CondBrKind::NotZero(x24, OperandSize::Size64),1632));1633sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);1634}1635&Inst::AtomicCAS {1636rd,1637rs,1638rt,1639rn,1640ty,1641flags,1642} => {1643debug_assert_eq!(rd.to_reg(), 
rs);1644let size = match ty {1645I8 => 0b00,1646I16 => 0b01,1647I32 => 0b10,1648I64 => 0b11,1649_ => panic!("Unsupported type: {ty}"),1650};16511652if let Some(trap_code) = flags.trap_code() {1653sink.add_trap(trap_code);1654}16551656sink.put4(enc_cas(size, rd, rt, rn));1657}1658&Inst::AtomicCASLoop { ty, flags, .. } => {1659/* Emit this:1660again:1661ldaxr{,b,h} x/w27, [x25]1662cmp x27, x/w26 uxt{b,h}1663b.ne out1664stlxr{,b,h} w24, x/w28, [x25]1665cbnz x24, again1666out:16671668Operand conventions:1669IN: x25 (addr), x26 (expected value), x28 (replacement value)1670OUT: x27 (old value), x24 (trashed)1671*/1672let x24 = xreg(24);1673let x25 = xreg(25);1674let x26 = xreg(26);1675let x27 = xreg(27);1676let x28 = xreg(28);1677let xzrwr = writable_zero_reg();1678let x24wr = writable_xreg(24);1679let x27wr = writable_xreg(27);1680let again_label = sink.get_label();1681let out_label = sink.get_label();16821683// again:1684sink.bind_label(again_label, &mut state.ctrl_plane);16851686if let Some(trap_code) = flags.trap_code() {1687sink.add_trap(trap_code);1688}16891690// ldaxr x27, [x25]1691sink.put4(enc_ldaxr(ty, x27wr, x25));16921693// The top 32-bits are zero-extended by the ldaxr so we don't1694// have to use UXTW, just the x-form of the register.1695let (bit21, extend_op) = match ty {1696I8 => (0b1, 0b000000),1697I16 => (0b1, 0b001000),1698_ => (0b0, 0b000000),1699};1700let bits_31_21 = 0b111_01011_000 | bit21;1701// cmp x27, x26 (== subs xzr, x27, x26)1702sink.put4(enc_arith_rrr(bits_31_21, extend_op, xzrwr, x27, x26));17031704// b.ne out1705let br_out_offset = sink.cur_offset();1706sink.put4(enc_conditional_br(1707BranchTarget::Label(out_label),1708CondBrKind::Cond(Cond::Ne),1709));1710sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);17111712if let Some(trap_code) = flags.trap_code() {1713sink.add_trap(trap_code);1714}17151716sink.put4(enc_stlxr(ty, x24wr, x28, x25)); // stlxr w24, x28, [x25]17171718// cbnz w24, again.1719// Note, we're 
actually testing x24, and relying on the default zero-high-half1720// rule in the assignment that `stlxr` does.1721let br_again_offset = sink.cur_offset();1722sink.put4(enc_conditional_br(1723BranchTarget::Label(again_label),1724CondBrKind::NotZero(x24, OperandSize::Size64),1725));1726sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);17271728// out:1729sink.bind_label(out_label, &mut state.ctrl_plane);1730}1731&Inst::LoadAcquire {1732access_ty,1733rt,1734rn,1735flags,1736} => {1737if let Some(trap_code) = flags.trap_code() {1738sink.add_trap(trap_code);1739}17401741sink.put4(enc_ldar(access_ty, rt, rn));1742}1743&Inst::StoreRelease {1744access_ty,1745rt,1746rn,1747flags,1748} => {1749if let Some(trap_code) = flags.trap_code() {1750sink.add_trap(trap_code);1751}17521753sink.put4(enc_stlr(access_ty, rt, rn));1754}1755&Inst::Fence {} => {1756sink.put4(enc_dmb_ish()); // dmb ish1757}1758&Inst::Csdb {} => {1759sink.put4(0xd503229f);1760}1761&Inst::FpuMove32 { rd, rn } => {1762sink.put4(enc_fpurr(0b000_11110_00_1_000000_10000, rd, rn));1763}1764&Inst::FpuMove64 { rd, rn } => {1765sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn));1766}1767&Inst::FpuMove128 { rd, rn } => {1768sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));1769}1770&Inst::FpuMoveFromVec { rd, rn, idx, size } => {1771let (imm5, shift, mask) = match size.lane_size() {1772ScalarSize::Size32 => (0b00100, 3, 0b011),1773ScalarSize::Size64 => (0b01000, 4, 0b001),1774_ => unimplemented!(),1775};1776debug_assert_eq!(idx & mask, idx);1777let imm5 = imm5 | ((idx as u32) << shift);1778sink.put4(17790b010_11110000_00000_000001_00000_000001780| (imm5 << 16)1781| (machreg_to_vec(rn) << 5)1782| machreg_to_vec(rd.to_reg()),1783);1784}1785&Inst::FpuExtend { rd, rn, size } => {1786sink.put4(enc_fpurr(17870b000_11110_00_1_000000_10000 | (size.ftype() << 12),1788rd,1789rn,1790));1791}1792&Inst::FpuRR {1793fpu_op,1794size,1795rd,1796rn,1797} => {1798let top22 = match fpu_op {1799FPUOp1::Abs => 
0b000_11110_00_1_000001_10000,1800FPUOp1::Neg => 0b000_11110_00_1_000010_10000,1801FPUOp1::Sqrt => 0b000_11110_00_1_000011_10000,1802FPUOp1::Cvt32To64 => {1803debug_assert_eq!(size, ScalarSize::Size32);18040b000_11110_00_1_000101_100001805}1806FPUOp1::Cvt64To32 => {1807debug_assert_eq!(size, ScalarSize::Size64);18080b000_11110_01_1_000100_100001809}1810};1811let top22 = top22 | size.ftype() << 12;1812sink.put4(enc_fpurr(top22, rd, rn));1813}1814&Inst::FpuRRR {1815fpu_op,1816size,1817rd,1818rn,1819rm,1820} => {1821let top22 = match fpu_op {1822FPUOp2::Add => 0b000_11110_00_1_00000_001010,1823FPUOp2::Sub => 0b000_11110_00_1_00000_001110,1824FPUOp2::Mul => 0b000_11110_00_1_00000_000010,1825FPUOp2::Div => 0b000_11110_00_1_00000_000110,1826FPUOp2::Max => 0b000_11110_00_1_00000_010010,1827FPUOp2::Min => 0b000_11110_00_1_00000_010110,1828};1829let top22 = top22 | size.ftype() << 12;1830sink.put4(enc_fpurrr(top22, rd, rn, rm));1831}1832&Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {1833FPUOpRI::UShr32(imm) => {1834debug_assert_eq!(32, imm.lane_size_in_bits);1835sink.put4(18360b0_0_1_011110_0000000_00_0_0_0_1_00000_000001837| imm.enc() << 161838| machreg_to_vec(rn) << 51839| machreg_to_vec(rd.to_reg()),1840)1841}1842FPUOpRI::UShr64(imm) => {1843debug_assert_eq!(64, imm.lane_size_in_bits);1844sink.put4(18450b01_1_111110_0000000_00_0_0_0_1_00000_000001846| imm.enc() << 161847| machreg_to_vec(rn) << 51848| machreg_to_vec(rd.to_reg()),1849)1850}1851},1852&Inst::FpuRRIMod { fpu_op, rd, ri, rn } => {1853debug_assert_eq!(rd.to_reg(), ri);1854match fpu_op {1855FPUOpRIMod::Sli64(imm) => {1856debug_assert_eq!(64, imm.lane_size_in_bits);1857sink.put4(18580b01_1_111110_0000000_010101_00000_000001859| imm.enc() << 161860| machreg_to_vec(rn) << 51861| machreg_to_vec(rd.to_reg()),1862)1863}1864FPUOpRIMod::Sli32(imm) => {1865debug_assert_eq!(32, imm.lane_size_in_bits);1866sink.put4(18670b0_0_1_011110_0000000_010101_00000_000001868| imm.enc() << 161869| machreg_to_vec(rn) << 51870| 
machreg_to_vec(rd.to_reg()),1871)1872}1873}1874}1875&Inst::FpuRRRR {1876fpu_op,1877size,1878rd,1879rn,1880rm,1881ra,1882} => {1883let top17 = match fpu_op {1884FPUOp3::MAdd => 0b000_11111_00_0_00000_0,1885FPUOp3::MSub => 0b000_11111_00_0_00000_1,1886FPUOp3::NMAdd => 0b000_11111_00_1_00000_0,1887FPUOp3::NMSub => 0b000_11111_00_1_00000_1,1888};1889let top17 = top17 | size.ftype() << 7;1890sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));1891}1892&Inst::VecMisc { op, rd, rn, size } => {1893let (q, enc_size) = size.enc_size();1894let (u, bits_12_16, size) = match op {1895VecMisc2::Not => (0b1, 0b00101, 0b00),1896VecMisc2::Neg => (0b1, 0b01011, enc_size),1897VecMisc2::Abs => (0b0, 0b01011, enc_size),1898VecMisc2::Fabs => {1899debug_assert!(1900size == VectorSize::Size32x21901|| size == VectorSize::Size32x41902|| size == VectorSize::Size64x21903);1904(0b0, 0b01111, enc_size)1905}1906VecMisc2::Fneg => {1907debug_assert!(1908size == VectorSize::Size32x21909|| size == VectorSize::Size32x41910|| size == VectorSize::Size64x21911);1912(0b1, 0b01111, enc_size)1913}1914VecMisc2::Fsqrt => {1915debug_assert!(1916size == VectorSize::Size32x21917|| size == VectorSize::Size32x41918|| size == VectorSize::Size64x21919);1920(0b1, 0b11111, enc_size)1921}1922VecMisc2::Rev16 => {1923debug_assert_eq!(size, VectorSize::Size8x16);1924(0b0, 0b00001, enc_size)1925}1926VecMisc2::Rev32 => {1927debug_assert!(size == VectorSize::Size8x16 || size == VectorSize::Size16x8);1928(0b1, 0b00000, enc_size)1929}1930VecMisc2::Rev64 => {1931debug_assert!(1932size == VectorSize::Size8x161933|| size == VectorSize::Size16x81934|| size == VectorSize::Size32x41935);1936(0b0, 0b00000, enc_size)1937}1938VecMisc2::Fcvtzs => {1939debug_assert!(1940size == VectorSize::Size32x21941|| size == VectorSize::Size32x41942|| size == VectorSize::Size64x21943);1944(0b0, 0b11011, enc_size)1945}1946VecMisc2::Fcvtzu => {1947debug_assert!(1948size == VectorSize::Size32x21949|| size == VectorSize::Size32x41950|| size == 
VectorSize::Size64x21951);1952(0b1, 0b11011, enc_size)1953}1954VecMisc2::Scvtf => {1955debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);1956(0b0, 0b11101, enc_size & 0b1)1957}1958VecMisc2::Ucvtf => {1959debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);1960(0b1, 0b11101, enc_size & 0b1)1961}1962VecMisc2::Frintn => {1963debug_assert!(1964size == VectorSize::Size32x21965|| size == VectorSize::Size32x41966|| size == VectorSize::Size64x21967);1968(0b0, 0b11000, enc_size & 0b01)1969}1970VecMisc2::Frintz => {1971debug_assert!(1972size == VectorSize::Size32x21973|| size == VectorSize::Size32x41974|| size == VectorSize::Size64x21975);1976(0b0, 0b11001, enc_size)1977}1978VecMisc2::Frintm => {1979debug_assert!(1980size == VectorSize::Size32x21981|| size == VectorSize::Size32x41982|| size == VectorSize::Size64x21983);1984(0b0, 0b11001, enc_size & 0b01)1985}1986VecMisc2::Frintp => {1987debug_assert!(1988size == VectorSize::Size32x21989|| size == VectorSize::Size32x41990|| size == VectorSize::Size64x21991);1992(0b0, 0b11000, enc_size)1993}1994VecMisc2::Cnt => {1995debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);1996(0b0, 0b00101, enc_size)1997}1998VecMisc2::Cmeq0 => (0b0, 0b01001, enc_size),1999VecMisc2::Cmge0 => (0b1, 0b01000, enc_size),2000VecMisc2::Cmgt0 => (0b0, 0b01000, enc_size),2001VecMisc2::Cmle0 => (0b1, 0b01001, enc_size),2002VecMisc2::Cmlt0 => (0b0, 0b01010, enc_size),2003VecMisc2::Fcmeq0 => {2004debug_assert!(2005size == VectorSize::Size32x22006|| size == VectorSize::Size32x42007|| size == VectorSize::Size64x22008);2009(0b0, 0b01101, enc_size)2010}2011VecMisc2::Fcmge0 => {2012debug_assert!(2013size == VectorSize::Size32x22014|| size == VectorSize::Size32x42015|| size == VectorSize::Size64x22016);2017(0b1, 0b01100, enc_size)2018}2019VecMisc2::Fcmgt0 => {2020debug_assert!(2021size == VectorSize::Size32x22022|| size == VectorSize::Size32x42023|| size == VectorSize::Size64x22024);2025(0b0, 
0b01100, enc_size)2026}2027VecMisc2::Fcmle0 => {2028debug_assert!(2029size == VectorSize::Size32x22030|| size == VectorSize::Size32x42031|| size == VectorSize::Size64x22032);2033(0b1, 0b01101, enc_size)2034}2035VecMisc2::Fcmlt0 => {2036debug_assert!(2037size == VectorSize::Size32x22038|| size == VectorSize::Size32x42039|| size == VectorSize::Size64x22040);2041(0b0, 0b01110, enc_size)2042}2043};2044sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));2045}2046&Inst::VecLanes { op, rd, rn, size } => {2047let (q, size) = match size {2048VectorSize::Size8x8 => (0b0, 0b00),2049VectorSize::Size8x16 => (0b1, 0b00),2050VectorSize::Size16x4 => (0b0, 0b01),2051VectorSize::Size16x8 => (0b1, 0b01),2052VectorSize::Size32x4 => (0b1, 0b10),2053_ => unreachable!(),2054};2055let (u, opcode) = match op {2056VecLanesOp::Uminv => (0b1, 0b11010),2057VecLanesOp::Addv => (0b0, 0b11011),2058};2059sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));2060}2061&Inst::VecShiftImm {2062op,2063rd,2064rn,2065size,2066imm,2067} => {2068let (is_shr, mut template) = match op {2069VecShiftImmOp::Ushr => (true, 0b_001_011110_0000_000_000001_00000_00000_u32),2070VecShiftImmOp::Sshr => (true, 0b_000_011110_0000_000_000001_00000_00000_u32),2071VecShiftImmOp::Shl => (false, 0b_000_011110_0000_000_010101_00000_00000_u32),2072};2073if size.is_128bits() {2074template |= 0b1 << 30;2075}2076let imm = imm as u32;2077// Deal with the somewhat strange encoding scheme for, and limits on,2078// the shift amount.2079let immh_immb = match (size.lane_size(), is_shr) {2080(ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {20810b_1000_000_u32 | (64 - imm)2082}2083(ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {20840b_0100_000_u32 | (32 - imm)2085}2086(ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {20870b_0010_000_u32 | (16 - imm)2088}2089(ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {20900b_0001_000_u32 | (8 - imm)2091}2092(ScalarSize::Size64, false) if imm <= 63 => 
0b_1000_000_u32 | imm,2093(ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,2094(ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,2095(ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,2096_ => panic!(2097"aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}"2098),2099};2100let rn_enc = machreg_to_vec(rn);2101let rd_enc = machreg_to_vec(rd.to_reg());2102sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);2103}2104&Inst::VecShiftImmMod {2105op,2106rd,2107ri,2108rn,2109size,2110imm,2111} => {2112debug_assert_eq!(rd.to_reg(), ri);2113let (is_shr, mut template) = match op {2114VecShiftImmModOp::Sli => (false, 0b_001_011110_0000_000_010101_00000_00000_u32),2115};2116if size.is_128bits() {2117template |= 0b1 << 30;2118}2119let imm = imm as u32;2120// Deal with the somewhat strange encoding scheme for, and limits on,2121// the shift amount.2122let immh_immb = match (size.lane_size(), is_shr) {2123(ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {21240b_1000_000_u32 | (64 - imm)2125}2126(ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {21270b_0100_000_u32 | (32 - imm)2128}2129(ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {21300b_0010_000_u32 | (16 - imm)2131}2132(ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {21330b_0001_000_u32 | (8 - imm)2134}2135(ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,2136(ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,2137(ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,2138(ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,2139_ => panic!(2140"aarch64: Inst::VecShiftImmMod: emit: invalid op/size/imm {op:?}, {size:?}, {imm:?}"2141),2142};2143let rn_enc = machreg_to_vec(rn);2144let rd_enc = machreg_to_vec(rd.to_reg());2145sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);2146}2147&Inst::VecExtract { rd, rn, rm, imm4 } => {2148if imm4 < 16 {2149let template 
= 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;2150let rm_enc = machreg_to_vec(rm);2151let rn_enc = machreg_to_vec(rn);2152let rd_enc = machreg_to_vec(rd.to_reg());2153sink.put4(2154template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,2155);2156} else {2157panic!("aarch64: Inst::VecExtract: emit: invalid extract index {imm4}");2158}2159}2160&Inst::VecTbl { rd, rn, rm } => {2161sink.put4(enc_tbl(/* is_extension = */ false, 0b00, rd, rn, rm));2162}2163&Inst::VecTblExt { rd, ri, rn, rm } => {2164debug_assert_eq!(rd.to_reg(), ri);2165sink.put4(enc_tbl(/* is_extension = */ true, 0b00, rd, rn, rm));2166}2167&Inst::VecTbl2 { rd, rn, rn2, rm } => {2168assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);2169sink.put4(enc_tbl(/* is_extension = */ false, 0b01, rd, rn, rm));2170}2171&Inst::VecTbl2Ext {2172rd,2173ri,2174rn,2175rn2,2176rm,2177} => {2178debug_assert_eq!(rd.to_reg(), ri);2179assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);2180sink.put4(enc_tbl(/* is_extension = */ true, 0b01, rd, rn, rm));2181}2182&Inst::FpuCmp { size, rn, rm } => {2183sink.put4(enc_fcmp(size, rn, rm));2184}2185&Inst::FpuToInt { op, rd, rn } => {2186let top16 = match op {2187// FCVTZS (32/32-bit)2188FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,2189// FCVTZU (32/32-bit)2190FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,2191// FCVTZS (32/64-bit)2192FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,2193// FCVTZU (32/64-bit)2194FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,2195// FCVTZS (64/32-bit)2196FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,2197// FCVTZU (64/32-bit)2198FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,2199// FCVTZS (64/64-bit)2200FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,2201// FCVTZU (64/64-bit)2202FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,2203};2204sink.put4(enc_fputoint(top16, rd, rn));2205}2206&Inst::IntToFpu { op, rd, rn } => {2207let top16 = match op {2208// SCVTF (32/32-bit)2209IntToFpuOp::I32ToF32 => 
0b000_11110_00_1_00_010,2210// UCVTF (32/32-bit)2211IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,2212// SCVTF (64/32-bit)2213IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,2214// UCVTF (64/32-bit)2215IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,2216// SCVTF (32/64-bit)2217IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,2218// UCVTF (32/64-bit)2219IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,2220// SCVTF (64/64-bit)2221IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,2222// UCVTF (64/64-bit)2223IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,2224};2225sink.put4(enc_inttofpu(top16, rd, rn));2226}2227&Inst::FpuCSel16 { rd, rn, rm, cond } => {2228sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size16));2229}2230&Inst::FpuCSel32 { rd, rn, rm, cond } => {2231sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));2232}2233&Inst::FpuCSel64 { rd, rn, rm, cond } => {2234sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));2235}2236&Inst::FpuRound { op, rd, rn } => {2237let top22 = match op {2238FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,2239FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,2240FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,2241FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,2242FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,2243FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,2244FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,2245FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,2246};2247sink.put4(enc_fround(top22, rd, rn));2248}2249&Inst::MovToFpu { rd, rn, size } => {2250let template = match size {2251ScalarSize::Size16 => 0b000_11110_11_1_00_111_000000_00000_00000,2252ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,2253ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,2254_ => unreachable!(),2255};2256sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));2257}2258&Inst::FpuMoveFPImm { rd, imm, size } => 
{2259sink.put4(22600b000_11110_00_1_00_000_000100_00000_000002261| size.ftype() << 222262| ((imm.enc_bits() as u32) << 13)2263| machreg_to_vec(rd.to_reg()),2264);2265}2266&Inst::MovToVec {2267rd,2268ri,2269rn,2270idx,2271size,2272} => {2273debug_assert_eq!(rd.to_reg(), ri);2274let (imm5, shift) = match size.lane_size() {2275ScalarSize::Size8 => (0b00001, 1),2276ScalarSize::Size16 => (0b00010, 2),2277ScalarSize::Size32 => (0b00100, 3),2278ScalarSize::Size64 => (0b01000, 4),2279_ => unreachable!(),2280};2281debug_assert_eq!(idx & (0b11111 >> shift), idx);2282let imm5 = imm5 | ((idx as u32) << shift);2283sink.put4(22840b010_01110000_00000_0_0011_1_00000_000002285| (imm5 << 16)2286| (machreg_to_gpr(rn) << 5)2287| machreg_to_vec(rd.to_reg()),2288);2289}2290&Inst::MovFromVec { rd, rn, idx, size } => {2291let (q, imm5, shift, mask) = match size {2292ScalarSize::Size8 => (0b0, 0b00001, 1, 0b1111),2293ScalarSize::Size16 => (0b0, 0b00010, 2, 0b0111),2294ScalarSize::Size32 => (0b0, 0b00100, 3, 0b0011),2295ScalarSize::Size64 => (0b1, 0b01000, 4, 0b0001),2296_ => panic!("Unexpected scalar FP operand size: {size:?}"),2297};2298debug_assert_eq!(idx & mask, idx);2299let imm5 = imm5 | ((idx as u32) << shift);2300sink.put4(23010b000_01110000_00000_0_0111_1_00000_000002302| (q << 30)2303| (imm5 << 16)2304| (machreg_to_vec(rn) << 5)2305| machreg_to_gpr(rd.to_reg()),2306);2307}2308&Inst::MovFromVecSigned {2309rd,2310rn,2311idx,2312size,2313scalar_size,2314} => {2315let (imm5, shift, half) = match size {2316VectorSize::Size8x8 => (0b00001, 1, true),2317VectorSize::Size8x16 => (0b00001, 1, false),2318VectorSize::Size16x4 => (0b00010, 2, true),2319VectorSize::Size16x8 => (0b00010, 2, false),2320VectorSize::Size32x2 => {2321debug_assert_ne!(scalar_size, OperandSize::Size32);2322(0b00100, 3, true)2323}2324VectorSize::Size32x4 => {2325debug_assert_ne!(scalar_size, OperandSize::Size32);2326(0b00100, 3, false)2327}2328_ => panic!("Unexpected vector operand size"),2329};2330debug_assert_eq!(idx 
& (0b11111 >> (half as u32 + shift)), idx);2331let imm5 = imm5 | ((idx as u32) << shift);2332sink.put4(23330b000_01110000_00000_0_0101_1_00000_000002334| (scalar_size.is64() as u32) << 302335| (imm5 << 16)2336| (machreg_to_vec(rn) << 5)2337| machreg_to_gpr(rd.to_reg()),2338);2339}2340&Inst::VecDup { rd, rn, size } => {2341let q = size.is_128bits() as u32;2342let imm5 = match size.lane_size() {2343ScalarSize::Size8 => 0b00001,2344ScalarSize::Size16 => 0b00010,2345ScalarSize::Size32 => 0b00100,2346ScalarSize::Size64 => 0b01000,2347_ => unreachable!(),2348};2349sink.put4(23500b0_0_0_01110000_00000_000011_00000_000002351| (q << 30)2352| (imm5 << 16)2353| (machreg_to_gpr(rn) << 5)2354| machreg_to_vec(rd.to_reg()),2355);2356}2357&Inst::VecDupFromFpu { rd, rn, size, lane } => {2358let q = size.is_128bits() as u32;2359let imm5 = match size.lane_size() {2360ScalarSize::Size8 => {2361assert!(lane < 16);23620b00001 | (u32::from(lane) << 1)2363}2364ScalarSize::Size16 => {2365assert!(lane < 8);23660b00010 | (u32::from(lane) << 2)2367}2368ScalarSize::Size32 => {2369assert!(lane < 4);23700b00100 | (u32::from(lane) << 3)2371}2372ScalarSize::Size64 => {2373assert!(lane < 2);23740b01000 | (u32::from(lane) << 4)2375}2376_ => unimplemented!(),2377};2378sink.put4(23790b000_01110000_00000_000001_00000_000002380| (q << 30)2381| (imm5 << 16)2382| (machreg_to_vec(rn) << 5)2383| machreg_to_vec(rd.to_reg()),2384);2385}2386&Inst::VecDupFPImm { rd, imm, size } => {2387let imm = imm.enc_bits();2388let op = match size.lane_size() {2389ScalarSize::Size32 => 0,2390ScalarSize::Size64 => 1,2391_ => unimplemented!(),2392};2393let q_op = op | ((size.is_128bits() as u32) << 1);23942395sink.put4(enc_asimd_mod_imm(rd, q_op, 0b1111, imm));2396}2397&Inst::VecDupImm {2398rd,2399imm,2400invert,2401size,2402} => {2403let (imm, shift, shift_ones) = imm.value();2404let (op, cmode) = match size.lane_size() {2405ScalarSize::Size8 => {2406assert!(!invert);2407assert_eq!(shift, 0);24082409(0, 
0b1110)2410}2411ScalarSize::Size16 => {2412let s = shift & 8;24132414assert!(!shift_ones);2415assert_eq!(s, shift);24162417(invert as u32, 0b1000 | (s >> 2))2418}2419ScalarSize::Size32 => {2420if shift_ones {2421assert!(shift == 8 || shift == 16);24222423(invert as u32, 0b1100 | (shift >> 4))2424} else {2425let s = shift & 24;24262427assert_eq!(s, shift);24282429(invert as u32, 0b0000 | (s >> 2))2430}2431}2432ScalarSize::Size64 => {2433assert!(!invert);2434assert_eq!(shift, 0);24352436(1, 0b1110)2437}2438_ => unreachable!(),2439};2440let q_op = op | ((size.is_128bits() as u32) << 1);24412442sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));2443}2444&Inst::VecExtend {2445t,2446rd,2447rn,2448high_half,2449lane_size,2450} => {2451let immh = match lane_size {2452ScalarSize::Size16 => 0b001,2453ScalarSize::Size32 => 0b010,2454ScalarSize::Size64 => 0b100,2455_ => panic!("Unexpected VecExtend to lane size of {lane_size:?}"),2456};2457let u = match t {2458VecExtendOp::Sxtl => 0b0,2459VecExtendOp::Uxtl => 0b1,2460};2461sink.put4(24620b000_011110_0000_000_101001_00000_000002463| ((high_half as u32) << 30)2464| (u << 29)2465| (immh << 19)2466| (machreg_to_vec(rn) << 5)2467| machreg_to_vec(rd.to_reg()),2468);2469}2470&Inst::VecRRLong {2471op,2472rd,2473rn,2474high_half,2475} => {2476let (u, size, bits_12_16) = match op {2477VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111),2478VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111),2479VecRRLongOp::Shll8 => (0b1, 0b00, 0b10011),2480VecRRLongOp::Shll16 => (0b1, 0b01, 0b10011),2481VecRRLongOp::Shll32 => (0b1, 0b10, 0b10011),2482};24832484sink.put4(enc_vec_rr_misc(2485((high_half as u32) << 1) | u,2486size,2487bits_12_16,2488rd,2489rn,2490));2491}2492&Inst::VecRRNarrowLow {2493op,2494rd,2495rn,2496lane_size,2497}2498| &Inst::VecRRNarrowHigh {2499op,2500rd,2501rn,2502lane_size,2503..2504} => {2505let high_half = match self {2506&Inst::VecRRNarrowLow { .. } => false,2507&Inst::VecRRNarrowHigh { .. 
} => true,2508_ => unreachable!(),2509};25102511let size = match lane_size {2512ScalarSize::Size8 => 0b00,2513ScalarSize::Size16 => 0b01,2514ScalarSize::Size32 => 0b10,2515_ => panic!("unsupported size: {lane_size:?}"),2516};25172518// Floats use a single bit, to encode either half or single.2519let size = match op {2520VecRRNarrowOp::Fcvtn => size >> 1,2521_ => size,2522};25232524let (u, bits_12_16) = match op {2525VecRRNarrowOp::Xtn => (0b0, 0b10010),2526VecRRNarrowOp::Sqxtn => (0b0, 0b10100),2527VecRRNarrowOp::Sqxtun => (0b1, 0b10010),2528VecRRNarrowOp::Uqxtn => (0b1, 0b10100),2529VecRRNarrowOp::Fcvtn => (0b0, 0b10110),2530};25312532sink.put4(enc_vec_rr_misc(2533((high_half as u32) << 1) | u,2534size,2535bits_12_16,2536rd,2537rn,2538));2539}2540&Inst::VecMovElement {2541rd,2542ri,2543rn,2544dest_idx,2545src_idx,2546size,2547} => {2548debug_assert_eq!(rd.to_reg(), ri);2549let (imm5, shift) = match size.lane_size() {2550ScalarSize::Size8 => (0b00001, 1),2551ScalarSize::Size16 => (0b00010, 2),2552ScalarSize::Size32 => (0b00100, 3),2553ScalarSize::Size64 => (0b01000, 4),2554_ => unreachable!(),2555};2556let mask = 0b11111 >> shift;2557debug_assert_eq!(dest_idx & mask, dest_idx);2558debug_assert_eq!(src_idx & mask, src_idx);2559let imm4 = (src_idx as u32) << (shift - 1);2560let imm5 = imm5 | ((dest_idx as u32) << shift);2561sink.put4(25620b011_01110000_00000_0_0000_1_00000_000002563| (imm5 << 16)2564| (imm4 << 11)2565| (machreg_to_vec(rn) << 5)2566| machreg_to_vec(rd.to_reg()),2567);2568}2569&Inst::VecRRPair { op, rd, rn } => {2570let bits_12_16 = match op {2571VecPairOp::Addp => 0b11011,2572};25732574sink.put4(enc_vec_rr_pair(bits_12_16, rd, rn));2575}2576&Inst::VecRRRLong {2577rd,2578rn,2579rm,2580alu_op,2581high_half,2582} => {2583let (u, size, bit14) = match alu_op {2584VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1),2585VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1),2586VecRRRLongOp::Smull32 => (0b0, 0b10, 0b1),2587VecRRRLongOp::Umull8 => (0b1, 0b00, 
0b1),2588VecRRRLongOp::Umull16 => (0b1, 0b01, 0b1),2589VecRRRLongOp::Umull32 => (0b1, 0b10, 0b1),2590};2591sink.put4(enc_vec_rrr_long(2592high_half as u32,2593u,2594size,2595bit14,2596rm,2597rn,2598rd,2599));2600}2601&Inst::VecRRRLongMod {2602rd,2603ri,2604rn,2605rm,2606alu_op,2607high_half,2608} => {2609debug_assert_eq!(rd.to_reg(), ri);2610let (u, size, bit14) = match alu_op {2611VecRRRLongModOp::Umlal8 => (0b1, 0b00, 0b0),2612VecRRRLongModOp::Umlal16 => (0b1, 0b01, 0b0),2613VecRRRLongModOp::Umlal32 => (0b1, 0b10, 0b0),2614};2615sink.put4(enc_vec_rrr_long(2616high_half as u32,2617u,2618size,2619bit14,2620rm,2621rn,2622rd,2623));2624}2625&Inst::VecRRPairLong { op, rd, rn } => {2626let (u, size) = match op {2627VecRRPairLongOp::Saddlp8 => (0b0, 0b0),2628VecRRPairLongOp::Uaddlp8 => (0b1, 0b0),2629VecRRPairLongOp::Saddlp16 => (0b0, 0b1),2630VecRRPairLongOp::Uaddlp16 => (0b1, 0b1),2631};26322633sink.put4(enc_vec_rr_pair_long(u, size, rd, rn));2634}2635&Inst::VecRRR {2636rd,2637rn,2638rm,2639alu_op,2640size,2641} => {2642let (q, enc_size) = size.enc_size();2643let is_float = match alu_op {2644VecALUOp::Fcmeq2645| VecALUOp::Fcmgt2646| VecALUOp::Fcmge2647| VecALUOp::Fadd2648| VecALUOp::Fsub2649| VecALUOp::Fdiv2650| VecALUOp::Fmax2651| VecALUOp::Fmin2652| VecALUOp::Fmul => true,2653_ => false,2654};26552656let (top11, bit15_10) = match alu_op {2657VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),2658VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),2659VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),2660VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),2661VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),2662VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),2663VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),2664VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),2665VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),2666VecALUOp::Fcmeq => 
(0b000_01110_00_1, 0b111001),2667VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),2668VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),2669// The following logical instructions operate on bytes, so are not encoded differently2670// for the different vector types.2671VecALUOp::And => (0b000_01110_00_1, 0b000111),2672VecALUOp::Bic => (0b000_01110_01_1, 0b000111),2673VecALUOp::Orr => (0b000_01110_10_1, 0b000111),2674VecALUOp::Eor => (0b001_01110_00_1, 0b000111),2675VecALUOp::Umaxp => {2676debug_assert_ne!(size, VectorSize::Size64x2);26772678(0b001_01110_00_1 | enc_size << 1, 0b101001)2679}2680VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),2681VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),2682VecALUOp::Mul => {2683debug_assert_ne!(size, VectorSize::Size64x2);2684(0b000_01110_00_1 | enc_size << 1, 0b100111)2685}2686VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),2687VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),2688VecALUOp::Umin => {2689debug_assert_ne!(size, VectorSize::Size64x2);26902691(0b001_01110_00_1 | enc_size << 1, 0b011011)2692}2693VecALUOp::Smin => {2694debug_assert_ne!(size, VectorSize::Size64x2);26952696(0b000_01110_00_1 | enc_size << 1, 0b011011)2697}2698VecALUOp::Umax => {2699debug_assert_ne!(size, VectorSize::Size64x2);27002701(0b001_01110_00_1 | enc_size << 1, 0b011001)2702}2703VecALUOp::Smax => {2704debug_assert_ne!(size, VectorSize::Size64x2);27052706(0b000_01110_00_1 | enc_size << 1, 0b011001)2707}2708VecALUOp::Urhadd => {2709debug_assert_ne!(size, VectorSize::Size64x2);27102711(0b001_01110_00_1 | enc_size << 1, 0b000101)2712}2713VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),2714VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),2715VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),2716VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),2717VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),2718VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),2719VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 
0b101111),2720VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),2721VecALUOp::Zip2 => (0b01001110_00_0 | enc_size << 1, 0b011110),2722VecALUOp::Sqrdmulh => {2723debug_assert!(2724size.lane_size() == ScalarSize::Size162725|| size.lane_size() == ScalarSize::Size322726);27272728(0b001_01110_00_1 | enc_size << 1, 0b101101)2729}2730VecALUOp::Uzp1 => (0b01001110_00_0 | enc_size << 1, 0b000110),2731VecALUOp::Uzp2 => (0b01001110_00_0 | enc_size << 1, 0b010110),2732VecALUOp::Trn1 => (0b01001110_00_0 | enc_size << 1, 0b001010),2733VecALUOp::Trn2 => (0b01001110_00_0 | enc_size << 1, 0b011010),2734};2735let top11 = if is_float {2736top11 | size.enc_float_size() << 12737} else {2738top112739};2740sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));2741}2742&Inst::VecRRRMod {2743rd,2744ri,2745rn,2746rm,2747alu_op,2748size,2749} => {2750debug_assert_eq!(rd.to_reg(), ri);2751let (q, _enc_size) = size.enc_size();27522753let (top11, bit15_10) = match alu_op {2754VecALUModOp::Bsl => (0b001_01110_01_1, 0b000111),2755VecALUModOp::Fmla => {2756(0b000_01110_00_1 | (size.enc_float_size() << 1), 0b110011)2757}2758VecALUModOp::Fmls => {2759(0b000_01110_10_1 | (size.enc_float_size() << 1), 0b110011)2760}2761};2762sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));2763}2764&Inst::VecFmlaElem {2765rd,2766ri,2767rn,2768rm,2769alu_op,2770size,2771idx,2772} => {2773debug_assert_eq!(rd.to_reg(), ri);2774let idx = u32::from(idx);27752776let (q, _size) = size.enc_size();2777let o2 = match alu_op {2778VecALUModOp::Fmla => 0b0,2779VecALUModOp::Fmls => 0b1,2780_ => unreachable!(),2781};27822783let (h, l) = match size {2784VectorSize::Size32x4 => {2785assert!(idx < 4);2786(idx >> 1, idx & 1)2787}2788VectorSize::Size64x2 => {2789assert!(idx < 2);2790(idx, 0)2791}2792_ => unreachable!(),2793};27942795let top11 = 0b000_011111_00 | (q << 9) | (size.enc_float_size() << 1) | l;2796let bit15_10 = 0b000100 | (o2 << 4) | (h << 1);2797sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, 
rd));2798}2799&Inst::VecLoadReplicate {2800rd,2801rn,2802size,2803flags,2804} => {2805let (q, size) = size.enc_size();28062807if let Some(trap_code) = flags.trap_code() {2808// Register the offset at which the actual load instruction starts.2809sink.add_trap(trap_code);2810}28112812sink.put4(enc_ldst_vec(q, size, rn, rd));2813}2814&Inst::VecCSel { rd, rn, rm, cond } => {2815/* Emit this:2816b.cond else2817mov rd, rm2818b out2819else:2820mov rd, rn2821out:28222823Note, we could do better in the cases where rd == rn or rd == rm.2824*/2825let else_label = sink.get_label();2826let out_label = sink.get_label();28272828// b.cond else2829let br_else_offset = sink.cur_offset();2830sink.put4(enc_conditional_br(2831BranchTarget::Label(else_label),2832CondBrKind::Cond(cond),2833));2834sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);28352836// mov rd, rm2837sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));28382839// b out2840let b_out_offset = sink.cur_offset();2841sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);2842sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);2843sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));28442845// else:2846sink.bind_label(else_label, &mut state.ctrl_plane);28472848// mov rd, rn2849sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));28502851// out:2852sink.bind_label(out_label, &mut state.ctrl_plane);2853}2854&Inst::MovToNZCV { rn } => {2855sink.put4(0xd51b4200 | machreg_to_gpr(rn));2856}2857&Inst::MovFromNZCV { rd } => {2858sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));2859}2860&Inst::Extend {2861rd,2862rn,2863signed: false,2864from_bits: 1,2865to_bits,2866} => {2867assert!(to_bits <= 64);2868// Reduce zero-extend-from-1-bit to:2869// - and rd, rn, #12870// Note: This is special cased as UBFX may take more cycles2871// than AND on smaller cores.2872let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();2873Inst::AluRRImmLogic {2874alu_op: ALUOp::And,2875size: 
OperandSize::Size32,2876rd,2877rn,2878imml,2879}2880.emit(sink, emit_info, state);2881}2882&Inst::Extend {2883rd,2884rn,2885signed: false,2886from_bits: 32,2887to_bits: 64,2888} => {2889let mov = Inst::Mov {2890size: OperandSize::Size32,2891rd,2892rm: rn,2893};2894mov.emit(sink, emit_info, state);2895}2896&Inst::Extend {2897rd,2898rn,2899signed,2900from_bits,2901to_bits,2902} => {2903let (opc, size) = if signed {2904(0b00, OperandSize::from_bits(to_bits))2905} else {2906(0b10, OperandSize::Size32)2907};2908sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));2909}2910&Inst::Jump { ref dest } => {2911let off = sink.cur_offset();2912// Indicate that the jump uses a label, if so, so that a fixup can occur later.2913if let Some(l) = dest.as_label() {2914sink.use_label_at_offset(off, l, LabelUse::Branch26);2915sink.add_uncond_branch(off, off + 4, l);2916}2917// Emit the jump itself.2918sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));2919}2920&Inst::Args { .. } | &Inst::Rets { .. 
} => {2921// Nothing: this is a pseudoinstruction that serves2922// only to constrain registers at a certain point.2923}2924&Inst::Ret {} => {2925sink.put4(0xd65f03c0);2926}2927&Inst::AuthenticatedRet { key, is_hint } => {2928let (op2, is_hint) = match key {2929APIKey::AZ => (0b100, true),2930APIKey::ASP => (0b101, is_hint),2931APIKey::BZ => (0b110, true),2932APIKey::BSP => (0b111, is_hint),2933};29342935if is_hint {2936sink.put4(key.enc_auti_hint());2937Inst::Ret {}.emit(sink, emit_info, state);2938} else {2939sink.put4(0xd65f0bff | (op2 << 9)); // reta{key}2940}2941}2942&Inst::Call { ref info } => {2943let user_stack_map = state.take_stack_map();2944sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);2945sink.put4(enc_jump26(0b100101, 0));2946if let Some(s) = user_stack_map {2947let offset = sink.cur_offset();2948sink.push_user_stack_map(state, offset, s);2949}29502951if let Some(try_call) = info.try_call_info.as_ref() {2952sink.add_try_call_site(2953Some(state.frame_layout.sp_to_fp()),2954try_call.exception_handlers(&state.frame_layout),2955);2956} else {2957sink.add_call_site();2958}29592960if info.callee_pop_size > 0 {2961let callee_pop_size =2962i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");2963for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {2964inst.emit(sink, emit_info, state);2965}2966}29672968// Load any stack-carried return values.2969info.emit_retval_loads::<AArch64MachineDeps, _, _>(2970state.frame_layout().stackslots_size,2971|inst| inst.emit(sink, emit_info, state),2972|needed_space| Some(Inst::EmitIsland { needed_space }),2973);29742975// If this is a try-call, jump to the continuation2976// (normal-return) block.2977if let Some(try_call) = info.try_call_info.as_ref() {2978let jmp = Inst::Jump {2979dest: BranchTarget::Label(try_call.continuation),2980};2981jmp.emit(sink, emit_info, state);2982}29832984// We produce an island above if needed, so disable2985// the worst-case-size check in this 
case.2986start_off = sink.cur_offset();2987}2988&Inst::CallInd { ref info } => {2989let user_stack_map = state.take_stack_map();2990sink.put4(29910b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.dest) << 5),2992);2993if let Some(s) = user_stack_map {2994let offset = sink.cur_offset();2995sink.push_user_stack_map(state, offset, s);2996}29972998if let Some(try_call) = info.try_call_info.as_ref() {2999sink.add_try_call_site(3000Some(state.frame_layout.sp_to_fp()),3001try_call.exception_handlers(&state.frame_layout),3002);3003} else {3004sink.add_call_site();3005}30063007if info.callee_pop_size > 0 {3008let callee_pop_size =3009i32::try_from(info.callee_pop_size).expect("callee popped more than 2GB");3010for inst in AArch64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {3011inst.emit(sink, emit_info, state);3012}3013}30143015// Load any stack-carried return values.3016info.emit_retval_loads::<AArch64MachineDeps, _, _>(3017state.frame_layout().stackslots_size,3018|inst| inst.emit(sink, emit_info, state),3019|needed_space| Some(Inst::EmitIsland { needed_space }),3020);30213022// If this is a try-call, jump to the continuation3023// (normal-return) block.3024if let Some(try_call) = info.try_call_info.as_ref() {3025let jmp = Inst::Jump {3026dest: BranchTarget::Label(try_call.continuation),3027};3028jmp.emit(sink, emit_info, state);3029}30303031// We produce an island above if needed, so disable3032// the worst-case-size check in this case.3033start_off = sink.cur_offset();3034}3035&Inst::ReturnCall { ref info } => {3036emit_return_call_common_sequence(sink, emit_info, state, info);30373038// Note: this is not `Inst::Jump { .. 
}.emit(..)` because we3039// have different metadata in this case: we don't have a label3040// for the target, but rather a function relocation.3041sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);3042sink.put4(enc_jump26(0b000101, 0));3043sink.add_call_site();30443045// `emit_return_call_common_sequence` emits an island if3046// necessary, so we can safely disable the worst-case-size check3047// in this case.3048start_off = sink.cur_offset();3049}3050&Inst::ReturnCallInd { ref info } => {3051emit_return_call_common_sequence(sink, emit_info, state, info);30523053Inst::IndirectBr {3054rn: info.dest,3055targets: vec![],3056}3057.emit(sink, emit_info, state);3058sink.add_call_site();30593060// `emit_return_call_common_sequence` emits an island if3061// necessary, so we can safely disable the worst-case-size check3062// in this case.3063start_off = sink.cur_offset();3064}3065&Inst::CondBr {3066taken,3067not_taken,3068kind,3069} => {3070// Conditional part first.3071let cond_off = sink.cur_offset();3072if let Some(l) = taken.as_label() {3073sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);3074let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();3075sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);3076}3077sink.put4(enc_conditional_br(taken, kind));30783079// Unconditional part next.3080let uncond_off = sink.cur_offset();3081if let Some(l) = not_taken.as_label() {3082sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);3083sink.add_uncond_branch(uncond_off, uncond_off + 4, l);3084}3085sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));3086}3087&Inst::TestBitAndBranch {3088taken,3089not_taken,3090kind,3091rn,3092bit,3093} => {3094// Emit the conditional branch first3095let cond_off = sink.cur_offset();3096if let Some(l) = taken.as_label() {3097sink.use_label_at_offset(cond_off, l, LabelUse::Branch14);3098let inverted =3099enc_test_bit_and_branch(kind.complement(), taken, rn, 
bit).to_le_bytes();3100sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);3101}3102sink.put4(enc_test_bit_and_branch(kind, taken, rn, bit));31033104// Unconditional part next.3105let uncond_off = sink.cur_offset();3106if let Some(l) = not_taken.as_label() {3107sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);3108sink.add_uncond_branch(uncond_off, uncond_off + 4, l);3109}3110sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));3111}3112&Inst::TrapIf { kind, trap_code } => {3113let label = sink.defer_trap(trap_code);3114// condbr KIND, LABEL3115let off = sink.cur_offset();3116sink.put4(enc_conditional_br(BranchTarget::Label(label), kind));3117sink.use_label_at_offset(off, label, LabelUse::Branch19);3118}3119&Inst::IndirectBr { rn, .. } => {3120sink.put4(enc_br(rn));3121}3122&Inst::Nop0 => {}3123&Inst::Nop4 => {3124sink.put4(0xd503201f);3125}3126&Inst::Brk => {3127sink.put4(0xd43e0000);3128}3129&Inst::Udf { trap_code } => {3130sink.add_trap(trap_code);3131sink.put_data(Inst::TRAP_OPCODE);3132}3133&Inst::Adr { rd, off } => {3134assert!(off > -(1 << 20));3135assert!(off < (1 << 20));3136sink.put4(enc_adr(off, rd));3137}3138&Inst::Adrp { rd, off } => {3139assert!(off > -(1 << 20));3140assert!(off < (1 << 20));3141sink.put4(enc_adrp(off, rd));3142}3143&Inst::Word4 { data } => {3144sink.put4(data);3145}3146&Inst::Word8 { data } => {3147sink.put8(data);3148}3149&Inst::JTSequence {3150ridx,3151rtmp1,3152rtmp2,3153default,3154ref targets,3155..3156} => {3157// This sequence is *one* instruction in the vcode, and is expanded only here at3158// emission time, because we cannot allow the regalloc to insert spills/reloads in3159// the middle; we depend on hardcoded PC-rel addressing below.31603161// Branch to default when condition code from prior comparison indicates.3162let br =3163enc_conditional_br(BranchTarget::Label(default), CondBrKind::Cond(Cond::Hs));31643165// No need to inform the sink's branch folding logic about this branch, because 
it3166// will not be merged with any other branch, flipped, or elided (it is not preceded3167// or succeeded by any other branch). Just emit it with the label use.3168let default_br_offset = sink.cur_offset();3169sink.use_label_at_offset(default_br_offset, default, LabelUse::Branch19);3170sink.put4(br);31713172// Overwrite the index with a zero when the above3173// branch misspeculates (Spectre mitigation). Save the3174// resulting index in rtmp2.3175let inst = Inst::CSel {3176rd: rtmp2,3177cond: Cond::Hs,3178rn: zero_reg(),3179rm: ridx,3180};3181inst.emit(sink, emit_info, state);3182// Prevent any data value speculation.3183Inst::Csdb.emit(sink, emit_info, state);31843185// Load address of jump table3186let inst = Inst::Adr { rd: rtmp1, off: 16 };3187inst.emit(sink, emit_info, state);3188// Load value out of jump table3189let inst = Inst::SLoad32 {3190rd: rtmp2,3191mem: AMode::reg_plus_reg_scaled_extended(3192rtmp1.to_reg(),3193rtmp2.to_reg(),3194ExtendOp::UXTW,3195),3196flags: MemFlags::trusted(),3197};3198inst.emit(sink, emit_info, state);3199// Add base of jump table to jump-table-sourced block offset3200let inst = Inst::AluRRR {3201alu_op: ALUOp::Add,3202size: OperandSize::Size64,3203rd: rtmp1,3204rn: rtmp1.to_reg(),3205rm: rtmp2.to_reg(),3206};3207inst.emit(sink, emit_info, state);3208// Branch to computed address. (`targets` here is only used for successor queries3209// and is not needed for emission.)3210let inst = Inst::IndirectBr {3211rn: rtmp1.to_reg(),3212targets: vec![],3213};3214inst.emit(sink, emit_info, state);3215// Emit jump table (table of 32-bit offsets).3216let jt_off = sink.cur_offset();3217for &target in targets.iter() {3218let word_off = sink.cur_offset();3219// off_into_table is an addend here embedded in the label to be later patched3220// at the end of codegen. 
The offset is initially relative to this jump table3221// entry; with the extra addend, it'll be relative to the jump table's start,3222// after patching.3223let off_into_table = word_off - jt_off;3224sink.use_label_at_offset(word_off, target, LabelUse::PCRel32);3225sink.put4(off_into_table);3226}32273228// Lowering produces an EmitIsland before using a JTSequence, so we can safely3229// disable the worst-case-size check in this case.3230start_off = sink.cur_offset();3231}3232&Inst::LoadExtNameGot { rd, ref name } => {3233// See this CE Example for the variations of this with and without BTI & PAUTH3234// https://godbolt.org/z/ncqjbbvvn3235//3236// Emit the following code:3237// adrp rd, :got:X3238// ldr rd, [rd, :got_lo12:X]32393240// adrp rd, symbol3241sink.add_reloc(Reloc::Aarch64AdrGotPage21, &**name, 0);3242let inst = Inst::Adrp { rd, off: 0 };3243inst.emit(sink, emit_info, state);32443245// ldr rd, [rd, :got_lo12:X]3246sink.add_reloc(Reloc::Aarch64Ld64GotLo12Nc, &**name, 0);3247let inst = Inst::ULoad64 {3248rd,3249mem: AMode::reg(rd.to_reg()),3250flags: MemFlags::trusted(),3251};3252inst.emit(sink, emit_info, state);3253}3254&Inst::LoadExtNameNear {3255rd,3256ref name,3257offset,3258} => {3259// Emit the following code:3260// adrp rd, X3261// add rd, rd, :lo12:X3262//3263// See https://godbolt.org/z/855KEvM5r for an example.32643265// adrp rd, symbol3266sink.add_reloc(Reloc::Aarch64AdrPrelPgHi21, &**name, offset);3267let inst = Inst::Adrp { rd, off: 0 };3268inst.emit(sink, emit_info, state);32693270// add rd, rd, :lo12:X3271sink.add_reloc(Reloc::Aarch64AddAbsLo12Nc, &**name, offset);3272let inst = Inst::AluRRImm12 {3273alu_op: ALUOp::Add,3274size: OperandSize::Size64,3275rd,3276rn: rd.to_reg(),3277imm12: Imm12::ZERO,3278};3279inst.emit(sink, emit_info, state);3280}3281&Inst::LoadExtNameFar {3282rd,3283ref name,3284offset,3285} => {3286// With absolute offsets we set up a load from a preallocated space, and then jump3287// over it.3288//3289// Emit the 
following code:3290// ldr rd, #83291// b #0x103292// <8 byte space>32933294let inst = Inst::ULoad64 {3295rd,3296mem: AMode::Label {3297label: MemLabel::PCRel(8),3298},3299flags: MemFlags::trusted(),3300};3301inst.emit(sink, emit_info, state);3302let inst = Inst::Jump {3303dest: BranchTarget::ResolvedOffset(12),3304};3305inst.emit(sink, emit_info, state);3306sink.add_reloc(Reloc::Abs8, &**name, offset);3307sink.put8(0);3308}3309&Inst::LoadAddr { rd, ref mem } => {3310let mem = mem.clone();3311let (mem_insts, mem) = mem_finalize(Some(sink), &mem, I8, state);3312for inst in mem_insts.into_iter() {3313inst.emit(sink, emit_info, state);3314}33153316let (reg, index_reg, offset) = match mem {3317AMode::RegExtended { rn, rm, extendop } => {3318let r = rn;3319(r, Some((rm, extendop)), 0)3320}3321AMode::Unscaled { rn, simm9 } => {3322let r = rn;3323(r, None, simm9.value())3324}3325AMode::UnsignedOffset { rn, uimm12 } => {3326let r = rn;3327(r, None, uimm12.value() as i32)3328}3329_ => panic!("Unsupported case for LoadAddr: {mem:?}"),3330};3331let abs_offset = if offset < 0 {3332-offset as u643333} else {3334offset as u643335};3336let alu_op = if offset < 0 { ALUOp::Sub } else { ALUOp::Add };33373338if let Some((idx, extendop)) = index_reg {3339let add = Inst::AluRRRExtend {3340alu_op: ALUOp::Add,3341size: OperandSize::Size64,3342rd,3343rn: reg,3344rm: idx,3345extendop,3346};33473348add.emit(sink, emit_info, state);3349} else if offset == 0 {3350if reg != rd.to_reg() {3351let mov = Inst::Mov {3352size: OperandSize::Size64,3353rd,3354rm: reg,3355};33563357mov.emit(sink, emit_info, state);3358}3359} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {3360let add = Inst::AluRRImm12 {3361alu_op,3362size: OperandSize::Size64,3363rd,3364rn: reg,3365imm12,3366};3367add.emit(sink, emit_info, state);3368} else {3369// Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction3370// was initially an `SPOffset`. Assert that `tmp2` is truly free to use. 
Note3371// that no other instructions will be inserted here (we're emitting directly),3372// and a live range of `tmp2` should not span this instruction, so this use3373// should otherwise be correct.3374debug_assert!(rd.to_reg() != tmp2_reg());3375debug_assert!(reg != tmp2_reg());3376let tmp = writable_tmp2_reg();3377for insn in Inst::load_constant(tmp, abs_offset).into_iter() {3378insn.emit(sink, emit_info, state);3379}3380let add = Inst::AluRRR {3381alu_op,3382size: OperandSize::Size64,3383rd,3384rn: reg,3385rm: tmp.to_reg(),3386};3387add.emit(sink, emit_info, state);3388}3389}3390&Inst::Paci { key } => {3391let (crm, op2) = match key {3392APIKey::AZ => (0b0011, 0b000),3393APIKey::ASP => (0b0011, 0b001),3394APIKey::BZ => (0b0011, 0b010),3395APIKey::BSP => (0b0011, 0b011),3396};33973398sink.put4(0xd503211f | (crm << 8) | (op2 << 5));3399}3400&Inst::Xpaclri => sink.put4(0xd50320ff),3401&Inst::Bti { targets } => {3402let targets = match targets {3403BranchTargetType::None => 0b00,3404BranchTargetType::C => 0b01,3405BranchTargetType::J => 0b10,3406BranchTargetType::JC => 0b11,3407};34083409sink.put4(0xd503241f | targets << 6);3410}3411&Inst::EmitIsland { needed_space } => {3412if sink.island_needed(needed_space + 4) {3413let jump_around_label = sink.get_label();3414let jmp = Inst::Jump {3415dest: BranchTarget::Label(jump_around_label),3416};3417jmp.emit(sink, emit_info, state);3418sink.emit_island(needed_space + 4, &mut state.ctrl_plane);3419sink.bind_label(jump_around_label, &mut state.ctrl_plane);3420}3421}34223423&Inst::ElfTlsGetAddr {3424ref symbol,3425rd,3426tmp,3427} => {3428assert_eq!(xreg(0), rd.to_reg());34293430// See the original proposal for TLSDESC.3431// http://www.fsfla.org/~lxoliva/writeups/TLS/paper-lk2006.pdf3432//3433// Implement the TLSDESC instruction sequence:3434// adrp x0, :tlsdesc:tlsvar3435// ldr tmp, [x0, :tlsdesc_lo12:tlsvar]3436// add x0, x0, :tlsdesc_lo12:tlsvar3437// blr tmp3438// mrs tmp, tpidr_el03439// add x0, x0, tmp3440//3441// 
This is the instruction sequence that GCC emits for ELF GD TLS Relocations in aarch643442// See: https://gcc.godbolt.org/z/e4j7MdErh34433444// adrp x0, :tlsdesc:tlsvar3445sink.add_reloc(Reloc::Aarch64TlsDescAdrPage21, &**symbol, 0);3446Inst::Adrp { rd, off: 0 }.emit(sink, emit_info, state);34473448// ldr tmp, [x0, :tlsdesc_lo12:tlsvar]3449sink.add_reloc(Reloc::Aarch64TlsDescLd64Lo12, &**symbol, 0);3450Inst::ULoad64 {3451rd: tmp,3452mem: AMode::reg(rd.to_reg()),3453flags: MemFlags::trusted(),3454}3455.emit(sink, emit_info, state);34563457// add x0, x0, :tlsdesc_lo12:tlsvar3458sink.add_reloc(Reloc::Aarch64TlsDescAddLo12, &**symbol, 0);3459Inst::AluRRImm12 {3460alu_op: ALUOp::Add,3461size: OperandSize::Size64,3462rd,3463rn: rd.to_reg(),3464imm12: Imm12::maybe_from_u64(0).unwrap(),3465}3466.emit(sink, emit_info, state);34673468// blr tmp3469sink.add_reloc(Reloc::Aarch64TlsDescCall, &**symbol, 0);3470Inst::CallInd {3471info: crate::isa::Box::new(CallInfo::empty(tmp.to_reg(), CallConv::SystemV)),3472}3473.emit(sink, emit_info, state);34743475// mrs tmp, tpidr_el03476sink.put4(0xd53bd040 | machreg_to_gpr(tmp.to_reg()));34773478// add x0, x0, tmp3479Inst::AluRRR {3480alu_op: ALUOp::Add,3481size: OperandSize::Size64,3482rd,3483rn: rd.to_reg(),3484rm: tmp.to_reg(),3485}3486.emit(sink, emit_info, state);3487}34883489&Inst::MachOTlsGetAddr { ref symbol, rd } => {3490// Each thread local variable gets a descriptor, where the first xword of the descriptor is a pointer3491// to a function that takes the descriptor address in x0, and after the function returns x03492// contains the address for the thread local variable3493//3494// what we want to emit is basically:3495//3496// adrp x0, <label>@TLVPPAGE ; Load the address of the page of the thread local variable pointer (TLVP)3497// ldr x0, [x0, <label>@TLVPPAGEOFF] ; Load the descriptor's address into x03498// ldr x1, [x0] ; Load the function pointer (the first part of the descriptor)3499// blr x1 ; Call the function pointer with 
the descriptor address in x03500// ; x0 now contains the TLV address35013502assert_eq!(xreg(0), rd.to_reg());3503let rtmp = writable_xreg(1);35043505// adrp x0, <label>@TLVPPAGE3506sink.add_reloc(Reloc::MachOAarch64TlsAdrPage21, symbol, 0);3507sink.put4(0x90000000);35083509// ldr x0, [x0, <label>@TLVPPAGEOFF]3510sink.add_reloc(Reloc::MachOAarch64TlsAdrPageOff12, symbol, 0);3511sink.put4(0xf9400000);35123513// load [x0] into temp register3514Inst::ULoad64 {3515rd: rtmp,3516mem: AMode::reg(rd.to_reg()),3517flags: MemFlags::trusted(),3518}3519.emit(sink, emit_info, state);35203521// call function pointer in temp register3522Inst::CallInd {3523info: crate::isa::Box::new(CallInfo::empty(3524rtmp.to_reg(),3525CallConv::AppleAarch64,3526)),3527}3528.emit(sink, emit_info, state);3529}35303531&Inst::Unwind { ref inst } => {3532sink.add_unwind(inst.clone());3533}35343535&Inst::DummyUse { .. } => {}35363537&Inst::LabelAddress { dst, label } => {3538// We emit an ADR only, which is +/- 2MiB range. This3539// should be sufficient for the typical use-case of3540// this instruction, which is insmall trampolines to3541// get exception-handler addresses.3542let inst = Inst::Adr { rd: dst, off: 0 };3543let offset = sink.cur_offset();3544inst.emit(sink, emit_info, state);3545sink.use_label_at_offset(offset, label, LabelUse::Adr21);3546}35473548&Inst::StackProbeLoop { start, end, step } => {3549assert!(emit_info.0.enable_probestack());35503551// The loop generated here uses `start` as a counter register to3552// count backwards until negating it exceeds `end`. In other3553// words `start` is an offset from `sp` we're testing where3554// `end` is the max size we need to test. 
The loop looks like:3555//3556// loop_start:3557// sub start, start, #step3558// stur xzr, [sp, start]3559// cmn start, end3560// br.gt loop_start3561// loop_end:3562//3563// Note that this loop cannot use the spilltmp and tmp23564// registers as those are currently used as the input to this3565// loop when generating the instruction. This means that some3566// more flavorful address modes and lowerings need to be3567// avoided.3568//3569// Perhaps someone more clever than I can figure out how to use3570// `subs` or the like and skip the `cmn`, but I can't figure it3571// out at this time.35723573let loop_start = sink.get_label();3574sink.bind_label(loop_start, &mut state.ctrl_plane);35753576Inst::AluRRImm12 {3577alu_op: ALUOp::Sub,3578size: OperandSize::Size64,3579rd: start,3580rn: start.to_reg(),3581imm12: step,3582}3583.emit(sink, emit_info, state);3584Inst::Store32 {3585rd: regs::zero_reg(),3586mem: AMode::RegReg {3587rn: regs::stack_reg(),3588rm: start.to_reg(),3589},3590flags: MemFlags::trusted(),3591}3592.emit(sink, emit_info, state);3593Inst::AluRRR {3594alu_op: ALUOp::AddS,3595size: OperandSize::Size64,3596rd: regs::writable_zero_reg(),3597rn: start.to_reg(),3598rm: end,3599}3600.emit(sink, emit_info, state);36013602let loop_end = sink.get_label();3603Inst::CondBr {3604taken: BranchTarget::Label(loop_start),3605not_taken: BranchTarget::Label(loop_end),3606kind: CondBrKind::Cond(Cond::Gt),3607}3608.emit(sink, emit_info, state);3609sink.bind_label(loop_end, &mut state.ctrl_plane);3610}3611}36123613let end_off = sink.cur_offset();3614debug_assert!(3615(end_off - start_off) <= Inst::worst_case_size()3616|| matches!(self, Inst::EmitIsland { .. 
}),
            "Worst case size exceed for {:?}: {}",
            self,
            end_off - start_off
        );

        state.clear_post_insn();
    }

    /// Pretty-prints this instruction by delegating to `print_with_state`.
    fn pretty_print_inst(&self, state: &mut Self::State) -> String {
        self.print_with_state(state)
    }
}

/// Emits the instruction sequence shared by return-call emission.
///
/// In order, this emits:
///
/// 1. the clobber-restore instructions generated for `CallConv::Tail` from
///    the current frame layout;
/// 2. when the frame has a non-empty setup area, a post-indexed `ldp` that
///    reloads FP and LR and advances SP by the setup-area size;
/// 3. an SP adjustment of `tail_args_size - info.new_stack_arg_size` bytes
///    when that difference is positive;
/// 4. when `info.key` is present, the 32-bit word returned by
///    `key.enc_auti_hint()`.
///
/// # Panics
///
/// Panics if the setup-area size is not representable as an `SImm7Scaled`
/// for `I64`, or if the SP adjustment does not fit in an `i32`.
fn emit_return_call_common_sequence<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    // Step 1: restore clobbered registers.
    let restore_insts =
        AArch64MachineDeps::gen_clobber_restore(CallConv::Tail, &emit_info.0, state.frame_layout());
    for restore in restore_insts {
        restore.emit(sink, emit_info, state);
    }

    // Step 2: reload the frame record, if this frame has one.
    let setup_area_size = state.frame_layout().setup_area_size;
    if setup_area_size > 0 {
        // N.B.: the clobber-restore code above has already moved sp to the
        // right place (and freed the fixed frame along the way), so the usual
        // `mov sp, fp` is not needed here.

        // TODO: we could fold the increment for incoming_args_diff here, as long as that
        // value is less than 502*8, by adding it to `setup_area_size`.
        // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDP--Load-Pair-of-Registers-
        let post_increment =
            SImm7Scaled::maybe_from_i64(i64::from(setup_area_size), types::I64).unwrap();

        // `ldp fp, lr, [sp], #16`
        let reload_frame_record = Inst::LoadP64 {
            rt: writable_fp_reg(),
            rt2: writable_link_reg(),
            mem: PairAMode::SPPostIndexed {
                simm7: post_increment,
            },
            flags: MemFlags::trusted(),
        };
        reload_frame_record.emit(sink, emit_info, state);
    }

    // Step 3: the prologue may have reserved more incoming-argument space than
    // the callee of this return-call needs; give the excess back by adjusting
    // SP.
    let incoming_args_diff = state.frame_layout().tail_args_size - info.new_stack_arg_size;
    if incoming_args_diff > 0 {
        let sp_adjustment = i32::try_from(incoming_args_diff).unwrap();
        for adjust in AArch64MachineDeps::gen_sp_reg_adjust(sp_adjustment) {
            adjust.emit(sink, emit_info, state);
        }
    }

    // Step 4: emit the raw hint word for the configured key, if any.
    // NOTE(review): presumably a pointer-authentication `auti*` hint — confirm
    // against `enc_auti_hint`.
    if let Some(auth_key) = info.key {
        sink.put4(auth_key.enc_auti_hint());
    }
}