Path: blob/main/winch/codegen/src/isa/aarch64/asm.rs
3070 views
//! Assembler library implementation for Aarch64.1use super::{address::Address, regs};2use crate::CallingConvention;3use crate::aarch64::regs::zero;4use crate::masm::{5DivKind, Extend, ExtendKind, FloatCmpKind, Imm, IntCmpKind, RemKind, RoundingMode, ShiftKind,6Signed, TRUSTED_FLAGS, TruncKind,7};8use crate::{9constant_pool::ConstantPool,10masm::OperandSize,11reg::{Reg, WritableReg, writable},12};1314use cranelift_codegen::PatchRegion;15use cranelift_codegen::isa::aarch64::inst::emit::{enc_arith_rrr, enc_move_wide, enc_movk};16use cranelift_codegen::isa::aarch64::inst::{17ASIMDFPModImm, FpuToIntOp, MoveWideConst, NZCV, UImm5,18};19use cranelift_codegen::{20Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState, MachLabel,21Writable,22ir::{ExternalName, MemFlags, SourceLoc, TrapCode, UserExternalNameRef},23isa::aarch64::inst::{24self, ALUOp, ALUOp3, AMode, BitOp, BranchTarget, Cond, CondBrKind, ExtendOp,25FPULeftShiftImm, FPUOp1, FPUOp2,26FPUOpRI::{self, UShr32, UShr64},27FPUOpRIMod, FPURightShiftImm, FpuRoundMode, Imm12, ImmLogic, ImmShift, Inst, IntToFpuOp,28PairAMode, ScalarSize, VecLanesOp, VecMisc2, VectorSize,29emit::{EmitInfo, EmitState},30},31settings,32};33use regalloc2::RegClass;34use wasmtime_core::math::{f32_cvt_to_int_bounds, f64_cvt_to_int_bounds};3536impl From<OperandSize> for inst::OperandSize {37fn from(size: OperandSize) -> Self {38match size {39OperandSize::S32 => Self::Size32,40OperandSize::S64 => Self::Size64,41s => panic!("Invalid operand size {s:?}"),42}43}44}4546impl From<IntCmpKind> for Cond {47fn from(value: IntCmpKind) -> Self {48match value {49IntCmpKind::Eq => Cond::Eq,50IntCmpKind::Ne => Cond::Ne,51IntCmpKind::LtS => Cond::Lt,52IntCmpKind::LtU => Cond::Lo,53IntCmpKind::GtS => Cond::Gt,54IntCmpKind::GtU => Cond::Hi,55IntCmpKind::LeS => Cond::Le,56IntCmpKind::LeU => Cond::Ls,57IntCmpKind::GeS => Cond::Ge,58IntCmpKind::GeU => Cond::Hs,59}60}61}6263impl From<FloatCmpKind> for Cond {64fn from(value: FloatCmpKind) -> Self {65match value {66FloatCmpKind::Eq => Cond::Eq,67FloatCmpKind::Ne => Cond::Ne,68FloatCmpKind::Lt => Cond::Mi,69FloatCmpKind::Gt => Cond::Gt,70FloatCmpKind::Le => Cond::Ls,71FloatCmpKind::Ge => Cond::Ge,72}73}74}7576impl From<OperandSize> for ScalarSize {77fn from(size: OperandSize) -> ScalarSize {78match size {79OperandSize::S8 => ScalarSize::Size8,80OperandSize::S16 => ScalarSize::Size16,81OperandSize::S32 => ScalarSize::Size32,82OperandSize::S64 => ScalarSize::Size64,83OperandSize::S128 => ScalarSize::Size128,84}85}86}8788impl From<ShiftKind> for ALUOp {89fn from(kind: ShiftKind) -> Self {90match kind {91ShiftKind::Shl => ALUOp::Lsl,92ShiftKind::ShrS => ALUOp::Asr,93ShiftKind::ShrU => ALUOp::Lsr,94ShiftKind::Rotr => ALUOp::Extr,95ShiftKind::Rotl => ALUOp::Extr,96}97}98}99100/// Low level assembler implementation for Aarch64.101pub(crate) struct Assembler {102/// The machine instruction buffer.103buffer: MachBuffer<Inst>,104/// Constant emission information.105emit_info: EmitInfo,106/// Emission state.107emit_state: EmitState,108/// Constant pool.109pool: ConstantPool,110}111112impl Assembler {113/// Create a new Aarch64 assembler.114pub fn new(shared_flags: settings::Flags) -> Self {115Self {116buffer: MachBuffer::<Inst>::new(),117emit_state: Default::default(),118emit_info: EmitInfo::new(shared_flags),119pool: ConstantPool::new(),120}121}122}123124impl Assembler {125/// Return the emitted code.126pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> {127let stencil = self128.buffer129.finish(&self.pool.constants(), self.emit_state.ctrl_plane_mut());130stencil.apply_base_srcloc(loc.unwrap_or_default())131}132133fn emit(&mut self, inst: Inst) {134self.emit_with_island(inst, Inst::worst_case_size());135}136137fn emit_with_island(&mut self, inst: Inst, needed_space: u32) {138if self.buffer.island_needed(needed_space) {139let label = self.buffer.get_label();140let jmp = Inst::Jump {141dest: BranchTarget::Label(label),142};143jmp.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);144self.buffer145.emit_island(needed_space, self.emit_state.ctrl_plane_mut());146self.buffer147.bind_label(label, self.emit_state.ctrl_plane_mut());148}149inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);150}151152/// Adds a constant to the constant pool, returning its address.153pub fn add_constant(&mut self, constant: &[u8]) -> Address {154let handle = self.pool.register(constant, &mut self.buffer);155Address::constant(handle)156}157158/// Store a pair of registers.159pub fn stp(&mut self, xt1: Reg, xt2: Reg, addr: Address) {160let mem: PairAMode = addr.try_into().unwrap();161self.emit(Inst::StoreP64 {162rt: xt1.into(),163rt2: xt2.into(),164mem,165flags: MemFlags::trusted(),166});167}168169/// Store a register.170pub fn str(&mut self, reg: Reg, addr: Address, size: OperandSize, flags: MemFlags) {171let mem: AMode = addr.try_into().unwrap();172173use OperandSize::*;174let inst = match (reg.is_int(), size) {175(_, S8) => Inst::Store8 {176rd: reg.into(),177mem,178flags,179},180(_, S16) => Inst::Store16 {181rd: reg.into(),182mem,183flags,184},185(true, S32) => Inst::Store32 {186rd: reg.into(),187mem,188flags,189},190(false, S32) => Inst::FpuStore32 {191rd: reg.into(),192mem,193flags,194},195(true, S64) => Inst::Store64 {196rd: reg.into(),197mem,198flags,199},200(false, S64) => Inst::FpuStore64 {201rd: reg.into(),202mem,203flags,204},205(_, S128) => Inst::FpuStore128 {206rd: reg.into(),207mem,208flags,209},210};211212self.emit(inst);213}214215/// Load a signed register.216pub fn sload(&mut self, addr: Address, rd: WritableReg, size: OperandSize, flags: MemFlags) {217self.ldr(addr, rd, size, true, flags);218}219220/// Load an unsigned register.221pub fn uload(&mut self, addr: Address, rd: WritableReg, size: OperandSize, flags: MemFlags) {222self.ldr(addr, rd, size, false, flags);223}224225/// Load address into a register.226fn ldr(227&mut self,228addr: Address,229rd: WritableReg,230size: OperandSize,231signed: bool,232flags: MemFlags,233) {234use OperandSize::*;235let writable_reg = rd.map(Into::into);236let mem: AMode = addr.try_into().unwrap();237238let inst = match (rd.to_reg().is_int(), signed, size) {239(_, false, S8) => Inst::ULoad8 {240rd: writable_reg,241mem,242flags,243},244(_, true, S8) => Inst::SLoad8 {245rd: writable_reg,246mem,247flags,248},249(_, false, S16) => Inst::ULoad16 {250rd: writable_reg,251mem,252flags,253},254(_, true, S16) => Inst::SLoad16 {255rd: writable_reg,256mem,257flags,258},259(true, false, S32) => Inst::ULoad32 {260rd: writable_reg,261mem,262flags,263},264(false, _, S32) => Inst::FpuLoad32 {265rd: writable_reg,266mem,267flags,268},269(true, true, S32) => Inst::SLoad32 {270rd: writable_reg,271mem,272flags,273},274(true, _, S64) => Inst::ULoad64 {275rd: writable_reg,276mem,277flags,278},279(false, _, S64) => Inst::FpuLoad64 {280rd: writable_reg,281mem,282flags,283},284(_, _, S128) => Inst::FpuLoad128 {285rd: writable_reg,286mem,287flags,288},289};290291self.emit(inst);292}293294/// Load a pair of registers.295pub fn ldp(&mut self, xt1: Reg, xt2: Reg, addr: Address) {296let writable_xt1 = Writable::from_reg(xt1.into());297let writable_xt2 = Writable::from_reg(xt2.into());298let mem = addr.try_into().unwrap();299300self.emit(Inst::LoadP64 {301rt: writable_xt1,302rt2: writable_xt2,303mem,304flags: MemFlags::trusted(),305});306}307308/// Emit a series of instructions to move an arbitrary 64-bit immediate309/// into the destination register.310/// The emitted instructions will depend on the destination register class.311pub fn mov_ir(&mut self, rd: WritableReg, imm: Imm, size: OperandSize) {312match rd.to_reg().class() {313RegClass::Int => {314Inst::load_constant(rd.map(Into::into), imm.unwrap_as_u64())315.into_iter()316.for_each(|i| self.emit(i));317}318RegClass::Float => {319match ASIMDFPModImm::maybe_from_u64(imm.unwrap_as_u64(), size.into()) {320Some(imm) => {321self.emit(Inst::FpuMoveFPImm {322rd: rd.map(Into::into),323imm,324size: size.into(),325});326}327_ => {328let addr = self.add_constant(&imm.to_bytes());329self.uload(addr, rd, size, TRUSTED_FLAGS);330}331}332}333_ => unreachable!(),334}335}336337/// Register to register move.338pub fn mov_rr(&mut self, rm: Reg, rd: WritableReg, size: OperandSize) {339let writable_rd = rd.map(Into::into);340self.emit(Inst::Mov {341size: size.into(),342rd: writable_rd,343rm: rm.into(),344});345}346347/// Floating point register to register move.348pub fn fmov_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {349let writable = rd.map(Into::into);350let inst = match size {351OperandSize::S32 => Inst::FpuMove32 {352rd: writable,353rn: rn.into(),354},355OperandSize::S64 => Inst::FpuMove64 {356rd: writable,357rn: rn.into(),358},359_ => unreachable!(),360};361362self.emit(inst);363}364365pub fn mov_to_fpu(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {366let writable_rd = rd.map(Into::into);367self.emit(Inst::MovToFpu {368size: size.into(),369rd: writable_rd,370rn: rn.into(),371});372}373374pub fn mov_from_vec(&mut self, rn: Reg, rd: WritableReg, idx: u8, size: OperandSize) {375self.emit(Inst::MovFromVec {376rd: rd.map(Into::into),377rn: rn.into(),378idx,379size: size.into(),380});381}382383/// Add immediate and register.384pub fn add_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {385self.alu_rri(ALUOp::Add, imm, rn, rd, size);386}387388/// Add immediate and register, setting overflow flags.389pub fn adds_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {390self.alu_rri(ALUOp::AddS, imm, rn, rd, size);391}392393/// Add with three registers.394pub fn add_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {395self.alu_rrr_extend(ALUOp::Add, rm, rn, rd, size);396}397398/// Add with three registers, setting overflow flags.399pub fn adds_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {400self.alu_rrr_extend(ALUOp::AddS, rm, rn, rd, size);401}402403/// Add across Vector.404pub fn addv(&mut self, rn: Reg, rd: WritableReg, size: VectorSize) {405self.emit(Inst::VecLanes {406op: VecLanesOp::Addv,407rd: rd.map(Into::into),408rn: rn.into(),409size,410});411}412413/// Subtract immediate and register.414pub fn sub_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {415self.alu_rri(ALUOp::Sub, imm, rn, rd, size);416}417418/// Subtract immediate and register, setting flags.419pub fn subs_ir(&mut self, imm: Imm12, rn: Reg, size: OperandSize) {420self.alu_rri(ALUOp::SubS, imm, rn, writable!(regs::zero()), size);421}422423/// Subtract with three registers.424pub fn sub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {425self.alu_rrr_extend(ALUOp::Sub, rm, rn, rd, size);426}427428/// Subtract with three registers, setting flags.429pub fn subs_rrr(&mut self, rm: Reg, rn: Reg, size: OperandSize) {430self.alu_rrr_extend(ALUOp::SubS, rm, rn, writable!(regs::zero()), size);431}432433/// Multiply with three registers.434pub fn mul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {435self.alu_rrrr(ALUOp3::MAdd, rm, rn, rd, regs::zero(), size);436}437438/// Signed/unsigned division with three registers.439pub fn div_rrr(440&mut self,441divisor: Reg,442dividend: Reg,443dest: Writable<Reg>,444kind: DivKind,445size: OperandSize,446) {447// Check for division by 0.448self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size);449450// check for overflow451if kind == DivKind::Signed {452// Check for divisor overflow.453self.alu_rri(454ALUOp::AddS,455Imm12::maybe_from_u64(1).expect("1 to fit in 12 bits"),456divisor,457writable!(zero()),458size,459);460461// Check if the dividend is 1.462self.emit(Inst::CCmpImm {463size: size.into(),464rn: dividend.into(),465imm: UImm5::maybe_from_u8(1).expect("1 fits in 5 bits"),466nzcv: NZCV::new(false, false, false, false),467cond: Cond::Eq,468});469470// Finally, trap if the previous operation overflowed.471self.trapif(Cond::Vs, TrapCode::INTEGER_OVERFLOW);472}473474// `cranelift-codegen` doesn't support emitting sdiv for anything but I64,475// we therefore sign-extend the operand.476// see: https://github.com/bytecodealliance/wasmtime/issues/9766477let size = if size == OperandSize::S32 && kind == DivKind::Signed {478self.extend(479divisor,480writable!(divisor),481ExtendKind::Signed(Extend::<Signed>::I64Extend32),482);483self.extend(484dividend,485writable!(dividend),486ExtendKind::Signed(Extend::<Signed>::I64Extend32),487);488OperandSize::S64489} else {490size491};492493let op = match kind {494DivKind::Signed => ALUOp::SDiv,495DivKind::Unsigned => ALUOp::UDiv,496};497498self.alu_rrr(op, divisor, dividend, dest.map(Into::into), size);499}500501/// Signed/unsigned remainder operation with three registers.502pub fn rem_rrr(503&mut self,504divisor: Reg,505dividend: Reg,506dest: Writable<Reg>,507scratch: WritableReg,508kind: RemKind,509size: OperandSize,510) {511// Check for division by 0512self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size);513514// `cranelift-codegen` doesn't support emitting sdiv for anything but I64,515// we therefore sign-extend the operand.516// see: https://github.com/bytecodealliance/wasmtime/issues/9766517let size = if size == OperandSize::S32 && kind.is_signed() {518self.extend(519divisor,520writable!(divisor),521ExtendKind::Signed(Extend::<Signed>::I64Extend32),522);523self.extend(524dividend,525writable!(dividend),526ExtendKind::Signed(Extend::<Signed>::I64Extend32),527);528OperandSize::S64529} else {530size531};532533let op = match kind {534RemKind::Signed => ALUOp::SDiv,535RemKind::Unsigned => ALUOp::UDiv,536};537538self.alu_rrr(op, divisor, dividend, scratch, size);539540self.alu_rrrr(541ALUOp3::MSub,542scratch.to_reg(),543divisor,544dest.map(Into::into),545dividend,546size,547);548}549550/// And with three registers.551pub fn and_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {552self.alu_rrr(ALUOp::And, rm, rn, rd, size);553}554555/// And immediate and register.556pub fn and_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {557self.alu_rri_logic(ALUOp::And, imm, rn, rd, size);558}559560/// Or with three registers.561pub fn or_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {562self.alu_rrr(ALUOp::Orr, rm, rn, rd, size);563}564565/// Or immediate and register.566pub fn or_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {567self.alu_rri_logic(ALUOp::Orr, imm, rn, rd, size);568}569570/// Xor with three registers.571pub fn xor_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {572self.alu_rrr(ALUOp::Eor, rm, rn, rd, size);573}574575/// Xor immediate and register.576pub fn xor_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {577self.alu_rri_logic(ALUOp::Eor, imm, rn, rd, size);578}579580/// Shift with three registers.581pub fn shift_rrr(582&mut self,583rm: Reg,584rn: Reg,585rd: WritableReg,586kind: ShiftKind,587size: OperandSize,588) {589let shift_op: ALUOp = kind.into();590// In the case of rotate left, we negate the register containing the591// shift value.592if kind == ShiftKind::Rotl {593self.alu_rrr(ALUOp::Sub, rm, regs::zero(), writable!(rm), size);594self.alu_rrr(shift_op, rm, rn, rd, size);595} else {596self.alu_rrr(shift_op, rm, rn, rd, size);597}598}599600/// Shift immediate and register.601pub fn shift_ir(602&mut self,603imm: ImmShift,604rn: Reg,605rd: WritableReg,606kind: ShiftKind,607size: OperandSize,608) {609let shift_op: ALUOp = kind.into();610// In the case of rotate left, we emit rotate right with type_size -611// value.612if kind == ShiftKind::Rotl {613let value_size = size.num_bits();614let mut imm_val = value_size.wrapping_sub(imm.value());615imm_val &= value_size - 1;616let negated_imm = ImmShift::maybe_from_u64(imm_val as u64).unwrap();617618self.alu_rri_shift(shift_op, negated_imm, rn, rd, size);619} else {620self.alu_rri_shift(shift_op, imm, rn, rd, size);621}622}623624/// Count Leading Zeros.625pub fn clz(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {626self.bit_rr(BitOp::Clz, rn, rd, size);627}628629/// Reverse Bits reverses the bit order in a register.630pub fn rbit(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {631self.bit_rr(BitOp::RBit, rn, rd, size);632}633634/// Float add with three registers.635pub fn fadd_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {636self.fpu_rrr(FPUOp2::Add, rm, rn, rd, size);637}638639/// Float sub with three registers.640pub fn fsub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {641self.fpu_rrr(FPUOp2::Sub, rm, rn, rd, size);642}643644/// Float multiply with three registers.645pub fn fmul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {646self.fpu_rrr(FPUOp2::Mul, rm, rn, rd, size);647}648649/// Float division with three registers.650pub fn fdiv_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {651self.fpu_rrr(FPUOp2::Div, rm, rn, rd, size);652}653654/// Float max with three registers.655pub fn fmax_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {656self.fpu_rrr(FPUOp2::Max, rm, rn, rd, size);657}658659/// Float min with three registers.660pub fn fmin_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {661self.fpu_rrr(FPUOp2::Min, rm, rn, rd, size);662}663664/// Float neg with two registers.665pub fn fneg_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {666self.fpu_rr(FPUOp1::Neg, rn, rd, size);667}668669/// Float abs with two registers.670pub fn fabs_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {671self.fpu_rr(FPUOp1::Abs, rn, rd, size);672}673674/// Float sqrt with two registers.675pub fn fsqrt_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {676self.fpu_rr(FPUOp1::Sqrt, rn, rd, size);677}678679/// Float round (ceil, trunc, floor) with two registers.680pub fn fround_rr(&mut self, rn: Reg, rd: WritableReg, mode: RoundingMode, size: OperandSize) {681let fpu_mode = match (mode, size) {682(RoundingMode::Nearest, OperandSize::S32) => FpuRoundMode::Nearest32,683(RoundingMode::Up, OperandSize::S32) => FpuRoundMode::Plus32,684(RoundingMode::Down, OperandSize::S32) => FpuRoundMode::Minus32,685(RoundingMode::Zero, OperandSize::S32) => FpuRoundMode::Zero32,686(RoundingMode::Nearest, OperandSize::S64) => FpuRoundMode::Nearest64,687(RoundingMode::Up, OperandSize::S64) => FpuRoundMode::Plus64,688(RoundingMode::Down, OperandSize::S64) => FpuRoundMode::Minus64,689(RoundingMode::Zero, OperandSize::S64) => FpuRoundMode::Zero64,690(m, o) => panic!("Invalid rounding mode or operand size {m:?}, {o:?}"),691};692self.fpu_round(fpu_mode, rn, rd)693}694695/// Float unsigned shift right with two registers and an immediate.696pub fn fushr_rri(&mut self, rn: Reg, rd: WritableReg, amount: u8, size: OperandSize) {697let imm = FPURightShiftImm {698amount,699lane_size_in_bits: size.num_bits(),700};701let ushr = match size {702OperandSize::S32 => UShr32(imm),703OperandSize::S64 => UShr64(imm),704_ => unreachable!(),705};706self.fpu_rri(ushr, rn, rd)707}708709/// Float unsigned shift left and insert with three registers710/// and an immediate.711pub fn fsli_rri_mod(712&mut self,713ri: Reg,714rn: Reg,715rd: WritableReg,716amount: u8,717size: OperandSize,718) {719let imm = FPULeftShiftImm {720amount,721lane_size_in_bits: size.num_bits(),722};723let sli = match size {724OperandSize::S32 => FPUOpRIMod::Sli32(imm),725OperandSize::S64 => FPUOpRIMod::Sli64(imm),726_ => unreachable!(),727};728self.fpu_rri_mod(sli, ri, rn, rd)729}730731/// Float compare.732pub fn fcmp(&mut self, rn: Reg, rm: Reg, size: OperandSize) {733self.emit(Inst::FpuCmp {734size: size.into(),735rn: rn.into(),736rm: rm.into(),737})738}739740/// Convert an signed integer to a float.741pub fn cvt_sint_to_float(742&mut self,743rn: Reg,744rd: WritableReg,745src_size: OperandSize,746dst_size: OperandSize,747) {748let op = match (src_size, dst_size) {749(OperandSize::S32, OperandSize::S32) => IntToFpuOp::I32ToF32,750(OperandSize::S64, OperandSize::S32) => IntToFpuOp::I64ToF32,751(OperandSize::S32, OperandSize::S64) => IntToFpuOp::I32ToF64,752(OperandSize::S64, OperandSize::S64) => IntToFpuOp::I64ToF64,753_ => unreachable!(),754};755756self.emit(Inst::IntToFpu {757op,758rd: rd.map(Into::into),759rn: rn.into(),760});761}762763/// Convert an unsigned integer to a float.764pub fn cvt_uint_to_float(765&mut self,766rn: Reg,767rd: WritableReg,768src_size: OperandSize,769dst_size: OperandSize,770) {771let op = match (src_size, dst_size) {772(OperandSize::S32, OperandSize::S32) => IntToFpuOp::U32ToF32,773(OperandSize::S64, OperandSize::S32) => IntToFpuOp::U64ToF32,774(OperandSize::S32, OperandSize::S64) => IntToFpuOp::U32ToF64,775(OperandSize::S64, OperandSize::S64) => IntToFpuOp::U64ToF64,776_ => unreachable!(),777};778779self.emit(Inst::IntToFpu {780op,781rd: rd.map(Into::into),782rn: rn.into(),783});784}785786/// Change precision of float.787pub fn cvt_float_to_float(788&mut self,789rn: Reg,790rd: WritableReg,791src_size: OperandSize,792dst_size: OperandSize,793) {794let (fpu_op, size) = match (src_size, dst_size) {795(OperandSize::S32, OperandSize::S64) => (FPUOp1::Cvt32To64, ScalarSize::Size32),796(OperandSize::S64, OperandSize::S32) => (FPUOp1::Cvt64To32, ScalarSize::Size64),797_ => unimplemented!(),798};799self.emit(Inst::FpuRR {800fpu_op,801size,802rd: rd.map(Into::into),803rn: rn.into(),804});805}806807/// Return instruction.808pub fn ret(&mut self) {809self.emit(Inst::Ret {});810}811812/// An unconditional branch.813pub fn jmp(&mut self, target: MachLabel) {814self.emit(Inst::Jump {815dest: BranchTarget::Label(target),816});817}818819/// A conditional branch.820pub fn jmp_if(&mut self, kind: Cond, taken: MachLabel) {821self.emit(Inst::CondBr {822taken: BranchTarget::Label(taken),823not_taken: BranchTarget::ResolvedOffset(4),824kind: CondBrKind::Cond(kind),825});826}827828/// Emits a jump table sequence.829pub fn jmp_table(830&mut self,831targets: &[MachLabel],832default: MachLabel,833index: Reg,834tmp1: Reg,835tmp2: Reg,836) {837self.emit_with_island(838Inst::JTSequence {839default,840targets: Box::new(targets.to_vec()),841ridx: index.into(),842rtmp1: Writable::from_reg(tmp1.into()),843rtmp2: Writable::from_reg(tmp2.into()),844},845// number of bytes needed for the jumptable sequence:846// 4 bytes per instruction, with 8 instructions base + the size of847// the jumptable more.848(4 * (8 + targets.len())).try_into().unwrap(),849);850}851852/// Conditional Set sets the destination register to 1 if the condition853/// is true, and otherwise sets it to 0.854pub fn cset(&mut self, rd: WritableReg, cond: Cond) {855self.emit(Inst::CSet {856rd: rd.map(Into::into),857cond,858});859}860861/// If the condition is true, `csel` writes rn to rd. If the862/// condition is false, it writes rm to rd863pub fn csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond) {864self.emit(Inst::CSel {865rd: rd.map(Into::into),866rn: rn.into(),867rm: rm.into(),868cond,869});870}871872/// If the condition is true, `csel` writes rn to rd. If the873/// condition is false, it writes rm to rd874pub fn fpu_csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond, size: OperandSize) {875match size {876OperandSize::S32 => {877self.emit(Inst::FpuCSel32 {878rd: rd.map(Into::into),879rn: rn.into(),880rm: rm.into(),881cond,882});883}884OperandSize::S64 => {885self.emit(Inst::FpuCSel64 {886rd: rd.map(Into::into),887rn: rn.into(),888rm: rm.into(),889cond,890});891}892_ => todo!(),893}894}895896/// Population count per byte.897pub fn cnt(&mut self, rd: WritableReg) {898self.emit(Inst::VecMisc {899op: VecMisc2::Cnt,900rd: rd.map(Into::into),901rn: rd.to_reg().into(),902size: VectorSize::Size8x8,903});904}905906pub fn extend(&mut self, rn: Reg, rd: WritableReg, kind: ExtendKind) {907self.emit(Inst::Extend {908rd: rd.map(Into::into),909rn: rn.into(),910signed: kind.signed(),911from_bits: kind.from_bits(),912to_bits: kind.to_bits(),913})914}915916/// Bitwise AND (shifted register), setting flags.917pub fn ands_rr(&mut self, rn: Reg, rm: Reg, size: OperandSize) {918self.alu_rrr(ALUOp::AndS, rm, rn, writable!(regs::zero()), size);919}920921/// Permanently Undefined.922pub fn udf(&mut self, code: TrapCode) {923self.emit(Inst::Udf { trap_code: code });924}925926/// Conditional trap.927pub fn trapif(&mut self, cc: Cond, code: TrapCode) {928self.emit(Inst::TrapIf {929kind: CondBrKind::Cond(cc),930trap_code: code,931});932}933934/// Trap if `rn` is zero.935pub fn trapz(&mut self, rn: Reg, code: TrapCode, size: OperandSize) {936self.emit(Inst::TrapIf {937kind: CondBrKind::Zero(rn.into(), size.into()),938trap_code: code,939});940}941942// Helpers for ALU operations.943944fn alu_rri(&mut self, op: ALUOp, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {945self.emit(Inst::AluRRImm12 {946alu_op: op,947size: size.into(),948rd: rd.map(Into::into),949rn: rn.into(),950imm12: imm,951});952}953954fn alu_rri_logic(955&mut self,956op: ALUOp,957imm: ImmLogic,958rn: Reg,959rd: WritableReg,960size: OperandSize,961) {962self.emit(Inst::AluRRImmLogic {963alu_op: op,964size: size.into(),965rd: rd.map(Into::into),966rn: rn.into(),967imml: imm,968});969}970971fn alu_rri_shift(972&mut self,973op: ALUOp,974imm: ImmShift,975rn: Reg,976rd: WritableReg,977size: OperandSize,978) {979self.emit(Inst::AluRRImmShift {980alu_op: op,981size: size.into(),982rd: rd.map(Into::into),983rn: rn.into(),984immshift: imm,985});986}987988fn alu_rrr(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {989self.emit(Inst::AluRRR {990alu_op: op,991size: size.into(),992rd: rd.map(Into::into),993rn: rn.into(),994rm: rm.into(),995});996}997998fn alu_rrr_extend(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {999self.emit(Inst::AluRRRExtend {1000alu_op: op,1001size: size.into(),1002rd: rd.map(Into::into),1003rn: rn.into(),1004rm: rm.into(),1005extendop: ExtendOp::UXTX,1006});1007}10081009fn alu_rrrr(1010&mut self,1011op: ALUOp3,1012rm: Reg,1013rn: Reg,1014rd: WritableReg,1015ra: Reg,1016size: OperandSize,1017) {1018self.emit(Inst::AluRRRR {1019alu_op: op,1020size: size.into(),1021rd: rd.map(Into::into),1022rn: rn.into(),1023rm: rm.into(),1024ra: ra.into(),1025});1026}10271028fn fpu_rrr(&mut self, op: FPUOp2, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {1029self.emit(Inst::FpuRRR {1030fpu_op: op,1031size: size.into(),1032rd: rd.map(Into::into),1033rn: rn.into(),1034rm: rm.into(),1035});1036}10371038fn fpu_rri(&mut self, op: FPUOpRI, rn: Reg, rd: WritableReg) {1039self.emit(Inst::FpuRRI {1040fpu_op: op,1041rd: rd.map(Into::into),1042rn: rn.into(),1043});1044}10451046fn fpu_rri_mod(&mut self, op: FPUOpRIMod, ri: Reg, rn: Reg, rd: WritableReg) {1047self.emit(Inst::FpuRRIMod {1048fpu_op: op,1049rd: rd.map(Into::into),1050ri: ri.into(),1051rn: rn.into(),1052});1053}10541055fn fpu_rr(&mut self, op: FPUOp1, rn: Reg, rd: WritableReg, size: OperandSize) {1056self.emit(Inst::FpuRR {1057fpu_op: op,1058size: size.into(),1059rd: rd.map(Into::into),1060rn: rn.into(),1061});1062}10631064fn fpu_round(&mut self, op: FpuRoundMode, rn: Reg, rd: WritableReg) {1065self.emit(Inst::FpuRound {1066op,1067rd: rd.map(Into::into),1068rn: rn.into(),1069});1070}10711072fn bit_rr(&mut self, op: BitOp, rn: Reg, rd: WritableReg, size: OperandSize) {1073self.emit(Inst::BitRR {1074op,1075size: size.into(),1076rd: rd.map(Into::into),1077rn: rn.into(),1078});1079}10801081/// Get a label from the underlying machine code buffer.1082pub fn get_label(&mut self) -> MachLabel {1083self.buffer.get_label()1084}10851086/// Get a mutable reference to underlying1087/// machine buffer.1088pub fn buffer_mut(&mut self) -> &mut MachBuffer<Inst> {1089&mut self.buffer1090}10911092/// Get a reference to the underlying machine buffer.1093pub fn buffer(&self) -> &MachBuffer<Inst> {1094&self.buffer1095}10961097/// Emit a direct call to a function defined locally and1098/// referenced to by `name`.1099pub fn call_with_name(&mut self, name: UserExternalNameRef, call_conv: CallingConvention) {1100self.emit(Inst::Call {1101info: Box::new(cranelift_codegen::CallInfo::empty(1102ExternalName::user(name),1103call_conv.into(),1104)),1105})1106}11071108/// Emit an indirect call to a function whose address is1109/// stored the `callee` register.1110pub fn call_with_reg(&mut self, callee: Reg, call_conv: CallingConvention) {1111self.emit(Inst::CallInd {1112info: Box::new(cranelift_codegen::CallInfo::empty(1113callee.into(),1114call_conv.into(),1115)),1116})1117}11181119/// Load the min value for an integer of size out_size, as a floating-point1120/// of size `in-size`, into register `rd`.1121fn min_fp_value(1122&mut self,1123signed: bool,1124in_size: OperandSize,1125out_size: OperandSize,1126rd: Writable<Reg>,1127) {1128match in_size {1129OperandSize::S32 => {1130let (min, _) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into());1131self.mov_ir(rd, Imm::f32(min.to_bits()), in_size);1132}1133OperandSize::S64 => {1134let (min, _) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into());1135self.mov_ir(rd, Imm::f64(min.to_bits()), in_size);1136}1137s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()),1138};1139}11401141/// Load the max value for an integer of size out_size, as a floating-point1142/// of size `in_size`, into register `rd`.1143fn max_fp_value(1144&mut self,1145signed: bool,1146in_size: OperandSize,1147out_size: OperandSize,1148rd: Writable<Reg>,1149) {1150match in_size {1151OperandSize::S32 => {1152let (_, max) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into());1153self.mov_ir(rd, Imm::f32(max.to_bits()), in_size);1154}1155OperandSize::S64 => {1156let (_, max) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into());1157self.mov_ir(rd, Imm::f64(max.to_bits()), in_size);1158}1159s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()),1160};1161}11621163/// Emit instructions to check if the value in `rn` is NaN.1164fn check_nan(&mut self, rn: Reg, size: OperandSize) {1165self.fcmp(rn, rn, size);1166self.trapif(Cond::Vs, TrapCode::BAD_CONVERSION_TO_INTEGER);1167}11681169/// Convert the floating point of size `src_size` stored in `src`, into a integer of size1170/// `dst_size`, storing the result in `dst`.1171pub fn fpu_to_int(1172&mut self,1173dst: Writable<Reg>,1174src: Reg,1175tmp_reg: WritableReg,1176src_size: OperandSize,1177dst_size: OperandSize,1178kind: TruncKind,1179signed: bool,1180) {1181if kind.is_unchecked() {1182// Confusingly, when `kind` is `Unchecked` is when we actually need to perform the checks:1183// - check if fp is NaN1184// - check bounds1185self.check_nan(src, src_size);11861187self.min_fp_value(signed, src_size, dst_size, tmp_reg);1188self.fcmp(src, tmp_reg.to_reg(), src_size);1189self.trapif(Cond::Le, TrapCode::INTEGER_OVERFLOW);11901191self.max_fp_value(signed, src_size, dst_size, tmp_reg);1192self.fcmp(src, tmp_reg.to_reg(), src_size);1193self.trapif(Cond::Ge, TrapCode::INTEGER_OVERFLOW);1194}11951196self.cvt_fpu_to_int(dst, src, src_size, dst_size, signed)1197}11981199/// Select and emit the appropriate `fcvt*` instruction1200pub fn cvt_fpu_to_int(1201&mut self,1202dst: Writable<Reg>,1203src: Reg,1204src_size: OperandSize,1205dst_size: OperandSize,1206signed: bool,1207) {1208let op = match (src_size, dst_size, signed) {1209(OperandSize::S32, OperandSize::S32, false) => FpuToIntOp::F32ToU32,1210(OperandSize::S32, OperandSize::S32, true) => FpuToIntOp::F32ToI32,1211(OperandSize::S32, OperandSize::S64, false) => FpuToIntOp::F32ToU64,1212(OperandSize::S32, OperandSize::S64, true) => FpuToIntOp::F32ToI64,1213(OperandSize::S64, OperandSize::S32, false) => FpuToIntOp::F64ToU32,1214(OperandSize::S64, OperandSize::S32, true) => FpuToIntOp::F64ToI32,1215(OperandSize::S64, OperandSize::S64, false) => FpuToIntOp::F64ToU64,1216(OperandSize::S64, OperandSize::S64, true) => FpuToIntOp::F64ToI64,1217(fsize, int_size, signed) => unimplemented!(1218"unsupported conversion: f{} to {}{}",1219fsize.num_bits(),1220if signed { "i" } else { "u" },1221int_size.num_bits(),1222),1223};12241225self.emit(Inst::FpuToInt {1226op,1227rd: dst.map(Into::into),1228rn: src.into(),1229});1230}1231}12321233/// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted,1234/// but the immediate is not yet known.1235pub(crate) struct PatchableAddToReg {1236/// The region to be patched in the [`MachBuffer`]. It contains1237/// space for 3 32-bit instructions, i.e. it's 12 bytes long.1238region: PatchRegion,12391240// The destination register for the add instruction.1241reg: Writable<Reg>,12421243// The temporary register used to hold the immediate value.1244tmp: Writable<Reg>,1245}12461247impl PatchableAddToReg {1248/// Create a new [`PatchableAddToReg`] by capturing a region in the output1249/// buffer containing an instruction sequence that loads an immediate into a1250/// register `tmp`, then adds it to a register `reg`. The [`MachBuffer`]1251/// will have that instruction sequence written to the region, though the1252/// immediate loaded into `tmp` will be `0` until the `::finalize` method is1253/// called.1254pub(crate) fn new(reg: Writable<Reg>, tmp: Writable<Reg>, buf: &mut MachBuffer<Inst>) -> Self {1255let insns = Self::add_immediate_instruction_sequence(reg, tmp, 0);1256let open = buf.start_patchable();1257buf.put_data(&insns);1258let region = buf.end_patchable(open);12591260Self { region, reg, tmp }1261}12621263fn add_immediate_instruction_sequence(1264reg: Writable<Reg>,1265tmp: Writable<Reg>,1266imm: i32,1267) -> [u8; 12] {1268let imm_hi = imm as u64 & 0xffff_0000;1269let imm_hi = MoveWideConst::maybe_from_u64(imm_hi).unwrap();12701271let imm_lo = imm as u64 & 0x0000_ffff;1272let imm_lo = MoveWideConst::maybe_from_u64(imm_lo).unwrap();12731274let size = OperandSize::S64.into();12751276let tmp = tmp.map(Into::into);1277let rd = reg.map(Into::into);12781279// This is "movz to bits 16-31 of 64 bit reg tmp and zero the rest"1280let mov_insn = enc_move_wide(inst::MoveWideOp::MovZ, tmp, imm_hi, size);12811282// This is "movk to bits 0-15 of 64 bit reg tmp"1283let movk_insn = enc_movk(tmp, imm_lo, size);12841285// This is "add tmp to rd". The opcodes are somewhat buried in the1286// instruction encoder so we just repeat them here.1287let add_bits_31_21: u32 = 0b00001011_000 | (size.sf_bit() << 10);1288let add_bits_15_10: u32 = 0;1289let add_insn = enc_arith_rrr(1290add_bits_31_21,1291add_bits_15_10,1292rd,1293rd.to_reg(),1294tmp.to_reg(),1295);12961297let mut buf = [0u8; 12];1298buf[0..4].copy_from_slice(&mov_insn.to_le_bytes());1299buf[4..8].copy_from_slice(&movk_insn.to_le_bytes());1300buf[8..12].copy_from_slice(&add_insn.to_le_bytes());1301buf1302}13031304/// Patch the [`MachBuffer`] with the known constant to be added to the register. The final1305/// value is passed in as an i32, but the instruction encoding is fixed when1306/// [`PatchableAddToReg::new`] is called.1307pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer<Inst>) {1308let insns = Self::add_immediate_instruction_sequence(self.reg, self.tmp, val);1309let slice = self.region.patch(buffer);1310assert_eq!(slice.len(), insns.len());1311slice.copy_from_slice(&insns);1312}1313}131413151316