Path: blob/main/winch/codegen/src/isa/aarch64/asm.rs
1692 views
//! Assembler library implementation for Aarch64.1use super::{address::Address, regs};2use crate::CallingConvention;3use crate::aarch64::regs::zero;4use crate::masm::{5DivKind, Extend, ExtendKind, FloatCmpKind, Imm, IntCmpKind, RemKind, RoundingMode, ShiftKind,6Signed, TRUSTED_FLAGS, TruncKind,7};8use crate::{9constant_pool::ConstantPool,10masm::OperandSize,11reg::{Reg, WritableReg, writable},12};1314use cranelift_codegen::PatchRegion;15use cranelift_codegen::isa::aarch64::inst::emit::{enc_arith_rrr, enc_move_wide, enc_movk};16use cranelift_codegen::isa::aarch64::inst::{17ASIMDFPModImm, FpuToIntOp, MoveWideConst, NZCV, UImm5,18};19use cranelift_codegen::{20Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState, MachLabel,21Writable,22ir::{ExternalName, MemFlags, SourceLoc, TrapCode, UserExternalNameRef},23isa::aarch64::inst::{24self, ALUOp, ALUOp3, AMode, BitOp, BranchTarget, Cond, CondBrKind, ExtendOp,25FPULeftShiftImm, FPUOp1, FPUOp2,26FPUOpRI::{self, UShr32, UShr64},27FPUOpRIMod, FPURightShiftImm, FpuRoundMode, Imm12, ImmLogic, ImmShift, Inst, IntToFpuOp,28PairAMode, ScalarSize, VecLanesOp, VecMisc2, VectorSize,29emit::{EmitInfo, EmitState},30},31settings,32};33use regalloc2::RegClass;34use wasmtime_math::{f32_cvt_to_int_bounds, f64_cvt_to_int_bounds};3536impl From<OperandSize> for inst::OperandSize {37fn from(size: OperandSize) -> Self {38match size {39OperandSize::S32 => Self::Size32,40OperandSize::S64 => Self::Size64,41s => panic!("Invalid operand size {s:?}"),42}43}44}4546impl From<IntCmpKind> for Cond {47fn from(value: IntCmpKind) -> Self {48match value {49IntCmpKind::Eq => Cond::Eq,50IntCmpKind::Ne => Cond::Ne,51IntCmpKind::LtS => Cond::Lt,52IntCmpKind::LtU => Cond::Lo,53IntCmpKind::GtS => Cond::Gt,54IntCmpKind::GtU => Cond::Hi,55IntCmpKind::LeS => Cond::Le,56IntCmpKind::LeU => Cond::Ls,57IntCmpKind::GeS => Cond::Ge,58IntCmpKind::GeU => Cond::Hs,59}60}61}6263impl From<FloatCmpKind> for Cond {64fn from(value: FloatCmpKind) -> Self {65match value {66FloatCmpKind::Eq => Cond::Eq,67FloatCmpKind::Ne => Cond::Ne,68FloatCmpKind::Lt => Cond::Mi,69FloatCmpKind::Gt => Cond::Gt,70FloatCmpKind::Le => Cond::Ls,71FloatCmpKind::Ge => Cond::Ge,72}73}74}7576impl From<OperandSize> for ScalarSize {77fn from(size: OperandSize) -> ScalarSize {78match size {79OperandSize::S8 => ScalarSize::Size8,80OperandSize::S16 => ScalarSize::Size16,81OperandSize::S32 => ScalarSize::Size32,82OperandSize::S64 => ScalarSize::Size64,83OperandSize::S128 => ScalarSize::Size128,84}85}86}8788/// Low level assembler implementation for Aarch64.89pub(crate) struct Assembler {90/// The machine instruction buffer.91buffer: MachBuffer<Inst>,92/// Constant emission information.93emit_info: EmitInfo,94/// Emission state.95emit_state: EmitState,96/// Constant pool.97pool: ConstantPool,98}99100impl Assembler {101/// Create a new Aarch64 assembler.102pub fn new(shared_flags: settings::Flags) -> Self {103Self {104buffer: MachBuffer::<Inst>::new(),105emit_state: Default::default(),106emit_info: EmitInfo::new(shared_flags),107pool: ConstantPool::new(),108}109}110}111112impl Assembler {113/// Return the emitted code.114pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> {115let stencil = self116.buffer117.finish(&self.pool.constants(), self.emit_state.ctrl_plane_mut());118stencil.apply_base_srcloc(loc.unwrap_or_default())119}120121fn emit(&mut self, inst: Inst) {122self.emit_with_island(inst, Inst::worst_case_size());123}124125fn emit_with_island(&mut self, inst: Inst, needed_space: u32) {126if self.buffer.island_needed(needed_space) {127let label = self.buffer.get_label();128let jmp = Inst::Jump {129dest: BranchTarget::Label(label),130};131jmp.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);132self.buffer133.emit_island(needed_space, self.emit_state.ctrl_plane_mut());134self.buffer135.bind_label(label, self.emit_state.ctrl_plane_mut());136}137inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);138}139140/// Adds a constant to the constant pool, returning its address.141pub fn add_constant(&mut self, constant: &[u8]) -> Address {142let handle = self.pool.register(constant, &mut self.buffer);143Address::constant(handle)144}145146/// Store a pair of registers.147pub fn stp(&mut self, xt1: Reg, xt2: Reg, addr: Address) {148let mem: PairAMode = addr.try_into().unwrap();149self.emit(Inst::StoreP64 {150rt: xt1.into(),151rt2: xt2.into(),152mem,153flags: MemFlags::trusted(),154});155}156157/// Store a register.158pub fn str(&mut self, reg: Reg, addr: Address, size: OperandSize, flags: MemFlags) {159let mem: AMode = addr.try_into().unwrap();160161use OperandSize::*;162let inst = match (reg.is_int(), size) {163(_, S8) => Inst::Store8 {164rd: reg.into(),165mem,166flags,167},168(_, S16) => Inst::Store16 {169rd: reg.into(),170mem,171flags,172},173(true, S32) => Inst::Store32 {174rd: reg.into(),175mem,176flags,177},178(false, S32) => Inst::FpuStore32 {179rd: reg.into(),180mem,181flags,182},183(true, S64) => Inst::Store64 {184rd: reg.into(),185mem,186flags,187},188(false, S64) => Inst::FpuStore64 {189rd: reg.into(),190mem,191flags,192},193(_, S128) => Inst::FpuStore128 {194rd: reg.into(),195mem,196flags,197},198};199200self.emit(inst);201}202203/// Load a signed register.204pub fn sload(&mut self, addr: Address, rd: WritableReg, size: OperandSize, flags: MemFlags) {205self.ldr(addr, rd, size, true, flags);206}207208/// Load an unsigned register.209pub fn uload(&mut self, addr: Address, rd: WritableReg, size: OperandSize, flags: MemFlags) {210self.ldr(addr, rd, size, false, flags);211}212213/// Load address into a register.214fn ldr(215&mut self,216addr: Address,217rd: WritableReg,218size: OperandSize,219signed: bool,220flags: MemFlags,221) {222use OperandSize::*;223let writable_reg = rd.map(Into::into);224let mem: AMode = addr.try_into().unwrap();225226let inst = match (rd.to_reg().is_int(), signed, size) {227(_, false, S8) => Inst::ULoad8 {228rd: writable_reg,229mem,230flags,231},232(_, true, S8) => Inst::SLoad8 {233rd: writable_reg,234mem,235flags,236},237(_, false, S16) => Inst::ULoad16 {238rd: writable_reg,239mem,240flags,241},242(_, true, S16) => Inst::SLoad16 {243rd: writable_reg,244mem,245flags,246},247(true, false, S32) => Inst::ULoad32 {248rd: writable_reg,249mem,250flags,251},252(false, _, S32) => Inst::FpuLoad32 {253rd: writable_reg,254mem,255flags,256},257(true, true, S32) => Inst::SLoad32 {258rd: writable_reg,259mem,260flags,261},262(true, _, S64) => Inst::ULoad64 {263rd: writable_reg,264mem,265flags,266},267(false, _, S64) => Inst::FpuLoad64 {268rd: writable_reg,269mem,270flags,271},272(_, _, S128) => Inst::FpuLoad128 {273rd: writable_reg,274mem,275flags,276},277};278279self.emit(inst);280}281282/// Load a pair of registers.283pub fn ldp(&mut self, xt1: Reg, xt2: Reg, addr: Address) {284let writable_xt1 = Writable::from_reg(xt1.into());285let writable_xt2 = Writable::from_reg(xt2.into());286let mem = addr.try_into().unwrap();287288self.emit(Inst::LoadP64 {289rt: writable_xt1,290rt2: writable_xt2,291mem,292flags: MemFlags::trusted(),293});294}295296/// Emit a series of instructions to move an arbitrary 64-bit immediate297/// into the destination register.298/// The emitted instructions will depend on the destination register class.299pub fn mov_ir(&mut self, rd: WritableReg, imm: Imm, size: OperandSize) {300match rd.to_reg().class() {301RegClass::Int => {302Inst::load_constant(rd.map(Into::into), imm.unwrap_as_u64())303.into_iter()304.for_each(|i| self.emit(i));305}306RegClass::Float => {307match ASIMDFPModImm::maybe_from_u64(imm.unwrap_as_u64(), size.into()) {308Some(imm) => {309self.emit(Inst::FpuMoveFPImm {310rd: rd.map(Into::into),311imm,312size: size.into(),313});314}315_ => {316let addr = self.add_constant(&imm.to_bytes());317self.uload(addr, rd, size, TRUSTED_FLAGS);318}319}320}321_ => unreachable!(),322}323}324325/// Register to register move.326pub fn mov_rr(&mut self, rm: Reg, rd: WritableReg, size: OperandSize) {327let writable_rd = rd.map(Into::into);328self.emit(Inst::Mov {329size: size.into(),330rd: writable_rd,331rm: rm.into(),332});333}334335/// Floating point register to register move.336pub fn fmov_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {337let writable = rd.map(Into::into);338let inst = match size {339OperandSize::S32 => Inst::FpuMove32 {340rd: writable,341rn: rn.into(),342},343OperandSize::S64 => Inst::FpuMove64 {344rd: writable,345rn: rn.into(),346},347_ => unreachable!(),348};349350self.emit(inst);351}352353pub fn mov_to_fpu(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {354let writable_rd = rd.map(Into::into);355self.emit(Inst::MovToFpu {356size: size.into(),357rd: writable_rd,358rn: rn.into(),359});360}361362pub fn mov_from_vec(&mut self, rn: Reg, rd: WritableReg, idx: u8, size: OperandSize) {363self.emit(Inst::MovFromVec {364rd: rd.map(Into::into),365rn: rn.into(),366idx,367size: size.into(),368});369}370371/// Add immediate and register.372pub fn add_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {373self.alu_rri(ALUOp::Add, imm, rn, rd, size);374}375376/// Add immediate and register, setting overflow flags.377pub fn adds_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {378self.alu_rri(ALUOp::AddS, imm, rn, rd, size);379}380381/// Add with three registers.382pub fn add_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {383self.alu_rrr_extend(ALUOp::Add, rm, rn, rd, size);384}385386/// Add with three registers, setting overflow flags.387pub fn adds_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {388self.alu_rrr_extend(ALUOp::AddS, rm, rn, rd, size);389}390391/// Add across Vector.392pub fn addv(&mut self, rn: Reg, rd: WritableReg, size: VectorSize) {393self.emit(Inst::VecLanes {394op: VecLanesOp::Addv,395rd: rd.map(Into::into),396rn: rn.into(),397size,398});399}400401/// Subtract immediate and register.402pub fn sub_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {403self.alu_rri(ALUOp::Sub, imm, rn, rd, size);404}405406/// Subtract immediate and register, setting flags.407pub fn subs_ir(&mut self, imm: Imm12, rn: Reg, size: OperandSize) {408self.alu_rri(ALUOp::SubS, imm, rn, writable!(regs::zero()), size);409}410411/// Subtract with three registers.412pub fn sub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {413self.alu_rrr_extend(ALUOp::Sub, rm, rn, rd, size);414}415416/// Subtract with three registers, setting flags.417pub fn subs_rrr(&mut self, rm: Reg, rn: Reg, size: OperandSize) {418self.alu_rrr_extend(ALUOp::SubS, rm, rn, writable!(regs::zero()), size);419}420421/// Multiply with three registers.422pub fn mul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {423self.alu_rrrr(ALUOp3::MAdd, rm, rn, rd, regs::zero(), size);424}425426/// Signed/unsigned division with three registers.427pub fn div_rrr(428&mut self,429divisor: Reg,430dividend: Reg,431dest: Writable<Reg>,432kind: DivKind,433size: OperandSize,434) {435// Check for division by 0.436self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size);437438// check for overflow439if kind == DivKind::Signed {440// Check for divisor overflow.441self.alu_rri(442ALUOp::AddS,443Imm12::maybe_from_u64(1).expect("1 to fit in 12 bits"),444divisor,445writable!(zero()),446size,447);448449// Check if the dividend is 1.450self.emit(Inst::CCmpImm {451size: size.into(),452rn: dividend.into(),453imm: UImm5::maybe_from_u8(1).expect("1 fits in 5 bits"),454nzcv: NZCV::new(false, false, false, false),455cond: Cond::Eq,456});457458// Finally, trap if the previous operation overflowed.459self.trapif(Cond::Vs, TrapCode::INTEGER_OVERFLOW);460}461462// `cranelift-codegen` doesn't support emitting sdiv for anything but I64,463// we therefore sign-extend the operand.464// see: https://github.com/bytecodealliance/wasmtime/issues/9766465let size = if size == OperandSize::S32 && kind == DivKind::Signed {466self.extend(467divisor,468writable!(divisor),469ExtendKind::Signed(Extend::<Signed>::I64Extend32),470);471self.extend(472dividend,473writable!(dividend),474ExtendKind::Signed(Extend::<Signed>::I64Extend32),475);476OperandSize::S64477} else {478size479};480481let op = match kind {482DivKind::Signed => ALUOp::SDiv,483DivKind::Unsigned => ALUOp::UDiv,484};485486self.alu_rrr(op, divisor, dividend, dest.map(Into::into), size);487}488489/// Signed/unsigned remainder operation with three registers.490pub fn rem_rrr(491&mut self,492divisor: Reg,493dividend: Reg,494dest: Writable<Reg>,495scratch: WritableReg,496kind: RemKind,497size: OperandSize,498) {499// Check for division by 0500self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size);501502// `cranelift-codegen` doesn't support emitting sdiv for anything but I64,503// we therefore sign-extend the operand.504// see: https://github.com/bytecodealliance/wasmtime/issues/9766505let size = if size == OperandSize::S32 && kind.is_signed() {506self.extend(507divisor,508writable!(divisor),509ExtendKind::Signed(Extend::<Signed>::I64Extend32),510);511self.extend(512dividend,513writable!(dividend),514ExtendKind::Signed(Extend::<Signed>::I64Extend32),515);516OperandSize::S64517} else {518size519};520521let op = match kind {522RemKind::Signed => ALUOp::SDiv,523RemKind::Unsigned => ALUOp::UDiv,524};525526self.alu_rrr(op, divisor, dividend, scratch, size);527528self.alu_rrrr(529ALUOp3::MSub,530scratch.to_reg(),531divisor,532dest.map(Into::into),533dividend,534size,535);536}537538/// And with three registers.539pub fn and_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {540self.alu_rrr(ALUOp::And, rm, rn, rd, size);541}542543/// And immediate and register.544pub fn and_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {545self.alu_rri_logic(ALUOp::And, imm, rn, rd, size);546}547548/// Or with three registers.549pub fn or_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {550self.alu_rrr(ALUOp::Orr, rm, rn, rd, size);551}552553/// Or immediate and register.554pub fn or_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {555self.alu_rri_logic(ALUOp::Orr, imm, rn, rd, size);556}557558/// Xor with three registers.559pub fn xor_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {560self.alu_rrr(ALUOp::Eor, rm, rn, rd, size);561}562563/// Xor immediate and register.564pub fn xor_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {565self.alu_rri_logic(ALUOp::Eor, imm, rn, rd, size);566}567568/// Shift with three registers.569pub fn shift_rrr(570&mut self,571rm: Reg,572rn: Reg,573rd: WritableReg,574kind: ShiftKind,575size: OperandSize,576) {577let shift_op = self.shift_kind_to_alu_op(kind, rm, size);578self.alu_rrr(shift_op, rm, rn, rd, size);579}580581/// Shift immediate and register.582pub fn shift_ir(583&mut self,584imm: ImmShift,585rn: Reg,586rd: WritableReg,587kind: ShiftKind,588size: OperandSize,589) {590let shift_op = self.shift_kind_to_alu_op(kind, rn, size);591self.alu_rri_shift(shift_op, imm, rn, rd, size);592}593594/// Count Leading Zeros.595pub fn clz(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {596self.bit_rr(BitOp::Clz, rn, rd, size);597}598599/// Reverse Bits reverses the bit order in a register.600pub fn rbit(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {601self.bit_rr(BitOp::RBit, rn, rd, size);602}603604/// Float add with three registers.605pub fn fadd_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {606self.fpu_rrr(FPUOp2::Add, rm, rn, rd, size);607}608609/// Float sub with three registers.610pub fn fsub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {611self.fpu_rrr(FPUOp2::Sub, rm, rn, rd, size);612}613614/// Float multiply with three registers.615pub fn fmul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {616self.fpu_rrr(FPUOp2::Mul, rm, rn, rd, size);617}618619/// Float division with three registers.620pub fn fdiv_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {621self.fpu_rrr(FPUOp2::Div, rm, rn, rd, size);622}623624/// Float max with three registers.625pub fn fmax_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {626self.fpu_rrr(FPUOp2::Max, rm, rn, rd, size);627}628629/// Float min with three registers.630pub fn fmin_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {631self.fpu_rrr(FPUOp2::Min, rm, rn, rd, size);632}633634/// Float neg with two registers.635pub fn fneg_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {636self.fpu_rr(FPUOp1::Neg, rn, rd, size);637}638639/// Float abs with two registers.640pub fn fabs_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {641self.fpu_rr(FPUOp1::Abs, rn, rd, size);642}643644/// Float sqrt with two registers.645pub fn fsqrt_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {646self.fpu_rr(FPUOp1::Sqrt, rn, rd, size);647}648649/// Float round (ceil, trunc, floor) with two registers.650pub fn fround_rr(&mut self, rn: Reg, rd: WritableReg, mode: RoundingMode, size: OperandSize) {651let fpu_mode = match (mode, size) {652(RoundingMode::Nearest, OperandSize::S32) => FpuRoundMode::Nearest32,653(RoundingMode::Up, OperandSize::S32) => FpuRoundMode::Plus32,654(RoundingMode::Down, OperandSize::S32) => FpuRoundMode::Minus32,655(RoundingMode::Zero, OperandSize::S32) => FpuRoundMode::Zero32,656(RoundingMode::Nearest, OperandSize::S64) => FpuRoundMode::Nearest64,657(RoundingMode::Up, OperandSize::S64) => FpuRoundMode::Plus64,658(RoundingMode::Down, OperandSize::S64) => FpuRoundMode::Minus64,659(RoundingMode::Zero, OperandSize::S64) => FpuRoundMode::Zero64,660(m, o) => panic!("Invalid rounding mode or operand size {m:?}, {o:?}"),661};662self.fpu_round(fpu_mode, rn, rd)663}664665/// Float unsigned shift right with two registers and an immediate.666pub fn fushr_rri(&mut self, rn: Reg, rd: WritableReg, amount: u8, size: OperandSize) {667let imm = FPURightShiftImm {668amount,669lane_size_in_bits: size.num_bits(),670};671let ushr = match size {672OperandSize::S32 => UShr32(imm),673OperandSize::S64 => UShr64(imm),674_ => unreachable!(),675};676self.fpu_rri(ushr, rn, rd)677}678679/// Float unsigned shift left and insert with three registers680/// and an immediate.681pub fn fsli_rri_mod(682&mut self,683ri: Reg,684rn: Reg,685rd: WritableReg,686amount: u8,687size: OperandSize,688) {689let imm = FPULeftShiftImm {690amount,691lane_size_in_bits: size.num_bits(),692};693let sli = match size {694OperandSize::S32 => FPUOpRIMod::Sli32(imm),695OperandSize::S64 => FPUOpRIMod::Sli64(imm),696_ => unreachable!(),697};698self.fpu_rri_mod(sli, ri, rn, rd)699}700701/// Float compare.702pub fn fcmp(&mut self, rn: Reg, rm: Reg, size: OperandSize) {703self.emit(Inst::FpuCmp {704size: size.into(),705rn: rn.into(),706rm: rm.into(),707})708}709710/// Convert an signed integer to a float.711pub fn cvt_sint_to_float(712&mut self,713rn: Reg,714rd: WritableReg,715src_size: OperandSize,716dst_size: OperandSize,717) {718let op = match (src_size, dst_size) {719(OperandSize::S32, OperandSize::S32) => IntToFpuOp::I32ToF32,720(OperandSize::S64, OperandSize::S32) => IntToFpuOp::I64ToF32,721(OperandSize::S32, OperandSize::S64) => IntToFpuOp::I32ToF64,722(OperandSize::S64, OperandSize::S64) => IntToFpuOp::I64ToF64,723_ => unreachable!(),724};725726self.emit(Inst::IntToFpu {727op,728rd: rd.map(Into::into),729rn: rn.into(),730});731}732733/// Convert an unsigned integer to a float.734pub fn cvt_uint_to_float(735&mut self,736rn: Reg,737rd: WritableReg,738src_size: OperandSize,739dst_size: OperandSize,740) {741let op = match (src_size, dst_size) {742(OperandSize::S32, OperandSize::S32) => IntToFpuOp::U32ToF32,743(OperandSize::S64, OperandSize::S32) => IntToFpuOp::U64ToF32,744(OperandSize::S32, OperandSize::S64) => IntToFpuOp::U32ToF64,745(OperandSize::S64, OperandSize::S64) => IntToFpuOp::U64ToF64,746_ => unreachable!(),747};748749self.emit(Inst::IntToFpu {750op,751rd: rd.map(Into::into),752rn: rn.into(),753});754}755756/// Change precision of float.757pub fn cvt_float_to_float(758&mut self,759rn: Reg,760rd: WritableReg,761src_size: OperandSize,762dst_size: OperandSize,763) {764let (fpu_op, size) = match (src_size, dst_size) {765(OperandSize::S32, OperandSize::S64) => (FPUOp1::Cvt32To64, ScalarSize::Size32),766(OperandSize::S64, OperandSize::S32) => (FPUOp1::Cvt64To32, ScalarSize::Size64),767_ => unimplemented!(),768};769self.emit(Inst::FpuRR {770fpu_op,771size,772rd: rd.map(Into::into),773rn: rn.into(),774});775}776777/// Return instruction.778pub fn ret(&mut self) {779self.emit(Inst::Ret {});780}781782/// An unconditional branch.783pub fn jmp(&mut self, target: MachLabel) {784self.emit(Inst::Jump {785dest: BranchTarget::Label(target),786});787}788789/// A conditional branch.790pub fn jmp_if(&mut self, kind: Cond, taken: MachLabel) {791self.emit(Inst::CondBr {792taken: BranchTarget::Label(taken),793not_taken: BranchTarget::ResolvedOffset(4),794kind: CondBrKind::Cond(kind),795});796}797798/// Emits a jump table sequence.799pub fn jmp_table(800&mut self,801targets: &[MachLabel],802default: MachLabel,803index: Reg,804tmp1: Reg,805tmp2: Reg,806) {807self.emit_with_island(808Inst::JTSequence {809default,810targets: Box::new(targets.to_vec()),811ridx: index.into(),812rtmp1: Writable::from_reg(tmp1.into()),813rtmp2: Writable::from_reg(tmp2.into()),814},815// number of bytes needed for the jumptable sequence:816// 4 bytes per instruction, with 8 instructions base + the size of817// the jumptable more.818(4 * (8 + targets.len())).try_into().unwrap(),819);820}821822/// Conditional Set sets the destination register to 1 if the condition823/// is true, and otherwise sets it to 0.824pub fn cset(&mut self, rd: WritableReg, cond: Cond) {825self.emit(Inst::CSet {826rd: rd.map(Into::into),827cond,828});829}830831/// If the condition is true, `csel` writes rn to rd. If the832/// condition is false, it writes rm to rd833pub fn csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond) {834self.emit(Inst::CSel {835rd: rd.map(Into::into),836rn: rn.into(),837rm: rm.into(),838cond,839});840}841842/// If the condition is true, `csel` writes rn to rd. If the843/// condition is false, it writes rm to rd844pub fn fpu_csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond, size: OperandSize) {845match size {846OperandSize::S32 => {847self.emit(Inst::FpuCSel32 {848rd: rd.map(Into::into),849rn: rn.into(),850rm: rm.into(),851cond,852});853}854OperandSize::S64 => {855self.emit(Inst::FpuCSel64 {856rd: rd.map(Into::into),857rn: rn.into(),858rm: rm.into(),859cond,860});861}862_ => todo!(),863}864}865866/// Population count per byte.867pub fn cnt(&mut self, rd: WritableReg) {868self.emit(Inst::VecMisc {869op: VecMisc2::Cnt,870rd: rd.map(Into::into),871rn: rd.to_reg().into(),872size: VectorSize::Size8x8,873});874}875876pub fn extend(&mut self, rn: Reg, rd: WritableReg, kind: ExtendKind) {877self.emit(Inst::Extend {878rd: rd.map(Into::into),879rn: rn.into(),880signed: kind.signed(),881from_bits: kind.from_bits(),882to_bits: kind.to_bits(),883})884}885886/// Bitwise AND (shifted register), setting flags.887pub fn ands_rr(&mut self, rn: Reg, rm: Reg, size: OperandSize) {888self.alu_rrr(ALUOp::AndS, rm, rn, writable!(regs::zero()), size);889}890891/// Permanently Undefined.892pub fn udf(&mut self, code: TrapCode) {893self.emit(Inst::Udf { trap_code: code });894}895896/// Conditional trap.897pub fn trapif(&mut self, cc: Cond, code: TrapCode) {898self.emit(Inst::TrapIf {899kind: CondBrKind::Cond(cc),900trap_code: code,901});902}903904/// Trap if `rn` is zero.905pub fn trapz(&mut self, rn: Reg, code: TrapCode, size: OperandSize) {906self.emit(Inst::TrapIf {907kind: CondBrKind::Zero(rn.into(), size.into()),908trap_code: code,909});910}911912// Helpers for ALU operations.913914fn alu_rri(&mut self, op: ALUOp, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {915self.emit(Inst::AluRRImm12 {916alu_op: op,917size: size.into(),918rd: rd.map(Into::into),919rn: rn.into(),920imm12: imm,921});922}923924fn alu_rri_logic(925&mut self,926op: ALUOp,927imm: ImmLogic,928rn: Reg,929rd: WritableReg,930size: OperandSize,931) {932self.emit(Inst::AluRRImmLogic {933alu_op: op,934size: size.into(),935rd: rd.map(Into::into),936rn: rn.into(),937imml: imm,938});939}940941fn alu_rri_shift(942&mut self,943op: ALUOp,944imm: ImmShift,945rn: Reg,946rd: WritableReg,947size: OperandSize,948) {949self.emit(Inst::AluRRImmShift {950alu_op: op,951size: size.into(),952rd: rd.map(Into::into),953rn: rn.into(),954immshift: imm,955});956}957958fn alu_rrr(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {959self.emit(Inst::AluRRR {960alu_op: op,961size: size.into(),962rd: rd.map(Into::into),963rn: rn.into(),964rm: rm.into(),965});966}967968fn alu_rrr_extend(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {969self.emit(Inst::AluRRRExtend {970alu_op: op,971size: size.into(),972rd: rd.map(Into::into),973rn: rn.into(),974rm: rm.into(),975extendop: ExtendOp::UXTX,976});977}978979fn alu_rrrr(980&mut self,981op: ALUOp3,982rm: Reg,983rn: Reg,984rd: WritableReg,985ra: Reg,986size: OperandSize,987) {988self.emit(Inst::AluRRRR {989alu_op: op,990size: size.into(),991rd: rd.map(Into::into),992rn: rn.into(),993rm: rm.into(),994ra: ra.into(),995});996}997998fn fpu_rrr(&mut self, op: FPUOp2, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {999self.emit(Inst::FpuRRR {1000fpu_op: op,1001size: size.into(),1002rd: rd.map(Into::into),1003rn: rn.into(),1004rm: rm.into(),1005});1006}10071008fn fpu_rri(&mut self, op: FPUOpRI, rn: Reg, rd: WritableReg) {1009self.emit(Inst::FpuRRI {1010fpu_op: op,1011rd: rd.map(Into::into),1012rn: rn.into(),1013});1014}10151016fn fpu_rri_mod(&mut self, op: FPUOpRIMod, ri: Reg, rn: Reg, rd: WritableReg) {1017self.emit(Inst::FpuRRIMod {1018fpu_op: op,1019rd: rd.map(Into::into),1020ri: ri.into(),1021rn: rn.into(),1022});1023}10241025fn fpu_rr(&mut self, op: FPUOp1, rn: Reg, rd: WritableReg, size: OperandSize) {1026self.emit(Inst::FpuRR {1027fpu_op: op,1028size: size.into(),1029rd: rd.map(Into::into),1030rn: rn.into(),1031});1032}10331034fn fpu_round(&mut self, op: FpuRoundMode, rn: Reg, rd: WritableReg) {1035self.emit(Inst::FpuRound {1036op,1037rd: rd.map(Into::into),1038rn: rn.into(),1039});1040}10411042fn bit_rr(&mut self, op: BitOp, rn: Reg, rd: WritableReg, size: OperandSize) {1043self.emit(Inst::BitRR {1044op,1045size: size.into(),1046rd: rd.map(Into::into),1047rn: rn.into(),1048});1049}10501051// Convert ShiftKind to ALUOp. If kind == Rotl, then emulate it by emitting1052// the negation of the given reg r, and returns ALUOp::Extr (an alias for1053// `ror` the rotate-right instruction)1054fn shift_kind_to_alu_op(&mut self, kind: ShiftKind, r: Reg, size: OperandSize) -> ALUOp {1055match kind {1056ShiftKind::Shl => ALUOp::Lsl,1057ShiftKind::ShrS => ALUOp::Asr,1058ShiftKind::ShrU => ALUOp::Lsr,1059ShiftKind::Rotr => ALUOp::Extr,1060ShiftKind::Rotl => {1061// neg(r) is sub(zero, r).1062self.alu_rrr(ALUOp::Sub, r, regs::zero(), writable!(r), size);1063ALUOp::Extr1064}1065}1066}10671068/// Get a label from the underlying machine code buffer.1069pub fn get_label(&mut self) -> MachLabel {1070self.buffer.get_label()1071}10721073/// Get a mutable reference to underlying1074/// machine buffer.1075pub fn buffer_mut(&mut self) -> &mut MachBuffer<Inst> {1076&mut self.buffer1077}10781079/// Get a reference to the underlying machine buffer.1080pub fn buffer(&self) -> &MachBuffer<Inst> {1081&self.buffer1082}10831084/// Emit a direct call to a function defined locally and1085/// referenced to by `name`.1086pub fn call_with_name(&mut self, name: UserExternalNameRef, call_conv: CallingConvention) {1087self.emit(Inst::Call {1088info: Box::new(cranelift_codegen::CallInfo::empty(1089ExternalName::user(name),1090call_conv.into(),1091)),1092})1093}10941095/// Emit an indirect call to a function whose address is1096/// stored the `callee` register.1097pub fn call_with_reg(&mut self, callee: Reg, call_conv: CallingConvention) {1098self.emit(Inst::CallInd {1099info: Box::new(cranelift_codegen::CallInfo::empty(1100callee.into(),1101call_conv.into(),1102)),1103})1104}11051106/// Load the min value for an integer of size out_size, as a floating-point1107/// of size `in-size`, into register `rd`.1108fn min_fp_value(1109&mut self,1110signed: bool,1111in_size: OperandSize,1112out_size: OperandSize,1113rd: Writable<Reg>,1114) {1115match in_size {1116OperandSize::S32 => {1117let (min, _) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into());1118self.mov_ir(rd, Imm::f32(min.to_bits()), in_size);1119}1120OperandSize::S64 => {1121let (min, _) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into());1122self.mov_ir(rd, Imm::f64(min.to_bits()), in_size);1123}1124s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()),1125};1126}11271128/// Load the max value for an integer of size out_size, as a floating-point1129/// of size `in_size`, into register `rd`.1130fn max_fp_value(1131&mut self,1132signed: bool,1133in_size: OperandSize,1134out_size: OperandSize,1135rd: Writable<Reg>,1136) {1137match in_size {1138OperandSize::S32 => {1139let (_, max) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into());1140self.mov_ir(rd, Imm::f32(max.to_bits()), in_size);1141}1142OperandSize::S64 => {1143let (_, max) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into());1144self.mov_ir(rd, Imm::f64(max.to_bits()), in_size);1145}1146s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()),1147};1148}11491150/// Emit instructions to check if the value in `rn` is NaN.1151fn check_nan(&mut self, rn: Reg, size: OperandSize) {1152self.fcmp(rn, rn, size);1153self.trapif(Cond::Vs, TrapCode::BAD_CONVERSION_TO_INTEGER);1154}11551156/// Convert the floating point of size `src_size` stored in `src`, into a integer of size1157/// `dst_size`, storing the result in `dst`.1158pub fn fpu_to_int(1159&mut self,1160dst: Writable<Reg>,1161src: Reg,1162tmp_reg: WritableReg,1163src_size: OperandSize,1164dst_size: OperandSize,1165kind: TruncKind,1166signed: bool,1167) {1168if kind.is_unchecked() {1169// Confusingly, when `kind` is `Unchecked` is when we actually need to perform the checks:1170// - check if fp is NaN1171// - check bounds1172self.check_nan(src, src_size);11731174self.min_fp_value(signed, src_size, dst_size, tmp_reg);1175self.fcmp(src, tmp_reg.to_reg(), src_size);1176self.trapif(Cond::Le, TrapCode::INTEGER_OVERFLOW);11771178self.max_fp_value(signed, src_size, dst_size, tmp_reg);1179self.fcmp(src, tmp_reg.to_reg(), src_size);1180self.trapif(Cond::Ge, TrapCode::INTEGER_OVERFLOW);1181}11821183self.cvt_fpu_to_int(dst, src, src_size, dst_size, signed)1184}11851186/// Select and emit the appropriate `fcvt*` instruction1187pub fn cvt_fpu_to_int(1188&mut self,1189dst: Writable<Reg>,1190src: Reg,1191src_size: OperandSize,1192dst_size: OperandSize,1193signed: bool,1194) {1195let op = match (src_size, dst_size, signed) {1196(OperandSize::S32, OperandSize::S32, false) => FpuToIntOp::F32ToU32,1197(OperandSize::S32, OperandSize::S32, true) => FpuToIntOp::F32ToI32,1198(OperandSize::S32, OperandSize::S64, false) => FpuToIntOp::F32ToU64,1199(OperandSize::S32, OperandSize::S64, true) => FpuToIntOp::F32ToI64,1200(OperandSize::S64, OperandSize::S32, false) => FpuToIntOp::F64ToU32,1201(OperandSize::S64, OperandSize::S32, true) => FpuToIntOp::F64ToI32,1202(OperandSize::S64, OperandSize::S64, false) => FpuToIntOp::F64ToU64,1203(OperandSize::S64, OperandSize::S64, true) => FpuToIntOp::F64ToI64,1204(fsize, int_size, signed) => unimplemented!(1205"unsupported conversion: f{} to {}{}",1206fsize.num_bits(),1207if signed { "i" } else { "u" },1208int_size.num_bits(),1209),1210};12111212self.emit(Inst::FpuToInt {1213op,1214rd: dst.map(Into::into),1215rn: src.into(),1216});1217}1218}12191220/// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted,1221/// but the immediate is not yet known.1222pub(crate) struct PatchableAddToReg {1223/// The region to be patched in the [`MachBuffer`]. It contains1224/// space for 3 32-bit instructions, i.e. it's 12 bytes long.1225region: PatchRegion,12261227// The destination register for the add instruction.1228reg: Writable<Reg>,12291230// The temporary register used to hold the immediate value.1231tmp: Writable<Reg>,1232}12331234impl PatchableAddToReg {1235/// Create a new [`PatchableAddToReg`] by capturing a region in the output1236/// buffer containing an instruction sequence that loads an immediate into a1237/// register `tmp`, then adds it to a register `reg`. The [`MachBuffer`]1238/// will have that instruction sequence written to the region, though the1239/// immediate loaded into `tmp` will be `0` until the `::finalize` method is1240/// called.1241pub(crate) fn new(reg: Writable<Reg>, tmp: Writable<Reg>, buf: &mut MachBuffer<Inst>) -> Self {1242let insns = Self::add_immediate_instruction_sequence(reg, tmp, 0);1243let open = buf.start_patchable();1244buf.put_data(&insns);1245let region = buf.end_patchable(open);12461247Self { region, reg, tmp }1248}12491250fn add_immediate_instruction_sequence(1251reg: Writable<Reg>,1252tmp: Writable<Reg>,1253imm: i32,1254) -> [u8; 12] {1255let imm_hi = imm as u64 & 0xffff_0000;1256let imm_hi = MoveWideConst::maybe_from_u64(imm_hi).unwrap();12571258let imm_lo = imm as u64 & 0x0000_ffff;1259let imm_lo = MoveWideConst::maybe_from_u64(imm_lo).unwrap();12601261let size = OperandSize::S64.into();12621263let tmp = tmp.map(Into::into);1264let rd = reg.map(Into::into);12651266// This is "movz to bits 16-31 of 64 bit reg tmp and zero the rest"1267let mov_insn = enc_move_wide(inst::MoveWideOp::MovZ, tmp, imm_hi, size);12681269// This is "movk to bits 0-15 of 64 bit reg tmp"1270let movk_insn = enc_movk(tmp, imm_lo, size);12711272// This is "add tmp to rd". The opcodes are somewhat buried in the1273// instruction encoder so we just repeat them here.1274let add_bits_31_21: u32 = 0b00001011_000 | (size.sf_bit() << 10);1275let add_bits_15_10: u32 = 0;1276let add_insn = enc_arith_rrr(1277add_bits_31_21,1278add_bits_15_10,1279rd,1280rd.to_reg(),1281tmp.to_reg(),1282);12831284let mut buf = [0u8; 12];1285buf[0..4].copy_from_slice(&mov_insn.to_le_bytes());1286buf[4..8].copy_from_slice(&movk_insn.to_le_bytes());1287buf[8..12].copy_from_slice(&add_insn.to_le_bytes());1288buf1289}12901291/// Patch the [`MachBuffer`] with the known constant to be added to the register. The final1292/// value is passed in as an i32, but the instruction encoding is fixed when1293/// [`PatchableAddToReg::new`] is called.1294pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer<Inst>) {1295let insns = Self::add_immediate_instruction_sequence(self.reg, self.tmp, val);1296let slice = self.region.patch(buffer);1297assert_eq!(slice.len(), insns.len());1298slice.copy_from_slice(&insns);1299}1300}130113021303