Path: blob/main/winch/codegen/src/isa/x64/asm.rs
//! Assembler library implementation for x64.

use crate::{
    constant_pool::ConstantPool,
    isa::{CallingConvention, reg::Reg},
    masm::{
        DivKind, Extend, ExtendKind, ExtendType, IntCmpKind, MulWideKind, OperandSize, RemKind,
        RoundingMode, ShiftKind, Signed, V128ExtendKind, V128LoadExtendKind, Zero,
    },
    reg::writable,
};
use cranelift_codegen::{
    CallInfo, Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState,
    MachLabel, PatchRegion, Writable,
    ir::{ExternalName, MemFlags, SourceLoc, TrapCode, Type, UserExternalNameRef, types},
    isa::{
        unwind::UnwindInst,
        x64::{
            AtomicRmwSeqOp, EmitInfo, EmitState, Inst,
            args::{
                self, Amode, CC, ExtMode, FromWritableReg, Gpr, GprMem, GprMemImm, RegMem,
                RegMemImm, SyntheticAmode, WritableGpr, WritableXmm, Xmm, XmmMem, XmmMemImm,
            },
            external::{PairedGpr, PairedXmm},
            settings as x64_settings,
        },
    },
    settings,
};

use crate::reg::WritableReg;
use cranelift_assembler_x64 as asm;

use super::address::Address;
use smallvec::SmallVec;

// Conversions between winch-codegen x64 types and cranelift-codegen x64 types.

impl From<Reg> for RegMemImm {
    fn from(reg: Reg) -> Self {
        RegMemImm::reg(reg.into())
    }
}

impl From<Reg> for RegMem {
    fn from(value: Reg) -> Self {
        RegMem::Reg { reg: value.into() }
    }
}

impl From<Reg> for WritableGpr {
    fn from(reg: Reg) -> Self {
        let writable = Writable::from_reg(reg.into());
        WritableGpr::from_writable_reg(writable).expect("valid writable gpr")
    }
}

impl From<Reg> for WritableXmm {
    fn from(reg: Reg) -> Self {
        let writable = Writable::from_reg(reg.into());
        WritableXmm::from_writable_reg(writable).expect("valid writable xmm")
    }
}

/// Convert a writable GPR register to the read-write pair expected by
/// `cranelift-codegen`.
fn pair_gpr(reg: WritableReg) -> PairedGpr {
    assert!(reg.to_reg().is_int());
    let read = Gpr::unwrap_new(reg.to_reg().into());
    let write = WritableGpr::from_reg(reg.to_reg().into());
    PairedGpr { read, write }
}

impl From<Reg> for asm::Gpr<Gpr> {
    fn from(reg: Reg) -> Self {
        asm::Gpr::new(reg.into())
    }
}

impl From<Reg> for asm::GprMem<Gpr, Gpr> {
    fn from(reg: Reg) -> Self {
        asm::GprMem::Gpr(reg.into())
    }
}

/// Convert a writable XMM register to the read-write pair expected by
/// `cranelift-codegen`.
fn pair_xmm(reg: WritableReg) -> PairedXmm {
    assert!(reg.to_reg().is_float());
    let read = Xmm::unwrap_new(reg.to_reg().into());
    let write = WritableXmm::from_reg(reg.to_reg().into());
    PairedXmm { read, write }
}

impl From<Reg> for asm::Xmm<Xmm> {
    fn from(reg: Reg) -> Self {
        asm::Xmm::new(reg.into())
    }
}

impl From<Reg> for asm::XmmMem<Xmm, Gpr> {
    fn from(reg: Reg) -> Self {
        asm::XmmMem::Xmm(reg.into())
    }
}

impl From<Reg> for Gpr {
    fn from(reg: Reg) -> Self {
        Gpr::unwrap_new(reg.into())
    }
}

impl From<Reg> for GprMem {
    fn from(value: Reg) -> Self {
        GprMem::unwrap_new(value.into())
    }
}

impl From<Reg> for GprMemImm {
    fn from(reg: Reg) -> Self {
        GprMemImm::unwrap_new(reg.into())
    }
}

impl From<Reg> for Xmm {
    fn from(reg: Reg) -> Self {
        Xmm::unwrap_new(reg.into())
    }
}

impl From<Reg> for XmmMem {
    fn from(value: Reg) -> Self {
        XmmMem::unwrap_new(value.into())
    }
}

impl From<Reg> for XmmMemImm {
    fn from(value: Reg) -> Self {
        XmmMemImm::unwrap_new(value.into())
    }
}
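
// NOTE: The example below is an editorial sketch and not part of the original
// source. It illustrates what `pair_gpr`/`pair_xmm` above model: x64
// instructions whose destination operand is both read and written, so the
// `read` and `write` halves of the pair name the same underlying register.
// `some_gpr` is a hypothetical winch `Reg` holding an integer register:
//
//     let dst = writable!(some_gpr);
//     let pair = pair_gpr(dst);
//     // `pair.read` and `pair.write` both refer to `some_gpr`; an emitter
//     // such as `asm::inst::addq_rm::new(pair, src)` reads the old value
//     // and writes the sum back into the same register.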

impl From<OperandSize> for args::OperandSize {
    fn from(size: OperandSize) -> Self {
        match size {
            OperandSize::S8 => Self::Size8,
            OperandSize::S16 => Self::Size16,
            OperandSize::S32 => Self::Size32,
            OperandSize::S64 => Self::Size64,
            s => panic!("Invalid operand size {s:?}"),
        }
    }
}

impl From<IntCmpKind> for CC {
    fn from(value: IntCmpKind) -> Self {
        match value {
            IntCmpKind::Eq => CC::Z,
            IntCmpKind::Ne => CC::NZ,
            IntCmpKind::LtS => CC::L,
            IntCmpKind::LtU => CC::B,
            IntCmpKind::GtS => CC::NLE,
            IntCmpKind::GtU => CC::NBE,
            IntCmpKind::LeS => CC::LE,
            IntCmpKind::LeU => CC::BE,
            IntCmpKind::GeS => CC::NL,
            IntCmpKind::GeU => CC::NB,
        }
    }
}

impl<T: ExtendType> From<Extend<T>> for ExtMode {
    fn from(value: Extend<T>) -> Self {
        match value {
            Extend::I32Extend8 => ExtMode::BL,
            Extend::I32Extend16 => ExtMode::WL,
            Extend::I64Extend8 => ExtMode::BQ,
            Extend::I64Extend16 => ExtMode::WQ,
            Extend::I64Extend32 => ExtMode::LQ,
            Extend::__Kind(_) => unreachable!(),
        }
    }
}

impl From<ExtendKind> for ExtMode {
    fn from(value: ExtendKind) -> Self {
        match value {
            ExtendKind::Signed(s) => s.into(),
            ExtendKind::Unsigned(u) => u.into(),
        }
    }
}

/// Kinds of extends supported by `vpmov`.
pub(super) enum VpmovKind {
    /// Sign extends 8 lanes of 8-bit integers to 8 lanes of 16-bit integers.
    E8x8S,
    /// Zero extends 8 lanes of 8-bit integers to 8 lanes of 16-bit integers.
    E8x8U,
    /// Sign extends 4 lanes of 16-bit integers to 4 lanes of 32-bit integers.
    E16x4S,
    /// Zero extends 4 lanes of 16-bit integers to 4 lanes of 32-bit integers.
    E16x4U,
    /// Sign extends 2 lanes of 32-bit integers to 2 lanes of 64-bit integers.
    E32x2S,
    /// Zero extends 2 lanes of 32-bit integers to 2 lanes of 64-bit integers.
    E32x2U,
}

impl From<V128LoadExtendKind> for VpmovKind {
    fn from(value: V128LoadExtendKind) -> Self {
        match value {
            V128LoadExtendKind::E8x8S => Self::E8x8S,
            V128LoadExtendKind::E8x8U => Self::E8x8U,
            V128LoadExtendKind::E16x4S => Self::E16x4S,
            V128LoadExtendKind::E16x4U => Self::E16x4U,
            V128LoadExtendKind::E32x2S => Self::E32x2S,
            V128LoadExtendKind::E32x2U => Self::E32x2U,
        }
    }
}

impl From<V128ExtendKind> for VpmovKind {
    fn from(value: V128ExtendKind) -> Self {
        match value {
            V128ExtendKind::LowI8x16S | V128ExtendKind::HighI8x16S => Self::E8x8S,
            V128ExtendKind::LowI8x16U => Self::E8x8U,
            V128ExtendKind::LowI16x8S | V128ExtendKind::HighI16x8S => Self::E16x4S,
            V128ExtendKind::LowI16x8U => Self::E16x4U,
            V128ExtendKind::LowI32x4S | V128ExtendKind::HighI32x4S => Self::E32x2S,
            V128ExtendKind::LowI32x4U => Self::E32x2U,
            _ => unimplemented!(),
        }
    }
}
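
// NOTE: The example below is an editorial sketch and not part of the original
// source. The `IntCmpKind` -> `CC` mapping above pairs signed comparisons with
// the signed condition codes (L/LE/NL/NLE) and unsigned comparisons with the
// carry-based ones (B/BE/NB/NBE). For instance, a Wasm `i32.lt_u` over two
// values already in registers could be lowered with the `cmp_rr` and `setcc`
// helpers defined later in this file (`asm`, `lhs`, `rhs`, and `result` are
// hypothetical):
//
//     asm.cmp_rr(lhs, rhs, OperandSize::S32);        // compare lhs against rhs
//     asm.setcc(IntCmpKind::LtU, writable!(result)); // LtU -> CC::B -> setb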

/// Kinds of comparisons supported by `vcmp`.
pub(super) enum VcmpKind {
    /// Equal comparison.
    Eq,
    /// Not equal comparison.
    Ne,
    /// Less than comparison.
    Lt,
    /// Less than or equal comparison.
    Le,
    /// Unordered comparison. Sets result to all 1s if either source operand is
    /// NaN.
    Unord,
}

/// Kinds of conversions supported by `vcvt`.
pub(super) enum VcvtKind {
    /// Converts 32-bit integers to 32-bit floats.
    I32ToF32,
    /// Converts doubleword integers to double precision floats.
    I32ToF64,
    /// Converts double precision floats to single precision floats.
    F64ToF32,
    // Converts double precision floats to 32-bit integers.
    F64ToI32,
    /// Converts single precision floats to double precision floats.
    F32ToF64,
    /// Converts single precision floats to 32-bit integers.
    F32ToI32,
}

/// Modes supported by `vround`.
pub(crate) enum VroundMode {
    /// Rounds toward nearest (ties to even).
    TowardNearest,
    /// Rounds toward negative infinity.
    TowardNegativeInfinity,
    /// Rounds toward positive infinity.
    TowardPositiveInfinity,
    /// Rounds toward zero.
    TowardZero,
}

/// Low level assembler implementation for x64.
pub(crate) struct Assembler {
    /// The machine instruction buffer.
    buffer: MachBuffer<Inst>,
    /// Constant emission information.
    emit_info: EmitInfo,
    /// Emission state.
    emit_state: EmitState,
    /// x64 flags.
    isa_flags: x64_settings::Flags,
    /// Constant pool.
    pool: ConstantPool,
}

impl Assembler {
    /// Create a new x64 assembler.
    pub fn new(shared_flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
        Self {
            buffer: MachBuffer::<Inst>::new(),
            emit_state: Default::default(),
            emit_info: EmitInfo::new(shared_flags, isa_flags.clone()),
            pool: ConstantPool::new(),
            isa_flags,
        }
    }

    /// Get a mutable reference to underlying
    /// machine buffer.
    pub fn buffer_mut(&mut self) -> &mut MachBuffer<Inst> {
        &mut self.buffer
    }

    /// Get a reference to the underlying machine buffer.
    pub fn buffer(&self) -> &MachBuffer<Inst> {
        &self.buffer
    }

    /// Adds a constant to the constant pool and returns its address.
    pub fn add_constant(&mut self, constant: &[u8]) -> Address {
        let handle = self.pool.register(constant, &mut self.buffer);
        Address::constant(handle)
    }

    /// Load a floating point constant, using the constant pool.
    pub fn load_fp_const(&mut self, dst: WritableReg, constant: &[u8], size: OperandSize) {
        let addr = self.add_constant(constant);
        self.xmm_mov_mr(&addr, dst, size, MemFlags::trusted());
    }

    /// Return the emitted code.
    pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> {
        let stencil = self
            .buffer
            .finish(&self.pool.constants(), self.emit_state.ctrl_plane_mut());
        stencil.apply_base_srcloc(loc.unwrap_or_default())
    }

    fn emit(&mut self, inst: Inst) {
        inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
    }

    fn to_synthetic_amode(addr: &Address, memflags: MemFlags) -> SyntheticAmode {
        match *addr {
            Address::Offset { base, offset } => {
                let amode = Amode::imm_reg(offset as i32, base.into()).with_flags(memflags);
                SyntheticAmode::real(amode)
            }
            Address::Const(c) => SyntheticAmode::ConstantOffset(c),
            Address::ImmRegRegShift {
                simm32,
                base,
                index,
                shift,
            } => SyntheticAmode::Real(Amode::ImmRegRegShift {
                simm32,
                base: base.into(),
                index: index.into(),
                shift,
                flags: memflags,
            }),
        }
    }
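
    // NOTE: The sketch below is editorial and not part of the original source.
    // A typical lifecycle of this assembler, assuming flags built from the
    // default `cranelift_codegen` builders and hypothetical `Reg` values
    // `rax`/`rbp`, looks roughly like:
    //
    //     let shared = settings::Flags::new(settings::builder());
    //     let isa = x64_settings::Flags::new(&shared, x64_settings::builder());
    //     let mut asm = Assembler::new(shared, isa);
    //     asm.push_r(rbp);
    //     asm.mov_rr(rax, writable!(rbp), OperandSize::S64);
    //     asm.ret();
    //     let code = asm.finalize(None); // MachBufferFinalized<Final>
    //
    // Memory operands go through `to_synthetic_amode` above, e.g. an
    // `Address::Offset { base, offset }` becomes an `Amode::imm_reg(offset, base)`
    // carrying the provided `MemFlags`.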

    /// Emit an unwind instruction.
    pub fn unwind_inst(&mut self, inst: UnwindInst) {
        self.emit(Inst::Unwind { inst })
    }

    /// Push register.
    pub fn push_r(&mut self, reg: Reg) {
        let inst = asm::inst::pushq_o::new(reg).into();
        self.emit(Inst::External { inst });
    }

    /// Pop to register.
    pub fn pop_r(&mut self, dst: WritableReg) {
        let writable: WritableGpr = dst.map(Into::into);
        let inst = asm::inst::popq_o::new(writable).into();
        self.emit(Inst::External { inst });
    }

    /// Return instruction.
    pub fn ret(&mut self) {
        let inst = asm::inst::retq_zo::new().into();
        self.emit(Inst::External { inst });
    }

    /// Register-to-register move.
    pub fn mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableGpr = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::movb_mr::new(dst, src).into(),
            OperandSize::S16 => asm::inst::movw_mr::new(dst, src).into(),
            OperandSize::S32 => asm::inst::movl_mr::new(dst, src).into(),
            OperandSize::S64 => asm::inst::movq_mr::new(dst, src).into(),
            _ => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Register-to-memory move.
    pub fn mov_rm(&mut self, src: Reg, addr: &Address, size: OperandSize, flags: MemFlags) {
        assert!(addr.is_offset());
        let dst = Self::to_synthetic_amode(addr, flags);
        let inst = match size {
            OperandSize::S8 => asm::inst::movb_mr::new(dst, src).into(),
            OperandSize::S16 => asm::inst::movw_mr::new(dst, src).into(),
            OperandSize::S32 => asm::inst::movl_mr::new(dst, src).into(),
            OperandSize::S64 => asm::inst::movq_mr::new(dst, src).into(),
            _ => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Immediate-to-memory move.
    pub fn mov_im(&mut self, src: i32, addr: &Address, size: OperandSize, flags: MemFlags) {
        assert!(addr.is_offset());
        let dst = Self::to_synthetic_amode(addr, flags);
        let inst = match size {
            OperandSize::S8 => {
                let src = i8::try_from(src).unwrap();
                asm::inst::movb_mi::new(dst, src.cast_unsigned()).into()
            }
            OperandSize::S16 => {
                let src = i16::try_from(src).unwrap();
                asm::inst::movw_mi::new(dst, src.cast_unsigned()).into()
            }
            OperandSize::S32 => asm::inst::movl_mi::new(dst, src.cast_unsigned()).into(),
            OperandSize::S64 => asm::inst::movq_mi_sxl::new(dst, src).into(),
            _ => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Immediate-to-register move.
    pub fn mov_ir(&mut self, imm: u64, dst: WritableReg, size: OperandSize) {
        self.emit(Inst::imm(size.into(), imm, dst.map(Into::into)));
    }

    /// Zero-extend memory-to-register load.
    pub fn movzx_mr(
        &mut self,
        addr: &Address,
        dst: WritableReg,
        ext: Option<Extend<Zero>>,
        memflags: MemFlags,
    ) {
        let src = Self::to_synthetic_amode(addr, memflags);

        if let Some(ext) = ext {
            let dst = WritableGpr::from_reg(dst.to_reg().into());
            let inst = match ext.into() {
                ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
                ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
                ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
                ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
                ExtMode::LQ => {
                    // This instruction selection may seem strange but is
                    // correct in 64-bit mode: section 3.4.1.1 of the Intel
                    // manual says that "32-bit operands generate a 32-bit
                    // result, zero-extended to a 64-bit result in the
                    // destination general-purpose register." This is applicable
                    // beyond `mov` but we use this fact to zero-extend `src`
                    // into `dst`.
                    asm::inst::movl_rm::new(dst, src).into()
                }
            };
            self.emit(Inst::External { inst });
        } else {
            let dst = WritableGpr::from_reg(dst.to_reg().into());
            let inst = asm::inst::movq_rm::new(dst, src).into();
            self.emit(Inst::External { inst });
        }
    }
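
    // NOTE: Editorial clarification, not part of the original source. The
    // `ExtMode::LQ` arm above leans on the architectural rule quoted from the
    // Intel manual: a plain 32-bit `movl` already clears bits 63:32 of the
    // destination register, so a 32-to-64-bit zero-extending load needs no
    // dedicated `movzx` form; emitting `asm::inst::movl_rm` is sufficient.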

    // Sign-extend memory-to-register load.
    pub fn movsx_mr(
        &mut self,
        addr: &Address,
        dst: WritableReg,
        ext: Extend<Signed>,
        memflags: MemFlags,
    ) {
        let src = Self::to_synthetic_amode(addr, memflags);
        let dst = WritableGpr::from_reg(dst.to_reg().into());
        let inst = match ext.into() {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }

    /// Register-to-register move with zero extension.
    pub fn movzx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Zero>) {
        let dst = WritableGpr::from_reg(dst.to_reg().into());
        let inst = match kind.into() {
            ExtMode::BL => asm::inst::movzbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movzbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movzwl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movzwq_rm::new(dst, src).into(),
            ExtMode::LQ => {
                // This instruction selection may seem strange but is correct in
                // 64-bit mode: section 3.4.1.1 of the Intel manual says that
                // "32-bit operands generate a 32-bit result, zero-extended to a
                // 64-bit result in the destination general-purpose register."
                // This is applicable beyond `mov` but we use this fact to
                // zero-extend `src` into `dst`.
                asm::inst::movl_rm::new(dst, src).into()
            }
        };
        self.emit(Inst::External { inst });
    }

    /// Register-to-register move with sign extension.
    pub fn movsx_rr(&mut self, src: Reg, dst: WritableReg, kind: Extend<Signed>) {
        let dst = WritableGpr::from_reg(dst.to_reg().into());
        let inst = match kind.into() {
            ExtMode::BL => asm::inst::movsbl_rm::new(dst, src).into(),
            ExtMode::BQ => asm::inst::movsbq_rm::new(dst, src).into(),
            ExtMode::WL => asm::inst::movswl_rm::new(dst, src).into(),
            ExtMode::WQ => asm::inst::movswq_rm::new(dst, src).into(),
            ExtMode::LQ => asm::inst::movslq_rm::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }

    /// Integer register conditional move.
    pub fn cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize) {
        use IntCmpKind::*;
        use OperandSize::*;

        let dst: WritableGpr = dst.map(Into::into);
        let inst = match size {
            S8 | S16 | S32 => match cc {
                Eq => asm::inst::cmovel_rm::new(dst, src).into(),
                Ne => asm::inst::cmovnel_rm::new(dst, src).into(),
                LtS => asm::inst::cmovll_rm::new(dst, src).into(),
                LtU => asm::inst::cmovbl_rm::new(dst, src).into(),
                GtS => asm::inst::cmovgl_rm::new(dst, src).into(),
                GtU => asm::inst::cmoval_rm::new(dst, src).into(),
                LeS => asm::inst::cmovlel_rm::new(dst, src).into(),
                LeU => asm::inst::cmovbel_rm::new(dst, src).into(),
                GeS => asm::inst::cmovgel_rm::new(dst, src).into(),
                GeU => asm::inst::cmovael_rm::new(dst, src).into(),
            },
            S64 => match cc {
                Eq => asm::inst::cmoveq_rm::new(dst, src).into(),
                Ne => asm::inst::cmovneq_rm::new(dst, src).into(),
                LtS => asm::inst::cmovlq_rm::new(dst, src).into(),
                LtU =>
asm::inst::cmovbq_rm::new(dst, src).into(),552GtS => asm::inst::cmovgq_rm::new(dst, src).into(),553GtU => asm::inst::cmovaq_rm::new(dst, src).into(),554LeS => asm::inst::cmovleq_rm::new(dst, src).into(),555LeU => asm::inst::cmovbeq_rm::new(dst, src).into(),556GeS => asm::inst::cmovgeq_rm::new(dst, src).into(),557GeU => asm::inst::cmovaeq_rm::new(dst, src).into(),558},559_ => unreachable!(),560};561self.emit(Inst::External { inst });562}563564/// Single and double precision floating point565/// register-to-register move.566pub fn xmm_mov_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {567let ty = match size {568OperandSize::S32 => types::F32,569OperandSize::S64 => types::F64,570OperandSize::S128 => types::I32X4,571OperandSize::S8 | OperandSize::S16 => unreachable!(),572};573self.emit(Inst::gen_move(dst.map(|r| r.into()), src.into(), ty));574}575576/// Single and double precision floating point load.577pub fn xmm_mov_mr(578&mut self,579src: &Address,580dst: WritableReg,581size: OperandSize,582flags: MemFlags,583) {584use OperandSize::*;585586assert!(dst.to_reg().is_float());587588let src = Self::to_synthetic_amode(src, flags);589let dst: WritableXmm = dst.map(|r| r.into());590let inst = match size {591S32 => asm::inst::movss_a_m::new(dst, src).into(),592S64 => asm::inst::movsd_a_m::new(dst, src).into(),593S128 => asm::inst::movdqu_a::new(dst, src).into(),594S8 | S16 => unreachable!(),595};596self.emit(Inst::External { inst });597}598599/// Vector load and extend.600pub fn xmm_vpmov_mr(601&mut self,602src: &Address,603dst: WritableReg,604kind: VpmovKind,605flags: MemFlags,606) {607assert!(dst.to_reg().is_float());608let src = Self::to_synthetic_amode(src, flags);609let dst: WritableXmm = dst.map(|r| r.into());610let inst = match kind {611VpmovKind::E8x8S => asm::inst::vpmovsxbw_a::new(dst, src).into(),612VpmovKind::E8x8U => asm::inst::vpmovzxbw_a::new(dst, src).into(),613VpmovKind::E16x4S => asm::inst::vpmovsxwd_a::new(dst, src).into(),614VpmovKind::E16x4U => asm::inst::vpmovzxwd_a::new(dst, src).into(),615VpmovKind::E32x2S => asm::inst::vpmovsxdq_a::new(dst, src).into(),616VpmovKind::E32x2U => asm::inst::vpmovzxdq_a::new(dst, src).into(),617};618self.emit(Inst::External { inst });619}620621/// Extends vector of integers in `src` and puts results in `dst`.622pub fn xmm_vpmov_rr(&mut self, src: Reg, dst: WritableReg, kind: VpmovKind) {623let dst: WritableXmm = dst.map(|r| r.into());624let inst = match kind {625VpmovKind::E8x8S => asm::inst::vpmovsxbw_a::new(dst, src).into(),626VpmovKind::E8x8U => asm::inst::vpmovzxbw_a::new(dst, src).into(),627VpmovKind::E16x4S => asm::inst::vpmovsxwd_a::new(dst, src).into(),628VpmovKind::E16x4U => asm::inst::vpmovzxwd_a::new(dst, src).into(),629VpmovKind::E32x2S => asm::inst::vpmovsxdq_a::new(dst, src).into(),630VpmovKind::E32x2U => asm::inst::vpmovzxdq_a::new(dst, src).into(),631};632self.emit(Inst::External { inst });633}634635/// Vector load and broadcast.636pub fn xmm_vpbroadcast_mr(637&mut self,638src: &Address,639dst: WritableReg,640size: OperandSize,641flags: MemFlags,642) {643assert!(dst.to_reg().is_float());644let src = Self::to_synthetic_amode(src, flags);645let dst: WritableXmm = dst.map(|r| r.into());646let inst = match size {647OperandSize::S8 => asm::inst::vpbroadcastb_a::new(dst, src).into(),648OperandSize::S16 => asm::inst::vpbroadcastw_a::new(dst, src).into(),649OperandSize::S32 => asm::inst::vpbroadcastd_a::new(dst, src).into(),650_ => unimplemented!(),651};652self.emit(Inst::External { inst });653}654655/// Value in `src` is 
broadcast into lanes of `size` in `dst`.656pub fn xmm_vpbroadcast_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {657assert!(src.is_float() && dst.to_reg().is_float());658let dst: WritableXmm = dst.map(|r| r.into());659let inst = match size {660OperandSize::S8 => asm::inst::vpbroadcastb_a::new(dst, src).into(),661OperandSize::S16 => asm::inst::vpbroadcastw_a::new(dst, src).into(),662OperandSize::S32 => asm::inst::vpbroadcastd_a::new(dst, src).into(),663_ => unimplemented!(),664};665self.emit(Inst::External { inst });666}667668/// Memory to register shuffle of bytes in vector.669pub fn xmm_vpshuf_mr(670&mut self,671src: &Address,672dst: WritableReg,673mask: u8,674size: OperandSize,675flags: MemFlags,676) {677let dst: WritableXmm = dst.map(|r| r.into());678let src = Self::to_synthetic_amode(src, flags);679let inst = match size {680OperandSize::S32 => asm::inst::vpshufd_a::new(dst, src, mask).into(),681_ => unimplemented!(),682};683self.emit(Inst::External { inst });684}685686/// Register to register shuffle of bytes in vector.687pub fn xmm_vpshuf_rr(&mut self, src: Reg, dst: WritableReg, mask: u8, size: OperandSize) {688let dst: WritableXmm = dst.map(|r| r.into());689690let inst = match size {691OperandSize::S16 => asm::inst::vpshuflw_a::new(dst, src, mask).into(),692OperandSize::S32 => asm::inst::vpshufd_a::new(dst, src, mask).into(),693_ => unimplemented!(),694};695696self.emit(Inst::External { inst });697}698699/// Single and double precision floating point store.700pub fn xmm_mov_rm(&mut self, src: Reg, dst: &Address, size: OperandSize, flags: MemFlags) {701use OperandSize::*;702703assert!(src.is_float());704705let dst = Self::to_synthetic_amode(dst, flags);706let src: Xmm = src.into();707let inst = match size {708S32 => asm::inst::movss_c_m::new(dst, src).into(),709S64 => asm::inst::movsd_c_m::new(dst, src).into(),710S128 => asm::inst::movdqu_b::new(dst, src).into(),711S16 | S8 => unreachable!(),712};713self.emit(Inst::External { inst })714}715716/// Floating point register conditional move.717pub fn xmm_cmov(&mut self, src: Reg, dst: WritableReg, cc: IntCmpKind, size: OperandSize) {718let dst: WritableXmm = dst.map(Into::into);719let ty = match size {720OperandSize::S32 => types::F32,721OperandSize::S64 => types::F64,722// Move the entire 128 bits via movdqa.723OperandSize::S128 => types::I32X4,724OperandSize::S8 | OperandSize::S16 => unreachable!(),725};726727self.emit(Inst::XmmCmove {728ty,729cc: cc.into(),730consequent: Xmm::unwrap_new(src.into()),731alternative: dst.to_reg(),732dst,733})734}735736/// Subtract register and register737pub fn sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {738let dst = pair_gpr(dst);739let inst = match size {740OperandSize::S8 => asm::inst::subb_rm::new(dst, src).into(),741OperandSize::S16 => asm::inst::subw_rm::new(dst, src).into(),742OperandSize::S32 => asm::inst::subl_rm::new(dst, src).into(),743OperandSize::S64 => asm::inst::subq_rm::new(dst, src).into(),744OperandSize::S128 => unimplemented!(),745};746self.emit(Inst::External { inst });747}748749/// Subtract immediate register.750pub fn sub_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {751let dst = pair_gpr(dst);752let inst = match size {753OperandSize::S8 => asm::inst::subb_mi::new(dst, u8::try_from(imm).unwrap()).into(),754OperandSize::S16 => asm::inst::subw_mi::new(dst, u16::try_from(imm).unwrap()).into(),755OperandSize::S32 => asm::inst::subl_mi::new(dst, imm as u32).into(),756OperandSize::S64 => asm::inst::subq_mi_sxl::new(dst, 
imm).into(),757OperandSize::S128 => unimplemented!(),758};759self.emit(Inst::External { inst });760}761762/// "and" two registers.763pub fn and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {764let dst = pair_gpr(dst);765let inst = match size {766OperandSize::S8 => asm::inst::andb_rm::new(dst, src).into(),767OperandSize::S16 => asm::inst::andw_rm::new(dst, src).into(),768OperandSize::S32 => asm::inst::andl_rm::new(dst, src).into(),769OperandSize::S64 => asm::inst::andq_rm::new(dst, src).into(),770OperandSize::S128 => unimplemented!(),771};772self.emit(Inst::External { inst });773}774775pub fn and_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {776let dst = pair_gpr(dst);777let inst = match size {778OperandSize::S8 => asm::inst::andb_mi::new(dst, u8::try_from(imm).unwrap()).into(),779OperandSize::S16 => asm::inst::andw_mi::new(dst, u16::try_from(imm).unwrap()).into(),780OperandSize::S32 => asm::inst::andl_mi::new(dst, imm as u32).into(),781OperandSize::S64 => asm::inst::andq_mi_sxl::new(dst, imm).into(),782OperandSize::S128 => unimplemented!(),783};784self.emit(Inst::External { inst });785}786787/// "and" two float registers.788pub fn xmm_and_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {789let dst = pair_xmm(dst);790let inst = match size {791OperandSize::S32 => asm::inst::andps_a::new(dst, src).into(),792OperandSize::S64 => asm::inst::andpd_a::new(dst, src).into(),793OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),794};795self.emit(Inst::External { inst });796}797798/// "and not" two float registers.799pub fn xmm_andn_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {800let dst = pair_xmm(dst);801let inst = match size {802OperandSize::S32 => asm::inst::andnps_a::new(dst, src).into(),803OperandSize::S64 => asm::inst::andnpd_a::new(dst, src).into(),804OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),805};806self.emit(Inst::External { inst });807}808809pub fn gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {810let dst: WritableXmm = dst.map(|r| r.into());811let inst = match size {812OperandSize::S32 => asm::inst::movd_a::new(dst, src).into(),813OperandSize::S64 => asm::inst::movq_a::new(dst, src).into(),814OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),815};816817self.emit(Inst::External { inst });818}819820pub fn xmm_to_gpr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {821let dst: WritableGpr = dst.map(Into::into);822let src: Xmm = src.into();823let inst = match size {824OperandSize::S32 => asm::inst::movd_b::new(dst, src).into(),825OperandSize::S64 => asm::inst::movq_b::new(dst, src).into(),826OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),827};828829self.emit(Inst::External { inst })830}831832/// Convert float to signed int.833pub fn cvt_float_to_sint_seq(834&mut self,835src: Reg,836dst: WritableReg,837tmp_gpr: Reg,838tmp_xmm: Reg,839src_size: OperandSize,840dst_size: OperandSize,841saturating: bool,842) {843self.emit(Inst::CvtFloatToSintSeq {844dst_size: dst_size.into(),845src_size: src_size.into(),846is_saturating: saturating,847src: src.into(),848dst: dst.map(Into::into),849tmp_gpr: tmp_gpr.into(),850tmp_xmm: tmp_xmm.into(),851});852}853854/// Convert float to unsigned int.855pub fn cvt_float_to_uint_seq(856&mut self,857src: Reg,858dst: WritableReg,859tmp_gpr: Reg,860tmp_xmm: Reg,861tmp_xmm2: Reg,862src_size: OperandSize,863dst_size: OperandSize,864saturating: bool,865) 
{866self.emit(Inst::CvtFloatToUintSeq {867dst_size: dst_size.into(),868src_size: src_size.into(),869is_saturating: saturating,870src: src.into(),871dst: dst.map(Into::into),872tmp_gpr: tmp_gpr.into(),873tmp_xmm: tmp_xmm.into(),874tmp_xmm2: tmp_xmm2.into(),875});876}877878/// Convert signed int to float.879pub fn cvt_sint_to_float(880&mut self,881src: Reg,882dst: WritableReg,883src_size: OperandSize,884dst_size: OperandSize,885) {886use OperandSize::*;887let dst = pair_xmm(dst);888let inst = match (src_size, dst_size) {889(S32, S32) => asm::inst::cvtsi2ssl_a::new(dst, src).into(),890(S32, S64) => asm::inst::cvtsi2sdl_a::new(dst, src).into(),891(S64, S32) => asm::inst::cvtsi2ssq_a::new(dst, src).into(),892(S64, S64) => asm::inst::cvtsi2sdq_a::new(dst, src).into(),893_ => unreachable!(),894};895self.emit(Inst::External { inst });896}897898/// Convert unsigned 64-bit int to float.899pub fn cvt_uint64_to_float_seq(900&mut self,901src: Reg,902dst: WritableReg,903tmp_gpr1: Reg,904tmp_gpr2: Reg,905dst_size: OperandSize,906) {907self.emit(Inst::CvtUint64ToFloatSeq {908dst_size: dst_size.into(),909src: src.into(),910dst: dst.map(Into::into),911tmp_gpr1: tmp_gpr1.into(),912tmp_gpr2: tmp_gpr2.into(),913});914}915916/// Change precision of float.917pub fn cvt_float_to_float(918&mut self,919src: Reg,920dst: WritableReg,921src_size: OperandSize,922dst_size: OperandSize,923) {924use OperandSize::*;925let dst = pair_xmm(dst);926let inst = match (src_size, dst_size) {927(S32, S64) => asm::inst::cvtss2sd_a::new(dst, src).into(),928(S64, S32) => asm::inst::cvtsd2ss_a::new(dst, src).into(),929_ => unimplemented!(),930};931self.emit(Inst::External { inst });932}933934pub fn or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {935let dst = pair_gpr(dst);936let inst = match size {937OperandSize::S8 => asm::inst::orb_rm::new(dst, src).into(),938OperandSize::S16 => asm::inst::orw_rm::new(dst, src).into(),939OperandSize::S32 => asm::inst::orl_rm::new(dst, src).into(),940OperandSize::S64 => asm::inst::orq_rm::new(dst, src).into(),941OperandSize::S128 => unimplemented!(),942};943self.emit(Inst::External { inst });944}945946pub fn or_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {947let dst = pair_gpr(dst);948let inst = match size {949OperandSize::S8 => asm::inst::orb_mi::new(dst, u8::try_from(imm).unwrap()).into(),950OperandSize::S16 => asm::inst::orw_mi::new(dst, u16::try_from(imm).unwrap()).into(),951OperandSize::S32 => asm::inst::orl_mi::new(dst, imm as u32).into(),952OperandSize::S64 => asm::inst::orq_mi_sxl::new(dst, imm).into(),953OperandSize::S128 => unimplemented!(),954};955self.emit(Inst::External { inst });956}957958pub fn xmm_or_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {959let dst = pair_xmm(dst);960let inst = match size {961OperandSize::S32 => asm::inst::orps_a::new(dst, src).into(),962OperandSize::S64 => asm::inst::orpd_a::new(dst, src).into(),963OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),964};965self.emit(Inst::External { inst });966}967968/// Logical exclusive or with registers.969pub fn xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {970let dst = pair_gpr(dst);971let inst = match size {972OperandSize::S8 => asm::inst::xorb_rm::new(dst, src).into(),973OperandSize::S16 => asm::inst::xorw_rm::new(dst, src).into(),974OperandSize::S32 => asm::inst::xorl_rm::new(dst, src).into(),975OperandSize::S64 => asm::inst::xorq_rm::new(dst, src).into(),976OperandSize::S128 => unimplemented!(),977};978self.emit(Inst::External 
{ inst });979}980981pub fn xor_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {982let dst = pair_gpr(dst);983let inst = match size {984OperandSize::S8 => asm::inst::xorb_mi::new(dst, u8::try_from(imm).unwrap()).into(),985OperandSize::S16 => asm::inst::xorw_mi::new(dst, u16::try_from(imm).unwrap()).into(),986OperandSize::S32 => asm::inst::xorl_mi::new(dst, imm as u32).into(),987OperandSize::S64 => asm::inst::xorq_mi_sxl::new(dst, imm).into(),988OperandSize::S128 => unimplemented!(),989};990self.emit(Inst::External { inst });991}992993/// Logical exclusive or with float registers.994pub fn xmm_xor_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {995let dst = pair_xmm(dst);996let inst = match size {997OperandSize::S32 => asm::inst::xorps_a::new(dst, src).into(),998OperandSize::S64 => asm::inst::xorpd_a::new(dst, src).into(),999OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),1000};1001self.emit(Inst::External { inst });1002}10031004/// Shift with register and register.1005pub fn shift_rr(&mut self, src: Reg, dst: WritableReg, kind: ShiftKind, size: OperandSize) {1006let dst = pair_gpr(dst);1007let src: Gpr = src.into();1008let inst = match (kind, size) {1009(ShiftKind::Shl, OperandSize::S32) => asm::inst::shll_mc::new(dst, src).into(),1010(ShiftKind::Shl, OperandSize::S64) => asm::inst::shlq_mc::new(dst, src).into(),1011(ShiftKind::Shl, _) => todo!(),1012(ShiftKind::ShrS, OperandSize::S32) => asm::inst::sarl_mc::new(dst, src).into(),1013(ShiftKind::ShrS, OperandSize::S64) => asm::inst::sarq_mc::new(dst, src).into(),1014(ShiftKind::ShrS, _) => todo!(),1015(ShiftKind::ShrU, OperandSize::S32) => asm::inst::shrl_mc::new(dst, src).into(),1016(ShiftKind::ShrU, OperandSize::S64) => asm::inst::shrq_mc::new(dst, src).into(),1017(ShiftKind::ShrU, _) => todo!(),1018(ShiftKind::Rotl, OperandSize::S32) => asm::inst::roll_mc::new(dst, src).into(),1019(ShiftKind::Rotl, OperandSize::S64) => asm::inst::rolq_mc::new(dst, src).into(),1020(ShiftKind::Rotl, _) => todo!(),1021(ShiftKind::Rotr, OperandSize::S32) => asm::inst::rorl_mc::new(dst, src).into(),1022(ShiftKind::Rotr, OperandSize::S64) => asm::inst::rorq_mc::new(dst, src).into(),1023(ShiftKind::Rotr, _) => todo!(),1024};1025self.emit(Inst::External { inst });1026}10271028/// Shift with immediate and register.1029pub fn shift_ir(&mut self, imm: u8, dst: WritableReg, kind: ShiftKind, size: OperandSize) {1030let dst = pair_gpr(dst);1031let inst = match (kind, size) {1032(ShiftKind::Shl, OperandSize::S32) => asm::inst::shll_mi::new(dst, imm).into(),1033(ShiftKind::Shl, OperandSize::S64) => asm::inst::shlq_mi::new(dst, imm).into(),1034(ShiftKind::Shl, _) => todo!(),1035(ShiftKind::ShrS, OperandSize::S32) => asm::inst::sarl_mi::new(dst, imm).into(),1036(ShiftKind::ShrS, OperandSize::S64) => asm::inst::sarq_mi::new(dst, imm).into(),1037(ShiftKind::ShrS, _) => todo!(),1038(ShiftKind::ShrU, OperandSize::S32) => asm::inst::shrl_mi::new(dst, imm).into(),1039(ShiftKind::ShrU, OperandSize::S64) => asm::inst::shrq_mi::new(dst, imm).into(),1040(ShiftKind::ShrU, _) => todo!(),1041(ShiftKind::Rotl, OperandSize::S32) => asm::inst::roll_mi::new(dst, imm).into(),1042(ShiftKind::Rotl, OperandSize::S64) => asm::inst::rolq_mi::new(dst, imm).into(),1043(ShiftKind::Rotl, _) => todo!(),1044(ShiftKind::Rotr, OperandSize::S32) => asm::inst::rorl_mi::new(dst, imm).into(),1045(ShiftKind::Rotr, OperandSize::S64) => asm::inst::rorq_mi::new(dst, imm).into(),1046(ShiftKind::Rotr, _) => todo!(),1047};1048self.emit(Inst::External { inst 
        });
    }

    /// Signed/unsigned division.
    ///
    /// Emits a sequence of instructions to ensure the correctness of
    /// the division invariants. This function assumes that the
    /// caller has correctly allocated the dividend as `(rdx:rax)` and
    /// accounted for the quotient to be stored in `rax`.
    pub fn div(&mut self, divisor: Reg, dst: (Reg, Reg), kind: DivKind, size: OperandSize) {
        let trap = match kind {
            // Signed division has two trapping conditions, integer overflow and
            // divide-by-zero. Check for divide-by-zero explicitly and let the
            // hardware detect overflow.
            DivKind::Signed => {
                self.cmp_ir(divisor, 0, size);
                self.emit(Inst::TrapIf {
                    cc: CC::Z,
                    trap_code: TrapCode::INTEGER_DIVISION_BY_ZERO,
                });

                // Sign-extend the dividend with tailor-made instructions for
                // just this operation.
                let ext_dst: WritableGpr = dst.1.into();
                let ext_src: Gpr = dst.0.into();
                let inst = match size {
                    OperandSize::S32 => asm::inst::cltd_zo::new(ext_dst, ext_src).into(),
                    OperandSize::S64 => asm::inst::cqto_zo::new(ext_dst, ext_src).into(),
                    _ => unimplemented!(),
                };
                self.emit(Inst::External { inst });
                TrapCode::INTEGER_OVERFLOW
            }

            // Unsigned division only traps in one case, on divide-by-zero, so
            // defer that to the trap opcode.
            //
            // The divisor_hi reg is initialized with zero through an
            // xor-against-itself op.
            DivKind::Unsigned => {
                self.xor_rr(dst.1, writable!(dst.1), size);
                TrapCode::INTEGER_DIVISION_BY_ZERO
            }
        };
        let dst0 = pair_gpr(writable!(dst.0));
        let dst1 = pair_gpr(writable!(dst.1));
        let inst = match (kind, size) {
            (DivKind::Signed, OperandSize::S32) => {
                asm::inst::idivl_m::new(dst0, dst1, divisor, trap).into()
            }
            (DivKind::Unsigned, OperandSize::S32) => {
                asm::inst::divl_m::new(dst0, dst1, divisor, trap).into()
            }
            (DivKind::Signed, OperandSize::S64) => {
                asm::inst::idivq_m::new(dst0, dst1, divisor, trap).into()
            }
            (DivKind::Unsigned, OperandSize::S64) => {
                asm::inst::divq_m::new(dst0, dst1, divisor, trap).into()
            }
            _ => todo!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Signed/unsigned remainder.
    ///
    /// Emits a sequence of instructions to ensure the correctness of the
    /// division invariants and ultimately calculate the remainder.
    /// This function assumes that the
    /// caller has correctly allocated the dividend as `(rdx:rax)` and
    /// accounted for the remainder to be stored in `rdx`.
    pub fn rem(&mut self, divisor: Reg, dst: (Reg, Reg), kind: RemKind, size: OperandSize) {
        match kind {
            // Signed remainder goes through a pseudo-instruction which has
            // some internal branching. 
The `dividend_hi`, or `rdx`, is1123// initialized here with a `SignExtendData` instruction.1124RemKind::Signed => {1125let ext_dst: WritableGpr = dst.1.into();11261127// Initialize `dividend_hi`, or `rdx`, with a tailor-made1128// instruction for this operation.1129let ext_src: Gpr = dst.0.into();1130let inst = match size {1131OperandSize::S32 => asm::inst::cltd_zo::new(ext_dst, ext_src).into(),1132OperandSize::S64 => asm::inst::cqto_zo::new(ext_dst, ext_src).into(),1133_ => unimplemented!(),1134};1135self.emit(Inst::External { inst });1136self.emit(Inst::CheckedSRemSeq {1137size: size.into(),1138divisor: divisor.into(),1139dividend_lo: dst.0.into(),1140dividend_hi: dst.1.into(),1141dst_quotient: dst.0.into(),1142dst_remainder: dst.1.into(),1143});1144}11451146// Unsigned remainder initializes `dividend_hi` with zero and1147// then executes a normal `div` instruction.1148RemKind::Unsigned => {1149self.xor_rr(dst.1, writable!(dst.1), size);1150let dst0 = pair_gpr(writable!(dst.0));1151let dst1 = pair_gpr(writable!(dst.1));1152let trap = TrapCode::INTEGER_DIVISION_BY_ZERO;1153let inst = match size {1154OperandSize::S32 => asm::inst::divl_m::new(dst0, dst1, divisor, trap).into(),1155OperandSize::S64 => asm::inst::divq_m::new(dst0, dst1, divisor, trap).into(),1156_ => todo!(),1157};1158self.emit(Inst::External { inst });1159}1160}1161}11621163/// Multiply immediate and register.1164pub fn mul_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {1165use OperandSize::*;1166let src = dst.to_reg();1167let dst: WritableGpr = dst.to_reg().into();1168let inst = match size {1169S16 => asm::inst::imulw_rmi::new(dst, src, u16::try_from(imm).unwrap()).into(),1170S32 => asm::inst::imull_rmi::new(dst, src, imm as u32).into(),1171S64 => asm::inst::imulq_rmi_sxl::new(dst, src, imm).into(),1172S8 | S128 => unimplemented!(),1173};1174self.emit(Inst::External { inst });1175}11761177/// Multiply register and register.1178pub fn mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {1179use OperandSize::*;1180let dst = pair_gpr(dst);1181let inst = match size {1182S16 => asm::inst::imulw_rm::new(dst, src).into(),1183S32 => asm::inst::imull_rm::new(dst, src).into(),1184S64 => asm::inst::imulq_rm::new(dst, src).into(),1185S8 | S128 => unimplemented!(),1186};1187self.emit(Inst::External { inst });1188}11891190/// Add immediate and register.1191pub fn add_ir(&mut self, imm: i32, dst: WritableReg, size: OperandSize) {1192let dst = pair_gpr(dst);1193let inst = match size {1194OperandSize::S8 => asm::inst::addb_mi::new(dst, u8::try_from(imm).unwrap()).into(),1195OperandSize::S16 => asm::inst::addw_mi::new(dst, u16::try_from(imm).unwrap()).into(),1196OperandSize::S32 => asm::inst::addl_mi::new(dst, imm as u32).into(),1197OperandSize::S64 => asm::inst::addq_mi_sxl::new(dst, imm).into(),1198OperandSize::S128 => unimplemented!(),1199};1200self.emit(Inst::External { inst });1201}12021203/// Add register and register.1204pub fn add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {1205let dst = pair_gpr(dst);1206let inst = match size {1207OperandSize::S8 => asm::inst::addb_rm::new(dst, src).into(),1208OperandSize::S16 => asm::inst::addw_rm::new(dst, src).into(),1209OperandSize::S32 => asm::inst::addl_rm::new(dst, src).into(),1210OperandSize::S64 => asm::inst::addq_rm::new(dst, src).into(),1211OperandSize::S128 => unimplemented!(),1212};1213self.emit(Inst::External { inst });1214}12151216pub fn lock_xadd(1217&mut self,1218addr: Address,1219dst: WritableReg,1220size: OperandSize,1221flags: 
MemFlags,1222) {1223assert!(addr.is_offset());1224let mem = Self::to_synthetic_amode(&addr, flags);1225let dst = pair_gpr(dst);1226let inst = match size {1227OperandSize::S8 => asm::inst::lock_xaddb_mr::new(mem, dst).into(),1228OperandSize::S16 => asm::inst::lock_xaddw_mr::new(mem, dst).into(),1229OperandSize::S32 => asm::inst::lock_xaddl_mr::new(mem, dst).into(),1230OperandSize::S64 => asm::inst::lock_xaddq_mr::new(mem, dst).into(),1231OperandSize::S128 => unimplemented!(),1232};12331234self.emit(Inst::External { inst });1235}12361237pub fn atomic_rmw_seq(1238&mut self,1239addr: Address,1240operand: Reg,1241dst: WritableReg,1242temp: WritableReg,1243size: OperandSize,1244flags: MemFlags,1245op: AtomicRmwSeqOp,1246) {1247assert!(addr.is_offset());1248let mem = Self::to_synthetic_amode(&addr, flags);1249self.emit(Inst::AtomicRmwSeq {1250ty: Type::int_with_byte_size(size.bytes() as _).unwrap(),1251mem,1252operand: operand.into(),1253temp: temp.map(Into::into),1254dst_old: dst.map(Into::into),1255op,1256});1257}12581259pub fn xchg(&mut self, addr: Address, dst: WritableReg, size: OperandSize, flags: MemFlags) {1260assert!(addr.is_offset());1261let mem = Self::to_synthetic_amode(&addr, flags);1262let dst = pair_gpr(dst);1263let inst = match size {1264OperandSize::S8 => asm::inst::xchgb_rm::new(dst, mem).into(),1265OperandSize::S16 => asm::inst::xchgw_rm::new(dst, mem).into(),1266OperandSize::S32 => asm::inst::xchgl_rm::new(dst, mem).into(),1267OperandSize::S64 => asm::inst::xchgq_rm::new(dst, mem).into(),1268OperandSize::S128 => unimplemented!(),1269};12701271self.emit(Inst::External { inst });1272}1273pub fn cmpxchg(1274&mut self,1275addr: Address,1276replacement: Reg,1277dst: WritableReg,1278size: OperandSize,1279flags: MemFlags,1280) {1281assert!(addr.is_offset());1282let mem = Self::to_synthetic_amode(&addr, flags);1283let dst = pair_gpr(dst);1284let inst = match size {1285OperandSize::S8 => asm::inst::lock_cmpxchgb_mr::new(mem, replacement, dst).into(),1286OperandSize::S16 => asm::inst::lock_cmpxchgw_mr::new(mem, replacement, dst).into(),1287OperandSize::S32 => asm::inst::lock_cmpxchgl_mr::new(mem, replacement, dst).into(),1288OperandSize::S64 => asm::inst::lock_cmpxchgq_mr::new(mem, replacement, dst).into(),1289OperandSize::S128 => unimplemented!(),1290};12911292self.emit(Inst::External { inst });1293}12941295pub fn cmp_ir(&mut self, src1: Reg, imm: i32, size: OperandSize) {1296let inst = match size {1297OperandSize::S8 => {1298let imm = i8::try_from(imm).unwrap();1299asm::inst::cmpb_mi::new(src1, imm.cast_unsigned()).into()1300}1301OperandSize::S16 => match i8::try_from(imm) {1302Ok(imm8) => asm::inst::cmpw_mi_sxb::new(src1, imm8).into(),1303Err(_) => {1304asm::inst::cmpw_mi::new(src1, i16::try_from(imm).unwrap().cast_unsigned())1305.into()1306}1307},1308OperandSize::S32 => match i8::try_from(imm) {1309Ok(imm8) => asm::inst::cmpl_mi_sxb::new(src1, imm8).into(),1310Err(_) => asm::inst::cmpl_mi::new(src1, imm.cast_unsigned()).into(),1311},1312OperandSize::S64 => match i8::try_from(imm) {1313Ok(imm8) => asm::inst::cmpq_mi_sxb::new(src1, imm8).into(),1314Err(_) => asm::inst::cmpq_mi::new(src1, imm).into(),1315},1316OperandSize::S128 => unimplemented!(),1317};13181319self.emit(Inst::External { inst });1320}13211322pub fn cmp_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize) {1323let inst = match size {1324OperandSize::S8 => asm::inst::cmpb_rm::new(src1, src2).into(),1325OperandSize::S16 => asm::inst::cmpw_rm::new(src1, src2).into(),1326OperandSize::S32 => asm::inst::cmpl_rm::new(src1, 
src2).into(),1327OperandSize::S64 => asm::inst::cmpq_rm::new(src1, src2).into(),1328OperandSize::S128 => unimplemented!(),1329};13301331self.emit(Inst::External { inst });1332}13331334/// Compares values in src1 and src2 and sets ZF, PF, and CF flags in EFLAGS1335/// register.1336pub fn ucomis(&mut self, src1: Reg, src2: Reg, size: OperandSize) {1337let inst = match size {1338OperandSize::S32 => asm::inst::ucomiss_a::new(src1, src2).into(),1339OperandSize::S64 => asm::inst::ucomisd_a::new(src1, src2).into(),1340OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),1341};1342self.emit(Inst::External { inst });1343}13441345pub fn popcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {1346assert!(1347self.isa_flags.has_popcnt() && self.isa_flags.has_sse42(),1348"Requires has_popcnt and has_sse42 flags"1349);1350let dst = WritableGpr::from_reg(dst.to_reg().into());1351let inst = match size {1352OperandSize::S16 => asm::inst::popcntw_rm::new(dst, src).into(),1353OperandSize::S32 => asm::inst::popcntl_rm::new(dst, src).into(),1354OperandSize::S64 => asm::inst::popcntq_rm::new(dst, src).into(),1355OperandSize::S8 | OperandSize::S128 => unreachable!(),1356};1357self.emit(Inst::External { inst });1358}13591360/// Emit a test instruction with two register operands.1361pub fn test_rr(&mut self, src1: Reg, src2: Reg, size: OperandSize) {1362let inst = match size {1363OperandSize::S8 => asm::inst::testb_mr::new(src1, src2).into(),1364OperandSize::S16 => asm::inst::testw_mr::new(src1, src2).into(),1365OperandSize::S32 => asm::inst::testl_mr::new(src1, src2).into(),1366OperandSize::S64 => asm::inst::testq_mr::new(src1, src2).into(),1367OperandSize::S128 => unimplemented!(),1368};13691370self.emit(Inst::External { inst });1371}13721373/// Set value in dst to `0` or `1` based on flags in status register and1374/// [`CmpKind`].1375pub fn setcc(&mut self, kind: IntCmpKind, dst: WritableReg) {1376self.setcc_impl(kind.into(), dst);1377}13781379/// Set value in dst to `1` if parity flag in status register is set, `0`1380/// otherwise.1381pub fn setp(&mut self, dst: WritableReg) {1382self.setcc_impl(CC::P, dst);1383}13841385/// Set value in dst to `1` if parity flag in status register is not set,1386/// `0` otherwise.1387pub fn setnp(&mut self, dst: WritableReg) {1388self.setcc_impl(CC::NP, dst);1389}13901391fn setcc_impl(&mut self, cc: CC, dst: WritableReg) {1392// Clear the dst register or bits 1 to 31 may be incorrectly set.1393// Don't use xor since it updates the status register.1394let dst: WritableGpr = dst.map(Into::into);1395let inst = asm::inst::movl_oi::new(dst, 0).into();1396self.emit(Inst::External { inst });13971398// Copy correct bit from status register into dst register.1399//1400// Note that some of these mnemonics don't match exactly and that's1401// intentional as there are multiple mnemonics for the same encoding in1402// some cases and the assembler picked ones that match Capstone rather1403// than Cranelift.1404let inst = match cc {1405CC::O => asm::inst::seto_m::new(dst).into(),1406CC::NO => asm::inst::setno_m::new(dst).into(),1407CC::B => asm::inst::setb_m::new(dst).into(),1408CC::NB => asm::inst::setae_m::new(dst).into(), // nb == ae1409CC::Z => asm::inst::sete_m::new(dst).into(), // z == e1410CC::NZ => asm::inst::setne_m::new(dst).into(), // nz == ne1411CC::BE => asm::inst::setbe_m::new(dst).into(),1412CC::NBE => asm::inst::seta_m::new(dst).into(), // nbe == a1413CC::S => asm::inst::sets_m::new(dst).into(),1414CC::NS => 
asm::inst::setns_m::new(dst).into(),1415CC::L => asm::inst::setl_m::new(dst).into(),1416CC::NL => asm::inst::setge_m::new(dst).into(), // nl == ge1417CC::LE => asm::inst::setle_m::new(dst).into(),1418CC::NLE => asm::inst::setg_m::new(dst).into(), // nle == g1419CC::P => asm::inst::setp_m::new(dst).into(),1420CC::NP => asm::inst::setnp_m::new(dst).into(),1421};1422self.emit(Inst::External { inst });1423}14241425/// Store the count of leading zeroes in src in dst.1426/// Requires `has_lzcnt` flag.1427pub fn lzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {1428assert!(self.isa_flags.has_lzcnt(), "Requires has_lzcnt flag");1429let dst = WritableGpr::from_reg(dst.to_reg().into());1430let inst = match size {1431OperandSize::S16 => asm::inst::lzcntw_rm::new(dst, src).into(),1432OperandSize::S32 => asm::inst::lzcntl_rm::new(dst, src).into(),1433OperandSize::S64 => asm::inst::lzcntq_rm::new(dst, src).into(),1434OperandSize::S8 | OperandSize::S128 => unreachable!(),1435};1436self.emit(Inst::External { inst });1437}14381439/// Store the count of trailing zeroes in src in dst.1440/// Requires `has_bmi1` flag.1441pub fn tzcnt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {1442assert!(self.isa_flags.has_bmi1(), "Requires has_bmi1 flag");1443let dst = WritableGpr::from_reg(dst.to_reg().into());1444let inst = match size {1445OperandSize::S16 => asm::inst::tzcntw_a::new(dst, src).into(),1446OperandSize::S32 => asm::inst::tzcntl_a::new(dst, src).into(),1447OperandSize::S64 => asm::inst::tzcntq_a::new(dst, src).into(),1448OperandSize::S8 | OperandSize::S128 => unreachable!(),1449};1450self.emit(Inst::External { inst });1451}14521453/// Stores position of the most significant bit set in src in dst.1454/// Zero flag is set if src is equal to 0.1455pub fn bsr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {1456let dst: WritableGpr = WritableGpr::from_reg(dst.to_reg().into());1457let inst = match size {1458OperandSize::S16 => asm::inst::bsrw_rm::new(dst, src).into(),1459OperandSize::S32 => asm::inst::bsrl_rm::new(dst, src).into(),1460OperandSize::S64 => asm::inst::bsrq_rm::new(dst, src).into(),1461OperandSize::S8 | OperandSize::S128 => unreachable!(),1462};1463self.emit(Inst::External { inst });1464}14651466/// Performs integer negation on `src` and places result in `dst`.1467pub fn neg(&mut self, read: Reg, write: WritableReg, size: OperandSize) {1468let gpr = PairedGpr {1469read: read.into(),1470write: WritableGpr::from_reg(write.to_reg().into()),1471};1472let inst = match size {1473OperandSize::S8 => asm::inst::negb_m::new(gpr).into(),1474OperandSize::S16 => asm::inst::negw_m::new(gpr).into(),1475OperandSize::S32 => asm::inst::negl_m::new(gpr).into(),1476OperandSize::S64 => asm::inst::negq_m::new(gpr).into(),1477OperandSize::S128 => unreachable!(),1478};1479self.emit(Inst::External { inst });1480}14811482/// Stores position of the least significant bit set in src in dst.1483/// Zero flag is set if src is equal to 0.1484pub fn bsf(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {1485let dst: WritableGpr = WritableGpr::from_reg(dst.to_reg().into());1486let inst = match size {1487OperandSize::S16 => asm::inst::bsfw_rm::new(dst, src).into(),1488OperandSize::S32 => asm::inst::bsfl_rm::new(dst, src).into(),1489OperandSize::S64 => asm::inst::bsfq_rm::new(dst, src).into(),1490OperandSize::S8 | OperandSize::S128 => unreachable!(),1491};1492self.emit(Inst::External { inst });1493}14941495/// Performs float addition on src and dst and places result in dst.1496pub fn 
xmm_add_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {1497let dst = pair_xmm(dst);1498let inst = match size {1499OperandSize::S32 => asm::inst::addss_a::new(dst, src).into(),1500OperandSize::S64 => asm::inst::addsd_a::new(dst, src).into(),1501OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),1502};1503self.emit(Inst::External { inst });1504}15051506/// Performs float subtraction on src and dst and places result in dst.1507pub fn xmm_sub_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {1508let dst = pair_xmm(dst);1509let inst = match size {1510OperandSize::S32 => asm::inst::subss_a::new(dst, src).into(),1511OperandSize::S64 => asm::inst::subsd_a::new(dst, src).into(),1512OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),1513};1514self.emit(Inst::External { inst });1515}15161517/// Performs float multiplication on src and dst and places result in dst.1518pub fn xmm_mul_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {1519use OperandSize::*;1520let dst = pair_xmm(dst);1521let inst = match size {1522S32 => asm::inst::mulss_a::new(dst, src).into(),1523S64 => asm::inst::mulsd_a::new(dst, src).into(),1524S8 | S16 | S128 => unreachable!(),1525};1526self.emit(Inst::External { inst });1527}15281529/// Performs float division on src and dst and places result in dst.1530pub fn xmm_div_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {1531let dst = pair_xmm(dst);1532let inst = match size {1533OperandSize::S32 => asm::inst::divss_a::new(dst, src).into(),1534OperandSize::S64 => asm::inst::divsd_a::new(dst, src).into(),1535OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),1536};1537self.emit(Inst::External { inst });1538}15391540/// Minimum for src and dst XMM registers with results put in dst.1541pub fn xmm_min_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {1542self.emit(Inst::XmmMinMaxSeq {1543size: size.into(),1544is_min: true,1545lhs: src.into(),1546rhs: dst.to_reg().into(),1547dst: dst.map(Into::into),1548});1549}15501551/// Maximum for src and dst XMM registers with results put in dst.1552pub fn xmm_max_seq(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {1553self.emit(Inst::XmmMinMaxSeq {1554size: size.into(),1555is_min: false,1556lhs: src.into(),1557rhs: dst.to_reg().into(),1558dst: dst.map(Into::into),1559});1560}15611562/// Perform rounding operation on float register src and place results in1563/// float register dst.1564pub fn xmm_rounds_rr(1565&mut self,1566src: Reg,1567dst: WritableReg,1568mode: RoundingMode,1569size: OperandSize,1570) {1571let dst = dst.map(|r| r.into());15721573let imm: u8 = match mode {1574RoundingMode::Nearest => 0x00,1575RoundingMode::Down => 0x01,1576RoundingMode::Up => 0x02,1577RoundingMode::Zero => 0x03,1578};15791580let inst = match size {1581OperandSize::S32 => asm::inst::roundss_rmi::new(dst, src, imm).into(),1582OperandSize::S64 => asm::inst::roundsd_rmi::new(dst, src, imm).into(),1583OperandSize::S8 | OperandSize::S16 | OperandSize::S128 => unreachable!(),1584};15851586self.emit(Inst::External { inst });1587}15881589pub fn sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {1590use OperandSize::*;1591let dst = pair_xmm(dst);1592let inst = match size {1593S32 => asm::inst::sqrtss_a::new(dst, src).into(),1594S64 => asm::inst::sqrtsd_a::new(dst, src).into(),1595S8 | S16 | S128 => unimplemented!(),1596};1597self.emit(Inst::External { inst });1598}15991600/// Emit a call to an unknown location through a 

    /// Emit a call to an unknown location through a register.
    pub fn call_with_reg(&mut self, cc: CallingConvention, callee: Reg) {
        self.emit(Inst::CallUnknown {
            info: Box::new(CallInfo::empty(RegMem::reg(callee.into()), cc.into())),
        });
    }

    /// Emit a call to a locally defined function through an index.
    pub fn call_with_name(&mut self, cc: CallingConvention, name: UserExternalNameRef) {
        self.emit(Inst::CallKnown {
            info: Box::new(CallInfo::empty(ExternalName::user(name), cc.into())),
        });
    }

    /// Emits a conditional jump to the given label.
    pub fn jmp_if(&mut self, cc: impl Into<CC>, taken: MachLabel) {
        self.emit(Inst::WinchJmpIf {
            cc: cc.into(),
            taken,
        });
    }

    /// Performs an unconditional jump to the given label.
    pub fn jmp(&mut self, target: MachLabel) {
        self.emit(Inst::JmpKnown { dst: target });
    }

    /// Emits a jump table sequence.
    pub fn jmp_table(
        &mut self,
        targets: SmallVec<[MachLabel; 4]>,
        default: MachLabel,
        index: Reg,
        tmp1: Reg,
        tmp2: Reg,
    ) {
        self.emit(Inst::JmpTableSeq {
            idx: index.into(),
            tmp1: Writable::from_reg(tmp1.into()),
            tmp2: Writable::from_reg(tmp2.into()),
            default_target: default,
            targets: Box::new(targets.to_vec()),
        })
    }

    /// Emit a trap instruction.
    pub fn trap(&mut self, code: TrapCode) {
        let inst = asm::inst::ud2_zo::new(code).into();
        self.emit(Inst::External { inst });
    }

    /// Conditional trap.
    pub fn trapif(&mut self, cc: impl Into<CC>, trap_code: TrapCode) {
        self.emit(Inst::TrapIf {
            cc: cc.into(),
            trap_code,
        });
    }

    /// Load effective address.
    pub fn lea(&mut self, addr: &Address, dst: WritableReg, size: OperandSize) {
        let addr = Self::to_synthetic_amode(addr, MemFlags::trusted());
        let dst: WritableGpr = dst.map(Into::into);
        let inst = match size {
            OperandSize::S16 => asm::inst::leaw_rm::new(dst, addr).into(),
            OperandSize::S32 => asm::inst::leal_rm::new(dst, addr).into(),
            OperandSize::S64 => asm::inst::leaq_rm::new(dst, addr).into(),
            OperandSize::S8 | OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Add with carry: computes `dst + src + CF` and places the result in `dst`.
    pub fn adc_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::adcb_rm::new(dst, src).into(),
            OperandSize::S16 => asm::inst::adcw_rm::new(dst, src).into(),
            OperandSize::S32 => asm::inst::adcl_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::adcq_rm::new(dst, src).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Subtract with borrow: computes `dst - (src + CF)` and places the
    /// result in `dst`.
    pub fn sbb_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst = pair_gpr(dst);
        let inst = match size {
            OperandSize::S8 => asm::inst::sbbb_rm::new(dst, src).into(),
            OperandSize::S16 => asm::inst::sbbw_rm::new(dst, src).into(),
            OperandSize::S32 => asm::inst::sbbl_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::sbbq_rm::new(dst, src).into(),
            OperandSize::S128 => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Widening multiply: multiplies `lhs` by `rhs`, placing the low half of
    /// the result in `dst_lo` and the high half in `dst_hi`.
    pub fn mul_wide(
        &mut self,
        dst_lo: WritableReg,
        dst_hi: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: MulWideKind,
        size: OperandSize,
    ) {
        use MulWideKind::*;
        use OperandSize::*;
        let rax = asm::Fixed(PairedGpr {
            read: lhs.into(),
            write: WritableGpr::from_reg(dst_lo.to_reg().into()),
        });
        let rdx = asm::Fixed(dst_hi.to_reg().into());
        if size == S8 {
            // For `mulb` and `imulb`, both the high and low halves of the
            // result are written to RAX.
            assert_eq!(dst_lo, dst_hi);
        }
        let inst = match (size, kind) {
            (S8, Unsigned) => asm::inst::mulb_m::new(rax, rhs).into(),
            (S8, Signed) => asm::inst::imulb_m::new(rax, rhs).into(),
            (S16, Unsigned) => asm::inst::mulw_m::new(rax, rdx, rhs).into(),
            (S16, Signed) => asm::inst::imulw_m::new(rax, rdx, rhs).into(),
            (S32, Unsigned) => asm::inst::mull_m::new(rax, rdx, rhs).into(),
            (S32, Signed) => asm::inst::imull_m::new(rax, rdx, rhs).into(),
            (S64, Unsigned) => asm::inst::mulq_m::new(rax, rdx, rhs).into(),
            (S64, Signed) => asm::inst::imulq_m::new(rax, rdx, rhs).into(),
            (S128, _) => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
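
    // Illustrative usage sketch (hypothetical values, not from the original
    // source): with `dst_lo`/`dst_hi` as writable handles for RAX/RDX and
    // `lhs`/`rhs` as the operand registers (with `lhs` allocated to RAX), a
    // full 64x64->128-bit unsigned multiply could be requested as:
    //
    //     asm.mul_wide(dst_lo, dst_hi, lhs, rhs, MulWideKind::Unsigned, OperandSize::S64);
    //
    // which emits `mul` with RAX/RDX as the implicit result pair.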

    /// Shuffles bytes in `src` according to contents of `mask` and puts
    /// result in `dst`.
    pub fn xmm_vpshufb_rrm(&mut self, dst: WritableReg, src: Reg, mask: &Address) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let mask = Self::to_synthetic_amode(mask, MemFlags::trusted());
        let inst = asm::inst::vpshufb_b::new(dst, src, mask).into();
        self.emit(Inst::External { inst });
    }

    /// Shuffles bytes in `src` according to contents of `mask` and puts
    /// result in `dst`.
    pub fn xmm_vpshufb_rrr(&mut self, dst: WritableReg, src: Reg, mask: Reg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpshufb_b::new(dst, src, mask).into();
        self.emit(Inst::External { inst });
    }

    /// Add unsigned integers with unsigned saturation.
    ///
    /// Adds the src operands, but when an individual lane result exceeds the
    /// maximum unsigned value for the lane width, the saturated maximum
    /// (e.g. 0xFF for bytes) is written instead.
    pub fn xmm_vpaddus_rrm(
        &mut self,
        dst: WritableReg,
        src1: Reg,
        src2: &Address,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpaddusb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpaddusw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Add unsigned integers with unsigned saturation.
    ///
    /// Adds the src operands, but when an individual lane result exceeds the
    /// maximum unsigned value for the lane width, the saturated maximum
    /// (e.g. 0xFF for bytes) is written instead.
    pub fn xmm_vpaddus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpaddusb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpaddusw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Add signed integers with signed saturation.
    pub fn xmm_vpadds_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpaddsb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpaddsw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Adds vectors of integers in `src1` and the memory operand `src2` and
    /// puts the results in `dst`.
    pub fn xmm_vpadd_rmr(
        &mut self,
        src1: Reg,
        src2: &Address,
        dst: WritableReg,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpaddb_b::new(dst, src1, address).into(),
            OperandSize::S16 => asm::inst::vpaddw_b::new(dst, src1, address).into(),
            OperandSize::S32 => asm::inst::vpaddd_b::new(dst, src1, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Adds vectors of integers in `src1` and `src2` and puts the results in
    /// `dst`.
    pub fn xmm_vpadd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpaddb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpaddw_b::new(dst, src1, src2).into(),
            OperandSize::S32 => asm::inst::vpaddd_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vpaddq_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Emit an `mfence` memory fence.
    pub fn mfence(&mut self) {
        self.emit(Inst::External {
            inst: asm::inst::mfence_zo::new().into(),
        });
    }

    /// Extract the value at `lane` from `src` into the memory at `addr`.
    pub(crate) fn xmm_vpextr_rm(
        &mut self,
        addr: &Address,
        src: Reg,
        lane: u8,
        size: OperandSize,
        flags: MemFlags,
    ) {
        assert!(addr.is_offset());
        let dst = Self::to_synthetic_amode(addr, flags);
        let inst = match size {
            OperandSize::S8 => asm::inst::vpextrb_a::new(dst, src, lane).into(),
            OperandSize::S16 => asm::inst::vpextrw_b::new(dst, src, lane).into(),
            OperandSize::S32 => asm::inst::vpextrd_a::new(dst, src, lane).into(),
            OperandSize::S64 => asm::inst::vpextrq_a::new(dst, src, lane).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Extract the value at `lane` from `src` into `dst` (zero extended).
    pub fn xmm_vpextr_rr(&mut self, dst: WritableReg, src: Reg, lane: u8, size: OperandSize) {
        let dst: WritableGpr = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpextrb_a::new(dst, src, lane).into(),
            OperandSize::S16 => asm::inst::vpextrw_a::new(dst, src, lane).into(),
            OperandSize::S32 => asm::inst::vpextrd_a::new(dst, src, lane).into(),
            OperandSize::S64 => asm::inst::vpextrq_a::new(dst, src, lane).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Copy a value from `src2`, merge it into `src1`, and put the result in
    /// `dst` at the lane specified by `count`.
    pub fn xmm_vpinsr_rrm(
        &mut self,
        dst: WritableReg,
        src1: Reg,
        src2: &Address,
        count: u8,
        size: OperandSize,
    ) {
        let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let dst: WritableXmm = dst.map(|r| r.into());

        let inst = match size {
            OperandSize::S8 => asm::inst::vpinsrb_b::new(dst, src1, src2, count).into(),
            OperandSize::S16 => asm::inst::vpinsrw_b::new(dst, src1, src2, count).into(),
            OperandSize::S32 => asm::inst::vpinsrd_b::new(dst, src1, src2, count).into(),
            OperandSize::S64 => asm::inst::vpinsrq_b::new(dst, src1, src2, count).into(),
            OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Copy a value from `src2`, merge it into `src1`, and put the result in
    /// `dst` at the lane specified by `count`.
    pub fn xmm_vpinsr_rrr(
        &mut self,
        dst: WritableReg,
        src1: Reg,
        src2: Reg,
        count: u8,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpinsrb_b::new(dst, src1, src2, count).into(),
            OperandSize::S16 => asm::inst::vpinsrw_b::new(dst, src1, src2, count).into(),
            OperandSize::S32 => asm::inst::vpinsrd_b::new(dst, src1, src2, count).into(),
            OperandSize::S64 => asm::inst::vpinsrq_b::new(dst, src1, src2, count).into(),
            OperandSize::S128 => unreachable!(),
        };
        self.emit(Inst::External { inst });
    }
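
    // Lane-numbering sketch for the extract/insert helpers above
    // (hypothetical values): if an XMM register `v` holds the i32x4 value
    // `[10, 20, 30, 40]`, then extracting with `lane = 2` and
    // `OperandSize::S32` yields `30`, and inserting a GPR value `g` with
    // `count = 1` produces `[10, g, 30, 40]`; lane 0 is the least
    // significant element.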

    /// Copy a 32-bit float from `address`, merge it into `src1`, and put the
    /// result in `dst`.
    pub fn xmm_vinsertps_rrm(&mut self, dst: WritableReg, src1: Reg, address: &Address, imm: u8) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
        let inst = asm::inst::vinsertps_b::new(dst, src1, address, imm).into();
        self.emit(Inst::External { inst });
    }

    /// Copy a 32-bit float in `src2`, merge it into `src1`, and put the
    /// result in `dst`.
    pub fn xmm_vinsertps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, imm: u8) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vinsertps_b::new(dst, src1, src2, imm).into();
        self.emit(Inst::External { inst });
    }

    /// Moves the lower 64-bit float in `src2` into the lower 64 bits of `dst`
    /// and the upper 64 bits of `src1` into the upper 64 bits of `dst`.
    pub fn xmm_vmovsd_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vmovsd_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Moves a 64-bit float from `src` into the lower 64 bits of `dst`.
    /// Zeroes out the upper 64 bits of `dst`.
    pub fn xmm_vmovsd_rm(&mut self, dst: WritableReg, src: &Address) {
        let src = Self::to_synthetic_amode(src, MemFlags::trusted());
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vmovsd_d::new(dst, src).into();
        self.emit(Inst::External { inst });
    }

    /// Moves two 32-bit floats from `src2` to the upper 64 bits of `dst`.
    /// Copies two 32-bit floats from the lower 64 bits of `src1` to the
    /// lower 64 bits of `dst`.
    pub fn xmm_vmovlhps_rrm(&mut self, dst: WritableReg, src1: Reg, src2: &Address) {
        let src2 = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vmovhps_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Moves two 32-bit floats from the lower 64 bits of `src2` to the upper
    /// 64 bits of `dst`. Copies two 32-bit floats from the lower 64 bits of
    /// `src1` to the lower 64 bits of `dst`.
    pub fn xmm_vmovlhps_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vmovlhps_rvm::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Move unaligned packed integer values from address `src` to `dst`.
    pub fn xmm_vmovdqu_mr(&mut self, src: &Address, dst: WritableReg, flags: MemFlags) {
        let src = Self::to_synthetic_amode(src, flags);
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vmovdqu_a::new(dst, src).into();
        self.emit(Inst::External { inst });
    }

    /// Move integer from `src` to xmm register `dst` using an AVX instruction.
    pub fn avx_gpr_to_xmm(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vmovd_a::new(dst, src).into(),
            OperandSize::S64 => asm::inst::vmovq_a::new(dst, src).into(),
            _ => unreachable!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Logically compares `src1` and `src2` (`vptest`), setting the CPU flags
    /// for a subsequent conditional instruction.
    pub fn xmm_vptest(&mut self, src1: Reg, src2: Reg) {
        let inst = asm::inst::vptest_rm::new(src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Converts the vector in `src` between integer and floating-point
    /// formats according to `kind` and puts the results in `dst`.
    pub fn xmm_vcvt_rr(&mut self, src: Reg, dst: WritableReg, kind: VcvtKind) {
        let dst: WritableXmm = dst.map(|x| x.into());
        let inst = match kind {
            VcvtKind::I32ToF32 => asm::inst::vcvtdq2ps_a::new(dst, src).into(),
            VcvtKind::I32ToF64 => asm::inst::vcvtdq2pd_a::new(dst, src).into(),
            VcvtKind::F64ToF32 => asm::inst::vcvtpd2ps_a::new(dst, src).into(),
            VcvtKind::F64ToI32 => asm::inst::vcvttpd2dq_a::new(dst, src).into(),
            VcvtKind::F32ToF64 => asm::inst::vcvtps2pd_a::new(dst, src).into(),
            VcvtKind::F32ToI32 => asm::inst::vcvttps2dq_a::new(dst, src).into(),
        };
        self.emit(Inst::External { inst });
    }

    /// Subtracts the floats in vector `src2` from the floats in vector `src1`
    /// and puts the results in `dst`.
    pub fn xmm_vsubp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vsubps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Subtracts the integers in vector `src2` from the integers in vector
    /// `src1` and puts the results in `dst`.
    pub fn xmm_vpsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpsubb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpsubw_b::new(dst, src1, src2).into(),
            OperandSize::S32 => asm::inst::vpsubd_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vpsubq_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Subtract unsigned integers with unsigned saturation.
    pub fn xmm_vpsubus_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpsubusb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpsubusw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Subtract signed integers with signed saturation.
    pub fn xmm_vpsubs_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpsubsb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpsubsw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Add floats in vector `src1` to floats in the memory operand `src2` and
    /// put the results in `dst`.
    pub fn xmm_vaddp_rrm(
        &mut self,
        src1: Reg,
        src2: &Address,
        dst: WritableReg,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S32 => asm::inst::vaddps_b::new(dst, src1, address).into(),
            OperandSize::S64 => asm::inst::vaddpd_b::new(dst, src1, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Add floats in vector `src1` to floats in vector `src2` and put the
    /// results in `dst`.
    pub fn xmm_vaddp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vaddps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vaddpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Compare vector register `lhs` with a vector of integers in memory at
    /// `address` for equality between packed integers and write the resulting
    /// vector into `dst`.
    pub fn xmm_vpcmpeq_rrm(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        address: &Address,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpcmpeqb_b::new(dst, lhs, address).into(),
            OperandSize::S16 => asm::inst::vpcmpeqw_b::new(dst, lhs, address).into(),
            OperandSize::S32 => asm::inst::vpcmpeqd_b::new(dst, lhs, address).into(),
            OperandSize::S64 => asm::inst::vpcmpeqq_b::new(dst, lhs, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Compare vector registers `lhs` and `rhs` for equality between packed
    /// integers and write the resulting vector into `dst`.
    pub fn xmm_vpcmpeq_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpcmpeqb_b::new(dst, lhs, rhs).into(),
            OperandSize::S16 => asm::inst::vpcmpeqw_b::new(dst, lhs, rhs).into(),
            OperandSize::S32 => asm::inst::vpcmpeqd_b::new(dst, lhs, rhs).into(),
            OperandSize::S64 => asm::inst::vpcmpeqq_b::new(dst, lhs, rhs).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
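
    // Note on the packed integer compares (`vpcmpeq*` above and `vpcmpgt*`
    // below): each lane of `dst` is written as an all-ones or all-zeros mask
    // rather than a boolean. For example (hypothetical values), comparing the
    // i32x4 vectors `[1, 2, 3, 4]` and `[1, 0, 3, 0]` for equality produces
    // `[0xFFFF_FFFF, 0, 0xFFFF_FFFF, 0]`.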

    /// Performs a greater than comparison with vectors of signed integers in
    /// `lhs` and `rhs` and puts the results in `dst`.
    pub fn xmm_vpcmpgt_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpcmpgtb_b::new(dst, lhs, rhs).into(),
            OperandSize::S16 => asm::inst::vpcmpgtw_b::new(dst, lhs, rhs).into(),
            OperandSize::S32 => asm::inst::vpcmpgtd_b::new(dst, lhs, rhs).into(),
            OperandSize::S64 => asm::inst::vpcmpgtq_b::new(dst, lhs, rhs).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a max operation with vectors of signed integers in `lhs` and
    /// `rhs` and puts the results in `dst`.
    pub fn xmm_vpmaxs_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpmaxsb_b::new(dst, lhs, rhs).into(),
            OperandSize::S16 => asm::inst::vpmaxsw_b::new(dst, lhs, rhs).into(),
            OperandSize::S32 => asm::inst::vpmaxsd_b::new(dst, lhs, rhs).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a max operation with vectors of unsigned integers in `lhs` and
    /// `rhs` and puts the results in `dst`.
    pub fn xmm_vpmaxu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpmaxub_b::new(dst, lhs, rhs).into(),
            OperandSize::S16 => asm::inst::vpmaxuw_b::new(dst, lhs, rhs).into(),
            OperandSize::S32 => asm::inst::vpmaxud_b::new(dst, lhs, rhs).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a min operation with vectors of signed integers in `lhs` and
    /// `rhs` and puts the results in `dst`.
    pub fn xmm_vpmins_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpminsb_b::new(dst, lhs, rhs).into(),
            OperandSize::S16 => asm::inst::vpminsw_b::new(dst, lhs, rhs).into(),
            OperandSize::S32 => asm::inst::vpminsd_b::new(dst, lhs, rhs).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a min operation with vectors of unsigned integers in `lhs` and
    /// `rhs` and puts the results in `dst`.
    pub fn xmm_vpminu_rrr(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpminub_b::new(dst, lhs, rhs).into(),
            OperandSize::S16 => asm::inst::vpminuw_b::new(dst, lhs, rhs).into(),
            OperandSize::S32 => asm::inst::vpminud_b::new(dst, lhs, rhs).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a comparison operation between vectors of floats in `lhs` and
    /// `rhs` and puts the results in `dst`.
    pub fn xmm_vcmpp_rrr(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        size: OperandSize,
        kind: VcmpKind,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let imm = match kind {
            VcmpKind::Eq => 0,
            VcmpKind::Lt => 1,
            VcmpKind::Le => 2,
            VcmpKind::Unord => 3,
            VcmpKind::Ne => 4,
        };
        let inst = match size {
            OperandSize::S32 => asm::inst::vcmpps_b::new(dst, lhs, rhs, imm).into(),
            OperandSize::S64 => asm::inst::vcmppd_b::new(dst, lhs, rhs, imm).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a subtraction on two vectors of floats and puts the results in
    /// `dst`.
    pub fn xmm_vsub_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a subtraction on two vectors of floats and puts the results in
    /// `dst`.
    pub fn xmm_vsub_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vsubps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vsubpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Converts a vector of signed integers into a vector of narrower integers
    /// using saturation to handle overflow.
    pub fn xmm_vpackss_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpacksswb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpackssdw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Converts a vector of unsigned integers into a vector of narrower
    /// integers using saturation to handle overflow.
    pub fn xmm_vpackus_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpackuswb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpackusdw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Concatenates `src1` and `src2`, shifts the composite right by `imm`
    /// bytes, and puts the result in `dst`.
    pub fn xmm_vpalignr_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, imm: u8) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpalignr_b::new(dst, src1, src2, imm).into();
        self.emit(Inst::External { inst });
    }

    /// Takes the lower lanes of vectors of floats in `src1` and `src2` and
    /// interleaves them in `dst`.
    pub fn xmm_vunpcklp_rrm(
        &mut self,
        src1: Reg,
        src2: &Address,
        dst: WritableReg,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S32 => asm::inst::vunpcklps_b::new(dst, src1, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Unpacks and interleaves high order data of floats in `src1` and `src2`
    /// and puts the results in `dst`.
    pub fn xmm_vunpckhp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vunpckhps_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
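
    // Interleaving sketch for the unpack helpers above and below
    // (hypothetical values): for byte vectors `a = [a0, a1, ..., a15]` and
    // `b = [b0, b1, ..., b15]`, the "low" variants produce
    // `[a0, b0, a1, b1, ..., a7, b7]`, while the "high" variants do the same
    // with the upper eight lanes of each source.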

    /// Unpacks and interleaves the lower lanes of vectors of integers in `src1`
    /// and `src2` and puts the results in `dst`.
    pub fn xmm_vpunpckl_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpunpcklbw_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpunpcklwd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Unpacks and interleaves the higher lanes of vectors of integers in
    /// `src1` and `src2` and puts the results in `dst`.
    pub fn xmm_vpunpckh_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpunpckhbw_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpunpckhwd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Multiplies vectors of 64-bit integers in `src1` and `src2` and puts the
    /// low 64 bits of each product in `dst`.
    pub(crate) fn vpmullq(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmullq_c::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Creates a mask made up of the most significant bit of each byte of
    /// `src` and stores the result in `dst`.
    pub fn xmm_vpmovmsk_rr(
        &mut self,
        src: Reg,
        dst: WritableReg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) {
        assert_eq!(dst_size, OperandSize::S32);
        let dst: WritableGpr = dst.map(|r| r.into());
        let inst = match src_size {
            OperandSize::S8 => asm::inst::vpmovmskb_rm::new(dst, src).into(),
            _ => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Creates a mask made up of the most significant bit of each float lane
    /// in `src` and stores the result in `dst`.
    pub fn xmm_vmovskp_rr(
        &mut self,
        src: Reg,
        dst: WritableReg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) {
        assert_eq!(dst_size, OperandSize::S32);
        let dst: WritableGpr = dst.map(|r| r.into());
        let inst = match src_size {
            OperandSize::S32 => asm::inst::vmovmskps_rm::new(dst, src).into(),
            OperandSize::S64 => asm::inst::vmovmskpd_rm::new(dst, src).into(),
            _ => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Compute the absolute value of elements in vector `src` and put the
    /// results in `dst`.
    pub fn xmm_vpabs_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpabsb_a::new(dst, src).into(),
            OperandSize::S16 => asm::inst::vpabsw_a::new(dst, src).into(),
            OperandSize::S32 => asm::inst::vpabsd_a::new(dst, src).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Arithmetically (sign preserving) right shift on vector in `src` by
    /// `amount` with result written to `dst`.
    pub fn xmm_vpsra_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S16 => asm::inst::vpsraw_c::new(dst, src, amount).into(),
            OperandSize::S32 => asm::inst::vpsrad_c::new(dst, src, amount).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Arithmetically (sign preserving) right shift on vector in `src` by
    /// `imm` with result written to `dst`.
    pub fn xmm_vpsra_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
        let inst = match size {
            OperandSize::S32 => asm::inst::vpsrad_d::new(dst, src, imm).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Shift vector data left by `imm`.
    pub fn xmm_vpsll_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
        let inst = match size {
            OperandSize::S32 => asm::inst::vpslld_d::new(dst, src, imm).into(),
            OperandSize::S64 => asm::inst::vpsllq_d::new(dst, src, imm).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Shift vector data left by `amount`.
    pub fn xmm_vpsll_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S16 => asm::inst::vpsllw_c::new(dst, src, amount).into(),
            OperandSize::S32 => asm::inst::vpslld_c::new(dst, src, amount).into(),
            OperandSize::S64 => asm::inst::vpsllq_c::new(dst, src, amount).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Shift vector data right by `imm`.
    pub fn xmm_vpsrl_rri(&mut self, src: Reg, dst: WritableReg, imm: u32, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let imm = u8::try_from(imm).expect("immediate must fit in 8 bits");
        let inst = match size {
            OperandSize::S16 => asm::inst::vpsrlw_d::new(dst, src, imm).into(),
            OperandSize::S32 => asm::inst::vpsrld_d::new(dst, src, imm).into(),
            OperandSize::S64 => asm::inst::vpsrlq_d::new(dst, src, imm).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Shift vector data right by `amount`.
    pub fn xmm_vpsrl_rrr(&mut self, src: Reg, amount: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S16 => asm::inst::vpsrlw_c::new(dst, src, amount).into(),
            OperandSize::S32 => asm::inst::vpsrld_c::new(dst, src, amount).into(),
            OperandSize::S64 => asm::inst::vpsrlq_c::new(dst, src, amount).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Perform an `and` operation on vectors of floats in `src1` and `src2`
    /// and put the results in `dst`.
    pub fn xmm_vandp_rrm(
        &mut self,
        src1: Reg,
        src2: &Address,
        dst: WritableReg,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S32 => asm::inst::vandps_b::new(dst, src1, address).into(),
            OperandSize::S64 => asm::inst::vandpd_b::new(dst, src1, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Perform an `and` operation on vectors of floats in `src1` and `src2`
    /// and put the results in `dst`.
    pub fn xmm_vandp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vandps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vandpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Performs a bitwise `and` operation on the vectors in `src1` and `src2`
    /// and stores the results in `dst`.
    pub fn xmm_vpand_rrm(&mut self, src1: Reg, src2: &Address, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = asm::inst::vpand_b::new(dst, src1, address).into();
        self.emit(Inst::External { inst });
    }

    /// Performs a bitwise `and` operation on the vectors in `src1` and `src2`
    /// and stores the results in `dst`.
    pub fn xmm_vpand_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpand_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Perform an `and not` operation on vectors of floats in `src1` and
    /// `src2` and put the results in `dst`.
    pub fn xmm_vandnp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vandnps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vandnpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Perform an `and not` operation on vectors in `src1` and `src2` and put
    /// the results in `dst`.
    pub fn xmm_vpandn_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpandn_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Perform an `or` operation on the vectors of floats in `src1` and `src2`
    /// and put the results in `dst`.
    pub fn xmm_vorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vorps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vorpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Bitwise OR of `src1` and `src2`.
    pub fn xmm_vpor_rrr(&mut self, dst: WritableReg, src1: Reg, src2: Reg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpor_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Bitwise logical xor of vectors of floats in `src1` and `src2` and puts
    /// the results in `dst`.
    pub fn xmm_vxorp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vxorps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vxorpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Perform a logical xor on the vector in `src` and the vector at
    /// `address` and put the results in `dst`.
    pub fn xmm_vpxor_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
        let inst = asm::inst::vpxor_b::new(dst, src, address).into();
        self.emit(Inst::External { inst });
    }

    /// Perform a logical xor on the vectors in `src1` and `src2` and put the
    /// results in `dst`.
    pub fn xmm_vpxor_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpxor_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Perform a max operation across two vectors of floats and put the
    /// results in `dst`.
    pub fn xmm_vmaxp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vmaxps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vmaxpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Perform a min operation across two vectors of floats and put the
    /// results in `dst`.
    pub fn xmm_vminp_rrm(
        &mut self,
        src1: Reg,
        src2: &Address,
        dst: WritableReg,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(src2, MemFlags::trusted());
        let inst = match size {
            OperandSize::S32 => asm::inst::vminps_b::new(dst, src1, address).into(),
            OperandSize::S64 => asm::inst::vminpd_b::new(dst, src1, address).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Perform a min operation across two vectors of floats and put the
    /// results in `dst`.
    pub fn xmm_vminp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vminps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vminpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Round a vector of floats.
    pub fn xmm_vroundp_rri(
        &mut self,
        src: Reg,
        dst: WritableReg,
        mode: VroundMode,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let imm = match mode {
            VroundMode::TowardNearest => 0,
            VroundMode::TowardNegativeInfinity => 1,
            VroundMode::TowardPositiveInfinity => 2,
            VroundMode::TowardZero => 3,
        };

        let inst = match size {
            OperandSize::S32 => asm::inst::vroundps_rmi::new(dst, src, imm).into(),
            OperandSize::S64 => asm::inst::vroundpd_rmi::new(dst, src, imm).into(),
            _ => unimplemented!(),
        };

        self.emit(Inst::External { inst });
    }

    /// Shuffle of vectors of floats.
    pub fn xmm_vshufp_rrri(
        &mut self,
        src1: Reg,
        src2: Reg,
        dst: WritableReg,
        imm: u8,
        size: OperandSize,
    ) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vshufps_b::new(dst, src1, src2, imm).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Each lane in `src1` is multiplied by the corresponding lane in `src2`,
    /// producing intermediate 32-bit operands. Each intermediate 32-bit
    /// operand is truncated to the 18 most significant bits. Rounding is
    /// performed by adding 1 to the least significant bit of the 18-bit
    /// intermediate result. The 16 bits immediately to the right of the most
    /// significant bit of each 18-bit intermediate result are placed in each
    /// lane of `dst`.
    pub fn xmm_vpmulhrs_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S16 => asm::inst::vpmulhrsw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }
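
    // Scalar model of the `vpmulhrsw` lane computation described above (an
    // illustrative sketch, not used by the assembler): for 16-bit lanes `a`
    // and `b`, the destination lane is
    //
    //     fn pmulhrsw_lane(a: i16, b: i16) -> i16 {
    //         let product = i32::from(a) * i32::from(b); // 32-bit intermediate
    //         (((product >> 14) + 1) >> 1) as i16        // round, keep middle 16 bits
    //     }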

    /// Multiplies the low signed 32-bit integers from each packed 64-bit lane
    /// in `src1` and `src2` and puts the 64-bit results in `dst`.
    pub fn xmm_vpmuldq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmuldq_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Multiplies the low unsigned 32-bit integers from each packed 64-bit
    /// lane in `src1` and `src2` and puts the 64-bit results in `dst`.
    pub fn xmm_vpmuludq_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmuludq_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Multiplies the integer lanes in `src1` and `src2` and puts the low half
    /// of each product in `dst`.
    pub fn xmm_vpmull_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S16 => asm::inst::vpmullw_b::new(dst, src1, src2).into(),
            OperandSize::S32 => asm::inst::vpmulld_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Multiplies the vectors of floats in `src1` and `src2` and puts the
    /// results in `dst`.
    pub fn xmm_vmulp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vmulps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vmulpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Perform an average operation on the vectors of unsigned integers in
    /// `src1` and `src2` and put the results in `dst`.
    pub fn xmm_vpavg_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S8 => asm::inst::vpavgb_b::new(dst, src1, src2).into(),
            OperandSize::S16 => asm::inst::vpavgw_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Divide the vector of floats in `src1` by the vector of floats in `src2`
    /// and put the results in `dst`.
    pub fn xmm_vdivp_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vdivps_b::new(dst, src1, src2).into(),
            OperandSize::S64 => asm::inst::vdivpd_b::new(dst, src1, src2).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Compute square roots of the vector of floats in `src` and put the
    /// results in `dst`.
    pub fn xmm_vsqrtp_rr(&mut self, src: Reg, dst: WritableReg, size: OperandSize) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = match size {
            OperandSize::S32 => asm::inst::vsqrtps_b::new(dst, src).into(),
            OperandSize::S64 => asm::inst::vsqrtpd_b::new(dst, src).into(),
            _ => unimplemented!(),
        };
        self.emit(Inst::External { inst });
    }

    /// Multiply and add packed signed and unsigned bytes.
    pub fn xmm_vpmaddubsw_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
        let inst = asm::inst::vpmaddubsw_b::new(dst, src, address).into();
        self.emit(Inst::External { inst });
    }

    /// Multiply and add packed signed and unsigned bytes.
    pub fn xmm_vpmaddubsw_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmaddubsw_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }

    /// Multiply and add packed integers.
    pub fn xmm_vpmaddwd_rmr(&mut self, src: Reg, address: &Address, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let address = Self::to_synthetic_amode(address, MemFlags::trusted());
        let inst = asm::inst::vpmaddwd_b::new(dst, src, address).into();
        self.emit(Inst::External { inst });
    }

    /// Multiply and add packed integers.
    pub fn xmm_vpmaddwd_rrr(&mut self, src1: Reg, src2: Reg, dst: WritableReg) {
        let dst: WritableXmm = dst.map(|r| r.into());
        let inst = asm::inst::vpmaddwd_b::new(dst, src1, src2).into();
        self.emit(Inst::External { inst });
    }
}

/// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted,
/// but the immediate is not yet known. Currently, this implementation expects a 32-bit immediate,
/// so 8- and 16-bit operand sizes are not supported.
pub(crate) struct PatchableAddToReg {
    /// The region to be patched in the [`MachBuffer`]. It must contain a valid add instruction
    /// sequence, accepting a 32-bit immediate.
    region: PatchRegion,

    /// The offset into the patchable region where the patchable constant begins.
    constant_offset: usize,
}

impl PatchableAddToReg {
    /// Create a new [`PatchableAddToReg`] by capturing a region in the output buffer where the
    /// add-with-immediate occurs. The [`MachBuffer`] will have an add-with-immediate instruction
    /// present in that region, though it will add `0` until the `::finalize` method is called.
    ///
    /// Currently this implementation expects to be able to patch a 32-bit immediate, which means
    /// that 8- and 16-bit addition cannot be supported.
    pub(crate) fn new(reg: Reg, size: OperandSize, asm: &mut Assembler) -> Self {
        let open = asm.buffer_mut().start_patchable();
        let start = asm.buffer().cur_offset();

        // Emit the opcode and register use for the add instruction.
        let reg = pair_gpr(Writable::from_reg(reg));
        let inst = match size {
            OperandSize::S32 => asm::inst::addl_mi::new(reg, 0_u32).into(),
            OperandSize::S64 => asm::inst::addq_mi_sxl::new(reg, 0_i32).into(),
            _ => {
                panic!(
                    "{}-bit addition is not supported, please see the comment on PatchableAddToReg::new",
                    size.num_bits(),
                )
            }
        };
        asm.emit(Inst::External { inst });

        // The offset to the constant is the width of what was just emitted
        // minus 4, the width of the 32-bit immediate.
        let constant_offset = usize::try_from(asm.buffer().cur_offset() - start - 4).unwrap();

        let region = asm.buffer_mut().end_patchable(open);

        Self {
            region,
            constant_offset,
        }
    }

    /// Patch the [`MachBuffer`] with the known constant to be added to the register. The final
    /// value is passed in as an i32, but the instruction encoding is fixed when
    /// [`PatchableAddToReg::new`] is called.
    pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer<Inst>) {
        let slice = self.region.patch(buffer);
        debug_assert_eq!(slice.len(), self.constant_offset + 4);
        slice[self.constant_offset..].copy_from_slice(val.to_le_bytes().as_slice());
    }
}
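
// Typical usage of `PatchableAddToReg` (an illustrative sketch; `asm`, `reg`,
// and `final_value` are hypothetical):
//
//     // Reserve a patchable `add reg, imm32` while the value is still unknown.
//     let patch = PatchableAddToReg::new(reg, OperandSize::S64, &mut asm);
//     // ... emit more code; compute the value that should have been added ...
//     // Overwrite the placeholder immediate in place.
//     patch.finalize(final_value, asm.buffer_mut());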