use crate::abi::{self, LocalSlot, align_to};
use crate::codegen::{CodeGenContext, Emission, FuncEnv};
use crate::isa::{
    CallingConvention,
    reg::{Reg, RegClass, WritableReg, writable},
};
use anyhow::Result;
use cranelift_codegen::{
    Final, MachBufferFinalized, MachLabel,
    binemit::CodeOffset,
    ir::{Endianness, MemFlags, RelSourceLoc, SourceLoc, UserExternalNameRef},
};
use std::{fmt::Debug, ops::Range};
use wasmtime_environ::{PtrSize, WasmHeapType, WasmRefType, WasmValType};

pub(crate) use cranelift_codegen::ir::TrapCode;

#[derive(Eq, PartialEq)]
pub(crate) enum DivKind {
    /// Signed division.
    Signed,
    /// Unsigned division.
    Unsigned,
}

/// Represents the `memory.atomic.wait*` kind.
#[derive(Debug, Clone, Copy)]
pub(crate) enum AtomicWaitKind {
    Wait32,
    Wait64,
}

/// Remainder kind.
#[derive(Copy, Clone)]
pub(crate) enum RemKind {
    /// Signed remainder.
    Signed,
    /// Unsigned remainder.
    Unsigned,
}

impl RemKind {
    pub fn is_signed(&self) -> bool {
        matches!(self, Self::Signed)
    }
}

/// Kinds of vector min operation supported by WebAssembly.
pub(crate) enum V128MinKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}

impl V128MinKind {
    /// The size of each lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
            Self::F64x2 => OperandSize::S64,
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
        }
    }
}

/// Kinds of vector max operation supported by WebAssembly.
pub(crate) enum V128MaxKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}

impl V128MaxKind {
    /// The size of each lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
            Self::F64x2 => OperandSize::S64,
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
        }
    }
}

#[derive(Eq, PartialEq)]
pub(crate) enum MulWideKind {
    Signed,
    Unsigned,
}

/// Type of operation for a read-modify-write instruction.
pub(crate) enum RmwOp {
    Add,
    Sub,
    Xchg,
    And,
    Or,
    Xor,
}

/// The direction to perform the memory move.
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum MemMoveDirection {
    /// From high memory addresses to low memory addresses.
    /// Invariant: the source location is closer to the FP than the destination
    /// location, which will be closer to the SP.
    HighToLow,
    /// From low memory addresses to high memory addresses.
    /// Invariant: the source location is closer to the SP than the destination
    /// location, which will be closer to the FP.
    LowToHigh,
}
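// Hedged illustration, not part of the upstream file: spot-checks of the
// lane-size mappings documented on `V128MinKind` and `V128MaxKind`.
#[cfg(test)]
mod v128_min_max_lane_size_tests {
    use super::*;

    #[test]
    fn min_and_max_kinds_agree_on_lane_sizes() {
        assert_eq!(V128MinKind::I8x16S.lane_size(), OperandSize::S8);
        assert_eq!(V128MinKind::F64x2.lane_size(), OperandSize::S64);
        assert_eq!(V128MaxKind::I16x8U.lane_size(), OperandSize::S16);
        assert_eq!(V128MaxKind::F32x4.lane_size(), OperandSize::S32);
    }
}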
/// Classifies how to treat float-to-int conversions.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum TruncKind {
    /// Saturating conversion. If the source value is greater than the maximum
    /// value of the destination type, the result is clamped to the
    /// destination maximum value.
    Checked,
    /// An exception is raised if the source value is greater than the maximum
    /// value of the destination type.
    Unchecked,
}

impl TruncKind {
    /// Returns true if the truncation kind is checked.
    pub(crate) fn is_checked(&self) -> bool {
        *self == TruncKind::Checked
    }

    /// Returns `true` if the trunc kind is [`Unchecked`].
    ///
    /// [`Unchecked`]: TruncKind::Unchecked
    #[must_use]
    pub(crate) fn is_unchecked(&self) -> bool {
        matches!(self, Self::Unchecked)
    }
}

/// Representation of the stack pointer offset.
#[derive(Copy, Clone, Eq, PartialEq, Debug, PartialOrd, Ord, Default)]
pub struct SPOffset(u32);

impl SPOffset {
    pub fn from_u32(offs: u32) -> Self {
        Self(offs)
    }

    pub fn as_u32(&self) -> u32 {
        self.0
    }
}

/// A stack slot.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct StackSlot {
    /// The location of the slot, relative to the stack pointer.
    pub offset: SPOffset,
    /// The size of the slot, in bytes.
    pub size: u32,
}

impl StackSlot {
    pub fn new(offs: SPOffset, size: u32) -> Self {
        Self { offset: offs, size }
    }
}

pub trait ScratchType {
    /// Derive the register class from the scratch register type.
    fn reg_class() -> RegClass;
}

/// A scratch register type of integer class.
pub struct IntScratch;
/// A scratch register type of floating point class.
pub struct FloatScratch;

impl ScratchType for IntScratch {
    fn reg_class() -> RegClass {
        RegClass::Int
    }
}

impl ScratchType for FloatScratch {
    fn reg_class() -> RegClass {
        RegClass::Float
    }
}

/// A scratch register scope.
pub struct Scratch(Reg);

impl Scratch {
    pub fn new(r: Reg) -> Self {
        Self(r)
    }

    #[inline]
    pub fn inner(&self) -> Reg {
        self.0
    }

    #[inline]
    pub fn writable(&self) -> WritableReg {
        writable!(self.0)
    }
}

/// Kinds of integer binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum IntCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Signed less than.
    LtS,
    /// Unsigned less than.
    LtU,
    /// Signed greater than.
    GtS,
    /// Unsigned greater than.
    GtU,
    /// Signed less than or equal.
    LeS,
    /// Unsigned less than or equal.
    LeU,
    /// Signed greater than or equal.
    GeS,
    /// Unsigned greater than or equal.
    GeU,
}
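// Hedged illustration, not part of the upstream file: the two `TruncKind`
// predicates are exact complements, per the documentation above.
#[cfg(test)]
mod trunc_kind_tests {
    use super::*;

    #[test]
    fn predicates_are_complementary() {
        assert!(TruncKind::Checked.is_checked());
        assert!(!TruncKind::Checked.is_unchecked());
        assert!(TruncKind::Unchecked.is_unchecked());
        assert!(!TruncKind::Unchecked.is_checked());
    }
}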
/// Kinds of float binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug)]
pub(crate) enum FloatCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Less than.
    Lt,
    /// Greater than.
    Gt,
    /// Less than or equal.
    Le,
    /// Greater than or equal.
    Ge,
}

/// Kinds of shifts in WebAssembly. The [`masm`] implementation for each ISA is
/// responsible for emitting the correct sequence of instructions when
/// lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum ShiftKind {
    /// Left shift.
    Shl,
    /// Signed right shift.
    ShrS,
    /// Unsigned right shift.
    ShrU,
    /// Left rotate.
    Rotl,
    /// Right rotate.
    Rotr,
}

/// Kinds of extends in WebAssembly. Each MacroAssembler implementation
/// is responsible for emitting the correct sequence of instructions when
/// lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum ExtendKind {
    Signed(Extend<Signed>),
    Unsigned(Extend<Zero>),
}

#[derive(Copy, Clone)]
pub(crate) enum Signed {}
#[derive(Copy, Clone)]
pub(crate) enum Zero {}

pub(crate) trait ExtendType {}

impl ExtendType for Signed {}
impl ExtendType for Zero {}

#[derive(Copy, Clone)]
pub(crate) enum Extend<T: ExtendType> {
    /// 8 to 32 bit extend.
    I32Extend8,
    /// 16 to 32 bit extend.
    I32Extend16,
    /// 8 to 64 bit extend.
    I64Extend8,
    /// 16 to 64 bit extend.
    I64Extend16,
    /// 32 to 64 bit extend.
    I64Extend32,

    /// Variant to hold the kind of extend marker.
    ///
    /// This is `Signed` or `Zero`; these are empty enums, which means that this
    /// variant cannot be constructed.
    __Kind(T),
}

impl From<Extend<Zero>> for ExtendKind {
    fn from(value: Extend<Zero>) -> Self {
        ExtendKind::Unsigned(value)
    }
}

impl<T: ExtendType> Extend<T> {
    pub fn from_size(&self) -> OperandSize {
        match self {
            Extend::I32Extend8 | Extend::I64Extend8 => OperandSize::S8,
            Extend::I32Extend16 | Extend::I64Extend16 => OperandSize::S16,
            Extend::I64Extend32 => OperandSize::S32,
            Extend::__Kind(_) => unreachable!(),
        }
    }

    pub fn to_size(&self) -> OperandSize {
        match self {
            Extend::I32Extend8 | Extend::I32Extend16 => OperandSize::S32,
            Extend::I64Extend8 | Extend::I64Extend16 | Extend::I64Extend32 => OperandSize::S64,
            Extend::__Kind(_) => unreachable!(),
        }
    }

    pub fn from_bits(&self) -> u8 {
        self.from_size().num_bits()
    }

    pub fn to_bits(&self) -> u8 {
        self.to_size().num_bits()
    }
}

impl From<Extend<Signed>> for ExtendKind {
    fn from(value: Extend<Signed>) -> Self {
        ExtendKind::Signed(value)
    }
}

impl ExtendKind {
    pub fn signed(&self) -> bool {
        match self {
            Self::Signed(_) => true,
            _ => false,
        }
    }

    pub fn from_bits(&self) -> u8 {
        match self {
            Self::Signed(s) => s.from_bits(),
            Self::Unsigned(u) => u.from_bits(),
        }
    }

    pub fn to_bits(&self) -> u8 {
        match self {
            Self::Signed(s) => s.to_bits(),
            Self::Unsigned(u) => u.to_bits(),
        }
    }
}
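// Hedged illustration, not part of the upstream file: how the `Extend`
// helpers compose; an 8-to-64-bit signed extend reads 8 bits and produces 64.
#[cfg(test)]
mod extend_kind_tests {
    use super::*;

    #[test]
    fn extend_bit_widths() {
        let kind: ExtendKind = Extend::<Signed>::I64Extend8.into();
        assert!(kind.signed());
        assert_eq!(kind.from_bits(), 8);
        assert_eq!(kind.to_bits(), 64);
    }
}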
/// Kinds of vector load and extends in WebAssembly. Each MacroAssembler
/// implementation is responsible for emitting the correct sequence of
/// instructions when lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum V128LoadExtendKind {
    /// Sign extends eight 8 bit integers to eight 16 bit lanes.
    E8x8S,
    /// Zero extends eight 8 bit integers to eight 16 bit lanes.
    E8x8U,
    /// Sign extends four 16 bit integers to four 32 bit lanes.
    E16x4S,
    /// Zero extends four 16 bit integers to four 32 bit lanes.
    E16x4U,
    /// Sign extends two 32 bit integers to two 64 bit lanes.
    E32x2S,
    /// Zero extends two 32 bit integers to two 64 bit lanes.
    E32x2U,
}

/// Kinds of splat loads supported by WebAssembly.
pub(crate) enum SplatLoadKind {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
}

/// Kinds of splat supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum SplatKind {
    /// 8 bit integer.
    I8x16,
    /// 16 bit integer.
    I16x8,
    /// 32 bit integer.
    I32x4,
    /// 64 bit integer.
    I64x2,
    /// 32 bit float.
    F32x4,
    /// 64 bit float.
    F64x2,
}

impl SplatKind {
    /// The lane size to use for different kinds of splats.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            SplatKind::I8x16 => OperandSize::S8,
            SplatKind::I16x8 => OperandSize::S16,
            SplatKind::I32x4 | SplatKind::F32x4 => OperandSize::S32,
            SplatKind::I64x2 | SplatKind::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of extract lane supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum ExtractLaneKind {
    /// 16 lanes of 8-bit integers sign extended to 32-bits.
    I8x16S,
    /// 16 lanes of 8-bit integers zero extended to 32-bits.
    I8x16U,
    /// 8 lanes of 16-bit integers sign extended to 32-bits.
    I16x8S,
    /// 8 lanes of 16-bit integers zero extended to 32-bits.
    I16x8U,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
}

impl ExtractLaneKind {
    /// The lane size to use for different kinds of extract lane kinds.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ExtractLaneKind::I8x16S | ExtractLaneKind::I8x16U => OperandSize::S8,
            ExtractLaneKind::I16x8S | ExtractLaneKind::I16x8U => OperandSize::S16,
            ExtractLaneKind::I32x4 | ExtractLaneKind::F32x4 => OperandSize::S32,
            ExtractLaneKind::I64x2 | ExtractLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

impl From<ExtractLaneKind> for Extend<Signed> {
    fn from(value: ExtractLaneKind) -> Self {
        match value {
            ExtractLaneKind::I8x16S => Extend::I32Extend8,
            ExtractLaneKind::I16x8S => Extend::I32Extend16,
            _ => unimplemented!(),
        }
    }
}
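// Hedged illustration, not part of the upstream file: signed integer lane
// extractions map onto the scalar sign-extend kinds.
#[cfg(test)]
mod extract_lane_tests {
    use super::*;

    #[test]
    fn signed_extractions_extend_to_32_bits() {
        let extend = Extend::<Signed>::from(ExtractLaneKind::I8x16S);
        assert_eq!(extend.from_bits(), 8);
        assert_eq!(extend.to_bits(), 32);
        assert_eq!(ExtractLaneKind::I8x16S.lane_size(), OperandSize::S8);
    }
}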
/// Kinds of replace lane supported by WebAssembly.
pub(crate) enum ReplaceLaneKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl ReplaceLaneKind {
    /// The lane size to use for different kinds of replace lane kinds.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ReplaceLaneKind::I8x16 => OperandSize::S8,
            ReplaceLaneKind::I16x8 => OperandSize::S16,
            ReplaceLaneKind::I32x4 => OperandSize::S32,
            ReplaceLaneKind::I64x2 => OperandSize::S64,
            ReplaceLaneKind::F32x4 => OperandSize::S32,
            ReplaceLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of behavior supported by Wasm loads.
pub(crate) enum LoadKind {
    /// Load the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Atomic load, with optional scalar extend.
    Atomic(OperandSize, Option<ExtendKind>),
    /// Duplicate value into vector lanes.
    Splat(SplatLoadKind),
    /// Scalar (non-vector) extend.
    ScalarExtend(ExtendKind),
    /// Vector extend.
    VectorExtend(V128LoadExtendKind),
    /// Load content into select lane.
    VectorLane(LaneSelector),
    /// Load a single element into the lowest bits of a vector and initialize
    /// all other bits to zero.
    VectorZero(OperandSize),
}

impl LoadKind {
    /// Returns the [`OperandSize`] used in the load operation.
    pub(crate) fn derive_operand_size(&self) -> OperandSize {
        match self {
            Self::ScalarExtend(extend) | Self::Atomic(_, Some(extend)) => {
                Self::operand_size_for_scalar(extend)
            }
            Self::VectorExtend(_) => OperandSize::S64,
            Self::Splat(kind) => Self::operand_size_for_splat(kind),
            Self::Operand(size)
            | Self::Atomic(size, None)
            | Self::VectorLane(LaneSelector { size, .. })
            | Self::VectorZero(size) => *size,
        }
    }

    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }

    fn operand_size_for_scalar(extend_kind: &ExtendKind) -> OperandSize {
        match extend_kind {
            ExtendKind::Signed(s) => s.from_size(),
            ExtendKind::Unsigned(u) => u.from_size(),
        }
    }

    fn operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize {
        match kind {
            SplatLoadKind::S8 => OperandSize::S8,
            SplatLoadKind::S16 => OperandSize::S16,
            SplatLoadKind::S32 => OperandSize::S32,
            SplatLoadKind::S64 => OperandSize::S64,
        }
    }

    pub(crate) fn is_atomic(&self) -> bool {
        matches!(self, Self::Atomic(_, _))
    }
}

/// Kinds of behavior supported by Wasm stores.
#[derive(Copy, Clone)]
pub enum StoreKind {
    /// Store the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Store the entire bytes of the operand size without any modifications, atomically.
    Atomic(OperandSize),
    /// Store the content of selected lane.
    VectorLane(LaneSelector),
}

impl StoreKind {
    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }
}

#[derive(Copy, Clone)]
pub struct LaneSelector {
    pub lane: u8,
    pub size: OperandSize,
}

/// Types of vector integer to float conversions supported by WebAssembly.
pub(crate) enum V128ConvertKind {
    /// 4 lanes of signed 32-bit integers to 4 lanes of 32-bit floats.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers to 4 lanes of 32-bit floats.
    I32x4U,
    /// 4 lanes of signed 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowS,
    /// 4 lanes of unsigned 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowU,
}

impl V128ConvertKind {
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            V128ConvertKind::I32x4S
            | V128ConvertKind::I32x4U
            | V128ConvertKind::I32x4LowS
            | V128ConvertKind::I32x4LowU => OperandSize::S32,
        }
    }

    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        match self {
            V128ConvertKind::I32x4S | V128ConvertKind::I32x4U => OperandSize::S32,
            V128ConvertKind::I32x4LowS | V128ConvertKind::I32x4LowU => OperandSize::S64,
        }
    }
}
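// Hedged illustration, not part of the upstream file: how
// `LoadKind::derive_operand_size` resolves the in-memory access width.
#[cfg(test)]
mod load_kind_tests {
    use super::*;

    #[test]
    fn derived_operand_sizes() {
        // A plain 32-bit load accesses 32 bits.
        let plain = LoadKind::Operand(OperandSize::S32);
        assert_eq!(plain.derive_operand_size(), OperandSize::S32);
        // An atomic load with an 8-to-64-bit zero extend accesses 8 bits.
        let atomic = LoadKind::Atomic(OperandSize::S64, Some(Extend::<Zero>::I64Extend8.into()));
        assert_eq!(atomic.derive_operand_size(), OperandSize::S8);
        assert!(atomic.is_atomic());
    }
}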
/// Kinds of vector narrowing operations supported by WebAssembly.
pub(crate) enum V128NarrowKind {
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// signed saturation.
    I16x8S,
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// unsigned saturation.
    I16x8U,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// signed saturation.
    I32x4S,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// unsigned saturation.
    I32x4U,
}

impl V128NarrowKind {
    /// Return the size of the destination lanes.
    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        match self {
            Self::I16x8S | Self::I16x8U => OperandSize::S8,
            Self::I32x4S | Self::I32x4U => OperandSize::S16,
        }
    }
}

/// Kinds of vector extending operations supported by WebAssembly.
#[derive(Debug, Copy, Clone)]
pub(crate) enum V128ExtendKind {
    /// Low half of i8x16 sign extended.
    LowI8x16S,
    /// High half of i8x16 sign extended.
    HighI8x16S,
    /// Low half of i8x16 zero extended.
    LowI8x16U,
    /// High half of i8x16 zero extended.
    HighI8x16U,
    /// Low half of i16x8 sign extended.
    LowI16x8S,
    /// High half of i16x8 sign extended.
    HighI16x8S,
    /// Low half of i16x8 zero extended.
    LowI16x8U,
    /// High half of i16x8 zero extended.
    HighI16x8U,
    /// Low half of i32x4 sign extended.
    LowI32x4S,
    /// High half of i32x4 sign extended.
    HighI32x4S,
    /// Low half of i32x4 zero extended.
    LowI32x4U,
    /// High half of i32x4 zero extended.
    HighI32x4U,
}

impl V128ExtendKind {
    /// The size of the source's lanes.
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            Self::LowI8x16S | Self::LowI8x16U | Self::HighI8x16S | Self::HighI8x16U => {
                OperandSize::S8
            }
            Self::LowI16x8S | Self::LowI16x8U | Self::HighI16x8S | Self::HighI16x8U => {
                OperandSize::S16
            }
            Self::LowI32x4S | Self::LowI32x4U | Self::HighI32x4S | Self::HighI32x4U => {
                OperandSize::S32
            }
        }
    }
}

/// Kinds of vector equalities and non-equalities supported by WebAssembly.
pub(crate) enum VectorEqualityKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl VectorEqualityKind {
    /// Get the lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
            Self::I32x4 | Self::F32x4 => OperandSize::S32,
            Self::I64x2 | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of vector comparisons supported by WebAssembly.
pub(crate) enum VectorCompareKind {
    /// 16 lanes of signed 8 bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8 bit integers.
    I8x16U,
    /// 8 lanes of signed 16 bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16 bit integers.
    I16x8U,
    /// 4 lanes of signed 32 bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32 bit integers.
    I32x4U,
    /// 2 lanes of signed 64 bit integers.
    I64x2S,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl VectorCompareKind {
    /// Get the lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
            Self::I32x4S | Self::I32x4U | Self::F32x4 => OperandSize::S32,
            Self::I64x2S | Self::F64x2 => OperandSize::S64,
        }
    }
}
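// Hedged illustration, not part of the upstream file: narrowing halves the
// lane width, while extending reads lanes of the source width.
#[cfg(test)]
mod narrow_and_extend_tests {
    use super::*;

    #[test]
    fn lane_width_changes() {
        assert_eq!(V128NarrowKind::I16x8S.dst_lane_size(), OperandSize::S8);
        assert_eq!(V128NarrowKind::I32x4U.dst_lane_size(), OperandSize::S16);
        assert_eq!(V128ExtendKind::LowI16x8S.src_lane_size(), OperandSize::S16);
    }
}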
/// Kinds of vector absolute operations supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum V128AbsKind {
    /// 8 bit integers.
    I8x16,
    /// 16 bit integers.
    I16x8,
    /// 32 bit integers.
    I32x4,
    /// 64 bit integers.
    I64x2,
    /// 32 bit floats.
    F32x4,
    /// 64 bit floats.
    F64x2,
}

impl V128AbsKind {
    /// The lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
            Self::I32x4 | Self::F32x4 => OperandSize::S32,
            Self::I64x2 | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of truncation for vectors supported by WebAssembly.
pub(crate) enum V128TruncKind {
    /// Truncates 4 lanes of 32-bit floats to nearest integral value.
    F32x4,
    /// Truncates 2 lanes of 64-bit floats to nearest integral value.
    F64x2,
    /// Integers from signed F32x4.
    I32x4FromF32x4S,
    /// Integers from unsigned F32x4.
    I32x4FromF32x4U,
    /// Integers from signed F64x2.
    I32x4FromF64x2SZero,
    /// Integers from unsigned F64x2.
    I32x4FromF64x2UZero,
}

impl V128TruncKind {
    /// The size of the source lanes.
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            V128TruncKind::F32x4
            | V128TruncKind::I32x4FromF32x4S
            | V128TruncKind::I32x4FromF32x4U => OperandSize::S32,
            V128TruncKind::F64x2
            | V128TruncKind::I32x4FromF64x2SZero
            | V128TruncKind::I32x4FromF64x2UZero => OperandSize::S64,
        }
    }

    /// The size of the destination lanes.
    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        if let V128TruncKind::F64x2 = self {
            OperandSize::S64
        } else {
            OperandSize::S32
        }
    }
}

/// Kinds of vector addition supported by WebAssembly.
pub(crate) enum V128AddKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

/// Kinds of vector subtraction supported by WebAssembly.
pub(crate) enum V128SubKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

impl From<V128NegKind> for V128SubKind {
    fn from(value: V128NegKind) -> Self {
        match value {
            V128NegKind::I8x16 => Self::I8x16,
            V128NegKind::I16x8 => Self::I16x8,
            V128NegKind::I32x4 => Self::I32x4,
            V128NegKind::I64x2 => Self::I64x2,
            V128NegKind::F32x4 | V128NegKind::F64x2 => unimplemented!(),
        }
    }
}
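// Hedged note, not part of the upstream file: integer vector negation can be
// lowered as `0 - x`, which plausibly explains why `V128NegKind` converts
// into `V128SubKind` for the integer shapes while the float variants are
// `unimplemented!()` here.
#[cfg(test)]
mod neg_to_sub_tests {
    use super::*;

    #[test]
    fn integer_negation_maps_to_subtraction() {
        assert!(matches!(
            V128SubKind::from(V128NegKind::I32x4),
            V128SubKind::I32x4
        ));
    }
}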
/// Kinds of vector multiplication supported by WebAssembly.
pub(crate) enum V128MulKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

/// Kinds of vector negation supported by WebAssembly.
#[derive(Copy, Clone)]
pub(crate) enum V128NegKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of 8-bit integers.
    I8x16,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

impl V128NegKind {
    /// The size of the lanes.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4 => OperandSize::S32,
            Self::F64x2 | Self::I64x2 => OperandSize::S64,
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
        }
    }
}

/// Kinds of extended pairwise addition supported by WebAssembly.
pub(crate) enum V128ExtAddKind {
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
}

/// Kinds of vector extended multiplication supported by WebAssembly.
#[derive(Debug, Clone, Copy)]
pub(crate) enum V128ExtMulKind {
    LowI8x16S,
    HighI8x16S,
    LowI8x16U,
    HighI8x16U,
    LowI16x8S,
    HighI16x8S,
    LowI16x8U,
    HighI16x8U,
    LowI32x4S,
    HighI32x4S,
    LowI32x4U,
    HighI32x4U,
}

impl From<V128ExtMulKind> for V128ExtendKind {
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S => Self::LowI8x16S,
            V128ExtMulKind::HighI8x16S => Self::HighI8x16S,
            V128ExtMulKind::LowI8x16U => Self::LowI8x16U,
            V128ExtMulKind::HighI8x16U => Self::HighI8x16U,
            V128ExtMulKind::LowI16x8S => Self::LowI16x8S,
            V128ExtMulKind::HighI16x8S => Self::HighI16x8S,
            V128ExtMulKind::LowI16x8U => Self::LowI16x8U,
            V128ExtMulKind::HighI16x8U => Self::HighI16x8U,
            V128ExtMulKind::LowI32x4S => Self::LowI32x4S,
            V128ExtMulKind::HighI32x4S => Self::HighI32x4S,
            V128ExtMulKind::LowI32x4U => Self::LowI32x4U,
            V128ExtMulKind::HighI32x4U => Self::HighI32x4U,
        }
    }
}

impl From<V128ExtMulKind> for V128MulKind {
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S
            | V128ExtMulKind::HighI8x16S
            | V128ExtMulKind::LowI8x16U
            | V128ExtMulKind::HighI8x16U => Self::I16x8,
            V128ExtMulKind::LowI16x8S
            | V128ExtMulKind::HighI16x8S
            | V128ExtMulKind::LowI16x8U
            | V128ExtMulKind::HighI16x8U => Self::I32x4,
            V128ExtMulKind::LowI32x4S
            | V128ExtMulKind::HighI32x4S
            | V128ExtMulKind::LowI32x4U
            | V128ExtMulKind::HighI32x4U => Self::I64x2,
        }
    }
}

/// Operand size, in bits.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum OperandSize {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
    /// 128 bits.
    S128,
}
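// Hedged illustration, not part of the upstream file: the size helpers below
// are mutually consistent (bits = 8 * bytes, and `log2` is the binary
// logarithm of the bit width).
#[cfg(test)]
mod operand_size_tests {
    use super::*;

    #[test]
    fn size_helpers_are_consistent() {
        let sizes = [
            OperandSize::S8,
            OperandSize::S16,
            OperandSize::S32,
            OperandSize::S64,
            OperandSize::S128,
        ];
        for size in sizes {
            assert_eq!(u32::from(size.num_bits()), size.bytes() * 8);
            assert_eq!(1u8 << size.log2(), size.num_bits());
        }
    }
}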
impl OperandSize {
    /// The number of bits in the operand.
    pub fn num_bits(&self) -> u8 {
        match self {
            OperandSize::S8 => 8,
            OperandSize::S16 => 16,
            OperandSize::S32 => 32,
            OperandSize::S64 => 64,
            OperandSize::S128 => 128,
        }
    }

    /// The number of bytes in the operand.
    pub fn bytes(&self) -> u32 {
        match self {
            Self::S8 => 1,
            Self::S16 => 2,
            Self::S32 => 4,
            Self::S64 => 8,
            Self::S128 => 16,
        }
    }

    /// The binary logarithm of the number of bits in the operand.
    pub fn log2(&self) -> u8 {
        match self {
            OperandSize::S8 => 3,
            OperandSize::S16 => 4,
            OperandSize::S32 => 5,
            OperandSize::S64 => 6,
            OperandSize::S128 => 7,
        }
    }

    /// Create an [`OperandSize`] from the given number of bytes.
    pub fn from_bytes(bytes: u8) -> Self {
        use OperandSize::*;
        match bytes {
            4 => S32,
            8 => S64,
            16 => S128,
            _ => panic!("Invalid bytes {bytes} for OperandSize"),
        }
    }

    pub fn extend_to<T: ExtendType>(&self, to: Self) -> Option<Extend<T>> {
        match to {
            OperandSize::S32 => match self {
                OperandSize::S8 => Some(Extend::I32Extend8),
                OperandSize::S16 => Some(Extend::I32Extend16),
                _ => None,
            },
            OperandSize::S64 => match self {
                OperandSize::S8 => Some(Extend::I64Extend8),
                OperandSize::S16 => Some(Extend::I64Extend16),
                OperandSize::S32 => Some(Extend::I64Extend32),
                _ => None,
            },
            _ => None,
        }
    }

    /// The number of bits in the mantissa.
    ///
    /// Only implemented for floats.
    pub fn mantissa_bits(&self) -> u8 {
        match self {
            Self::S32 => 8,
            Self::S64 => 11,
            _ => unimplemented!(),
        }
    }
}

/// An abstraction over a register or immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum RegImm {
    /// A register.
    Reg(Reg),
    /// A tagged immediate argument.
    Imm(Imm),
}

/// A tagged representation of an immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum Imm {
    /// I32 immediate.
    I32(u32),
    /// I64 immediate.
    I64(u64),
    /// F32 immediate.
    F32(u32),
    /// F64 immediate.
    F64(u64),
    /// V128 immediate.
    V128(i128),
}

impl Imm {
    /// Create a new I64 immediate.
    pub fn i64(val: i64) -> Self {
        Self::I64(val as u64)
    }

    /// Create a new I32 immediate.
    pub fn i32(val: i32) -> Self {
        Self::I32(val as u32)
    }

    /// Create a new F32 immediate.
    pub fn f32(bits: u32) -> Self {
        Self::F32(bits)
    }

    /// Create a new F64 immediate.
    pub fn f64(bits: u64) -> Self {
        Self::F64(bits)
    }

    /// Create a new V128 immediate.
    pub fn v128(bits: i128) -> Self {
        Self::V128(bits)
    }

    /// Convert the immediate to i32, if possible.
    pub fn to_i32(&self) -> Option<i32> {
        match self {
            Self::I32(v) => Some(*v as i32),
            Self::I64(v) => i32::try_from(*v as i64).ok(),
            _ => None,
        }
    }

    /// Unwraps the underlying integer value as u64.
    /// # Panics
    /// This function panics if the underlying value can't be represented
    /// as u64.
    pub fn unwrap_as_u64(&self) -> u64 {
        match self {
            Self::I32(v) => *v as u64,
            Self::I64(v) => *v,
            Self::F32(v) => *v as u64,
            Self::F64(v) => *v,
            _ => unreachable!(),
        }
    }

    /// Get the operand size of the immediate.
    pub fn size(&self) -> OperandSize {
        match self {
            Self::I32(_) | Self::F32(_) => OperandSize::S32,
            Self::I64(_) | Self::F64(_) => OperandSize::S64,
            Self::V128(_) => OperandSize::S128,
        }
    }

    /// Get a little endian representation of the immediate.
    ///
    /// This method heap allocates and is intended to be used when adding
    /// values to the constant pool.
    pub fn to_bytes(&self) -> Vec<u8> {
        match self {
            Imm::I32(n) => n.to_le_bytes().to_vec(),
            Imm::I64(n) => n.to_le_bytes().to_vec(),
            Imm::F32(n) => n.to_le_bytes().to_vec(),
            Imm::F64(n) => n.to_le_bytes().to_vec(),
            Imm::V128(n) => n.to_le_bytes().to_vec(),
        }
    }
}
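// Hedged illustration, not part of the upstream file: immediates carry raw
// bit patterns, and `to_bytes` yields the little-endian encoding used when
// emitting constants.
#[cfg(test)]
mod imm_tests {
    use super::*;

    #[test]
    fn immediate_sizes_and_encoding() {
        let imm = Imm::i32(-1);
        assert_eq!(imm.size(), OperandSize::S32);
        assert_eq!(imm.to_bytes(), vec![0xff; 4]);
        // Floats are stored by their bit representation.
        assert_eq!(Imm::f64(1.0f64.to_bits()).size(), OperandSize::S64);
    }
}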
/// The location of the [VMContext] used for function calls.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum VMContextLoc {
    /// Dynamic, stored in the given register.
    Reg(Reg),
    /// The pinned [VMContext] register.
    Pinned,
    /// A different VMContext is loaded at the provided offset from the current
    /// VMContext.
    OffsetFromPinned(u32),
}

/// The maximum number of context arguments currently used across the compiler.
pub(crate) const MAX_CONTEXT_ARGS: usize = 2;

/// Out-of-band special purpose arguments used for function call emission.
///
/// We cannot rely on the value stack for these values given that inserting
/// register or memory values at arbitrary locations of the value stack has the
/// potential to break the stack ordering principle, which states that older
/// values must always precede newer values, effectively simulating the order of
/// values in the machine stack.
/// The [ContextArgs] are meant to be resolved at every callsite; in some cases
/// it might be possible to construct them early on, but given that they might
/// contain allocatable registers, it's preferred to construct them in
/// [FnCall::emit].
#[derive(Clone, Debug)]
pub(crate) enum ContextArgs {
    /// A single context argument is required; the current pinned [VMContext]
    /// register must be passed as the first argument of the function call.
    VMContext([VMContextLoc; 1]),
    /// The callee and caller context arguments are required. In this case, the
    /// callee context argument is usually stored into an allocatable register
    /// and the caller is always the current pinned [VMContext] pointer.
    CalleeAndCallerVMContext([VMContextLoc; MAX_CONTEXT_ARGS]),
}

impl ContextArgs {
    /// Construct a [ContextArgs] declaring the usage of the pinned [VMContext]
    /// register as both the caller and callee context arguments.
    pub fn pinned_callee_and_caller_vmctx() -> Self {
        Self::CalleeAndCallerVMContext([VMContextLoc::Pinned, VMContextLoc::Pinned])
    }

    /// Construct a [ContextArgs] that declares the usage of the pinned
    /// [VMContext] register as the only context argument.
    pub fn pinned_vmctx() -> Self {
        Self::VMContext([VMContextLoc::Pinned])
    }

    /// Construct a [ContextArgs] that declares the usage of a [VMContext] loaded
    /// indirectly from the pinned [VMContext] register as the only context
    /// argument.
    pub fn offset_from_pinned_vmctx(offset: u32) -> Self {
        Self::VMContext([VMContextLoc::OffsetFromPinned(offset)])
    }

    /// Construct a [ContextArgs] that declares a dynamic callee context and the
    /// pinned [VMContext] register as the context arguments.
    pub fn with_callee_and_pinned_caller(callee_vmctx: Reg) -> Self {
        Self::CalleeAndCallerVMContext([VMContextLoc::Reg(callee_vmctx), VMContextLoc::Pinned])
    }

    /// Get the length of the [ContextArgs].
    pub fn len(&self) -> usize {
        self.as_slice().len()
    }

    /// Get a slice of the context arguments.
    pub fn as_slice(&self) -> &[VMContextLoc] {
        match self {
            Self::VMContext(a) => a.as_slice(),
            Self::CalleeAndCallerVMContext(a) => a.as_slice(),
        }
    }
}
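// Hedged illustration, not part of the upstream file: the context-argument
// count is always 1 or `MAX_CONTEXT_ARGS`.
#[cfg(test)]
mod context_args_tests {
    use super::*;

    #[test]
    fn context_argument_counts() {
        assert_eq!(ContextArgs::pinned_vmctx().len(), 1);
        assert_eq!(
            ContextArgs::pinned_callee_and_caller_vmctx().len(),
            MAX_CONTEXT_ARGS
        );
    }
}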
#[derive(Copy, Clone, Debug)]
pub(crate) enum CalleeKind {
    /// A function call to a raw address.
    Indirect(Reg),
    /// A function call to a local function.
    Direct(UserExternalNameRef),
}

impl CalleeKind {
    /// Creates a callee kind from a register.
    pub fn indirect(reg: Reg) -> Self {
        Self::Indirect(reg)
    }

    /// Creates a direct callee kind from a function name.
    pub fn direct(name: UserExternalNameRef) -> Self {
        Self::Direct(name)
    }
}

impl RegImm {
    /// Register constructor.
    pub fn reg(r: Reg) -> Self {
        RegImm::Reg(r)
    }

    /// I64 immediate constructor.
    pub fn i64(val: i64) -> Self {
        RegImm::Imm(Imm::i64(val))
    }

    /// I32 immediate constructor.
    pub fn i32(val: i32) -> Self {
        RegImm::Imm(Imm::i32(val))
    }

    /// F32 immediate, stored using its bits representation.
    pub fn f32(bits: u32) -> Self {
        RegImm::Imm(Imm::f32(bits))
    }

    /// F64 immediate, stored using its bits representation.
    pub fn f64(bits: u64) -> Self {
        RegImm::Imm(Imm::f64(bits))
    }

    /// V128 immediate.
    pub fn v128(bits: i128) -> Self {
        RegImm::Imm(Imm::v128(bits))
    }
}

impl From<Reg> for RegImm {
    fn from(r: Reg) -> Self {
        Self::Reg(r)
    }
}

#[derive(Debug)]
pub enum RoundingMode {
    Nearest,
    Up,
    Down,
    Zero,
}

/// Memory flags for trusted loads/stores.
pub const TRUSTED_FLAGS: MemFlags = MemFlags::trusted();

/// Flags used for WebAssembly loads / stores.
/// Untrusted by default so we don't set `no_trap`.
/// We also ensure that the endianness is the right one for WebAssembly.
pub const UNTRUSTED_FLAGS: MemFlags = MemFlags::new().with_endianness(Endianness::Little);
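// Hedged illustration, not part of the upstream file: the `RegImm`
// constructors simply tag the underlying immediate.
#[cfg(test)]
mod reg_imm_tests {
    use super::*;

    #[test]
    fn immediate_constructors() {
        assert_eq!(RegImm::i32(7), RegImm::Imm(Imm::I32(7)));
        let bits = 2.5f64.to_bits();
        assert_eq!(RegImm::f64(bits), RegImm::Imm(Imm::F64(bits)));
    }
}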
/// Generic MacroAssembler interface used by the code generation.
///
/// The MacroAssembler trait aims to expose an interface high-level enough
/// that each ISA can provide its own lowering to machine code. For example,
/// for WebAssembly operators that don't have a direct mapping to a machine
/// instruction, the interface defines a signature matching the WebAssembly
/// operator, allowing each implementation to lower such an operator entirely.
/// This approach attributes more responsibility to the MacroAssembler, but frees
/// the caller from worrying about assembling the right sequence of
/// instructions at the operator callsite.
///
/// The interface defaults to a three-argument form for binary operations;
/// this allows a natural mapping to instructions for RISC architectures,
/// which use the three-argument form.
/// This approach allows for a more general interface that can be restricted
/// where needed, in the case of architectures that use a two-argument form.
pub(crate) trait MacroAssembler {
    /// The addressing mode.
    type Address: Copy + Debug;

    /// The pointer representation of the target ISA,
    /// used to access information from [`VMOffsets`].
    type Ptr: PtrSize;

    /// The ABI details of the target.
    type ABI: abi::ABI;

    /// Emit the function prologue.
    fn prologue(&mut self, vmctx: Reg) -> Result<()> {
        self.frame_setup()?;
        self.check_stack(vmctx)
    }

    /// Generate the frame setup sequence.
    fn frame_setup(&mut self) -> Result<()>;

    /// Generate the frame restore sequence.
    fn frame_restore(&mut self) -> Result<()>;

    /// Emit a stack check.
    fn check_stack(&mut self, vmctx: Reg) -> Result<()>;

    /// Emit the function epilogue.
    fn epilogue(&mut self) -> Result<()> {
        self.frame_restore()
    }

    /// Reserve stack space.
    fn reserve_stack(&mut self, bytes: u32) -> Result<()>;

    /// Free stack space.
    fn free_stack(&mut self, bytes: u32) -> Result<()>;

    /// Reset the stack pointer to the given offset.
    ///
    /// Used to reset the stack pointer to a given offset
    /// when dealing with unreachable code.
    fn reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()>;

    /// Get the address of a local slot.
    fn local_address(&mut self, local: &LocalSlot) -> Result<Self::Address>;

    /// Constructs an address with an offset that is relative to the
    /// current position of the stack pointer (e.g. `[sp + (sp_offset -
    /// offset)]`).
    fn address_from_sp(&self, offset: SPOffset) -> Result<Self::Address>;

    /// Constructs an address with an offset that is absolute to the
    /// current position of the stack pointer (e.g. `[sp + offset]`).
    fn address_at_sp(&self, offset: SPOffset) -> Result<Self::Address>;

    /// Alias for [`Self::address_at_reg`] using the VMContext register as
    /// a base. The VMContext register is derived from the ABI type that is
    /// associated to the MacroAssembler.
    fn address_at_vmctx(&self, offset: u32) -> Result<Self::Address>;

    /// Construct an address that is absolute to the current position
    /// of the given register.
    fn address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address>;

    /// Emit a function call to either a local or external function.
    fn call(
        &mut self,
        stack_args_size: u32,
        f: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>,
    ) -> Result<u32>;

    /// Acquire a scratch register and execute the given callback.
    fn with_scratch<T: ScratchType, R>(&mut self, f: impl FnOnce(&mut Self, Scratch) -> R) -> R;

    /// Convenience wrapper over [`Self::with_scratch`], derives the register class
    /// for a particular Wasm value type.
    fn with_scratch_for<R>(
        &mut self,
        ty: WasmValType,
        f: impl FnOnce(&mut Self, Scratch) -> R,
    ) -> R {
        match ty {
            WasmValType::I32
            | WasmValType::I64
            | WasmValType::Ref(WasmRefType {
                heap_type: WasmHeapType::Func,
                ..
            }) => self.with_scratch::<IntScratch, _>(f),
            WasmValType::F32 | WasmValType::F64 | WasmValType::V128 => {
                self.with_scratch::<FloatScratch, _>(f)
            }
            _ => unimplemented!(),
        }
    }

    /// Get stack pointer offset.
    fn sp_offset(&self) -> Result<SPOffset>;

    /// Perform a stack store.
    fn store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize) -> Result<()>;

    /// Alias for `MacroAssembler::store` with the operand size corresponding
    /// to the pointer size of the target.
    fn store_ptr(&mut self, src: Reg, dst: Self::Address) -> Result<()>;

    /// Perform a WebAssembly store.
    /// A WebAssembly store introduces several additional invariants compared to
    /// [Self::store]; more precisely, it can implicitly trap, in certain
    /// circumstances, even if explicit bounds checks are elided, so in that
    /// sense we consider this type of store untrusted. It can also differ with
    /// regards to the endianness depending on the target ISA. For this reason,
    /// [Self::wasm_store] should be used explicitly when emitting WebAssembly
    /// stores.
    fn wasm_store(&mut self, src: Reg, dst: Self::Address, store_kind: StoreKind) -> Result<()>;

    /// Perform a zero-extended stack load.
    fn load(&mut self, src: Self::Address, dst: WritableReg, size: OperandSize) -> Result<()>;
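    // Hedged usage sketch (comments only; `masm` stands for any type
    // implementing this trait and `reg` for a live register): spilling a
    // value to the stack and reloading it through the SP-relative helpers:
    //
    //     let slot = masm.push(reg, OperandSize::S64)?;
    //     let addr = masm.address_from_sp(slot.offset)?;
    //     masm.load(addr, writable!(reg), OperandSize::S64)?;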
    /// Perform a WebAssembly load.
    /// A WebAssembly load introduces several additional invariants compared to
    /// [Self::load]; more precisely, it can implicitly trap, in certain
    /// circumstances, even if explicit bounds checks are elided, so in that
    /// sense we consider this type of load untrusted. It can also differ with
    /// regards to the endianness depending on the target ISA. For this reason,
    /// [Self::wasm_load] should be used explicitly when emitting WebAssembly
    /// loads.
    fn wasm_load(&mut self, src: Self::Address, dst: WritableReg, kind: LoadKind) -> Result<()>;

    /// Alias for `MacroAssembler::load` with the operand size corresponding
    /// to the pointer size of the target.
    fn load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()>;

    /// Computes the effective address and stores the result in the destination
    /// register.
    fn compute_addr(
        &mut self,
        _src: Self::Address,
        _dst: WritableReg,
        _size: OperandSize,
    ) -> Result<()>;

    /// Pop a value from the machine stack into the given register.
    fn pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a move.
    fn mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a conditional move.
    fn cmov(&mut self, dst: WritableReg, src: Reg, cc: IntCmpKind, size: OperandSize)
    -> Result<()>;
    /// Performs a memory move of bytes from src to dest.
    /// Bytes are moved in blocks of 8 bytes, where possible.
    fn memmove(
        &mut self,
        src: SPOffset,
        dst: SPOffset,
        bytes: u32,
        direction: MemMoveDirection,
    ) -> Result<()> {
        match direction {
            MemMoveDirection::LowToHigh => debug_assert!(dst.as_u32() < src.as_u32()),
            MemMoveDirection::HighToLow => debug_assert!(dst.as_u32() > src.as_u32()),
        }
        // At least 4 byte aligned.
        debug_assert!(bytes % 4 == 0);
        let mut remaining = bytes;
        let word_bytes = <Self::ABI as abi::ABI>::word_bytes();

        let word_bytes = word_bytes as u32;

        let mut dst_offs;
        let mut src_offs;
        match direction {
            MemMoveDirection::LowToHigh => {
                dst_offs = dst.as_u32() - bytes;
                src_offs = src.as_u32() - bytes;
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        remaining -= word_bytes;
                        dst_offs += word_bytes;
                        src_offs += word_bytes;

                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;
                    }
                    anyhow::Ok(())
                })?;
            }
            MemMoveDirection::HighToLow => {
                // Go from the end to the beginning to handle overlapping addresses.
                src_offs = src.as_u32();
                dst_offs = dst.as_u32();
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;

                        remaining -= word_bytes;
                        src_offs -= word_bytes;
                        dst_offs -= word_bytes;
                    }
                    anyhow::Ok(())
                })?;
            }
        }

        if remaining > 0 {
            let half_word = word_bytes / 2;
            let ptr_size = OperandSize::from_bytes(half_word as u8);
            debug_assert!(remaining == half_word);
            // Need to move the offsets ahead in the `LowToHigh` case to
            // compensate for the initial subtraction of `bytes`.
            if direction == MemMoveDirection::LowToHigh {
                dst_offs += half_word;
                src_offs += half_word;
            }

            self.with_scratch::<IntScratch, _>(|masm, scratch| {
                masm.load(
                    masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                    scratch.writable(),
                    ptr_size,
                )?;
                masm.store(
                    scratch.inner().into(),
                    masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                    ptr_size,
                )?;
                anyhow::Ok(())
            })?;
        }
        Ok(())
    }

    /// Perform add operation.
    fn add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a checked unsigned integer addition, emitting the provided trap
    /// if the addition overflows.
    fn checked_uadd(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: RegImm,
        size: OperandSize,
        trap: TrapCode,
    ) -> Result<()>;

    /// Perform subtraction operation.
    fn sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform multiplication operation.
    fn mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a floating point add operation.
    fn float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point subtraction operation.
    fn float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point multiply operation.
    fn float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point divide operation.
    fn float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point minimum operation. In x86, this will emit
    /// multiple instructions.
    fn float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point maximum operation. In x86, this will emit
    /// multiple instructions.
    fn float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
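    // Hedged sketch (comments only): a checked unsigned add that traps on
    // overflow, with `trap` standing for whichever `TrapCode` applies:
    //
    //     masm.checked_uadd(writable!(dst), lhs, RegImm::i64(8), OperandSize::S64, trap)?;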
    /// Perform a floating point copysign operation. In x86, this will emit
    /// multiple instructions.
    fn float_copysign(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Perform a floating point abs operation.
    fn float_abs(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a floating point negation operation.
    fn float_neg(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a floating point rounding operation.
    fn float_round<
        F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext<Emission>, &mut Self) -> Result<()>,
    >(
        &mut self,
        mode: RoundingMode,
        env: &mut FuncEnv<Self::Ptr>,
        context: &mut CodeGenContext<Emission>,
        size: OperandSize,
        fallback: F,
    ) -> Result<()>;

    /// Perform a floating point square root operation.
    fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;

    /// Perform logical and operation.
    fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform logical or operation.
    fn or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform logical exclusive or operation.
    fn xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a shift operation between a register and an immediate.
    fn shift_ir(
        &mut self,
        dst: WritableReg,
        imm: Imm,
        lhs: Reg,
        kind: ShiftKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Perform a shift operation between two registers.
    /// This case is special in that some architectures have specific expectations
    /// regarding the location of the instruction arguments. To free the
    /// caller from having to deal with the architecture specific constraints
    /// we give this function access to the code generation context, allowing
    /// each implementation to decide the lowering path.
    fn shift(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: ShiftKind,
        size: OperandSize,
    ) -> Result<()>;
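    // Hedged sketch (comments only): shifts by a constant amount use the
    // immediate form directly, e.g. `x << 3` on a 32-bit operand:
    //
    //     masm.shift_ir(writable!(dst), Imm::i32(3), x, ShiftKind::Shl, OperandSize::S32)?;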
    /// Perform division operation.
    /// Division is special in that some architectures have specific
    /// expectations regarding the location of the instruction
    /// arguments and regarding the location of the quotient /
    /// remainder. To free the caller from having to deal with the
    /// architecture specific constraints we give this function access
    /// to the code generation context, allowing each implementation
    /// to decide the lowering path. For cases in which division is an
    /// unconstrained binary operation, the caller can decide to use
    /// the `CodeGenContext::i32_binop` or `CodeGenContext::i64_binop`
    /// functions.
    fn div(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: DivKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Calculate remainder.
    fn rem(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: RemKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Compares `src1` against `src2` for the side effect of setting processor
    /// flags.
    ///
    /// Note that `src1` is the left-hand-side of the comparison and `src2` is
    /// the right-hand-side, so if testing `a < b` then `src1 == a` and
    /// `src2 == b`.
    fn cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()>;

    /// Compare src and dst and put the result in dst.
    /// This function will potentially emit a series of instructions.
    ///
    /// The initial value in `dst` is the left-hand-side of the comparison and
    /// the initial value in `src` is the right-hand-side of the comparison.
    /// That means for `a < b` then `dst == a` and `src == b`.
    fn cmp_with_set(
        &mut self,
        dst: WritableReg,
        src: RegImm,
        kind: IntCmpKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Compare floats in src1 and src2 and put the result in dst.
    /// In x86, this will emit multiple instructions.
    fn float_cmp_with_set(
        &mut self,
        dst: WritableReg,
        src1: Reg,
        src2: Reg,
        kind: FloatCmpKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Count the number of leading zeroes in src and put the result in dst.
    /// In x64, this will emit multiple instructions if the `has_lzcnt` flag is
    /// false.
    fn clz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;

    /// Count the number of trailing zeroes in src and put the result in dst.
    /// In x64, this will emit multiple instructions if the `has_tzcnt` flag is
    /// false.
    fn ctz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;

    /// Push the register to the stack, returning the stack slot metadata.
    // NB
    // The stack alignment should not be assumed after any call to `push`,
    // unless explicitly aligned otherwise. Typically, stack alignment is
    // maintained at call sites and during the execution of
    // epilogues.
    fn push(&mut self, src: Reg, size: OperandSize) -> Result<StackSlot>;

    /// Finalize the assembly and return the result.
    fn finalize(self, base: Option<SourceLoc>) -> Result<MachBufferFinalized<Final>>;

    /// Zero a particular register.
    fn zero(&mut self, reg: WritableReg) -> Result<()>;
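    // Hedged sketch (comments only): materializing `a < b` (signed) as a 0/1
    // value, where `dst` initially holds `a` and `b` is the right-hand side:
    //
    //     masm.cmp_with_set(writable!(dst), RegImm::reg(b), IntCmpKind::LtS, OperandSize::S32)?;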
    /// Count the number of 1 bits in src and put the result in dst. In x64,
    /// this will emit multiple instructions if the `has_popcnt` flag is false.
    fn popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()>;

    /// Converts an i64 to an i32 by discarding the high 32 bits.
    fn wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()>;

    /// Extends an integer of a given size to a larger size.
    fn extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()>;

    /// Emits one or more instructions to perform a signed truncation of a
    /// float into an integer.
    fn signed_truncate(
        &mut self,
        dst: WritableReg,
        src: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
        kind: TruncKind,
    ) -> Result<()>;

    /// Emits one or more instructions to perform an unsigned truncation of a
    /// float into an integer.
    fn unsigned_truncate(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        src_size: OperandSize,
        dst_size: OperandSize,
        kind: TruncKind,
    ) -> Result<()>;

    /// Emits one or more instructions to perform a signed convert of an
    /// integer into a float.
    fn signed_convert(
        &mut self,
        dst: WritableReg,
        src: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) -> Result<()>;

    /// Emits one or more instructions to perform an unsigned convert of an
    /// integer into a float.
    fn unsigned_convert(
        &mut self,
        dst: WritableReg,
        src: Reg,
        tmp_gpr: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) -> Result<()>;

    /// Reinterpret a float as an integer.
    fn reinterpret_float_as_int(
        &mut self,
        dst: WritableReg,
        src: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Reinterpret an integer as a float.
    fn reinterpret_int_as_float(
        &mut self,
        dst: WritableReg,
        src: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Demote an f64 to an f32.
    fn demote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;

    /// Promote an f32 to an f64.
    fn promote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
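    // Hedged sketch (comments only): per the `TruncKind` docs above,
    // `Checked` clamps out-of-range sources while `Unchecked` raises an
    // exception; e.g. an f32-to-i32 saturating truncation:
    //
    //     masm.signed_truncate(writable!(dst), src, OperandSize::S32,
    //         OperandSize::S32, TruncKind::Checked)?;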
    /// Zero a given memory range.
    ///
    /// The default implementation divides the given memory range
    /// into word-sized slots. Then it unrolls a series of store
    /// instructions, effectively assigning zero to each slot.
    fn zero_mem_range(&mut self, mem: &Range<u32>) -> Result<()> {
        let word_size = <Self::ABI as abi::ABI>::word_bytes() as u32;
        if mem.is_empty() {
            return Ok(());
        }

        let start = if mem.start % word_size == 0 {
            mem.start
        } else {
            // Ensure that the start of the range is at least 4-byte aligned.
            assert!(mem.start % 4 == 0);
            let start = align_to(mem.start, word_size);
            let addr: Self::Address = self.local_address(&LocalSlot::i32(start))?;
            self.store(RegImm::i32(0), addr, OperandSize::S32)?;
            // Ensure that the new start of the range is word-size aligned.
            assert!(start % word_size == 0);
            start
        };

        let end = align_to(mem.end, word_size);
        let slots = (end - start) / word_size;

        if slots == 1 {
            let slot = LocalSlot::i64(start + word_size);
            let addr: Self::Address = self.local_address(&slot)?;
            self.store(RegImm::i64(0), addr, OperandSize::S64)?;
        } else {
            // TODO
            // Add an upper bound to this generation;
            // given a considerably large amount of slots
            // this will be inefficient.
            self.with_scratch::<IntScratch, _>(|masm, scratch| {
                masm.zero(scratch.writable())?;
                let zero = RegImm::reg(scratch.inner());

                for step in (start..end).step_by(word_size as usize) {
                    let slot = LocalSlot::i64(step + word_size);
                    let addr: Self::Address = masm.local_address(&slot)?;
                    masm.store(zero, addr, OperandSize::S64)?;
                }
                anyhow::Ok(())
            })?;
        }

        Ok(())
    }

    /// Generate a label.
    fn get_label(&mut self) -> Result<MachLabel>;

    /// Bind the given label at the current code offset.
    fn bind(&mut self, label: MachLabel) -> Result<()>;

    /// Conditional branch.
    ///
    /// Performs a comparison between the two operands,
    /// and immediately after emits a jump to the given
    /// label destination if the condition is met.
    fn branch(
        &mut self,
        kind: IntCmpKind,
        lhs: Reg,
        rhs: RegImm,
        taken: MachLabel,
        size: OperandSize,
    ) -> Result<()>;

    /// Emits an unconditional jump to the given label.
    fn jmp(&mut self, target: MachLabel) -> Result<()>;
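    // Hedged sketch (comments only): a forward branch taken when `lhs == 0`:
    //
    //     let skip = masm.get_label()?;
    //     masm.branch(IntCmpKind::Eq, lhs, RegImm::i32(0), skip, OperandSize::S32)?;
    //     // ... fallthrough code ...
    //     masm.bind(skip)?;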
    /// Emits a jump table sequence. The default label is specified as
    /// the last element of the targets slice.
    fn jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()>;

    /// Emit an unreachable code trap.
    fn unreachable(&mut self) -> Result<()>;

    /// Emit an unconditional trap.
    fn trap(&mut self, code: TrapCode) -> Result<()>;

    /// Traps if the condition code is met.
    fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()>;

    /// Trap if the source register is zero.
    fn trapz(&mut self, src: Reg, code: TrapCode) -> Result<()>;

    /// Ensures that the stack pointer is correctly positioned before an unconditional
    /// jump according to the requirements of the destination target.
    fn ensure_sp_for_jump(&mut self, target: SPOffset) -> Result<()> {
        let bytes = self
            .sp_offset()?
            .as_u32()
            .checked_sub(target.as_u32())
            .unwrap_or(0);

        if bytes > 0 {
            self.free_stack(bytes)?;
        }

        Ok(())
    }

    /// Mark the start of a source location returning the machine code offset
    /// and the relative source code location.
    fn start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)>;

    /// Mark the end of a source location.
    fn end_source_loc(&mut self) -> Result<()>;

    /// The current offset, in bytes from the beginning of the function.
    fn current_code_offset(&self) -> Result<CodeOffset>;

    /// Performs a 128-bit addition.
    fn add128(
        &mut self,
        dst_lo: WritableReg,
        dst_hi: WritableReg,
        lhs_lo: Reg,
        lhs_hi: Reg,
        rhs_lo: Reg,
        rhs_hi: Reg,
    ) -> Result<()>;

    /// Performs a 128-bit subtraction.
    fn sub128(
        &mut self,
        dst_lo: WritableReg,
        dst_hi: WritableReg,
        lhs_lo: Reg,
        lhs_hi: Reg,
        rhs_lo: Reg,
        rhs_hi: Reg,
    ) -> Result<()>;
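    // Hedged sketch (comments only): 128-bit integers are modeled as
    // (lo, hi) register pairs, so the wide arithmetic takes six sources and
    // two destinations:
    //
    //     masm.add128(writable!(dst_lo), writable!(dst_hi),
    //         lhs_lo, lhs_hi, rhs_lo, rhs_hi)?;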
    fn mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind)
        -> Result<()>;

    /// Takes the value in a source operand and replicates it across lanes of
    /// `size` in a destination result.
    fn splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()>;

    /// Performs a shuffle between two 128-bit vectors into a 128-bit result
    /// using `lanes` as a mask to select which indexes to copy.
    fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>;

    /// Performs a swizzle between two 128-bit vectors into a 128-bit result.
    fn swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()>;

    /// Performs the RMW `op` operation on the passed `addr`.
    ///
    /// The value *before* the operation was performed is written back to the
    /// `operand` register.
    fn atomic_rmw(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        addr: Self::Address,
        size: OperandSize,
        op: RmwOp,
        flags: MemFlags,
        extend: Option<Extend<Zero>>,
    ) -> Result<()>;

    /// Extracts the scalar value from `src` in `lane` to `dst`.
    fn extract_lane(
        &mut self,
        src: Reg,
        dst: WritableReg,
        lane: u8,
        kind: ExtractLaneKind,
    ) -> Result<()>;

    /// Replaces the value in `lane` in `dst` with the value in `src`.
    fn replace_lane(
        &mut self,
        src: RegImm,
        dst: WritableReg,
        lane: u8,
        kind: ReplaceLaneKind,
    ) -> Result<()>;

    /// Perform an atomic CAS (compare-and-swap) operation with the value at
    /// `addr`, and `expected` and `replacement` (at the top of the context's
    /// stack).
    ///
    /// This method takes the `CodeGenContext` as an argument to accommodate
    /// architectures that expect parameters in specific registers. The
    /// context stack contains the `replacement` and `expected` values, in
    /// that order. The implementer is expected to push the value that was at
    /// `addr` before the update onto the context's stack before returning.
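    ///
    /// Illustrative call shape (a sketch; assumes an implementation `masm`,
    /// a computed `addr`, and a `context` whose value stack already holds
    /// the `expected` and `replacement` values):
    ///
    /// ```ignore
    /// // After this call the context's stack holds the value that was
    /// // loaded from `addr` prior to the update.
    /// masm.atomic_cas(&mut context, addr, OperandSize::S32, MemFlags::trusted(), None)?;
    /// ```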
    fn atomic_cas(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        addr: Self::Address,
        size: OperandSize,
        flags: MemFlags,
        extend: Option<Extend<Zero>>,
    ) -> Result<()>;

    /// Compares vector registers `lhs` and `rhs` for equality and puts the
    /// vector of results in `dst`.
    fn v128_eq(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorEqualityKind,
    ) -> Result<()>;

    /// Compares vector registers `lhs` and `rhs` for inequality and puts the
    /// vector of results in `dst`.
    fn v128_ne(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorEqualityKind,
    ) -> Result<()>;

    /// Performs a less than comparison with vector registers `lhs` and `rhs`
    /// and puts the vector of results in `dst`.
    fn v128_lt(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a less than or equal comparison with vector registers `lhs`
    /// and `rhs` and puts the vector of results in `dst`.
    fn v128_le(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a greater than comparison with vector registers `lhs` and
    /// `rhs` and puts the vector of results in `dst`.
    fn v128_gt(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a greater than or equal comparison with vector registers
    /// `lhs` and `rhs` and puts the vector of results in `dst`.
    fn v128_ge(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Emit a memory fence.
    fn fence(&mut self) -> Result<()>;

    /// Perform a logical `not` operation on the 128-bit vector value in
    /// `dst`.
    fn v128_not(&mut self, dst: WritableReg) -> Result<()>;

    /// Perform a logical `and` operation on `src1` and `src2`, both 128-bit
    /// vector values, writing the result to `dst`.
    fn v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `and_not` operation on `src1` and `src2`, both
    /// 128-bit vector values, writing the result to `dst`.
    ///
    /// `and_not` is not commutative: `dst = !src1 & src2`.
    fn v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `or` operation on `src1` and `src2`, both 128-bit
    /// vector values, writing the result to `dst`.
    fn v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `xor` operation on `src1` and `src2`, both 128-bit
    /// vector values, writing the result to `dst`.
    fn v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Given two 128-bit vectors `src1` and `src2`, and a 128-bit bitmask
    /// `mask`, selects bits from `src1` where the mask bit is 1, and from
    /// `src2` where the mask bit is 0.
    ///
    /// This is equivalent to:
    /// `v128.or(v128.and(src1, mask), v128.and(src2, v128.not(mask)))`.
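    ///
    /// A scalar model of the operation (illustrative only), shown on a
    /// single 128-bit value:
    ///
    /// ```ignore
    /// fn bitselect(src1: u128, src2: u128, mask: u128) -> u128 {
    ///     (src1 & mask) | (src2 & !mask)
    /// }
    /// // With 4-bit values for brevity: src1 = 0b1010, src2 = 0b0101, and
    /// // mask = 0b1100 selects the two high bits from src1 and the two low
    /// // bits from src2, producing 0b1001.
    /// ```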
    fn v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()>;

    /// If any bit in `src` is 1, set `dst` to 1; otherwise, set `dst` to 0.
    fn v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

    /// Convert a vector of integers to a vector of floating point values.
    fn v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()>;

    /// Convert two input vectors into a smaller lane vector by narrowing
    /// each lane.
    fn v128_narrow(
        &mut self,
        src1: Reg,
        src2: Reg,
        dst: WritableReg,
        kind: V128NarrowKind,
    ) -> Result<()>;

    /// Converts a vector containing two 64-bit floating point lanes to two
    /// 32-bit floating point lanes, setting the two higher lanes to 0.
    fn v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

    /// Converts a vector containing four 32-bit floating point lanes to two
    /// 64-bit floating point lanes. Only the two lower lanes are converted.
    fn v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

    /// Converts the low or high half of the smaller lane vector to a larger
    /// lane vector.
    fn v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()>;

    /// Perform a vector add between `lhs` and `rhs`, placing the result in
    /// `dst`.
    fn v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()>;

    /// Perform a vector sub between `lhs` and `rhs`, placing the result in
    /// `dst`.
    fn v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()>;

    /// Perform a vector lane-wise mul between `lhs` and `rhs`, placing the
    /// result in `dst`.
    fn v128_mul(&mut self, context: &mut CodeGenContext<Emission>, kind: V128MulKind)
        -> Result<()>;

    /// Perform an absolute value operation on a vector.
    fn v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()>;

    /// Vectorized negate of the content of `op`.
    fn v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()>;

    /// Perform the shift operation specified by `kind`, by the shift amount
    /// specified by the 32-bit integer at the top of the stack, on the
    /// 128-bit vector specified by the second value from the top of the
    /// stack, interpreted as packed integers of size `lane_width`.
    ///
    /// The shift amount is taken modulo `lane_width`.
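    ///
    /// For example, with a `lane_width` of `S32`, a shift amount of 33 is
    /// reduced to 33 % 32 = 1, so each 32-bit lane is shifted by a single
    /// bit. Illustrative call shape (a sketch; assumes an implementation
    /// `masm` and a `context` with the shift amount and vector on its value
    /// stack):
    ///
    /// ```ignore
    /// masm.v128_shift(&mut context, OperandSize::S32, ShiftKind::Shl)?;
    /// ```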
    fn v128_shift(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        lane_width: OperandSize,
        kind: ShiftKind,
    ) -> Result<()>;

    /// Perform a saturating integer q-format rounding multiplication.
    fn v128_q15mulr_sat_s(
        &mut self,
        lhs: Reg,
        rhs: Reg,
        dst: WritableReg,
        size: OperandSize,
    ) -> Result<()>;

    /// Sets `dst` to 1 if all lanes in `src` are non-zero, and to 0
    /// otherwise.
    fn v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Extracts the high bit of each lane in `src` and produces a scalar
    /// mask with all bits concatenated in `dst`.
    fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise truncation operation.
    ///
    /// If using an integer kind of truncation, this performs a lane-wise
    /// saturating conversion from float to integer using the IEEE
    /// `convertToIntegerTowardZero` function. If any input lane is NaN, the
    /// resulting lane is 0. If the rounded integer value of a lane is
    /// outside the range of the destination type, the result is saturated
    /// to the nearest representable integer value.
    fn v128_trunc(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: V128TruncKind,
    ) -> Result<()>;

    /// Perform a lane-wise `min` operation between `src1` and `src2`.
    fn v128_min(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MinKind)
        -> Result<()>;

    /// Perform a lane-wise `max` operation between `src1` and `src2`.
    fn v128_max(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MaxKind)
        -> Result<()>;

    /// Perform the lane-wise integer extended multiplication, producing a
    /// result twice as wide as the inputs. This is equivalent to an extend
    /// followed by a multiply.
    ///
    /// The extension to be performed is inferred from the `lane_width` and
    /// the `kind` of extmul; e.g., if `lane_width` is `S16` and `kind` is
    /// `LowSigned`, then we sign-extend the lower 8 bits of the 16-bit
    /// lanes.
    fn v128_extmul(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: V128ExtMulKind,
    ) -> Result<()>;

    /// Perform the lane-wise integer extended pairwise addition, producing
    /// extended results (twice as wide as the inputs).
    fn v128_extadd_pairwise(
        &mut self,
        src: Reg,
        dst: WritableReg,
        kind: V128ExtAddKind,
    ) -> Result<()>;

    /// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add
    /// adjacent pairs of the 32-bit results.
    fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>;

    /// Count the number of bits set in each lane.
    fn v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()>;

    /// Lane-wise rounding average of the vectors of integers in `lhs` and
    /// `rhs`, putting the results in `dst`.
    fn v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise IEEE division on vectors of floats.
    fn v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise IEEE square root of a vector of floats.
    fn v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise ceiling of a vector of floats.
    fn v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise flooring of a vector of floats.
    fn v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise rounding to nearest integer for a vector of floats.
    fn v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise minimum value, defined as `rhs < lhs ? rhs : lhs`.
    fn v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise maximum value, defined as `lhs < rhs ? rhs : lhs`.
    fn v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
}
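
// A scalar model (illustrative only; not used by any backend) of the
// pseudo-min/max semantics documented on `v128_pmin` and `v128_pmax` above.
// Unlike IEEE minimum/maximum, these select an operand via an ordered `<`
// comparison, so a NaN in `lhs` propagates while a NaN in `rhs` does not.
#[cfg(test)]
mod pseudo_minmax_model {
    /// `v128_pmin` lane rule: `rhs < lhs ? rhs : lhs`.
    fn pmin(lhs: f32, rhs: f32) -> f32 {
        if rhs < lhs { rhs } else { lhs }
    }

    /// `v128_pmax` lane rule: `lhs < rhs ? rhs : lhs`.
    fn pmax(lhs: f32, rhs: f32) -> f32 {
        if lhs < rhs { rhs } else { lhs }
    }

    #[test]
    fn pseudo_minmax_is_not_ieee() {
        // Any comparison with NaN is false, so the left operand is returned.
        assert!(pmin(f32::NAN, 1.0).is_nan());
        assert_eq!(pmin(1.0, f32::NAN), 1.0);
        assert!(pmax(f32::NAN, 1.0).is_nan());
        assert_eq!(pmax(1.0, f32::NAN), 1.0);
        // On ordered inputs they agree with the usual min/max.
        assert_eq!(pmin(1.0, 2.0), 1.0);
        assert_eq!(pmax(1.0, 2.0), 2.0);
    }
}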