use crate::Result;
use crate::abi::{self, LocalSlot, align_to};
use crate::codegen::{CodeGenContext, Emission, FuncEnv};
use crate::isa::{
    CallingConvention,
    reg::{Reg, RegClass, WritableReg, writable},
};
use cranelift_codegen::{
    Final, MachBufferFinalized, MachLabel,
    binemit::CodeOffset,
    ir::{Endianness, MemFlags, RelSourceLoc, SourceLoc, UserExternalNameRef},
};
use std::{fmt::Debug, ops::Range};
use wasmtime_environ::{PtrSize, WasmHeapType, WasmRefType, WasmValType};

pub(crate) use cranelift_codegen::ir::TrapCode;

#[derive(Eq, PartialEq)]
pub(crate) enum DivKind {
    /// Signed division.
    Signed,
    /// Unsigned division.
    Unsigned,
}

/// Represents the `memory.atomic.wait*` kind.
#[derive(Debug, Clone, Copy)]
pub(crate) enum AtomicWaitKind {
    Wait32,
    Wait64,
}

/// Remainder kind.
#[derive(Copy, Clone)]
pub(crate) enum RemKind {
    /// Signed remainder.
    Signed,
    /// Unsigned remainder.
    Unsigned,
}

impl RemKind {
    pub fn is_signed(&self) -> bool {
        matches!(self, Self::Signed)
    }
}

/// Kinds of vector min operation supported by WebAssembly.
pub(crate) enum V128MinKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}

impl V128MinKind {
    /// The size of each lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
            Self::F64x2 => OperandSize::S64,
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
        }
    }
}

/// Kinds of vector max operation supported by WebAssembly.
pub(crate) enum V128MaxKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}

impl V128MaxKind {
    /// The size of each lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
            Self::F64x2 => OperandSize::S64,
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
        }
    }
}

#[derive(Eq, PartialEq)]
pub(crate) enum MulWideKind {
    Signed,
    Unsigned,
}

/// Type of operation for a read-modify-write instruction.
pub(crate) enum RmwOp {
    Add,
    Sub,
    Xchg,
    And,
    Or,
    Xor,
}

/// The direction to perform the memory move.
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum MemMoveDirection {
    /// From high memory addresses to low memory addresses.
    /// Invariant: the source location is closer to the FP than the destination
    /// location, which will be closer to the SP.
    HighToLow,
    /// From low memory addresses to high memory addresses.
    /// Invariant: the source location is closer to the SP than the destination
    /// location, which will be closer to the FP.
    LowToHigh,
}
/// Classifies how to treat float-to-int conversions.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum TruncKind {
    /// Saturating conversion. If the source value is greater than the maximum
    /// value of the destination type, the result is clamped to the
    /// destination maximum value.
    Checked,
    /// An exception is raised if the source value is greater than the maximum
    /// value of the destination type.
    Unchecked,
}

impl TruncKind {
    /// Returns true if the truncation kind is checked.
    pub(crate) fn is_checked(&self) -> bool {
        *self == TruncKind::Checked
    }

    /// Returns `true` if the trunc kind is [`Unchecked`].
    ///
    /// [`Unchecked`]: TruncKind::Unchecked
    #[must_use]
    pub(crate) fn is_unchecked(&self) -> bool {
        matches!(self, Self::Unchecked)
    }
}

/// Representation of the stack pointer offset.
#[derive(Copy, Clone, Eq, PartialEq, Debug, PartialOrd, Ord, Default)]
pub struct SPOffset(u32);

impl SPOffset {
    pub fn from_u32(offs: u32) -> Self {
        Self(offs)
    }

    pub fn as_u32(&self) -> u32 {
        self.0
    }
}

/// A stack slot.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct StackSlot {
    /// The location of the slot, relative to the stack pointer.
    pub offset: SPOffset,
    /// The size of the slot, in bytes.
    pub size: u32,
}

impl StackSlot {
    pub fn new(offs: SPOffset, size: u32) -> Self {
        Self { offset: offs, size }
    }
}

pub trait ScratchType {
    /// Derive the register class from the scratch register type.
    fn reg_class() -> RegClass;
}

/// A scratch register type of integer class.
pub struct IntScratch;
/// A scratch register type of floating point class.
pub struct FloatScratch;

impl ScratchType for IntScratch {
    fn reg_class() -> RegClass {
        RegClass::Int
    }
}

impl ScratchType for FloatScratch {
    fn reg_class() -> RegClass {
        RegClass::Float
    }
}

/// A scratch register scope.
#[derive(Debug, Clone, Copy)]
pub struct Scratch(Reg);

impl Scratch {
    pub fn new(r: Reg) -> Self {
        Self(r)
    }

    #[inline]
    pub fn inner(&self) -> Reg {
        self.0
    }

    #[inline]
    pub fn writable(&self) -> WritableReg {
        writable!(self.0)
    }
}

/// Kinds of integer binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum IntCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Signed less than.
    LtS,
    /// Unsigned less than.
    LtU,
    /// Signed greater than.
    GtS,
    /// Unsigned greater than.
    GtU,
    /// Signed less than or equal.
    LeS,
    /// Unsigned less than or equal.
    LeU,
    /// Signed greater than or equal.
    GeS,
    /// Unsigned greater than or equal.
    GeU,
}
/// Kinds of float binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug)]
pub(crate) enum FloatCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Less than.
    Lt,
    /// Greater than.
    Gt,
    /// Less than or equal.
    Le,
    /// Greater than or equal.
    Ge,
}

/// Kinds of shifts in WebAssembly. The [`MacroAssembler`] implementation for
/// each ISA is responsible for emitting the correct sequence of instructions
/// when lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum ShiftKind {
    /// Left shift.
    Shl,
    /// Signed right shift.
    ShrS,
    /// Unsigned right shift.
    ShrU,
    /// Left rotate.
    Rotl,
    /// Right rotate.
    Rotr,
}

/// Kinds of extends in WebAssembly. Each [`MacroAssembler`] implementation
/// is responsible for emitting the correct sequence of instructions when
/// lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum ExtendKind {
    Signed(Extend<Signed>),
    Unsigned(Extend<Zero>),
}

#[derive(Copy, Clone)]
pub(crate) enum Signed {}
#[derive(Copy, Clone)]
pub(crate) enum Zero {}

pub(crate) trait ExtendType {}

impl ExtendType for Signed {}
impl ExtendType for Zero {}

#[derive(Copy, Clone)]
pub(crate) enum Extend<T: ExtendType> {
    /// 8 to 32 bit extend.
    I32Extend8,
    /// 16 to 32 bit extend.
    I32Extend16,
    /// 8 to 64 bit extend.
    I64Extend8,
    /// 16 to 64 bit extend.
    I64Extend16,
    /// 32 to 64 bit extend.
    I64Extend32,

    /// Variant to hold the kind of extend marker.
    ///
    /// This is `Signed` or `Zero`; since both are empty enums, this variant
    /// cannot be constructed.
    __Kind(T),
}

impl From<Extend<Zero>> for ExtendKind {
    fn from(value: Extend<Zero>) -> Self {
        ExtendKind::Unsigned(value)
    }
}

impl<T: ExtendType> Extend<T> {
    pub fn from_size(&self) -> OperandSize {
        match self {
            Extend::I32Extend8 | Extend::I64Extend8 => OperandSize::S8,
            Extend::I32Extend16 | Extend::I64Extend16 => OperandSize::S16,
            Extend::I64Extend32 => OperandSize::S32,
            Extend::__Kind(_) => unreachable!(),
        }
    }

    pub fn to_size(&self) -> OperandSize {
        match self {
            Extend::I32Extend8 | Extend::I32Extend16 => OperandSize::S32,
            Extend::I64Extend8 | Extend::I64Extend16 | Extend::I64Extend32 => OperandSize::S64,
            Extend::__Kind(_) => unreachable!(),
        }
    }

    pub fn from_bits(&self) -> u8 {
        self.from_size().num_bits()
    }

    pub fn to_bits(&self) -> u8 {
        self.to_size().num_bits()
    }
}

impl From<Extend<Signed>> for ExtendKind {
    fn from(value: Extend<Signed>) -> Self {
        ExtendKind::Signed(value)
    }
}

impl ExtendKind {
    pub fn signed(&self) -> bool {
        match self {
            Self::Signed(_) => true,
            _ => false,
        }
    }

    pub fn from_bits(&self) -> u8 {
        match self {
            Self::Signed(s) => s.from_bits(),
            Self::Unsigned(u) => u.from_bits(),
        }
    }

    pub fn to_bits(&self) -> u8 {
        match self {
            Self::Signed(s) => s.to_bits(),
            Self::Unsigned(u) => u.to_bits(),
        }
    }
}
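// A minimal sanity check of the `Extend`/`ExtendKind` accessors above: a
// signed 16-to-64 bit extend reads 16 bits and produces 64.
#[cfg(test)]
mod extend_kind_example {
    use super::*;

    #[test]
    fn extend_bit_widths() {
        let kind: ExtendKind = Extend::<Signed>::I64Extend16.into();
        assert!(kind.signed());
        assert_eq!(kind.from_bits(), 16);
        assert_eq!(kind.to_bits(), 64);
    }
}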
/// Kinds of vector load and extends in WebAssembly. Each [`MacroAssembler`]
/// implementation is responsible for emitting the correct sequence of
/// instructions when lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum V128LoadExtendKind {
    /// Sign extends eight 8 bit integers to eight 16 bit lanes.
    E8x8S,
    /// Zero extends eight 8 bit integers to eight 16 bit lanes.
    E8x8U,
    /// Sign extends four 16 bit integers to four 32 bit lanes.
    E16x4S,
    /// Zero extends four 16 bit integers to four 32 bit lanes.
    E16x4U,
    /// Sign extends two 32 bit integers to two 64 bit lanes.
    E32x2S,
    /// Zero extends two 32 bit integers to two 64 bit lanes.
    E32x2U,
}

/// Kinds of splat loads supported by WebAssembly.
pub(crate) enum SplatLoadKind {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
}

/// Kinds of splat supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum SplatKind {
    /// 8 bit integer.
    I8x16,
    /// 16 bit integer.
    I16x8,
    /// 32 bit integer.
    I32x4,
    /// 64 bit integer.
    I64x2,
    /// 32 bit float.
    F32x4,
    /// 64 bit float.
    F64x2,
}

impl SplatKind {
    /// The lane size to use for different kinds of splats.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            SplatKind::I8x16 => OperandSize::S8,
            SplatKind::I16x8 => OperandSize::S16,
            SplatKind::I32x4 | SplatKind::F32x4 => OperandSize::S32,
            SplatKind::I64x2 | SplatKind::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of extract lane supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum ExtractLaneKind {
    /// 16 lanes of 8-bit integers sign extended to 32-bits.
    I8x16S,
    /// 16 lanes of 8-bit integers zero extended to 32-bits.
    I8x16U,
    /// 8 lanes of 16-bit integers sign extended to 32-bits.
    I16x8S,
    /// 8 lanes of 16-bit integers zero extended to 32-bits.
    I16x8U,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
}

impl ExtractLaneKind {
    /// The lane size to use for different kinds of extract lane kinds.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ExtractLaneKind::I8x16S | ExtractLaneKind::I8x16U => OperandSize::S8,
            ExtractLaneKind::I16x8S | ExtractLaneKind::I16x8U => OperandSize::S16,
            ExtractLaneKind::I32x4 | ExtractLaneKind::F32x4 => OperandSize::S32,
            ExtractLaneKind::I64x2 | ExtractLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

impl From<ExtractLaneKind> for Extend<Signed> {
    fn from(value: ExtractLaneKind) -> Self {
        match value {
            ExtractLaneKind::I8x16S => Extend::I32Extend8,
            ExtractLaneKind::I16x8S => Extend::I32Extend16,
            _ => unimplemented!(),
        }
    }
}
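// A minimal check of the conversion above: extracting a signed lane narrower
// than 32 bits implies a sign extend of that lane to 32 bits.
#[cfg(test)]
mod extract_lane_example {
    use super::*;

    #[test]
    fn extract_lane_extends() {
        assert_eq!(ExtractLaneKind::I8x16S.lane_size(), OperandSize::S8);
        let extend: Extend<Signed> = ExtractLaneKind::I8x16S.into();
        assert_eq!(extend.from_bits(), 8);
        assert_eq!(extend.to_bits(), 32);
    }
}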
/// Kinds of replace lane supported by WebAssembly.
pub(crate) enum ReplaceLaneKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl ReplaceLaneKind {
    /// The lane size to use for different kinds of replace lane kinds.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ReplaceLaneKind::I8x16 => OperandSize::S8,
            ReplaceLaneKind::I16x8 => OperandSize::S16,
            ReplaceLaneKind::I32x4 => OperandSize::S32,
            ReplaceLaneKind::I64x2 => OperandSize::S64,
            ReplaceLaneKind::F32x4 => OperandSize::S32,
            ReplaceLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of behavior supported by Wasm loads.
pub(crate) enum LoadKind {
    /// Load the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Atomic load, with optional scalar extend.
    Atomic(OperandSize, Option<ExtendKind>),
    /// Duplicate value into vector lanes.
    Splat(SplatLoadKind),
    /// Scalar (non-vector) extend.
    ScalarExtend(ExtendKind),
    /// Vector extend.
    VectorExtend(V128LoadExtendKind),
    /// Load content into the selected lane.
    VectorLane(LaneSelector),
    /// Load a single element into the lowest bits of a vector and initialize
    /// all other bits to zero.
    VectorZero(OperandSize),
}

impl LoadKind {
    /// Returns the [`OperandSize`] used in the load operation.
    pub(crate) fn derive_operand_size(&self) -> OperandSize {
        match self {
            Self::ScalarExtend(extend) | Self::Atomic(_, Some(extend)) => {
                Self::operand_size_for_scalar(extend)
            }
            Self::VectorExtend(_) => OperandSize::S64,
            Self::Splat(kind) => Self::operand_size_for_splat(kind),
            Self::Operand(size)
            | Self::Atomic(size, None)
            | Self::VectorLane(LaneSelector { size, .. })
            | Self::VectorZero(size) => *size,
        }
    }

    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }

    fn operand_size_for_scalar(extend_kind: &ExtendKind) -> OperandSize {
        match extend_kind {
            ExtendKind::Signed(s) => s.from_size(),
            ExtendKind::Unsigned(u) => u.from_size(),
        }
    }

    fn operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize {
        match kind {
            SplatLoadKind::S8 => OperandSize::S8,
            SplatLoadKind::S16 => OperandSize::S16,
            SplatLoadKind::S32 => OperandSize::S32,
            SplatLoadKind::S64 => OperandSize::S64,
        }
    }

    pub(crate) fn is_atomic(&self) -> bool {
        matches!(self, Self::Atomic(_, _))
    }
}

/// Kinds of behavior supported by Wasm stores.
#[derive(Copy, Clone)]
pub enum StoreKind {
    /// Store the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Store the entire bytes of the operand size without any modifications, atomically.
    Atomic(OperandSize),
    /// Store the content of the selected lane.
    VectorLane(LaneSelector),
}

impl StoreKind {
    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }
}

#[derive(Copy, Clone)]
pub struct LaneSelector {
    pub lane: u8,
    pub size: OperandSize,
}

/// Types of vector integer to float conversions supported by WebAssembly.
pub(crate) enum V128ConvertKind {
    /// 4 lanes of signed 32-bit integers to 4 lanes of 32-bit floats.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers to 4 lanes of 32-bit floats.
    I32x4U,
    /// 4 lanes of signed 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowS,
    /// 4 lanes of unsigned 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowU,
}

impl V128ConvertKind {
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            V128ConvertKind::I32x4S
            | V128ConvertKind::I32x4U
            | V128ConvertKind::I32x4LowS
            | V128ConvertKind::I32x4LowU => OperandSize::S32,
        }
    }

    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        match self {
            V128ConvertKind::I32x4S | V128ConvertKind::I32x4U => OperandSize::S32,
            V128ConvertKind::I32x4LowS | V128ConvertKind::I32x4LowU => OperandSize::S64,
        }
    }
}
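// A minimal check of `LoadKind::derive_operand_size`: the derived size is the
// in-memory access width, so for extending loads it is the source width rather
// than the destination width.
#[cfg(test)]
mod load_kind_example {
    use super::*;

    #[test]
    fn load_widths() {
        // A plain 64-bit atomic load accesses 8 bytes.
        let plain = LoadKind::Atomic(OperandSize::S64, None);
        assert!(plain.is_atomic());
        assert_eq!(plain.derive_operand_size(), OperandSize::S64);

        // An 8-to-64 bit zero-extending load still accesses a single byte.
        let extending = LoadKind::ScalarExtend(Extend::<Zero>::I64Extend8.into());
        assert_eq!(extending.derive_operand_size(), OperandSize::S8);
    }
}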
/// Kinds of vector narrowing operations supported by WebAssembly.
pub(crate) enum V128NarrowKind {
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// signed saturation.
    I16x8S,
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// unsigned saturation.
    I16x8U,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// signed saturation.
    I32x4S,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// unsigned saturation.
    I32x4U,
}

impl V128NarrowKind {
    /// Return the size of the destination lanes.
    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        match self {
            Self::I16x8S | Self::I16x8U => OperandSize::S8,
            Self::I32x4S | Self::I32x4U => OperandSize::S16,
        }
    }
}

/// Kinds of vector extending operations supported by WebAssembly.
#[derive(Debug, Copy, Clone)]
pub(crate) enum V128ExtendKind {
    /// Low half of i8x16 sign extended.
    LowI8x16S,
    /// High half of i8x16 sign extended.
    HighI8x16S,
    /// Low half of i8x16 zero extended.
    LowI8x16U,
    /// High half of i8x16 zero extended.
    HighI8x16U,
    /// Low half of i16x8 sign extended.
    LowI16x8S,
    /// High half of i16x8 sign extended.
    HighI16x8S,
    /// Low half of i16x8 zero extended.
    LowI16x8U,
    /// High half of i16x8 zero extended.
    HighI16x8U,
    /// Low half of i32x4 sign extended.
    LowI32x4S,
    /// High half of i32x4 sign extended.
    HighI32x4S,
    /// Low half of i32x4 zero extended.
    LowI32x4U,
    /// High half of i32x4 zero extended.
    HighI32x4U,
}

impl V128ExtendKind {
    /// The size of the source's lanes.
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            Self::LowI8x16S | Self::LowI8x16U | Self::HighI8x16S | Self::HighI8x16U => {
                OperandSize::S8
            }
            Self::LowI16x8S | Self::LowI16x8U | Self::HighI16x8S | Self::HighI16x8U => {
                OperandSize::S16
            }
            Self::LowI32x4S | Self::LowI32x4U | Self::HighI32x4S | Self::HighI32x4U => {
                OperandSize::S32
            }
        }
    }
}

/// Kinds of vector equalities and non-equalities supported by WebAssembly.
pub(crate) enum VectorEqualityKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl VectorEqualityKind {
    /// Get the lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
            Self::I32x4 | Self::F32x4 => OperandSize::S32,
            Self::I64x2 | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of vector comparisons supported by WebAssembly.
pub(crate) enum VectorCompareKind {
    /// 16 lanes of signed 8 bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8 bit integers.
    I8x16U,
    /// 8 lanes of signed 16 bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16 bit integers.
    I16x8U,
    /// 4 lanes of signed 32 bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32 bit integers.
    I32x4U,
    /// 2 lanes of signed 64 bit integers.
    I64x2S,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl VectorCompareKind {
    /// Get the lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
            Self::I32x4S | Self::I32x4U | Self::F32x4 => OperandSize::S32,
            Self::I64x2S | Self::F64x2 => OperandSize::S64,
        }
    }
}
/// Kinds of vector absolute operations supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum V128AbsKind {
    /// 8 bit integers.
    I8x16,
    /// 16 bit integers.
    I16x8,
    /// 32 bit integers.
    I32x4,
    /// 64 bit integers.
    I64x2,
    /// 32 bit floats.
    F32x4,
    /// 64 bit floats.
    F64x2,
}

impl V128AbsKind {
    /// The lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
            Self::I32x4 | Self::F32x4 => OperandSize::S32,
            Self::I64x2 | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of truncation for vectors supported by WebAssembly.
pub(crate) enum V128TruncKind {
    /// Truncates 4 lanes of 32-bit floats to nearest integral value.
    F32x4,
    /// Truncates 2 lanes of 64-bit floats to nearest integral value.
    F64x2,
    /// Integers from signed F32x4.
    I32x4FromF32x4S,
    /// Integers from unsigned F32x4.
    I32x4FromF32x4U,
    /// Integers from signed F64x2.
    I32x4FromF64x2SZero,
    /// Integers from unsigned F64x2.
    I32x4FromF64x2UZero,
}

impl V128TruncKind {
    /// The size of the source lanes.
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            V128TruncKind::F32x4
            | V128TruncKind::I32x4FromF32x4S
            | V128TruncKind::I32x4FromF32x4U => OperandSize::S32,
            V128TruncKind::F64x2
            | V128TruncKind::I32x4FromF64x2SZero
            | V128TruncKind::I32x4FromF64x2UZero => OperandSize::S64,
        }
    }

    /// The size of the destination lanes.
    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        if let V128TruncKind::F64x2 = self {
            OperandSize::S64
        } else {
            OperandSize::S32
        }
    }
}

/// Kinds of vector addition supported by WebAssembly.
pub(crate) enum V128AddKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

/// Kinds of vector subtraction supported by WebAssembly.
pub(crate) enum V128SubKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

impl From<V128NegKind> for V128SubKind {
    fn from(value: V128NegKind) -> Self {
        match value {
            V128NegKind::I8x16 => Self::I8x16,
            V128NegKind::I16x8 => Self::I16x8,
            V128NegKind::I32x4 => Self::I32x4,
            V128NegKind::I64x2 => Self::I64x2,
            V128NegKind::F32x4 | V128NegKind::F64x2 => unimplemented!(),
        }
    }
}
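// Minimal checks of the truncation lane-size bookkeeping above, and of the
// `V128NegKind` -> `V128SubKind` conversion (integer negation can be lowered
// as a subtraction from zero; float negation intentionally has no mapping).
#[cfg(test)]
mod v128_trunc_example {
    use super::*;

    #[test]
    fn trunc_lane_sizes() {
        // f64x2 -> i32x4 truncation reads 64-bit lanes and writes 32-bit lanes.
        let kind = V128TruncKind::I32x4FromF64x2SZero;
        assert_eq!(kind.src_lane_size(), OperandSize::S64);
        assert_eq!(kind.dst_lane_size(), OperandSize::S32);
    }

    #[test]
    fn neg_maps_to_sub() {
        assert!(matches!(
            V128SubKind::from(V128NegKind::I32x4),
            V128SubKind::I32x4
        ));
    }
}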
/// Kinds of vector multiplication supported by WebAssembly.
pub(crate) enum V128MulKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

/// Kinds of vector negation supported by WebAssembly.
#[derive(Copy, Clone)]
pub(crate) enum V128NegKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of 8-bit integers.
    I8x16,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

impl V128NegKind {
    /// The size of the lanes.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4 => OperandSize::S32,
            Self::F64x2 | Self::I64x2 => OperandSize::S64,
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
        }
    }
}

/// Kinds of extended pairwise addition supported by WebAssembly.
pub(crate) enum V128ExtAddKind {
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
}

/// Kinds of vector extended multiplication supported by WebAssembly.
#[derive(Debug, Clone, Copy)]
pub(crate) enum V128ExtMulKind {
    LowI8x16S,
    HighI8x16S,
    LowI8x16U,
    HighI8x16U,
    LowI16x8S,
    HighI16x8S,
    LowI16x8U,
    HighI16x8U,
    LowI32x4S,
    HighI32x4S,
    LowI32x4U,
    HighI32x4U,
}

impl From<V128ExtMulKind> for V128ExtendKind {
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S => Self::LowI8x16S,
            V128ExtMulKind::HighI8x16S => Self::HighI8x16S,
            V128ExtMulKind::LowI8x16U => Self::LowI8x16U,
            V128ExtMulKind::HighI8x16U => Self::HighI8x16U,
            V128ExtMulKind::LowI16x8S => Self::LowI16x8S,
            V128ExtMulKind::HighI16x8S => Self::HighI16x8S,
            V128ExtMulKind::LowI16x8U => Self::LowI16x8U,
            V128ExtMulKind::HighI16x8U => Self::HighI16x8U,
            V128ExtMulKind::LowI32x4S => Self::LowI32x4S,
            V128ExtMulKind::HighI32x4S => Self::HighI32x4S,
            V128ExtMulKind::LowI32x4U => Self::LowI32x4U,
            V128ExtMulKind::HighI32x4U => Self::HighI32x4U,
        }
    }
}

impl From<V128ExtMulKind> for V128MulKind {
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S
            | V128ExtMulKind::HighI8x16S
            | V128ExtMulKind::LowI8x16U
            | V128ExtMulKind::HighI8x16U => Self::I16x8,
            V128ExtMulKind::LowI16x8S
            | V128ExtMulKind::HighI16x8S
            | V128ExtMulKind::LowI16x8U
            | V128ExtMulKind::HighI16x8U => Self::I32x4,
            V128ExtMulKind::LowI32x4S
            | V128ExtMulKind::HighI32x4S
            | V128ExtMulKind::LowI32x4U
            | V128ExtMulKind::HighI32x4U => Self::I64x2,
        }
    }
}
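// A minimal check of the pairings encoded by the `From` conversions above: an
// extended multiply of the low eight 8-bit lanes sign-extends 8-bit inputs and
// multiplies them as eight 16-bit lanes.
#[cfg(test)]
mod v128_extmul_example {
    use super::*;

    #[test]
    fn extmul_pairings() {
        let kind = V128ExtMulKind::LowI8x16S;
        assert_eq!(V128ExtendKind::from(kind).src_lane_size(), OperandSize::S8);
        assert!(matches!(V128MulKind::from(kind), V128MulKind::I16x8));
    }
}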
/// Operand size, in bits.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum OperandSize {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
    /// 128 bits.
    S128,
}

impl OperandSize {
    /// The number of bits in the operand.
    pub fn num_bits(&self) -> u8 {
        match self {
            OperandSize::S8 => 8,
            OperandSize::S16 => 16,
            OperandSize::S32 => 32,
            OperandSize::S64 => 64,
            OperandSize::S128 => 128,
        }
    }

    /// The number of bytes in the operand.
    pub fn bytes(&self) -> u32 {
        match self {
            Self::S8 => 1,
            Self::S16 => 2,
            Self::S32 => 4,
            Self::S64 => 8,
            Self::S128 => 16,
        }
    }

    /// The binary logarithm of the number of bits in the operand.
    pub fn log2(&self) -> u8 {
        match self {
            OperandSize::S8 => 3,
            OperandSize::S16 => 4,
            OperandSize::S32 => 5,
            OperandSize::S64 => 6,
            OperandSize::S128 => 7,
        }
    }

    /// Create an [`OperandSize`] from the given number of bytes.
    pub fn from_bytes(bytes: u8) -> Self {
        use OperandSize::*;
        match bytes {
            4 => S32,
            8 => S64,
            16 => S128,
            _ => panic!("Invalid bytes {bytes} for OperandSize"),
        }
    }

    pub fn extend_to<T: ExtendType>(&self, to: Self) -> Option<Extend<T>> {
        match to {
            OperandSize::S32 => match self {
                OperandSize::S8 => Some(Extend::I32Extend8),
                OperandSize::S16 => Some(Extend::I32Extend16),
                _ => None,
            },
            OperandSize::S64 => match self {
                OperandSize::S8 => Some(Extend::I64Extend8),
                OperandSize::S16 => Some(Extend::I64Extend16),
                OperandSize::S32 => Some(Extend::I64Extend32),
                _ => None,
            },
            _ => None,
        }
    }

    /// The number of bits in the mantissa.
    ///
    /// Only implemented for floats.
    pub fn mantissa_bits(&self) -> u8 {
        match self {
            Self::S32 => 8,
            Self::S64 => 11,
            _ => unimplemented!(),
        }
    }
}

/// An abstraction over a register or immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum RegImm {
    /// A register.
    Reg(Reg),
    /// A tagged immediate argument.
    Imm(Imm),
}

/// A tagged representation of an immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum Imm {
    /// I32 immediate.
    I32(u32),
    /// I64 immediate.
    I64(u64),
    /// F32 immediate.
    F32(u32),
    /// F64 immediate.
    F64(u64),
    /// V128 immediate.
    V128(i128),
}

impl Imm {
    /// Create a new I64 immediate.
    pub fn i64(val: i64) -> Self {
        Self::I64(val as u64)
    }

    /// Create a new I32 immediate.
    pub fn i32(val: i32) -> Self {
        Self::I32(val as u32)
    }

    /// Create a new F32 immediate.
    pub fn f32(bits: u32) -> Self {
        Self::F32(bits)
    }

    /// Create a new F64 immediate.
    pub fn f64(bits: u64) -> Self {
        Self::F64(bits)
    }

    /// Create a new V128 immediate.
    pub fn v128(bits: i128) -> Self {
        Self::V128(bits)
    }

    /// Convert the immediate to i32, if possible.
    pub fn to_i32(&self) -> Option<i32> {
        match self {
            Self::I32(v) => Some(*v as i32),
            Self::I64(v) => i32::try_from(*v as i64).ok(),
            _ => None,
        }
    }

    /// Unwraps the underlying integer value as u64.
    ///
    /// # Panics
    ///
    /// This function panics if the underlying value can't be represented
    /// as u64.
    pub fn unwrap_as_u64(&self) -> u64 {
        match self {
            Self::I32(v) => *v as u64,
            Self::I64(v) => *v,
            Self::F32(v) => *v as u64,
            Self::F64(v) => *v,
            _ => unreachable!(),
        }
    }

    /// Get the operand size of the immediate.
    pub fn size(&self) -> OperandSize {
        match self {
            Self::I32(_) | Self::F32(_) => OperandSize::S32,
            Self::I64(_) | Self::F64(_) => OperandSize::S64,
            Self::V128(_) => OperandSize::S128,
        }
    }

    /// Get a little endian representation of the immediate.
    ///
    /// This method heap allocates and is intended to be used when adding
    /// values to the constant pool.
    pub fn to_bytes(&self) -> Vec<u8> {
        match self {
            Imm::I32(n) => n.to_le_bytes().to_vec(),
            Imm::I64(n) => n.to_le_bytes().to_vec(),
            Imm::F32(n) => n.to_le_bytes().to_vec(),
            Imm::F64(n) => n.to_le_bytes().to_vec(),
            Imm::V128(n) => n.to_le_bytes().to_vec(),
        }
    }
}
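// Minimal round-trip checks of the `OperandSize` and `Imm` helpers above.
#[cfg(test)]
mod operand_size_example {
    use super::*;

    #[test]
    fn operand_size_helpers() {
        let size = OperandSize::from_bytes(8);
        assert_eq!(size, OperandSize::S64);
        assert_eq!(size.num_bits(), 64);
        // `log2` is over the number of bits: 2^6 = 64.
        assert_eq!(size.log2(), 6);
        // Widening an 8-bit value to 64 bits is an `I64Extend8`.
        let extend: Option<Extend<Zero>> = OperandSize::S8.extend_to(OperandSize::S64);
        assert!(matches!(extend, Some(Extend::I64Extend8)));
    }

    #[test]
    fn immediates() {
        let imm = Imm::i32(-1);
        assert_eq!(imm.size(), OperandSize::S32);
        assert_eq!(imm.to_i32(), Some(-1));
        // Little-endian byte image, as used for constant-pool emission.
        assert_eq!(imm.to_bytes(), vec![0xff, 0xff, 0xff, 0xff]);
    }
}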
/// The location of the [VMContext] used for function calls.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum VMContextLoc {
    /// Dynamic, stored in the given register.
    Reg(Reg),
    /// The pinned [VMContext] register.
    Pinned,
    /// A different VMContext is loaded at the provided offset from the current
    /// VMContext.
    OffsetFromPinned(u32),
}

/// The maximum number of context arguments currently used across the compiler.
pub(crate) const MAX_CONTEXT_ARGS: usize = 2;

/// Out-of-band special purpose arguments used for function call emission.
///
/// We cannot rely on the value stack for these values given that inserting
/// register or memory values at arbitrary locations of the value stack has the
/// potential to break the stack ordering principle, which states that older
/// values must always precede newer values, effectively simulating the order
/// of values in the machine stack.
/// The [ContextArgs] are meant to be resolved at every callsite; in some cases
/// it might be possible to construct them early on, but given that they might
/// contain allocatable registers, it's preferred to construct them in
/// [FnCall::emit].
#[derive(Clone, Debug)]
pub(crate) enum ContextArgs {
    /// A single context argument is required; the current pinned [VMContext]
    /// register must be passed as the first argument of the function call.
    VMContext([VMContextLoc; 1]),
    /// The callee and caller context arguments are required. In this case, the
    /// callee context argument is usually stored into an allocatable register
    /// and the caller is always the current pinned [VMContext] pointer.
    CalleeAndCallerVMContext([VMContextLoc; MAX_CONTEXT_ARGS]),
}

impl ContextArgs {
    /// Construct a [ContextArgs] declaring the usage of the pinned [VMContext]
    /// register as both the caller and callee context arguments.
    pub fn pinned_callee_and_caller_vmctx() -> Self {
        Self::CalleeAndCallerVMContext([VMContextLoc::Pinned, VMContextLoc::Pinned])
    }

    /// Construct a [ContextArgs] that declares the usage of the pinned
    /// [VMContext] register as the only context argument.
    pub fn pinned_vmctx() -> Self {
        Self::VMContext([VMContextLoc::Pinned])
    }

    /// Construct a [ContextArgs] that declares the usage of a [VMContext]
    /// loaded indirectly from the pinned [VMContext] register as the only
    /// context argument.
    pub fn offset_from_pinned_vmctx(offset: u32) -> Self {
        Self::VMContext([VMContextLoc::OffsetFromPinned(offset)])
    }

    /// Construct a [ContextArgs] that declares a dynamic callee context and
    /// the pinned [VMContext] register as the context arguments.
    pub fn with_callee_and_pinned_caller(callee_vmctx: Reg) -> Self {
        Self::CalleeAndCallerVMContext([VMContextLoc::Reg(callee_vmctx), VMContextLoc::Pinned])
    }

    /// Get the length of the [ContextArgs].
    pub fn len(&self) -> usize {
        self.as_slice().len()
    }

    /// Get a slice of the context arguments.
    pub fn as_slice(&self) -> &[VMContextLoc] {
        match self {
            Self::VMContext(a) => a.as_slice(),
            Self::CalleeAndCallerVMContext(a) => a.as_slice(),
        }
    }
}
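// A minimal check of the context-argument shapes used at call sites.
#[cfg(test)]
mod context_args_example {
    use super::*;

    #[test]
    fn context_arg_shapes() {
        // A call that only needs the current context passes one argument.
        let single = ContextArgs::pinned_vmctx();
        assert_eq!(single.len(), 1);
        assert_eq!(single.as_slice(), &[VMContextLoc::Pinned]);

        // A call that distinguishes callee and caller contexts passes two.
        let pair = ContextArgs::pinned_callee_and_caller_vmctx();
        assert_eq!(pair.len(), MAX_CONTEXT_ARGS);
    }
}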
#[derive(Copy, Clone, Debug)]
pub(crate) enum CalleeKind {
    /// A function call to a raw address.
    Indirect(Reg),
    /// A function call to a local function.
    Direct(UserExternalNameRef),
}

impl CalleeKind {
    /// Creates a callee kind from a register.
    pub fn indirect(reg: Reg) -> Self {
        Self::Indirect(reg)
    }

    /// Creates a direct callee kind from a function name.
    pub fn direct(name: UserExternalNameRef) -> Self {
        Self::Direct(name)
    }
}

impl RegImm {
    /// Register constructor.
    pub fn reg(r: Reg) -> Self {
        RegImm::Reg(r)
    }

    /// I64 immediate constructor.
    pub fn i64(val: i64) -> Self {
        RegImm::Imm(Imm::i64(val))
    }

    /// I32 immediate constructor.
    pub fn i32(val: i32) -> Self {
        RegImm::Imm(Imm::i32(val))
    }

    /// F32 immediate, stored using its bits representation.
    pub fn f32(bits: u32) -> Self {
        RegImm::Imm(Imm::f32(bits))
    }

    /// F64 immediate, stored using its bits representation.
    pub fn f64(bits: u64) -> Self {
        RegImm::Imm(Imm::f64(bits))
    }

    /// V128 immediate.
    pub fn v128(bits: i128) -> Self {
        RegImm::Imm(Imm::v128(bits))
    }
}

impl From<Reg> for RegImm {
    fn from(r: Reg) -> Self {
        Self::Reg(r)
    }
}

#[derive(Debug)]
pub enum RoundingMode {
    Nearest,
    Up,
    Down,
    Zero,
}

/// Memory flags for trusted loads/stores.
pub const TRUSTED_FLAGS: MemFlags = MemFlags::trusted();

/// Flags used for WebAssembly loads / stores.
/// Untrusted by default so we don't set `no_trap`.
/// We also ensure that the endianness is the right one for WebAssembly.
pub const UNTRUSTED_FLAGS: MemFlags = MemFlags::new().with_endianness(Endianness::Little);

/// Generic MacroAssembler interface used by the code generation.
///
/// The MacroAssembler trait aims to expose an interface high-level enough that
/// each ISA can provide its own lowering to machine code. For example, for
/// WebAssembly operators that don't have a direct mapping to a machine
/// instruction, the interface defines a signature matching the WebAssembly
/// operator, allowing each implementation to lower such operators entirely.
/// This approach attributes more responsibility to the MacroAssembler, but
/// frees the caller from having to assemble the right sequence of instructions
/// at the operator callsite.
///
/// The interface defaults to a three-argument form for binary operations;
/// this allows a natural mapping to instructions for RISC architectures,
/// which use a three-argument form.
/// This approach allows for a more general interface that can be restricted
/// where needed, in the case of architectures that use a two-argument form.
pub(crate) trait MacroAssembler {
    /// The addressing mode.
    type Address: Copy + Debug;

    /// The pointer representation of the target ISA,
    /// used to access information from [`VMOffsets`].
    type Ptr: PtrSize;

    /// The ABI details of the target.
    type ABI: abi::ABI;

    /// Emit the function prologue.
    fn prologue(&mut self, vmctx: Reg) -> Result<()> {
        self.frame_setup()?;
        self.check_stack(vmctx)
    }

    /// Generate the frame setup sequence.
    fn frame_setup(&mut self) -> Result<()>;

    /// Generate the frame restore sequence.
    fn frame_restore(&mut self) -> Result<()>;

    /// Emit a stack check.
    fn check_stack(&mut self, vmctx: Reg) -> Result<()>;

    /// Emit the function epilogue.
    fn epilogue(&mut self) -> Result<()> {
        self.frame_restore()
    }

    /// Reserve stack space.
    fn reserve_stack(&mut self, bytes: u32) -> Result<()>;

    /// Free stack space.
    fn free_stack(&mut self, bytes: u32) -> Result<()>;
    /// Reset the stack pointer to the given offset.
    ///
    /// Used to reset the stack pointer to a given offset
    /// when dealing with unreachable code.
    fn reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()>;

    /// Get the address of a local slot.
    fn local_address(&mut self, local: &LocalSlot) -> Result<Self::Address>;

    /// Constructs an address with an offset that is relative to the
    /// current position of the stack pointer (e.g. `[sp + (sp_offset - offset)]`).
    fn address_from_sp(&self, offset: SPOffset) -> Result<Self::Address>;

    /// Constructs an address with an offset that is absolute to the
    /// current position of the stack pointer (e.g. `[sp + offset]`).
    fn address_at_sp(&self, offset: SPOffset) -> Result<Self::Address>;

    /// Alias for [`Self::address_at_reg`] using the VMContext register as
    /// a base. The VMContext register is derived from the ABI type that is
    /// associated to the MacroAssembler.
    fn address_at_vmctx(&self, offset: u32) -> Result<Self::Address>;

    /// Construct an address that is absolute to the current position
    /// of the given register.
    fn address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address>;

    /// Emit a function call to either a local or external function.
    fn call(
        &mut self,
        stack_args_size: u32,
        f: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>,
    ) -> Result<u32>;

    /// Acquire a scratch register and execute the given callback.
    fn with_scratch<T: ScratchType, R>(&mut self, f: impl FnOnce(&mut Self, Scratch) -> R) -> R;

    /// Convenience wrapper over [`Self::with_scratch`]; derives the register
    /// class for a particular Wasm value type.
    fn with_scratch_for<R>(
        &mut self,
        ty: WasmValType,
        f: impl FnOnce(&mut Self, Scratch) -> R,
    ) -> R {
        match ty {
            WasmValType::I32
            | WasmValType::I64
            | WasmValType::Ref(WasmRefType {
                heap_type: WasmHeapType::Func,
                ..
            }) => self.with_scratch::<IntScratch, _>(f),
            WasmValType::F32 | WasmValType::F64 | WasmValType::V128 => {
                self.with_scratch::<FloatScratch, _>(f)
            }
            _ => unimplemented!(),
        }
    }
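    // Illustrative sketch of how a caller might use `with_scratch_for` to pick
    // a class-appropriate scratch register for a value type. Commented out:
    // `masm`, `bits`, and `dst_addr` are hypothetical locals, not part of this
    // interface.
    //
    // masm.with_scratch_for(WasmValType::F64, |masm, scratch| {
    //     masm.mov(scratch.writable(), RegImm::f64(bits), OperandSize::S64)?;
    //     masm.store(RegImm::reg(scratch.inner()), dst_addr, OperandSize::S64)
    // })?;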
    /// Get stack pointer offset.
    fn sp_offset(&self) -> Result<SPOffset>;

    /// Perform a stack store.
    fn store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize) -> Result<()>;

    /// Alias for `MacroAssembler::store` with the operand size corresponding
    /// to the pointer size of the target.
    fn store_ptr(&mut self, src: Reg, dst: Self::Address) -> Result<()>;

    /// Perform a WebAssembly store.
    ///
    /// A WebAssembly store introduces several additional invariants compared
    /// to [Self::store]. More precisely, it can implicitly trap, in certain
    /// circumstances, even if explicit bounds checks are elided; in that
    /// sense, we consider this type of store as untrusted. It can also differ
    /// with regards to the endianness depending on the target ISA. For this
    /// reason, [Self::wasm_store] should be explicitly used when emitting
    /// WebAssembly stores.
    fn wasm_store(&mut self, src: Reg, dst: Self::Address, store_kind: StoreKind) -> Result<()>;

    /// Perform a zero-extended stack load.
    fn load(&mut self, src: Self::Address, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a WebAssembly load.
    ///
    /// A WebAssembly load introduces several additional invariants compared
    /// to [Self::load]. More precisely, it can implicitly trap, in certain
    /// circumstances, even if explicit bounds checks are elided; in that
    /// sense, we consider this type of load as untrusted. It can also differ
    /// with regards to the endianness depending on the target ISA. For this
    /// reason, [Self::wasm_load] should be explicitly used when emitting
    /// WebAssembly loads.
    fn wasm_load(&mut self, src: Self::Address, dst: WritableReg, kind: LoadKind) -> Result<()>;

    /// Alias for `MacroAssembler::load` with the operand size corresponding
    /// to the pointer size of the target.
    fn load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()>;

    /// Computes the effective address and stores the result in the destination
    /// register.
    fn compute_addr(
        &mut self,
        _src: Self::Address,
        _dst: WritableReg,
        _size: OperandSize,
    ) -> Result<()>;

    /// Pop a value from the machine stack into the given register.
    fn pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a move.
    fn mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a conditional move.
    fn cmov(
        &mut self,
        dst: WritableReg,
        src: Reg,
        cc: IntCmpKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Performs a memory move of bytes from `src` to `dst`.
    /// Bytes are moved in blocks of 8 bytes, where possible; for example,
    /// moving 12 bytes with an 8-byte word size emits one word-sized move
    /// followed by one 4-byte move for the remainder.
    fn memmove(
        &mut self,
        src: SPOffset,
        dst: SPOffset,
        bytes: u32,
        direction: MemMoveDirection,
    ) -> Result<()> {
        match direction {
            MemMoveDirection::LowToHigh => debug_assert!(dst.as_u32() < src.as_u32()),
            MemMoveDirection::HighToLow => debug_assert!(dst.as_u32() > src.as_u32()),
        }
        // At least 4 byte aligned.
        debug_assert!(bytes % 4 == 0);
        let mut remaining = bytes;
        let word_bytes = <Self::ABI as abi::ABI>::word_bytes();

        let word_bytes = word_bytes as u32;

        let mut dst_offs;
        let mut src_offs;
        match direction {
            MemMoveDirection::LowToHigh => {
                dst_offs = dst.as_u32() - bytes;
                src_offs = src.as_u32() - bytes;
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        remaining -= word_bytes;
                        dst_offs += word_bytes;
                        src_offs += word_bytes;

                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;
                    }
                    wasmtime_environ::error::Ok(())
                })?;
            }
            MemMoveDirection::HighToLow => {
                // Go from the end to the beginning to handle overlapping addresses.
                src_offs = src.as_u32();
                dst_offs = dst.as_u32();
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;

                        remaining -= word_bytes;
                        src_offs -= word_bytes;
                        dst_offs -= word_bytes;
                    }
                    wasmtime_environ::error::Ok(())
                })?;
            }
        }

        if remaining > 0 {
            let half_word = word_bytes / 2;
            let ptr_size = OperandSize::from_bytes(half_word as u8);
            debug_assert!(remaining == half_word);
            // Need to move the offsets ahead in the `LowToHigh` case to
            // compensate for the initial subtraction of `bytes`.
            if direction == MemMoveDirection::LowToHigh {
                dst_offs += half_word;
                src_offs += half_word;
            }

            self.with_scratch::<IntScratch, _>(|masm, scratch| {
                masm.load(
                    masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                    scratch.writable(),
                    ptr_size,
                )?;
                masm.store(
                    scratch.inner().into(),
                    masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                    ptr_size,
                )?;
                wasmtime_environ::error::Ok(())
            })?;
        }
        Ok(())
    }

    /// Perform add operation.
    fn add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a checked unsigned integer addition, emitting the provided trap
    /// if the addition overflows.
    fn checked_uadd(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: RegImm,
        size: OperandSize,
        trap: TrapCode,
    ) -> Result<()>;

    /// Perform subtraction operation.
    fn sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform multiplication operation.
    fn mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a floating point add operation.
    fn float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point subtraction operation.
    fn float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point multiply operation.
    fn float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point divide operation.
    fn float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point minimum operation. In x86, this will emit
    /// multiple instructions.
    fn float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point maximum operation. In x86, this will emit
    /// multiple instructions.
    fn float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
    /// Perform a floating point copysign operation. In x86, this will emit
    /// multiple instructions.
    fn float_copysign(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Perform a floating point abs operation.
    fn float_abs(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a floating point negation operation.
    fn float_neg(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a floating point rounding operation.
    fn float_round<
        F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext<Emission>, &mut Self) -> Result<()>,
    >(
        &mut self,
        mode: RoundingMode,
        env: &mut FuncEnv<Self::Ptr>,
        context: &mut CodeGenContext<Emission>,
        size: OperandSize,
        fallback: F,
    ) -> Result<()>;

    /// Perform a floating point square root operation.
    fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;

    /// Perform logical and operation.
    fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform logical or operation.
    fn or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform logical exclusive or operation.
    fn xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a shift operation between a register and an immediate.
    fn shift_ir(
        &mut self,
        dst: WritableReg,
        imm: Imm,
        lhs: Reg,
        kind: ShiftKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Perform a shift operation between two registers.
    /// This case is special in that some architectures have specific
    /// expectations regarding the location of the instruction arguments. To
    /// free the caller from having to deal with the architecture-specific
    /// constraints we give this function access to the code generation
    /// context, allowing each implementation to decide the lowering path.
    fn shift(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: ShiftKind,
        size: OperandSize,
    ) -> Result<()>;
    /// Perform division operation.
    /// Division is special in that some architectures have specific
    /// expectations regarding the location of the instruction
    /// arguments and regarding the location of the quotient /
    /// remainder. To free the caller from having to deal with the
    /// architecture-specific constraints we give this function access
    /// to the code generation context, allowing each implementation
    /// to decide the lowering path. For cases in which division is an
    /// unconstrained binary operation, the caller can decide to use
    /// the `CodeGenContext::i32_binop` or `CodeGenContext::i64_binop`
    /// functions.
    fn div(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: DivKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Calculate remainder.
    fn rem(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: RemKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Compares `src1` against `src2` for the side effect of setting processor
    /// flags.
    ///
    /// Note that `src1` is the left-hand-side of the comparison and `src2` is
    /// the right-hand-side, so if testing `a < b` then `src1 == a` and
    /// `src2 == b`.
    fn cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()>;

    /// Compare src and dst and put the result in dst.
    /// This function will potentially emit a series of instructions.
    ///
    /// The initial value in `dst` is the left-hand-side of the comparison and
    /// the initial value in `src` is the right-hand-side of the comparison.
    /// That means for `a < b`, `dst == a` and `src == b`.
    fn cmp_with_set(
        &mut self,
        dst: WritableReg,
        src: RegImm,
        kind: IntCmpKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Compare floats in src1 and src2 and put the result in dst.
    /// In x86, this will emit multiple instructions.
    fn float_cmp_with_set(
        &mut self,
        dst: WritableReg,
        src1: Reg,
        src2: Reg,
        kind: FloatCmpKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Count the number of leading zeroes in src and put the result in dst.
    /// In x64, this will emit multiple instructions if the `has_lzcnt` flag is
    /// false.
    fn clz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;

    /// Count the number of trailing zeroes in src and put the result in dst.
    /// In x64, this will emit multiple instructions if the `has_tzcnt` flag is
    /// false.
    fn ctz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;

    /// Push the register to the stack, returning the stack slot metadata.
    //
    // NB: The stack alignment should not be assumed after any call to `push`,
    // unless explicitly aligned otherwise. Typically, stack alignment is
    // maintained at call sites and during the execution of epilogues.
    fn push(&mut self, src: Reg, size: OperandSize) -> Result<StackSlot>;

    /// Finalize the assembly and return the result.
    fn finalize(self, base: Option<SourceLoc>) -> Result<MachBufferFinalized<Final>>;

    /// Zero a particular register.
    fn zero(&mut self, reg: WritableReg) -> Result<()>;
    /// Count the number of 1 bits in src and put the result in dst. In x64,
    /// this will emit multiple instructions if the `has_popcnt` flag is false.
    fn popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()>;

    /// Converts an i64 to an i32 by discarding the high 32 bits.
    fn wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()>;

    /// Extends an integer of a given size to a larger size.
    fn extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()>;

    /// Emits one or more instructions to perform a signed truncation of a
    /// float into an integer.
    fn signed_truncate(
        &mut self,
        dst: WritableReg,
        src: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
        kind: TruncKind,
    ) -> Result<()>;

    /// Emits one or more instructions to perform an unsigned truncation of a
    /// float into an integer.
    fn unsigned_truncate(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        src_size: OperandSize,
        dst_size: OperandSize,
        kind: TruncKind,
    ) -> Result<()>;

    /// Emits one or more instructions to perform a signed convert of an
    /// integer into a float.
    fn signed_convert(
        &mut self,
        dst: WritableReg,
        src: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) -> Result<()>;

    /// Emits one or more instructions to perform an unsigned convert of an
    /// integer into a float.
    fn unsigned_convert(
        &mut self,
        dst: WritableReg,
        src: Reg,
        tmp_gpr: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) -> Result<()>;

    /// Reinterpret a float as an integer.
    fn reinterpret_float_as_int(
        &mut self,
        dst: WritableReg,
        src: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Reinterpret an integer as a float.
    fn reinterpret_int_as_float(
        &mut self,
        dst: WritableReg,
        src: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Demote an f64 to an f32.
    fn demote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;

    /// Promote an f32 to an f64.
    fn promote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
    /// Zero a given memory range.
    ///
    /// The default implementation divides the given memory range
    /// into word-sized slots. Then it unrolls a series of store
    /// instructions, effectively assigning zero to each slot.
    fn zero_mem_range(&mut self, mem: &Range<u32>) -> Result<()> {
        let word_size = <Self::ABI as abi::ABI>::word_bytes() as u32;
        if mem.is_empty() {
            return Ok(());
        }

        let start = if mem.start % word_size == 0 {
            mem.start
        } else {
            // Ensure that the start of the range is at least 4-byte aligned.
            assert!(mem.start % 4 == 0);
            let start = align_to(mem.start, word_size);
            let addr: Self::Address = self.local_address(&LocalSlot::i32(start))?;
            self.store(RegImm::i32(0), addr, OperandSize::S32)?;
            // Ensure that the new start of the range is word-size aligned.
            assert!(start % word_size == 0);
            start
        };

        let end = align_to(mem.end, word_size);
        let slots = (end - start) / word_size;

        if slots == 1 {
            let slot = LocalSlot::i64(start + word_size);
            let addr: Self::Address = self.local_address(&slot)?;
            self.store(RegImm::i64(0), addr, OperandSize::S64)?;
        } else {
            // TODO
            // Add an upper bound to this generation;
            // given a considerably large amount of slots
            // this will be inefficient.
            self.with_scratch::<IntScratch, _>(|masm, scratch| {
                masm.zero(scratch.writable())?;
                let zero = RegImm::reg(scratch.inner());

                for step in (start..end).step_by(word_size as usize) {
                    let slot = LocalSlot::i64(step + word_size);
                    let addr: Self::Address = masm.local_address(&slot)?;
                    masm.store(zero, addr, OperandSize::S64)?;
                }
                wasmtime_environ::error::Ok(())
            })?;
        }

        Ok(())
    }

    /// Generate a label.
    fn get_label(&mut self) -> Result<MachLabel>;

    /// Bind the given label at the current code offset.
    fn bind(&mut self, label: MachLabel) -> Result<()>;

    /// Conditional branch.
    ///
    /// Performs a comparison between the two operands,
    /// and immediately after emits a jump to the given
    /// label destination if the condition is met.
    fn branch(
        &mut self,
        kind: IntCmpKind,
        lhs: Reg,
        rhs: RegImm,
        taken: MachLabel,
        size: OperandSize,
    ) -> Result<()>;

    /// Emits an unconditional jump to the given label.
    fn jmp(&mut self, target: MachLabel) -> Result<()>;
The default label is specified as1959/// the last element of the targets slice.1960fn jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()>;19611962/// Emit an unreachable code trap.1963fn unreachable(&mut self) -> Result<()>;19641965/// Emit an unconditional trap.1966fn trap(&mut self, code: TrapCode) -> Result<()>;19671968/// Traps if the condition code is met.1969fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()>;19701971/// Trap if the source register is zero.1972fn trapz(&mut self, src: Reg, code: TrapCode) -> Result<()>;19731974/// Ensures that the stack pointer is correctly positioned before an unconditional1975/// jump according to the requirements of the destination target.1976fn ensure_sp_for_jump(&mut self, target: SPOffset) -> Result<()> {1977let bytes = self1978.sp_offset()?1979.as_u32()1980.checked_sub(target.as_u32())1981.unwrap_or(0);19821983if bytes > 0 {1984self.free_stack(bytes)?;1985}19861987Ok(())1988}19891990/// Mark the start of a source location returning the machine code offset1991/// and the relative source code location.1992fn start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)>;19931994/// Mark the end of a source location.1995fn end_source_loc(&mut self) -> Result<()>;19961997/// The current offset, in bytes from the beginning of the function.1998fn current_code_offset(&self) -> Result<CodeOffset>;19992000/// Performs a 128-bit addition2001fn add128(2002&mut self,2003dst_lo: WritableReg,2004dst_hi: WritableReg,2005lhs_lo: Reg,2006lhs_hi: Reg,2007rhs_lo: Reg,2008rhs_hi: Reg,2009) -> Result<()>;20102011/// Performs a 128-bit subtraction2012fn sub128(2013&mut self,2014dst_lo: WritableReg,2015dst_hi: WritableReg,2016lhs_lo: Reg,2017lhs_hi: Reg,2018rhs_lo: Reg,2019rhs_hi: Reg,2020) -> Result<()>;20212022/// Performs a widening multiplication from two 64-bit operands into a2023/// 128-bit result.2024///2025/// Note that some platforms require special handling of registers in this2026/// instruction (e.g. 
    /// Performs a widening multiplication from two 64-bit operands into a
    /// 128-bit result.
    ///
    /// Note that some platforms require special handling of registers in this
    /// instruction (e.g. x64) so full access to `CodeGenContext` is provided.
    fn mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind)
        -> Result<()>;

    /// Takes the value in a `src` operand and replicates it across lanes of
    /// `size` in a destination result.
    fn splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()>;

    /// Performs a shuffle between two 128-bit vectors into a 128-bit result
    /// using `lanes` as a mask to select which indexes to copy.
    fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>;

    /// Performs a swizzle between two 128-bit vectors into a 128-bit result.
    fn swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()>;

    /// Performs the RMW `op` operation on the passed `addr`.
    ///
    /// The value *before* the operation was performed is written back to the `operand` register.
    fn atomic_rmw(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        addr: Self::Address,
        size: OperandSize,
        op: RmwOp,
        flags: MemFlags,
        extend: Option<Extend<Zero>>,
    ) -> Result<()>;

    /// Extracts the scalar value from `src` in `lane` to `dst`.
    fn extract_lane(
        &mut self,
        src: Reg,
        dst: WritableReg,
        lane: u8,
        kind: ExtractLaneKind,
    ) -> Result<()>;

    /// Replaces the value in `lane` in `dst` with the value in `src`.
    fn replace_lane(
        &mut self,
        src: RegImm,
        dst: WritableReg,
        lane: u8,
        kind: ReplaceLaneKind,
    ) -> Result<()>;
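    // Illustrative sketch (not part of this trait): the sequential semantics
    // an `atomic_rmw` implementation provides, shown on a plain integer cell;
    // the value *before* the update is what gets returned. `rmw_semantics` is
    // a hypothetical name.
    //
    //     fn rmw_semantics(cell: &mut u64, op: RmwOp, operand: u64) -> u64 {
    //         let old = *cell;
    //         *cell = match op {
    //             RmwOp::Add => old.wrapping_add(operand),
    //             RmwOp::Sub => old.wrapping_sub(operand),
    //             RmwOp::Xchg => operand,
    //             RmwOp::And => old & operand,
    //             RmwOp::Or => old | operand,
    //             RmwOp::Xor => old ^ operand,
    //         };
    //         old
    //     }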
    /// Perform an atomic CAS (compare-and-swap) operation with the value at `addr`, and `expected`
    /// and `replacement` (at the top of the context's stack).
    ///
    /// This method takes the `CodeGenContext` as an argument to accommodate architectures that
    /// expect parameters in specific registers. The context stack contains the `replacement`
    /// and `expected` values, in that order. The implementer is expected to push the value that
    /// was at `addr` before the update to the context's stack before returning.
    fn atomic_cas(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        addr: Self::Address,
        size: OperandSize,
        flags: MemFlags,
        extend: Option<Extend<Zero>>,
    ) -> Result<()>;

    /// Compares vector registers `lhs` and `rhs` for equality and puts the
    /// vector of results in `dst`.
    fn v128_eq(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorEqualityKind,
    ) -> Result<()>;

    /// Compares vector registers `lhs` and `rhs` for inequality and puts the
    /// vector of results in `dst`.
    fn v128_ne(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorEqualityKind,
    ) -> Result<()>;

    /// Performs a less than comparison with vector registers `lhs` and `rhs`
    /// and puts the vector of results in `dst`.
    fn v128_lt(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a less than or equal comparison with vector registers `lhs`
    /// and `rhs` and puts the vector of results in `dst`.
    fn v128_le(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a greater than comparison with vector registers `lhs` and
    /// `rhs` and puts the vector of results in `dst`.
    fn v128_gt(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a greater than or equal comparison with vector registers `lhs`
    /// and `rhs` and puts the vector of results in `dst`.
    fn v128_ge(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Emit a memory fence.
    fn fence(&mut self) -> Result<()>;

    /// Perform a logical `not` operation on the 128-bit vector value in `dst`.
    fn v128_not(&mut self, dst: WritableReg) -> Result<()>;

    /// Perform a logical `and` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    fn v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `and_not` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    ///
    /// `and_not` is not commutative: `dst = !src1 & src2`.
    fn v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `or` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    fn v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `xor` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    fn v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Given two 128-bit vectors `src1` and `src2`, and a 128-bit bitmask `mask`, selects bits
    /// from `src1` when the mask is 1, and from `src2` when the mask is 0.
    ///
    /// This is equivalent to: `v128.or(v128.and(src1, mask), v128.and(src2, v128.not(mask)))`.
    fn v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()>;

    /// If any bit in `src` is 1, set `dst` to 1, otherwise set `dst` to 0.
    fn v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>;
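    // Illustrative sketch (not part of this trait): the bit-level selection
    // `v128_bitselect` describes above, expressed on plain 128-bit integers.
    // `bitselect_semantics` is a hypothetical name.
    //
    //     fn bitselect_semantics(src1: u128, src2: u128, mask: u128) -> u128 {
    //         (src1 & mask) | (src2 & !mask)
    //     }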
    /// Convert a vector of integers to a vector of floating point values.
    fn v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()>;

    /// Convert two input vectors into a smaller lane vector by narrowing each
    /// lane.
    fn v128_narrow(
        &mut self,
        src1: Reg,
        src2: Reg,
        dst: WritableReg,
        kind: V128NarrowKind,
    ) -> Result<()>;

    /// Converts a vector containing two 64-bit floating point lanes to two
    /// 32-bit floating point lanes, setting the two higher lanes to 0.
    fn v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

    /// Converts a vector containing four 32-bit floating point lanes to two
    /// 64-bit floating point lanes. Only the two lower lanes are converted.
    fn v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

    /// Converts the low or high half of the smaller lane vector to a larger lane
    /// vector.
    fn v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()>;

    /// Perform a vector add between `lhs` and `rhs`, placing the result in
    /// `dst`.
    fn v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()>;

    /// Perform a vector sub between `lhs` and `rhs`, placing the result in `dst`.
    fn v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()>;

    /// Perform a vector lane-wise mul between `lhs` and `rhs`, placing the result in `dst`.
    fn v128_mul(&mut self, context: &mut CodeGenContext<Emission>, kind: V128MulKind)
        -> Result<()>;

    /// Perform an absolute operation on a vector.
    fn v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()>;

    /// Vectorized negate of the content of `op`.
    fn v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()>;

    /// Perform the shift operation specified by `kind`, by the shift amount specified by the 32-bit
    /// integer at the top of the stack, on the 128-bit vector specified by the second value
    /// from the top of the stack, interpreted as packed integers of size `lane_width`.
    ///
    /// The shift amount is taken modulo `lane_width`.
    fn v128_shift(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        lane_width: OperandSize,
        kind: ShiftKind,
    ) -> Result<()>;

    /// Perform a saturating integer q-format rounding multiplication.
    fn v128_q15mulr_sat_s(
        &mut self,
        lhs: Reg,
        rhs: Reg,
        dst: WritableReg,
        size: OperandSize,
    ) -> Result<()>;

    /// Sets `dst` to 1 if all lanes in `src` are non-zero, sets `dst` to 0
    /// otherwise.
    fn v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Extracts the high bit of each lane in `src` and produces a scalar mask
    /// with all bits concatenated in `dst`.
    fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
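    // Illustrative sketch (not part of this trait): the mask `v128_bitmask`
    // produces, shown for four 32-bit lanes; lane `i`'s sign bit becomes bit
    // `i` of the result. `bitmask_semantics` is a hypothetical name.
    //
    //     fn bitmask_semantics(lanes: [i32; 4]) -> u32 {
    //         let mut mask = 0;
    //         for (i, lane) in lanes.iter().enumerate() {
    //             mask |= ((*lane as u32) >> 31) << i;
    //         }
    //         mask
    //     }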
    /// Lane-wise truncation operation.
    ///
    /// If using an integer kind of truncation, then this performs a lane-wise
    /// saturating conversion from float to integer using the IEEE
    /// `convertToIntegerTowardZero` function. If any input lane is NaN, the
    /// resulting lane is 0. If the rounded integer value of a lane is outside
    /// the range of the destination type, the result is saturated to the
    /// nearest representable integer value.
    fn v128_trunc(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: V128TruncKind,
    ) -> Result<()>;

    /// Perform a lane-wise `min` operation between `src1` and `src2`.
    fn v128_min(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MinKind)
        -> Result<()>;

    /// Perform a lane-wise `max` operation between `src1` and `src2`.
    fn v128_max(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MaxKind)
        -> Result<()>;

    /// Perform the lane-wise integer extended multiplication, producing results twice as wide as
    /// the inputs. This is equivalent to an extend followed by a multiply.
    ///
    /// The extension to be performed is inferred from the `lane_width` and the `kind` of extmul;
    /// e.g., if `lane_width` is `S16` and `kind` is `LowSigned`, then we sign-extend the lower
    /// 8 bits of the 16-bit lanes.
    fn v128_extmul(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: V128ExtMulKind,
    ) -> Result<()>;

    /// Perform the lane-wise integer extended pairwise addition, producing extended results
    /// (twice as wide as the inputs).
    fn v128_extadd_pairwise(
        &mut self,
        src: Reg,
        dst: WritableReg,
        kind: V128ExtAddKind,
    ) -> Result<()>;

    /// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add
    /// adjacent pairs of the 32-bit results.
    fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>;

    /// Count the number of bits set in each lane.
    fn v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()>;

    /// Lane-wise rounding average of vectors of integers in `lhs` and `rhs`,
    /// placing the results in `dst`.
    fn v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise IEEE division on vectors of floats.
    fn v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise IEEE square root of a vector of floats.
    fn v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise ceiling of a vector of floats.
    fn v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise flooring of a vector of floats.
    fn v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise rounding to the nearest integer for a vector of floats.
    fn v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise minimum value, defined as `rhs < lhs ? rhs : lhs`.
    fn v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise maximum value, defined as `lhs < rhs ? rhs : lhs`.
    fn v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
}
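// Illustrative sketch (not part of this module): `v128_pmin`/`v128_pmax` are
// plain compare-and-select operations, so unlike IEEE `min`/`max` they return
// `lhs` whenever a NaN makes the comparison false. The function names below
// are hypothetical.
//
//     fn pmin_semantics(lhs: f32, rhs: f32) -> f32 {
//         if rhs < lhs { rhs } else { lhs } // NaN compares false, so `lhs` wins
//     }
//
//     fn pmax_semantics(lhs: f32, rhs: f32) -> f32 {
//         if lhs < rhs { rhs } else { lhs } // NaN compares false, so `lhs` wins
//     }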