use crate::abi::{self, LocalSlot, align_to};
use crate::codegen::{CodeGenContext, Emission, FuncEnv};
use crate::isa::{
    CallingConvention,
    reg::{Reg, RegClass, WritableReg, writable},
};
use anyhow::Result;
use cranelift_codegen::{
    Final, MachBufferFinalized, MachLabel,
    binemit::CodeOffset,
    ir::{Endianness, MemFlags, RelSourceLoc, SourceLoc, UserExternalNameRef},
};
use std::{fmt::Debug, ops::Range};
use wasmtime_environ::{PtrSize, WasmHeapType, WasmRefType, WasmValType};

pub(crate) use cranelift_codegen::ir::TrapCode;

#[derive(Eq, PartialEq)]
pub(crate) enum DivKind {
    /// Signed division.
    Signed,
    /// Unsigned division.
    Unsigned,
}

/// Represents the `memory.atomic.wait*` kind.
#[derive(Debug, Clone, Copy)]
pub(crate) enum AtomicWaitKind {
    Wait32,
    Wait64,
}

/// Remainder kind.
#[derive(Copy, Clone)]
pub(crate) enum RemKind {
    /// Signed remainder.
    Signed,
    /// Unsigned remainder.
    Unsigned,
}

impl RemKind {
    pub fn is_signed(&self) -> bool {
        matches!(self, Self::Signed)
    }
}

/// Kinds of vector min operation supported by WebAssembly.
pub(crate) enum V128MinKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}

impl V128MinKind {
    /// The size of each lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
            Self::F64x2 => OperandSize::S64,
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
        }
    }
}

/// Kinds of vector max operation supported by WebAssembly.
pub(crate) enum V128MaxKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}

impl V128MaxKind {
    /// The size of each lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
            Self::F64x2 => OperandSize::S64,
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
        }
    }
}

#[derive(Eq, PartialEq)]
pub(crate) enum MulWideKind {
    Signed,
    Unsigned,
}

/// Type of operation for a read-modify-write instruction.
pub(crate) enum RmwOp {
    Add,
    Sub,
    Xchg,
    And,
    Or,
    Xor,
}

/// The direction to perform the memory move.
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum MemMoveDirection {
    /// From high memory addresses to low memory addresses.
    /// Invariant: the source location is closer to the FP than the destination
    /// location, which will be closer to the SP.
    HighToLow,
    /// From low memory addresses to high memory addresses.
    /// Invariant: the source location is closer to the SP than the destination
    /// location, which will be closer to the FP.
    LowToHigh,
}
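// Hedged illustration, not part of the upstream file: spot-checks of the
// lane-size mappings documented on `V128MinKind` and `V128MaxKind`.
#[cfg(test)]
mod v128_min_max_lane_size_tests {
    use super::*;

    #[test]
    fn min_and_max_kinds_agree_on_lane_sizes() {
        assert_eq!(V128MinKind::I8x16S.lane_size(), OperandSize::S8);
        assert_eq!(V128MinKind::F64x2.lane_size(), OperandSize::S64);
        assert_eq!(V128MaxKind::I16x8U.lane_size(), OperandSize::S16);
        assert_eq!(V128MaxKind::F32x4.lane_size(), OperandSize::S32);
    }
}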
/// Classifies how to treat float-to-int conversions.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum TruncKind {
    /// Saturating conversion. If the source value is greater than the maximum
    /// value of the destination type, the result is clamped to the
    /// destination maximum value.
    Checked,
    /// An exception is raised if the source value is greater than the maximum
    /// value of the destination type.
    Unchecked,
}

impl TruncKind {
    /// Returns true if the truncation kind is checked.
    pub(crate) fn is_checked(&self) -> bool {
        *self == TruncKind::Checked
    }

    /// Returns `true` if the trunc kind is [`Unchecked`].
    ///
    /// [`Unchecked`]: TruncKind::Unchecked
    #[must_use]
    pub(crate) fn is_unchecked(&self) -> bool {
        matches!(self, Self::Unchecked)
    }
}

/// Representation of the stack pointer offset.
#[derive(Copy, Clone, Eq, PartialEq, Debug, PartialOrd, Ord, Default)]
pub struct SPOffset(u32);

impl SPOffset {
    pub fn from_u32(offs: u32) -> Self {
        Self(offs)
    }

    pub fn as_u32(&self) -> u32 {
        self.0
    }
}

/// A stack slot.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct StackSlot {
    /// The location of the slot, relative to the stack pointer.
    pub offset: SPOffset,
    /// The size of the slot, in bytes.
    pub size: u32,
}

impl StackSlot {
    pub fn new(offs: SPOffset, size: u32) -> Self {
        Self { offset: offs, size }
    }
}

pub trait ScratchType {
    /// Derive the register class from the scratch register type.
    fn reg_class() -> RegClass;
}

/// A scratch register type of integer class.
pub struct IntScratch;
/// A scratch register type of floating point class.
pub struct FloatScratch;

impl ScratchType for IntScratch {
    fn reg_class() -> RegClass {
        RegClass::Int
    }
}

impl ScratchType for FloatScratch {
    fn reg_class() -> RegClass {
        RegClass::Float
    }
}

/// A scratch register scope.
pub struct Scratch(Reg);

impl Scratch {
    pub fn new(r: Reg) -> Self {
        Self(r)
    }

    #[inline]
    pub fn inner(&self) -> Reg {
        self.0
    }

    #[inline]
    pub fn writable(&self) -> WritableReg {
        writable!(self.0)
    }
}

/// Kinds of integer binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum IntCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Signed less than.
    LtS,
    /// Unsigned less than.
    LtU,
    /// Signed greater than.
    GtS,
    /// Unsigned greater than.
    GtU,
    /// Signed less than or equal.
    LeS,
    /// Unsigned less than or equal.
    LeU,
    /// Signed greater than or equal.
    GeS,
    /// Unsigned greater than or equal.
    GeU,
}
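// Hedged illustration, not part of the upstream file: the two `TruncKind`
// predicates are exact complements, per the documentation above.
#[cfg(test)]
mod trunc_kind_tests {
    use super::*;

    #[test]
    fn predicates_are_complementary() {
        assert!(TruncKind::Checked.is_checked());
        assert!(!TruncKind::Checked.is_unchecked());
        assert!(TruncKind::Unchecked.is_unchecked());
        assert!(!TruncKind::Unchecked.is_checked());
    }
}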
/// Kinds of float binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug)]
pub(crate) enum FloatCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Less than.
    Lt,
    /// Greater than.
    Gt,
    /// Less than or equal.
    Le,
    /// Greater than or equal.
    Ge,
}

/// Kinds of shifts in WebAssembly. The [`masm`] implementation for each ISA is
/// responsible for emitting the correct sequence of instructions when
/// lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum ShiftKind {
    /// Left shift.
    Shl,
    /// Signed right shift.
    ShrS,
    /// Unsigned right shift.
    ShrU,
    /// Left rotate.
    Rotl,
    /// Right rotate.
    Rotr,
}

/// Kinds of extends in WebAssembly. Each MacroAssembler implementation
/// is responsible for emitting the correct sequence of instructions when
/// lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum ExtendKind {
    Signed(Extend<Signed>),
    Unsigned(Extend<Zero>),
}

#[derive(Copy, Clone)]
pub(crate) enum Signed {}
#[derive(Copy, Clone)]
pub(crate) enum Zero {}

pub(crate) trait ExtendType {}

impl ExtendType for Signed {}
impl ExtendType for Zero {}

#[derive(Copy, Clone)]
pub(crate) enum Extend<T: ExtendType> {
    /// 8 to 32 bit extend.
    I32Extend8,
    /// 16 to 32 bit extend.
    I32Extend16,
    /// 8 to 64 bit extend.
    I64Extend8,
    /// 16 to 64 bit extend.
    I64Extend16,
    /// 32 to 64 bit extend.
    I64Extend32,

    /// Variant to hold the kind of extend marker.
    ///
    /// This is `Signed` or `Zero`; these are empty enums, which means that this
    /// variant cannot be constructed.
    __Kind(T),
}

impl From<Extend<Zero>> for ExtendKind {
    fn from(value: Extend<Zero>) -> Self {
        ExtendKind::Unsigned(value)
    }
}

impl<T: ExtendType> Extend<T> {
    pub fn from_size(&self) -> OperandSize {
        match self {
            Extend::I32Extend8 | Extend::I64Extend8 => OperandSize::S8,
            Extend::I32Extend16 | Extend::I64Extend16 => OperandSize::S16,
            Extend::I64Extend32 => OperandSize::S32,
            Extend::__Kind(_) => unreachable!(),
        }
    }

    pub fn to_size(&self) -> OperandSize {
        match self {
            Extend::I32Extend8 | Extend::I32Extend16 => OperandSize::S32,
            Extend::I64Extend8 | Extend::I64Extend16 | Extend::I64Extend32 => OperandSize::S64,
            Extend::__Kind(_) => unreachable!(),
        }
    }

    pub fn from_bits(&self) -> u8 {
        self.from_size().num_bits()
    }

    pub fn to_bits(&self) -> u8 {
        self.to_size().num_bits()
    }
}

impl From<Extend<Signed>> for ExtendKind {
    fn from(value: Extend<Signed>) -> Self {
        ExtendKind::Signed(value)
    }
}

impl ExtendKind {
    pub fn signed(&self) -> bool {
        match self {
            Self::Signed(_) => true,
            _ => false,
        }
    }

    pub fn from_bits(&self) -> u8 {
        match self {
            Self::Signed(s) => s.from_bits(),
            Self::Unsigned(u) => u.from_bits(),
        }
    }

    pub fn to_bits(&self) -> u8 {
        match self {
            Self::Signed(s) => s.to_bits(),
            Self::Unsigned(u) => u.to_bits(),
        }
    }
}
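// Hedged illustration, not part of the upstream file: how the `Extend`
// helpers compose; an 8-to-64-bit signed extend reads 8 bits and produces 64.
#[cfg(test)]
mod extend_kind_tests {
    use super::*;

    #[test]
    fn extend_bit_widths() {
        let kind: ExtendKind = Extend::<Signed>::I64Extend8.into();
        assert!(kind.signed());
        assert_eq!(kind.from_bits(), 8);
        assert_eq!(kind.to_bits(), 64);
    }
}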
/// Kinds of vector load and extends in WebAssembly. Each MacroAssembler
/// implementation is responsible for emitting the correct sequence of
/// instructions when lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum V128LoadExtendKind {
    /// Sign extends eight 8 bit integers to eight 16 bit lanes.
    E8x8S,
    /// Zero extends eight 8 bit integers to eight 16 bit lanes.
    E8x8U,
    /// Sign extends four 16 bit integers to four 32 bit lanes.
    E16x4S,
    /// Zero extends four 16 bit integers to four 32 bit lanes.
    E16x4U,
    /// Sign extends two 32 bit integers to two 64 bit lanes.
    E32x2S,
    /// Zero extends two 32 bit integers to two 64 bit lanes.
    E32x2U,
}

/// Kinds of splat loads supported by WebAssembly.
pub(crate) enum SplatLoadKind {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
}

/// Kinds of splat supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum SplatKind {
    /// 8 bit integer.
    I8x16,
    /// 16 bit integer.
    I16x8,
    /// 32 bit integer.
    I32x4,
    /// 64 bit integer.
    I64x2,
    /// 32 bit float.
    F32x4,
    /// 64 bit float.
    F64x2,
}

impl SplatKind {
    /// The lane size to use for different kinds of splats.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            SplatKind::I8x16 => OperandSize::S8,
            SplatKind::I16x8 => OperandSize::S16,
            SplatKind::I32x4 | SplatKind::F32x4 => OperandSize::S32,
            SplatKind::I64x2 | SplatKind::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of extract lane supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum ExtractLaneKind {
    /// 16 lanes of 8-bit integers sign extended to 32-bits.
    I8x16S,
    /// 16 lanes of 8-bit integers zero extended to 32-bits.
    I8x16U,
    /// 8 lanes of 16-bit integers sign extended to 32-bits.
    I16x8S,
    /// 8 lanes of 16-bit integers zero extended to 32-bits.
    I16x8U,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
}

impl ExtractLaneKind {
    /// The lane size to use for different kinds of extract lane kinds.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ExtractLaneKind::I8x16S | ExtractLaneKind::I8x16U => OperandSize::S8,
            ExtractLaneKind::I16x8S | ExtractLaneKind::I16x8U => OperandSize::S16,
            ExtractLaneKind::I32x4 | ExtractLaneKind::F32x4 => OperandSize::S32,
            ExtractLaneKind::I64x2 | ExtractLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

impl From<ExtractLaneKind> for Extend<Signed> {
    fn from(value: ExtractLaneKind) -> Self {
        match value {
            ExtractLaneKind::I8x16S => Extend::I32Extend8,
            ExtractLaneKind::I16x8S => Extend::I32Extend16,
            _ => unimplemented!(),
        }
    }
}
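// Hedged illustration, not part of the upstream file: signed integer lane
// extractions map onto the scalar sign-extend kinds.
#[cfg(test)]
mod extract_lane_tests {
    use super::*;

    #[test]
    fn signed_extractions_extend_to_32_bits() {
        let extend = Extend::<Signed>::from(ExtractLaneKind::I8x16S);
        assert_eq!(extend.from_bits(), 8);
        assert_eq!(extend.to_bits(), 32);
        assert_eq!(ExtractLaneKind::I8x16S.lane_size(), OperandSize::S8);
    }
}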
/// Kinds of replace lane supported by WebAssembly.
pub(crate) enum ReplaceLaneKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl ReplaceLaneKind {
    /// The lane size to use for different kinds of replace lane kinds.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ReplaceLaneKind::I8x16 => OperandSize::S8,
            ReplaceLaneKind::I16x8 => OperandSize::S16,
            ReplaceLaneKind::I32x4 => OperandSize::S32,
            ReplaceLaneKind::I64x2 => OperandSize::S64,
            ReplaceLaneKind::F32x4 => OperandSize::S32,
            ReplaceLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of behavior supported by Wasm loads.
pub(crate) enum LoadKind {
    /// Load the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Atomic load, with optional scalar extend.
    Atomic(OperandSize, Option<ExtendKind>),
    /// Duplicate value into vector lanes.
    Splat(SplatLoadKind),
    /// Scalar (non-vector) extend.
    ScalarExtend(ExtendKind),
    /// Vector extend.
    VectorExtend(V128LoadExtendKind),
    /// Load content into select lane.
    VectorLane(LaneSelector),
    /// Load a single element into the lowest bits of a vector and initialize
    /// all other bits to zero.
    VectorZero(OperandSize),
}

impl LoadKind {
    /// Returns the [`OperandSize`] used in the load operation.
    pub(crate) fn derive_operand_size(&self) -> OperandSize {
        match self {
            Self::ScalarExtend(extend) | Self::Atomic(_, Some(extend)) => {
                Self::operand_size_for_scalar(extend)
            }
            Self::VectorExtend(_) => OperandSize::S64,
            Self::Splat(kind) => Self::operand_size_for_splat(kind),
            Self::Operand(size)
            | Self::Atomic(size, None)
            | Self::VectorLane(LaneSelector { size, .. })
            | Self::VectorZero(size) => *size,
        }
    }

    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }

    fn operand_size_for_scalar(extend_kind: &ExtendKind) -> OperandSize {
        match extend_kind {
            ExtendKind::Signed(s) => s.from_size(),
            ExtendKind::Unsigned(u) => u.from_size(),
        }
    }

    fn operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize {
        match kind {
            SplatLoadKind::S8 => OperandSize::S8,
            SplatLoadKind::S16 => OperandSize::S16,
            SplatLoadKind::S32 => OperandSize::S32,
            SplatLoadKind::S64 => OperandSize::S64,
        }
    }

    pub(crate) fn is_atomic(&self) -> bool {
        matches!(self, Self::Atomic(_, _))
    }
}

/// Kinds of behavior supported by Wasm stores.
#[derive(Copy, Clone)]
pub enum StoreKind {
    /// Store the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Store the entire bytes of the operand size without any modifications, atomically.
    Atomic(OperandSize),
    /// Store the content of selected lane.
    VectorLane(LaneSelector),
}

impl StoreKind {
    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }
}

#[derive(Copy, Clone)]
pub struct LaneSelector {
    pub lane: u8,
    pub size: OperandSize,
}

/// Types of vector integer to float conversions supported by WebAssembly.
pub(crate) enum V128ConvertKind {
    /// 4 lanes of signed 32-bit integers to 4 lanes of 32-bit floats.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers to 4 lanes of 32-bit floats.
    I32x4U,
    /// 4 lanes of signed 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowS,
    /// 4 lanes of unsigned 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowU,
}

impl V128ConvertKind {
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            V128ConvertKind::I32x4S
            | V128ConvertKind::I32x4U
            | V128ConvertKind::I32x4LowS
            | V128ConvertKind::I32x4LowU => OperandSize::S32,
        }
    }

    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        match self {
            V128ConvertKind::I32x4S | V128ConvertKind::I32x4U => OperandSize::S32,
            V128ConvertKind::I32x4LowS | V128ConvertKind::I32x4LowU => OperandSize::S64,
        }
    }
}
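// Hedged illustration, not part of the upstream file: how
// `LoadKind::derive_operand_size` resolves the in-memory access width.
#[cfg(test)]
mod load_kind_tests {
    use super::*;

    #[test]
    fn derived_operand_sizes() {
        // A plain 32-bit load accesses 32 bits.
        let plain = LoadKind::Operand(OperandSize::S32);
        assert_eq!(plain.derive_operand_size(), OperandSize::S32);
        // An atomic load with an 8-to-64-bit zero extend accesses 8 bits.
        let atomic = LoadKind::Atomic(OperandSize::S64, Some(Extend::<Zero>::I64Extend8.into()));
        assert_eq!(atomic.derive_operand_size(), OperandSize::S8);
        assert!(atomic.is_atomic());
    }
}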
/// Kinds of vector narrowing operations supported by WebAssembly.
pub(crate) enum V128NarrowKind {
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// signed saturation.
    I16x8S,
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// unsigned saturation.
    I16x8U,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// signed saturation.
    I32x4S,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// unsigned saturation.
    I32x4U,
}

impl V128NarrowKind {
    /// Return the size of the destination lanes.
    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        match self {
            Self::I16x8S | Self::I16x8U => OperandSize::S8,
            Self::I32x4S | Self::I32x4U => OperandSize::S16,
        }
    }
}

/// Kinds of vector extending operations supported by WebAssembly.
#[derive(Debug, Copy, Clone)]
pub(crate) enum V128ExtendKind {
    /// Low half of i8x16 sign extended.
    LowI8x16S,
    /// High half of i8x16 sign extended.
    HighI8x16S,
    /// Low half of i8x16 zero extended.
    LowI8x16U,
    /// High half of i8x16 zero extended.
    HighI8x16U,
    /// Low half of i16x8 sign extended.
    LowI16x8S,
    /// High half of i16x8 sign extended.
    HighI16x8S,
    /// Low half of i16x8 zero extended.
    LowI16x8U,
    /// High half of i16x8 zero extended.
    HighI16x8U,
    /// Low half of i32x4 sign extended.
    LowI32x4S,
    /// High half of i32x4 sign extended.
    HighI32x4S,
    /// Low half of i32x4 zero extended.
    LowI32x4U,
    /// High half of i32x4 zero extended.
    HighI32x4U,
}

impl V128ExtendKind {
    /// The size of the source's lanes.
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            Self::LowI8x16S | Self::LowI8x16U | Self::HighI8x16S | Self::HighI8x16U => {
                OperandSize::S8
            }
            Self::LowI16x8S | Self::LowI16x8U | Self::HighI16x8S | Self::HighI16x8U => {
                OperandSize::S16
            }
            Self::LowI32x4S | Self::LowI32x4U | Self::HighI32x4S | Self::HighI32x4U => {
                OperandSize::S32
            }
        }
    }
}

/// Kinds of vector equalities and non-equalities supported by WebAssembly.
pub(crate) enum VectorEqualityKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl VectorEqualityKind {
    /// Get the lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
            Self::I32x4 | Self::F32x4 => OperandSize::S32,
            Self::I64x2 | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of vector comparisons supported by WebAssembly.
pub(crate) enum VectorCompareKind {
    /// 16 lanes of signed 8 bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8 bit integers.
    I8x16U,
    /// 8 lanes of signed 16 bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16 bit integers.
    I16x8U,
    /// 4 lanes of signed 32 bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32 bit integers.
    I32x4U,
    /// 2 lanes of signed 64 bit integers.
    I64x2S,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl VectorCompareKind {
    /// Get the lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
            Self::I32x4S | Self::I32x4U | Self::F32x4 => OperandSize::S32,
            Self::I64x2S | Self::F64x2 => OperandSize::S64,
        }
    }
}
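// Hedged illustration, not part of the upstream file: narrowing halves the
// lane width, while extending reads lanes of the source width.
#[cfg(test)]
mod narrow_and_extend_tests {
    use super::*;

    #[test]
    fn lane_width_changes() {
        assert_eq!(V128NarrowKind::I16x8S.dst_lane_size(), OperandSize::S8);
        assert_eq!(V128NarrowKind::I32x4U.dst_lane_size(), OperandSize::S16);
        assert_eq!(V128ExtendKind::LowI16x8S.src_lane_size(), OperandSize::S16);
    }
}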
/// Kinds of vector absolute operations supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum V128AbsKind {
    /// 8 bit integers.
    I8x16,
    /// 16 bit integers.
    I16x8,
    /// 32 bit integers.
    I32x4,
    /// 64 bit integers.
    I64x2,
    /// 32 bit floats.
    F32x4,
    /// 64 bit floats.
    F64x2,
}

impl V128AbsKind {
    /// The lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
            Self::I32x4 | Self::F32x4 => OperandSize::S32,
            Self::I64x2 | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of truncation for vectors supported by WebAssembly.
pub(crate) enum V128TruncKind {
    /// Truncates 4 lanes of 32-bit floats to nearest integral value.
    F32x4,
    /// Truncates 2 lanes of 64-bit floats to nearest integral value.
    F64x2,
    /// Integers from signed F32x4.
    I32x4FromF32x4S,
    /// Integers from unsigned F32x4.
    I32x4FromF32x4U,
    /// Integers from signed F64x2.
    I32x4FromF64x2SZero,
    /// Integers from unsigned F64x2.
    I32x4FromF64x2UZero,
}

impl V128TruncKind {
    /// The size of the source lanes.
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            V128TruncKind::F32x4
            | V128TruncKind::I32x4FromF32x4S
            | V128TruncKind::I32x4FromF32x4U => OperandSize::S32,
            V128TruncKind::F64x2
            | V128TruncKind::I32x4FromF64x2SZero
            | V128TruncKind::I32x4FromF64x2UZero => OperandSize::S64,
        }
    }

    /// The size of the destination lanes.
    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        if let V128TruncKind::F64x2 = self {
            OperandSize::S64
        } else {
            OperandSize::S32
        }
    }
}

/// Kinds of vector addition supported by WebAssembly.
pub(crate) enum V128AddKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

/// Kinds of vector subtraction supported by WebAssembly.
pub(crate) enum V128SubKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

impl From<V128NegKind> for V128SubKind {
    fn from(value: V128NegKind) -> Self {
        match value {
            V128NegKind::I8x16 => Self::I8x16,
            V128NegKind::I16x8 => Self::I16x8,
            V128NegKind::I32x4 => Self::I32x4,
            V128NegKind::I64x2 => Self::I64x2,
            V128NegKind::F32x4 | V128NegKind::F64x2 => unimplemented!(),
        }
    }
}
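// Hedged note, not part of the upstream file: integer vector negation can be
// lowered as `0 - x`, which plausibly explains why `V128NegKind` converts
// into `V128SubKind` for the integer shapes while the float variants are
// `unimplemented!()` here.
#[cfg(test)]
mod neg_to_sub_tests {
    use super::*;

    #[test]
    fn integer_negation_maps_to_subtraction() {
        assert!(matches!(
            V128SubKind::from(V128NegKind::I32x4),
            V128SubKind::I32x4
        ));
    }
}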
/// Kinds of vector multiplication supported by WebAssembly.
pub(crate) enum V128MulKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

/// Kinds of vector negation supported by WebAssembly.
#[derive(Copy, Clone)]
pub(crate) enum V128NegKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of 8-bit integers.
    I8x16,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

impl V128NegKind {
    /// The size of the lanes.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4 => OperandSize::S32,
            Self::F64x2 | Self::I64x2 => OperandSize::S64,
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
        }
    }
}

/// Kinds of extended pairwise addition supported by WebAssembly.
pub(crate) enum V128ExtAddKind {
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
}

/// Kinds of vector extended multiplication supported by WebAssembly.
#[derive(Debug, Clone, Copy)]
pub(crate) enum V128ExtMulKind {
    LowI8x16S,
    HighI8x16S,
    LowI8x16U,
    HighI8x16U,
    LowI16x8S,
    HighI16x8S,
    LowI16x8U,
    HighI16x8U,
    LowI32x4S,
    HighI32x4S,
    LowI32x4U,
    HighI32x4U,
}

impl From<V128ExtMulKind> for V128ExtendKind {
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S => Self::LowI8x16S,
            V128ExtMulKind::HighI8x16S => Self::HighI8x16S,
            V128ExtMulKind::LowI8x16U => Self::LowI8x16U,
            V128ExtMulKind::HighI8x16U => Self::HighI8x16U,
            V128ExtMulKind::LowI16x8S => Self::LowI16x8S,
            V128ExtMulKind::HighI16x8S => Self::HighI16x8S,
            V128ExtMulKind::LowI16x8U => Self::LowI16x8U,
            V128ExtMulKind::HighI16x8U => Self::HighI16x8U,
            V128ExtMulKind::LowI32x4S => Self::LowI32x4S,
            V128ExtMulKind::HighI32x4S => Self::HighI32x4S,
            V128ExtMulKind::LowI32x4U => Self::LowI32x4U,
            V128ExtMulKind::HighI32x4U => Self::HighI32x4U,
        }
    }
}

impl From<V128ExtMulKind> for V128MulKind {
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S
            | V128ExtMulKind::HighI8x16S
            | V128ExtMulKind::LowI8x16U
            | V128ExtMulKind::HighI8x16U => Self::I16x8,
            V128ExtMulKind::LowI16x8S
            | V128ExtMulKind::HighI16x8S
            | V128ExtMulKind::LowI16x8U
            | V128ExtMulKind::HighI16x8U => Self::I32x4,
            V128ExtMulKind::LowI32x4S
            | V128ExtMulKind::HighI32x4S
            | V128ExtMulKind::LowI32x4U
            | V128ExtMulKind::HighI32x4U => Self::I64x2,
        }
    }
}

/// Operand size, in bits.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum OperandSize {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
    /// 128 bits.
    S128,
}
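// Hedged illustration, not part of the upstream file: the size helpers below
// are mutually consistent (bits = 8 * bytes, and `log2` is the binary
// logarithm of the bit width).
#[cfg(test)]
mod operand_size_tests {
    use super::*;

    #[test]
    fn size_helpers_are_consistent() {
        let sizes = [
            OperandSize::S8,
            OperandSize::S16,
            OperandSize::S32,
            OperandSize::S64,
            OperandSize::S128,
        ];
        for size in sizes {
            assert_eq!(u32::from(size.num_bits()), size.bytes() * 8);
            assert_eq!(1u8 << size.log2(), size.num_bits());
        }
    }
}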
impl OperandSize {
    /// The number of bits in the operand.
    pub fn num_bits(&self) -> u8 {
        match self {
            OperandSize::S8 => 8,
            OperandSize::S16 => 16,
            OperandSize::S32 => 32,
            OperandSize::S64 => 64,
            OperandSize::S128 => 128,
        }
    }

    /// The number of bytes in the operand.
    pub fn bytes(&self) -> u32 {
        match self {
            Self::S8 => 1,
            Self::S16 => 2,
            Self::S32 => 4,
            Self::S64 => 8,
            Self::S128 => 16,
        }
    }

    /// The binary logarithm of the number of bits in the operand.
    pub fn log2(&self) -> u8 {
        match self {
            OperandSize::S8 => 3,
            OperandSize::S16 => 4,
            OperandSize::S32 => 5,
            OperandSize::S64 => 6,
            OperandSize::S128 => 7,
        }
    }

    /// Create an [`OperandSize`] from the given number of bytes.
    pub fn from_bytes(bytes: u8) -> Self {
        use OperandSize::*;
        match bytes {
            4 => S32,
            8 => S64,
            16 => S128,
            _ => panic!("Invalid bytes {bytes} for OperandSize"),
        }
    }

    pub fn extend_to<T: ExtendType>(&self, to: Self) -> Option<Extend<T>> {
        match to {
            OperandSize::S32 => match self {
                OperandSize::S8 => Some(Extend::I32Extend8),
                OperandSize::S16 => Some(Extend::I32Extend16),
                _ => None,
            },
            OperandSize::S64 => match self {
                OperandSize::S8 => Some(Extend::I64Extend8),
                OperandSize::S16 => Some(Extend::I64Extend16),
                OperandSize::S32 => Some(Extend::I64Extend32),
                _ => None,
            },
            _ => None,
        }
    }

    /// The number of bits in the mantissa.
    ///
    /// Only implemented for floats.
    pub fn mantissa_bits(&self) -> u8 {
        match self {
            Self::S32 => 8,
            Self::S64 => 11,
            _ => unimplemented!(),
        }
    }
}

/// An abstraction over a register or immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum RegImm {
    /// A register.
    Reg(Reg),
    /// A tagged immediate argument.
    Imm(Imm),
}

/// A tagged representation of an immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum Imm {
    /// I32 immediate.
    I32(u32),
    /// I64 immediate.
    I64(u64),
    /// F32 immediate.
    F32(u32),
    /// F64 immediate.
    F64(u64),
    /// V128 immediate.
    V128(i128),
}

impl Imm {
    /// Create a new I64 immediate.
    pub fn i64(val: i64) -> Self {
        Self::I64(val as u64)
    }

    /// Create a new I32 immediate.
    pub fn i32(val: i32) -> Self {
        Self::I32(val as u32)
    }

    /// Create a new F32 immediate.
    pub fn f32(bits: u32) -> Self {
        Self::F32(bits)
    }

    /// Create a new F64 immediate.
    pub fn f64(bits: u64) -> Self {
        Self::F64(bits)
    }

    /// Create a new V128 immediate.
    pub fn v128(bits: i128) -> Self {
        Self::V128(bits)
    }

    /// Convert the immediate to i32, if possible.
    pub fn to_i32(&self) -> Option<i32> {
        match self {
            Self::I32(v) => Some(*v as i32),
            Self::I64(v) => i32::try_from(*v as i64).ok(),
            _ => None,
        }
    }

    /// Unwraps the underlying integer value as u64.
    /// # Panics
    /// This function panics if the underlying value can't be represented
    /// as u64.
    pub fn unwrap_as_u64(&self) -> u64 {
        match self {
            Self::I32(v) => *v as u64,
            Self::I64(v) => *v,
            Self::F32(v) => *v as u64,
            Self::F64(v) => *v,
            _ => unreachable!(),
        }
    }

    /// Get the operand size of the immediate.
    pub fn size(&self) -> OperandSize {
        match self {
            Self::I32(_) | Self::F32(_) => OperandSize::S32,
            Self::I64(_) | Self::F64(_) => OperandSize::S64,
            Self::V128(_) => OperandSize::S128,
        }
    }

    /// Get a little endian representation of the immediate.
    ///
    /// This method heap allocates and is intended to be used when adding
    /// values to the constant pool.
    pub fn to_bytes(&self) -> Vec<u8> {
        match self {
            Imm::I32(n) => n.to_le_bytes().to_vec(),
            Imm::I64(n) => n.to_le_bytes().to_vec(),
            Imm::F32(n) => n.to_le_bytes().to_vec(),
            Imm::F64(n) => n.to_le_bytes().to_vec(),
            Imm::V128(n) => n.to_le_bytes().to_vec(),
        }
    }
}
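// Hedged illustration, not part of the upstream file: immediates carry raw
// bit patterns, and `to_bytes` yields the little-endian encoding used when
// emitting constants.
#[cfg(test)]
mod imm_tests {
    use super::*;

    #[test]
    fn immediate_sizes_and_encoding() {
        let imm = Imm::i32(-1);
        assert_eq!(imm.size(), OperandSize::S32);
        assert_eq!(imm.to_bytes(), vec![0xff; 4]);
        // Floats are stored by their bit representation.
        assert_eq!(Imm::f64(1.0f64.to_bits()).size(), OperandSize::S64);
    }
}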
/// The location of the [VMContext] used for function calls.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum VMContextLoc {
    /// Dynamic, stored in the given register.
    Reg(Reg),
    /// The pinned [VMContext] register.
    Pinned,
    /// A different VMContext is loaded at the provided offset from the current
    /// VMContext.
    OffsetFromPinned(u32),
}

/// The maximum number of context arguments currently used across the compiler.
pub(crate) const MAX_CONTEXT_ARGS: usize = 2;

/// Out-of-band special purpose arguments used for function call emission.
///
/// We cannot rely on the value stack for these values given that inserting
/// register or memory values at arbitrary locations of the value stack has the
/// potential to break the stack ordering principle, which states that older
/// values must always precede newer values, effectively simulating the order of
/// values in the machine stack.
/// The [ContextArgs] are meant to be resolved at every callsite; in some cases
/// it might be possible to construct them early on, but given that they might
/// contain allocatable registers, it's preferred to construct them in
/// [FnCall::emit].
#[derive(Clone, Debug)]
pub(crate) enum ContextArgs {
    /// A single context argument is required; the current pinned [VMContext]
    /// register must be passed as the first argument of the function call.
    VMContext([VMContextLoc; 1]),
    /// The callee and caller context arguments are required. In this case, the
    /// callee context argument is usually stored into an allocatable register
    /// and the caller is always the current pinned [VMContext] pointer.
    CalleeAndCallerVMContext([VMContextLoc; MAX_CONTEXT_ARGS]),
}

impl ContextArgs {
    /// Construct a [ContextArgs] declaring the usage of the pinned [VMContext]
    /// register as both the caller and callee context arguments.
    pub fn pinned_callee_and_caller_vmctx() -> Self {
        Self::CalleeAndCallerVMContext([VMContextLoc::Pinned, VMContextLoc::Pinned])
    }

    /// Construct a [ContextArgs] that declares the usage of the pinned
    /// [VMContext] register as the only context argument.
    pub fn pinned_vmctx() -> Self {
        Self::VMContext([VMContextLoc::Pinned])
    }

    /// Construct a [ContextArgs] that declares the usage of a [VMContext] loaded
    /// indirectly from the pinned [VMContext] register as the only context
    /// argument.
    pub fn offset_from_pinned_vmctx(offset: u32) -> Self {
        Self::VMContext([VMContextLoc::OffsetFromPinned(offset)])
    }

    /// Construct a [ContextArgs] that declares a dynamic callee context and the
    /// pinned [VMContext] register as the context arguments.
    pub fn with_callee_and_pinned_caller(callee_vmctx: Reg) -> Self {
        Self::CalleeAndCallerVMContext([VMContextLoc::Reg(callee_vmctx), VMContextLoc::Pinned])
    }

    /// Get the length of the [ContextArgs].
    pub fn len(&self) -> usize {
        self.as_slice().len()
    }

    /// Get a slice of the context arguments.
    pub fn as_slice(&self) -> &[VMContextLoc] {
        match self {
            Self::VMContext(a) => a.as_slice(),
            Self::CalleeAndCallerVMContext(a) => a.as_slice(),
        }
    }
}
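// Hedged illustration, not part of the upstream file: the context-argument
// count is always 1 or `MAX_CONTEXT_ARGS`.
#[cfg(test)]
mod context_args_tests {
    use super::*;

    #[test]
    fn context_argument_counts() {
        assert_eq!(ContextArgs::pinned_vmctx().len(), 1);
        assert_eq!(
            ContextArgs::pinned_callee_and_caller_vmctx().len(),
            MAX_CONTEXT_ARGS
        );
    }
}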
#[derive(Copy, Clone, Debug)]
pub(crate) enum CalleeKind {
    /// A function call to a raw address.
    Indirect(Reg),
    /// A function call to a local function.
    Direct(UserExternalNameRef),
}

impl CalleeKind {
    /// Creates a callee kind from a register.
    pub fn indirect(reg: Reg) -> Self {
        Self::Indirect(reg)
    }

    /// Creates a direct callee kind from a function name.
    pub fn direct(name: UserExternalNameRef) -> Self {
        Self::Direct(name)
    }
}

impl RegImm {
    /// Register constructor.
    pub fn reg(r: Reg) -> Self {
        RegImm::Reg(r)
    }

    /// I64 immediate constructor.
    pub fn i64(val: i64) -> Self {
        RegImm::Imm(Imm::i64(val))
    }

    /// I32 immediate constructor.
    pub fn i32(val: i32) -> Self {
        RegImm::Imm(Imm::i32(val))
    }

    /// F32 immediate, stored using its bits representation.
    pub fn f32(bits: u32) -> Self {
        RegImm::Imm(Imm::f32(bits))
    }

    /// F64 immediate, stored using its bits representation.
    pub fn f64(bits: u64) -> Self {
        RegImm::Imm(Imm::f64(bits))
    }

    /// V128 immediate.
    pub fn v128(bits: i128) -> Self {
        RegImm::Imm(Imm::v128(bits))
    }
}

impl From<Reg> for RegImm {
    fn from(r: Reg) -> Self {
        Self::Reg(r)
    }
}

#[derive(Debug)]
pub enum RoundingMode {
    Nearest,
    Up,
    Down,
    Zero,
}

/// Memory flags for trusted loads/stores.
pub const TRUSTED_FLAGS: MemFlags = MemFlags::trusted();

/// Flags used for WebAssembly loads / stores.
/// Untrusted by default so we don't set `no_trap`.
/// We also ensure that the endianness is the right one for WebAssembly.
pub const UNTRUSTED_FLAGS: MemFlags = MemFlags::new().with_endianness(Endianness::Little);
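// Hedged illustration, not part of the upstream file: the `RegImm`
// constructors simply tag the underlying immediate.
#[cfg(test)]
mod reg_imm_tests {
    use super::*;

    #[test]
    fn immediate_constructors() {
        assert_eq!(RegImm::i32(7), RegImm::Imm(Imm::I32(7)));
        let bits = 2.5f64.to_bits();
        assert_eq!(RegImm::f64(bits), RegImm::Imm(Imm::F64(bits)));
    }
}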
/// Generic MacroAssembler interface used by the code generation.
///
/// The MacroAssembler trait aims to expose an interface high-level enough
/// that each ISA can provide its own lowering to machine code. For example,
/// for WebAssembly operators that don't have a direct mapping to a machine
/// instruction, the interface defines a signature matching the WebAssembly
/// operator, allowing each implementation to lower such an operator entirely.
/// This approach attributes more responsibility to the MacroAssembler, but frees
/// the caller from worrying about assembling the right sequence of
/// instructions at the operator callsite.
///
/// The interface defaults to a three-argument form for binary operations;
/// this allows a natural mapping to instructions for RISC architectures,
/// which use the three-argument form.
/// This approach allows for a more general interface that can be restricted
/// where needed, in the case of architectures that use a two-argument form.
pub(crate) trait MacroAssembler {
    /// The addressing mode.
    type Address: Copy + Debug;

    /// The pointer representation of the target ISA,
    /// used to access information from [`VMOffsets`].
    type Ptr: PtrSize;

    /// The ABI details of the target.
    type ABI: abi::ABI;

    /// Emit the function prologue.
    fn prologue(&mut self, vmctx: Reg) -> Result<()> {
        self.frame_setup()?;
        self.check_stack(vmctx)
    }

    /// Generate the frame setup sequence.
    fn frame_setup(&mut self) -> Result<()>;

    /// Generate the frame restore sequence.
    fn frame_restore(&mut self) -> Result<()>;

    /// Emit a stack check.
    fn check_stack(&mut self, vmctx: Reg) -> Result<()>;

    /// Emit the function epilogue.
    fn epilogue(&mut self) -> Result<()> {
        self.frame_restore()
    }

    /// Reserve stack space.
    fn reserve_stack(&mut self, bytes: u32) -> Result<()>;

    /// Free stack space.
    fn free_stack(&mut self, bytes: u32) -> Result<()>;

    /// Reset the stack pointer to the given offset.
    ///
    /// Used to reset the stack pointer to a given offset
    /// when dealing with unreachable code.
    fn reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()>;

    /// Get the address of a local slot.
    fn local_address(&mut self, local: &LocalSlot) -> Result<Self::Address>;

    /// Constructs an address with an offset that is relative to the
    /// current position of the stack pointer (e.g. `[sp + (sp_offset -
    /// offset)]`).
    fn address_from_sp(&self, offset: SPOffset) -> Result<Self::Address>;

    /// Constructs an address with an offset that is absolute to the
    /// current position of the stack pointer (e.g. `[sp + offset]`).
    fn address_at_sp(&self, offset: SPOffset) -> Result<Self::Address>;

    /// Alias for [`Self::address_at_reg`] using the VMContext register as
    /// a base. The VMContext register is derived from the ABI type that is
    /// associated to the MacroAssembler.
    fn address_at_vmctx(&self, offset: u32) -> Result<Self::Address>;

    /// Construct an address that is absolute to the current position
    /// of the given register.
    fn address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address>;

    /// Emit a function call to either a local or external function.
    fn call(
        &mut self,
        stack_args_size: u32,
        f: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>,
    ) -> Result<u32>;

    /// Acquire a scratch register and execute the given callback.
    fn with_scratch<T: ScratchType, R>(&mut self, f: impl FnOnce(&mut Self, Scratch) -> R) -> R;

    /// Convenience wrapper over [`Self::with_scratch`], derives the register class
    /// for a particular Wasm value type.
    fn with_scratch_for<R>(
        &mut self,
        ty: WasmValType,
        f: impl FnOnce(&mut Self, Scratch) -> R,
    ) -> R {
        match ty {
            WasmValType::I32
            | WasmValType::I64
            | WasmValType::Ref(WasmRefType {
                heap_type: WasmHeapType::Func,
                ..
            }) => self.with_scratch::<IntScratch, _>(f),
            WasmValType::F32 | WasmValType::F64 | WasmValType::V128 => {
                self.with_scratch::<FloatScratch, _>(f)
            }
            _ => unimplemented!(),
        }
    }

    /// Get stack pointer offset.
    fn sp_offset(&self) -> Result<SPOffset>;

    /// Perform a stack store.
    fn store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize) -> Result<()>;

    /// Alias for `MacroAssembler::store` with the operand size corresponding
    /// to the pointer size of the target.
    fn store_ptr(&mut self, src: Reg, dst: Self::Address) -> Result<()>;

    /// Perform a WebAssembly store.
    /// A WebAssembly store introduces several additional invariants compared to
    /// [Self::store]; more precisely, it can implicitly trap, in certain
    /// circumstances, even if explicit bounds checks are elided, so in that
    /// sense we consider this type of store untrusted. It can also differ with
    /// regards to the endianness depending on the target ISA. For this reason,
    /// [Self::wasm_store] should be used explicitly when emitting WebAssembly
    /// stores.
    fn wasm_store(&mut self, src: Reg, dst: Self::Address, store_kind: StoreKind) -> Result<()>;

    /// Perform a zero-extended stack load.
    fn load(&mut self, src: Self::Address, dst: WritableReg, size: OperandSize) -> Result<()>;
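    // Hedged usage sketch (comments only; `masm` stands for any type
    // implementing this trait and `reg` for a live register): spilling a
    // value to the stack and reloading it through the SP-relative helpers:
    //
    //     let slot = masm.push(reg, OperandSize::S64)?;
    //     let addr = masm.address_from_sp(slot.offset)?;
    //     masm.load(addr, writable!(reg), OperandSize::S64)?;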
    /// Perform a WebAssembly load.
    /// A WebAssembly load introduces several additional invariants compared to
    /// [Self::load]; more precisely, it can implicitly trap, in certain
    /// circumstances, even if explicit bounds checks are elided, so in that
    /// sense we consider this type of load untrusted. It can also differ with
    /// regards to the endianness depending on the target ISA. For this reason,
    /// [Self::wasm_load] should be used explicitly when emitting WebAssembly
    /// loads.
    fn wasm_load(&mut self, src: Self::Address, dst: WritableReg, kind: LoadKind) -> Result<()>;

    /// Alias for `MacroAssembler::load` with the operand size corresponding
    /// to the pointer size of the target.
    fn load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()>;

    /// Computes the effective address and stores the result in the destination
    /// register.
    fn compute_addr(
        &mut self,
        _src: Self::Address,
        _dst: WritableReg,
        _size: OperandSize,
    ) -> Result<()>;

    /// Pop a value from the machine stack into the given register.
    fn pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a move.
    fn mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a conditional move.
    fn cmov(&mut self, dst: WritableReg, src: Reg, cc: IntCmpKind, size: OperandSize)
    -> Result<()>;
    /// Performs a memory move of bytes from src to dest.
    /// Bytes are moved in blocks of 8 bytes, where possible.
    fn memmove(
        &mut self,
        src: SPOffset,
        dst: SPOffset,
        bytes: u32,
        direction: MemMoveDirection,
    ) -> Result<()> {
        match direction {
            MemMoveDirection::LowToHigh => debug_assert!(dst.as_u32() < src.as_u32()),
            MemMoveDirection::HighToLow => debug_assert!(dst.as_u32() > src.as_u32()),
        }
        // At least 4 byte aligned.
        debug_assert!(bytes % 4 == 0);
        let mut remaining = bytes;
        let word_bytes = <Self::ABI as abi::ABI>::word_bytes();

        let word_bytes = word_bytes as u32;

        let mut dst_offs;
        let mut src_offs;
        match direction {
            MemMoveDirection::LowToHigh => {
                dst_offs = dst.as_u32() - bytes;
                src_offs = src.as_u32() - bytes;
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        remaining -= word_bytes;
                        dst_offs += word_bytes;
                        src_offs += word_bytes;

                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;
                    }
                    anyhow::Ok(())
                })?;
            }
            MemMoveDirection::HighToLow => {
                // Go from the end to the beginning to handle overlapping addresses.
                src_offs = src.as_u32();
                dst_offs = dst.as_u32();
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;

                        remaining -= word_bytes;
                        src_offs -= word_bytes;
                        dst_offs -= word_bytes;
                    }
                    anyhow::Ok(())
                })?;
            }
        }

        if remaining > 0 {
            let half_word = word_bytes / 2;
            let ptr_size = OperandSize::from_bytes(half_word as u8);
            debug_assert!(remaining == half_word);
            // Need to move the offsets ahead in the `LowToHigh` case to
            // compensate for the initial subtraction of `bytes`.
            if direction == MemMoveDirection::LowToHigh {
                dst_offs += half_word;
                src_offs += half_word;
            }

            self.with_scratch::<IntScratch, _>(|masm, scratch| {
                masm.load(
                    masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                    scratch.writable(),
                    ptr_size,
                )?;
                masm.store(
                    scratch.inner().into(),
                    masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                    ptr_size,
                )?;
                anyhow::Ok(())
            })?;
        }
        Ok(())
    }

    /// Perform add operation.
    fn add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a checked unsigned integer addition, emitting the provided trap
    /// if the addition overflows.
    fn checked_uadd(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: RegImm,
        size: OperandSize,
        trap: TrapCode,
    ) -> Result<()>;

    /// Perform subtraction operation.
    fn sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform multiplication operation.
    fn mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a floating point add operation.
    fn float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point subtraction operation.
    fn float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point multiply operation.
    fn float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point divide operation.
    fn float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point minimum operation. In x86, this will emit
    /// multiple instructions.
    fn float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point maximum operation. In x86, this will emit
    /// multiple instructions.
    fn float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;
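    // Hedged sketch (comments only): a checked unsigned add that traps on
    // overflow, with `trap` standing for whichever `TrapCode` applies:
    //
    //     masm.checked_uadd(writable!(dst), lhs, RegImm::i64(8), OperandSize::S64, trap)?;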
    /// Perform a floating point copysign operation. In x86, this will emit
    /// multiple instructions.
    fn float_copysign(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Perform a floating point abs operation.
    fn float_abs(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a floating point negation operation.
    fn float_neg(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a floating point rounding operation.
    fn float_round<
        F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext<Emission>, &mut Self) -> Result<()>,
    >(
        &mut self,
        mode: RoundingMode,
        env: &mut FuncEnv<Self::Ptr>,
        context: &mut CodeGenContext<Emission>,
        size: OperandSize,
        fallback: F,
    ) -> Result<()>;

    /// Perform a floating point square root operation.
    fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;

    /// Perform logical and operation.
    fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform logical or operation.
    fn or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform logical exclusive or operation.
    fn xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a shift operation between a register and an immediate.
    fn shift_ir(
        &mut self,
        dst: WritableReg,
        imm: Imm,
        lhs: Reg,
        kind: ShiftKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Perform a shift operation between two registers.
    /// This case is special in that some architectures have specific expectations
    /// regarding the location of the instruction arguments. To free the
    /// caller from having to deal with the architecture specific constraints
    /// we give this function access to the code generation context, allowing
    /// each implementation to decide the lowering path.
    fn shift(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: ShiftKind,
        size: OperandSize,
    ) -> Result<()>;
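    // Hedged sketch (comments only): shifts by a constant amount use the
    // immediate form directly, e.g. `x << 3` on a 32-bit operand:
    //
    //     masm.shift_ir(writable!(dst), Imm::i32(3), x, ShiftKind::Shl, OperandSize::S32)?;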
    /// Perform division operation.
    /// Division is special in that some architectures have specific
    /// expectations regarding the location of the instruction
    /// arguments and regarding the location of the quotient /
    /// remainder. To free the caller from having to deal with the
    /// architecture specific constraints we give this function access
    /// to the code generation context, allowing each implementation
    /// to decide the lowering path. For cases in which division is an
    /// unconstrained binary operation, the caller can decide to use
    /// the `CodeGenContext::i32_binop` or `CodeGenContext::i64_binop`
    /// functions.
    fn div(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: DivKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Calculate remainder.
    fn rem(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: RemKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Compares `src1` against `src2` for the side effect of setting processor
    /// flags.
    ///
    /// Note that `src1` is the left-hand-side of the comparison and `src2` is
    /// the right-hand-side, so if testing `a < b` then `src1 == a` and
    /// `src2 == b`.
    fn cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()>;

    /// Compare src and dst and put the result in dst.
    /// This function will potentially emit a series of instructions.
    ///
    /// The initial value in `dst` is the left-hand-side of the comparison and
    /// the initial value in `src` is the right-hand-side of the comparison.
    /// That means for `a < b` then `dst == a` and `src == b`.
    fn cmp_with_set(
        &mut self,
        dst: WritableReg,
        src: RegImm,
        kind: IntCmpKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Compare floats in src1 and src2 and put the result in dst.
    /// In x86, this will emit multiple instructions.
    fn float_cmp_with_set(
        &mut self,
        dst: WritableReg,
        src1: Reg,
        src2: Reg,
        kind: FloatCmpKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Count the number of leading zeroes in src and put the result in dst.
    /// In x64, this will emit multiple instructions if the `has_lzcnt` flag is
    /// false.
    fn clz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;

    /// Count the number of trailing zeroes in src and put the result in dst.
    /// In x64, this will emit multiple instructions if the `has_tzcnt` flag is
    /// false.
    fn ctz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;

    /// Push the register to the stack, returning the stack slot metadata.
    // NB
    // The stack alignment should not be assumed after any call to `push`,
    // unless explicitly aligned otherwise. Typically, stack alignment is
    // maintained at call sites and during the execution of
    // epilogues.
    fn push(&mut self, src: Reg, size: OperandSize) -> Result<StackSlot>;

    /// Finalize the assembly and return the result.
    fn finalize(self, base: Option<SourceLoc>) -> Result<MachBufferFinalized<Final>>;

    /// Zero a particular register.
    fn zero(&mut self, reg: WritableReg) -> Result<()>;
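    // Hedged sketch (comments only): materializing `a < b` (signed) as a 0/1
    // value, where `dst` initially holds `a` and `b` is the right-hand side:
    //
    //     masm.cmp_with_set(writable!(dst), RegImm::reg(b), IntCmpKind::LtS, OperandSize::S32)?;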
    /// Count the number of 1 bits in src and put the result in dst. In x64,
    /// this will emit multiple instructions if the `has_popcnt` flag is false.
    fn popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()>;

    /// Converts an i64 to an i32 by discarding the high 32 bits.
    fn wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()>;

    /// Extends an integer of a given size to a larger size.
    fn extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()>;

    /// Emits one or more instructions to perform a signed truncation of a
    /// float into an integer.
    fn signed_truncate(
        &mut self,
        dst: WritableReg,
        src: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
        kind: TruncKind,
    ) -> Result<()>;

    /// Emits one or more instructions to perform an unsigned truncation of a
    /// float into an integer.
    fn unsigned_truncate(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        src_size: OperandSize,
        dst_size: OperandSize,
        kind: TruncKind,
    ) -> Result<()>;

    /// Emits one or more instructions to perform a signed convert of an
    /// integer into a float.
    fn signed_convert(
        &mut self,
        dst: WritableReg,
        src: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) -> Result<()>;

    /// Emits one or more instructions to perform an unsigned convert of an
    /// integer into a float.
    fn unsigned_convert(
        &mut self,
        dst: WritableReg,
        src: Reg,
        tmp_gpr: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) -> Result<()>;

    /// Reinterpret a float as an integer.
    fn reinterpret_float_as_int(
        &mut self,
        dst: WritableReg,
        src: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Reinterpret an integer as a float.
    fn reinterpret_int_as_float(
        &mut self,
        dst: WritableReg,
        src: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Demote an f64 to an f32.
    fn demote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;

    /// Promote an f32 to an f64.
    fn promote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
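    // Hedged sketch (comments only): per the `TruncKind` docs above,
    // `Checked` clamps out-of-range sources while `Unchecked` raises an
    // exception; e.g. an f32-to-i32 saturating truncation:
    //
    //     masm.signed_truncate(writable!(dst), src, OperandSize::S32,
    //         OperandSize::S32, TruncKind::Checked)?;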
    /// Zero a given memory range.
    ///
    /// The default implementation divides the given memory range
    /// into word-sized slots. Then it unrolls a series of store
    /// instructions, effectively assigning zero to each slot.
    fn zero_mem_range(&mut self, mem: &Range<u32>) -> Result<()> {
        let word_size = <Self::ABI as abi::ABI>::word_bytes() as u32;
        if mem.is_empty() {
            return Ok(());
        }

        let start = if mem.start % word_size == 0 {
            mem.start
        } else {
            // Ensure that the start of the range is at least 4-byte aligned.
            assert!(mem.start % 4 == 0);
            let start = align_to(mem.start, word_size);
            let addr: Self::Address = self.local_address(&LocalSlot::i32(start))?;
            self.store(RegImm::i32(0), addr, OperandSize::S32)?;
            // Ensure that the new start of the range is word-size aligned.
            assert!(start % word_size == 0);
            start
        };

        let end = align_to(mem.end, word_size);
        let slots = (end - start) / word_size;

        if slots == 1 {
            let slot = LocalSlot::i64(start + word_size);
            let addr: Self::Address = self.local_address(&slot)?;
            self.store(RegImm::i64(0), addr, OperandSize::S64)?;
        } else {
            // TODO
            // Add an upper bound to this generation;
            // given a considerably large amount of slots
            // this will be inefficient.
            self.with_scratch::<IntScratch, _>(|masm, scratch| {
                masm.zero(scratch.writable())?;
                let zero = RegImm::reg(scratch.inner());

                for step in (start..end).step_by(word_size as usize) {
                    let slot = LocalSlot::i64(step + word_size);
                    let addr: Self::Address = masm.local_address(&slot)?;
                    masm.store(zero, addr, OperandSize::S64)?;
                }
                anyhow::Ok(())
            })?;
        }

        Ok(())
    }

    /// Generate a label.
    fn get_label(&mut self) -> Result<MachLabel>;

    /// Bind the given label at the current code offset.
    fn bind(&mut self, label: MachLabel) -> Result<()>;

    /// Conditional branch.
    ///
    /// Performs a comparison between the two operands,
    /// and immediately after emits a jump to the given
    /// label destination if the condition is met.
    fn branch(
        &mut self,
        kind: IntCmpKind,
        lhs: Reg,
        rhs: RegImm,
        taken: MachLabel,
        size: OperandSize,
    ) -> Result<()>;

    /// Emits an unconditional jump to the given label.
    fn jmp(&mut self, target: MachLabel) -> Result<()>;
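    // Hedged sketch (comments only): a forward branch taken when `lhs == 0`:
    //
    //     let skip = masm.get_label()?;
    //     masm.branch(IntCmpKind::Eq, lhs, RegImm::i32(0), skip, OperandSize::S32)?;
    //     // ... fallthrough code ...
    //     masm.bind(skip)?;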
    /// Emits a jump table sequence. The default label is specified as
    /// the last element of the targets slice.
    fn jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()>;

    /// Emit an unreachable code trap.
    fn unreachable(&mut self) -> Result<()>;

    /// Emit an unconditional trap.
    fn trap(&mut self, code: TrapCode) -> Result<()>;

    /// Traps if the condition code is met.
    fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()>;

    /// Trap if the source register is zero.
    fn trapz(&mut self, src: Reg, code: TrapCode) -> Result<()>;

    /// Ensures that the stack pointer is correctly positioned before an unconditional
    /// jump according to the requirements of the destination target.
    fn ensure_sp_for_jump(&mut self, target: SPOffset) -> Result<()> {
        let bytes = self
            .sp_offset()?
            .as_u32()
            .checked_sub(target.as_u32())
            .unwrap_or(0);

        if bytes > 0 {
            self.free_stack(bytes)?;
        }

        Ok(())
    }

    /// Mark the start of a source location returning the machine code offset
    /// and the relative source code location.
    fn start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)>;

    /// Mark the end of a source location.
    fn end_source_loc(&mut self) -> Result<()>;

    /// The current offset, in bytes from the beginning of the function.
    fn current_code_offset(&self) -> Result<CodeOffset>;

    /// Performs a 128-bit addition.
    fn add128(
        &mut self,
        dst_lo: WritableReg,
        dst_hi: WritableReg,
        lhs_lo: Reg,
        lhs_hi: Reg,
        rhs_lo: Reg,
        rhs_hi: Reg,
    ) -> Result<()>;

    /// Performs a 128-bit subtraction.
    fn sub128(
        &mut self,
        dst_lo: WritableReg,
        dst_hi: WritableReg,
        lhs_lo: Reg,
        lhs_hi: Reg,
        rhs_lo: Reg,
        rhs_hi: Reg,
    ) -> Result<()>;
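    // Hedged sketch (comments only): 128-bit integers are modeled as
    // (lo, hi) register pairs, so the wide arithmetic takes six sources and
    // two destinations:
    //
    //     masm.add128(writable!(dst_lo), writable!(dst_hi),
    //         lhs_lo, lhs_hi, rhs_lo, rhs_hi)?;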
    fn mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind)
        -> Result<()>;

    /// Takes the value in a source operand and replicates it across lanes of
    /// `size` in a destination result.
    fn splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()>;

    /// Performs a shuffle between two 128-bit vectors into a 128-bit result
    /// using `lanes` as a mask to select which indexes to copy.
    fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>;

    /// Performs a swizzle between two 128-bit vectors into a 128-bit result.
    fn swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()>;

    /// Performs the RMW `op` operation on the passed `addr`.
    ///
    /// The value *before* the operation was performed is written back to the
    /// `operand` register.
    fn atomic_rmw(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        addr: Self::Address,
        size: OperandSize,
        op: RmwOp,
        flags: MemFlags,
        extend: Option<Extend<Zero>>,
    ) -> Result<()>;

    /// Extracts the scalar value from `src` in `lane` to `dst`.
    fn extract_lane(
        &mut self,
        src: Reg,
        dst: WritableReg,
        lane: u8,
        kind: ExtractLaneKind,
    ) -> Result<()>;

    /// Replaces the value in `lane` in `dst` with the value in `src`.
    fn replace_lane(
        &mut self,
        src: RegImm,
        dst: WritableReg,
        lane: u8,
        kind: ReplaceLaneKind,
    ) -> Result<()>;

    /// Perform an atomic CAS (compare-and-swap) operation with the value at
    /// `addr`, and `expected` and `replacement` (at the top of the context's
    /// stack).
    ///
    /// This method takes the `CodeGenContext` as an argument to accommodate
    /// architectures that expect parameters in specific registers. The
    /// context stack contains the `replacement` and `expected` values, in
    /// that order. The implementer is expected to push the value that was at
    /// `addr` before the update onto the context's stack before returning.
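    ///
    /// Illustrative call shape (a sketch; assumes an implementation `masm`,
    /// a computed `addr`, and a `context` whose value stack already holds
    /// the `expected` and `replacement` values):
    ///
    /// ```ignore
    /// // After this call the context's stack holds the value that was
    /// // loaded from `addr` prior to the update.
    /// masm.atomic_cas(&mut context, addr, OperandSize::S32, MemFlags::trusted(), None)?;
    /// ```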
    fn atomic_cas(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        addr: Self::Address,
        size: OperandSize,
        flags: MemFlags,
        extend: Option<Extend<Zero>>,
    ) -> Result<()>;

    /// Compares vector registers `lhs` and `rhs` for equality and puts the
    /// vector of results in `dst`.
    fn v128_eq(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorEqualityKind,
    ) -> Result<()>;

    /// Compares vector registers `lhs` and `rhs` for inequality and puts the
    /// vector of results in `dst`.
    fn v128_ne(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorEqualityKind,
    ) -> Result<()>;

    /// Performs a less than comparison with vector registers `lhs` and `rhs`
    /// and puts the vector of results in `dst`.
    fn v128_lt(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a less than or equal comparison with vector registers `lhs`
    /// and `rhs` and puts the vector of results in `dst`.
    fn v128_le(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a greater than comparison with vector registers `lhs` and
    /// `rhs` and puts the vector of results in `dst`.
    fn v128_gt(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a greater than or equal comparison with vector registers
    /// `lhs` and `rhs` and puts the vector of results in `dst`.
    fn v128_ge(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Emit a memory fence.
    fn fence(&mut self) -> Result<()>;

    /// Perform a logical `not` operation on the 128-bit vector value in
    /// `dst`.
    fn v128_not(&mut self, dst: WritableReg) -> Result<()>;

    /// Perform a logical `and` operation on `src1` and `src2`, both 128-bit
    /// vector values, writing the result to `dst`.
    fn v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `and_not` operation on `src1` and `src2`, both
    /// 128-bit vector values, writing the result to `dst`.
    ///
    /// `and_not` is not commutative: `dst = !src1 & src2`.
    fn v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `or` operation on `src1` and `src2`, both 128-bit
    /// vector values, writing the result to `dst`.
    fn v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `xor` operation on `src1` and `src2`, both 128-bit
    /// vector values, writing the result to `dst`.
    fn v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Given two 128-bit vectors `src1` and `src2`, and a 128-bit bitmask
    /// `mask`, selects bits from `src1` where the mask bit is 1, and from
    /// `src2` where the mask bit is 0.
    ///
    /// This is equivalent to:
    /// `v128.or(v128.and(src1, mask), v128.and(src2, v128.not(mask)))`.
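    ///
    /// A scalar model of the operation (illustrative only), shown on a
    /// single 128-bit value:
    ///
    /// ```ignore
    /// fn bitselect(src1: u128, src2: u128, mask: u128) -> u128 {
    ///     (src1 & mask) | (src2 & !mask)
    /// }
    /// // With 4-bit values for brevity: src1 = 0b1010, src2 = 0b0101, and
    /// // mask = 0b1100 selects the two high bits from src1 and the two low
    /// // bits from src2, producing 0b1001.
    /// ```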
    fn v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()>;

    /// If any bit in `src` is 1, set `dst` to 1; otherwise, set `dst` to 0.
    fn v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

    /// Convert a vector of integers to a vector of floating point values.
    fn v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()>;

    /// Convert two input vectors into a smaller lane vector by narrowing
    /// each lane.
    fn v128_narrow(
        &mut self,
        src1: Reg,
        src2: Reg,
        dst: WritableReg,
        kind: V128NarrowKind,
    ) -> Result<()>;

    /// Converts a vector containing two 64-bit floating point lanes to two
    /// 32-bit floating point lanes, setting the two higher lanes to 0.
    fn v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

    /// Converts a vector containing four 32-bit floating point lanes to two
    /// 64-bit floating point lanes. Only the two lower lanes are converted.
    fn v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

    /// Converts the low or high half of the smaller lane vector to a larger
    /// lane vector.
    fn v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()>;

    /// Perform a vector add between `lhs` and `rhs`, placing the result in
    /// `dst`.
    fn v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()>;

    /// Perform a vector sub between `lhs` and `rhs`, placing the result in
    /// `dst`.
    fn v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()>;

    /// Perform a vector lane-wise mul between `lhs` and `rhs`, placing the
    /// result in `dst`.
    fn v128_mul(&mut self, context: &mut CodeGenContext<Emission>, kind: V128MulKind)
        -> Result<()>;

    /// Perform an absolute value operation on a vector.
    fn v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()>;

    /// Vectorized negate of the content of `op`.
    fn v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()>;

    /// Perform the shift operation specified by `kind`, by the shift amount
    /// specified by the 32-bit integer at the top of the stack, on the
    /// 128-bit vector specified by the second value from the top of the
    /// stack, interpreted as packed integers of size `lane_width`.
    ///
    /// The shift amount is taken modulo `lane_width`.
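    ///
    /// For example, with a `lane_width` of `S32`, a shift amount of 33 is
    /// reduced to 33 % 32 = 1, so each 32-bit lane is shifted by a single
    /// bit. Illustrative call shape (a sketch; assumes an implementation
    /// `masm` and a `context` with the shift amount and vector on its value
    /// stack):
    ///
    /// ```ignore
    /// masm.v128_shift(&mut context, OperandSize::S32, ShiftKind::Shl)?;
    /// ```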
    fn v128_shift(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        lane_width: OperandSize,
        kind: ShiftKind,
    ) -> Result<()>;

    /// Perform a saturating integer q-format rounding multiplication.
    fn v128_q15mulr_sat_s(
        &mut self,
        lhs: Reg,
        rhs: Reg,
        dst: WritableReg,
        size: OperandSize,
    ) -> Result<()>;

    /// Sets `dst` to 1 if all lanes in `src` are non-zero, and to 0
    /// otherwise.
    fn v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Extracts the high bit of each lane in `src` and produces a scalar
    /// mask with all bits concatenated in `dst`.
    fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise truncation operation.
    ///
    /// If using an integer kind of truncation, this performs a lane-wise
    /// saturating conversion from float to integer using the IEEE
    /// `convertToIntegerTowardZero` function. If any input lane is NaN, the
    /// resulting lane is 0. If the rounded integer value of a lane is
    /// outside the range of the destination type, the result is saturated
    /// to the nearest representable integer value.
    fn v128_trunc(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: V128TruncKind,
    ) -> Result<()>;

    /// Perform a lane-wise `min` operation between `src1` and `src2`.
    fn v128_min(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MinKind)
        -> Result<()>;

    /// Perform a lane-wise `max` operation between `src1` and `src2`.
    fn v128_max(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MaxKind)
        -> Result<()>;

    /// Perform the lane-wise integer extended multiplication, producing a
    /// result twice as wide as the inputs. This is equivalent to an extend
    /// followed by a multiply.
    ///
    /// The extension to be performed is inferred from the `lane_width` and
    /// the `kind` of extmul; e.g., if `lane_width` is `S16` and `kind` is
    /// `LowSigned`, then we sign-extend the lower 8 bits of the 16-bit
    /// lanes.
    fn v128_extmul(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: V128ExtMulKind,
    ) -> Result<()>;

    /// Perform the lane-wise integer extended pairwise addition, producing
    /// extended results (twice as wide as the inputs).
    fn v128_extadd_pairwise(
        &mut self,
        src: Reg,
        dst: WritableReg,
        kind: V128ExtAddKind,
    ) -> Result<()>;

    /// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add
    /// adjacent pairs of the 32-bit results.
    fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>;

    /// Count the number of bits set in each lane.
    fn v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()>;

    /// Lane-wise rounding average of the vectors of integers in `lhs` and
    /// `rhs`, putting the results in `dst`.
    fn v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise IEEE division on vectors of floats.
    fn v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise IEEE square root of a vector of floats.
    fn v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise ceiling of a vector of floats.
    fn v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise flooring of a vector of floats.
    fn v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise rounding to nearest integer for a vector of floats.
    fn v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise minimum value, defined as `rhs < lhs ? rhs : lhs`.
    fn v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise maximum value, defined as `lhs < rhs ? rhs : lhs`.
    fn v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
}
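
// A scalar model (illustrative only; not used by any backend) of the
// pseudo-min/max semantics documented on `v128_pmin` and `v128_pmax` above.
// Unlike IEEE minimum/maximum, these select an operand via an ordered `<`
// comparison, so a NaN in `lhs` propagates while a NaN in `rhs` does not.
#[cfg(test)]
mod pseudo_minmax_model {
    /// `v128_pmin` lane rule: `rhs < lhs ? rhs : lhs`.
    fn pmin(lhs: f32, rhs: f32) -> f32 {
        if rhs < lhs { rhs } else { lhs }
    }

    /// `v128_pmax` lane rule: `lhs < rhs ? rhs : lhs`.
    fn pmax(lhs: f32, rhs: f32) -> f32 {
        if lhs < rhs { rhs } else { lhs }
    }

    #[test]
    fn pseudo_minmax_is_not_ieee() {
        // Any comparison with NaN is false, so the left operand is returned.
        assert!(pmin(f32::NAN, 1.0).is_nan());
        assert_eq!(pmin(1.0, f32::NAN), 1.0);
        assert!(pmax(f32::NAN, 1.0).is_nan());
        assert_eq!(pmax(1.0, f32::NAN), 1.0);
        // On ordered inputs they agree with the usual min/max.
        assert_eq!(pmin(1.0, 2.0), 1.0);
        assert_eq!(pmax(1.0, 2.0), 2.0);
    }
}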