// GitHub Repository: bytecodealliance/wasmtime
// Path: blob/main/winch/codegen/src/masm.rs

use crate::Result;
use crate::abi::{self, LocalSlot, align_to};
use crate::codegen::{CodeGenContext, Emission, FuncEnv};
use crate::isa::{
    CallingConvention,
    reg::{Reg, RegClass, WritableReg, writable},
};
use cranelift_codegen::{
    Final, MachBufferFinalized, MachLabel,
    binemit::CodeOffset,
    ir::{Endianness, MemFlags, RelSourceLoc, SourceLoc, UserExternalNameRef},
};
use std::{fmt::Debug, ops::Range};
use wasmtime_environ::{PtrSize, WasmHeapType, WasmRefType, WasmValType};

pub(crate) use cranelift_codegen::ir::TrapCode;

#[derive(Eq, PartialEq)]
pub(crate) enum DivKind {
    /// Signed division.
    Signed,
    /// Unsigned division.
    Unsigned,
}

/// Represents the `memory.atomic.wait*` kind.
#[derive(Debug, Clone, Copy)]
pub(crate) enum AtomicWaitKind {
    Wait32,
    Wait64,
}

/// Remainder kind.
#[derive(Copy, Clone)]
pub(crate) enum RemKind {
    /// Signed remainder.
    Signed,
    /// Unsigned remainder.
    Unsigned,
}

impl RemKind {
    pub fn is_signed(&self) -> bool {
        matches!(self, Self::Signed)
    }
}

/// Kinds of vector min operation supported by WebAssembly.
pub(crate) enum V128MinKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}

impl V128MinKind {
    /// The size of each lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
            Self::F64x2 => OperandSize::S64,
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
        }
    }
}

/// Kinds of vector max operation supported by WebAssembly.
pub(crate) enum V128MaxKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}

impl V128MaxKind {
    /// The size of each lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
            Self::F64x2 => OperandSize::S64,
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
        }
    }
}

#[derive(Eq, PartialEq)]
pub(crate) enum MulWideKind {
    Signed,
    Unsigned,
}

/// Type of operation for a read-modify-write instruction.
pub(crate) enum RmwOp {
    Add,
    Sub,
    Xchg,
    And,
    Or,
    Xor,
}

/// The direction to perform the memory move.
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum MemMoveDirection {
    /// From high memory addresses to low memory addresses.
    /// Invariant: the source location is closer to the FP than the destination
    /// location, which will be closer to the SP.
    HighToLow,
    /// From low memory addresses to high memory addresses.
    /// Invariant: the source location is closer to the SP than the destination
    /// location, which will be closer to the FP.
    LowToHigh,
}

/// Classifies how to treat float-to-int conversions.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum TruncKind {
    /// Saturating conversion. If the source value is greater than the maximum
    /// value of the destination type, the result is clamped to the
    /// destination maximum value.
    Checked,
    /// An exception is raised if the source value is greater than the maximum
    /// value of the destination type.
    Unchecked,
}

impl TruncKind {
    /// Returns true if the truncation kind is checked.
    pub(crate) fn is_checked(&self) -> bool {
        *self == TruncKind::Checked
    }

    /// Returns `true` if the trunc kind is [`Unchecked`].
    ///
    /// [`Unchecked`]: TruncKind::Unchecked
    #[must_use]
    pub(crate) fn is_unchecked(&self) -> bool {
        matches!(self, Self::Unchecked)
    }
}
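
// Illustrative sketch: how a lowering might branch on `TruncKind`. `Checked`
// corresponds to the saturating behavior documented above, `Unchecked` to the
// trapping one, and the two predicates are mutually exclusive.
//
//     fn needs_trap_check(kind: TruncKind) -> bool {
//         debug_assert!(kind.is_checked() != kind.is_unchecked());
//         kind.is_unchecked()
//     }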

/// Representation of the stack pointer offset.
#[derive(Copy, Clone, Eq, PartialEq, Debug, PartialOrd, Ord, Default)]
pub struct SPOffset(u32);

impl SPOffset {
    pub fn from_u32(offs: u32) -> Self {
        Self(offs)
    }

    pub fn as_u32(&self) -> u32 {
        self.0
    }
}

/// A stack slot.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct StackSlot {
    /// The location of the slot, relative to the stack pointer.
    pub offset: SPOffset,
    /// The size of the slot, in bytes.
    pub size: u32,
}

impl StackSlot {
    pub fn new(offs: SPOffset, size: u32) -> Self {
        Self { offset: offs, size }
    }
}

pub trait ScratchType {
    /// Derive the register class from the scratch register type.
    fn reg_class() -> RegClass;
}

/// A scratch register type of integer class.
pub struct IntScratch;
/// A scratch register type of floating point class.
pub struct FloatScratch;

impl ScratchType for IntScratch {
    fn reg_class() -> RegClass {
        RegClass::Int
    }
}

impl ScratchType for FloatScratch {
    fn reg_class() -> RegClass {
        RegClass::Float
    }
}

/// A scratch register scope.
#[derive(Debug, Clone, Copy)]
pub struct Scratch(Reg);

impl Scratch {
    pub fn new(r: Reg) -> Self {
        Self(r)
    }

    #[inline]
    pub fn inner(&self) -> Reg {
        self.0
    }

    #[inline]
    pub fn writable(&self) -> WritableReg {
        writable!(self.0)
    }
}

/// Kinds of integer binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum IntCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Signed less than.
    LtS,
    /// Unsigned less than.
    LtU,
    /// Signed greater than.
    GtS,
    /// Unsigned greater than.
    GtU,
    /// Signed less than or equal.
    LeS,
    /// Unsigned less than or equal.
    LeU,
    /// Signed greater than or equal.
    GeS,
    /// Unsigned greater than or equal.
    GeU,
}

/// Kinds of float binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug)]
pub(crate) enum FloatCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Less than.
    Lt,
    /// Greater than.
    Gt,
    /// Less than or equal.
    Le,
    /// Greater than or equal.
    Ge,
}

/// Kinds of shifts in WebAssembly. The [`masm`] implementation for each ISA is
/// responsible for emitting the correct sequence of instructions when
/// lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum ShiftKind {
    /// Left shift.
    Shl,
    /// Signed right shift.
    ShrS,
    /// Unsigned right shift.
    ShrU,
    /// Left rotate.
    Rotl,
    /// Right rotate.
    Rotr,
}

/// Kinds of extends in WebAssembly. Each MacroAssembler implementation
/// is responsible for emitting the correct sequence of instructions when
/// lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum ExtendKind {
    Signed(Extend<Signed>),
    Unsigned(Extend<Zero>),
}

#[derive(Copy, Clone)]
pub(crate) enum Signed {}
#[derive(Copy, Clone)]
pub(crate) enum Zero {}

pub(crate) trait ExtendType {}

impl ExtendType for Signed {}
impl ExtendType for Zero {}

#[derive(Copy, Clone)]
pub(crate) enum Extend<T: ExtendType> {
    /// 8 to 32 bit extend.
    I32Extend8,
    /// 16 to 32 bit extend.
    I32Extend16,
    /// 8 to 64 bit extend.
    I64Extend8,
    /// 16 to 64 bit extend.
    I64Extend16,
    /// 32 to 64 bit extend.
    I64Extend32,

    /// Variant to hold the kind of extend marker.
    ///
    /// This is `Signed` or `Zero`, which are empty enums, meaning that this
    /// variant cannot be constructed.
    __Kind(T),
}

impl From<Extend<Zero>> for ExtendKind {
    fn from(value: Extend<Zero>) -> Self {
        ExtendKind::Unsigned(value)
    }
}

impl<T: ExtendType> Extend<T> {
    pub fn from_size(&self) -> OperandSize {
        match self {
            Extend::I32Extend8 | Extend::I64Extend8 => OperandSize::S8,
            Extend::I32Extend16 | Extend::I64Extend16 => OperandSize::S16,
            Extend::I64Extend32 => OperandSize::S32,
            Extend::__Kind(_) => unreachable!(),
        }
    }

    pub fn to_size(&self) -> OperandSize {
        match self {
            Extend::I32Extend8 | Extend::I32Extend16 => OperandSize::S32,
            Extend::I64Extend8 | Extend::I64Extend16 | Extend::I64Extend32 => OperandSize::S64,
            Extend::__Kind(_) => unreachable!(),
        }
    }

    pub fn from_bits(&self) -> u8 {
        self.from_size().num_bits()
    }

    pub fn to_bits(&self) -> u8 {
        self.to_size().num_bits()
    }
}

impl From<Extend<Signed>> for ExtendKind {
    fn from(value: Extend<Signed>) -> Self {
        ExtendKind::Signed(value)
    }
}

impl ExtendKind {
    pub fn signed(&self) -> bool {
        match self {
            Self::Signed(_) => true,
            _ => false,
        }
    }

    pub fn from_bits(&self) -> u8 {
        match self {
            Self::Signed(s) => s.from_bits(),
            Self::Unsigned(u) => u.from_bits(),
        }
    }

    pub fn to_bits(&self) -> u8 {
        match self {
            Self::Signed(s) => s.to_bits(),
            Self::Unsigned(u) => u.to_bits(),
        }
    }
}
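
// Illustrative sketch: the size helpers report the source and destination
// widths of an extension, regardless of its signedness marker.
//
//     let kind: ExtendKind = Extend::<Signed>::I64Extend16.into();
//     assert!(kind.signed());
//     assert_eq!(kind.from_bits(), 16);
//     assert_eq!(kind.to_bits(), 64);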

/// Kinds of vector load and extends in WebAssembly. Each MacroAssembler
/// implementation is responsible for emitting the correct sequence of
/// instructions when lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum V128LoadExtendKind {
    /// Sign extends eight 8 bit integers to eight 16 bit lanes.
    E8x8S,
    /// Zero extends eight 8 bit integers to eight 16 bit lanes.
    E8x8U,
    /// Sign extends four 16 bit integers to four 32 bit lanes.
    E16x4S,
    /// Zero extends four 16 bit integers to four 32 bit lanes.
    E16x4U,
    /// Sign extends two 32 bit integers to two 64 bit lanes.
    E32x2S,
    /// Zero extends two 32 bit integers to two 64 bit lanes.
    E32x2U,
}

/// Kinds of splat loads supported by WebAssembly.
pub(crate) enum SplatLoadKind {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
}

/// Kinds of splat supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum SplatKind {
    /// 8 bit integer.
    I8x16,
    /// 16 bit integer.
    I16x8,
    /// 32 bit integer.
    I32x4,
    /// 64 bit integer.
    I64x2,
    /// 32 bit float.
    F32x4,
    /// 64 bit float.
    F64x2,
}

impl SplatKind {
    /// The lane size to use for different kinds of splats.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            SplatKind::I8x16 => OperandSize::S8,
            SplatKind::I16x8 => OperandSize::S16,
            SplatKind::I32x4 | SplatKind::F32x4 => OperandSize::S32,
            SplatKind::I64x2 | SplatKind::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of extract lane supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum ExtractLaneKind {
    /// 16 lanes of 8-bit integers sign extended to 32-bits.
    I8x16S,
    /// 16 lanes of 8-bit integers zero extended to 32-bits.
    I8x16U,
    /// 8 lanes of 16-bit integers sign extended to 32-bits.
    I16x8S,
    /// 8 lanes of 16-bit integers zero extended to 32-bits.
    I16x8U,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
}

impl ExtractLaneKind {
    /// The lane size to use for different kinds of extract lane kinds.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ExtractLaneKind::I8x16S | ExtractLaneKind::I8x16U => OperandSize::S8,
            ExtractLaneKind::I16x8S | ExtractLaneKind::I16x8U => OperandSize::S16,
            ExtractLaneKind::I32x4 | ExtractLaneKind::F32x4 => OperandSize::S32,
            ExtractLaneKind::I64x2 | ExtractLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

impl From<ExtractLaneKind> for Extend<Signed> {
    fn from(value: ExtractLaneKind) -> Self {
        match value {
            ExtractLaneKind::I8x16S => Extend::I32Extend8,
            ExtractLaneKind::I16x8S => Extend::I32Extend16,
            _ => unimplemented!(),
        }
    }
}
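
// Illustrative sketch: extracting a signed 8-bit lane yields an `S8` lane size
// and converts into the scalar extend that widens the lane to 32 bits.
//
//     let kind = ExtractLaneKind::I8x16S;
//     assert_eq!(kind.lane_size(), OperandSize::S8);
//     assert!(matches!(Extend::<Signed>::from(kind), Extend::I32Extend8));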

/// Kinds of replace lane supported by WebAssembly.
pub(crate) enum ReplaceLaneKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl ReplaceLaneKind {
    /// The lane size to use for different kinds of replace lane kinds.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ReplaceLaneKind::I8x16 => OperandSize::S8,
            ReplaceLaneKind::I16x8 => OperandSize::S16,
            ReplaceLaneKind::I32x4 => OperandSize::S32,
            ReplaceLaneKind::I64x2 => OperandSize::S64,
            ReplaceLaneKind::F32x4 => OperandSize::S32,
            ReplaceLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of behavior supported by Wasm loads.
pub(crate) enum LoadKind {
    /// Load the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Atomic load, with optional scalar extend.
    Atomic(OperandSize, Option<ExtendKind>),
    /// Duplicate value into vector lanes.
    Splat(SplatLoadKind),
    /// Scalar (non-vector) extend.
    ScalarExtend(ExtendKind),
    /// Vector extend.
    VectorExtend(V128LoadExtendKind),
    /// Load content into the selected lane.
    VectorLane(LaneSelector),
    /// Load a single element into the lowest bits of a vector and initialize
    /// all other bits to zero.
    VectorZero(OperandSize),
}

impl LoadKind {
    /// Returns the [`OperandSize`] used in the load operation.
    pub(crate) fn derive_operand_size(&self) -> OperandSize {
        match self {
            Self::ScalarExtend(extend) | Self::Atomic(_, Some(extend)) => {
                Self::operand_size_for_scalar(extend)
            }
            Self::VectorExtend(_) => OperandSize::S64,
            Self::Splat(kind) => Self::operand_size_for_splat(kind),
            Self::Operand(size)
            | Self::Atomic(size, None)
            | Self::VectorLane(LaneSelector { size, .. })
            | Self::VectorZero(size) => *size,
        }
    }

    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }

    fn operand_size_for_scalar(extend_kind: &ExtendKind) -> OperandSize {
        match extend_kind {
            ExtendKind::Signed(s) => s.from_size(),
            ExtendKind::Unsigned(u) => u.from_size(),
        }
    }

    fn operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize {
        match kind {
            SplatLoadKind::S8 => OperandSize::S8,
            SplatLoadKind::S16 => OperandSize::S16,
            SplatLoadKind::S32 => OperandSize::S32,
            SplatLoadKind::S64 => OperandSize::S64,
        }
    }

    pub(crate) fn is_atomic(&self) -> bool {
        matches!(self, Self::Atomic(_, _))
    }
}
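
// Illustrative sketch: `derive_operand_size` reports the width of the memory
// access itself, not of the resulting value. A sign-extending 8-to-64 bit load
// therefore reports `S8`, while a plain 64-bit operand load reports `S64`.
//
//     let extending = LoadKind::ScalarExtend(Extend::<Signed>::I64Extend8.into());
//     assert_eq!(extending.derive_operand_size(), OperandSize::S8);
//     assert_eq!(
//         LoadKind::Operand(OperandSize::S64).derive_operand_size(),
//         OperandSize::S64
//     );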

/// Kinds of behavior supported by Wasm stores.
#[derive(Copy, Clone)]
pub enum StoreKind {
    /// Store the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Store the entire bytes of the operand size without any modifications, atomically.
    Atomic(OperandSize),
    /// Store the content of selected lane.
    VectorLane(LaneSelector),
}

impl StoreKind {
    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }
}

#[derive(Copy, Clone)]
pub struct LaneSelector {
    pub lane: u8,
    pub size: OperandSize,
}

/// Types of vector integer to float conversions supported by WebAssembly.
pub(crate) enum V128ConvertKind {
    /// 4 lanes of signed 32-bit integers to 4 lanes of 32-bit floats.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers to 4 lanes of 32-bit floats.
    I32x4U,
    /// 4 lanes of signed 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowS,
    /// 4 lanes of unsigned 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowU,
}

impl V128ConvertKind {
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            V128ConvertKind::I32x4S
            | V128ConvertKind::I32x4U
            | V128ConvertKind::I32x4LowS
            | V128ConvertKind::I32x4LowU => OperandSize::S32,
        }
    }

    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        match self {
            V128ConvertKind::I32x4S | V128ConvertKind::I32x4U => OperandSize::S32,
            V128ConvertKind::I32x4LowS | V128ConvertKind::I32x4LowU => OperandSize::S64,
        }
    }
}

/// Kinds of vector narrowing operations supported by WebAssembly.
pub(crate) enum V128NarrowKind {
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// signed saturation.
    I16x8S,
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// unsigned saturation.
    I16x8U,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// signed saturation.
    I32x4S,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// unsigned saturation.
    I32x4U,
}

impl V128NarrowKind {
    /// Return the size of the destination lanes.
    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        match self {
            Self::I16x8S | Self::I16x8U => OperandSize::S8,
            Self::I32x4S | Self::I32x4U => OperandSize::S16,
        }
    }
}

/// Kinds of vector extending operations supported by WebAssembly.
#[derive(Debug, Copy, Clone)]
pub(crate) enum V128ExtendKind {
    /// Low half of i8x16 sign extended.
    LowI8x16S,
    /// High half of i8x16 sign extended.
    HighI8x16S,
    /// Low half of i8x16 zero extended.
    LowI8x16U,
    /// High half of i8x16 zero extended.
    HighI8x16U,
    /// Low half of i16x8 sign extended.
    LowI16x8S,
    /// High half of i16x8 sign extended.
    HighI16x8S,
    /// Low half of i16x8 zero extended.
    LowI16x8U,
    /// High half of i16x8 zero extended.
    HighI16x8U,
    /// Low half of i32x4 sign extended.
    LowI32x4S,
    /// High half of i32x4 sign extended.
    HighI32x4S,
    /// Low half of i32x4 zero extended.
    LowI32x4U,
    /// High half of i32x4 zero extended.
    HighI32x4U,
}

impl V128ExtendKind {
    /// The size of the source's lanes.
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            Self::LowI8x16S | Self::LowI8x16U | Self::HighI8x16S | Self::HighI8x16U => {
                OperandSize::S8
            }
            Self::LowI16x8S | Self::LowI16x8U | Self::HighI16x8S | Self::HighI16x8U => {
                OperandSize::S16
            }
            Self::LowI32x4S | Self::LowI32x4U | Self::HighI32x4S | Self::HighI32x4U => {
                OperandSize::S32
            }
        }
    }
}

/// Kinds of vector equalities and non-equalities supported by WebAssembly.
pub(crate) enum VectorEqualityKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl VectorEqualityKind {
    /// Get the lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
            Self::I32x4 | Self::F32x4 => OperandSize::S32,
            Self::I64x2 | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of vector comparisons supported by WebAssembly.
pub(crate) enum VectorCompareKind {
    /// 16 lanes of signed 8 bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8 bit integers.
    I8x16U,
    /// 8 lanes of signed 16 bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16 bit integers.
    I16x8U,
    /// 4 lanes of signed 32 bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32 bit integers.
    I32x4U,
    /// 2 lanes of signed 64 bit integers.
    I64x2S,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl VectorCompareKind {
    /// Get the lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
            Self::I32x4S | Self::I32x4U | Self::F32x4 => OperandSize::S32,
            Self::I64x2S | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of vector absolute operations supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum V128AbsKind {
    /// 8 bit integers.
    I8x16,
    /// 16 bit integers.
    I16x8,
    /// 32 bit integers.
    I32x4,
    /// 64 bit integers.
    I64x2,
    /// 32 bit floats.
    F32x4,
    /// 64 bit floats.
    F64x2,
}

impl V128AbsKind {
    /// The lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
            Self::I32x4 | Self::F32x4 => OperandSize::S32,
            Self::I64x2 | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of truncation for vectors supported by WebAssembly.
pub(crate) enum V128TruncKind {
    /// Truncates 4 lanes of 32-bit floats to nearest integral value.
    F32x4,
    /// Truncates 2 lanes of 64-bit floats to nearest integral value.
    F64x2,
    /// Integers from signed F32x4.
    I32x4FromF32x4S,
    /// Integers from unsigned F32x4.
    I32x4FromF32x4U,
    /// Integers from signed F64x2.
    I32x4FromF64x2SZero,
    /// Integers from unsigned F64x2.
    I32x4FromF64x2UZero,
}

impl V128TruncKind {
    /// The size of the source lanes.
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            V128TruncKind::F32x4
            | V128TruncKind::I32x4FromF32x4S
            | V128TruncKind::I32x4FromF32x4U => OperandSize::S32,
            V128TruncKind::F64x2
            | V128TruncKind::I32x4FromF64x2SZero
            | V128TruncKind::I32x4FromF64x2UZero => OperandSize::S64,
        }
    }

    /// The size of the destination lanes.
    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        if let V128TruncKind::F64x2 = self {
            OperandSize::S64
        } else {
            OperandSize::S32
        }
    }
}

/// Kinds of vector addition supported by WebAssembly.
pub(crate) enum V128AddKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

/// Kinds of vector subtraction supported by WebAssembly.
pub(crate) enum V128SubKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

impl From<V128NegKind> for V128SubKind {
    fn from(value: V128NegKind) -> Self {
        match value {
            V128NegKind::I8x16 => Self::I8x16,
            V128NegKind::I16x8 => Self::I16x8,
            V128NegKind::I32x4 => Self::I32x4,
            V128NegKind::I64x2 => Self::I64x2,
            V128NegKind::F32x4 | V128NegKind::F64x2 => unimplemented!(),
        }
    }
}

/// Kinds of vector multiplication supported by WebAssembly.
pub(crate) enum V128MulKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

/// Kinds of vector negation supported by WebAssembly.
#[derive(Copy, Clone)]
pub(crate) enum V128NegKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of 8-bit integers.
    I8x16,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

impl V128NegKind {
    /// The size of the lanes.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4 => OperandSize::S32,
            Self::F64x2 | Self::I64x2 => OperandSize::S64,
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
        }
    }
}

/// Kinds of extended pairwise addition supported by WebAssembly.
pub(crate) enum V128ExtAddKind {
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
}

/// Kinds of vector extended multiplication supported by WebAssembly.
#[derive(Debug, Clone, Copy)]
pub(crate) enum V128ExtMulKind {
    LowI8x16S,
    HighI8x16S,
    LowI8x16U,
    HighI8x16U,
    LowI16x8S,
    HighI16x8S,
    LowI16x8U,
    HighI16x8U,
    LowI32x4S,
    HighI32x4S,
    LowI32x4U,
    HighI32x4U,
}

impl From<V128ExtMulKind> for V128ExtendKind {
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S => Self::LowI8x16S,
            V128ExtMulKind::HighI8x16S => Self::HighI8x16S,
            V128ExtMulKind::LowI8x16U => Self::LowI8x16U,
            V128ExtMulKind::HighI8x16U => Self::HighI8x16U,
            V128ExtMulKind::LowI16x8S => Self::LowI16x8S,
            V128ExtMulKind::HighI16x8S => Self::HighI16x8S,
            V128ExtMulKind::LowI16x8U => Self::LowI16x8U,
            V128ExtMulKind::HighI16x8U => Self::HighI16x8U,
            V128ExtMulKind::LowI32x4S => Self::LowI32x4S,
            V128ExtMulKind::HighI32x4S => Self::HighI32x4S,
            V128ExtMulKind::LowI32x4U => Self::LowI32x4U,
            V128ExtMulKind::HighI32x4U => Self::HighI32x4U,
        }
    }
}

impl From<V128ExtMulKind> for V128MulKind {
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S
            | V128ExtMulKind::HighI8x16S
            | V128ExtMulKind::LowI8x16U
            | V128ExtMulKind::HighI8x16U => Self::I16x8,
            V128ExtMulKind::LowI16x8S
            | V128ExtMulKind::HighI16x8S
            | V128ExtMulKind::LowI16x8U
            | V128ExtMulKind::HighI16x8U => Self::I32x4,
            V128ExtMulKind::LowI32x4S
            | V128ExtMulKind::HighI32x4S
            | V128ExtMulKind::LowI32x4U
            | V128ExtMulKind::HighI32x4U => Self::I64x2,
        }
    }
}
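
// Illustrative sketch: the two conversions above pair an extended multiply
// with the vector extend of its narrow inputs and with the multiply performed
// in the widened shape.
//
//     let kind = V128ExtMulKind::LowI8x16S;
//     assert!(matches!(V128ExtendKind::from(kind), V128ExtendKind::LowI8x16S));
//     assert!(matches!(V128MulKind::from(kind), V128MulKind::I16x8));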

/// Operand size, in bits.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum OperandSize {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
    /// 128 bits.
    S128,
}

impl OperandSize {
    /// The number of bits in the operand.
    pub fn num_bits(&self) -> u8 {
        match self {
            OperandSize::S8 => 8,
            OperandSize::S16 => 16,
            OperandSize::S32 => 32,
            OperandSize::S64 => 64,
            OperandSize::S128 => 128,
        }
    }

    /// The number of bytes in the operand.
    pub fn bytes(&self) -> u32 {
        match self {
            Self::S8 => 1,
            Self::S16 => 2,
            Self::S32 => 4,
            Self::S64 => 8,
            Self::S128 => 16,
        }
    }

    /// The binary logarithm of the number of bits in the operand.
    pub fn log2(&self) -> u8 {
        match self {
            OperandSize::S8 => 3,
            OperandSize::S16 => 4,
            OperandSize::S32 => 5,
            OperandSize::S64 => 6,
            OperandSize::S128 => 7,
        }
    }

    /// Create an [`OperandSize`] from the given number of bytes.
    pub fn from_bytes(bytes: u8) -> Self {
        use OperandSize::*;
        match bytes {
            4 => S32,
            8 => S64,
            16 => S128,
            _ => panic!("Invalid bytes {bytes} for OperandSize"),
        }
    }

    pub fn extend_to<T: ExtendType>(&self, to: Self) -> Option<Extend<T>> {
        match to {
            OperandSize::S32 => match self {
                OperandSize::S8 => Some(Extend::I32Extend8),
                OperandSize::S16 => Some(Extend::I32Extend16),
                _ => None,
            },
            OperandSize::S64 => match self {
                OperandSize::S8 => Some(Extend::I64Extend8),
                OperandSize::S16 => Some(Extend::I64Extend16),
                OperandSize::S32 => Some(Extend::I64Extend32),
                _ => None,
            },
            _ => None,
        }
    }

    /// The number of bits in the mantissa.
    ///
    /// Only implemented for floats.
    pub fn mantissa_bits(&self) -> u8 {
        match self {
            Self::S32 => 8,
            Self::S64 => 11,
            _ => unimplemented!(),
        }
    }
}
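
// Illustrative sketch of the size helpers and how they relate to each other:
//
//     let size = OperandSize::S32;
//     assert_eq!(size.num_bits(), 32);
//     assert_eq!(size.bytes(), 4);
//     assert_eq!(size.log2(), 5); // 2^5 == 32 bits
//     assert_eq!(OperandSize::from_bytes(4), OperandSize::S32);
//     // Widening a 32-bit value to 64 bits uses the `I64Extend32` kind.
//     assert!(matches!(
//         OperandSize::S32.extend_to::<Zero>(OperandSize::S64),
//         Some(Extend::I64Extend32)
//     ));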

/// An abstraction over a register or immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum RegImm {
    /// A register.
    Reg(Reg),
    /// A tagged immediate argument.
    Imm(Imm),
}

/// A tagged representation of an immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum Imm {
    /// I32 immediate.
    I32(u32),
    /// I64 immediate.
    I64(u64),
    /// F32 immediate.
    F32(u32),
    /// F64 immediate.
    F64(u64),
    /// V128 immediate.
    V128(i128),
}

impl Imm {
    /// Create a new I64 immediate.
    pub fn i64(val: i64) -> Self {
        Self::I64(val as u64)
    }

    /// Create a new I32 immediate.
    pub fn i32(val: i32) -> Self {
        Self::I32(val as u32)
    }

    /// Create a new F32 immediate.
    pub fn f32(bits: u32) -> Self {
        Self::F32(bits)
    }

    /// Create a new F64 immediate.
    pub fn f64(bits: u64) -> Self {
        Self::F64(bits)
    }

    /// Create a new V128 immediate.
    pub fn v128(bits: i128) -> Self {
        Self::V128(bits)
    }

    /// Convert the immediate to i32, if possible.
    pub fn to_i32(&self) -> Option<i32> {
        match self {
            Self::I32(v) => Some(*v as i32),
            Self::I64(v) => i32::try_from(*v as i64).ok(),
            _ => None,
        }
    }

    /// Unwraps the underlying integer value as u64.
    /// # Panics
    /// This function panics if the underlying value can't be represented
    /// as u64.
    pub fn unwrap_as_u64(&self) -> u64 {
        match self {
            Self::I32(v) => *v as u64,
            Self::I64(v) => *v,
            Self::F32(v) => *v as u64,
            Self::F64(v) => *v,
            _ => unreachable!(),
        }
    }

    /// Get the operand size of the immediate.
    pub fn size(&self) -> OperandSize {
        match self {
            Self::I32(_) | Self::F32(_) => OperandSize::S32,
            Self::I64(_) | Self::F64(_) => OperandSize::S64,
            Self::V128(_) => OperandSize::S128,
        }
    }

    /// Get a little endian representation of the immediate.
    ///
    /// This method heap allocates and is intended to be used when adding
    /// values to the constant pool.
    pub fn to_bytes(&self) -> Vec<u8> {
        match self {
            Imm::I32(n) => n.to_le_bytes().to_vec(),
            Imm::I64(n) => n.to_le_bytes().to_vec(),
            Imm::F32(n) => n.to_le_bytes().to_vec(),
            Imm::F64(n) => n.to_le_bytes().to_vec(),
            Imm::V128(n) => n.to_le_bytes().to_vec(),
        }
    }
}
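
// Illustrative sketch: immediates are tagged with their Wasm type and stored
// as raw bits, so a negative `i32` round-trips through `u32` and serializes as
// 4 little-endian bytes for the constant pool.
//
//     let imm = Imm::i32(-1);
//     assert_eq!(imm.size(), OperandSize::S32);
//     assert_eq!(imm.to_i32(), Some(-1));
//     assert_eq!(imm.to_bytes(), vec![0xff, 0xff, 0xff, 0xff]);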

/// The location of the [VMContext] used for function calls.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum VMContextLoc {
    /// Dynamic, stored in the given register.
    Reg(Reg),
    /// The pinned [VMContext] register.
    Pinned,
    /// A different VMContext is loaded at the provided offset from the current
    /// VMContext.
    OffsetFromPinned(u32),
}

/// The maximum number of context arguments currently used across the compiler.
pub(crate) const MAX_CONTEXT_ARGS: usize = 2;

/// Out-of-band special purpose arguments used for function call emission.
///
/// We cannot rely on the value stack for these values given that inserting
/// register or memory values at arbitrary locations of the value stack has the
/// potential to break the stack ordering principle, which states that older
/// values must always precede newer values, effectively simulating the order of
/// values in the machine stack.
/// The [ContextArgs] are meant to be resolved at every callsite; in some cases
/// it might be possible to construct it early on, but given that it might
/// contain allocatable registers, it's preferred to construct it in
/// [FnCall::emit].
#[derive(Clone, Debug)]
pub(crate) enum ContextArgs {
    /// A single context argument is required; the current pinned [VMContext]
    /// register must be passed as the first argument of the function call.
    VMContext([VMContextLoc; 1]),
    /// The callee and caller context arguments are required. In this case, the
    /// callee context argument is usually stored into an allocatable register
    /// and the caller is always the current pinned [VMContext] pointer.
    CalleeAndCallerVMContext([VMContextLoc; MAX_CONTEXT_ARGS]),
}

impl ContextArgs {
    /// Construct a [ContextArgs] declaring the usage of the pinned [VMContext]
    /// register as both the caller and callee context arguments.
    pub fn pinned_callee_and_caller_vmctx() -> Self {
        Self::CalleeAndCallerVMContext([VMContextLoc::Pinned, VMContextLoc::Pinned])
    }

    /// Construct a [ContextArgs] that declares the usage of the pinned
    /// [VMContext] register as the only context argument.
    pub fn pinned_vmctx() -> Self {
        Self::VMContext([VMContextLoc::Pinned])
    }

    /// Construct a [ContextArgs] that declares the usage of a [VMContext] loaded
    /// indirectly from the pinned [VMContext] register as the only context
    /// argument.
    pub fn offset_from_pinned_vmctx(offset: u32) -> Self {
        Self::VMContext([VMContextLoc::OffsetFromPinned(offset)])
    }

    /// Construct a [ContextArgs] that declares a dynamic callee context and the
    /// pinned [VMContext] register as the context arguments.
    pub fn with_callee_and_pinned_caller(callee_vmctx: Reg) -> Self {
        Self::CalleeAndCallerVMContext([VMContextLoc::Reg(callee_vmctx), VMContextLoc::Pinned])
    }

    /// Get the length of the [ContextArgs].
    pub fn len(&self) -> usize {
        self.as_slice().len()
    }

    /// Get a slice of the context arguments.
    pub fn as_slice(&self) -> &[VMContextLoc] {
        match self {
            Self::VMContext(a) => a.as_slice(),
            Self::CalleeAndCallerVMContext(a) => a.as_slice(),
        }
    }
}
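
// Illustrative sketch: every call site carries either one or two context
// arguments, and the slice view preserves the callee-then-caller order used
// by the constructors above.
//
//     let args = ContextArgs::pinned_callee_and_caller_vmctx();
//     assert_eq!(args.len(), MAX_CONTEXT_ARGS);
//     assert!(matches!(
//         args.as_slice(),
//         [VMContextLoc::Pinned, VMContextLoc::Pinned]
//     ));
//
//     let single = ContextArgs::pinned_vmctx();
//     assert_eq!(single.len(), 1);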

#[derive(Copy, Clone, Debug)]
pub(crate) enum CalleeKind {
    /// A function call to a raw address.
    Indirect(Reg),
    /// A function call to a local function.
    Direct(UserExternalNameRef),
}

impl CalleeKind {
    /// Creates a callee kind from a register.
    pub fn indirect(reg: Reg) -> Self {
        Self::Indirect(reg)
    }

    /// Creates a direct callee kind from a function name.
    pub fn direct(name: UserExternalNameRef) -> Self {
        Self::Direct(name)
    }
}

impl RegImm {
    /// Register constructor.
    pub fn reg(r: Reg) -> Self {
        RegImm::Reg(r)
    }

    /// I64 immediate constructor.
    pub fn i64(val: i64) -> Self {
        RegImm::Imm(Imm::i64(val))
    }

    /// I32 immediate constructor.
    pub fn i32(val: i32) -> Self {
        RegImm::Imm(Imm::i32(val))
    }

    /// F32 immediate, stored using its bits representation.
    pub fn f32(bits: u32) -> Self {
        RegImm::Imm(Imm::f32(bits))
    }

    /// F64 immediate, stored using its bits representation.
    pub fn f64(bits: u64) -> Self {
        RegImm::Imm(Imm::f64(bits))
    }

    /// V128 immediate.
    pub fn v128(bits: i128) -> Self {
        RegImm::Imm(Imm::v128(bits))
    }
}

impl From<Reg> for RegImm {
    fn from(r: Reg) -> Self {
        Self::Reg(r)
    }
}

#[derive(Debug)]
pub enum RoundingMode {
    Nearest,
    Up,
    Down,
    Zero,
}

/// Memory flags for trusted loads/stores.
pub const TRUSTED_FLAGS: MemFlags = MemFlags::trusted();

/// Flags used for WebAssembly loads / stores.
/// Untrusted by default so we don't set `no_trap`.
/// We also ensure that the endianness is the right one for WebAssembly.
pub const UNTRUSTED_FLAGS: MemFlags = MemFlags::new().with_endianness(Endianness::Little);

/// Generic MacroAssembler interface used by the code generation.
///
/// The MacroAssembler trait aims to expose an interface high-level enough
/// that each ISA can provide its own lowering to machine code. For example,
/// for WebAssembly operators that don't have a direct mapping to a machine
/// instruction, the interface defines a signature matching the WebAssembly
/// operator, allowing each implementation to lower such an operator entirely.
/// This approach attributes more responsibility to the MacroAssembler, but
/// frees the caller from having to worry about assembling the right sequence
/// of instructions at the operator callsite.
///
/// The interface defaults to a three-argument form for binary operations;
/// this allows a natural mapping to instructions for RISC architectures,
/// which use a three-argument form.
/// This approach allows for a more general interface that can be restricted
/// where needed, in the case of architectures that use a two-argument form.

pub(crate) trait MacroAssembler {
    /// The addressing mode.
    type Address: Copy + Debug;

    /// The pointer representation of the target ISA,
    /// used to access information from [`VMOffsets`].
    type Ptr: PtrSize;

    /// The ABI details of the target.
    type ABI: abi::ABI;

    /// Emit the function prologue.
    fn prologue(&mut self, vmctx: Reg) -> Result<()> {
        self.frame_setup()?;
        self.check_stack(vmctx)
    }

    /// Generate the frame setup sequence.
    fn frame_setup(&mut self) -> Result<()>;

    /// Generate the frame restore sequence.
    fn frame_restore(&mut self) -> Result<()>;

    /// Emit a stack check.
    fn check_stack(&mut self, vmctx: Reg) -> Result<()>;

    /// Emit the function epilogue.
    fn epilogue(&mut self) -> Result<()> {
        self.frame_restore()
    }

    /// Reserve stack space.
    fn reserve_stack(&mut self, bytes: u32) -> Result<()>;

    /// Free stack space.
    fn free_stack(&mut self, bytes: u32) -> Result<()>;

    /// Reset the stack pointer to the given offset.
    ///
    /// Used to reset the stack pointer to a given offset
    /// when dealing with unreachable code.
    fn reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()>;

    /// Get the address of a local slot.
    fn local_address(&mut self, local: &LocalSlot) -> Result<Self::Address>;

    /// Constructs an address with an offset that is relative to the
    /// current position of the stack pointer (e.g. [sp + (sp_offset -
    /// offset)]).
    fn address_from_sp(&self, offset: SPOffset) -> Result<Self::Address>;

    /// Constructs an address with an offset that is absolute to the
    /// current position of the stack pointer (e.g. [sp + offset]).
    fn address_at_sp(&self, offset: SPOffset) -> Result<Self::Address>;

    /// Alias for [`Self::address_at_reg`] using the VMContext register as
    /// a base. The VMContext register is derived from the ABI type that is
    /// associated to the MacroAssembler.
    fn address_at_vmctx(&self, offset: u32) -> Result<Self::Address>;

    /// Construct an address that is absolute to the current position
    /// of the given register.
    fn address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address>;

    /// Emit a function call to either a local or external function.
    fn call(
        &mut self,
        stack_args_size: u32,
        f: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>,
    ) -> Result<u32>;

    /// Acquire a scratch register and execute the given callback.
    fn with_scratch<T: ScratchType, R>(&mut self, f: impl FnOnce(&mut Self, Scratch) -> R) -> R;

    /// Convenience wrapper over [`Self::with_scratch`], derives the register class
    /// for a particular Wasm value type.
    fn with_scratch_for<R>(
        &mut self,
        ty: WasmValType,
        f: impl FnOnce(&mut Self, Scratch) -> R,
    ) -> R {
        match ty {
            WasmValType::I32
            | WasmValType::I64
            | WasmValType::Ref(WasmRefType {
                heap_type: WasmHeapType::Func,
                ..
            }) => self.with_scratch::<IntScratch, _>(f),
            WasmValType::F32 | WasmValType::F64 | WasmValType::V128 => {
                self.with_scratch::<FloatScratch, _>(f)
            }
            _ => unimplemented!(),
        }
    }

    /// Get stack pointer offset.
    fn sp_offset(&self) -> Result<SPOffset>;

    /// Perform a stack store.
    fn store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize) -> Result<()>;

    /// Alias for `MacroAssembler::store` with the operand size corresponding
    /// to the pointer size of the target.
    fn store_ptr(&mut self, src: Reg, dst: Self::Address) -> Result<()>;

    /// Perform a WebAssembly store.
    /// A WebAssembly store introduces several additional invariants compared to
    /// [Self::store]; more precisely, it can implicitly trap, in certain
    /// circumstances, even if explicit bounds checks are elided, and in that
    /// sense we consider this type of store untrusted. It can also differ with
    /// regards to the endianness depending on the target ISA. For this reason,
    /// [Self::wasm_store] should be explicitly used when emitting WebAssembly
    /// stores.
    fn wasm_store(&mut self, src: Reg, dst: Self::Address, store_kind: StoreKind) -> Result<()>;

    /// Perform a zero-extended stack load.
    fn load(&mut self, src: Self::Address, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a WebAssembly load.
    /// A WebAssembly load introduces several additional invariants compared to
    /// [Self::load]; more precisely, it can implicitly trap, in certain
    /// circumstances, even if explicit bounds checks are elided, and in that
    /// sense we consider this type of load untrusted. It can also differ with
    /// regards to the endianness depending on the target ISA. For this reason,
    /// [Self::wasm_load] should be explicitly used when emitting WebAssembly
    /// loads.
    fn wasm_load(&mut self, src: Self::Address, dst: WritableReg, kind: LoadKind) -> Result<()>;

    /// Alias for `MacroAssembler::load` with the operand size corresponding
    /// to the pointer size of the target.
    fn load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()>;

    /// Computes the effective address and stores the result in the destination
    /// register.
    fn compute_addr(
        &mut self,
        _src: Self::Address,
        _dst: WritableReg,
        _size: OperandSize,
    ) -> Result<()>;

    /// Pop a value from the machine stack into the given register.
    fn pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a move.
    fn mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a conditional move.
    fn cmov(&mut self, dst: WritableReg, src: Reg, cc: IntCmpKind, size: OperandSize)
    -> Result<()>;

    /// Performs a memory move of bytes from src to dest.
    /// Bytes are moved in blocks of 8 bytes, where possible.
    fn memmove(
        &mut self,
        src: SPOffset,
        dst: SPOffset,
        bytes: u32,
        direction: MemMoveDirection,
    ) -> Result<()> {
        match direction {
            MemMoveDirection::LowToHigh => debug_assert!(dst.as_u32() < src.as_u32()),
            MemMoveDirection::HighToLow => debug_assert!(dst.as_u32() > src.as_u32()),
        }
        // At least 4 byte aligned.
        debug_assert!(bytes % 4 == 0);
        let mut remaining = bytes;
        let word_bytes = <Self::ABI as abi::ABI>::word_bytes();

        let word_bytes = word_bytes as u32;

        let mut dst_offs;
        let mut src_offs;
        match direction {
            MemMoveDirection::LowToHigh => {
                dst_offs = dst.as_u32() - bytes;
                src_offs = src.as_u32() - bytes;
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        remaining -= word_bytes;
                        dst_offs += word_bytes;
                        src_offs += word_bytes;

                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;
                    }
                    wasmtime_environ::error::Ok(())
                })?;
            }
            MemMoveDirection::HighToLow => {
                // Go from the end to the beginning to handle overlapping addresses.
                src_offs = src.as_u32();
                dst_offs = dst.as_u32();
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;

                        remaining -= word_bytes;
                        src_offs -= word_bytes;
                        dst_offs -= word_bytes;
                    }
                    wasmtime_environ::error::Ok(())
                })?;
            }
        }

        if remaining > 0 {
            let half_word = word_bytes / 2;
            let ptr_size = OperandSize::from_bytes(half_word as u8);
            debug_assert!(remaining == half_word);
            // Need to move the offsets ahead in the `LowToHigh` case to
            // compensate for the initial subtraction of `bytes`.
            if direction == MemMoveDirection::LowToHigh {
                dst_offs += half_word;
                src_offs += half_word;
            }

            self.with_scratch::<IntScratch, _>(|masm, scratch| {
                masm.load(
                    masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                    scratch.writable(),
                    ptr_size,
                )?;
                masm.store(
                    scratch.inner().into(),
                    masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                    ptr_size,
                )?;
                wasmtime_environ::error::Ok(())
            })?;
        }
        Ok(())
    }
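
    // Illustrative sketch of the `memmove` arithmetic, assuming a 64-bit
    // target where `word_bytes` is 8: moving 12 bytes copies one full 8-byte
    // word through the scratch register, leaving `remaining` at 4, which then
    // satisfies the `remaining == half_word` assertion and is copied with a
    // final `OperandSize::S32` load/store pair.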

    /// Perform add operation.
    fn add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a checked unsigned integer addition, emitting the provided trap
    /// if the addition overflows.
    fn checked_uadd(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: RegImm,
        size: OperandSize,
        trap: TrapCode,
    ) -> Result<()>;

    /// Perform subtraction operation.
    fn sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform multiplication operation.
    fn mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a floating point add operation.
    fn float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point subtraction operation.
    fn float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point multiply operation.
    fn float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point divide operation.
    fn float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point minimum operation. In x86, this will emit
    /// multiple instructions.
    fn float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point maximum operation. In x86, this will emit
    /// multiple instructions.
    fn float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point copysign operation. In x86, this will emit
    /// multiple instructions.
    fn float_copysign(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Perform a floating point abs operation.
    fn float_abs(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a floating point negation operation.
    fn float_neg(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a floating point rounding operation.
    fn float_round<
        F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext<Emission>, &mut Self) -> Result<()>,
    >(
        &mut self,
        mode: RoundingMode,
        env: &mut FuncEnv<Self::Ptr>,
        context: &mut CodeGenContext<Emission>,
        size: OperandSize,
        fallback: F,
    ) -> Result<()>;

    /// Perform a floating point square root operation.
    fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;

    /// Perform logical and operation.
    fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform logical or operation.
    fn or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform logical exclusive or operation.
    fn xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a shift operation between a register and an immediate.
    fn shift_ir(
        &mut self,
        dst: WritableReg,
        imm: Imm,
        lhs: Reg,
        kind: ShiftKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Perform a shift operation between two registers.
    /// This case is special in that some architectures have specific expectations
    /// regarding the location of the instruction arguments. To free the
    /// caller from having to deal with the architecture specific constraints
    /// we give this function access to the code generation context, allowing
    /// each implementation to decide the lowering path.
    fn shift(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: ShiftKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Perform division operation.
    /// Division is special in that some architectures have specific
    /// expectations regarding the location of the instruction
    /// arguments and regarding the location of the quotient /
    /// remainder. To free the caller from having to deal with the
    /// architecture specific constraints we give this function access
    /// to the code generation context, allowing each implementation
    /// to decide the lowering path. For cases in which division is an
    /// unconstrained binary operation, the caller can decide to use
    /// the `CodeGenContext::i32_binop` or `CodeGenContext::i64_binop`
    /// functions.
    fn div(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: DivKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Calculate remainder.
    fn rem(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: RemKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Compares `src1` against `src2` for the side effect of setting processor
    /// flags.
    ///
    /// Note that `src1` is the left-hand-side of the comparison and `src2` is
    /// the right-hand-side, so if testing `a < b` then `src1 == a` and
    /// `src2 == b`
    fn cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()>;

    /// Compare src and dst and put the result in dst.
    /// This function will potentially emit a series of instructions.
    ///
    /// The initial value in `dst` is the left-hand-side of the comparison and
    /// the initial value in `src` is the right-hand-side of the comparison.
    /// That means for `a < b` then `dst == a` and `src == b`.
    fn cmp_with_set(
        &mut self,
        dst: WritableReg,
        src: RegImm,
        kind: IntCmpKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Compare floats in src1 and src2 and put the result in dst.
    /// In x86, this will emit multiple instructions.
    fn float_cmp_with_set(
        &mut self,
        dst: WritableReg,
        src1: Reg,
        src2: Reg,
        kind: FloatCmpKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Count the number of leading zeroes in src and put the result in dst.
    /// In x64, this will emit multiple instructions if the `has_lzcnt` flag is
    /// false.
    fn clz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;

    /// Count the number of trailing zeroes in src and put the result in dst.
    /// In x64, this will emit multiple instructions if the `has_tzcnt` flag is
    /// false.
    fn ctz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1795
1796
/// Push the register to the stack, returning the stack slot metadata.
1797
// NB
1798
// The stack alignment should not be assumed after any call to `push`,
1799
// unless explicitly aligned otherwise. Typically, stack alignment is
1800
// maintained at call sites and during the execution of
1801
// epilogues.
1802
fn push(&mut self, src: Reg, size: OperandSize) -> Result<StackSlot>;
1803
1804
/// Finalize the assembly and return the result.
1805
fn finalize(self, base: Option<SourceLoc>) -> Result<MachBufferFinalized<Final>>;
1806
1807
/// Zero a particular register.
1808
fn zero(&mut self, reg: WritableReg) -> Result<()>;
1809
1810
/// Count the number of 1 bits in src and put the result in dst. In x64,
1811
/// this will emit multiple instructions if the `has_popcnt` flag is false.
1812
fn popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()>;
1813
1814
/// Converts an i64 to an i32 by discarding the high 32 bits.
1815
fn wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1816
1817
/// Extends an integer of a given size to a larger size.
1818
fn extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()>;
1819
1820
/// Emits one or more instructions to perform a signed truncation of a
1821
/// float into an integer.
1822
fn signed_truncate(
1823
&mut self,
1824
dst: WritableReg,
1825
src: Reg,
1826
src_size: OperandSize,
1827
dst_size: OperandSize,
1828
kind: TruncKind,
1829
) -> Result<()>;
1830
1831
/// Emits one or more instructions to perform an unsigned truncation of a
1832
/// float into an integer.
1833
fn unsigned_truncate(
1834
&mut self,
1835
context: &mut CodeGenContext<Emission>,
1836
src_size: OperandSize,
1837
dst_size: OperandSize,
1838
kind: TruncKind,
1839
) -> Result<()>;
1840
1841
/// Emits one or more instructions to perform a signed convert of an
1842
/// integer into a float.
1843
fn signed_convert(
1844
&mut self,
1845
dst: WritableReg,
1846
src: Reg,
1847
src_size: OperandSize,
1848
dst_size: OperandSize,
1849
) -> Result<()>;
1850
1851
/// Emits one or more instructions to perform an unsigned convert of an
1852
/// integer into a float.
1853
fn unsigned_convert(
1854
&mut self,
1855
dst: WritableReg,
1856
src: Reg,
1857
tmp_gpr: Reg,
1858
src_size: OperandSize,
1859
dst_size: OperandSize,
1860
) -> Result<()>;
1861
1862
/// Reinterpret a float as an integer.
1863
fn reinterpret_float_as_int(
1864
&mut self,
1865
dst: WritableReg,
1866
src: Reg,
1867
size: OperandSize,
1868
) -> Result<()>;
1869
1870
/// Reinterpret an integer as a float.
1871
fn reinterpret_int_as_float(
1872
&mut self,
1873
dst: WritableReg,
1874
src: Reg,
1875
size: OperandSize,
1876
) -> Result<()>;
1877
1878
/// Demote an f64 to an f32.
1879
fn demote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1880
1881
/// Promote an f32 to an f64.
1882
fn promote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1883
1884
/// Zero a given memory range.
1885
///
1886
/// The default implementation divides the given memory range
1887
/// into word-sized slots. Then it unrolls a series of store
1888
/// instructions, effectively assigning zero to each slot.
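///
/// As an illustration (assuming a 64-bit target, so an 8-byte word
/// size): for the range `28..48`, the default implementation first
/// emits a single 32-bit store to zero the unaligned `28..32` head of
/// the range, and then emits 64-bit stores for the two word-sized
/// slots covering `32..48`.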
fn zero_mem_range(&mut self, mem: &Range<u32>) -> Result<()> {
let word_size = <Self::ABI as abi::ABI>::word_bytes() as u32;
if mem.is_empty() {
return Ok(());
}

let start = if mem.start % word_size == 0 {
mem.start
} else {
// Ensure that the start of the range is at least 4-byte aligned.
assert!(mem.start % 4 == 0);
let start = align_to(mem.start, word_size);
let addr: Self::Address = self.local_address(&LocalSlot::i32(start))?;
self.store(RegImm::i32(0), addr, OperandSize::S32)?;
// Ensure that the new start of the range is word-size aligned.
assert!(start % word_size == 0);
start
};

let end = align_to(mem.end, word_size);
let slots = (end - start) / word_size;

if slots == 1 {
let slot = LocalSlot::i64(start + word_size);
let addr: Self::Address = self.local_address(&slot)?;
self.store(RegImm::i64(0), addr, OperandSize::S64)?;
} else {
// TODO
// Add an upper bound to this generation;
// given a considerably large number of slots
// this will be inefficient.
self.with_scratch::<IntScratch, _>(|masm, scratch| {
masm.zero(scratch.writable())?;
let zero = RegImm::reg(scratch.inner());

for step in (start..end).step_by(word_size as usize) {
let slot = LocalSlot::i64(step + word_size);
let addr: Self::Address = masm.local_address(&slot)?;
masm.store(zero, addr, OperandSize::S64)?;
}
wasmtime_environ::error::Ok(())
})?;
}

Ok(())
}

/// Generate a label.
fn get_label(&mut self) -> Result<MachLabel>;

/// Bind the given label at the current code offset.
fn bind(&mut self, label: MachLabel) -> Result<()>;

/// Conditional branch.
///
/// Performs a comparison between the two operands,
/// and immediately after emits a jump to the given
/// label destination if the condition is met.
fn branch(
&mut self,
kind: IntCmpKind,
lhs: Reg,
rhs: RegImm,
taken: MachLabel,
size: OperandSize,
) -> Result<()>;

/// Emits an unconditional jump to the given label.
fn jmp(&mut self, target: MachLabel) -> Result<()>;

/// Emits a jump table sequence. The default label is specified as
/// the last element of the targets slice.
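///
/// For example (illustrative only), with `targets == [l0, l1, default]`,
/// an `index` of 0 jumps to `l0`, an `index` of 1 jumps to `l1`, and any
/// out-of-range index falls back to `default`.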
fn jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()>;

/// Emit an unreachable code trap.
fn unreachable(&mut self) -> Result<()>;

/// Emit an unconditional trap.
fn trap(&mut self, code: TrapCode) -> Result<()>;

/// Traps if the condition code is met.
fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()>;

/// Trap if the source register is zero.
fn trapz(&mut self, src: Reg, code: TrapCode) -> Result<()>;

/// Ensures that the stack pointer is correctly positioned before an unconditional
/// jump according to the requirements of the destination target.
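///
/// As an illustration (the numbers are hypothetical): if the current
/// stack pointer offset is 32 bytes and the destination target expects
/// 24 bytes, the default implementation frees the 8-byte difference;
/// if the current offset is already at or below the target, no
/// adjustment is emitted.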
fn ensure_sp_for_jump(&mut self, target: SPOffset) -> Result<()> {
let bytes = self
.sp_offset()?
.as_u32()
.checked_sub(target.as_u32())
.unwrap_or(0);

if bytes > 0 {
self.free_stack(bytes)?;
}

Ok(())
}

/// Mark the start of a source location returning the machine code offset
/// and the relative source code location.
fn start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)>;

/// Mark the end of a source location.
fn end_source_loc(&mut self) -> Result<()>;

/// The current offset, in bytes from the beginning of the function.
fn current_code_offset(&self) -> Result<CodeOffset>;

/// Performs a 128-bit addition.
fn add128(
&mut self,
dst_lo: WritableReg,
dst_hi: WritableReg,
lhs_lo: Reg,
lhs_hi: Reg,
rhs_lo: Reg,
rhs_hi: Reg,
) -> Result<()>;

/// Performs a 128-bit subtraction.
fn sub128(
&mut self,
dst_lo: WritableReg,
dst_hi: WritableReg,
lhs_lo: Reg,
lhs_hi: Reg,
rhs_lo: Reg,
rhs_hi: Reg,
) -> Result<()>;

/// Performs a widening multiplication from two 64-bit operands into a
/// 128-bit result.
///
/// Note that some platforms require special handling of registers in this
/// instruction (e.g. x64) so full access to `CodeGenContext` is provided.
fn mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind)
-> Result<()>;

/// Takes the value in a src operand and replicates it across lanes of
/// `size` in a destination result.
fn splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()>;

/// Performs a shuffle between two 128-bit vectors into a 128-bit result
/// using lanes as a mask to select which indexes to copy.
fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>;

/// Performs a swizzle between two 128-bit vectors into a 128-bit result.
fn swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()>;

/// Performs the RMW `op` operation on the passed `addr`.
///
/// The value *before* the operation was performed is written back to the `operand` register.
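///
/// For example (illustrative only), an `RmwOp::Add` of `1` applied to a
/// location currently holding `5` stores `6` at `addr`, while `5` (the
/// previous value) is the result made available to the caller.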
fn atomic_rmw(
&mut self,
context: &mut CodeGenContext<Emission>,
addr: Self::Address,
size: OperandSize,
op: RmwOp,
flags: MemFlags,
extend: Option<Extend<Zero>>,
) -> Result<()>;

/// Extracts the scalar value from `src` in `lane` to `dst`.
fn extract_lane(
&mut self,
src: Reg,
dst: WritableReg,
lane: u8,
kind: ExtractLaneKind,
) -> Result<()>;

/// Replaces the value in `lane` in `dst` with the value in `src`.
fn replace_lane(
&mut self,
src: RegImm,
dst: WritableReg,
lane: u8,
kind: ReplaceLaneKind,
) -> Result<()>;

/// Perform an atomic CAS (compare-and-swap) operation with the value at `addr`, and the
/// `expected` and `replacement` values at the top of the context's stack.
///
/// This method takes the `CodeGenContext` as an argument to accommodate architectures that
/// expect parameters in specific registers. The context stack contains the `replacement`
/// and `expected` values, in that order. The implementer is expected to push the value that
/// was at `addr` before the update onto the context's stack before returning.
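///
/// A rough sketch of the expected value-stack shape (illustrative only;
/// the top of the stack is on the right):
///
/// ```text
/// before: [.., expected, replacement]
/// after:  [.., value_previously_at_addr]
/// ```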
fn atomic_cas(
&mut self,
context: &mut CodeGenContext<Emission>,
addr: Self::Address,
size: OperandSize,
flags: MemFlags,
extend: Option<Extend<Zero>>,
) -> Result<()>;

/// Compares vector registers `lhs` and `rhs` for equality and puts the
/// vector of results in `dst`.
fn v128_eq(
&mut self,
dst: WritableReg,
lhs: Reg,
rhs: Reg,
kind: VectorEqualityKind,
) -> Result<()>;

/// Compares vector registers `lhs` and `rhs` for inequality and puts the
/// vector of results in `dst`.
fn v128_ne(
&mut self,
dst: WritableReg,
lhs: Reg,
rhs: Reg,
kind: VectorEqualityKind,
) -> Result<()>;

/// Performs a less than comparison with vector registers `lhs` and `rhs`
/// and puts the vector of results in `dst`.
fn v128_lt(
&mut self,
dst: WritableReg,
lhs: Reg,
rhs: Reg,
kind: VectorCompareKind,
) -> Result<()>;

/// Performs a less than or equal comparison with vector registers `lhs`
/// and `rhs` and puts the vector of results in `dst`.
fn v128_le(
&mut self,
dst: WritableReg,
lhs: Reg,
rhs: Reg,
kind: VectorCompareKind,
) -> Result<()>;

/// Performs a greater than comparison with vector registers `lhs` and
/// `rhs` and puts the vector of results in `dst`.
fn v128_gt(
&mut self,
dst: WritableReg,
lhs: Reg,
rhs: Reg,
kind: VectorCompareKind,
) -> Result<()>;

/// Performs a greater than or equal comparison with vector registers `lhs`
/// and `rhs` and puts the vector of results in `dst`.
fn v128_ge(
&mut self,
dst: WritableReg,
lhs: Reg,
rhs: Reg,
kind: VectorCompareKind,
) -> Result<()>;

/// Emit a memory fence.
fn fence(&mut self) -> Result<()>;

/// Perform a logical `not` operation on the 128-bit vector value in `dst`.
fn v128_not(&mut self, dst: WritableReg) -> Result<()>;

/// Perform a logical `and` operation on `src1` and `src2`, both 128-bit vector values, writing
/// the result to `dst`.
fn v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

/// Perform a logical `and_not` operation on `src1` and `src2`, both 128-bit vector values, writing
/// the result to `dst`.
///
/// `and_not` is not commutative: dst = !src1 & src2.
fn v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

/// Perform a logical `or` operation on `src1` and `src2`, both 128-bit vector values, writing
/// the result to `dst`.
fn v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

/// Perform a logical `xor` operation on `src1` and `src2`, both 128-bit vector values, writing
/// the result to `dst`.
fn v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

/// Given two 128-bit vectors `src1` and `src2`, and a 128-bit bitmask `mask`, selects bits
/// from `src1` when mask is 1, and from `src2` when mask is 0.
///
/// This is equivalent to: `v128.or(v128.and(src1, mask), v128.and(src2, v128.not(mask)))`.
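///
/// For instance (illustrative only, shown on a single byte): with
/// `src1 = 0b1111_0000`, `src2 = 0b0000_1111` and `mask = 0b1010_1010`,
/// the selected result is `0b1010_0101`.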
fn v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()>;

/// If any bit in `src` is 1, set `dst` to 1, otherwise set `dst` to 0.
fn v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

/// Convert a vector of integers to a vector of floating point values.
fn v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()>;

/// Convert two input vectors into a smaller lane vector by narrowing each
/// lane.
fn v128_narrow(
&mut self,
src1: Reg,
src2: Reg,
dst: WritableReg,
kind: V128NarrowKind,
) -> Result<()>;

/// Converts a vector containing two 64-bit floating point lanes to two
/// 32-bit floating point lanes, setting the two higher lanes to 0.
fn v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

/// Converts a vector containing four 32-bit floating point lanes to two
/// 64-bit floating point lanes. Only the two lower lanes are converted.
fn v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

/// Converts the low or high half of the smaller lane vector to a larger lane
/// vector.
fn v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()>;

/// Perform a vector add between `lhs` and `rhs`, placing the result in
/// `dst`.
fn v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()>;

/// Perform a vector sub between `lhs` and `rhs`, placing the result in `dst`.
fn v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()>;

/// Perform a vector lane-wise mul between `lhs` and `rhs`, placing the result in `dst`.
fn v128_mul(&mut self, context: &mut CodeGenContext<Emission>, kind: V128MulKind)
-> Result<()>;

/// Perform an absolute value operation on a vector.
fn v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()>;

/// Vectorized negate of the content of `op`.
fn v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()>;

/// Perform the shift operation specified by `kind`, by the shift amount specified by the 32-bit
/// integer at the top of the stack, on the 128-bit vector specified by the second value
/// from the top of the stack, interpreted as packed integers of size `lane_width`.
///
/// The shift amount is taken modulo `lane_width`.
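///
/// For example (illustrative only), shifting `i32x4` lanes by a shift
/// amount of 37 is equivalent to shifting them by `37 % 32 == 5`.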
fn v128_shift(
&mut self,
context: &mut CodeGenContext<Emission>,
lane_width: OperandSize,
kind: ShiftKind,
) -> Result<()>;

/// Perform a saturating integer q-format rounding multiplication.
fn v128_q15mulr_sat_s(
&mut self,
lhs: Reg,
rhs: Reg,
dst: WritableReg,
size: OperandSize,
) -> Result<()>;

/// Sets `dst` to 1 if all lanes in `src` are non-zero, sets `dst` to 0
/// otherwise.
fn v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

/// Extracts the high bit of each lane in `src` and produces a scalar mask
/// with all bits concatenated in `dst`.
fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

/// Lane-wise truncation operation.
///
/// If using an integer kind of truncation, then this performs a lane-wise
/// saturating conversion from float to integer using the IEEE
/// `convertToIntegerTowardZero` function. If any input lane is NaN, the
/// resulting lane is 0. If the rounded integer value of a lane is outside
/// the range of the destination type, the result is saturated to the
/// nearest representable integer value.
fn v128_trunc(
&mut self,
context: &mut CodeGenContext<Emission>,
kind: V128TruncKind,
) -> Result<()>;

/// Perform a lane-wise `min` operation between `src1` and `src2`.
fn v128_min(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MinKind)
-> Result<()>;

/// Perform a lane-wise `max` operation between `src1` and `src2`.
fn v128_max(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MaxKind)
-> Result<()>;

/// Perform the lane-wise integer extended multiplication producing a result twice as wide as
/// the inputs. This is equivalent to an extend followed by a multiply.
///
/// The extension to be performed is inferred from the `lane_width` and the `kind` of extmul,
/// e.g., if `lane_width` is `S16`, and `kind` is `LowSigned`, then we sign-extend the lower
/// 8 bits of the 16-bit lanes.
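///
/// As a small illustration: for an 8-bit to 16-bit extended multiply,
/// input lanes holding `-2` and `3` are first sign-extended to 16 bits
/// and then multiplied, producing a 16-bit result lane holding `-6`.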
fn v128_extmul(
&mut self,
context: &mut CodeGenContext<Emission>,
kind: V128ExtMulKind,
) -> Result<()>;

/// Perform the lane-wise integer extended pairwise addition, producing results twice as wide
/// as the inputs.
fn v128_extadd_pairwise(
&mut self,
src: Reg,
dst: WritableReg,
kind: V128ExtAddKind,
) -> Result<()>;

/// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add
/// adjacent pairs of the 32-bit results.
fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>;

/// Count the number of bits set in each lane.
fn v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()>;

/// Lane-wise rounding average of vectors of integers in `lhs` and `rhs`
/// and put the results in `dst`.
fn v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

/// Lane-wise IEEE division on vectors of floats.
fn v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

/// Lane-wise IEEE square root of vector of floats.
fn v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

/// Lane-wise ceiling of vector of floats.
fn v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

/// Lane-wise flooring of vector of floats.
fn v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

/// Lane-wise rounding to nearest integer for vector of floats.
fn v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

/// Lane-wise minimum value defined as `rhs < lhs ? rhs : lhs`.
fn v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

/// Lane-wise maximum value defined as `lhs < rhs ? rhs : lhs`.
fn v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
}