// GitHub Repository: bytecodealliance/wasmtime
// Path: blob/main/winch/codegen/src/masm.rs

use crate::abi::{self, LocalSlot, align_to};
use crate::codegen::{CodeGenContext, Emission, FuncEnv};
use crate::isa::{
    CallingConvention,
    reg::{Reg, RegClass, WritableReg, writable},
};
use anyhow::Result;
use cranelift_codegen::{
    Final, MachBufferFinalized, MachLabel,
    binemit::CodeOffset,
    ir::{Endianness, MemFlags, RelSourceLoc, SourceLoc, UserExternalNameRef},
};
use std::{fmt::Debug, ops::Range};
use wasmtime_environ::{PtrSize, WasmHeapType, WasmRefType, WasmValType};

pub(crate) use cranelift_codegen::ir::TrapCode;

#[derive(Eq, PartialEq)]
pub(crate) enum DivKind {
    /// Signed division.
    Signed,
    /// Unsigned division.
    Unsigned,
}

/// Represents the `memory.atomic.wait*` kind.
#[derive(Debug, Clone, Copy)]
pub(crate) enum AtomicWaitKind {
    Wait32,
    Wait64,
}

/// Remainder kind.
#[derive(Copy, Clone)]
pub(crate) enum RemKind {
    /// Signed remainder.
    Signed,
    /// Unsigned remainder.
    Unsigned,
}

impl RemKind {
    pub fn is_signed(&self) -> bool {
        matches!(self, Self::Signed)
    }
}

/// Kinds of vector min operation supported by WebAssembly.
pub(crate) enum V128MinKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}

impl V128MinKind {
    /// The size of each lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
            Self::F64x2 => OperandSize::S64,
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
        }
    }
}

/// Kinds of vector max operation supported by WebAssembly.
pub(crate) enum V128MaxKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
    /// 4 lanes of signed 32-bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers.
    I32x4U,
}

impl V128MaxKind {
    /// The size of each lane.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4S | Self::I32x4U => OperandSize::S32,
            Self::F64x2 => OperandSize::S64,
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
        }
    }
}

#[derive(Eq, PartialEq)]
pub(crate) enum MulWideKind {
    Signed,
    Unsigned,
}

/// Type of operation for a read-modify-write instruction.
pub(crate) enum RmwOp {
    Add,
    Sub,
    Xchg,
    And,
    Or,
    Xor,
}

/// The direction to perform the memory move.
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum MemMoveDirection {
    /// From high memory addresses to low memory addresses.
    /// Invariant: the source location is closer to the FP than the destination
    /// location, which will be closer to the SP.
    HighToLow,
    /// From low memory addresses to high memory addresses.
    /// Invariant: the source location is closer to the SP than the destination
    /// location, which will be closer to the FP.
    LowToHigh,
}

/// Classifies how to treat float-to-int conversions.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum TruncKind {
    /// Saturating conversion. If the source value is greater than the maximum
    /// value of the destination type, the result is clamped to the
    /// destination maximum value.
    Checked,
    /// An exception is raised if the source value is greater than the maximum
    /// value of the destination type.
    Unchecked,
}

impl TruncKind {
    /// Returns true if the truncation kind is checked.
    pub(crate) fn is_checked(&self) -> bool {
        *self == TruncKind::Checked
    }

    /// Returns `true` if the trunc kind is [`Unchecked`].
    ///
    /// [`Unchecked`]: TruncKind::Unchecked
    #[must_use]
    pub(crate) fn is_unchecked(&self) -> bool {
        matches!(self, Self::Unchecked)
    }
}

/// Representation of the stack pointer offset.
#[derive(Copy, Clone, Eq, PartialEq, Debug, PartialOrd, Ord, Default)]
pub struct SPOffset(u32);

impl SPOffset {
    pub fn from_u32(offs: u32) -> Self {
        Self(offs)
    }

    pub fn as_u32(&self) -> u32 {
        self.0
    }
}

/// A stack slot.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct StackSlot {
    /// The location of the slot, relative to the stack pointer.
    pub offset: SPOffset,
    /// The size of the slot, in bytes.
    pub size: u32,
}

impl StackSlot {
    pub fn new(offs: SPOffset, size: u32) -> Self {
        Self { offset: offs, size }
    }
}

pub trait ScratchType {
    /// Derive the register class from the scratch register type.
    fn reg_class() -> RegClass;
}

/// A scratch register type of integer class.
pub struct IntScratch;
/// A scratch register type of floating point class.
pub struct FloatScratch;

impl ScratchType for IntScratch {
    fn reg_class() -> RegClass {
        RegClass::Int
    }
}

impl ScratchType for FloatScratch {
    fn reg_class() -> RegClass {
        RegClass::Float
    }
}

/// A scratch register scope.
pub struct Scratch(Reg);

impl Scratch {
    pub fn new(r: Reg) -> Self {
        Self(r)
    }

    #[inline]
    pub fn inner(&self) -> Reg {
        self.0
    }

    #[inline]
    pub fn writable(&self) -> WritableReg {
        writable!(self.0)
    }
}

/// Kinds of integer binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum IntCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Signed less than.
    LtS,
    /// Unsigned less than.
    LtU,
    /// Signed greater than.
    GtS,
    /// Unsigned greater than.
    GtU,
    /// Signed less than or equal.
    LeS,
    /// Unsigned less than or equal.
    LeU,
    /// Signed greater than or equal.
    GeS,
    /// Unsigned greater than or equal.
    GeU,
}

/// Kinds of float binary comparison in WebAssembly. The [`MacroAssembler`]
/// implementation for each ISA is responsible for emitting the correct
/// sequence of instructions when lowering code.
#[derive(Debug)]
pub(crate) enum FloatCmpKind {
    /// Equal.
    Eq,
    /// Not equal.
    Ne,
    /// Less than.
    Lt,
    /// Greater than.
    Gt,
    /// Less than or equal.
    Le,
    /// Greater than or equal.
    Ge,
}

/// Kinds of shifts in WebAssembly. The [`masm`] implementation for each ISA is
/// responsible for emitting the correct sequence of instructions when
/// lowering to machine code.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum ShiftKind {
    /// Left shift.
    Shl,
    /// Signed right shift.
    ShrS,
    /// Unsigned right shift.
    ShrU,
    /// Left rotate.
    Rotl,
    /// Right rotate.
    Rotr,
}

/// Kinds of extends in WebAssembly. Each MacroAssembler implementation
/// is responsible for emitting the correct sequence of instructions when
/// lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum ExtendKind {
    Signed(Extend<Signed>),
    Unsigned(Extend<Zero>),
}

#[derive(Copy, Clone)]
pub(crate) enum Signed {}
#[derive(Copy, Clone)]
pub(crate) enum Zero {}

pub(crate) trait ExtendType {}

impl ExtendType for Signed {}
impl ExtendType for Zero {}

#[derive(Copy, Clone)]
pub(crate) enum Extend<T: ExtendType> {
    /// 8 to 32 bit extend.
    I32Extend8,
    /// 16 to 32 bit extend.
    I32Extend16,
    /// 8 to 64 bit extend.
    I64Extend8,
    /// 16 to 64 bit extend.
    I64Extend16,
    /// 32 to 64 bit extend.
    I64Extend32,

    /// Variant to hold the kind of extend marker.
    ///
    /// This is `Signed` or `Zero`, which are empty enums, meaning that this
    /// variant cannot be constructed.
    __Kind(T),
}

impl From<Extend<Zero>> for ExtendKind {
    fn from(value: Extend<Zero>) -> Self {
        ExtendKind::Unsigned(value)
    }
}

impl<T: ExtendType> Extend<T> {
    pub fn from_size(&self) -> OperandSize {
        match self {
            Extend::I32Extend8 | Extend::I64Extend8 => OperandSize::S8,
            Extend::I32Extend16 | Extend::I64Extend16 => OperandSize::S16,
            Extend::I64Extend32 => OperandSize::S32,
            Extend::__Kind(_) => unreachable!(),
        }
    }

    pub fn to_size(&self) -> OperandSize {
        match self {
            Extend::I32Extend8 | Extend::I32Extend16 => OperandSize::S32,
            Extend::I64Extend8 | Extend::I64Extend16 | Extend::I64Extend32 => OperandSize::S64,
            Extend::__Kind(_) => unreachable!(),
        }
    }

    pub fn from_bits(&self) -> u8 {
        self.from_size().num_bits()
    }

    pub fn to_bits(&self) -> u8 {
        self.to_size().num_bits()
    }
}

impl From<Extend<Signed>> for ExtendKind {
    fn from(value: Extend<Signed>) -> Self {
        ExtendKind::Signed(value)
    }
}

impl ExtendKind {
    pub fn signed(&self) -> bool {
        match self {
            Self::Signed(_) => true,
            _ => false,
        }
    }

    pub fn from_bits(&self) -> u8 {
        match self {
            Self::Signed(s) => s.from_bits(),
            Self::Unsigned(u) => u.from_bits(),
        }
    }

    pub fn to_bits(&self) -> u8 {
        match self {
            Self::Signed(s) => s.to_bits(),
            Self::Unsigned(u) => u.to_bits(),
        }
    }
}
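
// Illustrative sketch added for exposition (not part of the upstream source):
// a small test showing how the phantom-typed `Extend` markers map onto
// `ExtendKind` and how the bit-width helpers behave. It only relies on the
// items defined above; the module and test names are hypothetical.
#[cfg(test)]
mod extend_kind_example {
    use super::*;

    #[test]
    fn extend_metadata() {
        // `i64.extend32_s` widens a 32-bit value into 64 bits with sign extension.
        let kind: ExtendKind = Extend::<Signed>::I64Extend32.into();
        assert!(kind.signed());
        assert_eq!(kind.from_bits(), 32);
        assert_eq!(kind.to_bits(), 64);

        // Zero extends produce the `Unsigned` flavor via the `From` impl above.
        let kind: ExtendKind = Extend::<Zero>::I32Extend8.into();
        assert!(!kind.signed());
        assert_eq!(kind.from_bits(), 8);
        assert_eq!(kind.to_bits(), 32);
    }
}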

/// Kinds of vector load and extends in WebAssembly. Each MacroAssembler
/// implementation is responsible for emitting the correct sequence of
/// instructions when lowering to machine code.
#[derive(Copy, Clone)]
pub(crate) enum V128LoadExtendKind {
    /// Sign extends eight 8 bit integers to eight 16 bit lanes.
    E8x8S,
    /// Zero extends eight 8 bit integers to eight 16 bit lanes.
    E8x8U,
    /// Sign extends four 16 bit integers to four 32 bit lanes.
    E16x4S,
    /// Zero extends four 16 bit integers to four 32 bit lanes.
    E16x4U,
    /// Sign extends two 32 bit integers to two 64 bit lanes.
    E32x2S,
    /// Zero extends two 32 bit integers to two 64 bit lanes.
    E32x2U,
}

/// Kinds of splat loads supported by WebAssembly.
pub(crate) enum SplatLoadKind {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
}

/// Kinds of splat supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum SplatKind {
    /// 8 bit integer.
    I8x16,
    /// 16 bit integer.
    I16x8,
    /// 32 bit integer.
    I32x4,
    /// 64 bit integer.
    I64x2,
    /// 32 bit float.
    F32x4,
    /// 64 bit float.
    F64x2,
}

impl SplatKind {
    /// The lane size to use for different kinds of splats.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            SplatKind::I8x16 => OperandSize::S8,
            SplatKind::I16x8 => OperandSize::S16,
            SplatKind::I32x4 | SplatKind::F32x4 => OperandSize::S32,
            SplatKind::I64x2 | SplatKind::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of extract lane supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum ExtractLaneKind {
    /// 16 lanes of 8-bit integers sign extended to 32-bits.
    I8x16S,
    /// 16 lanes of 8-bit integers zero extended to 32-bits.
    I8x16U,
    /// 8 lanes of 16-bit integers sign extended to 32-bits.
    I16x8S,
    /// 8 lanes of 16-bit integers zero extended to 32-bits.
    I16x8U,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
}

impl ExtractLaneKind {
    /// The lane size to use for different kinds of extract lane kinds.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ExtractLaneKind::I8x16S | ExtractLaneKind::I8x16U => OperandSize::S8,
            ExtractLaneKind::I16x8S | ExtractLaneKind::I16x8U => OperandSize::S16,
            ExtractLaneKind::I32x4 | ExtractLaneKind::F32x4 => OperandSize::S32,
            ExtractLaneKind::I64x2 | ExtractLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

impl From<ExtractLaneKind> for Extend<Signed> {
    fn from(value: ExtractLaneKind) -> Self {
        match value {
            ExtractLaneKind::I8x16S => Extend::I32Extend8,
            ExtractLaneKind::I16x8S => Extend::I32Extend16,
            _ => unimplemented!(),
        }
    }
}

/// Kinds of replace lane supported by WebAssembly.
pub(crate) enum ReplaceLaneKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl ReplaceLaneKind {
    /// The lane size to use for different kinds of replace lane kinds.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            ReplaceLaneKind::I8x16 => OperandSize::S8,
            ReplaceLaneKind::I16x8 => OperandSize::S16,
            ReplaceLaneKind::I32x4 => OperandSize::S32,
            ReplaceLaneKind::I64x2 => OperandSize::S64,
            ReplaceLaneKind::F32x4 => OperandSize::S32,
            ReplaceLaneKind::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of behavior supported by Wasm loads.
pub(crate) enum LoadKind {
    /// Load the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Atomic load, with optional scalar extend.
    Atomic(OperandSize, Option<ExtendKind>),
    /// Duplicate value into vector lanes.
    Splat(SplatLoadKind),
    /// Scalar (non-vector) extend.
    ScalarExtend(ExtendKind),
    /// Vector extend.
    VectorExtend(V128LoadExtendKind),
    /// Load content into select lane.
    VectorLane(LaneSelector),
    /// Load a single element into the lowest bits of a vector and initialize
    /// all other bits to zero.
    VectorZero(OperandSize),
}

impl LoadKind {
    /// Returns the [`OperandSize`] used in the load operation.
    pub(crate) fn derive_operand_size(&self) -> OperandSize {
        match self {
            Self::ScalarExtend(extend) | Self::Atomic(_, Some(extend)) => {
                Self::operand_size_for_scalar(extend)
            }
            Self::VectorExtend(_) => OperandSize::S64,
            Self::Splat(kind) => Self::operand_size_for_splat(kind),
            Self::Operand(size)
            | Self::Atomic(size, None)
            | Self::VectorLane(LaneSelector { size, .. })
            | Self::VectorZero(size) => *size,
        }
    }

    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }

    fn operand_size_for_scalar(extend_kind: &ExtendKind) -> OperandSize {
        match extend_kind {
            ExtendKind::Signed(s) => s.from_size(),
            ExtendKind::Unsigned(u) => u.from_size(),
        }
    }

    fn operand_size_for_splat(kind: &SplatLoadKind) -> OperandSize {
        match kind {
            SplatLoadKind::S8 => OperandSize::S8,
            SplatLoadKind::S16 => OperandSize::S16,
            SplatLoadKind::S32 => OperandSize::S32,
            SplatLoadKind::S64 => OperandSize::S64,
        }
    }

    pub(crate) fn is_atomic(&self) -> bool {
        matches!(self, Self::Atomic(_, _))
    }
}
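
// Illustrative sketch added for exposition (not part of the upstream source):
// how the `LoadKind` constructors and `derive_operand_size` interact for a few
// representative Wasm loads. The module and test names are hypothetical.
#[cfg(test)]
mod load_kind_example {
    use super::*;

    #[test]
    fn load_kind_sizes() {
        // A plain 32-bit operand load reports the operand size directly.
        assert_eq!(
            LoadKind::Operand(OperandSize::S32).derive_operand_size(),
            OperandSize::S32
        );

        // `v128.load32_lane`-style loads carry the lane size in the selector.
        let lane = LoadKind::vector_lane(1, OperandSize::S32);
        assert_eq!(lane.derive_operand_size(), OperandSize::S32);
        assert!(!lane.is_atomic());

        // An atomic load with a zero extend reports the *source* size (8 bits
        // here), mirroring `i64.atomic.load8_u`.
        let atomic = LoadKind::Atomic(OperandSize::S8, Some(Extend::<Zero>::I64Extend8.into()));
        assert_eq!(atomic.derive_operand_size(), OperandSize::S8);
        assert!(atomic.is_atomic());
    }
}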

/// Kinds of behavior supported by Wasm stores.
#[derive(Copy, Clone)]
pub enum StoreKind {
    /// Store the entire bytes of the operand size without any modifications.
    Operand(OperandSize),
    /// Store the entire bytes of the operand size without any modifications, atomically.
    Atomic(OperandSize),
    /// Store the content of selected lane.
    VectorLane(LaneSelector),
}

impl StoreKind {
    pub fn vector_lane(lane: u8, size: OperandSize) -> Self {
        Self::VectorLane(LaneSelector { lane, size })
    }
}

#[derive(Copy, Clone)]
pub struct LaneSelector {
    pub lane: u8,
    pub size: OperandSize,
}

/// Types of vector integer to float conversions supported by WebAssembly.
pub(crate) enum V128ConvertKind {
    /// 4 lanes of signed 32-bit integers to 4 lanes of 32-bit floats.
    I32x4S,
    /// 4 lanes of unsigned 32-bit integers to 4 lanes of 32-bit floats.
    I32x4U,
    /// 4 lanes of signed 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowS,
    /// 4 lanes of unsigned 32-bit integers to low bits of 2 lanes of 64-bit
    /// floats.
    I32x4LowU,
}

impl V128ConvertKind {
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            V128ConvertKind::I32x4S
            | V128ConvertKind::I32x4U
            | V128ConvertKind::I32x4LowS
            | V128ConvertKind::I32x4LowU => OperandSize::S32,
        }
    }

    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        match self {
            V128ConvertKind::I32x4S | V128ConvertKind::I32x4U => OperandSize::S32,
            V128ConvertKind::I32x4LowS | V128ConvertKind::I32x4LowU => OperandSize::S64,
        }
    }
}

/// Kinds of vector narrowing operations supported by WebAssembly.
pub(crate) enum V128NarrowKind {
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// signed saturation.
    I16x8S,
    /// Narrow 8 lanes of 16-bit integers to 16 lanes of 8-bit integers using
    /// unsigned saturation.
    I16x8U,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// signed saturation.
    I32x4S,
    /// Narrow 4 lanes of 32-bit integers to 8 lanes of 16-bit integers using
    /// unsigned saturation.
    I32x4U,
}

impl V128NarrowKind {
    /// Return the size of the destination lanes.
    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        match self {
            Self::I16x8S | Self::I16x8U => OperandSize::S8,
            Self::I32x4S | Self::I32x4U => OperandSize::S16,
        }
    }
}

/// Kinds of vector extending operations supported by WebAssembly.
#[derive(Debug, Copy, Clone)]
pub(crate) enum V128ExtendKind {
    /// Low half of i8x16 sign extended.
    LowI8x16S,
    /// High half of i8x16 sign extended.
    HighI8x16S,
    /// Low half of i8x16 zero extended.
    LowI8x16U,
    /// High half of i8x16 zero extended.
    HighI8x16U,
    /// Low half of i16x8 sign extended.
    LowI16x8S,
    /// High half of i16x8 sign extended.
    HighI16x8S,
    /// Low half of i16x8 zero extended.
    LowI16x8U,
    /// High half of i16x8 zero extended.
    HighI16x8U,
    /// Low half of i32x4 sign extended.
    LowI32x4S,
    /// High half of i32x4 sign extended.
    HighI32x4S,
    /// Low half of i32x4 zero extended.
    LowI32x4U,
    /// High half of i32x4 zero extended.
    HighI32x4U,
}

impl V128ExtendKind {
    /// The size of the source's lanes.
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            Self::LowI8x16S | Self::LowI8x16U | Self::HighI8x16S | Self::HighI8x16U => {
                OperandSize::S8
            }
            Self::LowI16x8S | Self::LowI16x8U | Self::HighI16x8S | Self::HighI16x8U => {
                OperandSize::S16
            }
            Self::LowI32x4S | Self::LowI32x4U | Self::HighI32x4S | Self::HighI32x4U => {
                OperandSize::S32
            }
        }
    }
}

/// Kinds of vector equalities and non-equalities supported by WebAssembly.
pub(crate) enum VectorEqualityKind {
    /// 16 lanes of 8 bit integers.
    I8x16,
    /// 8 lanes of 16 bit integers.
    I16x8,
    /// 4 lanes of 32 bit integers.
    I32x4,
    /// 2 lanes of 64 bit integers.
    I64x2,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl VectorEqualityKind {
    /// Get the lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
            Self::I32x4 | Self::F32x4 => OperandSize::S32,
            Self::I64x2 | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of vector comparisons supported by WebAssembly.
pub(crate) enum VectorCompareKind {
    /// 16 lanes of signed 8 bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8 bit integers.
    I8x16U,
    /// 8 lanes of signed 16 bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16 bit integers.
    I16x8U,
    /// 4 lanes of signed 32 bit integers.
    I32x4S,
    /// 4 lanes of unsigned 32 bit integers.
    I32x4U,
    /// 2 lanes of signed 64 bit integers.
    I64x2S,
    /// 4 lanes of 32 bit floats.
    F32x4,
    /// 2 lanes of 64 bit floats.
    F64x2,
}

impl VectorCompareKind {
    /// Get the lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16S | Self::I8x16U => OperandSize::S8,
            Self::I16x8S | Self::I16x8U => OperandSize::S16,
            Self::I32x4S | Self::I32x4U | Self::F32x4 => OperandSize::S32,
            Self::I64x2S | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of vector absolute operations supported by WebAssembly.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum V128AbsKind {
    /// 8 bit integers.
    I8x16,
    /// 16 bit integers.
    I16x8,
    /// 32 bit integers.
    I32x4,
    /// 64 bit integers.
    I64x2,
    /// 32 bit floats.
    F32x4,
    /// 64 bit floats.
    F64x2,
}

impl V128AbsKind {
    /// The lane size to use.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
            Self::I32x4 | Self::F32x4 => OperandSize::S32,
            Self::I64x2 | Self::F64x2 => OperandSize::S64,
        }
    }
}

/// Kinds of truncation for vectors supported by WebAssembly.
pub(crate) enum V128TruncKind {
    /// Truncates 4 lanes of 32-bit floats to nearest integral value.
    F32x4,
    /// Truncates 2 lanes of 64-bit floats to nearest integral value.
    F64x2,
    /// Integers from signed F32x4.
    I32x4FromF32x4S,
    /// Integers from unsigned F32x4.
    I32x4FromF32x4U,
    /// Integers from signed F64x2.
    I32x4FromF64x2SZero,
    /// Integers from unsigned F64x2.
    I32x4FromF64x2UZero,
}

impl V128TruncKind {
    /// The size of the source lanes.
    pub(crate) fn src_lane_size(&self) -> OperandSize {
        match self {
            V128TruncKind::F32x4
            | V128TruncKind::I32x4FromF32x4S
            | V128TruncKind::I32x4FromF32x4U => OperandSize::S32,
            V128TruncKind::F64x2
            | V128TruncKind::I32x4FromF64x2SZero
            | V128TruncKind::I32x4FromF64x2UZero => OperandSize::S64,
        }
    }

    /// The size of the destination lanes.
    pub(crate) fn dst_lane_size(&self) -> OperandSize {
        if let V128TruncKind::F64x2 = self {
            OperandSize::S64
        } else {
            OperandSize::S32
        }
    }
}

/// Kinds of vector addition supported by WebAssembly.
pub(crate) enum V128AddKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

/// Kinds of vector subtraction supported by WebAssembly.
pub(crate) enum V128SubKind {
    /// 4 lanes of 32-bit floats wrapping.
    F32x4,
    /// 2 lanes of 64-bit floats wrapping.
    F64x2,
    /// 16 lanes of 8-bit integers wrapping.
    I8x16,
    /// 16 lanes of 8-bit integers signed saturating.
    I8x16SatS,
    /// 16 lanes of 8-bit integers unsigned saturating.
    I8x16SatU,
    /// 8 lanes of 16-bit integers wrapping.
    I16x8,
    /// 8 lanes of 16-bit integers signed saturating.
    I16x8SatS,
    /// 8 lanes of 16-bit integers unsigned saturating.
    I16x8SatU,
    /// 4 lanes of 32-bit integers wrapping.
    I32x4,
    /// 2 lanes of 64-bit integers wrapping.
    I64x2,
}

impl From<V128NegKind> for V128SubKind {
    fn from(value: V128NegKind) -> Self {
        match value {
            V128NegKind::I8x16 => Self::I8x16,
            V128NegKind::I16x8 => Self::I16x8,
            V128NegKind::I32x4 => Self::I32x4,
            V128NegKind::I64x2 => Self::I64x2,
            V128NegKind::F32x4 | V128NegKind::F64x2 => unimplemented!(),
        }
    }
}

/// Kinds of vector multiplication supported by WebAssembly.
pub(crate) enum V128MulKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

/// Kinds of vector negation supported by WebAssembly.
#[derive(Copy, Clone)]
pub(crate) enum V128NegKind {
    /// 4 lanes of 32-bit floats.
    F32x4,
    /// 2 lanes of 64-bit floats.
    F64x2,
    /// 16 lanes of 8-bit integers.
    I8x16,
    /// 8 lanes of 16-bit integers.
    I16x8,
    /// 4 lanes of 32-bit integers.
    I32x4,
    /// 2 lanes of 64-bit integers.
    I64x2,
}

impl V128NegKind {
    /// The size of the lanes.
    pub(crate) fn lane_size(&self) -> OperandSize {
        match self {
            Self::F32x4 | Self::I32x4 => OperandSize::S32,
            Self::F64x2 | Self::I64x2 => OperandSize::S64,
            Self::I8x16 => OperandSize::S8,
            Self::I16x8 => OperandSize::S16,
        }
    }
}

/// Kinds of extended pairwise addition supported by WebAssembly.
pub(crate) enum V128ExtAddKind {
    /// 16 lanes of signed 8-bit integers.
    I8x16S,
    /// 16 lanes of unsigned 8-bit integers.
    I8x16U,
    /// 8 lanes of signed 16-bit integers.
    I16x8S,
    /// 8 lanes of unsigned 16-bit integers.
    I16x8U,
}

/// Kinds of vector extended multiplication supported by WebAssembly.
#[derive(Debug, Clone, Copy)]
pub(crate) enum V128ExtMulKind {
    LowI8x16S,
    HighI8x16S,
    LowI8x16U,
    HighI8x16U,
    LowI16x8S,
    HighI16x8S,
    LowI16x8U,
    HighI16x8U,
    LowI32x4S,
    HighI32x4S,
    LowI32x4U,
    HighI32x4U,
}

impl From<V128ExtMulKind> for V128ExtendKind {
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S => Self::LowI8x16S,
            V128ExtMulKind::HighI8x16S => Self::HighI8x16S,
            V128ExtMulKind::LowI8x16U => Self::LowI8x16U,
            V128ExtMulKind::HighI8x16U => Self::HighI8x16U,
            V128ExtMulKind::LowI16x8S => Self::LowI16x8S,
            V128ExtMulKind::HighI16x8S => Self::HighI16x8S,
            V128ExtMulKind::LowI16x8U => Self::LowI16x8U,
            V128ExtMulKind::HighI16x8U => Self::HighI16x8U,
            V128ExtMulKind::LowI32x4S => Self::LowI32x4S,
            V128ExtMulKind::HighI32x4S => Self::HighI32x4S,
            V128ExtMulKind::LowI32x4U => Self::LowI32x4U,
            V128ExtMulKind::HighI32x4U => Self::HighI32x4U,
        }
    }
}

impl From<V128ExtMulKind> for V128MulKind {
    fn from(value: V128ExtMulKind) -> Self {
        match value {
            V128ExtMulKind::LowI8x16S
            | V128ExtMulKind::HighI8x16S
            | V128ExtMulKind::LowI8x16U
            | V128ExtMulKind::HighI8x16U => Self::I16x8,
            V128ExtMulKind::LowI16x8S
            | V128ExtMulKind::HighI16x8S
            | V128ExtMulKind::LowI16x8U
            | V128ExtMulKind::HighI16x8U => Self::I32x4,
            V128ExtMulKind::LowI32x4S
            | V128ExtMulKind::HighI32x4S
            | V128ExtMulKind::LowI32x4U
            | V128ExtMulKind::HighI32x4U => Self::I64x2,
        }
    }
}
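
// Illustrative sketch added for exposition (not part of the upstream source):
// an extended multiply decomposes into "extend both inputs, then multiply the
// widened lanes", which is exactly what the two `From<V128ExtMulKind>` impls
// above encode. The module and test names are hypothetical.
#[cfg(test)]
mod ext_mul_example {
    use super::*;

    #[test]
    fn ext_mul_decomposition() {
        let kind = V128ExtMulKind::LowI8x16S;
        // The extend step operates on the low half of the signed 8-bit lanes...
        assert!(matches!(V128ExtendKind::from(kind), V128ExtendKind::LowI8x16S));
        // ...and the multiply step then works on the widened 16-bit lanes.
        assert!(matches!(V128MulKind::from(kind), V128MulKind::I16x8));
    }
}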

/// Operand size, in bits.
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
pub(crate) enum OperandSize {
    /// 8 bits.
    S8,
    /// 16 bits.
    S16,
    /// 32 bits.
    S32,
    /// 64 bits.
    S64,
    /// 128 bits.
    S128,
}

impl OperandSize {
    /// The number of bits in the operand.
    pub fn num_bits(&self) -> u8 {
        match self {
            OperandSize::S8 => 8,
            OperandSize::S16 => 16,
            OperandSize::S32 => 32,
            OperandSize::S64 => 64,
            OperandSize::S128 => 128,
        }
    }

    /// The number of bytes in the operand.
    pub fn bytes(&self) -> u32 {
        match self {
            Self::S8 => 1,
            Self::S16 => 2,
            Self::S32 => 4,
            Self::S64 => 8,
            Self::S128 => 16,
        }
    }

    /// The binary logarithm of the number of bits in the operand.
    pub fn log2(&self) -> u8 {
        match self {
            OperandSize::S8 => 3,
            OperandSize::S16 => 4,
            OperandSize::S32 => 5,
            OperandSize::S64 => 6,
            OperandSize::S128 => 7,
        }
    }

    /// Create an [`OperandSize`] from the given number of bytes.
    pub fn from_bytes(bytes: u8) -> Self {
        use OperandSize::*;
        match bytes {
            4 => S32,
            8 => S64,
            16 => S128,
            _ => panic!("Invalid bytes {bytes} for OperandSize"),
        }
    }

    pub fn extend_to<T: ExtendType>(&self, to: Self) -> Option<Extend<T>> {
        match to {
            OperandSize::S32 => match self {
                OperandSize::S8 => Some(Extend::I32Extend8),
                OperandSize::S16 => Some(Extend::I32Extend16),
                _ => None,
            },
            OperandSize::S64 => match self {
                OperandSize::S8 => Some(Extend::I64Extend8),
                OperandSize::S16 => Some(Extend::I64Extend16),
                OperandSize::S32 => Some(Extend::I64Extend32),
                _ => None,
            },
            _ => None,
        }
    }

    /// The number of bits in the mantissa.
    ///
    /// Only implemented for floats.
    pub fn mantissa_bits(&self) -> u8 {
        match self {
            Self::S32 => 8,
            Self::S64 => 11,
            _ => unimplemented!(),
        }
    }
}
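
// Illustrative sketch added for exposition (not part of the upstream source):
// exercising the `OperandSize` helpers used throughout the MacroAssembler.
// The module and test names are hypothetical.
#[cfg(test)]
mod operand_size_example {
    use super::*;

    #[test]
    fn operand_size_helpers() {
        assert_eq!(OperandSize::S64.num_bits(), 64);
        assert_eq!(OperandSize::S64.bytes(), 8);
        // log2 is taken over the number of bits: 2^6 == 64.
        assert_eq!(OperandSize::S64.log2(), 6);
        assert_eq!(OperandSize::from_bytes(8), OperandSize::S64);

        // Widening an 8-bit operand to 64 bits yields the matching extend marker.
        assert!(matches!(
            OperandSize::S8.extend_to::<Zero>(OperandSize::S64),
            Some(Extend::I64Extend8)
        ));
        // No extend exists for equal or shrinking sizes.
        assert!(OperandSize::S64.extend_to::<Zero>(OperandSize::S32).is_none());
    }
}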

/// An abstraction over a register or immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum RegImm {
    /// A register.
    Reg(Reg),
    /// A tagged immediate argument.
    Imm(Imm),
}

/// A tagged representation of an immediate.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum Imm {
    /// I32 immediate.
    I32(u32),
    /// I64 immediate.
    I64(u64),
    /// F32 immediate.
    F32(u32),
    /// F64 immediate.
    F64(u64),
    /// V128 immediate.
    V128(i128),
}

impl Imm {
    /// Create a new I64 immediate.
    pub fn i64(val: i64) -> Self {
        Self::I64(val as u64)
    }

    /// Create a new I32 immediate.
    pub fn i32(val: i32) -> Self {
        Self::I32(val as u32)
    }

    /// Create a new F32 immediate.
    pub fn f32(bits: u32) -> Self {
        Self::F32(bits)
    }

    /// Create a new F64 immediate.
    pub fn f64(bits: u64) -> Self {
        Self::F64(bits)
    }

    /// Create a new V128 immediate.
    pub fn v128(bits: i128) -> Self {
        Self::V128(bits)
    }

    /// Convert the immediate to i32, if possible.
    pub fn to_i32(&self) -> Option<i32> {
        match self {
            Self::I32(v) => Some(*v as i32),
            Self::I64(v) => i32::try_from(*v as i64).ok(),
            _ => None,
        }
    }

    /// Unwraps the underlying integer value as u64.
    /// # Panics
    /// This function panics if the underlying value can't be represented
    /// as u64.
    pub fn unwrap_as_u64(&self) -> u64 {
        match self {
            Self::I32(v) => *v as u64,
            Self::I64(v) => *v,
            Self::F32(v) => *v as u64,
            Self::F64(v) => *v,
            _ => unreachable!(),
        }
    }

    /// Get the operand size of the immediate.
    pub fn size(&self) -> OperandSize {
        match self {
            Self::I32(_) | Self::F32(_) => OperandSize::S32,
            Self::I64(_) | Self::F64(_) => OperandSize::S64,
            Self::V128(_) => OperandSize::S128,
        }
    }

    /// Get a little endian representation of the immediate.
    ///
    /// This method heap allocates and is intended to be used when adding
    /// values to the constant pool.
    pub fn to_bytes(&self) -> Vec<u8> {
        match self {
            Imm::I32(n) => n.to_le_bytes().to_vec(),
            Imm::I64(n) => n.to_le_bytes().to_vec(),
            Imm::F32(n) => n.to_le_bytes().to_vec(),
            Imm::F64(n) => n.to_le_bytes().to_vec(),
            Imm::V128(n) => n.to_le_bytes().to_vec(),
        }
    }
}
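
// Illustrative sketch added for exposition (not part of the upstream source):
// `Imm` stores raw bit patterns, and `to_bytes` produces the little-endian
// encoding used when spilling constants to the pool. The module and test
// names are hypothetical.
#[cfg(test)]
mod imm_example {
    use super::*;

    #[test]
    fn imm_round_trips() {
        let imm = Imm::i32(-1);
        assert_eq!(imm.size(), OperandSize::S32);
        assert_eq!(imm.to_i32(), Some(-1));
        assert_eq!(imm.unwrap_as_u64(), u32::MAX as u64);
        assert_eq!(imm.to_bytes(), vec![0xff, 0xff, 0xff, 0xff]);

        // Floats are stored by their bit patterns, e.g. 1.0_f32 == 0x3f80_0000.
        let one = Imm::f32(1.0_f32.to_bits());
        assert_eq!(one.size(), OperandSize::S32);
        assert_eq!(one.to_bytes(), 0x3f80_0000_u32.to_le_bytes().to_vec());
    }
}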

/// The location of the [VMContext] used for function calls.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum VMContextLoc {
    /// Dynamic, stored in the given register.
    Reg(Reg),
    /// The pinned [VMContext] register.
    Pinned,
    /// A different VMContext is loaded at the provided offset from the current
    /// VMContext.
    OffsetFromPinned(u32),
}

/// The maximum number of context arguments currently used across the compiler.
pub(crate) const MAX_CONTEXT_ARGS: usize = 2;

/// Out-of-band special purpose arguments used for function call emission.
///
/// We cannot rely on the value stack for these values given that inserting
/// register or memory values at arbitrary locations of the value stack has the
/// potential to break the stack ordering principle, which states that older
/// values must always precede newer values, effectively simulating the order of
/// values in the machine stack.
/// The [ContextArgs] are meant to be resolved at every callsite; in some cases
/// it might be possible to construct it early on, but given that it might
/// contain allocatable registers, it's preferred to construct it in
/// [FnCall::emit].
#[derive(Clone, Debug)]
pub(crate) enum ContextArgs {
    /// A single context argument is required; the current pinned [VMContext]
    /// register must be passed as the first argument of the function call.
    VMContext([VMContextLoc; 1]),
    /// The callee and caller context arguments are required. In this case, the
    /// callee context argument is usually stored into an allocatable register
    /// and the caller is always the current pinned [VMContext] pointer.
    CalleeAndCallerVMContext([VMContextLoc; MAX_CONTEXT_ARGS]),
}

impl ContextArgs {
    /// Construct a [ContextArgs] declaring the usage of the pinned [VMContext]
    /// register as both the caller and callee context arguments.
    pub fn pinned_callee_and_caller_vmctx() -> Self {
        Self::CalleeAndCallerVMContext([VMContextLoc::Pinned, VMContextLoc::Pinned])
    }

    /// Construct a [ContextArgs] that declares the usage of the pinned
    /// [VMContext] register as the only context argument.
    pub fn pinned_vmctx() -> Self {
        Self::VMContext([VMContextLoc::Pinned])
    }

    /// Construct a [ContextArgs] that declares the usage of a [VMContext] loaded
    /// indirectly from the pinned [VMContext] register as the only context
    /// argument.
    pub fn offset_from_pinned_vmctx(offset: u32) -> Self {
        Self::VMContext([VMContextLoc::OffsetFromPinned(offset)])
    }

    /// Construct a [ContextArgs] that declares a dynamic callee context and the
    /// pinned [VMContext] register as the context arguments.
    pub fn with_callee_and_pinned_caller(callee_vmctx: Reg) -> Self {
        Self::CalleeAndCallerVMContext([VMContextLoc::Reg(callee_vmctx), VMContextLoc::Pinned])
    }

    /// Get the length of the [ContextArgs].
    pub fn len(&self) -> usize {
        self.as_slice().len()
    }

    /// Get a slice of the context arguments.
    pub fn as_slice(&self) -> &[VMContextLoc] {
        match self {
            Self::VMContext(a) => a.as_slice(),
            Self::CalleeAndCallerVMContext(a) => a.as_slice(),
        }
    }
}
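
// Illustrative sketch added for exposition (not part of the upstream source):
// the common `ContextArgs` shapes and how they surface through `as_slice`.
// The module and test names are hypothetical.
#[cfg(test)]
mod context_args_example {
    use super::*;

    #[test]
    fn context_args_shapes() {
        // A single context argument: the pinned VMContext register.
        let args = ContextArgs::pinned_vmctx();
        assert_eq!(args.len(), 1);
        assert_eq!(args.as_slice(), &[VMContextLoc::Pinned]);

        // Separate callee and caller contexts, both backed by the pinned register.
        let args = ContextArgs::pinned_callee_and_caller_vmctx();
        assert_eq!(args.len(), 2);
        assert_eq!(args.as_slice(), &[VMContextLoc::Pinned, VMContextLoc::Pinned]);

        // An indirectly loaded callee context, resolved at an offset from the
        // pinned VMContext.
        let args = ContextArgs::offset_from_pinned_vmctx(8);
        assert_eq!(args.as_slice(), &[VMContextLoc::OffsetFromPinned(8)]);
    }
}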

#[derive(Copy, Clone, Debug)]
pub(crate) enum CalleeKind {
    /// A function call to a raw address.
    Indirect(Reg),
    /// A function call to a local function.
    Direct(UserExternalNameRef),
}

impl CalleeKind {
    /// Creates a callee kind from a register.
    pub fn indirect(reg: Reg) -> Self {
        Self::Indirect(reg)
    }

    /// Creates a direct callee kind from a function name.
    pub fn direct(name: UserExternalNameRef) -> Self {
        Self::Direct(name)
    }
}

impl RegImm {
    /// Register constructor.
    pub fn reg(r: Reg) -> Self {
        RegImm::Reg(r)
    }

    /// I64 immediate constructor.
    pub fn i64(val: i64) -> Self {
        RegImm::Imm(Imm::i64(val))
    }

    /// I32 immediate constructor.
    pub fn i32(val: i32) -> Self {
        RegImm::Imm(Imm::i32(val))
    }

    /// F32 immediate, stored using its bits representation.
    pub fn f32(bits: u32) -> Self {
        RegImm::Imm(Imm::f32(bits))
    }

    /// F64 immediate, stored using its bits representation.
    pub fn f64(bits: u64) -> Self {
        RegImm::Imm(Imm::f64(bits))
    }

    /// V128 immediate.
    pub fn v128(bits: i128) -> Self {
        RegImm::Imm(Imm::v128(bits))
    }
}

impl From<Reg> for RegImm {
    fn from(r: Reg) -> Self {
        Self::Reg(r)
    }
}

#[derive(Debug)]
pub enum RoundingMode {
    Nearest,
    Up,
    Down,
    Zero,
}

/// Memory flags for trusted loads/stores.
pub const TRUSTED_FLAGS: MemFlags = MemFlags::trusted();

/// Flags used for WebAssembly loads / stores.
/// Untrusted by default so we don't set `no_trap`.
/// We also ensure that the endianness is the right one for WebAssembly.
pub const UNTRUSTED_FLAGS: MemFlags = MemFlags::new().with_endianness(Endianness::Little);

/// Generic MacroAssembler interface used by the code generation.
///
/// The MacroAssembler trait aims to expose an interface, high-level enough,
/// so that each ISA can provide its own lowering to machine code. For example,
/// for WebAssembly operators that don't have a direct mapping to a machine
/// instruction, the interface defines a signature matching the WebAssembly
/// operator, allowing each implementation to lower such an operator entirely.
/// This approach attributes more responsibility to the MacroAssembler, but frees
/// the caller from having to assemble the right sequence of instructions at the
/// operator callsite.
///
/// The interface defaults to a three-argument form for binary operations;
/// this allows a natural mapping to instructions for RISC architectures,
/// which use a three-argument form.
/// This approach allows for a more general interface that can be restricted
/// where needed, for architectures that use a two-argument form.
pub(crate) trait MacroAssembler {
    /// The addressing mode.
    type Address: Copy + Debug;

    /// The pointer representation of the target ISA,
    /// used to access information from [`VMOffsets`].
    type Ptr: PtrSize;

    /// The ABI details of the target.
    type ABI: abi::ABI;

    /// Emit the function prologue.
    fn prologue(&mut self, vmctx: Reg) -> Result<()> {
        self.frame_setup()?;
        self.check_stack(vmctx)
    }

    /// Generate the frame setup sequence.
    fn frame_setup(&mut self) -> Result<()>;

    /// Generate the frame restore sequence.
    fn frame_restore(&mut self) -> Result<()>;

    /// Emit a stack check.
    fn check_stack(&mut self, vmctx: Reg) -> Result<()>;

    /// Emit the function epilogue.
    fn epilogue(&mut self) -> Result<()> {
        self.frame_restore()
    }

    /// Reserve stack space.
    fn reserve_stack(&mut self, bytes: u32) -> Result<()>;

    /// Free stack space.
    fn free_stack(&mut self, bytes: u32) -> Result<()>;

    /// Reset the stack pointer to the given offset.
    ///
    /// Used to reset the stack pointer to a given offset
    /// when dealing with unreachable code.
    fn reset_stack_pointer(&mut self, offset: SPOffset) -> Result<()>;

    /// Get the address of a local slot.
    fn local_address(&mut self, local: &LocalSlot) -> Result<Self::Address>;

    /// Constructs an address with an offset that is relative to the
    /// current position of the stack pointer (e.g. `[sp + (sp_offset - offset)]`).
    fn address_from_sp(&self, offset: SPOffset) -> Result<Self::Address>;

    /// Constructs an address with an offset that is absolute to the
    /// current position of the stack pointer (e.g. `[sp + offset]`).
    fn address_at_sp(&self, offset: SPOffset) -> Result<Self::Address>;

    /// Alias for [`Self::address_at_reg`] using the VMContext register as
    /// a base. The VMContext register is derived from the ABI type that is
    /// associated to the MacroAssembler.
    fn address_at_vmctx(&self, offset: u32) -> Result<Self::Address>;

    /// Construct an address that is absolute to the current position
    /// of the given register.
    fn address_at_reg(&self, reg: Reg, offset: u32) -> Result<Self::Address>;

    /// Emit a function call to either a local or external function.
    fn call(
        &mut self,
        stack_args_size: u32,
        f: impl FnMut(&mut Self) -> Result<(CalleeKind, CallingConvention)>,
    ) -> Result<u32>;

    /// Acquire a scratch register and execute the given callback.
    fn with_scratch<T: ScratchType, R>(&mut self, f: impl FnOnce(&mut Self, Scratch) -> R) -> R;

    /// Convenience wrapper over [`Self::with_scratch`], derives the register class
    /// for a particular Wasm value type.
    fn with_scratch_for<R>(
        &mut self,
        ty: WasmValType,
        f: impl FnOnce(&mut Self, Scratch) -> R,
    ) -> R {
        match ty {
            WasmValType::I32
            | WasmValType::I64
            | WasmValType::Ref(WasmRefType {
                heap_type: WasmHeapType::Func,
                ..
            }) => self.with_scratch::<IntScratch, _>(f),
            WasmValType::F32 | WasmValType::F64 | WasmValType::V128 => {
                self.with_scratch::<FloatScratch, _>(f)
            }
            _ => unimplemented!(),
        }
    }

    /// Get stack pointer offset.
    fn sp_offset(&self) -> Result<SPOffset>;

    /// Perform a stack store.
    fn store(&mut self, src: RegImm, dst: Self::Address, size: OperandSize) -> Result<()>;

    /// Alias for `MacroAssembler::store` with the operand size corresponding
    /// to the pointer size of the target.
    fn store_ptr(&mut self, src: Reg, dst: Self::Address) -> Result<()>;

    /// Perform a WebAssembly store.
    ///
    /// A WebAssembly store introduces several additional invariants compared to
    /// [Self::store]: more precisely, it can implicitly trap in certain
    /// circumstances, even if explicit bounds checks are elided; in that sense,
    /// we consider this type of store untrusted. It can also differ with
    /// regard to endianness depending on the target ISA. For this reason,
    /// [Self::wasm_store] should be used explicitly when emitting WebAssembly
    /// stores.
    fn wasm_store(&mut self, src: Reg, dst: Self::Address, store_kind: StoreKind) -> Result<()>;

    /// Perform a zero-extended stack load.
    fn load(&mut self, src: Self::Address, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a WebAssembly load.
    ///
    /// A WebAssembly load introduces several additional invariants compared to
    /// [Self::load]: more precisely, it can implicitly trap in certain
    /// circumstances, even if explicit bounds checks are elided; in that sense,
    /// we consider this type of load untrusted. It can also differ with
    /// regard to endianness depending on the target ISA. For this reason,
    /// [Self::wasm_load] should be used explicitly when emitting WebAssembly
    /// loads.
    fn wasm_load(&mut self, src: Self::Address, dst: WritableReg, kind: LoadKind) -> Result<()>;

    /// Alias for `MacroAssembler::load` with the operand size corresponding
    /// to the pointer size of the target.
    fn load_ptr(&mut self, src: Self::Address, dst: WritableReg) -> Result<()>;

    /// Computes the effective address and stores the result in the destination
    /// register.
    fn compute_addr(
        &mut self,
        _src: Self::Address,
        _dst: WritableReg,
        _size: OperandSize,
    ) -> Result<()>;

    /// Pop a value from the machine stack into the given register.
    fn pop(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a move.
    fn mov(&mut self, dst: WritableReg, src: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a conditional move.
    fn cmov(&mut self, dst: WritableReg, src: Reg, cc: IntCmpKind, size: OperandSize)
    -> Result<()>;

    /// Performs a memory move of bytes from src to dest.
    /// Bytes are moved in blocks of 8 bytes, where possible.
    fn memmove(
        &mut self,
        src: SPOffset,
        dst: SPOffset,
        bytes: u32,
        direction: MemMoveDirection,
    ) -> Result<()> {
        match direction {
            MemMoveDirection::LowToHigh => debug_assert!(dst.as_u32() < src.as_u32()),
            MemMoveDirection::HighToLow => debug_assert!(dst.as_u32() > src.as_u32()),
        }
        // At least 4 byte aligned.
        debug_assert!(bytes % 4 == 0);
        let mut remaining = bytes;
        let word_bytes = <Self::ABI as abi::ABI>::word_bytes();

        let word_bytes = word_bytes as u32;

        let mut dst_offs;
        let mut src_offs;
        match direction {
            MemMoveDirection::LowToHigh => {
                dst_offs = dst.as_u32() - bytes;
                src_offs = src.as_u32() - bytes;
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        remaining -= word_bytes;
                        dst_offs += word_bytes;
                        src_offs += word_bytes;

                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;
                    }
                    anyhow::Ok(())
                })?;
            }
            MemMoveDirection::HighToLow => {
                // Go from the end to the beginning to handle overlapping addresses.
                src_offs = src.as_u32();
                dst_offs = dst.as_u32();
                self.with_scratch::<IntScratch, _>(|masm, scratch| {
                    while remaining >= word_bytes {
                        masm.load_ptr(
                            masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                            scratch.writable(),
                        )?;
                        masm.store_ptr(
                            scratch.inner(),
                            masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                        )?;

                        remaining -= word_bytes;
                        src_offs -= word_bytes;
                        dst_offs -= word_bytes;
                    }
                    anyhow::Ok(())
                })?;
            }
        }

        if remaining > 0 {
            let half_word = word_bytes / 2;
            let ptr_size = OperandSize::from_bytes(half_word as u8);
            debug_assert!(remaining == half_word);
            // Need to move the offsets ahead in the `LowToHigh` case to
            // compensate for the initial subtraction of `bytes`.
            if direction == MemMoveDirection::LowToHigh {
                dst_offs += half_word;
                src_offs += half_word;
            }

            self.with_scratch::<IntScratch, _>(|masm, scratch| {
                masm.load(
                    masm.address_from_sp(SPOffset::from_u32(src_offs))?,
                    scratch.writable(),
                    ptr_size,
                )?;
                masm.store(
                    scratch.inner().into(),
                    masm.address_from_sp(SPOffset::from_u32(dst_offs))?,
                    ptr_size,
                )?;
                anyhow::Ok(())
            })?;
        }
        Ok(())
    }

    /// Perform add operation.
    fn add(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a checked unsigned integer addition, emitting the provided trap
    /// if the addition overflows.
    fn checked_uadd(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: RegImm,
        size: OperandSize,
        trap: TrapCode,
    ) -> Result<()>;

    /// Perform subtraction operation.
    fn sub(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform multiplication operation.
    fn mul(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a floating point add operation.
    fn float_add(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point subtraction operation.
    fn float_sub(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point multiply operation.
    fn float_mul(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point divide operation.
    fn float_div(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point minimum operation. In x86, this will emit
    /// multiple instructions.
    fn float_min(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point maximum operation. In x86, this will emit
    /// multiple instructions.
    fn float_max(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, size: OperandSize) -> Result<()>;

    /// Perform a floating point copysign operation. In x86, this will emit
    /// multiple instructions.
    fn float_copysign(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Perform a floating point abs operation.
    fn float_abs(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a floating point negation operation.
    fn float_neg(&mut self, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Perform a floating point rounding operation.
    fn float_round<
        F: FnMut(&mut FuncEnv<Self::Ptr>, &mut CodeGenContext<Emission>, &mut Self) -> Result<()>,
    >(
        &mut self,
        mode: RoundingMode,
        env: &mut FuncEnv<Self::Ptr>,
        context: &mut CodeGenContext<Emission>,
        size: OperandSize,
        fallback: F,
    ) -> Result<()>;

    /// Perform a floating point square root operation.
    fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;

    /// Perform logical and operation.
    fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform logical or operation.
    fn or(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform logical exclusive or operation.
    fn xor(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>;

    /// Perform a shift operation between a register and an immediate.
    fn shift_ir(
        &mut self,
        dst: WritableReg,
        imm: Imm,
        lhs: Reg,
        kind: ShiftKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Perform a shift operation between two registers.
    /// This case is special in that some architectures have specific expectations
    /// regarding the location of the instruction arguments. To free the
    /// caller from having to deal with the architecture specific constraints
    /// we give this function access to the code generation context, allowing
    /// each implementation to decide the lowering path.
    fn shift(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: ShiftKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Perform division operation.
    /// Division is special in that some architectures have specific
    /// expectations regarding the location of the instruction
    /// arguments and regarding the location of the quotient /
    /// remainder. To free the caller from having to deal with the
    /// architecture specific constraints we give this function access
    /// to the code generation context, allowing each implementation
    /// to decide the lowering path. For cases in which division is an
    /// unconstrained binary operation, the caller can decide to use
    /// the `CodeGenContext::i32_binop` or `CodeGenContext::i64_binop`
    /// functions.
    fn div(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: DivKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Calculate remainder.
    fn rem(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: RemKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Compares `src1` against `src2` for the side effect of setting processor
    /// flags.
    ///
    /// Note that `src1` is the left-hand-side of the comparison and `src2` is
    /// the right-hand-side, so if testing `a < b` then `src1 == a` and
    /// `src2 == b`
    fn cmp(&mut self, src1: Reg, src2: RegImm, size: OperandSize) -> Result<()>;

    /// Compare src and dst and put the result in dst.
    /// This function will potentially emit a series of instructions.
    ///
    /// The initial value in `dst` is the left-hand-side of the comparison and
    /// the initial value in `src` is the right-hand-side of the comparison.
    /// That means for `a < b` then `dst == a` and `src == b`.
    fn cmp_with_set(
        &mut self,
        dst: WritableReg,
        src: RegImm,
        kind: IntCmpKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Compare floats in src1 and src2 and put the result in dst.
    /// In x86, this will emit multiple instructions.
    fn float_cmp_with_set(
        &mut self,
        dst: WritableReg,
        src1: Reg,
        src2: Reg,
        kind: FloatCmpKind,
        size: OperandSize,
    ) -> Result<()>;

    /// Count the number of leading zeroes in src and put the result in dst.
    /// In x64, this will emit multiple instructions if the `has_lzcnt` flag is
    /// false.
    fn clz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;

    /// Count the number of trailing zeroes in src and put the result in dst.
1791
/// In x64, this will emit multiple instructions if the `has_tzcnt` flag is
1792
/// false.
1793
fn ctz(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>;
1794
1795
/// Push the register to the stack, returning the stack slot metadata.
1796
// NB
1797
// The stack alignment should not be assumed after any call to `push`,
1798
// unless explicitly aligned otherwise. Typically, stack alignment is
1799
// maintained at call sites and during the execution of
1800
// epilogues.
1801
fn push(&mut self, src: Reg, size: OperandSize) -> Result<StackSlot>;
1802
1803
/// Finalize the assembly and return the result.
1804
fn finalize(self, base: Option<SourceLoc>) -> Result<MachBufferFinalized<Final>>;
1805
1806
/// Zero a particular register.
1807
fn zero(&mut self, reg: WritableReg) -> Result<()>;
1808
1809
/// Count the number of 1 bits in src and put the result in dst. In x64,
1810
/// this will emit multiple instructions if the `has_popcnt` flag is false.
1811
fn popcnt(&mut self, context: &mut CodeGenContext<Emission>, size: OperandSize) -> Result<()>;
1812
1813
/// Converts an i64 to an i32 by discarding the high 32 bits.
1814
fn wrap(&mut self, dst: WritableReg, src: Reg) -> Result<()>;
1815
1816
/// Extends an integer of a given size to a larger size.
1817
fn extend(&mut self, dst: WritableReg, src: Reg, kind: ExtendKind) -> Result<()>;
1818
1819
    /// Emits one or more instructions to perform a signed truncation of a
    /// float into an integer.
    fn signed_truncate(
        &mut self,
        dst: WritableReg,
        src: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
        kind: TruncKind,
    ) -> Result<()>;

    /// Emits one or more instructions to perform an unsigned truncation of a
    /// float into an integer.
    fn unsigned_truncate(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        src_size: OperandSize,
        dst_size: OperandSize,
        kind: TruncKind,
    ) -> Result<()>;

    /// Emits one or more instructions to perform a signed convert of an
    /// integer into a float.
    fn signed_convert(
        &mut self,
        dst: WritableReg,
        src: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) -> Result<()>;

    /// Emits one or more instructions to perform an unsigned convert of an
    /// integer into a float.
    fn unsigned_convert(
        &mut self,
        dst: WritableReg,
        src: Reg,
        tmp_gpr: Reg,
        src_size: OperandSize,
        dst_size: OperandSize,
    ) -> Result<()>;

    /// Reinterpret a float as an integer.
    fn reinterpret_float_as_int(
        &mut self,
        dst: WritableReg,
        src: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Reinterpret an integer as a float.
    fn reinterpret_int_as_float(
        &mut self,
        dst: WritableReg,
        src: Reg,
        size: OperandSize,
    ) -> Result<()>;

    /// Demote an f64 to an f32.
    fn demote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;

    /// Promote an f32 to an f64.
    fn promote(&mut self, dst: WritableReg, src: Reg) -> Result<()>;

    /// Zero a given memory range.
    ///
    /// The default implementation divides the given memory range
    /// into word-sized slots. Then it unrolls a series of store
    /// instructions, effectively assigning zero to each slot.
    fn zero_mem_range(&mut self, mem: &Range<u32>) -> Result<()> {
        let word_size = <Self::ABI as abi::ABI>::word_bytes() as u32;
        if mem.is_empty() {
            return Ok(());
        }

        let start = if mem.start % word_size == 0 {
            mem.start
        } else {
            // Ensure that the start of the range is at least 4-byte aligned.
            assert!(mem.start % 4 == 0);
            let start = align_to(mem.start, word_size);
            let addr: Self::Address = self.local_address(&LocalSlot::i32(start))?;
            self.store(RegImm::i32(0), addr, OperandSize::S32)?;
            // Ensure that the new start of the range is word-size aligned.
            assert!(start % word_size == 0);
            start
        };

        let end = align_to(mem.end, word_size);
        let slots = (end - start) / word_size;

        if slots == 1 {
            let slot = LocalSlot::i64(start + word_size);
            let addr: Self::Address = self.local_address(&slot)?;
            self.store(RegImm::i64(0), addr, OperandSize::S64)?;
        } else {
            // TODO
            // Add an upper bound to this generation;
            // given a considerably large number of slots
            // this will be inefficient.
            self.with_scratch::<IntScratch, _>(|masm, scratch| {
                masm.zero(scratch.writable())?;
                let zero = RegImm::reg(scratch.inner());

                for step in (start..end).step_by(word_size as usize) {
                    let slot = LocalSlot::i64(step + word_size);
                    let addr: Self::Address = masm.local_address(&slot)?;
                    masm.store(zero, addr, OperandSize::S64)?;
                }
                anyhow::Ok(())
            })?;
        }

        Ok(())
    }

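    // Worked example for the default implementation above: on a 64-bit target
    // (`word_size == 8`), `zero_mem_range(&(8..24))` covers two word-sized
    // slots, so it zeroes a scratch register and emits two 64-bit stores.
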
    /// Generate a label.
    fn get_label(&mut self) -> Result<MachLabel>;

    /// Bind the given label at the current code offset.
    fn bind(&mut self, label: MachLabel) -> Result<()>;

    /// Conditional branch.
    ///
    /// Performs a comparison between the two operands,
    /// and immediately after emits a jump to the given
    /// label destination if the condition is met.
    fn branch(
        &mut self,
        kind: IntCmpKind,
        lhs: Reg,
        rhs: RegImm,
        taken: MachLabel,
        size: OperandSize,
    ) -> Result<()>;

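    // Illustrative sketch (register and label names hypothetical, and the
    // `IntCmpKind` variant spelling is an assumption): a typical emission
    // pattern combining the label primitives above, branching when `lhs` is
    // zero:
    //
    //     let taken = masm.get_label()?;
    //     masm.branch(IntCmpKind::Eq, lhs, RegImm::i32(0), taken, OperandSize::S32)?;
    //     // ... not-taken path ...
    //     masm.bind(taken)?;
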
    /// Emits an unconditional jump to the given label.
    fn jmp(&mut self, target: MachLabel) -> Result<()>;

    /// Emits a jump table sequence. The default label is specified as
    /// the last element of the targets slice.
    fn jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) -> Result<()>;

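    // Illustrative sketch (label and register names hypothetical): a
    // `br_table` with two in-range targets and a default target passes the
    // default label last:
    //
    //     masm.jmp_table(&[l0, l1, default], index, tmp)?;
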
    /// Emit an unreachable code trap.
    fn unreachable(&mut self) -> Result<()>;

    /// Emit an unconditional trap.
    fn trap(&mut self, code: TrapCode) -> Result<()>;

    /// Traps if the condition code is met.
    fn trapif(&mut self, cc: IntCmpKind, code: TrapCode) -> Result<()>;

    /// Trap if the source register is zero.
    fn trapz(&mut self, src: Reg, code: TrapCode) -> Result<()>;

    /// Ensures that the stack pointer is correctly positioned before an unconditional
    /// jump according to the requirements of the destination target.
    fn ensure_sp_for_jump(&mut self, target: SPOffset) -> Result<()> {
        let bytes = self
            .sp_offset()?
            .as_u32()
            .checked_sub(target.as_u32())
            .unwrap_or(0);

        if bytes > 0 {
            self.free_stack(bytes)?;
        }

        Ok(())
    }

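    // Worked example for the default implementation above: if the current
    // `sp_offset()` is 32 bytes and the jump target expects 16, the 16 excess
    // bytes are freed; if the current offset is already at or below the
    // target, no stack adjustment is emitted.
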
    /// Mark the start of a source location returning the machine code offset
    /// and the relative source code location.
    fn start_source_loc(&mut self, loc: RelSourceLoc) -> Result<(CodeOffset, RelSourceLoc)>;

    /// Mark the end of a source location.
    fn end_source_loc(&mut self) -> Result<()>;

    /// The current offset, in bytes from the beginning of the function.
    fn current_code_offset(&self) -> Result<CodeOffset>;

    /// Performs a 128-bit addition.
    fn add128(
        &mut self,
        dst_lo: WritableReg,
        dst_hi: WritableReg,
        lhs_lo: Reg,
        lhs_hi: Reg,
        rhs_lo: Reg,
        rhs_hi: Reg,
    ) -> Result<()>;

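    // Illustrative note (conceptual reading of the signature, not stated in
    // this file): this is the two-register lowering of a 128-bit add,
    // `dst_lo = lhs_lo + rhs_lo` and `dst_hi = lhs_hi + rhs_hi + carry`,
    // where `carry` is the carry-out of the low-half addition.
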
    /// Performs a 128-bit subtraction.
    fn sub128(
        &mut self,
        dst_lo: WritableReg,
        dst_hi: WritableReg,
        lhs_lo: Reg,
        lhs_hi: Reg,
        rhs_lo: Reg,
        rhs_hi: Reg,
    ) -> Result<()>;

    /// Performs a widening multiplication from two 64-bit operands into a
    /// 128-bit result.
    ///
    /// Note that some platforms require special handling of registers in this
    /// instruction (e.g. x64) so full access to `CodeGenContext` is provided.
    fn mul_wide(&mut self, context: &mut CodeGenContext<Emission>, kind: MulWideKind)
    -> Result<()>;

    /// Takes the value in a src operand and replicates it across lanes of
    /// `size` in a destination result.
    fn splat(&mut self, context: &mut CodeGenContext<Emission>, size: SplatKind) -> Result<()>;

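    // Illustrative note: for `i32x4.splat`, the single 32-bit source value is
    // copied into all four lanes of the 128-bit destination.
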
    /// Performs a shuffle between two 128-bit vectors into a 128-bit result
    /// using lanes as a mask to select which indexes to copy.
    fn shuffle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg, lanes: [u8; 16]) -> Result<()>;

    /// Performs a swizzle between two 128-bit vectors into a 128-bit result.
    fn swizzle(&mut self, dst: WritableReg, lhs: Reg, rhs: Reg) -> Result<()>;

    /// Performs the RMW `op` operation on the passed `addr`.
    ///
    /// The value *before* the operation was performed is written back to the `operand` register.
    fn atomic_rmw(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        addr: Self::Address,
        size: OperandSize,
        op: RmwOp,
        flags: MemFlags,
        extend: Option<Extend<Zero>>,
    ) -> Result<()>;

    /// Extracts the scalar value from `src` in `lane` to `dst`.
    fn extract_lane(
        &mut self,
        src: Reg,
        dst: WritableReg,
        lane: u8,
        kind: ExtractLaneKind,
    ) -> Result<()>;

    /// Replaces the value in `lane` in `dst` with the value in `src`.
    fn replace_lane(
        &mut self,
        src: RegImm,
        dst: WritableReg,
        lane: u8,
        kind: ReplaceLaneKind,
    ) -> Result<()>;

    /// Perform an atomic CAS (compare-and-swap) operation with the value at `addr`, and `expected`
    /// and `replacement` (at the top of the context's stack).
    ///
    /// This method takes the `CodeGenContext` as an argument to accommodate architectures that
    /// expect parameters in specific registers. The context stack contains the `replacement`
    /// and `expected` values, in that order. The implementer is expected to push the value that
    /// was at `addr` prior to the update onto the context's stack before returning.
    fn atomic_cas(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        addr: Self::Address,
        size: OperandSize,
        flags: MemFlags,
        extend: Option<Extend<Zero>>,
    ) -> Result<()>;

    /// Compares vector registers `lhs` and `rhs` for equality and puts the
    /// vector of results in `dst`.
    fn v128_eq(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorEqualityKind,
    ) -> Result<()>;

    /// Compares vector registers `lhs` and `rhs` for inequality and puts the
    /// vector of results in `dst`.
    fn v128_ne(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorEqualityKind,
    ) -> Result<()>;

    /// Performs a less than comparison with vector registers `lhs` and `rhs`
    /// and puts the vector of results in `dst`.
    fn v128_lt(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a less than or equal comparison with vector registers `lhs`
    /// and `rhs` and puts the vector of results in `dst`.
    fn v128_le(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a greater than comparison with vector registers `lhs` and
    /// `rhs` and puts the vector of results in `dst`.
    fn v128_gt(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Performs a greater than or equal comparison with vector registers `lhs`
    /// and `rhs` and puts the vector of results in `dst`.
    fn v128_ge(
        &mut self,
        dst: WritableReg,
        lhs: Reg,
        rhs: Reg,
        kind: VectorCompareKind,
    ) -> Result<()>;

    /// Emit a memory fence.
    fn fence(&mut self) -> Result<()>;

    /// Perform a logical `not` operation on the 128-bit vector value in `dst`.
    fn v128_not(&mut self, dst: WritableReg) -> Result<()>;

    /// Perform a logical `and` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    fn v128_and(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `and_not` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    ///
    /// `and_not` is not commutative: dst = !src1 & src2.
    fn v128_and_not(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `or` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    fn v128_or(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Perform a logical `xor` operation on `src1` and `src2`, both 128-bit vector values, writing
    /// the result to `dst`.
    fn v128_xor(&mut self, src1: Reg, src2: Reg, dst: WritableReg) -> Result<()>;

    /// Given two 128-bit vectors `src1` and `src2`, and a 128-bit bitmask `mask`, selects bits
    /// from `src1` when mask is 1, and from `src2` when mask is 0.
    ///
    /// This is equivalent to: `v128.or(v128.and(src1, mask), v128.and(src2, v128.not(mask)))`.
    fn v128_bitselect(&mut self, src1: Reg, src2: Reg, mask: Reg, dst: WritableReg) -> Result<()>;

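    // Worked example: for a mask byte of 0b1111_0000, the upper four bits of
    // the corresponding result byte come from `src1` and the lower four bits
    // come from `src2`.
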
    /// If any bit in `src` is 1, set `dst` to 1, otherwise set `dst` to 0.
    fn v128_any_true(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

    /// Convert vector of integers to vector of floating points.
    fn v128_convert(&mut self, src: Reg, dst: WritableReg, kind: V128ConvertKind) -> Result<()>;

    /// Convert two input vectors into a smaller lane vector by narrowing each
    /// lane.
    fn v128_narrow(
        &mut self,
        src1: Reg,
        src2: Reg,
        dst: WritableReg,
        kind: V128NarrowKind,
    ) -> Result<()>;

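    // Illustrative note: for `i8x16.narrow_i16x8_s`, each 16-bit lane is
    // narrowed to 8 bits with signed saturation; lanes from `src1` fill the
    // low half of `dst` and lanes from `src2` fill the high half.
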
    /// Converts a vector containing two 64-bit floating point lanes to two
    /// 32-bit floating point lanes, setting the two higher lanes to 0.
    fn v128_demote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

    /// Converts a vector containing four 32-bit floating point lanes to two
    /// 64-bit floating point lanes. Only the two lower lanes are converted.
    fn v128_promote(&mut self, src: Reg, dst: WritableReg) -> Result<()>;

    /// Converts the low or high half of the smaller lane vector to a larger lane
    /// vector.
    fn v128_extend(&mut self, src: Reg, dst: WritableReg, kind: V128ExtendKind) -> Result<()>;

    /// Perform a vector add between `lhs` and `rhs`, placing the result in
    /// `dst`.
    fn v128_add(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128AddKind) -> Result<()>;

    /// Perform a vector sub between `lhs` and `rhs`, placing the result in `dst`.
    fn v128_sub(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, kind: V128SubKind) -> Result<()>;

    /// Perform a vector lane-wise mul between `lhs` and `rhs`, placing the result in `dst`.
    fn v128_mul(&mut self, context: &mut CodeGenContext<Emission>, kind: V128MulKind)
    -> Result<()>;

    /// Perform an absolute operation on a vector.
    fn v128_abs(&mut self, src: Reg, dst: WritableReg, kind: V128AbsKind) -> Result<()>;

    /// Vectorized negate of the content of `op`.
    fn v128_neg(&mut self, op: WritableReg, kind: V128NegKind) -> Result<()>;

    /// Perform the shift operation specified by `kind`, by the shift amount specified by the 32-bit
    /// integer at the top of the stack, on the 128-bit vector specified by the second value
    /// from the top of the stack, interpreted as packed integers of size `lane_width`.
    ///
    /// The shift amount is taken modulo `lane_width`.
    fn v128_shift(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        lane_width: OperandSize,
        kind: ShiftKind,
    ) -> Result<()>;

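    // Worked example: with 32-bit lanes, a shift amount of 35 is reduced
    // modulo the lane width to an effective shift of 3.
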
    /// Perform a saturating integer q-format rounding multiplication.
    fn v128_q15mulr_sat_s(
        &mut self,
        lhs: Reg,
        rhs: Reg,
        dst: WritableReg,
        size: OperandSize,
    ) -> Result<()>;

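    // Illustrative note (recalled from the Wasm SIMD semantics, not stated in
    // this file): each 16-bit lane computes
    // `saturate_s((lhs * rhs + 0x4000) >> 15)`.
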
    /// Sets `dst` to 1 if all lanes in `src` are non-zero, and sets `dst` to 0
    /// otherwise.
    fn v128_all_true(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Extracts the high bit of each lane in `src` and produces a scalar mask
    /// with all bits concatenated in `dst`.
    fn v128_bitmask(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lanewise truncation operation.
    ///
    /// If using an integer kind of truncation, then this performs a lane-wise
    /// saturating conversion from float to integer using the IEEE
    /// `convertToIntegerTowardZero` function. If any input lane is NaN, the
    /// resulting lane is 0. If the rounded integer value of a lane is outside
    /// the range of the destination type, the result is saturated to the
    /// nearest representable integer value.
    fn v128_trunc(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: V128TruncKind,
    ) -> Result<()>;

    /// Perform a lane-wise `min` operation between `src1` and `src2`.
    fn v128_min(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MinKind)
    -> Result<()>;

    /// Perform a lane-wise `max` operation between `src1` and `src2`.
    fn v128_max(&mut self, src1: Reg, src2: Reg, dst: WritableReg, kind: V128MaxKind)
    -> Result<()>;

    /// Perform the lane-wise integer extended multiplication producing a result twice as wide as
    /// the inputs. This is equivalent to an extend followed by a multiply.
    ///
    /// The extension to be performed is inferred from the `lane_width` and the `kind` of extmul,
    /// e.g., if `lane_width` is `S16`, and `kind` is `LowSigned`, then we sign-extend the lower
    /// 8 bits of the 16-bit lanes.
    fn v128_extmul(
        &mut self,
        context: &mut CodeGenContext<Emission>,
        kind: V128ExtMulKind,
    ) -> Result<()>;

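    // Illustrative note: for `i32x4.extmul_low_i16x8_s`, the low four 16-bit
    // lanes of each operand are sign-extended to 32 bits and then multiplied,
    // producing four 32-bit lanes.
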
    /// Perform the lane-wise integer extended pairwise addition producing extended results (twice
    /// as wide as the inputs).
    fn v128_extadd_pairwise(
        &mut self,
        src: Reg,
        dst: WritableReg,
        kind: V128ExtAddKind,
    ) -> Result<()>;

    /// Lane-wise multiply signed 16-bit integers in `lhs` and `rhs` and add
    /// adjacent pairs of the 32-bit results.
    fn v128_dot(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg) -> Result<()>;

    /// Count the number of bits set in each lane.
    fn v128_popcnt(&mut self, context: &mut CodeGenContext<Emission>) -> Result<()>;

    /// Lane-wise rounding average of vectors of integers in `lhs` and `rhs`
    /// and put the results in `dst`.
    fn v128_avgr(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise IEEE division on vectors of floats.
    fn v128_div(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise IEEE square root of vector of floats.
    fn v128_sqrt(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise ceiling of vector of floats.
    fn v128_ceil(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise flooring of vector of floats.
    fn v128_floor(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise rounding to nearest integer for vector of floats.
    fn v128_nearest(&mut self, src: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise minimum value defined as `rhs < lhs ? rhs : lhs`.
    fn v128_pmin(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;

    /// Lane-wise maximum value defined as `lhs < rhs ? rhs : lhs`.
    fn v128_pmax(&mut self, lhs: Reg, rhs: Reg, dst: WritableReg, size: OperandSize) -> Result<()>;
}