Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
bytecodealliance
GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/winch/codegen/src/isa/aarch64/asm.rs
1692 views
1
//! Assembler library implementation for Aarch64.
2
use super::{address::Address, regs};
3
use crate::CallingConvention;
4
use crate::aarch64::regs::zero;
5
use crate::masm::{
6
DivKind, Extend, ExtendKind, FloatCmpKind, Imm, IntCmpKind, RemKind, RoundingMode, ShiftKind,
7
Signed, TRUSTED_FLAGS, TruncKind,
8
};
9
use crate::{
10
constant_pool::ConstantPool,
11
masm::OperandSize,
12
reg::{Reg, WritableReg, writable},
13
};
14
15
use cranelift_codegen::PatchRegion;
16
use cranelift_codegen::isa::aarch64::inst::emit::{enc_arith_rrr, enc_move_wide, enc_movk};
17
use cranelift_codegen::isa::aarch64::inst::{
18
ASIMDFPModImm, FpuToIntOp, MoveWideConst, NZCV, UImm5,
19
};
20
use cranelift_codegen::{
21
Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState, MachLabel,
22
Writable,
23
ir::{ExternalName, MemFlags, SourceLoc, TrapCode, UserExternalNameRef},
24
isa::aarch64::inst::{
25
self, ALUOp, ALUOp3, AMode, BitOp, BranchTarget, Cond, CondBrKind, ExtendOp,
26
FPULeftShiftImm, FPUOp1, FPUOp2,
27
FPUOpRI::{self, UShr32, UShr64},
28
FPUOpRIMod, FPURightShiftImm, FpuRoundMode, Imm12, ImmLogic, ImmShift, Inst, IntToFpuOp,
29
PairAMode, ScalarSize, VecLanesOp, VecMisc2, VectorSize,
30
emit::{EmitInfo, EmitState},
31
},
32
settings,
33
};
34
use regalloc2::RegClass;
35
use wasmtime_math::{f32_cvt_to_int_bounds, f64_cvt_to_int_bounds};
36
37
impl From<OperandSize> for inst::OperandSize {
38
fn from(size: OperandSize) -> Self {
39
match size {
40
OperandSize::S32 => Self::Size32,
41
OperandSize::S64 => Self::Size64,
42
s => panic!("Invalid operand size {s:?}"),
43
}
44
}
45
}
46
47
impl From<IntCmpKind> for Cond {
48
fn from(value: IntCmpKind) -> Self {
49
match value {
50
IntCmpKind::Eq => Cond::Eq,
51
IntCmpKind::Ne => Cond::Ne,
52
IntCmpKind::LtS => Cond::Lt,
53
IntCmpKind::LtU => Cond::Lo,
54
IntCmpKind::GtS => Cond::Gt,
55
IntCmpKind::GtU => Cond::Hi,
56
IntCmpKind::LeS => Cond::Le,
57
IntCmpKind::LeU => Cond::Ls,
58
IntCmpKind::GeS => Cond::Ge,
59
IntCmpKind::GeU => Cond::Hs,
60
}
61
}
62
}
63
64
impl From<FloatCmpKind> for Cond {
65
fn from(value: FloatCmpKind) -> Self {
66
match value {
67
FloatCmpKind::Eq => Cond::Eq,
68
FloatCmpKind::Ne => Cond::Ne,
69
FloatCmpKind::Lt => Cond::Mi,
70
FloatCmpKind::Gt => Cond::Gt,
71
FloatCmpKind::Le => Cond::Ls,
72
FloatCmpKind::Ge => Cond::Ge,
73
}
74
}
75
}
76
77
impl From<OperandSize> for ScalarSize {
78
fn from(size: OperandSize) -> ScalarSize {
79
match size {
80
OperandSize::S8 => ScalarSize::Size8,
81
OperandSize::S16 => ScalarSize::Size16,
82
OperandSize::S32 => ScalarSize::Size32,
83
OperandSize::S64 => ScalarSize::Size64,
84
OperandSize::S128 => ScalarSize::Size128,
85
}
86
}
87
}
88
89
/// Low level assembler implementation for Aarch64.
pub(crate) struct Assembler {
    /// The machine instruction buffer into which all instructions are
    /// emitted; finalized into machine code by [`Assembler::finalize`].
    buffer: MachBuffer<Inst>,
    /// Constant emission information (shared compilation flags) passed
    /// to each instruction's `emit` call.
    emit_info: EmitInfo,
    /// Emission state threaded through instruction emission.
    emit_state: EmitState,
    /// Constant pool; holds constants registered via
    /// [`Assembler::add_constant`] until finalization.
    pool: ConstantPool,
}
100
101
impl Assembler {
102
/// Create a new Aarch64 assembler.
103
pub fn new(shared_flags: settings::Flags) -> Self {
104
Self {
105
buffer: MachBuffer::<Inst>::new(),
106
emit_state: Default::default(),
107
emit_info: EmitInfo::new(shared_flags),
108
pool: ConstantPool::new(),
109
}
110
}
111
}
112
113
impl Assembler {
114
/// Return the emitted code.
///
/// Consumes the assembler, flushing the constant pool into the buffer
/// and applying `loc` (or a default source location) as the base
/// source location of the finalized buffer.
pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> {
    let stencil = self
        .buffer
        .finish(&self.pool.constants(), self.emit_state.ctrl_plane_mut());
    stencil.apply_base_srcloc(loc.unwrap_or_default())
}

/// Emit a single instruction, conservatively reserving island space
/// for the worst-case encoded size of any instruction.
fn emit(&mut self, inst: Inst) {
    self.emit_with_island(inst, Inst::worst_case_size());
}

/// Emit `inst`, first emitting an island if the buffer reports that
/// `needed_space` bytes would otherwise not fit.
///
/// When an island is required, an unconditional jump over the island
/// is emitted first so that execution skips the island's contents and
/// resumes at the label bound immediately after it.
fn emit_with_island(&mut self, inst: Inst, needed_space: u32) {
    if self.buffer.island_needed(needed_space) {
        let label = self.buffer.get_label();
        let jmp = Inst::Jump {
            dest: BranchTarget::Label(label),
        };
        jmp.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
        self.buffer
            .emit_island(needed_space, self.emit_state.ctrl_plane_mut());
        self.buffer
            .bind_label(label, self.emit_state.ctrl_plane_mut());
    }
    inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
}

/// Adds a constant to the constant pool, returning its address.
pub fn add_constant(&mut self, constant: &[u8]) -> Address {
    let handle = self.pool.register(constant, &mut self.buffer);
    Address::constant(handle)
}
146
147
/// Store a pair of registers.
///
/// Panics if `addr` cannot be expressed as a pair addressing mode.
pub fn stp(&mut self, xt1: Reg, xt2: Reg, addr: Address) {
    let mem: PairAMode = addr.try_into().unwrap();
    self.emit(Inst::StoreP64 {
        rt: xt1.into(),
        rt2: xt2.into(),
        mem,
        flags: MemFlags::trusted(),
    });
}

/// Store a register.
///
/// The concrete store instruction is chosen from the register class of
/// `reg` (integer vs. floating point) together with the operand `size`.
pub fn str(&mut self, reg: Reg, addr: Address, size: OperandSize, flags: MemFlags) {
    let mem: AMode = addr.try_into().unwrap();

    use OperandSize::*;
    let inst = match (reg.is_int(), size) {
        // 8- and 16-bit stores ignore the register class.
        (_, S8) => Inst::Store8 {
            rd: reg.into(),
            mem,
            flags,
        },
        (_, S16) => Inst::Store16 {
            rd: reg.into(),
            mem,
            flags,
        },
        (true, S32) => Inst::Store32 {
            rd: reg.into(),
            mem,
            flags,
        },
        (false, S32) => Inst::FpuStore32 {
            rd: reg.into(),
            mem,
            flags,
        },
        (true, S64) => Inst::Store64 {
            rd: reg.into(),
            mem,
            flags,
        },
        (false, S64) => Inst::FpuStore64 {
            rd: reg.into(),
            mem,
            flags,
        },
        // 128-bit stores always go through the FP/vector register file.
        (_, S128) => Inst::FpuStore128 {
            rd: reg.into(),
            mem,
            flags,
        },
    };

    self.emit(inst);
}

/// Load from `addr` into `rd` with sign extension.
pub fn sload(&mut self, addr: Address, rd: WritableReg, size: OperandSize, flags: MemFlags) {
    self.ldr(addr, rd, size, true, flags);
}

/// Load from `addr` into `rd` with zero extension.
pub fn uload(&mut self, addr: Address, rd: WritableReg, size: OperandSize, flags: MemFlags) {
    self.ldr(addr, rd, size, false, flags);
}

/// Emit a load from `addr` into `rd`.
///
/// The concrete load instruction is chosen from the destination
/// register class, the requested extension (`signed`), and the operand
/// `size`. FP destinations ignore `signed` as there is no
/// sign-extending FP load.
fn ldr(
    &mut self,
    addr: Address,
    rd: WritableReg,
    size: OperandSize,
    signed: bool,
    flags: MemFlags,
) {
    use OperandSize::*;
    let writable_reg = rd.map(Into::into);
    let mem: AMode = addr.try_into().unwrap();

    let inst = match (rd.to_reg().is_int(), signed, size) {
        (_, false, S8) => Inst::ULoad8 {
            rd: writable_reg,
            mem,
            flags,
        },
        (_, true, S8) => Inst::SLoad8 {
            rd: writable_reg,
            mem,
            flags,
        },
        (_, false, S16) => Inst::ULoad16 {
            rd: writable_reg,
            mem,
            flags,
        },
        (_, true, S16) => Inst::SLoad16 {
            rd: writable_reg,
            mem,
            flags,
        },
        (true, false, S32) => Inst::ULoad32 {
            rd: writable_reg,
            mem,
            flags,
        },
        (false, _, S32) => Inst::FpuLoad32 {
            rd: writable_reg,
            mem,
            flags,
        },
        (true, true, S32) => Inst::SLoad32 {
            rd: writable_reg,
            mem,
            flags,
        },
        // 64-bit integer loads need no extension.
        (true, _, S64) => Inst::ULoad64 {
            rd: writable_reg,
            mem,
            flags,
        },
        (false, _, S64) => Inst::FpuLoad64 {
            rd: writable_reg,
            mem,
            flags,
        },
        // 128-bit loads always target the FP/vector register file.
        (_, _, S128) => Inst::FpuLoad128 {
            rd: writable_reg,
            mem,
            flags,
        },
    };

    self.emit(inst);
}

/// Load a pair of registers.
///
/// Panics if `addr` cannot be expressed as a pair addressing mode.
pub fn ldp(&mut self, xt1: Reg, xt2: Reg, addr: Address) {
    let writable_xt1 = Writable::from_reg(xt1.into());
    let writable_xt2 = Writable::from_reg(xt2.into());
    let mem = addr.try_into().unwrap();

    self.emit(Inst::LoadP64 {
        rt: writable_xt1,
        rt2: writable_xt2,
        mem,
        flags: MemFlags::trusted(),
    });
}
296
297
/// Emit a series of instructions to move an arbitrary 64-bit immediate
/// into the destination register.
/// The emitted instructions will depend on the destination register class:
/// integer destinations use the constant-loading sequence provided by
/// `cranelift-codegen`; FP destinations try a single `fmov` with an
/// ASIMD modified immediate and otherwise load from the constant pool.
pub fn mov_ir(&mut self, rd: WritableReg, imm: Imm, size: OperandSize) {
    match rd.to_reg().class() {
        RegClass::Int => {
            Inst::load_constant(rd.map(Into::into), imm.unwrap_as_u64())
                .into_iter()
                .for_each(|i| self.emit(i));
        }
        RegClass::Float => {
            // Not every bit pattern is encodable as an FP modified
            // immediate, so fall back to a constant-pool load.
            match ASIMDFPModImm::maybe_from_u64(imm.unwrap_as_u64(), size.into()) {
                Some(imm) => {
                    self.emit(Inst::FpuMoveFPImm {
                        rd: rd.map(Into::into),
                        imm,
                        size: size.into(),
                    });
                }
                _ => {
                    let addr = self.add_constant(&imm.to_bytes());
                    self.uload(addr, rd, size, TRUSTED_FLAGS);
                }
            }
        }
        // Vector destinations are not supported here.
        _ => unreachable!(),
    }
}
325
326
/// Register to register move.
pub fn mov_rr(&mut self, rm: Reg, rd: WritableReg, size: OperandSize) {
    let writable_rd = rd.map(Into::into);
    self.emit(Inst::Mov {
        size: size.into(),
        rd: writable_rd,
        rm: rm.into(),
    });
}

/// Floating point register to register move.
///
/// Only 32- and 64-bit moves are supported; other sizes panic.
pub fn fmov_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
    let writable = rd.map(Into::into);
    let inst = match size {
        OperandSize::S32 => Inst::FpuMove32 {
            rd: writable,
            rn: rn.into(),
        },
        OperandSize::S64 => Inst::FpuMove64 {
            rd: writable,
            rn: rn.into(),
        },
        _ => unreachable!(),
    };

    self.emit(inst);
}

/// Move from an integer register `rn` into the FP/vector register `rd`.
pub fn mov_to_fpu(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
    let writable_rd = rd.map(Into::into);
    self.emit(Inst::MovToFpu {
        size: size.into(),
        rd: writable_rd,
        rn: rn.into(),
    });
}

/// Move lane `idx` of the vector register `rn` into the integer
/// register `rd`.
pub fn mov_from_vec(&mut self, rn: Reg, rd: WritableReg, idx: u8, size: OperandSize) {
    self.emit(Inst::MovFromVec {
        rd: rd.map(Into::into),
        rn: rn.into(),
        idx,
        size: size.into(),
    });
}
371
372
/// Add immediate and register.
pub fn add_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.alu_rri(ALUOp::Add, imm, rn, rd, size);
}

/// Add immediate and register, setting overflow flags.
pub fn adds_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.alu_rri(ALUOp::AddS, imm, rn, rd, size);
}

/// Add with three registers.
pub fn add_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.alu_rrr_extend(ALUOp::Add, rm, rn, rd, size);
}

/// Add with three registers, setting overflow flags.
pub fn adds_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.alu_rrr_extend(ALUOp::AddS, rm, rn, rd, size);
}

/// Add across Vector.
pub fn addv(&mut self, rn: Reg, rd: WritableReg, size: VectorSize) {
    self.emit(Inst::VecLanes {
        op: VecLanesOp::Addv,
        rd: rd.map(Into::into),
        rn: rn.into(),
        size,
    });
}

/// Subtract immediate and register.
pub fn sub_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.alu_rri(ALUOp::Sub, imm, rn, rd, size);
}

/// Subtract immediate and register, setting flags.
///
/// The result is discarded into the zero register, making this an
/// immediate compare (`cmp rn, #imm`).
pub fn subs_ir(&mut self, imm: Imm12, rn: Reg, size: OperandSize) {
    self.alu_rri(ALUOp::SubS, imm, rn, writable!(regs::zero()), size);
}

/// Subtract with three registers.
pub fn sub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.alu_rrr_extend(ALUOp::Sub, rm, rn, rd, size);
}

/// Subtract with three registers, setting flags.
///
/// The result is discarded into the zero register, making this a
/// register compare (`cmp rn, rm`).
pub fn subs_rrr(&mut self, rm: Reg, rn: Reg, size: OperandSize) {
    self.alu_rrr_extend(ALUOp::SubS, rm, rn, writable!(regs::zero()), size);
}

/// Multiply with three registers.
///
/// Encoded as `madd` with the zero register as addend:
/// `rd = rn * rm + 0`.
pub fn mul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.alu_rrrr(ALUOp3::MAdd, rm, rn, rd, regs::zero(), size);
}
426
427
/// Signed/unsigned division with three registers.
///
/// Emits traps matching WebAssembly semantics: a trap on division by
/// zero, and for signed division a trap on `INT_MIN / -1` overflow.
pub fn div_rrr(
    &mut self,
    divisor: Reg,
    dividend: Reg,
    dest: Writable<Reg>,
    kind: DivKind,
    size: OperandSize,
) {
    // Check for division by 0.
    self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size);

    // Check for overflow: signed division overflows only for
    // `INT_MIN / -1`.
    if kind == DivKind::Signed {
        // `adds divisor, #1` sets the Z flag iff divisor == -1.
        self.alu_rri(
            ALUOp::AddS,
            Imm12::maybe_from_u64(1).expect("1 to fit in 12 bits"),
            divisor,
            writable!(zero()),
            size,
        );

        // Conditional compare: when the divisor was -1 (Eq), perform
        // `cmp dividend, #1`, which sets the V flag iff the dividend is
        // INT_MIN; otherwise install nzcv = 0000 (V clear).
        self.emit(Inst::CCmpImm {
            size: size.into(),
            rn: dividend.into(),
            imm: UImm5::maybe_from_u8(1).expect("1 fits in 5 bits"),
            nzcv: NZCV::new(false, false, false, false),
            cond: Cond::Eq,
        });

        // Finally, trap if the previous operation overflowed (V set),
        // i.e. divisor == -1 && dividend == INT_MIN.
        self.trapif(Cond::Vs, TrapCode::INTEGER_OVERFLOW);
    }

    // `cranelift-codegen` doesn't support emitting sdiv for anything but I64,
    // we therefore sign-extend the operand.
    // see: https://github.com/bytecodealliance/wasmtime/issues/9766
    let size = if size == OperandSize::S32 && kind == DivKind::Signed {
        self.extend(
            divisor,
            writable!(divisor),
            ExtendKind::Signed(Extend::<Signed>::I64Extend32),
        );
        self.extend(
            dividend,
            writable!(dividend),
            ExtendKind::Signed(Extend::<Signed>::I64Extend32),
        );
        OperandSize::S64
    } else {
        size
    };

    let op = match kind {
        DivKind::Signed => ALUOp::SDiv,
        DivKind::Unsigned => ALUOp::UDiv,
    };

    self.alu_rrr(op, divisor, dividend, dest.map(Into::into), size);
}
489
490
/// Signed/unsigned remainder operation with three registers.
///
/// Computes the quotient into `scratch`, then the remainder via
/// `msub`: `dest = dividend - (quotient * divisor)`. Traps on division
/// by zero; unlike division, `INT_MIN % -1` is defined (0), so no
/// overflow check is needed.
pub fn rem_rrr(
    &mut self,
    divisor: Reg,
    dividend: Reg,
    dest: Writable<Reg>,
    scratch: WritableReg,
    kind: RemKind,
    size: OperandSize,
) {
    // Check for division by 0
    self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size);

    // `cranelift-codegen` doesn't support emitting sdiv for anything but I64,
    // we therefore sign-extend the operand.
    // see: https://github.com/bytecodealliance/wasmtime/issues/9766
    let size = if size == OperandSize::S32 && kind.is_signed() {
        self.extend(
            divisor,
            writable!(divisor),
            ExtendKind::Signed(Extend::<Signed>::I64Extend32),
        );
        self.extend(
            dividend,
            writable!(dividend),
            ExtendKind::Signed(Extend::<Signed>::I64Extend32),
        );
        OperandSize::S64
    } else {
        size
    };

    let op = match kind {
        RemKind::Signed => ALUOp::SDiv,
        RemKind::Unsigned => ALUOp::UDiv,
    };

    // scratch = dividend / divisor
    self.alu_rrr(op, divisor, dividend, scratch, size);

    // dest = dividend - scratch * divisor
    self.alu_rrrr(
        ALUOp3::MSub,
        scratch.to_reg(),
        divisor,
        dest.map(Into::into),
        dividend,
        size,
    );
}
538
539
/// And with three registers.
pub fn and_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.alu_rrr(ALUOp::And, rm, rn, rd, size);
}

/// And immediate and register.
pub fn and_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.alu_rri_logic(ALUOp::And, imm, rn, rd, size);
}

/// Or with three registers.
pub fn or_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.alu_rrr(ALUOp::Orr, rm, rn, rd, size);
}

/// Or immediate and register.
pub fn or_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.alu_rri_logic(ALUOp::Orr, imm, rn, rd, size);
}

/// Xor with three registers.
pub fn xor_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.alu_rrr(ALUOp::Eor, rm, rn, rd, size);
}

/// Xor immediate and register.
pub fn xor_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.alu_rri_logic(ALUOp::Eor, imm, rn, rd, size);
}
568
569
/// Shift with three registers.
///
/// For `Rotl`, `shift_kind_to_alu_op` negates the shift-amount
/// register `rm` in place before emitting `ror` (rotate-right by the
/// negated amount == rotate-left).
pub fn shift_rrr(
    &mut self,
    rm: Reg,
    rn: Reg,
    rd: WritableReg,
    kind: ShiftKind,
    size: OperandSize,
) {
    let shift_op = self.shift_kind_to_alu_op(kind, rm, size);
    self.alu_rrr(shift_op, rm, rn, rd, size);
}

/// Shift immediate and register.
///
/// NOTE(review): here `rn` (the shifted value, not a shift amount) is
/// passed to `shift_kind_to_alu_op`, which for `Rotl` would negate the
/// value register in place; presumably `Rotl` never reaches this path
/// with an immediate — TODO confirm against callers.
pub fn shift_ir(
    &mut self,
    imm: ImmShift,
    rn: Reg,
    rd: WritableReg,
    kind: ShiftKind,
    size: OperandSize,
) {
    let shift_op = self.shift_kind_to_alu_op(kind, rn, size);
    self.alu_rri_shift(shift_op, imm, rn, rd, size);
}
594
595
/// Count Leading Zeros.
pub fn clz(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.bit_rr(BitOp::Clz, rn, rd, size);
}

/// Reverse Bits reverses the bit order in a register.
pub fn rbit(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.bit_rr(BitOp::RBit, rn, rd, size);
}
604
605
/// Float add with three registers.
pub fn fadd_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.fpu_rrr(FPUOp2::Add, rm, rn, rd, size);
}

/// Float sub with three registers.
pub fn fsub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.fpu_rrr(FPUOp2::Sub, rm, rn, rd, size);
}

/// Float multiply with three registers.
pub fn fmul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.fpu_rrr(FPUOp2::Mul, rm, rn, rd, size);
}

/// Float division with three registers.
pub fn fdiv_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.fpu_rrr(FPUOp2::Div, rm, rn, rd, size);
}

/// Float max with three registers.
pub fn fmax_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.fpu_rrr(FPUOp2::Max, rm, rn, rd, size);
}

/// Float min with three registers.
pub fn fmin_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.fpu_rrr(FPUOp2::Min, rm, rn, rd, size);
}

/// Float neg with two registers.
pub fn fneg_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.fpu_rr(FPUOp1::Neg, rn, rd, size);
}

/// Float abs with two registers.
pub fn fabs_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.fpu_rr(FPUOp1::Abs, rn, rd, size);
}

/// Float sqrt with two registers.
pub fn fsqrt_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.fpu_rr(FPUOp1::Sqrt, rn, rd, size);
}
649
650
/// Float round (ceil, trunc, floor, nearest) with two registers.
///
/// Panics on operand sizes other than 32 or 64 bits.
pub fn fround_rr(&mut self, rn: Reg, rd: WritableReg, mode: RoundingMode, size: OperandSize) {
    let fpu_mode = match (mode, size) {
        (RoundingMode::Nearest, OperandSize::S32) => FpuRoundMode::Nearest32,
        (RoundingMode::Up, OperandSize::S32) => FpuRoundMode::Plus32,
        (RoundingMode::Down, OperandSize::S32) => FpuRoundMode::Minus32,
        (RoundingMode::Zero, OperandSize::S32) => FpuRoundMode::Zero32,
        (RoundingMode::Nearest, OperandSize::S64) => FpuRoundMode::Nearest64,
        (RoundingMode::Up, OperandSize::S64) => FpuRoundMode::Plus64,
        (RoundingMode::Down, OperandSize::S64) => FpuRoundMode::Minus64,
        (RoundingMode::Zero, OperandSize::S64) => FpuRoundMode::Zero64,
        (m, o) => panic!("Invalid rounding mode or operand size {m:?}, {o:?}"),
    };
    self.fpu_round(fpu_mode, rn, rd)
}
665
666
/// Float unsigned shift right with two registers and an immediate.
///
/// Panics on operand sizes other than 32 or 64 bits.
pub fn fushr_rri(&mut self, rn: Reg, rd: WritableReg, amount: u8, size: OperandSize) {
    let imm = FPURightShiftImm {
        amount,
        lane_size_in_bits: size.num_bits(),
    };
    let ushr = match size {
        OperandSize::S32 => UShr32(imm),
        OperandSize::S64 => UShr64(imm),
        _ => unreachable!(),
    };
    self.fpu_rri(ushr, rn, rd)
}

/// Float unsigned shift left and insert with three registers
/// and an immediate (`sli`): shifts `rn` left and inserts the result
/// into `rd`, which also reads `ri` as the insertion base.
///
/// Panics on operand sizes other than 32 or 64 bits.
pub fn fsli_rri_mod(
    &mut self,
    ri: Reg,
    rn: Reg,
    rd: WritableReg,
    amount: u8,
    size: OperandSize,
) {
    let imm = FPULeftShiftImm {
        amount,
        lane_size_in_bits: size.num_bits(),
    };
    let sli = match size {
        OperandSize::S32 => FPUOpRIMod::Sli32(imm),
        OperandSize::S64 => FPUOpRIMod::Sli64(imm),
        _ => unreachable!(),
    };
    self.fpu_rri_mod(sli, ri, rn, rd)
}
701
702
/// Float compare (`fcmp rn, rm`), setting the condition flags.
pub fn fcmp(&mut self, rn: Reg, rm: Reg, size: OperandSize) {
    self.emit(Inst::FpuCmp {
        size: size.into(),
        rn: rn.into(),
        rm: rm.into(),
    })
}

/// Convert a signed integer to a float.
///
/// Panics on source/destination size combinations other than 32/64 bit.
pub fn cvt_sint_to_float(
    &mut self,
    rn: Reg,
    rd: WritableReg,
    src_size: OperandSize,
    dst_size: OperandSize,
) {
    let op = match (src_size, dst_size) {
        (OperandSize::S32, OperandSize::S32) => IntToFpuOp::I32ToF32,
        (OperandSize::S64, OperandSize::S32) => IntToFpuOp::I64ToF32,
        (OperandSize::S32, OperandSize::S64) => IntToFpuOp::I32ToF64,
        (OperandSize::S64, OperandSize::S64) => IntToFpuOp::I64ToF64,
        _ => unreachable!(),
    };

    self.emit(Inst::IntToFpu {
        op,
        rd: rd.map(Into::into),
        rn: rn.into(),
    });
}

/// Convert an unsigned integer to a float.
///
/// Panics on source/destination size combinations other than 32/64 bit.
pub fn cvt_uint_to_float(
    &mut self,
    rn: Reg,
    rd: WritableReg,
    src_size: OperandSize,
    dst_size: OperandSize,
) {
    let op = match (src_size, dst_size) {
        (OperandSize::S32, OperandSize::S32) => IntToFpuOp::U32ToF32,
        (OperandSize::S64, OperandSize::S32) => IntToFpuOp::U64ToF32,
        (OperandSize::S32, OperandSize::S64) => IntToFpuOp::U32ToF64,
        (OperandSize::S64, OperandSize::S64) => IntToFpuOp::U64ToF64,
        _ => unreachable!(),
    };

    self.emit(Inst::IntToFpu {
        op,
        rd: rd.map(Into::into),
        rn: rn.into(),
    });
}

/// Change precision of a float (f32 <-> f64).
///
/// The `size` passed to `FpuRR` is the *source* scalar size.
/// Panics on any other size combination.
pub fn cvt_float_to_float(
    &mut self,
    rn: Reg,
    rd: WritableReg,
    src_size: OperandSize,
    dst_size: OperandSize,
) {
    let (fpu_op, size) = match (src_size, dst_size) {
        (OperandSize::S32, OperandSize::S64) => (FPUOp1::Cvt32To64, ScalarSize::Size32),
        (OperandSize::S64, OperandSize::S32) => (FPUOp1::Cvt64To32, ScalarSize::Size64),
        _ => unimplemented!(),
    };
    self.emit(Inst::FpuRR {
        fpu_op,
        size,
        rd: rd.map(Into::into),
        rn: rn.into(),
    });
}
777
778
/// Return instruction.
pub fn ret(&mut self) {
    self.emit(Inst::Ret {});
}

/// An unconditional branch.
pub fn jmp(&mut self, target: MachLabel) {
    self.emit(Inst::Jump {
        dest: BranchTarget::Label(target),
    });
}

/// A conditional branch.
///
/// The not-taken target is a resolved offset of 4 bytes, i.e. simply
/// fall through to the next instruction.
pub fn jmp_if(&mut self, kind: Cond, taken: MachLabel) {
    self.emit(Inst::CondBr {
        taken: BranchTarget::Label(taken),
        not_taken: BranchTarget::ResolvedOffset(4),
        kind: CondBrKind::Cond(kind),
    });
}
798
799
/// Emits a jump table sequence.
///
/// `index` selects among `targets`, falling back to `default` for
/// out-of-range indices; `tmp1`/`tmp2` are clobbered by the sequence.
/// An island is requested up front so the whole sequence plus the
/// table itself fits contiguously.
pub fn jmp_table(
    &mut self,
    targets: &[MachLabel],
    default: MachLabel,
    index: Reg,
    tmp1: Reg,
    tmp2: Reg,
) {
    self.emit_with_island(
        Inst::JTSequence {
            default,
            targets: Box::new(targets.to_vec()),
            ridx: index.into(),
            rtmp1: Writable::from_reg(tmp1.into()),
            rtmp2: Writable::from_reg(tmp2.into()),
        },
        // number of bytes needed for the jumptable sequence:
        // 4 bytes per instruction, with 8 instructions base + the size of
        // the jumptable more.
        (4 * (8 + targets.len())).try_into().unwrap(),
    );
}
822
823
/// Conditional Set sets the destination register to 1 if the condition
/// is true, and otherwise sets it to 0.
pub fn cset(&mut self, rd: WritableReg, cond: Cond) {
    self.emit(Inst::CSet {
        rd: rd.map(Into::into),
        cond,
    });
}

/// If the condition is true, `csel` writes rn to rd. If the
/// condition is false, it writes rm to rd.
pub fn csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond) {
    self.emit(Inst::CSel {
        rd: rd.map(Into::into),
        rn: rn.into(),
        rm: rm.into(),
        cond,
    });
}
842
843
/// Floating-point conditional select: if the condition is true,
/// writes rn to rd; if the condition is false, writes rm to rd.
/// Only 32- and 64-bit sizes are currently supported.
pub fn fpu_csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond, size: OperandSize) {
    match size {
        OperandSize::S32 => {
            self.emit(Inst::FpuCSel32 {
                rd: rd.map(Into::into),
                rn: rn.into(),
                rm: rm.into(),
                cond,
            });
        }
        OperandSize::S64 => {
            self.emit(Inst::FpuCSel64 {
                rd: rd.map(Into::into),
                rn: rn.into(),
                rm: rm.into(),
                cond,
            });
        }
        _ => todo!(),
    }
}
866
867
/// Population count per byte.
///
/// Operates in place: `rd` is both source and destination
/// (`rn` is taken from `rd`'s current value).
pub fn cnt(&mut self, rd: WritableReg) {
    self.emit(Inst::VecMisc {
        op: VecMisc2::Cnt,
        rd: rd.map(Into::into),
        rn: rd.to_reg().into(),
        size: VectorSize::Size8x8,
    });
}

/// Sign- or zero-extend `rn` into `rd` according to `kind`.
pub fn extend(&mut self, rn: Reg, rd: WritableReg, kind: ExtendKind) {
    self.emit(Inst::Extend {
        rd: rd.map(Into::into),
        rn: rn.into(),
        signed: kind.signed(),
        from_bits: kind.from_bits(),
        to_bits: kind.to_bits(),
    })
}

/// Bitwise AND (shifted register), setting flags.
///
/// The result is discarded into the zero register, making this a
/// flags-only test (`tst rn, rm`).
pub fn ands_rr(&mut self, rn: Reg, rm: Reg, size: OperandSize) {
    self.alu_rrr(ALUOp::AndS, rm, rn, writable!(regs::zero()), size);
}
891
892
/// Permanently Undefined: unconditionally traps with `code`.
pub fn udf(&mut self, code: TrapCode) {
    self.emit(Inst::Udf { trap_code: code });
}

/// Conditional trap: traps with `code` if condition `cc` holds.
pub fn trapif(&mut self, cc: Cond, code: TrapCode) {
    self.emit(Inst::TrapIf {
        kind: CondBrKind::Cond(cc),
        trap_code: code,
    });
}

/// Trap if `rn` is zero.
pub fn trapz(&mut self, rn: Reg, code: TrapCode, size: OperandSize) {
    self.emit(Inst::TrapIf {
        kind: CondBrKind::Zero(rn.into(), size.into()),
        trap_code: code,
    });
}
912
913
// Helpers for ALU operations. These are thin wrappers that construct
// the corresponding `Inst` variant and emit it.

/// ALU operation with a 12-bit immediate: `rd = rn <op> imm`.
fn alu_rri(&mut self, op: ALUOp, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.emit(Inst::AluRRImm12 {
        alu_op: op,
        size: size.into(),
        rd: rd.map(Into::into),
        rn: rn.into(),
        imm12: imm,
    });
}

/// Logical ALU operation with a bitmask immediate: `rd = rn <op> imm`.
fn alu_rri_logic(
    &mut self,
    op: ALUOp,
    imm: ImmLogic,
    rn: Reg,
    rd: WritableReg,
    size: OperandSize,
) {
    self.emit(Inst::AluRRImmLogic {
        alu_op: op,
        size: size.into(),
        rd: rd.map(Into::into),
        rn: rn.into(),
        imml: imm,
    });
}

/// ALU operation with a shift immediate: `rd = rn <op> imm`.
fn alu_rri_shift(
    &mut self,
    op: ALUOp,
    imm: ImmShift,
    rn: Reg,
    rd: WritableReg,
    size: OperandSize,
) {
    self.emit(Inst::AluRRImmShift {
        alu_op: op,
        size: size.into(),
        rd: rd.map(Into::into),
        rn: rn.into(),
        immshift: imm,
    });
}

/// Three-register ALU operation: `rd = rn <op> rm`.
fn alu_rrr(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.emit(Inst::AluRRR {
        alu_op: op,
        size: size.into(),
        rd: rd.map(Into::into),
        rn: rn.into(),
        rm: rm.into(),
    });
}

/// Three-register ALU operation with extension; uses UXTX,
/// which leaves the 64-bit operand value unchanged.
fn alu_rrr_extend(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.emit(Inst::AluRRRExtend {
        alu_op: op,
        size: size.into(),
        rd: rd.map(Into::into),
        rn: rn.into(),
        rm: rm.into(),
        extendop: ExtendOp::UXTX,
    });
}

/// Four-register ALU operation (e.g. madd/msub):
/// `rd = ra +/- (rn * rm)`.
fn alu_rrrr(
    &mut self,
    op: ALUOp3,
    rm: Reg,
    rn: Reg,
    rd: WritableReg,
    ra: Reg,
    size: OperandSize,
) {
    self.emit(Inst::AluRRRR {
        alu_op: op,
        size: size.into(),
        rd: rd.map(Into::into),
        rn: rn.into(),
        rm: rm.into(),
        ra: ra.into(),
    });
}

/// Three-register FPU operation: `rd = rn <op> rm`.
fn fpu_rrr(&mut self, op: FPUOp2, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.emit(Inst::FpuRRR {
        fpu_op: op,
        size: size.into(),
        rd: rd.map(Into::into),
        rn: rn.into(),
        rm: rm.into(),
    });
}

/// FPU operation with register and immediate.
fn fpu_rri(&mut self, op: FPUOpRI, rn: Reg, rd: WritableReg) {
    self.emit(Inst::FpuRRI {
        fpu_op: op,
        rd: rd.map(Into::into),
        rn: rn.into(),
    });
}

/// FPU operation with register and immediate where the destination
/// also provides an input (`ri`), e.g. `sli`.
fn fpu_rri_mod(&mut self, op: FPUOpRIMod, ri: Reg, rn: Reg, rd: WritableReg) {
    self.emit(Inst::FpuRRIMod {
        fpu_op: op,
        rd: rd.map(Into::into),
        ri: ri.into(),
        rn: rn.into(),
    });
}

/// Two-register FPU operation: `rd = <op> rn`.
fn fpu_rr(&mut self, op: FPUOp1, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.emit(Inst::FpuRR {
        fpu_op: op,
        size: size.into(),
        rd: rd.map(Into::into),
        rn: rn.into(),
    });
}

/// FPU rounding operation: `rd = round(rn)` per `op`'s mode.
fn fpu_round(&mut self, op: FpuRoundMode, rn: Reg, rd: WritableReg) {
    self.emit(Inst::FpuRound {
        op,
        rd: rd.map(Into::into),
        rn: rn.into(),
    });
}

/// Two-register bit operation (clz, rbit, ...): `rd = <op> rn`.
fn bit_rr(&mut self, op: BitOp, rn: Reg, rd: WritableReg, size: OperandSize) {
    self.emit(Inst::BitRR {
        op,
        size: size.into(),
        rd: rd.map(Into::into),
        rn: rn.into(),
    });
}
1051
1052
/// Convert ShiftKind to ALUOp. If kind == Rotl, then emulate it by emitting
/// the negation of the given reg r, and returns ALUOp::Extr (an alias for
/// `ror` the rotate-right instruction).
///
/// Note the side effect: on the `Rotl` path this mutates `r` in place
/// (`r = 0 - r`), so the caller's register then holds the negated
/// amount expected by `ror`.
fn shift_kind_to_alu_op(&mut self, kind: ShiftKind, r: Reg, size: OperandSize) -> ALUOp {
    match kind {
        ShiftKind::Shl => ALUOp::Lsl,
        ShiftKind::ShrS => ALUOp::Asr,
        ShiftKind::ShrU => ALUOp::Lsr,
        ShiftKind::Rotr => ALUOp::Extr,
        ShiftKind::Rotl => {
            // neg(r) is sub(zero, r).
            self.alu_rrr(ALUOp::Sub, r, regs::zero(), writable!(r), size);
            ALUOp::Extr
        }
    }
}
1068
1069
/// Get a label from the underlying machine code buffer.
pub fn get_label(&mut self) -> MachLabel {
    self.buffer.get_label()
}

/// Get a mutable reference to underlying
/// machine buffer.
pub fn buffer_mut(&mut self) -> &mut MachBuffer<Inst> {
    &mut self.buffer
}

/// Get a reference to the underlying machine buffer.
pub fn buffer(&self) -> &MachBuffer<Inst> {
    &self.buffer
}
1084
1085
/// Emit a direct call to a function defined locally and
/// referenced to by `name`, using calling convention `call_conv`.
pub fn call_with_name(&mut self, name: UserExternalNameRef, call_conv: CallingConvention) {
    self.emit(Inst::Call {
        info: Box::new(cranelift_codegen::CallInfo::empty(
            ExternalName::user(name),
            call_conv.into(),
        )),
    })
}

/// Emit an indirect call to a function whose address is
/// stored in the `callee` register, using calling convention
/// `call_conv`.
pub fn call_with_reg(&mut self, callee: Reg, call_conv: CallingConvention) {
    self.emit(Inst::CallInd {
        info: Box::new(cranelift_codegen::CallInfo::empty(
            callee.into(),
            call_conv.into(),
        )),
    })
}
1106
1107
/// Load the min value for an integer of size `out_size`, as a
/// floating-point of size `in_size`, into register `rd`.
///
/// The bound comes from `wasmtime_math::f{32,64}_cvt_to_int_bounds`
/// and is materialized via `mov_ir` (the float's bit pattern).
fn min_fp_value(
    &mut self,
    signed: bool,
    in_size: OperandSize,
    out_size: OperandSize,
    rd: Writable<Reg>,
) {
    match in_size {
        OperandSize::S32 => {
            let (min, _) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into());
            self.mov_ir(rd, Imm::f32(min.to_bits()), in_size);
        }
        OperandSize::S64 => {
            let (min, _) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into());
            self.mov_ir(rd, Imm::f64(min.to_bits()), in_size);
        }
        s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()),
    };
}

/// Load the max value for an integer of size `out_size`, as a
/// floating-point of size `in_size`, into register `rd`.
///
/// Mirror of [`Self::min_fp_value`] for the upper bound.
fn max_fp_value(
    &mut self,
    signed: bool,
    in_size: OperandSize,
    out_size: OperandSize,
    rd: Writable<Reg>,
) {
    match in_size {
        OperandSize::S32 => {
            let (_, max) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into());
            self.mov_ir(rd, Imm::f32(max.to_bits()), in_size);
        }
        OperandSize::S64 => {
            let (_, max) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into());
            self.mov_ir(rd, Imm::f64(max.to_bits()), in_size);
        }
        s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()),
    };
}
1150
1151
/// Emit instructions to check if the value in `rn` is NaN, trapping with
/// `BAD_CONVERSION_TO_INTEGER` if so.
fn check_nan(&mut self, rn: Reg, size: OperandSize) {
    // Comparing a register with itself is unordered exactly when the value
    // is NaN; an unordered FCMP sets the V (overflow) flag.
    self.fcmp(rn, rn, size);
    // Trap when V is set, i.e. when the comparison above was unordered.
    self.trapif(Cond::Vs, TrapCode::BAD_CONVERSION_TO_INTEGER);
}
/// Convert the floating point of size `src_size` stored in `src`, into a integer of size
1158
/// `dst_size`, storing the result in `dst`.
1159
pub fn fpu_to_int(
1160
&mut self,
1161
dst: Writable<Reg>,
1162
src: Reg,
1163
tmp_reg: WritableReg,
1164
src_size: OperandSize,
1165
dst_size: OperandSize,
1166
kind: TruncKind,
1167
signed: bool,
1168
) {
1169
if kind.is_unchecked() {
1170
// Confusingly, when `kind` is `Unchecked` is when we actually need to perform the checks:
1171
// - check if fp is NaN
1172
// - check bounds
1173
self.check_nan(src, src_size);
1174
1175
self.min_fp_value(signed, src_size, dst_size, tmp_reg);
1176
self.fcmp(src, tmp_reg.to_reg(), src_size);
1177
self.trapif(Cond::Le, TrapCode::INTEGER_OVERFLOW);
1178
1179
self.max_fp_value(signed, src_size, dst_size, tmp_reg);
1180
self.fcmp(src, tmp_reg.to_reg(), src_size);
1181
self.trapif(Cond::Ge, TrapCode::INTEGER_OVERFLOW);
1182
}
1183
1184
self.cvt_fpu_to_int(dst, src, src_size, dst_size, signed)
1185
}
1186
1187
/// Select and emit the appropriate `fcvt*` instruction
1188
pub fn cvt_fpu_to_int(
1189
&mut self,
1190
dst: Writable<Reg>,
1191
src: Reg,
1192
src_size: OperandSize,
1193
dst_size: OperandSize,
1194
signed: bool,
1195
) {
1196
let op = match (src_size, dst_size, signed) {
1197
(OperandSize::S32, OperandSize::S32, false) => FpuToIntOp::F32ToU32,
1198
(OperandSize::S32, OperandSize::S32, true) => FpuToIntOp::F32ToI32,
1199
(OperandSize::S32, OperandSize::S64, false) => FpuToIntOp::F32ToU64,
1200
(OperandSize::S32, OperandSize::S64, true) => FpuToIntOp::F32ToI64,
1201
(OperandSize::S64, OperandSize::S32, false) => FpuToIntOp::F64ToU32,
1202
(OperandSize::S64, OperandSize::S32, true) => FpuToIntOp::F64ToI32,
1203
(OperandSize::S64, OperandSize::S64, false) => FpuToIntOp::F64ToU64,
1204
(OperandSize::S64, OperandSize::S64, true) => FpuToIntOp::F64ToI64,
1205
(fsize, int_size, signed) => unimplemented!(
1206
"unsupported conversion: f{} to {}{}",
1207
fsize.num_bits(),
1208
if signed { "i" } else { "u" },
1209
int_size.num_bits(),
1210
),
1211
};
1212
1213
self.emit(Inst::FpuToInt {
1214
op,
1215
rd: dst.map(Into::into),
1216
rn: src.into(),
1217
});
1218
}
1219
}
1220
1221
/// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted,
/// but the immediate is not yet known.
///
/// The placeholder sequence is written with an immediate of `0`; the real
/// value is patched in later via [`Self::finalize`].
pub(crate) struct PatchableAddToReg {
    /// The region to be patched in the [`MachBuffer`]. It contains
    /// space for 3 32-bit instructions, i.e. it's 12 bytes long.
    region: PatchRegion,

    /// The destination register for the add instruction.
    reg: Writable<Reg>,

    /// The temporary register used to hold the immediate value.
    tmp: Writable<Reg>,
}
1235
impl PatchableAddToReg {
    /// Create a new [`PatchableAddToReg`] by capturing a region in the output
    /// buffer containing an instruction sequence that loads an immediate into a
    /// register `tmp`, then adds it to a register `reg`. The [`MachBuffer`]
    /// will have that instruction sequence written to the region, though the
    /// immediate loaded into `tmp` will be `0` until the `::finalize` method is
    /// called.
    pub(crate) fn new(reg: Writable<Reg>, tmp: Writable<Reg>, buf: &mut MachBuffer<Inst>) -> Self {
        // Emit the sequence with a placeholder immediate of 0, and record the
        // bytes as a patchable region so `finalize` can overwrite them later.
        let insns = Self::add_immediate_instruction_sequence(reg, tmp, 0);
        let open = buf.start_patchable();
        buf.put_data(&insns);
        let region = buf.end_patchable(open);

        Self { region, reg, tmp }
    }

    /// Encode the 12-byte sequence `movz tmp, #hi16; movk tmp, #lo16;
    /// add reg, reg, tmp` for the given immediate.
    fn add_immediate_instruction_sequence(
        reg: Writable<Reg>,
        tmp: Writable<Reg>,
        imm: i32,
    ) -> [u8; 12] {
        // Split the immediate into its two 16-bit halves. Each masked value is
        // a 16-bit payload at shift 16 (resp. 0), so `maybe_from_u64` always
        // succeeds and the `unwrap`s cannot fire.
        let imm_hi = imm as u64 & 0xffff_0000;
        let imm_hi = MoveWideConst::maybe_from_u64(imm_hi).unwrap();

        let imm_lo = imm as u64 & 0x0000_ffff;
        let imm_lo = MoveWideConst::maybe_from_u64(imm_lo).unwrap();

        // NOTE(review): for a negative `imm`, this materializes the
        // zero-extended low 32 bits (not a sign-extended 64-bit value) before
        // the 64-bit add — presumably callers only pass non-negative values;
        // confirm at call sites.
        let size = OperandSize::S64.into();

        let tmp = tmp.map(Into::into);
        let rd = reg.map(Into::into);

        // This is "movz to bits 16-31 of 64 bit reg tmp and zero the rest"
        let mov_insn = enc_move_wide(inst::MoveWideOp::MovZ, tmp, imm_hi, size);

        // This is "movk to bits 0-15 of 64 bit reg tmp"
        let movk_insn = enc_movk(tmp, imm_lo, size);

        // This is "add tmp to rd". The opcodes are somewhat buried in the
        // instruction encoder so we just repeat them here.
        let add_bits_31_21: u32 = 0b00001011_000 | (size.sf_bit() << 10);
        let add_bits_15_10: u32 = 0;
        let add_insn = enc_arith_rrr(
            add_bits_31_21,
            add_bits_15_10,
            rd,
            rd.to_reg(),
            tmp.to_reg(),
        );

        // Concatenate the three little-endian 32-bit encodings into the
        // 12-byte region layout expected by `PatchableAddToReg::region`.
        let mut buf = [0u8; 12];
        buf[0..4].copy_from_slice(&mov_insn.to_le_bytes());
        buf[4..8].copy_from_slice(&movk_insn.to_le_bytes());
        buf[8..12].copy_from_slice(&add_insn.to_le_bytes());
        buf
    }

    /// Patch the [`MachBuffer`] with the known constant to be added to the register. The final
    /// value is passed in as an i32, but the instruction encoding is fixed when
    /// [`PatchableAddToReg::new`] is called.
    pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer<Inst>) {
        // Re-encode the same 12-byte sequence with the real immediate and copy
        // it over the placeholder bytes captured in `new`.
        let insns = Self::add_immediate_instruction_sequence(self.reg, self.tmp, val);
        let slice = self.region.patch(buffer);
        assert_eq!(slice.len(), insns.len());
        slice.copy_from_slice(&insns);
    }
}