CoCalc -- asm.rs

GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/winch/codegen/src/isa/aarch64/asm.rs
¹⁶⁹² views
1
//! Assembler library implementation for Aarch64.
2
use super::{address::Address, regs};
3
use crate::CallingConvention;
4
use crate::aarch64::regs::zero;
5
use crate::masm::{
6
    DivKind, Extend, ExtendKind, FloatCmpKind, Imm, IntCmpKind, RemKind, RoundingMode, ShiftKind,
7
    Signed, TRUSTED_FLAGS, TruncKind,
8
};
9
use crate::{
10
    constant_pool::ConstantPool,
11
    masm::OperandSize,
12
    reg::{Reg, WritableReg, writable},
13
};
14

15
use cranelift_codegen::PatchRegion;
16
use cranelift_codegen::isa::aarch64::inst::emit::{enc_arith_rrr, enc_move_wide, enc_movk};
17
use cranelift_codegen::isa::aarch64::inst::{
18
    ASIMDFPModImm, FpuToIntOp, MoveWideConst, NZCV, UImm5,
19
};
20
use cranelift_codegen::{
21
    Final, MachBuffer, MachBufferFinalized, MachInst, MachInstEmit, MachInstEmitState, MachLabel,
22
    Writable,
23
    ir::{ExternalName, MemFlags, SourceLoc, TrapCode, UserExternalNameRef},
24
    isa::aarch64::inst::{
25
        self, ALUOp, ALUOp3, AMode, BitOp, BranchTarget, Cond, CondBrKind, ExtendOp,
26
        FPULeftShiftImm, FPUOp1, FPUOp2,
27
        FPUOpRI::{self, UShr32, UShr64},
28
        FPUOpRIMod, FPURightShiftImm, FpuRoundMode, Imm12, ImmLogic, ImmShift, Inst, IntToFpuOp,
29
        PairAMode, ScalarSize, VecLanesOp, VecMisc2, VectorSize,
30
        emit::{EmitInfo, EmitState},
31
    },
32
    settings,
33
};
34
use regalloc2::RegClass;
35
use wasmtime_math::{f32_cvt_to_int_bounds, f64_cvt_to_int_bounds};
36

37
impl From<OperandSize> for inst::OperandSize {
38
    fn from(size: OperandSize) -> Self {
39
        match size {
40
            OperandSize::S32 => Self::Size32,
41
            OperandSize::S64 => Self::Size64,
42
            s => panic!("Invalid operand size {s:?}"),
43
        }
44
    }
45
}
46

47
impl From<IntCmpKind> for Cond {
48
    fn from(value: IntCmpKind) -> Self {
49
        match value {
50
            IntCmpKind::Eq => Cond::Eq,
51
            IntCmpKind::Ne => Cond::Ne,
52
            IntCmpKind::LtS => Cond::Lt,
53
            IntCmpKind::LtU => Cond::Lo,
54
            IntCmpKind::GtS => Cond::Gt,
55
            IntCmpKind::GtU => Cond::Hi,
56
            IntCmpKind::LeS => Cond::Le,
57
            IntCmpKind::LeU => Cond::Ls,
58
            IntCmpKind::GeS => Cond::Ge,
59
            IntCmpKind::GeU => Cond::Hs,
60
        }
61
    }
62
}
63

64
impl From<FloatCmpKind> for Cond {
65
    fn from(value: FloatCmpKind) -> Self {
66
        match value {
67
            FloatCmpKind::Eq => Cond::Eq,
68
            FloatCmpKind::Ne => Cond::Ne,
69
            FloatCmpKind::Lt => Cond::Mi,
70
            FloatCmpKind::Gt => Cond::Gt,
71
            FloatCmpKind::Le => Cond::Ls,
72
            FloatCmpKind::Ge => Cond::Ge,
73
        }
74
    }
75
}
76

77
impl From<OperandSize> for ScalarSize {
78
    fn from(size: OperandSize) -> ScalarSize {
79
        match size {
80
            OperandSize::S8 => ScalarSize::Size8,
81
            OperandSize::S16 => ScalarSize::Size16,
82
            OperandSize::S32 => ScalarSize::Size32,
83
            OperandSize::S64 => ScalarSize::Size64,
84
            OperandSize::S128 => ScalarSize::Size128,
85
        }
86
    }
87
}
88

89
/// Low level assembler implementation for Aarch64.
90
pub(crate) struct Assembler {
91
    /// The machine instruction buffer.
92
    buffer: MachBuffer<Inst>,
93
    /// Constant emission information.
94
    emit_info: EmitInfo,
95
    /// Emission state.
96
    emit_state: EmitState,
97
    /// Constant pool.
98
    pool: ConstantPool,
99
}
100

101
impl Assembler {
102
    /// Create a new Aarch64 assembler.
103
    pub fn new(shared_flags: settings::Flags) -> Self {
104
        Self {
105
            buffer: MachBuffer::<Inst>::new(),
106
            emit_state: Default::default(),
107
            emit_info: EmitInfo::new(shared_flags),
108
            pool: ConstantPool::new(),
109
        }
110
    }
111
}
112

113
impl Assembler {
114
    /// Return the emitted code.
115
    pub fn finalize(mut self, loc: Option<SourceLoc>) -> MachBufferFinalized<Final> {
116
        let stencil = self
117
            .buffer
118
            .finish(&self.pool.constants(), self.emit_state.ctrl_plane_mut());
119
        stencil.apply_base_srcloc(loc.unwrap_or_default())
120
    }
121

122
    fn emit(&mut self, inst: Inst) {
123
        self.emit_with_island(inst, Inst::worst_case_size());
124
    }
125

126
    fn emit_with_island(&mut self, inst: Inst, needed_space: u32) {
127
        if self.buffer.island_needed(needed_space) {
128
            let label = self.buffer.get_label();
129
            let jmp = Inst::Jump {
130
                dest: BranchTarget::Label(label),
131
            };
132
            jmp.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
133
            self.buffer
134
                .emit_island(needed_space, self.emit_state.ctrl_plane_mut());
135
            self.buffer
136
                .bind_label(label, self.emit_state.ctrl_plane_mut());
137
        }
138
        inst.emit(&mut self.buffer, &self.emit_info, &mut self.emit_state);
139
    }
140

141
    /// Adds a constant to the constant pool, returning its address.
142
    pub fn add_constant(&mut self, constant: &[u8]) -> Address {
143
        let handle = self.pool.register(constant, &mut self.buffer);
144
        Address::constant(handle)
145
    }
146

147
    /// Store a pair of registers.
148
    pub fn stp(&mut self, xt1: Reg, xt2: Reg, addr: Address) {
149
        let mem: PairAMode = addr.try_into().unwrap();
150
        self.emit(Inst::StoreP64 {
151
            rt: xt1.into(),
152
            rt2: xt2.into(),
153
            mem,
154
            flags: MemFlags::trusted(),
155
        });
156
    }
157

158
    /// Store a register.
159
    pub fn str(&mut self, reg: Reg, addr: Address, size: OperandSize, flags: MemFlags) {
160
        let mem: AMode = addr.try_into().unwrap();
161

162
        use OperandSize::*;
163
        let inst = match (reg.is_int(), size) {
164
            (_, S8) => Inst::Store8 {
165
                rd: reg.into(),
166
                mem,
167
                flags,
168
            },
169
            (_, S16) => Inst::Store16 {
170
                rd: reg.into(),
171
                mem,
172
                flags,
173
            },
174
            (true, S32) => Inst::Store32 {
175
                rd: reg.into(),
176
                mem,
177
                flags,
178
            },
179
            (false, S32) => Inst::FpuStore32 {
180
                rd: reg.into(),
181
                mem,
182
                flags,
183
            },
184
            (true, S64) => Inst::Store64 {
185
                rd: reg.into(),
186
                mem,
187
                flags,
188
            },
189
            (false, S64) => Inst::FpuStore64 {
190
                rd: reg.into(),
191
                mem,
192
                flags,
193
            },
194
            (_, S128) => Inst::FpuStore128 {
195
                rd: reg.into(),
196
                mem,
197
                flags,
198
            },
199
        };
200

201
        self.emit(inst);
202
    }
203

204
    /// Load a signed register.
205
    pub fn sload(&mut self, addr: Address, rd: WritableReg, size: OperandSize, flags: MemFlags) {
206
        self.ldr(addr, rd, size, true, flags);
207
    }
208

209
    /// Load an unsigned register.
210
    pub fn uload(&mut self, addr: Address, rd: WritableReg, size: OperandSize, flags: MemFlags) {
211
        self.ldr(addr, rd, size, false, flags);
212
    }
213

214
    /// Load address into a register.
215
    fn ldr(
216
        &mut self,
217
        addr: Address,
218
        rd: WritableReg,
219
        size: OperandSize,
220
        signed: bool,
221
        flags: MemFlags,
222
    ) {
223
        use OperandSize::*;
224
        let writable_reg = rd.map(Into::into);
225
        let mem: AMode = addr.try_into().unwrap();
226

227
        let inst = match (rd.to_reg().is_int(), signed, size) {
228
            (_, false, S8) => Inst::ULoad8 {
229
                rd: writable_reg,
230
                mem,
231
                flags,
232
            },
233
            (_, true, S8) => Inst::SLoad8 {
234
                rd: writable_reg,
235
                mem,
236
                flags,
237
            },
238
            (_, false, S16) => Inst::ULoad16 {
239
                rd: writable_reg,
240
                mem,
241
                flags,
242
            },
243
            (_, true, S16) => Inst::SLoad16 {
244
                rd: writable_reg,
245
                mem,
246
                flags,
247
            },
248
            (true, false, S32) => Inst::ULoad32 {
249
                rd: writable_reg,
250
                mem,
251
                flags,
252
            },
253
            (false, _, S32) => Inst::FpuLoad32 {
254
                rd: writable_reg,
255
                mem,
256
                flags,
257
            },
258
            (true, true, S32) => Inst::SLoad32 {
259
                rd: writable_reg,
260
                mem,
261
                flags,
262
            },
263
            (true, _, S64) => Inst::ULoad64 {
264
                rd: writable_reg,
265
                mem,
266
                flags,
267
            },
268
            (false, _, S64) => Inst::FpuLoad64 {
269
                rd: writable_reg,
270
                mem,
271
                flags,
272
            },
273
            (_, _, S128) => Inst::FpuLoad128 {
274
                rd: writable_reg,
275
                mem,
276
                flags,
277
            },
278
        };
279

280
        self.emit(inst);
281
    }
282

283
    /// Load a pair of registers.
284
    pub fn ldp(&mut self, xt1: Reg, xt2: Reg, addr: Address) {
285
        let writable_xt1 = Writable::from_reg(xt1.into());
286
        let writable_xt2 = Writable::from_reg(xt2.into());
287
        let mem = addr.try_into().unwrap();
288

289
        self.emit(Inst::LoadP64 {
290
            rt: writable_xt1,
291
            rt2: writable_xt2,
292
            mem,
293
            flags: MemFlags::trusted(),
294
        });
295
    }
296

297
    /// Emit a series of instructions to move an arbitrary 64-bit immediate
298
    /// into the destination register.
299
    /// The emitted instructions will depend on the destination register class.
300
    pub fn mov_ir(&mut self, rd: WritableReg, imm: Imm, size: OperandSize) {
301
        match rd.to_reg().class() {
302
            RegClass::Int => {
303
                Inst::load_constant(rd.map(Into::into), imm.unwrap_as_u64())
304
                    .into_iter()
305
                    .for_each(|i| self.emit(i));
306
            }
307
            RegClass::Float => {
308
                match ASIMDFPModImm::maybe_from_u64(imm.unwrap_as_u64(), size.into()) {
309
                    Some(imm) => {
310
                        self.emit(Inst::FpuMoveFPImm {
311
                            rd: rd.map(Into::into),
312
                            imm,
313
                            size: size.into(),
314
                        });
315
                    }
316
                    _ => {
317
                        let addr = self.add_constant(&imm.to_bytes());
318
                        self.uload(addr, rd, size, TRUSTED_FLAGS);
319
                    }
320
                }
321
            }
322
            _ => unreachable!(),
323
        }
324
    }
325

326
    /// Register to register move.
327
    pub fn mov_rr(&mut self, rm: Reg, rd: WritableReg, size: OperandSize) {
328
        let writable_rd = rd.map(Into::into);
329
        self.emit(Inst::Mov {
330
            size: size.into(),
331
            rd: writable_rd,
332
            rm: rm.into(),
333
        });
334
    }
335

336
    /// Floating point register to register move.
337
    pub fn fmov_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
338
        let writable = rd.map(Into::into);
339
        let inst = match size {
340
            OperandSize::S32 => Inst::FpuMove32 {
341
                rd: writable,
342
                rn: rn.into(),
343
            },
344
            OperandSize::S64 => Inst::FpuMove64 {
345
                rd: writable,
346
                rn: rn.into(),
347
            },
348
            _ => unreachable!(),
349
        };
350

351
        self.emit(inst);
352
    }
353

354
    pub fn mov_to_fpu(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
355
        let writable_rd = rd.map(Into::into);
356
        self.emit(Inst::MovToFpu {
357
            size: size.into(),
358
            rd: writable_rd,
359
            rn: rn.into(),
360
        });
361
    }
362

363
    pub fn mov_from_vec(&mut self, rn: Reg, rd: WritableReg, idx: u8, size: OperandSize) {
364
        self.emit(Inst::MovFromVec {
365
            rd: rd.map(Into::into),
366
            rn: rn.into(),
367
            idx,
368
            size: size.into(),
369
        });
370
    }
371

372
    /// Add immediate and register.
373
    pub fn add_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
374
        self.alu_rri(ALUOp::Add, imm, rn, rd, size);
375
    }
376

377
    /// Add immediate and register, setting overflow flags.
378
    pub fn adds_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
379
        self.alu_rri(ALUOp::AddS, imm, rn, rd, size);
380
    }
381

382
    /// Add with three registers.
383
    pub fn add_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
384
        self.alu_rrr_extend(ALUOp::Add, rm, rn, rd, size);
385
    }
386

387
    /// Add with three registers, setting overflow flags.
388
    pub fn adds_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
389
        self.alu_rrr_extend(ALUOp::AddS, rm, rn, rd, size);
390
    }
391

392
    /// Add across Vector.
393
    pub fn addv(&mut self, rn: Reg, rd: WritableReg, size: VectorSize) {
394
        self.emit(Inst::VecLanes {
395
            op: VecLanesOp::Addv,
396
            rd: rd.map(Into::into),
397
            rn: rn.into(),
398
            size,
399
        });
400
    }
401

402
    /// Subtract immediate and register.
403
    pub fn sub_ir(&mut self, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
404
        self.alu_rri(ALUOp::Sub, imm, rn, rd, size);
405
    }
406

407
    /// Subtract immediate and register, setting flags.
408
    pub fn subs_ir(&mut self, imm: Imm12, rn: Reg, size: OperandSize) {
409
        self.alu_rri(ALUOp::SubS, imm, rn, writable!(regs::zero()), size);
410
    }
411

412
    /// Subtract with three registers.
413
    pub fn sub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
414
        self.alu_rrr_extend(ALUOp::Sub, rm, rn, rd, size);
415
    }
416

417
    /// Subtract with three registers, setting flags.
418
    pub fn subs_rrr(&mut self, rm: Reg, rn: Reg, size: OperandSize) {
419
        self.alu_rrr_extend(ALUOp::SubS, rm, rn, writable!(regs::zero()), size);
420
    }
421

422
    /// Multiply with three registers.
423
    pub fn mul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
424
        self.alu_rrrr(ALUOp3::MAdd, rm, rn, rd, regs::zero(), size);
425
    }
426

427
    /// Signed/unsigned division with three registers.
428
    pub fn div_rrr(
429
        &mut self,
430
        divisor: Reg,
431
        dividend: Reg,
432
        dest: Writable<Reg>,
433
        kind: DivKind,
434
        size: OperandSize,
435
    ) {
436
        // Check for division by 0.
437
        self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size);
438

439
        // check for overflow
440
        if kind == DivKind::Signed {
441
            // Check for divisor overflow.
442
            self.alu_rri(
443
                ALUOp::AddS,
444
                Imm12::maybe_from_u64(1).expect("1 to fit in 12 bits"),
445
                divisor,
446
                writable!(zero()),
447
                size,
448
            );
449

450
            // Check if the dividend is 1.
451
            self.emit(Inst::CCmpImm {
452
                size: size.into(),
453
                rn: dividend.into(),
454
                imm: UImm5::maybe_from_u8(1).expect("1 fits in 5 bits"),
455
                nzcv: NZCV::new(false, false, false, false),
456
                cond: Cond::Eq,
457
            });
458

459
            // Finally, trap if the previous operation overflowed.
460
            self.trapif(Cond::Vs, TrapCode::INTEGER_OVERFLOW);
461
        }
462

463
        // `cranelift-codegen` doesn't support emitting sdiv for anything but I64,
464
        // we therefore sign-extend the operand.
465
        // see: https://github.com/bytecodealliance/wasmtime/issues/9766
466
        let size = if size == OperandSize::S32 && kind == DivKind::Signed {
467
            self.extend(
468
                divisor,
469
                writable!(divisor),
470
                ExtendKind::Signed(Extend::<Signed>::I64Extend32),
471
            );
472
            self.extend(
473
                dividend,
474
                writable!(dividend),
475
                ExtendKind::Signed(Extend::<Signed>::I64Extend32),
476
            );
477
            OperandSize::S64
478
        } else {
479
            size
480
        };
481

482
        let op = match kind {
483
            DivKind::Signed => ALUOp::SDiv,
484
            DivKind::Unsigned => ALUOp::UDiv,
485
        };
486

487
        self.alu_rrr(op, divisor, dividend, dest.map(Into::into), size);
488
    }
489

490
    /// Signed/unsigned remainder operation with three registers.
491
    pub fn rem_rrr(
492
        &mut self,
493
        divisor: Reg,
494
        dividend: Reg,
495
        dest: Writable<Reg>,
496
        scratch: WritableReg,
497
        kind: RemKind,
498
        size: OperandSize,
499
    ) {
500
        // Check for division by 0
501
        self.trapz(divisor, TrapCode::INTEGER_DIVISION_BY_ZERO, size);
502

503
        // `cranelift-codegen` doesn't support emitting sdiv for anything but I64,
504
        // we therefore sign-extend the operand.
505
        // see: https://github.com/bytecodealliance/wasmtime/issues/9766
506
        let size = if size == OperandSize::S32 && kind.is_signed() {
507
            self.extend(
508
                divisor,
509
                writable!(divisor),
510
                ExtendKind::Signed(Extend::<Signed>::I64Extend32),
511
            );
512
            self.extend(
513
                dividend,
514
                writable!(dividend),
515
                ExtendKind::Signed(Extend::<Signed>::I64Extend32),
516
            );
517
            OperandSize::S64
518
        } else {
519
            size
520
        };
521

522
        let op = match kind {
523
            RemKind::Signed => ALUOp::SDiv,
524
            RemKind::Unsigned => ALUOp::UDiv,
525
        };
526

527
        self.alu_rrr(op, divisor, dividend, scratch, size);
528

529
        self.alu_rrrr(
530
            ALUOp3::MSub,
531
            scratch.to_reg(),
532
            divisor,
533
            dest.map(Into::into),
534
            dividend,
535
            size,
536
        );
537
    }
538

539
    /// And with three registers.
540
    pub fn and_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
541
        self.alu_rrr(ALUOp::And, rm, rn, rd, size);
542
    }
543

544
    /// And immediate and register.
545
    pub fn and_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {
546
        self.alu_rri_logic(ALUOp::And, imm, rn, rd, size);
547
    }
548

549
    /// Or with three registers.
550
    pub fn or_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
551
        self.alu_rrr(ALUOp::Orr, rm, rn, rd, size);
552
    }
553

554
    /// Or immediate and register.
555
    pub fn or_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {
556
        self.alu_rri_logic(ALUOp::Orr, imm, rn, rd, size);
557
    }
558

559
    /// Xor with three registers.
560
    pub fn xor_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
561
        self.alu_rrr(ALUOp::Eor, rm, rn, rd, size);
562
    }
563

564
    /// Xor immediate and register.
565
    pub fn xor_ir(&mut self, imm: ImmLogic, rn: Reg, rd: WritableReg, size: OperandSize) {
566
        self.alu_rri_logic(ALUOp::Eor, imm, rn, rd, size);
567
    }
568

569
    /// Shift with three registers.
570
    pub fn shift_rrr(
571
        &mut self,
572
        rm: Reg,
573
        rn: Reg,
574
        rd: WritableReg,
575
        kind: ShiftKind,
576
        size: OperandSize,
577
    ) {
578
        let shift_op = self.shift_kind_to_alu_op(kind, rm, size);
579
        self.alu_rrr(shift_op, rm, rn, rd, size);
580
    }
581

582
    /// Shift immediate and register.
583
    pub fn shift_ir(
584
        &mut self,
585
        imm: ImmShift,
586
        rn: Reg,
587
        rd: WritableReg,
588
        kind: ShiftKind,
589
        size: OperandSize,
590
    ) {
591
        let shift_op = self.shift_kind_to_alu_op(kind, rn, size);
592
        self.alu_rri_shift(shift_op, imm, rn, rd, size);
593
    }
594

595
    /// Count Leading Zeros.
596
    pub fn clz(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
597
        self.bit_rr(BitOp::Clz, rn, rd, size);
598
    }
599

600
    /// Reverse Bits reverses the bit order in a register.
601
    pub fn rbit(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
602
        self.bit_rr(BitOp::RBit, rn, rd, size);
603
    }
604

605
    /// Float add with three registers.
606
    pub fn fadd_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
607
        self.fpu_rrr(FPUOp2::Add, rm, rn, rd, size);
608
    }
609

610
    /// Float sub with three registers.
611
    pub fn fsub_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
612
        self.fpu_rrr(FPUOp2::Sub, rm, rn, rd, size);
613
    }
614

615
    /// Float multiply with three registers.
616
    pub fn fmul_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
617
        self.fpu_rrr(FPUOp2::Mul, rm, rn, rd, size);
618
    }
619

620
    /// Float division with three registers.
621
    pub fn fdiv_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
622
        self.fpu_rrr(FPUOp2::Div, rm, rn, rd, size);
623
    }
624

625
    /// Float max with three registers.
626
    pub fn fmax_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
627
        self.fpu_rrr(FPUOp2::Max, rm, rn, rd, size);
628
    }
629

630
    /// Float min with three registers.
631
    pub fn fmin_rrr(&mut self, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
632
        self.fpu_rrr(FPUOp2::Min, rm, rn, rd, size);
633
    }
634

635
    /// Float neg with two registers.
636
    pub fn fneg_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
637
        self.fpu_rr(FPUOp1::Neg, rn, rd, size);
638
    }
639

640
    /// Float abs with two registers.
641
    pub fn fabs_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
642
        self.fpu_rr(FPUOp1::Abs, rn, rd, size);
643
    }
644

645
    /// Float sqrt with two registers.
646
    pub fn fsqrt_rr(&mut self, rn: Reg, rd: WritableReg, size: OperandSize) {
647
        self.fpu_rr(FPUOp1::Sqrt, rn, rd, size);
648
    }
649

650
    /// Float round (ceil, trunc, floor) with two registers.
651
    pub fn fround_rr(&mut self, rn: Reg, rd: WritableReg, mode: RoundingMode, size: OperandSize) {
652
        let fpu_mode = match (mode, size) {
653
            (RoundingMode::Nearest, OperandSize::S32) => FpuRoundMode::Nearest32,
654
            (RoundingMode::Up, OperandSize::S32) => FpuRoundMode::Plus32,
655
            (RoundingMode::Down, OperandSize::S32) => FpuRoundMode::Minus32,
656
            (RoundingMode::Zero, OperandSize::S32) => FpuRoundMode::Zero32,
657
            (RoundingMode::Nearest, OperandSize::S64) => FpuRoundMode::Nearest64,
658
            (RoundingMode::Up, OperandSize::S64) => FpuRoundMode::Plus64,
659
            (RoundingMode::Down, OperandSize::S64) => FpuRoundMode::Minus64,
660
            (RoundingMode::Zero, OperandSize::S64) => FpuRoundMode::Zero64,
661
            (m, o) => panic!("Invalid rounding mode or operand size {m:?}, {o:?}"),
662
        };
663
        self.fpu_round(fpu_mode, rn, rd)
664
    }
665

666
    /// Float unsigned shift right with two registers and an immediate.
667
    pub fn fushr_rri(&mut self, rn: Reg, rd: WritableReg, amount: u8, size: OperandSize) {
668
        let imm = FPURightShiftImm {
669
            amount,
670
            lane_size_in_bits: size.num_bits(),
671
        };
672
        let ushr = match size {
673
            OperandSize::S32 => UShr32(imm),
674
            OperandSize::S64 => UShr64(imm),
675
            _ => unreachable!(),
676
        };
677
        self.fpu_rri(ushr, rn, rd)
678
    }
679

680
    /// Float unsigned shift left and insert with three registers
681
    /// and an immediate.
682
    pub fn fsli_rri_mod(
683
        &mut self,
684
        ri: Reg,
685
        rn: Reg,
686
        rd: WritableReg,
687
        amount: u8,
688
        size: OperandSize,
689
    ) {
690
        let imm = FPULeftShiftImm {
691
            amount,
692
            lane_size_in_bits: size.num_bits(),
693
        };
694
        let sli = match size {
695
            OperandSize::S32 => FPUOpRIMod::Sli32(imm),
696
            OperandSize::S64 => FPUOpRIMod::Sli64(imm),
697
            _ => unreachable!(),
698
        };
699
        self.fpu_rri_mod(sli, ri, rn, rd)
700
    }
701

702
    /// Float compare.
703
    pub fn fcmp(&mut self, rn: Reg, rm: Reg, size: OperandSize) {
704
        self.emit(Inst::FpuCmp {
705
            size: size.into(),
706
            rn: rn.into(),
707
            rm: rm.into(),
708
        })
709
    }
710

711
    /// Convert an signed integer to a float.
712
    pub fn cvt_sint_to_float(
713
        &mut self,
714
        rn: Reg,
715
        rd: WritableReg,
716
        src_size: OperandSize,
717
        dst_size: OperandSize,
718
    ) {
719
        let op = match (src_size, dst_size) {
720
            (OperandSize::S32, OperandSize::S32) => IntToFpuOp::I32ToF32,
721
            (OperandSize::S64, OperandSize::S32) => IntToFpuOp::I64ToF32,
722
            (OperandSize::S32, OperandSize::S64) => IntToFpuOp::I32ToF64,
723
            (OperandSize::S64, OperandSize::S64) => IntToFpuOp::I64ToF64,
724
            _ => unreachable!(),
725
        };
726

727
        self.emit(Inst::IntToFpu {
728
            op,
729
            rd: rd.map(Into::into),
730
            rn: rn.into(),
731
        });
732
    }
733

734
    /// Convert an unsigned integer to a float.
735
    pub fn cvt_uint_to_float(
736
        &mut self,
737
        rn: Reg,
738
        rd: WritableReg,
739
        src_size: OperandSize,
740
        dst_size: OperandSize,
741
    ) {
742
        let op = match (src_size, dst_size) {
743
            (OperandSize::S32, OperandSize::S32) => IntToFpuOp::U32ToF32,
744
            (OperandSize::S64, OperandSize::S32) => IntToFpuOp::U64ToF32,
745
            (OperandSize::S32, OperandSize::S64) => IntToFpuOp::U32ToF64,
746
            (OperandSize::S64, OperandSize::S64) => IntToFpuOp::U64ToF64,
747
            _ => unreachable!(),
748
        };
749

750
        self.emit(Inst::IntToFpu {
751
            op,
752
            rd: rd.map(Into::into),
753
            rn: rn.into(),
754
        });
755
    }
756

757
    /// Change precision of float.
758
    pub fn cvt_float_to_float(
759
        &mut self,
760
        rn: Reg,
761
        rd: WritableReg,
762
        src_size: OperandSize,
763
        dst_size: OperandSize,
764
    ) {
765
        let (fpu_op, size) = match (src_size, dst_size) {
766
            (OperandSize::S32, OperandSize::S64) => (FPUOp1::Cvt32To64, ScalarSize::Size32),
767
            (OperandSize::S64, OperandSize::S32) => (FPUOp1::Cvt64To32, ScalarSize::Size64),
768
            _ => unimplemented!(),
769
        };
770
        self.emit(Inst::FpuRR {
771
            fpu_op,
772
            size,
773
            rd: rd.map(Into::into),
774
            rn: rn.into(),
775
        });
776
    }
777

778
    /// Return instruction.
779
    pub fn ret(&mut self) {
780
        self.emit(Inst::Ret {});
781
    }
782

783
    /// An unconditional branch.
784
    pub fn jmp(&mut self, target: MachLabel) {
785
        self.emit(Inst::Jump {
786
            dest: BranchTarget::Label(target),
787
        });
788
    }
789

790
    /// A conditional branch.
791
    pub fn jmp_if(&mut self, kind: Cond, taken: MachLabel) {
792
        self.emit(Inst::CondBr {
793
            taken: BranchTarget::Label(taken),
794
            not_taken: BranchTarget::ResolvedOffset(4),
795
            kind: CondBrKind::Cond(kind),
796
        });
797
    }
798

799
    /// Emits a jump table sequence.
800
    pub fn jmp_table(
801
        &mut self,
802
        targets: &[MachLabel],
803
        default: MachLabel,
804
        index: Reg,
805
        tmp1: Reg,
806
        tmp2: Reg,
807
    ) {
808
        self.emit_with_island(
809
            Inst::JTSequence {
810
                default,
811
                targets: Box::new(targets.to_vec()),
812
                ridx: index.into(),
813
                rtmp1: Writable::from_reg(tmp1.into()),
814
                rtmp2: Writable::from_reg(tmp2.into()),
815
            },
816
            // number of bytes needed for the jumptable sequence:
817
            // 4 bytes per instruction, with 8 instructions base + the size of
818
            // the jumptable more.
819
            (4 * (8 + targets.len())).try_into().unwrap(),
820
        );
821
    }
822

823
    /// Conditional Set sets the destination register to 1 if the condition
824
    /// is true, and otherwise sets it to 0.
825
    pub fn cset(&mut self, rd: WritableReg, cond: Cond) {
826
        self.emit(Inst::CSet {
827
            rd: rd.map(Into::into),
828
            cond,
829
        });
830
    }
831

832
    /// If the condition is true, `csel` writes rn to rd. If the
833
    /// condition is false, it writes rm to rd
834
    pub fn csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond) {
835
        self.emit(Inst::CSel {
836
            rd: rd.map(Into::into),
837
            rn: rn.into(),
838
            rm: rm.into(),
839
            cond,
840
        });
841
    }
842

843
    /// If the condition is true, `csel` writes rn to rd. If the
844
    /// condition is false, it writes rm to rd
845
    pub fn fpu_csel(&mut self, rn: Reg, rm: Reg, rd: WritableReg, cond: Cond, size: OperandSize) {
846
        match size {
847
            OperandSize::S32 => {
848
                self.emit(Inst::FpuCSel32 {
849
                    rd: rd.map(Into::into),
850
                    rn: rn.into(),
851
                    rm: rm.into(),
852
                    cond,
853
                });
854
            }
855
            OperandSize::S64 => {
856
                self.emit(Inst::FpuCSel64 {
857
                    rd: rd.map(Into::into),
858
                    rn: rn.into(),
859
                    rm: rm.into(),
860
                    cond,
861
                });
862
            }
863
            _ => todo!(),
864
        }
865
    }
866

867
    /// Population count per byte.
868
    pub fn cnt(&mut self, rd: WritableReg) {
869
        self.emit(Inst::VecMisc {
870
            op: VecMisc2::Cnt,
871
            rd: rd.map(Into::into),
872
            rn: rd.to_reg().into(),
873
            size: VectorSize::Size8x8,
874
        });
875
    }
876

877
    pub fn extend(&mut self, rn: Reg, rd: WritableReg, kind: ExtendKind) {
878
        self.emit(Inst::Extend {
879
            rd: rd.map(Into::into),
880
            rn: rn.into(),
881
            signed: kind.signed(),
882
            from_bits: kind.from_bits(),
883
            to_bits: kind.to_bits(),
884
        })
885
    }
886

887
    /// Bitwise AND (shifted register), setting flags.
888
    pub fn ands_rr(&mut self, rn: Reg, rm: Reg, size: OperandSize) {
889
        self.alu_rrr(ALUOp::AndS, rm, rn, writable!(regs::zero()), size);
890
    }
891

892
    /// Permanently Undefined.
893
    pub fn udf(&mut self, code: TrapCode) {
894
        self.emit(Inst::Udf { trap_code: code });
895
    }
896

897
    /// Conditional trap.
898
    pub fn trapif(&mut self, cc: Cond, code: TrapCode) {
899
        self.emit(Inst::TrapIf {
900
            kind: CondBrKind::Cond(cc),
901
            trap_code: code,
902
        });
903
    }
904

905
    /// Trap if `rn` is zero.
906
    pub fn trapz(&mut self, rn: Reg, code: TrapCode, size: OperandSize) {
907
        self.emit(Inst::TrapIf {
908
            kind: CondBrKind::Zero(rn.into(), size.into()),
909
            trap_code: code,
910
        });
911
    }
912

913
    // Helpers for ALU operations.
914

915
    fn alu_rri(&mut self, op: ALUOp, imm: Imm12, rn: Reg, rd: WritableReg, size: OperandSize) {
916
        self.emit(Inst::AluRRImm12 {
917
            alu_op: op,
918
            size: size.into(),
919
            rd: rd.map(Into::into),
920
            rn: rn.into(),
921
            imm12: imm,
922
        });
923
    }
924

925
    fn alu_rri_logic(
926
        &mut self,
927
        op: ALUOp,
928
        imm: ImmLogic,
929
        rn: Reg,
930
        rd: WritableReg,
931
        size: OperandSize,
932
    ) {
933
        self.emit(Inst::AluRRImmLogic {
934
            alu_op: op,
935
            size: size.into(),
936
            rd: rd.map(Into::into),
937
            rn: rn.into(),
938
            imml: imm,
939
        });
940
    }
941

942
    fn alu_rri_shift(
943
        &mut self,
944
        op: ALUOp,
945
        imm: ImmShift,
946
        rn: Reg,
947
        rd: WritableReg,
948
        size: OperandSize,
949
    ) {
950
        self.emit(Inst::AluRRImmShift {
951
            alu_op: op,
952
            size: size.into(),
953
            rd: rd.map(Into::into),
954
            rn: rn.into(),
955
            immshift: imm,
956
        });
957
    }
958

959
    fn alu_rrr(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
960
        self.emit(Inst::AluRRR {
961
            alu_op: op,
962
            size: size.into(),
963
            rd: rd.map(Into::into),
964
            rn: rn.into(),
965
            rm: rm.into(),
966
        });
967
    }
968

969
    fn alu_rrr_extend(&mut self, op: ALUOp, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
970
        self.emit(Inst::AluRRRExtend {
971
            alu_op: op,
972
            size: size.into(),
973
            rd: rd.map(Into::into),
974
            rn: rn.into(),
975
            rm: rm.into(),
976
            extendop: ExtendOp::UXTX,
977
        });
978
    }
979

980
    fn alu_rrrr(
981
        &mut self,
982
        op: ALUOp3,
983
        rm: Reg,
984
        rn: Reg,
985
        rd: WritableReg,
986
        ra: Reg,
987
        size: OperandSize,
988
    ) {
989
        self.emit(Inst::AluRRRR {
990
            alu_op: op,
991
            size: size.into(),
992
            rd: rd.map(Into::into),
993
            rn: rn.into(),
994
            rm: rm.into(),
995
            ra: ra.into(),
996
        });
997
    }
998

999
    fn fpu_rrr(&mut self, op: FPUOp2, rm: Reg, rn: Reg, rd: WritableReg, size: OperandSize) {
1000
        self.emit(Inst::FpuRRR {
1001
            fpu_op: op,
1002
            size: size.into(),
1003
            rd: rd.map(Into::into),
1004
            rn: rn.into(),
1005
            rm: rm.into(),
1006
        });
1007
    }
1008

1009
    fn fpu_rri(&mut self, op: FPUOpRI, rn: Reg, rd: WritableReg) {
1010
        self.emit(Inst::FpuRRI {
1011
            fpu_op: op,
1012
            rd: rd.map(Into::into),
1013
            rn: rn.into(),
1014
        });
1015
    }
1016

1017
    fn fpu_rri_mod(&mut self, op: FPUOpRIMod, ri: Reg, rn: Reg, rd: WritableReg) {
1018
        self.emit(Inst::FpuRRIMod {
1019
            fpu_op: op,
1020
            rd: rd.map(Into::into),
1021
            ri: ri.into(),
1022
            rn: rn.into(),
1023
        });
1024
    }
1025

1026
    fn fpu_rr(&mut self, op: FPUOp1, rn: Reg, rd: WritableReg, size: OperandSize) {
1027
        self.emit(Inst::FpuRR {
1028
            fpu_op: op,
1029
            size: size.into(),
1030
            rd: rd.map(Into::into),
1031
            rn: rn.into(),
1032
        });
1033
    }
1034

1035
    fn fpu_round(&mut self, op: FpuRoundMode, rn: Reg, rd: WritableReg) {
1036
        self.emit(Inst::FpuRound {
1037
            op,
1038
            rd: rd.map(Into::into),
1039
            rn: rn.into(),
1040
        });
1041
    }
1042

1043
    fn bit_rr(&mut self, op: BitOp, rn: Reg, rd: WritableReg, size: OperandSize) {
1044
        self.emit(Inst::BitRR {
1045
            op,
1046
            size: size.into(),
1047
            rd: rd.map(Into::into),
1048
            rn: rn.into(),
1049
        });
1050
    }
1051

1052
    // Convert ShiftKind to ALUOp. If kind == Rotl, then emulate it by emitting
1053
    // the negation of the given reg r, and returns ALUOp::Extr (an alias for
1054
    // `ror` the rotate-right instruction)
1055
    fn shift_kind_to_alu_op(&mut self, kind: ShiftKind, r: Reg, size: OperandSize) -> ALUOp {
1056
        match kind {
1057
            ShiftKind::Shl => ALUOp::Lsl,
1058
            ShiftKind::ShrS => ALUOp::Asr,
1059
            ShiftKind::ShrU => ALUOp::Lsr,
1060
            ShiftKind::Rotr => ALUOp::Extr,
1061
            ShiftKind::Rotl => {
1062
                // neg(r) is sub(zero, r).
1063
                self.alu_rrr(ALUOp::Sub, r, regs::zero(), writable!(r), size);
1064
                ALUOp::Extr
1065
            }
1066
        }
1067
    }
1068

1069
    /// Get a label from the underlying machine code buffer.
1070
    pub fn get_label(&mut self) -> MachLabel {
1071
        self.buffer.get_label()
1072
    }
1073

1074
    /// Get a mutable reference to underlying
1075
    /// machine buffer.
1076
    pub fn buffer_mut(&mut self) -> &mut MachBuffer<Inst> {
1077
        &mut self.buffer
1078
    }
1079

1080
    /// Get a reference to the underlying machine buffer.
1081
    pub fn buffer(&self) -> &MachBuffer<Inst> {
1082
        &self.buffer
1083
    }
1084

1085
    /// Emit a direct call to a function defined locally and
1086
    /// referenced to by `name`.
1087
    pub fn call_with_name(&mut self, name: UserExternalNameRef, call_conv: CallingConvention) {
1088
        self.emit(Inst::Call {
1089
            info: Box::new(cranelift_codegen::CallInfo::empty(
1090
                ExternalName::user(name),
1091
                call_conv.into(),
1092
            )),
1093
        })
1094
    }
1095

1096
    /// Emit an indirect call to a function whose address is
1097
    /// stored the `callee` register.
1098
    pub fn call_with_reg(&mut self, callee: Reg, call_conv: CallingConvention) {
1099
        self.emit(Inst::CallInd {
1100
            info: Box::new(cranelift_codegen::CallInfo::empty(
1101
                callee.into(),
1102
                call_conv.into(),
1103
            )),
1104
        })
1105
    }
1106

1107
    /// Load the min value for an integer of size out_size, as a floating-point
1108
    /// of size `in-size`, into register `rd`.
1109
    fn min_fp_value(
1110
        &mut self,
1111
        signed: bool,
1112
        in_size: OperandSize,
1113
        out_size: OperandSize,
1114
        rd: Writable<Reg>,
1115
    ) {
1116
        match in_size {
1117
            OperandSize::S32 => {
1118
                let (min, _) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into());
1119
                self.mov_ir(rd, Imm::f32(min.to_bits()), in_size);
1120
            }
1121
            OperandSize::S64 => {
1122
                let (min, _) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into());
1123
                self.mov_ir(rd, Imm::f64(min.to_bits()), in_size);
1124
            }
1125
            s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()),
1126
        };
1127
    }
1128

1129
    /// Load the max value for an integer of size out_size, as a floating-point
1130
    /// of size `in_size`, into register `rd`.
1131
    fn max_fp_value(
1132
        &mut self,
1133
        signed: bool,
1134
        in_size: OperandSize,
1135
        out_size: OperandSize,
1136
        rd: Writable<Reg>,
1137
    ) {
1138
        match in_size {
1139
            OperandSize::S32 => {
1140
                let (_, max) = f32_cvt_to_int_bounds(signed, out_size.num_bits().into());
1141
                self.mov_ir(rd, Imm::f32(max.to_bits()), in_size);
1142
            }
1143
            OperandSize::S64 => {
1144
                let (_, max) = f64_cvt_to_int_bounds(signed, out_size.num_bits().into());
1145
                self.mov_ir(rd, Imm::f64(max.to_bits()), in_size);
1146
            }
1147
            s => unreachable!("unsupported floating-point size: {}bit", s.num_bits()),
1148
        };
1149
    }
1150

1151
    /// Emit instructions to check if the value in `rn` is NaN.
1152
    fn check_nan(&mut self, rn: Reg, size: OperandSize) {
1153
        self.fcmp(rn, rn, size);
1154
        self.trapif(Cond::Vs, TrapCode::BAD_CONVERSION_TO_INTEGER);
1155
    }
1156

1157
    /// Convert the floating point of size `src_size` stored in `src`, into a integer of size
1158
    /// `dst_size`, storing the result in `dst`.
1159
    pub fn fpu_to_int(
1160
        &mut self,
1161
        dst: Writable<Reg>,
1162
        src: Reg,
1163
        tmp_reg: WritableReg,
1164
        src_size: OperandSize,
1165
        dst_size: OperandSize,
1166
        kind: TruncKind,
1167
        signed: bool,
1168
    ) {
1169
        if kind.is_unchecked() {
1170
            // Confusingly, when `kind` is `Unchecked` is when we actually need to perform the checks:
1171
            // - check if fp is NaN
1172
            // - check bounds
1173
            self.check_nan(src, src_size);
1174

1175
            self.min_fp_value(signed, src_size, dst_size, tmp_reg);
1176
            self.fcmp(src, tmp_reg.to_reg(), src_size);
1177
            self.trapif(Cond::Le, TrapCode::INTEGER_OVERFLOW);
1178

1179
            self.max_fp_value(signed, src_size, dst_size, tmp_reg);
1180
            self.fcmp(src, tmp_reg.to_reg(), src_size);
1181
            self.trapif(Cond::Ge, TrapCode::INTEGER_OVERFLOW);
1182
        }
1183

1184
        self.cvt_fpu_to_int(dst, src, src_size, dst_size, signed)
1185
    }
1186

1187
    /// Select and emit the appropriate `fcvt*` instruction
1188
    pub fn cvt_fpu_to_int(
1189
        &mut self,
1190
        dst: Writable<Reg>,
1191
        src: Reg,
1192
        src_size: OperandSize,
1193
        dst_size: OperandSize,
1194
        signed: bool,
1195
    ) {
1196
        let op = match (src_size, dst_size, signed) {
1197
            (OperandSize::S32, OperandSize::S32, false) => FpuToIntOp::F32ToU32,
1198
            (OperandSize::S32, OperandSize::S32, true) => FpuToIntOp::F32ToI32,
1199
            (OperandSize::S32, OperandSize::S64, false) => FpuToIntOp::F32ToU64,
1200
            (OperandSize::S32, OperandSize::S64, true) => FpuToIntOp::F32ToI64,
1201
            (OperandSize::S64, OperandSize::S32, false) => FpuToIntOp::F64ToU32,
1202
            (OperandSize::S64, OperandSize::S32, true) => FpuToIntOp::F64ToI32,
1203
            (OperandSize::S64, OperandSize::S64, false) => FpuToIntOp::F64ToU64,
1204
            (OperandSize::S64, OperandSize::S64, true) => FpuToIntOp::F64ToI64,
1205
            (fsize, int_size, signed) => unimplemented!(
1206
                "unsupported conversion: f{} to {}{}",
1207
                fsize.num_bits(),
1208
                if signed { "i" } else { "u" },
1209
                int_size.num_bits(),
1210
            ),
1211
        };
1212

1213
        self.emit(Inst::FpuToInt {
1214
            op,
1215
            rd: dst.map(Into::into),
1216
            rn: src.into(),
1217
        });
1218
    }
1219
}
1220

1221
/// Captures the region in a MachBuffer where an add-with-immediate instruction would be emitted,
1222
/// but the immediate is not yet known.
1223
pub(crate) struct PatchableAddToReg {
1224
    /// The region to be patched in the [`MachBuffer`]. It contains
1225
    /// space for 3 32-bit instructions, i.e. it's 12 bytes long.
1226
    region: PatchRegion,
1227

1228
    // The destination register for the add instruction.
1229
    reg: Writable<Reg>,
1230

1231
    // The temporary register used to hold the immediate value.
1232
    tmp: Writable<Reg>,
1233
}
1234

1235
impl PatchableAddToReg {
1236
    /// Create a new [`PatchableAddToReg`] by capturing a region in the output
1237
    /// buffer containing an instruction sequence that loads an immediate into a
1238
    /// register `tmp`, then adds it to a register `reg`. The [`MachBuffer`]
1239
    /// will have that instruction sequence written to the region, though the
1240
    /// immediate loaded into `tmp` will be `0` until the `::finalize` method is
1241
    /// called.
1242
    pub(crate) fn new(reg: Writable<Reg>, tmp: Writable<Reg>, buf: &mut MachBuffer<Inst>) -> Self {
1243
        let insns = Self::add_immediate_instruction_sequence(reg, tmp, 0);
1244
        let open = buf.start_patchable();
1245
        buf.put_data(&insns);
1246
        let region = buf.end_patchable(open);
1247

1248
        Self { region, reg, tmp }
1249
    }
1250

1251
    fn add_immediate_instruction_sequence(
1252
        reg: Writable<Reg>,
1253
        tmp: Writable<Reg>,
1254
        imm: i32,
1255
    ) -> [u8; 12] {
1256
        let imm_hi = imm as u64 & 0xffff_0000;
1257
        let imm_hi = MoveWideConst::maybe_from_u64(imm_hi).unwrap();
1258

1259
        let imm_lo = imm as u64 & 0x0000_ffff;
1260
        let imm_lo = MoveWideConst::maybe_from_u64(imm_lo).unwrap();
1261

1262
        let size = OperandSize::S64.into();
1263

1264
        let tmp = tmp.map(Into::into);
1265
        let rd = reg.map(Into::into);
1266

1267
        // This is "movz to bits 16-31 of 64 bit reg tmp and zero the rest"
1268
        let mov_insn = enc_move_wide(inst::MoveWideOp::MovZ, tmp, imm_hi, size);
1269

1270
        // This is "movk to bits 0-15 of 64 bit reg tmp"
1271
        let movk_insn = enc_movk(tmp, imm_lo, size);
1272

1273
        // This is "add tmp to rd". The opcodes are somewhat buried in the
1274
        // instruction encoder so we just repeat them here.
1275
        let add_bits_31_21: u32 = 0b00001011_000 | (size.sf_bit() << 10);
1276
        let add_bits_15_10: u32 = 0;
1277
        let add_insn = enc_arith_rrr(
1278
            add_bits_31_21,
1279
            add_bits_15_10,
1280
            rd,
1281
            rd.to_reg(),
1282
            tmp.to_reg(),
1283
        );
1284

1285
        let mut buf = [0u8; 12];
1286
        buf[0..4].copy_from_slice(&mov_insn.to_le_bytes());
1287
        buf[4..8].copy_from_slice(&movk_insn.to_le_bytes());
1288
        buf[8..12].copy_from_slice(&add_insn.to_le_bytes());
1289
        buf
1290
    }
1291

1292
    /// Patch the [`MachBuffer`] with the known constant to be added to the register. The final
1293
    /// value is passed in as an i32, but the instruction encoding is fixed when
1294
    /// [`PatchableAddToReg::new`] is called.
1295
    pub(crate) fn finalize(self, val: i32, buffer: &mut MachBuffer<Inst>) {
1296
        let insns = Self::add_immediate_instruction_sequence(self.reg, self.tmp, val);
1297
        let slice = self.region.patch(buffer);
1298
        assert_eq!(slice.len(), insns.len());
1299
        slice.copy_from_slice(&insns);
1300
    }
1301
}
1302

1303
Product

Resources

Company