// cranelift/codegen/src/isa/riscv64/inst/emit.rs
//! Riscv64 ISA: binary code emission.12use crate::ir::{self, LibCall, TrapCode};3use crate::isa::riscv64::inst::*;4use crate::isa::riscv64::lower::isle::generated_code::{5CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp,6};7use cranelift_control::ControlPlane;89pub struct EmitInfo {10#[expect(dead_code, reason = "may want to be used in the future")]11shared_flag: settings::Flags,12isa_flags: super::super::riscv_settings::Flags,13}1415impl EmitInfo {16pub(crate) fn new(17shared_flag: settings::Flags,18isa_flags: super::super::riscv_settings::Flags,19) -> Self {20Self {21shared_flag,22isa_flags,23}24}25}2627pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {28u32::from(m.to_real_reg().unwrap().hw_enc() & 31)29}3031pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 {32let real_reg = m.to_real_reg().unwrap().hw_enc();33debug_assert!(real_reg >= 8 && real_reg < 16);34let compressed_reg = real_reg - 8;35u32::from(compressed_reg)36}3738#[derive(Clone, Debug, PartialEq, Default)]39pub enum EmitVState {40#[default]41Unknown,42Known(VState),43}4445/// State carried between emissions of a sequence of instructions.46#[derive(Default, Clone, Debug)]47pub struct EmitState {48/// The user stack map for the upcoming instruction, as provided to49/// `pre_safepoint()`.50user_stack_map: Option<ir::UserStackMap>,5152/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and53/// optimized away at compiletime. 
See [cranelift_control].54ctrl_plane: ControlPlane,5556/// Vector State57/// Controls the current state of the vector unit at the emission point.58vstate: EmitVState,5960frame_layout: FrameLayout,61}6263impl EmitState {64fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {65self.user_stack_map.take()66}6768fn clobber_vstate(&mut self) {69self.vstate = EmitVState::Unknown;70}71}7273impl MachInstEmitState<Inst> for EmitState {74fn new(75abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>,76ctrl_plane: ControlPlane,77) -> Self {78EmitState {79user_stack_map: None,80ctrl_plane,81vstate: EmitVState::Unknown,82frame_layout: abi.frame_layout().clone(),83}84}8586fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {87self.user_stack_map = user_stack_map;88}8990fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {91&mut self.ctrl_plane92}9394fn take_ctrl_plane(self) -> ControlPlane {95self.ctrl_plane96}9798fn on_new_block(&mut self) {99// Reset the vector state.100self.clobber_vstate();101}102103fn frame_layout(&self) -> &FrameLayout {104&self.frame_layout105}106}107108impl Inst {109/// Load int mask.110/// If ty is int then 0xff in rd.111pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {112let mut insts = SmallInstVec::new();113assert!(ty.is_int() && ty.bits() <= 64);114match ty {115I64 => {116insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));117}118I32 | I16 => {119insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));120insts.push(Inst::Extend {121rd,122rn: rd.to_reg(),123signed: false,124from_bits: ty.bits() as u8,125to_bits: 64,126});127}128I8 => {129insts.push(Inst::load_imm12(rd, Imm12::from_i16(255)));130}131_ => unreachable!("ty:{:?}", ty),132}133insts134}135/// inverse all bit136pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {137Inst::AluRRImm12 {138alu_op: AluOPRRI::Xori,139rd,140rs,141imm12: Imm12::from_i16(-1),142}143}144145/// Returns Some(VState) if this instruction is 
expecting a specific vector state146/// before emission.147fn expected_vstate(&self) -> Option<&VState> {148match self {149Inst::Nop0150| Inst::Nop4151| Inst::BrTable { .. }152| Inst::Auipc { .. }153| Inst::Fli { .. }154| Inst::Lui { .. }155| Inst::LoadInlineConst { .. }156| Inst::AluRRR { .. }157| Inst::FpuRRR { .. }158| Inst::AluRRImm12 { .. }159| Inst::CsrReg { .. }160| Inst::CsrImm { .. }161| Inst::Load { .. }162| Inst::Store { .. }163| Inst::Args { .. }164| Inst::Rets { .. }165| Inst::Ret { .. }166| Inst::Extend { .. }167| Inst::Call { .. }168| Inst::CallInd { .. }169| Inst::ReturnCall { .. }170| Inst::ReturnCallInd { .. }171| Inst::Jal { .. }172| Inst::CondBr { .. }173| Inst::LoadExtNameGot { .. }174| Inst::LoadExtNameNear { .. }175| Inst::LoadExtNameFar { .. }176| Inst::ElfTlsGetAddr { .. }177| Inst::LoadAddr { .. }178| Inst::Mov { .. }179| Inst::MovFromPReg { .. }180| Inst::Fence { .. }181| Inst::EBreak182| Inst::Udf { .. }183| Inst::FpuRR { .. }184| Inst::FpuRRRR { .. }185| Inst::Jalr { .. }186| Inst::Atomic { .. }187| Inst::Select { .. }188| Inst::AtomicCas { .. }189| Inst::RawData { .. }190| Inst::AtomicStore { .. }191| Inst::AtomicLoad { .. }192| Inst::AtomicRmwLoop { .. }193| Inst::TrapIf { .. }194| Inst::Unwind { .. }195| Inst::DummyUse { .. }196| Inst::LabelAddress { .. }197| Inst::Popcnt { .. }198| Inst::Cltz { .. }199| Inst::Brev8 { .. }200| Inst::StackProbeLoop { .. } => None,201202// VecSetState does not expect any vstate, rather it updates it.203Inst::VecSetState { .. } => None,204205// `vmv` instructions copy a set of registers and ignore vstate.206Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,207208Inst::VecAluRR { vstate, .. } |209Inst::VecAluRRR { vstate, .. } |210Inst::VecAluRRRR { vstate, .. } |211Inst::VecAluRImm5 { vstate, .. } |212Inst::VecAluRRImm5 { vstate, .. } |213Inst::VecAluRRRImm5 { vstate, .. } |214// TODO: Unit-stride loads and stores only need the AVL to be correct, not215// the full vtype. 
A future optimization could be to decouple these two when216// updating vstate. This would allow us to avoid emitting a VecSetState in217// some cases.218Inst::VecLoad { vstate, .. }219| Inst::VecStore { vstate, .. } => Some(vstate),220Inst::EmitIsland { .. } => None,221}222}223}224225impl MachInstEmit for Inst {226type State = EmitState;227type Info = EmitInfo;228229fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {230// Check if we need to update the vector state before emitting this instruction231if let Some(expected) = self.expected_vstate() {232if state.vstate != EmitVState::Known(*expected) {233// Update the vector state.234Inst::VecSetState {235rd: writable_zero_reg(),236vstate: *expected,237}238.emit(sink, emit_info, state);239}240}241242// N.B.: we *must* not exceed the "worst-case size" used to compute243// where to insert islands, except when islands are explicitly triggered244// (with an `EmitIsland`). We check this in debug builds. This is `mut`245// to allow disabling the check for `JTSequence`, which is always246// emitted following an `EmitIsland`.247let mut start_off = sink.cur_offset();248249// First try to emit this as a compressed instruction250let res = self.try_emit_compressed(sink, emit_info, state, &mut start_off);251if res.is_none() {252// If we can't lets emit it as a normal instruction253self.emit_uncompressed(sink, emit_info, state, &mut start_off);254}255256// We exclude br_table, call, return_call and try_call from257// these checks since they emit their own islands, and thus258// are allowed to exceed the worst case size.259let emits_own_island = match self {260Inst::BrTable { .. }261| Inst::ReturnCall { .. }262| Inst::ReturnCallInd { .. }263| Inst::Call { .. }264| Inst::CallInd { .. }265| Inst::EmitIsland { .. 
} => true,266_ => false,267};268if !emits_own_island {269let end_off = sink.cur_offset();270assert!(271(end_off - start_off) <= Inst::worst_case_size(),272"Inst:{:?} length:{} worst_case_size:{}",273self,274end_off - start_off,275Inst::worst_case_size()276);277}278}279280fn pretty_print_inst(&self, state: &mut Self::State) -> String {281self.print_with_state(state)282}283}284285impl Inst {286/// Tries to emit an instruction as compressed, if we can't return false.287fn try_emit_compressed(288&self,289sink: &mut MachBuffer<Inst>,290emit_info: &EmitInfo,291state: &mut EmitState,292start_off: &mut u32,293) -> Option<()> {294let has_m = emit_info.isa_flags.has_m();295let has_zba = emit_info.isa_flags.has_zba();296let has_zbb = emit_info.isa_flags.has_zbb();297let has_zca = emit_info.isa_flags.has_zca();298let has_zcb = emit_info.isa_flags.has_zcb();299let has_zcd = emit_info.isa_flags.has_zcd();300301// Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc..) require Zca302// to be enabled, so check it early.303if !has_zca {304return None;305}306307fn reg_is_compressible(r: Reg) -> bool {308r.to_real_reg()309.map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16)310.unwrap_or(false)311}312313match *self {314// C.ADD315Inst::AluRRR {316alu_op: AluOPRRR::Add,317rd,318rs1,319rs2,320} if (rd.to_reg() == rs1 || rd.to_reg() == rs2)321&& rs1 != zero_reg()322&& rs2 != zero_reg() =>323{324// Technically `c.add rd, rs` expands to `add rd, rd, rs`, but we can325// also swap rs1 with rs2 and we get an equivalent instruction. 
i.e we326// can also compress `add rd, rs, rd` into `c.add rd, rs`.327let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };328329sink.put2(encode_cr_type(CrOp::CAdd, rd, src));330}331332// C.MV333Inst::AluRRImm12 {334alu_op: AluOPRRI::Addi | AluOPRRI::Ori,335rd,336rs,337imm12,338} if rd.to_reg() != rs339&& rd.to_reg() != zero_reg()340&& rs != zero_reg()341&& imm12.as_i16() == 0 =>342{343sink.put2(encode_cr_type(CrOp::CMv, rd, rs));344}345346// CA Ops347Inst::AluRRR {348alu_op:349alu_op @ (AluOPRRR::And350| AluOPRRR::Or351| AluOPRRR::Xor352| AluOPRRR::Addw353| AluOPRRR::Mul),354rd,355rs1,356rs2,357} if (rd.to_reg() == rs1 || rd.to_reg() == rs2)358&& reg_is_compressible(rs1)359&& reg_is_compressible(rs2) =>360{361let op = match alu_op {362AluOPRRR::And => CaOp::CAnd,363AluOPRRR::Or => CaOp::COr,364AluOPRRR::Xor => CaOp::CXor,365AluOPRRR::Addw => CaOp::CAddw,366AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul,367_ => return None,368};369// The canonical expansion for these instruction has `rd == rs1`, but370// these are all commutative operations, so we can swap the operands.371let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };372373sink.put2(encode_ca_type(op, rd, src));374}375376// The sub instructions are non commutative, so we can't swap the operands.377Inst::AluRRR {378alu_op: alu_op @ (AluOPRRR::Sub | AluOPRRR::Subw),379rd,380rs1,381rs2,382} if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => {383let op = match alu_op {384AluOPRRR::Sub => CaOp::CSub,385AluOPRRR::Subw => CaOp::CSubw,386_ => return None,387};388sink.put2(encode_ca_type(op, rd, rs2));389}390391// c.j392//393// We don't have a separate JAL as that is only available in RV32C394Inst::Jal { label } => {395sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump);396sink.add_uncond_branch(*start_off, *start_off + 2, label);397sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO));398}399400// c.jr401Inst::Jalr { rd, base, offset }402if rd.to_reg() == zero_reg() && base != 
zero_reg() && offset.as_i16() == 0 =>403{404sink.put2(encode_cr2_type(CrOp::CJr, base));405state.clobber_vstate();406}407408// c.jalr409Inst::Jalr { rd, base, offset }410if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 =>411{412sink.put2(encode_cr2_type(CrOp::CJalr, base));413state.clobber_vstate();414}415416// c.ebreak417Inst::EBreak => {418sink.put2(encode_cr_type(419CrOp::CEbreak,420writable_zero_reg(),421zero_reg(),422));423}424425// c.unimp426Inst::Udf { trap_code } => {427sink.add_trap(trap_code);428sink.put2(0x0000);429}430// c.addi16sp431//432// c.addi16sp shares the opcode with c.lui, but has a destination field of x2.433// c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in the stack pointer (sp=x2),434// where the immediate is scaled to represent multiples of 16 in the range (-512,496). c.addi16sp is used435// to adjust the stack pointer in procedure prologues and epilogues. It expands into addi x2, x2, nzimm. c.addi16sp436// is only valid when nzimm≠0; the code point with nzimm=0 is reserved.437Inst::AluRRImm12 {438alu_op: AluOPRRI::Addi,439rd,440rs,441imm12,442} if rd.to_reg() == rs443&& rs == stack_reg()444&& imm12.as_i16() != 0445&& (imm12.as_i16() % 16) == 0446&& Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() =>447{448let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap();449sink.put2(encode_c_addi16sp(imm6));450}451452// c.addi4spn453//454// c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero455// immediate, scaled by 4, to the stack pointer, x2, and writes the result to456// rd. This instruction is used to generate pointers to stack-allocated variables457// and expands to addi rd, x2, nzuimm. 
c.addi4spn is only valid when nzuimm≠0;458// the code points with nzuimm=0 are reserved.459Inst::AluRRImm12 {460alu_op: AluOPRRI::Addi,461rd,462rs,463imm12,464} if reg_is_compressible(rd.to_reg())465&& rs == stack_reg()466&& imm12.as_i16() != 0467&& (imm12.as_i16() % 4) == 0468&& u8::try_from(imm12.as_i16() / 4).is_ok() =>469{470let imm = u8::try_from(imm12.as_i16() / 4).unwrap();471sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm));472}473474// c.li475Inst::AluRRImm12 {476alu_op: AluOPRRI::Addi,477rd,478rs,479imm12,480} if rd.to_reg() != zero_reg() && rs == zero_reg() => {481let imm6 = Imm6::maybe_from_imm12(imm12)?;482sink.put2(encode_ci_type(CiOp::CLi, rd, imm6));483}484485// c.addi486Inst::AluRRImm12 {487alu_op: AluOPRRI::Addi,488rd,489rs,490imm12,491} if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {492let imm6 = Imm6::maybe_from_imm12(imm12)?;493sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6));494}495496// c.addiw497Inst::AluRRImm12 {498alu_op: AluOPRRI::Addiw,499rd,500rs,501imm12,502} if rd.to_reg() == rs && rs != zero_reg() => {503let imm6 = Imm6::maybe_from_imm12(imm12)?;504sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6));505}506507// c.lui508//509// c.lui loads the non-zero 6-bit immediate field into bits 17–12510// of the destination register, clears the bottom 12 bits, and511// sign-extends bit 17 into all higher bits of the destination.512Inst::Lui { rd, imm: imm20 }513if rd.to_reg() != zero_reg()514&& rd.to_reg() != stack_reg()515&& imm20.as_i32() != 0 =>516{517// Check that the top bits are sign extended518let imm = imm20.as_i32() << 14 >> 14;519if imm != imm20.as_i32() {520return None;521}522let imm6 = Imm6::maybe_from_i32(imm)?;523sink.put2(encode_ci_type(CiOp::CLui, rd, imm6));524}525526// c.slli527Inst::AluRRImm12 {528alu_op: AluOPRRI::Slli,529rd,530rs,531imm12,532} if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {533// The shift amount is unsigned, but we encode it as signed.534let shift = imm12.as_i16() 
& 0x3f;535let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();536sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6));537}538539// c.srli / c.srai540Inst::AluRRImm12 {541alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai),542rd,543rs,544imm12,545} if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => {546let op = match op {547AluOPRRI::Srli => CbOp::CSrli,548AluOPRRI::Srai => CbOp::CSrai,549_ => unreachable!(),550};551552// The shift amount is unsigned, but we encode it as signed.553let shift = imm12.as_i16() & 0x3f;554let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();555sink.put2(encode_cb_type(op, rd, imm6));556}557558// c.zextb559//560// This is an alias for `andi rd, rd, 0xff`561Inst::AluRRImm12 {562alu_op: AluOPRRI::Andi,563rd,564rs,565imm12,566} if has_zcb567&& rd.to_reg() == rs568&& reg_is_compressible(rs)569&& imm12.as_i16() == 0xff =>570{571sink.put2(encode_cszn_type(CsznOp::CZextb, rd));572}573574// c.andi575Inst::AluRRImm12 {576alu_op: AluOPRRI::Andi,577rd,578rs,579imm12,580} if rd.to_reg() == rs && reg_is_compressible(rs) => {581let imm6 = Imm6::maybe_from_imm12(imm12)?;582sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6));583}584585// Stack Based Loads586Inst::Load {587rd,588op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld),589from,590flags,591} if from.get_base_register() == Some(stack_reg())592&& (from.get_offset_with_state(state) % op.size()) == 0 =>593{594// We encode the offset in multiples of the load size.595let offset = from.get_offset_with_state(state);596let imm6 = u8::try_from(offset / op.size())597.ok()598.and_then(Uimm6::maybe_from_u8)?;599600// Some additional constraints on these instructions.601//602// Integer loads are not allowed to target x0, but floating point loads603// are, since f0 is not a special register.604//605// Floating point loads are not included in the base Zca extension606// but in a separate Zcd extension. 
Both of these are part of the C Extension.607let rd_is_zero = rd.to_reg() == zero_reg();608let op = match op {609LoadOP::Lw if !rd_is_zero => CiOp::CLwsp,610LoadOP::Ld if !rd_is_zero => CiOp::CLdsp,611LoadOP::Fld if has_zcd => CiOp::CFldsp,612_ => return None,613};614615if let Some(trap_code) = flags.trap_code() {616// Register the offset at which the actual load instruction starts.617sink.add_trap(trap_code);618}619sink.put2(encode_ci_sp_load(op, rd, imm6));620}621622// Regular Loads623Inst::Load {624rd,625op:626op627@ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh),628from,629flags,630} if reg_is_compressible(rd.to_reg())631&& from632.get_base_register()633.map(reg_is_compressible)634.unwrap_or(false)635&& (from.get_offset_with_state(state) % op.size()) == 0 =>636{637let base = from.get_base_register().unwrap();638639// We encode the offset in multiples of the store size.640let offset = from.get_offset_with_state(state);641let offset = u8::try_from(offset / op.size()).ok()?;642643// We mix two different formats here.644//645// c.lw / c.ld / c.fld instructions are available in the standard Zca646// extension using the CL format.647//648// c.lbu / c.lhu / c.lh are only available in the Zcb extension and649// are also encoded differently. Technically they each have a different650// format, but they are similar enough that we can group them.651let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh);652let encoded = if is_zcb_load {653if !has_zcb {654return None;655}656657let op = match op {658LoadOP::Lbu => ZcbMemOp::CLbu,659LoadOP::Lhu => ZcbMemOp::CLhu,660LoadOP::Lh => ZcbMemOp::CLh,661_ => unreachable!(),662};663664// Byte stores & loads have 2 bits of immediate offset. 
Halfword stores665// and loads only have 1 bit.666let imm2 = Uimm2::maybe_from_u8(offset)?;667if (offset & !((1 << op.imm_bits()) - 1)) != 0 {668return None;669}670671encode_zcbmem_load(op, rd, base, imm2)672} else {673// Floating point loads are not included in the base Zca extension674// but in a separate Zcd extension. Both of these are part of the C Extension.675let op = match op {676LoadOP::Lw => ClOp::CLw,677LoadOP::Ld => ClOp::CLd,678LoadOP::Fld if has_zcd => ClOp::CFld,679_ => return None,680};681let imm5 = Uimm5::maybe_from_u8(offset)?;682683encode_cl_type(op, rd, base, imm5)684};685686if let Some(trap_code) = flags.trap_code() {687// Register the offset at which the actual load instruction starts.688sink.add_trap(trap_code);689}690sink.put2(encoded);691}692693// Stack Based Stores694Inst::Store {695src,696op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd),697to,698flags,699} if to.get_base_register() == Some(stack_reg())700&& (to.get_offset_with_state(state) % op.size()) == 0 =>701{702// We encode the offset in multiples of the store size.703let offset = to.get_offset_with_state(state);704let imm6 = u8::try_from(offset / op.size())705.ok()706.and_then(Uimm6::maybe_from_u8)?;707708// Floating point stores are not included in the base Zca extension709// but in a separate Zcd extension. 
Both of these are part of the C Extension.710let op = match op {711StoreOP::Sw => CssOp::CSwsp,712StoreOP::Sd => CssOp::CSdsp,713StoreOP::Fsd if has_zcd => CssOp::CFsdsp,714_ => return None,715};716717if let Some(trap_code) = flags.trap_code() {718// Register the offset at which the actual load instruction starts.719sink.add_trap(trap_code);720}721sink.put2(encode_css_type(op, src, imm6));722}723724// Regular Stores725Inst::Store {726src,727op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb),728to,729flags,730} if reg_is_compressible(src)731&& to732.get_base_register()733.map(reg_is_compressible)734.unwrap_or(false)735&& (to.get_offset_with_state(state) % op.size()) == 0 =>736{737let base = to.get_base_register().unwrap();738739// We encode the offset in multiples of the store size.740let offset = to.get_offset_with_state(state);741let offset = u8::try_from(offset / op.size()).ok()?;742743// We mix two different formats here.744//745// c.sw / c.sd / c.fsd instructions are available in the standard Zca746// extension using the CL format.747//748// c.sb / c.sh are only available in the Zcb extension and are also749// encoded differently.750let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb);751let encoded = if is_zcb_store {752if !has_zcb {753return None;754}755756let op = match op {757StoreOP::Sh => ZcbMemOp::CSh,758StoreOP::Sb => ZcbMemOp::CSb,759_ => unreachable!(),760};761762// Byte stores & loads have 2 bits of immediate offset. Halfword stores763// and loads only have 1 bit.764let imm2 = Uimm2::maybe_from_u8(offset)?;765if (offset & !((1 << op.imm_bits()) - 1)) != 0 {766return None;767}768769encode_zcbmem_store(op, src, base, imm2)770} else {771// Floating point stores are not included in the base Zca extension772// but in a separate Zcd extension. 
Both of these are part of the C Extension.773let op = match op {774StoreOP::Sw => CsOp::CSw,775StoreOP::Sd => CsOp::CSd,776StoreOP::Fsd if has_zcd => CsOp::CFsd,777_ => return None,778};779let imm5 = Uimm5::maybe_from_u8(offset)?;780781encode_cs_type(op, src, base, imm5)782};783784if let Some(trap_code) = flags.trap_code() {785// Register the offset at which the actual load instruction starts.786sink.add_trap(trap_code);787}788sink.put2(encoded);789}790791// c.not792//793// This is an alias for `xori rd, rd, -1`794Inst::AluRRImm12 {795alu_op: AluOPRRI::Xori,796rd,797rs,798imm12,799} if has_zcb800&& rd.to_reg() == rs801&& reg_is_compressible(rs)802&& imm12.as_i16() == -1 =>803{804sink.put2(encode_cszn_type(CsznOp::CNot, rd));805}806807// c.sext.b / c.sext.h / c.zext.h808//809// These are all the extend instructions present in `Zcb`, they810// also require `Zbb` since they aren't available in the base ISA.811Inst::AluRRImm12 {812alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth),813rd,814rs,815imm12,816} if has_zcb817&& has_zbb818&& rd.to_reg() == rs819&& reg_is_compressible(rs)820&& imm12.as_i16() == 0 =>821{822let op = match alu_op {823AluOPRRI::Sextb => CsznOp::CSextb,824AluOPRRI::Sexth => CsznOp::CSexth,825AluOPRRI::Zexth => CsznOp::CZexth,826_ => unreachable!(),827};828sink.put2(encode_cszn_type(op, rd));829}830831// c.zext.w832//833// This is an alias for `add.uw rd, rd, zero`834Inst::AluRRR {835alu_op: AluOPRRR::Adduw,836rd,837rs1,838rs2,839} if has_zcb840&& has_zba841&& rd.to_reg() == rs1842&& reg_is_compressible(rs1)843&& rs2 == zero_reg() =>844{845sink.put2(encode_cszn_type(CsznOp::CZextw, rd));846}847848_ => return None,849}850851return Some(());852}853854fn emit_uncompressed(855&self,856sink: &mut MachBuffer<Inst>,857emit_info: &EmitInfo,858state: &mut EmitState,859start_off: &mut u32,860) {861match self {862&Inst::Nop0 => {863// do nothing864}865// Addi x0, x0, 0866&Inst::Nop4 => {867let x = Inst::AluRRImm12 {868alu_op: 
AluOPRRI::Addi,869rd: Writable::from_reg(zero_reg()),870rs: zero_reg(),871imm12: Imm12::ZERO,872};873x.emit(sink, emit_info, state)874}875&Inst::RawData { ref data } => {876// Right now we only put a u32 or u64 in this instruction.877// It is not very long, no need to check if need `emit_island`.878// If data is very long , this is a bug because RawData is typically879// use to load some data and rely on some position in the code stream.880// and we may exceed `Inst::worst_case_size`.881// for more information see https://github.com/bytecodealliance/wasmtime/pull/5612.882sink.put_data(&data[..]);883}884&Inst::Lui { rd, ref imm } => {885let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);886sink.put4(x);887}888&Inst::Fli { rd, width, imm } => {889sink.put4(encode_fli(width, imm, rd));890}891&Inst::LoadInlineConst { rd, ty, imm } => {892let data = &imm.to_le_bytes()[..ty.bytes() as usize];893894let label_data: MachLabel = sink.get_label();895let label_end: MachLabel = sink.get_label();896897// Load into rd898Inst::Load {899rd,900op: LoadOP::from_type(ty),901flags: MemFlags::new(),902from: AMode::Label(label_data),903}904.emit(sink, emit_info, state);905906// Jump over the inline pool907Inst::gen_jump(label_end).emit(sink, emit_info, state);908909// Emit the inline data910sink.bind_label(label_data, &mut state.ctrl_plane);911Inst::RawData { data: data.into() }.emit(sink, emit_info, state);912913sink.bind_label(label_end, &mut state.ctrl_plane);914}915&Inst::FpuRR {916alu_op,917width,918frm,919rd,920rs,921} => {922if alu_op.is_convert_to_int() {923sink.add_trap(TrapCode::BAD_CONVERSION_TO_INTEGER);924}925sink.put4(encode_fp_rr(alu_op, width, frm, rd, rs));926}927&Inst::FpuRRRR {928alu_op,929rd,930rs1,931rs2,932rs3,933frm,934width,935} => {936sink.put4(encode_fp_rrrr(alu_op, width, frm, rd, rs1, rs2, rs3));937}938&Inst::FpuRRR {939alu_op,940width,941frm,942rd,943rs1,944rs2,945} => {946sink.put4(encode_fp_rrr(alu_op, width, frm, rd, rs1, 
rs2));947}948&Inst::Unwind { ref inst } => {949sink.add_unwind(inst.clone());950}951&Inst::DummyUse { .. } => {952// This has already been handled by Inst::allocate.953}954&Inst::AluRRR {955alu_op,956rd,957rs1,958rs2,959} => {960let (rs1, rs2) = if alu_op.reverse_rs() {961(rs2, rs1)962} else {963(rs1, rs2)964};965966sink.put4(encode_r_type(967alu_op.op_code(),968rd,969alu_op.funct3(),970rs1,971rs2,972alu_op.funct7(),973));974}975&Inst::AluRRImm12 {976alu_op,977rd,978rs,979imm12,980} => {981let x = alu_op.op_code()982| reg_to_gpr_num(rd.to_reg()) << 7983| alu_op.funct3() << 12984| reg_to_gpr_num(rs) << 15985| alu_op.imm12(imm12) << 20;986sink.put4(x);987}988&Inst::CsrReg { op, rd, rs, csr } => {989sink.put4(encode_csr_reg(op, rd, rs, csr));990}991&Inst::CsrImm { op, rd, csr, imm } => {992sink.put4(encode_csr_imm(op, rd, csr, imm));993}994&Inst::Load {995rd,996op: LoadOP::Flh,997from,998flags,999} if !emit_info.isa_flags.has_zfhmin() => {1000// flh unavailable, use an integer load instead1001Inst::Load {1002rd: writable_spilltmp_reg(),1003op: LoadOP::Lh,1004flags,1005from,1006}1007.emit(sink, emit_info, state);1008// NaN-box the `f16` before loading it into the floating-point1009// register with a 32-bit `fmv`.1010Inst::Lui {1011rd: writable_spilltmp_reg2(),1012imm: Imm20::from_i32((0xffff_0000_u32 as i32) >> 12),1013}1014.emit(sink, emit_info, state);1015Inst::AluRRR {1016alu_op: AluOPRRR::Or,1017rd: writable_spilltmp_reg(),1018rs1: spilltmp_reg(),1019rs2: spilltmp_reg2(),1020}1021.emit(sink, emit_info, state);1022Inst::FpuRR {1023alu_op: FpuOPRR::FmvFmtX,1024width: FpuOPWidth::S,1025frm: FRM::RNE,1026rd,1027rs: spilltmp_reg(),1028}1029.emit(sink, emit_info, state);1030}1031&Inst::Load {1032rd,1033op,1034from,1035flags,1036} => {1037let base = from.get_base_register();1038let offset = from.get_offset_with_state(state);1039let offset_imm12 = Imm12::maybe_from_i64(offset);1040let label = from.get_label_with_sink(sink);10411042let (addr, imm12) = match (base, 
offset_imm12, label) {1043// When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it.1044(Some(base), Some(imm12), None) => (base, imm12),10451046// Otherwise, if the offset does not fit into a imm12, we need to materialize it into a1047// register and load from that.1048(Some(_), None, None) => {1049let tmp = writable_spilltmp_reg();1050Inst::LoadAddr { rd: tmp, mem: from }.emit(sink, emit_info, state);1051(tmp.to_reg(), Imm12::ZERO)1052}10531054// If the AMode contains a label we can emit an internal relocation that gets1055// resolved with the correct address later.1056(None, Some(imm), Some(label)) => {1057debug_assert_eq!(imm.as_i16(), 0);10581059// Get the current PC.1060sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);1061Inst::Auipc {1062rd,1063imm: Imm20::ZERO,1064}1065.emit_uncompressed(sink, emit_info, state, start_off);10661067// Emit a relocation for the load. This patches the offset into the instruction.1068sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);10691070// Imm12 here is meaningless since it's going to get replaced.1071(rd.to_reg(), Imm12::ZERO)1072}10731074// These cases are impossible with the current AModes that we have. We either1075// always have a register, or always have a label. 
Never both, and never neither.1076(None, None, None)1077| (None, Some(_), None)1078| (Some(_), None, Some(_))1079| (Some(_), Some(_), Some(_))1080| (None, None, Some(_)) => {1081unreachable!("Invalid load address")1082}1083};10841085if let Some(trap_code) = flags.trap_code() {1086// Register the offset at which the actual load instruction starts.1087sink.add_trap(trap_code);1088}10891090sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12));1091}1092&Inst::Store {1093op: StoreOP::Fsh,1094src,1095flags,1096to,1097} if !emit_info.isa_flags.has_zfhmin() => {1098// fsh unavailable, use an integer store instead1099Inst::FpuRR {1100alu_op: FpuOPRR::FmvXFmt,1101width: FpuOPWidth::S,1102frm: FRM::RNE,1103rd: writable_spilltmp_reg(),1104rs: src,1105}1106.emit(sink, emit_info, state);1107Inst::Store {1108to,1109op: StoreOP::Sh,1110flags,1111src: spilltmp_reg(),1112}1113.emit(sink, emit_info, state);1114}1115&Inst::Store { op, src, flags, to } => {1116let base = to.get_base_register();1117let offset = to.get_offset_with_state(state);1118let offset_imm12 = Imm12::maybe_from_i64(offset);11191120let (addr, imm12) = match (base, offset_imm12) {1121// If the offset fits into an imm12 we can directly encode it.1122(Some(base), Some(imm12)) => (base, imm12),1123// Otherwise load the address it into a reg and load from it.1124_ => {1125let tmp = writable_spilltmp_reg();1126Inst::LoadAddr { rd: tmp, mem: to }.emit(sink, emit_info, state);1127(tmp.to_reg(), Imm12::ZERO)1128}1129};11301131if let Some(trap_code) = flags.trap_code() {1132// Register the offset at which the actual load instruction starts.1133sink.add_trap(trap_code);1134}11351136sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12));1137}1138&Inst::Args { .. } | &Inst::Rets { .. 
} => {1139// Nothing: this is a pseudoinstruction that serves1140// only to constrain registers at a certain point.1141}1142&Inst::Ret {} => {1143// RISC-V does not have a dedicated ret instruction, instead we emit the equivalent1144// `jalr x0, x1, 0` that jumps to the return address.1145Inst::Jalr {1146rd: writable_zero_reg(),1147base: link_reg(),1148offset: Imm12::ZERO,1149}1150.emit(sink, emit_info, state);1151}11521153&Inst::Extend {1154rd,1155rn,1156signed,1157from_bits,1158to_bits: _to_bits,1159} => {1160let mut insts = SmallInstVec::new();1161let shift_bits = (64 - from_bits) as i16;1162let is_u8 = || from_bits == 8 && signed == false;1163if is_u8() {1164// special for u8.1165insts.push(Inst::AluRRImm12 {1166alu_op: AluOPRRI::Andi,1167rd,1168rs: rn,1169imm12: Imm12::from_i16(255),1170});1171} else {1172insts.push(Inst::AluRRImm12 {1173alu_op: AluOPRRI::Slli,1174rd,1175rs: rn,1176imm12: Imm12::from_i16(shift_bits),1177});1178insts.push(Inst::AluRRImm12 {1179alu_op: if signed {1180AluOPRRI::Srai1181} else {1182AluOPRRI::Srli1183},1184rd,1185rs: rd.to_reg(),1186imm12: Imm12::from_i16(shift_bits),1187});1188}1189insts1190.into_iter()1191.for_each(|i| i.emit(sink, emit_info, state));1192}11931194&Inst::Call { ref info } => {1195sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);11961197Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0)1198.into_iter()1199.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));12001201if let Some(s) = state.take_stack_map() {1202let offset = sink.cur_offset();1203sink.push_user_stack_map(state, offset, s);1204}12051206if let Some(try_call) = info.try_call_info.as_ref() {1207sink.add_try_call_site(1208Some(state.frame_layout.sp_to_fp()),1209try_call.exception_handlers(&state.frame_layout),1210);1211} else {1212sink.add_call_site();1213}12141215let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();1216if callee_pop_size > 0 {1217for inst in 
Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {1218inst.emit(sink, emit_info, state);1219}1220}12211222// Load any stack-carried return values.1223info.emit_retval_loads::<Riscv64MachineDeps, _, _>(1224state.frame_layout().stackslots_size,1225|inst| inst.emit(sink, emit_info, state),1226|needed_space| Some(Inst::EmitIsland { needed_space }),1227);12281229// If this is a try-call, jump to the continuation1230// (normal-return) block.1231if let Some(try_call) = info.try_call_info.as_ref() {1232let jmp = Inst::Jal {1233label: try_call.continuation,1234};1235jmp.emit(sink, emit_info, state);1236}12371238*start_off = sink.cur_offset();1239}1240&Inst::CallInd { ref info } => {1241Inst::Jalr {1242rd: writable_link_reg(),1243base: info.dest,1244offset: Imm12::ZERO,1245}1246.emit(sink, emit_info, state);12471248if let Some(s) = state.take_stack_map() {1249let offset = sink.cur_offset();1250sink.push_user_stack_map(state, offset, s);1251}12521253if let Some(try_call) = info.try_call_info.as_ref() {1254sink.add_try_call_site(1255Some(state.frame_layout.sp_to_fp()),1256try_call.exception_handlers(&state.frame_layout),1257);1258} else {1259sink.add_call_site();1260}12611262let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();1263if callee_pop_size > 0 {1264for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {1265inst.emit(sink, emit_info, state);1266}1267}12681269// Load any stack-carried return values.1270info.emit_retval_loads::<Riscv64MachineDeps, _, _>(1271state.frame_layout().stackslots_size,1272|inst| inst.emit(sink, emit_info, state),1273|needed_space| Some(Inst::EmitIsland { needed_space }),1274);12751276// If this is a try-call, jump to the continuation1277// (normal-return) block.1278if let Some(try_call) = info.try_call_info.as_ref() {1279let jmp = Inst::Jal {1280label: try_call.continuation,1281};1282jmp.emit(sink, emit_info, state);1283}12841285*start_off = sink.cur_offset();1286}12871288&Inst::ReturnCall { ref info } => 
{1289emit_return_call_common_sequence(sink, emit_info, state, info);12901291sink.add_call_site();1292sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);1293Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)1294.into_iter()1295.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));1296}12971298&Inst::ReturnCallInd { ref info } => {1299emit_return_call_common_sequence(sink, emit_info, state, &info);13001301Inst::Jalr {1302rd: writable_zero_reg(),1303base: info.dest,1304offset: Imm12::ZERO,1305}1306.emit(sink, emit_info, state);1307}1308&Inst::Jal { label } => {1309sink.use_label_at_offset(*start_off, label, LabelUse::Jal20);1310sink.add_uncond_branch(*start_off, *start_off + 4, label);1311sink.put4(0b1101111);1312state.clobber_vstate();1313}1314&Inst::CondBr {1315taken,1316not_taken,1317kind,1318} => {1319match taken {1320CondBrTarget::Label(label) => {1321let code = kind.emit();1322let code_inverse = kind.inverse().emit().to_le_bytes();1323sink.use_label_at_offset(*start_off, label, LabelUse::B12);1324sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse);1325sink.put4(code);1326}1327CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"),1328}13291330match not_taken {1331CondBrTarget::Label(label) => {1332Inst::gen_jump(label).emit(sink, emit_info, state)1333}1334CondBrTarget::Fallthrough => {}1335};1336}13371338&Inst::Mov { rd, rm, ty } => {1339debug_assert_eq!(rd.to_reg().class(), rm.class());1340if rd.to_reg() == rm {1341return;1342}13431344match rm.class() {1345RegClass::Int => Inst::AluRRImm12 {1346alu_op: AluOPRRI::Addi,1347rd,1348rs: rm,1349imm12: Imm12::ZERO,1350},1351RegClass::Float => Inst::FpuRRR {1352alu_op: FpuOPRRR::Fsgnj,1353width: FpuOPWidth::try_from(ty).unwrap(),1354frm: FRM::RNE,1355rd,1356rs1: rm,1357rs2: rm,1358},1359RegClass::Vector => Inst::VecAluRRImm5 {1360op: VecAluOpRRImm5::VmvrV,1361vd: rd,1362vs2: rm,1363// Imm 0 means copy 1 register.1364imm: 
Imm5::maybe_from_i8(0).unwrap(),1365mask: VecOpMasking::Disabled,1366// Vstate for this instruction is ignored.1367vstate: VState::from_type(ty),1368},1369}1370.emit(sink, emit_info, state);1371}13721373&Inst::MovFromPReg { rd, rm } => {1374Inst::gen_move(rd, Reg::from(rm), I64).emit(sink, emit_info, state);1375}13761377&Inst::BrTable {1378index,1379tmp1,1380tmp2,1381ref targets,1382} => {1383let ext_index = writable_spilltmp_reg();13841385let label_compute_target = sink.get_label();13861387// The default target is passed in as the 0th element of `targets`1388// separate it here for clarity.1389let default_target = targets[0];1390let targets = &targets[1..];13911392// We are going to potentially emit a large amount of instructions, so ensure that we emit an island1393// now if we need one.1394//1395// The worse case PC calculations are 12 instructions. And each entry in the jump table is 2 instructions.1396// Check if we need to emit a jump table here to support that jump.1397let inst_count = 12 + (targets.len() * 2);1398let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32;1399if sink.island_needed(distance) {1400let jump_around_label = sink.get_label();1401Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);1402sink.emit_island(distance + 4, &mut state.ctrl_plane);1403sink.bind_label(jump_around_label, &mut state.ctrl_plane);1404}14051406// We emit a bounds check on the index, if the index is larger than the number of1407// jump table entries, we jump to the default block. Otherwise we compute a jump1408// offset by multiplying the index by 8 (the size of each entry) and then jump to1409// that offset. 
Each jump table entry is a regular auipc+jalr which we emit sequentially.1410//1411// Build the following sequence:1412//1413// extend_index:1414// zext.w ext_index, index1415// bounds_check:1416// li tmp, n_labels1417// bltu ext_index, tmp, compute_target1418// jump_to_default_block:1419// auipc pc, 01420// jalr zero, pc, default_block1421// compute_target:1422// auipc pc, 01423// slli tmp, ext_index, 31424// add pc, pc, tmp1425// jalr zero, pc, 0x101426// jump_table:1427// ; This repeats for each entry in the jumptable1428// auipc pc, 01429// jalr zero, pc, block_target14301431// Extend the index to 64 bits.1432//1433// This prevents us branching on the top 32 bits of the index, which1434// are undefined.1435Inst::Extend {1436rd: ext_index,1437rn: index,1438signed: false,1439from_bits: 32,1440to_bits: 64,1441}1442.emit(sink, emit_info, state);14431444// Bounds check.1445//1446// Check if the index passed in is larger than the number of jumptable1447// entries that we have. If it is, we fallthrough to a jump into the1448// default block.1449Inst::load_constant_u32(tmp2, targets.len() as u64)1450.iter()1451.for_each(|i| i.emit(sink, emit_info, state));1452Inst::CondBr {1453taken: CondBrTarget::Label(label_compute_target),1454not_taken: CondBrTarget::Fallthrough,1455kind: IntegerCompare {1456kind: IntCC::UnsignedLessThan,1457rs1: ext_index.to_reg(),1458rs2: tmp2.to_reg(),1459},1460}1461.emit(sink, emit_info, state);14621463sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32);1464Inst::construct_auipc_and_jalr(None, tmp2, 0)1465.iter()1466.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));14671468// Compute the jump table offset.1469// We need to emit a PC relative offset,1470sink.bind_label(label_compute_target, &mut state.ctrl_plane);14711472// Get the current PC.1473Inst::Auipc {1474rd: tmp1,1475imm: Imm20::ZERO,1476}1477.emit_uncompressed(sink, emit_info, state, start_off);14781479// These instructions must be emitted 
as uncompressed since we1480// are manually computing the offset from the PC.14811482// Multiply the index by 8, since that is the size in1483// bytes of each jump table entry1484Inst::AluRRImm12 {1485alu_op: AluOPRRI::Slli,1486rd: tmp2,1487rs: ext_index.to_reg(),1488imm12: Imm12::from_i16(3),1489}1490.emit_uncompressed(sink, emit_info, state, start_off);14911492// Calculate the base of the jump, PC + the offset from above.1493Inst::AluRRR {1494alu_op: AluOPRRR::Add,1495rd: tmp1,1496rs1: tmp1.to_reg(),1497rs2: tmp2.to_reg(),1498}1499.emit_uncompressed(sink, emit_info, state, start_off);15001501// Jump to the middle of the jump table.1502// We add a 16 byte offset here, since we used 4 instructions1503// since the AUIPC that was used to get the PC.1504Inst::Jalr {1505rd: writable_zero_reg(),1506base: tmp1.to_reg(),1507offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16),1508}1509.emit_uncompressed(sink, emit_info, state, start_off);15101511// Emit the jump table.1512//1513// Each entry is a auipc + jalr to the target block. 
We also start with a island1514// if necessary.15151516// Emit the jumps back to back1517for target in targets.iter() {1518sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32);15191520Inst::construct_auipc_and_jalr(None, tmp2, 0)1521.iter()1522.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));1523}15241525// We've just emitted an island that is safe up to *here*.1526// Mark it as such so that we don't needlessly emit additional islands.1527*start_off = sink.cur_offset();1528}15291530&Inst::Atomic {1531op,1532rd,1533addr,1534src,1535amo,1536} => {1537// TODO: get flags from original CLIF atomic instruction1538let flags = MemFlags::new();1539if let Some(trap_code) = flags.trap_code() {1540sink.add_trap(trap_code);1541}1542let x = op.op_code()1543| reg_to_gpr_num(rd.to_reg()) << 71544| op.funct3() << 121545| reg_to_gpr_num(addr) << 151546| reg_to_gpr_num(src) << 201547| op.funct7(amo) << 25;15481549sink.put4(x);1550}1551&Inst::Fence { pred, succ } => {1552let x = 0b00011111553| 0b00000 << 71554| 0b000 << 121555| 0b00000 << 151556| (succ as u32) << 201557| (pred as u32) << 24;15581559sink.put4(x);1560}1561&Inst::Auipc { rd, imm } => {1562sink.put4(enc_auipc(rd, imm));1563}15641565&Inst::LoadAddr { rd, mem } => {1566let base = mem.get_base_register();1567let offset = mem.get_offset_with_state(state);1568let offset_imm12 = Imm12::maybe_from_i64(offset);15691570match (mem, base, offset_imm12) {1571(_, Some(rs), Some(imm12)) => {1572Inst::AluRRImm12 {1573alu_op: AluOPRRI::Addi,1574rd,1575rs,1576imm12,1577}1578.emit(sink, emit_info, state);1579}1580(_, Some(rs), None) => {1581let mut insts = Inst::load_constant_u64(rd, offset as u64);1582insts.push(Inst::AluRRR {1583alu_op: AluOPRRR::Add,1584rd,1585rs1: rd.to_reg(),1586rs2: rs,1587});1588insts1589.into_iter()1590.for_each(|inst| inst.emit(sink, emit_info, state));1591}1592(AMode::Const(addr), None, _) => {1593// Get an address label for the constant and recurse.1594let label = 
sink.get_label_for_constant(addr);1595Inst::LoadAddr {1596rd,1597mem: AMode::Label(label),1598}1599.emit(sink, emit_info, state);1600}1601(AMode::Label(label), None, _) => {1602// Get the current PC.1603sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);1604let inst = Inst::Auipc {1605rd,1606imm: Imm20::ZERO,1607};1608inst.emit_uncompressed(sink, emit_info, state, start_off);16091610// Emit an add to the address with a relocation.1611// This later gets patched up with the correct offset.1612sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);1613Inst::AluRRImm12 {1614alu_op: AluOPRRI::Addi,1615rd,1616rs: rd.to_reg(),1617imm12: Imm12::ZERO,1618}1619.emit_uncompressed(sink, emit_info, state, start_off);1620}1621(amode, _, _) => {1622unimplemented!("LoadAddr: {:?}", amode);1623}1624}1625}16261627&Inst::Select {1628ref dst,1629condition,1630ref x,1631ref y,1632} => {1633// The general form for this select is the following:1634//1635// mv rd, x1636// b{cond} rcond, label_end1637// mv rd, y1638// label_end:1639// ... etc1640//1641// This is built on the assumption that moves are cheap, but branches and jumps1642// are not. So with this format we always avoid one jump instruction at the expense1643// of an unconditional move.1644//1645// We also perform another optimization here. 
If the destination register is the same1646// as one of the input registers, we can avoid emitting the first unconditional move1647// and emit just the branch and the second move.1648//1649// To make sure that this happens as often as possible, we also try to invert the1650// condition, so that if either of the input registers are the same as the destination1651// we avoid that move.16521653let label_end = sink.get_label();16541655let xregs = x.regs();1656let yregs = y.regs();1657let dstregs: Vec<Reg> = dst.regs().into_iter().map(|r| r.to_reg()).collect();1658let condregs = condition.regs();16591660// We are going to write to the destination register before evaluating1661// the condition, so we need to make sure that the destination register1662// is not one of the condition registers.1663//1664// This should never happen, since hopefully the regalloc constraints1665// for this register are set up correctly.1666debug_assert_ne!(dstregs, condregs);16671668// Check if we can invert the condition and avoid moving the y registers into1669// the destination. This allows us to only emit the branch and one of the moves.1670let (uncond_move, cond_move, condition) = if yregs == dstregs {1671(yregs, xregs, condition.inverse())1672} else {1673(xregs, yregs, condition)1674};16751676// Unconditionally move one of the values to the destination register.1677//1678// These moves may not end up being emitted if the source and1679// destination registers are the same. 
That logic is built into1680// the emit function for `Inst::Mov`.1681for i in gen_moves(dst.regs(), uncond_move) {1682i.emit(sink, emit_info, state);1683}16841685// If the condition passes we skip over the conditional move1686Inst::CondBr {1687taken: CondBrTarget::Label(label_end),1688not_taken: CondBrTarget::Fallthrough,1689kind: condition,1690}1691.emit(sink, emit_info, state);16921693// Move the conditional value to the destination register.1694for i in gen_moves(dst.regs(), cond_move) {1695i.emit(sink, emit_info, state);1696}16971698sink.bind_label(label_end, &mut state.ctrl_plane);1699}1700&Inst::Jalr { rd, base, offset } => {1701sink.put4(enc_jalr(rd, base, offset));1702state.clobber_vstate();1703}1704&Inst::EBreak => {1705sink.put4(0x00100073);1706}1707&Inst::AtomicCas {1708offset,1709t0,1710dst,1711e,1712addr,1713v,1714ty,1715} => {1716// # addr holds address of memory location1717// # e holds expected value1718// # v holds desired value1719// # dst holds return value1720// cas:1721// lr.w dst, (addr) # Load original value.1722// bne dst, e, fail # Doesn’t match, so fail.1723// sc.w t0, v, (addr) # Try to update.1724// bnez t0 , cas # if store not ok,retry.1725// fail:1726let fail_label = sink.get_label();1727let cas_lebel = sink.get_label();1728sink.bind_label(cas_lebel, &mut state.ctrl_plane);1729Inst::Atomic {1730op: AtomicOP::load_op(ty),1731rd: dst,1732addr,1733src: zero_reg(),1734amo: AMO::SeqCst,1735}1736.emit(sink, emit_info, state);1737if ty.bits() < 32 {1738AtomicOP::extract(dst, offset, dst.to_reg(), ty)1739.iter()1740.for_each(|i| i.emit(sink, emit_info, state));1741} else if ty.bits() == 32 {1742Inst::Extend {1743rd: dst,1744rn: dst.to_reg(),1745signed: false,1746from_bits: 32,1747to_bits: 64,1748}1749.emit(sink, emit_info, state);1750}1751Inst::CondBr {1752taken: CondBrTarget::Label(fail_label),1753not_taken: CondBrTarget::Fallthrough,1754kind: IntegerCompare {1755kind: IntCC::NotEqual,1756rs1: e,1757rs2: 
dst.to_reg(),1758},1759}1760.emit(sink, emit_info, state);1761let store_value = if ty.bits() < 32 {1762// reload value to t0.1763Inst::Atomic {1764op: AtomicOP::load_op(ty),1765rd: t0,1766addr,1767src: zero_reg(),1768amo: AMO::SeqCst,1769}1770.emit(sink, emit_info, state);1771// set reset part.1772AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)1773.iter()1774.for_each(|i| i.emit(sink, emit_info, state));1775t0.to_reg()1776} else {1777v1778};1779Inst::Atomic {1780op: AtomicOP::store_op(ty),1781rd: t0,1782addr,1783src: store_value,1784amo: AMO::SeqCst,1785}1786.emit(sink, emit_info, state);1787// check is our value stored.1788Inst::CondBr {1789taken: CondBrTarget::Label(cas_lebel),1790not_taken: CondBrTarget::Fallthrough,1791kind: IntegerCompare {1792kind: IntCC::NotEqual,1793rs1: t0.to_reg(),1794rs2: zero_reg(),1795},1796}1797.emit(sink, emit_info, state);1798sink.bind_label(fail_label, &mut state.ctrl_plane);1799}1800&Inst::AtomicRmwLoop {1801offset,1802op,1803dst,1804ty,1805p,1806x,1807t0,1808} => {1809let retry = sink.get_label();1810sink.bind_label(retry, &mut state.ctrl_plane);1811// load old value.1812Inst::Atomic {1813op: AtomicOP::load_op(ty),1814rd: dst,1815addr: p,1816src: zero_reg(),1817amo: AMO::SeqCst,1818}1819.emit(sink, emit_info, state);1820//18211822let store_value: Reg = match op {1823crate::ir::AtomicRmwOp::Add1824| crate::ir::AtomicRmwOp::Sub1825| crate::ir::AtomicRmwOp::And1826| crate::ir::AtomicRmwOp::Or1827| crate::ir::AtomicRmwOp::Xor => {1828AtomicOP::extract(dst, offset, dst.to_reg(), ty)1829.iter()1830.for_each(|i| i.emit(sink, emit_info, state));1831Inst::AluRRR {1832alu_op: match op {1833crate::ir::AtomicRmwOp::Add => AluOPRRR::Add,1834crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub,1835crate::ir::AtomicRmwOp::And => AluOPRRR::And,1836crate::ir::AtomicRmwOp::Or => AluOPRRR::Or,1837crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor,1838_ => unreachable!(),1839},1840rd: t0,1841rs1: dst.to_reg(),1842rs2: x,1843}1844.emit(sink, emit_info, 
state);1845Inst::Atomic {1846op: AtomicOP::load_op(ty),1847rd: writable_spilltmp_reg2(),1848addr: p,1849src: zero_reg(),1850amo: AMO::SeqCst,1851}1852.emit(sink, emit_info, state);1853AtomicOP::merge(1854writable_spilltmp_reg2(),1855writable_spilltmp_reg(),1856offset,1857t0.to_reg(),1858ty,1859)1860.iter()1861.for_each(|i| i.emit(sink, emit_info, state));1862spilltmp_reg2()1863}1864crate::ir::AtomicRmwOp::Nand => {1865if ty.bits() < 32 {1866AtomicOP::extract(dst, offset, dst.to_reg(), ty)1867.iter()1868.for_each(|i| i.emit(sink, emit_info, state));1869}1870Inst::AluRRR {1871alu_op: AluOPRRR::And,1872rd: t0,1873rs1: x,1874rs2: dst.to_reg(),1875}1876.emit(sink, emit_info, state);1877Inst::construct_bit_not(t0, t0.to_reg()).emit(sink, emit_info, state);1878if ty.bits() < 32 {1879Inst::Atomic {1880op: AtomicOP::load_op(ty),1881rd: writable_spilltmp_reg2(),1882addr: p,1883src: zero_reg(),1884amo: AMO::SeqCst,1885}1886.emit(sink, emit_info, state);1887AtomicOP::merge(1888writable_spilltmp_reg2(),1889writable_spilltmp_reg(),1890offset,1891t0.to_reg(),1892ty,1893)1894.iter()1895.for_each(|i| i.emit(sink, emit_info, state));1896spilltmp_reg2()1897} else {1898t0.to_reg()1899}1900}19011902crate::ir::AtomicRmwOp::Umin1903| crate::ir::AtomicRmwOp::Umax1904| crate::ir::AtomicRmwOp::Smin1905| crate::ir::AtomicRmwOp::Smax => {1906let label_select_dst = sink.get_label();1907let label_select_done = sink.get_label();1908if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax1909{1910AtomicOP::extract(dst, offset, dst.to_reg(), ty)1911} else {1912AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)1913}1914.iter()1915.for_each(|i| i.emit(sink, emit_info, state));19161917Inst::CondBr {1918taken: CondBrTarget::Label(label_select_dst),1919not_taken: CondBrTarget::Fallthrough,1920kind: IntegerCompare {1921kind: match op {1922crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan,1923crate::ir::AtomicRmwOp::Umax => 
IntCC::UnsignedGreaterThan,1924crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan,1925crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,1926_ => unreachable!(),1927},1928rs1: dst.to_reg(),1929rs2: x,1930},1931}1932.emit(sink, emit_info, state);1933// here we select x.1934Inst::gen_move(t0, x, I64).emit(sink, emit_info, state);1935Inst::gen_jump(label_select_done).emit(sink, emit_info, state);1936sink.bind_label(label_select_dst, &mut state.ctrl_plane);1937Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state);1938sink.bind_label(label_select_done, &mut state.ctrl_plane);1939Inst::Atomic {1940op: AtomicOP::load_op(ty),1941rd: writable_spilltmp_reg2(),1942addr: p,1943src: zero_reg(),1944amo: AMO::SeqCst,1945}1946.emit(sink, emit_info, state);1947AtomicOP::merge(1948writable_spilltmp_reg2(),1949writable_spilltmp_reg(),1950offset,1951t0.to_reg(),1952ty,1953)1954.iter()1955.for_each(|i| i.emit(sink, emit_info, state));1956spilltmp_reg2()1957}1958crate::ir::AtomicRmwOp::Xchg => {1959AtomicOP::extract(dst, offset, dst.to_reg(), ty)1960.iter()1961.for_each(|i| i.emit(sink, emit_info, state));1962Inst::Atomic {1963op: AtomicOP::load_op(ty),1964rd: writable_spilltmp_reg2(),1965addr: p,1966src: zero_reg(),1967amo: AMO::SeqCst,1968}1969.emit(sink, emit_info, state);1970AtomicOP::merge(1971writable_spilltmp_reg2(),1972writable_spilltmp_reg(),1973offset,1974x,1975ty,1976)1977.iter()1978.for_each(|i| i.emit(sink, emit_info, state));1979spilltmp_reg2()1980}1981};19821983Inst::Atomic {1984op: AtomicOP::store_op(ty),1985rd: t0,1986addr: p,1987src: store_value,1988amo: AMO::SeqCst,1989}1990.emit(sink, emit_info, state);19911992// if store is not ok,retry.1993Inst::CondBr {1994taken: CondBrTarget::Label(retry),1995not_taken: CondBrTarget::Fallthrough,1996kind: IntegerCompare {1997kind: IntCC::NotEqual,1998rs1: t0.to_reg(),1999rs2: zero_reg(),2000},2001}2002.emit(sink, emit_info, state);2003}20042005&Inst::LoadExtNameGot { rd, ref name } => {2006// Load a 
PC-relative address into a register.2007// RISC-V does this slightly differently from other arches. We emit a relocation2008// with a label, instead of the symbol itself.2009//2010// See: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses2011//2012// Emit the following code:2013// label:2014// auipc rd, 0 # R_RISCV_GOT_HI20 (symbol_name)2015// ld rd, rd, 0 # R_RISCV_PCREL_LO12_I (label)20162017// Create the label that is going to be published to the final binary object.2018let auipc_label = sink.get_label();2019sink.bind_label(auipc_label, &mut state.ctrl_plane);20202021// Get the current PC.2022sink.add_reloc(Reloc::RiscvGotHi20, &**name, 0);2023Inst::Auipc {2024rd,2025imm: Imm20::from_i32(0),2026}2027.emit_uncompressed(sink, emit_info, state, start_off);20282029// The `ld` here, points to the `auipc` label instead of directly to the symbol.2030sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);2031Inst::Load {2032rd,2033op: LoadOP::Ld,2034flags: MemFlags::trusted(),2035from: AMode::RegOffset(rd.to_reg(), 0),2036}2037.emit_uncompressed(sink, emit_info, state, start_off);2038}20392040&Inst::LoadExtNameFar {2041rd,2042ref name,2043offset,2044} => {2045// In the non PIC sequence we relocate the absolute address into2046// a preallocated space, load it into a register and jump over2047// it.2048//2049// Emit the following code:2050// ld rd, label_data2051// j label_end2052// label_data:2053// <8 byte space> # ABS82054// label_end:20552056let label_data = sink.get_label();2057let label_end = sink.get_label();20582059// Load the value from a label2060Inst::Load {2061rd,2062op: LoadOP::Ld,2063flags: MemFlags::trusted(),2064from: AMode::Label(label_data),2065}2066.emit(sink, emit_info, state);20672068// Jump over the data2069Inst::gen_jump(label_end).emit(sink, emit_info, state);20702071sink.bind_label(label_data, &mut state.ctrl_plane);2072sink.add_reloc(Reloc::Abs8, name.as_ref(), 
offset);2073sink.put8(0);20742075sink.bind_label(label_end, &mut state.ctrl_plane);2076}20772078&Inst::LoadExtNameNear {2079rd,2080ref name,2081offset,2082} => {2083// Emit the following code:2084// label:2085// auipc rd, 0 # R_RISCV_PCREL_HI20 (symbol_name)2086// ld rd, rd, 0 # R_RISCV_PCREL_LO12_I (label)20872088let auipc_label = sink.get_label();2089sink.bind_label(auipc_label, &mut state.ctrl_plane);20902091// Get the current PC.2092sink.add_reloc(Reloc::RiscvPCRelHi20, &**name, offset);2093Inst::Auipc {2094rd,2095imm: Imm20::from_i32(0),2096}2097.emit_uncompressed(sink, emit_info, state, start_off);20982099sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);2100Inst::AluRRImm12 {2101alu_op: AluOPRRI::Addi,2102rd,2103rs: rd.to_reg(),2104imm12: Imm12::ZERO,2105}2106.emit_uncompressed(sink, emit_info, state, start_off);2107}21082109&Inst::LabelAddress { dst, label } => {2110let offset = sink.cur_offset();2111Inst::Auipc {2112rd: dst,2113imm: Imm20::from_i32(0),2114}2115.emit_uncompressed(sink, emit_info, state, start_off);2116sink.use_label_at_offset(offset, label, LabelUse::PCRelHi20);21172118let offset = sink.cur_offset();2119Inst::AluRRImm12 {2120alu_op: AluOPRRI::Addi,2121rd: dst,2122rs: dst.to_reg(),2123imm12: Imm12::ZERO,2124}2125.emit_uncompressed(sink, emit_info, state, start_off);2126sink.use_label_at_offset(offset, label, LabelUse::PCRelLo12I);2127}21282129&Inst::ElfTlsGetAddr { rd, ref name } => {2130// RISC-V's TLS GD model is slightly different from other arches.2131//2132// We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits2133// of the address relative to the GOT entry. 
This relocation points to2134// the symbol as usual.2135//2136// However when loading the bottom 12bits of the address, we need to2137// use a label that points to the previous AUIPC instruction.2138//2139// label:2140// auipc a0,0 # R_RISCV_TLS_GD_HI20 (symbol)2141// addi a0,a0,0 # R_RISCV_PCREL_LO12_I (label)2142//2143// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic21442145// Create the label that is going to be published to the final binary object.2146let auipc_label = sink.get_label();2147sink.bind_label(auipc_label, &mut state.ctrl_plane);21482149// Get the current PC.2150sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0);2151Inst::Auipc {2152rd,2153imm: Imm20::from_i32(0),2154}2155.emit_uncompressed(sink, emit_info, state, start_off);21562157// The `addi` here, points to the `auipc` label instead of directly to the symbol.2158sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);2159Inst::AluRRImm12 {2160alu_op: AluOPRRI::Addi,2161rd,2162rs: rd.to_reg(),2163imm12: Imm12::from_i16(0),2164}2165.emit_uncompressed(sink, emit_info, state, start_off);21662167Inst::Call {2168info: Box::new(CallInfo::empty(2169ExternalName::LibCall(LibCall::ElfTlsGetAddr),2170CallConv::SystemV,2171)),2172}2173.emit_uncompressed(sink, emit_info, state, start_off);2174}21752176&Inst::TrapIf {2177rs1,2178rs2,2179cc,2180trap_code,2181} => {2182let label_end = sink.get_label();2183let cond = IntegerCompare { kind: cc, rs1, rs2 };21842185// Jump over the trap if we the condition is false.2186Inst::CondBr {2187taken: CondBrTarget::Label(label_end),2188not_taken: CondBrTarget::Fallthrough,2189kind: cond.inverse(),2190}2191.emit(sink, emit_info, state);2192Inst::Udf { trap_code }.emit(sink, emit_info, state);21932194sink.bind_label(label_end, &mut state.ctrl_plane);2195}2196&Inst::Udf { trap_code } => {2197sink.add_trap(trap_code);2198sink.put_data(Inst::TRAP_OPCODE);2199}2200&Inst::AtomicLoad { rd, ty, p } => {2201// emit the 
fence.2202Inst::Fence {2203pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,2204succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,2205}2206.emit(sink, emit_info, state);2207// load.2208Inst::Load {2209rd,2210op: LoadOP::from_type(ty),2211flags: MemFlags::new(),2212from: AMode::RegOffset(p, 0),2213}2214.emit(sink, emit_info, state);2215Inst::Fence {2216pred: Inst::FENCE_REQ_R,2217succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,2218}2219.emit(sink, emit_info, state);2220}2221&Inst::AtomicStore { src, ty, p } => {2222Inst::Fence {2223pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,2224succ: Inst::FENCE_REQ_W,2225}2226.emit(sink, emit_info, state);2227Inst::Store {2228to: AMode::RegOffset(p, 0),2229op: StoreOP::from_type(ty),2230flags: MemFlags::new(),2231src,2232}2233.emit(sink, emit_info, state);2234}22352236&Inst::Popcnt {2237sum,2238tmp,2239step,2240rs,2241ty,2242} => {2243// load 0 to sum , init.2244Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);2245// load2246Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))2247.emit(sink, emit_info, state);2248//2249Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);2250Inst::AluRRImm12 {2251alu_op: AluOPRRI::Slli,2252rd: tmp,2253rs: tmp.to_reg(),2254imm12: Imm12::from_i16((ty.bits() - 1) as i16),2255}2256.emit(sink, emit_info, state);2257let label_done = sink.get_label();2258let label_loop = sink.get_label();2259sink.bind_label(label_loop, &mut state.ctrl_plane);2260Inst::CondBr {2261taken: CondBrTarget::Label(label_done),2262not_taken: CondBrTarget::Fallthrough,2263kind: IntegerCompare {2264kind: IntCC::SignedLessThanOrEqual,2265rs1: step.to_reg(),2266rs2: zero_reg(),2267},2268}2269.emit(sink, emit_info, state);2270// test and add sum.2271{2272Inst::AluRRR {2273alu_op: AluOPRRR::And,2274rd: writable_spilltmp_reg2(),2275rs1: tmp.to_reg(),2276rs2: rs,2277}2278.emit(sink, emit_info, state);2279let label_over = sink.get_label();2280Inst::CondBr {2281taken: CondBrTarget::Label(label_over),2282not_taken: 
CondBrTarget::Fallthrough,
                    kind: IntegerCompare {
                        kind: IntCC::Equal,
                        rs1: zero_reg(),
                        rs2: spilltmp_reg2(),
                    },
                }
                .emit(sink, emit_info, state);
                // The tested bit was set: count it.
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: sum,
                    rs: sum.to_reg(),
                    imm12: Imm12::ONE,
                }
                .emit(sink, emit_info, state);
                sink.bind_label(label_over, &mut state.ctrl_plane);
            }
            // Advance: step -= 1 and move the probe bit down one position.
            {
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: step,
                    rs: step.to_reg(),
                    imm12: Imm12::from_i16(-1),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Srli,
                    rd: tmp,
                    rs: tmp.to_reg(),
                    imm12: Imm12::ONE,
                }
                .emit(sink, emit_info, state);
                Inst::gen_jump(label_loop).emit(sink, emit_info, state);
            }
            sink.bind_label(label_done, &mut state.ctrl_plane);
        }
        &Inst::Cltz {
            sum,
            tmp,
            step,
            rs,
            leading,
            ty,
        } => {
            // Count leading/trailing zeros with a bit-at-a-time loop:
            //   sum  - result accumulator, initialized to 0
            //   step - remaining iteration count, starts at ty.bits()
            //   tmp  - single probe bit; starts at the MSB when `leading`,
            //          else at the LSB
            Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
            Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
                .emit(sink, emit_info, state);
            Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
            if leading {
                // Move the probe bit up to the MSB of the `ty`-wide value.
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Slli,
                    rd: tmp,
                    rs: tmp.to_reg(),
                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
                }
                .emit(sink, emit_info, state);
            }
            let label_done = sink.get_label();
            let label_loop = sink.get_label();
            sink.bind_label(label_loop, &mut state.ctrl_plane);
            // Exit once every bit position has been examined (step <= 0).
            Inst::CondBr {
                taken: CondBrTarget::Label(label_done),
                not_taken: CondBrTarget::Fallthrough,
                kind: IntegerCompare {
                    kind: IntCC::SignedLessThanOrEqual,
                    rs1: step.to_reg(),
                    rs2: zero_reg(),
                },
            }
            .emit(sink, emit_info, state);
            // Test the current bit; a set bit terminates the zero run.
            {
                Inst::AluRRR {
                    alu_op: AluOPRRR::And,
                    rd: writable_spilltmp_reg2(),
                    rs1: tmp.to_reg(),
                    rs2: rs,
                }
                .emit(sink, emit_info, state);
                // Bit set => the run of zeros is over; we are done.
                Inst::CondBr {
                    taken: CondBrTarget::Label(label_done),
                    not_taken: CondBrTarget::Fallthrough,
                    kind: IntegerCompare {
                        kind: IntCC::NotEqual,
                        rs1: zero_reg(),
                        rs2: spilltmp_reg2(),
                    },
                }
                .emit(sink, emit_info, state);
                // Bit clear => one more zero in the run.
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: sum,
                    rs: sum.to_reg(),
                    imm12: Imm12::ONE,
                }
                .emit(sink, emit_info, state);
            }
            // Advance: step -= 1 and walk the probe bit toward the other end.
            {
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: step,
                    rs: step.to_reg(),
                    imm12: Imm12::from_i16(-1),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRImm12 {
                    alu_op: if leading {
                        AluOPRRI::Srli
                    } else {
                        AluOPRRI::Slli
                    },
                    rd: tmp,
                    rs: tmp.to_reg(),
                    imm12: Imm12::ONE,
                }
                .emit(sink, emit_info, state);
                Inst::gen_jump(label_loop).emit(sink, emit_info, state);
            }
            sink.bind_label(label_done, &mut state.ctrl_plane);
        }
        &Inst::Brev8 {
            rs,
            ty,
            step,
            tmp,
            tmp2,
            rd,
        } => {
            // Reverse the bits within each byte of `rs`, one bit per loop
            // iteration:
            //   step - remaining iteration count, starts at ty.bits()
            //   tmp  - source probe bit, walks down from the MSB
            //   tmp2 - destination bit, walks up within each output byte
            Inst::gen_move(rd, zero_reg(), I64).emit(sink, emit_info, state);
            Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
                .emit(sink, emit_info, state);
            // tmp = 1 << (ty.bits() - 1): the MSB of the value.
            Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Slli,
                rd: tmp,
                rs: tmp.to_reg(),
                imm12: Imm12::from_i16((ty.bits() - 1) as i16),
            }
            .emit(sink, emit_info, state);
            // tmp2 = 1 << (ty.bits() - 8): the lowest bit of the most
            // significant byte, i.e. where the MSB lands once its byte has
            // been bit-reversed.
            Inst::load_imm12(tmp2, Imm12::ONE).emit(sink, emit_info, state);
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Slli,
                rd: tmp2,
                rs: tmp2.to_reg(),
                imm12: Imm12::from_i16((ty.bits() - 8) as i16),
            }
            .emit(sink, emit_info, state);

            let label_done = sink.get_label();
            let label_loop = sink.get_label();
            sink.bind_label(label_loop, &mut state.ctrl_plane);
            // Exit once every bit position has been examined (step <= 0).
            Inst::CondBr {
                taken: CondBrTarget::Label(label_done),
                not_taken: CondBrTarget::Fallthrough,
                kind: IntegerCompare {
                    kind: IntCC::SignedLessThanOrEqual,
                    rs1: step.to_reg(),
                    rs2: zero_reg(),
                },
            }
            .emit(sink, emit_info, state);
            // If the source bit `tmp` is set in `rs`, set the destination
            // bit `tmp2` in `rd`.
            {
                Inst::AluRRR {
                    alu_op: AluOPRRR::And,
                    rd: writable_spilltmp_reg2(),
                    rs1: tmp.to_reg(),
                    rs2: rs,
                }
                .emit(sink, emit_info, state);
                let label_over = sink.get_label();
                Inst::CondBr {
                    taken: CondBrTarget::Label(label_over),
                    not_taken: CondBrTarget::Fallthrough,
                    kind: IntegerCompare {
                        kind: IntCC::Equal,
                        rs1: zero_reg(),
                        rs2: spilltmp_reg2(),
                    },
                }
                .emit(sink, emit_info, state);
                Inst::AluRRR {
                    alu_op: AluOPRRR::Or,
                    rd,
                    rs1: rd.to_reg(),
                    rs2: tmp2.to_reg(),
                }
                .emit(sink, emit_info, state);
                sink.bind_label(label_over, &mut state.ctrl_plane);
            }
            // Advance `step`, `tmp`, and `tmp2` for the next bit.
            {
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: step,
                    rs: step.to_reg(),
                    imm12: Imm12::from_i16(-1),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Srli,
                    rd: tmp,
                    rs: tmp.to_reg(),
                    imm12: Imm12::ONE,
                }
                .emit(sink, emit_info, state);
                {
                    // Reposition the destination bit:
                    //   if (step % 8 == 0) then tmp2 = tmp2 >> 15
                    //     (we crossed a byte boundary: drop from the top bit
                    //     of this byte to the bottom bit of the next lower
                    //     byte; 7 up + 8 down = 15)
                    //   if (step % 8 != 0) then tmp2 = tmp2 << 1
                    let label_over = sink.get_label();
                    let label_sll_1 = sink.get_label();
                    Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8))
                        .emit(sink, emit_info, state);
                    Inst::AluRRR {
                        alu_op: AluOPRRR::Rem,
                        rd: writable_spilltmp_reg2(),
                        rs1: step.to_reg(),
                        rs2: spilltmp_reg2(),
                    }
                    .emit(sink, emit_info, state);
                    Inst::CondBr {
                        taken: CondBrTarget::Label(label_sll_1),
                        not_taken: CondBrTarget::Fallthrough,
                        kind: IntegerCompare {
                            kind: IntCC::NotEqual,
                            rs1: spilltmp_reg2(),
                            rs2: zero_reg(),
                        },
                    }
                    .emit(sink, emit_info, state);
                    Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Srli,
                        rd: tmp2,
                        rs: tmp2.to_reg(),
                        imm12: Imm12::from_i16(15),
                    }
                    .emit(sink, emit_info, state);
                    Inst::gen_jump(label_over).emit(sink, emit_info, state);
                    sink.bind_label(label_sll_1, &mut state.ctrl_plane);
                    Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Slli,
                        rd: tmp2,
                        rs: tmp2.to_reg(),
                        imm12: Imm12::ONE,
                    }
                    .emit(sink, emit_info, state);
                    sink.bind_label(label_over, &mut state.ctrl_plane);
                }
                Inst::gen_jump(label_loop).emit(sink, emit_info, state);
            }
            sink.bind_label(label_done, &mut state.ctrl_plane);
        }
        &Inst::StackProbeLoop {
            guard_size,
            probe_count,
            tmp: guard_size_tmp,
        } => {
            // Touch one byte every `guard_size` bytes below SP, walking from
            // the farthest probe back toward SP.
            let step = writable_spilltmp_reg();
            Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
                .iter()
                .for_each(|i| i.emit(sink, emit_info, state));
            Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
                .iter()
                .for_each(|i| i.emit(sink, emit_info, state));

            let loop_start = sink.get_label();
            let label_done = sink.get_label();
            sink.bind_label(loop_start, &mut state.ctrl_plane);
            // Done once the remaining offset no longer exceeds one guard page.
            Inst::CondBr {
                taken: CondBrTarget::Label(label_done),
                not_taken: CondBrTarget::Fallthrough,
                kind: IntegerCompare {
                    kind: IntCC::UnsignedLessThanOrEqual,
                    rs1: step.to_reg(),
                    rs2: guard_size_tmp.to_reg(),
                },
            }
            .emit(sink, emit_info, state);
            // Compute the probe address: SP - step.
            Inst::AluRRR {
                alu_op: AluOPRRR::Sub,
                rd: writable_spilltmp_reg2(),
                rs1: stack_reg(),
                rs2: step.to_reg(),
            }
            .emit(sink, emit_info, state);
            // Probe with a one-byte store of zero.
            Inst::Store {
                to: AMode::RegOffset(spilltmp_reg2(), 0),
                op: StoreOP::Sb,
                flags: MemFlags::new(),
                src: zero_reg(),
            }
            .emit(sink, emit_info, state);
            // step -= guard_size.
            Inst::AluRRR {
                alu_op: AluOPRRR::Sub,
                rd: step,
                rs1: step.to_reg(),
                rs2: guard_size_tmp.to_reg(),
            }
            .emit(sink, emit_info, state);
            Inst::gen_jump(loop_start).emit(sink, emit_info, state);
            sink.bind_label(label_done, &mut state.ctrl_plane);
        }
        &Inst::VecAluRRRImm5 {
            op,
            vd,
            vd_src,
            imm,
            vs2,
            ref mask,
            ..
        } => {
            // The destination doubles as a source; register allocation must
            // have assigned both to the same register.
            debug_assert_eq!(vd.to_reg(), vd_src);

            sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask));
        }
        &Inst::VecAluRRRR {
            op,
            vd,
            vd_src,
            vs1,
            vs2,
            ref mask,
            ..
        } => {
            // The destination doubles as a source; register allocation must
            // have assigned both to the same register.
            debug_assert_eq!(vd.to_reg(), vd_src);

            sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask));
        }
        &Inst::VecAluRRR {
            op,
            vd,
            vs1,
            vs2,
            ref mask,
            ..
        } => {
            sink.put4(encode_valu(op, vd, vs1, vs2, *mask));
        }
        &Inst::VecAluRRImm5 {
            op,
            vd,
            imm,
            vs2,
            ref mask,
            ..
        } => {
            sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask));
        }
        &Inst::VecAluRR {
            op,
            vd,
            vs,
            ref mask,
            ..
        } => {
            sink.put4(encode_valu_rr(op, vd, vs, *mask));
        }
        &Inst::VecAluRImm5 {
            op,
            vd,
            imm,
            ref mask,
            ..
        } => {
            sink.put4(encode_valu_r_imm(op, vd, imm, *mask));
        }
        &Inst::VecSetState { rd, ref vstate } => {
            // 0x57 is the OP-V major opcode (vsetvli et al.) per the RISC-V
            // "V" extension spec.
            sink.put4(encode_vcfg_imm(
                0x57,
                rd.to_reg(),
                vstate.avl.unwrap_static(),
                &vstate.vtype,
            ));

            // Update the current vector emit state.
            state.vstate = EmitVState::Known(*vstate);
        }

        &Inst::VecLoad {
            eew,
            to,
            ref from,
            ref mask,
            flags,
            ..
        } => {
            // Vector loads don't support immediate offsets, so we need to load it into a register.
            let addr = match from {
                VecAMode::UnitStride { base } => {
                    let base_reg = base.get_base_register();
                    let offset = base.get_offset_with_state(state);

                    // Reg+0 Offset can be directly encoded
                    if let (Some(base_reg), 0) = (base_reg, offset) {
                        base_reg
                    } else {
                        // Otherwise load the address into a reg and load from it.
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr {
                            rd: tmp,
                            mem: *base,
                        }
                        .emit(sink, emit_info, state);
                        tmp.to_reg()
                    }
                }
            };

            if let Some(trap_code) = flags.trap_code() {
                // Register the offset at which the actual load instruction starts.
                sink.add_trap(trap_code);
            }

            // 0x07 is the LOAD-FP major opcode, which the V extension reuses
            // for vector loads.
            sink.put4(encode_vmem_load(
                0x07,
                to.to_reg(),
                eew,
                addr,
                from.lumop(),
                *mask,
                from.mop(),
                from.nf(),
            ));
        }

        &Inst::VecStore {
            eew,
            ref to,
            from,
            ref mask,
            flags,
            ..
        } => {
            // Vector stores don't support immediate offsets, so we need to load the address into a register.
            let addr = match to {
                VecAMode::UnitStride { base } => {
                    let base_reg = base.get_base_register();
                    let offset = base.get_offset_with_state(state);

                    // Reg+0 Offset can be directly encoded
                    if let (Some(base_reg), 0) = (base_reg, offset) {
                        base_reg
                    } else {
                        // Otherwise load the address into a reg and store through it.
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr {
                            rd: tmp,
                            mem: *base,
                        }
                        .emit(sink, emit_info, state);
                        tmp.to_reg()
                    }
                }
            };

            if let Some(trap_code) = flags.trap_code() {
                // Register the offset at which the actual store instruction starts.
                sink.add_trap(trap_code);
            }

            // 0x27 is the STORE-FP major opcode, which the V extension reuses
            // for vector stores.
            sink.put4(encode_vmem_store(
                0x27,
                from,
                eew,
                addr,
                to.sumop(),
                *mask,
                to.mop(),
                to.nf(),
            ));
        }

        Inst::EmitIsland { needed_space } => {
            if sink.island_needed(*needed_space) {
                // Jump over the island so fallthrough execution resumes after
                // it; the extra 4 bytes account for that jump itself.
                let jump_around_label = sink.get_label();
                Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
                sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
                sink.bind_label(jump_around_label, &mut state.ctrl_plane);
            }
        }
    }
}
}

/// Emit a return-call (tail-call) sequence, preceded by a constant island if
/// one is needed, since the sequence itself can be too large to safely cross
/// an island deadline.
fn emit_return_call_common_sequence<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    // The return call sequence can potentially emit a lot of instructions (up to 634 bytes!)
    // So let's emit an island here if we need it.
    //
    // It is difficult to calculate exactly how many instructions are going to be emitted, so
    // we calculate it by emitting it into a disposable buffer, and then checking how many bytes
    // were actually emitted.
    let mut buffer = MachBuffer::new();
    let mut fake_emit_state = state.clone();

    return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);

    // Finalize the buffer and get the number of bytes emitted.
    let buffer = buffer.finish(&Default::default(), &mut Default::default());
    let length = buffer.data().len() as u32;

    // And now emit the island inline with this instruction.
    if sink.island_needed(length) {
        let jump_around_label = sink.get_label();
        Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
        sink.emit_island(length + 4, &mut state.ctrl_plane);
        sink.bind_label(jump_around_label, &mut state.ctrl_plane);
    }

    // Now that we're done, emit the *actual* return sequence.
    return_call_emit_impl(sink, emit_info, state, info);
}

/// Emits the return-call epilogue: restore the clobbered callee-saved
/// registers, restore the link register and frame pointer, then release the
/// frame by incrementing SP.
///
/// This should not be called directly; instead prefer to call
/// [emit_return_call_common_sequence].
fn return_call_emit_impl<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    // Distance from the current SP up to the saved FP slot: the clobber area
    // plus fixed frame storage plus outgoing-args space all sit below it.
    let sp_to_fp_offset = {
        let frame_layout = state.frame_layout();
        i64::from(
            frame_layout.clobber_size
                + frame_layout.fixed_frame_storage_size
                + frame_layout.outgoing_args_size,
        )
    };

    // Reload the clobbered callee-saved registers, from the highest slot
    // (just below the saved FP) downward, 8 bytes per slot.
    let mut clobber_offset = sp_to_fp_offset - 8;
    for reg in state.frame_layout().clobbered_callee_saves.clone() {
        let rreg = reg.to_reg();
        let ty = match rreg.class() {
            RegClass::Int => I64,
            RegClass::Float => F64,
            RegClass::Vector => unimplemented!("Vector Clobber Restores"),
        };

        Inst::gen_load(
            reg.map(Reg::from),
            AMode::SPOffset(clobber_offset),
            ty,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        clobber_offset -= 8
    }

    // Restore the link register and frame pointer (saved at
    // sp_to_fp_offset + 8 and sp_to_fp_offset respectively).
    let setup_area_size = i64::from(state.frame_layout().setup_area_size);
    if setup_area_size > 0 {
        Inst::gen_load(
            writable_link_reg(),
            AMode::SPOffset(sp_to_fp_offset + 8),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        Inst::gen_load(
            writable_fp_reg(),
            AMode::SPOffset(sp_to_fp_offset),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);
    }

    // If we over-allocated the incoming args area in the prologue, resize down to what the callee
    // is expecting.
    let incoming_args_diff =
        i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);

    // Increment SP all at once.
    let sp_increment = sp_to_fp_offset + setup_area_size + incoming_args_diff;
    if sp_increment > 0 {
        for inst in Riscv64MachineDeps::gen_sp_reg_adjust(i32::try_from(sp_increment).unwrap()) {
            inst.emit(sink, emit_info, state);
        }
    }
}