//! Riscv64 ISA: binary code emission.
2
3
use crate::ir::{self, LibCall, TrapCode};
4
use crate::isa::riscv64::inst::*;
5
use crate::isa::riscv64::lower::isle::generated_code::{
6
CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp,
7
};
8
use cranelift_control::ControlPlane;
9
10
pub struct EmitInfo {
11
#[expect(dead_code, reason = "may want to be used in the future")]
12
shared_flag: settings::Flags,
13
isa_flags: super::super::riscv_settings::Flags,
14
}
15
16
impl EmitInfo {
17
pub(crate) fn new(
18
shared_flag: settings::Flags,
19
isa_flags: super::super::riscv_settings::Flags,
20
) -> Self {
21
Self {
22
shared_flag,
23
isa_flags,
24
}
25
}
26
}
27
28
pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
29
u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
30
}
31
32
pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 {
33
let real_reg = m.to_real_reg().unwrap().hw_enc();
34
debug_assert!(real_reg >= 8 && real_reg < 16);
35
let compressed_reg = real_reg - 8;
36
u32::from(compressed_reg)
37
}
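// Illustrative note (for clarity, not load-bearing): the RVC "compressed"
// register file is x8..x15 (s0/fp, s1, a0..a5), so e.g. a0 (x10) is encoded
// as compressed register number 10 - 8 = 2.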
38
39
#[derive(Clone, Debug, PartialEq, Default)]
40
pub enum EmitVState {
41
#[default]
42
Unknown,
43
Known(VState),
44
}
45
46
/// State carried between emissions of a sequence of instructions.
47
#[derive(Default, Clone, Debug)]
48
pub struct EmitState {
49
/// The user stack map for the upcoming instruction, as provided to
50
/// `pre_safepoint()`.
51
user_stack_map: Option<ir::UserStackMap>,
52
53
/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
/// optimized away at compile time. See [cranelift_control].
55
ctrl_plane: ControlPlane,
56
57
/// Vector State
58
/// Controls the current state of the vector unit at the emission point.
59
vstate: EmitVState,
60
61
frame_layout: FrameLayout,
62
}
63
64
impl EmitState {
65
fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
66
self.user_stack_map.take()
67
}
68
69
fn clobber_vstate(&mut self) {
70
self.vstate = EmitVState::Unknown;
71
}
72
}
73
74
impl MachInstEmitState<Inst> for EmitState {
75
fn new(
76
abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>,
77
ctrl_plane: ControlPlane,
78
) -> Self {
79
EmitState {
80
user_stack_map: None,
81
ctrl_plane,
82
vstate: EmitVState::Unknown,
83
frame_layout: abi.frame_layout().clone(),
84
}
85
}
86
87
fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
88
self.user_stack_map = user_stack_map;
89
}
90
91
fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
92
&mut self.ctrl_plane
93
}
94
95
fn take_ctrl_plane(self) -> ControlPlane {
96
self.ctrl_plane
97
}
98
99
fn on_new_block(&mut self) {
100
// Reset the vector state.
101
self.clobber_vstate();
102
}
103
104
fn frame_layout(&self) -> &FrameLayout {
105
&self.frame_layout
106
}
107
}
108
109
impl Inst {
110
/// Load an integer mask into `rd`: the low `ty.bits()` bits are set and the
/// remaining bits are clear (e.g. 0xff for `I8`).
112
pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {
113
let mut insts = SmallInstVec::new();
114
assert!(ty.is_int() && ty.bits() <= 64);
115
match ty {
116
I64 => {
117
insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
118
}
119
I32 | I16 => {
120
insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
121
insts.push(Inst::Extend {
122
rd,
123
rn: rd.to_reg(),
124
signed: false,
125
from_bits: ty.bits() as u8,
126
to_bits: 64,
127
});
128
}
129
I8 => {
130
insts.push(Inst::load_imm12(rd, Imm12::from_i16(255)));
131
}
132
_ => unreachable!("ty:{:?}", ty),
133
}
134
insts
135
}
136
/// Invert all bits of `rs` into `rd` (bitwise NOT).
137
pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {
138
Inst::AluRRImm12 {
139
alu_op: AluOPRRI::Xori,
140
rd,
141
rs,
142
imm12: Imm12::from_i16(-1),
143
}
144
}
145
146
/// Returns Some(VState) if this instruction is expecting a specific vector state
147
/// before emission.
148
fn expected_vstate(&self) -> Option<&VState> {
149
match self {
150
Inst::Nop0
151
| Inst::Nop4
152
| Inst::BrTable { .. }
153
| Inst::Auipc { .. }
154
| Inst::Fli { .. }
155
| Inst::Lui { .. }
156
| Inst::LoadInlineConst { .. }
157
| Inst::AluRRR { .. }
158
| Inst::FpuRRR { .. }
159
| Inst::AluRRImm12 { .. }
160
| Inst::CsrReg { .. }
161
| Inst::CsrImm { .. }
162
| Inst::Load { .. }
163
| Inst::Store { .. }
164
| Inst::Args { .. }
165
| Inst::Rets { .. }
166
| Inst::Ret { .. }
167
| Inst::Extend { .. }
168
| Inst::Call { .. }
169
| Inst::CallInd { .. }
170
| Inst::ReturnCall { .. }
171
| Inst::ReturnCallInd { .. }
172
| Inst::Jal { .. }
173
| Inst::CondBr { .. }
174
| Inst::LoadExtNameGot { .. }
175
| Inst::LoadExtNameNear { .. }
176
| Inst::LoadExtNameFar { .. }
177
| Inst::ElfTlsGetAddr { .. }
178
| Inst::LoadAddr { .. }
179
| Inst::Mov { .. }
180
| Inst::MovFromPReg { .. }
181
| Inst::Fence { .. }
182
| Inst::EBreak
183
| Inst::Udf { .. }
184
| Inst::FpuRR { .. }
185
| Inst::FpuRRRR { .. }
186
| Inst::Jalr { .. }
187
| Inst::Atomic { .. }
188
| Inst::Select { .. }
189
| Inst::AtomicCas { .. }
190
| Inst::RawData { .. }
191
| Inst::AtomicStore { .. }
192
| Inst::AtomicLoad { .. }
193
| Inst::AtomicRmwLoop { .. }
194
| Inst::TrapIf { .. }
195
| Inst::Unwind { .. }
196
| Inst::DummyUse { .. }
197
| Inst::LabelAddress { .. }
198
| Inst::Popcnt { .. }
199
| Inst::Cltz { .. }
200
| Inst::Brev8 { .. }
201
| Inst::StackProbeLoop { .. } => None,
202
203
// VecSetState does not expect any vstate; rather, it updates it.
204
Inst::VecSetState { .. } => None,
205
206
// `vmv` instructions copy a set of registers and ignore vstate.
207
Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,
208
209
Inst::VecAluRR { vstate, .. } |
210
Inst::VecAluRRR { vstate, .. } |
211
Inst::VecAluRRRR { vstate, .. } |
212
Inst::VecAluRImm5 { vstate, .. } |
213
Inst::VecAluRRImm5 { vstate, .. } |
214
Inst::VecAluRRRImm5 { vstate, .. } |
215
// TODO: Unit-stride loads and stores only need the AVL to be correct, not
216
// the full vtype. A future optimization could be to decouple these two when
217
// updating vstate. This would allow us to avoid emitting a VecSetState in
218
// some cases.
219
Inst::VecLoad { vstate, .. }
220
| Inst::VecStore { vstate, .. } => Some(vstate),
221
Inst::EmitIsland { .. } => None,
222
}
223
}
224
}
225
226
impl MachInstEmit for Inst {
227
type State = EmitState;
228
type Info = EmitInfo;
229
230
fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
231
// Check if we need to update the vector state before emitting this instruction
232
if let Some(expected) = self.expected_vstate() {
233
if state.vstate != EmitVState::Known(*expected) {
234
// Update the vector state.
235
Inst::VecSetState {
236
rd: writable_zero_reg(),
237
vstate: *expected,
238
}
239
.emit(sink, emit_info, state);
240
}
241
}
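// Illustrative sketch (for clarity): because the known vstate is tracked here,
// two consecutive vector ops that expect the same vstate only pay for one
// VecSetState:
//
//   vsetvli zero, <avl>, <vtype>   ; emitted once for the first vector op
//   vadd.vv v1, v2, v3
//   vadd.vv v4, v5, v6             ; vstate already Known, no second vsetvli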
242
243
// N.B.: we *must* not exceed the "worst-case size" used to compute
244
// where to insert islands, except when islands are explicitly triggered
245
// (with an `EmitIsland`). We check this in debug builds. This is `mut`
246
// to allow disabling the check for `JTSequence`, which is always
247
// emitted following an `EmitIsland`.
248
let mut start_off = sink.cur_offset();
249
250
// First try to emit this as a compressed instruction
251
let res = self.try_emit_compressed(sink, emit_info, state, &mut start_off);
252
if res.is_none() {
253
// If we can't, emit it as a normal (uncompressed) instruction
254
self.emit_uncompressed(sink, emit_info, state, &mut start_off);
255
}
256
257
// We exclude br_table, call, return_call and try_call from
258
// these checks since they emit their own islands, and thus
259
// are allowed to exceed the worst case size.
260
let emits_own_island = match self {
261
Inst::BrTable { .. }
262
| Inst::ReturnCall { .. }
263
| Inst::ReturnCallInd { .. }
264
| Inst::Call { .. }
265
| Inst::CallInd { .. }
266
| Inst::EmitIsland { .. } => true,
267
_ => false,
268
};
269
if !emits_own_island {
270
let end_off = sink.cur_offset();
271
assert!(
272
(end_off - start_off) <= Inst::worst_case_size(),
273
"Inst:{:?} length:{} worst_case_size:{}",
274
self,
275
end_off - start_off,
276
Inst::worst_case_size()
277
);
278
}
279
}
280
281
fn pretty_print_inst(&self, state: &mut Self::State) -> String {
282
self.print_with_state(state)
283
}
284
}
285
286
impl Inst {
287
/// Tries to emit this instruction in its compressed form.
/// Returns `None` if it cannot be compressed.
288
fn try_emit_compressed(
289
&self,
290
sink: &mut MachBuffer<Inst>,
291
emit_info: &EmitInfo,
292
state: &mut EmitState,
293
start_off: &mut u32,
294
) -> Option<()> {
295
let has_m = emit_info.isa_flags.has_m();
296
let has_zba = emit_info.isa_flags.has_zba();
297
let has_zbb = emit_info.isa_flags.has_zbb();
298
let has_zca = emit_info.isa_flags.has_zca();
299
let has_zcb = emit_info.isa_flags.has_zcb();
300
let has_zcd = emit_info.isa_flags.has_zcd();
301
302
// Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc.) require Zca
// to be enabled, so check it early.
304
if !has_zca {
305
return None;
306
}
307
308
fn reg_is_compressible(r: Reg) -> bool {
309
r.to_real_reg()
310
.map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16)
311
.unwrap_or(false)
312
}
313
314
match *self {
315
// C.ADD
316
Inst::AluRRR {
317
alu_op: AluOPRRR::Add,
318
rd,
319
rs1,
320
rs2,
321
} if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
322
&& rs1 != zero_reg()
323
&& rs2 != zero_reg() =>
324
{
325
// Technically `c.add rd, rs` expands to `add rd, rd, rs`, but we can
// also swap rs1 with rs2 and get an equivalent instruction, i.e. we
// can also compress `add rd, rs, rd` into `c.add rd, rs`.
328
let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };
329
330
sink.put2(encode_cr_type(CrOp::CAdd, rd, src));
331
}
332
333
// C.MV
334
Inst::AluRRImm12 {
335
alu_op: AluOPRRI::Addi | AluOPRRI::Ori,
336
rd,
337
rs,
338
imm12,
339
} if rd.to_reg() != rs
340
&& rd.to_reg() != zero_reg()
341
&& rs != zero_reg()
342
&& imm12.as_i16() == 0 =>
343
{
344
sink.put2(encode_cr_type(CrOp::CMv, rd, rs));
345
}
346
347
// CA Ops
348
Inst::AluRRR {
349
alu_op:
350
alu_op @ (AluOPRRR::And
351
| AluOPRRR::Or
352
| AluOPRRR::Xor
353
| AluOPRRR::Addw
354
| AluOPRRR::Mul),
355
rd,
356
rs1,
357
rs2,
358
} if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
359
&& reg_is_compressible(rs1)
360
&& reg_is_compressible(rs2) =>
361
{
362
let op = match alu_op {
363
AluOPRRR::And => CaOp::CAnd,
364
AluOPRRR::Or => CaOp::COr,
365
AluOPRRR::Xor => CaOp::CXor,
366
AluOPRRR::Addw => CaOp::CAddw,
367
AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul,
368
_ => return None,
369
};
370
// The canonical expansion for these instructions has `rd == rs1`, but
// these are all commutative operations, so we can swap the operands.
372
let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };
373
374
sink.put2(encode_ca_type(op, rd, src));
375
}
376
377
// The sub instructions are non-commutative, so we can't swap the operands.
378
Inst::AluRRR {
379
alu_op: alu_op @ (AluOPRRR::Sub | AluOPRRR::Subw),
380
rd,
381
rs1,
382
rs2,
383
} if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => {
384
let op = match alu_op {
385
AluOPRRR::Sub => CaOp::CSub,
386
AluOPRRR::Subw => CaOp::CSubw,
387
_ => return None,
388
};
389
sink.put2(encode_ca_type(op, rd, rs2));
390
}
391
392
// c.j
393
//
394
// We don't have a separate JAL as that is only available in RV32C
395
Inst::Jal { label } => {
396
sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump);
397
sink.add_uncond_branch(*start_off, *start_off + 2, label);
398
sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO));
399
}
400
401
// c.jr
402
Inst::Jalr { rd, base, offset }
403
if rd.to_reg() == zero_reg() && base != zero_reg() && offset.as_i16() == 0 =>
404
{
405
sink.put2(encode_cr2_type(CrOp::CJr, base));
406
state.clobber_vstate();
407
}
408
409
// c.jalr
410
Inst::Jalr { rd, base, offset }
411
if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 =>
412
{
413
sink.put2(encode_cr2_type(CrOp::CJalr, base));
414
state.clobber_vstate();
415
}
416
417
// c.ebreak
418
Inst::EBreak => {
419
sink.put2(encode_cr_type(
420
CrOp::CEbreak,
421
writable_zero_reg(),
422
zero_reg(),
423
));
424
}
425
426
// c.unimp
427
Inst::Udf { trap_code } => {
428
sink.add_trap(trap_code);
429
sink.put2(0x0000);
430
}
431
// c.addi16sp
432
//
433
// c.addi16sp shares the opcode with c.lui, but has a destination field of x2.
// c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in
// the stack pointer (sp=x2), where the immediate is scaled to represent
// multiples of 16 in the range (-512,496). c.addi16sp is used to adjust the
// stack pointer in procedure prologues and epilogues. It expands into
// addi x2, x2, nzimm. c.addi16sp is only valid when nzimm≠0; the code point
// with nzimm=0 is reserved.
438
Inst::AluRRImm12 {
439
alu_op: AluOPRRI::Addi,
440
rd,
441
rs,
442
imm12,
443
} if rd.to_reg() == rs
444
&& rs == stack_reg()
445
&& imm12.as_i16() != 0
446
&& (imm12.as_i16() % 16) == 0
447
&& Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() =>
448
{
449
let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap();
450
sink.put2(encode_c_addi16sp(imm6));
451
}
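// Worked example (illustrative): `addi sp, sp, -32` has a non-zero immediate
// that is a multiple of 16, and -32 / 16 = -2 fits in a signed 6-bit field,
// so it is emitted as `c.addi16sp` here.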
452
453
// c.addi4spn
454
//
455
// c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero
456
// immediate, scaled by 4, to the stack pointer, x2, and writes the result to
457
// rd. This instruction is used to generate pointers to stack-allocated variables
458
// and expands to addi rd, x2, nzuimm. c.addi4spn is only valid when nzuimm≠0;
459
// the code points with nzuimm=0 are reserved.
460
Inst::AluRRImm12 {
461
alu_op: AluOPRRI::Addi,
462
rd,
463
rs,
464
imm12,
465
} if reg_is_compressible(rd.to_reg())
466
&& rs == stack_reg()
467
&& imm12.as_i16() != 0
468
&& (imm12.as_i16() % 4) == 0
469
&& u8::try_from(imm12.as_i16() / 4).is_ok() =>
470
{
471
let imm = u8::try_from(imm12.as_i16() / 4).unwrap();
472
sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm));
473
}
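// Worked example (illustrative): `addi s0, sp, 16` writes to a compressible
// register, and the scaled immediate 16 / 4 = 4 fits the zero-extended field,
// so it is emitted as `c.addi4spn`.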
474
475
// c.li
476
Inst::AluRRImm12 {
477
alu_op: AluOPRRI::Addi,
478
rd,
479
rs,
480
imm12,
481
} if rd.to_reg() != zero_reg() && rs == zero_reg() => {
482
let imm6 = Imm6::maybe_from_imm12(imm12)?;
483
sink.put2(encode_ci_type(CiOp::CLi, rd, imm6));
484
}
485
486
// c.addi
487
Inst::AluRRImm12 {
488
alu_op: AluOPRRI::Addi,
489
rd,
490
rs,
491
imm12,
492
} if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
493
let imm6 = Imm6::maybe_from_imm12(imm12)?;
494
sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6));
495
}
496
497
// c.addiw
498
Inst::AluRRImm12 {
499
alu_op: AluOPRRI::Addiw,
500
rd,
501
rs,
502
imm12,
503
} if rd.to_reg() == rs && rs != zero_reg() => {
504
let imm6 = Imm6::maybe_from_imm12(imm12)?;
505
sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6));
506
}
507
508
// c.lui
509
//
510
// c.lui loads the non-zero 6-bit immediate field into bits 17–12
511
// of the destination register, clears the bottom 12 bits, and
512
// sign-extends bit 17 into all higher bits of the destination.
513
Inst::Lui { rd, imm: imm20 }
514
if rd.to_reg() != zero_reg()
515
&& rd.to_reg() != stack_reg()
516
&& imm20.as_i32() != 0 =>
517
{
518
// Check that the top bits are sign extended
519
let imm = imm20.as_i32() << 14 >> 14;
520
if imm != imm20.as_i32() {
521
return None;
522
}
523
let imm6 = Imm6::maybe_from_i32(imm)?;
524
sink.put2(encode_ci_type(CiOp::CLui, rd, imm6));
525
}
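// Worked example (illustrative): `lui a0, 1` survives the sign-extension
// round trip and 1 fits in the signed 6-bit field, so it compresses to
// `c.lui`; `lui a0, 0x40` does not fit in 6 bits and keeps the 32-bit
// encoding.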
526
527
// c.slli
528
Inst::AluRRImm12 {
529
alu_op: AluOPRRI::Slli,
530
rd,
531
rs,
532
imm12,
533
} if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
534
// The shift amount is unsigned, but we encode it as signed.
535
let shift = imm12.as_i16() & 0x3f;
536
let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
537
sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6));
538
}
539
540
// c.srli / c.srai
541
Inst::AluRRImm12 {
542
alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai),
543
rd,
544
rs,
545
imm12,
546
} if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => {
547
let op = match op {
548
AluOPRRI::Srli => CbOp::CSrli,
549
AluOPRRI::Srai => CbOp::CSrai,
550
_ => unreachable!(),
551
};
552
553
// The shift amount is unsigned, but we encode it as signed.
554
let shift = imm12.as_i16() & 0x3f;
555
let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
556
sink.put2(encode_cb_type(op, rd, imm6));
557
}
558
559
// c.zextb
560
//
561
// This is an alias for `andi rd, rd, 0xff`
562
Inst::AluRRImm12 {
563
alu_op: AluOPRRI::Andi,
564
rd,
565
rs,
566
imm12,
567
} if has_zcb
568
&& rd.to_reg() == rs
569
&& reg_is_compressible(rs)
570
&& imm12.as_i16() == 0xff =>
571
{
572
sink.put2(encode_cszn_type(CsznOp::CZextb, rd));
573
}
574
575
// c.andi
576
Inst::AluRRImm12 {
577
alu_op: AluOPRRI::Andi,
578
rd,
579
rs,
580
imm12,
581
} if rd.to_reg() == rs && reg_is_compressible(rs) => {
582
let imm6 = Imm6::maybe_from_imm12(imm12)?;
583
sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6));
584
}
585
586
// Stack Based Loads
587
Inst::Load {
588
rd,
589
op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld),
590
from,
591
flags,
592
} if from.get_base_register() == Some(stack_reg())
593
&& (from.get_offset_with_state(state) % op.size()) == 0 =>
594
{
595
// We encode the offset in multiples of the load size.
596
let offset = from.get_offset_with_state(state);
597
let imm6 = u8::try_from(offset / op.size())
598
.ok()
599
.and_then(Uimm6::maybe_from_u8)?;
600
601
// Some additional constraints on these instructions.
602
//
603
// Integer loads are not allowed to target x0, but floating point loads
604
// are, since f0 is not a special register.
605
//
606
// Floating point loads are not included in the base Zca extension
607
// but in a separate Zcd extension. Both of these are part of the C Extension.
608
let rd_is_zero = rd.to_reg() == zero_reg();
609
let op = match op {
610
LoadOP::Lw if !rd_is_zero => CiOp::CLwsp,
611
LoadOP::Ld if !rd_is_zero => CiOp::CLdsp,
612
LoadOP::Fld if has_zcd => CiOp::CFldsp,
613
_ => return None,
614
};
615
616
if let Some(trap_code) = flags.trap_code() {
617
// Register the offset at which the actual load instruction starts.
618
sink.add_trap(trap_code);
619
}
620
sink.put2(encode_ci_sp_load(op, rd, imm6));
621
}
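// Worked example (illustrative): an 8-byte load from sp+24 encodes its offset
// as 24 / 8 = 3, which fits the unsigned 6-bit field of `c.ldsp`; an offset of
// 20 is not a multiple of 8, so that load stays uncompressed.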
622
623
// Regular Loads
624
Inst::Load {
625
rd,
626
op:
627
op
628
@ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh),
629
from,
630
flags,
631
} if reg_is_compressible(rd.to_reg())
632
&& from
633
.get_base_register()
634
.map(reg_is_compressible)
635
.unwrap_or(false)
636
&& (from.get_offset_with_state(state) % op.size()) == 0 =>
637
{
638
let base = from.get_base_register().unwrap();
639
640
// We encode the offset in multiples of the load size.
641
let offset = from.get_offset_with_state(state);
642
let offset = u8::try_from(offset / op.size()).ok()?;
643
644
// We mix two different formats here.
645
//
646
// c.lw / c.ld / c.fld instructions are available in the standard Zca
647
// extension using the CL format.
648
//
649
// c.lbu / c.lhu / c.lh are only available in the Zcb extension and
650
// are also encoded differently. Technically they each have a different
651
// format, but they are similar enough that we can group them.
652
let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh);
653
let encoded = if is_zcb_load {
654
if !has_zcb {
655
return None;
656
}
657
658
let op = match op {
659
LoadOP::Lbu => ZcbMemOp::CLbu,
660
LoadOP::Lhu => ZcbMemOp::CLhu,
661
LoadOP::Lh => ZcbMemOp::CLh,
662
_ => unreachable!(),
663
};
664
665
// Byte stores & loads have 2 bits of immediate offset. Halfword stores
666
// and loads only have 1 bit.
667
let imm2 = Uimm2::maybe_from_u8(offset)?;
668
if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
669
return None;
670
}
671
672
encode_zcbmem_load(op, rd, base, imm2)
673
} else {
674
// Floating point loads are not included in the base Zca extension
675
// but in a separate Zcd extension. Both of these are part of the C Extension.
676
let op = match op {
677
LoadOP::Lw => ClOp::CLw,
678
LoadOP::Ld => ClOp::CLd,
679
LoadOP::Fld if has_zcd => ClOp::CFld,
680
_ => return None,
681
};
682
let imm5 = Uimm5::maybe_from_u8(offset)?;
683
684
encode_cl_type(op, rd, base, imm5)
685
};
686
687
if let Some(trap_code) = flags.trap_code() {
688
// Register the offset at which the actual load instruction starts.
689
sink.add_trap(trap_code);
690
}
691
sink.put2(encoded);
692
}
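// Illustrative note (assuming Zcb is enabled): `lbu a0, 3(a1)` fits the 2-bit
// byte offset of `c.lbu`, while the halfword forms only have 1 offset bit, so
// `lh a0, 2(a1)` compresses but `lh a0, 4(a1)` does not.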
693
694
// Stack Based Stores
695
Inst::Store {
696
src,
697
op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd),
698
to,
699
flags,
700
} if to.get_base_register() == Some(stack_reg())
701
&& (to.get_offset_with_state(state) % op.size()) == 0 =>
702
{
703
// We encode the offset in multiples of the store size.
704
let offset = to.get_offset_with_state(state);
705
let imm6 = u8::try_from(offset / op.size())
706
.ok()
707
.and_then(Uimm6::maybe_from_u8)?;
708
709
// Floating point stores are not included in the base Zca extension
710
// but in a separate Zcd extension. Both of these are part of the C Extension.
711
let op = match op {
712
StoreOP::Sw => CssOp::CSwsp,
713
StoreOP::Sd => CssOp::CSdsp,
714
StoreOP::Fsd if has_zcd => CssOp::CFsdsp,
715
_ => return None,
716
};
717
718
if let Some(trap_code) = flags.trap_code() {
719
// Register the offset at which the actual store instruction starts.
720
sink.add_trap(trap_code);
721
}
722
sink.put2(encode_css_type(op, src, imm6));
723
}
724
725
// Regular Stores
726
Inst::Store {
727
src,
728
op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb),
729
to,
730
flags,
731
} if reg_is_compressible(src)
732
&& to
733
.get_base_register()
734
.map(reg_is_compressible)
735
.unwrap_or(false)
736
&& (to.get_offset_with_state(state) % op.size()) == 0 =>
737
{
738
let base = to.get_base_register().unwrap();
739
740
// We encode the offset in multiples of the store size.
741
let offset = to.get_offset_with_state(state);
742
let offset = u8::try_from(offset / op.size()).ok()?;
743
744
// We mix two different formats here.
745
//
746
// c.sw / c.sd / c.fsd instructions are available in the standard Zca
// extension using the CS format.
748
//
749
// c.sb / c.sh are only available in the Zcb extension and are also
750
// encoded differently.
751
let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb);
752
let encoded = if is_zcb_store {
753
if !has_zcb {
754
return None;
755
}
756
757
let op = match op {
758
StoreOP::Sh => ZcbMemOp::CSh,
759
StoreOP::Sb => ZcbMemOp::CSb,
760
_ => unreachable!(),
761
};
762
763
// Byte stores & loads have 2 bits of immediate offset. Halfword stores
764
// and loads only have 1 bit.
765
let imm2 = Uimm2::maybe_from_u8(offset)?;
766
if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
767
return None;
768
}
769
770
encode_zcbmem_store(op, src, base, imm2)
771
} else {
772
// Floating point stores are not included in the base Zca extension
773
// but in a separate Zcd extension. Both of these are part of the C Extension.
774
let op = match op {
775
StoreOP::Sw => CsOp::CSw,
776
StoreOP::Sd => CsOp::CSd,
777
StoreOP::Fsd if has_zcd => CsOp::CFsd,
778
_ => return None,
779
};
780
let imm5 = Uimm5::maybe_from_u8(offset)?;
781
782
encode_cs_type(op, src, base, imm5)
783
};
784
785
if let Some(trap_code) = flags.trap_code() {
786
// Register the offset at which the actual store instruction starts.
787
sink.add_trap(trap_code);
788
}
789
sink.put2(encoded);
790
}
791
792
// c.not
793
//
794
// This is an alias for `xori rd, rd, -1`
795
Inst::AluRRImm12 {
796
alu_op: AluOPRRI::Xori,
797
rd,
798
rs,
799
imm12,
800
} if has_zcb
801
&& rd.to_reg() == rs
802
&& reg_is_compressible(rs)
803
&& imm12.as_i16() == -1 =>
804
{
805
sink.put2(encode_cszn_type(CsznOp::CNot, rd));
806
}
807
808
// c.sext.b / c.sext.h / c.zext.h
809
//
810
// These are all the extend instructions present in `Zcb`, they
811
// also require `Zbb` since they aren't available in the base ISA.
812
Inst::AluRRImm12 {
813
alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth),
814
rd,
815
rs,
816
imm12,
817
} if has_zcb
818
&& has_zbb
819
&& rd.to_reg() == rs
820
&& reg_is_compressible(rs)
821
&& imm12.as_i16() == 0 =>
822
{
823
let op = match alu_op {
824
AluOPRRI::Sextb => CsznOp::CSextb,
825
AluOPRRI::Sexth => CsznOp::CSexth,
826
AluOPRRI::Zexth => CsznOp::CZexth,
827
_ => unreachable!(),
828
};
829
sink.put2(encode_cszn_type(op, rd));
830
}
831
832
// c.zext.w
833
//
834
// This is an alias for `add.uw rd, rd, zero`
835
Inst::AluRRR {
836
alu_op: AluOPRRR::Adduw,
837
rd,
838
rs1,
839
rs2,
840
} if has_zcb
841
&& has_zba
842
&& rd.to_reg() == rs1
843
&& reg_is_compressible(rs1)
844
&& rs2 == zero_reg() =>
845
{
846
sink.put2(encode_cszn_type(CsznOp::CZextw, rd));
847
}
848
849
_ => return None,
850
}
851
852
return Some(());
853
}
854
855
fn emit_uncompressed(
856
&self,
857
sink: &mut MachBuffer<Inst>,
858
emit_info: &EmitInfo,
859
state: &mut EmitState,
860
start_off: &mut u32,
861
) {
862
match self {
863
&Inst::Nop0 => {
864
// do nothing
865
}
866
// Addi x0, x0, 0
867
&Inst::Nop4 => {
868
let x = Inst::AluRRImm12 {
869
alu_op: AluOPRRI::Addi,
870
rd: Writable::from_reg(zero_reg()),
871
rs: zero_reg(),
872
imm12: Imm12::ZERO,
873
};
874
x.emit(sink, emit_info, state)
875
}
876
&Inst::RawData { ref data } => {
877
// Right now we only put a u32 or u64 in this instruction, so it is short and
// there is no need to check whether we need an `emit_island`. If the data were
// very long, that would be a bug, because RawData is typically used to emit
// data that relies on a particular position in the code stream, and we could
// exceed `Inst::worst_case_size`.
// For more information see https://github.com/bytecodealliance/wasmtime/pull/5612.
883
sink.put_data(&data[..]);
884
}
885
&Inst::Lui { rd, ref imm } => {
886
let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);
887
sink.put4(x);
888
}
889
&Inst::Fli { rd, width, imm } => {
890
sink.put4(encode_fli(width, imm, rd));
891
}
892
&Inst::LoadInlineConst { rd, ty, imm } => {
893
let data = &imm.to_le_bytes()[..ty.bytes() as usize];
894
895
let label_data: MachLabel = sink.get_label();
896
let label_end: MachLabel = sink.get_label();
897
898
// Load into rd
899
Inst::Load {
900
rd,
901
op: LoadOP::from_type(ty),
902
flags: MemFlags::new(),
903
from: AMode::Label(label_data),
904
}
905
.emit(sink, emit_info, state);
906
907
// Jump over the inline pool
908
Inst::gen_jump(label_end).emit(sink, emit_info, state);
909
910
// Emit the inline data
911
sink.bind_label(label_data, &mut state.ctrl_plane);
912
Inst::RawData { data: data.into() }.emit(sink, emit_info, state);
913
914
sink.bind_label(label_end, &mut state.ctrl_plane);
915
}
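// Illustrative shape of the sequence emitted above, for a 64-bit constant
// (labels and data shown schematically):
//
//   ld   rd, label_data      ; pc-relative load of the inline constant
//   j    label_end           ; jump over the inline pool
// label_data:
//   .8byte <imm>
// label_end: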
916
&Inst::FpuRR {
917
alu_op,
918
width,
919
frm,
920
rd,
921
rs,
922
} => {
923
if alu_op.is_convert_to_int() {
924
sink.add_trap(TrapCode::BAD_CONVERSION_TO_INTEGER);
925
}
926
sink.put4(encode_fp_rr(alu_op, width, frm, rd, rs));
927
}
928
&Inst::FpuRRRR {
929
alu_op,
930
rd,
931
rs1,
932
rs2,
933
rs3,
934
frm,
935
width,
936
} => {
937
sink.put4(encode_fp_rrrr(alu_op, width, frm, rd, rs1, rs2, rs3));
938
}
939
&Inst::FpuRRR {
940
alu_op,
941
width,
942
frm,
943
rd,
944
rs1,
945
rs2,
946
} => {
947
sink.put4(encode_fp_rrr(alu_op, width, frm, rd, rs1, rs2));
948
}
949
&Inst::Unwind { ref inst } => {
950
sink.add_unwind(inst.clone());
951
}
952
&Inst::DummyUse { .. } => {
953
// This has already been handled by Inst::allocate.
954
}
955
&Inst::AluRRR {
956
alu_op,
957
rd,
958
rs1,
959
rs2,
960
} => {
961
let (rs1, rs2) = if alu_op.reverse_rs() {
962
(rs2, rs1)
963
} else {
964
(rs1, rs2)
965
};
966
967
sink.put4(encode_r_type(
968
alu_op.op_code(),
969
rd,
970
alu_op.funct3(),
971
rs1,
972
rs2,
973
alu_op.funct7(),
974
));
975
}
976
&Inst::AluRRImm12 {
977
alu_op,
978
rd,
979
rs,
980
imm12,
981
} => {
982
let x = alu_op.op_code()
983
| reg_to_gpr_num(rd.to_reg()) << 7
984
| alu_op.funct3() << 12
985
| reg_to_gpr_num(rs) << 15
986
| alu_op.imm12(imm12) << 20;
987
sink.put4(x);
988
}
989
&Inst::CsrReg { op, rd, rs, csr } => {
990
sink.put4(encode_csr_reg(op, rd, rs, csr));
991
}
992
&Inst::CsrImm { op, rd, csr, imm } => {
993
sink.put4(encode_csr_imm(op, rd, csr, imm));
994
}
995
&Inst::Load {
996
rd,
997
op: LoadOP::Flh,
998
from,
999
flags,
1000
} if !emit_info.isa_flags.has_zfhmin() => {
1001
// flh unavailable, use an integer load instead
1002
Inst::Load {
1003
rd: writable_spilltmp_reg(),
1004
op: LoadOP::Lh,
1005
flags,
1006
from,
1007
}
1008
.emit(sink, emit_info, state);
1009
// NaN-box the `f16` before loading it into the floating-point
1010
// register with a 32-bit `fmv`.
1011
Inst::Lui {
1012
rd: writable_spilltmp_reg2(),
1013
imm: Imm20::from_i32((0xffff_0000_u32 as i32) >> 12),
1014
}
1015
.emit(sink, emit_info, state);
1016
Inst::AluRRR {
1017
alu_op: AluOPRRR::Or,
1018
rd: writable_spilltmp_reg(),
1019
rs1: spilltmp_reg(),
1020
rs2: spilltmp_reg2(),
1021
}
1022
.emit(sink, emit_info, state);
1023
Inst::FpuRR {
1024
alu_op: FpuOPRR::FmvFmtX,
1025
width: FpuOPWidth::S,
1026
frm: FRM::RNE,
1027
rd,
1028
rs: spilltmp_reg(),
1029
}
1030
.emit(sink, emit_info, state);
1031
}
1032
&Inst::Load {
1033
rd,
1034
op,
1035
from,
1036
flags,
1037
} => {
1038
let base = from.get_base_register();
1039
let offset = from.get_offset_with_state(state);
1040
let offset_imm12 = Imm12::maybe_from_i64(offset);
1041
let label = from.get_label_with_sink(sink);
1042
1043
let (addr, imm12) = match (base, offset_imm12, label) {
1044
// When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it.
1045
(Some(base), Some(imm12), None) => (base, imm12),
1046
1047
// Otherwise, if the offset does not fit into an imm12, we need to materialize it into a
// register and load from that.
1049
(Some(_), None, None) => {
1050
let tmp = writable_spilltmp_reg();
1051
Inst::LoadAddr { rd: tmp, mem: from }.emit(sink, emit_info, state);
1052
(tmp.to_reg(), Imm12::ZERO)
1053
}
1054
1055
// If the AMode contains a label we can emit an internal relocation that gets
1056
// resolved with the correct address later.
1057
(None, Some(imm), Some(label)) => {
1058
debug_assert_eq!(imm.as_i16(), 0);
1059
1060
// Get the current PC.
1061
sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
1062
Inst::Auipc {
1063
rd,
1064
imm: Imm20::ZERO,
1065
}
1066
.emit_uncompressed(sink, emit_info, state, start_off);
1067
1068
// Emit a relocation for the load. This patches the offset into the instruction.
1069
sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
1070
1071
// Imm12 here is meaningless since it's going to get replaced.
1072
(rd.to_reg(), Imm12::ZERO)
1073
}
1074
1075
// These cases are impossible with the current AModes that we have. We either
1076
// always have a register, or always have a label. Never both, and never neither.
1077
(None, None, None)
1078
| (None, Some(_), None)
1079
| (Some(_), None, Some(_))
1080
| (Some(_), Some(_), Some(_))
1081
| (None, None, Some(_)) => {
1082
unreachable!("Invalid load address")
1083
}
1084
};
1085
1086
if let Some(trap_code) = flags.trap_code() {
1087
// Register the offset at which the actual load instruction starts.
1088
sink.add_trap(trap_code);
1089
}
1090
1091
sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12));
1092
}
1093
&Inst::Store {
1094
op: StoreOP::Fsh,
1095
src,
1096
flags,
1097
to,
1098
} if !emit_info.isa_flags.has_zfhmin() => {
1099
// fsh unavailable, use an integer store instead
1100
Inst::FpuRR {
1101
alu_op: FpuOPRR::FmvXFmt,
1102
width: FpuOPWidth::S,
1103
frm: FRM::RNE,
1104
rd: writable_spilltmp_reg(),
1105
rs: src,
1106
}
1107
.emit(sink, emit_info, state);
1108
Inst::Store {
1109
to,
1110
op: StoreOP::Sh,
1111
flags,
1112
src: spilltmp_reg(),
1113
}
1114
.emit(sink, emit_info, state);
1115
}
1116
&Inst::Store { op, src, flags, to } => {
1117
let base = to.get_base_register();
1118
let offset = to.get_offset_with_state(state);
1119
let offset_imm12 = Imm12::maybe_from_i64(offset);
1120
1121
let (addr, imm12) = match (base, offset_imm12) {
1122
// If the offset fits into an imm12 we can directly encode it.
1123
(Some(base), Some(imm12)) => (base, imm12),
1124
// Otherwise materialize the address into a register and store through it.
1125
_ => {
1126
let tmp = writable_spilltmp_reg();
1127
Inst::LoadAddr { rd: tmp, mem: to }.emit(sink, emit_info, state);
1128
(tmp.to_reg(), Imm12::ZERO)
1129
}
1130
};
1131
1132
if let Some(trap_code) = flags.trap_code() {
1133
// Register the offset at which the actual store instruction starts.
1134
sink.add_trap(trap_code);
1135
}
1136
1137
sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12));
1138
}
1139
&Inst::Args { .. } | &Inst::Rets { .. } => {
1140
// Nothing: this is a pseudoinstruction that serves
1141
// only to constrain registers at a certain point.
1142
}
1143
&Inst::Ret {} => {
1144
// RISC-V does not have a dedicated ret instruction; instead we emit the equivalent
// `jalr x0, x1, 0`, which jumps to the return address.
1146
Inst::Jalr {
1147
rd: writable_zero_reg(),
1148
base: link_reg(),
1149
offset: Imm12::ZERO,
1150
}
1151
.emit(sink, emit_info, state);
1152
}
1153
1154
&Inst::Extend {
1155
rd,
1156
rn,
1157
signed,
1158
from_bits,
1159
to_bits: _to_bits,
1160
} => {
1161
let mut insts = SmallInstVec::new();
1162
let shift_bits = (64 - from_bits) as i16;
1163
let is_u8 = || from_bits == 8 && signed == false;
1164
if is_u8() {
1165
// Special case for unsigned u8: a single `andi` with 255 keeps only the low byte.
1166
insts.push(Inst::AluRRImm12 {
1167
alu_op: AluOPRRI::Andi,
1168
rd,
1169
rs: rn,
1170
imm12: Imm12::from_i16(255),
1171
});
1172
} else {
1173
insts.push(Inst::AluRRImm12 {
1174
alu_op: AluOPRRI::Slli,
1175
rd,
1176
rs: rn,
1177
imm12: Imm12::from_i16(shift_bits),
1178
});
1179
insts.push(Inst::AluRRImm12 {
1180
alu_op: if signed {
1181
AluOPRRI::Srai
1182
} else {
1183
AluOPRRI::Srli
1184
},
1185
rd,
1186
rs: rd.to_reg(),
1187
imm12: Imm12::from_i16(shift_bits),
1188
});
1189
}
1190
insts
1191
.into_iter()
1192
.for_each(|i| i.emit(sink, emit_info, state));
1193
}
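// Worked example (illustrative): a signed 16 -> 64 bit extend is emitted as
// `slli rd, rn, 48` followed by `srai rd, rd, 48`, while the unsigned 8-bit
// case above is a single `andi rd, rn, 255`.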
1194
1195
&Inst::Call { ref info } => {
1196
sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);
1197
1198
Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0)
1199
.into_iter()
1200
.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1201
1202
if let Some(s) = state.take_stack_map() {
1203
let offset = sink.cur_offset();
1204
sink.push_user_stack_map(state, offset, s);
1205
}
1206
1207
if let Some(try_call) = info.try_call_info.as_ref() {
1208
sink.add_try_call_site(
1209
Some(state.frame_layout.sp_to_fp()),
1210
try_call.exception_handlers(&state.frame_layout),
1211
);
1212
} else {
1213
sink.add_call_site();
1214
}
1215
1216
let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
1217
if callee_pop_size > 0 {
1218
for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
1219
inst.emit(sink, emit_info, state);
1220
}
1221
}
1222
1223
// Load any stack-carried return values.
1224
info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
1225
state.frame_layout().stackslots_size,
1226
|inst| inst.emit(sink, emit_info, state),
1227
|needed_space| Some(Inst::EmitIsland { needed_space }),
1228
);
1229
1230
// If this is a try-call, jump to the continuation
1231
// (normal-return) block.
1232
if let Some(try_call) = info.try_call_info.as_ref() {
1233
let jmp = Inst::Jal {
1234
label: try_call.continuation,
1235
};
1236
jmp.emit(sink, emit_info, state);
1237
}
1238
1239
*start_off = sink.cur_offset();
1240
}
1241
&Inst::CallInd { ref info } => {
1242
Inst::Jalr {
1243
rd: writable_link_reg(),
1244
base: info.dest,
1245
offset: Imm12::ZERO,
1246
}
1247
.emit(sink, emit_info, state);
1248
1249
if let Some(s) = state.take_stack_map() {
1250
let offset = sink.cur_offset();
1251
sink.push_user_stack_map(state, offset, s);
1252
}
1253
1254
if let Some(try_call) = info.try_call_info.as_ref() {
1255
sink.add_try_call_site(
1256
Some(state.frame_layout.sp_to_fp()),
1257
try_call.exception_handlers(&state.frame_layout),
1258
);
1259
} else {
1260
sink.add_call_site();
1261
}
1262
1263
let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
1264
if callee_pop_size > 0 {
1265
for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
1266
inst.emit(sink, emit_info, state);
1267
}
1268
}
1269
1270
// Load any stack-carried return values.
1271
info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
1272
state.frame_layout().stackslots_size,
1273
|inst| inst.emit(sink, emit_info, state),
1274
|needed_space| Some(Inst::EmitIsland { needed_space }),
1275
);
1276
1277
// If this is a try-call, jump to the continuation
1278
// (normal-return) block.
1279
if let Some(try_call) = info.try_call_info.as_ref() {
1280
let jmp = Inst::Jal {
1281
label: try_call.continuation,
1282
};
1283
jmp.emit(sink, emit_info, state);
1284
}
1285
1286
*start_off = sink.cur_offset();
1287
}
1288
1289
&Inst::ReturnCall { ref info } => {
1290
emit_return_call_common_sequence(sink, emit_info, state, info);
1291
1292
sink.add_call_site();
1293
sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);
1294
Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)
1295
.into_iter()
1296
.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1297
}
1298
1299
&Inst::ReturnCallInd { ref info } => {
1300
emit_return_call_common_sequence(sink, emit_info, state, &info);
1301
1302
Inst::Jalr {
1303
rd: writable_zero_reg(),
1304
base: info.dest,
1305
offset: Imm12::ZERO,
1306
}
1307
.emit(sink, emit_info, state);
1308
}
1309
&Inst::Jal { label } => {
1310
sink.use_label_at_offset(*start_off, label, LabelUse::Jal20);
1311
sink.add_uncond_branch(*start_off, *start_off + 4, label);
1312
sink.put4(0b1101111);
1313
state.clobber_vstate();
1314
}
1315
&Inst::CondBr {
1316
taken,
1317
not_taken,
1318
kind,
1319
} => {
1320
match taken {
1321
CondBrTarget::Label(label) => {
1322
let code = kind.emit();
1323
let code_inverse = kind.inverse().emit().to_le_bytes();
1324
sink.use_label_at_offset(*start_off, label, LabelUse::B12);
1325
sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse);
1326
sink.put4(code);
1327
}
1328
CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"),
1329
}
1330
1331
match not_taken {
1332
CondBrTarget::Label(label) => {
1333
Inst::gen_jump(label).emit(sink, emit_info, state)
1334
}
1335
CondBrTarget::Fallthrough => {}
1336
};
1337
}
1338
1339
&Inst::Mov { rd, rm, ty } => {
1340
debug_assert_eq!(rd.to_reg().class(), rm.class());
1341
if rd.to_reg() == rm {
1342
return;
1343
}
1344
1345
match rm.class() {
1346
RegClass::Int => Inst::AluRRImm12 {
1347
alu_op: AluOPRRI::Addi,
1348
rd,
1349
rs: rm,
1350
imm12: Imm12::ZERO,
1351
},
1352
RegClass::Float => Inst::FpuRRR {
1353
alu_op: FpuOPRRR::Fsgnj,
1354
width: FpuOPWidth::try_from(ty).unwrap(),
1355
frm: FRM::RNE,
1356
rd,
1357
rs1: rm,
1358
rs2: rm,
1359
},
1360
RegClass::Vector => Inst::VecAluRRImm5 {
1361
op: VecAluOpRRImm5::VmvrV,
1362
vd: rd,
1363
vs2: rm,
1364
// Imm 0 means copy 1 register.
1365
imm: Imm5::maybe_from_i8(0).unwrap(),
1366
mask: VecOpMasking::Disabled,
1367
// Vstate for this instruction is ignored.
1368
vstate: VState::from_type(ty),
1369
},
1370
}
1371
.emit(sink, emit_info, state);
1372
}
1373
1374
&Inst::MovFromPReg { rd, rm } => {
1375
Inst::gen_move(rd, Reg::from(rm), I64).emit(sink, emit_info, state);
1376
}
1377
1378
&Inst::BrTable {
1379
index,
1380
tmp1,
1381
tmp2,
1382
ref targets,
1383
} => {
1384
let ext_index = writable_spilltmp_reg();
1385
1386
let label_compute_target = sink.get_label();
1387
1388
// The default target is passed in as the 0th element of `targets`;
// separate it here for clarity.
1390
let default_target = targets[0];
1391
let targets = &targets[1..];
1392
1393
// We are going to potentially emit a large number of instructions, so ensure
// that we emit an island now if we need one.
//
// The worst-case PC calculation takes 12 instructions, and each entry in the
// jump table is another 2 instructions, so check whether an island is needed
// to keep all of the jumps in range.
1398
let inst_count = 12 + (targets.len() * 2);
1399
let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32;
1400
if sink.island_needed(distance) {
1401
let jump_around_label = sink.get_label();
1402
Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
1403
sink.emit_island(distance + 4, &mut state.ctrl_plane);
1404
sink.bind_label(jump_around_label, &mut state.ctrl_plane);
1405
}
1406
1407
// We emit a bounds check on the index, if the index is larger than the number of
1408
// jump table entries, we jump to the default block. Otherwise we compute a jump
1409
// offset by multiplying the index by 8 (the size of each entry) and then jump to
1410
// that offset. Each jump table entry is a regular auipc+jalr which we emit sequentially.
1411
//
1412
// Build the following sequence:
1413
//
1414
// extend_index:
1415
// zext.w ext_index, index
1416
// bounds_check:
1417
// li tmp, n_labels
1418
// bltu ext_index, tmp, compute_target
1419
// jump_to_default_block:
1420
// auipc pc, 0
1421
// jalr zero, pc, default_block
1422
// compute_target:
1423
// auipc pc, 0
1424
// slli tmp, ext_index, 3
1425
// add pc, pc, tmp
1426
// jalr zero, pc, 0x10
1427
// jump_table:
1428
// ; This repeats for each entry in the jumptable
1429
// auipc pc, 0
1430
// jalr zero, pc, block_target
1431
1432
// Extend the index to 64 bits.
1433
//
1434
// This prevents us branching on the top 32 bits of the index, which
1435
// are undefined.
1436
Inst::Extend {
1437
rd: ext_index,
1438
rn: index,
1439
signed: false,
1440
from_bits: 32,
1441
to_bits: 64,
1442
}
1443
.emit(sink, emit_info, state);
1444
1445
// Bounds check.
1446
//
1447
// Check if the index passed in is larger than the number of jumptable
1448
// entries that we have. If it is, we fallthrough to a jump into the
1449
// default block.
1450
Inst::load_constant_u32(tmp2, targets.len() as u64)
1451
.iter()
1452
.for_each(|i| i.emit(sink, emit_info, state));
1453
Inst::CondBr {
1454
taken: CondBrTarget::Label(label_compute_target),
1455
not_taken: CondBrTarget::Fallthrough,
1456
kind: IntegerCompare {
1457
kind: IntCC::UnsignedLessThan,
1458
rs1: ext_index.to_reg(),
1459
rs2: tmp2.to_reg(),
1460
},
1461
}
1462
.emit(sink, emit_info, state);
1463
1464
sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32);
1465
Inst::construct_auipc_and_jalr(None, tmp2, 0)
1466
.iter()
1467
.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1468
1469
// Compute the jump table offset.
1470
// We need to emit a PC relative offset,
1471
sink.bind_label(label_compute_target, &mut state.ctrl_plane);
1472
1473
// Get the current PC.
1474
Inst::Auipc {
1475
rd: tmp1,
1476
imm: Imm20::ZERO,
1477
}
1478
.emit_uncompressed(sink, emit_info, state, start_off);
1479
1480
// These instructions must be emitted as uncompressed since we
1481
// are manually computing the offset from the PC.
1482
1483
// Multiply the index by 8, since that is the size in
1484
// bytes of each jump table entry
1485
Inst::AluRRImm12 {
1486
alu_op: AluOPRRI::Slli,
1487
rd: tmp2,
1488
rs: ext_index.to_reg(),
1489
imm12: Imm12::from_i16(3),
1490
}
1491
.emit_uncompressed(sink, emit_info, state, start_off);
1492
1493
// Calculate the base of the jump, PC + the offset from above.
1494
Inst::AluRRR {
1495
alu_op: AluOPRRR::Add,
1496
rd: tmp1,
1497
rs1: tmp1.to_reg(),
1498
rs2: tmp2.to_reg(),
1499
}
1500
.emit_uncompressed(sink, emit_info, state, start_off);
1501
1502
// Jump into the jump table at the entry selected above.
// We add a 16-byte offset here because the jump table begins 4 instructions
// (16 bytes) after the AUIPC that was used to get the PC.
1505
Inst::Jalr {
1506
rd: writable_zero_reg(),
1507
base: tmp1.to_reg(),
1508
offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16),
1509
}
1510
.emit_uncompressed(sink, emit_info, state, start_off);
1511
1512
// Emit the jump table.
//
// Each entry is an auipc + jalr to the target block. An island was already
// emitted above if one was necessary.
1516
1517
// Emit the jumps back to back
1518
for target in targets.iter() {
1519
sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32);
1520
1521
Inst::construct_auipc_and_jalr(None, tmp2, 0)
1522
.iter()
1523
.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1524
}
1525
1526
// We've just emitted an island that is safe up to *here*.
1527
// Mark it as such so that we don't needlessly emit additional islands.
1528
*start_off = sink.cur_offset();
1529
}
1530
1531
&Inst::Atomic {
1532
op,
1533
rd,
1534
addr,
1535
src,
1536
amo,
1537
} => {
1538
// TODO: get flags from original CLIF atomic instruction
1539
let flags = MemFlags::new();
1540
if let Some(trap_code) = flags.trap_code() {
1541
sink.add_trap(trap_code);
1542
}
1543
let x = op.op_code()
1544
| reg_to_gpr_num(rd.to_reg()) << 7
1545
| op.funct3() << 12
1546
| reg_to_gpr_num(addr) << 15
1547
| reg_to_gpr_num(src) << 20
1548
| op.funct7(amo) << 25;
1549
1550
sink.put4(x);
1551
}
1552
&Inst::Fence { pred, succ } => {
1553
let x = 0b0001111
1554
| 0b00000 << 7
1555
| 0b000 << 12
1556
| 0b00000 << 15
1557
| (succ as u32) << 20
1558
| (pred as u32) << 24;
1559
1560
sink.put4(x);
1561
}
1562
&Inst::Auipc { rd, imm } => {
1563
sink.put4(enc_auipc(rd, imm));
1564
}
1565
1566
&Inst::LoadAddr { rd, mem } => {
1567
let base = mem.get_base_register();
1568
let offset = mem.get_offset_with_state(state);
1569
let offset_imm12 = Imm12::maybe_from_i64(offset);
1570
1571
match (mem, base, offset_imm12) {
1572
(_, Some(rs), Some(imm12)) => {
1573
Inst::AluRRImm12 {
1574
alu_op: AluOPRRI::Addi,
1575
rd,
1576
rs,
1577
imm12,
1578
}
1579
.emit(sink, emit_info, state);
1580
}
1581
(_, Some(rs), None) => {
1582
let mut insts = Inst::load_constant_u64(rd, offset as u64);
1583
insts.push(Inst::AluRRR {
1584
alu_op: AluOPRRR::Add,
1585
rd,
1586
rs1: rd.to_reg(),
1587
rs2: rs,
1588
});
1589
insts
1590
.into_iter()
1591
.for_each(|inst| inst.emit(sink, emit_info, state));
1592
}
1593
(AMode::Const(addr), None, _) => {
1594
// Get an address label for the constant and recurse.
1595
let label = sink.get_label_for_constant(addr);
1596
Inst::LoadAddr {
1597
rd,
1598
mem: AMode::Label(label),
1599
}
1600
.emit(sink, emit_info, state);
1601
}
1602
(AMode::Label(label), None, _) => {
1603
// Get the current PC.
1604
sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
1605
let inst = Inst::Auipc {
1606
rd,
1607
imm: Imm20::ZERO,
1608
};
1609
inst.emit_uncompressed(sink, emit_info, state, start_off);
1610
1611
// Emit an add to the address with a relocation.
1612
// This later gets patched up with the correct offset.
1613
sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
1614
Inst::AluRRImm12 {
1615
alu_op: AluOPRRI::Addi,
1616
rd,
1617
rs: rd.to_reg(),
1618
imm12: Imm12::ZERO,
1619
}
1620
.emit_uncompressed(sink, emit_info, state, start_off);
1621
}
1622
(amode, _, _) => {
1623
unimplemented!("LoadAddr: {:?}", amode);
1624
}
1625
}
1626
}
1627
1628
&Inst::Select {
1629
ref dst,
1630
condition,
1631
ref x,
1632
ref y,
1633
} => {
1634
// The general form for this select is the following:
1635
//
1636
// mv rd, x
1637
// b{cond} rcond, label_end
1638
// mv rd, y
1639
// label_end:
1640
// ... etc
1641
//
1642
// This is built on the assumption that moves are cheap, but branches and jumps
1643
// are not. So with this format we always avoid one jump instruction at the expense
1644
// of an unconditional move.
1645
//
1646
// We also perform another optimization here. If the destination register is the same
1647
// as one of the input registers, we can avoid emitting the first unconditional move
1648
// and emit just the branch and the second move.
1649
//
1650
// To make sure that this happens as often as possible, we also try to invert the
1651
// condition, so that if either of the input registers are the same as the destination
1652
// we avoid that move.
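// Illustrative sketch (for clarity): for `dst = select cond, x, y` where `dst`
// already holds `y`, the condition is inverted and only one move remains:
//
//   b{inv-cond} ..., label_end   ; keep y, which is already in dst
//   mv dst, x                    ; otherwise take x
// label_end: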
1653
1654
let label_end = sink.get_label();
1655
1656
let xregs = x.regs();
1657
let yregs = y.regs();
1658
let dstregs: Vec<Reg> = dst.regs().into_iter().map(|r| r.to_reg()).collect();
1659
let condregs = condition.regs();
1660
1661
// We are going to write to the destination register before evaluating
1662
// the condition, so we need to make sure that the destination register
1663
// is not one of the condition registers.
1664
//
1665
// This should never happen, since hopefully the regalloc constraints
1666
// for this register are set up correctly.
1667
debug_assert_ne!(dstregs, condregs);
1668
1669
// Check if we can invert the condition and avoid moving the y registers into
1670
// the destination. This allows us to only emit the branch and one of the moves.
1671
let (uncond_move, cond_move, condition) = if yregs == dstregs {
1672
(yregs, xregs, condition.inverse())
1673
} else {
1674
(xregs, yregs, condition)
1675
};
1676
1677
// Unconditionally move one of the values to the destination register.
1678
//
1679
// These moves may not end up being emitted if the source and
1680
// destination registers are the same. That logic is built into
1681
// the emit function for `Inst::Mov`.
1682
for i in gen_moves(dst.regs(), uncond_move) {
1683
i.emit(sink, emit_info, state);
1684
}
1685
1686
// If the condition passes we skip over the conditional move
1687
Inst::CondBr {
1688
taken: CondBrTarget::Label(label_end),
1689
not_taken: CondBrTarget::Fallthrough,
1690
kind: condition,
1691
}
1692
.emit(sink, emit_info, state);
1693
1694
// Move the conditional value to the destination register.
1695
for i in gen_moves(dst.regs(), cond_move) {
1696
i.emit(sink, emit_info, state);
1697
}
1698
1699
sink.bind_label(label_end, &mut state.ctrl_plane);
1700
}
1701
&Inst::Jalr { rd, base, offset } => {
1702
sink.put4(enc_jalr(rd, base, offset));
1703
state.clobber_vstate();
1704
}
1705
&Inst::EBreak => {
1706
sink.put4(0x00100073);
1707
}
1708
&Inst::AtomicCas {
1709
offset,
1710
t0,
1711
dst,
1712
e,
1713
addr,
1714
v,
1715
ty,
1716
} => {
1717
// # addr holds address of memory location
1718
// # e holds expected value
1719
// # v holds desired value
1720
// # dst holds return value
1721
// cas:
1722
// lr.w dst, (addr) # Load original value.
1723
// bne dst, e, fail # Doesn’t match, so fail.
1724
// sc.w t0, v, (addr) # Try to update.
1725
// bnez t0, cas  # If the store failed, retry.
1726
// fail:
1727
let fail_label = sink.get_label();
1728
let cas_label = sink.get_label();
sink.bind_label(cas_label, &mut state.ctrl_plane);
1730
Inst::Atomic {
1731
op: AtomicOP::load_op(ty),
1732
rd: dst,
1733
addr,
1734
src: zero_reg(),
1735
amo: AMO::SeqCst,
1736
}
1737
.emit(sink, emit_info, state);
1738
if ty.bits() < 32 {
1739
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1740
.iter()
1741
.for_each(|i| i.emit(sink, emit_info, state));
1742
} else if ty.bits() == 32 {
1743
Inst::Extend {
1744
rd: dst,
1745
rn: dst.to_reg(),
1746
signed: false,
1747
from_bits: 32,
1748
to_bits: 64,
1749
}
1750
.emit(sink, emit_info, state);
1751
}
1752
Inst::CondBr {
1753
taken: CondBrTarget::Label(fail_label),
1754
not_taken: CondBrTarget::Fallthrough,
1755
kind: IntegerCompare {
1756
kind: IntCC::NotEqual,
1757
rs1: e,
1758
rs2: dst.to_reg(),
1759
},
1760
}
1761
.emit(sink, emit_info, state);
1762
let store_value = if ty.bits() < 32 {
1763
// Reload the value into t0.
1764
Inst::Atomic {
1765
op: AtomicOP::load_op(ty),
1766
rd: t0,
1767
addr,
1768
src: zero_reg(),
1769
amo: AMO::SeqCst,
1770
}
1771
.emit(sink, emit_info, state);
1772
// Merge the desired value `v` into the loaded word at the byte offset.
1773
AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)
1774
.iter()
1775
.for_each(|i| i.emit(sink, emit_info, state));
1776
t0.to_reg()
1777
} else {
1778
v
1779
};
1780
Inst::Atomic {
1781
op: AtomicOP::store_op(ty),
1782
rd: t0,
1783
addr,
1784
src: store_value,
1785
amo: AMO::SeqCst,
1786
}
1787
.emit(sink, emit_info, state);
1788
// Check whether our value was stored; if not, retry.
1789
Inst::CondBr {
1790
taken: CondBrTarget::Label(cas_label),
1791
not_taken: CondBrTarget::Fallthrough,
1792
kind: IntegerCompare {
1793
kind: IntCC::NotEqual,
1794
rs1: t0.to_reg(),
1795
rs2: zero_reg(),
1796
},
1797
}
1798
.emit(sink, emit_info, state);
1799
sink.bind_label(fail_label, &mut state.ctrl_plane);
1800
}
1801
&Inst::AtomicRmwLoop {
1802
offset,
1803
op,
1804
dst,
1805
ty,
1806
p,
1807
x,
1808
t0,
1809
} => {
1810
let retry = sink.get_label();
1811
sink.bind_label(retry, &mut state.ctrl_plane);
1812
// load old value.
1813
Inst::Atomic {
1814
op: AtomicOP::load_op(ty),
1815
rd: dst,
1816
addr: p,
1817
src: zero_reg(),
1818
amo: AMO::SeqCst,
1819
}
1820
.emit(sink, emit_info, state);
1821
//
1822
1823
let store_value: Reg = match op {
1824
crate::ir::AtomicRmwOp::Add
1825
| crate::ir::AtomicRmwOp::Sub
1826
| crate::ir::AtomicRmwOp::And
1827
| crate::ir::AtomicRmwOp::Or
1828
| crate::ir::AtomicRmwOp::Xor => {
1829
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1830
.iter()
1831
.for_each(|i| i.emit(sink, emit_info, state));
1832
Inst::AluRRR {
1833
alu_op: match op {
1834
crate::ir::AtomicRmwOp::Add => AluOPRRR::Add,
1835
crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub,
1836
crate::ir::AtomicRmwOp::And => AluOPRRR::And,
1837
crate::ir::AtomicRmwOp::Or => AluOPRRR::Or,
1838
crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor,
1839
_ => unreachable!(),
1840
},
1841
rd: t0,
1842
rs1: dst.to_reg(),
1843
rs2: x,
1844
}
1845
.emit(sink, emit_info, state);
1846
Inst::Atomic {
1847
op: AtomicOP::load_op(ty),
1848
rd: writable_spilltmp_reg2(),
1849
addr: p,
1850
src: zero_reg(),
1851
amo: AMO::SeqCst,
1852
}
1853
.emit(sink, emit_info, state);
1854
AtomicOP::merge(
1855
writable_spilltmp_reg2(),
1856
writable_spilltmp_reg(),
1857
offset,
1858
t0.to_reg(),
1859
ty,
1860
)
1861
.iter()
1862
.for_each(|i| i.emit(sink, emit_info, state));
1863
spilltmp_reg2()
1864
}
1865
crate::ir::AtomicRmwOp::Nand => {
1866
if ty.bits() < 32 {
1867
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1868
.iter()
1869
.for_each(|i| i.emit(sink, emit_info, state));
1870
}
1871
Inst::AluRRR {
1872
alu_op: AluOPRRR::And,
1873
rd: t0,
1874
rs1: x,
1875
rs2: dst.to_reg(),
1876
}
1877
.emit(sink, emit_info, state);
1878
Inst::construct_bit_not(t0, t0.to_reg()).emit(sink, emit_info, state);
1879
if ty.bits() < 32 {
1880
Inst::Atomic {
1881
op: AtomicOP::load_op(ty),
1882
rd: writable_spilltmp_reg2(),
1883
addr: p,
1884
src: zero_reg(),
1885
amo: AMO::SeqCst,
1886
}
1887
.emit(sink, emit_info, state);
1888
AtomicOP::merge(
1889
writable_spilltmp_reg2(),
1890
writable_spilltmp_reg(),
1891
offset,
1892
t0.to_reg(),
1893
ty,
1894
)
1895
.iter()
1896
.for_each(|i| i.emit(sink, emit_info, state));
1897
spilltmp_reg2()
1898
} else {
1899
t0.to_reg()
1900
}
1901
}
1902
1903
crate::ir::AtomicRmwOp::Umin
1904
| crate::ir::AtomicRmwOp::Umax
1905
| crate::ir::AtomicRmwOp::Smin
1906
| crate::ir::AtomicRmwOp::Smax => {
1907
let label_select_dst = sink.get_label();
1908
let label_select_done = sink.get_label();
1909
if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax
1910
{
1911
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1912
} else {
1913
AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)
1914
}
1915
.iter()
1916
.for_each(|i| i.emit(sink, emit_info, state));
1917
1918
Inst::CondBr {
1919
taken: CondBrTarget::Label(label_select_dst),
1920
not_taken: CondBrTarget::Fallthrough,
1921
kind: IntegerCompare {
1922
kind: match op {
1923
crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan,
1924
crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan,
1925
crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan,
1926
crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,
1927
_ => unreachable!(),
1928
},
1929
rs1: dst.to_reg(),
1930
rs2: x,
1931
},
1932
}
1933
.emit(sink, emit_info, state);
1934
// here we select x.
1935
Inst::gen_move(t0, x, I64).emit(sink, emit_info, state);
1936
Inst::gen_jump(label_select_done).emit(sink, emit_info, state);
1937
sink.bind_label(label_select_dst, &mut state.ctrl_plane);
1938
Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state);
1939
sink.bind_label(label_select_done, &mut state.ctrl_plane);
1940
Inst::Atomic {
1941
op: AtomicOP::load_op(ty),
1942
rd: writable_spilltmp_reg2(),
1943
addr: p,
1944
src: zero_reg(),
1945
amo: AMO::SeqCst,
1946
}
1947
.emit(sink, emit_info, state);
1948
AtomicOP::merge(
1949
writable_spilltmp_reg2(),
1950
writable_spilltmp_reg(),
1951
offset,
1952
t0.to_reg(),
1953
ty,
1954
)
1955
.iter()
1956
.for_each(|i| i.emit(sink, emit_info, state));
1957
spilltmp_reg2()
1958
}
1959
crate::ir::AtomicRmwOp::Xchg => {
1960
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1961
.iter()
1962
.for_each(|i| i.emit(sink, emit_info, state));
1963
Inst::Atomic {
1964
op: AtomicOP::load_op(ty),
1965
rd: writable_spilltmp_reg2(),
1966
addr: p,
1967
src: zero_reg(),
1968
amo: AMO::SeqCst,
1969
}
1970
.emit(sink, emit_info, state);
1971
AtomicOP::merge(
1972
writable_spilltmp_reg2(),
1973
writable_spilltmp_reg(),
1974
offset,
1975
x,
1976
ty,
1977
)
1978
.iter()
1979
.for_each(|i| i.emit(sink, emit_info, state));
1980
spilltmp_reg2()
1981
}
1982
};
1983
1984
Inst::Atomic {
1985
op: AtomicOP::store_op(ty),
1986
rd: t0,
1987
addr: p,
1988
src: store_value,
1989
amo: AMO::SeqCst,
1990
}
1991
.emit(sink, emit_info, state);
1992
1993
// If the store-conditional failed, retry.
1994
Inst::CondBr {
1995
taken: CondBrTarget::Label(retry),
1996
not_taken: CondBrTarget::Fallthrough,
1997
kind: IntegerCompare {
1998
kind: IntCC::NotEqual,
1999
rs1: t0.to_reg(),
2000
rs2: zero_reg(),
2001
},
2002
}
2003
.emit(sink, emit_info, state);
2004
}
2005
2006
&Inst::LoadExtNameGot { rd, ref name } => {
2007
// Load a PC-relative address into a register.
2008
// RISC-V does this slightly differently from other arches: the low-part relocation
2009
// references a label (the `auipc` below), instead of the symbol itself.
2010
//
2011
// See: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
2012
//
2013
// Emit the following code:
2014
// label:
2015
// auipc rd, 0 # R_RISCV_GOT_HI20 (symbol_name)
2016
// ld rd, 0(rd) # R_RISCV_PCREL_LO12_I (label)
2017
2018
// Create the label that is going to be published to the final binary object.
2019
let auipc_label = sink.get_label();
2020
sink.bind_label(auipc_label, &mut state.ctrl_plane);
2021
2022
// Get the current PC.
2023
sink.add_reloc(Reloc::RiscvGotHi20, &**name, 0);
2024
Inst::Auipc {
2025
rd,
2026
imm: Imm20::from_i32(0),
2027
}
2028
.emit_uncompressed(sink, emit_info, state, start_off);
2029
2030
// The `ld` here points to the `auipc` label instead of directly to the symbol.
2031
sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2032
Inst::Load {
2033
rd,
2034
op: LoadOP::Ld,
2035
flags: MemFlags::trusted(),
2036
from: AMode::RegOffset(rd.to_reg(), 0),
2037
}
2038
.emit_uncompressed(sink, emit_info, state, start_off);
2039
}
2040
2041
&Inst::LoadExtNameFar {
2042
rd,
2043
ref name,
2044
offset,
2045
} => {
2046
// In the non-PIC sequence we relocate the absolute address into
2047
// a preallocated space, load it into a register and jump over
2048
// it.
2049
//
2050
// Emit the following code:
2051
// ld rd, label_data
2052
// j label_end
2053
// label_data:
2054
// <8 byte space> # ABS8
2055
// label_end:
2056
2057
let label_data = sink.get_label();
2058
let label_end = sink.get_label();
2059
2060
// Load the value from a label
2061
Inst::Load {
2062
rd,
2063
op: LoadOP::Ld,
2064
flags: MemFlags::trusted(),
2065
from: AMode::Label(label_data),
2066
}
2067
.emit(sink, emit_info, state);
2068
2069
// Jump over the data
2070
Inst::gen_jump(label_end).emit(sink, emit_info, state);
2071
2072
sink.bind_label(label_data, &mut state.ctrl_plane);
2073
sink.add_reloc(Reloc::Abs8, name.as_ref(), offset);
2074
sink.put8(0);
2075
2076
sink.bind_label(label_end, &mut state.ctrl_plane);
2077
}
2078
2079
&Inst::LoadExtNameNear {
2080
rd,
2081
ref name,
2082
offset,
2083
} => {
2084
// Emit the following code:
2085
// label:
2086
// auipc rd, 0 # R_RISCV_PCREL_HI20 (symbol_name)
2087
// addi rd, rd, 0 # R_RISCV_PCREL_LO12_I (label)
2088
2089
let auipc_label = sink.get_label();
2090
sink.bind_label(auipc_label, &mut state.ctrl_plane);
2091
2092
// Get the current PC.
2093
sink.add_reloc(Reloc::RiscvPCRelHi20, &**name, offset);
2094
Inst::Auipc {
2095
rd,
2096
imm: Imm20::from_i32(0),
2097
}
2098
.emit_uncompressed(sink, emit_info, state, start_off);
2099
2100
sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2101
Inst::AluRRImm12 {
2102
alu_op: AluOPRRI::Addi,
2103
rd,
2104
rs: rd.to_reg(),
2105
imm12: Imm12::ZERO,
2106
}
2107
.emit_uncompressed(sink, emit_info, state, start_off);
2108
}
2109
2110
&Inst::LabelAddress { dst, label } => {
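// Materialize the address of `label` with an `auipc`/`addi` pair,
// recording PCRelHi20/PCRelLo12I label uses so the offsets are patched
// when the buffer is finalized.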
2111
let offset = sink.cur_offset();
2112
Inst::Auipc {
2113
rd: dst,
2114
imm: Imm20::from_i32(0),
2115
}
2116
.emit_uncompressed(sink, emit_info, state, start_off);
2117
sink.use_label_at_offset(offset, label, LabelUse::PCRelHi20);
2118
2119
let offset = sink.cur_offset();
2120
Inst::AluRRImm12 {
2121
alu_op: AluOPRRI::Addi,
2122
rd: dst,
2123
rs: dst.to_reg(),
2124
imm12: Imm12::ZERO,
2125
}
2126
.emit_uncompressed(sink, emit_info, state, start_off);
2127
sink.use_label_at_offset(offset, label, LabelUse::PCRelLo12I);
2128
}
2129
2130
&Inst::ElfTlsGetAddr { rd, ref name } => {
2131
// RISC-V's TLS GD model is slightly different from other arches.
2132
//
2133
// We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits
2134
// of the address relative to the GOT entry. This relocation points to
2135
// the symbol as usual.
2136
//
2137
// However, when loading the bottom 12 bits of the address, we need to
2138
// use a label that points to the previous AUIPC instruction.
2139
//
2140
// label:
2141
// auipc a0,0 # R_RISCV_TLS_GD_HI20 (symbol)
2142
// addi a0,a0,0 # R_RISCV_PCREL_LO12_I (label)
2143
//
2144
// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic
2145
2146
// Create the label that is going to be published to the final binary object.
2147
let auipc_label = sink.get_label();
2148
sink.bind_label(auipc_label, &mut state.ctrl_plane);
2149
2150
// Get the current PC.
2151
sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0);
2152
Inst::Auipc {
2153
rd,
2154
imm: Imm20::from_i32(0),
2155
}
2156
.emit_uncompressed(sink, emit_info, state, start_off);
2157
2158
// The `addi` here points to the `auipc` label instead of directly to the symbol.
2159
sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2160
Inst::AluRRImm12 {
2161
alu_op: AluOPRRI::Addi,
2162
rd,
2163
rs: rd.to_reg(),
2164
imm12: Imm12::from_i16(0),
2165
}
2166
.emit_uncompressed(sink, emit_info, state, start_off);
2167
2168
Inst::Call {
2169
info: Box::new(CallInfo::empty(
2170
ExternalName::LibCall(LibCall::ElfTlsGetAddr),
2171
CallConv::SystemV,
2172
)),
2173
}
2174
.emit_uncompressed(sink, emit_info, state, start_off);
2175
}
2176
2177
&Inst::TrapIf {
2178
rs1,
2179
rs2,
2180
cc,
2181
trap_code,
2182
} => {
2183
let label_end = sink.get_label();
2184
let cond = IntegerCompare { kind: cc, rs1, rs2 };
2185
2186
// Jump over the trap if the condition is false.
2187
Inst::CondBr {
2188
taken: CondBrTarget::Label(label_end),
2189
not_taken: CondBrTarget::Fallthrough,
2190
kind: cond.inverse(),
2191
}
2192
.emit(sink, emit_info, state);
2193
Inst::Udf { trap_code }.emit(sink, emit_info, state);
2194
2195
sink.bind_label(label_end, &mut state.ctrl_plane);
2196
}
2197
&Inst::Udf { trap_code } => {
2198
sink.add_trap(trap_code);
2199
sink.put_data(Inst::TRAP_OPCODE);
2200
}
2201
&Inst::AtomicLoad { rd, ty, p } => {
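// A sequentially consistent load is emitted as
// `fence rw,rw; load; fence r,rw`, following the usual fence-based
// mapping on RISC-V.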
2202
// emit the fence.
2203
Inst::Fence {
2204
pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2205
succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2206
}
2207
.emit(sink, emit_info, state);
2208
// load.
2209
Inst::Load {
2210
rd,
2211
op: LoadOP::from_type(ty),
2212
flags: MemFlags::new(),
2213
from: AMode::RegOffset(p, 0),
2214
}
2215
.emit(sink, emit_info, state);
2216
Inst::Fence {
2217
pred: Inst::FENCE_REQ_R,
2218
succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2219
}
2220
.emit(sink, emit_info, state);
2221
}
2222
&Inst::AtomicStore { src, ty, p } => {
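// A sequentially consistent store is emitted as `fence rw,w; store`,
// following the usual fence-based mapping on RISC-V.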
2223
Inst::Fence {
2224
pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2225
succ: Inst::FENCE_REQ_W,
2226
}
2227
.emit(sink, emit_info, state);
2228
Inst::Store {
2229
to: AMode::RegOffset(p, 0),
2230
op: StoreOP::from_type(ty),
2231
flags: MemFlags::new(),
2232
src,
2233
}
2234
.emit(sink, emit_info, state);
2235
}
2236
2237
&Inst::Popcnt {
2238
sum,
2239
tmp,
2240
step,
2241
rs,
2242
ty,
2243
} => {
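// Bit-at-a-time population count: `tmp` holds a one-hot probe mask
// starting at the top bit of `ty`, `step` counts the remaining bits,
// and `sum` is incremented for every set bit of `rs` as the mask is
// shifted right each iteration.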
2244
// Initialize `sum` to 0.
2245
Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2246
// Load the bit width of `ty` into `step`.
2247
Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2248
.emit(sink, emit_info, state);
2249
// Build the probe mask in `tmp`: 1 shifted up to the top bit of `ty`.
2250
Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2251
Inst::AluRRImm12 {
2252
alu_op: AluOPRRI::Slli,
2253
rd: tmp,
2254
rs: tmp.to_reg(),
2255
imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2256
}
2257
.emit(sink, emit_info, state);
2258
let label_done = sink.get_label();
2259
let label_loop = sink.get_label();
2260
sink.bind_label(label_loop, &mut state.ctrl_plane);
2261
Inst::CondBr {
2262
taken: CondBrTarget::Label(label_done),
2263
not_taken: CondBrTarget::Fallthrough,
2264
kind: IntegerCompare {
2265
kind: IntCC::SignedLessThanOrEqual,
2266
rs1: step.to_reg(),
2267
rs2: zero_reg(),
2268
},
2269
}
2270
.emit(sink, emit_info, state);
2271
// test and add sum.
2272
{
2273
Inst::AluRRR {
2274
alu_op: AluOPRRR::And,
2275
rd: writable_spilltmp_reg2(),
2276
rs1: tmp.to_reg(),
2277
rs2: rs,
2278
}
2279
.emit(sink, emit_info, state);
2280
let label_over = sink.get_label();
2281
Inst::CondBr {
2282
taken: CondBrTarget::Label(label_over),
2283
not_taken: CondBrTarget::Fallthrough,
2284
kind: IntegerCompare {
2285
kind: IntCC::Equal,
2286
rs1: zero_reg(),
2287
rs2: spilltmp_reg2(),
2288
},
2289
}
2290
.emit(sink, emit_info, state);
2291
Inst::AluRRImm12 {
2292
alu_op: AluOPRRI::Addi,
2293
rd: sum,
2294
rs: sum.to_reg(),
2295
imm12: Imm12::ONE,
2296
}
2297
.emit(sink, emit_info, state);
2298
sink.bind_label(label_over, &mut state.ctrl_plane);
2299
}
2300
// set step and tmp.
2301
{
2302
Inst::AluRRImm12 {
2303
alu_op: AluOPRRI::Addi,
2304
rd: step,
2305
rs: step.to_reg(),
2306
imm12: Imm12::from_i16(-1),
2307
}
2308
.emit(sink, emit_info, state);
2309
Inst::AluRRImm12 {
2310
alu_op: AluOPRRI::Srli,
2311
rd: tmp,
2312
rs: tmp.to_reg(),
2313
imm12: Imm12::ONE,
2314
}
2315
.emit(sink, emit_info, state);
2316
Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2317
}
2318
sink.bind_label(label_done, &mut state.ctrl_plane);
2319
}
2320
&Inst::Cltz {
2321
sum,
2322
tmp,
2323
step,
2324
rs,
2325
leading,
2326
ty,
2327
} => {
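// Count leading/trailing zeros with a probe loop: `tmp` is a one-hot
// mask (top bit for leading, bit 0 for trailing), `step` counts the
// remaining bits, and the loop exits as soon as a set bit of `rs` is
// found, otherwise incrementing `sum` and advancing the mask.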
2328
// Initialize `sum` to 0.
2329
Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2330
// Load the bit width of `ty` into `step`.
2331
Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2332
.emit(sink, emit_info, state);
2333
// Set `tmp` to 1; for a leading-zero count it is shifted to the top bit below.
2334
Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2335
if leading {
2336
Inst::AluRRImm12 {
2337
alu_op: AluOPRRI::Slli,
2338
rd: tmp,
2339
rs: tmp.to_reg(),
2340
imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2341
}
2342
.emit(sink, emit_info, state);
2343
}
2344
let label_done = sink.get_label();
2345
let label_loop = sink.get_label();
2346
sink.bind_label(label_loop, &mut state.ctrl_plane);
2347
Inst::CondBr {
2348
taken: CondBrTarget::Label(label_done),
2349
not_taken: CondBrTarget::Fallthrough,
2350
kind: IntegerCompare {
2351
kind: IntCC::SignedLessThanOrEqual,
2352
rs1: step.to_reg(),
2353
rs2: zero_reg(),
2354
},
2355
}
2356
.emit(sink, emit_info, state);
2357
// test and add sum.
2358
{
2359
Inst::AluRRR {
2360
alu_op: AluOPRRR::And,
2361
rd: writable_spilltmp_reg2(),
2362
rs1: tmp.to_reg(),
2363
rs2: rs,
2364
}
2365
.emit(sink, emit_info, state);
2366
Inst::CondBr {
2367
taken: CondBrTarget::Label(label_done),
2368
not_taken: CondBrTarget::Fallthrough,
2369
kind: IntegerCompare {
2370
kind: IntCC::NotEqual,
2371
rs1: zero_reg(),
2372
rs2: spilltmp_reg2(),
2373
},
2374
}
2375
.emit(sink, emit_info, state);
2376
Inst::AluRRImm12 {
2377
alu_op: AluOPRRI::Addi,
2378
rd: sum,
2379
rs: sum.to_reg(),
2380
imm12: Imm12::ONE,
2381
}
2382
.emit(sink, emit_info, state);
2383
}
2384
// set step and tmp.
2385
{
2386
Inst::AluRRImm12 {
2387
alu_op: AluOPRRI::Addi,
2388
rd: step,
2389
rs: step.to_reg(),
2390
imm12: Imm12::from_i16(-1),
2391
}
2392
.emit(sink, emit_info, state);
2393
Inst::AluRRImm12 {
2394
alu_op: if leading {
2395
AluOPRRI::Srli
2396
} else {
2397
AluOPRRI::Slli
2398
},
2399
rd: tmp,
2400
rs: tmp.to_reg(),
2401
imm12: Imm12::ONE,
2402
}
2403
.emit(sink, emit_info, state);
2404
Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2405
}
2406
sink.bind_label(label_done, &mut state.ctrl_plane);
2407
}
2408
&Inst::Brev8 {
2409
rs,
2410
ty,
2411
step,
2412
tmp,
2413
tmp2,
2414
rd,
2415
} => {
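// Reverse the bits within each byte: `tmp` probes the bits of `rs`
// from the top down while `tmp2` tracks the destination bit, which
// moves upwards within a byte and drops by 15 positions whenever a
// byte boundary is crossed (every 8 steps).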
2416
Inst::gen_move(rd, zero_reg(), I64).emit(sink, emit_info, state);
2417
Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2418
.emit(sink, emit_info, state);
2419
// Build the source probe mask in `tmp`: 1 shifted up to the top bit of `ty`.
2420
Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2421
Inst::AluRRImm12 {
2422
alu_op: AluOPRRI::Slli,
2423
rd: tmp,
2424
rs: tmp.to_reg(),
2425
imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2426
}
2427
.emit(sink, emit_info, state);
2428
Inst::load_imm12(tmp2, Imm12::ONE).emit(sink, emit_info, state);
2429
Inst::AluRRImm12 {
2430
alu_op: AluOPRRI::Slli,
2431
rd: tmp2,
2432
rs: tmp2.to_reg(),
2433
imm12: Imm12::from_i16((ty.bits() - 8) as i16),
2434
}
2435
.emit(sink, emit_info, state);
2436
2437
let label_done = sink.get_label();
2438
let label_loop = sink.get_label();
2439
sink.bind_label(label_loop, &mut state.ctrl_plane);
2440
Inst::CondBr {
2441
taken: CondBrTarget::Label(label_done),
2442
not_taken: CondBrTarget::Fallthrough,
2443
kind: IntegerCompare {
2444
kind: IntCC::SignedLessThanOrEqual,
2445
rs1: step.to_reg(),
2446
rs2: zero_reg(),
2447
},
2448
}
2449
.emit(sink, emit_info, state);
2450
// test and set bit.
2451
{
2452
Inst::AluRRR {
2453
alu_op: AluOPRRR::And,
2454
rd: writable_spilltmp_reg2(),
2455
rs1: tmp.to_reg(),
2456
rs2: rs,
2457
}
2458
.emit(sink, emit_info, state);
2459
let label_over = sink.get_label();
2460
Inst::CondBr {
2461
taken: CondBrTarget::Label(label_over),
2462
not_taken: CondBrTarget::Fallthrough,
2463
kind: IntegerCompare {
2464
kind: IntCC::Equal,
2465
rs1: zero_reg(),
2466
rs2: spilltmp_reg2(),
2467
},
2468
}
2469
.emit(sink, emit_info, state);
2470
Inst::AluRRR {
2471
alu_op: AluOPRRR::Or,
2472
rd,
2473
rs1: rd.to_reg(),
2474
rs2: tmp2.to_reg(),
2475
}
2476
.emit(sink, emit_info, state);
2477
sink.bind_label(label_over, &mut state.ctrl_plane);
2478
}
2479
// set step and tmp.
2480
{
2481
Inst::AluRRImm12 {
2482
alu_op: AluOPRRI::Addi,
2483
rd: step,
2484
rs: step.to_reg(),
2485
imm12: Imm12::from_i16(-1),
2486
}
2487
.emit(sink, emit_info, state);
2488
Inst::AluRRImm12 {
2489
alu_op: AluOPRRI::Srli,
2490
rd: tmp,
2491
rs: tmp.to_reg(),
2492
imm12: Imm12::ONE,
2493
}
2494
.emit(sink, emit_info, state);
2495
{
2496
// reset tmp2
2497
// if (step % 8 == 0) then tmp2 = tmp2 >> 15
2498
// if (step % 8 != 0) then tmp2 = tmp2 << 1
2499
let label_over = sink.get_label();
2500
let label_sll_1 = sink.get_label();
2501
Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8))
2502
.emit(sink, emit_info, state);
2503
Inst::AluRRR {
2504
alu_op: AluOPRRR::Rem,
2505
rd: writable_spilltmp_reg2(),
2506
rs1: step.to_reg(),
2507
rs2: spilltmp_reg2(),
2508
}
2509
.emit(sink, emit_info, state);
2510
Inst::CondBr {
2511
taken: CondBrTarget::Label(label_sll_1),
2512
not_taken: CondBrTarget::Fallthrough,
2513
kind: IntegerCompare {
2514
kind: IntCC::NotEqual,
2515
rs1: spilltmp_reg2(),
2516
rs2: zero_reg(),
2517
},
2518
}
2519
.emit(sink, emit_info, state);
2520
Inst::AluRRImm12 {
2521
alu_op: AluOPRRI::Srli,
2522
rd: tmp2,
2523
rs: tmp2.to_reg(),
2524
imm12: Imm12::from_i16(15),
2525
}
2526
.emit(sink, emit_info, state);
2527
Inst::gen_jump(label_over).emit(sink, emit_info, state);
2528
sink.bind_label(label_sll_1, &mut state.ctrl_plane);
2529
Inst::AluRRImm12 {
2530
alu_op: AluOPRRI::Slli,
2531
rd: tmp2,
2532
rs: tmp2.to_reg(),
2533
imm12: Imm12::ONE,
2534
}
2535
.emit(sink, emit_info, state);
2536
sink.bind_label(label_over, &mut state.ctrl_plane);
2537
}
2538
Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2539
}
2540
sink.bind_label(label_done, &mut state.ctrl_plane);
2541
}
2542
&Inst::StackProbeLoop {
2543
guard_size,
2544
probe_count,
2545
tmp: guard_size_tmp,
2546
} => {
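// Probe the stack one byte per guard-size page: `step` starts at
// `guard_size * probe_count` and is decremented by `guard_size` each
// iteration, storing a zero byte at `sp - step` until it falls to
// `guard_size` or below.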
2547
let step = writable_spilltmp_reg();
2548
Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
2549
.iter()
2550
.for_each(|i| i.emit(sink, emit_info, state));
2551
Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
2552
.iter()
2553
.for_each(|i| i.emit(sink, emit_info, state));
2554
2555
let loop_start = sink.get_label();
2556
let label_done = sink.get_label();
2557
sink.bind_label(loop_start, &mut state.ctrl_plane);
2558
Inst::CondBr {
2559
taken: CondBrTarget::Label(label_done),
2560
not_taken: CondBrTarget::Fallthrough,
2561
kind: IntegerCompare {
2562
kind: IntCC::UnsignedLessThanOrEqual,
2563
rs1: step.to_reg(),
2564
rs2: guard_size_tmp.to_reg(),
2565
},
2566
}
2567
.emit(sink, emit_info, state);
2568
// compute address.
2569
Inst::AluRRR {
2570
alu_op: AluOPRRR::Sub,
2571
rd: writable_spilltmp_reg2(),
2572
rs1: stack_reg(),
2573
rs2: step.to_reg(),
2574
}
2575
.emit(sink, emit_info, state);
2576
Inst::Store {
2577
to: AMode::RegOffset(spilltmp_reg2(), 0),
2578
op: StoreOP::Sb,
2579
flags: MemFlags::new(),
2580
src: zero_reg(),
2581
}
2582
.emit(sink, emit_info, state);
2583
// reset step.
2584
Inst::AluRRR {
2585
alu_op: AluOPRRR::Sub,
2586
rd: step,
2587
rs1: step.to_reg(),
2588
rs2: guard_size_tmp.to_reg(),
2589
}
2590
.emit(sink, emit_info, state);
2591
Inst::gen_jump(loop_start).emit(sink, emit_info, state);
2592
sink.bind_label(label_done, &mut state.ctrl_plane);
2593
}
2594
&Inst::VecAluRRRImm5 {
2595
op,
2596
vd,
2597
vd_src,
2598
imm,
2599
vs2,
2600
ref mask,
2601
..
2602
} => {
2603
debug_assert_eq!(vd.to_reg(), vd_src);
2604
2605
sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask));
2606
}
2607
&Inst::VecAluRRRR {
2608
op,
2609
vd,
2610
vd_src,
2611
vs1,
2612
vs2,
2613
ref mask,
2614
..
2615
} => {
2616
debug_assert_eq!(vd.to_reg(), vd_src);
2617
2618
sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask));
2619
}
2620
&Inst::VecAluRRR {
2621
op,
2622
vd,
2623
vs1,
2624
vs2,
2625
ref mask,
2626
..
2627
} => {
2628
sink.put4(encode_valu(op, vd, vs1, vs2, *mask));
2629
}
2630
&Inst::VecAluRRImm5 {
2631
op,
2632
vd,
2633
imm,
2634
vs2,
2635
ref mask,
2636
..
2637
} => {
2638
sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask));
2639
}
2640
&Inst::VecAluRR {
2641
op,
2642
vd,
2643
vs,
2644
ref mask,
2645
..
2646
} => {
2647
sink.put4(encode_valu_rr(op, vd, vs, *mask));
2648
}
2649
&Inst::VecAluRImm5 {
2650
op,
2651
vd,
2652
imm,
2653
ref mask,
2654
..
2655
} => {
2656
sink.put4(encode_valu_r_imm(op, vd, imm, *mask));
2657
}
2658
&Inst::VecSetState { rd, ref vstate } => {
2659
sink.put4(encode_vcfg_imm(
2660
0x57,
2661
rd.to_reg(),
2662
vstate.avl.unwrap_static(),
2663
&vstate.vtype,
2664
));
2665
2666
// Update the current vector emit state.
2667
state.vstate = EmitVState::Known(*vstate);
2668
}
2669
2670
&Inst::VecLoad {
2671
eew,
2672
to,
2673
ref from,
2674
ref mask,
2675
flags,
2676
..
2677
} => {
2678
// Vector loads don't support immediate offsets, so the effective address may need to be materialized in a register.
2679
let addr = match from {
2680
VecAMode::UnitStride { base } => {
2681
let base_reg = base.get_base_register();
2682
let offset = base.get_offset_with_state(state);
2683
2684
// Reg+0 Offset can be directly encoded
2685
if let (Some(base_reg), 0) = (base_reg, offset) {
2686
base_reg
2687
} else {
2688
// Otherwise, compute the address into a temporary register and load from it.
2689
let tmp = writable_spilltmp_reg();
2690
Inst::LoadAddr {
2691
rd: tmp,
2692
mem: *base,
2693
}
2694
.emit(sink, emit_info, state);
2695
tmp.to_reg()
2696
}
2697
}
2698
};
2699
2700
if let Some(trap_code) = flags.trap_code() {
2701
// Register the offset at which the actual load instruction starts.
2702
sink.add_trap(trap_code);
2703
}
2704
2705
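// 0x07 is the LOAD-FP major opcode, which the vector extension reuses
// for vector loads.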
sink.put4(encode_vmem_load(
2706
0x07,
2707
to.to_reg(),
2708
eew,
2709
addr,
2710
from.lumop(),
2711
*mask,
2712
from.mop(),
2713
from.nf(),
2714
));
2715
}
2716
2717
&Inst::VecStore {
2718
eew,
2719
ref to,
2720
from,
2721
ref mask,
2722
flags,
2723
..
2724
} => {
2725
// Vector stores don't support immediate offsets, so the effective address may need to be materialized in a register.
2726
let addr = match to {
2727
VecAMode::UnitStride { base } => {
2728
let base_reg = base.get_base_register();
2729
let offset = base.get_offset_with_state(state);
2730
2731
// Reg+0 Offset can be directly encoded
2732
if let (Some(base_reg), 0) = (base_reg, offset) {
2733
base_reg
2734
} else {
2735
// Otherwise, compute the address into a temporary register and store through it.
2736
let tmp = writable_spilltmp_reg();
2737
Inst::LoadAddr {
2738
rd: tmp,
2739
mem: *base,
2740
}
2741
.emit(sink, emit_info, state);
2742
tmp.to_reg()
2743
}
2744
}
2745
};
2746
2747
if let Some(trap_code) = flags.trap_code() {
2748
// Register the offset at which the actual store instruction starts.
2749
sink.add_trap(trap_code);
2750
}
2751
2752
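// 0x27 is the STORE-FP major opcode, which the vector extension reuses
// for vector stores.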
sink.put4(encode_vmem_store(
2753
0x27,
2754
from,
2755
eew,
2756
addr,
2757
to.sumop(),
2758
*mask,
2759
to.mop(),
2760
to.nf(),
2761
));
2762
}
2763
2764
Inst::EmitIsland { needed_space } => {
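// If an island will be needed within `needed_space` bytes, emit it now
// behind an unconditional jump so that execution skips over the island
// contents.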
2765
if sink.island_needed(*needed_space) {
2766
let jump_around_label = sink.get_label();
2767
Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
2768
sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
2769
sink.bind_label(jump_around_label, &mut state.ctrl_plane);
2770
}
2771
}
2772
}
2773
}
2774
}
2775
2776
fn emit_return_call_common_sequence<T>(
2777
sink: &mut MachBuffer<Inst>,
2778
emit_info: &EmitInfo,
2779
state: &mut EmitState,
2780
info: &ReturnCallInfo<T>,
2781
) {
2782
// The return call sequence can potentially emit a lot of instructions (up to 634 bytes!)
2783
// So let's emit an island here if we need it.
2784
//
2785
// It is difficult to calculate exactly how many bytes are going to be emitted, so
2786
// we measure it by emitting the sequence into a disposable buffer and then checking how many bytes
2787
// were actually emitted.
2788
let mut buffer = MachBuffer::new();
2789
let mut fake_emit_state = state.clone();
2790
2791
return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);
2792
2793
// Finalize the buffer and get the number of bytes emitted.
2794
let buffer = buffer.finish(&Default::default(), &mut Default::default());
2795
let length = buffer.data().len() as u32;
2796
2797
// And now emit the island inline with this instruction.
2798
if sink.island_needed(length) {
2799
let jump_around_label = sink.get_label();
2800
Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
2801
sink.emit_island(length + 4, &mut state.ctrl_plane);
2802
sink.bind_label(jump_around_label, &mut state.ctrl_plane);
2803
}
2804
2805
// Now that we're done, emit the *actual* return sequence.
2806
return_call_emit_impl(sink, emit_info, state, info);
2807
}
2808
2809
/// This should not be called directly; instead, prefer to call [emit_return_call_common_sequence].
2810
fn return_call_emit_impl<T>(
2811
sink: &mut MachBuffer<Inst>,
2812
emit_info: &EmitInfo,
2813
state: &mut EmitState,
2814
info: &ReturnCallInfo<T>,
2815
) {
2816
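// Distance from the current SP up to the saved FP/RA pair:
// clobber save area + fixed frame storage + outgoing argument area.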
let sp_to_fp_offset = {
2817
let frame_layout = state.frame_layout();
2818
i64::from(
2819
frame_layout.clobber_size
2820
+ frame_layout.fixed_frame_storage_size
2821
+ frame_layout.outgoing_args_size,
2822
)
2823
};
2824
2825
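// Reload the clobbered callee-saved registers from their save slots,
// walking downwards from just below the saved FP/RA.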
let mut clobber_offset = sp_to_fp_offset - 8;
2826
for reg in state.frame_layout().clobbered_callee_saves.clone() {
2827
let rreg = reg.to_reg();
2828
let ty = match rreg.class() {
2829
RegClass::Int => I64,
2830
RegClass::Float => F64,
2831
RegClass::Vector => unimplemented!("Vector Clobber Restores"),
2832
};
2833
2834
Inst::gen_load(
2835
reg.map(Reg::from),
2836
AMode::SPOffset(clobber_offset),
2837
ty,
2838
MemFlags::trusted(),
2839
)
2840
.emit(sink, emit_info, state);
2841
2842
clobber_offset -= 8
2843
}
2844
2845
// Restore the link register and frame pointer
2846
let setup_area_size = i64::from(state.frame_layout().setup_area_size);
2847
if setup_area_size > 0 {
2848
Inst::gen_load(
2849
writable_link_reg(),
2850
AMode::SPOffset(sp_to_fp_offset + 8),
2851
I64,
2852
MemFlags::trusted(),
2853
)
2854
.emit(sink, emit_info, state);
2855
2856
Inst::gen_load(
2857
writable_fp_reg(),
2858
AMode::SPOffset(sp_to_fp_offset),
2859
I64,
2860
MemFlags::trusted(),
2861
)
2862
.emit(sink, emit_info, state);
2863
}
2864
2865
// If we over-allocated the incoming args area in the prologue, resize down to what the callee
2866
// is expecting.
2867
let incoming_args_diff =
2868
i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);
2869
2870
// Increment SP all at once
2871
let sp_increment = sp_to_fp_offset + setup_area_size + incoming_args_diff;
2872
if sp_increment > 0 {
2873
for inst in Riscv64MachineDeps::gen_sp_reg_adjust(i32::try_from(sp_increment).unwrap()) {
2874
inst.emit(sink, emit_info, state);
2875
}
2876
}
2877
}
2878
2879