GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/codegen/src/isa/riscv64/inst/emit.rs
1
//! Riscv64 ISA: binary code emission.
2
3
use crate::ir::{self, LibCall, TrapCode};
4
use crate::isa::riscv64::inst::*;
5
use crate::isa::riscv64::lower::isle::generated_code::{
6
CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp,
7
};
8
use cranelift_control::ControlPlane;
9
10
pub struct EmitInfo {
11
#[expect(dead_code, reason = "may want to be used in the future")]
12
shared_flag: settings::Flags,
13
isa_flags: super::super::riscv_settings::Flags,
14
}
15
16
impl EmitInfo {
17
pub(crate) fn new(
18
shared_flag: settings::Flags,
19
isa_flags: super::super::riscv_settings::Flags,
20
) -> Self {
21
Self {
22
shared_flag,
23
isa_flags,
24
}
25
}
26
}
27
28
pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
29
u32::from(m.to_real_reg().unwrap().hw_enc() & 31)
30
}
31
32
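/// Note: compressed (RVC) instruction formats use 3-bit register fields, which can
/// only name x8..x15 (s0/fp, s1, a0..a5); those registers are encoded as 0..7.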
pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 {
33
let real_reg = m.to_real_reg().unwrap().hw_enc();
34
debug_assert!(real_reg >= 8 && real_reg < 16);
35
let compressed_reg = real_reg - 8;
36
u32::from(compressed_reg)
37
}
38
39
#[derive(Clone, Debug, PartialEq, Default)]
40
pub enum EmitVState {
41
#[default]
42
Unknown,
43
Known(VState),
44
}
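// Tracking the last-known vector state lets `emit` skip a redundant
// `VecSetState` (i.e. a `vsetvli`) when the required configuration is already
// in effect from an earlier instruction in the same block.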
45
46
/// State carried between emissions of a sequence of instructions.
47
#[derive(Default, Clone, Debug)]
48
pub struct EmitState {
49
/// The user stack map for the upcoming instruction, as provided to
50
/// `pre_safepoint()`.
51
user_stack_map: Option<ir::UserStackMap>,
52
53
/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
54
/// optimized away at compile time. See [cranelift_control].
55
ctrl_plane: ControlPlane,
56
57
/// Vector State
58
/// Controls the current state of the vector unit at the emission point.
59
vstate: EmitVState,
60
61
frame_layout: FrameLayout,
62
}
63
64
impl EmitState {
65
fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {
66
self.user_stack_map.take()
67
}
68
69
fn clobber_vstate(&mut self) {
70
self.vstate = EmitVState::Unknown;
71
}
72
}
73
74
impl MachInstEmitState<Inst> for EmitState {
75
fn new(
76
abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>,
77
ctrl_plane: ControlPlane,
78
) -> Self {
79
EmitState {
80
user_stack_map: None,
81
ctrl_plane,
82
vstate: EmitVState::Unknown,
83
frame_layout: abi.frame_layout().clone(),
84
}
85
}
86
87
fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {
88
self.user_stack_map = user_stack_map;
89
}
90
91
fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
92
&mut self.ctrl_plane
93
}
94
95
fn take_ctrl_plane(self) -> ControlPlane {
96
self.ctrl_plane
97
}
98
99
fn on_new_block(&mut self) {
100
// Reset the vector state.
101
self.clobber_vstate();
102
}
103
104
fn frame_layout(&self) -> &FrameLayout {
105
&self.frame_layout
106
}
107
}
108
109
impl Inst {
110
/// Load an integer mask into `rd`.
111
/// For an integer type `ty`, this materializes a value with the low `ty.bits()` bits all set.
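/// For example, for I16 the resulting value in `rd` is 0x0000_0000_0000_ffff.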
112
pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {
113
let mut insts = SmallInstVec::new();
114
assert!(ty.is_int() && ty.bits() <= 64);
115
match ty {
116
I64 => {
117
insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
118
}
119
I32 | I16 => {
120
insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));
121
insts.push(Inst::Extend {
122
rd,
123
rn: rd.to_reg(),
124
signed: false,
125
from_bits: ty.bits() as u8,
126
to_bits: 64,
127
});
128
}
129
I8 => {
130
insts.push(Inst::load_imm12(rd, Imm12::from_i16(255)));
131
}
132
_ => unreachable!("ty:{:?}", ty),
133
}
134
insts
135
}
136
/// Invert all bits of `rs` into `rd` (bitwise NOT), implemented as `xori rd, rs, -1`.
137
pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {
138
Inst::AluRRImm12 {
139
alu_op: AluOPRRI::Xori,
140
rd,
141
rs,
142
imm12: Imm12::from_i16(-1),
143
}
144
}
145
146
/// Returns Some(VState) if this instruction is expecting a specific vector state
147
/// before emission.
148
fn expected_vstate(&self) -> Option<&VState> {
149
match self {
150
Inst::Nop0
151
| Inst::Nop4
152
| Inst::BrTable { .. }
153
| Inst::Auipc { .. }
154
| Inst::Fli { .. }
155
| Inst::Lui { .. }
156
| Inst::LoadInlineConst { .. }
157
| Inst::AluRRR { .. }
158
| Inst::FpuRRR { .. }
159
| Inst::AluRRImm12 { .. }
160
| Inst::CsrReg { .. }
161
| Inst::CsrImm { .. }
162
| Inst::Load { .. }
163
| Inst::Store { .. }
164
| Inst::Args { .. }
165
| Inst::Rets { .. }
166
| Inst::Ret { .. }
167
| Inst::Extend { .. }
168
| Inst::Call { .. }
169
| Inst::CallInd { .. }
170
| Inst::ReturnCall { .. }
171
| Inst::ReturnCallInd { .. }
172
| Inst::Jal { .. }
173
| Inst::CondBr { .. }
174
| Inst::LoadExtNameGot { .. }
175
| Inst::LoadExtNameNear { .. }
176
| Inst::LoadExtNameFar { .. }
177
| Inst::ElfTlsGetAddr { .. }
178
| Inst::LoadAddr { .. }
179
| Inst::Mov { .. }
180
| Inst::MovFromPReg { .. }
181
| Inst::Fence { .. }
182
| Inst::EBreak
183
| Inst::Udf { .. }
184
| Inst::FpuRR { .. }
185
| Inst::FpuRRRR { .. }
186
| Inst::Jalr { .. }
187
| Inst::Atomic { .. }
188
| Inst::Select { .. }
189
| Inst::AtomicCas { .. }
190
| Inst::RawData { .. }
191
| Inst::AtomicStore { .. }
192
| Inst::AtomicLoad { .. }
193
| Inst::AtomicRmwLoop { .. }
194
| Inst::TrapIf { .. }
195
| Inst::Unwind { .. }
196
| Inst::DummyUse { .. }
197
| Inst::LabelAddress { .. }
198
| Inst::SequencePoint { .. }
199
| Inst::Popcnt { .. }
200
| Inst::Cltz { .. }
201
| Inst::Brev8 { .. }
202
| Inst::StackProbeLoop { .. } => None,
203
204
// VecSetState does not expect any vstate; rather, it updates it.
205
Inst::VecSetState { .. } => None,
206
207
// `vmv` instructions copy a set of registers and ignore vstate.
208
Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,
209
210
Inst::VecAluRR { vstate, .. } |
211
Inst::VecAluRRR { vstate, .. } |
212
Inst::VecAluRRRR { vstate, .. } |
213
Inst::VecAluRImm5 { vstate, .. } |
214
Inst::VecAluRRImm5 { vstate, .. } |
215
Inst::VecAluRRRImm5 { vstate, .. } |
216
// TODO: Unit-stride loads and stores only need the AVL to be correct, not
217
// the full vtype. A future optimization could be to decouple these two when
218
// updating vstate. This would allow us to avoid emitting a VecSetState in
219
// some cases.
220
Inst::VecLoad { vstate, .. }
221
| Inst::VecStore { vstate, .. } => Some(vstate),
222
Inst::EmitIsland { .. } => None,
223
}
224
}
225
}
226
227
impl MachInstEmit for Inst {
228
type State = EmitState;
229
type Info = EmitInfo;
230
231
fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
232
// Check if we need to update the vector state before emitting this instruction
233
if let Some(expected) = self.expected_vstate() {
234
if state.vstate != EmitVState::Known(*expected) {
235
// Update the vector state.
236
Inst::VecSetState {
237
rd: writable_zero_reg(),
238
vstate: *expected,
239
}
240
.emit(sink, emit_info, state);
241
}
242
}
243
244
// N.B.: we *must* not exceed the "worst-case size" used to compute
245
// where to insert islands, except when islands are explicitly triggered
246
// (with an `EmitIsland`). We check this in debug builds. This is `mut`
247
// to allow disabling the check for `JTSequence`, which is always
248
// emitted following an `EmitIsland`.
249
let mut start_off = sink.cur_offset();
250
251
// First try to emit this as a compressed instruction
252
let res = self.try_emit_compressed(sink, emit_info, state, &mut start_off);
253
if res.is_none() {
254
// If we can't, emit it as a normal (uncompressed) instruction.
255
self.emit_uncompressed(sink, emit_info, state, &mut start_off);
256
}
257
258
// We exclude br_table, call, return_call and try_call from
259
// these checks since they emit their own islands, and thus
260
// are allowed to exceed the worst case size.
261
let emits_own_island = match self {
262
Inst::BrTable { .. }
263
| Inst::ReturnCall { .. }
264
| Inst::ReturnCallInd { .. }
265
| Inst::Call { .. }
266
| Inst::CallInd { .. }
267
| Inst::EmitIsland { .. } => true,
268
_ => false,
269
};
270
if !emits_own_island {
271
let end_off = sink.cur_offset();
272
assert!(
273
(end_off - start_off) <= Inst::worst_case_size(),
274
"Inst:{:?} length:{} worst_case_size:{}",
275
self,
276
end_off - start_off,
277
Inst::worst_case_size()
278
);
279
}
280
}
281
282
fn pretty_print_inst(&self, state: &mut Self::State) -> String {
283
self.print_with_state(state)
284
}
285
}
286
287
impl Inst {
288
/// Tries to emit this instruction in compressed form; returns `None` if that isn't possible.
289
fn try_emit_compressed(
290
&self,
291
sink: &mut MachBuffer<Inst>,
292
emit_info: &EmitInfo,
293
state: &mut EmitState,
294
start_off: &mut u32,
295
) -> Option<()> {
296
let has_m = emit_info.isa_flags.has_m();
297
let has_zba = emit_info.isa_flags.has_zba();
298
let has_zbb = emit_info.isa_flags.has_zbb();
299
let has_zca = emit_info.isa_flags.has_zca();
300
let has_zcb = emit_info.isa_flags.has_zcb();
301
let has_zcd = emit_info.isa_flags.has_zcd();
302
303
// Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc.) require Zca
304
// to be enabled, so check it early.
305
if !has_zca {
306
return None;
307
}
308
309
fn reg_is_compressible(r: Reg) -> bool {
310
r.to_real_reg()
311
.map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16)
312
.unwrap_or(false)
313
}
314
315
match *self {
316
// C.ADD
317
Inst::AluRRR {
318
alu_op: AluOPRRR::Add,
319
rd,
320
rs1,
321
rs2,
322
} if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
323
&& rs1 != zero_reg()
324
&& rs2 != zero_reg() =>
325
{
326
// Technically `c.add rd, rs` expands to `add rd, rd, rs`, but we can
327
// also swap rs1 with rs2 and get an equivalent instruction, i.e. we
328
// can also compress `add rd, rs, rd` into `c.add rd, rs`.
329
let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };
330
331
sink.put2(encode_cr_type(CrOp::CAdd, rd, src));
332
}
333
334
// C.MV
335
Inst::AluRRImm12 {
336
alu_op: AluOPRRI::Addi | AluOPRRI::Ori,
337
rd,
338
rs,
339
imm12,
340
} if rd.to_reg() != rs
341
&& rd.to_reg() != zero_reg()
342
&& rs != zero_reg()
343
&& imm12.as_i16() == 0 =>
344
{
345
sink.put2(encode_cr_type(CrOp::CMv, rd, rs));
346
}
347
348
// CA Ops
349
Inst::AluRRR {
350
alu_op:
351
alu_op @ (AluOPRRR::And
352
| AluOPRRR::Or
353
| AluOPRRR::Xor
354
| AluOPRRR::Addw
355
| AluOPRRR::Mul),
356
rd,
357
rs1,
358
rs2,
359
} if (rd.to_reg() == rs1 || rd.to_reg() == rs2)
360
&& reg_is_compressible(rs1)
361
&& reg_is_compressible(rs2) =>
362
{
363
let op = match alu_op {
364
AluOPRRR::And => CaOp::CAnd,
365
AluOPRRR::Or => CaOp::COr,
366
AluOPRRR::Xor => CaOp::CXor,
367
AluOPRRR::Addw => CaOp::CAddw,
368
AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul,
369
_ => return None,
370
};
371
// The canonical expansion for these instructions has `rd == rs1`, but
372
// these are all commutative operations, so we can swap the operands.
373
let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };
374
375
sink.put2(encode_ca_type(op, rd, src));
376
}
377
378
// The sub instructions are non-commutative, so we can't swap the operands.
379
Inst::AluRRR {
380
alu_op: alu_op @ (AluOPRRR::Sub | AluOPRRR::Subw),
381
rd,
382
rs1,
383
rs2,
384
} if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => {
385
let op = match alu_op {
386
AluOPRRR::Sub => CaOp::CSub,
387
AluOPRRR::Subw => CaOp::CSubw,
388
_ => return None,
389
};
390
sink.put2(encode_ca_type(op, rd, rs2));
391
}
392
393
// c.j
394
//
395
// We don't have a separate JAL as that is only available in RV32C
396
Inst::Jal { label } => {
397
sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump);
398
sink.add_uncond_branch(*start_off, *start_off + 2, label);
399
sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO));
400
}
401
402
// c.jr
403
Inst::Jalr { rd, base, offset }
404
if rd.to_reg() == zero_reg() && base != zero_reg() && offset.as_i16() == 0 =>
405
{
406
sink.put2(encode_cr2_type(CrOp::CJr, base));
407
state.clobber_vstate();
408
}
409
410
// c.jalr
411
Inst::Jalr { rd, base, offset }
412
if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 =>
413
{
414
sink.put2(encode_cr2_type(CrOp::CJalr, base));
415
state.clobber_vstate();
416
}
417
418
// c.ebreak
419
Inst::EBreak => {
420
sink.put2(encode_cr_type(
421
CrOp::CEbreak,
422
writable_zero_reg(),
423
zero_reg(),
424
));
425
}
426
427
// c.unimp
428
Inst::Udf { trap_code } => {
429
sink.add_trap(trap_code);
430
sink.put2(0x0000);
431
}
432
// c.addi16sp
433
//
434
// c.addi16sp shares the opcode with c.lui, but has a destination field of x2.
435
// c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in the stack pointer (sp=x2),
436
// where the immediate is scaled to represent multiples of 16 in the range (-512,496). c.addi16sp is used
437
// to adjust the stack pointer in procedure prologues and epilogues. It expands into addi x2, x2, nzimm. c.addi16sp
438
// is only valid when nzimm≠0; the code point with nzimm=0 is reserved.
439
Inst::AluRRImm12 {
440
alu_op: AluOPRRI::Addi,
441
rd,
442
rs,
443
imm12,
444
} if rd.to_reg() == rs
445
&& rs == stack_reg()
446
&& imm12.as_i16() != 0
447
&& (imm12.as_i16() % 16) == 0
448
&& Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() =>
449
{
450
let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap();
451
sink.put2(encode_c_addi16sp(imm6));
452
}
453
454
// c.addi4spn
455
//
456
// c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero
457
// immediate, scaled by 4, to the stack pointer, x2, and writes the result to
458
// rd. This instruction is used to generate pointers to stack-allocated variables
459
// and expands to addi rd, x2, nzuimm. c.addi4spn is only valid when nzuimm≠0;
460
// the code points with nzuimm=0 are reserved.
461
Inst::AluRRImm12 {
462
alu_op: AluOPRRI::Addi,
463
rd,
464
rs,
465
imm12,
466
} if reg_is_compressible(rd.to_reg())
467
&& rs == stack_reg()
468
&& imm12.as_i16() != 0
469
&& (imm12.as_i16() % 4) == 0
470
&& u8::try_from(imm12.as_i16() / 4).is_ok() =>
471
{
472
let imm = u8::try_from(imm12.as_i16() / 4).unwrap();
473
sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm));
474
}
475
476
// c.li
477
Inst::AluRRImm12 {
478
alu_op: AluOPRRI::Addi,
479
rd,
480
rs,
481
imm12,
482
} if rd.to_reg() != zero_reg() && rs == zero_reg() => {
483
let imm6 = Imm6::maybe_from_imm12(imm12)?;
484
sink.put2(encode_ci_type(CiOp::CLi, rd, imm6));
485
}
486
487
// c.addi
488
Inst::AluRRImm12 {
489
alu_op: AluOPRRI::Addi,
490
rd,
491
rs,
492
imm12,
493
} if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
494
let imm6 = Imm6::maybe_from_imm12(imm12)?;
495
sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6));
496
}
497
498
// c.addiw
499
Inst::AluRRImm12 {
500
alu_op: AluOPRRI::Addiw,
501
rd,
502
rs,
503
imm12,
504
} if rd.to_reg() == rs && rs != zero_reg() => {
505
let imm6 = Imm6::maybe_from_imm12(imm12)?;
506
sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6));
507
}
508
509
// c.lui
510
//
511
// c.lui loads the non-zero 6-bit immediate field into bits 17–12
512
// of the destination register, clears the bottom 12 bits, and
513
// sign-extends bit 17 into all higher bits of the destination.
514
Inst::Lui { rd, imm: imm20 }
515
if rd.to_reg() != zero_reg()
516
&& rd.to_reg() != stack_reg()
517
&& imm20.as_i32() != 0 =>
518
{
519
// Check that the top bits are sign extended
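// (`<< 14 >> 14` sign-extends the 20-bit immediate from bit 17; together with
// the `Imm6` conversion below this ensures the constant is one that c.lui's
// sign-extended 6-bit immediate can actually reproduce.)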
520
let imm = imm20.as_i32() << 14 >> 14;
521
if imm != imm20.as_i32() {
522
return None;
523
}
524
let imm6 = Imm6::maybe_from_i32(imm)?;
525
sink.put2(encode_ci_type(CiOp::CLui, rd, imm6));
526
}
527
528
// c.slli
529
Inst::AluRRImm12 {
530
alu_op: AluOPRRI::Slli,
531
rd,
532
rs,
533
imm12,
534
} if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {
535
// The shift amount is unsigned, but we encode it as signed.
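// (`shift << 10 >> 10` sign-extends the low 6 bits of the i16 so the value
// round-trips through `Imm6`; only the raw 6 bits end up in the encoding, so
// the sign is irrelevant.)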
536
let shift = imm12.as_i16() & 0x3f;
537
let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
538
sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6));
539
}
540
541
// c.srli / c.srai
542
Inst::AluRRImm12 {
543
alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai),
544
rd,
545
rs,
546
imm12,
547
} if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => {
548
let op = match op {
549
AluOPRRI::Srli => CbOp::CSrli,
550
AluOPRRI::Srai => CbOp::CSrai,
551
_ => unreachable!(),
552
};
553
554
// The shift amount is unsigned, but we encode it as signed.
555
let shift = imm12.as_i16() & 0x3f;
556
let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();
557
sink.put2(encode_cb_type(op, rd, imm6));
558
}
559
560
// c.zextb
561
//
562
// This is an alias for `andi rd, rd, 0xff`
563
Inst::AluRRImm12 {
564
alu_op: AluOPRRI::Andi,
565
rd,
566
rs,
567
imm12,
568
} if has_zcb
569
&& rd.to_reg() == rs
570
&& reg_is_compressible(rs)
571
&& imm12.as_i16() == 0xff =>
572
{
573
sink.put2(encode_cszn_type(CsznOp::CZextb, rd));
574
}
575
576
// c.andi
577
Inst::AluRRImm12 {
578
alu_op: AluOPRRI::Andi,
579
rd,
580
rs,
581
imm12,
582
} if rd.to_reg() == rs && reg_is_compressible(rs) => {
583
let imm6 = Imm6::maybe_from_imm12(imm12)?;
584
sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6));
585
}
586
587
// Stack Based Loads
588
Inst::Load {
589
rd,
590
op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld),
591
from,
592
flags,
593
} if from.get_base_register() == Some(stack_reg())
594
&& (from.get_offset_with_state(state) % op.size()) == 0 =>
595
{
596
// We encode the offset in multiples of the load size.
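// For example, with `c.ldsp` (an 8-byte load) a byte offset of 40 is encoded
// as the unsigned value 5.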
597
let offset = from.get_offset_with_state(state);
598
let imm6 = u8::try_from(offset / op.size())
599
.ok()
600
.and_then(Uimm6::maybe_from_u8)?;
601
602
// Some additional constraints on these instructions.
603
//
604
// Integer loads are not allowed to target x0, but floating point loads
605
// are, since f0 is not a special register.
606
//
607
// Floating point loads are not included in the base Zca extension
608
// but in a separate Zcd extension. Both of these are part of the C Extension.
609
let rd_is_zero = rd.to_reg() == zero_reg();
610
let op = match op {
611
LoadOP::Lw if !rd_is_zero => CiOp::CLwsp,
612
LoadOP::Ld if !rd_is_zero => CiOp::CLdsp,
613
LoadOP::Fld if has_zcd => CiOp::CFldsp,
614
_ => return None,
615
};
616
617
if let Some(trap_code) = flags.trap_code() {
618
// Register the offset at which the actual load instruction starts.
619
sink.add_trap(trap_code);
620
}
621
sink.put2(encode_ci_sp_load(op, rd, imm6));
622
}
623
624
// Regular Loads
625
Inst::Load {
626
rd,
627
op:
628
op
629
@ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh),
630
from,
631
flags,
632
} if reg_is_compressible(rd.to_reg())
633
&& from
634
.get_base_register()
635
.map(reg_is_compressible)
636
.unwrap_or(false)
637
&& (from.get_offset_with_state(state) % op.size()) == 0 =>
638
{
639
let base = from.get_base_register().unwrap();
640
641
// We encode the offset in multiples of the load size.
642
let offset = from.get_offset_with_state(state);
643
let offset = u8::try_from(offset / op.size()).ok()?;
644
645
// We mix two different formats here.
646
//
647
// c.lw / c.ld / c.fld instructions are available in the standard Zca
648
// extension using the CL format.
649
//
650
// c.lbu / c.lhu / c.lh are only available in the Zcb extension and
651
// are also encoded differently. Technically they each have a different
652
// format, but they are similar enough that we can group them.
653
let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh);
654
let encoded = if is_zcb_load {
655
if !has_zcb {
656
return None;
657
}
658
659
let op = match op {
660
LoadOP::Lbu => ZcbMemOp::CLbu,
661
LoadOP::Lhu => ZcbMemOp::CLhu,
662
LoadOP::Lh => ZcbMemOp::CLh,
663
_ => unreachable!(),
664
};
665
666
// Byte stores & loads have 2 bits of immediate offset. Halfword stores
667
// and loads only have 1 bit.
668
let imm2 = Uimm2::maybe_from_u8(offset)?;
669
if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
670
return None;
671
}
672
673
encode_zcbmem_load(op, rd, base, imm2)
674
} else {
675
// Floating point loads are not included in the base Zca extension
676
// but in a separate Zcd extension. Both of these are part of the C Extension.
677
let op = match op {
678
LoadOP::Lw => ClOp::CLw,
679
LoadOP::Ld => ClOp::CLd,
680
LoadOP::Fld if has_zcd => ClOp::CFld,
681
_ => return None,
682
};
683
let imm5 = Uimm5::maybe_from_u8(offset)?;
684
685
encode_cl_type(op, rd, base, imm5)
686
};
687
688
if let Some(trap_code) = flags.trap_code() {
689
// Register the offset at which the actual load instruction starts.
690
sink.add_trap(trap_code);
691
}
692
sink.put2(encoded);
693
}
694
695
// Stack Based Stores
696
Inst::Store {
697
src,
698
op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd),
699
to,
700
flags,
701
} if to.get_base_register() == Some(stack_reg())
702
&& (to.get_offset_with_state(state) % op.size()) == 0 =>
703
{
704
// We encode the offset in multiples of the store size.
705
let offset = to.get_offset_with_state(state);
706
let imm6 = u8::try_from(offset / op.size())
707
.ok()
708
.and_then(Uimm6::maybe_from_u8)?;
709
710
// Floating point stores are not included in the base Zca extension
711
// but in a separate Zcd extension. Both of these are part of the C Extension.
712
let op = match op {
713
StoreOP::Sw => CssOp::CSwsp,
714
StoreOP::Sd => CssOp::CSdsp,
715
StoreOP::Fsd if has_zcd => CssOp::CFsdsp,
716
_ => return None,
717
};
718
719
if let Some(trap_code) = flags.trap_code() {
720
// Register the offset at which the actual store instruction starts.
721
sink.add_trap(trap_code);
722
}
723
sink.put2(encode_css_type(op, src, imm6));
724
}
725
726
// Regular Stores
727
Inst::Store {
728
src,
729
op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb),
730
to,
731
flags,
732
} if reg_is_compressible(src)
733
&& to
734
.get_base_register()
735
.map(reg_is_compressible)
736
.unwrap_or(false)
737
&& (to.get_offset_with_state(state) % op.size()) == 0 =>
738
{
739
let base = to.get_base_register().unwrap();
740
741
// We encode the offset in multiples of the store size.
742
let offset = to.get_offset_with_state(state);
743
let offset = u8::try_from(offset / op.size()).ok()?;
744
745
// We mix two different formats here.
746
//
747
// c.sw / c.sd / c.fsd instructions are available in the standard Zca
748
// extension using the CS format.
749
//
750
// c.sb / c.sh are only available in the Zcb extension and are also
751
// encoded differently.
752
let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb);
753
let encoded = if is_zcb_store {
754
if !has_zcb {
755
return None;
756
}
757
758
let op = match op {
759
StoreOP::Sh => ZcbMemOp::CSh,
760
StoreOP::Sb => ZcbMemOp::CSb,
761
_ => unreachable!(),
762
};
763
764
// Byte stores & loads have 2 bits of immediate offset. Halfword stores
765
// and loads only have 1 bit.
766
let imm2 = Uimm2::maybe_from_u8(offset)?;
767
if (offset & !((1 << op.imm_bits()) - 1)) != 0 {
768
return None;
769
}
770
771
encode_zcbmem_store(op, src, base, imm2)
772
} else {
773
// Floating point stores are not included in the base Zca extension
774
// but in a separate Zcd extension. Both of these are part of the C Extension.
775
let op = match op {
776
StoreOP::Sw => CsOp::CSw,
777
StoreOP::Sd => CsOp::CSd,
778
StoreOP::Fsd if has_zcd => CsOp::CFsd,
779
_ => return None,
780
};
781
let imm5 = Uimm5::maybe_from_u8(offset)?;
782
783
encode_cs_type(op, src, base, imm5)
784
};
785
786
if let Some(trap_code) = flags.trap_code() {
787
// Register the offset at which the actual store instruction starts.
788
sink.add_trap(trap_code);
789
}
790
sink.put2(encoded);
791
}
792
793
// c.not
794
//
795
// This is an alias for `xori rd, rd, -1`
796
Inst::AluRRImm12 {
797
alu_op: AluOPRRI::Xori,
798
rd,
799
rs,
800
imm12,
801
} if has_zcb
802
&& rd.to_reg() == rs
803
&& reg_is_compressible(rs)
804
&& imm12.as_i16() == -1 =>
805
{
806
sink.put2(encode_cszn_type(CsznOp::CNot, rd));
807
}
808
809
// c.sext.b / c.sext.h / c.zext.h
810
//
811
// These are all the extend instructions present in `Zcb`; they
812
// also require `Zbb` since they aren't available in the base ISA.
813
Inst::AluRRImm12 {
814
alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth),
815
rd,
816
rs,
817
imm12,
818
} if has_zcb
819
&& has_zbb
820
&& rd.to_reg() == rs
821
&& reg_is_compressible(rs)
822
&& imm12.as_i16() == 0 =>
823
{
824
let op = match alu_op {
825
AluOPRRI::Sextb => CsznOp::CSextb,
826
AluOPRRI::Sexth => CsznOp::CSexth,
827
AluOPRRI::Zexth => CsznOp::CZexth,
828
_ => unreachable!(),
829
};
830
sink.put2(encode_cszn_type(op, rd));
831
}
832
833
// c.zext.w
834
//
835
// This is an alias for `add.uw rd, rd, zero`
836
Inst::AluRRR {
837
alu_op: AluOPRRR::Adduw,
838
rd,
839
rs1,
840
rs2,
841
} if has_zcb
842
&& has_zba
843
&& rd.to_reg() == rs1
844
&& reg_is_compressible(rs1)
845
&& rs2 == zero_reg() =>
846
{
847
sink.put2(encode_cszn_type(CsznOp::CZextw, rd));
848
}
849
850
_ => return None,
851
}
852
853
return Some(());
854
}
855
856
fn emit_uncompressed(
857
&self,
858
sink: &mut MachBuffer<Inst>,
859
emit_info: &EmitInfo,
860
state: &mut EmitState,
861
start_off: &mut u32,
862
) {
863
match self {
864
&Inst::Nop0 => {
865
// do nothing
866
}
867
// Addi x0, x0, 0
868
&Inst::Nop4 => {
869
let x = Inst::AluRRImm12 {
870
alu_op: AluOPRRI::Addi,
871
rd: Writable::from_reg(zero_reg()),
872
rs: zero_reg(),
873
imm12: Imm12::ZERO,
874
};
875
x.emit(sink, emit_info, state)
876
}
877
&Inst::RawData { ref data } => {
878
// Right now we only put a u32 or u64 in this instruction.
879
// That is short, so there is no need to check whether we need an `emit_island`.
880
// If the data were very long that would be a bug, because RawData is typically
881
// used to load some data that relies on a fixed position in the code stream,
882
// and we could exceed `Inst::worst_case_size`.
883
// For more information see https://github.com/bytecodealliance/wasmtime/pull/5612.
884
sink.put_data(&data[..]);
885
}
886
&Inst::Lui { rd, ref imm } => {
887
let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);
888
sink.put4(x);
889
}
890
&Inst::Fli { rd, width, imm } => {
891
sink.put4(encode_fli(width, imm, rd));
892
}
893
&Inst::LoadInlineConst { rd, ty, imm } => {
894
let data = &imm.to_le_bytes()[..ty.bytes() as usize];
895
896
let label_data: MachLabel = sink.get_label();
897
let label_end: MachLabel = sink.get_label();
898
899
// Load into rd
900
Inst::Load {
901
rd,
902
op: LoadOP::from_type(ty),
903
flags: MemFlags::new(),
904
from: AMode::Label(label_data),
905
}
906
.emit(sink, emit_info, state);
907
908
// Jump over the inline pool
909
Inst::gen_jump(label_end).emit(sink, emit_info, state);
910
911
// Emit the inline data
912
sink.bind_label(label_data, &mut state.ctrl_plane);
913
Inst::RawData { data: data.into() }.emit(sink, emit_info, state);
914
915
sink.bind_label(label_end, &mut state.ctrl_plane);
916
}
917
&Inst::FpuRR {
918
alu_op,
919
width,
920
frm,
921
rd,
922
rs,
923
} => {
924
if alu_op.is_convert_to_int() {
925
sink.add_trap(TrapCode::BAD_CONVERSION_TO_INTEGER);
926
}
927
sink.put4(encode_fp_rr(alu_op, width, frm, rd, rs));
928
}
929
&Inst::FpuRRRR {
930
alu_op,
931
rd,
932
rs1,
933
rs2,
934
rs3,
935
frm,
936
width,
937
} => {
938
sink.put4(encode_fp_rrrr(alu_op, width, frm, rd, rs1, rs2, rs3));
939
}
940
&Inst::FpuRRR {
941
alu_op,
942
width,
943
frm,
944
rd,
945
rs1,
946
rs2,
947
} => {
948
sink.put4(encode_fp_rrr(alu_op, width, frm, rd, rs1, rs2));
949
}
950
&Inst::Unwind { ref inst } => {
951
sink.add_unwind(inst.clone());
952
}
953
&Inst::DummyUse { .. } => {
954
// This has already been handled by Inst::allocate.
955
}
956
&Inst::AluRRR {
957
alu_op,
958
rd,
959
rs1,
960
rs2,
961
} => {
962
let (rs1, rs2) = if alu_op.reverse_rs() {
963
(rs2, rs1)
964
} else {
965
(rs1, rs2)
966
};
967
968
sink.put4(encode_r_type(
969
alu_op.op_code(),
970
rd,
971
alu_op.funct3(),
972
rs1,
973
rs2,
974
alu_op.funct7(),
975
));
976
}
977
&Inst::AluRRImm12 {
978
alu_op,
979
rd,
980
rs,
981
imm12,
982
} => {
983
let x = alu_op.op_code()
984
| reg_to_gpr_num(rd.to_reg()) << 7
985
| alu_op.funct3() << 12
986
| reg_to_gpr_num(rs) << 15
987
| alu_op.imm12(imm12) << 20;
988
sink.put4(x);
989
}
990
&Inst::CsrReg { op, rd, rs, csr } => {
991
sink.put4(encode_csr_reg(op, rd, rs, csr));
992
}
993
&Inst::CsrImm { op, rd, csr, imm } => {
994
sink.put4(encode_csr_imm(op, rd, csr, imm));
995
}
996
&Inst::Load {
997
rd,
998
op: LoadOP::Flh,
999
from,
1000
flags,
1001
} if !emit_info.isa_flags.has_zfhmin() => {
1002
// flh unavailable, use an integer load instead
1003
Inst::Load {
1004
rd: writable_spilltmp_reg(),
1005
op: LoadOP::Lh,
1006
flags,
1007
from,
1008
}
1009
.emit(sink, emit_info, state);
1010
// NaN-box the `f16` before loading it into the floating-point
1011
// register with a 32-bit `fmv`.
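// A narrower value held in a wider FP register must be NaN-boxed: every bit
// above the f16 payload is set to 1. Build the 0xffff_0000 mask with `lui`
// and OR it in before transferring the pattern with `fmv.w.x`.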
1012
Inst::Lui {
1013
rd: writable_spilltmp_reg2(),
1014
imm: Imm20::from_i32((0xffff_0000_u32 as i32) >> 12),
1015
}
1016
.emit(sink, emit_info, state);
1017
Inst::AluRRR {
1018
alu_op: AluOPRRR::Or,
1019
rd: writable_spilltmp_reg(),
1020
rs1: spilltmp_reg(),
1021
rs2: spilltmp_reg2(),
1022
}
1023
.emit(sink, emit_info, state);
1024
Inst::FpuRR {
1025
alu_op: FpuOPRR::FmvFmtX,
1026
width: FpuOPWidth::S,
1027
frm: FRM::RNE,
1028
rd,
1029
rs: spilltmp_reg(),
1030
}
1031
.emit(sink, emit_info, state);
1032
}
1033
&Inst::Load {
1034
rd,
1035
op,
1036
from,
1037
flags,
1038
} => {
1039
let base = from.get_base_register();
1040
let offset = from.get_offset_with_state(state);
1041
let offset_imm12 = Imm12::maybe_from_i64(offset);
1042
let label = from.get_label_with_sink(sink);
1043
1044
let (addr, imm12) = match (base, offset_imm12, label) {
1045
// When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it.
1046
(Some(base), Some(imm12), None) => (base, imm12),
1047
1048
// Otherwise, if the offset does not fit into an imm12, we need to materialize it into a
1049
// register and load from that.
1050
(Some(_), None, None) => {
1051
let tmp = writable_spilltmp_reg();
1052
Inst::LoadAddr { rd: tmp, mem: from }.emit(sink, emit_info, state);
1053
(tmp.to_reg(), Imm12::ZERO)
1054
}
1055
1056
// If the AMode contains a label we can emit an internal relocation that gets
1057
// resolved with the correct address later.
1058
(None, Some(imm), Some(label)) => {
1059
debug_assert_eq!(imm.as_i16(), 0);
1060
1061
// Get the current PC.
1062
sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
1063
Inst::Auipc {
1064
rd,
1065
imm: Imm20::ZERO,
1066
}
1067
.emit_uncompressed(sink, emit_info, state, start_off);
1068
1069
// Emit a relocation for the load. This patches the offset into the instruction.
1070
sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
1071
1072
// Imm12 here is meaningless since it's going to get replaced.
1073
(rd.to_reg(), Imm12::ZERO)
1074
}
1075
1076
// These cases are impossible with the current AModes that we have. We either
1077
// always have a register, or always have a label. Never both, and never neither.
1078
(None, None, None)
1079
| (None, Some(_), None)
1080
| (Some(_), None, Some(_))
1081
| (Some(_), Some(_), Some(_))
1082
| (None, None, Some(_)) => {
1083
unreachable!("Invalid load address")
1084
}
1085
};
1086
1087
if let Some(trap_code) = flags.trap_code() {
1088
// Register the offset at which the actual load instruction starts.
1089
sink.add_trap(trap_code);
1090
}
1091
1092
sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12));
1093
}
1094
&Inst::Store {
1095
op: StoreOP::Fsh,
1096
src,
1097
flags,
1098
to,
1099
} if !emit_info.isa_flags.has_zfhmin() => {
1100
// fsh unavailable, use an integer store instead
1101
Inst::FpuRR {
1102
alu_op: FpuOPRR::FmvXFmt,
1103
width: FpuOPWidth::S,
1104
frm: FRM::RNE,
1105
rd: writable_spilltmp_reg(),
1106
rs: src,
1107
}
1108
.emit(sink, emit_info, state);
1109
Inst::Store {
1110
to,
1111
op: StoreOP::Sh,
1112
flags,
1113
src: spilltmp_reg(),
1114
}
1115
.emit(sink, emit_info, state);
1116
}
1117
&Inst::Store { op, src, flags, to } => {
1118
let base = to.get_base_register();
1119
let offset = to.get_offset_with_state(state);
1120
let offset_imm12 = Imm12::maybe_from_i64(offset);
1121
1122
let (addr, imm12) = match (base, offset_imm12) {
1123
// If the offset fits into an imm12 we can directly encode it.
1124
(Some(base), Some(imm12)) => (base, imm12),
1125
// Otherwise, materialize the address into a register and store to that.
1126
_ => {
1127
let tmp = writable_spilltmp_reg();
1128
Inst::LoadAddr { rd: tmp, mem: to }.emit(sink, emit_info, state);
1129
(tmp.to_reg(), Imm12::ZERO)
1130
}
1131
};
1132
1133
if let Some(trap_code) = flags.trap_code() {
1134
// Register the offset at which the actual store instruction starts.
1135
sink.add_trap(trap_code);
1136
}
1137
1138
sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12));
1139
}
1140
&Inst::Args { .. } | &Inst::Rets { .. } => {
1141
// Nothing: this is a pseudoinstruction that serves
1142
// only to constrain registers at a certain point.
1143
}
1144
&Inst::Ret {} => {
1145
// RISC-V does not have a dedicated ret instruction; instead we emit the equivalent
1146
// `jalr x0, x1, 0` that jumps to the return address.
1147
Inst::Jalr {
1148
rd: writable_zero_reg(),
1149
base: link_reg(),
1150
offset: Imm12::ZERO,
1151
}
1152
.emit(sink, emit_info, state);
1153
}
1154
1155
&Inst::Extend {
1156
rd,
1157
rn,
1158
signed,
1159
from_bits,
1160
to_bits: _to_bits,
1161
} => {
1162
let mut insts = SmallInstVec::new();
1163
let shift_bits = (64 - from_bits) as i16;
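// The general pattern: shift left so the source's top bit lands in bit 63,
// then shift right by the same amount (arithmetic for a signed extend,
// logical for an unsigned one).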
1164
let is_u8 = || from_bits == 8 && signed == false;
1165
if is_u8() {
1166
// Special case for u8: just mask the low byte.
1167
insts.push(Inst::AluRRImm12 {
1168
alu_op: AluOPRRI::Andi,
1169
rd,
1170
rs: rn,
1171
imm12: Imm12::from_i16(255),
1172
});
1173
} else {
1174
insts.push(Inst::AluRRImm12 {
1175
alu_op: AluOPRRI::Slli,
1176
rd,
1177
rs: rn,
1178
imm12: Imm12::from_i16(shift_bits),
1179
});
1180
insts.push(Inst::AluRRImm12 {
1181
alu_op: if signed {
1182
AluOPRRI::Srai
1183
} else {
1184
AluOPRRI::Srli
1185
},
1186
rd,
1187
rs: rd.to_reg(),
1188
imm12: Imm12::from_i16(shift_bits),
1189
});
1190
}
1191
insts
1192
.into_iter()
1193
.for_each(|i| i.emit(sink, emit_info, state));
1194
}
1195
1196
&Inst::Call { ref info } => {
1197
sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);
1198
1199
let start = sink.cur_offset();
1200
Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0)
1201
.into_iter()
1202
.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1203
1204
if let Some(s) = state.take_stack_map() {
1205
let offset = sink.cur_offset();
1206
sink.push_user_stack_map(state, offset, s);
1207
}
1208
1209
if let Some(try_call) = info.try_call_info.as_ref() {
1210
sink.add_try_call_site(
1211
Some(state.frame_layout.sp_to_fp()),
1212
try_call.exception_handlers(&state.frame_layout),
1213
);
1214
} else {
1215
sink.add_call_site();
1216
}
1217
1218
let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
1219
if callee_pop_size > 0 {
1220
for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
1221
inst.emit(sink, emit_info, state);
1222
}
1223
}
1224
1225
if info.patchable {
1226
sink.add_patchable_call_site(sink.cur_offset() - start);
1227
} else {
1228
// Load any stack-carried return values.
1229
info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
1230
state.frame_layout().stackslots_size,
1231
|inst| inst.emit(sink, emit_info, state),
1232
|needed_space| Some(Inst::EmitIsland { needed_space }),
1233
);
1234
}
1235
1236
// If this is a try-call, jump to the continuation
1237
// (normal-return) block.
1238
if let Some(try_call) = info.try_call_info.as_ref() {
1239
let jmp = Inst::Jal {
1240
label: try_call.continuation,
1241
};
1242
jmp.emit(sink, emit_info, state);
1243
}
1244
1245
*start_off = sink.cur_offset();
1246
}
1247
&Inst::CallInd { ref info } => {
1248
Inst::Jalr {
1249
rd: writable_link_reg(),
1250
base: info.dest,
1251
offset: Imm12::ZERO,
1252
}
1253
.emit(sink, emit_info, state);
1254
1255
if let Some(s) = state.take_stack_map() {
1256
let offset = sink.cur_offset();
1257
sink.push_user_stack_map(state, offset, s);
1258
}
1259
1260
if let Some(try_call) = info.try_call_info.as_ref() {
1261
sink.add_try_call_site(
1262
Some(state.frame_layout.sp_to_fp()),
1263
try_call.exception_handlers(&state.frame_layout),
1264
);
1265
} else {
1266
sink.add_call_site();
1267
}
1268
1269
let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
1270
if callee_pop_size > 0 {
1271
for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
1272
inst.emit(sink, emit_info, state);
1273
}
1274
}
1275
1276
// Load any stack-carried return values.
1277
info.emit_retval_loads::<Riscv64MachineDeps, _, _>(
1278
state.frame_layout().stackslots_size,
1279
|inst| inst.emit(sink, emit_info, state),
1280
|needed_space| Some(Inst::EmitIsland { needed_space }),
1281
);
1282
1283
// If this is a try-call, jump to the continuation
1284
// (normal-return) block.
1285
if let Some(try_call) = info.try_call_info.as_ref() {
1286
let jmp = Inst::Jal {
1287
label: try_call.continuation,
1288
};
1289
jmp.emit(sink, emit_info, state);
1290
}
1291
1292
*start_off = sink.cur_offset();
1293
}
1294
1295
&Inst::ReturnCall { ref info } => {
1296
emit_return_call_common_sequence(sink, emit_info, state, info);
1297
1298
sink.add_call_site();
1299
sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);
1300
Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)
1301
.into_iter()
1302
.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1303
}
1304
1305
&Inst::ReturnCallInd { ref info } => {
1306
emit_return_call_common_sequence(sink, emit_info, state, &info);
1307
1308
Inst::Jalr {
1309
rd: writable_zero_reg(),
1310
base: info.dest,
1311
offset: Imm12::ZERO,
1312
}
1313
.emit(sink, emit_info, state);
1314
}
1315
&Inst::Jal { label } => {
1316
sink.use_label_at_offset(*start_off, label, LabelUse::Jal20);
1317
sink.add_uncond_branch(*start_off, *start_off + 4, label);
1318
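// Emit `jal x0, 0` (just the JAL opcode); the real offset is patched in later
// via the `Jal20` label use registered above.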
sink.put4(0b1101111);
1319
state.clobber_vstate();
1320
}
1321
&Inst::CondBr {
1322
taken,
1323
not_taken,
1324
kind,
1325
} => {
1326
match taken {
1327
CondBrTarget::Label(label) => {
1328
let code = kind.emit();
1329
let code_inverse = kind.inverse().emit().to_le_bytes();
1330
sink.use_label_at_offset(*start_off, label, LabelUse::B12);
1331
sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse);
1332
sink.put4(code);
1333
}
1334
CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"),
1335
}
1336
1337
match not_taken {
1338
CondBrTarget::Label(label) => {
1339
Inst::gen_jump(label).emit(sink, emit_info, state)
1340
}
1341
CondBrTarget::Fallthrough => {}
1342
};
1343
}
1344
1345
&Inst::Mov { rd, rm, ty } => {
1346
debug_assert_eq!(rd.to_reg().class(), rm.class());
1347
if rd.to_reg() == rm {
1348
return;
1349
}
1350
1351
match rm.class() {
1352
RegClass::Int => Inst::AluRRImm12 {
1353
alu_op: AluOPRRI::Addi,
1354
rd,
1355
rs: rm,
1356
imm12: Imm12::ZERO,
1357
},
1358
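// A float-to-float move is `fsgnj.{s,d} rd, rs, rs`, the canonical expansion
// of the `fmv.s`/`fmv.d` pseudo-instruction.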
RegClass::Float => Inst::FpuRRR {
1359
alu_op: FpuOPRRR::Fsgnj,
1360
width: FpuOPWidth::try_from(ty).unwrap(),
1361
frm: FRM::RNE,
1362
rd,
1363
rs1: rm,
1364
rs2: rm,
1365
},
1366
RegClass::Vector => Inst::VecAluRRImm5 {
1367
op: VecAluOpRRImm5::VmvrV,
1368
vd: rd,
1369
vs2: rm,
1370
// Imm 0 means copy 1 register.
1371
imm: Imm5::maybe_from_i8(0).unwrap(),
1372
mask: VecOpMasking::Disabled,
1373
// Vstate for this instruction is ignored.
1374
vstate: VState::from_type(ty),
1375
},
1376
}
1377
.emit(sink, emit_info, state);
1378
}
1379
1380
&Inst::MovFromPReg { rd, rm } => {
1381
Inst::gen_move(rd, Reg::from(rm), I64).emit(sink, emit_info, state);
1382
}
1383
1384
&Inst::BrTable {
1385
index,
1386
tmp1,
1387
tmp2,
1388
ref targets,
1389
} => {
1390
let ext_index = writable_spilltmp_reg();
1391
1392
let label_compute_target = sink.get_label();
1393
1394
// The default target is passed in as the 0th element of `targets`;
1396
// separate it out here for clarity.
1396
let default_target = targets[0];
1397
let targets = &targets[1..];
1398
1399
// We are going to potentially emit a large number of instructions, so ensure that we emit an island
1400
// now if we need one.
1401
//
1402
// The worst-case PC calculation takes 12 instructions, and each entry in the jump table is 2 instructions.
1403
// Check whether an island is needed before we emit that many instructions.
1404
let inst_count = 12 + (targets.len() * 2);
1405
let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32;
1406
if sink.island_needed(distance) {
1407
let jump_around_label = sink.get_label();
1408
Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
1409
sink.emit_island(distance + 4, &mut state.ctrl_plane);
1410
sink.bind_label(jump_around_label, &mut state.ctrl_plane);
1411
}
1412
1413
// We emit a bounds check on the index: if the index is larger than the number of
1414
// jump table entries, we jump to the default block. Otherwise we compute a jump
1415
// offset by multiplying the index by 8 (the size of each entry) and then jump to
1416
// that offset. Each jump table entry is a regular auipc+jalr which we emit sequentially.
1417
//
1418
// Build the following sequence:
1419
//
1420
// extend_index:
1421
// zext.w ext_index, index
1422
// bounds_check:
1423
// li tmp, n_labels
1424
// bltu ext_index, tmp, compute_target
1425
// jump_to_default_block:
1426
// auipc pc, 0
1427
// jalr zero, pc, default_block
1428
// compute_target:
1429
// auipc pc, 0
1430
// slli tmp, ext_index, 3
1431
// add pc, pc, tmp
1432
// jalr zero, pc, 0x10
1433
// jump_table:
1434
// ; This repeats for each entry in the jumptable
1435
// auipc pc, 0
1436
// jalr zero, pc, block_target
1437
1438
// Extend the index to 64 bits.
1439
//
1440
// This prevents us branching on the top 32 bits of the index, which
1441
// are undefined.
1442
Inst::Extend {
1443
rd: ext_index,
1444
rn: index,
1445
signed: false,
1446
from_bits: 32,
1447
to_bits: 64,
1448
}
1449
.emit(sink, emit_info, state);
1450
1451
// Bounds check.
1452
//
1453
// Check if the index passed in is larger than the number of jumptable
1454
// entries that we have. If it is, we fallthrough to a jump into the
1455
// default block.
1456
Inst::load_constant_u32(tmp2, targets.len() as u64)
1457
.iter()
1458
.for_each(|i| i.emit(sink, emit_info, state));
1459
Inst::CondBr {
1460
taken: CondBrTarget::Label(label_compute_target),
1461
not_taken: CondBrTarget::Fallthrough,
1462
kind: IntegerCompare {
1463
kind: IntCC::UnsignedLessThan,
1464
rs1: ext_index.to_reg(),
1465
rs2: tmp2.to_reg(),
1466
},
1467
}
1468
.emit(sink, emit_info, state);
1469
1470
sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32);
1471
Inst::construct_auipc_and_jalr(None, tmp2, 0)
1472
.iter()
1473
.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1474
1475
// Compute the jump table offset.
1476
// We need to emit a PC-relative offset.
1477
sink.bind_label(label_compute_target, &mut state.ctrl_plane);
1478
1479
// Get the current PC.
1480
Inst::Auipc {
1481
rd: tmp1,
1482
imm: Imm20::ZERO,
1483
}
1484
.emit_uncompressed(sink, emit_info, state, start_off);
1485
1486
// These instructions must be emitted as uncompressed since we
1487
// are manually computing the offset from the PC.
1488
1489
// Multiply the index by 8, since that is the size in
1490
// bytes of each jump table entry
1491
Inst::AluRRImm12 {
1492
alu_op: AluOPRRI::Slli,
1493
rd: tmp2,
1494
rs: ext_index.to_reg(),
1495
imm12: Imm12::from_i16(3),
1496
}
1497
.emit_uncompressed(sink, emit_info, state, start_off);
1498
1499
// Calculate the base of the jump, PC + the offset from above.
1500
Inst::AluRRR {
1501
alu_op: AluOPRRR::Add,
1502
rd: tmp1,
1503
rs1: tmp1.to_reg(),
1504
rs2: tmp2.to_reg(),
1505
}
1506
.emit_uncompressed(sink, emit_info, state, start_off);
1507
1508
// Jump into the jump table at the computed entry.
1509
// We add a 16-byte offset here because the jump table starts 4 instructions
1510
// (including the AUIPC itself) after the PC captured by the AUIPC.
1511
Inst::Jalr {
1512
rd: writable_zero_reg(),
1513
base: tmp1.to_reg(),
1514
offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16),
1515
}
1516
.emit_uncompressed(sink, emit_info, state, start_off);
1517
1518
// Emit the jump table.
1519
//
1520
// Each entry is an auipc + jalr to the target block. We also start with an island
1521
// if necessary.
1522
1523
// Emit the jumps back to back
1524
for target in targets.iter() {
1525
sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32);
1526
1527
Inst::construct_auipc_and_jalr(None, tmp2, 0)
1528
.iter()
1529
.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
1530
}
1531
1532
// We've just emitted an island that is safe up to *here*.
1533
// Mark it as such so that we don't needlessly emit additional islands.
1534
*start_off = sink.cur_offset();
1535
}
1536
1537
&Inst::Atomic {
1538
op,
1539
rd,
1540
addr,
1541
src,
1542
amo,
1543
} => {
1544
// TODO: get flags from original CLIF atomic instruction
1545
let flags = MemFlags::new();
1546
if let Some(trap_code) = flags.trap_code() {
1547
sink.add_trap(trap_code);
1548
}
1549
let x = op.op_code()
1550
| reg_to_gpr_num(rd.to_reg()) << 7
1551
| op.funct3() << 12
1552
| reg_to_gpr_num(addr) << 15
1553
| reg_to_gpr_num(src) << 20
1554
| op.funct7(amo) << 25;
1555
1556
sink.put4(x);
1557
}
1558
&Inst::Fence { pred, succ } => {
1559
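// FENCE encoding: opcode 0b0001111 with rd, rs1 and funct3 all zero, the
// predecessor set in bits 27..=24 and the successor set in bits 23..=20.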
let x = 0b0001111
1560
| 0b00000 << 7
1561
| 0b000 << 12
1562
| 0b00000 << 15
1563
| (succ as u32) << 20
1564
| (pred as u32) << 24;
1565
1566
sink.put4(x);
1567
}
1568
&Inst::Auipc { rd, imm } => {
1569
sink.put4(enc_auipc(rd, imm));
1570
}
1571
1572
&Inst::LoadAddr { rd, mem } => {
1573
let base = mem.get_base_register();
1574
let offset = mem.get_offset_with_state(state);
1575
let offset_imm12 = Imm12::maybe_from_i64(offset);
1576
1577
match (mem, base, offset_imm12) {
1578
(_, Some(rs), Some(imm12)) => {
1579
Inst::AluRRImm12 {
1580
alu_op: AluOPRRI::Addi,
1581
rd,
1582
rs,
1583
imm12,
1584
}
1585
.emit(sink, emit_info, state);
1586
}
1587
(_, Some(rs), None) => {
1588
let mut insts = Inst::load_constant_u64(rd, offset as u64);
1589
insts.push(Inst::AluRRR {
1590
alu_op: AluOPRRR::Add,
1591
rd,
1592
rs1: rd.to_reg(),
1593
rs2: rs,
1594
});
1595
insts
1596
.into_iter()
1597
.for_each(|inst| inst.emit(sink, emit_info, state));
1598
}
1599
(AMode::Const(addr), None, _) => {
1600
// Get an address label for the constant and recurse.
1601
let label = sink.get_label_for_constant(addr);
1602
Inst::LoadAddr {
1603
rd,
1604
mem: AMode::Label(label),
1605
}
1606
.emit(sink, emit_info, state);
1607
}
1608
(AMode::Label(label), None, _) => {
1609
// Get the current PC.
1610
sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);
1611
let inst = Inst::Auipc {
1612
rd,
1613
imm: Imm20::ZERO,
1614
};
1615
inst.emit_uncompressed(sink, emit_info, state, start_off);
1616
1617
// Emit an add to the address with a relocation.
1618
// This later gets patched up with the correct offset.
1619
sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);
1620
Inst::AluRRImm12 {
1621
alu_op: AluOPRRI::Addi,
1622
rd,
1623
rs: rd.to_reg(),
1624
imm12: Imm12::ZERO,
1625
}
1626
.emit_uncompressed(sink, emit_info, state, start_off);
1627
}
1628
(amode, _, _) => {
1629
unimplemented!("LoadAddr: {:?}", amode);
1630
}
1631
}
1632
}
1633
1634
&Inst::Select {
1635
ref dst,
1636
condition,
1637
ref x,
1638
ref y,
1639
} => {
1640
// The general form for this select is the following:
1641
//
1642
// mv rd, x
1643
// b{cond} rcond, label_end
1644
// mv rd, y
1645
// label_end:
1646
// ... etc
1647
//
1648
// This is built on the assumption that moves are cheap, but branches and jumps
1649
// are not. So with this format we always avoid one jump instruction at the expense
1650
// of an unconditional move.
1651
//
1652
// We also perform another optimization here. If the destination register is the same
1653
// as one of the input registers, we can avoid emitting the first unconditional move
1654
// and emit just the branch and the second move.
1655
//
1656
// To make sure that this happens as often as possible, we also try to invert the
1657
// condition, so that if either of the input registers are the same as the destination
1658
// we avoid that move.
1659
1660
let label_end = sink.get_label();
1661
1662
let xregs = x.regs();
1663
let yregs = y.regs();
1664
let dstregs: Vec<Reg> = dst.regs().into_iter().map(|r| r.to_reg()).collect();
1665
let condregs = condition.regs();
1666
1667
// We are going to write to the destination register before evaluating
1668
// the condition, so we need to make sure that the destination register
1669
// is not one of the condition registers.
1670
//
1671
// This should never happen, since hopefully the regalloc constraints
1672
// for this register are set up correctly.
1673
debug_assert_ne!(dstregs, condregs);
1674
1675
// Check if we can invert the condition and avoid moving the y registers into
1676
// the destination. This allows us to only emit the branch and one of the moves.
1677
let (uncond_move, cond_move, condition) = if yregs == dstregs {
1678
(yregs, xregs, condition.inverse())
1679
} else {
1680
(xregs, yregs, condition)
1681
};
1682
1683
// Unconditionally move one of the values to the destination register.
1684
//
1685
// These moves may not end up being emitted if the source and
1686
// destination registers are the same. That logic is built into
1687
// the emit function for `Inst::Mov`.
1688
for i in gen_moves(dst.regs(), uncond_move) {
1689
i.emit(sink, emit_info, state);
1690
}
1691
1692
// If the condition passes we skip over the conditional move
1693
Inst::CondBr {
1694
taken: CondBrTarget::Label(label_end),
1695
not_taken: CondBrTarget::Fallthrough,
1696
kind: condition,
1697
}
1698
.emit(sink, emit_info, state);
1699
1700
// Move the conditional value to the destination register.
1701
for i in gen_moves(dst.regs(), cond_move) {
1702
i.emit(sink, emit_info, state);
1703
}
1704
1705
sink.bind_label(label_end, &mut state.ctrl_plane);
1706
}
1707
&Inst::Jalr { rd, base, offset } => {
1708
sink.put4(enc_jalr(rd, base, offset));
1709
state.clobber_vstate();
1710
}
1711
&Inst::EBreak => {
1712
sink.put4(0x00100073);
1713
}
1714
&Inst::AtomicCas {
1715
offset,
1716
t0,
1717
dst,
1718
e,
1719
addr,
1720
v,
1721
ty,
1722
} => {
1723
// # addr holds address of memory location
1724
// # e holds expected value
1725
// # v holds desired value
1726
// # dst holds return value
1727
// cas:
1728
// lr.w dst, (addr) # Load original value.
1729
// bne dst, e, fail # Doesn’t match, so fail.
1730
// sc.w t0, v, (addr) # Try to update.
1731
// bnez t0, cas # If the store failed, retry.
1732
// fail:
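//
// For 8- and 16-bit types there is no sub-word LR/SC, so the loop operates on
// the containing aligned word: `AtomicOP::extract` pulls the old value out of
// the loaded word (at the position given by `offset`) and `AtomicOP::merge`
// folds the new value back in before the store-conditional.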
1733
let fail_label = sink.get_label();
1734
let cas_label = sink.get_label();
1735
sink.bind_label(cas_label, &mut state.ctrl_plane);
1736
Inst::Atomic {
1737
op: AtomicOP::load_op(ty),
1738
rd: dst,
1739
addr,
1740
src: zero_reg(),
1741
amo: AMO::SeqCst,
1742
}
1743
.emit(sink, emit_info, state);
1744
if ty.bits() < 32 {
1745
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1746
.iter()
1747
.for_each(|i| i.emit(sink, emit_info, state));
1748
} else if ty.bits() == 32 {
1749
Inst::Extend {
1750
rd: dst,
1751
rn: dst.to_reg(),
1752
signed: false,
1753
from_bits: 32,
1754
to_bits: 64,
1755
}
1756
.emit(sink, emit_info, state);
1757
}
1758
Inst::CondBr {
1759
taken: CondBrTarget::Label(fail_label),
1760
not_taken: CondBrTarget::Fallthrough,
1761
kind: IntegerCompare {
1762
kind: IntCC::NotEqual,
1763
rs1: e,
1764
rs2: dst.to_reg(),
1765
},
1766
}
1767
.emit(sink, emit_info, state);
1768
let store_value = if ty.bits() < 32 {
1769
// reload value to t0.
1770
Inst::Atomic {
1771
op: AtomicOP::load_op(ty),
1772
rd: t0,
1773
addr,
1774
src: zero_reg(),
1775
amo: AMO::SeqCst,
1776
}
1777
.emit(sink, emit_info, state);
1778
// Merge the new value `v` into the rest of the reloaded word.
1779
AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)
1780
.iter()
1781
.for_each(|i| i.emit(sink, emit_info, state));
1782
t0.to_reg()
1783
} else {
1784
v
1785
};
1786
Inst::Atomic {
1787
op: AtomicOP::store_op(ty),
1788
rd: t0,
1789
addr,
1790
src: store_value,
1791
amo: AMO::SeqCst,
1792
}
1793
.emit(sink, emit_info, state);
1794
// Check whether our value was actually stored.
1795
Inst::CondBr {
1796
taken: CondBrTarget::Label(cas_label),
1797
not_taken: CondBrTarget::Fallthrough,
1798
kind: IntegerCompare {
1799
kind: IntCC::NotEqual,
1800
rs1: t0.to_reg(),
1801
rs2: zero_reg(),
1802
},
1803
}
1804
.emit(sink, emit_info, state);
1805
sink.bind_label(fail_label, &mut state.ctrl_plane);
1806
}
1807
&Inst::AtomicRmwLoop {
1808
offset,
1809
op,
1810
dst,
1811
ty,
1812
p,
1813
x,
1814
t0,
1815
} => {
1816
let retry = sink.get_label();
1817
sink.bind_label(retry, &mut state.ctrl_plane);
1818
// load old value.
1819
Inst::Atomic {
1820
op: AtomicOP::load_op(ty),
1821
rd: dst,
1822
addr: p,
1823
src: zero_reg(),
1824
amo: AMO::SeqCst,
1825
}
1826
.emit(sink, emit_info, state);
1827
//
1828
1829
let store_value: Reg = match op {
1830
crate::ir::AtomicRmwOp::Add
1831
| crate::ir::AtomicRmwOp::Sub
1832
| crate::ir::AtomicRmwOp::And
1833
| crate::ir::AtomicRmwOp::Or
1834
| crate::ir::AtomicRmwOp::Xor => {
1835
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1836
.iter()
1837
.for_each(|i| i.emit(sink, emit_info, state));
1838
Inst::AluRRR {
1839
alu_op: match op {
1840
crate::ir::AtomicRmwOp::Add => AluOPRRR::Add,
1841
crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub,
1842
crate::ir::AtomicRmwOp::And => AluOPRRR::And,
1843
crate::ir::AtomicRmwOp::Or => AluOPRRR::Or,
1844
crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor,
1845
_ => unreachable!(),
1846
},
1847
rd: t0,
1848
rs1: dst.to_reg(),
1849
rs2: x,
1850
}
1851
.emit(sink, emit_info, state);
1852
Inst::Atomic {
1853
op: AtomicOP::load_op(ty),
1854
rd: writable_spilltmp_reg2(),
1855
addr: p,
1856
src: zero_reg(),
1857
amo: AMO::SeqCst,
1858
}
1859
.emit(sink, emit_info, state);
1860
AtomicOP::merge(
1861
writable_spilltmp_reg2(),
1862
writable_spilltmp_reg(),
1863
offset,
1864
t0.to_reg(),
1865
ty,
1866
)
1867
.iter()
1868
.for_each(|i| i.emit(sink, emit_info, state));
1869
spilltmp_reg2()
1870
}
1871
crate::ir::AtomicRmwOp::Nand => {
1872
if ty.bits() < 32 {
1873
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1874
.iter()
1875
.for_each(|i| i.emit(sink, emit_info, state));
1876
}
1877
Inst::AluRRR {
1878
alu_op: AluOPRRR::And,
1879
rd: t0,
1880
rs1: x,
1881
rs2: dst.to_reg(),
1882
}
1883
.emit(sink, emit_info, state);
1884
Inst::construct_bit_not(t0, t0.to_reg()).emit(sink, emit_info, state);
1885
if ty.bits() < 32 {
1886
Inst::Atomic {
1887
op: AtomicOP::load_op(ty),
1888
rd: writable_spilltmp_reg2(),
1889
addr: p,
1890
src: zero_reg(),
1891
amo: AMO::SeqCst,
1892
}
1893
.emit(sink, emit_info, state);
1894
AtomicOP::merge(
1895
writable_spilltmp_reg2(),
1896
writable_spilltmp_reg(),
1897
offset,
1898
t0.to_reg(),
1899
ty,
1900
)
1901
.iter()
1902
.for_each(|i| i.emit(sink, emit_info, state));
1903
spilltmp_reg2()
1904
} else {
1905
t0.to_reg()
1906
}
1907
}
1908
1909
crate::ir::AtomicRmwOp::Umin
1910
| crate::ir::AtomicRmwOp::Umax
1911
| crate::ir::AtomicRmwOp::Smin
1912
| crate::ir::AtomicRmwOp::Smax => {
1913
let label_select_dst = sink.get_label();
1914
let label_select_done = sink.get_label();
1915
if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax
1916
{
1917
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1918
} else {
1919
AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)
1920
}
1921
.iter()
1922
.for_each(|i| i.emit(sink, emit_info, state));
1923
1924
Inst::CondBr {
1925
taken: CondBrTarget::Label(label_select_dst),
1926
not_taken: CondBrTarget::Fallthrough,
1927
kind: IntegerCompare {
1928
kind: match op {
1929
crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan,
1930
crate::ir::AtomicRmwOp::Umax => IntCC::UnsignedGreaterThan,
1931
crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan,
1932
crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,
1933
_ => unreachable!(),
1934
},
1935
rs1: dst.to_reg(),
1936
rs2: x,
1937
},
1938
}
1939
.emit(sink, emit_info, state);
1940
// here we select x.
1941
Inst::gen_move(t0, x, I64).emit(sink, emit_info, state);
1942
Inst::gen_jump(label_select_done).emit(sink, emit_info, state);
1943
sink.bind_label(label_select_dst, &mut state.ctrl_plane);
1944
Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state);
1945
sink.bind_label(label_select_done, &mut state.ctrl_plane);
1946
Inst::Atomic {
1947
op: AtomicOP::load_op(ty),
1948
rd: writable_spilltmp_reg2(),
1949
addr: p,
1950
src: zero_reg(),
1951
amo: AMO::SeqCst,
1952
}
1953
.emit(sink, emit_info, state);
1954
AtomicOP::merge(
1955
writable_spilltmp_reg2(),
1956
writable_spilltmp_reg(),
1957
offset,
1958
t0.to_reg(),
1959
ty,
1960
)
1961
.iter()
1962
.for_each(|i| i.emit(sink, emit_info, state));
1963
spilltmp_reg2()
1964
}
1965
crate::ir::AtomicRmwOp::Xchg => {
1966
AtomicOP::extract(dst, offset, dst.to_reg(), ty)
1967
.iter()
1968
.for_each(|i| i.emit(sink, emit_info, state));
1969
Inst::Atomic {
1970
op: AtomicOP::load_op(ty),
1971
rd: writable_spilltmp_reg2(),
1972
addr: p,
1973
src: zero_reg(),
1974
amo: AMO::SeqCst,
1975
}
1976
.emit(sink, emit_info, state);
1977
AtomicOP::merge(
1978
writable_spilltmp_reg2(),
1979
writable_spilltmp_reg(),
1980
offset,
1981
x,
1982
ty,
1983
)
1984
.iter()
1985
.for_each(|i| i.emit(sink, emit_info, state));
1986
spilltmp_reg2()
1987
}
1988
};
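// What follows is the tail shared by every RMW flavour above (a sketch,
// assuming the usual LR/SC retry loop entered at `retry`): write the new
// value back with a store-conditional and loop while the reservation was
// lost (nonzero result in `t0`):
//
//     sc.{w,d} t0, store_value, (p)   # t0 == 0 on success
//     bne      t0, zero, retry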
1989
1990
Inst::Atomic {
1991
op: AtomicOP::store_op(ty),
1992
rd: t0,
1993
addr: p,
1994
src: store_value,
1995
amo: AMO::SeqCst,
1996
}
1997
.emit(sink, emit_info, state);
1998
1999
// If the store-conditional failed, retry.
2000
Inst::CondBr {
2001
taken: CondBrTarget::Label(retry),
2002
not_taken: CondBrTarget::Fallthrough,
2003
kind: IntegerCompare {
2004
kind: IntCC::NotEqual,
2005
rs1: t0.to_reg(),
2006
rs2: zero_reg(),
2007
},
2008
}
2009
.emit(sink, emit_info, state);
2010
}
2011
2012
&Inst::LoadExtNameGot { rd, ref name } => {
2013
// Load a PC-relative address into a register.
2014
// RISC-V does this slightly differently from other arches. We emit a relocation
2015
// with a label, instead of the symbol itself.
2016
//
2017
// See: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
2018
//
2019
// Emit the following code:
2020
// label:
2021
// auipc rd, 0 # R_RISCV_GOT_HI20 (symbol_name)
2022
// ld rd, rd, 0 # R_RISCV_PCREL_LO12_I (label)
2023
2024
// Create the label that is going to be published to the final binary object.
2025
let auipc_label = sink.get_label();
2026
sink.bind_label(auipc_label, &mut state.ctrl_plane);
2027
2028
// Get the current PC.
2029
sink.add_reloc(Reloc::RiscvGotHi20, &**name, 0);
2030
Inst::Auipc {
2031
rd,
2032
imm: Imm20::from_i32(0),
2033
}
2034
.emit_uncompressed(sink, emit_info, state, start_off);
2035
2036
// The `ld` here points to the `auipc` label instead of directly to the symbol.
2037
sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2038
Inst::Load {
2039
rd,
2040
op: LoadOP::Ld,
2041
flags: MemFlags::trusted(),
2042
from: AMode::RegOffset(rd.to_reg(), 0),
2043
}
2044
.emit_uncompressed(sink, emit_info, state, start_off);
2045
}
2046
2047
&Inst::LoadExtNameFar {
2048
rd,
2049
ref name,
2050
offset,
2051
} => {
2052
// In the non-PIC sequence we relocate the absolute address into
2053
// a preallocated space, load it into a register and jump over
2054
// it.
2055
//
2056
// Emit the following code:
2057
// ld rd, label_data
2058
// j label_end
2059
// label_data:
2060
// <8 byte space> # ABS8
2061
// label_end:
2062
2063
let label_data = sink.get_label();
2064
let label_end = sink.get_label();
2065
2066
// Load the value from a label
2067
Inst::Load {
2068
rd,
2069
op: LoadOP::Ld,
2070
flags: MemFlags::trusted(),
2071
from: AMode::Label(label_data),
2072
}
2073
.emit(sink, emit_info, state);
2074
2075
// Jump over the data
2076
Inst::gen_jump(label_end).emit(sink, emit_info, state);
2077
2078
sink.bind_label(label_data, &mut state.ctrl_plane);
2079
sink.add_reloc(Reloc::Abs8, name.as_ref(), offset);
2080
sink.put8(0);
2081
2082
sink.bind_label(label_end, &mut state.ctrl_plane);
2083
}
2084
2085
&Inst::LoadExtNameNear {
2086
rd,
2087
ref name,
2088
offset,
2089
} => {
2090
// Emit the following code:
2091
// label:
2092
// auipc rd, 0 # R_RISCV_PCREL_HI20 (symbol_name)
2093
// ld rd, rd, 0 # R_RISCV_PCREL_LO12_I (label)
2094
2095
let auipc_label = sink.get_label();
2096
sink.bind_label(auipc_label, &mut state.ctrl_plane);
2097
2098
// Get the current PC.
2099
sink.add_reloc(Reloc::RiscvPCRelHi20, &**name, offset);
2100
Inst::Auipc {
2101
rd,
2102
imm: Imm20::from_i32(0),
2103
}
2104
.emit_uncompressed(sink, emit_info, state, start_off);
2105
2106
sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2107
Inst::AluRRImm12 {
2108
alu_op: AluOPRRI::Addi,
2109
rd,
2110
rs: rd.to_reg(),
2111
imm12: Imm12::ZERO,
2112
}
2113
.emit_uncompressed(sink, emit_info, state, start_off);
2114
}
2115
2116
&Inst::LabelAddress { dst, label } => {
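// Materialize the address of an in-function label with an auipc/addi
// pair; these use label fixups (PCRelHi20/PCRelLo12I) rather than
// relocations. Roughly:
//
//     here:
//       auipc dst, %pcrel_hi(label)
//       addi  dst, dst, %pcrel_lo(here)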
2117
let offset = sink.cur_offset();
2118
Inst::Auipc {
2119
rd: dst,
2120
imm: Imm20::from_i32(0),
2121
}
2122
.emit_uncompressed(sink, emit_info, state, start_off);
2123
sink.use_label_at_offset(offset, label, LabelUse::PCRelHi20);
2124
2125
let offset = sink.cur_offset();
2126
Inst::AluRRImm12 {
2127
alu_op: AluOPRRI::Addi,
2128
rd: dst,
2129
rs: dst.to_reg(),
2130
imm12: Imm12::ZERO,
2131
}
2132
.emit_uncompressed(sink, emit_info, state, start_off);
2133
sink.use_label_at_offset(offset, label, LabelUse::PCRelLo12I);
2134
}
2135
2136
&Inst::ElfTlsGetAddr { rd, ref name } => {
2137
// RISC-V's TLS GD model is slightly different from other arches.
2138
//
2139
// We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits
2140
// of the address relative to the GOT entry. This relocation points to
2141
// the symbol as usual.
2142
//
2143
// However, when loading the bottom 12 bits of the address, we need to
2144
// use a label that points to the previous AUIPC instruction.
2145
//
2146
// label:
2147
// auipc a0,0 # R_RISCV_TLS_GD_HI20 (symbol)
2148
// addi a0,a0,0 # R_RISCV_PCREL_LO12_I (label)
2149
//
2150
// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic
2151
2152
// Create the label that is going to be published to the final binary object.
2153
let auipc_label = sink.get_label();
2154
sink.bind_label(auipc_label, &mut state.ctrl_plane);
2155
2156
// Get the current PC.
2157
sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0);
2158
Inst::Auipc {
2159
rd,
2160
imm: Imm20::from_i32(0),
2161
}
2162
.emit_uncompressed(sink, emit_info, state, start_off);
2163
2164
// The `addi` here points to the `auipc` label instead of directly to the symbol.
2165
sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);
2166
Inst::AluRRImm12 {
2167
alu_op: AluOPRRI::Addi,
2168
rd,
2169
rs: rd.to_reg(),
2170
imm12: Imm12::from_i16(0),
2171
}
2172
.emit_uncompressed(sink, emit_info, state, start_off);
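// Finally, call the TLS helper. Together with the auipc/addi above the
// whole sequence is roughly (the libcall conventionally resolves to
// `__tls_get_addr`):
//
//     label:
//       auipc a0, 0        # R_RISCV_TLS_GD_HI20 (symbol)
//       addi  a0, a0, 0    # R_RISCV_PCREL_LO12_I (label)
//       call  __tls_get_addr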
2173
2174
Inst::Call {
2175
info: Box::new(CallInfo::empty(
2176
ExternalName::LibCall(LibCall::ElfTlsGetAddr),
2177
CallConv::SystemV,
2178
)),
2179
}
2180
.emit_uncompressed(sink, emit_info, state, start_off);
2181
}
2182
2183
&Inst::TrapIf {
2184
rs1,
2185
rs2,
2186
cc,
2187
trap_code,
2188
} => {
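// Trap if the condition holds: branch over the trap word when it does
// not (sketch):
//
//     b<inverse cc> rs1, rs2, label_end
//     <trap opcode>                    # trap_code
//   label_end: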
2189
let label_end = sink.get_label();
2190
let cond = IntegerCompare { kind: cc, rs1, rs2 };
2191
2192
// Jump over the trap if the condition is false.
2193
Inst::CondBr {
2194
taken: CondBrTarget::Label(label_end),
2195
not_taken: CondBrTarget::Fallthrough,
2196
kind: cond.inverse(),
2197
}
2198
.emit(sink, emit_info, state);
2199
Inst::Udf { trap_code }.emit(sink, emit_info, state);
2200
2201
sink.bind_label(label_end, &mut state.ctrl_plane);
2202
}
2203
&Inst::Udf { trap_code } => {
2204
sink.add_trap(trap_code);
2205
sink.put_data(Inst::TRAP_OPCODE);
2206
}
2207
&Inst::AtomicLoad { rd, ty, p } => {
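// A sequentially consistent atomic load lowers to a fence/load/fence
// sequence (sketch):
//
//     fence rw, rw
//     l{b|h|w|d} rd, 0(p)
//     fence r, rw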
2208
// emit the fence.
2209
Inst::Fence {
2210
pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2211
succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2212
}
2213
.emit(sink, emit_info, state);
2214
// load.
2215
Inst::Load {
2216
rd,
2217
op: LoadOP::from_type(ty),
2218
flags: MemFlags::new(),
2219
from: AMode::RegOffset(p, 0),
2220
}
2221
.emit(sink, emit_info, state);
2222
Inst::Fence {
2223
pred: Inst::FENCE_REQ_R,
2224
succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2225
}
2226
.emit(sink, emit_info, state);
2227
}
2228
&Inst::AtomicStore { src, ty, p } => {
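// A sequentially consistent atomic store lowers to a fence followed by a
// plain store (sketch):
//
//     fence rw, w
//     s{b|h|w|d} src, 0(p)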
2229
Inst::Fence {
2230
pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,
2231
succ: Inst::FENCE_REQ_W,
2232
}
2233
.emit(sink, emit_info, state);
2234
Inst::Store {
2235
to: AMode::RegOffset(p, 0),
2236
op: StoreOP::from_type(ty),
2237
flags: MemFlags::new(),
2238
src,
2239
}
2240
.emit(sink, emit_info, state);
2241
}
2242
2243
&Inst::Popcnt {
2244
sum,
2245
tmp,
2246
step,
2247
rs,
2248
ty,
2249
} => {
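// Bit-by-bit population count. The emitted loop is roughly:
//
//     sum  = 0
//     step = ty.bits()
//     tmp  = 1 << (ty.bits() - 1)
//     while step > 0 {
//         if rs & tmp != 0 { sum += 1 }
//         step -= 1
//         tmp >>= 1
//     }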
2250
// Initialize sum to 0.
2251
Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2252
// Initialize step with the bit width of the type.
2253
Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2254
.emit(sink, emit_info, state);
2255
// Build the scan mask: tmp = 1 << (ty.bits() - 1).
2256
Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2257
Inst::AluRRImm12 {
2258
alu_op: AluOPRRI::Slli,
2259
rd: tmp,
2260
rs: tmp.to_reg(),
2261
imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2262
}
2263
.emit(sink, emit_info, state);
2264
let label_done = sink.get_label();
2265
let label_loop = sink.get_label();
2266
sink.bind_label(label_loop, &mut state.ctrl_plane);
2267
Inst::CondBr {
2268
taken: CondBrTarget::Label(label_done),
2269
not_taken: CondBrTarget::Fallthrough,
2270
kind: IntegerCompare {
2271
kind: IntCC::SignedLessThanOrEqual,
2272
rs1: step.to_reg(),
2273
rs2: zero_reg(),
2274
},
2275
}
2276
.emit(sink, emit_info, state);
2277
// test and add sum.
2278
{
2279
Inst::AluRRR {
2280
alu_op: AluOPRRR::And,
2281
rd: writable_spilltmp_reg2(),
2282
rs1: tmp.to_reg(),
2283
rs2: rs,
2284
}
2285
.emit(sink, emit_info, state);
2286
let label_over = sink.get_label();
2287
Inst::CondBr {
2288
taken: CondBrTarget::Label(label_over),
2289
not_taken: CondBrTarget::Fallthrough,
2290
kind: IntegerCompare {
2291
kind: IntCC::Equal,
2292
rs1: zero_reg(),
2293
rs2: spilltmp_reg2(),
2294
},
2295
}
2296
.emit(sink, emit_info, state);
2297
Inst::AluRRImm12 {
2298
alu_op: AluOPRRI::Addi,
2299
rd: sum,
2300
rs: sum.to_reg(),
2301
imm12: Imm12::ONE,
2302
}
2303
.emit(sink, emit_info, state);
2304
sink.bind_label(label_over, &mut state.ctrl_plane);
2305
}
2306
// set step and tmp.
2307
{
2308
Inst::AluRRImm12 {
2309
alu_op: AluOPRRI::Addi,
2310
rd: step,
2311
rs: step.to_reg(),
2312
imm12: Imm12::from_i16(-1),
2313
}
2314
.emit(sink, emit_info, state);
2315
Inst::AluRRImm12 {
2316
alu_op: AluOPRRI::Srli,
2317
rd: tmp,
2318
rs: tmp.to_reg(),
2319
imm12: Imm12::ONE,
2320
}
2321
.emit(sink, emit_info, state);
2322
Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2323
}
2324
sink.bind_label(label_done, &mut state.ctrl_plane);
2325
}
2326
&Inst::Cltz {
2327
sum,
2328
tmp,
2329
step,
2330
rs,
2331
leading,
2332
ty,
2333
} => {
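// Count leading or trailing zeros by scanning one bit at a time. The
// emitted loop is roughly:
//
//     sum  = 0
//     step = ty.bits()
//     tmp  = if leading { 1 << (ty.bits() - 1) } else { 1 }
//     while step > 0 {
//         if rs & tmp != 0 { break }
//         sum += 1
//         step -= 1
//         tmp = if leading { tmp >> 1 } else { tmp << 1 }
//     }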
2334
// Initialize sum to 0.
2335
Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
2336
// Initialize step with the bit width of the type.
2337
Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2338
.emit(sink, emit_info, state);
2339
// Set the scan mask to 1; when counting leading zeros, shift it up to the MSB below.
2340
Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2341
if leading {
2342
Inst::AluRRImm12 {
2343
alu_op: AluOPRRI::Slli,
2344
rd: tmp,
2345
rs: tmp.to_reg(),
2346
imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2347
}
2348
.emit(sink, emit_info, state);
2349
}
2350
let label_done = sink.get_label();
2351
let label_loop = sink.get_label();
2352
sink.bind_label(label_loop, &mut state.ctrl_plane);
2353
Inst::CondBr {
2354
taken: CondBrTarget::Label(label_done),
2355
not_taken: CondBrTarget::Fallthrough,
2356
kind: IntegerCompare {
2357
kind: IntCC::SignedLessThanOrEqual,
2358
rs1: step.to_reg(),
2359
rs2: zero_reg(),
2360
},
2361
}
2362
.emit(sink, emit_info, state);
2363
// test and add sum.
2364
{
2365
Inst::AluRRR {
2366
alu_op: AluOPRRR::And,
2367
rd: writable_spilltmp_reg2(),
2368
rs1: tmp.to_reg(),
2369
rs2: rs,
2370
}
2371
.emit(sink, emit_info, state);
2372
Inst::CondBr {
2373
taken: CondBrTarget::Label(label_done),
2374
not_taken: CondBrTarget::Fallthrough,
2375
kind: IntegerCompare {
2376
kind: IntCC::NotEqual,
2377
rs1: zero_reg(),
2378
rs2: spilltmp_reg2(),
2379
},
2380
}
2381
.emit(sink, emit_info, state);
2382
Inst::AluRRImm12 {
2383
alu_op: AluOPRRI::Addi,
2384
rd: sum,
2385
rs: sum.to_reg(),
2386
imm12: Imm12::ONE,
2387
}
2388
.emit(sink, emit_info, state);
2389
}
2390
// set step and tmp.
2391
{
2392
Inst::AluRRImm12 {
2393
alu_op: AluOPRRI::Addi,
2394
rd: step,
2395
rs: step.to_reg(),
2396
imm12: Imm12::from_i16(-1),
2397
}
2398
.emit(sink, emit_info, state);
2399
Inst::AluRRImm12 {
2400
alu_op: if leading {
2401
AluOPRRI::Srli
2402
} else {
2403
AluOPRRI::Slli
2404
},
2405
rd: tmp,
2406
rs: tmp.to_reg(),
2407
imm12: Imm12::ONE,
2408
}
2409
.emit(sink, emit_info, state);
2410
Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2411
}
2412
sink.bind_label(label_done, &mut state.ctrl_plane);
2413
}
2414
&Inst::Brev8 {
2415
rs,
2416
ty,
2417
step,
2418
tmp,
2419
tmp2,
2420
rd,
2421
} => {
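// Reverse the bits within each byte of `rs`. The emitted loop is roughly:
//
//     rd   = 0
//     step = ty.bits()
//     tmp  = 1 << (ty.bits() - 1)   // read mask, walks down from the MSB
//     tmp2 = 1 << (ty.bits() - 8)   // write mask, walks up within each byte
//     while step > 0 {
//         if rs & tmp != 0 { rd |= tmp2 }
//         step -= 1
//         tmp >>= 1
//         tmp2 = if step % 8 == 0 { tmp2 >> 15 } else { tmp2 << 1 }
//     }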
2422
Inst::gen_move(rd, zero_reg(), I64).emit(sink, emit_info, state);
2423
Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
2424
.emit(sink, emit_info, state);
2425
// Build the read mask: tmp = 1 << (ty.bits() - 1).
2426
Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
2427
Inst::AluRRImm12 {
2428
alu_op: AluOPRRI::Slli,
2429
rd: tmp,
2430
rs: tmp.to_reg(),
2431
imm12: Imm12::from_i16((ty.bits() - 1) as i16),
2432
}
2433
.emit(sink, emit_info, state);
2434
Inst::load_imm12(tmp2, Imm12::ONE).emit(sink, emit_info, state);
2435
Inst::AluRRImm12 {
2436
alu_op: AluOPRRI::Slli,
2437
rd: tmp2,
2438
rs: tmp2.to_reg(),
2439
imm12: Imm12::from_i16((ty.bits() - 8) as i16),
2440
}
2441
.emit(sink, emit_info, state);
2442
2443
let label_done = sink.get_label();
2444
let label_loop = sink.get_label();
2445
sink.bind_label(label_loop, &mut state.ctrl_plane);
2446
Inst::CondBr {
2447
taken: CondBrTarget::Label(label_done),
2448
not_taken: CondBrTarget::Fallthrough,
2449
kind: IntegerCompare {
2450
kind: IntCC::SignedLessThanOrEqual,
2451
rs1: step.to_reg(),
2452
rs2: zero_reg(),
2453
},
2454
}
2455
.emit(sink, emit_info, state);
2456
// test and set bit.
2457
{
2458
Inst::AluRRR {
2459
alu_op: AluOPRRR::And,
2460
rd: writable_spilltmp_reg2(),
2461
rs1: tmp.to_reg(),
2462
rs2: rs,
2463
}
2464
.emit(sink, emit_info, state);
2465
let label_over = sink.get_label();
2466
Inst::CondBr {
2467
taken: CondBrTarget::Label(label_over),
2468
not_taken: CondBrTarget::Fallthrough,
2469
kind: IntegerCompare {
2470
kind: IntCC::Equal,
2471
rs1: zero_reg(),
2472
rs2: spilltmp_reg2(),
2473
},
2474
}
2475
.emit(sink, emit_info, state);
2476
Inst::AluRRR {
2477
alu_op: AluOPRRR::Or,
2478
rd,
2479
rs1: rd.to_reg(),
2480
rs2: tmp2.to_reg(),
2481
}
2482
.emit(sink, emit_info, state);
2483
sink.bind_label(label_over, &mut state.ctrl_plane);
2484
}
2485
// set step and tmp.
2486
{
2487
Inst::AluRRImm12 {
2488
alu_op: AluOPRRI::Addi,
2489
rd: step,
2490
rs: step.to_reg(),
2491
imm12: Imm12::from_i16(-1),
2492
}
2493
.emit(sink, emit_info, state);
2494
Inst::AluRRImm12 {
2495
alu_op: AluOPRRI::Srli,
2496
rd: tmp,
2497
rs: tmp.to_reg(),
2498
imm12: Imm12::ONE,
2499
}
2500
.emit(sink, emit_info, state);
2501
{
2502
// reset tmp2
2503
// if (step % 8 == 0) then tmp2 = tmp2 >> 15
2504
// if (step % 8 != 0) then tmp2 = tmp2 << 1
2505
let label_over = sink.get_label();
2506
let label_sll_1 = sink.get_label();
2507
Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8))
2508
.emit(sink, emit_info, state);
2509
Inst::AluRRR {
2510
alu_op: AluOPRRR::Rem,
2511
rd: writable_spilltmp_reg2(),
2512
rs1: step.to_reg(),
2513
rs2: spilltmp_reg2(),
2514
}
2515
.emit(sink, emit_info, state);
2516
Inst::CondBr {
2517
taken: CondBrTarget::Label(label_sll_1),
2518
not_taken: CondBrTarget::Fallthrough,
2519
kind: IntegerCompare {
2520
kind: IntCC::NotEqual,
2521
rs1: spilltmp_reg2(),
2522
rs2: zero_reg(),
2523
},
2524
}
2525
.emit(sink, emit_info, state);
2526
Inst::AluRRImm12 {
2527
alu_op: AluOPRRI::Srli,
2528
rd: tmp2,
2529
rs: tmp2.to_reg(),
2530
imm12: Imm12::from_i16(15),
2531
}
2532
.emit(sink, emit_info, state);
2533
Inst::gen_jump(label_over).emit(sink, emit_info, state);
2534
sink.bind_label(label_sll_1, &mut state.ctrl_plane);
2535
Inst::AluRRImm12 {
2536
alu_op: AluOPRRI::Slli,
2537
rd: tmp2,
2538
rs: tmp2.to_reg(),
2539
imm12: Imm12::ONE,
2540
}
2541
.emit(sink, emit_info, state);
2542
sink.bind_label(label_over, &mut state.ctrl_plane);
2543
}
2544
Inst::gen_jump(label_loop).emit(sink, emit_info, state);
2545
}
2546
sink.bind_label(label_done, &mut state.ctrl_plane);
2547
}
2548
&Inst::StackProbeLoop {
2549
guard_size,
2550
probe_count,
2551
tmp: guard_size_tmp,
2552
} => {
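// Touch one byte per guard-size page below SP. The emitted loop is roughly:
//
//     step = guard_size * probe_count
//     while step > guard_size {
//         sb zero, (sp - step)
//         step -= guard_size
//     }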
2553
let step = writable_spilltmp_reg();
2554
Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
2555
.iter()
2556
.for_each(|i| i.emit(sink, emit_info, state));
2557
Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
2558
.iter()
2559
.for_each(|i| i.emit(sink, emit_info, state));
2560
2561
let loop_start = sink.get_label();
2562
let label_done = sink.get_label();
2563
sink.bind_label(loop_start, &mut state.ctrl_plane);
2564
Inst::CondBr {
2565
taken: CondBrTarget::Label(label_done),
2566
not_taken: CondBrTarget::Fallthrough,
2567
kind: IntegerCompare {
2568
kind: IntCC::UnsignedLessThanOrEqual,
2569
rs1: step.to_reg(),
2570
rs2: guard_size_tmp.to_reg(),
2571
},
2572
}
2573
.emit(sink, emit_info, state);
2574
// compute address.
2575
Inst::AluRRR {
2576
alu_op: AluOPRRR::Sub,
2577
rd: writable_spilltmp_reg2(),
2578
rs1: stack_reg(),
2579
rs2: step.to_reg(),
2580
}
2581
.emit(sink, emit_info, state);
2582
Inst::Store {
2583
to: AMode::RegOffset(spilltmp_reg2(), 0),
2584
op: StoreOP::Sb,
2585
flags: MemFlags::new(),
2586
src: zero_reg(),
2587
}
2588
.emit(sink, emit_info, state);
2589
// Decrement the remaining probe distance.
2590
Inst::AluRRR {
2591
alu_op: AluOPRRR::Sub,
2592
rd: step,
2593
rs1: step.to_reg(),
2594
rs2: guard_size_tmp.to_reg(),
2595
}
2596
.emit(sink, emit_info, state);
2597
Inst::gen_jump(loop_start).emit(sink, emit_info, state);
2598
sink.bind_label(label_done, &mut state.ctrl_plane);
2599
}
2600
&Inst::VecAluRRRImm5 {
2601
op,
2602
vd,
2603
vd_src,
2604
imm,
2605
vs2,
2606
ref mask,
2607
..
2608
} => {
2609
debug_assert_eq!(vd.to_reg(), vd_src);
2610
2611
sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask));
2612
}
2613
&Inst::VecAluRRRR {
2614
op,
2615
vd,
2616
vd_src,
2617
vs1,
2618
vs2,
2619
ref mask,
2620
..
2621
} => {
2622
debug_assert_eq!(vd.to_reg(), vd_src);
2623
2624
sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask));
2625
}
2626
&Inst::VecAluRRR {
2627
op,
2628
vd,
2629
vs1,
2630
vs2,
2631
ref mask,
2632
..
2633
} => {
2634
sink.put4(encode_valu(op, vd, vs1, vs2, *mask));
2635
}
2636
&Inst::VecAluRRImm5 {
2637
op,
2638
vd,
2639
imm,
2640
vs2,
2641
ref mask,
2642
..
2643
} => {
2644
sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask));
2645
}
2646
&Inst::VecAluRR {
2647
op,
2648
vd,
2649
vs,
2650
ref mask,
2651
..
2652
} => {
2653
sink.put4(encode_valu_rr(op, vd, vs, *mask));
2654
}
2655
&Inst::VecAluRImm5 {
2656
op,
2657
vd,
2658
imm,
2659
ref mask,
2660
..
2661
} => {
2662
sink.put4(encode_valu_r_imm(op, vd, imm, *mask));
2663
}
2664
&Inst::VecSetState { rd, ref vstate } => {
2665
sink.put4(encode_vcfg_imm(
2666
0x57,
2667
rd.to_reg(),
2668
vstate.avl.unwrap_static(),
2669
&vstate.vtype,
2670
));
2671
2672
// Update the current vector emit state.
2673
state.vstate = EmitVState::Known(*vstate);
2674
}
2675
2676
&Inst::VecLoad {
2677
eew,
2678
to,
2679
ref from,
2680
ref mask,
2681
flags,
2682
..
2683
} => {
2684
// Vector loads don't support immediate offsets, so we need to materialize the address in a register.
2685
let addr = match from {
2686
VecAMode::UnitStride { base } => {
2687
let base_reg = base.get_base_register();
2688
let offset = base.get_offset_with_state(state);
2689
2690
// Reg+0 Offset can be directly encoded
2691
if let (Some(base_reg), 0) = (base_reg, offset) {
2692
base_reg
2693
} else {
2694
// Otherwise load the address into a register and load from it.
2695
let tmp = writable_spilltmp_reg();
2696
Inst::LoadAddr {
2697
rd: tmp,
2698
mem: *base,
2699
}
2700
.emit(sink, emit_info, state);
2701
tmp.to_reg()
2702
}
2703
}
2704
};
2705
2706
if let Some(trap_code) = flags.trap_code() {
2707
// Register the offset at which the actual load instruction starts.
2708
sink.add_trap(trap_code);
2709
}
2710
2711
sink.put4(encode_vmem_load(
2712
0x07,
2713
to.to_reg(),
2714
eew,
2715
addr,
2716
from.lumop(),
2717
*mask,
2718
from.mop(),
2719
from.nf(),
2720
));
2721
}
2722
2723
&Inst::VecStore {
2724
eew,
2725
ref to,
2726
from,
2727
ref mask,
2728
flags,
2729
..
2730
} => {
2731
// Vector stores don't support immediate offsets, so we need to materialize the address in a register.
2732
let addr = match to {
2733
VecAMode::UnitStride { base } => {
2734
let base_reg = base.get_base_register();
2735
let offset = base.get_offset_with_state(state);
2736
2737
// Reg+0 Offset can be directly encoded
2738
if let (Some(base_reg), 0) = (base_reg, offset) {
2739
base_reg
2740
} else {
2741
// Otherwise load the address into a register and store through it.
2742
let tmp = writable_spilltmp_reg();
2743
Inst::LoadAddr {
2744
rd: tmp,
2745
mem: *base,
2746
}
2747
.emit(sink, emit_info, state);
2748
tmp.to_reg()
2749
}
2750
}
2751
};
2752
2753
if let Some(trap_code) = flags.trap_code() {
2754
// Register the offset at which the actual store instruction starts.
2755
sink.add_trap(trap_code);
2756
}
2757
2758
sink.put4(encode_vmem_store(
2759
0x27,
2760
from,
2761
eew,
2762
addr,
2763
to.sumop(),
2764
*mask,
2765
to.mop(),
2766
to.nf(),
2767
));
2768
}
2769
2770
Inst::EmitIsland { needed_space } => {
2771
if sink.island_needed(*needed_space) {
2772
let jump_around_label = sink.get_label();
2773
Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
2774
sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
2775
sink.bind_label(jump_around_label, &mut state.ctrl_plane);
2776
}
2777
}
2778
2779
Inst::SequencePoint { .. } => {
2780
// Nothing.
2781
}
2782
}
2783
}
2784
}
2785
2786
fn emit_return_call_common_sequence<T>(
2787
sink: &mut MachBuffer<Inst>,
2788
emit_info: &EmitInfo,
2789
state: &mut EmitState,
2790
info: &ReturnCallInfo<T>,
2791
) {
2792
// The return call sequence can potentially emit a lot of instructions (up to 634 bytes!)
2793
// So let's emit an island here if we need it.
2794
//
2795
// It is difficult to calculate exactly how many instructions are going to be emitted, so
2796
// we measure it by emitting into a disposable buffer and then checking how many bytes
2797
// were actually emitted.
2798
let mut buffer = MachBuffer::new();
2799
let mut fake_emit_state = state.clone();
2800
2801
return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);
2802
2803
// Finalize the buffer and get the number of bytes emitted.
2804
let buffer = buffer.finish(&Default::default(), &mut Default::default());
2805
let length = buffer.data().len() as u32;
2806
2807
// And now emit the island inline with this instruction.
2808
if sink.island_needed(length) {
2809
let jump_around_label = sink.get_label();
2810
Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
2811
sink.emit_island(length + 4, &mut state.ctrl_plane);
2812
sink.bind_label(jump_around_label, &mut state.ctrl_plane);
2813
}
2814
2815
// Now that we're done, emit the *actual* return sequence.
2816
return_call_emit_impl(sink, emit_info, state, info);
2817
}
2818
2819
/// This should not be called directly; instead prefer to call [emit_return_call_common_sequence].
2820
fn return_call_emit_impl<T>(
2821
sink: &mut MachBuffer<Inst>,
2822
emit_info: &EmitInfo,
2823
state: &mut EmitState,
2824
info: &ReturnCallInfo<T>,
2825
) {
2826
let sp_to_fp_offset = {
2827
let frame_layout = state.frame_layout();
2828
i64::from(
2829
frame_layout.clobber_size
2830
+ frame_layout.fixed_frame_storage_size
2831
+ frame_layout.outgoing_args_size,
2832
)
2833
};
2834
2835
let mut clobber_offset = sp_to_fp_offset - 8;
2836
for reg in state.frame_layout().clobbered_callee_saves.clone() {
2837
let rreg = reg.to_reg();
2838
let ty = match rreg.class() {
2839
RegClass::Int => I64,
2840
RegClass::Float => F64,
2841
RegClass::Vector => unimplemented!("Vector Clobber Restores"),
2842
};
2843
2844
Inst::gen_load(
2845
reg.map(Reg::from),
2846
AMode::SPOffset(clobber_offset),
2847
ty,
2848
MemFlags::trusted(),
2849
)
2850
.emit(sink, emit_info, state);
2851
2852
clobber_offset -= 8
2853
}
2854
2855
// Restore the link register and frame pointer
2856
let setup_area_size = i64::from(state.frame_layout().setup_area_size);
2857
if setup_area_size > 0 {
2858
Inst::gen_load(
2859
writable_link_reg(),
2860
AMode::SPOffset(sp_to_fp_offset + 8),
2861
I64,
2862
MemFlags::trusted(),
2863
)
2864
.emit(sink, emit_info, state);
2865
2866
Inst::gen_load(
2867
writable_fp_reg(),
2868
AMode::SPOffset(sp_to_fp_offset),
2869
I64,
2870
MemFlags::trusted(),
2871
)
2872
.emit(sink, emit_info, state);
2873
}
2874
2875
// If we over-allocated the incoming args area in the prologue, resize down to what the callee
2876
// is expecting.
2877
let incoming_args_diff =
2878
i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);
2879
2880
// Increment SP all at once
2881
let sp_increment = sp_to_fp_offset + setup_area_size + incoming_args_diff;
2882
if sp_increment > 0 {
2883
for inst in Riscv64MachineDeps::gen_sp_reg_adjust(i32::try_from(sp_increment).unwrap()) {
2884
inst.emit(sink, emit_info, state);
2885
}
2886
}
2887
}
2888
2889