// cranelift/codegen/src/isa/riscv64/inst/emit.rs
//! Riscv64 ISA: binary code emission.12use crate::ir::{self, LibCall, TrapCode};3use crate::isa::riscv64::inst::*;4use crate::isa::riscv64::lower::isle::generated_code::{5CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp,6};7use cranelift_control::ControlPlane;89pub struct EmitInfo {10#[expect(dead_code, reason = "may want to be used in the future")]11shared_flag: settings::Flags,12isa_flags: super::super::riscv_settings::Flags,13}1415impl EmitInfo {16pub(crate) fn new(17shared_flag: settings::Flags,18isa_flags: super::super::riscv_settings::Flags,19) -> Self {20Self {21shared_flag,22isa_flags,23}24}25}2627pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {28u32::from(m.to_real_reg().unwrap().hw_enc() & 31)29}3031pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 {32let real_reg = m.to_real_reg().unwrap().hw_enc();33debug_assert!(real_reg >= 8 && real_reg < 16);34let compressed_reg = real_reg - 8;35u32::from(compressed_reg)36}3738#[derive(Clone, Debug, PartialEq, Default)]39pub enum EmitVState {40#[default]41Unknown,42Known(VState),43}4445/// State carried between emissions of a sequence of instructions.46#[derive(Default, Clone, Debug)]47pub struct EmitState {48/// The user stack map for the upcoming instruction, as provided to49/// `pre_safepoint()`.50user_stack_map: Option<ir::UserStackMap>,5152/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and53/// optimized away at compiletime. 
See [cranelift_control].54ctrl_plane: ControlPlane,5556/// Vector State57/// Controls the current state of the vector unit at the emission point.58vstate: EmitVState,5960frame_layout: FrameLayout,61}6263impl EmitState {64fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {65self.user_stack_map.take()66}6768fn clobber_vstate(&mut self) {69self.vstate = EmitVState::Unknown;70}71}7273impl MachInstEmitState<Inst> for EmitState {74fn new(75abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>,76ctrl_plane: ControlPlane,77) -> Self {78EmitState {79user_stack_map: None,80ctrl_plane,81vstate: EmitVState::Unknown,82frame_layout: abi.frame_layout().clone(),83}84}8586fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {87self.user_stack_map = user_stack_map;88}8990fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {91&mut self.ctrl_plane92}9394fn take_ctrl_plane(self) -> ControlPlane {95self.ctrl_plane96}9798fn on_new_block(&mut self) {99// Reset the vector state.100self.clobber_vstate();101}102103fn frame_layout(&self) -> &FrameLayout {104&self.frame_layout105}106}107108impl Inst {109/// Load int mask.110/// If ty is int then 0xff in rd.111pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {112let mut insts = SmallInstVec::new();113assert!(ty.is_int() && ty.bits() <= 64);114match ty {115I64 => {116insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));117}118I32 | I16 => {119insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));120insts.push(Inst::Extend {121rd,122rn: rd.to_reg(),123signed: false,124from_bits: ty.bits() as u8,125to_bits: 64,126});127}128I8 => {129insts.push(Inst::load_imm12(rd, Imm12::from_i16(255)));130}131_ => unreachable!("ty:{:?}", ty),132}133insts134}135/// inverse all bit136pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {137Inst::AluRRImm12 {138alu_op: AluOPRRI::Xori,139rd,140rs,141imm12: Imm12::from_i16(-1),142}143}144145/// Returns Some(VState) if this instruction is 
expecting a specific vector state146/// before emission.147fn expected_vstate(&self) -> Option<&VState> {148match self {149Inst::Nop0150| Inst::Nop4151| Inst::BrTable { .. }152| Inst::Auipc { .. }153| Inst::Fli { .. }154| Inst::Lui { .. }155| Inst::LoadInlineConst { .. }156| Inst::AluRRR { .. }157| Inst::FpuRRR { .. }158| Inst::AluRRImm12 { .. }159| Inst::CsrReg { .. }160| Inst::CsrImm { .. }161| Inst::Load { .. }162| Inst::Store { .. }163| Inst::Args { .. }164| Inst::Rets { .. }165| Inst::Ret { .. }166| Inst::Extend { .. }167| Inst::Call { .. }168| Inst::CallInd { .. }169| Inst::ReturnCall { .. }170| Inst::ReturnCallInd { .. }171| Inst::Jal { .. }172| Inst::CondBr { .. }173| Inst::LoadExtNameGot { .. }174| Inst::LoadExtNameNear { .. }175| Inst::LoadExtNameFar { .. }176| Inst::ElfTlsGetAddr { .. }177| Inst::LoadAddr { .. }178| Inst::Mov { .. }179| Inst::MovFromPReg { .. }180| Inst::Fence { .. }181| Inst::EBreak182| Inst::Udf { .. }183| Inst::FpuRR { .. }184| Inst::FpuRRRR { .. }185| Inst::Jalr { .. }186| Inst::Atomic { .. }187| Inst::Select { .. }188| Inst::AtomicCas { .. }189| Inst::RawData { .. }190| Inst::AtomicStore { .. }191| Inst::AtomicLoad { .. }192| Inst::AtomicRmwLoop { .. }193| Inst::TrapIf { .. }194| Inst::Unwind { .. }195| Inst::DummyUse { .. }196| Inst::LabelAddress { .. }197| Inst::Popcnt { .. }198| Inst::Cltz { .. }199| Inst::Brev8 { .. }200| Inst::StackProbeLoop { .. } => None,201202// VecSetState does not expect any vstate, rather it updates it.203Inst::VecSetState { .. } => None,204205// `vmv` instructions copy a set of registers and ignore vstate.206Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,207208Inst::VecAluRR { vstate, .. } |209Inst::VecAluRRR { vstate, .. } |210Inst::VecAluRRRR { vstate, .. } |211Inst::VecAluRImm5 { vstate, .. } |212Inst::VecAluRRImm5 { vstate, .. } |213Inst::VecAluRRRImm5 { vstate, .. } |214// TODO: Unit-stride loads and stores only need the AVL to be correct, not215// the full vtype. 
A future optimization could be to decouple these two when216// updating vstate. This would allow us to avoid emitting a VecSetState in217// some cases.218Inst::VecLoad { vstate, .. }219| Inst::VecStore { vstate, .. } => Some(vstate),220Inst::EmitIsland { .. } => None,221}222}223}224225impl MachInstEmit for Inst {226type State = EmitState;227type Info = EmitInfo;228229fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {230// Check if we need to update the vector state before emitting this instruction231if let Some(expected) = self.expected_vstate() {232if state.vstate != EmitVState::Known(*expected) {233// Update the vector state.234Inst::VecSetState {235rd: writable_zero_reg(),236vstate: *expected,237}238.emit(sink, emit_info, state);239}240}241242// N.B.: we *must* not exceed the "worst-case size" used to compute243// where to insert islands, except when islands are explicitly triggered244// (with an `EmitIsland`). We check this in debug builds. This is `mut`245// to allow disabling the check for `JTSequence`, which is always246// emitted following an `EmitIsland`.247let mut start_off = sink.cur_offset();248249// First try to emit this as a compressed instruction250let res = self.try_emit_compressed(sink, emit_info, state, &mut start_off);251if res.is_none() {252// If we can't lets emit it as a normal instruction253self.emit_uncompressed(sink, emit_info, state, &mut start_off);254}255256// We exclude br_table, call, return_call and try_call from257// these checks since they emit their own islands, and thus258// are allowed to exceed the worst case size.259let emits_own_island = match self {260Inst::BrTable { .. }261| Inst::ReturnCall { .. }262| Inst::ReturnCallInd { .. }263| Inst::Call { .. }264| Inst::CallInd { .. }265| Inst::EmitIsland { .. 
} => true,266_ => false,267};268if !emits_own_island {269let end_off = sink.cur_offset();270assert!(271(end_off - start_off) <= Inst::worst_case_size(),272"Inst:{:?} length:{} worst_case_size:{}",273self,274end_off - start_off,275Inst::worst_case_size()276);277}278}279280fn pretty_print_inst(&self, state: &mut Self::State) -> String {281self.print_with_state(state)282}283}284285impl Inst {286/// Tries to emit an instruction as compressed, if we can't return false.287fn try_emit_compressed(288&self,289sink: &mut MachBuffer<Inst>,290emit_info: &EmitInfo,291state: &mut EmitState,292start_off: &mut u32,293) -> Option<()> {294let has_m = emit_info.isa_flags.has_m();295let has_zba = emit_info.isa_flags.has_zba();296let has_zbb = emit_info.isa_flags.has_zbb();297let has_zca = emit_info.isa_flags.has_zca();298let has_zcb = emit_info.isa_flags.has_zcb();299let has_zcd = emit_info.isa_flags.has_zcd();300301// Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc..) require Zca302// to be enabled, so check it early.303if !has_zca {304return None;305}306307fn reg_is_compressible(r: Reg) -> bool {308r.to_real_reg()309.map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16)310.unwrap_or(false)311}312313match *self {314// C.ADD315Inst::AluRRR {316alu_op: AluOPRRR::Add,317rd,318rs1,319rs2,320} if (rd.to_reg() == rs1 || rd.to_reg() == rs2)321&& rs1 != zero_reg()322&& rs2 != zero_reg() =>323{324// Technically `c.add rd, rs` expands to `add rd, rd, rs`, but we can325// also swap rs1 with rs2 and we get an equivalent instruction. 
i.e we326// can also compress `add rd, rs, rd` into `c.add rd, rs`.327let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };328329sink.put2(encode_cr_type(CrOp::CAdd, rd, src));330}331332// C.MV333Inst::AluRRImm12 {334alu_op: AluOPRRI::Addi | AluOPRRI::Ori,335rd,336rs,337imm12,338} if rd.to_reg() != rs339&& rd.to_reg() != zero_reg()340&& rs != zero_reg()341&& imm12.as_i16() == 0 =>342{343sink.put2(encode_cr_type(CrOp::CMv, rd, rs));344}345346// CA Ops347Inst::AluRRR {348alu_op:349alu_op @ (AluOPRRR::And350| AluOPRRR::Or351| AluOPRRR::Xor352| AluOPRRR::Addw353| AluOPRRR::Mul),354rd,355rs1,356rs2,357} if (rd.to_reg() == rs1 || rd.to_reg() == rs2)358&& reg_is_compressible(rs1)359&& reg_is_compressible(rs2) =>360{361let op = match alu_op {362AluOPRRR::And => CaOp::CAnd,363AluOPRRR::Or => CaOp::COr,364AluOPRRR::Xor => CaOp::CXor,365AluOPRRR::Addw => CaOp::CAddw,366AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul,367_ => return None,368};369// The canonical expansion for these instruction has `rd == rs1`, but370// these are all commutative operations, so we can swap the operands.371let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };372373sink.put2(encode_ca_type(op, rd, src));374}375376// The sub instructions are non commutative, so we can't swap the operands.377Inst::AluRRR {378alu_op: alu_op @ (AluOPRRR::Sub | AluOPRRR::Subw),379rd,380rs1,381rs2,382} if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => {383let op = match alu_op {384AluOPRRR::Sub => CaOp::CSub,385AluOPRRR::Subw => CaOp::CSubw,386_ => return None,387};388sink.put2(encode_ca_type(op, rd, rs2));389}390391// c.j392//393// We don't have a separate JAL as that is only available in RV32C394Inst::Jal { label } => {395sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump);396sink.add_uncond_branch(*start_off, *start_off + 2, label);397sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO));398}399400// c.jr401Inst::Jalr { rd, base, offset }402if rd.to_reg() == zero_reg() && base != 
zero_reg() && offset.as_i16() == 0 =>403{404sink.put2(encode_cr2_type(CrOp::CJr, base));405state.clobber_vstate();406}407408// c.jalr409Inst::Jalr { rd, base, offset }410if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 =>411{412sink.put2(encode_cr2_type(CrOp::CJalr, base));413state.clobber_vstate();414}415416// c.ebreak417Inst::EBreak => {418sink.put2(encode_cr_type(419CrOp::CEbreak,420writable_zero_reg(),421zero_reg(),422));423}424425// c.unimp426Inst::Udf { trap_code } => {427sink.add_trap(trap_code);428sink.put2(0x0000);429}430// c.addi16sp431//432// c.addi16sp shares the opcode with c.lui, but has a destination field of x2.433// c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in the stack pointer (sp=x2),434// where the immediate is scaled to represent multiples of 16 in the range (-512,496). c.addi16sp is used435// to adjust the stack pointer in procedure prologues and epilogues. It expands into addi x2, x2, nzimm. c.addi16sp436// is only valid when nzimm≠0; the code point with nzimm=0 is reserved.437Inst::AluRRImm12 {438alu_op: AluOPRRI::Addi,439rd,440rs,441imm12,442} if rd.to_reg() == rs443&& rs == stack_reg()444&& imm12.as_i16() != 0445&& (imm12.as_i16() % 16) == 0446&& Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() =>447{448let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap();449sink.put2(encode_c_addi16sp(imm6));450}451452// c.addi4spn453//454// c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero455// immediate, scaled by 4, to the stack pointer, x2, and writes the result to456// rd. This instruction is used to generate pointers to stack-allocated variables457// and expands to addi rd, x2, nzuimm. 
c.addi4spn is only valid when nzuimm≠0;458// the code points with nzuimm=0 are reserved.459Inst::AluRRImm12 {460alu_op: AluOPRRI::Addi,461rd,462rs,463imm12,464} if reg_is_compressible(rd.to_reg())465&& rs == stack_reg()466&& imm12.as_i16() != 0467&& (imm12.as_i16() % 4) == 0468&& u8::try_from(imm12.as_i16() / 4).is_ok() =>469{470let imm = u8::try_from(imm12.as_i16() / 4).unwrap();471sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm));472}473474// c.li475Inst::AluRRImm12 {476alu_op: AluOPRRI::Addi,477rd,478rs,479imm12,480} if rd.to_reg() != zero_reg() && rs == zero_reg() => {481let imm6 = Imm6::maybe_from_imm12(imm12)?;482sink.put2(encode_ci_type(CiOp::CLi, rd, imm6));483}484485// c.addi486Inst::AluRRImm12 {487alu_op: AluOPRRI::Addi,488rd,489rs,490imm12,491} if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {492let imm6 = Imm6::maybe_from_imm12(imm12)?;493sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6));494}495496// c.addiw497Inst::AluRRImm12 {498alu_op: AluOPRRI::Addiw,499rd,500rs,501imm12,502} if rd.to_reg() == rs && rs != zero_reg() => {503let imm6 = Imm6::maybe_from_imm12(imm12)?;504sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6));505}506507// c.lui508//509// c.lui loads the non-zero 6-bit immediate field into bits 17–12510// of the destination register, clears the bottom 12 bits, and511// sign-extends bit 17 into all higher bits of the destination.512Inst::Lui { rd, imm: imm20 }513if rd.to_reg() != zero_reg()514&& rd.to_reg() != stack_reg()515&& imm20.as_i32() != 0 =>516{517// Check that the top bits are sign extended518let imm = imm20.as_i32() << 14 >> 14;519if imm != imm20.as_i32() {520return None;521}522let imm6 = Imm6::maybe_from_i32(imm)?;523sink.put2(encode_ci_type(CiOp::CLui, rd, imm6));524}525526// c.slli527Inst::AluRRImm12 {528alu_op: AluOPRRI::Slli,529rd,530rs,531imm12,532} if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {533// The shift amount is unsigned, but we encode it as signed.534let shift = imm12.as_i16() 
& 0x3f;535let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();536sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6));537}538539// c.srli / c.srai540Inst::AluRRImm12 {541alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai),542rd,543rs,544imm12,545} if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => {546let op = match op {547AluOPRRI::Srli => CbOp::CSrli,548AluOPRRI::Srai => CbOp::CSrai,549_ => unreachable!(),550};551552// The shift amount is unsigned, but we encode it as signed.553let shift = imm12.as_i16() & 0x3f;554let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();555sink.put2(encode_cb_type(op, rd, imm6));556}557558// c.zextb559//560// This is an alias for `andi rd, rd, 0xff`561Inst::AluRRImm12 {562alu_op: AluOPRRI::Andi,563rd,564rs,565imm12,566} if has_zcb567&& rd.to_reg() == rs568&& reg_is_compressible(rs)569&& imm12.as_i16() == 0xff =>570{571sink.put2(encode_cszn_type(CsznOp::CZextb, rd));572}573574// c.andi575Inst::AluRRImm12 {576alu_op: AluOPRRI::Andi,577rd,578rs,579imm12,580} if rd.to_reg() == rs && reg_is_compressible(rs) => {581let imm6 = Imm6::maybe_from_imm12(imm12)?;582sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6));583}584585// Stack Based Loads586Inst::Load {587rd,588op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld),589from,590flags,591} if from.get_base_register() == Some(stack_reg())592&& (from.get_offset_with_state(state) % op.size()) == 0 =>593{594// We encode the offset in multiples of the load size.595let offset = from.get_offset_with_state(state);596let imm6 = u8::try_from(offset / op.size())597.ok()598.and_then(Uimm6::maybe_from_u8)?;599600// Some additional constraints on these instructions.601//602// Integer loads are not allowed to target x0, but floating point loads603// are, since f0 is not a special register.604//605// Floating point loads are not included in the base Zca extension606// but in a separate Zcd extension. 
Both of these are part of the C Extension.607let rd_is_zero = rd.to_reg() == zero_reg();608let op = match op {609LoadOP::Lw if !rd_is_zero => CiOp::CLwsp,610LoadOP::Ld if !rd_is_zero => CiOp::CLdsp,611LoadOP::Fld if has_zcd => CiOp::CFldsp,612_ => return None,613};614615if let Some(trap_code) = flags.trap_code() {616// Register the offset at which the actual load instruction starts.617sink.add_trap(trap_code);618}619sink.put2(encode_ci_sp_load(op, rd, imm6));620}621622// Regular Loads623Inst::Load {624rd,625op:626op627@ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh),628from,629flags,630} if reg_is_compressible(rd.to_reg())631&& from632.get_base_register()633.map(reg_is_compressible)634.unwrap_or(false)635&& (from.get_offset_with_state(state) % op.size()) == 0 =>636{637let base = from.get_base_register().unwrap();638639// We encode the offset in multiples of the store size.640let offset = from.get_offset_with_state(state);641let offset = u8::try_from(offset / op.size()).ok()?;642643// We mix two different formats here.644//645// c.lw / c.ld / c.fld instructions are available in the standard Zca646// extension using the CL format.647//648// c.lbu / c.lhu / c.lh are only available in the Zcb extension and649// are also encoded differently. Technically they each have a different650// format, but they are similar enough that we can group them.651let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh);652let encoded = if is_zcb_load {653if !has_zcb {654return None;655}656657let op = match op {658LoadOP::Lbu => ZcbMemOp::CLbu,659LoadOP::Lhu => ZcbMemOp::CLhu,660LoadOP::Lh => ZcbMemOp::CLh,661_ => unreachable!(),662};663664// Byte stores & loads have 2 bits of immediate offset. 
Halfword stores665// and loads only have 1 bit.666let imm2 = Uimm2::maybe_from_u8(offset)?;667if (offset & !((1 << op.imm_bits()) - 1)) != 0 {668return None;669}670671encode_zcbmem_load(op, rd, base, imm2)672} else {673// Floating point loads are not included in the base Zca extension674// but in a separate Zcd extension. Both of these are part of the C Extension.675let op = match op {676LoadOP::Lw => ClOp::CLw,677LoadOP::Ld => ClOp::CLd,678LoadOP::Fld if has_zcd => ClOp::CFld,679_ => return None,680};681let imm5 = Uimm5::maybe_from_u8(offset)?;682683encode_cl_type(op, rd, base, imm5)684};685686if let Some(trap_code) = flags.trap_code() {687// Register the offset at which the actual load instruction starts.688sink.add_trap(trap_code);689}690sink.put2(encoded);691}692693// Stack Based Stores694Inst::Store {695src,696op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd),697to,698flags,699} if to.get_base_register() == Some(stack_reg())700&& (to.get_offset_with_state(state) % op.size()) == 0 =>701{702// We encode the offset in multiples of the store size.703let offset = to.get_offset_with_state(state);704let imm6 = u8::try_from(offset / op.size())705.ok()706.and_then(Uimm6::maybe_from_u8)?;707708// Floating point stores are not included in the base Zca extension709// but in a separate Zcd extension. 
Both of these are part of the C Extension.710let op = match op {711StoreOP::Sw => CssOp::CSwsp,712StoreOP::Sd => CssOp::CSdsp,713StoreOP::Fsd if has_zcd => CssOp::CFsdsp,714_ => return None,715};716717if let Some(trap_code) = flags.trap_code() {718// Register the offset at which the actual load instruction starts.719sink.add_trap(trap_code);720}721sink.put2(encode_css_type(op, src, imm6));722}723724// Regular Stores725Inst::Store {726src,727op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb),728to,729flags,730} if reg_is_compressible(src)731&& to732.get_base_register()733.map(reg_is_compressible)734.unwrap_or(false)735&& (to.get_offset_with_state(state) % op.size()) == 0 =>736{737let base = to.get_base_register().unwrap();738739// We encode the offset in multiples of the store size.740let offset = to.get_offset_with_state(state);741let offset = u8::try_from(offset / op.size()).ok()?;742743// We mix two different formats here.744//745// c.sw / c.sd / c.fsd instructions are available in the standard Zca746// extension using the CL format.747//748// c.sb / c.sh are only available in the Zcb extension and are also749// encoded differently.750let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb);751let encoded = if is_zcb_store {752if !has_zcb {753return None;754}755756let op = match op {757StoreOP::Sh => ZcbMemOp::CSh,758StoreOP::Sb => ZcbMemOp::CSb,759_ => unreachable!(),760};761762// Byte stores & loads have 2 bits of immediate offset. Halfword stores763// and loads only have 1 bit.764let imm2 = Uimm2::maybe_from_u8(offset)?;765if (offset & !((1 << op.imm_bits()) - 1)) != 0 {766return None;767}768769encode_zcbmem_store(op, src, base, imm2)770} else {771// Floating point stores are not included in the base Zca extension772// but in a separate Zcd extension. 
Both of these are part of the C Extension.773let op = match op {774StoreOP::Sw => CsOp::CSw,775StoreOP::Sd => CsOp::CSd,776StoreOP::Fsd if has_zcd => CsOp::CFsd,777_ => return None,778};779let imm5 = Uimm5::maybe_from_u8(offset)?;780781encode_cs_type(op, src, base, imm5)782};783784if let Some(trap_code) = flags.trap_code() {785// Register the offset at which the actual load instruction starts.786sink.add_trap(trap_code);787}788sink.put2(encoded);789}790791// c.not792//793// This is an alias for `xori rd, rd, -1`794Inst::AluRRImm12 {795alu_op: AluOPRRI::Xori,796rd,797rs,798imm12,799} if has_zcb800&& rd.to_reg() == rs801&& reg_is_compressible(rs)802&& imm12.as_i16() == -1 =>803{804sink.put2(encode_cszn_type(CsznOp::CNot, rd));805}806807// c.sext.b / c.sext.h / c.zext.h808//809// These are all the extend instructions present in `Zcb`, they810// also require `Zbb` since they aren't available in the base ISA.811Inst::AluRRImm12 {812alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth),813rd,814rs,815imm12,816} if has_zcb817&& has_zbb818&& rd.to_reg() == rs819&& reg_is_compressible(rs)820&& imm12.as_i16() == 0 =>821{822let op = match alu_op {823AluOPRRI::Sextb => CsznOp::CSextb,824AluOPRRI::Sexth => CsznOp::CSexth,825AluOPRRI::Zexth => CsznOp::CZexth,826_ => unreachable!(),827};828sink.put2(encode_cszn_type(op, rd));829}830831// c.zext.w832//833// This is an alias for `add.uw rd, rd, zero`834Inst::AluRRR {835alu_op: AluOPRRR::Adduw,836rd,837rs1,838rs2,839} if has_zcb840&& has_zba841&& rd.to_reg() == rs1842&& reg_is_compressible(rs1)843&& rs2 == zero_reg() =>844{845sink.put2(encode_cszn_type(CsznOp::CZextw, rd));846}847848_ => return None,849}850851return Some(());852}853854fn emit_uncompressed(855&self,856sink: &mut MachBuffer<Inst>,857emit_info: &EmitInfo,858state: &mut EmitState,859start_off: &mut u32,860) {861match self {862&Inst::Nop0 => {863// do nothing864}865// Addi x0, x0, 0866&Inst::Nop4 => {867let x = Inst::AluRRImm12 {868alu_op: 
AluOPRRI::Addi,869rd: Writable::from_reg(zero_reg()),870rs: zero_reg(),871imm12: Imm12::ZERO,872};873x.emit(sink, emit_info, state)874}875&Inst::RawData { ref data } => {876// Right now we only put a u32 or u64 in this instruction.877// It is not very long, no need to check if need `emit_island`.878// If data is very long , this is a bug because RawData is typically879// use to load some data and rely on some position in the code stream.880// and we may exceed `Inst::worst_case_size`.881// for more information see https://github.com/bytecodealliance/wasmtime/pull/5612.882sink.put_data(&data[..]);883}884&Inst::Lui { rd, ref imm } => {885let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);886sink.put4(x);887}888&Inst::Fli { rd, width, imm } => {889sink.put4(encode_fli(width, imm, rd));890}891&Inst::LoadInlineConst { rd, ty, imm } => {892let data = &imm.to_le_bytes()[..ty.bytes() as usize];893894let label_data: MachLabel = sink.get_label();895let label_end: MachLabel = sink.get_label();896897// Load into rd898Inst::Load {899rd,900op: LoadOP::from_type(ty),901flags: MemFlags::new(),902from: AMode::Label(label_data),903}904.emit(sink, emit_info, state);905906// Jump over the inline pool907Inst::gen_jump(label_end).emit(sink, emit_info, state);908909// Emit the inline data910sink.bind_label(label_data, &mut state.ctrl_plane);911Inst::RawData { data: data.into() }.emit(sink, emit_info, state);912913sink.bind_label(label_end, &mut state.ctrl_plane);914}915&Inst::FpuRR {916alu_op,917width,918frm,919rd,920rs,921} => {922if alu_op.is_convert_to_int() {923sink.add_trap(TrapCode::BAD_CONVERSION_TO_INTEGER);924}925sink.put4(encode_fp_rr(alu_op, width, frm, rd, rs));926}927&Inst::FpuRRRR {928alu_op,929rd,930rs1,931rs2,932rs3,933frm,934width,935} => {936sink.put4(encode_fp_rrrr(alu_op, width, frm, rd, rs1, rs2, rs3));937}938&Inst::FpuRRR {939alu_op,940width,941frm,942rd,943rs1,944rs2,945} => {946sink.put4(encode_fp_rrr(alu_op, width, frm, rd, rs1, 
rs2));947}948&Inst::Unwind { ref inst } => {949sink.add_unwind(inst.clone());950}951&Inst::DummyUse { .. } => {952// This has already been handled by Inst::allocate.953}954&Inst::AluRRR {955alu_op,956rd,957rs1,958rs2,959} => {960let (rs1, rs2) = if alu_op.reverse_rs() {961(rs2, rs1)962} else {963(rs1, rs2)964};965966sink.put4(encode_r_type(967alu_op.op_code(),968rd,969alu_op.funct3(),970rs1,971rs2,972alu_op.funct7(),973));974}975&Inst::AluRRImm12 {976alu_op,977rd,978rs,979imm12,980} => {981let x = alu_op.op_code()982| reg_to_gpr_num(rd.to_reg()) << 7983| alu_op.funct3() << 12984| reg_to_gpr_num(rs) << 15985| alu_op.imm12(imm12) << 20;986sink.put4(x);987}988&Inst::CsrReg { op, rd, rs, csr } => {989sink.put4(encode_csr_reg(op, rd, rs, csr));990}991&Inst::CsrImm { op, rd, csr, imm } => {992sink.put4(encode_csr_imm(op, rd, csr, imm));993}994&Inst::Load {995rd,996op: LoadOP::Flh,997from,998flags,999} if !emit_info.isa_flags.has_zfhmin() => {1000// flh unavailable, use an integer load instead1001Inst::Load {1002rd: writable_spilltmp_reg(),1003op: LoadOP::Lh,1004flags,1005from,1006}1007.emit(sink, emit_info, state);1008// NaN-box the `f16` before loading it into the floating-point1009// register with a 32-bit `fmv`.1010Inst::Lui {1011rd: writable_spilltmp_reg2(),1012imm: Imm20::from_i32((0xffff_0000_u32 as i32) >> 12),1013}1014.emit(sink, emit_info, state);1015Inst::AluRRR {1016alu_op: AluOPRRR::Or,1017rd: writable_spilltmp_reg(),1018rs1: spilltmp_reg(),1019rs2: spilltmp_reg2(),1020}1021.emit(sink, emit_info, state);1022Inst::FpuRR {1023alu_op: FpuOPRR::FmvFmtX,1024width: FpuOPWidth::S,1025frm: FRM::RNE,1026rd,1027rs: spilltmp_reg(),1028}1029.emit(sink, emit_info, state);1030}1031&Inst::Load {1032rd,1033op,1034from,1035flags,1036} => {1037let base = from.get_base_register();1038let offset = from.get_offset_with_state(state);1039let offset_imm12 = Imm12::maybe_from_i64(offset);1040let label = from.get_label_with_sink(sink);10411042let (addr, imm12) = match (base, 
offset_imm12, label) {1043// When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it.1044(Some(base), Some(imm12), None) => (base, imm12),10451046// Otherwise, if the offset does not fit into a imm12, we need to materialize it into a1047// register and load from that.1048(Some(_), None, None) => {1049let tmp = writable_spilltmp_reg();1050Inst::LoadAddr { rd: tmp, mem: from }.emit(sink, emit_info, state);1051(tmp.to_reg(), Imm12::ZERO)1052}10531054// If the AMode contains a label we can emit an internal relocation that gets1055// resolved with the correct address later.1056(None, Some(imm), Some(label)) => {1057debug_assert_eq!(imm.as_i16(), 0);10581059// Get the current PC.1060sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);1061Inst::Auipc {1062rd,1063imm: Imm20::ZERO,1064}1065.emit_uncompressed(sink, emit_info, state, start_off);10661067// Emit a relocation for the load. This patches the offset into the instruction.1068sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);10691070// Imm12 here is meaningless since it's going to get replaced.1071(rd.to_reg(), Imm12::ZERO)1072}10731074// These cases are impossible with the current AModes that we have. We either1075// always have a register, or always have a label. 
Never both, and never neither.1076(None, None, None)1077| (None, Some(_), None)1078| (Some(_), None, Some(_))1079| (Some(_), Some(_), Some(_))1080| (None, None, Some(_)) => {1081unreachable!("Invalid load address")1082}1083};10841085if let Some(trap_code) = flags.trap_code() {1086// Register the offset at which the actual load instruction starts.1087sink.add_trap(trap_code);1088}10891090sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12));1091}1092&Inst::Store {1093op: StoreOP::Fsh,1094src,1095flags,1096to,1097} if !emit_info.isa_flags.has_zfhmin() => {1098// fsh unavailable, use an integer store instead1099Inst::FpuRR {1100alu_op: FpuOPRR::FmvXFmt,1101width: FpuOPWidth::S,1102frm: FRM::RNE,1103rd: writable_spilltmp_reg(),1104rs: src,1105}1106.emit(sink, emit_info, state);1107Inst::Store {1108to,1109op: StoreOP::Sh,1110flags,1111src: spilltmp_reg(),1112}1113.emit(sink, emit_info, state);1114}1115&Inst::Store { op, src, flags, to } => {1116let base = to.get_base_register();1117let offset = to.get_offset_with_state(state);1118let offset_imm12 = Imm12::maybe_from_i64(offset);11191120let (addr, imm12) = match (base, offset_imm12) {1121// If the offset fits into an imm12 we can directly encode it.1122(Some(base), Some(imm12)) => (base, imm12),1123// Otherwise load the address it into a reg and load from it.1124_ => {1125let tmp = writable_spilltmp_reg();1126Inst::LoadAddr { rd: tmp, mem: to }.emit(sink, emit_info, state);1127(tmp.to_reg(), Imm12::ZERO)1128}1129};11301131if let Some(trap_code) = flags.trap_code() {1132// Register the offset at which the actual load instruction starts.1133sink.add_trap(trap_code);1134}11351136sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12));1137}1138&Inst::Args { .. } | &Inst::Rets { .. 
} => {1139// Nothing: this is a pseudoinstruction that serves1140// only to constrain registers at a certain point.1141}1142&Inst::Ret {} => {1143// RISC-V does not have a dedicated ret instruction, instead we emit the equivalent1144// `jalr x0, x1, 0` that jumps to the return address.1145Inst::Jalr {1146rd: writable_zero_reg(),1147base: link_reg(),1148offset: Imm12::ZERO,1149}1150.emit(sink, emit_info, state);1151}11521153&Inst::Extend {1154rd,1155rn,1156signed,1157from_bits,1158to_bits: _to_bits,1159} => {1160let mut insts = SmallInstVec::new();1161let shift_bits = (64 - from_bits) as i16;1162let is_u8 = || from_bits == 8 && signed == false;1163if is_u8() {1164// special for u8.1165insts.push(Inst::AluRRImm12 {1166alu_op: AluOPRRI::Andi,1167rd,1168rs: rn,1169imm12: Imm12::from_i16(255),1170});1171} else {1172insts.push(Inst::AluRRImm12 {1173alu_op: AluOPRRI::Slli,1174rd,1175rs: rn,1176imm12: Imm12::from_i16(shift_bits),1177});1178insts.push(Inst::AluRRImm12 {1179alu_op: if signed {1180AluOPRRI::Srai1181} else {1182AluOPRRI::Srli1183},1184rd,1185rs: rd.to_reg(),1186imm12: Imm12::from_i16(shift_bits),1187});1188}1189insts1190.into_iter()1191.for_each(|i| i.emit(sink, emit_info, state));1192}11931194&Inst::Call { ref info } => {1195sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);11961197Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0)1198.into_iter()1199.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));12001201if let Some(s) = state.take_stack_map() {1202let offset = sink.cur_offset();1203sink.push_user_stack_map(state, offset, s);1204}12051206if let Some(try_call) = info.try_call_info.as_ref() {1207sink.add_try_call_site(1208Some(state.frame_layout.sp_to_fp()),1209try_call.exception_handlers(&state.frame_layout),1210);1211} else {1212sink.add_call_site();1213}12141215let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();1216if callee_pop_size > 0 {1217for inst in 
Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {1218inst.emit(sink, emit_info, state);1219}1220}12211222// Load any stack-carried return values.1223info.emit_retval_loads::<Riscv64MachineDeps, _, _>(1224state.frame_layout().stackslots_size,1225|inst| inst.emit(sink, emit_info, state),1226|needed_space| Some(Inst::EmitIsland { needed_space }),1227);12281229// If this is a try-call, jump to the continuation1230// (normal-return) block.1231if let Some(try_call) = info.try_call_info.as_ref() {1232let jmp = Inst::Jal {1233label: try_call.continuation,1234};1235jmp.emit(sink, emit_info, state);1236}12371238*start_off = sink.cur_offset();1239}1240&Inst::CallInd { ref info } => {1241Inst::Jalr {1242rd: writable_link_reg(),1243base: info.dest,1244offset: Imm12::ZERO,1245}1246.emit(sink, emit_info, state);12471248if let Some(s) = state.take_stack_map() {1249let offset = sink.cur_offset();1250sink.push_user_stack_map(state, offset, s);1251}12521253if let Some(try_call) = info.try_call_info.as_ref() {1254sink.add_try_call_site(1255Some(state.frame_layout.sp_to_fp()),1256try_call.exception_handlers(&state.frame_layout),1257);1258} else {1259sink.add_call_site();1260}12611262let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();1263if callee_pop_size > 0 {1264for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {1265inst.emit(sink, emit_info, state);1266}1267}12681269// Load any stack-carried return values.1270info.emit_retval_loads::<Riscv64MachineDeps, _, _>(1271state.frame_layout().stackslots_size,1272|inst| inst.emit(sink, emit_info, state),1273|needed_space| Some(Inst::EmitIsland { needed_space }),1274);12751276// If this is a try-call, jump to the continuation1277// (normal-return) block.1278if let Some(try_call) = info.try_call_info.as_ref() {1279let jmp = Inst::Jal {1280label: try_call.continuation,1281};1282jmp.emit(sink, emit_info, state);1283}12841285*start_off = sink.cur_offset();1286}12871288&Inst::ReturnCall { ref info } => 
{1289emit_return_call_common_sequence(sink, emit_info, state, info);12901291sink.add_call_site();1292sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);1293Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)1294.into_iter()1295.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));1296}12971298&Inst::ReturnCallInd { ref info } => {1299emit_return_call_common_sequence(sink, emit_info, state, &info);13001301Inst::Jalr {1302rd: writable_zero_reg(),1303base: info.dest,1304offset: Imm12::ZERO,1305}1306.emit(sink, emit_info, state);1307}1308&Inst::Jal { label } => {1309sink.use_label_at_offset(*start_off, label, LabelUse::Jal20);1310sink.add_uncond_branch(*start_off, *start_off + 4, label);1311sink.put4(0b1101111);1312state.clobber_vstate();1313}1314&Inst::CondBr {1315taken,1316not_taken,1317kind,1318} => {1319match taken {1320CondBrTarget::Label(label) => {1321let code = kind.emit();1322let code_inverse = kind.inverse().emit().to_le_bytes();1323sink.use_label_at_offset(*start_off, label, LabelUse::B12);1324sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse);1325sink.put4(code);1326}1327CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"),1328}13291330match not_taken {1331CondBrTarget::Label(label) => {1332Inst::gen_jump(label).emit(sink, emit_info, state)1333}1334CondBrTarget::Fallthrough => {}1335};1336}13371338&Inst::Mov { rd, rm, ty } => {1339debug_assert_eq!(rd.to_reg().class(), rm.class());1340if rd.to_reg() == rm {1341return;1342}13431344match rm.class() {1345RegClass::Int => Inst::AluRRImm12 {1346alu_op: AluOPRRI::Addi,1347rd,1348rs: rm,1349imm12: Imm12::ZERO,1350},1351RegClass::Float => Inst::FpuRRR {1352alu_op: FpuOPRRR::Fsgnj,1353width: FpuOPWidth::try_from(ty).unwrap(),1354frm: FRM::RNE,1355rd,1356rs1: rm,1357rs2: rm,1358},1359RegClass::Vector => Inst::VecAluRRImm5 {1360op: VecAluOpRRImm5::VmvrV,1361vd: rd,1362vs2: rm,1363// Imm 0 means copy 1 register.1364imm: 
Imm5::maybe_from_i8(0).unwrap(),1365mask: VecOpMasking::Disabled,1366// Vstate for this instruction is ignored.1367vstate: VState::from_type(ty),1368},1369}1370.emit(sink, emit_info, state);1371}13721373&Inst::MovFromPReg { rd, rm } => {1374Inst::gen_move(rd, Reg::from(rm), I64).emit(sink, emit_info, state);1375}13761377&Inst::BrTable {1378index,1379tmp1,1380tmp2,1381ref targets,1382} => {1383let ext_index = writable_spilltmp_reg();13841385let label_compute_target = sink.get_label();13861387// The default target is passed in as the 0th element of `targets`1388// separate it here for clarity.1389let default_target = targets[0];1390let targets = &targets[1..];13911392// We are going to potentially emit a large amount of instructions, so ensure that we emit an island1393// now if we need one.1394//1395// The worse case PC calculations are 12 instructions. And each entry in the jump table is 2 instructions.1396// Check if we need to emit a jump table here to support that jump.1397let inst_count = 12 + (targets.len() * 2);1398let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32;1399if sink.island_needed(distance) {1400let jump_around_label = sink.get_label();1401Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);1402sink.emit_island(distance + 4, &mut state.ctrl_plane);1403sink.bind_label(jump_around_label, &mut state.ctrl_plane);1404}14051406// We emit a bounds check on the index, if the index is larger than the number of1407// jump table entries, we jump to the default block. Otherwise we compute a jump1408// offset by multiplying the index by 8 (the size of each entry) and then jump to1409// that offset. 
Each jump table entry is a regular auipc+jalr which we emit sequentially.1410//1411// Build the following sequence:1412//1413// extend_index:1414// zext.w ext_index, index1415// bounds_check:1416// li tmp, n_labels1417// bltu ext_index, tmp, compute_target1418// jump_to_default_block:1419// auipc pc, 01420// jalr zero, pc, default_block1421// compute_target:1422// auipc pc, 01423// slli tmp, ext_index, 31424// add pc, pc, tmp1425// jalr zero, pc, 0x101426// jump_table:1427// ; This repeats for each entry in the jumptable1428// auipc pc, 01429// jalr zero, pc, block_target14301431// Extend the index to 64 bits.1432//1433// This prevents us branching on the top 32 bits of the index, which1434// are undefined.1435Inst::Extend {1436rd: ext_index,1437rn: index,1438signed: false,1439from_bits: 32,1440to_bits: 64,1441}1442.emit(sink, emit_info, state);14431444// Bounds check.1445//1446// Check if the index passed in is larger than the number of jumptable1447// entries that we have. If it is, we fallthrough to a jump into the1448// default block.1449Inst::load_constant_u32(tmp2, targets.len() as u64)1450.iter()1451.for_each(|i| i.emit(sink, emit_info, state));1452Inst::CondBr {1453taken: CondBrTarget::Label(label_compute_target),1454not_taken: CondBrTarget::Fallthrough,1455kind: IntegerCompare {1456kind: IntCC::UnsignedLessThan,1457rs1: ext_index.to_reg(),1458rs2: tmp2.to_reg(),1459},1460}1461.emit(sink, emit_info, state);14621463sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32);1464Inst::construct_auipc_and_jalr(None, tmp2, 0)1465.iter()1466.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));14671468// Compute the jump table offset.1469// We need to emit a PC relative offset,1470sink.bind_label(label_compute_target, &mut state.ctrl_plane);14711472// Get the current PC.1473Inst::Auipc {1474rd: tmp1,1475imm: Imm20::ZERO,1476}1477.emit_uncompressed(sink, emit_info, state, start_off);14781479// These instructions must be emitted 
as uncompressed since we1480// are manually computing the offset from the PC.14811482// Multiply the index by 8, since that is the size in1483// bytes of each jump table entry1484Inst::AluRRImm12 {1485alu_op: AluOPRRI::Slli,1486rd: tmp2,1487rs: ext_index.to_reg(),1488imm12: Imm12::from_i16(3),1489}1490.emit_uncompressed(sink, emit_info, state, start_off);14911492// Calculate the base of the jump, PC + the offset from above.1493Inst::AluRRR {1494alu_op: AluOPRRR::Add,1495rd: tmp1,1496rs1: tmp1.to_reg(),1497rs2: tmp2.to_reg(),1498}1499.emit_uncompressed(sink, emit_info, state, start_off);15001501// Jump to the middle of the jump table.1502// We add a 16 byte offset here, since we used 4 instructions1503// since the AUIPC that was used to get the PC.1504Inst::Jalr {1505rd: writable_zero_reg(),1506base: tmp1.to_reg(),1507offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16),1508}1509.emit_uncompressed(sink, emit_info, state, start_off);15101511// Emit the jump table.1512//1513// Each entry is a auipc + jalr to the target block. 
We also start with a island1514// if necessary.15151516// Emit the jumps back to back1517for target in targets.iter() {1518sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32);15191520Inst::construct_auipc_and_jalr(None, tmp2, 0)1521.iter()1522.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));1523}15241525// We've just emitted an island that is safe up to *here*.1526// Mark it as such so that we don't needlessly emit additional islands.1527*start_off = sink.cur_offset();1528}15291530&Inst::Atomic {1531op,1532rd,1533addr,1534src,1535amo,1536} => {1537// TODO: get flags from original CLIF atomic instruction1538let flags = MemFlags::new();1539if let Some(trap_code) = flags.trap_code() {1540sink.add_trap(trap_code);1541}1542let x = op.op_code()1543| reg_to_gpr_num(rd.to_reg()) << 71544| op.funct3() << 121545| reg_to_gpr_num(addr) << 151546| reg_to_gpr_num(src) << 201547| op.funct7(amo) << 25;15481549sink.put4(x);1550}1551&Inst::Fence { pred, succ } => {1552let x = 0b00011111553| 0b00000 << 71554| 0b000 << 121555| 0b00000 << 151556| (succ as u32) << 201557| (pred as u32) << 24;15581559sink.put4(x);1560}1561&Inst::Auipc { rd, imm } => {1562sink.put4(enc_auipc(rd, imm));1563}15641565&Inst::LoadAddr { rd, mem } => {1566let base = mem.get_base_register();1567let offset = mem.get_offset_with_state(state);1568let offset_imm12 = Imm12::maybe_from_i64(offset);15691570match (mem, base, offset_imm12) {1571(_, Some(rs), Some(imm12)) => {1572Inst::AluRRImm12 {1573alu_op: AluOPRRI::Addi,1574rd,1575rs,1576imm12,1577}1578.emit(sink, emit_info, state);1579}1580(_, Some(rs), None) => {1581let mut insts = Inst::load_constant_u64(rd, offset as u64);1582insts.push(Inst::AluRRR {1583alu_op: AluOPRRR::Add,1584rd,1585rs1: rd.to_reg(),1586rs2: rs,1587});1588insts1589.into_iter()1590.for_each(|inst| inst.emit(sink, emit_info, state));1591}1592(AMode::Const(addr), None, _) => {1593// Get an address label for the constant and recurse.1594let label = 
sink.get_label_for_constant(addr);1595Inst::LoadAddr {1596rd,1597mem: AMode::Label(label),1598}1599.emit(sink, emit_info, state);1600}1601(AMode::Label(label), None, _) => {1602// Get the current PC.1603sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);1604let inst = Inst::Auipc {1605rd,1606imm: Imm20::ZERO,1607};1608inst.emit_uncompressed(sink, emit_info, state, start_off);16091610// Emit an add to the address with a relocation.1611// This later gets patched up with the correct offset.1612sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);1613Inst::AluRRImm12 {1614alu_op: AluOPRRI::Addi,1615rd,1616rs: rd.to_reg(),1617imm12: Imm12::ZERO,1618}1619.emit_uncompressed(sink, emit_info, state, start_off);1620}1621(amode, _, _) => {1622unimplemented!("LoadAddr: {:?}", amode);1623}1624}1625}16261627&Inst::Select {1628ref dst,1629condition,1630ref x,1631ref y,1632} => {1633// The general form for this select is the following:1634//1635// mv rd, x1636// b{cond} rcond, label_end1637// mv rd, y1638// label_end:1639// ... etc1640//1641// This is built on the assumption that moves are cheap, but branches and jumps1642// are not. So with this format we always avoid one jump instruction at the expense1643// of an unconditional move.1644//1645// We also perform another optimization here. 
If the destination register is the same1646// as one of the input registers, we can avoid emitting the first unconditional move1647// and emit just the branch and the second move.1648//1649// To make sure that this happens as often as possible, we also try to invert the1650// condition, so that if either of the input registers are the same as the destination1651// we avoid that move.16521653let label_end = sink.get_label();16541655let xregs = x.regs();1656let yregs = y.regs();1657let dstregs: Vec<Reg> = dst.regs().into_iter().map(|r| r.to_reg()).collect();1658let condregs = condition.regs();16591660// We are going to write to the destination register before evaluating1661// the condition, so we need to make sure that the destination register1662// is not one of the condition registers.1663//1664// This should never happen, since hopefully the regalloc constraints1665// for this register are set up correctly.1666debug_assert_ne!(dstregs, condregs);16671668// Check if we can invert the condition and avoid moving the y registers into1669// the destination. This allows us to only emit the branch and one of the moves.1670let (uncond_move, cond_move, condition) = if yregs == dstregs {1671(yregs, xregs, condition.inverse())1672} else {1673(xregs, yregs, condition)1674};16751676// Unconditionally move one of the values to the destination register.1677//1678// These moves may not end up being emitted if the source and1679// destination registers are the same. 
That logic is built into1680// the emit function for `Inst::Mov`.1681for i in gen_moves(dst.regs(), uncond_move) {1682i.emit(sink, emit_info, state);1683}16841685// If the condition passes we skip over the conditional move1686Inst::CondBr {1687taken: CondBrTarget::Label(label_end),1688not_taken: CondBrTarget::Fallthrough,1689kind: condition,1690}1691.emit(sink, emit_info, state);16921693// Move the conditional value to the destination register.1694for i in gen_moves(dst.regs(), cond_move) {1695i.emit(sink, emit_info, state);1696}16971698sink.bind_label(label_end, &mut state.ctrl_plane);1699}1700&Inst::Jalr { rd, base, offset } => {1701sink.put4(enc_jalr(rd, base, offset));1702state.clobber_vstate();1703}1704&Inst::EBreak => {1705sink.put4(0x00100073);1706}1707&Inst::AtomicCas {1708offset,1709t0,1710dst,1711e,1712addr,1713v,1714ty,1715} => {1716// # addr holds address of memory location1717// # e holds expected value1718// # v holds desired value1719// # dst holds return value1720// cas:1721// lr.w dst, (addr) # Load original value.1722// bne dst, e, fail # Doesn’t match, so fail.1723// sc.w t0, v, (addr) # Try to update.1724// bnez t0 , cas # if store not ok,retry.1725// fail:1726let fail_label = sink.get_label();1727let cas_lebel = sink.get_label();1728sink.bind_label(cas_lebel, &mut state.ctrl_plane);1729Inst::Atomic {1730op: AtomicOP::load_op(ty),1731rd: dst,1732addr,1733src: zero_reg(),1734amo: AMO::SeqCst,1735}1736.emit(sink, emit_info, state);1737if ty.bits() < 32 {1738AtomicOP::extract(dst, offset, dst.to_reg(), ty)1739.iter()1740.for_each(|i| i.emit(sink, emit_info, state));1741} else if ty.bits() == 32 {1742Inst::Extend {1743rd: dst,1744rn: dst.to_reg(),1745signed: false,1746from_bits: 32,1747to_bits: 64,1748}1749.emit(sink, emit_info, state);1750}1751Inst::CondBr {1752taken: CondBrTarget::Label(fail_label),1753not_taken: CondBrTarget::Fallthrough,1754kind: IntegerCompare {1755kind: IntCC::NotEqual,1756rs1: e,1757rs2: 
dst.to_reg(),1758},1759}1760.emit(sink, emit_info, state);1761let store_value = if ty.bits() < 32 {1762// reload value to t0.1763Inst::Atomic {1764op: AtomicOP::load_op(ty),1765rd: t0,1766addr,1767src: zero_reg(),1768amo: AMO::SeqCst,1769}1770.emit(sink, emit_info, state);1771// set reset part.1772AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)1773.iter()1774.for_each(|i| i.emit(sink, emit_info, state));1775t0.to_reg()1776} else {1777v1778};1779Inst::Atomic {1780op: AtomicOP::store_op(ty),1781rd: t0,1782addr,1783src: store_value,1784amo: AMO::SeqCst,1785}1786.emit(sink, emit_info, state);1787// check is our value stored.1788Inst::CondBr {1789taken: CondBrTarget::Label(cas_lebel),1790not_taken: CondBrTarget::Fallthrough,1791kind: IntegerCompare {1792kind: IntCC::NotEqual,1793rs1: t0.to_reg(),1794rs2: zero_reg(),1795},1796}1797.emit(sink, emit_info, state);1798sink.bind_label(fail_label, &mut state.ctrl_plane);1799}1800&Inst::AtomicRmwLoop {1801offset,1802op,1803dst,1804ty,1805p,1806x,1807t0,1808} => {1809let retry = sink.get_label();1810sink.bind_label(retry, &mut state.ctrl_plane);1811// load old value.1812Inst::Atomic {1813op: AtomicOP::load_op(ty),1814rd: dst,1815addr: p,1816src: zero_reg(),1817amo: AMO::SeqCst,1818}1819.emit(sink, emit_info, state);1820//18211822let store_value: Reg = match op {1823crate::ir::AtomicRmwOp::Add1824| crate::ir::AtomicRmwOp::Sub1825| crate::ir::AtomicRmwOp::And1826| crate::ir::AtomicRmwOp::Or1827| crate::ir::AtomicRmwOp::Xor => {1828AtomicOP::extract(dst, offset, dst.to_reg(), ty)1829.iter()1830.for_each(|i| i.emit(sink, emit_info, state));1831Inst::AluRRR {1832alu_op: match op {1833crate::ir::AtomicRmwOp::Add => AluOPRRR::Add,1834crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub,1835crate::ir::AtomicRmwOp::And => AluOPRRR::And,1836crate::ir::AtomicRmwOp::Or => AluOPRRR::Or,1837crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor,1838_ => unreachable!(),1839},1840rd: t0,1841rs1: dst.to_reg(),1842rs2: x,1843}1844.emit(sink, emit_info, 
state);1845Inst::Atomic {1846op: AtomicOP::load_op(ty),1847rd: writable_spilltmp_reg2(),1848addr: p,1849src: zero_reg(),1850amo: AMO::SeqCst,1851}1852.emit(sink, emit_info, state);1853AtomicOP::merge(1854writable_spilltmp_reg2(),1855writable_spilltmp_reg(),1856offset,1857t0.to_reg(),1858ty,1859)1860.iter()1861.for_each(|i| i.emit(sink, emit_info, state));1862spilltmp_reg2()1863}1864crate::ir::AtomicRmwOp::Nand => {1865if ty.bits() < 32 {1866AtomicOP::extract(dst, offset, dst.to_reg(), ty)1867.iter()1868.for_each(|i| i.emit(sink, emit_info, state));1869}1870Inst::AluRRR {1871alu_op: AluOPRRR::And,1872rd: t0,1873rs1: x,1874rs2: dst.to_reg(),1875}1876.emit(sink, emit_info, state);1877Inst::construct_bit_not(t0, t0.to_reg()).emit(sink, emit_info, state);1878if ty.bits() < 32 {1879Inst::Atomic {1880op: AtomicOP::load_op(ty),1881rd: writable_spilltmp_reg2(),1882addr: p,1883src: zero_reg(),1884amo: AMO::SeqCst,1885}1886.emit(sink, emit_info, state);1887AtomicOP::merge(1888writable_spilltmp_reg2(),1889writable_spilltmp_reg(),1890offset,1891t0.to_reg(),1892ty,1893)1894.iter()1895.for_each(|i| i.emit(sink, emit_info, state));1896spilltmp_reg2()1897} else {1898t0.to_reg()1899}1900}19011902crate::ir::AtomicRmwOp::Umin1903| crate::ir::AtomicRmwOp::Umax1904| crate::ir::AtomicRmwOp::Smin1905| crate::ir::AtomicRmwOp::Smax => {1906let label_select_dst = sink.get_label();1907let label_select_done = sink.get_label();1908if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax1909{1910AtomicOP::extract(dst, offset, dst.to_reg(), ty)1911} else {1912AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)1913}1914.iter()1915.for_each(|i| i.emit(sink, emit_info, state));19161917Inst::CondBr {1918taken: CondBrTarget::Label(label_select_dst),1919not_taken: CondBrTarget::Fallthrough,1920kind: IntegerCompare {1921kind: match op {1922crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan,1923crate::ir::AtomicRmwOp::Umax => 
IntCC::UnsignedGreaterThan,1924crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan,1925crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,1926_ => unreachable!(),1927},1928rs1: dst.to_reg(),1929rs2: x,1930},1931}1932.emit(sink, emit_info, state);1933// here we select x.1934Inst::gen_move(t0, x, I64).emit(sink, emit_info, state);1935Inst::gen_jump(label_select_done).emit(sink, emit_info, state);1936sink.bind_label(label_select_dst, &mut state.ctrl_plane);1937Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state);1938sink.bind_label(label_select_done, &mut state.ctrl_plane);1939Inst::Atomic {1940op: AtomicOP::load_op(ty),1941rd: writable_spilltmp_reg2(),1942addr: p,1943src: zero_reg(),1944amo: AMO::SeqCst,1945}1946.emit(sink, emit_info, state);1947AtomicOP::merge(1948writable_spilltmp_reg2(),1949writable_spilltmp_reg(),1950offset,1951t0.to_reg(),1952ty,1953)1954.iter()1955.for_each(|i| i.emit(sink, emit_info, state));1956spilltmp_reg2()1957}1958crate::ir::AtomicRmwOp::Xchg => {1959AtomicOP::extract(dst, offset, dst.to_reg(), ty)1960.iter()1961.for_each(|i| i.emit(sink, emit_info, state));1962Inst::Atomic {1963op: AtomicOP::load_op(ty),1964rd: writable_spilltmp_reg2(),1965addr: p,1966src: zero_reg(),1967amo: AMO::SeqCst,1968}1969.emit(sink, emit_info, state);1970AtomicOP::merge(1971writable_spilltmp_reg2(),1972writable_spilltmp_reg(),1973offset,1974x,1975ty,1976)1977.iter()1978.for_each(|i| i.emit(sink, emit_info, state));1979spilltmp_reg2()1980}1981};19821983Inst::Atomic {1984op: AtomicOP::store_op(ty),1985rd: t0,1986addr: p,1987src: store_value,1988amo: AMO::SeqCst,1989}1990.emit(sink, emit_info, state);19911992// if store is not ok,retry.1993Inst::CondBr {1994taken: CondBrTarget::Label(retry),1995not_taken: CondBrTarget::Fallthrough,1996kind: IntegerCompare {1997kind: IntCC::NotEqual,1998rs1: t0.to_reg(),1999rs2: zero_reg(),2000},2001}2002.emit(sink, emit_info, state);2003}20042005&Inst::LoadExtNameGot { rd, ref name } => {2006// Load a 
PC-relative address into a register.2007// RISC-V does this slightly differently from other arches. We emit a relocation2008// with a label, instead of the symbol itself.2009//2010// See: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses2011//2012// Emit the following code:2013// label:2014// auipc rd, 0 # R_RISCV_GOT_HI20 (symbol_name)2015// ld rd, rd, 0 # R_RISCV_PCREL_LO12_I (label)20162017// Create the label that is going to be published to the final binary object.2018let auipc_label = sink.get_label();2019sink.bind_label(auipc_label, &mut state.ctrl_plane);20202021// Get the current PC.2022sink.add_reloc(Reloc::RiscvGotHi20, &**name, 0);2023Inst::Auipc {2024rd,2025imm: Imm20::from_i32(0),2026}2027.emit_uncompressed(sink, emit_info, state, start_off);20282029// The `ld` here, points to the `auipc` label instead of directly to the symbol.2030sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);2031Inst::Load {2032rd,2033op: LoadOP::Ld,2034flags: MemFlags::trusted(),2035from: AMode::RegOffset(rd.to_reg(), 0),2036}2037.emit_uncompressed(sink, emit_info, state, start_off);2038}20392040&Inst::LoadExtNameFar {2041rd,2042ref name,2043offset,2044} => {2045// In the non PIC sequence we relocate the absolute address into2046// a preallocated space, load it into a register and jump over2047// it.2048//2049// Emit the following code:2050// ld rd, label_data2051// j label_end2052// label_data:2053// <8 byte space> # ABS82054// label_end:20552056let label_data = sink.get_label();2057let label_end = sink.get_label();20582059// Load the value from a label2060Inst::Load {2061rd,2062op: LoadOP::Ld,2063flags: MemFlags::trusted(),2064from: AMode::Label(label_data),2065}2066.emit(sink, emit_info, state);20672068// Jump over the data2069Inst::gen_jump(label_end).emit(sink, emit_info, state);20702071sink.bind_label(label_data, &mut state.ctrl_plane);2072sink.add_reloc(Reloc::Abs8, name.as_ref(), 
offset);2073sink.put8(0);20742075sink.bind_label(label_end, &mut state.ctrl_plane);2076}20772078&Inst::LoadExtNameNear {2079rd,2080ref name,2081offset,2082} => {2083// Emit the following code:2084// label:2085// auipc rd, 0 # R_RISCV_PCREL_HI20 (symbol_name)2086// ld rd, rd, 0 # R_RISCV_PCREL_LO12_I (label)20872088let auipc_label = sink.get_label();2089sink.bind_label(auipc_label, &mut state.ctrl_plane);20902091// Get the current PC.2092sink.add_reloc(Reloc::RiscvPCRelHi20, &**name, offset);2093Inst::Auipc {2094rd,2095imm: Imm20::from_i32(0),2096}2097.emit_uncompressed(sink, emit_info, state, start_off);20982099sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);2100Inst::AluRRImm12 {2101alu_op: AluOPRRI::Addi,2102rd,2103rs: rd.to_reg(),2104imm12: Imm12::ZERO,2105}2106.emit_uncompressed(sink, emit_info, state, start_off);2107}21082109&Inst::LabelAddress { dst, label } => {2110let offset = sink.cur_offset();2111Inst::Auipc {2112rd: dst,2113imm: Imm20::from_i32(0),2114}2115.emit_uncompressed(sink, emit_info, state, start_off);2116sink.use_label_at_offset(offset, label, LabelUse::PCRelHi20);21172118let offset = sink.cur_offset();2119Inst::AluRRImm12 {2120alu_op: AluOPRRI::Addi,2121rd: dst,2122rs: dst.to_reg(),2123imm12: Imm12::ZERO,2124}2125.emit_uncompressed(sink, emit_info, state, start_off);2126sink.use_label_at_offset(offset, label, LabelUse::PCRelLo12I);2127}21282129&Inst::ElfTlsGetAddr { rd, ref name } => {2130// RISC-V's TLS GD model is slightly different from other arches.2131//2132// We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits2133// of the address relative to the GOT entry. 
This relocation points to2134// the symbol as usual.2135//2136// However when loading the bottom 12bits of the address, we need to2137// use a label that points to the previous AUIPC instruction.2138//2139// label:2140// auipc a0,0 # R_RISCV_TLS_GD_HI20 (symbol)2141// addi a0,a0,0 # R_RISCV_PCREL_LO12_I (label)2142//2143// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic21442145// Create the label that is going to be published to the final binary object.2146let auipc_label = sink.get_label();2147sink.bind_label(auipc_label, &mut state.ctrl_plane);21482149// Get the current PC.2150sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0);2151Inst::Auipc {2152rd,2153imm: Imm20::from_i32(0),2154}2155.emit_uncompressed(sink, emit_info, state, start_off);21562157// The `addi` here, points to the `auipc` label instead of directly to the symbol.2158sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);2159Inst::AluRRImm12 {2160alu_op: AluOPRRI::Addi,2161rd,2162rs: rd.to_reg(),2163imm12: Imm12::from_i16(0),2164}2165.emit_uncompressed(sink, emit_info, state, start_off);21662167Inst::Call {2168info: Box::new(CallInfo::empty(2169ExternalName::LibCall(LibCall::ElfTlsGetAddr),2170CallConv::SystemV,2171)),2172}2173.emit_uncompressed(sink, emit_info, state, start_off);2174}21752176&Inst::TrapIf {2177rs1,2178rs2,2179cc,2180trap_code,2181} => {2182let label_end = sink.get_label();2183let cond = IntegerCompare { kind: cc, rs1, rs2 };21842185// Jump over the trap if we the condition is false.2186Inst::CondBr {2187taken: CondBrTarget::Label(label_end),2188not_taken: CondBrTarget::Fallthrough,2189kind: cond.inverse(),2190}2191.emit(sink, emit_info, state);2192Inst::Udf { trap_code }.emit(sink, emit_info, state);21932194sink.bind_label(label_end, &mut state.ctrl_plane);2195}2196&Inst::Udf { trap_code } => {2197sink.add_trap(trap_code);2198sink.put_data(Inst::TRAP_OPCODE);2199}2200&Inst::AtomicLoad { rd, ty, p } => {2201// emit the 
fence.2202Inst::Fence {2203pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,2204succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,2205}2206.emit(sink, emit_info, state);2207// load.2208Inst::Load {2209rd,2210op: LoadOP::from_type(ty),2211flags: MemFlags::new(),2212from: AMode::RegOffset(p, 0),2213}2214.emit(sink, emit_info, state);2215Inst::Fence {2216pred: Inst::FENCE_REQ_R,2217succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,2218}2219.emit(sink, emit_info, state);2220}2221&Inst::AtomicStore { src, ty, p } => {2222Inst::Fence {2223pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,2224succ: Inst::FENCE_REQ_W,2225}2226.emit(sink, emit_info, state);2227Inst::Store {2228to: AMode::RegOffset(p, 0),2229op: StoreOP::from_type(ty),2230flags: MemFlags::new(),2231src,2232}2233.emit(sink, emit_info, state);2234}22352236&Inst::Popcnt {2237sum,2238tmp,2239step,2240rs,2241ty,2242} => {2243// load 0 to sum , init.2244Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);2245// load2246Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))2247.emit(sink, emit_info, state);2248//2249Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);2250Inst::AluRRImm12 {2251alu_op: AluOPRRI::Slli,2252rd: tmp,2253rs: tmp.to_reg(),2254imm12: Imm12::from_i16((ty.bits() - 1) as i16),2255}2256.emit(sink, emit_info, state);2257let label_done = sink.get_label();2258let label_loop = sink.get_label();2259sink.bind_label(label_loop, &mut state.ctrl_plane);2260Inst::CondBr {2261taken: CondBrTarget::Label(label_done),2262not_taken: CondBrTarget::Fallthrough,2263kind: IntegerCompare {2264kind: IntCC::SignedLessThanOrEqual,2265rs1: step.to_reg(),2266rs2: zero_reg(),2267},2268}2269.emit(sink, emit_info, state);2270// test and add sum.2271{2272Inst::AluRRR {2273alu_op: AluOPRRR::And,2274rd: writable_spilltmp_reg2(),2275rs1: tmp.to_reg(),2276rs2: rs,2277}2278.emit(sink, emit_info, state);2279let label_over = sink.get_label();2280Inst::CondBr {2281taken: CondBrTarget::Label(label_over),2282not_taken: 
CondBrTarget::Fallthrough,
                    kind: IntegerCompare {
                        kind: IntCC::Equal,
                        rs1: zero_reg(),
                        rs2: spilltmp_reg2(),
                    },
                }
                .emit(sink, emit_info, state);
                // The tested bit was set: count it.
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: sum,
                    rs: sum.to_reg(),
                    imm12: Imm12::ONE,
                }
                .emit(sink, emit_info, state);
                sink.bind_label(label_over, &mut state.ctrl_plane);
            }
            // Advance: step -= 1 and move the probe bit down one position.
            {
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: step,
                    rs: step.to_reg(),
                    imm12: Imm12::from_i16(-1),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Srli,
                    rd: tmp,
                    rs: tmp.to_reg(),
                    imm12: Imm12::ONE,
                }
                .emit(sink, emit_info, state);
                Inst::gen_jump(label_loop).emit(sink, emit_info, state);
            }
            sink.bind_label(label_done, &mut state.ctrl_plane);
        }
        &Inst::Cltz {
            sum,
            tmp,
            step,
            rs,
            leading,
            ty,
        } => {
            // Count leading/trailing zeros with a bit-at-a-time loop:
            //   sum  - result accumulator, initialized to 0
            //   step - remaining iteration count, starts at ty.bits()
            //   tmp  - single probe bit; starts at the MSB when `leading`,
            //          else at the LSB
            Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);
            Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
                .emit(sink, emit_info, state);
            Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
            if leading {
                // Move the probe bit up to the MSB of the `ty`-wide value.
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Slli,
                    rd: tmp,
                    rs: tmp.to_reg(),
                    imm12: Imm12::from_i16((ty.bits() - 1) as i16),
                }
                .emit(sink, emit_info, state);
            }
            let label_done = sink.get_label();
            let label_loop = sink.get_label();
            sink.bind_label(label_loop, &mut state.ctrl_plane);
            // Exit once every bit position has been examined (step <= 0).
            Inst::CondBr {
                taken: CondBrTarget::Label(label_done),
                not_taken: CondBrTarget::Fallthrough,
                kind: IntegerCompare {
                    kind: IntCC::SignedLessThanOrEqual,
                    rs1: step.to_reg(),
                    rs2: zero_reg(),
                },
            }
            .emit(sink, emit_info, state);
            // Test the current bit; a set bit terminates the zero run.
            {
                Inst::AluRRR {
                    alu_op: AluOPRRR::And,
                    rd: writable_spilltmp_reg2(),
                    rs1: tmp.to_reg(),
                    rs2: rs,
                }
                .emit(sink, emit_info, state);
                // Bit set => the run of zeros is over; we are done.
                Inst::CondBr {
                    taken: CondBrTarget::Label(label_done),
                    not_taken: CondBrTarget::Fallthrough,
                    kind: IntegerCompare {
                        kind: IntCC::NotEqual,
                        rs1: zero_reg(),
                        rs2: spilltmp_reg2(),
                    },
                }
                .emit(sink, emit_info, state);
                // Bit clear => one more zero in the run.
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: sum,
                    rs: sum.to_reg(),
                    imm12: Imm12::ONE,
                }
                .emit(sink, emit_info, state);
            }
            // Advance: step -= 1 and walk the probe bit toward the other end.
            {
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: step,
                    rs: step.to_reg(),
                    imm12: Imm12::from_i16(-1),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRImm12 {
                    alu_op: if leading {
                        AluOPRRI::Srli
                    } else {
                        AluOPRRI::Slli
                    },
                    rd: tmp,
                    rs: tmp.to_reg(),
                    imm12: Imm12::ONE,
                }
                .emit(sink, emit_info, state);
                Inst::gen_jump(label_loop).emit(sink, emit_info, state);
            }
            sink.bind_label(label_done, &mut state.ctrl_plane);
        }
        &Inst::Brev8 {
            rs,
            ty,
            step,
            tmp,
            tmp2,
            rd,
        } => {
            // Reverse the bits within each byte of `rs`, one bit per loop
            // iteration:
            //   step - remaining iteration count, starts at ty.bits()
            //   tmp  - source probe bit, walks down from the MSB
            //   tmp2 - destination bit, walks up within each output byte
            Inst::gen_move(rd, zero_reg(), I64).emit(sink, emit_info, state);
            Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))
                .emit(sink, emit_info, state);
            // tmp = 1 << (ty.bits() - 1): the MSB of the value.
            Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Slli,
                rd: tmp,
                rs: tmp.to_reg(),
                imm12: Imm12::from_i16((ty.bits() - 1) as i16),
            }
            .emit(sink, emit_info, state);
            // tmp2 = 1 << (ty.bits() - 8): the lowest bit of the most
            // significant byte, i.e. where the MSB lands once its byte has
            // been bit-reversed.
            Inst::load_imm12(tmp2, Imm12::ONE).emit(sink, emit_info, state);
            Inst::AluRRImm12 {
                alu_op: AluOPRRI::Slli,
                rd: tmp2,
                rs: tmp2.to_reg(),
                imm12: Imm12::from_i16((ty.bits() - 8) as i16),
            }
            .emit(sink, emit_info, state);

            let label_done = sink.get_label();
            let label_loop = sink.get_label();
            sink.bind_label(label_loop, &mut state.ctrl_plane);
            // Exit once every bit position has been examined (step <= 0).
            Inst::CondBr {
                taken: CondBrTarget::Label(label_done),
                not_taken: CondBrTarget::Fallthrough,
                kind: IntegerCompare {
                    kind: IntCC::SignedLessThanOrEqual,
                    rs1: step.to_reg(),
                    rs2: zero_reg(),
                },
            }
            .emit(sink, emit_info, state);
            // If the source bit `tmp` is set in `rs`, set the destination
            // bit `tmp2` in `rd`.
            {
                Inst::AluRRR {
                    alu_op: AluOPRRR::And,
                    rd: writable_spilltmp_reg2(),
                    rs1: tmp.to_reg(),
                    rs2: rs,
                }
                .emit(sink, emit_info, state);
                let label_over = sink.get_label();
                Inst::CondBr {
                    taken: CondBrTarget::Label(label_over),
                    not_taken: CondBrTarget::Fallthrough,
                    kind: IntegerCompare {
                        kind: IntCC::Equal,
                        rs1: zero_reg(),
                        rs2: spilltmp_reg2(),
                    },
                }
                .emit(sink, emit_info, state);
                Inst::AluRRR {
                    alu_op: AluOPRRR::Or,
                    rd,
                    rs1: rd.to_reg(),
                    rs2: tmp2.to_reg(),
                }
                .emit(sink, emit_info, state);
                sink.bind_label(label_over, &mut state.ctrl_plane);
            }
            // Advance `step`, `tmp`, and `tmp2` for the next bit.
            {
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: step,
                    rs: step.to_reg(),
                    imm12: Imm12::from_i16(-1),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Srli,
                    rd: tmp,
                    rs: tmp.to_reg(),
                    imm12: Imm12::ONE,
                }
                .emit(sink, emit_info, state);
                {
                    // Reposition the destination bit:
                    //   if (step % 8 == 0) then tmp2 = tmp2 >> 15
                    //     (we crossed a byte boundary: drop from the top bit
                    //     of this byte to the bottom bit of the next lower
                    //     byte; 7 up + 8 down = 15)
                    //   if (step % 8 != 0) then tmp2 = tmp2 << 1
                    let label_over = sink.get_label();
                    let label_sll_1 = sink.get_label();
                    Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8))
                        .emit(sink, emit_info, state);
                    Inst::AluRRR {
                        alu_op: AluOPRRR::Rem,
                        rd: writable_spilltmp_reg2(),
                        rs1: step.to_reg(),
                        rs2: spilltmp_reg2(),
                    }
                    .emit(sink, emit_info, state);
                    Inst::CondBr {
                        taken: CondBrTarget::Label(label_sll_1),
                        not_taken: CondBrTarget::Fallthrough,
                        kind: IntegerCompare {
                            kind: IntCC::NotEqual,
                            rs1: spilltmp_reg2(),
                            rs2: zero_reg(),
                        },
                    }
                    .emit(sink, emit_info, state);
                    Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Srli,
                        rd: tmp2,
                        rs: tmp2.to_reg(),
                        imm12: Imm12::from_i16(15),
                    }
                    .emit(sink, emit_info, state);
                    Inst::gen_jump(label_over).emit(sink, emit_info, state);
                    sink.bind_label(label_sll_1, &mut state.ctrl_plane);
                    Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Slli,
                        rd: tmp2,
                        rs: tmp2.to_reg(),
                        imm12: Imm12::ONE,
                    }
                    .emit(sink, emit_info, state);
                    sink.bind_label(label_over, &mut state.ctrl_plane);
                }
                Inst::gen_jump(label_loop).emit(sink, emit_info, state);
            }
            sink.bind_label(label_done, &mut state.ctrl_plane);
        }
        &Inst::StackProbeLoop {
            guard_size,
            probe_count,
            tmp: guard_size_tmp,
        } => {
            // Touch one byte every `guard_size` bytes below SP, walking from
            // the farthest probe back toward SP.
            let step = writable_spilltmp_reg();
            Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
                .iter()
                .for_each(|i| i.emit(sink, emit_info, state));
            Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
                .iter()
                .for_each(|i| i.emit(sink, emit_info, state));

            let loop_start = sink.get_label();
            let label_done = sink.get_label();
            sink.bind_label(loop_start, &mut state.ctrl_plane);
            // Done once the remaining offset no longer exceeds one guard page.
            Inst::CondBr {
                taken: CondBrTarget::Label(label_done),
                not_taken: CondBrTarget::Fallthrough,
                kind: IntegerCompare {
                    kind: IntCC::UnsignedLessThanOrEqual,
                    rs1: step.to_reg(),
                    rs2: guard_size_tmp.to_reg(),
                },
            }
            .emit(sink, emit_info, state);
            // Compute the probe address: SP - step.
            Inst::AluRRR {
                alu_op: AluOPRRR::Sub,
                rd: writable_spilltmp_reg2(),
                rs1: stack_reg(),
                rs2: step.to_reg(),
            }
            .emit(sink, emit_info, state);
            // Probe with a one-byte store of zero.
            Inst::Store {
                to: AMode::RegOffset(spilltmp_reg2(), 0),
                op: StoreOP::Sb,
                flags: MemFlags::new(),
                src: zero_reg(),
            }
            .emit(sink, emit_info, state);
            // step -= guard_size.
            Inst::AluRRR {
                alu_op: AluOPRRR::Sub,
                rd: step,
                rs1: step.to_reg(),
                rs2: guard_size_tmp.to_reg(),
            }
            .emit(sink, emit_info, state);
            Inst::gen_jump(loop_start).emit(sink, emit_info, state);
            sink.bind_label(label_done, &mut state.ctrl_plane);
        }
        &Inst::VecAluRRRImm5 {
            op,
            vd,
            vd_src,
            imm,
            vs2,
            ref mask,
            ..
        } => {
            // The destination doubles as a source; register allocation must
            // have assigned both to the same register.
            debug_assert_eq!(vd.to_reg(), vd_src);

            sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask));
        }
        &Inst::VecAluRRRR {
            op,
            vd,
            vd_src,
            vs1,
            vs2,
            ref mask,
            ..
        } => {
            // The destination doubles as a source; register allocation must
            // have assigned both to the same register.
            debug_assert_eq!(vd.to_reg(), vd_src);

            sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask));
        }
        &Inst::VecAluRRR {
            op,
            vd,
            vs1,
            vs2,
            ref mask,
            ..
        } => {
            sink.put4(encode_valu(op, vd, vs1, vs2, *mask));
        }
        &Inst::VecAluRRImm5 {
            op,
            vd,
            imm,
            vs2,
            ref mask,
            ..
        } => {
            sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask));
        }
        &Inst::VecAluRR {
            op,
            vd,
            vs,
            ref mask,
            ..
        } => {
            sink.put4(encode_valu_rr(op, vd, vs, *mask));
        }
        &Inst::VecAluRImm5 {
            op,
            vd,
            imm,
            ref mask,
            ..
        } => {
            sink.put4(encode_valu_r_imm(op, vd, imm, *mask));
        }
        &Inst::VecSetState { rd, ref vstate } => {
            // 0x57 is the OP-V major opcode (vsetvli et al.) per the RISC-V
            // "V" extension spec.
            sink.put4(encode_vcfg_imm(
                0x57,
                rd.to_reg(),
                vstate.avl.unwrap_static(),
                &vstate.vtype,
            ));

            // Update the current vector emit state.
            state.vstate = EmitVState::Known(*vstate);
        }

        &Inst::VecLoad {
            eew,
            to,
            ref from,
            ref mask,
            flags,
            ..
        } => {
            // Vector loads don't support immediate offsets, so we need to load it into a register.
            let addr = match from {
                VecAMode::UnitStride { base } => {
                    let base_reg = base.get_base_register();
                    let offset = base.get_offset_with_state(state);

                    // Reg+0 Offset can be directly encoded
                    if let (Some(base_reg), 0) = (base_reg, offset) {
                        base_reg
                    } else {
                        // Otherwise load the address into a reg and load from it.
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr {
                            rd: tmp,
                            mem: *base,
                        }
                        .emit(sink, emit_info, state);
                        tmp.to_reg()
                    }
                }
            };

            if let Some(trap_code) = flags.trap_code() {
                // Register the offset at which the actual load instruction starts.
                sink.add_trap(trap_code);
            }

            // 0x07 is the LOAD-FP major opcode, which the V extension reuses
            // for vector loads.
            sink.put4(encode_vmem_load(
                0x07,
                to.to_reg(),
                eew,
                addr,
                from.lumop(),
                *mask,
                from.mop(),
                from.nf(),
            ));
        }

        &Inst::VecStore {
            eew,
            ref to,
            from,
            ref mask,
            flags,
            ..
        } => {
            // Vector stores don't support immediate offsets, so we need to load the address into a register.
            let addr = match to {
                VecAMode::UnitStride { base } => {
                    let base_reg = base.get_base_register();
                    let offset = base.get_offset_with_state(state);

                    // Reg+0 Offset can be directly encoded
                    if let (Some(base_reg), 0) = (base_reg, offset) {
                        base_reg
                    } else {
                        // Otherwise load the address into a reg and store through it.
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr {
                            rd: tmp,
                            mem: *base,
                        }
                        .emit(sink, emit_info, state);
                        tmp.to_reg()
                    }
                }
            };

            if let Some(trap_code) = flags.trap_code() {
                // Register the offset at which the actual store instruction starts.
                sink.add_trap(trap_code);
            }

            // 0x27 is the STORE-FP major opcode, which the V extension reuses
            // for vector stores.
            sink.put4(encode_vmem_store(
                0x27,
                from,
                eew,
                addr,
                to.sumop(),
                *mask,
                to.mop(),
                to.nf(),
            ));
        }

        Inst::EmitIsland { needed_space } => {
            if sink.island_needed(*needed_space) {
                // Jump over the island so fallthrough execution resumes after
                // it; the extra 4 bytes account for that jump itself.
                let jump_around_label = sink.get_label();
                Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
                sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
                sink.bind_label(jump_around_label, &mut state.ctrl_plane);
            }
        }
    }
}
}

/// Emit a return-call (tail-call) sequence, preceded by a constant island if
/// one is needed, since the sequence itself can be too large to safely cross
/// an island deadline.
fn emit_return_call_common_sequence<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    // The return call sequence can potentially emit a lot of instructions (up to 634 bytes!)
    // So let's emit an island here if we need it.
    //
    // It is difficult to calculate exactly how many instructions are going to be emitted, so
    // we calculate it by emitting it into a disposable buffer, and then checking how many bytes
    // were actually emitted.
    let mut buffer = MachBuffer::new();
    let mut fake_emit_state = state.clone();

    return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);

    // Finalize the buffer and get the number of bytes emitted.
    let buffer = buffer.finish(&Default::default(), &mut Default::default());
    let length = buffer.data().len() as u32;

    // And now emit the island inline with this instruction.
    if sink.island_needed(length) {
        let jump_around_label = sink.get_label();
        Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
        sink.emit_island(length + 4, &mut state.ctrl_plane);
        sink.bind_label(jump_around_label, &mut state.ctrl_plane);
    }

    // Now that we're done, emit the *actual* return sequence.
    return_call_emit_impl(sink, emit_info, state, info);
}

/// Emits the return-call epilogue: restore the clobbered callee-saved
/// registers, restore the link register and frame pointer, then release the
/// frame by incrementing SP.
///
/// This should not be called directly; instead prefer to call
/// [emit_return_call_common_sequence].
fn return_call_emit_impl<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    // Distance from the current SP up to the saved FP slot: the clobber area
    // plus fixed frame storage plus outgoing-args space all sit below it.
    let sp_to_fp_offset = {
        let frame_layout = state.frame_layout();
        i64::from(
            frame_layout.clobber_size
                + frame_layout.fixed_frame_storage_size
                + frame_layout.outgoing_args_size,
        )
    };

    // Reload the clobbered callee-saved registers, from the highest slot
    // (just below the saved FP) downward, 8 bytes per slot.
    let mut clobber_offset = sp_to_fp_offset - 8;
    for reg in state.frame_layout().clobbered_callee_saves.clone() {
        let rreg = reg.to_reg();
        let ty = match rreg.class() {
            RegClass::Int => I64,
            RegClass::Float => F64,
            RegClass::Vector => unimplemented!("Vector Clobber Restores"),
        };

        Inst::gen_load(
            reg.map(Reg::from),
            AMode::SPOffset(clobber_offset),
            ty,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        clobber_offset -= 8
    }

    // Restore the link register and frame pointer (saved at
    // sp_to_fp_offset + 8 and sp_to_fp_offset respectively).
    let setup_area_size = i64::from(state.frame_layout().setup_area_size);
    if setup_area_size > 0 {
        Inst::gen_load(
            writable_link_reg(),
            AMode::SPOffset(sp_to_fp_offset + 8),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        Inst::gen_load(
            writable_fp_reg(),
            AMode::SPOffset(sp_to_fp_offset),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);
    }

    // If we over-allocated the incoming args area in the prologue, resize down to what the callee
    // is expecting.
    let incoming_args_diff =
        i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);

    // Increment SP all at once.
    let sp_increment = sp_to_fp_offset + setup_area_size + incoming_args_diff;
    if sp_increment > 0 {
        for inst in Riscv64MachineDeps::gen_sp_reg_adjust(i32::try_from(sp_increment).unwrap()) {
            inst.emit(sink, emit_info, state);
        }
    }
}