// cranelift/codegen/src/isa/riscv64/inst/emit.rs
//! Riscv64 ISA: binary code emission.12use crate::ir::{self, LibCall, TrapCode};3use crate::isa::riscv64::inst::*;4use crate::isa::riscv64::lower::isle::generated_code::{5CaOp, CbOp, CiOp, CiwOp, ClOp, CrOp, CsOp, CssOp, CsznOp, FpuOPWidth, ZcbMemOp,6};7use cranelift_control::ControlPlane;89pub struct EmitInfo {10#[expect(dead_code, reason = "may want to be used in the future")]11shared_flag: settings::Flags,12isa_flags: super::super::riscv_settings::Flags,13}1415impl EmitInfo {16pub(crate) fn new(17shared_flag: settings::Flags,18isa_flags: super::super::riscv_settings::Flags,19) -> Self {20Self {21shared_flag,22isa_flags,23}24}25}2627pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {28u32::from(m.to_real_reg().unwrap().hw_enc() & 31)29}3031pub(crate) fn reg_to_compressed_gpr_num(m: Reg) -> u32 {32let real_reg = m.to_real_reg().unwrap().hw_enc();33debug_assert!(real_reg >= 8 && real_reg < 16);34let compressed_reg = real_reg - 8;35u32::from(compressed_reg)36}3738#[derive(Clone, Debug, PartialEq, Default)]39pub enum EmitVState {40#[default]41Unknown,42Known(VState),43}4445/// State carried between emissions of a sequence of instructions.46#[derive(Default, Clone, Debug)]47pub struct EmitState {48/// The user stack map for the upcoming instruction, as provided to49/// `pre_safepoint()`.50user_stack_map: Option<ir::UserStackMap>,5152/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and53/// optimized away at compiletime. 
See [cranelift_control].54ctrl_plane: ControlPlane,5556/// Vector State57/// Controls the current state of the vector unit at the emission point.58vstate: EmitVState,5960frame_layout: FrameLayout,61}6263impl EmitState {64fn take_stack_map(&mut self) -> Option<ir::UserStackMap> {65self.user_stack_map.take()66}6768fn clobber_vstate(&mut self) {69self.vstate = EmitVState::Unknown;70}71}7273impl MachInstEmitState<Inst> for EmitState {74fn new(75abi: &Callee<crate::isa::riscv64::abi::Riscv64MachineDeps>,76ctrl_plane: ControlPlane,77) -> Self {78EmitState {79user_stack_map: None,80ctrl_plane,81vstate: EmitVState::Unknown,82frame_layout: abi.frame_layout().clone(),83}84}8586fn pre_safepoint(&mut self, user_stack_map: Option<ir::UserStackMap>) {87self.user_stack_map = user_stack_map;88}8990fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {91&mut self.ctrl_plane92}9394fn take_ctrl_plane(self) -> ControlPlane {95self.ctrl_plane96}9798fn on_new_block(&mut self) {99// Reset the vector state.100self.clobber_vstate();101}102103fn frame_layout(&self) -> &FrameLayout {104&self.frame_layout105}106}107108impl Inst {109/// Load int mask.110/// If ty is int then 0xff in rd.111pub(crate) fn load_int_mask(rd: Writable<Reg>, ty: Type) -> SmallInstVec<Inst> {112let mut insts = SmallInstVec::new();113assert!(ty.is_int() && ty.bits() <= 64);114match ty {115I64 => {116insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));117}118I32 | I16 => {119insts.push(Inst::load_imm12(rd, Imm12::from_i16(-1)));120insts.push(Inst::Extend {121rd,122rn: rd.to_reg(),123signed: false,124from_bits: ty.bits() as u8,125to_bits: 64,126});127}128I8 => {129insts.push(Inst::load_imm12(rd, Imm12::from_i16(255)));130}131_ => unreachable!("ty:{:?}", ty),132}133insts134}135/// inverse all bit136pub(crate) fn construct_bit_not(rd: Writable<Reg>, rs: Reg) -> Inst {137Inst::AluRRImm12 {138alu_op: AluOPRRI::Xori,139rd,140rs,141imm12: Imm12::from_i16(-1),142}143}144145/// Returns Some(VState) if this instruction is 
expecting a specific vector state146/// before emission.147fn expected_vstate(&self) -> Option<&VState> {148match self {149Inst::Nop0150| Inst::Nop4151| Inst::BrTable { .. }152| Inst::Auipc { .. }153| Inst::Fli { .. }154| Inst::Lui { .. }155| Inst::LoadInlineConst { .. }156| Inst::AluRRR { .. }157| Inst::FpuRRR { .. }158| Inst::AluRRImm12 { .. }159| Inst::CsrReg { .. }160| Inst::CsrImm { .. }161| Inst::Load { .. }162| Inst::Store { .. }163| Inst::Args { .. }164| Inst::Rets { .. }165| Inst::Ret { .. }166| Inst::Extend { .. }167| Inst::Call { .. }168| Inst::CallInd { .. }169| Inst::ReturnCall { .. }170| Inst::ReturnCallInd { .. }171| Inst::Jal { .. }172| Inst::CondBr { .. }173| Inst::LoadExtNameGot { .. }174| Inst::LoadExtNameNear { .. }175| Inst::LoadExtNameFar { .. }176| Inst::ElfTlsGetAddr { .. }177| Inst::LoadAddr { .. }178| Inst::Mov { .. }179| Inst::MovFromPReg { .. }180| Inst::Fence { .. }181| Inst::EBreak182| Inst::Udf { .. }183| Inst::FpuRR { .. }184| Inst::FpuRRRR { .. }185| Inst::Jalr { .. }186| Inst::Atomic { .. }187| Inst::Select { .. }188| Inst::AtomicCas { .. }189| Inst::RawData { .. }190| Inst::AtomicStore { .. }191| Inst::AtomicLoad { .. }192| Inst::AtomicRmwLoop { .. }193| Inst::TrapIf { .. }194| Inst::Unwind { .. }195| Inst::DummyUse { .. }196| Inst::LabelAddress { .. }197| Inst::SequencePoint { .. }198| Inst::Popcnt { .. }199| Inst::Cltz { .. }200| Inst::Brev8 { .. }201| Inst::StackProbeLoop { .. } => None,202203// VecSetState does not expect any vstate, rather it updates it.204Inst::VecSetState { .. } => None,205206// `vmv` instructions copy a set of registers and ignore vstate.207Inst::VecAluRRImm5 { op: VecAluOpRRImm5::VmvrV, .. } => None,208209Inst::VecAluRR { vstate, .. } |210Inst::VecAluRRR { vstate, .. } |211Inst::VecAluRRRR { vstate, .. } |212Inst::VecAluRImm5 { vstate, .. } |213Inst::VecAluRRImm5 { vstate, .. } |214Inst::VecAluRRRImm5 { vstate, .. 
} |215// TODO: Unit-stride loads and stores only need the AVL to be correct, not216// the full vtype. A future optimization could be to decouple these two when217// updating vstate. This would allow us to avoid emitting a VecSetState in218// some cases.219Inst::VecLoad { vstate, .. }220| Inst::VecStore { vstate, .. } => Some(vstate),221Inst::EmitIsland { .. } => None,222}223}224}225226impl MachInstEmit for Inst {227type State = EmitState;228type Info = EmitInfo;229230fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {231// Check if we need to update the vector state before emitting this instruction232if let Some(expected) = self.expected_vstate() {233if state.vstate != EmitVState::Known(*expected) {234// Update the vector state.235Inst::VecSetState {236rd: writable_zero_reg(),237vstate: *expected,238}239.emit(sink, emit_info, state);240}241}242243// N.B.: we *must* not exceed the "worst-case size" used to compute244// where to insert islands, except when islands are explicitly triggered245// (with an `EmitIsland`). We check this in debug builds. This is `mut`246// to allow disabling the check for `JTSequence`, which is always247// emitted following an `EmitIsland`.248let mut start_off = sink.cur_offset();249250// First try to emit this as a compressed instruction251let res = self.try_emit_compressed(sink, emit_info, state, &mut start_off);252if res.is_none() {253// If we can't lets emit it as a normal instruction254self.emit_uncompressed(sink, emit_info, state, &mut start_off);255}256257// We exclude br_table, call, return_call and try_call from258// these checks since they emit their own islands, and thus259// are allowed to exceed the worst case size.260let emits_own_island = match self {261Inst::BrTable { .. }262| Inst::ReturnCall { .. }263| Inst::ReturnCallInd { .. }264| Inst::Call { .. }265| Inst::CallInd { .. }266| Inst::EmitIsland { .. 
} => true,267_ => false,268};269if !emits_own_island {270let end_off = sink.cur_offset();271assert!(272(end_off - start_off) <= Inst::worst_case_size(),273"Inst:{:?} length:{} worst_case_size:{}",274self,275end_off - start_off,276Inst::worst_case_size()277);278}279}280281fn pretty_print_inst(&self, state: &mut Self::State) -> String {282self.print_with_state(state)283}284}285286impl Inst {287/// Tries to emit an instruction as compressed, if we can't return false.288fn try_emit_compressed(289&self,290sink: &mut MachBuffer<Inst>,291emit_info: &EmitInfo,292state: &mut EmitState,293start_off: &mut u32,294) -> Option<()> {295let has_m = emit_info.isa_flags.has_m();296let has_zba = emit_info.isa_flags.has_zba();297let has_zbb = emit_info.isa_flags.has_zbb();298let has_zca = emit_info.isa_flags.has_zca();299let has_zcb = emit_info.isa_flags.has_zcb();300let has_zcd = emit_info.isa_flags.has_zcd();301302// Currently all compressed extensions (Zcb, Zcd, Zcmp, Zcmt, etc..) require Zca303// to be enabled, so check it early.304if !has_zca {305return None;306}307308fn reg_is_compressible(r: Reg) -> bool {309r.to_real_reg()310.map(|r| r.hw_enc() >= 8 && r.hw_enc() < 16)311.unwrap_or(false)312}313314match *self {315// C.ADD316Inst::AluRRR {317alu_op: AluOPRRR::Add,318rd,319rs1,320rs2,321} if (rd.to_reg() == rs1 || rd.to_reg() == rs2)322&& rs1 != zero_reg()323&& rs2 != zero_reg() =>324{325// Technically `c.add rd, rs` expands to `add rd, rd, rs`, but we can326// also swap rs1 with rs2 and we get an equivalent instruction. 
i.e we327// can also compress `add rd, rs, rd` into `c.add rd, rs`.328let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };329330sink.put2(encode_cr_type(CrOp::CAdd, rd, src));331}332333// C.MV334Inst::AluRRImm12 {335alu_op: AluOPRRI::Addi | AluOPRRI::Ori,336rd,337rs,338imm12,339} if rd.to_reg() != rs340&& rd.to_reg() != zero_reg()341&& rs != zero_reg()342&& imm12.as_i16() == 0 =>343{344sink.put2(encode_cr_type(CrOp::CMv, rd, rs));345}346347// CA Ops348Inst::AluRRR {349alu_op:350alu_op @ (AluOPRRR::And351| AluOPRRR::Or352| AluOPRRR::Xor353| AluOPRRR::Addw354| AluOPRRR::Mul),355rd,356rs1,357rs2,358} if (rd.to_reg() == rs1 || rd.to_reg() == rs2)359&& reg_is_compressible(rs1)360&& reg_is_compressible(rs2) =>361{362let op = match alu_op {363AluOPRRR::And => CaOp::CAnd,364AluOPRRR::Or => CaOp::COr,365AluOPRRR::Xor => CaOp::CXor,366AluOPRRR::Addw => CaOp::CAddw,367AluOPRRR::Mul if has_zcb && has_m => CaOp::CMul,368_ => return None,369};370// The canonical expansion for these instruction has `rd == rs1`, but371// these are all commutative operations, so we can swap the operands.372let src = if rd.to_reg() == rs1 { rs2 } else { rs1 };373374sink.put2(encode_ca_type(op, rd, src));375}376377// The sub instructions are non commutative, so we can't swap the operands.378Inst::AluRRR {379alu_op: alu_op @ (AluOPRRR::Sub | AluOPRRR::Subw),380rd,381rs1,382rs2,383} if rd.to_reg() == rs1 && reg_is_compressible(rs1) && reg_is_compressible(rs2) => {384let op = match alu_op {385AluOPRRR::Sub => CaOp::CSub,386AluOPRRR::Subw => CaOp::CSubw,387_ => return None,388};389sink.put2(encode_ca_type(op, rd, rs2));390}391392// c.j393//394// We don't have a separate JAL as that is only available in RV32C395Inst::Jal { label } => {396sink.use_label_at_offset(*start_off, label, LabelUse::RVCJump);397sink.add_uncond_branch(*start_off, *start_off + 2, label);398sink.put2(encode_cj_type(CjOp::CJ, Imm12::ZERO));399}400401// c.jr402Inst::Jalr { rd, base, offset }403if rd.to_reg() == zero_reg() && base != 
zero_reg() && offset.as_i16() == 0 =>404{405sink.put2(encode_cr2_type(CrOp::CJr, base));406state.clobber_vstate();407}408409// c.jalr410Inst::Jalr { rd, base, offset }411if rd.to_reg() == link_reg() && base != zero_reg() && offset.as_i16() == 0 =>412{413sink.put2(encode_cr2_type(CrOp::CJalr, base));414state.clobber_vstate();415}416417// c.ebreak418Inst::EBreak => {419sink.put2(encode_cr_type(420CrOp::CEbreak,421writable_zero_reg(),422zero_reg(),423));424}425426// c.unimp427Inst::Udf { trap_code } => {428sink.add_trap(trap_code);429sink.put2(0x0000);430}431// c.addi16sp432//433// c.addi16sp shares the opcode with c.lui, but has a destination field of x2.434// c.addi16sp adds the non-zero sign-extended 6-bit immediate to the value in the stack pointer (sp=x2),435// where the immediate is scaled to represent multiples of 16 in the range (-512,496). c.addi16sp is used436// to adjust the stack pointer in procedure prologues and epilogues. It expands into addi x2, x2, nzimm. c.addi16sp437// is only valid when nzimm≠0; the code point with nzimm=0 is reserved.438Inst::AluRRImm12 {439alu_op: AluOPRRI::Addi,440rd,441rs,442imm12,443} if rd.to_reg() == rs444&& rs == stack_reg()445&& imm12.as_i16() != 0446&& (imm12.as_i16() % 16) == 0447&& Imm6::maybe_from_i16(imm12.as_i16() / 16).is_some() =>448{449let imm6 = Imm6::maybe_from_i16(imm12.as_i16() / 16).unwrap();450sink.put2(encode_c_addi16sp(imm6));451}452453// c.addi4spn454//455// c.addi4spn is a CIW-format instruction that adds a zero-extended non-zero456// immediate, scaled by 4, to the stack pointer, x2, and writes the result to457// rd. This instruction is used to generate pointers to stack-allocated variables458// and expands to addi rd, x2, nzuimm. 
c.addi4spn is only valid when nzuimm≠0;459// the code points with nzuimm=0 are reserved.460Inst::AluRRImm12 {461alu_op: AluOPRRI::Addi,462rd,463rs,464imm12,465} if reg_is_compressible(rd.to_reg())466&& rs == stack_reg()467&& imm12.as_i16() != 0468&& (imm12.as_i16() % 4) == 0469&& u8::try_from(imm12.as_i16() / 4).is_ok() =>470{471let imm = u8::try_from(imm12.as_i16() / 4).unwrap();472sink.put2(encode_ciw_type(CiwOp::CAddi4spn, rd, imm));473}474475// c.li476Inst::AluRRImm12 {477alu_op: AluOPRRI::Addi,478rd,479rs,480imm12,481} if rd.to_reg() != zero_reg() && rs == zero_reg() => {482let imm6 = Imm6::maybe_from_imm12(imm12)?;483sink.put2(encode_ci_type(CiOp::CLi, rd, imm6));484}485486// c.addi487Inst::AluRRImm12 {488alu_op: AluOPRRI::Addi,489rd,490rs,491imm12,492} if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {493let imm6 = Imm6::maybe_from_imm12(imm12)?;494sink.put2(encode_ci_type(CiOp::CAddi, rd, imm6));495}496497// c.addiw498Inst::AluRRImm12 {499alu_op: AluOPRRI::Addiw,500rd,501rs,502imm12,503} if rd.to_reg() == rs && rs != zero_reg() => {504let imm6 = Imm6::maybe_from_imm12(imm12)?;505sink.put2(encode_ci_type(CiOp::CAddiw, rd, imm6));506}507508// c.lui509//510// c.lui loads the non-zero 6-bit immediate field into bits 17–12511// of the destination register, clears the bottom 12 bits, and512// sign-extends bit 17 into all higher bits of the destination.513Inst::Lui { rd, imm: imm20 }514if rd.to_reg() != zero_reg()515&& rd.to_reg() != stack_reg()516&& imm20.as_i32() != 0 =>517{518// Check that the top bits are sign extended519let imm = imm20.as_i32() << 14 >> 14;520if imm != imm20.as_i32() {521return None;522}523let imm6 = Imm6::maybe_from_i32(imm)?;524sink.put2(encode_ci_type(CiOp::CLui, rd, imm6));525}526527// c.slli528Inst::AluRRImm12 {529alu_op: AluOPRRI::Slli,530rd,531rs,532imm12,533} if rd.to_reg() == rs && rs != zero_reg() && imm12.as_i16() != 0 => {534// The shift amount is unsigned, but we encode it as signed.535let shift = imm12.as_i16() 
& 0x3f;536let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();537sink.put2(encode_ci_type(CiOp::CSlli, rd, imm6));538}539540// c.srli / c.srai541Inst::AluRRImm12 {542alu_op: op @ (AluOPRRI::Srli | AluOPRRI::Srai),543rd,544rs,545imm12,546} if rd.to_reg() == rs && reg_is_compressible(rs) && imm12.as_i16() != 0 => {547let op = match op {548AluOPRRI::Srli => CbOp::CSrli,549AluOPRRI::Srai => CbOp::CSrai,550_ => unreachable!(),551};552553// The shift amount is unsigned, but we encode it as signed.554let shift = imm12.as_i16() & 0x3f;555let imm6 = Imm6::maybe_from_i16(shift << 10 >> 10).unwrap();556sink.put2(encode_cb_type(op, rd, imm6));557}558559// c.zextb560//561// This is an alias for `andi rd, rd, 0xff`562Inst::AluRRImm12 {563alu_op: AluOPRRI::Andi,564rd,565rs,566imm12,567} if has_zcb568&& rd.to_reg() == rs569&& reg_is_compressible(rs)570&& imm12.as_i16() == 0xff =>571{572sink.put2(encode_cszn_type(CsznOp::CZextb, rd));573}574575// c.andi576Inst::AluRRImm12 {577alu_op: AluOPRRI::Andi,578rd,579rs,580imm12,581} if rd.to_reg() == rs && reg_is_compressible(rs) => {582let imm6 = Imm6::maybe_from_imm12(imm12)?;583sink.put2(encode_cb_type(CbOp::CAndi, rd, imm6));584}585586// Stack Based Loads587Inst::Load {588rd,589op: op @ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld),590from,591flags,592} if from.get_base_register() == Some(stack_reg())593&& (from.get_offset_with_state(state) % op.size()) == 0 =>594{595// We encode the offset in multiples of the load size.596let offset = from.get_offset_with_state(state);597let imm6 = u8::try_from(offset / op.size())598.ok()599.and_then(Uimm6::maybe_from_u8)?;600601// Some additional constraints on these instructions.602//603// Integer loads are not allowed to target x0, but floating point loads604// are, since f0 is not a special register.605//606// Floating point loads are not included in the base Zca extension607// but in a separate Zcd extension. 
Both of these are part of the C Extension.608let rd_is_zero = rd.to_reg() == zero_reg();609let op = match op {610LoadOP::Lw if !rd_is_zero => CiOp::CLwsp,611LoadOP::Ld if !rd_is_zero => CiOp::CLdsp,612LoadOP::Fld if has_zcd => CiOp::CFldsp,613_ => return None,614};615616if let Some(trap_code) = flags.trap_code() {617// Register the offset at which the actual load instruction starts.618sink.add_trap(trap_code);619}620sink.put2(encode_ci_sp_load(op, rd, imm6));621}622623// Regular Loads624Inst::Load {625rd,626op:627op628@ (LoadOP::Lw | LoadOP::Ld | LoadOP::Fld | LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh),629from,630flags,631} if reg_is_compressible(rd.to_reg())632&& from633.get_base_register()634.map(reg_is_compressible)635.unwrap_or(false)636&& (from.get_offset_with_state(state) % op.size()) == 0 =>637{638let base = from.get_base_register().unwrap();639640// We encode the offset in multiples of the store size.641let offset = from.get_offset_with_state(state);642let offset = u8::try_from(offset / op.size()).ok()?;643644// We mix two different formats here.645//646// c.lw / c.ld / c.fld instructions are available in the standard Zca647// extension using the CL format.648//649// c.lbu / c.lhu / c.lh are only available in the Zcb extension and650// are also encoded differently. Technically they each have a different651// format, but they are similar enough that we can group them.652let is_zcb_load = matches!(op, LoadOP::Lbu | LoadOP::Lhu | LoadOP::Lh);653let encoded = if is_zcb_load {654if !has_zcb {655return None;656}657658let op = match op {659LoadOP::Lbu => ZcbMemOp::CLbu,660LoadOP::Lhu => ZcbMemOp::CLhu,661LoadOP::Lh => ZcbMemOp::CLh,662_ => unreachable!(),663};664665// Byte stores & loads have 2 bits of immediate offset. 
Halfword stores666// and loads only have 1 bit.667let imm2 = Uimm2::maybe_from_u8(offset)?;668if (offset & !((1 << op.imm_bits()) - 1)) != 0 {669return None;670}671672encode_zcbmem_load(op, rd, base, imm2)673} else {674// Floating point loads are not included in the base Zca extension675// but in a separate Zcd extension. Both of these are part of the C Extension.676let op = match op {677LoadOP::Lw => ClOp::CLw,678LoadOP::Ld => ClOp::CLd,679LoadOP::Fld if has_zcd => ClOp::CFld,680_ => return None,681};682let imm5 = Uimm5::maybe_from_u8(offset)?;683684encode_cl_type(op, rd, base, imm5)685};686687if let Some(trap_code) = flags.trap_code() {688// Register the offset at which the actual load instruction starts.689sink.add_trap(trap_code);690}691sink.put2(encoded);692}693694// Stack Based Stores695Inst::Store {696src,697op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd),698to,699flags,700} if to.get_base_register() == Some(stack_reg())701&& (to.get_offset_with_state(state) % op.size()) == 0 =>702{703// We encode the offset in multiples of the store size.704let offset = to.get_offset_with_state(state);705let imm6 = u8::try_from(offset / op.size())706.ok()707.and_then(Uimm6::maybe_from_u8)?;708709// Floating point stores are not included in the base Zca extension710// but in a separate Zcd extension. 
Both of these are part of the C Extension.711let op = match op {712StoreOP::Sw => CssOp::CSwsp,713StoreOP::Sd => CssOp::CSdsp,714StoreOP::Fsd if has_zcd => CssOp::CFsdsp,715_ => return None,716};717718if let Some(trap_code) = flags.trap_code() {719// Register the offset at which the actual load instruction starts.720sink.add_trap(trap_code);721}722sink.put2(encode_css_type(op, src, imm6));723}724725// Regular Stores726Inst::Store {727src,728op: op @ (StoreOP::Sw | StoreOP::Sd | StoreOP::Fsd | StoreOP::Sh | StoreOP::Sb),729to,730flags,731} if reg_is_compressible(src)732&& to733.get_base_register()734.map(reg_is_compressible)735.unwrap_or(false)736&& (to.get_offset_with_state(state) % op.size()) == 0 =>737{738let base = to.get_base_register().unwrap();739740// We encode the offset in multiples of the store size.741let offset = to.get_offset_with_state(state);742let offset = u8::try_from(offset / op.size()).ok()?;743744// We mix two different formats here.745//746// c.sw / c.sd / c.fsd instructions are available in the standard Zca747// extension using the CL format.748//749// c.sb / c.sh are only available in the Zcb extension and are also750// encoded differently.751let is_zcb_store = matches!(op, StoreOP::Sh | StoreOP::Sb);752let encoded = if is_zcb_store {753if !has_zcb {754return None;755}756757let op = match op {758StoreOP::Sh => ZcbMemOp::CSh,759StoreOP::Sb => ZcbMemOp::CSb,760_ => unreachable!(),761};762763// Byte stores & loads have 2 bits of immediate offset. Halfword stores764// and loads only have 1 bit.765let imm2 = Uimm2::maybe_from_u8(offset)?;766if (offset & !((1 << op.imm_bits()) - 1)) != 0 {767return None;768}769770encode_zcbmem_store(op, src, base, imm2)771} else {772// Floating point stores are not included in the base Zca extension773// but in a separate Zcd extension. 
Both of these are part of the C Extension.774let op = match op {775StoreOP::Sw => CsOp::CSw,776StoreOP::Sd => CsOp::CSd,777StoreOP::Fsd if has_zcd => CsOp::CFsd,778_ => return None,779};780let imm5 = Uimm5::maybe_from_u8(offset)?;781782encode_cs_type(op, src, base, imm5)783};784785if let Some(trap_code) = flags.trap_code() {786// Register the offset at which the actual load instruction starts.787sink.add_trap(trap_code);788}789sink.put2(encoded);790}791792// c.not793//794// This is an alias for `xori rd, rd, -1`795Inst::AluRRImm12 {796alu_op: AluOPRRI::Xori,797rd,798rs,799imm12,800} if has_zcb801&& rd.to_reg() == rs802&& reg_is_compressible(rs)803&& imm12.as_i16() == -1 =>804{805sink.put2(encode_cszn_type(CsznOp::CNot, rd));806}807808// c.sext.b / c.sext.h / c.zext.h809//810// These are all the extend instructions present in `Zcb`, they811// also require `Zbb` since they aren't available in the base ISA.812Inst::AluRRImm12 {813alu_op: alu_op @ (AluOPRRI::Sextb | AluOPRRI::Sexth | AluOPRRI::Zexth),814rd,815rs,816imm12,817} if has_zcb818&& has_zbb819&& rd.to_reg() == rs820&& reg_is_compressible(rs)821&& imm12.as_i16() == 0 =>822{823let op = match alu_op {824AluOPRRI::Sextb => CsznOp::CSextb,825AluOPRRI::Sexth => CsznOp::CSexth,826AluOPRRI::Zexth => CsznOp::CZexth,827_ => unreachable!(),828};829sink.put2(encode_cszn_type(op, rd));830}831832// c.zext.w833//834// This is an alias for `add.uw rd, rd, zero`835Inst::AluRRR {836alu_op: AluOPRRR::Adduw,837rd,838rs1,839rs2,840} if has_zcb841&& has_zba842&& rd.to_reg() == rs1843&& reg_is_compressible(rs1)844&& rs2 == zero_reg() =>845{846sink.put2(encode_cszn_type(CsznOp::CZextw, rd));847}848849_ => return None,850}851852return Some(());853}854855fn emit_uncompressed(856&self,857sink: &mut MachBuffer<Inst>,858emit_info: &EmitInfo,859state: &mut EmitState,860start_off: &mut u32,861) {862match self {863&Inst::Nop0 => {864// do nothing865}866// Addi x0, x0, 0867&Inst::Nop4 => {868let x = Inst::AluRRImm12 {869alu_op: 
AluOPRRI::Addi,870rd: Writable::from_reg(zero_reg()),871rs: zero_reg(),872imm12: Imm12::ZERO,873};874x.emit(sink, emit_info, state)875}876&Inst::RawData { ref data } => {877// Right now we only put a u32 or u64 in this instruction.878// It is not very long, no need to check if need `emit_island`.879// If data is very long , this is a bug because RawData is typically880// use to load some data and rely on some position in the code stream.881// and we may exceed `Inst::worst_case_size`.882// for more information see https://github.com/bytecodealliance/wasmtime/pull/5612.883sink.put_data(&data[..]);884}885&Inst::Lui { rd, ref imm } => {886let x: u32 = 0b0110111 | reg_to_gpr_num(rd.to_reg()) << 7 | (imm.bits() << 12);887sink.put4(x);888}889&Inst::Fli { rd, width, imm } => {890sink.put4(encode_fli(width, imm, rd));891}892&Inst::LoadInlineConst { rd, ty, imm } => {893let data = &imm.to_le_bytes()[..ty.bytes() as usize];894895let label_data: MachLabel = sink.get_label();896let label_end: MachLabel = sink.get_label();897898// Load into rd899Inst::Load {900rd,901op: LoadOP::from_type(ty),902flags: MemFlags::new(),903from: AMode::Label(label_data),904}905.emit(sink, emit_info, state);906907// Jump over the inline pool908Inst::gen_jump(label_end).emit(sink, emit_info, state);909910// Emit the inline data911sink.bind_label(label_data, &mut state.ctrl_plane);912Inst::RawData { data: data.into() }.emit(sink, emit_info, state);913914sink.bind_label(label_end, &mut state.ctrl_plane);915}916&Inst::FpuRR {917alu_op,918width,919frm,920rd,921rs,922} => {923if alu_op.is_convert_to_int() {924sink.add_trap(TrapCode::BAD_CONVERSION_TO_INTEGER);925}926sink.put4(encode_fp_rr(alu_op, width, frm, rd, rs));927}928&Inst::FpuRRRR {929alu_op,930rd,931rs1,932rs2,933rs3,934frm,935width,936} => {937sink.put4(encode_fp_rrrr(alu_op, width, frm, rd, rs1, rs2, rs3));938}939&Inst::FpuRRR {940alu_op,941width,942frm,943rd,944rs1,945rs2,946} => {947sink.put4(encode_fp_rrr(alu_op, width, frm, rd, rs1, 
rs2));948}949&Inst::Unwind { ref inst } => {950sink.add_unwind(inst.clone());951}952&Inst::DummyUse { .. } => {953// This has already been handled by Inst::allocate.954}955&Inst::AluRRR {956alu_op,957rd,958rs1,959rs2,960} => {961let (rs1, rs2) = if alu_op.reverse_rs() {962(rs2, rs1)963} else {964(rs1, rs2)965};966967sink.put4(encode_r_type(968alu_op.op_code(),969rd,970alu_op.funct3(),971rs1,972rs2,973alu_op.funct7(),974));975}976&Inst::AluRRImm12 {977alu_op,978rd,979rs,980imm12,981} => {982let x = alu_op.op_code()983| reg_to_gpr_num(rd.to_reg()) << 7984| alu_op.funct3() << 12985| reg_to_gpr_num(rs) << 15986| alu_op.imm12(imm12) << 20;987sink.put4(x);988}989&Inst::CsrReg { op, rd, rs, csr } => {990sink.put4(encode_csr_reg(op, rd, rs, csr));991}992&Inst::CsrImm { op, rd, csr, imm } => {993sink.put4(encode_csr_imm(op, rd, csr, imm));994}995&Inst::Load {996rd,997op: LoadOP::Flh,998from,999flags,1000} if !emit_info.isa_flags.has_zfhmin() => {1001// flh unavailable, use an integer load instead1002Inst::Load {1003rd: writable_spilltmp_reg(),1004op: LoadOP::Lh,1005flags,1006from,1007}1008.emit(sink, emit_info, state);1009// NaN-box the `f16` before loading it into the floating-point1010// register with a 32-bit `fmv`.1011Inst::Lui {1012rd: writable_spilltmp_reg2(),1013imm: Imm20::from_i32((0xffff_0000_u32 as i32) >> 12),1014}1015.emit(sink, emit_info, state);1016Inst::AluRRR {1017alu_op: AluOPRRR::Or,1018rd: writable_spilltmp_reg(),1019rs1: spilltmp_reg(),1020rs2: spilltmp_reg2(),1021}1022.emit(sink, emit_info, state);1023Inst::FpuRR {1024alu_op: FpuOPRR::FmvFmtX,1025width: FpuOPWidth::S,1026frm: FRM::RNE,1027rd,1028rs: spilltmp_reg(),1029}1030.emit(sink, emit_info, state);1031}1032&Inst::Load {1033rd,1034op,1035from,1036flags,1037} => {1038let base = from.get_base_register();1039let offset = from.get_offset_with_state(state);1040let offset_imm12 = Imm12::maybe_from_i64(offset);1041let label = from.get_label_with_sink(sink);10421043let (addr, imm12) = match (base, 
offset_imm12, label) {1044// When loading from a Reg+Offset, if the offset fits into an imm12 we can directly encode it.1045(Some(base), Some(imm12), None) => (base, imm12),10461047// Otherwise, if the offset does not fit into a imm12, we need to materialize it into a1048// register and load from that.1049(Some(_), None, None) => {1050let tmp = writable_spilltmp_reg();1051Inst::LoadAddr { rd: tmp, mem: from }.emit(sink, emit_info, state);1052(tmp.to_reg(), Imm12::ZERO)1053}10541055// If the AMode contains a label we can emit an internal relocation that gets1056// resolved with the correct address later.1057(None, Some(imm), Some(label)) => {1058debug_assert_eq!(imm.as_i16(), 0);10591060// Get the current PC.1061sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);1062Inst::Auipc {1063rd,1064imm: Imm20::ZERO,1065}1066.emit_uncompressed(sink, emit_info, state, start_off);10671068// Emit a relocation for the load. This patches the offset into the instruction.1069sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);10701071// Imm12 here is meaningless since it's going to get replaced.1072(rd.to_reg(), Imm12::ZERO)1073}10741075// These cases are impossible with the current AModes that we have. We either1076// always have a register, or always have a label. 
Never both, and never neither.1077(None, None, None)1078| (None, Some(_), None)1079| (Some(_), None, Some(_))1080| (Some(_), Some(_), Some(_))1081| (None, None, Some(_)) => {1082unreachable!("Invalid load address")1083}1084};10851086if let Some(trap_code) = flags.trap_code() {1087// Register the offset at which the actual load instruction starts.1088sink.add_trap(trap_code);1089}10901091sink.put4(encode_i_type(op.op_code(), rd, op.funct3(), addr, imm12));1092}1093&Inst::Store {1094op: StoreOP::Fsh,1095src,1096flags,1097to,1098} if !emit_info.isa_flags.has_zfhmin() => {1099// fsh unavailable, use an integer store instead1100Inst::FpuRR {1101alu_op: FpuOPRR::FmvXFmt,1102width: FpuOPWidth::S,1103frm: FRM::RNE,1104rd: writable_spilltmp_reg(),1105rs: src,1106}1107.emit(sink, emit_info, state);1108Inst::Store {1109to,1110op: StoreOP::Sh,1111flags,1112src: spilltmp_reg(),1113}1114.emit(sink, emit_info, state);1115}1116&Inst::Store { op, src, flags, to } => {1117let base = to.get_base_register();1118let offset = to.get_offset_with_state(state);1119let offset_imm12 = Imm12::maybe_from_i64(offset);11201121let (addr, imm12) = match (base, offset_imm12) {1122// If the offset fits into an imm12 we can directly encode it.1123(Some(base), Some(imm12)) => (base, imm12),1124// Otherwise load the address it into a reg and load from it.1125_ => {1126let tmp = writable_spilltmp_reg();1127Inst::LoadAddr { rd: tmp, mem: to }.emit(sink, emit_info, state);1128(tmp.to_reg(), Imm12::ZERO)1129}1130};11311132if let Some(trap_code) = flags.trap_code() {1133// Register the offset at which the actual load instruction starts.1134sink.add_trap(trap_code);1135}11361137sink.put4(encode_s_type(op.op_code(), op.funct3(), addr, src, imm12));1138}1139&Inst::Args { .. } | &Inst::Rets { .. 
} => {1140// Nothing: this is a pseudoinstruction that serves1141// only to constrain registers at a certain point.1142}1143&Inst::Ret {} => {1144// RISC-V does not have a dedicated ret instruction, instead we emit the equivalent1145// `jalr x0, x1, 0` that jumps to the return address.1146Inst::Jalr {1147rd: writable_zero_reg(),1148base: link_reg(),1149offset: Imm12::ZERO,1150}1151.emit(sink, emit_info, state);1152}11531154&Inst::Extend {1155rd,1156rn,1157signed,1158from_bits,1159to_bits: _to_bits,1160} => {1161let mut insts = SmallInstVec::new();1162let shift_bits = (64 - from_bits) as i16;1163let is_u8 = || from_bits == 8 && signed == false;1164if is_u8() {1165// special for u8.1166insts.push(Inst::AluRRImm12 {1167alu_op: AluOPRRI::Andi,1168rd,1169rs: rn,1170imm12: Imm12::from_i16(255),1171});1172} else {1173insts.push(Inst::AluRRImm12 {1174alu_op: AluOPRRI::Slli,1175rd,1176rs: rn,1177imm12: Imm12::from_i16(shift_bits),1178});1179insts.push(Inst::AluRRImm12 {1180alu_op: if signed {1181AluOPRRI::Srai1182} else {1183AluOPRRI::Srli1184},1185rd,1186rs: rd.to_reg(),1187imm12: Imm12::from_i16(shift_bits),1188});1189}1190insts1191.into_iter()1192.for_each(|i| i.emit(sink, emit_info, state));1193}11941195&Inst::Call { ref info } => {1196sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);11971198let start = sink.cur_offset();1199Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0)1200.into_iter()1201.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));12021203if let Some(s) = state.take_stack_map() {1204let offset = sink.cur_offset();1205sink.push_user_stack_map(state, offset, s);1206}12071208if let Some(try_call) = info.try_call_info.as_ref() {1209sink.add_try_call_site(1210Some(state.frame_layout.sp_to_fp()),1211try_call.exception_handlers(&state.frame_layout),1212);1213} else {1214sink.add_call_site();1215}12161217let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();1218if callee_pop_size > 0 {1219for inst in 
Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {1220inst.emit(sink, emit_info, state);1221}1222}12231224if info.patchable {1225sink.add_patchable_call_site(sink.cur_offset() - start);1226} else {1227// Load any stack-carried return values.1228info.emit_retval_loads::<Riscv64MachineDeps, _, _>(1229state.frame_layout().stackslots_size,1230|inst| inst.emit(sink, emit_info, state),1231|needed_space| Some(Inst::EmitIsland { needed_space }),1232);1233}12341235// If this is a try-call, jump to the continuation1236// (normal-return) block.1237if let Some(try_call) = info.try_call_info.as_ref() {1238let jmp = Inst::Jal {1239label: try_call.continuation,1240};1241jmp.emit(sink, emit_info, state);1242}12431244*start_off = sink.cur_offset();1245}1246&Inst::CallInd { ref info } => {1247Inst::Jalr {1248rd: writable_link_reg(),1249base: info.dest,1250offset: Imm12::ZERO,1251}1252.emit(sink, emit_info, state);12531254if let Some(s) = state.take_stack_map() {1255let offset = sink.cur_offset();1256sink.push_user_stack_map(state, offset, s);1257}12581259if let Some(try_call) = info.try_call_info.as_ref() {1260sink.add_try_call_site(1261Some(state.frame_layout.sp_to_fp()),1262try_call.exception_handlers(&state.frame_layout),1263);1264} else {1265sink.add_call_site();1266}12671268let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();1269if callee_pop_size > 0 {1270for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {1271inst.emit(sink, emit_info, state);1272}1273}12741275// Load any stack-carried return values.1276info.emit_retval_loads::<Riscv64MachineDeps, _, _>(1277state.frame_layout().stackslots_size,1278|inst| inst.emit(sink, emit_info, state),1279|needed_space| Some(Inst::EmitIsland { needed_space }),1280);12811282// If this is a try-call, jump to the continuation1283// (normal-return) block.1284if let Some(try_call) = info.try_call_info.as_ref() {1285let jmp = Inst::Jal {1286label: try_call.continuation,1287};1288jmp.emit(sink, emit_info, 
state);1289}12901291*start_off = sink.cur_offset();1292}12931294&Inst::ReturnCall { ref info } => {1295emit_return_call_common_sequence(sink, emit_info, state, info);12961297sink.add_call_site();1298sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);1299Inst::construct_auipc_and_jalr(None, writable_spilltmp_reg(), 0)1300.into_iter()1301.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));1302}13031304&Inst::ReturnCallInd { ref info } => {1305emit_return_call_common_sequence(sink, emit_info, state, &info);13061307Inst::Jalr {1308rd: writable_zero_reg(),1309base: info.dest,1310offset: Imm12::ZERO,1311}1312.emit(sink, emit_info, state);1313}1314&Inst::Jal { label } => {1315sink.use_label_at_offset(*start_off, label, LabelUse::Jal20);1316sink.add_uncond_branch(*start_off, *start_off + 4, label);1317sink.put4(0b1101111);1318state.clobber_vstate();1319}1320&Inst::CondBr {1321taken,1322not_taken,1323kind,1324} => {1325match taken {1326CondBrTarget::Label(label) => {1327let code = kind.emit();1328let code_inverse = kind.inverse().emit().to_le_bytes();1329sink.use_label_at_offset(*start_off, label, LabelUse::B12);1330sink.add_cond_branch(*start_off, *start_off + 4, label, &code_inverse);1331sink.put4(code);1332}1333CondBrTarget::Fallthrough => panic!("Cannot fallthrough in taken target"),1334}13351336match not_taken {1337CondBrTarget::Label(label) => {1338Inst::gen_jump(label).emit(sink, emit_info, state)1339}1340CondBrTarget::Fallthrough => {}1341};1342}13431344&Inst::Mov { rd, rm, ty } => {1345debug_assert_eq!(rd.to_reg().class(), rm.class());1346if rd.to_reg() == rm {1347return;1348}13491350match rm.class() {1351RegClass::Int => Inst::AluRRImm12 {1352alu_op: AluOPRRI::Addi,1353rd,1354rs: rm,1355imm12: Imm12::ZERO,1356},1357RegClass::Float => Inst::FpuRRR {1358alu_op: FpuOPRRR::Fsgnj,1359width: FpuOPWidth::try_from(ty).unwrap(),1360frm: FRM::RNE,1361rd,1362rs1: rm,1363rs2: rm,1364},1365RegClass::Vector => Inst::VecAluRRImm5 {1366op: 
VecAluOpRRImm5::VmvrV,1367vd: rd,1368vs2: rm,1369// Imm 0 means copy 1 register.1370imm: Imm5::maybe_from_i8(0).unwrap(),1371mask: VecOpMasking::Disabled,1372// Vstate for this instruction is ignored.1373vstate: VState::from_type(ty),1374},1375}1376.emit(sink, emit_info, state);1377}13781379&Inst::MovFromPReg { rd, rm } => {1380Inst::gen_move(rd, Reg::from(rm), I64).emit(sink, emit_info, state);1381}13821383&Inst::BrTable {1384index,1385tmp1,1386tmp2,1387ref targets,1388} => {1389let ext_index = writable_spilltmp_reg();13901391let label_compute_target = sink.get_label();13921393// The default target is passed in as the 0th element of `targets`1394// separate it here for clarity.1395let default_target = targets[0];1396let targets = &targets[1..];13971398// We are going to potentially emit a large amount of instructions, so ensure that we emit an island1399// now if we need one.1400//1401// The worse case PC calculations are 12 instructions. And each entry in the jump table is 2 instructions.1402// Check if we need to emit a jump table here to support that jump.1403let inst_count = 12 + (targets.len() * 2);1404let distance = (inst_count * Inst::UNCOMPRESSED_INSTRUCTION_SIZE as usize) as u32;1405if sink.island_needed(distance) {1406let jump_around_label = sink.get_label();1407Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);1408sink.emit_island(distance + 4, &mut state.ctrl_plane);1409sink.bind_label(jump_around_label, &mut state.ctrl_plane);1410}14111412// We emit a bounds check on the index, if the index is larger than the number of1413// jump table entries, we jump to the default block. Otherwise we compute a jump1414// offset by multiplying the index by 8 (the size of each entry) and then jump to1415// that offset. 
Each jump table entry is a regular auipc+jalr which we emit sequentially.1416//1417// Build the following sequence:1418//1419// extend_index:1420// zext.w ext_index, index1421// bounds_check:1422// li tmp, n_labels1423// bltu ext_index, tmp, compute_target1424// jump_to_default_block:1425// auipc pc, 01426// jalr zero, pc, default_block1427// compute_target:1428// auipc pc, 01429// slli tmp, ext_index, 31430// add pc, pc, tmp1431// jalr zero, pc, 0x101432// jump_table:1433// ; This repeats for each entry in the jumptable1434// auipc pc, 01435// jalr zero, pc, block_target14361437// Extend the index to 64 bits.1438//1439// This prevents us branching on the top 32 bits of the index, which1440// are undefined.1441Inst::Extend {1442rd: ext_index,1443rn: index,1444signed: false,1445from_bits: 32,1446to_bits: 64,1447}1448.emit(sink, emit_info, state);14491450// Bounds check.1451//1452// Check if the index passed in is larger than the number of jumptable1453// entries that we have. If it is, we fallthrough to a jump into the1454// default block.1455Inst::load_constant_u32(tmp2, targets.len() as u64)1456.iter()1457.for_each(|i| i.emit(sink, emit_info, state));1458Inst::CondBr {1459taken: CondBrTarget::Label(label_compute_target),1460not_taken: CondBrTarget::Fallthrough,1461kind: IntegerCompare {1462kind: IntCC::UnsignedLessThan,1463rs1: ext_index.to_reg(),1464rs2: tmp2.to_reg(),1465},1466}1467.emit(sink, emit_info, state);14681469sink.use_label_at_offset(sink.cur_offset(), default_target, LabelUse::PCRel32);1470Inst::construct_auipc_and_jalr(None, tmp2, 0)1471.iter()1472.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));14731474// Compute the jump table offset.1475// We need to emit a PC relative offset,1476sink.bind_label(label_compute_target, &mut state.ctrl_plane);14771478// Get the current PC.1479Inst::Auipc {1480rd: tmp1,1481imm: Imm20::ZERO,1482}1483.emit_uncompressed(sink, emit_info, state, start_off);14841485// These instructions must be emitted 
as uncompressed since we1486// are manually computing the offset from the PC.14871488// Multiply the index by 8, since that is the size in1489// bytes of each jump table entry1490Inst::AluRRImm12 {1491alu_op: AluOPRRI::Slli,1492rd: tmp2,1493rs: ext_index.to_reg(),1494imm12: Imm12::from_i16(3),1495}1496.emit_uncompressed(sink, emit_info, state, start_off);14971498// Calculate the base of the jump, PC + the offset from above.1499Inst::AluRRR {1500alu_op: AluOPRRR::Add,1501rd: tmp1,1502rs1: tmp1.to_reg(),1503rs2: tmp2.to_reg(),1504}1505.emit_uncompressed(sink, emit_info, state, start_off);15061507// Jump to the middle of the jump table.1508// We add a 16 byte offset here, since we used 4 instructions1509// since the AUIPC that was used to get the PC.1510Inst::Jalr {1511rd: writable_zero_reg(),1512base: tmp1.to_reg(),1513offset: Imm12::from_i16((4 * Inst::UNCOMPRESSED_INSTRUCTION_SIZE) as i16),1514}1515.emit_uncompressed(sink, emit_info, state, start_off);15161517// Emit the jump table.1518//1519// Each entry is a auipc + jalr to the target block. 
We also start with a island1520// if necessary.15211522// Emit the jumps back to back1523for target in targets.iter() {1524sink.use_label_at_offset(sink.cur_offset(), *target, LabelUse::PCRel32);15251526Inst::construct_auipc_and_jalr(None, tmp2, 0)1527.iter()1528.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));1529}15301531// We've just emitted an island that is safe up to *here*.1532// Mark it as such so that we don't needlessly emit additional islands.1533*start_off = sink.cur_offset();1534}15351536&Inst::Atomic {1537op,1538rd,1539addr,1540src,1541amo,1542} => {1543// TODO: get flags from original CLIF atomic instruction1544let flags = MemFlags::new();1545if let Some(trap_code) = flags.trap_code() {1546sink.add_trap(trap_code);1547}1548let x = op.op_code()1549| reg_to_gpr_num(rd.to_reg()) << 71550| op.funct3() << 121551| reg_to_gpr_num(addr) << 151552| reg_to_gpr_num(src) << 201553| op.funct7(amo) << 25;15541555sink.put4(x);1556}1557&Inst::Fence { pred, succ } => {1558let x = 0b00011111559| 0b00000 << 71560| 0b000 << 121561| 0b00000 << 151562| (succ as u32) << 201563| (pred as u32) << 24;15641565sink.put4(x);1566}1567&Inst::Auipc { rd, imm } => {1568sink.put4(enc_auipc(rd, imm));1569}15701571&Inst::LoadAddr { rd, mem } => {1572let base = mem.get_base_register();1573let offset = mem.get_offset_with_state(state);1574let offset_imm12 = Imm12::maybe_from_i64(offset);15751576match (mem, base, offset_imm12) {1577(_, Some(rs), Some(imm12)) => {1578Inst::AluRRImm12 {1579alu_op: AluOPRRI::Addi,1580rd,1581rs,1582imm12,1583}1584.emit(sink, emit_info, state);1585}1586(_, Some(rs), None) => {1587let mut insts = Inst::load_constant_u64(rd, offset as u64);1588insts.push(Inst::AluRRR {1589alu_op: AluOPRRR::Add,1590rd,1591rs1: rd.to_reg(),1592rs2: rs,1593});1594insts1595.into_iter()1596.for_each(|inst| inst.emit(sink, emit_info, state));1597}1598(AMode::Const(addr), None, _) => {1599// Get an address label for the constant and recurse.1600let label = 
sink.get_label_for_constant(addr);1601Inst::LoadAddr {1602rd,1603mem: AMode::Label(label),1604}1605.emit(sink, emit_info, state);1606}1607(AMode::Label(label), None, _) => {1608// Get the current PC.1609sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelHi20);1610let inst = Inst::Auipc {1611rd,1612imm: Imm20::ZERO,1613};1614inst.emit_uncompressed(sink, emit_info, state, start_off);16151616// Emit an add to the address with a relocation.1617// This later gets patched up with the correct offset.1618sink.use_label_at_offset(sink.cur_offset(), label, LabelUse::PCRelLo12I);1619Inst::AluRRImm12 {1620alu_op: AluOPRRI::Addi,1621rd,1622rs: rd.to_reg(),1623imm12: Imm12::ZERO,1624}1625.emit_uncompressed(sink, emit_info, state, start_off);1626}1627(amode, _, _) => {1628unimplemented!("LoadAddr: {:?}", amode);1629}1630}1631}16321633&Inst::Select {1634ref dst,1635condition,1636ref x,1637ref y,1638} => {1639// The general form for this select is the following:1640//1641// mv rd, x1642// b{cond} rcond, label_end1643// mv rd, y1644// label_end:1645// ... etc1646//1647// This is built on the assumption that moves are cheap, but branches and jumps1648// are not. So with this format we always avoid one jump instruction at the expense1649// of an unconditional move.1650//1651// We also perform another optimization here. 
If the destination register is the same1652// as one of the input registers, we can avoid emitting the first unconditional move1653// and emit just the branch and the second move.1654//1655// To make sure that this happens as often as possible, we also try to invert the1656// condition, so that if either of the input registers are the same as the destination1657// we avoid that move.16581659let label_end = sink.get_label();16601661let xregs = x.regs();1662let yregs = y.regs();1663let dstregs: Vec<Reg> = dst.regs().into_iter().map(|r| r.to_reg()).collect();1664let condregs = condition.regs();16651666// We are going to write to the destination register before evaluating1667// the condition, so we need to make sure that the destination register1668// is not one of the condition registers.1669//1670// This should never happen, since hopefully the regalloc constraints1671// for this register are set up correctly.1672debug_assert_ne!(dstregs, condregs);16731674// Check if we can invert the condition and avoid moving the y registers into1675// the destination. This allows us to only emit the branch and one of the moves.1676let (uncond_move, cond_move, condition) = if yregs == dstregs {1677(yregs, xregs, condition.inverse())1678} else {1679(xregs, yregs, condition)1680};16811682// Unconditionally move one of the values to the destination register.1683//1684// These moves may not end up being emitted if the source and1685// destination registers are the same. 
That logic is built into1686// the emit function for `Inst::Mov`.1687for i in gen_moves(dst.regs(), uncond_move) {1688i.emit(sink, emit_info, state);1689}16901691// If the condition passes we skip over the conditional move1692Inst::CondBr {1693taken: CondBrTarget::Label(label_end),1694not_taken: CondBrTarget::Fallthrough,1695kind: condition,1696}1697.emit(sink, emit_info, state);16981699// Move the conditional value to the destination register.1700for i in gen_moves(dst.regs(), cond_move) {1701i.emit(sink, emit_info, state);1702}17031704sink.bind_label(label_end, &mut state.ctrl_plane);1705}1706&Inst::Jalr { rd, base, offset } => {1707sink.put4(enc_jalr(rd, base, offset));1708state.clobber_vstate();1709}1710&Inst::EBreak => {1711sink.put4(0x00100073);1712}1713&Inst::AtomicCas {1714offset,1715t0,1716dst,1717e,1718addr,1719v,1720ty,1721} => {1722// # addr holds address of memory location1723// # e holds expected value1724// # v holds desired value1725// # dst holds return value1726// cas:1727// lr.w dst, (addr) # Load original value.1728// bne dst, e, fail # Doesn’t match, so fail.1729// sc.w t0, v, (addr) # Try to update.1730// bnez t0 , cas # if store not ok,retry.1731// fail:1732let fail_label = sink.get_label();1733let cas_lebel = sink.get_label();1734sink.bind_label(cas_lebel, &mut state.ctrl_plane);1735Inst::Atomic {1736op: AtomicOP::load_op(ty),1737rd: dst,1738addr,1739src: zero_reg(),1740amo: AMO::SeqCst,1741}1742.emit(sink, emit_info, state);1743if ty.bits() < 32 {1744AtomicOP::extract(dst, offset, dst.to_reg(), ty)1745.iter()1746.for_each(|i| i.emit(sink, emit_info, state));1747} else if ty.bits() == 32 {1748Inst::Extend {1749rd: dst,1750rn: dst.to_reg(),1751signed: false,1752from_bits: 32,1753to_bits: 64,1754}1755.emit(sink, emit_info, state);1756}1757Inst::CondBr {1758taken: CondBrTarget::Label(fail_label),1759not_taken: CondBrTarget::Fallthrough,1760kind: IntegerCompare {1761kind: IntCC::NotEqual,1762rs1: e,1763rs2: 
dst.to_reg(),1764},1765}1766.emit(sink, emit_info, state);1767let store_value = if ty.bits() < 32 {1768// reload value to t0.1769Inst::Atomic {1770op: AtomicOP::load_op(ty),1771rd: t0,1772addr,1773src: zero_reg(),1774amo: AMO::SeqCst,1775}1776.emit(sink, emit_info, state);1777// set reset part.1778AtomicOP::merge(t0, writable_spilltmp_reg(), offset, v, ty)1779.iter()1780.for_each(|i| i.emit(sink, emit_info, state));1781t0.to_reg()1782} else {1783v1784};1785Inst::Atomic {1786op: AtomicOP::store_op(ty),1787rd: t0,1788addr,1789src: store_value,1790amo: AMO::SeqCst,1791}1792.emit(sink, emit_info, state);1793// check is our value stored.1794Inst::CondBr {1795taken: CondBrTarget::Label(cas_lebel),1796not_taken: CondBrTarget::Fallthrough,1797kind: IntegerCompare {1798kind: IntCC::NotEqual,1799rs1: t0.to_reg(),1800rs2: zero_reg(),1801},1802}1803.emit(sink, emit_info, state);1804sink.bind_label(fail_label, &mut state.ctrl_plane);1805}1806&Inst::AtomicRmwLoop {1807offset,1808op,1809dst,1810ty,1811p,1812x,1813t0,1814} => {1815let retry = sink.get_label();1816sink.bind_label(retry, &mut state.ctrl_plane);1817// load old value.1818Inst::Atomic {1819op: AtomicOP::load_op(ty),1820rd: dst,1821addr: p,1822src: zero_reg(),1823amo: AMO::SeqCst,1824}1825.emit(sink, emit_info, state);1826//18271828let store_value: Reg = match op {1829crate::ir::AtomicRmwOp::Add1830| crate::ir::AtomicRmwOp::Sub1831| crate::ir::AtomicRmwOp::And1832| crate::ir::AtomicRmwOp::Or1833| crate::ir::AtomicRmwOp::Xor => {1834AtomicOP::extract(dst, offset, dst.to_reg(), ty)1835.iter()1836.for_each(|i| i.emit(sink, emit_info, state));1837Inst::AluRRR {1838alu_op: match op {1839crate::ir::AtomicRmwOp::Add => AluOPRRR::Add,1840crate::ir::AtomicRmwOp::Sub => AluOPRRR::Sub,1841crate::ir::AtomicRmwOp::And => AluOPRRR::And,1842crate::ir::AtomicRmwOp::Or => AluOPRRR::Or,1843crate::ir::AtomicRmwOp::Xor => AluOPRRR::Xor,1844_ => unreachable!(),1845},1846rd: t0,1847rs1: dst.to_reg(),1848rs2: x,1849}1850.emit(sink, emit_info, 
state);1851Inst::Atomic {1852op: AtomicOP::load_op(ty),1853rd: writable_spilltmp_reg2(),1854addr: p,1855src: zero_reg(),1856amo: AMO::SeqCst,1857}1858.emit(sink, emit_info, state);1859AtomicOP::merge(1860writable_spilltmp_reg2(),1861writable_spilltmp_reg(),1862offset,1863t0.to_reg(),1864ty,1865)1866.iter()1867.for_each(|i| i.emit(sink, emit_info, state));1868spilltmp_reg2()1869}1870crate::ir::AtomicRmwOp::Nand => {1871if ty.bits() < 32 {1872AtomicOP::extract(dst, offset, dst.to_reg(), ty)1873.iter()1874.for_each(|i| i.emit(sink, emit_info, state));1875}1876Inst::AluRRR {1877alu_op: AluOPRRR::And,1878rd: t0,1879rs1: x,1880rs2: dst.to_reg(),1881}1882.emit(sink, emit_info, state);1883Inst::construct_bit_not(t0, t0.to_reg()).emit(sink, emit_info, state);1884if ty.bits() < 32 {1885Inst::Atomic {1886op: AtomicOP::load_op(ty),1887rd: writable_spilltmp_reg2(),1888addr: p,1889src: zero_reg(),1890amo: AMO::SeqCst,1891}1892.emit(sink, emit_info, state);1893AtomicOP::merge(1894writable_spilltmp_reg2(),1895writable_spilltmp_reg(),1896offset,1897t0.to_reg(),1898ty,1899)1900.iter()1901.for_each(|i| i.emit(sink, emit_info, state));1902spilltmp_reg2()1903} else {1904t0.to_reg()1905}1906}19071908crate::ir::AtomicRmwOp::Umin1909| crate::ir::AtomicRmwOp::Umax1910| crate::ir::AtomicRmwOp::Smin1911| crate::ir::AtomicRmwOp::Smax => {1912let label_select_dst = sink.get_label();1913let label_select_done = sink.get_label();1914if op == crate::ir::AtomicRmwOp::Umin || op == crate::ir::AtomicRmwOp::Umax1915{1916AtomicOP::extract(dst, offset, dst.to_reg(), ty)1917} else {1918AtomicOP::extract_sext(dst, offset, dst.to_reg(), ty)1919}1920.iter()1921.for_each(|i| i.emit(sink, emit_info, state));19221923Inst::CondBr {1924taken: CondBrTarget::Label(label_select_dst),1925not_taken: CondBrTarget::Fallthrough,1926kind: IntegerCompare {1927kind: match op {1928crate::ir::AtomicRmwOp::Umin => IntCC::UnsignedLessThan,1929crate::ir::AtomicRmwOp::Umax => 
IntCC::UnsignedGreaterThan,1930crate::ir::AtomicRmwOp::Smin => IntCC::SignedLessThan,1931crate::ir::AtomicRmwOp::Smax => IntCC::SignedGreaterThan,1932_ => unreachable!(),1933},1934rs1: dst.to_reg(),1935rs2: x,1936},1937}1938.emit(sink, emit_info, state);1939// here we select x.1940Inst::gen_move(t0, x, I64).emit(sink, emit_info, state);1941Inst::gen_jump(label_select_done).emit(sink, emit_info, state);1942sink.bind_label(label_select_dst, &mut state.ctrl_plane);1943Inst::gen_move(t0, dst.to_reg(), I64).emit(sink, emit_info, state);1944sink.bind_label(label_select_done, &mut state.ctrl_plane);1945Inst::Atomic {1946op: AtomicOP::load_op(ty),1947rd: writable_spilltmp_reg2(),1948addr: p,1949src: zero_reg(),1950amo: AMO::SeqCst,1951}1952.emit(sink, emit_info, state);1953AtomicOP::merge(1954writable_spilltmp_reg2(),1955writable_spilltmp_reg(),1956offset,1957t0.to_reg(),1958ty,1959)1960.iter()1961.for_each(|i| i.emit(sink, emit_info, state));1962spilltmp_reg2()1963}1964crate::ir::AtomicRmwOp::Xchg => {1965AtomicOP::extract(dst, offset, dst.to_reg(), ty)1966.iter()1967.for_each(|i| i.emit(sink, emit_info, state));1968Inst::Atomic {1969op: AtomicOP::load_op(ty),1970rd: writable_spilltmp_reg2(),1971addr: p,1972src: zero_reg(),1973amo: AMO::SeqCst,1974}1975.emit(sink, emit_info, state);1976AtomicOP::merge(1977writable_spilltmp_reg2(),1978writable_spilltmp_reg(),1979offset,1980x,1981ty,1982)1983.iter()1984.for_each(|i| i.emit(sink, emit_info, state));1985spilltmp_reg2()1986}1987};19881989Inst::Atomic {1990op: AtomicOP::store_op(ty),1991rd: t0,1992addr: p,1993src: store_value,1994amo: AMO::SeqCst,1995}1996.emit(sink, emit_info, state);19971998// if store is not ok,retry.1999Inst::CondBr {2000taken: CondBrTarget::Label(retry),2001not_taken: CondBrTarget::Fallthrough,2002kind: IntegerCompare {2003kind: IntCC::NotEqual,2004rs1: t0.to_reg(),2005rs2: zero_reg(),2006},2007}2008.emit(sink, emit_info, state);2009}20102011&Inst::LoadExtNameGot { rd, ref name } => {2012// Load a 
PC-relative address into a register.2013// RISC-V does this slightly differently from other arches. We emit a relocation2014// with a label, instead of the symbol itself.2015//2016// See: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses2017//2018// Emit the following code:2019// label:2020// auipc rd, 0 # R_RISCV_GOT_HI20 (symbol_name)2021// ld rd, rd, 0 # R_RISCV_PCREL_LO12_I (label)20222023// Create the label that is going to be published to the final binary object.2024let auipc_label = sink.get_label();2025sink.bind_label(auipc_label, &mut state.ctrl_plane);20262027// Get the current PC.2028sink.add_reloc(Reloc::RiscvGotHi20, &**name, 0);2029Inst::Auipc {2030rd,2031imm: Imm20::from_i32(0),2032}2033.emit_uncompressed(sink, emit_info, state, start_off);20342035// The `ld` here, points to the `auipc` label instead of directly to the symbol.2036sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);2037Inst::Load {2038rd,2039op: LoadOP::Ld,2040flags: MemFlags::trusted(),2041from: AMode::RegOffset(rd.to_reg(), 0),2042}2043.emit_uncompressed(sink, emit_info, state, start_off);2044}20452046&Inst::LoadExtNameFar {2047rd,2048ref name,2049offset,2050} => {2051// In the non PIC sequence we relocate the absolute address into2052// a preallocated space, load it into a register and jump over2053// it.2054//2055// Emit the following code:2056// ld rd, label_data2057// j label_end2058// label_data:2059// <8 byte space> # ABS82060// label_end:20612062let label_data = sink.get_label();2063let label_end = sink.get_label();20642065// Load the value from a label2066Inst::Load {2067rd,2068op: LoadOP::Ld,2069flags: MemFlags::trusted(),2070from: AMode::Label(label_data),2071}2072.emit(sink, emit_info, state);20732074// Jump over the data2075Inst::gen_jump(label_end).emit(sink, emit_info, state);20762077sink.bind_label(label_data, &mut state.ctrl_plane);2078sink.add_reloc(Reloc::Abs8, name.as_ref(), 
offset);2079sink.put8(0);20802081sink.bind_label(label_end, &mut state.ctrl_plane);2082}20832084&Inst::LoadExtNameNear {2085rd,2086ref name,2087offset,2088} => {2089// Emit the following code:2090// label:2091// auipc rd, 0 # R_RISCV_PCREL_HI20 (symbol_name)2092// ld rd, rd, 0 # R_RISCV_PCREL_LO12_I (label)20932094let auipc_label = sink.get_label();2095sink.bind_label(auipc_label, &mut state.ctrl_plane);20962097// Get the current PC.2098sink.add_reloc(Reloc::RiscvPCRelHi20, &**name, offset);2099Inst::Auipc {2100rd,2101imm: Imm20::from_i32(0),2102}2103.emit_uncompressed(sink, emit_info, state, start_off);21042105sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);2106Inst::AluRRImm12 {2107alu_op: AluOPRRI::Addi,2108rd,2109rs: rd.to_reg(),2110imm12: Imm12::ZERO,2111}2112.emit_uncompressed(sink, emit_info, state, start_off);2113}21142115&Inst::LabelAddress { dst, label } => {2116let offset = sink.cur_offset();2117Inst::Auipc {2118rd: dst,2119imm: Imm20::from_i32(0),2120}2121.emit_uncompressed(sink, emit_info, state, start_off);2122sink.use_label_at_offset(offset, label, LabelUse::PCRelHi20);21232124let offset = sink.cur_offset();2125Inst::AluRRImm12 {2126alu_op: AluOPRRI::Addi,2127rd: dst,2128rs: dst.to_reg(),2129imm12: Imm12::ZERO,2130}2131.emit_uncompressed(sink, emit_info, state, start_off);2132sink.use_label_at_offset(offset, label, LabelUse::PCRelLo12I);2133}21342135&Inst::ElfTlsGetAddr { rd, ref name } => {2136// RISC-V's TLS GD model is slightly different from other arches.2137//2138// We have a relocation (R_RISCV_TLS_GD_HI20) that loads the high 20 bits2139// of the address relative to the GOT entry. 
This relocation points to2140// the symbol as usual.2141//2142// However when loading the bottom 12bits of the address, we need to2143// use a label that points to the previous AUIPC instruction.2144//2145// label:2146// auipc a0,0 # R_RISCV_TLS_GD_HI20 (symbol)2147// addi a0,a0,0 # R_RISCV_PCREL_LO12_I (label)2148//2149// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#global-dynamic21502151// Create the label that is going to be published to the final binary object.2152let auipc_label = sink.get_label();2153sink.bind_label(auipc_label, &mut state.ctrl_plane);21542155// Get the current PC.2156sink.add_reloc(Reloc::RiscvTlsGdHi20, &**name, 0);2157Inst::Auipc {2158rd,2159imm: Imm20::from_i32(0),2160}2161.emit_uncompressed(sink, emit_info, state, start_off);21622163// The `addi` here, points to the `auipc` label instead of directly to the symbol.2164sink.add_reloc(Reloc::RiscvPCRelLo12I, &auipc_label, 0);2165Inst::AluRRImm12 {2166alu_op: AluOPRRI::Addi,2167rd,2168rs: rd.to_reg(),2169imm12: Imm12::from_i16(0),2170}2171.emit_uncompressed(sink, emit_info, state, start_off);21722173Inst::Call {2174info: Box::new(CallInfo::empty(2175ExternalName::LibCall(LibCall::ElfTlsGetAddr),2176CallConv::SystemV,2177)),2178}2179.emit_uncompressed(sink, emit_info, state, start_off);2180}21812182&Inst::TrapIf {2183rs1,2184rs2,2185cc,2186trap_code,2187} => {2188let label_end = sink.get_label();2189let cond = IntegerCompare { kind: cc, rs1, rs2 };21902191// Jump over the trap if we the condition is false.2192Inst::CondBr {2193taken: CondBrTarget::Label(label_end),2194not_taken: CondBrTarget::Fallthrough,2195kind: cond.inverse(),2196}2197.emit(sink, emit_info, state);2198Inst::Udf { trap_code }.emit(sink, emit_info, state);21992200sink.bind_label(label_end, &mut state.ctrl_plane);2201}2202&Inst::Udf { trap_code } => {2203sink.add_trap(trap_code);2204sink.put_data(Inst::TRAP_OPCODE);2205}2206&Inst::AtomicLoad { rd, ty, p } => {2207// emit the 
fence.2208Inst::Fence {2209pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,2210succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,2211}2212.emit(sink, emit_info, state);2213// load.2214Inst::Load {2215rd,2216op: LoadOP::from_type(ty),2217flags: MemFlags::new(),2218from: AMode::RegOffset(p, 0),2219}2220.emit(sink, emit_info, state);2221Inst::Fence {2222pred: Inst::FENCE_REQ_R,2223succ: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,2224}2225.emit(sink, emit_info, state);2226}2227&Inst::AtomicStore { src, ty, p } => {2228Inst::Fence {2229pred: Inst::FENCE_REQ_R | Inst::FENCE_REQ_W,2230succ: Inst::FENCE_REQ_W,2231}2232.emit(sink, emit_info, state);2233Inst::Store {2234to: AMode::RegOffset(p, 0),2235op: StoreOP::from_type(ty),2236flags: MemFlags::new(),2237src,2238}2239.emit(sink, emit_info, state);2240}22412242&Inst::Popcnt {2243sum,2244tmp,2245step,2246rs,2247ty,2248} => {2249// load 0 to sum , init.2250Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);2251// load2252Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))2253.emit(sink, emit_info, state);2254//2255Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);2256Inst::AluRRImm12 {2257alu_op: AluOPRRI::Slli,2258rd: tmp,2259rs: tmp.to_reg(),2260imm12: Imm12::from_i16((ty.bits() - 1) as i16),2261}2262.emit(sink, emit_info, state);2263let label_done = sink.get_label();2264let label_loop = sink.get_label();2265sink.bind_label(label_loop, &mut state.ctrl_plane);2266Inst::CondBr {2267taken: CondBrTarget::Label(label_done),2268not_taken: CondBrTarget::Fallthrough,2269kind: IntegerCompare {2270kind: IntCC::SignedLessThanOrEqual,2271rs1: step.to_reg(),2272rs2: zero_reg(),2273},2274}2275.emit(sink, emit_info, state);2276// test and add sum.2277{2278Inst::AluRRR {2279alu_op: AluOPRRR::And,2280rd: writable_spilltmp_reg2(),2281rs1: tmp.to_reg(),2282rs2: rs,2283}2284.emit(sink, emit_info, state);2285let label_over = sink.get_label();2286Inst::CondBr {2287taken: CondBrTarget::Label(label_over),2288not_taken: 
CondBrTarget::Fallthrough,2289kind: IntegerCompare {2290kind: IntCC::Equal,2291rs1: zero_reg(),2292rs2: spilltmp_reg2(),2293},2294}2295.emit(sink, emit_info, state);2296Inst::AluRRImm12 {2297alu_op: AluOPRRI::Addi,2298rd: sum,2299rs: sum.to_reg(),2300imm12: Imm12::ONE,2301}2302.emit(sink, emit_info, state);2303sink.bind_label(label_over, &mut state.ctrl_plane);2304}2305// set step and tmp.2306{2307Inst::AluRRImm12 {2308alu_op: AluOPRRI::Addi,2309rd: step,2310rs: step.to_reg(),2311imm12: Imm12::from_i16(-1),2312}2313.emit(sink, emit_info, state);2314Inst::AluRRImm12 {2315alu_op: AluOPRRI::Srli,2316rd: tmp,2317rs: tmp.to_reg(),2318imm12: Imm12::ONE,2319}2320.emit(sink, emit_info, state);2321Inst::gen_jump(label_loop).emit(sink, emit_info, state);2322}2323sink.bind_label(label_done, &mut state.ctrl_plane);2324}2325&Inst::Cltz {2326sum,2327tmp,2328step,2329rs,2330leading,2331ty,2332} => {2333// load 0 to sum , init.2334Inst::gen_move(sum, zero_reg(), I64).emit(sink, emit_info, state);2335// load2336Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))2337.emit(sink, emit_info, state);2338//2339Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);2340if leading {2341Inst::AluRRImm12 {2342alu_op: AluOPRRI::Slli,2343rd: tmp,2344rs: tmp.to_reg(),2345imm12: Imm12::from_i16((ty.bits() - 1) as i16),2346}2347.emit(sink, emit_info, state);2348}2349let label_done = sink.get_label();2350let label_loop = sink.get_label();2351sink.bind_label(label_loop, &mut state.ctrl_plane);2352Inst::CondBr {2353taken: CondBrTarget::Label(label_done),2354not_taken: CondBrTarget::Fallthrough,2355kind: IntegerCompare {2356kind: IntCC::SignedLessThanOrEqual,2357rs1: step.to_reg(),2358rs2: zero_reg(),2359},2360}2361.emit(sink, emit_info, state);2362// test and add sum.2363{2364Inst::AluRRR {2365alu_op: AluOPRRR::And,2366rd: writable_spilltmp_reg2(),2367rs1: tmp.to_reg(),2368rs2: rs,2369}2370.emit(sink, emit_info, state);2371Inst::CondBr {2372taken: 
CondBrTarget::Label(label_done),2373not_taken: CondBrTarget::Fallthrough,2374kind: IntegerCompare {2375kind: IntCC::NotEqual,2376rs1: zero_reg(),2377rs2: spilltmp_reg2(),2378},2379}2380.emit(sink, emit_info, state);2381Inst::AluRRImm12 {2382alu_op: AluOPRRI::Addi,2383rd: sum,2384rs: sum.to_reg(),2385imm12: Imm12::ONE,2386}2387.emit(sink, emit_info, state);2388}2389// set step and tmp.2390{2391Inst::AluRRImm12 {2392alu_op: AluOPRRI::Addi,2393rd: step,2394rs: step.to_reg(),2395imm12: Imm12::from_i16(-1),2396}2397.emit(sink, emit_info, state);2398Inst::AluRRImm12 {2399alu_op: if leading {2400AluOPRRI::Srli2401} else {2402AluOPRRI::Slli2403},2404rd: tmp,2405rs: tmp.to_reg(),2406imm12: Imm12::ONE,2407}2408.emit(sink, emit_info, state);2409Inst::gen_jump(label_loop).emit(sink, emit_info, state);2410}2411sink.bind_label(label_done, &mut state.ctrl_plane);2412}2413&Inst::Brev8 {2414rs,2415ty,2416step,2417tmp,2418tmp2,2419rd,2420} => {2421Inst::gen_move(rd, zero_reg(), I64).emit(sink, emit_info, state);2422Inst::load_imm12(step, Imm12::from_i16(ty.bits() as i16))2423.emit(sink, emit_info, state);2424//2425Inst::load_imm12(tmp, Imm12::ONE).emit(sink, emit_info, state);2426Inst::AluRRImm12 {2427alu_op: AluOPRRI::Slli,2428rd: tmp,2429rs: tmp.to_reg(),2430imm12: Imm12::from_i16((ty.bits() - 1) as i16),2431}2432.emit(sink, emit_info, state);2433Inst::load_imm12(tmp2, Imm12::ONE).emit(sink, emit_info, state);2434Inst::AluRRImm12 {2435alu_op: AluOPRRI::Slli,2436rd: tmp2,2437rs: tmp2.to_reg(),2438imm12: Imm12::from_i16((ty.bits() - 8) as i16),2439}2440.emit(sink, emit_info, state);24412442let label_done = sink.get_label();2443let label_loop = sink.get_label();2444sink.bind_label(label_loop, &mut state.ctrl_plane);2445Inst::CondBr {2446taken: CondBrTarget::Label(label_done),2447not_taken: CondBrTarget::Fallthrough,2448kind: IntegerCompare {2449kind: IntCC::SignedLessThanOrEqual,2450rs1: step.to_reg(),2451rs2: zero_reg(),2452},2453}2454.emit(sink, emit_info, state);2455// test and 
// Probe the current source bit; if it is set, set the
            // corresponding destination bit in `rd`.
            {
                Inst::AluRRR {
                    alu_op: AluOPRRR::And,
                    rd: writable_spilltmp_reg2(),
                    rs1: tmp.to_reg(),
                    rs2: rs,
                }
                .emit(sink, emit_info, state);
                let label_over = sink.get_label();
                // Skip the OR below when the probed source bit is clear.
                Inst::CondBr {
                    taken: CondBrTarget::Label(label_over),
                    not_taken: CondBrTarget::Fallthrough,
                    kind: IntegerCompare {
                        kind: IntCC::Equal,
                        rs1: zero_reg(),
                        rs2: spilltmp_reg2(),
                    },
                }
                .emit(sink, emit_info, state);
                Inst::AluRRR {
                    alu_op: AluOPRRR::Or,
                    rd,
                    rs1: rd.to_reg(),
                    rs2: tmp2.to_reg(),
                }
                .emit(sink, emit_info, state);
                sink.bind_label(label_over, &mut state.ctrl_plane);
            }
            // Advance step and the two bit masks for the next iteration.
            {
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Addi,
                    rd: step,
                    rs: step.to_reg(),
                    imm12: Imm12::from_i16(-1),
                }
                .emit(sink, emit_info, state);
                Inst::AluRRImm12 {
                    alu_op: AluOPRRI::Srli,
                    rd: tmp,
                    rs: tmp.to_reg(),
                    imm12: Imm12::ONE,
                }
                .emit(sink, emit_info, state);
                {
                    // Reset tmp2 (the destination bit mask):
                    //   if (step % 8 == 0) then tmp2 = tmp2 >> 15
                    //   if (step % 8 != 0) then tmp2 = tmp2 << 1
                    // i.e. within a byte the destination bit moves up one
                    // position; at a byte boundary it jumps down to the
                    // lowest bit of the next lower byte.
                    let label_over = sink.get_label();
                    let label_sll_1 = sink.get_label();
                    Inst::load_imm12(writable_spilltmp_reg2(), Imm12::from_i16(8))
                        .emit(sink, emit_info, state);
                    Inst::AluRRR {
                        alu_op: AluOPRRR::Rem,
                        rd: writable_spilltmp_reg2(),
                        rs1: step.to_reg(),
                        rs2: spilltmp_reg2(),
                    }
                    .emit(sink, emit_info, state);
                    Inst::CondBr {
                        taken: CondBrTarget::Label(label_sll_1),
                        not_taken: CondBrTarget::Fallthrough,
                        kind: IntegerCompare {
                            kind: IntCC::NotEqual,
                            rs1: spilltmp_reg2(),
                            rs2: zero_reg(),
                        },
                    }
                    .emit(sink, emit_info, state);
                    Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Srli,
                        rd: tmp2,
                        rs: tmp2.to_reg(),
                        imm12: Imm12::from_i16(15),
                    }
                    .emit(sink, emit_info, state);
                    Inst::gen_jump(label_over).emit(sink, emit_info, state);
                    sink.bind_label(label_sll_1, &mut state.ctrl_plane);
                    Inst::AluRRImm12 {
                        alu_op: AluOPRRI::Slli,
                        rd: tmp2,
                        rs: tmp2.to_reg(),
                        imm12: Imm12::ONE,
                    }
                    .emit(sink, emit_info, state);
                    sink.bind_label(label_over, &mut state.ctrl_plane);
                }
                Inst::gen_jump(label_loop).emit(sink, emit_info, state);
            }
            sink.bind_label(label_done, &mut state.ctrl_plane);
        }
        &Inst::StackProbeLoop {
            guard_size,
            probe_count,
            tmp: guard_size_tmp,
        } => {
            // Touch one byte per guard-sized page below SP, walking from the
            // farthest probe (guard_size * probe_count) back toward SP.
            //
            // `step` holds the current probe distance below SP.
            let step = writable_spilltmp_reg();
            Inst::load_constant_u64(step, (guard_size as u64) * (probe_count as u64))
                .iter()
                .for_each(|i| i.emit(sink, emit_info, state));
            Inst::load_constant_u64(guard_size_tmp, guard_size as u64)
                .iter()
                .for_each(|i| i.emit(sink, emit_info, state));

            let loop_start = sink.get_label();
            let label_done = sink.get_label();
            sink.bind_label(loop_start, &mut state.ctrl_plane);
            // Done once the remaining distance no longer exceeds one guard.
            Inst::CondBr {
                taken: CondBrTarget::Label(label_done),
                not_taken: CondBrTarget::Fallthrough,
                kind: IntegerCompare {
                    kind: IntCC::UnsignedLessThanOrEqual,
                    rs1: step.to_reg(),
                    rs2: guard_size_tmp.to_reg(),
                },
            }
            .emit(sink, emit_info, state);
            // Compute the probe address: SP - step.
            Inst::AluRRR {
                alu_op: AluOPRRR::Sub,
                rd: writable_spilltmp_reg2(),
                rs1: stack_reg(),
                rs2: step.to_reg(),
            }
            .emit(sink, emit_info, state);
            // Probe with a single byte store of zero.
            Inst::Store {
                to: AMode::RegOffset(spilltmp_reg2(), 0),
                op: StoreOP::Sb,
                flags: MemFlags::new(),
                src: zero_reg(),
            }
            .emit(sink, emit_info, state);
            // Move one guard page closer to SP.
            Inst::AluRRR {
                alu_op: AluOPRRR::Sub,
                rd: step,
                rs1: step.to_reg(),
                rs2: guard_size_tmp.to_reg(),
            }
            .emit(sink, emit_info, state);
            Inst::gen_jump(loop_start).emit(sink, emit_info, state);
            sink.bind_label(label_done, &mut state.ctrl_plane);
        }
        &Inst::VecAluRRRImm5 {
            op,
            vd,
            vd_src,
            imm,
            vs2,
            ref mask,
            ..
        } => {
            // `vd` doubles as a source operand; regalloc must have assigned
            // both to the same register.
            debug_assert_eq!(vd.to_reg(), vd_src);

            sink.put4(encode_valu_rrr_imm(op, vd, imm, vs2, *mask));
        }
        &Inst::VecAluRRRR {
            op,
            vd,
            vd_src,
            vs1,
            vs2,
            ref
mask,
            ..
        } => {
            // `vd` doubles as a source operand; regalloc must have assigned
            // both to the same register.
            debug_assert_eq!(vd.to_reg(), vd_src);

            sink.put4(encode_valu_rrrr(op, vd, vs2, vs1, *mask));
        }
        &Inst::VecAluRRR {
            op,
            vd,
            vs1,
            vs2,
            ref mask,
            ..
        } => {
            sink.put4(encode_valu(op, vd, vs1, vs2, *mask));
        }
        &Inst::VecAluRRImm5 {
            op,
            vd,
            imm,
            vs2,
            ref mask,
            ..
        } => {
            sink.put4(encode_valu_rr_imm(op, vd, imm, vs2, *mask));
        }
        &Inst::VecAluRR {
            op,
            vd,
            vs,
            ref mask,
            ..
        } => {
            sink.put4(encode_valu_rr(op, vd, vs, *mask));
        }
        &Inst::VecAluRImm5 {
            op,
            vd,
            imm,
            ref mask,
            ..
        } => {
            sink.put4(encode_valu_r_imm(op, vd, imm, *mask));
        }
        &Inst::VecSetState { rd, ref vstate } => {
            // 0x57 is the OP-V major opcode (vsetvli and friends).
            sink.put4(encode_vcfg_imm(
                0x57,
                rd.to_reg(),
                vstate.avl.unwrap_static(),
                &vstate.vtype,
            ));

            // Update the current vector emit state.
            state.vstate = EmitVState::Known(*vstate);
        }

        &Inst::VecLoad {
            eew,
            to,
            ref from,
            ref mask,
            flags,
            ..
        } => {
            // Vector Loads don't support immediate offsets, so we need to load it into a register.
            let addr = match from {
                VecAMode::UnitStride { base } => {
                    let base_reg = base.get_base_register();
                    let offset = base.get_offset_with_state(state);

                    // Reg+0 Offset can be directly encoded
                    if let (Some(base_reg), 0) = (base_reg, offset) {
                        base_reg
                    } else {
                        // Otherwise load the address into a reg and load from it.
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr {
                            rd: tmp,
                            mem: *base,
                        }
                        .emit(sink, emit_info, state);
                        tmp.to_reg()
                    }
                }
            };

            if let Some(trap_code) = flags.trap_code() {
                // Register the offset at which the actual load instruction starts.
                sink.add_trap(trap_code);
            }

            // 0x07 is the LOAD-FP major opcode, shared by vector loads.
            sink.put4(encode_vmem_load(
                0x07,
                to.to_reg(),
                eew,
                addr,
                from.lumop(),
                *mask,
                from.mop(),
                from.nf(),
            ));
        }

        &Inst::VecStore {
            eew,
            ref to,
            from,
            ref mask,
            flags,
            ..
        } => {
            // Vector Stores don't support immediate offsets, so we need to load the address
            // into a register.
            let addr = match to {
                VecAMode::UnitStride { base } => {
                    let base_reg = base.get_base_register();
                    let offset = base.get_offset_with_state(state);

                    // Reg+0 Offset can be directly encoded
                    if let (Some(base_reg), 0) = (base_reg, offset) {
                        base_reg
                    } else {
                        // Otherwise load the address into a reg and store through it.
                        let tmp = writable_spilltmp_reg();
                        Inst::LoadAddr {
                            rd: tmp,
                            mem: *base,
                        }
                        .emit(sink, emit_info, state);
                        tmp.to_reg()
                    }
                }
            };

            if let Some(trap_code) = flags.trap_code() {
                // Register the offset at which the actual store instruction starts.
                sink.add_trap(trap_code);
            }

            // 0x27 is the STORE-FP major opcode, shared by vector stores.
            sink.put4(encode_vmem_store(
                0x27,
                from,
                eew,
                addr,
                to.sumop(),
                *mask,
                to.mop(),
                to.nf(),
            ));
        }

        Inst::EmitIsland { needed_space } => {
            // If a constant/branch island is due, emit it here, jumping
            // around it so execution falls through unaffected.
            if sink.island_needed(*needed_space) {
                let jump_around_label = sink.get_label();
                Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
                sink.emit_island(needed_space + 4, &mut state.ctrl_plane);
                sink.bind_label(jump_around_label, &mut state.ctrl_plane);
            }
        }

        Inst::SequencePoint { ..
} => {
                // Nothing to emit for a sequence point.
            }
        }
    }
}

/// Emit the tail-call (return-call) epilogue-and-jump sequence, inserting a
/// branch island first if one is due.
fn emit_return_call_common_sequence<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    // The return call sequence can potentially emit a lot of instructions (up to 634 bytes!)
    // So lets emit an island here if we need it.
    //
    // It is difficult to calculate exactly how many instructions are going to be emitted, so
    // we calculate it by emitting it into a disposable buffer, and then checking how many
    // bytes were actually emitted.
    let mut buffer = MachBuffer::new();
    let mut fake_emit_state = state.clone();

    return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);

    // Finalize the buffer and get the number of bytes emitted.
    let buffer = buffer.finish(&Default::default(), &mut Default::default());
    let length = buffer.data().len() as u32;

    // And now emit the island inline with this instruction.
    if sink.island_needed(length) {
        let jump_around_label = sink.get_label();
        Inst::gen_jump(jump_around_label).emit(sink, emit_info, state);
        sink.emit_island(length + 4, &mut state.ctrl_plane);
        sink.bind_label(jump_around_label, &mut state.ctrl_plane);
    }

    // Now that we're done, emit the *actual* return sequence.
    return_call_emit_impl(sink, emit_info, state, info);
}

/// Emits the actual return-call sequence: restore clobbered callee-saves,
/// restore LR/FP, then pop the whole frame in a single SP adjustment.
///
/// This should not be called directly; instead prefer to call
/// [emit_return_call_common_sequence], which first makes room for the
/// sequence in the machine buffer.
fn return_call_emit_impl<T>(
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    info: &ReturnCallInfo<T>,
) {
    // Distance from the current SP up to the FP/LR save area: everything we
    // allocated below it (clobber saves, fixed storage, outgoing args).
    let sp_to_fp_offset = {
        let frame_layout = state.frame_layout();
        i64::from(
            frame_layout.clobber_size
                + frame_layout.fixed_frame_storage_size
                + frame_layout.outgoing_args_size,
        )
    };

    // Reload clobbered callee-saves, walking down from the highest save slot.
    let mut clobber_offset = sp_to_fp_offset - 8;
    for reg in state.frame_layout().clobbered_callee_saves.clone() {
        let rreg = reg.to_reg();
        let ty = match rreg.class() {
            RegClass::Int => I64,
            RegClass::Float => F64,
            RegClass::Vector => unimplemented!("Vector Clobber Restores"),
        };

        Inst::gen_load(
            reg.map(Reg::from),
            AMode::SPOffset(clobber_offset),
            ty,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        clobber_offset -= 8
    }

    // Restore the link register and frame pointer
    let setup_area_size = i64::from(state.frame_layout().setup_area_size);
    if setup_area_size > 0 {
        Inst::gen_load(
            writable_link_reg(),
            AMode::SPOffset(sp_to_fp_offset + 8),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);

        Inst::gen_load(
            writable_fp_reg(),
            AMode::SPOffset(sp_to_fp_offset),
            I64,
            MemFlags::trusted(),
        )
        .emit(sink, emit_info, state);
    }

    // If we over-allocated the incoming args area in the prologue, resize down to what the callee
    // is expecting.
    let incoming_args_diff =
        i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);

    // Increment SP all at once
    let sp_increment = sp_to_fp_offset + setup_area_size + incoming_args_diff;
    if sp_increment > 0 {
        for inst in Riscv64MachineDeps::gen_sp_reg_adjust(i32::try_from(sp_increment).unwrap()) {
            inst.emit(sink, emit_info, state);
        }
    }
}