Path: blob/main/cranelift/codegen/src/isa/riscv64/inst/mod.rs
3092 views
//! This module defines riscv64-specific machine instruction types.12use super::lower::isle::generated_code::{VecAMode, VecElementWidth, VecOpMasking};3use crate::binemit::{Addend, CodeOffset, Reloc};4pub use crate::ir::condcodes::IntCC;5use crate::ir::types::{self, F16, F32, F64, F128, I8, I8X16, I16, I32, I64, I128};67pub use crate::ir::{ExternalName, MemFlags, Type};8use crate::isa::{CallConv, FunctionAlignment};9use crate::machinst::*;10use crate::{CodegenError, CodegenResult, settings};1112pub use crate::ir::condcodes::FloatCC;1314use alloc::boxed::Box;15use alloc::string::{String, ToString};16use alloc::vec::Vec;17use core::fmt::Write;18use regalloc2::RegClass;19use smallvec::{SmallVec, smallvec};2021pub mod regs;22pub use self::regs::*;23pub mod imms;24pub use self::imms::*;25pub mod args;26pub use self::args::*;27pub mod emit;28pub use self::emit::*;29pub mod vector;30pub use self::vector::*;31pub mod encode;32pub use self::encode::*;33pub mod unwind;3435use crate::isa::riscv64::abi::Riscv64MachineDeps;3637#[cfg(test)]38mod emit_tests;3940use core::fmt::{Display, Formatter};4142pub(crate) type VecU8 = Vec<u8>;4344//=============================================================================45// Instructions (top level): definition4647pub use crate::isa::riscv64::lower::isle::generated_code::{48AluOPRRI, AluOPRRR, AtomicOP, CSR, CsrImmOP, CsrRegOP, FClassResult, FFlagsException, FRM,49FpuOPRR, FpuOPRRR, FpuOPRRRR, LoadOP, MInst as Inst, StoreOP,50};51use crate::isa::riscv64::lower::isle::generated_code::{CjOp, MInst, VecAluOpRRImm5, VecAluOpRRR};5253/// Additional information for `return_call[_ind]` instructions, left out of54/// line to lower the size of the `Inst` enum.55#[derive(Clone, Debug)]56pub struct ReturnCallInfo<T> {57pub dest: T,58pub uses: CallArgList,59pub new_stack_arg_size: u32,60}6162/// A conditional branch target.63#[derive(Clone, Copy, Debug, PartialEq, Eq)]64pub enum CondBrTarget {65/// An unresolved reference to a Label, as passed 
into66/// `lower_branch_group()`.67Label(MachLabel),68/// No jump; fall through to the next instruction.69Fallthrough,70}7172impl CondBrTarget {73/// Return the target's label, if it is a label-based target.74pub(crate) fn as_label(self) -> Option<MachLabel> {75match self {76CondBrTarget::Label(l) => Some(l),77_ => None,78}79}8081pub(crate) fn is_fallthrouh(&self) -> bool {82self == &CondBrTarget::Fallthrough83}84}8586impl Display for CondBrTarget {87fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {88match self {89CondBrTarget::Label(l) => write!(f, "{}", l.to_string()),90CondBrTarget::Fallthrough => write!(f, "0"),91}92}93}9495pub(crate) fn enc_auipc(rd: Writable<Reg>, imm: Imm20) -> u32 {96let x = 0b0010111 | reg_to_gpr_num(rd.to_reg()) << 7 | imm.bits() << 12;97x98}99100pub(crate) fn enc_jalr(rd: Writable<Reg>, base: Reg, offset: Imm12) -> u32 {101let x = 0b1100111102| reg_to_gpr_num(rd.to_reg()) << 7103| 0b000 << 12104| reg_to_gpr_num(base) << 15105| offset.bits() << 20;106x107}108109/// rd and src must have the same length.110pub(crate) fn gen_moves(rd: &[Writable<Reg>], src: &[Reg]) -> SmallInstVec<Inst> {111assert!(rd.len() == src.len());112assert!(rd.len() > 0);113let mut insts = SmallInstVec::new();114for (dst, src) in rd.iter().zip(src.iter()) {115let ty = Inst::canonical_type_for_rc(dst.to_reg().class());116insts.push(Inst::gen_move(*dst, *src, ty));117}118insts119}120121impl Inst {122/// RISC-V can have multiple instruction sizes. 
2 bytes for compressed123/// instructions, 4 for regular instructions, 6 and 8 byte instructions124/// are also being considered.125const UNCOMPRESSED_INSTRUCTION_SIZE: i32 = 4;126127#[inline]128pub(crate) fn load_imm12(rd: Writable<Reg>, imm: Imm12) -> Inst {129Inst::AluRRImm12 {130alu_op: AluOPRRI::Addi,131rd,132rs: zero_reg(),133imm12: imm,134}135}136137/// Immediates can be loaded using lui and addi instructions.138fn load_const_imm(rd: Writable<Reg>, value: u64) -> Option<SmallInstVec<Inst>> {139Inst::generate_imm(value).map(|(imm20, imm12)| {140let mut insts = SmallVec::new();141142let imm20_is_zero = imm20.as_i32() == 0;143let imm12_is_zero = imm12.as_i16() == 0;144145let rs = if !imm20_is_zero {146insts.push(Inst::Lui { rd, imm: imm20 });147rd.to_reg()148} else {149zero_reg()150};151152// We also need to emit the addi if the value is 0, otherwise we just153// won't produce any instructions.154if !imm12_is_zero || (imm20_is_zero && imm12_is_zero) {155insts.push(Inst::AluRRImm12 {156alu_op: AluOPRRI::Addi,157rd,158rs,159imm12,160})161}162163insts164})165}166167pub(crate) fn load_constant_u32(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {168let insts = Inst::load_const_imm(rd, value);169insts.unwrap_or_else(|| {170smallvec![Inst::LoadInlineConst {171rd,172ty: I32,173imm: value174}]175})176}177178pub fn load_constant_u64(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {179let insts = Inst::load_const_imm(rd, value);180insts.unwrap_or_else(|| {181smallvec![Inst::LoadInlineConst {182rd,183ty: I64,184imm: value185}]186})187}188189pub(crate) fn construct_auipc_and_jalr(190link: Option<Writable<Reg>>,191tmp: Writable<Reg>,192offset: i64,193) -> [Inst; 2] {194Inst::generate_imm(offset as u64)195.map(|(imm20, imm12)| {196let a = Inst::Auipc {197rd: tmp,198imm: imm20,199};200let b = Inst::Jalr {201rd: link.unwrap_or(writable_zero_reg()),202base: tmp.to_reg(),203offset: imm12,204};205[a, b]206})207.expect("code range is too big.")208}209210/// Generic 
constructor for a load (zero-extending where appropriate).211pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {212if ty.is_vector() {213Inst::VecLoad {214eew: VecElementWidth::from_type(ty),215to: into_reg,216from: VecAMode::UnitStride { base: mem },217flags,218mask: VecOpMasking::Disabled,219vstate: VState::from_type(ty),220}221} else {222Inst::Load {223rd: into_reg,224op: LoadOP::from_type(ty),225from: mem,226flags,227}228}229}230231/// Generic constructor for a store.232pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {233if ty.is_vector() {234Inst::VecStore {235eew: VecElementWidth::from_type(ty),236to: VecAMode::UnitStride { base: mem },237from: from_reg,238flags,239mask: VecOpMasking::Disabled,240vstate: VState::from_type(ty),241}242} else {243Inst::Store {244src: from_reg,245op: StoreOP::from_type(ty),246to: mem,247flags,248}249}250}251}252253//=============================================================================254255fn vec_mask_operands(mask: &mut VecOpMasking, collector: &mut impl OperandVisitor) {256match mask {257VecOpMasking::Enabled { reg } => {258collector.reg_fixed_use(reg, pv_reg(0).into());259}260VecOpMasking::Disabled => {}261}262}263fn vec_mask_late_operands(mask: &mut VecOpMasking, collector: &mut impl OperandVisitor) {264match mask {265VecOpMasking::Enabled { reg } => {266collector.reg_fixed_late_use(reg, pv_reg(0).into());267}268VecOpMasking::Disabled => {}269}270}271272fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {273match inst {274Inst::Nop0 | Inst::Nop4 => {}275Inst::BrTable {276index, tmp1, tmp2, ..277} => {278collector.reg_use(index);279collector.reg_early_def(tmp1);280collector.reg_early_def(tmp2);281}282Inst::Auipc { rd, .. } => collector.reg_def(rd),283Inst::Lui { rd, .. } => collector.reg_def(rd),284Inst::Fli { rd, .. } => collector.reg_def(rd),285Inst::LoadInlineConst { rd, .. 
} => collector.reg_def(rd),286Inst::AluRRR { rd, rs1, rs2, .. } => {287collector.reg_use(rs1);288collector.reg_use(rs2);289collector.reg_def(rd);290}291Inst::FpuRRR { rd, rs1, rs2, .. } => {292collector.reg_use(rs1);293collector.reg_use(rs2);294collector.reg_def(rd);295}296Inst::AluRRImm12 { rd, rs, .. } => {297collector.reg_use(rs);298collector.reg_def(rd);299}300Inst::CsrReg { rd, rs, .. } => {301collector.reg_use(rs);302collector.reg_def(rd);303}304Inst::CsrImm { rd, .. } => {305collector.reg_def(rd);306}307Inst::Load { rd, from, .. } => {308from.get_operands(collector);309collector.reg_def(rd);310}311Inst::Store { to, src, .. } => {312to.get_operands(collector);313collector.reg_use(src);314}315316Inst::Args { args } => {317for ArgPair { vreg, preg } in args {318collector.reg_fixed_def(vreg, *preg);319}320}321Inst::Rets { rets } => {322for RetPair { vreg, preg } in rets {323collector.reg_fixed_use(vreg, *preg);324}325}326Inst::Ret { .. } => {}327328Inst::Extend { rd, rn, .. } => {329collector.reg_use(rn);330collector.reg_def(rd);331}332Inst::Call { info, .. } => {333let CallInfo { uses, defs, .. } = &mut **info;334for CallArgPair { vreg, preg } in uses {335collector.reg_fixed_use(vreg, *preg);336}337for CallRetPair { vreg, location } in defs {338match location {339RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),340RetLocation::Stack(..) => collector.any_def(vreg),341}342}343collector.reg_clobbers(info.clobbers);344if let Some(try_call_info) = &mut info.try_call_info {345try_call_info.collect_operands(collector);346}347}348Inst::CallInd { info } => {349let CallInfo {350dest, uses, defs, ..351} = &mut **info;352collector.reg_use(dest);353for CallArgPair { vreg, preg } in uses {354collector.reg_fixed_use(vreg, *preg);355}356for CallRetPair { vreg, location } in defs {357match location {358RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),359RetLocation::Stack(..) 
=> collector.any_def(vreg),360}361}362collector.reg_clobbers(info.clobbers);363if let Some(try_call_info) = &mut info.try_call_info {364try_call_info.collect_operands(collector);365}366}367Inst::ReturnCall { info } => {368for CallArgPair { vreg, preg } in &mut info.uses {369collector.reg_fixed_use(vreg, *preg);370}371}372Inst::ReturnCallInd { info } => {373// TODO(https://github.com/bytecodealliance/regalloc2/issues/145):374// This shouldn't be a fixed register constraint.375collector.reg_fixed_use(&mut info.dest, x_reg(5));376377for CallArgPair { vreg, preg } in &mut info.uses {378collector.reg_fixed_use(vreg, *preg);379}380}381Inst::Jal { .. } => {382// JAL technically has a rd register, but we currently always383// hardcode it to x0.384}385Inst::CondBr {386kind: IntegerCompare { rs1, rs2, .. },387..388} => {389collector.reg_use(rs1);390collector.reg_use(rs2);391}392Inst::LoadExtNameGot { rd, .. }393| Inst::LoadExtNameNear { rd, .. }394| Inst::LoadExtNameFar { rd, .. } => {395collector.reg_def(rd);396}397Inst::ElfTlsGetAddr { rd, .. } => {398// x10 is a0 which is both the first argument and the first return value.399collector.reg_fixed_def(rd, a0());400let mut clobbers =401Riscv64MachineDeps::get_regs_clobbered_by_call(CallConv::SystemV, false);402clobbers.remove(px_reg(10));403collector.reg_clobbers(clobbers);404}405Inst::LoadAddr { rd, mem } => {406mem.get_operands(collector);407collector.reg_early_def(rd);408}409410Inst::Mov { rd, rm, .. } => {411collector.reg_use(rm);412collector.reg_def(rd);413}414Inst::MovFromPReg { rd, rm } => {415debug_assert!([px_reg(2), px_reg(8)].contains(rm));416collector.reg_def(rd);417}418Inst::Fence { .. } => {}419Inst::EBreak => {}420Inst::Udf { .. } => {}421Inst::FpuRR { rd, rs, .. } => {422collector.reg_use(rs);423collector.reg_def(rd);424}425Inst::FpuRRRR {426rd, rs1, rs2, rs3, ..427} => {428collector.reg_use(rs1);429collector.reg_use(rs2);430collector.reg_use(rs3);431collector.reg_def(rd);432}433434Inst::Jalr { rd, base, .. 
} => {435collector.reg_use(base);436collector.reg_def(rd);437}438Inst::Atomic { rd, addr, src, .. } => {439collector.reg_use(addr);440collector.reg_use(src);441collector.reg_def(rd);442}443Inst::Select {444dst,445condition: IntegerCompare { rs1, rs2, .. },446x,447y,448..449} => {450// Mark the condition registers as late use so that they don't overlap with the destination451// register. We may potentially write to the destination register before evaluating the452// condition.453collector.reg_late_use(rs1);454collector.reg_late_use(rs2);455456for reg in x.regs_mut() {457collector.reg_use(reg);458}459for reg in y.regs_mut() {460collector.reg_use(reg);461}462463// If there's more than one destination register then use464// `reg_early_def` to prevent destination registers from overlapping465// with any operands. This ensures that the lowering doesn't have to466// deal with a situation such as when the input registers need to be467// swapped when moved to the destination.468//469// When there's only one destination register though don't use an470// early def because once the register is written no other inputs471// are read so it's ok for the destination to overlap the sources.472// The condition registers are already marked as late use so they473// won't overlap with the destination.474match dst.regs_mut() {475[reg] => collector.reg_def(reg),476regs => {477for d in regs {478collector.reg_early_def(d);479}480}481}482}483Inst::AtomicCas {484offset,485t0,486dst,487e,488addr,489v,490..491} => {492collector.reg_use(offset);493collector.reg_use(e);494collector.reg_use(addr);495collector.reg_use(v);496collector.reg_early_def(t0);497collector.reg_early_def(dst);498}499500Inst::RawData { .. } => {}501Inst::AtomicStore { src, p, .. } => {502collector.reg_use(src);503collector.reg_use(p);504}505Inst::AtomicLoad { rd, p, .. 
} => {506collector.reg_use(p);507collector.reg_def(rd);508}509Inst::AtomicRmwLoop {510offset,511dst,512p,513x,514t0,515..516} => {517collector.reg_use(offset);518collector.reg_use(p);519collector.reg_use(x);520collector.reg_early_def(t0);521collector.reg_early_def(dst);522}523Inst::TrapIf { rs1, rs2, .. } => {524collector.reg_use(rs1);525collector.reg_use(rs2);526}527Inst::Unwind { .. } => {}528Inst::DummyUse { reg } => {529collector.reg_use(reg);530}531Inst::Popcnt {532sum, step, rs, tmp, ..533} => {534collector.reg_use(rs);535collector.reg_early_def(tmp);536collector.reg_early_def(step);537collector.reg_early_def(sum);538}539Inst::Cltz {540sum, step, tmp, rs, ..541} => {542collector.reg_use(rs);543collector.reg_early_def(tmp);544collector.reg_early_def(step);545collector.reg_early_def(sum);546}547Inst::Brev8 {548rs,549rd,550step,551tmp,552tmp2,553..554} => {555collector.reg_use(rs);556collector.reg_early_def(step);557collector.reg_early_def(tmp);558collector.reg_early_def(tmp2);559collector.reg_early_def(rd);560}561Inst::StackProbeLoop { .. } => {562// StackProbeLoop has a tmp register and StackProbeLoop used at gen_prologue.563// t3 will do the job. 
(t3 is caller-save register and not used directly by compiler like writable_spilltmp_reg)564// gen_prologue is called at emit stage.565// no need let reg alloc know.566}567Inst::VecAluRRRR {568op,569vd,570vd_src,571vs1,572vs2,573mask,574..575} => {576debug_assert_eq!(vd_src.class(), RegClass::Vector);577debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);578debug_assert_eq!(vs2.class(), RegClass::Vector);579debug_assert_eq!(vs1.class(), op.vs1_regclass());580581collector.reg_late_use(vs1);582collector.reg_late_use(vs2);583collector.reg_use(vd_src);584collector.reg_reuse_def(vd, 2); // `vd` == `vd_src`.585vec_mask_late_operands(mask, collector);586}587Inst::VecAluRRRImm5 {588op,589vd,590vd_src,591vs2,592mask,593..594} => {595debug_assert_eq!(vd_src.class(), RegClass::Vector);596debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);597debug_assert_eq!(vs2.class(), RegClass::Vector);598599// If the operation forbids source/destination overlap we need to600// ensure that the source and destination registers are different.601if op.forbids_overlaps(mask) {602collector.reg_late_use(vs2);603collector.reg_use(vd_src);604collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`.605vec_mask_late_operands(mask, collector);606} else {607collector.reg_use(vs2);608collector.reg_use(vd_src);609collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`.610vec_mask_operands(mask, collector);611}612}613Inst::VecAluRRR {614op,615vd,616vs1,617vs2,618mask,619..620} => {621debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);622debug_assert_eq!(vs2.class(), RegClass::Vector);623debug_assert_eq!(vs1.class(), op.vs1_regclass());624625collector.reg_use(vs1);626collector.reg_use(vs2);627628// If the operation forbids source/destination overlap, then we must629// register it as an early_def. 
This encodes the constraint that630// these must not overlap.631if op.forbids_overlaps(mask) {632collector.reg_early_def(vd);633} else {634collector.reg_def(vd);635}636637vec_mask_operands(mask, collector);638}639Inst::VecAluRRImm5 {640op, vd, vs2, mask, ..641} => {642debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);643debug_assert_eq!(vs2.class(), RegClass::Vector);644645collector.reg_use(vs2);646647// If the operation forbids source/destination overlap, then we must648// register it as an early_def. This encodes the constraint that649// these must not overlap.650if op.forbids_overlaps(mask) {651collector.reg_early_def(vd);652} else {653collector.reg_def(vd);654}655656vec_mask_operands(mask, collector);657}658Inst::VecAluRR {659op, vd, vs, mask, ..660} => {661debug_assert_eq!(vd.to_reg().class(), op.dst_regclass());662debug_assert_eq!(vs.class(), op.src_regclass());663664collector.reg_use(vs);665666// If the operation forbids source/destination overlap, then we must667// register it as an early_def. This encodes the constraint that668// these must not overlap.669if op.forbids_overlaps(mask) {670collector.reg_early_def(vd);671} else {672collector.reg_def(vd);673}674675vec_mask_operands(mask, collector);676}677Inst::VecAluRImm5 { op, vd, mask, .. } => {678debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);679debug_assert!(!op.forbids_overlaps(mask));680681collector.reg_def(vd);682vec_mask_operands(mask, collector);683}684Inst::VecSetState { rd, .. } => {685collector.reg_def(rd);686}687Inst::VecLoad { to, from, mask, .. } => {688from.get_operands(collector);689collector.reg_def(to);690vec_mask_operands(mask, collector);691}692Inst::VecStore { to, from, mask, .. } => {693to.get_operands(collector);694collector.reg_use(from);695vec_mask_operands(mask, collector);696}697Inst::EmitIsland { .. } => {}698Inst::LabelAddress { dst, .. } => {699collector.reg_def(dst);700}701Inst::SequencePoint { .. 
} => {}702}703}704705impl MachInst for Inst {706type LabelUse = LabelUse;707type ABIMachineSpec = Riscv64MachineDeps;708709// https://github.com/riscv/riscv-isa-manual/issues/850710// all zero will cause invalid opcode.711const TRAP_OPCODE: &'static [u8] = &[0; 4];712713fn gen_dummy_use(reg: Reg) -> Self {714Inst::DummyUse { reg }715}716717fn canonical_type_for_rc(rc: RegClass) -> Type {718match rc {719regalloc2::RegClass::Int => I64,720regalloc2::RegClass::Float => F64,721regalloc2::RegClass::Vector => I8X16,722}723}724725fn is_safepoint(&self) -> bool {726match self {727Inst::Call { .. } | Inst::CallInd { .. } => true,728_ => false,729}730}731732fn get_operands(&mut self, collector: &mut impl OperandVisitor) {733riscv64_get_operands(self, collector);734}735736fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {737match self {738Inst::Mov { rd, rm, .. } => Some((*rd, *rm)),739_ => None,740}741}742743fn is_included_in_clobbers(&self) -> bool {744match self {745&Inst::Args { .. } => false,746_ => true,747}748}749750fn is_trap(&self) -> bool {751match self {752Self::Udf { .. } => true,753_ => false,754}755}756757fn is_args(&self) -> bool {758match self {759Self::Args { .. } => true,760_ => false,761}762}763764fn call_type(&self) -> CallType {765match self {766Inst::Call { .. } | Inst::CallInd { .. } | Inst::ElfTlsGetAddr { .. } => {767CallType::Regular768}769770Inst::ReturnCall { .. } | Inst::ReturnCallInd { .. } => CallType::TailCall,771772_ => CallType::None,773}774}775776fn is_term(&self) -> MachTerminator {777match self {778&Inst::Jal { .. } => MachTerminator::Branch,779&Inst::CondBr { .. } => MachTerminator::Branch,780&Inst::Jalr { .. } => MachTerminator::Branch,781&Inst::Rets { .. } => MachTerminator::Ret,782&Inst::BrTable { .. } => MachTerminator::Branch,783&Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. 
} => MachTerminator::RetCall,784&Inst::Call { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,785&Inst::CallInd { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,786_ => MachTerminator::None,787}788}789790fn is_mem_access(&self) -> bool {791panic!("TODO FILL ME OUT")792}793794fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {795let x = Inst::Mov {796rd: to_reg,797rm: from_reg,798ty,799};800x801}802803fn gen_nop(preferred_size: usize) -> Inst {804if preferred_size == 0 {805return Inst::Nop0;806}807// We can't give a NOP (or any insn) < 4 bytes.808assert!(preferred_size >= 4);809Inst::Nop4810}811812fn gen_nop_units() -> Vec<Vec<u8>> {813vec![vec![0x13, 0x00, 0x00, 0x00]]814}815816fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {817match ty {818I8 => Ok((&[RegClass::Int], &[I8])),819I16 => Ok((&[RegClass::Int], &[I16])),820I32 => Ok((&[RegClass::Int], &[I32])),821I64 => Ok((&[RegClass::Int], &[I64])),822F16 => Ok((&[RegClass::Float], &[F16])),823F32 => Ok((&[RegClass::Float], &[F32])),824F64 => Ok((&[RegClass::Float], &[F64])),825// FIXME(#8312): Add support for Q extension826F128 | I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),827_ if ty.is_vector() => {828debug_assert!(ty.bits() <= 512);829830// Here we only need to return a SIMD type with the same size as `ty`.831// We use these types for spills and reloads, so prefer types with lanes <= 31832// since that fits in the immediate field of `vsetivli`.833const SIMD_TYPES: [[Type; 1]; 6] = [834[types::I8X2],835[types::I8X4],836[types::I8X8],837[types::I8X16],838[types::I16X16],839[types::I32X16],840];841let idx = (ty.bytes().ilog2() - 1) as usize;842let ty = &SIMD_TYPES[idx][..];843844Ok((&[RegClass::Vector], ty))845}846_ => Err(CodegenError::Unsupported(format!(847"Unexpected SSA-value type: {ty}"848))),849}850}851852fn gen_jump(target: MachLabel) -> Inst {853Inst::Jal { label: target }854}855856fn 
worst_case_size() -> CodeOffset {857// Our worst case size is determined by the riscv64_worst_case_instruction_size test85884859}860861fn ref_type_regclass(_settings: &settings::Flags) -> RegClass {862RegClass::Int863}864865fn function_alignment() -> FunctionAlignment {866FunctionAlignment {867minimum: 2,868preferred: 4,869}870}871}872873//=============================================================================874// Pretty-printing of instructions.875pub fn reg_name(reg: Reg) -> String {876match reg.to_real_reg() {877Some(real) => match real.class() {878RegClass::Int => match real.hw_enc() {8790 => "zero".into(),8801 => "ra".into(),8812 => "sp".into(),8823 => "gp".into(),8834 => "tp".into(),8845..=7 => format!("t{}", real.hw_enc() - 5),8858 => "fp".into(),8869 => "s1".into(),88710..=17 => format!("a{}", real.hw_enc() - 10),88818..=27 => format!("s{}", real.hw_enc() - 16),88928..=31 => format!("t{}", real.hw_enc() - 25),890_ => unreachable!(),891},892RegClass::Float => match real.hw_enc() {8930..=7 => format!("ft{}", real.hw_enc() - 0),8948..=9 => format!("fs{}", real.hw_enc() - 8),89510..=17 => format!("fa{}", real.hw_enc() - 10),89618..=27 => format!("fs{}", real.hw_enc() - 16),89728..=31 => format!("ft{}", real.hw_enc() - 20),898_ => unreachable!(),899},900RegClass::Vector => format!("v{}", real.hw_enc()),901},902None => {903format!("{reg:?}")904}905}906}907908fn pretty_print_try_call(info: &TryCallInfo) -> String {909format!(910"; j {:?}; catch [{}]",911info.continuation,912info.pretty_print_dests()913)914}915916impl Inst {917fn print_with_state(&self, _state: &mut EmitState) -> String {918let format_reg = |reg: Reg| -> String { reg_name(reg) };919920let format_vec_amode = |amode: &VecAMode| -> String {921match amode {922VecAMode::UnitStride { base } => base.to_string(),923}924};925926let format_mask = |mask: &VecOpMasking| -> String {927match mask {928VecOpMasking::Enabled { reg } => format!(",{}.t", format_reg(*reg)),929VecOpMasking::Disabled => 
format!(""),930}931};932933let format_regs = |regs: &[Reg]| -> String {934let mut x = if regs.len() > 1 {935String::from("[")936} else {937String::default()938};939regs.iter().for_each(|i| {940x.push_str(format_reg(*i).as_str());941if *i != *regs.last().unwrap() {942x.push_str(",");943}944});945if regs.len() > 1 {946x.push_str("]");947}948x949};950let format_labels = |labels: &[MachLabel]| -> String {951if labels.len() == 0 {952return String::from("[_]");953}954let mut x = String::from("[");955labels.iter().for_each(|l| {956x.push_str(957format!(958"{:?}{}",959l,960if l != labels.last().unwrap() { "," } else { "" },961)962.as_str(),963);964});965x.push_str("]");966x967};968969fn format_frm(rounding_mode: FRM) -> String {970format!(",{}", rounding_mode.to_static_str())971}972973match self {974&Inst::Nop0 => {975format!("##zero length nop")976}977&Inst::Nop4 => {978format!("##fixed 4-size nop")979}980&Inst::StackProbeLoop {981guard_size,982probe_count,983tmp,984} => {985let tmp = format_reg(tmp.to_reg());986format!(987"inline_stack_probe##guard_size={guard_size} probe_count={probe_count} tmp={tmp}"988)989}990&Inst::AtomicStore { src, ty, p } => {991let src = format_reg(src);992let p = format_reg(p);993format!("atomic_store.{ty} {src},({p})")994}995&Inst::DummyUse { reg } => {996let reg = format_reg(reg);997format!("dummy_use {reg}")998}9991000&Inst::AtomicLoad { rd, ty, p } => {1001let p = format_reg(p);1002let rd = format_reg(rd.to_reg());1003format!("atomic_load.{ty} {rd},({p})")1004}1005&Inst::AtomicRmwLoop {1006offset,1007op,1008dst,1009ty,1010p,1011x,1012t0,1013} => {1014let offset = format_reg(offset);1015let p = format_reg(p);1016let x = format_reg(x);1017let t0 = format_reg(t0.to_reg());1018let dst = format_reg(dst.to_reg());1019format!("atomic_rmw.{ty} {op} {dst},{x},({p})##t0={t0} offset={offset}")1020}10211022&Inst::RawData { ref data } => match data.len() {10234 => {1024let mut bytes = [0; 4];1025for i in 0..bytes.len() {1026bytes[i] = 
data[i];1027}1028format!(".4byte 0x{:x}", u32::from_le_bytes(bytes))1029}10308 => {1031let mut bytes = [0; 8];1032for i in 0..bytes.len() {1033bytes[i] = data[i];1034}1035format!(".8byte 0x{:x}", u64::from_le_bytes(bytes))1036}1037_ => {1038format!(".data {data:?}")1039}1040},1041&Inst::Unwind { ref inst } => {1042format!("unwind {inst:?}")1043}1044&Inst::Brev8 {1045rs,1046ty,1047step,1048tmp,1049tmp2,1050rd,1051} => {1052let rs = format_reg(rs);1053let step = format_reg(step.to_reg());1054let tmp = format_reg(tmp.to_reg());1055let tmp2 = format_reg(tmp2.to_reg());1056let rd = format_reg(rd.to_reg());1057format!("brev8 {rd},{rs}##tmp={tmp} tmp2={tmp2} step={step} ty={ty}")1058}1059&Inst::Popcnt {1060sum,1061step,1062rs,1063tmp,1064ty,1065} => {1066let rs = format_reg(rs);1067let tmp = format_reg(tmp.to_reg());1068let step = format_reg(step.to_reg());1069let sum = format_reg(sum.to_reg());1070format!("popcnt {sum},{rs}##ty={ty} tmp={tmp} step={step}")1071}1072&Inst::Cltz {1073sum,1074step,1075rs,1076tmp,1077ty,1078leading,1079} => {1080let rs = format_reg(rs);1081let tmp = format_reg(tmp.to_reg());1082let step = format_reg(step.to_reg());1083let sum = format_reg(sum.to_reg());1084format!(1085"{} {},{}##ty={} tmp={} step={}",1086if leading { "clz" } else { "ctz" },1087sum,1088rs,1089ty,1090tmp,1091step1092)1093}1094&Inst::AtomicCas {1095offset,1096t0,1097dst,1098e,1099addr,1100v,1101ty,1102} => {1103let offset = format_reg(offset);1104let e = format_reg(e);1105let addr = format_reg(addr);1106let v = format_reg(v);1107let t0 = format_reg(t0.to_reg());1108let dst = format_reg(dst.to_reg());1109format!("atomic_cas.{ty} {dst},{e},{v},({addr})##t0={t0} offset={offset}",)1110}1111&Inst::BrTable {1112index,1113tmp1,1114tmp2,1115ref targets,1116} => {1117format!(1118"{} {},{}##tmp1={},tmp2={}",1119"br_table",1120format_reg(index),1121format_labels(&targets[..]),1122format_reg(tmp1.to_reg()),1123format_reg(tmp2.to_reg()),1124)1125}1126&Inst::Auipc { rd, imm } => 
{1127format!("{} {},{}", "auipc", format_reg(rd.to_reg()), imm.as_i32(),)1128}1129&Inst::Jalr { rd, base, offset } => {1130let base = format_reg(base);1131let rd = format_reg(rd.to_reg());1132format!("{} {},{}({})", "jalr", rd, offset.as_i16(), base)1133}1134&Inst::Lui { rd, ref imm } => {1135format!("{} {},{}", "lui", format_reg(rd.to_reg()), imm.as_i32())1136}1137&Inst::Fli { rd, width, imm } => {1138let rd_s = format_reg(rd.to_reg());1139let imm_s = imm.format();1140format!("fli.{width} {rd_s},{imm_s}")1141}1142&Inst::LoadInlineConst { rd, imm, .. } => {1143let rd = format_reg(rd.to_reg());1144let mut buf = String::new();1145write!(&mut buf, "auipc {rd},0; ").unwrap();1146write!(&mut buf, "ld {rd},12({rd}); ").unwrap();1147write!(&mut buf, "j {}; ", Inst::UNCOMPRESSED_INSTRUCTION_SIZE + 8).unwrap();1148write!(&mut buf, ".8byte 0x{imm:x}").unwrap();1149buf1150}1151&Inst::AluRRR {1152alu_op,1153rd,1154rs1,1155rs2,1156} => {1157let rs1_s = format_reg(rs1);1158let rs2_s = format_reg(rs2);1159let rd_s = format_reg(rd.to_reg());1160match alu_op {1161AluOPRRR::Adduw if rs2 == zero_reg() => {1162format!("zext.w {rd_s},{rs1_s}")1163}1164_ => {1165format!("{} {},{},{}", alu_op.op_name(), rd_s, rs1_s, rs2_s)1166}1167}1168}1169&Inst::FpuRR {1170alu_op,1171width,1172frm,1173rd,1174rs,1175} => {1176let rs = format_reg(rs);1177let rd = format_reg(rd.to_reg());1178let frm = if alu_op.has_frm() {1179format_frm(frm)1180} else {1181String::new()1182};1183format!("{} {rd},{rs}{frm}", alu_op.op_name(width))1184}1185&Inst::FpuRRR {1186alu_op,1187width,1188rd,1189rs1,1190rs2,1191frm,1192} => {1193let rs1 = format_reg(rs1);1194let rs2 = format_reg(rs2);1195let rd = format_reg(rd.to_reg());1196let frm = if alu_op.has_frm() {1197format_frm(frm)1198} else {1199String::new()1200};12011202let rs1_is_rs2 = rs1 == rs2;1203match alu_op {1204FpuOPRRR::Fsgnj if rs1_is_rs2 => format!("fmv.{width} {rd},{rs1}"),1205FpuOPRRR::Fsgnjn if rs1_is_rs2 => format!("fneg.{width} 
{rd},{rs1}"),1206FpuOPRRR::Fsgnjx if rs1_is_rs2 => format!("fabs.{width} {rd},{rs1}"),1207_ => format!("{} {rd},{rs1},{rs2}{frm}", alu_op.op_name(width)),1208}1209}1210&Inst::FpuRRRR {1211alu_op,1212rd,1213rs1,1214rs2,1215rs3,1216frm,1217width,1218} => {1219let rs1 = format_reg(rs1);1220let rs2 = format_reg(rs2);1221let rs3 = format_reg(rs3);1222let rd = format_reg(rd.to_reg());1223let frm = format_frm(frm);1224let op_name = alu_op.op_name(width);1225format!("{op_name} {rd},{rs1},{rs2},{rs3}{frm}")1226}1227&Inst::AluRRImm12 {1228alu_op,1229rd,1230rs,1231ref imm12,1232} => {1233let rs_s = format_reg(rs);1234let rd = format_reg(rd.to_reg());12351236// Some of these special cases are better known as1237// their pseudo-instruction version, so prefer printing those.1238match (alu_op, rs, imm12) {1239(AluOPRRI::Addi, rs, _) if rs == zero_reg() => {1240return format!("li {},{}", rd, imm12.as_i16());1241}1242(AluOPRRI::Addiw, _, imm12) if imm12.as_i16() == 0 => {1243return format!("sext.w {rd},{rs_s}");1244}1245(AluOPRRI::Xori, _, imm12) if imm12.as_i16() == -1 => {1246return format!("not {rd},{rs_s}");1247}1248(AluOPRRI::SltiU, _, imm12) if imm12.as_i16() == 1 => {1249return format!("seqz {rd},{rs_s}");1250}1251(alu_op, _, _) if alu_op.option_funct12().is_some() => {1252format!("{} {},{}", alu_op.op_name(), rd, rs_s)1253}1254(alu_op, _, imm12) => {1255format!("{} {},{},{}", alu_op.op_name(), rd, rs_s, imm12.as_i16())1256}1257}1258}1259&Inst::CsrReg { op, rd, rs, csr } => {1260let rs_s = format_reg(rs);1261let rd_s = format_reg(rd.to_reg());12621263match (op, csr, rd) {1264(CsrRegOP::CsrRW, CSR::Frm, rd) if rd.to_reg() == zero_reg() => {1265format!("fsrm {rs_s}")1266}1267_ => {1268format!("{op} {rd_s},{csr},{rs_s}")1269}1270}1271}1272&Inst::CsrImm { op, rd, csr, imm } => {1273let rd_s = format_reg(rd.to_reg());12741275match (op, csr, rd) {1276(CsrImmOP::CsrRWI, CSR::Frm, rd) if rd.to_reg() != zero_reg() => {1277format!("fsrmi {rd_s},{imm}")1278}1279_ => {1280format!("{op} 
{rd_s},{csr},{imm}")1281}1282}1283}1284&Inst::Load {1285rd,1286op,1287from,1288flags: _flags,1289} => {1290let base = from.to_string();1291let rd = format_reg(rd.to_reg());1292format!("{} {},{}", op.op_name(), rd, base,)1293}1294&Inst::Store {1295to,1296src,1297op,1298flags: _flags,1299} => {1300let base = to.to_string();1301let src = format_reg(src);1302format!("{} {},{}", op.op_name(), src, base,)1303}1304&Inst::Args { ref args } => {1305let mut s = "args".to_string();1306for arg in args {1307let preg = format_reg(arg.preg);1308let def = format_reg(arg.vreg.to_reg());1309write!(&mut s, " {def}={preg}").unwrap();1310}1311s1312}1313&Inst::Rets { ref rets } => {1314let mut s = "rets".to_string();1315for ret in rets {1316let preg = format_reg(ret.preg);1317let vreg = format_reg(ret.vreg);1318write!(&mut s, " {vreg}={preg}").unwrap();1319}1320s1321}1322&Inst::Ret {} => "ret".to_string(),13231324&MInst::Extend {1325rd,1326rn,1327signed,1328from_bits,1329..1330} => {1331let rn = format_reg(rn);1332let rd = format_reg(rd.to_reg());1333return if signed == false && from_bits == 8 {1334format!("andi {rd},{rn}")1335} else {1336let op = if signed { "srai" } else { "srli" };1337let shift_bits = (64 - from_bits) as i16;1338format!("slli {rd},{rn},{shift_bits}; {op} {rd},{rd},{shift_bits}")1339};1340}1341&MInst::Call { ref info } => {1342let try_call = info1343.try_call_info1344.as_ref()1345.map(|tci| pretty_print_try_call(tci))1346.unwrap_or_default();1347format!("call {}{try_call}", info.dest.display(None))1348}1349&MInst::CallInd { ref info } => {1350let rd = format_reg(info.dest);1351let try_call = info1352.try_call_info1353.as_ref()1354.map(|tci| pretty_print_try_call(tci))1355.unwrap_or_default();1356format!("callind {rd}{try_call}")1357}1358&MInst::ReturnCall { ref info } => {1359let mut s = format!(1360"return_call {:?} new_stack_arg_size:{}",1361info.dest, info.new_stack_arg_size1362);1363for ret in &info.uses {1364let preg = format_reg(ret.preg);1365let vreg = 
format_reg(ret.vreg);1366write!(&mut s, " {vreg}={preg}").unwrap();1367}1368s1369}1370&MInst::ReturnCallInd { ref info } => {1371let callee = format_reg(info.dest);1372let mut s = format!(1373"return_call_ind {callee} new_stack_arg_size:{}",1374info.new_stack_arg_size1375);1376for ret in &info.uses {1377let preg = format_reg(ret.preg);1378let vreg = format_reg(ret.vreg);1379write!(&mut s, " {vreg}={preg}").unwrap();1380}1381s1382}1383&MInst::TrapIf {1384rs1,1385rs2,1386cc,1387trap_code,1388} => {1389let rs1 = format_reg(rs1);1390let rs2 = format_reg(rs2);1391format!("trap_if {trap_code}##({rs1} {cc} {rs2})")1392}1393&MInst::Jal { label } => {1394format!("j {}", label.to_string())1395}1396&MInst::CondBr {1397taken,1398not_taken,1399kind,1400..1401} => {1402let rs1 = format_reg(kind.rs1);1403let rs2 = format_reg(kind.rs2);1404if not_taken.is_fallthrouh() && taken.as_label().is_none() {1405format!("{} {},{},0", kind.op_name(), rs1, rs2)1406} else {1407let x = format!(1408"{} {},{},taken({}),not_taken({})",1409kind.op_name(),1410rs1,1411rs2,1412taken,1413not_taken1414);1415x1416}1417}1418&MInst::Atomic {1419op,1420rd,1421addr,1422src,1423amo,1424} => {1425let op_name = op.op_name(amo);1426let addr = format_reg(addr);1427let src = format_reg(src);1428let rd = format_reg(rd.to_reg());1429if op.is_load() {1430format!("{op_name} {rd},({addr})")1431} else {1432format!("{op_name} {rd},{src},({addr})")1433}1434}1435&MInst::LoadExtNameGot { rd, ref name } => {1436let rd = format_reg(rd.to_reg());1437format!("load_ext_name_got {rd},{}", name.display(None))1438}1439&MInst::LoadExtNameNear {1440rd,1441ref name,1442offset,1443} => {1444let rd = format_reg(rd.to_reg());1445format!("load_ext_name_near {rd},{}{offset:+}", name.display(None))1446}1447&MInst::LoadExtNameFar {1448rd,1449ref name,1450offset,1451} => {1452let rd = format_reg(rd.to_reg());1453format!("load_ext_name_far {rd},{}{offset:+}", name.display(None))1454}1455&Inst::ElfTlsGetAddr { rd, ref name } => {1456let rd = 
format_reg(rd.to_reg());1457format!("elf_tls_get_addr {rd},{}", name.display(None))1458}1459&MInst::LoadAddr { ref rd, ref mem } => {1460let rs = mem.to_string();1461let rd = format_reg(rd.to_reg());1462format!("load_addr {rd},{rs}")1463}1464&MInst::Mov { rd, rm, ty } => {1465let rm = format_reg(rm);1466let rd = format_reg(rd.to_reg());14671468let op = match ty {1469F16 => "fmv.h",1470F32 => "fmv.s",1471F64 => "fmv.d",1472ty if ty.is_vector() => "vmv1r.v",1473_ => "mv",1474};14751476format!("{op} {rd},{rm}")1477}1478&MInst::MovFromPReg { rd, rm } => {1479let rd = format_reg(rd.to_reg());1480debug_assert!([px_reg(2), px_reg(8)].contains(&rm));1481let rm = reg_name(Reg::from(rm));1482format!("mv {rd},{rm}")1483}1484&MInst::Fence { pred, succ } => {1485format!(1486"fence {},{}",1487Inst::fence_req_to_string(pred),1488Inst::fence_req_to_string(succ),1489)1490}1491&MInst::Select {1492ref dst,1493condition,1494ref x,1495ref y,1496} => {1497let c_rs1 = format_reg(condition.rs1);1498let c_rs2 = format_reg(condition.rs2);1499let x = format_regs(x.regs());1500let y = format_regs(y.regs());1501let dst = dst.map(|r| r.to_reg());1502let dst = format_regs(dst.regs());1503format!(1504"select {},{},{}##condition=({} {} {})",1505dst,1506x,1507y,1508c_rs1,1509condition.kind.to_static_str(),1510c_rs21511)1512}1513&MInst::Udf { trap_code } => format!("udf##trap_code={trap_code}"),1514&MInst::EBreak {} => String::from("ebreak"),1515&Inst::VecAluRRRR {1516op,1517vd,1518vd_src,1519vs1,1520vs2,1521ref mask,1522ref vstate,1523} => {1524let vs1_s = format_reg(vs1);1525let vs2_s = format_reg(vs2);1526let vd_src_s = format_reg(vd_src);1527let vd_s = format_reg(vd.to_reg());1528let mask = format_mask(mask);15291530let vd_fmt = if vd_s != vd_src_s {1531format!("{vd_s},{vd_src_s}")1532} else {1533vd_s1534};15351536// Note: vs2 and vs1 here are opposite to the standard scalar ordering.1537// This is noted in Section 10.1 of the RISC-V Vector spec.1538format!("{op} {vd_fmt},{vs2_s},{vs1_s}{mask} 
{vstate}")1539}1540&Inst::VecAluRRRImm5 {1541op,1542vd,1543imm,1544vs2,1545ref mask,1546ref vstate,1547..1548} => {1549let vs2_s = format_reg(vs2);1550let vd_s = format_reg(vd.to_reg());1551let mask = format_mask(mask);15521553// Some opcodes interpret the immediate as unsigned, lets show the1554// correct number here.1555let imm_s = if op.imm_is_unsigned() {1556format!("{}", imm.bits())1557} else {1558format!("{imm}")1559};15601561format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}")1562}1563&Inst::VecAluRRR {1564op,1565vd,1566vs1,1567vs2,1568ref mask,1569ref vstate,1570} => {1571let vs1_s = format_reg(vs1);1572let vs2_s = format_reg(vs2);1573let vd_s = format_reg(vd.to_reg());1574let mask = format_mask(mask);15751576// Note: vs2 and vs1 here are opposite to the standard scalar ordering.1577// This is noted in Section 10.1 of the RISC-V Vector spec.1578match (op, vs2, vs1) {1579(VecAluOpRRR::VrsubVX, _, vs1) if vs1 == zero_reg() => {1580format!("vneg.v {vd_s},{vs2_s}{mask} {vstate}")1581}1582(VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => {1583format!("vfneg.v {vd_s},{vs2_s}{mask} {vstate}")1584}1585(VecAluOpRRR::VfsgnjxVV, vs2, vs1) if vs2 == vs1 => {1586format!("vfabs.v {vd_s},{vs2_s}{mask} {vstate}")1587}1588(VecAluOpRRR::VmnandMM, vs2, vs1) if vs2 == vs1 => {1589format!("vmnot.m {vd_s},{vs2_s}{mask} {vstate}")1590}1591_ => format!("{op} {vd_s},{vs2_s},{vs1_s}{mask} {vstate}"),1592}1593}1594&Inst::VecAluRRImm5 {1595op,1596vd,1597imm,1598vs2,1599ref mask,1600ref vstate,1601} => {1602let vs2_s = format_reg(vs2);1603let vd_s = format_reg(vd.to_reg());1604let mask = format_mask(mask);16051606// Some opcodes interpret the immediate as unsigned, lets show the1607// correct number here.1608let imm_s = if op.imm_is_unsigned() {1609format!("{}", imm.bits())1610} else {1611format!("{imm}")1612};16131614match (op, imm) {1615(VecAluOpRRImm5::VxorVI, imm) if imm == Imm5::maybe_from_i8(-1).unwrap() => {1616format!("vnot.v {vd_s},{vs2_s}{mask} {vstate}")1617}1618_ => 
format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}"),
                }
            }
            &Inst::VecAluRR {
                op,
                vd,
                vs,
                ref mask,
                ref vstate,
            } => {
                let vs_s = format_reg(vs);
                let vd_s = format_reg(vd.to_reg());
                let mask = format_mask(mask);

                format!("{op} {vd_s},{vs_s}{mask} {vstate}")
            }
            &Inst::VecAluRImm5 {
                op,
                vd,
                imm,
                ref mask,
                ref vstate,
            } => {
                let vd_s = format_reg(vd.to_reg());
                let mask = format_mask(mask);

                format!("{op} {vd_s},{imm}{mask} {vstate}")
            }
            &Inst::VecSetState { rd, ref vstate } => {
                let rd_s = format_reg(rd.to_reg());
                // Only a statically-known AVL can be printed as `vsetivli`
                // (the immediate-AVL form of the vset* instructions).
                assert!(vstate.avl.is_static());
                format!("vsetivli {}, {}, {}", rd_s, vstate.avl, vstate.vtype)
            }
            Inst::VecLoad {
                eew,
                to,
                from,
                mask,
                vstate,
                ..
            } => {
                let base = format_vec_amode(from);
                let vd = format_reg(to.to_reg());
                let mask = format_mask(mask);

                format!("vl{eew}.v {vd},{base}{mask} {vstate}")
            }
            Inst::VecStore {
                eew,
                to,
                from,
                mask,
                vstate,
                ..
            } => {
                let dst = format_vec_amode(to);
                let vs3 = format_reg(*from);
                let mask = format_mask(mask);

                format!("vs{eew}.v {vs3},{dst}{mask} {vstate}")
            }
            Inst::EmitIsland { needed_space } => {
                format!("emit_island {needed_space}")
            }

            Inst::LabelAddress { dst, label } => {
                let dst = format_reg(dst.to_reg());
                format!("label_address {dst}, {label:?}")
            }

            Inst::SequencePoint {} => {
                format!("sequence_point")
            }
        }
    }
}

/// Different forms of label references for different instruction formats.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// 20-bit branch offset (unconditional branches). PC-rel, offset is
    /// imm << 1. Immediate is 20 signed bits. Use in Jal instructions.
    Jal20,

    /// The unconditional jump instructions all use PC-relative
    /// addressing to help support position independent code. The JALR
    /// instruction was defined to enable a two-instruction sequence to
    /// jump anywhere in a 32-bit absolute address range. A LUI
    /// instruction can first load rs1 with the upper 20 bits of a
    /// target address, then JALR can add in the lower bits. Similarly,
    /// AUIPC then JALR can jump anywhere in a 32-bit pc-relative
    /// address range.
    PCRel32,

    /// All branch instructions use the B-type instruction format. The
    /// 12-bit B-immediate encodes signed offsets in multiples of 2, and
    /// is added to the current pc to give the target address. The
    /// conditional branch range is ±4 KiB.
    B12,

    /// Equivalent to the `R_RISCV_PCREL_HI20` relocation; allows setting
    /// the immediate field of an `auipc` instruction.
    PCRelHi20,

    /// Similar to the `R_RISCV_PCREL_LO12_I` relocation but pointing to
    /// the final address, instead of the `PCREL_HI20` label. Allows setting
    /// the immediate field of I Type instructions such as `addi` or `lw`.
    ///
    /// Since we currently don't support offsets in labels, this relocation has
    /// an implicit offset of 4.
    PCRelLo12I,

    /// 11-bit PC-relative jump offset. Equivalent to the `RVC_JUMP` relocation.
    RVCJump,
}

impl MachInstLabelUse for LabelUse {
    /// Alignment for veneer code. Every Riscv64 instruction must be
    /// 4-byte-aligned.
    const ALIGN: CodeOffset = 4;

    /// Maximum PC-relative range (positive), inclusive.
    fn max_pos_range(self) -> CodeOffset {
        match self {
            // 20 signed bits scaled by 2: largest positive encodable offset.
            LabelUse::Jal20 => ((1 << 19) - 1) * 2,
            LabelUse::PCRelLo12I | LabelUse::PCRelHi20 | LabelUse::PCRel32 => {
                Inst::imm_max() as CodeOffset
            }
            // 12 signed bits scaled by 2.
            LabelUse::B12 => ((1 << 11) - 1) * 2,
            // 11 signed bits scaled by 2 (compressed jump).
            LabelUse::RVCJump => ((1 << 10) - 1) * 2,
        }
    }

    /// Maximum PC-relative range (negative).
    fn max_neg_range(self) -> CodeOffset {
        match self {
            LabelUse::PCRel32 => Inst::imm_min().abs() as CodeOffset,
            // For the signed, even-multiple offset fields, the most negative
            // encodable value reaches one step (2 bytes) further than the
            // most positive one.
            _ => self.max_pos_range() + 2,
        }
    }

    /// Size of window into code needed to do the patch.
    fn patch_size(self) -> CodeOffset {
        match self {
            // A single 16-bit compressed instruction.
            LabelUse::RVCJump => 2,
            // A single 32-bit instruction.
            LabelUse::Jal20 | LabelUse::B12 | LabelUse::PCRelHi20 | LabelUse::PCRelLo12I => 4,
            // An auipc+jalr pair.
            LabelUse::PCRel32 => 8,
        }
    }

    /// Perform the patch.
    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        // All instruction boundaries (and hence all offsets) are 2-byte
        // aligned on RISC-V with the compressed extension.
        assert!(use_offset % 2 == 0);
        assert!(label_offset % 2 == 0);
        let offset = (label_offset as i64) - (use_offset as i64);

        // Re-check that the offset fits this label-use kind's range before
        // encoding it; going out of range here is a compiler bug.
        assert!(
            offset >= -(self.max_neg_range() as i64) && offset <= (self.max_pos_range() as i64),
            "{self:?} offset '{offset}' use_offset:'{use_offset}' label_offset:'{label_offset}' must not exceed max range.",
        );
        self.patch_raw_offset(buffer, offset);
    }

    /// Is a veneer supported for this label reference type?
    fn supports_veneer(self) -> bool {
        match self {
            Self::Jal20 | Self::B12 | Self::RVCJump => true,
            _ => false,
        }
    }

    /// How large is the veneer, if supported?
    fn veneer_size(self) -> CodeOffset {
        match self {
            // The veneer is always the 8-byte auipc+jalr pair emitted by
            // `generate_veneer` below.
            Self::B12 | Self::Jal20 | Self::RVCJump => 8,
            _ => unreachable!(),
        }
    }

    /// Upper bound on veneer size over all label-use kinds (the 8-byte
    /// auipc+jalr pair).
    fn worst_case_veneer_size() -> CodeOffset {
        8
    }

    /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
    /// an offset and label-use for the veneer's use of the original label.
    fn generate_veneer(
        self,
        buffer: &mut [u8],
        veneer_offset: CodeOffset,
    ) -> (CodeOffset, LabelUse) {
        let base = writable_spilltmp_reg();
        // auipc spilltmp, 0 — the zero immediates below are placeholders;
        // they will be filled in when the PCRel32 use is itself patched.
        {
            let x = enc_auipc(base, Imm20::ZERO).to_le_bytes();
            buffer[0] = x[0];
            buffer[1] = x[1];
            buffer[2] = x[2];
            buffer[3] = x[3];
        }
        // jalr zero, spilltmp, 0 — jump without linking.
        {
            let x = enc_jalr(writable_zero_reg(), base.to_reg(), Imm12::ZERO).to_le_bytes();
            buffer[4] = x[0];
            buffer[5] = x[1];
            buffer[6] = x[2];
            buffer[7] = x[3];
        }
        // The veneer itself references the original label with a PCRel32 use
        // anchored at the start of the veneer.
        (veneer_offset, Self::PCRel32)
    }

    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> {
        match (reloc, addend) {
            (Reloc::RiscvCallPlt, _) => Some(Self::PCRel32),
            _ => None,
        }
    }
}

impl LabelUse {
    /// Returns whether `offset` is encodable for this label-use kind.
    #[expect(dead_code, reason = "in case it's needed in the future")]
    fn offset_in_range(self, offset: i64) -> bool {
        let min = -(self.max_neg_range() as i64);
        let max = self.max_pos_range() as i64;
        offset >= min && offset <= max
    }

    /// OR the (already range-checked) `offset` into the instruction bytes at
    /// the start of `buffer`, according to this label-use kind's encoding.
    fn patch_raw_offset(self, buffer: &mut [u8], offset: i64) {
        // Read the existing instruction so patched fields can be OR-ed in:
        // 16 bits for the compressed jump, 32 bits for everything else.
        let insn = match self {
            LabelUse::RVCJump => u16::from_le_bytes(buffer[..2].try_into().unwrap()) as u32,
            _ => u32::from_le_bytes(buffer[..4].try_into().unwrap()),
        };

        match self {
            LabelUse::Jal20 => {
                // J-type immediate scatter:
                //   inst[31]    = offset[20]
                //   inst[30:21] = offset[10:1]
                //   inst[20]    = offset[11]
                //   inst[19:12] = offset[19:12]
                let offset = offset as u32;
                let v = ((offset >> 12 & 0b1111_1111) << 12)
                    | ((offset >> 11 & 0b1) << 20)
                    | ((offset >> 1 & 0b11_1111_1111) << 21)
                    | ((offset >> 20 & 0b1) << 31);
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
            }
            LabelUse::PCRel32 => {
                // Second instruction of the auipc+jalr pair.
                let insn2 = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]);
                Inst::generate_imm(offset as u64)
                    .map(|(imm20, imm12)| {
                        // Encode the OR-ed-in value with zero_reg(). The
                        // register parameter must be in the original
                        // encoded instruction and or'ing in zeroes does not
                        // change it.
                        buffer[0..4].clone_from_slice(&u32::to_le_bytes(
                            insn | enc_auipc(writable_zero_reg(), imm20),
                        ));
                        buffer[4..8].clone_from_slice(&u32::to_le_bytes(
                            insn2 | enc_jalr(writable_zero_reg(), zero_reg(), imm12),
                        ));
                    })
                    // The range was validated in `patch` before we got here,
                    // so `generate_imm` must succeed; failure is a compiler bug.
                    .expect("we have check the range before,this is a compiler error.");
            }

            LabelUse::B12 => {
                // B-type immediate scatter:
                //   inst[31]    = offset[12]
                //   inst[30:25] = offset[10:5]
                //   inst[11:8]  = offset[4:1]
                //   inst[7]     = offset[11]
                let offset = offset as u32;
                let v = ((offset >> 11 & 0b1) << 7)
                    | ((offset >> 1 & 0b1111) << 8)
                    | ((offset >> 5 & 0b11_1111) << 25)
                    | ((offset >> 12 & 0b1) << 31);
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn | v));
            }

            LabelUse::PCRelHi20 => {
                // See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
                //
                // We need to add 0x800 to ensure that we land at the next page as soon as it goes out of range for the
                // Lo12 relocation. That relocation is signed and has a maximum range of -2048..2047. So when we get an
                // offset of 2048, we need to land at the next page and subtract instead.
                let offset = offset as u32;
                let hi20 = offset.wrapping_add(0x800) >> 12;
                // Keep the low 12 bits (opcode + rd), replace the U-type immediate.
                let insn = (insn & 0xFFF) | (hi20 << 12);
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn));
            }

            LabelUse::PCRelLo12I => {
                // `offset` is the offset from the current instruction to the target address.
                //
                // However we are trying to compute the offset to the target address from the previous instruction.
                // The previous instruction should be the one that contains the PCRelHi20 relocation and
                // stores/references the program counter (`auipc` usually).
                //
                // Since we are trying to compute the offset from the previous instruction, we can
                // represent it as offset = target_address - (current_instruction_address - 4)
                // which is equivalent to offset = target_address - current_instruction_address + 4.
                //
                // Thus we need to add 4 to the offset here.
                let lo12 = (offset + 4) as u32 & 0xFFF;
                // Keep the low 20 bits (opcode + rd + funct3 + rs1), replace
                // the I-type immediate in the top 12 bits.
                let insn = (insn & 0xFFFFF) | (lo12 << 20);
                buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn));
            }
            LabelUse::RVCJump => {
                // Compressed offsets are in multiples of 2.
                debug_assert!(offset & 1 == 0);

                // We currently only support this for the C.J operation, so assert that is the opcode in
                // the buffer.
                debug_assert_eq!(insn & 0xFFFF, 0xA001);

                buffer[0..2].clone_from_slice(&u16::to_le_bytes(encode_cj_type(
                    CjOp::CJ,
                    Imm12::from_i16(i16::try_from(offset).unwrap()),
                )));
            }
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    // Sanity-check the range arithmetic in `max_pos_range`/`max_neg_range`.
    #[test]
    fn label_use_max_range() {
        assert!(LabelUse::B12.max_neg_range() == LabelUse::B12.max_pos_range() + 2);
        assert!(LabelUse::Jal20.max_neg_range() == LabelUse::Jal20.max_pos_range() + 2);
        assert!(LabelUse::PCRel32.max_pos_range() == (Inst::imm_max() as CodeOffset));
        assert!(LabelUse::PCRel32.max_neg_range() == (Inst::imm_min().abs() as CodeOffset));
        assert!(LabelUse::B12.max_pos_range() == ((1 << 11) - 1) * 2);
    }
}