Path: blob/main/cranelift/codegen/src/isa/riscv64/inst/mod.rs
1693 views
//! This module defines riscv64-specific machine instruction types.12use super::lower::isle::generated_code::{VecAMode, VecElementWidth, VecOpMasking};3use crate::binemit::{Addend, CodeOffset, Reloc};4pub use crate::ir::condcodes::IntCC;5use crate::ir::types::{self, F16, F32, F64, F128, I8, I8X16, I16, I32, I64, I128};67pub use crate::ir::{ExternalName, MemFlags, Type};8use crate::isa::{CallConv, FunctionAlignment};9use crate::machinst::*;10use crate::{CodegenError, CodegenResult, settings};1112pub use crate::ir::condcodes::FloatCC;1314use alloc::vec::Vec;15use regalloc2::RegClass;16use smallvec::{SmallVec, smallvec};17use std::boxed::Box;18use std::fmt::Write;19use std::string::{String, ToString};2021pub mod regs;22pub use self::regs::*;23pub mod imms;24pub use self::imms::*;25pub mod args;26pub use self::args::*;27pub mod emit;28pub use self::emit::*;29pub mod vector;30pub use self::vector::*;31pub mod encode;32pub use self::encode::*;33pub mod unwind;3435use crate::isa::riscv64::abi::Riscv64MachineDeps;3637#[cfg(test)]38mod emit_tests;3940use std::fmt::{Display, Formatter};4142pub(crate) type VecU8 = Vec<u8>;4344//=============================================================================45// Instructions (top level): definition4647pub use crate::isa::riscv64::lower::isle::generated_code::{48AluOPRRI, AluOPRRR, AtomicOP, CSR, CsrImmOP, CsrRegOP, FClassResult, FFlagsException, FRM,49FpuOPRR, FpuOPRRR, FpuOPRRRR, LoadOP, MInst as Inst, StoreOP,50};51use crate::isa::riscv64::lower::isle::generated_code::{CjOp, MInst, VecAluOpRRImm5, VecAluOpRRR};5253/// Additional information for `return_call[_ind]` instructions, left out of54/// line to lower the size of the `Inst` enum.55#[derive(Clone, Debug)]56pub struct ReturnCallInfo<T> {57pub dest: T,58pub uses: CallArgList,59pub new_stack_arg_size: u32,60}6162/// A conditional branch target.63#[derive(Clone, Copy, Debug, PartialEq, Eq)]64pub enum CondBrTarget {65/// An unresolved reference to a Label, as passed into66/// 
`lower_branch_group()`.67Label(MachLabel),68/// No jump; fall through to the next instruction.69Fallthrough,70}7172impl CondBrTarget {73/// Return the target's label, if it is a label-based target.74pub(crate) fn as_label(self) -> Option<MachLabel> {75match self {76CondBrTarget::Label(l) => Some(l),77_ => None,78}79}8081pub(crate) fn is_fallthrouh(&self) -> bool {82self == &CondBrTarget::Fallthrough83}84}8586impl Display for CondBrTarget {87fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {88match self {89CondBrTarget::Label(l) => write!(f, "{}", l.to_string()),90CondBrTarget::Fallthrough => write!(f, "0"),91}92}93}9495pub(crate) fn enc_auipc(rd: Writable<Reg>, imm: Imm20) -> u32 {96let x = 0b0010111 | reg_to_gpr_num(rd.to_reg()) << 7 | imm.bits() << 12;97x98}99100pub(crate) fn enc_jalr(rd: Writable<Reg>, base: Reg, offset: Imm12) -> u32 {101let x = 0b1100111102| reg_to_gpr_num(rd.to_reg()) << 7103| 0b000 << 12104| reg_to_gpr_num(base) << 15105| offset.bits() << 20;106x107}108109/// rd and src must have the same length.110pub(crate) fn gen_moves(rd: &[Writable<Reg>], src: &[Reg]) -> SmallInstVec<Inst> {111assert!(rd.len() == src.len());112assert!(rd.len() > 0);113let mut insts = SmallInstVec::new();114for (dst, src) in rd.iter().zip(src.iter()) {115let ty = Inst::canonical_type_for_rc(dst.to_reg().class());116insts.push(Inst::gen_move(*dst, *src, ty));117}118insts119}120121impl Inst {122/// RISC-V can have multiple instruction sizes. 
2 bytes for compressed123/// instructions, 4 for regular instructions, 6 and 8 byte instructions124/// are also being considered.125const UNCOMPRESSED_INSTRUCTION_SIZE: i32 = 4;126127#[inline]128pub(crate) fn load_imm12(rd: Writable<Reg>, imm: Imm12) -> Inst {129Inst::AluRRImm12 {130alu_op: AluOPRRI::Addi,131rd,132rs: zero_reg(),133imm12: imm,134}135}136137/// Immediates can be loaded using lui and addi instructions.138fn load_const_imm(rd: Writable<Reg>, value: u64) -> Option<SmallInstVec<Inst>> {139Inst::generate_imm(value).map(|(imm20, imm12)| {140let mut insts = SmallVec::new();141142let imm20_is_zero = imm20.as_i32() == 0;143let imm12_is_zero = imm12.as_i16() == 0;144145let rs = if !imm20_is_zero {146insts.push(Inst::Lui { rd, imm: imm20 });147rd.to_reg()148} else {149zero_reg()150};151152// We also need to emit the addi if the value is 0, otherwise we just153// won't produce any instructions.154if !imm12_is_zero || (imm20_is_zero && imm12_is_zero) {155insts.push(Inst::AluRRImm12 {156alu_op: AluOPRRI::Addi,157rd,158rs,159imm12,160})161}162163insts164})165}166167pub(crate) fn load_constant_u32(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {168let insts = Inst::load_const_imm(rd, value);169insts.unwrap_or_else(|| {170smallvec![Inst::LoadInlineConst {171rd,172ty: I32,173imm: value174}]175})176}177178pub fn load_constant_u64(rd: Writable<Reg>, value: u64) -> SmallInstVec<Inst> {179let insts = Inst::load_const_imm(rd, value);180insts.unwrap_or_else(|| {181smallvec![Inst::LoadInlineConst {182rd,183ty: I64,184imm: value185}]186})187}188189pub(crate) fn construct_auipc_and_jalr(190link: Option<Writable<Reg>>,191tmp: Writable<Reg>,192offset: i64,193) -> [Inst; 2] {194Inst::generate_imm(offset as u64)195.map(|(imm20, imm12)| {196let a = Inst::Auipc {197rd: tmp,198imm: imm20,199};200let b = Inst::Jalr {201rd: link.unwrap_or(writable_zero_reg()),202base: tmp.to_reg(),203offset: imm12,204};205[a, b]206})207.expect("code range is too big.")208}209210/// Generic 
constructor for a load (zero-extending where appropriate).211pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {212if ty.is_vector() {213Inst::VecLoad {214eew: VecElementWidth::from_type(ty),215to: into_reg,216from: VecAMode::UnitStride { base: mem },217flags,218mask: VecOpMasking::Disabled,219vstate: VState::from_type(ty),220}221} else {222Inst::Load {223rd: into_reg,224op: LoadOP::from_type(ty),225from: mem,226flags,227}228}229}230231/// Generic constructor for a store.232pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {233if ty.is_vector() {234Inst::VecStore {235eew: VecElementWidth::from_type(ty),236to: VecAMode::UnitStride { base: mem },237from: from_reg,238flags,239mask: VecOpMasking::Disabled,240vstate: VState::from_type(ty),241}242} else {243Inst::Store {244src: from_reg,245op: StoreOP::from_type(ty),246to: mem,247flags,248}249}250}251}252253//=============================================================================254255fn vec_mask_operands(mask: &mut VecOpMasking, collector: &mut impl OperandVisitor) {256match mask {257VecOpMasking::Enabled { reg } => {258collector.reg_fixed_use(reg, pv_reg(0).into());259}260VecOpMasking::Disabled => {}261}262}263fn vec_mask_late_operands(mask: &mut VecOpMasking, collector: &mut impl OperandVisitor) {264match mask {265VecOpMasking::Enabled { reg } => {266collector.reg_fixed_late_use(reg, pv_reg(0).into());267}268VecOpMasking::Disabled => {}269}270}271272fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {273match inst {274Inst::Nop0 | Inst::Nop4 => {}275Inst::BrTable {276index, tmp1, tmp2, ..277} => {278collector.reg_use(index);279collector.reg_early_def(tmp1);280collector.reg_early_def(tmp2);281}282Inst::Auipc { rd, .. } => collector.reg_def(rd),283Inst::Lui { rd, .. } => collector.reg_def(rd),284Inst::Fli { rd, .. } => collector.reg_def(rd),285Inst::LoadInlineConst { rd, .. 
} => collector.reg_def(rd),286Inst::AluRRR { rd, rs1, rs2, .. } => {287collector.reg_use(rs1);288collector.reg_use(rs2);289collector.reg_def(rd);290}291Inst::FpuRRR { rd, rs1, rs2, .. } => {292collector.reg_use(rs1);293collector.reg_use(rs2);294collector.reg_def(rd);295}296Inst::AluRRImm12 { rd, rs, .. } => {297collector.reg_use(rs);298collector.reg_def(rd);299}300Inst::CsrReg { rd, rs, .. } => {301collector.reg_use(rs);302collector.reg_def(rd);303}304Inst::CsrImm { rd, .. } => {305collector.reg_def(rd);306}307Inst::Load { rd, from, .. } => {308from.get_operands(collector);309collector.reg_def(rd);310}311Inst::Store { to, src, .. } => {312to.get_operands(collector);313collector.reg_use(src);314}315316Inst::Args { args } => {317for ArgPair { vreg, preg } in args {318collector.reg_fixed_def(vreg, *preg);319}320}321Inst::Rets { rets } => {322for RetPair { vreg, preg } in rets {323collector.reg_fixed_use(vreg, *preg);324}325}326Inst::Ret { .. } => {}327328Inst::Extend { rd, rn, .. } => {329collector.reg_use(rn);330collector.reg_def(rd);331}332Inst::Call { info, .. } => {333let CallInfo { uses, defs, .. } = &mut **info;334for CallArgPair { vreg, preg } in uses {335collector.reg_fixed_use(vreg, *preg);336}337for CallRetPair { vreg, location } in defs {338match location {339RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),340RetLocation::Stack(..) => collector.any_def(vreg),341}342}343collector.reg_clobbers(info.clobbers);344if let Some(try_call_info) = &mut info.try_call_info {345try_call_info.collect_operands(collector);346}347}348Inst::CallInd { info } => {349let CallInfo {350dest, uses, defs, ..351} = &mut **info;352collector.reg_use(dest);353for CallArgPair { vreg, preg } in uses {354collector.reg_fixed_use(vreg, *preg);355}356for CallRetPair { vreg, location } in defs {357match location {358RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),359RetLocation::Stack(..) 
=> collector.any_def(vreg),360}361}362collector.reg_clobbers(info.clobbers);363if let Some(try_call_info) = &mut info.try_call_info {364try_call_info.collect_operands(collector);365}366}367Inst::ReturnCall { info } => {368for CallArgPair { vreg, preg } in &mut info.uses {369collector.reg_fixed_use(vreg, *preg);370}371}372Inst::ReturnCallInd { info } => {373// TODO(https://github.com/bytecodealliance/regalloc2/issues/145):374// This shouldn't be a fixed register constraint.375collector.reg_fixed_use(&mut info.dest, x_reg(5));376377for CallArgPair { vreg, preg } in &mut info.uses {378collector.reg_fixed_use(vreg, *preg);379}380}381Inst::Jal { .. } => {382// JAL technically has a rd register, but we currently always383// hardcode it to x0.384}385Inst::CondBr {386kind: IntegerCompare { rs1, rs2, .. },387..388} => {389collector.reg_use(rs1);390collector.reg_use(rs2);391}392Inst::LoadExtNameGot { rd, .. }393| Inst::LoadExtNameNear { rd, .. }394| Inst::LoadExtNameFar { rd, .. } => {395collector.reg_def(rd);396}397Inst::ElfTlsGetAddr { rd, .. } => {398// x10 is a0 which is both the first argument and the first return value.399collector.reg_fixed_def(rd, a0());400let mut clobbers =401Riscv64MachineDeps::get_regs_clobbered_by_call(CallConv::SystemV, false);402clobbers.remove(px_reg(10));403collector.reg_clobbers(clobbers);404}405Inst::LoadAddr { rd, mem } => {406mem.get_operands(collector);407collector.reg_early_def(rd);408}409410Inst::Mov { rd, rm, .. } => {411collector.reg_use(rm);412collector.reg_def(rd);413}414Inst::MovFromPReg { rd, rm } => {415debug_assert!([px_reg(2), px_reg(8)].contains(rm));416collector.reg_def(rd);417}418Inst::Fence { .. } => {}419Inst::EBreak => {}420Inst::Udf { .. } => {}421Inst::FpuRR { rd, rs, .. } => {422collector.reg_use(rs);423collector.reg_def(rd);424}425Inst::FpuRRRR {426rd, rs1, rs2, rs3, ..427} => {428collector.reg_use(rs1);429collector.reg_use(rs2);430collector.reg_use(rs3);431collector.reg_def(rd);432}433434Inst::Jalr { rd, base, .. 
} => {435collector.reg_use(base);436collector.reg_def(rd);437}438Inst::Atomic { rd, addr, src, .. } => {439collector.reg_use(addr);440collector.reg_use(src);441collector.reg_def(rd);442}443Inst::Select {444dst,445condition: IntegerCompare { rs1, rs2, .. },446x,447y,448..449} => {450// Mark the condition registers as late use so that they don't overlap with the destination451// register. We may potentially write to the destination register before evaluating the452// condition.453collector.reg_late_use(rs1);454collector.reg_late_use(rs2);455456for reg in x.regs_mut() {457collector.reg_use(reg);458}459for reg in y.regs_mut() {460collector.reg_use(reg);461}462463// If there's more than one destination register then use464// `reg_early_def` to prevent destination registers from overlapping465// with any operands. This ensures that the lowering doesn't have to466// deal with a situation such as when the input registers need to be467// swapped when moved to the destination.468//469// When there's only one destination register though don't use an470// early def because once the register is written no other inputs471// are read so it's ok for the destination to overlap the sources.472// The condition registers are already marked as late use so they473// won't overlap with the destination.474match dst.regs_mut() {475[reg] => collector.reg_def(reg),476regs => {477for d in regs {478collector.reg_early_def(d);479}480}481}482}483Inst::AtomicCas {484offset,485t0,486dst,487e,488addr,489v,490..491} => {492collector.reg_use(offset);493collector.reg_use(e);494collector.reg_use(addr);495collector.reg_use(v);496collector.reg_early_def(t0);497collector.reg_early_def(dst);498}499500Inst::RawData { .. } => {}501Inst::AtomicStore { src, p, .. } => {502collector.reg_use(src);503collector.reg_use(p);504}505Inst::AtomicLoad { rd, p, .. 
} => {506collector.reg_use(p);507collector.reg_def(rd);508}509Inst::AtomicRmwLoop {510offset,511dst,512p,513x,514t0,515..516} => {517collector.reg_use(offset);518collector.reg_use(p);519collector.reg_use(x);520collector.reg_early_def(t0);521collector.reg_early_def(dst);522}523Inst::TrapIf { rs1, rs2, .. } => {524collector.reg_use(rs1);525collector.reg_use(rs2);526}527Inst::Unwind { .. } => {}528Inst::DummyUse { reg } => {529collector.reg_use(reg);530}531Inst::Popcnt {532sum, step, rs, tmp, ..533} => {534collector.reg_use(rs);535collector.reg_early_def(tmp);536collector.reg_early_def(step);537collector.reg_early_def(sum);538}539Inst::Cltz {540sum, step, tmp, rs, ..541} => {542collector.reg_use(rs);543collector.reg_early_def(tmp);544collector.reg_early_def(step);545collector.reg_early_def(sum);546}547Inst::Brev8 {548rs,549rd,550step,551tmp,552tmp2,553..554} => {555collector.reg_use(rs);556collector.reg_early_def(step);557collector.reg_early_def(tmp);558collector.reg_early_def(tmp2);559collector.reg_early_def(rd);560}561Inst::StackProbeLoop { .. } => {562// StackProbeLoop has a tmp register and StackProbeLoop used at gen_prologue.563// t3 will do the job. 
(t3 is caller-save register and not used directly by compiler like writable_spilltmp_reg)564// gen_prologue is called at emit stage.565// no need let reg alloc know.566}567Inst::VecAluRRRR {568op,569vd,570vd_src,571vs1,572vs2,573mask,574..575} => {576debug_assert_eq!(vd_src.class(), RegClass::Vector);577debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);578debug_assert_eq!(vs2.class(), RegClass::Vector);579debug_assert_eq!(vs1.class(), op.vs1_regclass());580581collector.reg_late_use(vs1);582collector.reg_late_use(vs2);583collector.reg_use(vd_src);584collector.reg_reuse_def(vd, 2); // `vd` == `vd_src`.585vec_mask_late_operands(mask, collector);586}587Inst::VecAluRRRImm5 {588op,589vd,590vd_src,591vs2,592mask,593..594} => {595debug_assert_eq!(vd_src.class(), RegClass::Vector);596debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);597debug_assert_eq!(vs2.class(), RegClass::Vector);598599// If the operation forbids source/destination overlap we need to600// ensure that the source and destination registers are different.601if op.forbids_overlaps(mask) {602collector.reg_late_use(vs2);603collector.reg_use(vd_src);604collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`.605vec_mask_late_operands(mask, collector);606} else {607collector.reg_use(vs2);608collector.reg_use(vd_src);609collector.reg_reuse_def(vd, 1); // `vd` == `vd_src`.610vec_mask_operands(mask, collector);611}612}613Inst::VecAluRRR {614op,615vd,616vs1,617vs2,618mask,619..620} => {621debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);622debug_assert_eq!(vs2.class(), RegClass::Vector);623debug_assert_eq!(vs1.class(), op.vs1_regclass());624625collector.reg_use(vs1);626collector.reg_use(vs2);627628// If the operation forbids source/destination overlap, then we must629// register it as an early_def. 
This encodes the constraint that630// these must not overlap.631if op.forbids_overlaps(mask) {632collector.reg_early_def(vd);633} else {634collector.reg_def(vd);635}636637vec_mask_operands(mask, collector);638}639Inst::VecAluRRImm5 {640op, vd, vs2, mask, ..641} => {642debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);643debug_assert_eq!(vs2.class(), RegClass::Vector);644645collector.reg_use(vs2);646647// If the operation forbids source/destination overlap, then we must648// register it as an early_def. This encodes the constraint that649// these must not overlap.650if op.forbids_overlaps(mask) {651collector.reg_early_def(vd);652} else {653collector.reg_def(vd);654}655656vec_mask_operands(mask, collector);657}658Inst::VecAluRR {659op, vd, vs, mask, ..660} => {661debug_assert_eq!(vd.to_reg().class(), op.dst_regclass());662debug_assert_eq!(vs.class(), op.src_regclass());663664collector.reg_use(vs);665666// If the operation forbids source/destination overlap, then we must667// register it as an early_def. This encodes the constraint that668// these must not overlap.669if op.forbids_overlaps(mask) {670collector.reg_early_def(vd);671} else {672collector.reg_def(vd);673}674675vec_mask_operands(mask, collector);676}677Inst::VecAluRImm5 { op, vd, mask, .. } => {678debug_assert_eq!(vd.to_reg().class(), RegClass::Vector);679debug_assert!(!op.forbids_overlaps(mask));680681collector.reg_def(vd);682vec_mask_operands(mask, collector);683}684Inst::VecSetState { rd, .. } => {685collector.reg_def(rd);686}687Inst::VecLoad { to, from, mask, .. } => {688from.get_operands(collector);689collector.reg_def(to);690vec_mask_operands(mask, collector);691}692Inst::VecStore { to, from, mask, .. } => {693to.get_operands(collector);694collector.reg_use(from);695vec_mask_operands(mask, collector);696}697Inst::EmitIsland { .. } => {}698Inst::LabelAddress { dst, .. 
} => {699collector.reg_def(dst);700}701}702}703704impl MachInst for Inst {705type LabelUse = LabelUse;706type ABIMachineSpec = Riscv64MachineDeps;707708// https://github.com/riscv/riscv-isa-manual/issues/850709// all zero will cause invalid opcode.710const TRAP_OPCODE: &'static [u8] = &[0; 4];711712fn gen_dummy_use(reg: Reg) -> Self {713Inst::DummyUse { reg }714}715716fn canonical_type_for_rc(rc: RegClass) -> Type {717match rc {718regalloc2::RegClass::Int => I64,719regalloc2::RegClass::Float => F64,720regalloc2::RegClass::Vector => I8X16,721}722}723724fn is_safepoint(&self) -> bool {725match self {726Inst::Call { .. } | Inst::CallInd { .. } => true,727_ => false,728}729}730731fn get_operands(&mut self, collector: &mut impl OperandVisitor) {732riscv64_get_operands(self, collector);733}734735fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {736match self {737Inst::Mov { rd, rm, .. } => Some((*rd, *rm)),738_ => None,739}740}741742fn is_included_in_clobbers(&self) -> bool {743match self {744&Inst::Args { .. } => false,745_ => true,746}747}748749fn is_trap(&self) -> bool {750match self {751Self::Udf { .. } => true,752_ => false,753}754}755756fn is_args(&self) -> bool {757match self {758Self::Args { .. } => true,759_ => false,760}761}762763fn call_type(&self) -> CallType {764match self {765Inst::Call { .. } | Inst::CallInd { .. } | Inst::ElfTlsGetAddr { .. } => {766CallType::Regular767}768769Inst::ReturnCall { .. } | Inst::ReturnCallInd { .. } => CallType::TailCall,770771_ => CallType::None,772}773}774775fn is_term(&self) -> MachTerminator {776match self {777&Inst::Jal { .. } => MachTerminator::Branch,778&Inst::CondBr { .. } => MachTerminator::Branch,779&Inst::Jalr { .. } => MachTerminator::Branch,780&Inst::Rets { .. } => MachTerminator::Ret,781&Inst::BrTable { .. } => MachTerminator::Branch,782&Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. 
} => MachTerminator::RetCall,783&Inst::Call { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,784&Inst::CallInd { ref info } if info.try_call_info.is_some() => MachTerminator::Branch,785_ => MachTerminator::None,786}787}788789fn is_mem_access(&self) -> bool {790panic!("TODO FILL ME OUT")791}792793fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {794let x = Inst::Mov {795rd: to_reg,796rm: from_reg,797ty,798};799x800}801802fn gen_nop(preferred_size: usize) -> Inst {803if preferred_size == 0 {804return Inst::Nop0;805}806// We can't give a NOP (or any insn) < 4 bytes.807assert!(preferred_size >= 4);808Inst::Nop4809}810811fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {812match ty {813I8 => Ok((&[RegClass::Int], &[I8])),814I16 => Ok((&[RegClass::Int], &[I16])),815I32 => Ok((&[RegClass::Int], &[I32])),816I64 => Ok((&[RegClass::Int], &[I64])),817F16 => Ok((&[RegClass::Float], &[F16])),818F32 => Ok((&[RegClass::Float], &[F32])),819F64 => Ok((&[RegClass::Float], &[F64])),820// FIXME(#8312): Add support for Q extension821F128 | I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),822_ if ty.is_vector() => {823debug_assert!(ty.bits() <= 512);824825// Here we only need to return a SIMD type with the same size as `ty`.826// We use these types for spills and reloads, so prefer types with lanes <= 31827// since that fits in the immediate field of `vsetivli`.828const SIMD_TYPES: [[Type; 1]; 6] = [829[types::I8X2],830[types::I8X4],831[types::I8X8],832[types::I8X16],833[types::I16X16],834[types::I32X16],835];836let idx = (ty.bytes().ilog2() - 1) as usize;837let ty = &SIMD_TYPES[idx][..];838839Ok((&[RegClass::Vector], ty))840}841_ => Err(CodegenError::Unsupported(format!(842"Unexpected SSA-value type: {ty}"843))),844}845}846847fn gen_jump(target: MachLabel) -> Inst {848Inst::Jal { label: target }849}850851fn worst_case_size() -> CodeOffset {852// Our worst case size is determined by the 
riscv64_worst_case_instruction_size test85384854}855856fn ref_type_regclass(_settings: &settings::Flags) -> RegClass {857RegClass::Int858}859860fn function_alignment() -> FunctionAlignment {861FunctionAlignment {862minimum: 2,863preferred: 4,864}865}866}867868//=============================================================================869// Pretty-printing of instructions.870pub fn reg_name(reg: Reg) -> String {871match reg.to_real_reg() {872Some(real) => match real.class() {873RegClass::Int => match real.hw_enc() {8740 => "zero".into(),8751 => "ra".into(),8762 => "sp".into(),8773 => "gp".into(),8784 => "tp".into(),8795..=7 => format!("t{}", real.hw_enc() - 5),8808 => "fp".into(),8819 => "s1".into(),88210..=17 => format!("a{}", real.hw_enc() - 10),88318..=27 => format!("s{}", real.hw_enc() - 16),88428..=31 => format!("t{}", real.hw_enc() - 25),885_ => unreachable!(),886},887RegClass::Float => match real.hw_enc() {8880..=7 => format!("ft{}", real.hw_enc() - 0),8898..=9 => format!("fs{}", real.hw_enc() - 8),89010..=17 => format!("fa{}", real.hw_enc() - 10),89118..=27 => format!("fs{}", real.hw_enc() - 16),89228..=31 => format!("ft{}", real.hw_enc() - 20),893_ => unreachable!(),894},895RegClass::Vector => format!("v{}", real.hw_enc()),896},897None => {898format!("{reg:?}")899}900}901}902903fn pretty_print_try_call(info: &TryCallInfo) -> String {904format!(905"; j {:?}; catch [{}]",906info.continuation,907info.pretty_print_dests()908)909}910911impl Inst {912fn print_with_state(&self, _state: &mut EmitState) -> String {913let format_reg = |reg: Reg| -> String { reg_name(reg) };914915let format_vec_amode = |amode: &VecAMode| -> String {916match amode {917VecAMode::UnitStride { base } => base.to_string(),918}919};920921let format_mask = |mask: &VecOpMasking| -> String {922match mask {923VecOpMasking::Enabled { reg } => format!(",{}.t", format_reg(*reg)),924VecOpMasking::Disabled => format!(""),925}926};927928let format_regs = |regs: &[Reg]| -> String {929let mut x = if 
regs.len() > 1 {930String::from("[")931} else {932String::default()933};934regs.iter().for_each(|i| {935x.push_str(format_reg(*i).as_str());936if *i != *regs.last().unwrap() {937x.push_str(",");938}939});940if regs.len() > 1 {941x.push_str("]");942}943x944};945let format_labels = |labels: &[MachLabel]| -> String {946if labels.len() == 0 {947return String::from("[_]");948}949let mut x = String::from("[");950labels.iter().for_each(|l| {951x.push_str(952format!(953"{:?}{}",954l,955if l != labels.last().unwrap() { "," } else { "" },956)957.as_str(),958);959});960x.push_str("]");961x962};963964fn format_frm(rounding_mode: FRM) -> String {965format!(",{}", rounding_mode.to_static_str())966}967968match self {969&Inst::Nop0 => {970format!("##zero length nop")971}972&Inst::Nop4 => {973format!("##fixed 4-size nop")974}975&Inst::StackProbeLoop {976guard_size,977probe_count,978tmp,979} => {980let tmp = format_reg(tmp.to_reg());981format!(982"inline_stack_probe##guard_size={guard_size} probe_count={probe_count} tmp={tmp}"983)984}985&Inst::AtomicStore { src, ty, p } => {986let src = format_reg(src);987let p = format_reg(p);988format!("atomic_store.{ty} {src},({p})")989}990&Inst::DummyUse { reg } => {991let reg = format_reg(reg);992format!("dummy_use {reg}")993}994995&Inst::AtomicLoad { rd, ty, p } => {996let p = format_reg(p);997let rd = format_reg(rd.to_reg());998format!("atomic_load.{ty} {rd},({p})")999}1000&Inst::AtomicRmwLoop {1001offset,1002op,1003dst,1004ty,1005p,1006x,1007t0,1008} => {1009let offset = format_reg(offset);1010let p = format_reg(p);1011let x = format_reg(x);1012let t0 = format_reg(t0.to_reg());1013let dst = format_reg(dst.to_reg());1014format!("atomic_rmw.{ty} {op} {dst},{x},({p})##t0={t0} offset={offset}")1015}10161017&Inst::RawData { ref data } => match data.len() {10184 => {1019let mut bytes = [0; 4];1020for i in 0..bytes.len() {1021bytes[i] = data[i];1022}1023format!(".4byte 0x{:x}", u32::from_le_bytes(bytes))1024}10258 => {1026let mut bytes = [0; 
8];1027for i in 0..bytes.len() {1028bytes[i] = data[i];1029}1030format!(".8byte 0x{:x}", u64::from_le_bytes(bytes))1031}1032_ => {1033format!(".data {data:?}")1034}1035},1036&Inst::Unwind { ref inst } => {1037format!("unwind {inst:?}")1038}1039&Inst::Brev8 {1040rs,1041ty,1042step,1043tmp,1044tmp2,1045rd,1046} => {1047let rs = format_reg(rs);1048let step = format_reg(step.to_reg());1049let tmp = format_reg(tmp.to_reg());1050let tmp2 = format_reg(tmp2.to_reg());1051let rd = format_reg(rd.to_reg());1052format!("brev8 {rd},{rs}##tmp={tmp} tmp2={tmp2} step={step} ty={ty}")1053}1054&Inst::Popcnt {1055sum,1056step,1057rs,1058tmp,1059ty,1060} => {1061let rs = format_reg(rs);1062let tmp = format_reg(tmp.to_reg());1063let step = format_reg(step.to_reg());1064let sum = format_reg(sum.to_reg());1065format!("popcnt {sum},{rs}##ty={ty} tmp={tmp} step={step}")1066}1067&Inst::Cltz {1068sum,1069step,1070rs,1071tmp,1072ty,1073leading,1074} => {1075let rs = format_reg(rs);1076let tmp = format_reg(tmp.to_reg());1077let step = format_reg(step.to_reg());1078let sum = format_reg(sum.to_reg());1079format!(1080"{} {},{}##ty={} tmp={} step={}",1081if leading { "clz" } else { "ctz" },1082sum,1083rs,1084ty,1085tmp,1086step1087)1088}1089&Inst::AtomicCas {1090offset,1091t0,1092dst,1093e,1094addr,1095v,1096ty,1097} => {1098let offset = format_reg(offset);1099let e = format_reg(e);1100let addr = format_reg(addr);1101let v = format_reg(v);1102let t0 = format_reg(t0.to_reg());1103let dst = format_reg(dst.to_reg());1104format!("atomic_cas.{ty} {dst},{e},{v},({addr})##t0={t0} offset={offset}",)1105}1106&Inst::BrTable {1107index,1108tmp1,1109tmp2,1110ref targets,1111} => {1112format!(1113"{} {},{}##tmp1={},tmp2={}",1114"br_table",1115format_reg(index),1116format_labels(&targets[..]),1117format_reg(tmp1.to_reg()),1118format_reg(tmp2.to_reg()),1119)1120}1121&Inst::Auipc { rd, imm } => {1122format!("{} {},{}", "auipc", format_reg(rd.to_reg()), imm.as_i32(),)1123}1124&Inst::Jalr { rd, base, offset } => 
{1125let base = format_reg(base);1126let rd = format_reg(rd.to_reg());1127format!("{} {},{}({})", "jalr", rd, offset.as_i16(), base)1128}1129&Inst::Lui { rd, ref imm } => {1130format!("{} {},{}", "lui", format_reg(rd.to_reg()), imm.as_i32())1131}1132&Inst::Fli { rd, width, imm } => {1133let rd_s = format_reg(rd.to_reg());1134let imm_s = imm.format();1135format!("fli.{width} {rd_s},{imm_s}")1136}1137&Inst::LoadInlineConst { rd, imm, .. } => {1138let rd = format_reg(rd.to_reg());1139let mut buf = String::new();1140write!(&mut buf, "auipc {rd},0; ").unwrap();1141write!(&mut buf, "ld {rd},12({rd}); ").unwrap();1142write!(&mut buf, "j {}; ", Inst::UNCOMPRESSED_INSTRUCTION_SIZE + 8).unwrap();1143write!(&mut buf, ".8byte 0x{imm:x}").unwrap();1144buf1145}1146&Inst::AluRRR {1147alu_op,1148rd,1149rs1,1150rs2,1151} => {1152let rs1_s = format_reg(rs1);1153let rs2_s = format_reg(rs2);1154let rd_s = format_reg(rd.to_reg());1155match alu_op {1156AluOPRRR::Adduw if rs2 == zero_reg() => {1157format!("zext.w {rd_s},{rs1_s}")1158}1159_ => {1160format!("{} {},{},{}", alu_op.op_name(), rd_s, rs1_s, rs2_s)1161}1162}1163}1164&Inst::FpuRR {1165alu_op,1166width,1167frm,1168rd,1169rs,1170} => {1171let rs = format_reg(rs);1172let rd = format_reg(rd.to_reg());1173let frm = if alu_op.has_frm() {1174format_frm(frm)1175} else {1176String::new()1177};1178format!("{} {rd},{rs}{frm}", alu_op.op_name(width))1179}1180&Inst::FpuRRR {1181alu_op,1182width,1183rd,1184rs1,1185rs2,1186frm,1187} => {1188let rs1 = format_reg(rs1);1189let rs2 = format_reg(rs2);1190let rd = format_reg(rd.to_reg());1191let frm = if alu_op.has_frm() {1192format_frm(frm)1193} else {1194String::new()1195};11961197let rs1_is_rs2 = rs1 == rs2;1198match alu_op {1199FpuOPRRR::Fsgnj if rs1_is_rs2 => format!("fmv.{width} {rd},{rs1}"),1200FpuOPRRR::Fsgnjn if rs1_is_rs2 => format!("fneg.{width} {rd},{rs1}"),1201FpuOPRRR::Fsgnjx if rs1_is_rs2 => format!("fabs.{width} {rd},{rs1}"),1202_ => format!("{} {rd},{rs1},{rs2}{frm}", 
alu_op.op_name(width)),1203}1204}1205&Inst::FpuRRRR {1206alu_op,1207rd,1208rs1,1209rs2,1210rs3,1211frm,1212width,1213} => {1214let rs1 = format_reg(rs1);1215let rs2 = format_reg(rs2);1216let rs3 = format_reg(rs3);1217let rd = format_reg(rd.to_reg());1218let frm = format_frm(frm);1219let op_name = alu_op.op_name(width);1220format!("{op_name} {rd},{rs1},{rs2},{rs3}{frm}")1221}1222&Inst::AluRRImm12 {1223alu_op,1224rd,1225rs,1226ref imm12,1227} => {1228let rs_s = format_reg(rs);1229let rd = format_reg(rd.to_reg());12301231// Some of these special cases are better known as1232// their pseudo-instruction version, so prefer printing those.1233match (alu_op, rs, imm12) {1234(AluOPRRI::Addi, rs, _) if rs == zero_reg() => {1235return format!("li {},{}", rd, imm12.as_i16());1236}1237(AluOPRRI::Addiw, _, imm12) if imm12.as_i16() == 0 => {1238return format!("sext.w {rd},{rs_s}");1239}1240(AluOPRRI::Xori, _, imm12) if imm12.as_i16() == -1 => {1241return format!("not {rd},{rs_s}");1242}1243(AluOPRRI::SltiU, _, imm12) if imm12.as_i16() == 1 => {1244return format!("seqz {rd},{rs_s}");1245}1246(alu_op, _, _) if alu_op.option_funct12().is_some() => {1247format!("{} {},{}", alu_op.op_name(), rd, rs_s)1248}1249(alu_op, _, imm12) => {1250format!("{} {},{},{}", alu_op.op_name(), rd, rs_s, imm12.as_i16())1251}1252}1253}1254&Inst::CsrReg { op, rd, rs, csr } => {1255let rs_s = format_reg(rs);1256let rd_s = format_reg(rd.to_reg());12571258match (op, csr, rd) {1259(CsrRegOP::CsrRW, CSR::Frm, rd) if rd.to_reg() == zero_reg() => {1260format!("fsrm {rs_s}")1261}1262_ => {1263format!("{op} {rd_s},{csr},{rs_s}")1264}1265}1266}1267&Inst::CsrImm { op, rd, csr, imm } => {1268let rd_s = format_reg(rd.to_reg());12691270match (op, csr, rd) {1271(CsrImmOP::CsrRWI, CSR::Frm, rd) if rd.to_reg() != zero_reg() => {1272format!("fsrmi {rd_s},{imm}")1273}1274_ => {1275format!("{op} {rd_s},{csr},{imm}")1276}1277}1278}1279&Inst::Load {1280rd,1281op,1282from,1283flags: _flags,1284} => {1285let base = 
from.to_string();1286let rd = format_reg(rd.to_reg());1287format!("{} {},{}", op.op_name(), rd, base,)1288}1289&Inst::Store {1290to,1291src,1292op,1293flags: _flags,1294} => {1295let base = to.to_string();1296let src = format_reg(src);1297format!("{} {},{}", op.op_name(), src, base,)1298}1299&Inst::Args { ref args } => {1300let mut s = "args".to_string();1301for arg in args {1302let preg = format_reg(arg.preg);1303let def = format_reg(arg.vreg.to_reg());1304write!(&mut s, " {def}={preg}").unwrap();1305}1306s1307}1308&Inst::Rets { ref rets } => {1309let mut s = "rets".to_string();1310for ret in rets {1311let preg = format_reg(ret.preg);1312let vreg = format_reg(ret.vreg);1313write!(&mut s, " {vreg}={preg}").unwrap();1314}1315s1316}1317&Inst::Ret {} => "ret".to_string(),13181319&MInst::Extend {1320rd,1321rn,1322signed,1323from_bits,1324..1325} => {1326let rn = format_reg(rn);1327let rd = format_reg(rd.to_reg());1328return if signed == false && from_bits == 8 {1329format!("andi {rd},{rn}")1330} else {1331let op = if signed { "srai" } else { "srli" };1332let shift_bits = (64 - from_bits) as i16;1333format!("slli {rd},{rn},{shift_bits}; {op} {rd},{rd},{shift_bits}")1334};1335}1336&MInst::Call { ref info } => {1337let try_call = info1338.try_call_info1339.as_ref()1340.map(|tci| pretty_print_try_call(tci))1341.unwrap_or_default();1342format!("call {}{try_call}", info.dest.display(None))1343}1344&MInst::CallInd { ref info } => {1345let rd = format_reg(info.dest);1346let try_call = info1347.try_call_info1348.as_ref()1349.map(|tci| pretty_print_try_call(tci))1350.unwrap_or_default();1351format!("callind {rd}{try_call}")1352}1353&MInst::ReturnCall { ref info } => {1354let mut s = format!(1355"return_call {:?} new_stack_arg_size:{}",1356info.dest, info.new_stack_arg_size1357);1358for ret in &info.uses {1359let preg = format_reg(ret.preg);1360let vreg = format_reg(ret.vreg);1361write!(&mut s, " {vreg}={preg}").unwrap();1362}1363s1364}1365&MInst::ReturnCallInd { ref info } => 
{1366let callee = format_reg(info.dest);1367let mut s = format!(1368"return_call_ind {callee} new_stack_arg_size:{}",1369info.new_stack_arg_size1370);1371for ret in &info.uses {1372let preg = format_reg(ret.preg);1373let vreg = format_reg(ret.vreg);1374write!(&mut s, " {vreg}={preg}").unwrap();1375}1376s1377}1378&MInst::TrapIf {1379rs1,1380rs2,1381cc,1382trap_code,1383} => {1384let rs1 = format_reg(rs1);1385let rs2 = format_reg(rs2);1386format!("trap_if {trap_code}##({rs1} {cc} {rs2})")1387}1388&MInst::Jal { label } => {1389format!("j {}", label.to_string())1390}1391&MInst::CondBr {1392taken,1393not_taken,1394kind,1395..1396} => {1397let rs1 = format_reg(kind.rs1);1398let rs2 = format_reg(kind.rs2);1399if not_taken.is_fallthrouh() && taken.as_label().is_none() {1400format!("{} {},{},0", kind.op_name(), rs1, rs2)1401} else {1402let x = format!(1403"{} {},{},taken({}),not_taken({})",1404kind.op_name(),1405rs1,1406rs2,1407taken,1408not_taken1409);1410x1411}1412}1413&MInst::Atomic {1414op,1415rd,1416addr,1417src,1418amo,1419} => {1420let op_name = op.op_name(amo);1421let addr = format_reg(addr);1422let src = format_reg(src);1423let rd = format_reg(rd.to_reg());1424if op.is_load() {1425format!("{op_name} {rd},({addr})")1426} else {1427format!("{op_name} {rd},{src},({addr})")1428}1429}1430&MInst::LoadExtNameGot { rd, ref name } => {1431let rd = format_reg(rd.to_reg());1432format!("load_ext_name_got {rd},{}", name.display(None))1433}1434&MInst::LoadExtNameNear {1435rd,1436ref name,1437offset,1438} => {1439let rd = format_reg(rd.to_reg());1440format!("load_ext_name_near {rd},{}{offset:+}", name.display(None))1441}1442&MInst::LoadExtNameFar {1443rd,1444ref name,1445offset,1446} => {1447let rd = format_reg(rd.to_reg());1448format!("load_ext_name_far {rd},{}{offset:+}", name.display(None))1449}1450&Inst::ElfTlsGetAddr { rd, ref name } => {1451let rd = format_reg(rd.to_reg());1452format!("elf_tls_get_addr {rd},{}", name.display(None))1453}1454&MInst::LoadAddr { ref rd, ref mem 
} => {1455let rs = mem.to_string();1456let rd = format_reg(rd.to_reg());1457format!("load_addr {rd},{rs}")1458}1459&MInst::Mov { rd, rm, ty } => {1460let rm = format_reg(rm);1461let rd = format_reg(rd.to_reg());14621463let op = match ty {1464F16 => "fmv.h",1465F32 => "fmv.s",1466F64 => "fmv.d",1467ty if ty.is_vector() => "vmv1r.v",1468_ => "mv",1469};14701471format!("{op} {rd},{rm}")1472}1473&MInst::MovFromPReg { rd, rm } => {1474let rd = format_reg(rd.to_reg());1475debug_assert!([px_reg(2), px_reg(8)].contains(&rm));1476let rm = reg_name(Reg::from(rm));1477format!("mv {rd},{rm}")1478}1479&MInst::Fence { pred, succ } => {1480format!(1481"fence {},{}",1482Inst::fence_req_to_string(pred),1483Inst::fence_req_to_string(succ),1484)1485}1486&MInst::Select {1487ref dst,1488condition,1489ref x,1490ref y,1491} => {1492let c_rs1 = format_reg(condition.rs1);1493let c_rs2 = format_reg(condition.rs2);1494let x = format_regs(x.regs());1495let y = format_regs(y.regs());1496let dst = dst.map(|r| r.to_reg());1497let dst = format_regs(dst.regs());1498format!(1499"select {},{},{}##condition=({} {} {})",1500dst,1501x,1502y,1503c_rs1,1504condition.kind.to_static_str(),1505c_rs21506)1507}1508&MInst::Udf { trap_code } => format!("udf##trap_code={trap_code}"),1509&MInst::EBreak {} => String::from("ebreak"),1510&Inst::VecAluRRRR {1511op,1512vd,1513vd_src,1514vs1,1515vs2,1516ref mask,1517ref vstate,1518} => {1519let vs1_s = format_reg(vs1);1520let vs2_s = format_reg(vs2);1521let vd_src_s = format_reg(vd_src);1522let vd_s = format_reg(vd.to_reg());1523let mask = format_mask(mask);15241525let vd_fmt = if vd_s != vd_src_s {1526format!("{vd_s},{vd_src_s}")1527} else {1528vd_s1529};15301531// Note: vs2 and vs1 here are opposite to the standard scalar ordering.1532// This is noted in Section 10.1 of the RISC-V Vector spec.1533format!("{op} {vd_fmt},{vs2_s},{vs1_s}{mask} {vstate}")1534}1535&Inst::VecAluRRRImm5 {1536op,1537vd,1538imm,1539vs2,1540ref mask,1541ref vstate,1542..1543} => {1544let vs2_s 
= format_reg(vs2);1545let vd_s = format_reg(vd.to_reg());1546let mask = format_mask(mask);15471548// Some opcodes interpret the immediate as unsigned, lets show the1549// correct number here.1550let imm_s = if op.imm_is_unsigned() {1551format!("{}", imm.bits())1552} else {1553format!("{imm}")1554};15551556format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}")1557}1558&Inst::VecAluRRR {1559op,1560vd,1561vs1,1562vs2,1563ref mask,1564ref vstate,1565} => {1566let vs1_s = format_reg(vs1);1567let vs2_s = format_reg(vs2);1568let vd_s = format_reg(vd.to_reg());1569let mask = format_mask(mask);15701571// Note: vs2 and vs1 here are opposite to the standard scalar ordering.1572// This is noted in Section 10.1 of the RISC-V Vector spec.1573match (op, vs2, vs1) {1574(VecAluOpRRR::VrsubVX, _, vs1) if vs1 == zero_reg() => {1575format!("vneg.v {vd_s},{vs2_s}{mask} {vstate}")1576}1577(VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => {1578format!("vfneg.v {vd_s},{vs2_s}{mask} {vstate}")1579}1580(VecAluOpRRR::VfsgnjxVV, vs2, vs1) if vs2 == vs1 => {1581format!("vfabs.v {vd_s},{vs2_s}{mask} {vstate}")1582}1583(VecAluOpRRR::VmnandMM, vs2, vs1) if vs2 == vs1 => {1584format!("vmnot.m {vd_s},{vs2_s}{mask} {vstate}")1585}1586_ => format!("{op} {vd_s},{vs2_s},{vs1_s}{mask} {vstate}"),1587}1588}1589&Inst::VecAluRRImm5 {1590op,1591vd,1592imm,1593vs2,1594ref mask,1595ref vstate,1596} => {1597let vs2_s = format_reg(vs2);1598let vd_s = format_reg(vd.to_reg());1599let mask = format_mask(mask);16001601// Some opcodes interpret the immediate as unsigned, lets show the1602// correct number here.1603let imm_s = if op.imm_is_unsigned() {1604format!("{}", imm.bits())1605} else {1606format!("{imm}")1607};16081609match (op, imm) {1610(VecAluOpRRImm5::VxorVI, imm) if imm == Imm5::maybe_from_i8(-1).unwrap() => {1611format!("vnot.v {vd_s},{vs2_s}{mask} {vstate}")1612}1613_ => format!("{op} {vd_s},{vs2_s},{imm_s}{mask} {vstate}"),1614}1615}1616&Inst::VecAluRR {1617op,1618vd,1619vs,1620ref mask,1621ref 
vstate,
            } => {
                let vs_s = format_reg(vs);
                let vd_s = format_reg(vd.to_reg());
                let mask = format_mask(mask);

                format!("{op} {vd_s},{vs_s}{mask} {vstate}")
            }
            &Inst::VecAluRImm5 {
                op,
                vd,
                imm,
                ref mask,
                ref vstate,
            } => {
                let vd_s = format_reg(vd.to_reg());
                let mask = format_mask(mask);

                format!("{op} {vd_s},{imm}{mask} {vstate}")
            }
            &Inst::VecSetState { rd, ref vstate } => {
                let rd_s = format_reg(rd.to_reg());
                assert!(vstate.avl.is_static());
                format!("vsetivli {}, {}, {}", rd_s, vstate.avl, vstate.vtype)
            }
            Inst::VecLoad {
                eew,
                to,
                from,
                mask,
                vstate,
                ..
            } => {
                let base = format_vec_amode(from);
                let vd = format_reg(to.to_reg());
                let mask = format_mask(mask);

                format!("vl{eew}.v {vd},{base}{mask} {vstate}")
            }
            Inst::VecStore {
                eew,
                to,
                from,
                mask,
                vstate,
                ..
            } => {
                let dst = format_vec_amode(to);
                let vs3 = format_reg(*from);
                let mask = format_mask(mask);

                format!("vs{eew}.v {vs3},{dst}{mask} {vstate}")
            }
            Inst::EmitIsland { needed_space } => {
                format!("emit_island {needed_space}")
            }

            Inst::LabelAddress { dst, label } => {
                let dst = format_reg(dst.to_reg());
                format!("label_address {dst}, {label:?}")
            }
        }
    }
}

/// Different forms of label references for different instruction formats.
///
/// Each variant selects the reachable range (`max_pos_range` /
/// `max_neg_range`), the number of bytes rewritten (`patch_size`) and the
/// bit layout used when the final offset is patched into the code buffer.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LabelUse {
    /// 20-bit branch offset (unconditional branches). PC-rel, offset is
    /// imm << 1. Immediate is 20 signed bits. Use in Jal instructions.
    Jal20,

    /// The unconditional jump instructions all use PC-relative
    /// addressing to help support position independent code. The JALR
    /// instruction was defined to enable a two-instruction sequence to
    /// jump anywhere in a 32-bit absolute address range. A LUI
    /// instruction can first load rs1 with the upper 20 bits of a
    /// target address, then JALR can add in the lower bits. Similarly,
    /// AUIPC then JALR can jump anywhere in a 32-bit pc-relative
    /// address range.
    ///
    /// This label use patches an 8-byte `auipc` + `jalr` pair.
    PCRel32,

    /// All branch instructions use the B-type instruction format. The
    /// 12-bit B-immediate encodes signed offsets in multiples of 2, and
    /// is added to the current pc to give the target address. The
    /// conditional branch range is ±4 KiB.
    B12,

    /// Equivalent to the `R_RISCV_PCREL_HI20` relocation. Allows setting
    /// the immediate field of an `auipc` instruction.
    PCRelHi20,

    /// Similar to the `R_RISCV_PCREL_LO12_I` relocation but pointing to
    /// the final address, instead of the `PCREL_HI20` label. Allows setting
    /// the immediate field of I Type instructions such as `addi` or `lw`.
    ///
    /// Since we currently don't support offsets in labels, this relocation has
    /// an implicit offset of 4.
    PCRelLo12I,

    /// 11-bit PC-relative jump offset. Equivalent to the `RVC_JUMP` relocation.
    RVCJump,
}

impl MachInstLabelUse for LabelUse {
    /// Alignment for veneer code. Every Riscv64 instruction must be
    /// 4-byte-aligned.
    const ALIGN: CodeOffset = 4;

    /// Maximum PC-relative range (positive), inclusive.
    fn max_pos_range(self) -> CodeOffset {
        match self {
            LabelUse::Jal20 => ((1 << 19) - 1) * 2,
            LabelUse::PCRelLo12I | LabelUse::PCRelHi20 | LabelUse::PCRel32 => {
                Inst::imm_max() as CodeOffset
            }
            LabelUse::B12 => ((1 << 11) - 1) * 2,
            LabelUse::RVCJump => ((1 << 10) - 1) * 2,
        }
    }

    /// Maximum PC-relative range (negative).
    ///
    /// Returned as a positive magnitude; `patch` negates it before comparing.
    fn max_neg_range(self) -> CodeOffset {
        match self {
            LabelUse::PCRel32 => Inst::imm_min().abs() as CodeOffset,
            // Every other kind reaches one 2-byte step further backwards
            // than forwards.
            _ => self.max_pos_range() + 2,
        }
    }

    /// Size of window into code needed to do the patch.
    fn patch_size(self) -> CodeOffset {
        match self {
            LabelUse::RVCJump => 2,
            LabelUse::Jal20 | LabelUse::B12 | LabelUse::PCRelHi20 | LabelUse::PCRelLo12I => 4,
            LabelUse::PCRel32 => 8,
        }
    }

    /// Perform the patch.
    ///
    /// # Panics
    ///
    /// Panics if either offset is odd, or if the distance between them lies
    /// outside `[-max_neg_range(), max_pos_range()]`.
    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
        assert!(use_offset % 2 == 0);
        assert!(label_offset % 2 == 0);
        let offset = (label_offset as i64) - (use_offset as i64);

        // re-check range
        assert!(
            offset >= -(self.max_neg_range() as i64) && offset <= (self.max_pos_range() as i64),
            "{self:?} offset '{offset}' use_offset:'{use_offset}' label_offset:'{label_offset}' must not exceed max range.",
        );
        self.patch_raw_offset(buffer, offset);
    }

    /// Is a veneer supported for this label reference type?
    fn supports_veneer(self) -> bool {
        matches!(self, Self::Jal20 | Self::B12 | Self::RVCJump)
    }

    /// How large is the veneer, if supported?
    ///
    /// Must only be called for kinds where `supports_veneer` returns `true`.
    fn veneer_size(self) -> CodeOffset {
        match self {
            Self::B12 | Self::Jal20 | Self::RVCJump => 8,
            _ => unreachable!(),
        }
    }

    /// Largest veneer any label use can require, in bytes.
    fn worst_case_veneer_size() -> CodeOffset {
        8
    }

    /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
    /// an offset and label-use for the veneer's use of the original label.
    ///
    /// The veneer is an `auipc` into the spill temporary followed by a `jalr`
    /// through it, both with zero immediates; the returned `PCRel32` label use
    /// fills the immediates in later.
    fn generate_veneer(
        self,
        buffer: &mut [u8],
        veneer_offset: CodeOffset,
    ) -> (CodeOffset, LabelUse) {
        let base = writable_spilltmp_reg();

        let auipc = enc_auipc(base, Imm20::ZERO).to_le_bytes();
        buffer[0..4].copy_from_slice(&auipc);

        let jalr = enc_jalr(writable_zero_reg(), base.to_reg(), Imm12::ZERO).to_le_bytes();
        buffer[4..8].copy_from_slice(&jalr);

        (veneer_offset, Self::PCRel32)
    }

    /// Map an external relocation onto a label use; only `RiscvCallPlt`
    /// (with any addend) is handled.
    fn from_reloc(reloc: Reloc, addend: Addend) -> Option<LabelUse> {
        match (reloc, addend) {
            (Reloc::RiscvCallPlt, _) => Some(Self::PCRel32),
            _ => None,
        }
    }
}

impl LabelUse {
    /// Whether `offset` lies within `[-max_neg_range(), max_pos_range()]`.
    #[expect(dead_code, reason = "in case it's needed in the future")]
    fn offset_in_range(self, offset: i64) -> bool {
        let min = -(self.max_neg_range() as i64);
        let max = self.max_pos_range() as i64;
        (min..=max).contains(&offset)
    }

    /// Write `offset` into the immediate field(s) of the instruction(s) at the
    /// start of `buffer`.
    ///
    /// For `Jal20`, `B12` and `PCRel32` the immediate bits are OR-ed into the
    /// bytes already in the buffer. For `PCRelHi20` and `PCRelLo12I` the
    /// immediate field is masked out and replaced. For `RVCJump` the whole
    /// 2-byte instruction is re-encoded.
    ///
    /// # Panics
    ///
    /// Panics if `buffer` is shorter than the patched window, if a `PCRel32`
    /// offset cannot be split into an `auipc`/`jalr` immediate pair, or if an
    /// `RVCJump` offset does not fit in an `i16`.
    fn patch_raw_offset(self, buffer: &mut [u8], offset: i64) {
        // Compressed jumps are 2 bytes wide; everything else starts with a
        // 4-byte instruction word.
        let insn = match self {
            LabelUse::RVCJump => u32::from(u16::from_le_bytes(buffer[..2].try_into().unwrap())),
            _ => u32::from_le_bytes(buffer[..4].try_into().unwrap()),
        };

        match self {
            LabelUse::Jal20 => {
                let offset = offset as u32;
                // offset[19:12] -> insn[19:12], offset[11] -> insn[20],
                // offset[10:1] -> insn[30:21], offset[20] -> insn[31].
                let v = (((offset >> 12) & 0b1111_1111) << 12)
                    | (((offset >> 11) & 0b1) << 20)
                    | (((offset >> 1) & 0b11_1111_1111) << 21)
                    | (((offset >> 20) & 0b1) << 31);
                buffer[0..4].copy_from_slice(&u32::to_le_bytes(insn | v));
            }
            LabelUse::PCRel32 => {
                let insn2 = u32::from_le_bytes([buffer[4], buffer[5], buffer[6], buffer[7]]);
                // The range was validated by the caller, so a failure here is
                // an internal error.
                let (imm20, imm12) = Inst::generate_imm(offset as u64)
                    .expect("we have check the range before,this is a compiler error.");

                // Encode the OR-ed-in value with zero_reg(). The register
                // parameter must be in the original encoded instruction and
                // or'ing in zeroes does not change it.
                buffer[0..4].copy_from_slice(&u32::to_le_bytes(
                    insn | enc_auipc(writable_zero_reg(), imm20),
                ));
                buffer[4..8].copy_from_slice(&u32::to_le_bytes(
                    insn2 | enc_jalr(writable_zero_reg(), zero_reg(), imm12),
                ));
            }

            LabelUse::B12 => {
                let offset = offset as u32;
                // offset[11] -> insn[7], offset[4:1] -> insn[11:8],
                // offset[10:5] -> insn[30:25], offset[12] -> insn[31].
                let v = (((offset >> 11) & 0b1) << 7)
                    | (((offset >> 1) & 0b1111) << 8)
                    | (((offset >> 5) & 0b11_1111) << 25)
                    | (((offset >> 12) & 0b1) << 31);
                buffer[0..4].copy_from_slice(&u32::to_le_bytes(insn | v));
            }

            LabelUse::PCRelHi20 => {
                // See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#pc-relative-symbol-addresses
                //
                // We need to add 0x800 to ensure that we land at the next page as soon as it goes out of range for the
                // Lo12 relocation. That relocation is signed and has a maximum range of -2048..2047. So when we get an
                // offset of 2048, we need to land at the next page and subtract instead.
                let offset = offset as u32;
                let hi20 = offset.wrapping_add(0x800) >> 12;
                let insn = (insn & 0xFFF) | (hi20 << 12);
                buffer[0..4].copy_from_slice(&u32::to_le_bytes(insn));
            }

            LabelUse::PCRelLo12I => {
                // `offset` is the offset from the current instruction to the target address.
                //
                // However we are trying to compute the offset to the target address from the previous instruction.
                // The previous instruction should be the one that contains the PCRelHi20 relocation and
                // stores/references the program counter (`auipc` usually).
                //
                // Since we are trying to compute the offset from the previous instruction, we can
                // represent it as offset = target_address - (current_instruction_address - 4)
                // which is equivalent to offset = target_address - current_instruction_address + 4.
                //
                // Thus we need to add 4 to the offset here.
                let lo12 = ((offset + 4) as u32) & 0xFFF;
                let insn = (insn & 0xFFFFF) | (lo12 << 20);
                buffer[0..4].copy_from_slice(&u32::to_le_bytes(insn));
            }
            LabelUse::RVCJump => {
                debug_assert!((offset & 1) == 0);

                // We currently only support this for the C.J operation, so assert that is the opcode in
                // the buffer.
                debug_assert_eq!(insn & 0xFFFF, 0xA001);

                let imm = Imm12::from_i16(i16::try_from(offset).unwrap());
                buffer[0..2].copy_from_slice(&u16::to_le_bytes(encode_cj_type(CjOp::CJ, imm)));
            }
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    /// Pins the relationship between the positive and negative ranges.
    #[test]
    fn label_use_max_range() {
        assert_eq!(
            LabelUse::B12.max_neg_range(),
            LabelUse::B12.max_pos_range() + 2
        );
        assert_eq!(
            LabelUse::Jal20.max_neg_range(),
            LabelUse::Jal20.max_pos_range() + 2
        );
        assert_eq!(
            LabelUse::PCRel32.max_pos_range(),
            Inst::imm_max() as CodeOffset
        );
        assert_eq!(
            LabelUse::PCRel32.max_neg_range(),
            Inst::imm_min().abs() as CodeOffset
        );
        assert_eq!(LabelUse::B12.max_pos_range(), ((1 << 11) - 1) * 2);
    }
}