Path: blob/main/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
35294 views
//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file contains the RISC-V implementation of the TargetInstrInfo class.9//10//===----------------------------------------------------------------------===//1112#include "RISCVInstrInfo.h"13#include "MCTargetDesc/RISCVMatInt.h"14#include "RISCV.h"15#include "RISCVMachineFunctionInfo.h"16#include "RISCVSubtarget.h"17#include "RISCVTargetMachine.h"18#include "llvm/ADT/STLExtras.h"19#include "llvm/ADT/SmallVector.h"20#include "llvm/Analysis/MemoryLocation.h"21#include "llvm/Analysis/ValueTracking.h"22#include "llvm/CodeGen/LiveIntervals.h"23#include "llvm/CodeGen/LiveVariables.h"24#include "llvm/CodeGen/MachineCombinerPattern.h"25#include "llvm/CodeGen/MachineFunctionPass.h"26#include "llvm/CodeGen/MachineInstrBuilder.h"27#include "llvm/CodeGen/MachineRegisterInfo.h"28#include "llvm/CodeGen/MachineTraceMetrics.h"29#include "llvm/CodeGen/RegisterScavenging.h"30#include "llvm/CodeGen/StackMaps.h"31#include "llvm/IR/DebugInfoMetadata.h"32#include "llvm/IR/Module.h"33#include "llvm/MC/MCInstBuilder.h"34#include "llvm/MC/TargetRegistry.h"35#include "llvm/Support/ErrorHandling.h"3637using namespace llvm;3839#define GEN_CHECK_COMPRESS_INSTR40#include "RISCVGenCompressInstEmitter.inc"4142#define GET_INSTRINFO_CTOR_DTOR43#define GET_INSTRINFO_NAMED_OPS44#include "RISCVGenInstrInfo.inc"4546static cl::opt<bool> PreferWholeRegisterMove(47"riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,48cl::desc("Prefer whole register move for vector registers."));4950static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(51"riscv-force-machine-combiner-strategy", cl::Hidden,52cl::desc("Force machine combiner to use a specific strategy for machine "53"trace metrics evaluation."),54cl::init(MachineTraceStrategy::TS_NumStrategies),55cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",56"Local strategy."),57clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",58"MinInstrCount strategy.")));5960namespace llvm::RISCVVPseudosTable {6162using namespace RISCV;6364#define GET_RISCVVPseudosTable_IMPL65#include "RISCVGenSearchableTables.inc"6667} // namespace llvm::RISCVVPseudosTable6869namespace llvm::RISCV {7071#define GET_RISCVMaskedPseudosTable_IMPL72#include "RISCVGenSearchableTables.inc"7374} // end namespace llvm::RISCV7576RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)77: RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),78STI(STI) {}7980MCInst RISCVInstrInfo::getNop() const {81if (STI.hasStdExtCOrZca())82return MCInstBuilder(RISCV::C_NOP);83return MCInstBuilder(RISCV::ADDI)84.addReg(RISCV::X0)85.addReg(RISCV::X0)86.addImm(0);87}8889Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,90int &FrameIndex) const {91unsigned Dummy;92return isLoadFromStackSlot(MI, FrameIndex, Dummy);93}9495Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,96int &FrameIndex,97unsigned &MemBytes) const {98switch (MI.getOpcode()) {99default:100return 0;101case RISCV::LB:102case RISCV::LBU:103MemBytes = 1;104break;105case RISCV::LH:106case RISCV::LHU:107case RISCV::FLH:108MemBytes = 2;109break;110case RISCV::LW:111case RISCV::FLW:112case RISCV::LWU:113MemBytes = 4;114break;115case 
RISCV::LD:116case RISCV::FLD:117MemBytes = 8;118break;119}120121if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&122MI.getOperand(2).getImm() == 0) {123FrameIndex = MI.getOperand(1).getIndex();124return MI.getOperand(0).getReg();125}126127return 0;128}129130Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,131int &FrameIndex) const {132unsigned Dummy;133return isStoreToStackSlot(MI, FrameIndex, Dummy);134}135136Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,137int &FrameIndex,138unsigned &MemBytes) const {139switch (MI.getOpcode()) {140default:141return 0;142case RISCV::SB:143MemBytes = 1;144break;145case RISCV::SH:146case RISCV::FSH:147MemBytes = 2;148break;149case RISCV::SW:150case RISCV::FSW:151MemBytes = 4;152break;153case RISCV::SD:154case RISCV::FSD:155MemBytes = 8;156break;157}158159if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&160MI.getOperand(2).getImm() == 0) {161FrameIndex = MI.getOperand(1).getIndex();162return MI.getOperand(0).getReg();163}164165return 0;166}167168bool RISCVInstrInfo::isReallyTriviallyReMaterializable(169const MachineInstr &MI) const {170if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VID_V &&171MI.getOperand(1).isUndef() &&172/* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl and173vtype. Make sure we only rematerialize before RISCVInsertVSETVLI174i.e. -riscv-vsetvl-after-rvv-regalloc=true */175!MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))176return true;177return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);178}179180static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,181unsigned NumRegs) {182return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;183}184185static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,186const MachineBasicBlock &MBB,187MachineBasicBlock::const_iterator MBBI,188MachineBasicBlock::const_iterator &DefMBBI,189RISCVII::VLMUL LMul) {190if (PreferWholeRegisterMove)191return false;192193assert(MBBI->getOpcode() == TargetOpcode::COPY &&194"Unexpected COPY instruction.");195Register SrcReg = MBBI->getOperand(1).getReg();196const TargetRegisterInfo *TRI = STI.getRegisterInfo();197198bool FoundDef = false;199bool FirstVSetVLI = false;200unsigned FirstSEW = 0;201while (MBBI != MBB.begin()) {202--MBBI;203if (MBBI->isMetaInstruction())204continue;205206if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||207MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||208MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {209// There is a vsetvli between COPY and source define instruction.210// vy = def_vop ... 
(producing instruction)211// ...212// vsetvli213// ...214// vx = COPY vy215if (!FoundDef) {216if (!FirstVSetVLI) {217FirstVSetVLI = true;218unsigned FirstVType = MBBI->getOperand(2).getImm();219RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);220FirstSEW = RISCVVType::getSEW(FirstVType);221// The first encountered vsetvli must have the same lmul as the222// register class of COPY.223if (FirstLMul != LMul)224return false;225}226// Only permit `vsetvli x0, x0, vtype` between COPY and the source227// define instruction.228if (MBBI->getOperand(0).getReg() != RISCV::X0)229return false;230if (MBBI->getOperand(1).isImm())231return false;232if (MBBI->getOperand(1).getReg() != RISCV::X0)233return false;234continue;235}236237// MBBI is the first vsetvli before the producing instruction.238unsigned VType = MBBI->getOperand(2).getImm();239// If there is a vsetvli between COPY and the producing instruction.240if (FirstVSetVLI) {241// If SEW is different, return false.242if (RISCVVType::getSEW(VType) != FirstSEW)243return false;244}245246// If the vsetvli is tail undisturbed, keep the whole register move.247if (!RISCVVType::isTailAgnostic(VType))248return false;249250// The checking is conservative. We only have register classes for251// LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v252// for fractional LMUL operations. However, we could not use the vsetvli253// lmul for widening operations. The result of widening operation is254// 2 x LMUL.255return LMul == RISCVVType::getVLMUL(VType);256} else if (MBBI->isInlineAsm() || MBBI->isCall()) {257return false;258} else if (MBBI->getNumDefs()) {259// Check all the instructions which will change VL.260// For example, vleff has implicit def VL.261if (MBBI->modifiesRegister(RISCV::VL, /*TRI=*/nullptr))262return false;263264// Only converting whole register copies to vmv.v.v when the defining265// value appears in the explicit operands.266for (const MachineOperand &MO : MBBI->explicit_operands()) {267if (!MO.isReg() || !MO.isDef())268continue;269if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) {270// We only permit the source of COPY has the same LMUL as the defined271// operand.272// There are cases we need to keep the whole register copy if the LMUL273// is different.274// For example,275// $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,m276// $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2277// # The COPY may be created by vlmul_trunc intrinsic.278// $v26m2 = COPY renamable $v28m2, implicit killed $v28m4279//280// After widening, the valid value will be 4 x e32 elements. If we281// convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.282// FIXME: The COPY of subregister of Zvlsseg register will not be able283// to convert to vmv.v.[v|i] under the constraint.284if (MO.getReg() != SrcReg)285return false;286287// In widening reduction instructions with LMUL_1 input vector case,288// only checking the LMUL is insufficient due to reduction result is289// always LMUL_1.290// For example,291// $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu292// $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27293// $v26 = COPY killed renamable $v8294// After widening, The valid value will be 1 x e16 elements. If we295// convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.296uint64_t TSFlags = MBBI->getDesc().TSFlags;297if (RISCVII::isRVVWideningReduction(TSFlags))298return false;299300// If the producing instruction does not depend on vsetvli, do not301// convert COPY to vmv.v.v. 
For example, VL1R_V or PseudoVRELOAD.302if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))303return false;304305// Found the definition.306FoundDef = true;307DefMBBI = MBBI;308break;309}310}311}312}313314return false;315}316317void RISCVInstrInfo::copyPhysRegVector(318MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,319const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,320const TargetRegisterClass *RegClass) const {321const TargetRegisterInfo *TRI = STI.getRegisterInfo();322RISCVII::VLMUL LMul = RISCVRI::getLMul(RegClass->TSFlags);323unsigned NF = RISCVRI::getNF(RegClass->TSFlags);324325uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);326uint16_t DstEncoding = TRI->getEncodingValue(DstReg);327auto [LMulVal, Fractional] = RISCVVType::decodeVLMUL(LMul);328assert(!Fractional && "It is impossible be fractional lmul here.");329unsigned NumRegs = NF * LMulVal;330bool ReversedCopy =331forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NumRegs);332if (ReversedCopy) {333// If the src and dest overlap when copying a tuple, we need to copy the334// registers in reverse.335SrcEncoding += NumRegs - 1;336DstEncoding += NumRegs - 1;337}338339unsigned I = 0;340auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding)341-> std::tuple<RISCVII::VLMUL, const TargetRegisterClass &, unsigned,342unsigned, unsigned> {343if (ReversedCopy) {344// For reversed copying, if there are enough aligned registers(8/4/2), we345// can do a larger copy(LMUL8/4/2).346// Besides, we have already known that DstEncoding is larger than347// SrcEncoding in forwardCopyWillClobberTuple, so the difference between348// DstEncoding and SrcEncoding should be >= LMUL value we try to use to349// avoid clobbering.350uint16_t Diff = DstEncoding - SrcEncoding;351if (I + 8 <= NumRegs && Diff >= 8 && SrcEncoding % 8 == 7 &&352DstEncoding % 8 == 7)353return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,354RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};355if (I + 4 <= NumRegs && Diff >= 4 && SrcEncoding % 4 == 3 &&356DstEncoding % 4 == 3)357return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,358RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};359if (I + 2 <= NumRegs && Diff >= 2 && SrcEncoding % 2 == 1 &&360DstEncoding % 2 == 1)361return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,362RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};363// Or we should do LMUL1 copying.364return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,365RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};366}367368// For forward copying, if source register encoding and destination register369// encoding are aligned to 8/4/2, we can do a LMUL8/4/2 copying.370if (I + 8 <= NumRegs && SrcEncoding % 8 == 0 && DstEncoding % 8 == 0)371return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,372RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};373if (I + 4 <= NumRegs && SrcEncoding % 4 == 0 && DstEncoding % 4 == 0)374return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,375RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};376if (I + 2 <= NumRegs && SrcEncoding % 2 == 0 && DstEncoding % 2 == 0)377return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,378RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};379// Or we should do LMUL1 copying.380return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,381RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};382};383auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,384uint16_t Encoding) {385MCRegister Reg = 
RISCV::V0 + Encoding;386if (&RegClass == &RISCV::VRRegClass)387return Reg;388return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);389};390while (I != NumRegs) {391// For non-segment copying, we only do this once as the registers are always392// aligned.393// For segment copying, we may do this several times. If the registers are394// aligned to larger LMUL, we can eliminate some copyings.395auto [LMulCopied, RegClass, Opc, VVOpc, VIOpc] =396GetCopyInfo(SrcEncoding, DstEncoding);397auto [NumCopied, _] = RISCVVType::decodeVLMUL(LMulCopied);398399MachineBasicBlock::const_iterator DefMBBI;400if (LMul == LMulCopied &&401isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {402Opc = VVOpc;403if (DefMBBI->getOpcode() == VIOpc)404Opc = VIOpc;405}406407// Emit actual copying.408// For reversed copying, the encoding should be decreased.409MCRegister ActualSrcReg = FindRegWithEncoding(410RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);411MCRegister ActualDstReg = FindRegWithEncoding(412RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);413414auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);415bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_I;416bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_V;417if (UseVMV)418MIB.addReg(ActualDstReg, RegState::Undef);419if (UseVMV_V_I)420MIB = MIB.add(DefMBBI->getOperand(2));421else422MIB = MIB.addReg(ActualSrcReg, getKillRegState(KillSrc));423if (UseVMV) {424const MCInstrDesc &Desc = DefMBBI->getDesc();425MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL426MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW427MIB.addImm(0); // tu, mu428MIB.addReg(RISCV::VL, RegState::Implicit);429MIB.addReg(RISCV::VTYPE, RegState::Implicit);430}431432// If we are copying reversely, we should decrease the encoding.433SrcEncoding += (ReversedCopy ? -NumCopied : NumCopied);434DstEncoding += (ReversedCopy ? 
-NumCopied : NumCopied);435I += NumCopied;436}437}438439void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,440MachineBasicBlock::iterator MBBI,441const DebugLoc &DL, MCRegister DstReg,442MCRegister SrcReg, bool KillSrc) const {443const TargetRegisterInfo *TRI = STI.getRegisterInfo();444445if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {446BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)447.addReg(SrcReg, getKillRegState(KillSrc))448.addImm(0);449return;450}451452if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {453// Emit an ADDI for both parts of GPRPair.454BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),455TRI->getSubReg(DstReg, RISCV::sub_gpr_even))456.addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even),457getKillRegState(KillSrc))458.addImm(0);459BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),460TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))461.addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd),462getKillRegState(KillSrc))463.addImm(0);464return;465}466467// Handle copy from csr468if (RISCV::VCSRRegClass.contains(SrcReg) &&469RISCV::GPRRegClass.contains(DstReg)) {470BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)471.addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)472.addReg(RISCV::X0);473return;474}475476if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {477unsigned Opc;478if (STI.hasStdExtZfh()) {479Opc = RISCV::FSGNJ_H;480} else {481assert(STI.hasStdExtF() &&482(STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&483"Unexpected extensions");484// Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S.485DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,486&RISCV::FPR32RegClass);487SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,488&RISCV::FPR32RegClass);489Opc = RISCV::FSGNJ_S;490}491BuildMI(MBB, MBBI, DL, get(Opc), DstReg)492.addReg(SrcReg, getKillRegState(KillSrc))493.addReg(SrcReg, getKillRegState(KillSrc));494return;495}496497if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {498BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)499.addReg(SrcReg, getKillRegState(KillSrc))500.addReg(SrcReg, getKillRegState(KillSrc));501return;502}503504if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {505BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)506.addReg(SrcReg, getKillRegState(KillSrc))507.addReg(SrcReg, getKillRegState(KillSrc));508return;509}510511if (RISCV::FPR32RegClass.contains(DstReg) &&512RISCV::GPRRegClass.contains(SrcReg)) {513BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)514.addReg(SrcReg, getKillRegState(KillSrc));515return;516}517518if (RISCV::GPRRegClass.contains(DstReg) &&519RISCV::FPR32RegClass.contains(SrcReg)) {520BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)521.addReg(SrcReg, getKillRegState(KillSrc));522return;523}524525if (RISCV::FPR64RegClass.contains(DstReg) &&526RISCV::GPRRegClass.contains(SrcReg)) {527assert(STI.getXLen() == 64 && "Unexpected GPR size");528BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)529.addReg(SrcReg, getKillRegState(KillSrc));530return;531}532533if (RISCV::GPRRegClass.contains(DstReg) &&534RISCV::FPR64RegClass.contains(SrcReg)) {535assert(STI.getXLen() == 64 && "Unexpected GPR size");536BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)537.addReg(SrcReg, getKillRegState(KillSrc));538return;539}540541// VR->VR copies.542static const TargetRegisterClass *RVVRegClasses[] = {543&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,544&RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN2M2RegClass,545&RISCV::VRN2M4RegClass, &RISCV::VRN3M1RegClass, 
&RISCV::VRN3M2RegClass,546&RISCV::VRN4M1RegClass, &RISCV::VRN4M2RegClass, &RISCV::VRN5M1RegClass,547&RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass, &RISCV::VRN8M1RegClass};548for (const auto &RegClass : RVVRegClasses) {549if (RegClass->contains(DstReg, SrcReg)) {550copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RegClass);551return;552}553}554555llvm_unreachable("Impossible reg-to-reg copy");556}557558void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,559MachineBasicBlock::iterator I,560Register SrcReg, bool IsKill, int FI,561const TargetRegisterClass *RC,562const TargetRegisterInfo *TRI,563Register VReg) const {564MachineFunction *MF = MBB.getParent();565MachineFrameInfo &MFI = MF->getFrameInfo();566567unsigned Opcode;568bool IsScalableVector = true;569if (RISCV::GPRRegClass.hasSubClassEq(RC)) {570Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?571RISCV::SW : RISCV::SD;572IsScalableVector = false;573} else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {574Opcode = RISCV::PseudoRV32ZdinxSD;575IsScalableVector = false;576} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {577Opcode = RISCV::FSH;578IsScalableVector = false;579} else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {580Opcode = RISCV::FSW;581IsScalableVector = false;582} else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {583Opcode = RISCV::FSD;584IsScalableVector = false;585} else if (RISCV::VRRegClass.hasSubClassEq(RC)) {586Opcode = RISCV::VS1R_V;587} else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {588Opcode = RISCV::VS2R_V;589} else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {590Opcode = RISCV::VS4R_V;591} else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {592Opcode = RISCV::VS8R_V;593} else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))594Opcode = RISCV::PseudoVSPILL2_M1;595else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))596Opcode = RISCV::PseudoVSPILL2_M2;597else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))598Opcode = RISCV::PseudoVSPILL2_M4;599else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))600Opcode = RISCV::PseudoVSPILL3_M1;601else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))602Opcode = RISCV::PseudoVSPILL3_M2;603else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))604Opcode = RISCV::PseudoVSPILL4_M1;605else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))606Opcode = RISCV::PseudoVSPILL4_M2;607else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))608Opcode = RISCV::PseudoVSPILL5_M1;609else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))610Opcode = RISCV::PseudoVSPILL6_M1;611else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))612Opcode = RISCV::PseudoVSPILL7_M1;613else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))614Opcode = RISCV::PseudoVSPILL8_M1;615else616llvm_unreachable("Can't store this register to stack slot");617618if (IsScalableVector) {619MachineMemOperand *MMO = MF->getMachineMemOperand(620MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,621LocationSize::beforeOrAfterPointer(), MFI.getObjectAlign(FI));622623MFI.setStackID(FI, TargetStackID::ScalableVector);624BuildMI(MBB, I, DebugLoc(), get(Opcode))625.addReg(SrcReg, getKillRegState(IsKill))626.addFrameIndex(FI)627.addMemOperand(MMO);628} else {629MachineMemOperand *MMO = MF->getMachineMemOperand(630MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,631MFI.getObjectSize(FI), MFI.getObjectAlign(FI));632633BuildMI(MBB, I, DebugLoc(), get(Opcode))634.addReg(SrcReg, getKillRegState(IsKill))635.addFrameIndex(FI)636.addImm(0)637.addMemOperand(MMO);638}639}640641void 
RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,642MachineBasicBlock::iterator I,643Register DstReg, int FI,644const TargetRegisterClass *RC,645const TargetRegisterInfo *TRI,646Register VReg) const {647MachineFunction *MF = MBB.getParent();648MachineFrameInfo &MFI = MF->getFrameInfo();649650unsigned Opcode;651bool IsScalableVector = true;652if (RISCV::GPRRegClass.hasSubClassEq(RC)) {653Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?654RISCV::LW : RISCV::LD;655IsScalableVector = false;656} else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {657Opcode = RISCV::PseudoRV32ZdinxLD;658IsScalableVector = false;659} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {660Opcode = RISCV::FLH;661IsScalableVector = false;662} else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {663Opcode = RISCV::FLW;664IsScalableVector = false;665} else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {666Opcode = RISCV::FLD;667IsScalableVector = false;668} else if (RISCV::VRRegClass.hasSubClassEq(RC)) {669Opcode = RISCV::VL1RE8_V;670} else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {671Opcode = RISCV::VL2RE8_V;672} else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {673Opcode = RISCV::VL4RE8_V;674} else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {675Opcode = RISCV::VL8RE8_V;676} else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))677Opcode = RISCV::PseudoVRELOAD2_M1;678else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))679Opcode = RISCV::PseudoVRELOAD2_M2;680else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))681Opcode = RISCV::PseudoVRELOAD2_M4;682else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))683Opcode = RISCV::PseudoVRELOAD3_M1;684else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))685Opcode = RISCV::PseudoVRELOAD3_M2;686else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))687Opcode = RISCV::PseudoVRELOAD4_M1;688else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))689Opcode = RISCV::PseudoVRELOAD4_M2;690else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))691Opcode = RISCV::PseudoVRELOAD5_M1;692else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))693Opcode = RISCV::PseudoVRELOAD6_M1;694else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))695Opcode = RISCV::PseudoVRELOAD7_M1;696else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))697Opcode = RISCV::PseudoVRELOAD8_M1;698else699llvm_unreachable("Can't load this register from stack slot");700701if (IsScalableVector) {702MachineMemOperand *MMO = MF->getMachineMemOperand(703MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,704LocationSize::beforeOrAfterPointer(), MFI.getObjectAlign(FI));705706MFI.setStackID(FI, TargetStackID::ScalableVector);707BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)708.addFrameIndex(FI)709.addMemOperand(MMO);710} else {711MachineMemOperand *MMO = MF->getMachineMemOperand(712MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,713MFI.getObjectSize(FI), MFI.getObjectAlign(FI));714715BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)716.addFrameIndex(FI)717.addImm(0)718.addMemOperand(MMO);719}720}721722MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(723MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,724MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,725VirtRegMap *VRM) const {726const MachineFrameInfo &MFI = MF.getFrameInfo();727728// The below optimizations narrow the load so they are only valid for little729// endian.730// TODO: Support big endian by adding an offset into the frame object?731if (MF.getDataLayout().isBigEndian())732return nullptr;733734// Fold load from stack followed by 
sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.735if (Ops.size() != 1 || Ops[0] != 1)736return nullptr;737738unsigned LoadOpc;739switch (MI.getOpcode()) {740default:741if (RISCV::isSEXT_W(MI)) {742LoadOpc = RISCV::LW;743break;744}745if (RISCV::isZEXT_W(MI)) {746LoadOpc = RISCV::LWU;747break;748}749if (RISCV::isZEXT_B(MI)) {750LoadOpc = RISCV::LBU;751break;752}753return nullptr;754case RISCV::SEXT_H:755LoadOpc = RISCV::LH;756break;757case RISCV::SEXT_B:758LoadOpc = RISCV::LB;759break;760case RISCV::ZEXT_H_RV32:761case RISCV::ZEXT_H_RV64:762LoadOpc = RISCV::LHU;763break;764}765766MachineMemOperand *MMO = MF.getMachineMemOperand(767MachinePointerInfo::getFixedStack(MF, FrameIndex),768MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),769MFI.getObjectAlign(FrameIndex));770771Register DstReg = MI.getOperand(0).getReg();772return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),773DstReg)774.addFrameIndex(FrameIndex)775.addImm(0)776.addMemOperand(MMO);777}778779void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,780MachineBasicBlock::iterator MBBI,781const DebugLoc &DL, Register DstReg, uint64_t Val,782MachineInstr::MIFlag Flag, bool DstRenamable,783bool DstIsDead) const {784Register SrcReg = RISCV::X0;785786// For RV32, allow a sign or unsigned 32 bit value.787if (!STI.is64Bit() && !isInt<32>(Val)) {788// If have a uimm32 it will still fit in a register so we can allow it.789if (!isUInt<32>(Val))790report_fatal_error("Should only materialize 32-bit constants for RV32");791792// Sign extend for generateInstSeq.793Val = SignExtend64<32>(Val);794}795796RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);797assert(!Seq.empty());798799bool SrcRenamable = false;800unsigned Num = 0;801802for (const RISCVMatInt::Inst &Inst : Seq) {803bool LastItem = ++Num == Seq.size();804unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |805getRenamableRegState(DstRenamable);806unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |807getRenamableRegState(SrcRenamable);808switch (Inst.getOpndKind()) {809case RISCVMatInt::Imm:810BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))811.addReg(DstReg, RegState::Define | DstRegState)812.addImm(Inst.getImm())813.setMIFlag(Flag);814break;815case RISCVMatInt::RegX0:816BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))817.addReg(DstReg, RegState::Define | DstRegState)818.addReg(SrcReg, SrcRegState)819.addReg(RISCV::X0)820.setMIFlag(Flag);821break;822case RISCVMatInt::RegReg:823BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))824.addReg(DstReg, RegState::Define | DstRegState)825.addReg(SrcReg, SrcRegState)826.addReg(SrcReg, SrcRegState)827.setMIFlag(Flag);828break;829case RISCVMatInt::RegImm:830BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))831.addReg(DstReg, RegState::Define | DstRegState)832.addReg(SrcReg, SrcRegState)833.addImm(Inst.getImm())834.setMIFlag(Flag);835break;836}837838// Only the first instruction has X0 as its source.839SrcReg = DstReg;840SrcRenamable = DstRenamable;841}842}843844static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {845switch (Opc) {846default:847return RISCVCC::COND_INVALID;848case RISCV::CV_BEQIMM:849return RISCVCC::COND_EQ;850case RISCV::CV_BNEIMM:851return RISCVCC::COND_NE;852case RISCV::BEQ:853return RISCVCC::COND_EQ;854case RISCV::BNE:855return RISCVCC::COND_NE;856case RISCV::BLT:857return RISCVCC::COND_LT;858case RISCV::BGE:859return RISCVCC::COND_GE;860case RISCV::BLTU:861return RISCVCC::COND_LTU;862case RISCV::BGEU:863return RISCVCC::COND_GEU;864}865}866867// The contents of values added to Cond are 
not examined outside of868// RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we869// push BranchOpcode, Reg1, Reg2.870static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,871SmallVectorImpl<MachineOperand> &Cond) {872// Block ends with fall-through condbranch.873assert(LastInst.getDesc().isConditionalBranch() &&874"Unknown conditional branch");875Target = LastInst.getOperand(2).getMBB();876unsigned CC = getCondFromBranchOpc(LastInst.getOpcode());877Cond.push_back(MachineOperand::CreateImm(CC));878Cond.push_back(LastInst.getOperand(0));879Cond.push_back(LastInst.getOperand(1));880}881882unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, bool Imm) {883switch (CC) {884default:885llvm_unreachable("Unknown condition code!");886case RISCVCC::COND_EQ:887return Imm ? RISCV::CV_BEQIMM : RISCV::BEQ;888case RISCVCC::COND_NE:889return Imm ? RISCV::CV_BNEIMM : RISCV::BNE;890case RISCVCC::COND_LT:891return RISCV::BLT;892case RISCVCC::COND_GE:893return RISCV::BGE;894case RISCVCC::COND_LTU:895return RISCV::BLTU;896case RISCVCC::COND_GEU:897return RISCV::BGEU;898}899}900901const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC,902bool Imm) const {903return get(RISCVCC::getBrCond(CC, Imm));904}905906RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {907switch (CC) {908default:909llvm_unreachable("Unrecognized conditional branch");910case RISCVCC::COND_EQ:911return RISCVCC::COND_NE;912case RISCVCC::COND_NE:913return RISCVCC::COND_EQ;914case RISCVCC::COND_LT:915return RISCVCC::COND_GE;916case RISCVCC::COND_GE:917return RISCVCC::COND_LT;918case RISCVCC::COND_LTU:919return RISCVCC::COND_GEU;920case RISCVCC::COND_GEU:921return RISCVCC::COND_LTU;922}923}924925bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,926MachineBasicBlock *&TBB,927MachineBasicBlock *&FBB,928SmallVectorImpl<MachineOperand> &Cond,929bool AllowModify) const {930TBB = FBB = nullptr;931Cond.clear();932933// If the block has no terminators, it just falls into the block after it.934MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();935if (I == MBB.end() || !isUnpredicatedTerminator(*I))936return false;937938// Count the number of terminators and find the first unconditional or939// indirect branch.940MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();941int NumTerminators = 0;942for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);943J++) {944NumTerminators++;945if (J->getDesc().isUnconditionalBranch() ||946J->getDesc().isIndirectBranch()) {947FirstUncondOrIndirectBr = J.getReverse();948}949}950951// If AllowModify is true, we can erase any terminators after952// FirstUncondOrIndirectBR.953if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {954while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {955std::next(FirstUncondOrIndirectBr)->eraseFromParent();956NumTerminators--;957}958I = FirstUncondOrIndirectBr;959}960961// We can't handle blocks that end in an indirect branch.962if (I->getDesc().isIndirectBranch())963return true;964965// We can't handle Generic branch opcodes from Global ISel.966if (I->isPreISelOpcode())967return true;968969// We can't handle blocks with more than 2 terminators.970if (NumTerminators > 2)971return true;972973// Handle a single unconditional branch.974if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {975TBB = getBranchDestBlock(*I);976return false;977}978979// Handle a single conditional branch.980if (NumTerminators == 1 && 
I->getDesc().isConditionalBranch()) {981parseCondBranch(*I, TBB, Cond);982return false;983}984985// Handle a conditional branch followed by an unconditional branch.986if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&987I->getDesc().isUnconditionalBranch()) {988parseCondBranch(*std::prev(I), TBB, Cond);989FBB = getBranchDestBlock(*I);990return false;991}992993// Otherwise, we can't handle this.994return true;995}996997unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,998int *BytesRemoved) const {999if (BytesRemoved)1000*BytesRemoved = 0;1001MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();1002if (I == MBB.end())1003return 0;10041005if (!I->getDesc().isUnconditionalBranch() &&1006!I->getDesc().isConditionalBranch())1007return 0;10081009// Remove the branch.1010if (BytesRemoved)1011*BytesRemoved += getInstSizeInBytes(*I);1012I->eraseFromParent();10131014I = MBB.end();10151016if (I == MBB.begin())1017return 1;1018--I;1019if (!I->getDesc().isConditionalBranch())1020return 1;10211022// Remove the branch.1023if (BytesRemoved)1024*BytesRemoved += getInstSizeInBytes(*I);1025I->eraseFromParent();1026return 2;1027}10281029// Inserts a branch into the end of the specific MachineBasicBlock, returning1030// the number of instructions inserted.1031unsigned RISCVInstrInfo::insertBranch(1032MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,1033ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {1034if (BytesAdded)1035*BytesAdded = 0;10361037// Shouldn't be a fall through.1038assert(TBB && "insertBranch must not be told to insert a fallthrough");1039assert((Cond.size() == 3 || Cond.size() == 0) &&1040"RISC-V branch conditions have two components!");10411042// Unconditional branch.1043if (Cond.empty()) {1044MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);1045if (BytesAdded)1046*BytesAdded += getInstSizeInBytes(MI);1047return 1;1048}10491050// Either a one or two-way conditional branch.1051auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());1052MachineInstr &CondMI = *BuildMI(&MBB, DL, getBrCond(CC, Cond[2].isImm()))1053.add(Cond[1])1054.add(Cond[2])1055.addMBB(TBB);1056if (BytesAdded)1057*BytesAdded += getInstSizeInBytes(CondMI);10581059// One-way conditional branch.1060if (!FBB)1061return 1;10621063// Two-way conditional branch.1064MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);1065if (BytesAdded)1066*BytesAdded += getInstSizeInBytes(MI);1067return 2;1068}10691070void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,1071MachineBasicBlock &DestBB,1072MachineBasicBlock &RestoreBB,1073const DebugLoc &DL, int64_t BrOffset,1074RegScavenger *RS) const {1075assert(RS && "RegScavenger required for long branching");1076assert(MBB.empty() &&1077"new block should be inserted for expanding unconditional branch");1078assert(MBB.pred_size() == 1);1079assert(RestoreBB.empty() &&1080"restore block should be inserted for restoring clobbered registers");10811082MachineFunction *MF = MBB.getParent();1083MachineRegisterInfo &MRI = MF->getRegInfo();1084RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();1085const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();10861087if (!isInt<32>(BrOffset))1088report_fatal_error(1089"Branch offsets outside of the signed 32-bit range not supported");10901091// FIXME: A virtual register must be used initially, as the register1092// scavenger won't work with empty blocks 
(SIInstrInfo::insertIndirectBranch1093// uses the same workaround).1094Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRJALRRegClass);1095auto II = MBB.end();1096// We may also update the jump target to RestoreBB later.1097MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))1098.addReg(ScratchReg, RegState::Define | RegState::Dead)1099.addMBB(&DestBB, RISCVII::MO_CALL);11001101RS->enterBasicBlockEnd(MBB);1102Register TmpGPR =1103RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),1104/*RestoreAfter=*/false, /*SpAdj=*/0,1105/*AllowSpill=*/false);1106if (TmpGPR != RISCV::NoRegister)1107RS->setRegUsed(TmpGPR);1108else {1109// The case when there is no scavenged register needs special handling.11101111// Pick s11 because it doesn't make a difference.1112TmpGPR = RISCV::X27;11131114int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();1115if (FrameIndex == -1)1116report_fatal_error("underestimated function size");11171118storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,1119&RISCV::GPRRegClass, TRI, Register());1120TRI->eliminateFrameIndex(std::prev(MI.getIterator()),1121/*SpAdj=*/0, /*FIOperandNum=*/1);11221123MI.getOperand(1).setMBB(&RestoreBB);11241125loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,1126&RISCV::GPRRegClass, TRI, Register());1127TRI->eliminateFrameIndex(RestoreBB.back(),1128/*SpAdj=*/0, /*FIOperandNum=*/1);1129}11301131MRI.replaceRegWith(ScratchReg, TmpGPR);1132MRI.clearVirtRegs();1133}11341135bool RISCVInstrInfo::reverseBranchCondition(1136SmallVectorImpl<MachineOperand> &Cond) const {1137assert((Cond.size() == 3) && "Invalid branch condition!");1138auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());1139Cond[0].setImm(getOppositeBranchCondition(CC));1140return false;1141}11421143bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {1144MachineBasicBlock *MBB = MI.getParent();1145MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();11461147MachineBasicBlock *TBB, *FBB;1148SmallVector<MachineOperand, 3> Cond;1149if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false))1150return false;11511152RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());1153assert(CC != RISCVCC::COND_INVALID);11541155if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)1156return false;11571158// For two constants C0 and C1 from1159// ```1160// li Y, C01161// li Z, C11162// ```1163// 1. if C1 = C0 + 11164// we can turn:1165// (a) blt Y, X -> bge X, Z1166// (b) bge Y, X -> blt X, Z1167//1168// 2. if C1 = C0 - 11169// we can turn:1170// (a) blt X, Y -> bge Z, X1171// (b) bge X, Y -> blt Z, X1172//1173// To make sure this optimization is really beneficial, we only1174// optimize for cases where Y had only one use (i.e. only used by the branch).11751176// Right now we only care about LI (i.e. 
ADDI x0, imm)1177auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {1178if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&1179MI->getOperand(1).getReg() == RISCV::X0) {1180Imm = MI->getOperand(2).getImm();1181return true;1182}1183return false;1184};1185// Either a load from immediate instruction or X0.1186auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {1187if (!Op.isReg())1188return false;1189Register Reg = Op.getReg();1190return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);1191};11921193MachineOperand &LHS = MI.getOperand(0);1194MachineOperand &RHS = MI.getOperand(1);1195// Try to find the register for constant Z; return1196// invalid register otherwise.1197auto searchConst = [&](int64_t C1) -> Register {1198MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();1199auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {1200int64_t Imm;1201return isLoadImm(&I, Imm) && Imm == C1 &&1202I.getOperand(0).getReg().isVirtual();1203});1204if (DefC1 != E)1205return DefC1->getOperand(0).getReg();12061207return Register();1208};12091210bool Modify = false;1211int64_t C0;1212if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {1213// Might be case 1.1214// Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need1215// to worry about unsigned overflow here)1216if (C0 < INT64_MAX)1217if (Register RegZ = searchConst(C0 + 1)) {1218reverseBranchCondition(Cond);1219Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);1220Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);1221// We might extend the live range of Z, clear its kill flag to1222// account for this.1223MRI.clearKillFlags(RegZ);1224Modify = true;1225}1226} else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) {1227// Might be case 2.1228// For unsigned cases, we don't want C1 to wrap back to UINT64_MAX1229// when C0 is zero.1230if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)1231if (Register RegZ = searchConst(C0 - 1)) {1232reverseBranchCondition(Cond);1233Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);1234Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);1235// We might extend the live range of Z, clear its kill flag to1236// account for this.1237MRI.clearKillFlags(RegZ);1238Modify = true;1239}1240}12411242if (!Modify)1243return false;12441245// Build the new branch and remove the old one.1246BuildMI(*MBB, MI, MI.getDebugLoc(),1247getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm())))1248.add(Cond[1])1249.add(Cond[2])1250.addMBB(TBB);1251MI.eraseFromParent();12521253return true;1254}12551256MachineBasicBlock *1257RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {1258assert(MI.getDesc().isBranch() && "Unexpected opcode!");1259// The branch target is always the last operand.1260int NumOp = MI.getNumExplicitOperands();1261return MI.getOperand(NumOp - 1).getMBB();1262}12631264bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,1265int64_t BrOffset) const {1266unsigned XLen = STI.getXLen();1267// Ideally we could determine the supported branch offset from the1268// RISCVII::FormMask, but this can't be used for Pseudo instructions like1269// PseudoBR.1270switch (BranchOp) {1271default:1272llvm_unreachable("Unexpected opcode!");1273case RISCV::BEQ:1274case RISCV::BNE:1275case RISCV::BLT:1276case RISCV::BGE:1277case RISCV::BLTU:1278case RISCV::BGEU:1279case RISCV::CV_BEQIMM:1280case RISCV::CV_BNEIMM:1281return isIntN(13, BrOffset);1282case 
RISCV::JAL:1283case RISCV::PseudoBR:1284return isIntN(21, BrOffset);1285case RISCV::PseudoJump:1286return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));1287}1288}12891290// If the operation has a predicated pseudo instruction, return the pseudo1291// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.1292// TODO: Support more operations.1293unsigned getPredicatedOpcode(unsigned Opcode) {1294switch (Opcode) {1295case RISCV::ADD: return RISCV::PseudoCCADD; break;1296case RISCV::SUB: return RISCV::PseudoCCSUB; break;1297case RISCV::SLL: return RISCV::PseudoCCSLL; break;1298case RISCV::SRL: return RISCV::PseudoCCSRL; break;1299case RISCV::SRA: return RISCV::PseudoCCSRA; break;1300case RISCV::AND: return RISCV::PseudoCCAND; break;1301case RISCV::OR: return RISCV::PseudoCCOR; break;1302case RISCV::XOR: return RISCV::PseudoCCXOR; break;13031304case RISCV::ADDI: return RISCV::PseudoCCADDI; break;1305case RISCV::SLLI: return RISCV::PseudoCCSLLI; break;1306case RISCV::SRLI: return RISCV::PseudoCCSRLI; break;1307case RISCV::SRAI: return RISCV::PseudoCCSRAI; break;1308case RISCV::ANDI: return RISCV::PseudoCCANDI; break;1309case RISCV::ORI: return RISCV::PseudoCCORI; break;1310case RISCV::XORI: return RISCV::PseudoCCXORI; break;13111312case RISCV::ADDW: return RISCV::PseudoCCADDW; break;1313case RISCV::SUBW: return RISCV::PseudoCCSUBW; break;1314case RISCV::SLLW: return RISCV::PseudoCCSLLW; break;1315case RISCV::SRLW: return RISCV::PseudoCCSRLW; break;1316case RISCV::SRAW: return RISCV::PseudoCCSRAW; break;13171318case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break;1319case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break;1320case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break;1321case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break;13221323case RISCV::ANDN: return RISCV::PseudoCCANDN; break;1324case RISCV::ORN: return RISCV::PseudoCCORN; break;1325case RISCV::XNOR: return RISCV::PseudoCCXNOR; break;1326}13271328return RISCV::INSTRUCTION_LIST_END;1329}13301331/// Identify instructions that can be folded into a CCMOV instruction, and1332/// return the defining instruction.1333static MachineInstr *canFoldAsPredicatedOp(Register Reg,1334const MachineRegisterInfo &MRI,1335const TargetInstrInfo *TII) {1336if (!Reg.isVirtual())1337return nullptr;1338if (!MRI.hasOneNonDBGUse(Reg))1339return nullptr;1340MachineInstr *MI = MRI.getVRegDef(Reg);1341if (!MI)1342return nullptr;1343// Check if MI can be predicated and folded into the CCMOV.1344if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)1345return nullptr;1346// Don't predicate li idiom.1347if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&1348MI->getOperand(1).getReg() == RISCV::X0)1349return nullptr;1350// Check if MI has any other defs or physreg uses.1351for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {1352// Reject frame index operands, PEI can't handle the predicated pseudos.1353if (MO.isFI() || MO.isCPI() || MO.isJTI())1354return nullptr;1355if (!MO.isReg())1356continue;1357// MI can't have any tied operands, that would conflict with predication.1358if (MO.isTied())1359return nullptr;1360if (MO.isDef())1361return nullptr;1362// Allow constant physregs.1363if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg()))1364return nullptr;1365}1366bool DontMoveAcrossStores = true;1367if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))1368return nullptr;1369return MI;1370}13711372bool RISCVInstrInfo::analyzeSelect(const MachineInstr 
&MI,1373SmallVectorImpl<MachineOperand> &Cond,1374unsigned &TrueOp, unsigned &FalseOp,1375bool &Optimizable) const {1376assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&1377"Unknown select instruction");1378// CCMOV operands:1379// 0: Def.1380// 1: LHS of compare.1381// 2: RHS of compare.1382// 3: Condition code.1383// 4: False use.1384// 5: True use.1385TrueOp = 5;1386FalseOp = 4;1387Cond.push_back(MI.getOperand(1));1388Cond.push_back(MI.getOperand(2));1389Cond.push_back(MI.getOperand(3));1390// We can only fold when we support short forward branch opt.1391Optimizable = STI.hasShortForwardBranchOpt();1392return false;1393}13941395MachineInstr *1396RISCVInstrInfo::optimizeSelect(MachineInstr &MI,1397SmallPtrSetImpl<MachineInstr *> &SeenMIs,1398bool PreferFalse) const {1399assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&1400"Unknown select instruction");1401if (!STI.hasShortForwardBranchOpt())1402return nullptr;14031404MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();1405MachineInstr *DefMI =1406canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);1407bool Invert = !DefMI;1408if (!DefMI)1409DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);1410if (!DefMI)1411return nullptr;14121413// Find new register class to use.1414MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);1415Register DestReg = MI.getOperand(0).getReg();1416const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());1417if (!MRI.constrainRegClass(DestReg, PreviousClass))1418return nullptr;14191420unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());1421assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");14221423// Create a new predicated version of DefMI.1424MachineInstrBuilder NewMI =1425BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);14261427// Copy the condition portion.1428NewMI.add(MI.getOperand(1));1429NewMI.add(MI.getOperand(2));14301431// Add condition code, inverting if necessary.1432auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());1433if (Invert)1434CC = RISCVCC::getOppositeBranchCondition(CC);1435NewMI.addImm(CC);14361437// Copy the false register.1438NewMI.add(FalseReg);14391440// Copy all the DefMI operands.1441const MCInstrDesc &DefDesc = DefMI->getDesc();1442for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)1443NewMI.add(DefMI->getOperand(i));14441445// Update SeenMIs set: register newly created MI and erase removed DefMI.1446SeenMIs.insert(NewMI);1447SeenMIs.erase(DefMI);14481449// If MI is inside a loop, and DefMI is outside the loop, then kill flags on1450// DefMI would be invalid when tranferred inside the loop. 
Checking for a1451// loop is expensive, but at least remove kill flags if they are in different1452// BBs.1453if (DefMI->getParent() != MI.getParent())1454NewMI->clearKillInfo();14551456// The caller will erase MI, but not DefMI.1457DefMI->eraseFromParent();1458return NewMI;1459}14601461unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {1462if (MI.isMetaInstruction())1463return 0;14641465unsigned Opcode = MI.getOpcode();14661467if (Opcode == TargetOpcode::INLINEASM ||1468Opcode == TargetOpcode::INLINEASM_BR) {1469const MachineFunction &MF = *MI.getParent()->getParent();1470return getInlineAsmLength(MI.getOperand(0).getSymbolName(),1471*MF.getTarget().getMCAsmInfo());1472}14731474if (!MI.memoperands_empty()) {1475MachineMemOperand *MMO = *(MI.memoperands_begin());1476if (STI.hasStdExtZihintntl() && MMO->isNonTemporal()) {1477if (STI.hasStdExtCOrZca() && STI.enableRVCHintInstrs()) {1478if (isCompressibleInst(MI, STI))1479return 4; // c.ntl.all + c.load/c.store1480return 6; // c.ntl.all + load/store1481}1482return 8; // ntl.all + load/store1483}1484}14851486if (Opcode == TargetOpcode::BUNDLE)1487return getInstBundleLength(MI);14881489if (MI.getParent() && MI.getParent()->getParent()) {1490if (isCompressibleInst(MI, STI))1491return 2;1492}14931494switch (Opcode) {1495case TargetOpcode::STACKMAP:1496// The upper bound for a stackmap intrinsic is the full length of its shadow1497return StackMapOpers(&MI).getNumPatchBytes();1498case TargetOpcode::PATCHPOINT:1499// The size of the patchpoint intrinsic is the number of bytes requested1500return PatchPointOpers(&MI).getNumPatchBytes();1501case TargetOpcode::STATEPOINT: {1502// The size of the statepoint intrinsic is the number of bytes requested1503unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes();1504// No patch bytes means at most a PseudoCall is emitted1505return std::max(NumBytes, 8U);1506}1507default:1508return get(Opcode).getSize();1509}1510}15111512unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {1513unsigned Size = 0;1514MachineBasicBlock::const_instr_iterator I = MI.getIterator();1515MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();1516while (++I != E && I->isInsideBundle()) {1517assert(!I->isBundle() && "No nested bundle!");1518Size += getInstSizeInBytes(*I);1519}1520return Size;1521}15221523bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {1524const unsigned Opcode = MI.getOpcode();1525switch (Opcode) {1526default:1527break;1528case RISCV::FSGNJ_D:1529case RISCV::FSGNJ_S:1530case RISCV::FSGNJ_H:1531case RISCV::FSGNJ_D_INX:1532case RISCV::FSGNJ_D_IN32X:1533case RISCV::FSGNJ_S_INX:1534case RISCV::FSGNJ_H_INX:1535// The canonical floating-point move is fsgnj rd, rs, rs.1536return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&1537MI.getOperand(1).getReg() == MI.getOperand(2).getReg();1538case RISCV::ADDI:1539case RISCV::ORI:1540case RISCV::XORI:1541return (MI.getOperand(1).isReg() &&1542MI.getOperand(1).getReg() == RISCV::X0) ||1543(MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);1544}1545return MI.isAsCheapAsAMove();1546}15471548std::optional<DestSourcePair>1549RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {1550if (MI.isMoveReg())1551return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};1552switch (MI.getOpcode()) {1553default:1554break;1555case RISCV::ADDI:1556// Operand 1 can be a frameindex but callers expect registers1557if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() 
&&1558MI.getOperand(2).getImm() == 0)1559return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};1560break;1561case RISCV::FSGNJ_D:1562case RISCV::FSGNJ_S:1563case RISCV::FSGNJ_H:1564case RISCV::FSGNJ_D_INX:1565case RISCV::FSGNJ_D_IN32X:1566case RISCV::FSGNJ_S_INX:1567case RISCV::FSGNJ_H_INX:1568// The canonical floating-point move is fsgnj rd, rs, rs.1569if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&1570MI.getOperand(1).getReg() == MI.getOperand(2).getReg())1571return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};1572break;1573}1574return std::nullopt;1575}15761577MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {1578if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {1579// The option is unused. Choose Local strategy only for in-order cores. When1580// scheduling model is unspecified, use MinInstrCount strategy as more1581// generic one.1582const auto &SchedModel = STI.getSchedModel();1583return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())1584? MachineTraceStrategy::TS_MinInstrCount1585: MachineTraceStrategy::TS_Local;1586}1587// The strategy was forced by the option.1588return ForceMachineCombinerStrategy;1589}15901591void RISCVInstrInfo::finalizeInsInstrs(1592MachineInstr &Root, unsigned &Pattern,1593SmallVectorImpl<MachineInstr *> &InsInstrs) const {1594int16_t FrmOpIdx =1595RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);1596if (FrmOpIdx < 0) {1597assert(all_of(InsInstrs,1598[](MachineInstr *MI) {1599return RISCV::getNamedOperandIdx(MI->getOpcode(),1600RISCV::OpName::frm) < 0;1601}) &&1602"New instructions require FRM whereas the old one does not have it");1603return;1604}16051606const MachineOperand &FRM = Root.getOperand(FrmOpIdx);1607MachineFunction &MF = *Root.getMF();16081609for (auto *NewMI : InsInstrs) {1610// We'd already added the FRM operand.1611if (static_cast<unsigned>(RISCV::getNamedOperandIdx(1612NewMI->getOpcode(), RISCV::OpName::frm)) != NewMI->getNumOperands())1613continue;1614MachineInstrBuilder MIB(MF, NewMI);1615MIB.add(FRM);1616if (FRM.getImm() == RISCVFPRndMode::DYN)1617MIB.addUse(RISCV::FRM, RegState::Implicit);1618}1619}16201621static bool isFADD(unsigned Opc) {1622switch (Opc) {1623default:1624return false;1625case RISCV::FADD_H:1626case RISCV::FADD_S:1627case RISCV::FADD_D:1628return true;1629}1630}16311632static bool isFSUB(unsigned Opc) {1633switch (Opc) {1634default:1635return false;1636case RISCV::FSUB_H:1637case RISCV::FSUB_S:1638case RISCV::FSUB_D:1639return true;1640}1641}16421643static bool isFMUL(unsigned Opc) {1644switch (Opc) {1645default:1646return false;1647case RISCV::FMUL_H:1648case RISCV::FMUL_S:1649case RISCV::FMUL_D:1650return true;1651}1652}16531654bool RISCVInstrInfo::isVectorAssociativeAndCommutative(const MachineInstr &Inst,1655bool Invert) const {1656#define OPCODE_LMUL_CASE(OPC) \1657case RISCV::OPC##_M1: \1658case RISCV::OPC##_M2: \1659case RISCV::OPC##_M4: \1660case RISCV::OPC##_M8: \1661case RISCV::OPC##_MF2: \1662case RISCV::OPC##_MF4: \1663case RISCV::OPC##_MF816641665#define OPCODE_LMUL_MASK_CASE(OPC) \1666case RISCV::OPC##_M1_MASK: \1667case RISCV::OPC##_M2_MASK: \1668case RISCV::OPC##_M4_MASK: \1669case RISCV::OPC##_M8_MASK: \1670case RISCV::OPC##_MF2_MASK: \1671case RISCV::OPC##_MF4_MASK: \1672case RISCV::OPC##_MF8_MASK16731674unsigned Opcode = Inst.getOpcode();1675if (Invert) {1676if (auto InvOpcode = getInverseOpcode(Opcode))1677Opcode = *InvOpcode;1678else1679return false;1680}16811682// clang-format off1683switch (Opcode) 
{1684default:1685return false;1686OPCODE_LMUL_CASE(PseudoVADD_VV):1687OPCODE_LMUL_MASK_CASE(PseudoVADD_VV):1688OPCODE_LMUL_CASE(PseudoVMUL_VV):1689OPCODE_LMUL_MASK_CASE(PseudoVMUL_VV):1690return true;1691}1692// clang-format on16931694#undef OPCODE_LMUL_MASK_CASE1695#undef OPCODE_LMUL_CASE1696}16971698bool RISCVInstrInfo::areRVVInstsReassociable(const MachineInstr &Root,1699const MachineInstr &Prev) const {1700if (!areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode()))1701return false;17021703assert(Root.getMF() == Prev.getMF());1704const MachineRegisterInfo *MRI = &Root.getMF()->getRegInfo();1705const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();17061707// Make sure vtype operands are also the same.1708const MCInstrDesc &Desc = get(Root.getOpcode());1709const uint64_t TSFlags = Desc.TSFlags;17101711auto checkImmOperand = [&](unsigned OpIdx) {1712return Root.getOperand(OpIdx).getImm() == Prev.getOperand(OpIdx).getImm();1713};17141715auto checkRegOperand = [&](unsigned OpIdx) {1716return Root.getOperand(OpIdx).getReg() == Prev.getOperand(OpIdx).getReg();1717};17181719// PassThru1720// TODO: Potentially we can loosen the condition to consider Root to be1721// associable with Prev if Root has NoReg as passthru. In which case we1722// also need to loosen the condition on vector policies between these.1723if (!checkRegOperand(1))1724return false;17251726// SEW1727if (RISCVII::hasSEWOp(TSFlags) &&1728!checkImmOperand(RISCVII::getSEWOpNum(Desc)))1729return false;17301731// Mask1732if (RISCVII::usesMaskPolicy(TSFlags)) {1733const MachineBasicBlock *MBB = Root.getParent();1734const MachineBasicBlock::const_reverse_iterator It1(&Root);1735const MachineBasicBlock::const_reverse_iterator It2(&Prev);1736Register MI1VReg;17371738bool SeenMI2 = false;1739for (auto End = MBB->rend(), It = It1; It != End; ++It) {1740if (It == It2) {1741SeenMI2 = true;1742if (!MI1VReg.isValid())1743// There is no V0 def between Root and Prev; they're sharing the1744// same V0.1745break;1746}17471748if (It->modifiesRegister(RISCV::V0, TRI)) {1749Register SrcReg = It->getOperand(1).getReg();1750// If it's not VReg it'll be more difficult to track its defs, so1751// bailing out here just to be safe.1752if (!SrcReg.isVirtual())1753return false;17541755if (!MI1VReg.isValid()) {1756// This is the V0 def for Root.1757MI1VReg = SrcReg;1758continue;1759}17601761// Some random mask updates.1762if (!SeenMI2)1763continue;17641765// This is the V0 def for Prev; check if it's the same as that of1766// Root.1767if (MI1VReg != SrcReg)1768return false;1769else1770break;1771}1772}17731774// If we haven't encountered Prev, it's likely that this function was1775// called in a wrong way (e.g. 
Root is before Prev).1776assert(SeenMI2 && "Prev is expected to appear before Root");1777}17781779// Tail / Mask policies1780if (RISCVII::hasVecPolicyOp(TSFlags) &&1781!checkImmOperand(RISCVII::getVecPolicyOpNum(Desc)))1782return false;17831784// VL1785if (RISCVII::hasVLOp(TSFlags)) {1786unsigned OpIdx = RISCVII::getVLOpNum(Desc);1787const MachineOperand &Op1 = Root.getOperand(OpIdx);1788const MachineOperand &Op2 = Prev.getOperand(OpIdx);1789if (Op1.getType() != Op2.getType())1790return false;1791switch (Op1.getType()) {1792case MachineOperand::MO_Register:1793if (Op1.getReg() != Op2.getReg())1794return false;1795break;1796case MachineOperand::MO_Immediate:1797if (Op1.getImm() != Op2.getImm())1798return false;1799break;1800default:1801llvm_unreachable("Unrecognized VL operand type");1802}1803}18041805// Rounding modes1806if (RISCVII::hasRoundModeOp(TSFlags) &&1807!checkImmOperand(RISCVII::getVLOpNum(Desc) - 1))1808return false;18091810return true;1811}18121813// Most of our RVV pseudos have passthru operand, so the real operands1814// start from index = 2.1815bool RISCVInstrInfo::hasReassociableVectorSibling(const MachineInstr &Inst,1816bool &Commuted) const {1817const MachineBasicBlock *MBB = Inst.getParent();1818const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();1819assert(RISCVII::isFirstDefTiedToFirstUse(get(Inst.getOpcode())) &&1820"Expect the present of passthrough operand.");1821MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());1822MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(3).getReg());18231824// If only one operand has the same or inverse opcode and it's the second1825// source operand, the operands must be commuted.1826Commuted = !areRVVInstsReassociable(Inst, *MI1) &&1827areRVVInstsReassociable(Inst, *MI2);1828if (Commuted)1829std::swap(MI1, MI2);18301831return areRVVInstsReassociable(Inst, *MI1) &&1832(isVectorAssociativeAndCommutative(*MI1) ||1833isVectorAssociativeAndCommutative(*MI1, /* Invert */ true)) &&1834hasReassociableOperands(*MI1, MBB) &&1835MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());1836}18371838bool RISCVInstrInfo::hasReassociableOperands(1839const MachineInstr &Inst, const MachineBasicBlock *MBB) const {1840if (!isVectorAssociativeAndCommutative(Inst) &&1841!isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))1842return TargetInstrInfo::hasReassociableOperands(Inst, MBB);18431844const MachineOperand &Op1 = Inst.getOperand(2);1845const MachineOperand &Op2 = Inst.getOperand(3);1846const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();18471848// We need virtual register definitions for the operands that we will1849// reassociate.1850MachineInstr *MI1 = nullptr;1851MachineInstr *MI2 = nullptr;1852if (Op1.isReg() && Op1.getReg().isVirtual())1853MI1 = MRI.getUniqueVRegDef(Op1.getReg());1854if (Op2.isReg() && Op2.getReg().isVirtual())1855MI2 = MRI.getUniqueVRegDef(Op2.getReg());18561857// And at least one operand must be defined in MBB.1858return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB);1859}18601861void RISCVInstrInfo::getReassociateOperandIndices(1862const MachineInstr &Root, unsigned Pattern,1863std::array<unsigned, 5> &OperandIndices) const {1864TargetInstrInfo::getReassociateOperandIndices(Root, Pattern, OperandIndices);1865if (RISCV::getRVVMCOpcode(Root.getOpcode())) {1866// Skip the passthrough operand, so increment all indices by one.1867for (unsigned I = 0; I < 5; ++I)1868++OperandIndices[I];1869}1870}18711872bool RISCVInstrInfo::hasReassociableSibling(const 
MachineInstr &Inst,1873bool &Commuted) const {1874if (isVectorAssociativeAndCommutative(Inst) ||1875isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))1876return hasReassociableVectorSibling(Inst, Commuted);18771878if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted))1879return false;18801881const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();1882unsigned OperandIdx = Commuted ? 2 : 1;1883const MachineInstr &Sibling =1884*MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg());18851886int16_t InstFrmOpIdx =1887RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm);1888int16_t SiblingFrmOpIdx =1889RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm);18901891return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||1892RISCV::hasEqualFRM(Inst, Sibling);1893}18941895bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,1896bool Invert) const {1897if (isVectorAssociativeAndCommutative(Inst, Invert))1898return true;18991900unsigned Opc = Inst.getOpcode();1901if (Invert) {1902auto InverseOpcode = getInverseOpcode(Opc);1903if (!InverseOpcode)1904return false;1905Opc = *InverseOpcode;1906}19071908if (isFADD(Opc) || isFMUL(Opc))1909return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&1910Inst.getFlag(MachineInstr::MIFlag::FmNsz);19111912switch (Opc) {1913default:1914return false;1915case RISCV::ADD:1916case RISCV::ADDW:1917case RISCV::AND:1918case RISCV::OR:1919case RISCV::XOR:1920// From RISC-V ISA spec, if both the high and low bits of the same product1921// are required, then the recommended code sequence is:1922//1923// MULH[[S]U] rdh, rs1, rs21924// MUL rdl, rs1, rs21925// (source register specifiers must be in same order and rdh cannot be the1926// same as rs1 or rs2)1927//1928// Microarchitectures can then fuse these into a single multiply operation1929// instead of performing two separate multiplies.1930// MachineCombiner may reassociate MUL operands and lose the fusion1931// opportunity.1932case RISCV::MUL:1933case RISCV::MULW:1934case RISCV::MIN:1935case RISCV::MINU:1936case RISCV::MAX:1937case RISCV::MAXU:1938case RISCV::FMIN_H:1939case RISCV::FMIN_S:1940case RISCV::FMIN_D:1941case RISCV::FMAX_H:1942case RISCV::FMAX_S:1943case RISCV::FMAX_D:1944return true;1945}19461947return false;1948}19491950std::optional<unsigned>1951RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const {1952#define RVV_OPC_LMUL_CASE(OPC, INV) \1953case RISCV::OPC##_M1: \1954return RISCV::INV##_M1; \1955case RISCV::OPC##_M2: \1956return RISCV::INV##_M2; \1957case RISCV::OPC##_M4: \1958return RISCV::INV##_M4; \1959case RISCV::OPC##_M8: \1960return RISCV::INV##_M8; \1961case RISCV::OPC##_MF2: \1962return RISCV::INV##_MF2; \1963case RISCV::OPC##_MF4: \1964return RISCV::INV##_MF4; \1965case RISCV::OPC##_MF8: \1966return RISCV::INV##_MF819671968#define RVV_OPC_LMUL_MASK_CASE(OPC, INV) \1969case RISCV::OPC##_M1_MASK: \1970return RISCV::INV##_M1_MASK; \1971case RISCV::OPC##_M2_MASK: \1972return RISCV::INV##_M2_MASK; \1973case RISCV::OPC##_M4_MASK: \1974return RISCV::INV##_M4_MASK; \1975case RISCV::OPC##_M8_MASK: \1976return RISCV::INV##_M8_MASK; \1977case RISCV::OPC##_MF2_MASK: \1978return RISCV::INV##_MF2_MASK; \1979case RISCV::OPC##_MF4_MASK: \1980return RISCV::INV##_MF4_MASK; \1981case RISCV::OPC##_MF8_MASK: \1982return RISCV::INV##_MF8_MASK19831984switch (Opcode) {1985default:1986return std::nullopt;1987case RISCV::FADD_H:1988return RISCV::FSUB_H;1989case RISCV::FADD_S:1990return RISCV::FSUB_S;1991case RISCV::FADD_D:1992return RISCV::FSUB_D;1993case 
RISCV::FSUB_H:1994return RISCV::FADD_H;1995case RISCV::FSUB_S:1996return RISCV::FADD_S;1997case RISCV::FSUB_D:1998return RISCV::FADD_D;1999case RISCV::ADD:2000return RISCV::SUB;2001case RISCV::SUB:2002return RISCV::ADD;2003case RISCV::ADDW:2004return RISCV::SUBW;2005case RISCV::SUBW:2006return RISCV::ADDW;2007// clang-format off2008RVV_OPC_LMUL_CASE(PseudoVADD_VV, PseudoVSUB_VV);2009RVV_OPC_LMUL_MASK_CASE(PseudoVADD_VV, PseudoVSUB_VV);2010RVV_OPC_LMUL_CASE(PseudoVSUB_VV, PseudoVADD_VV);2011RVV_OPC_LMUL_MASK_CASE(PseudoVSUB_VV, PseudoVADD_VV);2012// clang-format on2013}20142015#undef RVV_OPC_LMUL_MASK_CASE2016#undef RVV_OPC_LMUL_CASE2017}20182019static bool canCombineFPFusedMultiply(const MachineInstr &Root,2020const MachineOperand &MO,2021bool DoRegPressureReduce) {2022if (!MO.isReg() || !MO.getReg().isVirtual())2023return false;2024const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();2025MachineInstr *MI = MRI.getVRegDef(MO.getReg());2026if (!MI || !isFMUL(MI->getOpcode()))2027return false;20282029if (!Root.getFlag(MachineInstr::MIFlag::FmContract) ||2030!MI->getFlag(MachineInstr::MIFlag::FmContract))2031return false;20322033// Try combining even if fmul has more than one use as it eliminates2034// dependency between fadd(fsub) and fmul. However, it can extend liveranges2035// for fmul operands, so reject the transformation in register pressure2036// reduction mode.2037if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))2038return false;20392040// Do not combine instructions from different basic blocks.2041if (Root.getParent() != MI->getParent())2042return false;2043return RISCV::hasEqualFRM(Root, *MI);2044}20452046static bool getFPFusedMultiplyPatterns(MachineInstr &Root,2047SmallVectorImpl<unsigned> &Patterns,2048bool DoRegPressureReduce) {2049unsigned Opc = Root.getOpcode();2050bool IsFAdd = isFADD(Opc);2051if (!IsFAdd && !isFSUB(Opc))2052return false;2053bool Added = false;2054if (canCombineFPFusedMultiply(Root, Root.getOperand(1),2055DoRegPressureReduce)) {2056Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_AX2057: RISCVMachineCombinerPattern::FMSUB);2058Added = true;2059}2060if (canCombineFPFusedMultiply(Root, Root.getOperand(2),2061DoRegPressureReduce)) {2062Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_XA2063: RISCVMachineCombinerPattern::FNMSUB);2064Added = true;2065}2066return Added;2067}20682069static bool getFPPatterns(MachineInstr &Root,2070SmallVectorImpl<unsigned> &Patterns,2071bool DoRegPressureReduce) {2072return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);2073}20742075/// Utility routine that checks if \param MO is defined by an2076/// \param CombineOpc instruction in the basic block \param MBB2077static const MachineInstr *canCombine(const MachineBasicBlock &MBB,2078const MachineOperand &MO,2079unsigned CombineOpc) {2080const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();2081const MachineInstr *MI = nullptr;20822083if (MO.isReg() && MO.getReg().isVirtual())2084MI = MRI.getUniqueVRegDef(MO.getReg());2085// And it needs to be in the trace (otherwise, it won't have a depth).2086if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)2087return nullptr;2088// Must only used by the user we combine with.2089if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))2090return nullptr;20912092return MI;2093}20942095/// Utility routine that checks if \param MO is defined by a SLLI in \param2096/// MBB that can be combined by splitting across 2 SHXADD instructions. 
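// Note: the check below only accepts an inner SLLI whose shift amount is at
// least OuterShiftAmt and exceeds it by no more than 3, since the leftover
// shift has to be expressible as a plain ADD or a SH1ADD/SH2ADD/SH3ADD when
// the expression is rebuilt (see genShXAddAddShift further down).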
The2097/// first SHXADD shift amount is given by \param OuterShiftAmt.2098static bool canCombineShiftIntoShXAdd(const MachineBasicBlock &MBB,2099const MachineOperand &MO,2100unsigned OuterShiftAmt) {2101const MachineInstr *ShiftMI = canCombine(MBB, MO, RISCV::SLLI);2102if (!ShiftMI)2103return false;21042105unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();2106if (InnerShiftAmt < OuterShiftAmt || (InnerShiftAmt - OuterShiftAmt) > 3)2107return false;21082109return true;2110}21112112// Returns the shift amount from a SHXADD instruction. Returns 0 if the2113// instruction is not a SHXADD.2114static unsigned getSHXADDShiftAmount(unsigned Opc) {2115switch (Opc) {2116default:2117return 0;2118case RISCV::SH1ADD:2119return 1;2120case RISCV::SH2ADD:2121return 2;2122case RISCV::SH3ADD:2123return 3;2124}2125}21262127// Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into2128// (sh3add (sh2add Y, Z), X).2129static bool getSHXADDPatterns(const MachineInstr &Root,2130SmallVectorImpl<unsigned> &Patterns) {2131unsigned ShiftAmt = getSHXADDShiftAmount(Root.getOpcode());2132if (!ShiftAmt)2133return false;21342135const MachineBasicBlock &MBB = *Root.getParent();21362137const MachineInstr *AddMI = canCombine(MBB, Root.getOperand(2), RISCV::ADD);2138if (!AddMI)2139return false;21402141bool Found = false;2142if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(1), ShiftAmt)) {2143Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1);2144Found = true;2145}2146if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(2), ShiftAmt)) {2147Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2);2148Found = true;2149}21502151return Found;2152}21532154CombinerObjective RISCVInstrInfo::getCombinerObjective(unsigned Pattern) const {2155switch (Pattern) {2156case RISCVMachineCombinerPattern::FMADD_AX:2157case RISCVMachineCombinerPattern::FMADD_XA:2158case RISCVMachineCombinerPattern::FMSUB:2159case RISCVMachineCombinerPattern::FNMSUB:2160return CombinerObjective::MustReduceDepth;2161default:2162return TargetInstrInfo::getCombinerObjective(Pattern);2163}2164}21652166bool RISCVInstrInfo::getMachineCombinerPatterns(2167MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,2168bool DoRegPressureReduce) const {21692170if (getFPPatterns(Root, Patterns, DoRegPressureReduce))2171return true;21722173if (getSHXADDPatterns(Root, Patterns))2174return true;21752176return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,2177DoRegPressureReduce);2178}21792180static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern) {2181switch (RootOpc) {2182default:2183llvm_unreachable("Unexpected opcode");2184case RISCV::FADD_H:2185return RISCV::FMADD_H;2186case RISCV::FADD_S:2187return RISCV::FMADD_S;2188case RISCV::FADD_D:2189return RISCV::FMADD_D;2190case RISCV::FSUB_H:2191return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_H2192: RISCV::FNMSUB_H;2193case RISCV::FSUB_S:2194return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_S2195: RISCV::FNMSUB_S;2196case RISCV::FSUB_D:2197return Pattern == RISCVMachineCombinerPattern::FMSUB ? 
RISCV::FMSUB_D
                                                          : RISCV::FNMSUB_D;
  }
}

static unsigned getAddendOperandIdx(unsigned Pattern) {
  switch (Pattern) {
  default:
    llvm_unreachable("Unexpected pattern");
  case RISCVMachineCombinerPattern::FMADD_AX:
  case RISCVMachineCombinerPattern::FMSUB:
    return 2;
  case RISCVMachineCombinerPattern::FMADD_XA:
  case RISCVMachineCombinerPattern::FNMSUB:
    return 1;
  }
}

static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
                                   unsigned Pattern,
                                   SmallVectorImpl<MachineInstr *> &InsInstrs,
                                   SmallVectorImpl<MachineInstr *> &DelInstrs) {
  MachineFunction *MF = Root.getMF();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  MachineOperand &Mul1 = Prev.getOperand(1);
  MachineOperand &Mul2 = Prev.getOperand(2);
  MachineOperand &Dst = Root.getOperand(0);
  MachineOperand &Addend = Root.getOperand(getAddendOperandIdx(Pattern));

  Register DstReg = Dst.getReg();
  unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern);
  uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
  DebugLoc MergedLoc =
      DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc());

  bool Mul1IsKill = Mul1.isKill();
  bool Mul2IsKill = Mul2.isKill();
  bool AddendIsKill = Addend.isKill();

  // We need to clear kill flags since we may be extending the live range past
  // a kill. If the mul had kill flags, we can preserve those since we know
  // where the previous range stopped.
  MRI.clearKillFlags(Mul1.getReg());
  MRI.clearKillFlags(Mul2.getReg());

  MachineInstrBuilder MIB =
      BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg)
          .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill))
          .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill))
          .addReg(Addend.getReg(), getKillRegState(AddendIsKill))
          .setMIFlags(IntersectedFlags);

  InsInstrs.push_back(MIB);
  if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg()))
    DelInstrs.push_back(&Prev);
  DelInstrs.push_back(&Root);
}

// Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to
// (sh3add (sh2add Y, Z), X) if the shift amount can be split across two
// shXadd instructions.
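// As a concrete illustration of the rewrite (placeholder registers, assuming
// a0 = X, a1 = Y, a2 = Z):
//
//   slli   t0, a1, 5          ; Y << 5
//   add    t1, a0, t0         ; X + (Y << 5)
//   sh3add a3, a2, t1         ; (Z << 3) + X + (Y << 5)
//
// becomes
//
//   sh2add t2, a1, a2         ; (Y << 2) + Z
//   sh3add a3, t2, a0         ; (((Y << 2) + Z) << 3) + X
//
// which computes the same value because the inner shift of 5 splits as 3 + 2.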
The outer shXadd keeps its original opcode.2260static void2261genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,2262SmallVectorImpl<MachineInstr *> &InsInstrs,2263SmallVectorImpl<MachineInstr *> &DelInstrs,2264DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {2265MachineFunction *MF = Root.getMF();2266MachineRegisterInfo &MRI = MF->getRegInfo();2267const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();22682269unsigned OuterShiftAmt = getSHXADDShiftAmount(Root.getOpcode());2270assert(OuterShiftAmt != 0 && "Unexpected opcode");22712272MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());2273MachineInstr *ShiftMI =2274MRI.getUniqueVRegDef(AddMI->getOperand(AddOpIdx).getReg());22752276unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();2277assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount");22782279unsigned InnerOpc;2280switch (InnerShiftAmt - OuterShiftAmt) {2281default:2282llvm_unreachable("Unexpected shift amount");2283case 0:2284InnerOpc = RISCV::ADD;2285break;2286case 1:2287InnerOpc = RISCV::SH1ADD;2288break;2289case 2:2290InnerOpc = RISCV::SH2ADD;2291break;2292case 3:2293InnerOpc = RISCV::SH3ADD;2294break;2295}22962297const MachineOperand &X = AddMI->getOperand(3 - AddOpIdx);2298const MachineOperand &Y = ShiftMI->getOperand(1);2299const MachineOperand &Z = Root.getOperand(1);23002301Register NewVR = MRI.createVirtualRegister(&RISCV::GPRRegClass);23022303auto MIB1 = BuildMI(*MF, MIMetadata(Root), TII->get(InnerOpc), NewVR)2304.addReg(Y.getReg(), getKillRegState(Y.isKill()))2305.addReg(Z.getReg(), getKillRegState(Z.isKill()));2306auto MIB2 = BuildMI(*MF, MIMetadata(Root), TII->get(Root.getOpcode()),2307Root.getOperand(0).getReg())2308.addReg(NewVR, RegState::Kill)2309.addReg(X.getReg(), getKillRegState(X.isKill()));23102311InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));2312InsInstrs.push_back(MIB1);2313InsInstrs.push_back(MIB2);2314DelInstrs.push_back(ShiftMI);2315DelInstrs.push_back(AddMI);2316DelInstrs.push_back(&Root);2317}23182319void RISCVInstrInfo::genAlternativeCodeSequence(2320MachineInstr &Root, unsigned Pattern,2321SmallVectorImpl<MachineInstr *> &InsInstrs,2322SmallVectorImpl<MachineInstr *> &DelInstrs,2323DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {2324MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();2325switch (Pattern) {2326default:2327TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,2328DelInstrs, InstrIdxForVirtReg);2329return;2330case RISCVMachineCombinerPattern::FMADD_AX:2331case RISCVMachineCombinerPattern::FMSUB: {2332MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg());2333combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);2334return;2335}2336case RISCVMachineCombinerPattern::FMADD_XA:2337case RISCVMachineCombinerPattern::FNMSUB: {2338MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg());2339combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);2340return;2341}2342case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1:2343genShXAddAddShift(Root, 1, InsInstrs, DelInstrs, InstrIdxForVirtReg);2344return;2345case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2:2346genShXAddAddShift(Root, 2, InsInstrs, DelInstrs, InstrIdxForVirtReg);2347return;2348}2349}23502351bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,2352StringRef &ErrInfo) const {2353MCInstrDesc const &Desc = MI.getDesc();23542355for (const auto &[Index, Operand] : enumerate(Desc.operands())) {2356unsigned OpType = Operand.OperandType;2357if (OpType >= 
RISCVOp::OPERAND_FIRST_RISCV_IMM &&2358OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {2359const MachineOperand &MO = MI.getOperand(Index);2360if (MO.isImm()) {2361int64_t Imm = MO.getImm();2362bool Ok;2363switch (OpType) {2364default:2365llvm_unreachable("Unexpected operand type");23662367// clang-format off2368#define CASE_OPERAND_UIMM(NUM) \2369case RISCVOp::OPERAND_UIMM##NUM: \2370Ok = isUInt<NUM>(Imm); \2371break;2372CASE_OPERAND_UIMM(1)2373CASE_OPERAND_UIMM(2)2374CASE_OPERAND_UIMM(3)2375CASE_OPERAND_UIMM(4)2376CASE_OPERAND_UIMM(5)2377CASE_OPERAND_UIMM(6)2378CASE_OPERAND_UIMM(7)2379CASE_OPERAND_UIMM(8)2380CASE_OPERAND_UIMM(12)2381CASE_OPERAND_UIMM(20)2382// clang-format on2383case RISCVOp::OPERAND_UIMM2_LSB0:2384Ok = isShiftedUInt<1, 1>(Imm);2385break;2386case RISCVOp::OPERAND_UIMM5_LSB0:2387Ok = isShiftedUInt<4, 1>(Imm);2388break;2389case RISCVOp::OPERAND_UIMM6_LSB0:2390Ok = isShiftedUInt<5, 1>(Imm);2391break;2392case RISCVOp::OPERAND_UIMM7_LSB00:2393Ok = isShiftedUInt<5, 2>(Imm);2394break;2395case RISCVOp::OPERAND_UIMM8_LSB00:2396Ok = isShiftedUInt<6, 2>(Imm);2397break;2398case RISCVOp::OPERAND_UIMM8_LSB000:2399Ok = isShiftedUInt<5, 3>(Imm);2400break;2401case RISCVOp::OPERAND_UIMM8_GE32:2402Ok = isUInt<8>(Imm) && Imm >= 32;2403break;2404case RISCVOp::OPERAND_UIMM9_LSB000:2405Ok = isShiftedUInt<6, 3>(Imm);2406break;2407case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:2408Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0);2409break;2410case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO:2411Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0);2412break;2413case RISCVOp::OPERAND_ZERO:2414Ok = Imm == 0;2415break;2416case RISCVOp::OPERAND_SIMM5:2417Ok = isInt<5>(Imm);2418break;2419case RISCVOp::OPERAND_SIMM5_PLUS1:2420Ok = (isInt<5>(Imm) && Imm != -16) || Imm == 16;2421break;2422case RISCVOp::OPERAND_SIMM6:2423Ok = isInt<6>(Imm);2424break;2425case RISCVOp::OPERAND_SIMM6_NONZERO:2426Ok = Imm != 0 && isInt<6>(Imm);2427break;2428case RISCVOp::OPERAND_VTYPEI10:2429Ok = isUInt<10>(Imm);2430break;2431case RISCVOp::OPERAND_VTYPEI11:2432Ok = isUInt<11>(Imm);2433break;2434case RISCVOp::OPERAND_SIMM12:2435Ok = isInt<12>(Imm);2436break;2437case RISCVOp::OPERAND_SIMM12_LSB00000:2438Ok = isShiftedInt<7, 5>(Imm);2439break;2440case RISCVOp::OPERAND_UIMMLOG2XLEN:2441Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);2442break;2443case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:2444Ok = STI.is64Bit() ? 
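// Shift-amount style operands are bounded by XLEN: RV64 allows a 6-bit
// amount (0-63) while RV32 only allows 5 bits (0-31), which is why the
// UIMMLOG2XLEN checks are keyed on STI.is64Bit(). For example,
// `slli rd, rs, 63` verifies only on a 64-bit subtarget.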
isUInt<6>(Imm) : isUInt<5>(Imm);2445Ok = Ok && Imm != 0;2446break;2447case RISCVOp::OPERAND_CLUI_IMM:2448Ok = (isUInt<5>(Imm) && Imm != 0) ||2449(Imm >= 0xfffe0 && Imm <= 0xfffff);2450break;2451case RISCVOp::OPERAND_RVKRNUM:2452Ok = Imm >= 0 && Imm <= 10;2453break;2454case RISCVOp::OPERAND_RVKRNUM_0_7:2455Ok = Imm >= 0 && Imm <= 7;2456break;2457case RISCVOp::OPERAND_RVKRNUM_1_10:2458Ok = Imm >= 1 && Imm <= 10;2459break;2460case RISCVOp::OPERAND_RVKRNUM_2_14:2461Ok = Imm >= 2 && Imm <= 14;2462break;2463case RISCVOp::OPERAND_SPIMM:2464Ok = (Imm & 0xf) == 0;2465break;2466}2467if (!Ok) {2468ErrInfo = "Invalid immediate";2469return false;2470}2471}2472}2473}24742475const uint64_t TSFlags = Desc.TSFlags;2476if (RISCVII::hasVLOp(TSFlags)) {2477const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc));2478if (!Op.isImm() && !Op.isReg()) {2479ErrInfo = "Invalid operand type for VL operand";2480return false;2481}2482if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {2483const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();2484auto *RC = MRI.getRegClass(Op.getReg());2485if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {2486ErrInfo = "Invalid register class for VL operand";2487return false;2488}2489}2490if (!RISCVII::hasSEWOp(TSFlags)) {2491ErrInfo = "VL operand w/o SEW operand?";2492return false;2493}2494}2495if (RISCVII::hasSEWOp(TSFlags)) {2496unsigned OpIdx = RISCVII::getSEWOpNum(Desc);2497if (!MI.getOperand(OpIdx).isImm()) {2498ErrInfo = "SEW value expected to be an immediate";2499return false;2500}2501uint64_t Log2SEW = MI.getOperand(OpIdx).getImm();2502if (Log2SEW > 31) {2503ErrInfo = "Unexpected SEW value";2504return false;2505}2506unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;2507if (!RISCVVType::isValidSEW(SEW)) {2508ErrInfo = "Unexpected SEW value";2509return false;2510}2511}2512if (RISCVII::hasVecPolicyOp(TSFlags)) {2513unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);2514if (!MI.getOperand(OpIdx).isImm()) {2515ErrInfo = "Policy operand expected to be an immediate";2516return false;2517}2518uint64_t Policy = MI.getOperand(OpIdx).getImm();2519if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) {2520ErrInfo = "Invalid Policy Value";2521return false;2522}2523if (!RISCVII::hasVLOp(TSFlags)) {2524ErrInfo = "policy operand w/o VL operand?";2525return false;2526}25272528// VecPolicy operands can only exist on instructions with passthru/merge2529// arguments. 
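// The policy immediate is a 2-bit mask: bit 0 set means tail agnostic (ta,
// otherwise tu) and bit 1 set means mask agnostic (ma, otherwise mu), so the
// legal values are 0-3 (e.g. 3 encodes "ta, ma" and 0 encodes "tu, mu").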
Note that not all arguments with passthru have vec policy2530// operands- some instructions have implicit policies.2531unsigned UseOpIdx;2532if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) {2533ErrInfo = "policy operand w/o tied operand?";2534return false;2535}2536}25372538if (int Idx = RISCVII::getFRMOpNum(Desc);2539Idx >= 0 && MI.getOperand(Idx).getImm() == RISCVFPRndMode::DYN &&2540!MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr)) {2541ErrInfo = "dynamic rounding mode should read FRM";2542return false;2543}25442545return true;2546}25472548bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,2549const MachineInstr &AddrI,2550ExtAddrMode &AM) const {2551switch (MemI.getOpcode()) {2552default:2553return false;2554case RISCV::LB:2555case RISCV::LBU:2556case RISCV::LH:2557case RISCV::LHU:2558case RISCV::LW:2559case RISCV::LWU:2560case RISCV::LD:2561case RISCV::FLH:2562case RISCV::FLW:2563case RISCV::FLD:2564case RISCV::SB:2565case RISCV::SH:2566case RISCV::SW:2567case RISCV::SD:2568case RISCV::FSH:2569case RISCV::FSW:2570case RISCV::FSD:2571break;2572}25732574if (MemI.getOperand(0).getReg() == Reg)2575return false;25762577if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() ||2578!AddrI.getOperand(2).isImm())2579return false;25802581int64_t OldOffset = MemI.getOperand(2).getImm();2582int64_t Disp = AddrI.getOperand(2).getImm();2583int64_t NewOffset = OldOffset + Disp;2584if (!STI.is64Bit())2585NewOffset = SignExtend64<32>(NewOffset);25862587if (!isInt<12>(NewOffset))2588return false;25892590AM.BaseReg = AddrI.getOperand(1).getReg();2591AM.ScaledReg = 0;2592AM.Scale = 0;2593AM.Displacement = NewOffset;2594AM.Form = ExtAddrMode::Formula::Basic;2595return true;2596}25972598MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,2599const ExtAddrMode &AM) const {26002601const DebugLoc &DL = MemI.getDebugLoc();2602MachineBasicBlock &MBB = *MemI.getParent();26032604assert(AM.ScaledReg == 0 && AM.Scale == 0 &&2605"Addressing mode not supported for folding");26062607return BuildMI(MBB, MemI, DL, get(MemI.getOpcode()))2608.addReg(MemI.getOperand(0).getReg(),2609MemI.mayLoad() ? RegState::Define : 0)2610.addReg(AM.BaseReg)2611.addImm(AM.Displacement)2612.setMemRefs(MemI.memoperands())2613.setMIFlags(MemI.getFlags());2614}26152616bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(2617const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,2618int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,2619const TargetRegisterInfo *TRI) const {2620if (!LdSt.mayLoadOrStore())2621return false;26222623// Conservatively, only handle scalar loads/stores for now.2624switch (LdSt.getOpcode()) {2625case RISCV::LB:2626case RISCV::LBU:2627case RISCV::SB:2628case RISCV::LH:2629case RISCV::LHU:2630case RISCV::FLH:2631case RISCV::SH:2632case RISCV::FSH:2633case RISCV::LW:2634case RISCV::LWU:2635case RISCV::FLW:2636case RISCV::SW:2637case RISCV::FSW:2638case RISCV::LD:2639case RISCV::FLD:2640case RISCV::SD:2641case RISCV::FSD:2642break;2643default:2644return false;2645}2646const MachineOperand *BaseOp;2647OffsetIsScalable = false;2648if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))2649return false;2650BaseOps.push_back(BaseOp);2651return true;2652}26532654// TODO: This was copied from SIInstrInfo. 
Could it be lifted to a common2655// helper?2656static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,2657ArrayRef<const MachineOperand *> BaseOps1,2658const MachineInstr &MI2,2659ArrayRef<const MachineOperand *> BaseOps2) {2660// Only examine the first "base" operand of each instruction, on the2661// assumption that it represents the real base address of the memory access.2662// Other operands are typically offsets or indices from this base address.2663if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))2664return true;26652666if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())2667return false;26682669auto MO1 = *MI1.memoperands_begin();2670auto MO2 = *MI2.memoperands_begin();2671if (MO1->getAddrSpace() != MO2->getAddrSpace())2672return false;26732674auto Base1 = MO1->getValue();2675auto Base2 = MO2->getValue();2676if (!Base1 || !Base2)2677return false;2678Base1 = getUnderlyingObject(Base1);2679Base2 = getUnderlyingObject(Base2);26802681if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))2682return false;26832684return Base1 == Base2;2685}26862687bool RISCVInstrInfo::shouldClusterMemOps(2688ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,2689bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,2690int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,2691unsigned NumBytes) const {2692// If the mem ops (to be clustered) do not have the same base ptr, then they2693// should not be clustered2694if (!BaseOps1.empty() && !BaseOps2.empty()) {2695const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();2696const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();2697if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))2698return false;2699} else if (!BaseOps1.empty() || !BaseOps2.empty()) {2700// If only one base op is empty, they do not have the same base ptr2701return false;2702}27032704unsigned CacheLineSize =2705BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize();2706// Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget.2707CacheLineSize = CacheLineSize ? CacheLineSize : 64;2708// Cluster if the memory operations are on the same or a neighbouring cache2709// line, but limit the maximum ClusterSize to avoid creating too much2710// additional register pressure.2711return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize;2712}27132714// Set BaseReg (the base register operand), Offset (the byte offset being2715// accessed) and the access Width of the passed instruction that reads/writes2716// memory. Returns false if the instruction does not read/write memory or the2717// BaseReg/Offset/Width can't be determined. Is not guaranteed to always2718// recognise base operands and offsets in all cases.2719// TODO: Add an IsScalable bool ref argument (like the equivalent AArch642720// function) and set it as appropriate.2721bool RISCVInstrInfo::getMemOperandWithOffsetWidth(2722const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,2723LocationSize &Width, const TargetRegisterInfo *TRI) const {2724if (!LdSt.mayLoadOrStore())2725return false;27262727// Here we assume the standard RISC-V ISA, which uses a base+offset2728// addressing mode. 
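// For a simple scalar access such as `lw a1, 12(a0)` this returns BaseReg =
// the a0 operand, Offset = 12 and Width = 4 bytes; a frame index in place of
// the base register is accepted as well.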
You'll need to relax these conditions to support custom2729// load/store instructions.2730if (LdSt.getNumExplicitOperands() != 3)2731return false;2732if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||2733!LdSt.getOperand(2).isImm())2734return false;27352736if (!LdSt.hasOneMemOperand())2737return false;27382739Width = (*LdSt.memoperands_begin())->getSize();2740BaseReg = &LdSt.getOperand(1);2741Offset = LdSt.getOperand(2).getImm();2742return true;2743}27442745bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(2746const MachineInstr &MIa, const MachineInstr &MIb) const {2747assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");2748assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");27492750if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||2751MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())2752return false;27532754// Retrieve the base register, offset from the base register and width. Width2755// is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If2756// base registers are identical, and the offset of a lower memory access +2757// the width doesn't overlap the offset of a higher memory access,2758// then the memory accesses are different.2759const TargetRegisterInfo *TRI = STI.getRegisterInfo();2760const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;2761int64_t OffsetA = 0, OffsetB = 0;2762LocationSize WidthA = 0, WidthB = 0;2763if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&2764getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {2765if (BaseOpA->isIdenticalTo(*BaseOpB)) {2766int LowOffset = std::min(OffsetA, OffsetB);2767int HighOffset = std::max(OffsetA, OffsetB);2768LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;2769if (LowWidth.hasValue() &&2770LowOffset + (int)LowWidth.getValue() <= HighOffset)2771return true;2772}2773}2774return false;2775}27762777std::pair<unsigned, unsigned>2778RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {2779const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;2780return std::make_pair(TF & Mask, TF & ~Mask);2781}27822783ArrayRef<std::pair<unsigned, const char *>>2784RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {2785using namespace RISCVII;2786static const std::pair<unsigned, const char *> TargetFlags[] = {2787{MO_CALL, "riscv-call"},2788{MO_LO, "riscv-lo"},2789{MO_HI, "riscv-hi"},2790{MO_PCREL_LO, "riscv-pcrel-lo"},2791{MO_PCREL_HI, "riscv-pcrel-hi"},2792{MO_GOT_HI, "riscv-got-hi"},2793{MO_TPREL_LO, "riscv-tprel-lo"},2794{MO_TPREL_HI, "riscv-tprel-hi"},2795{MO_TPREL_ADD, "riscv-tprel-add"},2796{MO_TLS_GOT_HI, "riscv-tls-got-hi"},2797{MO_TLS_GD_HI, "riscv-tls-gd-hi"},2798{MO_TLSDESC_HI, "riscv-tlsdesc-hi"},2799{MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"},2800{MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"},2801{MO_TLSDESC_CALL, "riscv-tlsdesc-call"}};2802return ArrayRef(TargetFlags);2803}2804bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(2805MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {2806const Function &F = MF.getFunction();28072808// Can F be deduplicated by the linker? 
If it can, don't outline from it.2809if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())2810return false;28112812// Don't outline from functions with section markings; the program could2813// expect that all the code is in the named section.2814if (F.hasSection())2815return false;28162817// It's safe to outline from MF.2818return true;2819}28202821bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,2822unsigned &Flags) const {2823// More accurate safety checking is done in getOutliningCandidateInfo.2824return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);2825}28262827// Enum values indicating how an outlined call should be constructed.2828enum MachineOutlinerConstructionID {2829MachineOutlinerDefault2830};28312832bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(2833MachineFunction &MF) const {2834return MF.getFunction().hasMinSize();2835}28362837std::optional<outliner::OutlinedFunction>2838RISCVInstrInfo::getOutliningCandidateInfo(2839std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {28402841// First we need to filter out candidates where the X5 register (IE t0) can't2842// be used to setup the function call.2843auto CannotInsertCall = [](outliner::Candidate &C) {2844const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();2845return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);2846};28472848llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);28492850// If the sequence doesn't have enough candidates left, then we're done.2851if (RepeatedSequenceLocs.size() < 2)2852return std::nullopt;28532854unsigned SequenceSize = 0;28552856for (auto &MI : RepeatedSequenceLocs[0])2857SequenceSize += getInstSizeInBytes(MI);28582859// call t0, function = 8 bytes.2860unsigned CallOverhead = 8;2861for (auto &C : RepeatedSequenceLocs)2862C.setCallInfo(MachineOutlinerDefault, CallOverhead);28632864// jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.2865unsigned FrameOverhead = 4;2866if (RepeatedSequenceLocs[0]2867.getMF()2868->getSubtarget<RISCVSubtarget>()2869.hasStdExtCOrZca())2870FrameOverhead = 2;28712872return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,2873FrameOverhead, MachineOutlinerDefault);2874}28752876outliner::InstrType2877RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,2878unsigned Flags) const {2879MachineInstr &MI = *MBBI;2880MachineBasicBlock *MBB = MI.getParent();2881const TargetRegisterInfo *TRI =2882MBB->getParent()->getSubtarget().getRegisterInfo();2883const auto &F = MI.getMF()->getFunction();28842885// We can manually strip out CFI instructions later.2886if (MI.isCFIInstruction())2887// If current function has exception handling code, we can't outline &2888// strip these CFI instructions since it may break .eh_frame section2889// needed in unwinding.2890return F.needsUnwindTableEntry() ? 
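// When no unwind table is needed the CFI directive is merely Invisible here;
// any such directives that end up in an outlined body are stripped again in
// buildOutlinedFrame() below.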
outliner::InstrType::Illegal2891: outliner::InstrType::Invisible;28922893// We need support for tail calls to outlined functions before return2894// statements can be allowed.2895if (MI.isReturn())2896return outliner::InstrType::Illegal;28972898// Don't allow modifying the X5 register which we use for return addresses for2899// these outlined functions.2900if (MI.modifiesRegister(RISCV::X5, TRI) ||2901MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))2902return outliner::InstrType::Illegal;29032904// Make sure the operands don't reference something unsafe.2905for (const auto &MO : MI.operands()) {29062907// pcrel-hi and pcrel-lo can't put in separate sections, filter that out2908// if any possible.2909if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&2910(MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||2911F.hasSection() || F.getSectionPrefix()))2912return outliner::InstrType::Illegal;2913}29142915return outliner::InstrType::Legal;2916}29172918void RISCVInstrInfo::buildOutlinedFrame(2919MachineBasicBlock &MBB, MachineFunction &MF,2920const outliner::OutlinedFunction &OF) const {29212922// Strip out any CFI instructions2923bool Changed = true;2924while (Changed) {2925Changed = false;2926auto I = MBB.begin();2927auto E = MBB.end();2928for (; I != E; ++I) {2929if (I->isCFIInstruction()) {2930I->removeFromParent();2931Changed = true;2932break;2933}2934}2935}29362937MBB.addLiveIn(RISCV::X5);29382939// Add in a return instruction to the end of the outlined frame.2940MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))2941.addReg(RISCV::X0, RegState::Define)2942.addReg(RISCV::X5)2943.addImm(0));2944}29452946MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(2947Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,2948MachineFunction &MF, outliner::Candidate &C) const {29492950// Add in a call instruction to the outlined function at the given location.2951It = MBB.insert(It,2952BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)2953.addGlobalAddress(M.getNamedValue(MF.getName()), 0,2954RISCVII::MO_CALL));2955return It;2956}29572958std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI,2959Register Reg) const {2960// TODO: Handle cases where Reg is a super- or sub-register of the2961// destination register.2962const MachineOperand &Op0 = MI.getOperand(0);2963if (!Op0.isReg() || Reg != Op0.getReg())2964return std::nullopt;29652966// Don't consider ADDIW as a candidate because the caller may not be aware2967// of its sign extension behaviour.2968if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&2969MI.getOperand(2).isImm())2970return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()};29712972return std::nullopt;2973}29742975// MIR printer helper function to annotate Operands with a comment.2976std::string RISCVInstrInfo::createMIROperandComment(2977const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,2978const TargetRegisterInfo *TRI) const {2979// Print a generic comment for this operand if there is one.2980std::string GenericComment =2981TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);2982if (!GenericComment.empty())2983return GenericComment;29842985// If not, we must have an immediate operand.2986if (!Op.isImm())2987return std::string();29882989std::string Comment;2990raw_string_ostream OS(Comment);29912992uint64_t TSFlags = MI.getDesc().TSFlags;29932994// Print the full VType operand of vsetvli/vsetivli instructions, and the SEW2995// operand of vector 
codegen pseudos.2996if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI ||2997MI.getOpcode() == RISCV::PseudoVSETVLI ||2998MI.getOpcode() == RISCV::PseudoVSETIVLI ||2999MI.getOpcode() == RISCV::PseudoVSETVLIX0) &&3000OpIdx == 2) {3001unsigned Imm = MI.getOperand(OpIdx).getImm();3002RISCVVType::printVType(Imm, OS);3003} else if (RISCVII::hasSEWOp(TSFlags) &&3004OpIdx == RISCVII::getSEWOpNum(MI.getDesc())) {3005unsigned Log2SEW = MI.getOperand(OpIdx).getImm();3006unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;3007assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");3008OS << "e" << SEW;3009} else if (RISCVII::hasVecPolicyOp(TSFlags) &&3010OpIdx == RISCVII::getVecPolicyOpNum(MI.getDesc())) {3011unsigned Policy = MI.getOperand(OpIdx).getImm();3012assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&3013"Invalid Policy Value");3014OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", "3015<< (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu");3016}30173018OS.flush();3019return Comment;3020}30213022// clang-format off3023#define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \3024RISCV::Pseudo##OP##_##LMUL30253026#define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \3027RISCV::Pseudo##OP##_##LMUL##_MASK30283029#define CASE_RVV_OPCODE_LMUL(OP, LMUL) \3030CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \3031case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL)30323033#define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \3034CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \3035case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \3036case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \3037case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \3038case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \3039case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4)30403041#define CASE_RVV_OPCODE_UNMASK(OP) \3042CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \3043case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8)30443045#define CASE_RVV_OPCODE_MASK_WIDEN(OP) \3046CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \3047case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \3048case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \3049case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \3050case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \3051case CASE_RVV_OPCODE_MASK_LMUL(OP, M4)30523053#define CASE_RVV_OPCODE_MASK(OP) \3054CASE_RVV_OPCODE_MASK_WIDEN(OP): \3055case CASE_RVV_OPCODE_MASK_LMUL(OP, M8)30563057#define CASE_RVV_OPCODE_WIDEN(OP) \3058CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \3059case CASE_RVV_OPCODE_MASK_WIDEN(OP)30603061#define CASE_RVV_OPCODE(OP) \3062CASE_RVV_OPCODE_UNMASK(OP): \3063case CASE_RVV_OPCODE_MASK(OP)3064// clang-format on30653066// clang-format off3067#define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \3068RISCV::PseudoV##OP##_##TYPE##_##LMUL30693070#define CASE_VMA_OPCODE_LMULS_M1(OP, TYPE) \3071CASE_VMA_OPCODE_COMMON(OP, TYPE, M1): \3072case CASE_VMA_OPCODE_COMMON(OP, TYPE, M2): \3073case CASE_VMA_OPCODE_COMMON(OP, TYPE, M4): \3074case CASE_VMA_OPCODE_COMMON(OP, TYPE, M8)30753076#define CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE) \3077CASE_VMA_OPCODE_COMMON(OP, TYPE, MF2): \3078case CASE_VMA_OPCODE_LMULS_M1(OP, TYPE)30793080#define CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE) \3081CASE_VMA_OPCODE_COMMON(OP, TYPE, MF4): \3082case CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE)30833084#define CASE_VMA_OPCODE_LMULS(OP, TYPE) \3085CASE_VMA_OPCODE_COMMON(OP, TYPE, MF8): \3086case CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE)30873088// VFMA instructions are SEW specific.3089#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL, SEW) \3090RISCV::PseudoV##OP##_##TYPE##_##LMUL##_##SEW30913092#define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) \3093CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1, SEW): \3094case 
CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2, SEW): \3095case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4, SEW): \3096case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8, SEW)30973098#define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) \3099CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2, SEW): \3100case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW)31013102#define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE, SEW) \3103CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4, SEW): \3104case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW)31053106#define CASE_VFMA_OPCODE_VV(OP) \3107CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \3108case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \3109case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)31103111#define CASE_VFMA_SPLATS(OP) \3112CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \3113case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \3114case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)3115// clang-format on31163117bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,3118unsigned &SrcOpIdx1,3119unsigned &SrcOpIdx2) const {3120const MCInstrDesc &Desc = MI.getDesc();3121if (!Desc.isCommutable())3122return false;31233124switch (MI.getOpcode()) {3125case RISCV::TH_MVEQZ:3126case RISCV::TH_MVNEZ:3127// We can't commute operands if operand 2 (i.e., rs1 in3128// mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is3129// not valid as the in/out-operand 1).3130if (MI.getOperand(2).getReg() == RISCV::X0)3131return false;3132// Operands 1 and 2 are commutable, if we switch the opcode.3133return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);3134case RISCV::TH_MULA:3135case RISCV::TH_MULAW:3136case RISCV::TH_MULAH:3137case RISCV::TH_MULS:3138case RISCV::TH_MULSW:3139case RISCV::TH_MULSH:3140// Operands 2 and 3 are commutable.3141return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);3142case RISCV::PseudoCCMOVGPRNoX0:3143case RISCV::PseudoCCMOVGPR:3144// Operands 4 and 5 are commutable.3145return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);3146case CASE_RVV_OPCODE(VADD_VV):3147case CASE_RVV_OPCODE(VAND_VV):3148case CASE_RVV_OPCODE(VOR_VV):3149case CASE_RVV_OPCODE(VXOR_VV):3150case CASE_RVV_OPCODE_MASK(VMSEQ_VV):3151case CASE_RVV_OPCODE_MASK(VMSNE_VV):3152case CASE_RVV_OPCODE(VMIN_VV):3153case CASE_RVV_OPCODE(VMINU_VV):3154case CASE_RVV_OPCODE(VMAX_VV):3155case CASE_RVV_OPCODE(VMAXU_VV):3156case CASE_RVV_OPCODE(VMUL_VV):3157case CASE_RVV_OPCODE(VMULH_VV):3158case CASE_RVV_OPCODE(VMULHU_VV):3159case CASE_RVV_OPCODE_WIDEN(VWADD_VV):3160case CASE_RVV_OPCODE_WIDEN(VWADDU_VV):3161case CASE_RVV_OPCODE_WIDEN(VWMUL_VV):3162case CASE_RVV_OPCODE_WIDEN(VWMULU_VV):3163case CASE_RVV_OPCODE_WIDEN(VWMACC_VV):3164case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV):3165case CASE_RVV_OPCODE_UNMASK(VADC_VVM):3166case CASE_RVV_OPCODE(VSADD_VV):3167case CASE_RVV_OPCODE(VSADDU_VV):3168case CASE_RVV_OPCODE(VAADD_VV):3169case CASE_RVV_OPCODE(VAADDU_VV):3170case CASE_RVV_OPCODE(VSMUL_VV):3171// Operands 2 and 3 are commutable.3172return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);3173case CASE_VFMA_SPLATS(FMADD):3174case CASE_VFMA_SPLATS(FMSUB):3175case CASE_VFMA_SPLATS(FMACC):3176case CASE_VFMA_SPLATS(FMSAC):3177case CASE_VFMA_SPLATS(FNMADD):3178case CASE_VFMA_SPLATS(FNMSUB):3179case CASE_VFMA_SPLATS(FNMACC):3180case CASE_VFMA_SPLATS(FNMSAC):3181case CASE_VFMA_OPCODE_VV(FMACC):3182case CASE_VFMA_OPCODE_VV(FMSAC):3183case CASE_VFMA_OPCODE_VV(FNMACC):3184case CASE_VFMA_OPCODE_VV(FNMSAC):3185case CASE_VMA_OPCODE_LMULS(MADD, VX):3186case CASE_VMA_OPCODE_LMULS(NMSUB, VX):3187case CASE_VMA_OPCODE_LMULS(MACC, VX):3188case CASE_VMA_OPCODE_LMULS(NMSAC, VX):3189case 
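// For the multiply-accumulate families the destination is also the addend
// (vmacc.vv vd, vs1, vs2 computes vs1 * vs2 + vd, while vmadd.vv computes
// vd * vs1 + vs2), so the only commutation that keeps the tied-operand
// constraint intact is swapping operand 1 with operand 3 and switching to
// the sibling opcode; see commuteInstructionImpl below.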
CASE_VMA_OPCODE_LMULS(MACC, VV):
  case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
    // If the tail policy is undisturbed we can't commute.
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
    if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
      return false;

    // For these instructions we can only swap operand 1 and operand 3 by
    // changing the opcode.
    unsigned CommutableOpIdx1 = 1;
    unsigned CommutableOpIdx2 = 3;
    if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
                              CommutableOpIdx2))
      return false;
    return true;
  }
  case CASE_VFMA_OPCODE_VV(FMADD):
  case CASE_VFMA_OPCODE_VV(FMSUB):
  case CASE_VFMA_OPCODE_VV(FNMADD):
  case CASE_VFMA_OPCODE_VV(FNMSUB):
  case CASE_VMA_OPCODE_LMULS(MADD, VV):
  case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
    // If the tail policy is undisturbed we can't commute.
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
    if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
      return false;

    // For these instructions we have more freedom. We can commute with the
    // other multiplicand or with the addend/subtrahend/minuend.

    // Any fixed operand must be from source 1, 2 or 3.
    if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
      return false;
    if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
      return false;

    // If both ops are fixed, one must be the tied source.
    if (SrcOpIdx1 != CommuteAnyOperandIndex &&
        SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
      return false;

    // Look for two different register operands assumed to be commutable
    // regardless of the FMA opcode. The FMA opcode is adjusted later if
    // needed.
    if (SrcOpIdx1 == CommuteAnyOperandIndex ||
        SrcOpIdx2 == CommuteAnyOperandIndex) {
      // At least one of the operands to be commuted is not specified and
      // this method is free to choose appropriate commutable operands.
      unsigned CommutableOpIdx1 = SrcOpIdx1;
      if (SrcOpIdx1 == SrcOpIdx2) {
        // Neither operand is fixed. Set one of the commutable
        // operands to the tied source.
        CommutableOpIdx1 = 1;
      } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
        // Only one of the operands is not fixed.
        CommutableOpIdx1 = SrcOpIdx2;
      }

      // CommutableOpIdx1 is well defined now. Let's choose another commutable
      // operand and assign its index to CommutableOpIdx2.
      unsigned CommutableOpIdx2;
      if (CommutableOpIdx1 != 1) {
        // If we haven't already used the tied source, we must use it now.
        CommutableOpIdx2 = 1;
      } else {
        Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg();

        // The commuted operands should have different registers.
        // Otherwise, the commute transformation does not change anything and
        // is useless.
We use this as a hint to make our decision.3259if (Op1Reg != MI.getOperand(2).getReg())3260CommutableOpIdx2 = 2;3261else3262CommutableOpIdx2 = 3;3263}32643265// Assign the found pair of commutable indices to SrcOpIdx1 and3266// SrcOpIdx2 to return those values.3267if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,3268CommutableOpIdx2))3269return false;3270}32713272return true;3273}3274}32753276return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);3277}32783279// clang-format off3280#define CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \3281case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \3282Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \3283break;32843285#define CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \3286CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \3287CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \3288CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \3289CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)32903291#define CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \3292CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \3293CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)32943295#define CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \3296CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \3297CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)32983299#define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \3300CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \3301CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)33023303#define CASE_VMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \3304CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \3305CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \3306CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64)33073308// VFMA depends on SEW.3309#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL, SEW) \3310case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_##SEW: \3311Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_##SEW; \3312break;33133314#define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) \3315CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1, SEW) \3316CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2, SEW) \3317CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4, SEW) \3318CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8, SEW)33193320#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) \3321CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2, SEW) \3322CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)33233324#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \3325CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \3326CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \3327CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)33283329#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \3330CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \3331CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW)33323333#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE, SEW) \3334CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8, SEW) \3335CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW)33363337#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \3338CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \3339CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \3340CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)33413342MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,3343bool 
NewMI,3344unsigned OpIdx1,3345unsigned OpIdx2) const {3346auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {3347if (NewMI)3348return *MI.getParent()->getParent()->CloneMachineInstr(&MI);3349return MI;3350};33513352switch (MI.getOpcode()) {3353case RISCV::TH_MVEQZ:3354case RISCV::TH_MVNEZ: {3355auto &WorkingMI = cloneIfNew(MI);3356WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ3357: RISCV::TH_MVEQZ));3358return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,3359OpIdx2);3360}3361case RISCV::PseudoCCMOVGPRNoX0:3362case RISCV::PseudoCCMOVGPR: {3363// CCMOV can be commuted by inverting the condition.3364auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());3365CC = RISCVCC::getOppositeBranchCondition(CC);3366auto &WorkingMI = cloneIfNew(MI);3367WorkingMI.getOperand(3).setImm(CC);3368return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,3369OpIdx1, OpIdx2);3370}3371case CASE_VFMA_SPLATS(FMACC):3372case CASE_VFMA_SPLATS(FMADD):3373case CASE_VFMA_SPLATS(FMSAC):3374case CASE_VFMA_SPLATS(FMSUB):3375case CASE_VFMA_SPLATS(FNMACC):3376case CASE_VFMA_SPLATS(FNMADD):3377case CASE_VFMA_SPLATS(FNMSAC):3378case CASE_VFMA_SPLATS(FNMSUB):3379case CASE_VFMA_OPCODE_VV(FMACC):3380case CASE_VFMA_OPCODE_VV(FMSAC):3381case CASE_VFMA_OPCODE_VV(FNMACC):3382case CASE_VFMA_OPCODE_VV(FNMSAC):3383case CASE_VMA_OPCODE_LMULS(MADD, VX):3384case CASE_VMA_OPCODE_LMULS(NMSUB, VX):3385case CASE_VMA_OPCODE_LMULS(MACC, VX):3386case CASE_VMA_OPCODE_LMULS(NMSAC, VX):3387case CASE_VMA_OPCODE_LMULS(MACC, VV):3388case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {3389// It only make sense to toggle these between clobbering the3390// addend/subtrahend/minuend one of the multiplicands.3391assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");3392assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");3393unsigned Opc;3394switch (MI.getOpcode()) {3395default:3396llvm_unreachable("Unexpected opcode");3397CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)3398CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)3399CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)3400CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)3401CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)3402CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)3403CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)3404CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)3405CASE_VFMA_CHANGE_OPCODE_VV(FMACC, FMADD)3406CASE_VFMA_CHANGE_OPCODE_VV(FMSAC, FMSUB)3407CASE_VFMA_CHANGE_OPCODE_VV(FNMACC, FNMADD)3408CASE_VFMA_CHANGE_OPCODE_VV(FNMSAC, FNMSUB)3409CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)3410CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)3411CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)3412CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)3413CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)3414CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)3415}34163417auto &WorkingMI = cloneIfNew(MI);3418WorkingMI.setDesc(get(Opc));3419return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,3420OpIdx1, OpIdx2);3421}3422case CASE_VFMA_OPCODE_VV(FMADD):3423case CASE_VFMA_OPCODE_VV(FMSUB):3424case CASE_VFMA_OPCODE_VV(FNMADD):3425case CASE_VFMA_OPCODE_VV(FNMSUB):3426case CASE_VMA_OPCODE_LMULS(MADD, VV):3427case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {3428assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");3429// If one of the operands, is the addend we need to change opcode.3430// Otherwise we're just swapping 2 of the multiplicands.3431if (OpIdx1 == 3 || OpIdx2 == 3) {3432unsigned Opc;3433switch (MI.getOpcode()) 
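// Commuting the addend into a multiplicand slot (or vice versa) changes which
// source is multiplied with the tied destination, so the opcode has to be
// flipped to its sibling form: e.g. vfmadd (vd = vd * vs1 + vs2) pairs with
// vfmacc (vd = vs1 * vs2 + vd). The mapping below mirrors that pairing for
// each LMUL/SEW pseudo.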
  case CASE_VFMA_SPLATS(FMACC):
  case CASE_VFMA_SPLATS(FMADD):
  case CASE_VFMA_SPLATS(FMSAC):
  case CASE_VFMA_SPLATS(FMSUB):
  case CASE_VFMA_SPLATS(FNMACC):
  case CASE_VFMA_SPLATS(FNMADD):
  case CASE_VFMA_SPLATS(FNMSAC):
  case CASE_VFMA_SPLATS(FNMSUB):
  case CASE_VFMA_OPCODE_VV(FMACC):
  case CASE_VFMA_OPCODE_VV(FMSAC):
  case CASE_VFMA_OPCODE_VV(FNMACC):
  case CASE_VFMA_OPCODE_VV(FNMSAC):
  case CASE_VMA_OPCODE_LMULS(MADD, VX):
  case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
  case CASE_VMA_OPCODE_LMULS(MACC, VX):
  case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
  case CASE_VMA_OPCODE_LMULS(MACC, VV):
  case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
    // It only makes sense to toggle these between clobbering the
    // addend/subtrahend/minuend and one of the multiplicands.
    assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
    assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
    unsigned Opc;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
    CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
    CASE_VFMA_CHANGE_OPCODE_VV(FMACC, FMADD)
    CASE_VFMA_CHANGE_OPCODE_VV(FMSAC, FMSUB)
    CASE_VFMA_CHANGE_OPCODE_VV(FNMACC, FNMADD)
    CASE_VFMA_CHANGE_OPCODE_VV(FNMSAC, FNMSUB)
    CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
    CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
    CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
    CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
    CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
    CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
    }

    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.setDesc(get(Opc));
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                   OpIdx1, OpIdx2);
  }
  case CASE_VFMA_OPCODE_VV(FMADD):
  case CASE_VFMA_OPCODE_VV(FMSUB):
  case CASE_VFMA_OPCODE_VV(FNMADD):
  case CASE_VFMA_OPCODE_VV(FNMSUB):
  case CASE_VMA_OPCODE_LMULS(MADD, VV):
  case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
    assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
    // If one of the operands is the addend, we need to change the opcode.
    // Otherwise we're just swapping 2 of the multiplicands.
    if (OpIdx1 == 3 || OpIdx2 == 3) {
      unsigned Opc;
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
      CASE_VFMA_CHANGE_OPCODE_VV(FMADD, FMACC)
      CASE_VFMA_CHANGE_OPCODE_VV(FMSUB, FMSAC)
      CASE_VFMA_CHANGE_OPCODE_VV(FNMADD, FNMACC)
      CASE_VFMA_CHANGE_OPCODE_VV(FNMSUB, FNMSAC)
      CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
      CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
      }

      auto &WorkingMI = cloneIfNew(MI);
      WorkingMI.setDesc(get(Opc));
      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                     OpIdx1, OpIdx2);
    }
    // Let the default code handle it.
    break;
  }
  }

  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}

#undef CASE_RVV_OPCODE_UNMASK_LMUL
#undef CASE_RVV_OPCODE_MASK_LMUL
#undef CASE_RVV_OPCODE_LMUL
#undef CASE_RVV_OPCODE_UNMASK_WIDEN
#undef CASE_RVV_OPCODE_UNMASK
#undef CASE_RVV_OPCODE_MASK_WIDEN
#undef CASE_RVV_OPCODE_MASK
#undef CASE_RVV_OPCODE_WIDEN
#undef CASE_RVV_OPCODE

#undef CASE_VMA_OPCODE_COMMON
#undef CASE_VMA_OPCODE_LMULS_M1
#undef CASE_VMA_OPCODE_LMULS_MF2
#undef CASE_VMA_OPCODE_LMULS_MF4
#undef CASE_VMA_OPCODE_LMULS
#undef CASE_VFMA_OPCODE_COMMON
#undef CASE_VFMA_OPCODE_LMULS_M1
#undef CASE_VFMA_OPCODE_LMULS_MF2
#undef CASE_VFMA_OPCODE_LMULS_MF4
#undef CASE_VFMA_OPCODE_VV
#undef CASE_VFMA_SPLATS
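// The helpers below build case labels for the tied widening pseudos. For
// example, CASE_WIDEOP_OPCODE_COMMON(WADD_WV, M1) expands to
// RISCV::PseudoVWADD_WV_M1_TIED, and the matching CHANGE_OPCODE macro rewrites
// that opcode to the untied RISCV::PseudoVWADD_WV_M1 form.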
// clang-format off
#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
  RISCV::PseudoV##OP##_##LMUL##_TIED

#define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \
  CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
  case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M4)

#define CASE_WIDEOP_OPCODE_LMULS(OP) \
  CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
  case CASE_WIDEOP_OPCODE_LMULS_MF4(OP)

#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
  case RISCV::PseudoV##OP##_##LMUL##_TIED: \
    NewOpc = RISCV::PseudoV##OP##_##LMUL; \
    break;

#define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)

#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
  CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)

// FP widening ops may be SEW aware. Create SEW-aware cases for them.
#define CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW) \
  RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED

#define CASE_FP_WIDEOP_OPCODE_LMULS_MF4(OP) \
  CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E32): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E32): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E32): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16): \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E32)

#define CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL, SEW) \
  case RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED: \
    NewOpc = RISCV::PseudoV##OP##_##LMUL##_##SEW; \
    break;

#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E32) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E32) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32)

#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
  CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
// clang-format on
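// convertToThreeAddress rewrites a _TIED widening pseudo, whose destination is
// tied to the wide source operand, into the untied form with an explicit undef
// passthru operand, so the register allocator is free to pick a different
// destination register. This is only done when the tail policy is agnostic;
// an undisturbed tail would still require the old destination value.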
MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
                                                    LiveVariables *LV,
                                                    LiveIntervals *LIS) const {
  MachineInstrBuilder MIB;
  switch (MI.getOpcode()) {
  default:
    return nullptr;
  case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV):
  case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): {
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
           MI.getNumExplicitOperands() == 7 &&
           "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
    // If the tail policy is undisturbed, we can't convert.
    if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() &
         1) == 0)
      return nullptr;
    // clang-format off
    unsigned NewOpc;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV)
    CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV)
    }
    // clang-format on

    MachineBasicBlock &MBB = *MI.getParent();
    MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
              .add(MI.getOperand(0))
              .addReg(MI.getOperand(0).getReg(), RegState::Undef)
              .add(MI.getOperand(1))
              .add(MI.getOperand(2))
              .add(MI.getOperand(3))
              .add(MI.getOperand(4))
              .add(MI.getOperand(5))
              .add(MI.getOperand(6));
    break;
  }
  case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
    // If the tail policy is undisturbed, we can't convert.
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
           MI.getNumExplicitOperands() == 6);
    if ((MI.getOperand(5).getImm() & 1) == 0)
      return nullptr;

    // clang-format off
    unsigned NewOpc;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
    CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
    }
    // clang-format on

    MachineBasicBlock &MBB = *MI.getParent();
    MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
              .add(MI.getOperand(0))
              .addReg(MI.getOperand(0).getReg(), RegState::Undef)
              .add(MI.getOperand(1))
              .add(MI.getOperand(2))
              .add(MI.getOperand(3))
              .add(MI.getOperand(4))
              .add(MI.getOperand(5));
    break;
  }
  }
  MIB.copyImplicitOps(MI);

  if (LV) {
    unsigned NumOps = MI.getNumOperands();
    for (unsigned I = 1; I < NumOps; ++I) {
      MachineOperand &Op = MI.getOperand(I);
      if (Op.isReg() && Op.isKill())
        LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
    }
  }

  if (LIS) {
    SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);

    if (MI.getOperand(0).isEarlyClobber()) {
      // Use operand 1 was tied to the early-clobber def operand 0, so its live
      // interval could have ended at an early-clobber slot. Now that they are
      // no longer tied, we need to update it to the normal register slot.
      LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg());
      LiveRange::Segment *S = LI.getSegmentContaining(Idx);
      if (S->end == Idx.getRegSlot(true))
        S->end = Idx.getRegSlot();
    }
  }

  return MIB;
}

#undef CASE_WIDEOP_OPCODE_COMMON
#undef CASE_WIDEOP_OPCODE_LMULS_MF4
#undef CASE_WIDEOP_OPCODE_LMULS
#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4
#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
#undef CASE_FP_WIDEOP_OPCODE_COMMON
#undef CASE_FP_WIDEOP_OPCODE_LMULS_MF4
#undef CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON
#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4
#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS
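// An illustrative expansion: multiplying DestReg by 20 with Zba available
// produces
//   slli   rd, rd, 2     ; rd = rd * 4
//   sh2add rd, rd, rd    ; rd = (rd << 2) + rd = original rd * 20
// while without Zba or Zmmul, 2^k +/- 1 amounts use a shift into a scratch
// register followed by an add/sub, and arbitrary constants fall back to the
// shift-and-accumulate loop at the end of mulImm.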
void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator II, const DebugLoc &DL,
                            Register DestReg, uint32_t Amount,
                            MachineInstr::MIFlag Flag) const {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  if (llvm::has_single_bit<uint32_t>(Amount)) {
    uint32_t ShiftAmount = Log2_32(Amount);
    if (ShiftAmount == 0)
      return;
    BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(ShiftAmount)
        .setMIFlag(Flag);
  } else if (STI.hasStdExtZba() &&
             ((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) ||
              (Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) ||
              (Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) {
    // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
    unsigned Opc;
    uint32_t ShiftAmount;
    if (Amount % 9 == 0) {
      Opc = RISCV::SH3ADD;
      ShiftAmount = Log2_64(Amount / 9);
    } else if (Amount % 5 == 0) {
      Opc = RISCV::SH2ADD;
      ShiftAmount = Log2_64(Amount / 5);
    } else if (Amount % 3 == 0) {
      Opc = RISCV::SH1ADD;
      ShiftAmount = Log2_64(Amount / 3);
    } else {
      llvm_unreachable("implied by if-clause");
    }
    if (ShiftAmount)
      BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
          .addReg(DestReg, RegState::Kill)
          .addImm(ShiftAmount)
          .setMIFlag(Flag);
    BuildMI(MBB, II, DL, get(Opc), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addReg(DestReg)
        .setMIFlag(Flag);
  } else if (llvm::has_single_bit<uint32_t>(Amount - 1)) {
    Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    uint32_t ShiftAmount = Log2_32(Amount - 1);
    BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
        .addReg(DestReg)
        .addImm(ShiftAmount)
        .setMIFlag(Flag);
    BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
        .addReg(ScaledRegister, RegState::Kill)
        .addReg(DestReg, RegState::Kill)
        .setMIFlag(Flag);
  } else if (llvm::has_single_bit<uint32_t>(Amount + 1)) {
    Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    uint32_t ShiftAmount = Log2_32(Amount + 1);
    BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
        .addReg(DestReg)
        .addImm(ShiftAmount)
        .setMIFlag(Flag);
    BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg)
        .addReg(ScaledRegister, RegState::Kill)
        .addReg(DestReg, RegState::Kill)
        .setMIFlag(Flag);
  } else if (STI.hasStdExtZmmul()) {
    Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    movImm(MBB, II, DL, N, Amount, Flag);
    BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addReg(N, RegState::Kill)
        .setMIFlag(Flag);
  } else {
    Register Acc;
    uint32_t PrevShiftAmount = 0;
    for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) {
      if (Amount & (1U << ShiftAmount)) {
        if (ShiftAmount)
          BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
              .addReg(DestReg, RegState::Kill)
              .addImm(ShiftAmount - PrevShiftAmount)
              .setMIFlag(Flag);
        if (Amount >> (ShiftAmount + 1)) {
          // If we don't have an accumulator yet, create it and copy DestReg.
          if (!Acc) {
            Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
            BuildMI(MBB, II, DL, get(TargetOpcode::COPY), Acc)
                .addReg(DestReg)
                .setMIFlag(Flag);
          } else {
            BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
                .addReg(Acc, RegState::Kill)
                .addReg(DestReg)
                .setMIFlag(Flag);
          }
        }
        PrevShiftAmount = ShiftAmount;
      }
    }
    assert(Acc && "Expected valid accumulator");
    BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addReg(Acc, RegState::Kill)
        .setMIFlag(Flag);
  }
}

ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
      {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"},
       {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}};
  return ArrayRef(TargetFlags);
}

// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
bool RISCV::isSEXT_W(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0;
}

// Returns true if this is the zext.w pattern, add.uw rd, rs1, x0.
bool RISCV::isZEXT_W(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() &&
         MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0;
}

// Returns true if this is the zext.b pattern, andi rd, rs1, 255.
bool RISCV::isZEXT_B(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() &&
         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255;
}
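// The opcodes below are the whole vector register load/store instructions
// (vl<nf>re<eew>.v and vs<nf>r.v), which is what RVV stack spills and reloads
// are generally expected to use.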
static bool isRVVWholeLoadStore(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
  case RISCV::VS1R_V:
  case RISCV::VS2R_V:
  case RISCV::VS4R_V:
  case RISCV::VS8R_V:
  case RISCV::VL1RE8_V:
  case RISCV::VL2RE8_V:
  case RISCV::VL4RE8_V:
  case RISCV::VL8RE8_V:
  case RISCV::VL1RE16_V:
  case RISCV::VL2RE16_V:
  case RISCV::VL4RE16_V:
  case RISCV::VL8RE16_V:
  case RISCV::VL1RE32_V:
  case RISCV::VL2RE32_V:
  case RISCV::VL4RE32_V:
  case RISCV::VL8RE32_V:
  case RISCV::VL1RE64_V:
  case RISCV::VL2RE64_V:
  case RISCV::VL4RE64_V:
  case RISCV::VL8RE64_V:
    return true;
  }
}

bool RISCV::isRVVSpill(const MachineInstr &MI) {
  // RVV lacks any support for immediate addressing for stack addresses, so be
  // conservative.
  unsigned Opcode = MI.getOpcode();
  if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
      !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
    return false;
  return true;
}

std::optional<std::pair<unsigned, unsigned>>
RISCV::isRVVSpillForZvlsseg(unsigned Opcode) {
  switch (Opcode) {
  default:
    return std::nullopt;
  case RISCV::PseudoVSPILL2_M1:
  case RISCV::PseudoVRELOAD2_M1:
    return std::make_pair(2u, 1u);
  case RISCV::PseudoVSPILL2_M2:
  case RISCV::PseudoVRELOAD2_M2:
    return std::make_pair(2u, 2u);
  case RISCV::PseudoVSPILL2_M4:
  case RISCV::PseudoVRELOAD2_M4:
    return std::make_pair(2u, 4u);
  case RISCV::PseudoVSPILL3_M1:
  case RISCV::PseudoVRELOAD3_M1:
    return std::make_pair(3u, 1u);
  case RISCV::PseudoVSPILL3_M2:
  case RISCV::PseudoVRELOAD3_M2:
    return std::make_pair(3u, 2u);
  case RISCV::PseudoVSPILL4_M1:
  case RISCV::PseudoVRELOAD4_M1:
    return std::make_pair(4u, 1u);
  case RISCV::PseudoVSPILL4_M2:
  case RISCV::PseudoVRELOAD4_M2:
    return std::make_pair(4u, 2u);
  case RISCV::PseudoVSPILL5_M1:
  case RISCV::PseudoVRELOAD5_M1:
    return std::make_pair(5u, 1u);
  case RISCV::PseudoVSPILL6_M1:
  case RISCV::PseudoVRELOAD6_M1:
    return std::make_pair(6u, 1u);
  case RISCV::PseudoVSPILL7_M1:
  case RISCV::PseudoVRELOAD7_M1:
    return std::make_pair(7u, 1u);
  case RISCV::PseudoVSPILL8_M1:
  case RISCV::PseudoVRELOAD8_M1:
    return std::make_pair(8u, 1u);
  }
}

bool RISCV::isFaultFirstLoad(const MachineInstr &MI) {
  return MI.getNumExplicitDefs() == 2 &&
         MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) && !MI.isInlineAsm();
}

bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
  int16_t MI1FrmOpIdx =
      RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm);
  int16_t MI2FrmOpIdx =
      RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm);
  if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0)
    return false;
  MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx);
  MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx);
  return FrmOp1.getImm() == FrmOp2.getImm();
}
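// For example, for vsll.vx with SEW=32 only the low 5 (log2(SEW)) bits of the
// scalar shift amount are demanded; the narrowing shifts and clips demand
// log2(2*SEW) bits, and most other .vx forms consume a full SEW-bit scalar.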
std::optional<unsigned>
RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) {
  // TODO: Handle Zvbb instructions
  switch (Opcode) {
  default:
    return std::nullopt;

  // 11.6. Vector Single-Width Shift Instructions
  case RISCV::VSLL_VX:
  case RISCV::VSRL_VX:
  case RISCV::VSRA_VX:
  // 12.4. Vector Single-Width Scaling Shift Instructions
  case RISCV::VSSRL_VX:
  case RISCV::VSSRA_VX:
    // Only the low lg2(SEW) bits of the shift-amount value are used.
    return Log2SEW;

  // 11.7 Vector Narrowing Integer Right Shift Instructions
  case RISCV::VNSRL_WX:
  case RISCV::VNSRA_WX:
  // 12.5. Vector Narrowing Fixed-Point Clip Instructions
  case RISCV::VNCLIPU_WX:
  case RISCV::VNCLIP_WX:
    // Only the low lg2(2*SEW) bits of the shift-amount value are used.
    return Log2SEW + 1;

  // 11.1. Vector Single-Width Integer Add and Subtract
  case RISCV::VADD_VX:
  case RISCV::VSUB_VX:
  case RISCV::VRSUB_VX:
  // 11.2. Vector Widening Integer Add/Subtract
  case RISCV::VWADDU_VX:
  case RISCV::VWSUBU_VX:
  case RISCV::VWADD_VX:
  case RISCV::VWSUB_VX:
  case RISCV::VWADDU_WX:
  case RISCV::VWSUBU_WX:
  case RISCV::VWADD_WX:
  case RISCV::VWSUB_WX:
  // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
  case RISCV::VADC_VXM:
  case RISCV::VADC_VIM:
  case RISCV::VMADC_VXM:
  case RISCV::VMADC_VIM:
  case RISCV::VMADC_VX:
  case RISCV::VSBC_VXM:
  case RISCV::VMSBC_VXM:
  case RISCV::VMSBC_VX:
  // 11.5 Vector Bitwise Logical Instructions
  case RISCV::VAND_VX:
  case RISCV::VOR_VX:
  case RISCV::VXOR_VX:
  // 11.8. Vector Integer Compare Instructions
  case RISCV::VMSEQ_VX:
  case RISCV::VMSNE_VX:
  case RISCV::VMSLTU_VX:
  case RISCV::VMSLT_VX:
  case RISCV::VMSLEU_VX:
  case RISCV::VMSLE_VX:
  case RISCV::VMSGTU_VX:
  case RISCV::VMSGT_VX:
  // 11.9. Vector Integer Min/Max Instructions
  case RISCV::VMINU_VX:
  case RISCV::VMIN_VX:
  case RISCV::VMAXU_VX:
  case RISCV::VMAX_VX:
  // 11.10. Vector Single-Width Integer Multiply Instructions
  case RISCV::VMUL_VX:
  case RISCV::VMULH_VX:
  case RISCV::VMULHU_VX:
  case RISCV::VMULHSU_VX:
  // 11.11. Vector Integer Divide Instructions
  case RISCV::VDIVU_VX:
  case RISCV::VDIV_VX:
  case RISCV::VREMU_VX:
  case RISCV::VREM_VX:
  // 11.12. Vector Widening Integer Multiply Instructions
  case RISCV::VWMUL_VX:
  case RISCV::VWMULU_VX:
  case RISCV::VWMULSU_VX:
  // 11.13. Vector Single-Width Integer Multiply-Add Instructions
  case RISCV::VMACC_VX:
  case RISCV::VNMSAC_VX:
  case RISCV::VMADD_VX:
  case RISCV::VNMSUB_VX:
  // 11.14. Vector Widening Integer Multiply-Add Instructions
  case RISCV::VWMACCU_VX:
  case RISCV::VWMACC_VX:
  case RISCV::VWMACCSU_VX:
  case RISCV::VWMACCUS_VX:
  // 11.15. Vector Integer Merge Instructions
  case RISCV::VMERGE_VXM:
  // 11.16. Vector Integer Move Instructions
  case RISCV::VMV_V_X:
  // 12.1. Vector Single-Width Saturating Add and Subtract
  case RISCV::VSADDU_VX:
  case RISCV::VSADD_VX:
  case RISCV::VSSUBU_VX:
  case RISCV::VSSUB_VX:
  // 12.2. Vector Single-Width Averaging Add and Subtract
  case RISCV::VAADDU_VX:
  case RISCV::VAADD_VX:
  case RISCV::VASUBU_VX:
  case RISCV::VASUB_VX:
  // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
  case RISCV::VSMUL_VX:
  // 16.1. Integer Scalar Move Instructions
  case RISCV::VMV_S_X:
    return 1U << Log2SEW;
  }
}

unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
  const RISCVVPseudosTable::PseudoInfo *RVV =
      RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
  if (!RVV)
    return 0;
  return RVV->BaseInstr;
}