Path: blob/main/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
35294 views
//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines an instruction selector for the RISC-V target.9//10//===----------------------------------------------------------------------===//1112#include "RISCVISelDAGToDAG.h"13#include "MCTargetDesc/RISCVBaseInfo.h"14#include "MCTargetDesc/RISCVMCTargetDesc.h"15#include "MCTargetDesc/RISCVMatInt.h"16#include "RISCVISelLowering.h"17#include "RISCVMachineFunctionInfo.h"18#include "llvm/CodeGen/MachineFrameInfo.h"19#include "llvm/IR/IntrinsicsRISCV.h"20#include "llvm/Support/Alignment.h"21#include "llvm/Support/Debug.h"22#include "llvm/Support/MathExtras.h"23#include "llvm/Support/raw_ostream.h"2425using namespace llvm;2627#define DEBUG_TYPE "riscv-isel"28#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"2930static cl::opt<bool> UsePseudoMovImm(31"riscv-use-rematerializable-movimm", cl::Hidden,32cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "33"constant materialization"),34cl::init(false));3536namespace llvm::RISCV {37#define GET_RISCVVSSEGTable_IMPL38#define GET_RISCVVLSEGTable_IMPL39#define GET_RISCVVLXSEGTable_IMPL40#define GET_RISCVVSXSEGTable_IMPL41#define GET_RISCVVLETable_IMPL42#define GET_RISCVVSETable_IMPL43#define GET_RISCVVLXTable_IMPL44#define GET_RISCVVSXTable_IMPL45#include "RISCVGenSearchableTables.inc"46} // namespace llvm::RISCV4748void RISCVDAGToDAGISel::PreprocessISelDAG() {49SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();5051bool MadeChange = false;52while (Position != CurDAG->allnodes_begin()) {53SDNode *N = &*--Position;54if (N->use_empty())55continue;5657SDValue Result;58switch (N->getOpcode()) {59case ISD::SPLAT_VECTOR: {60// Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point61// SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.62MVT VT = N->getSimpleValueType(0);63unsigned Opc =64VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;65SDLoc DL(N);66SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());67SDValue Src = N->getOperand(0);68if (VT.isInteger())69Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),70N->getOperand(0));71Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);72break;73}74case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {75// Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector76// load. 
Done after lowering and combining so that we have a chance to77// optimize this to VMV_V_X_VL when the upper bits aren't needed.78assert(N->getNumOperands() == 4 && "Unexpected number of operands");79MVT VT = N->getSimpleValueType(0);80SDValue Passthru = N->getOperand(0);81SDValue Lo = N->getOperand(1);82SDValue Hi = N->getOperand(2);83SDValue VL = N->getOperand(3);84assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&85Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&86"Unexpected VTs!");87MachineFunction &MF = CurDAG->getMachineFunction();88SDLoc DL(N);8990// Create temporary stack for each expanding node.91SDValue StackSlot =92CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));93int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();94MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);9596SDValue Chain = CurDAG->getEntryNode();97Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));9899SDValue OffsetSlot =100CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);101Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),102Align(8));103104Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);105106SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});107SDValue IntID =108CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);109SDValue Ops[] = {Chain,110IntID,111Passthru,112StackSlot,113CurDAG->getRegister(RISCV::X0, MVT::i64),114VL};115116Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,117MVT::i64, MPI, Align(8),118MachineMemOperand::MOLoad);119break;120}121}122123if (Result) {124LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");125LLVM_DEBUG(N->dump(CurDAG));126LLVM_DEBUG(dbgs() << "\nNew: ");127LLVM_DEBUG(Result->dump(CurDAG));128LLVM_DEBUG(dbgs() << "\n");129130CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);131MadeChange = true;132}133}134135if (MadeChange)136CurDAG->RemoveDeadNodes();137}138139void RISCVDAGToDAGISel::PostprocessISelDAG() {140HandleSDNode Dummy(CurDAG->getRoot());141SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();142143bool MadeChange = false;144while (Position != CurDAG->allnodes_begin()) {145SDNode *N = &*--Position;146// Skip dead nodes and any non-machine opcodes.147if (N->use_empty() || !N->isMachineOpcode())148continue;149150MadeChange |= doPeepholeSExtW(N);151152// FIXME: This is here only because the VMerge transform doesn't153// know how to handle masked true inputs. Once that has been moved154// to post-ISEL, this can be deleted as well.155MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));156}157158CurDAG->setRoot(Dummy.getValue());159160MadeChange |= doPeepholeMergeVVMFold();161162// After we're done with everything else, convert IMPLICIT_DEF163// passthru operands to NoRegister. This is required to workaround164// an optimization deficiency in MachineCSE. This really should165// be merged back into each of the patterns (i.e. 
there's no good166// reason not to go directly to NoReg), but is being done this way167// to allow easy backporting.168MadeChange |= doPeepholeNoRegPassThru();169170if (MadeChange)171CurDAG->RemoveDeadNodes();172}173174static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,175RISCVMatInt::InstSeq &Seq) {176SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);177for (const RISCVMatInt::Inst &Inst : Seq) {178SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);179SDNode *Result = nullptr;180switch (Inst.getOpndKind()) {181case RISCVMatInt::Imm:182Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);183break;184case RISCVMatInt::RegX0:185Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,186CurDAG->getRegister(RISCV::X0, VT));187break;188case RISCVMatInt::RegReg:189Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);190break;191case RISCVMatInt::RegImm:192Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);193break;194}195196// Only the first instruction has X0 as its source.197SrcReg = SDValue(Result, 0);198}199200return SrcReg;201}202203static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,204int64_t Imm, const RISCVSubtarget &Subtarget) {205RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);206207// Use a rematerializable pseudo instruction for short sequences if enabled.208if (Seq.size() == 2 && UsePseudoMovImm)209return SDValue(210CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,211CurDAG->getTargetConstant(Imm, DL, VT)),2120);213214// See if we can create this constant as (ADD (SLLI X, C), X) where X is at215// worst an LUI+ADDIW. This will require an extra register, but avoids a216// constant pool.217// If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where218// low and high 32 bits are the same and bit 31 and 63 are set.219if (Seq.size() > 3) {220unsigned ShiftAmt, AddOpc;221RISCVMatInt::InstSeq SeqLo =222RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);223if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {224SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);225226SDValue SLLI = SDValue(227CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,228CurDAG->getTargetConstant(ShiftAmt, DL, VT)),2290);230return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);231}232}233234// Otherwise, use the original sequence.235return selectImmSeq(CurDAG, DL, VT, Seq);236}237238static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,239unsigned NF, RISCVII::VLMUL LMUL) {240static const unsigned M1TupleRegClassIDs[] = {241RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,242RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,243RISCV::VRN8M1RegClassID};244static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,245RISCV::VRN3M2RegClassID,246RISCV::VRN4M2RegClassID};247248assert(Regs.size() >= 2 && Regs.size() <= 8);249250unsigned RegClassID;251unsigned SubReg0;252switch (LMUL) {253default:254llvm_unreachable("Invalid LMUL.");255case RISCVII::VLMUL::LMUL_F8:256case RISCVII::VLMUL::LMUL_F4:257case RISCVII::VLMUL::LMUL_F2:258case RISCVII::VLMUL::LMUL_1:259static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,260"Unexpected subreg numbering");261SubReg0 = RISCV::sub_vrm1_0;262RegClassID = M1TupleRegClassIDs[NF - 2];263break;264case RISCVII::VLMUL::LMUL_2:265static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,266"Unexpected subreg 
numbering");267SubReg0 = RISCV::sub_vrm2_0;268RegClassID = M2TupleRegClassIDs[NF - 2];269break;270case RISCVII::VLMUL::LMUL_4:271static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,272"Unexpected subreg numbering");273SubReg0 = RISCV::sub_vrm4_0;274RegClassID = RISCV::VRN2M4RegClassID;275break;276}277278SDLoc DL(Regs[0]);279SmallVector<SDValue, 8> Ops;280281Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));282283for (unsigned I = 0; I < Regs.size(); ++I) {284Ops.push_back(Regs[I]);285Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));286}287SDNode *N =288CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);289return SDValue(N, 0);290}291292void RISCVDAGToDAGISel::addVectorLoadStoreOperands(293SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,294bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,295bool IsLoad, MVT *IndexVT) {296SDValue Chain = Node->getOperand(0);297SDValue Glue;298299Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.300301if (IsStridedOrIndexed) {302Operands.push_back(Node->getOperand(CurOp++)); // Index.303if (IndexVT)304*IndexVT = Operands.back()->getSimpleValueType(0);305}306307if (IsMasked) {308// Mask needs to be copied to V0.309SDValue Mask = Node->getOperand(CurOp++);310Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());311Glue = Chain.getValue(1);312Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));313}314SDValue VL;315selectVLOp(Node->getOperand(CurOp++), VL);316Operands.push_back(VL);317318MVT XLenVT = Subtarget->getXLenVT();319SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);320Operands.push_back(SEWOp);321322// At the IR layer, all the masked load intrinsics have policy operands,323// none of the others do. All have passthru operands. 
For our pseudos,324// all loads have policy operands.325if (IsLoad) {326uint64_t Policy = RISCVII::MASK_AGNOSTIC;327if (IsMasked)328Policy = Node->getConstantOperandVal(CurOp++);329SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);330Operands.push_back(PolicyOp);331}332333Operands.push_back(Chain); // Chain.334if (Glue)335Operands.push_back(Glue);336}337338void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,339bool IsStrided) {340SDLoc DL(Node);341unsigned NF = Node->getNumValues() - 1;342MVT VT = Node->getSimpleValueType(0);343unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());344RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);345346unsigned CurOp = 2;347SmallVector<SDValue, 8> Operands;348349SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,350Node->op_begin() + CurOp + NF);351SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);352Operands.push_back(Merge);353CurOp += NF;354355addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,356Operands, /*IsLoad=*/true);357358const RISCV::VLSEGPseudo *P =359RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,360static_cast<unsigned>(LMUL));361MachineSDNode *Load =362CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);363364if (auto *MemOp = dyn_cast<MemSDNode>(Node))365CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});366367SDValue SuperReg = SDValue(Load, 0);368for (unsigned I = 0; I < NF; ++I) {369unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);370ReplaceUses(SDValue(Node, I),371CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));372}373374ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));375CurDAG->RemoveDeadNode(Node);376}377378void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {379SDLoc DL(Node);380unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.381MVT VT = Node->getSimpleValueType(0);382MVT XLenVT = Subtarget->getXLenVT();383unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());384RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);385386unsigned CurOp = 2;387SmallVector<SDValue, 7> Operands;388389SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,390Node->op_begin() + CurOp + NF);391SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);392Operands.push_back(MaskedOff);393CurOp += NF;394395addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,396/*IsStridedOrIndexed*/ false, Operands,397/*IsLoad=*/true);398399const RISCV::VLSEGPseudo *P =400RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,401Log2SEW, static_cast<unsigned>(LMUL));402MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,403XLenVT, MVT::Other, Operands);404405if (auto *MemOp = dyn_cast<MemSDNode>(Node))406CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});407408SDValue SuperReg = SDValue(Load, 0);409for (unsigned I = 0; I < NF; ++I) {410unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);411ReplaceUses(SDValue(Node, I),412CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));413}414415ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL416ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain417CurDAG->RemoveDeadNode(Node);418}419420void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,421bool IsOrdered) {422SDLoc DL(Node);423unsigned NF = Node->getNumValues() - 1;424MVT VT = Node->getSimpleValueType(0);425unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());426RISCVII::VLMUL LMUL = 
RISCVTargetLowering::getLMUL(VT);427428unsigned CurOp = 2;429SmallVector<SDValue, 8> Operands;430431SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,432Node->op_begin() + CurOp + NF);433SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);434Operands.push_back(MaskedOff);435CurOp += NF;436437MVT IndexVT;438addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,439/*IsStridedOrIndexed*/ true, Operands,440/*IsLoad=*/true, &IndexVT);441442assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&443"Element count mismatch");444445RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);446unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());447if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {448report_fatal_error("The V extension does not support EEW=64 for index "449"values when XLEN=32");450}451const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(452NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),453static_cast<unsigned>(IndexLMUL));454MachineSDNode *Load =455CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);456457if (auto *MemOp = dyn_cast<MemSDNode>(Node))458CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});459460SDValue SuperReg = SDValue(Load, 0);461for (unsigned I = 0; I < NF; ++I) {462unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);463ReplaceUses(SDValue(Node, I),464CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));465}466467ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));468CurDAG->RemoveDeadNode(Node);469}470471void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,472bool IsStrided) {473SDLoc DL(Node);474unsigned NF = Node->getNumOperands() - 4;475if (IsStrided)476NF--;477if (IsMasked)478NF--;479MVT VT = Node->getOperand(2)->getSimpleValueType(0);480unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());481RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);482SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);483SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);484485SmallVector<SDValue, 8> Operands;486Operands.push_back(StoreVal);487unsigned CurOp = 2 + NF;488489addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,490Operands);491492const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(493NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));494MachineSDNode *Store =495CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);496497if (auto *MemOp = dyn_cast<MemSDNode>(Node))498CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});499500ReplaceNode(Node, Store);501}502503void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,504bool IsOrdered) {505SDLoc DL(Node);506unsigned NF = Node->getNumOperands() - 5;507if (IsMasked)508--NF;509MVT VT = Node->getOperand(2)->getSimpleValueType(0);510unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());511RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);512SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);513SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);514515SmallVector<SDValue, 8> Operands;516Operands.push_back(StoreVal);517unsigned CurOp = 2 + NF;518519MVT IndexVT;520addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,521/*IsStridedOrIndexed*/ true, Operands,522/*IsLoad=*/false, &IndexVT);523524assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&525"Element count mismatch");526527RISCVII::VLMUL IndexLMUL = 
RISCVTargetLowering::getLMUL(IndexVT);528unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());529if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {530report_fatal_error("The V extension does not support EEW=64 for index "531"values when XLEN=32");532}533const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(534NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),535static_cast<unsigned>(IndexLMUL));536MachineSDNode *Store =537CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);538539if (auto *MemOp = dyn_cast<MemSDNode>(Node))540CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});541542ReplaceNode(Node, Store);543}544545void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {546if (!Subtarget->hasVInstructions())547return;548549assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");550551SDLoc DL(Node);552MVT XLenVT = Subtarget->getXLenVT();553554unsigned IntNo = Node->getConstantOperandVal(0);555556assert((IntNo == Intrinsic::riscv_vsetvli ||557IntNo == Intrinsic::riscv_vsetvlimax) &&558"Unexpected vsetvli intrinsic");559560bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;561unsigned Offset = (VLMax ? 1 : 2);562563assert(Node->getNumOperands() == Offset + 2 &&564"Unexpected number of operands");565566unsigned SEW =567RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);568RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(569Node->getConstantOperandVal(Offset + 1) & 0x7);570571unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,572/*MaskAgnostic*/ true);573SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);574575SDValue VLOperand;576unsigned Opcode = RISCV::PseudoVSETVLI;577if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {578if (auto VLEN = Subtarget->getRealVLen())579if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())580VLMax = true;581}582if (VLMax || isAllOnesConstant(Node->getOperand(1))) {583VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);584Opcode = RISCV::PseudoVSETVLIX0;585} else {586VLOperand = Node->getOperand(1);587588if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {589uint64_t AVL = C->getZExtValue();590if (isUInt<5>(AVL)) {591SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);592ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,593XLenVT, VLImm, VTypeIOp));594return;595}596}597}598599ReplaceNode(Node,600CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));601}602603bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {604MVT VT = Node->getSimpleValueType(0);605unsigned Opcode = Node->getOpcode();606assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&607"Unexpected opcode");608SDLoc DL(Node);609610// For operations of the form (x << C1) op C2, check if we can use611// ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.612SDValue N0 = Node->getOperand(0);613SDValue N1 = Node->getOperand(1);614615ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);616if (!Cst)617return false;618619int64_t Val = Cst->getSExtValue();620621// Check if immediate can already use ANDI/ORI/XORI.622if (isInt<12>(Val))623return false;624625SDValue Shift = N0;626627// If Val is simm32 and we have a sext_inreg from i32, then the binop628// produces at least 33 sign bits. 
We can peek through the sext_inreg and use629// a SLLIW at the end.630bool SignExt = false;631if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&632N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {633SignExt = true;634Shift = N0.getOperand(0);635}636637if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())638return false;639640ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));641if (!ShlCst)642return false;643644uint64_t ShAmt = ShlCst->getZExtValue();645646// Make sure that we don't change the operation by removing bits.647// This only matters for OR and XOR, AND is unaffected.648uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);649if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)650return false;651652int64_t ShiftedVal = Val >> ShAmt;653if (!isInt<12>(ShiftedVal))654return false;655656// If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.657if (SignExt && ShAmt >= 32)658return false;659660// Ok, we can reorder to get a smaller immediate.661unsigned BinOpc;662switch (Opcode) {663default: llvm_unreachable("Unexpected opcode");664case ISD::AND: BinOpc = RISCV::ANDI; break;665case ISD::OR: BinOpc = RISCV::ORI; break;666case ISD::XOR: BinOpc = RISCV::XORI; break;667}668669unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;670671SDNode *BinOp =672CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),673CurDAG->getTargetConstant(ShiftedVal, DL, VT));674SDNode *SLLI =675CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),676CurDAG->getTargetConstant(ShAmt, DL, VT));677ReplaceNode(Node, SLLI);678return true;679}680681bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {682// Only supported with XTHeadBb at the moment.683if (!Subtarget->hasVendorXTHeadBb())684return false;685686auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));687if (!N1C)688return false;689690SDValue N0 = Node->getOperand(0);691if (!N0.hasOneUse())692return false;693694auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,695MVT VT) {696return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),697CurDAG->getTargetConstant(Msb, DL, VT),698CurDAG->getTargetConstant(Lsb, DL, VT));699};700701SDLoc DL(Node);702MVT VT = Node->getSimpleValueType(0);703const unsigned RightShAmt = N1C->getZExtValue();704705// Transform (sra (shl X, C1) C2) with C1 < C2706// -> (TH.EXT X, msb, lsb)707if (N0.getOpcode() == ISD::SHL) {708auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));709if (!N01C)710return false;711712const unsigned LeftShAmt = N01C->getZExtValue();713// Make sure that this is a bitfield extraction (i.e., the shift-right714// amount can not be less than the left-shift).715if (LeftShAmt > RightShAmt)716return false;717718const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;719const unsigned Msb = MsbPlusOne - 1;720const unsigned Lsb = RightShAmt - LeftShAmt;721722SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);723ReplaceNode(Node, TH_EXT);724return true;725}726727// Transform (sra (sext_inreg X, _), C) ->728// (TH.EXT X, msb, lsb)729if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {730unsigned ExtSize =731cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();732733// ExtSize of 32 should use sraiw via tablegen pattern.734if (ExtSize == 32)735return false;736737const unsigned Msb = ExtSize - 1;738const unsigned Lsb = RightShAmt;739740SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);741ReplaceNode(Node, TH_EXT);742return true;743}744745return 
false;746}747748bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {749// Target does not support indexed loads.750if (!Subtarget->hasVendorXTHeadMemIdx())751return false;752753LoadSDNode *Ld = cast<LoadSDNode>(Node);754ISD::MemIndexedMode AM = Ld->getAddressingMode();755if (AM == ISD::UNINDEXED)756return false;757758const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());759if (!C)760return false;761762EVT LoadVT = Ld->getMemoryVT();763assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&764"Unexpected addressing mode");765bool IsPre = AM == ISD::PRE_INC;766bool IsPost = AM == ISD::POST_INC;767int64_t Offset = C->getSExtValue();768769// The constants that can be encoded in the THeadMemIdx instructions770// are of the form (sign_extend(imm5) << imm2).771int64_t Shift;772for (Shift = 0; Shift < 4; Shift++)773if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))774break;775776// Constant cannot be encoded.777if (Shift == 4)778return false;779780bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);781unsigned Opcode;782if (LoadVT == MVT::i8 && IsPre)783Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;784else if (LoadVT == MVT::i8 && IsPost)785Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;786else if (LoadVT == MVT::i16 && IsPre)787Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;788else if (LoadVT == MVT::i16 && IsPost)789Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;790else if (LoadVT == MVT::i32 && IsPre)791Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;792else if (LoadVT == MVT::i32 && IsPost)793Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;794else if (LoadVT == MVT::i64 && IsPre)795Opcode = RISCV::TH_LDIB;796else if (LoadVT == MVT::i64 && IsPost)797Opcode = RISCV::TH_LDIA;798else799return false;800801EVT Ty = Ld->getOffset().getValueType();802SDValue Ops[] = {Ld->getBasePtr(),803CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),804CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),805Ld->getChain()};806SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),807Ld->getValueType(1), MVT::Other, Ops);808809MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();810CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});811812ReplaceNode(Node, New);813814return true;815}816817void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {818if (!Subtarget->hasVInstructions())819return;820821assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");822823SDLoc DL(Node);824unsigned IntNo = Node->getConstantOperandVal(1);825826assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||827IntNo == Intrinsic::riscv_sf_vc_i_se) &&828"Unexpected vsetvli intrinsic");829830// imm, imm, imm, simm5/scalar, sew, log2lmul, vl831unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));832SDValue SEWOp =833CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());834SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),835Node->getOperand(4), Node->getOperand(5),836Node->getOperand(8), SEWOp,837Node->getOperand(0)};838839unsigned Opcode;840auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));841switch (LMulSDNode->getSExtValue()) {842case 5:843Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8844: RISCV::PseudoVC_I_SE_MF8;845break;846case 6:847Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4848: RISCV::PseudoVC_I_SE_MF4;849break;850case 7:851Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? 
RISCV::PseudoVC_X_SE_MF2852: RISCV::PseudoVC_I_SE_MF2;853break;854case 0:855Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1856: RISCV::PseudoVC_I_SE_M1;857break;858case 1:859Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2860: RISCV::PseudoVC_I_SE_M2;861break;862case 2:863Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4864: RISCV::PseudoVC_I_SE_M4;865break;866case 3:867Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8868: RISCV::PseudoVC_I_SE_M8;869break;870}871872ReplaceNode(Node, CurDAG->getMachineNode(873Opcode, DL, Node->getSimpleValueType(0), Operands));874}875876void RISCVDAGToDAGISel::Select(SDNode *Node) {877// If we have a custom node, we have already selected.878if (Node->isMachineOpcode()) {879LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");880Node->setNodeId(-1);881return;882}883884// Instruction Selection not handled by the auto-generated tablegen selection885// should be handled here.886unsigned Opcode = Node->getOpcode();887MVT XLenVT = Subtarget->getXLenVT();888SDLoc DL(Node);889MVT VT = Node->getSimpleValueType(0);890891bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();892893switch (Opcode) {894case ISD::Constant: {895assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");896auto *ConstNode = cast<ConstantSDNode>(Node);897if (ConstNode->isZero()) {898SDValue New =899CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);900ReplaceNode(Node, New.getNode());901return;902}903int64_t Imm = ConstNode->getSExtValue();904// If only the lower 8 bits are used, try to convert this to a simm6 by905// sign-extending bit 7. This is neutral without the C extension, and906// allows C.LI to be used if C is present.907if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))908Imm = SignExtend64<8>(Imm);909// If the upper XLen-16 bits are not used, try to convert this to a simm12910// by sign extending bit 15.911if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&912hasAllHUsers(Node))913Imm = SignExtend64<16>(Imm);914// If the upper 32-bits are not used try to convert this into a simm32 by915// sign extending bit 32.916if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))917Imm = SignExtend64<32>(Imm);918919ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());920return;921}922case ISD::ConstantFP: {923const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();924auto [FPImm, NeedsFNeg] =925static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,926VT);927if (FPImm >= 0) {928unsigned Opc;929unsigned FNegOpc;930switch (VT.SimpleTy) {931default:932llvm_unreachable("Unexpected size");933case MVT::f16:934Opc = RISCV::FLI_H;935FNegOpc = RISCV::FSGNJN_H;936break;937case MVT::f32:938Opc = RISCV::FLI_S;939FNegOpc = RISCV::FSGNJN_S;940break;941case MVT::f64:942Opc = RISCV::FLI_D;943FNegOpc = RISCV::FSGNJN_D;944break;945}946SDNode *Res = CurDAG->getMachineNode(947Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));948if (NeedsFNeg)949Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),950SDValue(Res, 0));951952ReplaceNode(Node, Res);953return;954}955956bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;957SDValue Imm;958// For +0.0 or f64 -0.0 we need to start from X0. 
For all others, we will959// create an integer immediate.960if (APF.isPosZero() || NegZeroF64)961Imm = CurDAG->getRegister(RISCV::X0, XLenVT);962else963Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),964*Subtarget);965966bool HasZdinx = Subtarget->hasStdExtZdinx();967bool Is64Bit = Subtarget->is64Bit();968unsigned Opc;969switch (VT.SimpleTy) {970default:971llvm_unreachable("Unexpected size");972case MVT::bf16:973assert(Subtarget->hasStdExtZfbfmin());974Opc = RISCV::FMV_H_X;975break;976case MVT::f16:977Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;978break;979case MVT::f32:980Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;981break;982case MVT::f64:983// For RV32, we can't move from a GPR, we need to convert instead. This984// should only happen for +0.0 and -0.0.985assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");986if (Is64Bit)987Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;988else989Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;990break;991}992993SDNode *Res;994if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)995Res = CurDAG->getMachineNode(996Opc, DL, VT, Imm,997CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));998else999Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);10001001// For f64 -0.0, we need to insert a fneg.d idiom.1002if (NegZeroF64) {1003Opc = RISCV::FSGNJN_D;1004if (HasZdinx)1005Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;1006Res =1007CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));1008}10091010ReplaceNode(Node, Res);1011return;1012}1013case RISCVISD::BuildPairF64: {1014if (!Subtarget->hasStdExtZdinx())1015break;10161017assert(!Subtarget->is64Bit() && "Unexpected subtarget");10181019SDValue Ops[] = {1020CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),1021Node->getOperand(0),1022CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),1023Node->getOperand(1),1024CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};10251026SDNode *N =1027CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops);1028ReplaceNode(Node, N);1029return;1030}1031case RISCVISD::SplitF64: {1032if (Subtarget->hasStdExtZdinx()) {1033assert(!Subtarget->is64Bit() && "Unexpected subtarget");10341035if (!SDValue(Node, 0).use_empty()) {1036SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT,1037Node->getOperand(0));1038ReplaceUses(SDValue(Node, 0), Lo);1039}10401041if (!SDValue(Node, 1).use_empty()) {1042SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, VT,1043Node->getOperand(0));1044ReplaceUses(SDValue(Node, 1), Hi);1045}10461047CurDAG->RemoveDeadNode(Node);1048return;1049}10501051if (!Subtarget->hasStdExtZfa())1052break;1053assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&1054"Unexpected subtarget");10551056// With Zfa, lower to fmv.x.w and fmvh.x.d.1057if (!SDValue(Node, 0).use_empty()) {1058SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,1059Node->getOperand(0));1060ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));1061}1062if (!SDValue(Node, 1).use_empty()) {1063SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,1064Node->getOperand(0));1065ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));1066}10671068CurDAG->RemoveDeadNode(Node);1069return;1070}1071case ISD::SHL: {1072auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));1073if (!N1C)1074break;1075SDValue N0 = Node->getOperand(0);1076if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() 
||1077!isa<ConstantSDNode>(N0.getOperand(1)))1078break;1079unsigned ShAmt = N1C->getZExtValue();1080uint64_t Mask = N0.getConstantOperandVal(1);10811082// Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has1083// 32 leading zeros and C3 trailing zeros.1084if (ShAmt <= 32 && isShiftedMask_64(Mask)) {1085unsigned XLen = Subtarget->getXLen();1086unsigned LeadingZeros = XLen - llvm::bit_width(Mask);1087unsigned TrailingZeros = llvm::countr_zero(Mask);1088if (TrailingZeros > 0 && LeadingZeros == 32) {1089SDNode *SRLIW = CurDAG->getMachineNode(1090RISCV::SRLIW, DL, VT, N0->getOperand(0),1091CurDAG->getTargetConstant(TrailingZeros, DL, VT));1092SDNode *SLLI = CurDAG->getMachineNode(1093RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),1094CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));1095ReplaceNode(Node, SLLI);1096return;1097}1098}1099break;1100}1101case ISD::SRL: {1102auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));1103if (!N1C)1104break;1105SDValue N0 = Node->getOperand(0);1106if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))1107break;1108unsigned ShAmt = N1C->getZExtValue();1109uint64_t Mask = N0.getConstantOperandVal(1);11101111// Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has1112// 32 leading zeros and C3 trailing zeros.1113if (isShiftedMask_64(Mask) && N0.hasOneUse()) {1114unsigned XLen = Subtarget->getXLen();1115unsigned LeadingZeros = XLen - llvm::bit_width(Mask);1116unsigned TrailingZeros = llvm::countr_zero(Mask);1117if (LeadingZeros == 32 && TrailingZeros > ShAmt) {1118SDNode *SRLIW = CurDAG->getMachineNode(1119RISCV::SRLIW, DL, VT, N0->getOperand(0),1120CurDAG->getTargetConstant(TrailingZeros, DL, VT));1121SDNode *SLLI = CurDAG->getMachineNode(1122RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),1123CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));1124ReplaceNode(Node, SLLI);1125return;1126}1127}11281129// Optimize (srl (and X, C2), C) ->1130// (srli (slli X, (XLen-C3), (XLen-C3) + C)1131// Where C2 is a mask with C3 trailing ones.1132// Taking into account that the C2 may have had lower bits unset by1133// SimplifyDemandedBits. This avoids materializing the C2 immediate.1134// This pattern occurs when type legalizing right shifts for types with1135// less than XLen bits.1136Mask |= maskTrailingOnes<uint64_t>(ShAmt);1137if (!isMask_64(Mask))1138break;1139unsigned TrailingOnes = llvm::countr_one(Mask);1140if (ShAmt >= TrailingOnes)1141break;1142// If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.1143if (TrailingOnes == 32) {1144SDNode *SRLI = CurDAG->getMachineNode(1145Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,1146N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));1147ReplaceNode(Node, SRLI);1148return;1149}11501151// Only do the remaining transforms if the AND has one use.1152if (!N0.hasOneUse())1153break;11541155// If C2 is (1 << ShAmt) use bexti or th.tst if possible.1156if (HasBitTest && ShAmt + 1 == TrailingOnes) {1157SDNode *BEXTI = CurDAG->getMachineNode(1158Subtarget->hasStdExtZbs() ? 
RISCV::BEXTI : RISCV::TH_TST, DL, VT,1159N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));1160ReplaceNode(Node, BEXTI);1161return;1162}11631164unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;1165SDNode *SLLI =1166CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),1167CurDAG->getTargetConstant(LShAmt, DL, VT));1168SDNode *SRLI = CurDAG->getMachineNode(1169RISCV::SRLI, DL, VT, SDValue(SLLI, 0),1170CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));1171ReplaceNode(Node, SRLI);1172return;1173}1174case ISD::SRA: {1175if (trySignedBitfieldExtract(Node))1176return;11771178// Optimize (sra (sext_inreg X, i16), C) ->1179// (srai (slli X, (XLen-16), (XLen-16) + C)1180// And (sra (sext_inreg X, i8), C) ->1181// (srai (slli X, (XLen-8), (XLen-8) + C)1182// This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.1183// This transform matches the code we get without Zbb. The shifts are more1184// compressible, and this can help expose CSE opportunities in the sdiv by1185// constant optimization.1186auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));1187if (!N1C)1188break;1189SDValue N0 = Node->getOperand(0);1190if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())1191break;1192unsigned ShAmt = N1C->getZExtValue();1193unsigned ExtSize =1194cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();1195// ExtSize of 32 should use sraiw via tablegen pattern.1196if (ExtSize >= 32 || ShAmt >= ExtSize)1197break;1198unsigned LShAmt = Subtarget->getXLen() - ExtSize;1199SDNode *SLLI =1200CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),1201CurDAG->getTargetConstant(LShAmt, DL, VT));1202SDNode *SRAI = CurDAG->getMachineNode(1203RISCV::SRAI, DL, VT, SDValue(SLLI, 0),1204CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));1205ReplaceNode(Node, SRAI);1206return;1207}1208case ISD::OR:1209case ISD::XOR:1210if (tryShrinkShlLogicImm(Node))1211return;12121213break;1214case ISD::AND: {1215auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));1216if (!N1C)1217break;1218uint64_t C1 = N1C->getZExtValue();1219const bool isC1Mask = isMask_64(C1);1220const bool isC1ANDI = isInt<12>(C1);12211222SDValue N0 = Node->getOperand(0);12231224auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,1225SDValue X, unsigned Msb,1226unsigned Lsb) {1227if (!Subtarget->hasVendorXTHeadBb())1228return false;12291230SDNode *TH_EXTU = CurDAG->getMachineNode(1231RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),1232CurDAG->getTargetConstant(Lsb, DL, VT));1233ReplaceNode(Node, TH_EXTU);1234return true;1235};12361237bool LeftShift = N0.getOpcode() == ISD::SHL;1238if (LeftShift || N0.getOpcode() == ISD::SRL) {1239auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));1240if (!C)1241break;1242unsigned C2 = C->getZExtValue();1243unsigned XLen = Subtarget->getXLen();1244assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");12451246// Keep track of whether this is a c.andi. 
If we can't use c.andi, the1247// shift pair might offer more compression opportunities.1248// TODO: We could check for C extension here, but we don't have many lit1249// tests with the C extension enabled so not checking gets better1250// coverage.1251// TODO: What if ANDI faster than shift?1252bool IsCANDI = isInt<6>(N1C->getSExtValue());12531254// Clear irrelevant bits in the mask.1255if (LeftShift)1256C1 &= maskTrailingZeros<uint64_t>(C2);1257else1258C1 &= maskTrailingOnes<uint64_t>(XLen - C2);12591260// Some transforms should only be done if the shift has a single use or1261// the AND would become (srli (slli X, 32), 32)1262bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);12631264SDValue X = N0.getOperand(0);12651266// Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask1267// with c3 leading zeros.1268if (!LeftShift && isC1Mask) {1269unsigned Leading = XLen - llvm::bit_width(C1);1270if (C2 < Leading) {1271// If the number of leading zeros is C2+32 this can be SRLIW.1272if (C2 + 32 == Leading) {1273SDNode *SRLIW = CurDAG->getMachineNode(1274RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));1275ReplaceNode(Node, SRLIW);1276return;1277}12781279// (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)1280// if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.1281//1282// This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type1283// legalized and goes through DAG combine.1284if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&1285X.getOpcode() == ISD::SIGN_EXTEND_INREG &&1286cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {1287SDNode *SRAIW =1288CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),1289CurDAG->getTargetConstant(31, DL, VT));1290SDNode *SRLIW = CurDAG->getMachineNode(1291RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),1292CurDAG->getTargetConstant(Leading - 32, DL, VT));1293ReplaceNode(Node, SRLIW);1294return;1295}12961297// Try to use an unsigned bitfield extract (e.g., th.extu) if1298// available.1299// Transform (and (srl x, C2), C1)1300// -> (<bfextract> x, msb, lsb)1301//1302// Make sure to keep this below the SRLIW cases, as we always want to1303// prefer the more common instruction.1304const unsigned Msb = llvm::bit_width(C1) + C2 - 1;1305const unsigned Lsb = C2;1306if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))1307return;13081309// (srli (slli x, c3-c2), c3).1310// Skip if we could use (zext.w (sraiw X, C2)).1311bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&1312X.getOpcode() == ISD::SIGN_EXTEND_INREG &&1313cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;1314// Also Skip if we can use bexti or th.tst.1315Skip |= HasBitTest && Leading == XLen - 1;1316if (OneUseOrZExtW && !Skip) {1317SDNode *SLLI = CurDAG->getMachineNode(1318RISCV::SLLI, DL, VT, X,1319CurDAG->getTargetConstant(Leading - C2, DL, VT));1320SDNode *SRLI = CurDAG->getMachineNode(1321RISCV::SRLI, DL, VT, SDValue(SLLI, 0),1322CurDAG->getTargetConstant(Leading, DL, VT));1323ReplaceNode(Node, SRLI);1324return;1325}1326}1327}13281329// Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask1330// shifted by c2 bits with c3 leading zeros.1331if (LeftShift && isShiftedMask_64(C1)) {1332unsigned Leading = XLen - llvm::bit_width(C1);13331334if (C2 + Leading < XLen &&1335C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {1336// Use slli.uw when possible.1337if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {1338SDNode *SLLI_UW 
=1339CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,1340CurDAG->getTargetConstant(C2, DL, VT));1341ReplaceNode(Node, SLLI_UW);1342return;1343}13441345// (srli (slli c2+c3), c3)1346if (OneUseOrZExtW && !IsCANDI) {1347SDNode *SLLI = CurDAG->getMachineNode(1348RISCV::SLLI, DL, VT, X,1349CurDAG->getTargetConstant(C2 + Leading, DL, VT));1350SDNode *SRLI = CurDAG->getMachineNode(1351RISCV::SRLI, DL, VT, SDValue(SLLI, 0),1352CurDAG->getTargetConstant(Leading, DL, VT));1353ReplaceNode(Node, SRLI);1354return;1355}1356}1357}13581359// Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a1360// shifted mask with c2 leading zeros and c3 trailing zeros.1361if (!LeftShift && isShiftedMask_64(C1)) {1362unsigned Leading = XLen - llvm::bit_width(C1);1363unsigned Trailing = llvm::countr_zero(C1);1364if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&1365!IsCANDI) {1366unsigned SrliOpc = RISCV::SRLI;1367// If the input is zexti32 we should use SRLIW.1368if (X.getOpcode() == ISD::AND &&1369isa<ConstantSDNode>(X.getOperand(1)) &&1370X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {1371SrliOpc = RISCV::SRLIW;1372X = X.getOperand(0);1373}1374SDNode *SRLI = CurDAG->getMachineNode(1375SrliOpc, DL, VT, X,1376CurDAG->getTargetConstant(C2 + Trailing, DL, VT));1377SDNode *SLLI = CurDAG->getMachineNode(1378RISCV::SLLI, DL, VT, SDValue(SRLI, 0),1379CurDAG->getTargetConstant(Trailing, DL, VT));1380ReplaceNode(Node, SLLI);1381return;1382}1383// If the leading zero count is C2+32, we can use SRLIW instead of SRLI.1384if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&1385OneUseOrZExtW && !IsCANDI) {1386SDNode *SRLIW = CurDAG->getMachineNode(1387RISCV::SRLIW, DL, VT, X,1388CurDAG->getTargetConstant(C2 + Trailing, DL, VT));1389SDNode *SLLI = CurDAG->getMachineNode(1390RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),1391CurDAG->getTargetConstant(Trailing, DL, VT));1392ReplaceNode(Node, SLLI);1393return;1394}1395}13961397// Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a1398// shifted mask with no leading zeros and c3 trailing zeros.1399if (LeftShift && isShiftedMask_64(C1)) {1400unsigned Leading = XLen - llvm::bit_width(C1);1401unsigned Trailing = llvm::countr_zero(C1);1402if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {1403SDNode *SRLI = CurDAG->getMachineNode(1404RISCV::SRLI, DL, VT, X,1405CurDAG->getTargetConstant(Trailing - C2, DL, VT));1406SDNode *SLLI = CurDAG->getMachineNode(1407RISCV::SLLI, DL, VT, SDValue(SRLI, 0),1408CurDAG->getTargetConstant(Trailing, DL, VT));1409ReplaceNode(Node, SLLI);1410return;1411}1412// If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.1413if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {1414SDNode *SRLIW = CurDAG->getMachineNode(1415RISCV::SRLIW, DL, VT, X,1416CurDAG->getTargetConstant(Trailing - C2, DL, VT));1417SDNode *SLLI = CurDAG->getMachineNode(1418RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),1419CurDAG->getTargetConstant(Trailing, DL, VT));1420ReplaceNode(Node, SLLI);1421return;1422}14231424// If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.1425if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&1426Subtarget->hasStdExtZba()) {1427SDNode *SRLI = CurDAG->getMachineNode(1428RISCV::SRLI, DL, VT, X,1429CurDAG->getTargetConstant(Trailing - C2, DL, VT));1430SDNode *SLLI_UW = CurDAG->getMachineNode(1431RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),1432CurDAG->getTargetConstant(Trailing, DL, VT));1433ReplaceNode(Node, 
SLLI_UW);1434return;1435}1436}1437}14381439// If C1 masks off the upper bits only (but can't be formed as an1440// ANDI), use an unsigned bitfield extract (e.g., th.extu), if1441// available.1442// Transform (and x, C1)1443// -> (<bfextract> x, msb, lsb)1444if (isC1Mask && !isC1ANDI) {1445const unsigned Msb = llvm::bit_width(C1) - 1;1446if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))1447return;1448}14491450if (tryShrinkShlLogicImm(Node))1451return;14521453break;1454}1455case ISD::MUL: {1456// Special case for calculating (mul (and X, C2), C1) where the full product1457// fits in XLen bits. We can shift X left by the number of leading zeros in1458// C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final1459// product has XLen trailing zeros, putting it in the output of MULHU. This1460// can avoid materializing a constant in a register for C2.14611462// RHS should be a constant.1463auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));1464if (!N1C || !N1C->hasOneUse())1465break;14661467// LHS should be an AND with constant.1468SDValue N0 = Node->getOperand(0);1469if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))1470break;14711472uint64_t C2 = N0.getConstantOperandVal(1);14731474// Constant should be a mask.1475if (!isMask_64(C2))1476break;14771478// If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has1479// multiple users or the constant is a simm12. This prevents inserting a1480// shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely1481// make it more costly to materialize. Otherwise, using a SLLI might allow1482// it to be compressed.1483bool IsANDIOrZExt =1484isInt<12>(C2) ||1485(C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());1486// With XTHeadBb, we can use TH.EXTU.1487IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();1488if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))1489break;1490// If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or1491// the constant is a simm32.1492bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();1493// With XTHeadBb, we can use TH.EXTU.1494IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();1495if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))1496break;14971498// We need to shift left the AND input and C1 by a total of XLen bits.14991500// How far left do we need to shift the AND input?1501unsigned XLen = Subtarget->getXLen();1502unsigned LeadingZeros = XLen - llvm::bit_width(C2);15031504// The constant gets shifted by the remaining amount unless that would1505// shift bits out.1506uint64_t C1 = N1C->getZExtValue();1507unsigned ConstantShift = XLen - LeadingZeros;1508if (ConstantShift > (XLen - llvm::bit_width(C1)))1509break;15101511uint64_t ShiftedC1 = C1 << ConstantShift;1512// If this RV32, we need to sign extend the constant.1513if (XLen == 32)1514ShiftedC1 = SignExtend64<32>(ShiftedC1);15151516// Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).1517SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();1518SDNode *SLLI =1519CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),1520CurDAG->getTargetConstant(LeadingZeros, DL, VT));1521SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,1522SDValue(SLLI, 0), SDValue(Imm, 0));1523ReplaceNode(Node, MULHU);1524return;1525}1526case ISD::LOAD: {1527if (tryIndexedLoad(Node))1528return;15291530if (Subtarget->hasVendorXCVmem()) {1531// We match 
post-incrementing load here1532LoadSDNode *Load = cast<LoadSDNode>(Node);1533if (Load->getAddressingMode() != ISD::POST_INC)1534break;15351536SDValue Chain = Node->getOperand(0);1537SDValue Base = Node->getOperand(1);1538SDValue Offset = Node->getOperand(2);15391540bool Simm12 = false;1541bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;15421543if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {1544int ConstantVal = ConstantOffset->getSExtValue();1545Simm12 = isInt<12>(ConstantVal);1546if (Simm12)1547Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),1548Offset.getValueType());1549}15501551unsigned Opcode = 0;1552switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {1553case MVT::i8:1554if (Simm12 && SignExtend)1555Opcode = RISCV::CV_LB_ri_inc;1556else if (Simm12 && !SignExtend)1557Opcode = RISCV::CV_LBU_ri_inc;1558else if (!Simm12 && SignExtend)1559Opcode = RISCV::CV_LB_rr_inc;1560else1561Opcode = RISCV::CV_LBU_rr_inc;1562break;1563case MVT::i16:1564if (Simm12 && SignExtend)1565Opcode = RISCV::CV_LH_ri_inc;1566else if (Simm12 && !SignExtend)1567Opcode = RISCV::CV_LHU_ri_inc;1568else if (!Simm12 && SignExtend)1569Opcode = RISCV::CV_LH_rr_inc;1570else1571Opcode = RISCV::CV_LHU_rr_inc;1572break;1573case MVT::i32:1574if (Simm12)1575Opcode = RISCV::CV_LW_ri_inc;1576else1577Opcode = RISCV::CV_LW_rr_inc;1578break;1579default:1580break;1581}1582if (!Opcode)1583break;15841585ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,1586Chain.getSimpleValueType(), Base,1587Offset, Chain));1588return;1589}1590break;1591}1592case ISD::INTRINSIC_WO_CHAIN: {1593unsigned IntNo = Node->getConstantOperandVal(0);1594switch (IntNo) {1595// By default we do not custom select any intrinsic.1596default:1597break;1598case Intrinsic::riscv_vmsgeu:1599case Intrinsic::riscv_vmsge: {1600SDValue Src1 = Node->getOperand(1);1601SDValue Src2 = Node->getOperand(2);1602bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;1603bool IsCmpUnsignedZero = false;1604// Only custom select scalar second operand.1605if (Src2.getValueType() != XLenVT)1606break;1607// Small constants are handled with patterns.1608if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {1609int64_t CVal = C->getSExtValue();1610if (CVal >= -15 && CVal <= 16) {1611if (!IsUnsigned || CVal != 0)1612break;1613IsCmpUnsignedZero = true;1614}1615}1616MVT Src1VT = Src1.getSimpleValueType();1617unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;1618switch (RISCVTargetLowering::getLMUL(Src1VT)) {1619default:1620llvm_unreachable("Unexpected LMUL!");1621#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \1622case RISCVII::VLMUL::lmulenum: \1623VMSLTOpcode = IsUnsigned ? 
RISCV::PseudoVMSLTU_VX_##suffix \1624: RISCV::PseudoVMSLT_VX_##suffix; \1625VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \1626VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \1627break;1628CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)1629CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)1630CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)1631CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)1632CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)1633CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)1634CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)1635#undef CASE_VMSLT_VMNAND_VMSET_OPCODES1636}1637SDValue SEW = CurDAG->getTargetConstant(1638Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);1639SDValue VL;1640selectVLOp(Node->getOperand(3), VL);16411642// If vmsgeu with 0 immediate, expand it to vmset.1643if (IsCmpUnsignedZero) {1644ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));1645return;1646}16471648// Expand to1649// vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd1650SDValue Cmp = SDValue(1651CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),16520);1653ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,1654{Cmp, Cmp, VL, SEW}));1655return;1656}1657case Intrinsic::riscv_vmsgeu_mask:1658case Intrinsic::riscv_vmsge_mask: {1659SDValue Src1 = Node->getOperand(2);1660SDValue Src2 = Node->getOperand(3);1661bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;1662bool IsCmpUnsignedZero = false;1663// Only custom select scalar second operand.1664if (Src2.getValueType() != XLenVT)1665break;1666// Small constants are handled with patterns.1667if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {1668int64_t CVal = C->getSExtValue();1669if (CVal >= -15 && CVal <= 16) {1670if (!IsUnsigned || CVal != 0)1671break;1672IsCmpUnsignedZero = true;1673}1674}1675MVT Src1VT = Src1.getSimpleValueType();1676unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,1677VMOROpcode;1678switch (RISCVTargetLowering::getLMUL(Src1VT)) {1679default:1680llvm_unreachable("Unexpected LMUL!");1681#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \1682case RISCVII::VLMUL::lmulenum: \1683VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \1684: RISCV::PseudoVMSLT_VX_##suffix; \1685VMSLTMaskOpcode = IsUnsigned ? 
RISCV::PseudoVMSLTU_VX_##suffix##_MASK \1686: RISCV::PseudoVMSLT_VX_##suffix##_MASK; \1687break;1688CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)1689CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)1690CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)1691CASE_VMSLT_OPCODES(LMUL_1, M1, B8)1692CASE_VMSLT_OPCODES(LMUL_2, M2, B16)1693CASE_VMSLT_OPCODES(LMUL_4, M4, B32)1694CASE_VMSLT_OPCODES(LMUL_8, M8, B64)1695#undef CASE_VMSLT_OPCODES1696}1697// Mask operations use the LMUL from the mask type.1698switch (RISCVTargetLowering::getLMUL(VT)) {1699default:1700llvm_unreachable("Unexpected LMUL!");1701#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \1702case RISCVII::VLMUL::lmulenum: \1703VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \1704VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \1705VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \1706break;1707CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)1708CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)1709CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)1710CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)1711CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)1712CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)1713CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)1714#undef CASE_VMXOR_VMANDN_VMOR_OPCODES1715}1716SDValue SEW = CurDAG->getTargetConstant(1717Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);1718SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);1719SDValue VL;1720selectVLOp(Node->getOperand(5), VL);1721SDValue MaskedOff = Node->getOperand(1);1722SDValue Mask = Node->getOperand(4);17231724// If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.1725if (IsCmpUnsignedZero) {1726// We don't need vmor if the MaskedOff and the Mask are the same1727// value.1728if (Mask == MaskedOff) {1729ReplaceUses(Node, Mask.getNode());1730return;1731}1732ReplaceNode(Node,1733CurDAG->getMachineNode(VMOROpcode, DL, VT,1734{Mask, MaskedOff, VL, MaskSEW}));1735return;1736}17371738// If the MaskedOff value and the Mask are the same value use1739// vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt1740// This avoids needing to copy v0 to vd before starting the next sequence.1741if (Mask == MaskedOff) {1742SDValue Cmp = SDValue(1743CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),17440);1745ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,1746{Mask, Cmp, VL, MaskSEW}));1747return;1748}17491750// Mask needs to be copied to V0.1751SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,1752RISCV::V0, Mask, SDValue());1753SDValue Glue = Chain.getValue(1);1754SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);17551756// Otherwise use1757// vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v01758// The result is mask undisturbed.1759// We use the same instructions to emulate mask agnostic behavior, because1760// the agnostic result can be either undisturbed or all 1.1761SDValue Cmp = SDValue(1762CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,1763{MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),17640);1765// vmxor.mm vd, vd, v0 is used to update active value.1766ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,1767{Cmp, Mask, VL, MaskSEW}));1768return;1769}1770case Intrinsic::riscv_vsetvli:1771case Intrinsic::riscv_vsetvlimax:1772return selectVSETVLI(Node);1773}1774break;1775}1776case ISD::INTRINSIC_W_CHAIN: {1777unsigned IntNo = Node->getConstantOperandVal(1);1778switch (IntNo) {1779// By default we do not custom select any intrinsic.1780default:1781break;1782case Intrinsic::riscv_vlseg2:1783case Intrinsic::riscv_vlseg3:1784case Intrinsic::riscv_vlseg4:1785case 
Intrinsic::riscv_vlseg5:1786case Intrinsic::riscv_vlseg6:1787case Intrinsic::riscv_vlseg7:1788case Intrinsic::riscv_vlseg8: {1789selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);1790return;1791}1792case Intrinsic::riscv_vlseg2_mask:1793case Intrinsic::riscv_vlseg3_mask:1794case Intrinsic::riscv_vlseg4_mask:1795case Intrinsic::riscv_vlseg5_mask:1796case Intrinsic::riscv_vlseg6_mask:1797case Intrinsic::riscv_vlseg7_mask:1798case Intrinsic::riscv_vlseg8_mask: {1799selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);1800return;1801}1802case Intrinsic::riscv_vlsseg2:1803case Intrinsic::riscv_vlsseg3:1804case Intrinsic::riscv_vlsseg4:1805case Intrinsic::riscv_vlsseg5:1806case Intrinsic::riscv_vlsseg6:1807case Intrinsic::riscv_vlsseg7:1808case Intrinsic::riscv_vlsseg8: {1809selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);1810return;1811}1812case Intrinsic::riscv_vlsseg2_mask:1813case Intrinsic::riscv_vlsseg3_mask:1814case Intrinsic::riscv_vlsseg4_mask:1815case Intrinsic::riscv_vlsseg5_mask:1816case Intrinsic::riscv_vlsseg6_mask:1817case Intrinsic::riscv_vlsseg7_mask:1818case Intrinsic::riscv_vlsseg8_mask: {1819selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);1820return;1821}1822case Intrinsic::riscv_vloxseg2:1823case Intrinsic::riscv_vloxseg3:1824case Intrinsic::riscv_vloxseg4:1825case Intrinsic::riscv_vloxseg5:1826case Intrinsic::riscv_vloxseg6:1827case Intrinsic::riscv_vloxseg7:1828case Intrinsic::riscv_vloxseg8:1829selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);1830return;1831case Intrinsic::riscv_vluxseg2:1832case Intrinsic::riscv_vluxseg3:1833case Intrinsic::riscv_vluxseg4:1834case Intrinsic::riscv_vluxseg5:1835case Intrinsic::riscv_vluxseg6:1836case Intrinsic::riscv_vluxseg7:1837case Intrinsic::riscv_vluxseg8:1838selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);1839return;1840case Intrinsic::riscv_vloxseg2_mask:1841case Intrinsic::riscv_vloxseg3_mask:1842case Intrinsic::riscv_vloxseg4_mask:1843case Intrinsic::riscv_vloxseg5_mask:1844case Intrinsic::riscv_vloxseg6_mask:1845case Intrinsic::riscv_vloxseg7_mask:1846case Intrinsic::riscv_vloxseg8_mask:1847selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);1848return;1849case Intrinsic::riscv_vluxseg2_mask:1850case Intrinsic::riscv_vluxseg3_mask:1851case Intrinsic::riscv_vluxseg4_mask:1852case Intrinsic::riscv_vluxseg5_mask:1853case Intrinsic::riscv_vluxseg6_mask:1854case Intrinsic::riscv_vluxseg7_mask:1855case Intrinsic::riscv_vluxseg8_mask:1856selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);1857return;1858case Intrinsic::riscv_vlseg8ff:1859case Intrinsic::riscv_vlseg7ff:1860case Intrinsic::riscv_vlseg6ff:1861case Intrinsic::riscv_vlseg5ff:1862case Intrinsic::riscv_vlseg4ff:1863case Intrinsic::riscv_vlseg3ff:1864case Intrinsic::riscv_vlseg2ff: {1865selectVLSEGFF(Node, /*IsMasked*/ false);1866return;1867}1868case Intrinsic::riscv_vlseg8ff_mask:1869case Intrinsic::riscv_vlseg7ff_mask:1870case Intrinsic::riscv_vlseg6ff_mask:1871case Intrinsic::riscv_vlseg5ff_mask:1872case Intrinsic::riscv_vlseg4ff_mask:1873case Intrinsic::riscv_vlseg3ff_mask:1874case Intrinsic::riscv_vlseg2ff_mask: {1875selectVLSEGFF(Node, /*IsMasked*/ true);1876return;1877}1878case Intrinsic::riscv_vloxei:1879case Intrinsic::riscv_vloxei_mask:1880case Intrinsic::riscv_vluxei:1881case Intrinsic::riscv_vluxei_mask: {1882bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||1883IntNo == Intrinsic::riscv_vluxei_mask;1884bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||1885IntNo == 
                         Intrinsic::riscv_vloxei_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      Operands.push_back(Node->getOperand(CurOp++));

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/true, &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        report_fatal_error("The V extension does not support EEW=64 for index "
                           "values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
          IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
          static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vlm:
    case Intrinsic::riscv_vle:
    case Intrinsic::riscv_vle_mask:
    case Intrinsic::riscv_vlse:
    case Intrinsic::riscv_vlse_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
                      IntNo == Intrinsic::riscv_vlse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      // The riscv_vlm intrinsic is always tail agnostic and has no passthru
      // operand at the IR level. The pseudos have both a policy and a
      // passthru operand.
      // The passthru operand is needed to track the "tail undefined" state,
      // and the policy is there just for consistency - it will always be
      // "don't care" for the unmasked form.
      bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      if (HasPassthruOperand)
        Operands.push_back(Node->getOperand(CurOp++));
      else {
        // We eagerly lower to implicit_def (instead of undef), as we
        // otherwise fail to select nodes such as: nxv1i1 = undef
        SDNode *Passthru =
            CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
        Operands.push_back(SDValue(Passthru, 0));
      }
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                                 Operands, /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
                              static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vleff:
    case Intrinsic::riscv_vleff_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 7> Operands;
      Operands.push_back(Node->getOperand(CurOp++));
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ false, Operands,
                                 /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
                              Log2SEW, static_cast<unsigned>(LMUL));
      MachineSDNode *Load = CurDAG->getMachineNode(
          P->Pseudo, DL, Node->getVTList(), Operands);
      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = Node->getConstantOperandVal(1);
    switch (IntNo) {
    case Intrinsic::riscv_vsseg2:
    case Intrinsic::riscv_vsseg3:
    case Intrinsic::riscv_vsseg4:
    case Intrinsic::riscv_vsseg5:
    case Intrinsic::riscv_vsseg6:
    case Intrinsic::riscv_vsseg7:
    case Intrinsic::riscv_vsseg8: {
      selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vsseg2_mask:
    case Intrinsic::riscv_vsseg3_mask:
    case Intrinsic::riscv_vsseg4_mask:
    case Intrinsic::riscv_vsseg5_mask:
    case Intrinsic::riscv_vsseg6_mask:
    case Intrinsic::riscv_vsseg7_mask:
    case Intrinsic::riscv_vsseg8_mask: {
      selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vssseg2:
    case Intrinsic::riscv_vssseg3:
    case Intrinsic::riscv_vssseg4:
    case Intrinsic::riscv_vssseg5:
    case Intrinsic::riscv_vssseg6:
    case Intrinsic::riscv_vssseg7:
    case Intrinsic::riscv_vssseg8: {
      selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vssseg2_mask:
    case Intrinsic::riscv_vssseg3_mask:
    case Intrinsic::riscv_vssseg4_mask:
    case Intrinsic::riscv_vssseg5_mask:
    case Intrinsic::riscv_vssseg6_mask:
    case Intrinsic::riscv_vssseg7_mask:
    case Intrinsic::riscv_vssseg8_mask: {
      selectVSSEG(Node, /*IsMasked*/
true, /*IsStrided*/ true);2038return;2039}2040case Intrinsic::riscv_vsoxseg2:2041case Intrinsic::riscv_vsoxseg3:2042case Intrinsic::riscv_vsoxseg4:2043case Intrinsic::riscv_vsoxseg5:2044case Intrinsic::riscv_vsoxseg6:2045case Intrinsic::riscv_vsoxseg7:2046case Intrinsic::riscv_vsoxseg8:2047selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);2048return;2049case Intrinsic::riscv_vsuxseg2:2050case Intrinsic::riscv_vsuxseg3:2051case Intrinsic::riscv_vsuxseg4:2052case Intrinsic::riscv_vsuxseg5:2053case Intrinsic::riscv_vsuxseg6:2054case Intrinsic::riscv_vsuxseg7:2055case Intrinsic::riscv_vsuxseg8:2056selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);2057return;2058case Intrinsic::riscv_vsoxseg2_mask:2059case Intrinsic::riscv_vsoxseg3_mask:2060case Intrinsic::riscv_vsoxseg4_mask:2061case Intrinsic::riscv_vsoxseg5_mask:2062case Intrinsic::riscv_vsoxseg6_mask:2063case Intrinsic::riscv_vsoxseg7_mask:2064case Intrinsic::riscv_vsoxseg8_mask:2065selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);2066return;2067case Intrinsic::riscv_vsuxseg2_mask:2068case Intrinsic::riscv_vsuxseg3_mask:2069case Intrinsic::riscv_vsuxseg4_mask:2070case Intrinsic::riscv_vsuxseg5_mask:2071case Intrinsic::riscv_vsuxseg6_mask:2072case Intrinsic::riscv_vsuxseg7_mask:2073case Intrinsic::riscv_vsuxseg8_mask:2074selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);2075return;2076case Intrinsic::riscv_vsoxei:2077case Intrinsic::riscv_vsoxei_mask:2078case Intrinsic::riscv_vsuxei:2079case Intrinsic::riscv_vsuxei_mask: {2080bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||2081IntNo == Intrinsic::riscv_vsuxei_mask;2082bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||2083IntNo == Intrinsic::riscv_vsoxei_mask;20842085MVT VT = Node->getOperand(2)->getSimpleValueType(0);2086unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());20872088unsigned CurOp = 2;2089SmallVector<SDValue, 8> Operands;2090Operands.push_back(Node->getOperand(CurOp++)); // Store value.20912092MVT IndexVT;2093addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,2094/*IsStridedOrIndexed*/ true, Operands,2095/*IsLoad=*/false, &IndexVT);20962097assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&2098"Element count mismatch");20992100RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);2101RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);2102unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());2103if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {2104report_fatal_error("The V extension does not support EEW=64 for index "2105"values when XLEN=32");2106}2107const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(2108IsMasked, IsOrdered, IndexLog2EEW,2109static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));2110MachineSDNode *Store =2111CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);21122113if (auto *MemOp = dyn_cast<MemSDNode>(Node))2114CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});21152116ReplaceNode(Node, Store);2117return;2118}2119case Intrinsic::riscv_vsm:2120case Intrinsic::riscv_vse:2121case Intrinsic::riscv_vse_mask:2122case Intrinsic::riscv_vsse:2123case Intrinsic::riscv_vsse_mask: {2124bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||2125IntNo == Intrinsic::riscv_vsse_mask;2126bool IsStrided =2127IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;21282129MVT VT = Node->getOperand(2)->getSimpleValueType(0);2130unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());21312132unsigned CurOp = 2;2133SmallVector<SDValue, 8> 
Operands;2134Operands.push_back(Node->getOperand(CurOp++)); // Store value.21352136addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,2137Operands);21382139RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);2140const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(2141IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));2142MachineSDNode *Store =2143CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);2144if (auto *MemOp = dyn_cast<MemSDNode>(Node))2145CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});21462147ReplaceNode(Node, Store);2148return;2149}2150case Intrinsic::riscv_sf_vc_x_se:2151case Intrinsic::riscv_sf_vc_i_se:2152selectSF_VC_X_SE(Node);2153return;2154}2155break;2156}2157case ISD::BITCAST: {2158MVT SrcVT = Node->getOperand(0).getSimpleValueType();2159// Just drop bitcasts between vectors if both are fixed or both are2160// scalable.2161if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||2162(VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {2163ReplaceUses(SDValue(Node, 0), Node->getOperand(0));2164CurDAG->RemoveDeadNode(Node);2165return;2166}2167break;2168}2169case ISD::INSERT_SUBVECTOR: {2170SDValue V = Node->getOperand(0);2171SDValue SubV = Node->getOperand(1);2172SDLoc DL(SubV);2173auto Idx = Node->getConstantOperandVal(2);2174MVT SubVecVT = SubV.getSimpleValueType();21752176const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();2177MVT SubVecContainerVT = SubVecVT;2178// Establish the correct scalable-vector types for any fixed-length type.2179if (SubVecVT.isFixedLengthVector()) {2180SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);2181TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);2182[[maybe_unused]] bool ExactlyVecRegSized =2183Subtarget->expandVScale(SubVecVT.getSizeInBits())2184.isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));2185assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())2186.getKnownMinValue()));2187assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));2188}2189MVT ContainerVT = VT;2190if (VT.isFixedLengthVector())2191ContainerVT = TLI.getContainerForFixedLengthVector(VT);21922193const auto *TRI = Subtarget->getRegisterInfo();2194unsigned SubRegIdx;2195std::tie(SubRegIdx, Idx) =2196RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(2197ContainerVT, SubVecContainerVT, Idx, TRI);21982199// If the Idx hasn't been completely eliminated then this is a subvector2200// insert which doesn't naturally align to a vector register. These must2201// be handled using instructions to manipulate the vector registers.2202if (Idx != 0)2203break;22042205RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);2206[[maybe_unused]] bool IsSubVecPartReg =2207SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||2208SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||2209SubVecLMUL == RISCVII::VLMUL::LMUL_F8;2210assert((!IsSubVecPartReg || V.isUndef()) &&2211"Expecting lowering to have created legal INSERT_SUBVECTORs when "2212"the subvector is smaller than a full-sized register");22132214// If we haven't set a SubRegIdx, then we must be going between2215// equally-sized LMUL groups (e.g. VR -> VR). 
This can be done as a copy.2216if (SubRegIdx == RISCV::NoSubRegister) {2217unsigned InRegClassID =2218RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);2219assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==2220InRegClassID &&2221"Unexpected subvector extraction");2222SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);2223SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,2224DL, VT, SubV, RC);2225ReplaceNode(Node, NewNode);2226return;2227}22282229SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);2230ReplaceNode(Node, Insert.getNode());2231return;2232}2233case ISD::EXTRACT_SUBVECTOR: {2234SDValue V = Node->getOperand(0);2235auto Idx = Node->getConstantOperandVal(1);2236MVT InVT = V.getSimpleValueType();2237SDLoc DL(V);22382239const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();2240MVT SubVecContainerVT = VT;2241// Establish the correct scalable-vector types for any fixed-length type.2242if (VT.isFixedLengthVector()) {2243assert(Idx == 0);2244SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);2245}2246if (InVT.isFixedLengthVector())2247InVT = TLI.getContainerForFixedLengthVector(InVT);22482249const auto *TRI = Subtarget->getRegisterInfo();2250unsigned SubRegIdx;2251std::tie(SubRegIdx, Idx) =2252RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(2253InVT, SubVecContainerVT, Idx, TRI);22542255// If the Idx hasn't been completely eliminated then this is a subvector2256// extract which doesn't naturally align to a vector register. These must2257// be handled using instructions to manipulate the vector registers.2258if (Idx != 0)2259break;22602261// If we haven't set a SubRegIdx, then we must be going between2262// equally-sized LMUL types (e.g. VR -> VR). 
This can be done as a copy.2263if (SubRegIdx == RISCV::NoSubRegister) {2264unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);2265assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==2266InRegClassID &&2267"Unexpected subvector extraction");2268SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);2269SDNode *NewNode =2270CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);2271ReplaceNode(Node, NewNode);2272return;2273}22742275SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);2276ReplaceNode(Node, Extract.getNode());2277return;2278}2279case RISCVISD::VMV_S_X_VL:2280case RISCVISD::VFMV_S_F_VL:2281case RISCVISD::VMV_V_X_VL:2282case RISCVISD::VFMV_V_F_VL: {2283// Try to match splat of a scalar load to a strided load with stride of x0.2284bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||2285Node->getOpcode() == RISCVISD::VFMV_S_F_VL;2286if (!Node->getOperand(0).isUndef())2287break;2288SDValue Src = Node->getOperand(1);2289auto *Ld = dyn_cast<LoadSDNode>(Src);2290// Can't fold load update node because the second2291// output is used so that load update node can't be removed.2292if (!Ld || Ld->isIndexed())2293break;2294EVT MemVT = Ld->getMemoryVT();2295// The memory VT should be the same size as the element type.2296if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())2297break;2298if (!IsProfitableToFold(Src, Node, Node) ||2299!IsLegalToFold(Src, Node, Node, TM.getOptLevel()))2300break;23012302SDValue VL;2303if (IsScalarMove) {2304// We could deal with more VL if we update the VSETVLI insert pass to2305// avoid introducing more VSETVLI.2306if (!isOneConstant(Node->getOperand(2)))2307break;2308selectVLOp(Node->getOperand(2), VL);2309} else2310selectVLOp(Node->getOperand(2), VL);23112312unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());2313SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);23142315// If VL=1, then we don't need to do a strided load and can just do a2316// regular load.2317bool IsStrided = !isOneConstant(VL);23182319// Only do a strided load if we have optimized zero-stride vector load.2320if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())2321break;23222323SmallVector<SDValue> Operands = {2324SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),2325Ld->getBasePtr()};2326if (IsStrided)2327Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));2328uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;2329SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);2330Operands.append({VL, SEW, PolicyOp, Ld->getChain()});23312332RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);2333const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(2334/*IsMasked*/ false, IsStrided, /*FF*/ false,2335Log2SEW, static_cast<unsigned>(LMUL));2336MachineSDNode *Load =2337CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);2338// Update the chain.2339ReplaceUses(Src.getValue(1), SDValue(Load, 1));2340// Record the mem-refs2341CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});2342// Replace the splat with the vlse.2343ReplaceNode(Node, Load);2344return;2345}2346case ISD::PREFETCH:2347unsigned Locality = Node->getConstantOperandVal(3);2348if (Locality > 2)2349break;23502351if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {2352MachineMemOperand *MMO = LoadStoreMem->getMemOperand();2353MMO->setFlags(MachineMemOperand::MONonTemporal);23542355int NontemporalLevel = 0;2356switch (Locality) {2357case 
0:2358NontemporalLevel = 3; // NTL.ALL2359break;2360case 1:2361NontemporalLevel = 1; // NTL.PALL2362break;2363case 2:2364NontemporalLevel = 0; // NTL.P12365break;2366default:2367llvm_unreachable("unexpected locality value.");2368}23692370if (NontemporalLevel & 0b1)2371MMO->setFlags(MONontemporalBit0);2372if (NontemporalLevel & 0b10)2373MMO->setFlags(MONontemporalBit1);2374}2375break;2376}23772378// Select the default instruction.2379SelectCode(Node);2380}23812382bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(2383const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,2384std::vector<SDValue> &OutOps) {2385// Always produce a register and immediate operand, as expected by2386// RISCVAsmPrinter::PrintAsmMemoryOperand.2387switch (ConstraintID) {2388case InlineAsm::ConstraintCode::o:2389case InlineAsm::ConstraintCode::m: {2390SDValue Op0, Op1;2391[[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);2392assert(Found && "SelectAddrRegImm should always succeed");2393OutOps.push_back(Op0);2394OutOps.push_back(Op1);2395return false;2396}2397case InlineAsm::ConstraintCode::A:2398OutOps.push_back(Op);2399OutOps.push_back(2400CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));2401return false;2402default:2403report_fatal_error("Unexpected asm memory constraint " +2404InlineAsm::getMemConstraintName(ConstraintID));2405}24062407return true;2408}24092410bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,2411SDValue &Offset) {2412if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {2413Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());2414Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());2415return true;2416}24172418return false;2419}24202421// Select a frame index and an optional immediate offset from an ADD or OR.2422bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,2423SDValue &Offset) {2424if (SelectAddrFrameIndex(Addr, Base, Offset))2425return true;24262427if (!CurDAG->isBaseWithConstantOffset(Addr))2428return false;24292430if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {2431int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();2432if (isInt<12>(CVal)) {2433Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),2434Subtarget->getXLenVT());2435Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),2436Subtarget->getXLenVT());2437return true;2438}2439}24402441return false;2442}24432444// Fold constant addresses.2445static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,2446const MVT VT, const RISCVSubtarget *Subtarget,2447SDValue Addr, SDValue &Base, SDValue &Offset,2448bool IsPrefetch = false) {2449if (!isa<ConstantSDNode>(Addr))2450return false;24512452int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();24532454// If the constant is a simm12, we can fold the whole constant and use X0 as2455// the base. If the constant can be materialized with LUI+simm12, use LUI as2456// the base. 
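  // For example (an illustrative value, not taken from a specific test):
  // CVal = 0x12345800 splits into Lo12 = -2048 and Hi = 0x12346000, so the
  // base becomes LUI 0x12346 and the folded offset is -2048.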
We can't use generateInstSeq because it favors LUI+ADDIW.2457int64_t Lo12 = SignExtend64<12>(CVal);2458int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;2459if (!Subtarget->is64Bit() || isInt<32>(Hi)) {2460if (IsPrefetch && (Lo12 & 0b11111) != 0)2461return false;24622463if (Hi) {2464int64_t Hi20 = (Hi >> 12) & 0xfffff;2465Base = SDValue(2466CurDAG->getMachineNode(RISCV::LUI, DL, VT,2467CurDAG->getTargetConstant(Hi20, DL, VT)),24680);2469} else {2470Base = CurDAG->getRegister(RISCV::X0, VT);2471}2472Offset = CurDAG->getTargetConstant(Lo12, DL, VT);2473return true;2474}24752476// Ask how constant materialization would handle this constant.2477RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);24782479// If the last instruction would be an ADDI, we can fold its immediate and2480// emit the rest of the sequence as the base.2481if (Seq.back().getOpcode() != RISCV::ADDI)2482return false;2483Lo12 = Seq.back().getImm();2484if (IsPrefetch && (Lo12 & 0b11111) != 0)2485return false;24862487// Drop the last instruction.2488Seq.pop_back();2489assert(!Seq.empty() && "Expected more instructions in sequence");24902491Base = selectImmSeq(CurDAG, DL, VT, Seq);2492Offset = CurDAG->getTargetConstant(Lo12, DL, VT);2493return true;2494}24952496// Is this ADD instruction only used as the base pointer of scalar loads and2497// stores?2498static bool isWorthFoldingAdd(SDValue Add) {2499for (auto *Use : Add->uses()) {2500if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&2501Use->getOpcode() != ISD::ATOMIC_LOAD &&2502Use->getOpcode() != ISD::ATOMIC_STORE)2503return false;2504EVT VT = cast<MemSDNode>(Use)->getMemoryVT();2505if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&2506VT != MVT::f64)2507return false;2508// Don't allow stores of the value. 
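    // (That is, reject a user such as (store (add X, C), Ptr), where the add
    // is the value being stored rather than the address.)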
It must be used as the address.2509if (Use->getOpcode() == ISD::STORE &&2510cast<StoreSDNode>(Use)->getValue() == Add)2511return false;2512if (Use->getOpcode() == ISD::ATOMIC_STORE &&2513cast<AtomicSDNode>(Use)->getVal() == Add)2514return false;2515}25162517return true;2518}25192520bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,2521unsigned MaxShiftAmount,2522SDValue &Base, SDValue &Index,2523SDValue &Scale) {2524EVT VT = Addr.getSimpleValueType();2525auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,2526SDValue &Shift) {2527uint64_t ShiftAmt = 0;2528Index = N;25292530if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {2531// Only match shifts by a value in range [0, MaxShiftAmount].2532if (N.getConstantOperandVal(1) <= MaxShiftAmount) {2533Index = N.getOperand(0);2534ShiftAmt = N.getConstantOperandVal(1);2535}2536}25372538Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);2539return ShiftAmt != 0;2540};25412542if (Addr.getOpcode() == ISD::ADD) {2543if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {2544SDValue AddrB = Addr.getOperand(0);2545if (AddrB.getOpcode() == ISD::ADD &&2546UnwrapShl(AddrB.getOperand(0), Index, Scale) &&2547!isa<ConstantSDNode>(AddrB.getOperand(1)) &&2548isInt<12>(C1->getSExtValue())) {2549// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))2550SDValue C1Val =2551CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);2552Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,2553AddrB.getOperand(1), C1Val),25540);2555return true;2556}2557} else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {2558Base = Addr.getOperand(1);2559return true;2560} else {2561UnwrapShl(Addr.getOperand(1), Index, Scale);2562Base = Addr.getOperand(0);2563return true;2564}2565} else if (UnwrapShl(Addr, Index, Scale)) {2566EVT VT = Addr.getValueType();2567Base = CurDAG->getRegister(RISCV::X0, VT);2568return true;2569}25702571return false;2572}25732574bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,2575SDValue &Offset, bool IsINX) {2576if (SelectAddrFrameIndex(Addr, Base, Offset))2577return true;25782579SDLoc DL(Addr);2580MVT VT = Addr.getSimpleValueType();25812582if (Addr.getOpcode() == RISCVISD::ADD_LO) {2583Base = Addr.getOperand(0);2584Offset = Addr.getOperand(1);2585return true;2586}25872588int64_t RV32ZdinxRange = IsINX ? 4 : 0;2589if (CurDAG->isBaseWithConstantOffset(Addr)) {2590int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();2591if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {2592Base = Addr.getOperand(0);2593if (Base.getOpcode() == RISCVISD::ADD_LO) {2594SDValue LoOperand = Base.getOperand(1);2595if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {2596// If the Lo in (ADD_LO hi, lo) is a global variable's address2597// (its low part, really), then we can rely on the alignment of that2598// variable to provide a margin of safety before low part can overflow2599// the 12 bits of the load/store offset. 
Check if CVal falls within2600// that margin; if so (low part + CVal) can't overflow.2601const DataLayout &DL = CurDAG->getDataLayout();2602Align Alignment = commonAlignment(2603GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());2604if (CVal == 0 || Alignment > CVal) {2605int64_t CombinedOffset = CVal + GA->getOffset();2606Base = Base.getOperand(0);2607Offset = CurDAG->getTargetGlobalAddress(2608GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),2609CombinedOffset, GA->getTargetFlags());2610return true;2611}2612}2613}26142615if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))2616Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);2617Offset = CurDAG->getTargetConstant(CVal, DL, VT);2618return true;2619}2620}26212622// Handle ADD with large immediates.2623if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {2624int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();2625assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&2626"simm12 not already handled?");26272628// Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use2629// an ADDI for part of the offset and fold the rest into the load/store.2630// This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.2631if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {2632int64_t Adj = CVal < 0 ? -2048 : 2047;2633Base = SDValue(2634CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),2635CurDAG->getTargetConstant(Adj, DL, VT)),26360);2637Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);2638return true;2639}26402641// For larger immediates, we might be able to save one instruction from2642// constant materialization by folding the Lo12 bits of the immediate into2643// the address. We should only do this if the ADD is only used by loads and2644// stores that can fold the lo12 bits. 
    // Otherwise, the ADD will get selected separately with the full
    // materialized immediate, creating extra instructions.
    if (isWorthFoldingAdd(Addr) &&
        selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
                           Offset)) {
      // Insert an ADD instruction with the materialized Hi52 bits.
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
          0);
      return true;
    }
  }

  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
    return true;

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, VT);
  return true;
}

/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
/// Offset should be all zeros.
bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
                                                 SDValue &Offset) {
  if (SelectAddrFrameIndex(Addr, Base, Offset))
    return true;

  SDLoc DL(Addr);
  MVT VT = Addr.getSimpleValueType();

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    if (isInt<12>(CVal)) {
      Base = Addr.getOperand(0);

      // Early-out if not a valid offset.
      if ((CVal & 0b11111) != 0) {
        Base = Addr;
        Offset = CurDAG->getTargetConstant(0, DL, VT);
        return true;
      }

      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
      Offset = CurDAG->getTargetConstant(CVal, DL, VT);
      return true;
    }
  }

  // Handle ADD with large immediates.
  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    assert(!isInt<12>(CVal) && "simm12 not already handled?");

    // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can
    // save one instruction by folding adjustment (-2048 or 2016) into the
    // address.
    if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
      int64_t Adj = CVal < 0 ? -2048 : 2016;
      int64_t AdjustedOffset = CVal - Adj;
      Base = SDValue(CurDAG->getMachineNode(
                         RISCV::ADDI, DL, VT, Addr.getOperand(0),
                         CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
                     0);
      Offset = CurDAG->getTargetConstant(Adj, DL, VT);
      return true;
    }

    if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
                           Offset, true)) {
      // Insert an ADD instruction with the materialized Hi52 bits.
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
          0);
      return true;
    }
  }

  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
    return true;

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, VT);
  return true;
}

bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
                                         SDValue &Offset) {
  if (Addr.getOpcode() != ISD::ADD)
    return false;

  if (isa<ConstantSDNode>(Addr.getOperand(1)))
    return false;

  Base = Addr.getOperand(1);
  Offset = Addr.getOperand(0);
  return true;
}

bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                        SDValue &ShAmt) {
  ShAmt = N;

  // Peek through zext.
  if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
    ShAmt = ShAmt.getOperand(0);

  // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
  // amount.
If there is an AND on the shift amount, we can bypass it if it2755// doesn't affect any of those bits.2756if (ShAmt.getOpcode() == ISD::AND &&2757isa<ConstantSDNode>(ShAmt.getOperand(1))) {2758const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);27592760// Since the max shift amount is a power of 2 we can subtract 1 to make a2761// mask that covers the bits needed to represent all shift amounts.2762assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");2763APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);27642765if (ShMask.isSubsetOf(AndMask)) {2766ShAmt = ShAmt.getOperand(0);2767} else {2768// SimplifyDemandedBits may have optimized the mask so try restoring any2769// bits that are known zero.2770KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));2771if (!ShMask.isSubsetOf(AndMask | Known.Zero))2772return true;2773ShAmt = ShAmt.getOperand(0);2774}2775}27762777if (ShAmt.getOpcode() == ISD::ADD &&2778isa<ConstantSDNode>(ShAmt.getOperand(1))) {2779uint64_t Imm = ShAmt.getConstantOperandVal(1);2780// If we are shifting by X+N where N == 0 mod Size, then just shift by X2781// to avoid the ADD.2782if (Imm != 0 && Imm % ShiftWidth == 0) {2783ShAmt = ShAmt.getOperand(0);2784return true;2785}2786} else if (ShAmt.getOpcode() == ISD::SUB &&2787isa<ConstantSDNode>(ShAmt.getOperand(0))) {2788uint64_t Imm = ShAmt.getConstantOperandVal(0);2789// If we are shifting by N-X where N == 0 mod Size, then just shift by -X to2790// generate a NEG instead of a SUB of a constant.2791if (Imm != 0 && Imm % ShiftWidth == 0) {2792SDLoc DL(ShAmt);2793EVT VT = ShAmt.getValueType();2794SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);2795unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;2796MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,2797ShAmt.getOperand(1));2798ShAmt = SDValue(Neg, 0);2799return true;2800}2801// If we are shifting by N-X where N == -1 mod Size, then just shift by ~X2802// to generate a NOT instead of a SUB of a constant.2803if (Imm % ShiftWidth == ShiftWidth - 1) {2804SDLoc DL(ShAmt);2805EVT VT = ShAmt.getValueType();2806MachineSDNode *Not =2807CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),2808CurDAG->getTargetConstant(-1, DL, VT));2809ShAmt = SDValue(Not, 0);2810return true;2811}2812}28132814return true;2815}28162817/// RISC-V doesn't have general instructions for integer setne/seteq, but we can2818/// check for equality with 0. 
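/// For example, (seteq X, 5) can be lowered to (addi tmp, X, -5), which is
/// zero exactly when X == 5.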
This function emits instructions that convert the2819/// seteq/setne into something that can be compared with 0.2820/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.2821/// ISD::SETNE).2822bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,2823SDValue &Val) {2824assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&2825"Unexpected condition code!");28262827// We're looking for a setcc.2828if (N->getOpcode() != ISD::SETCC)2829return false;28302831// Must be an equality comparison.2832ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();2833if (CCVal != ExpectedCCVal)2834return false;28352836SDValue LHS = N->getOperand(0);2837SDValue RHS = N->getOperand(1);28382839if (!LHS.getValueType().isScalarInteger())2840return false;28412842// If the RHS side is 0, we don't need any extra instructions, return the LHS.2843if (isNullConstant(RHS)) {2844Val = LHS;2845return true;2846}28472848SDLoc DL(N);28492850if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {2851int64_t CVal = C->getSExtValue();2852// If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and2853// non-zero otherwise.2854if (CVal == -2048) {2855Val =2856SDValue(CurDAG->getMachineNode(2857RISCV::XORI, DL, N->getValueType(0), LHS,2858CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),28590);2860return true;2861}2862// If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the2863// LHS is equal to the RHS and non-zero otherwise.2864if (isInt<12>(CVal) || CVal == 2048) {2865Val =2866SDValue(CurDAG->getMachineNode(2867RISCV::ADDI, DL, N->getValueType(0), LHS,2868CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),28690);2870return true;2871}2872}28732874// If nothing else we can XOR the LHS and RHS to produce zero if they are2875// equal and a non-zero value if they aren't.2876Val = SDValue(2877CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);2878return true;2879}28802881bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {2882if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&2883cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {2884Val = N.getOperand(0);2885return true;2886}28872888auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {2889if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))2890return N;28912892SDValue N0 = N.getOperand(0);2893if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&2894N.getConstantOperandVal(1) == ShiftAmt &&2895N0.getConstantOperandVal(1) == ShiftAmt)2896return N0.getOperand(0);28972898return N;2899};29002901MVT VT = N.getSimpleValueType();2902if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {2903Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);2904return true;2905}29062907return false;2908}29092910bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {2911if (N.getOpcode() == ISD::AND) {2912auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));2913if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {2914Val = N.getOperand(0);2915return true;2916}2917}2918MVT VT = N.getSimpleValueType();2919APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);2920if (CurDAG->MaskedValueIsZero(N, Mask)) {2921Val = N;2922return true;2923}29242925return false;2926}29272928/// Look for various patterns that can be done with a SHL that can be folded2929/// into a SHXADD. 
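/// (The Zba shXadd instructions compute (rs1 << X) + rs2; for example,
/// ShAmt == 2 corresponds to sh2add.)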
\p ShAmt contains 1, 2, or 3 and is set based on which2930/// SHXADD we are trying to match.2931bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,2932SDValue &Val) {2933if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {2934SDValue N0 = N.getOperand(0);29352936bool LeftShift = N0.getOpcode() == ISD::SHL;2937if ((LeftShift || N0.getOpcode() == ISD::SRL) &&2938isa<ConstantSDNode>(N0.getOperand(1))) {2939uint64_t Mask = N.getConstantOperandVal(1);2940unsigned C2 = N0.getConstantOperandVal(1);29412942unsigned XLen = Subtarget->getXLen();2943if (LeftShift)2944Mask &= maskTrailingZeros<uint64_t>(C2);2945else2946Mask &= maskTrailingOnes<uint64_t>(XLen - C2);29472948// Look for (and (shl y, c2), c1) where c1 is a shifted mask with no2949// leading zeros and c3 trailing zeros. We can use an SRLI by c2+c32950// followed by a SHXADD with c3 for the X amount.2951if (isShiftedMask_64(Mask)) {2952unsigned Leading = XLen - llvm::bit_width(Mask);2953unsigned Trailing = llvm::countr_zero(Mask);2954if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {2955SDLoc DL(N);2956EVT VT = N.getValueType();2957Val = SDValue(CurDAG->getMachineNode(2958RISCV::SRLI, DL, VT, N0.getOperand(0),2959CurDAG->getTargetConstant(Trailing - C2, DL, VT)),29600);2961return true;2962}2963// Look for (and (shr y, c2), c1) where c1 is a shifted mask with c22964// leading zeros and c3 trailing zeros. We can use an SRLI by C32965// followed by a SHXADD using c3 for the X amount.2966if (!LeftShift && Leading == C2 && Trailing == ShAmt) {2967SDLoc DL(N);2968EVT VT = N.getValueType();2969Val = SDValue(2970CurDAG->getMachineNode(2971RISCV::SRLI, DL, VT, N0.getOperand(0),2972CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),29730);2974return true;2975}2976}2977}2978}29792980bool LeftShift = N.getOpcode() == ISD::SHL;2981if ((LeftShift || N.getOpcode() == ISD::SRL) &&2982isa<ConstantSDNode>(N.getOperand(1))) {2983SDValue N0 = N.getOperand(0);2984if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&2985isa<ConstantSDNode>(N0.getOperand(1))) {2986uint64_t Mask = N0.getConstantOperandVal(1);2987if (isShiftedMask_64(Mask)) {2988unsigned C1 = N.getConstantOperandVal(1);2989unsigned XLen = Subtarget->getXLen();2990unsigned Leading = XLen - llvm::bit_width(Mask);2991unsigned Trailing = llvm::countr_zero(Mask);2992// Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and2993// C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.2994if (LeftShift && Leading == 32 && Trailing > 0 &&2995(Trailing + C1) == ShAmt) {2996SDLoc DL(N);2997EVT VT = N.getValueType();2998Val = SDValue(CurDAG->getMachineNode(2999RISCV::SRLIW, DL, VT, N0.getOperand(0),3000CurDAG->getTargetConstant(Trailing, DL, VT)),30010);3002return true;3003}3004// Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and3005// C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.3006if (!LeftShift && Leading == 32 && Trailing > C1 &&3007(Trailing - C1) == ShAmt) {3008SDLoc DL(N);3009EVT VT = N.getValueType();3010Val = SDValue(CurDAG->getMachineNode(3011RISCV::SRLIW, DL, VT, N0.getOperand(0),3012CurDAG->getTargetConstant(Trailing, DL, VT)),30130);3014return true;3015}3016}3017}3018}30193020return false;3021}30223023/// Look for various patterns that can be done with a SHL that can be folded3024/// into a SHXADD_UW. 
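/// (shXadd.uw first zero-extends rs1 from 32 bits, computing
/// (zext(rs1[31:0]) << X) + rs2.)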
\p ShAmt contains 1, 2, or 3 and is set based on which3025/// SHXADD_UW we are trying to match.3026bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,3027SDValue &Val) {3028if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&3029N.hasOneUse()) {3030SDValue N0 = N.getOperand(0);3031if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&3032N0.hasOneUse()) {3033uint64_t Mask = N.getConstantOperandVal(1);3034unsigned C2 = N0.getConstantOperandVal(1);30353036Mask &= maskTrailingZeros<uint64_t>(C2);30373038// Look for (and (shl y, c2), c1) where c1 is a shifted mask with3039// 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by3040// c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.3041if (isShiftedMask_64(Mask)) {3042unsigned Leading = llvm::countl_zero(Mask);3043unsigned Trailing = llvm::countr_zero(Mask);3044if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {3045SDLoc DL(N);3046EVT VT = N.getValueType();3047Val = SDValue(CurDAG->getMachineNode(3048RISCV::SLLI, DL, VT, N0.getOperand(0),3049CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),30500);3051return true;3052}3053}3054}3055}30563057return false;3058}30593060static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,3061unsigned Bits,3062const TargetInstrInfo *TII) {3063unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());30643065if (!MCOpcode)3066return false;30673068const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());3069const uint64_t TSFlags = MCID.TSFlags;3070if (!RISCVII::hasSEWOp(TSFlags))3071return false;3072assert(RISCVII::hasVLOp(TSFlags));30733074bool HasGlueOp = User->getGluedNode() != nullptr;3075unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;3076bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;3077bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);3078unsigned VLIdx =3079User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;3080const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);30813082if (UserOpNo == VLIdx)3083return false;30843085auto NumDemandedBits =3086RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);3087return NumDemandedBits && Bits >= *NumDemandedBits;3088}30893090// Return true if all users of this SDNode* only consume the lower \p Bits.3091// This can be used to form W instructions for add/sub/mul/shl even when the3092// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if3093// SimplifyDemandedBits has made it so some users see a sext_inreg and some3094// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave3095// the add/sub/mul/shl to become non-W instructions. By checking the users we3096// may be able to use a W instruction and CSE with the other instruction if3097// this has happened. 
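// For example (illustrative), if one user of an (add X, Y) sees it through a
// sext_inreg and another does not, selecting ADDW for the plain use as well
// lets MachineCSE merge the two.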
We could try to detect that the CSE opportunity exists3098// before doing this, but that would be more complicated.3099bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,3100const unsigned Depth) const {3101assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||3102Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||3103Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||3104Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||3105Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||3106isa<ConstantSDNode>(Node) || Depth != 0) &&3107"Unexpected opcode");31083109if (Depth >= SelectionDAG::MaxRecursionDepth)3110return false;31113112// The PatFrags that call this may run before RISCVGenDAGISel.inc has checked3113// the VT. Ensure the type is scalar to avoid wasting time on vectors.3114if (Depth == 0 && !Node->getValueType(0).isScalarInteger())3115return false;31163117for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {3118SDNode *User = *UI;3119// Users of this node should have already been instruction selected3120if (!User->isMachineOpcode())3121return false;31223123// TODO: Add more opcodes?3124switch (User->getMachineOpcode()) {3125default:3126if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))3127break;3128return false;3129case RISCV::ADDW:3130case RISCV::ADDIW:3131case RISCV::SUBW:3132case RISCV::MULW:3133case RISCV::SLLW:3134case RISCV::SLLIW:3135case RISCV::SRAW:3136case RISCV::SRAIW:3137case RISCV::SRLW:3138case RISCV::SRLIW:3139case RISCV::DIVW:3140case RISCV::DIVUW:3141case RISCV::REMW:3142case RISCV::REMUW:3143case RISCV::ROLW:3144case RISCV::RORW:3145case RISCV::RORIW:3146case RISCV::CLZW:3147case RISCV::CTZW:3148case RISCV::CPOPW:3149case RISCV::SLLI_UW:3150case RISCV::FMV_W_X:3151case RISCV::FCVT_H_W:3152case RISCV::FCVT_H_WU:3153case RISCV::FCVT_S_W:3154case RISCV::FCVT_S_WU:3155case RISCV::FCVT_D_W:3156case RISCV::FCVT_D_WU:3157case RISCV::TH_REVW:3158case RISCV::TH_SRRIW:3159if (Bits < 32)3160return false;3161break;3162case RISCV::SLL:3163case RISCV::SRA:3164case RISCV::SRL:3165case RISCV::ROL:3166case RISCV::ROR:3167case RISCV::BSET:3168case RISCV::BCLR:3169case RISCV::BINV:3170// Shift amount operands only use log2(Xlen) bits.3171if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))3172return false;3173break;3174case RISCV::SLLI:3175// SLLI only uses the lower (XLen - ShAmt) bits.3176if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))3177return false;3178break;3179case RISCV::ANDI:3180if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))3181break;3182goto RecCheck;3183case RISCV::ORI: {3184uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();3185if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))3186break;3187[[fallthrough]];3188}3189case RISCV::AND:3190case RISCV::OR:3191case RISCV::XOR:3192case RISCV::XORI:3193case RISCV::ANDN:3194case RISCV::ORN:3195case RISCV::XNOR:3196case RISCV::SH1ADD:3197case RISCV::SH2ADD:3198case RISCV::SH3ADD:3199RecCheck:3200if (hasAllNBitUsers(User, Bits, Depth + 1))3201break;3202return false;3203case RISCV::SRLI: {3204unsigned ShAmt = User->getConstantOperandVal(1);3205// If we are shifting right by less than Bits, and users don't demand any3206// bits that were shifted into [Bits-1:0], then we can consider this as an3207// N-Bit user.3208if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))3209break;3210return false;3211}3212case RISCV::SEXT_B:3213case 
RISCV::PACKH:
      if (Bits < 8)
        return false;
      break;
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
    case RISCV::PACKW:
      if (Bits < 16)
        return false;
      break;
    case RISCV::PACK:
      if (Bits < (Subtarget->getXLen() / 2))
        return false;
      break;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    case RISCV::SB:
      if (UI.getOperandNo() != 0 || Bits < 8)
        return false;
      break;
    case RISCV::SH:
      if (UI.getOperandNo() != 0 || Bits < 16)
        return false;
      break;
    case RISCV::SW:
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    }
  }

  return true;
}

// Select a constant that can be represented as (sign_extend(imm5) << imm2).
bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
                                        SDValue &Shl2) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t Offset = C->getSExtValue();
    int64_t Shift;
    for (Shift = 0; Shift < 4; Shift++)
      if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
        break;

    // Constant cannot be encoded.
    if (Shift == 4)
      return false;

    EVT Ty = N->getValueType(0);
    Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
    Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
    return true;
  }

  return false;
}

// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && isUInt<5>(C->getZExtValue())) {
    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                   N->getValueType(0));
  } else if (C && C->isAllOnes()) {
    // Treat all ones as VLMax.
    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                   N->getValueType(0));
  } else if (isa<RegisterSDNode>(N) &&
             cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
    // All our VL operands use an operand class that allows GPRNoX0 or an
    // immediate. Convert X0 to a special immediate to pass the
    // MachineVerifier.
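    // (This reuses VLMaxSentinel, the same encoding chosen for the all-ones
    // VLMax case above.)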
This is recognized specially by the vsetvli insertion3295// pass.3296VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),3297N->getValueType(0));3298} else {3299VL = N;3300}33013302return true;3303}33043305static SDValue findVSplat(SDValue N) {3306if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {3307if (!N.getOperand(0).isUndef())3308return SDValue();3309N = N.getOperand(1);3310}3311SDValue Splat = N;3312if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&3313Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||3314!Splat.getOperand(0).isUndef())3315return SDValue();3316assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");3317return Splat;3318}33193320bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {3321SDValue Splat = findVSplat(N);3322if (!Splat)3323return false;33243325SplatVal = Splat.getOperand(1);3326return true;3327}33283329static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,3330SelectionDAG &DAG,3331const RISCVSubtarget &Subtarget,3332std::function<bool(int64_t)> ValidateImm) {3333SDValue Splat = findVSplat(N);3334if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))3335return false;33363337const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();3338assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&3339"Unexpected splat operand type");33403341// The semantics of RISCVISD::VMV_V_X_VL is that when the operand3342// type is wider than the resulting vector element type: an implicit3343// truncation first takes place. Therefore, perform a manual3344// truncation/sign-extension in order to ignore any truncated bits and catch3345// any zero-extended immediate.3346// For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first3347// sign-extending to (XLenVT -1).3348APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);33493350int64_t SplatImm = SplatConst.getSExtValue();33513352if (!ValidateImm(SplatImm))3353return false;33543355SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());3356return true;3357}33583359bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {3360return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,3361[](int64_t Imm) { return isInt<5>(Imm); });3362}33633364bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {3365return selectVSplatImmHelper(3366N, SplatVal, *CurDAG, *Subtarget,3367[](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });3368}33693370bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,3371SDValue &SplatVal) {3372return selectVSplatImmHelper(3373N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {3374return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);3375});3376}33773378bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,3379SDValue &SplatVal) {3380return selectVSplatImmHelper(3381N, SplatVal, *CurDAG, *Subtarget,3382[Bits](int64_t Imm) { return isUIntN(Bits, Imm); });3383}33843385bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {3386auto IsExtOrTrunc = [](SDValue N) {3387switch (N->getOpcode()) {3388case ISD::SIGN_EXTEND:3389case ISD::ZERO_EXTEND:3390// There's no passthru on these _VL nodes so any VL/mask is ok, since any3391// inactive elements will be undef.3392case RISCVISD::TRUNCATE_VECTOR_VL:3393case RISCVISD::VSEXT_VL:3394case RISCVISD::VZEXT_VL:3395return true;3396default:3397return false;3398}3399};34003401// We can have multiple nested nodes, so unravel them all if 
  while (IsExtOrTrunc(N)) {
    if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
      return false;
    N = N->getOperand(0);
  }

  return selectVSplat(N, SplatVal);
}

bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
  if (!CFP)
    return false;
  const APFloat &APF = CFP->getValueAPF();
  // td can handle +0.0 already.
  if (APF.isPosZero())
    return false;

  MVT VT = CFP->getSimpleValueType(0);

  // Even if this FPImm requires an additional FNEG (i.e. the second element of
  // the returned pair is true) we still prefer FLI + FNEG over immediate
  // materialization as the latter might generate a longer instruction sequence.
  if (static_cast<const RISCVTargetLowering *>(TLI)
          ->getLegalZfaFPImm(APF, VT)
          .first >= 0)
    return false;

  MVT XLenVT = Subtarget->getXLenVT();
  if (VT == MVT::f64 && !Subtarget->is64Bit()) {
    assert(APF.isNegZero() && "Unexpected constant.");
    return false;
  }
  SDLoc DL(N);
  Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                  *Subtarget);
  return true;
}

bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
                                       SDValue &Imm) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);

    if (!isInt<5>(ImmVal))
      return false;

    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
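    // For example, (ADDIW (ADD rs1, rs2), 0) becomes (ADDW rs1, rs2), folding
    // the sign extension into the W-form arithmetic instruction.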
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD:  Opc = RISCV::ADDW;  break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB:  Opc = RISCV::SUBW;  break;
    case RISCV::MUL:  Opc = RISCV::MULW;  break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                               N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::PACKW:
  case RISCV::TH_MULAW:
  case RISCV::TH_MULAH:
  case RISCV::TH_MULSW:
  case RISCV::TH_MULSH:
    if (N0.getValueType() == MVT::i32)
      break;

    // Result is already sign extended, just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  return false;
}

// After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg
// that's glued to the pseudo. This tries to look up the value that was copied
// to V0.
static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) {
  // Check that we're using V0 as a mask register.
  if (!isa<RegisterSDNode>(MaskOp) ||
      cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
    return SDValue();

  // The glued user defines V0.
  const auto *Glued = GlueOp.getNode();

  if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
    return SDValue();

  // Check that we're defining V0 as a mask register.
  if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
      cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
    return SDValue();

  SDValue MaskSetter = Glued->getOperand(2);

  // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
  // from an extract_subvector or insert_subvector.
  if (MaskSetter->isMachineOpcode() &&
      MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
    MaskSetter = MaskSetter->getOperand(0);

  return MaskSetter;
}

static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
  // Check the instruction defining V0; it needs to be a VMSET pseudo.
  SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp);
  if (!MaskSetter)
    return false;

  const auto IsVMSet = [](unsigned Opc) {
    return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
           Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
           Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
           Opc == RISCV::PseudoVMSET_M_B8;
  };

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  return MaskSetter->isMachineOpcode() &&
         IsVMSet(MaskSetter.getMachineOpcode());
}

// Return true if we can prove that the mask operand of N is an all-ones mask.
static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
  return usesAllOnesMask(N->getOperand(MaskOpIdx),
                         N->getOperand(N->getNumOperands() - 1));
}

static bool isImplicitDef(SDValue V) {
  if (!V.isMachineOpcode())
    return false;
  if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
    for (unsigned I = 1; I < V.getNumOperands(); I += 2)
      if (!isImplicitDef(V.getOperand(I)))
        return false;
    return true;
  }
  return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
}

// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;
  if (!usesAllOnesMask(N, MaskOpIdx))
    return false;

  // There are two classes of pseudos in the table - compares and
  // everything else. See the comment on RISCVMaskedPseudo for details.
  const unsigned Opc = I->UnmaskedPseudo;
  const MCInstrDesc &MCID = TII->get(Opc);
  const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
#ifndef NDEBUG
  const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
             RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
         "Masked and unmasked pseudos are inconsistent");
  const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
  assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
#endif

  SmallVector<SDValue, 8> Ops;
  // Skip the merge operand at index 0 if !UseTUPseudo.
  for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask, and the Glue.
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
      continue;
    Ops.push_back(Op);
  }

  // Transitively apply any node glued to our new node.
  const auto *Glued = N->getGluedNode();
  if (auto *TGlued = Glued->getGluedNode())
    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));

  MachineSDNode *Result =
      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);

  if (!N->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, N->memoperands());

  Result->setFlags(N->getFlags());
  ReplaceUses(N, Result);

  return true;
}

static bool IsVMerge(SDNode *N) {
  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
}

static bool IsVMv(SDNode *N) {
  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
}

static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
  switch (LMUL) {
  case RISCVII::LMUL_F8:
    return RISCV::PseudoVMSET_M_B1;
  case RISCVII::LMUL_F4:
    return RISCV::PseudoVMSET_M_B2;
  case RISCVII::LMUL_F2:
    return RISCV::PseudoVMSET_M_B4;
  case RISCVII::LMUL_1:
    return RISCV::PseudoVMSET_M_B8;
  case RISCVII::LMUL_2:
    return RISCV::PseudoVMSET_M_B16;
  case RISCVII::LMUL_4:
    return RISCV::PseudoVMSET_M_B32;
  case RISCVII::LMUL_8:
    return RISCV::PseudoVMSET_M_B64;
  case RISCVII::LMUL_RESERVED:
    llvm_unreachable("Unexpected LMUL");
  }
  llvm_unreachable("Unknown VLMUL enum");
}

// Try to fold away VMERGE_VVM instructions into their true operands:
//
// %true = PseudoVADD_VV ...
// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
// ->
// %x = PseudoVADD_VV_MASK %false, ..., %mask
//
// We can only fold if vmerge's merge operand, vmerge's false operand and
// %true's merge operand (if it has one) are the same. This is because we have
// to consolidate them into one merge operand in the result.
//
// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
// mask is all ones.
//
// We can also fold a VMV_V_V into its true operand, since it is equivalent to a
// VMERGE_VVM with an all ones mask.
//
// The resulting VL is the minimum of the two VLs.
//
// The resulting policy is the effective policy the vmerge would have had,
// i.e. whether or not its merge operand was implicit-def.
bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
  SDValue Merge, False, True, VL, Mask, Glue;
  // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
  if (IsVMv(N)) {
    Merge = N->getOperand(0);
    False = N->getOperand(0);
    True = N->getOperand(1);
    VL = N->getOperand(2);
    // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
    // mask later below.
  } else {
    assert(IsVMerge(N));
    Merge = N->getOperand(0);
    False = N->getOperand(1);
    True = N->getOperand(2);
    Mask = N->getOperand(3);
    VL = N->getOperand(4);
    // We always have a glue node for the mask at v0.
    Glue = N->getOperand(N->getNumOperands() - 1);
  }
  assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
  assert(!Glue || Glue.getValueType() == MVT::Glue);

  // If the EEW of True is different from vmerge's SEW, then we can't fold.
  if (True.getSimpleValueType() != N->getSimpleValueType(0))
    return false;

  // We require that either merge and false are the same, or that merge
  // is undefined.
  if (Merge != False && !isImplicitDef(Merge))
    return false;

  assert(True.getResNo() == 0 &&
         "Expect True is the first output of an instruction.");

  // N must be the only user of True.
  if (!True.hasOneUse())
    return false;

  if (!True.isMachineOpcode())
    return false;

  unsigned TrueOpc = True.getMachineOpcode();
  const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
  uint64_t TrueTSFlags = TrueMCID.TSFlags;
  bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);

  bool IsMasked = false;
  const RISCV::RISCVMaskedPseudoInfo *Info =
      RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
  if (!Info && HasTiedDest) {
    Info = RISCV::getMaskedPseudoInfo(TrueOpc);
    IsMasked = true;
  }
  assert(!(IsMasked && !HasTiedDest) && "Expected tied dest");

  if (!Info)
    return false;

  // If True has a merge operand then it needs to be the same as vmerge's False,
  // since False will be used for the result's merge operand.
  if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
    SDValue MergeOpTrue = True->getOperand(0);
    if (False != MergeOpTrue)
      return false;
  }

  // If True is masked then the vmerge must have either the same mask or an all
  // 1s mask, since we're going to keep the mask from True.
  if (IsMasked && Mask) {
    // FIXME: Support mask agnostic True instruction which would have an
    // undef merge operand.
    SDValue TrueMask =
        getMaskSetter(True->getOperand(Info->MaskOpIdx),
                      True->getOperand(True->getNumOperands() - 1));
    assert(TrueMask);
    if (!usesAllOnesMask(Mask, Glue) && getMaskSetter(Mask, Glue) != TrueMask)
      return false;
  }

  // Skip if True has side effects.
  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
    return false;

  // The last operand of a masked instruction may be glued.
  bool HasGlueOp = True->getGluedNode() != nullptr;

  // The chain operand may exist either before the glued operands or in the last
  // position.
  unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
  bool HasChainOp =
      True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;

  if (HasChainOp) {
    // Avoid creating cycles in the DAG. We must ensure that none of the other
    // operands depend on True through its Chain.
    SmallVector<const SDNode *, 4> LoopWorklist;
    SmallPtrSet<const SDNode *, 16> Visited;
    LoopWorklist.push_back(False.getNode());
    if (Mask)
      LoopWorklist.push_back(Mask.getNode());
    LoopWorklist.push_back(VL.getNode());
    if (Glue)
      LoopWorklist.push_back(Glue.getNode());
    if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
      return false;
  }

  // The vector policy operand may be present for masked intrinsics.
  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
  unsigned TrueVLIndex =
      True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
  SDValue TrueVL = True.getOperand(TrueVLIndex);
  SDValue SEW = True.getOperand(TrueVLIndex + 1);

  auto GetMinVL = [](SDValue LHS, SDValue RHS) {
    if (LHS == RHS)
      return LHS;
    if (isAllOnesConstant(LHS))
      return RHS;
    if (isAllOnesConstant(RHS))
      return LHS;
    auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
    auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
    if (!CLHS || !CRHS)
      return SDValue();
    return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
  };

  // Because N and True must have the same merge operand (or True's operand is
  // implicit_def), the "effective" body is the minimum of their VLs.
  SDValue OrigVL = VL;
  VL = GetMinVL(TrueVL, VL);
  if (!VL)
    return false;

  // Some operations produce different elementwise results depending on the
  // active elements, like viota.m or vredsum. This transformation is illegal
  // for these if we change the active elements (i.e. mask or VL).
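  // For example, vredsum sums only the active elements and viota.m counts set
  // mask bits among the active elements, so a different mask or a smaller VL
  // would change even the results that are kept.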
  if (Info->ActiveElementsAffectResult) {
    if (Mask && !usesAllOnesMask(Mask, Glue))
      return false;
    if (TrueVL != VL)
      return false;
  }

  // If we end up changing the VL or mask of True, then we need to make sure it
  // doesn't raise any observable fp exceptions, since changing the active
  // elements will affect how fflags is set.
  if (TrueVL != VL || !IsMasked)
    if (mayRaiseFPException(True.getNode()) &&
        !True->getFlags().hasNoFPExcept())
      return false;

  SDLoc DL(N);

  // From the preconditions we checked above, we know the mask and thus glue
  // for the result node will be taken from True.
  if (IsMasked) {
    Mask = True->getOperand(Info->MaskOpIdx);
    Glue = True->getOperand(True->getNumOperands() - 1);
    assert(Glue.getValueType() == MVT::Glue);
  }
  // If the vmerge is actually a vmv.v.v, there is no vmerge mask to reuse, so
  // create an all-ones mask to use.
  else if (IsVMv(N)) {
    unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
    unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
    ElementCount EC = N->getValueType(0).getVectorElementCount();
    MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);

    SDValue AllOnesMask =
        SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
    SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                            RISCV::V0, AllOnesMask, SDValue());
    Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
    Glue = MaskCopy.getValue(1);
  }

  unsigned MaskedOpc = Info->MaskedPseudo;
#ifndef NDEBUG
  const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
         "Expected instructions with mask have policy operand.");
  assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
                                         MCOI::TIED_TO) == 0 &&
         "Expected instructions with mask have a tied dest.");
#endif

  // Use a tumu policy, relaxing it to tail agnostic provided that the merge
  // operand is undefined.
  //
  // However, if the VL became smaller than what the vmerge had originally, then
  // elements past VL that were previously in the vmerge's body will have moved
  // to the tail. In that case we always need to use tail undisturbed to
  // preserve them.
  bool MergeVLShrunk = VL != OrigVL;
  uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
                        ? RISCVII::TAIL_AGNOSTIC
                        : /*TUMU*/ 0;
  SDValue PolicyOp =
      CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(False);

  const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
  const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
  assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
  Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);

  Ops.push_back(Mask);

  // For an unmasked "VOp" with a rounding mode operand, the operands look like
  // (..., rm, vl) or (..., rm, vl, policy).
  // Its masked version is (..., vm, rm, vl, policy).
  // See the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td.
  if (HasRoundingMode)
    Ops.push_back(True->getOperand(TrueVLIndex - 1));

  Ops.append({VL, SEW, PolicyOp});

  // Result node should have chain operand of True.
  if (HasChainOp)
    Ops.push_back(True.getOperand(TrueChainOpIdx));

  // Add the glue for the CopyToReg of mask->v0.
  Ops.push_back(Glue);

  MachineSDNode *Result =
      CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
  Result->setFlags(True->getFlags());

  if (!cast<MachineSDNode>(True)->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());

  // Replace the vmerge.vvm node with Result.
  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));

  // Replace the other values of True, e.g. its chain and VL outputs.
  for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
    ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));

  return true;
}

bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (IsVMerge(N) || IsVMv(N))
      MadeChange |= performCombineVMergeAndVOps(N);
  }
  return MadeChange;
}

/// If our passthru is an implicit_def, use noreg instead. This side-steps
/// issues with MachineCSE not being able to CSE expressions with IMPLICIT_DEF
/// operands while preserving the semantic intent. See pr64282 for context.
/// Note that this transform is the last one performed at ISel DAG to DAG.
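/// For example, an RVV pseudo whose tied passthru operand (operand 0) is an
/// IMPLICIT_DEF is rebuilt with NoRegister as the passthru, with all other
/// operands copied over unchanged.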
bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    const unsigned Opc = N->getMachineOpcode();
    if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
        !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
        !isImplicitDef(N->getOperand(0)))
      continue;

    SmallVector<SDValue> Ops;
    Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
    for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
      SDValue Op = N->getOperand(I);
      Ops.push_back(Op);
    }

    MachineSDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
    Result->setFlags(N->getFlags());
    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
    ReplaceUses(N, Result);
    MadeChange = true;
  }
  return MadeChange;
}

// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
                                       CodeGenOptLevel OptLevel) {
  return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
}

char RISCVDAGToDAGISelLegacy::ID = 0;

RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
                                                 CodeGenOptLevel OptLevel)
    : SelectionDAGISelLegacy(
          ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}

INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)