Path: blob/main/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
35294 views
//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines an instruction selector for the RISC-V target.9//10//===----------------------------------------------------------------------===//1112#include "RISCVISelDAGToDAG.h"13#include "MCTargetDesc/RISCVBaseInfo.h"14#include "MCTargetDesc/RISCVMCTargetDesc.h"15#include "MCTargetDesc/RISCVMatInt.h"16#include "RISCVISelLowering.h"17#include "RISCVMachineFunctionInfo.h"18#include "llvm/CodeGen/MachineFrameInfo.h"19#include "llvm/IR/IntrinsicsRISCV.h"20#include "llvm/Support/Alignment.h"21#include "llvm/Support/Debug.h"22#include "llvm/Support/MathExtras.h"23#include "llvm/Support/raw_ostream.h"2425using namespace llvm;2627#define DEBUG_TYPE "riscv-isel"28#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"2930static cl::opt<bool> UsePseudoMovImm(31"riscv-use-rematerializable-movimm", cl::Hidden,32cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "33"constant materialization"),34cl::init(false));3536namespace llvm::RISCV {37#define GET_RISCVVSSEGTable_IMPL38#define GET_RISCVVLSEGTable_IMPL39#define GET_RISCVVLXSEGTable_IMPL40#define GET_RISCVVSXSEGTable_IMPL41#define GET_RISCVVLETable_IMPL42#define GET_RISCVVSETable_IMPL43#define GET_RISCVVLXTable_IMPL44#define GET_RISCVVSXTable_IMPL45#include "RISCVGenSearchableTables.inc"46} // namespace llvm::RISCV4748void RISCVDAGToDAGISel::PreprocessISelDAG() {49SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();5051bool MadeChange = false;52while (Position != CurDAG->allnodes_begin()) {53SDNode *N = &*--Position;54if (N->use_empty())55continue;5657SDValue Result;58switch (N->getOpcode()) {59case ISD::SPLAT_VECTOR: {60// Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point61// SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.62MVT VT = N->getSimpleValueType(0);63unsigned Opc =64VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;65SDLoc DL(N);66SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());67SDValue Src = N->getOperand(0);68if (VT.isInteger())69Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),70N->getOperand(0));71Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);72break;73}74case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {75// Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector76// load. 
Done after lowering and combining so that we have a chance to77// optimize this to VMV_V_X_VL when the upper bits aren't needed.78assert(N->getNumOperands() == 4 && "Unexpected number of operands");79MVT VT = N->getSimpleValueType(0);80SDValue Passthru = N->getOperand(0);81SDValue Lo = N->getOperand(1);82SDValue Hi = N->getOperand(2);83SDValue VL = N->getOperand(3);84assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&85Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&86"Unexpected VTs!");87MachineFunction &MF = CurDAG->getMachineFunction();88SDLoc DL(N);8990// Create temporary stack for each expanding node.91SDValue StackSlot =92CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));93int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();94MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);9596SDValue Chain = CurDAG->getEntryNode();97Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));9899SDValue OffsetSlot =100CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);101Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),102Align(8));103104Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);105106SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});107SDValue IntID =108CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);109SDValue Ops[] = {Chain,110IntID,111Passthru,112StackSlot,113CurDAG->getRegister(RISCV::X0, MVT::i64),114VL};115116Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,117MVT::i64, MPI, Align(8),118MachineMemOperand::MOLoad);119break;120}121}122123if (Result) {124LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");125LLVM_DEBUG(N->dump(CurDAG));126LLVM_DEBUG(dbgs() << "\nNew: ");127LLVM_DEBUG(Result->dump(CurDAG));128LLVM_DEBUG(dbgs() << "\n");129130CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);131MadeChange = true;132}133}134135if (MadeChange)136CurDAG->RemoveDeadNodes();137}138139void RISCVDAGToDAGISel::PostprocessISelDAG() {140HandleSDNode Dummy(CurDAG->getRoot());141SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();142143bool MadeChange = false;144while (Position != CurDAG->allnodes_begin()) {145SDNode *N = &*--Position;146// Skip dead nodes and any non-machine opcodes.147if (N->use_empty() || !N->isMachineOpcode())148continue;149150MadeChange |= doPeepholeSExtW(N);151152// FIXME: This is here only because the VMerge transform doesn't153// know how to handle masked true inputs. Once that has been moved154// to post-ISEL, this can be deleted as well.155MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));156}157158CurDAG->setRoot(Dummy.getValue());159160MadeChange |= doPeepholeMergeVVMFold();161162// After we're done with everything else, convert IMPLICIT_DEF163// passthru operands to NoRegister. This is required to workaround164// an optimization deficiency in MachineCSE. This really should165// be merged back into each of the patterns (i.e. 
there's no good166// reason not to go directly to NoReg), but is being done this way167// to allow easy backporting.168MadeChange |= doPeepholeNoRegPassThru();169170if (MadeChange)171CurDAG->RemoveDeadNodes();172}173174static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,175RISCVMatInt::InstSeq &Seq) {176SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);177for (const RISCVMatInt::Inst &Inst : Seq) {178SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);179SDNode *Result = nullptr;180switch (Inst.getOpndKind()) {181case RISCVMatInt::Imm:182Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);183break;184case RISCVMatInt::RegX0:185Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,186CurDAG->getRegister(RISCV::X0, VT));187break;188case RISCVMatInt::RegReg:189Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);190break;191case RISCVMatInt::RegImm:192Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);193break;194}195196// Only the first instruction has X0 as its source.197SrcReg = SDValue(Result, 0);198}199200return SrcReg;201}202203static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,204int64_t Imm, const RISCVSubtarget &Subtarget) {205RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);206207// Use a rematerializable pseudo instruction for short sequences if enabled.208if (Seq.size() == 2 && UsePseudoMovImm)209return SDValue(210CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,211CurDAG->getTargetConstant(Imm, DL, VT)),2120);213214// See if we can create this constant as (ADD (SLLI X, C), X) where X is at215// worst an LUI+ADDIW. This will require an extra register, but avoids a216// constant pool.217// If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where218// low and high 32 bits are the same and bit 31 and 63 are set.219if (Seq.size() > 3) {220unsigned ShiftAmt, AddOpc;221RISCVMatInt::InstSeq SeqLo =222RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);223if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {224SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);225226SDValue SLLI = SDValue(227CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,228CurDAG->getTargetConstant(ShiftAmt, DL, VT)),2290);230return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);231}232}233234// Otherwise, use the original sequence.235return selectImmSeq(CurDAG, DL, VT, Seq);236}237238static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,239unsigned NF, RISCVII::VLMUL LMUL) {240static const unsigned M1TupleRegClassIDs[] = {241RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,242RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,243RISCV::VRN8M1RegClassID};244static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,245RISCV::VRN3M2RegClassID,246RISCV::VRN4M2RegClassID};247248assert(Regs.size() >= 2 && Regs.size() <= 8);249250unsigned RegClassID;251unsigned SubReg0;252switch (LMUL) {253default:254llvm_unreachable("Invalid LMUL.");255case RISCVII::VLMUL::LMUL_F8:256case RISCVII::VLMUL::LMUL_F4:257case RISCVII::VLMUL::LMUL_F2:258case RISCVII::VLMUL::LMUL_1:259static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,260"Unexpected subreg numbering");261SubReg0 = RISCV::sub_vrm1_0;262RegClassID = M1TupleRegClassIDs[NF - 2];263break;264case RISCVII::VLMUL::LMUL_2:265static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,266"Unexpected subreg 
numbering");267SubReg0 = RISCV::sub_vrm2_0;268RegClassID = M2TupleRegClassIDs[NF - 2];269break;270case RISCVII::VLMUL::LMUL_4:271static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,272"Unexpected subreg numbering");273SubReg0 = RISCV::sub_vrm4_0;274RegClassID = RISCV::VRN2M4RegClassID;275break;276}277278SDLoc DL(Regs[0]);279SmallVector<SDValue, 8> Ops;280281Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));282283for (unsigned I = 0; I < Regs.size(); ++I) {284Ops.push_back(Regs[I]);285Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));286}287SDNode *N =288CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);289return SDValue(N, 0);290}291292void RISCVDAGToDAGISel::addVectorLoadStoreOperands(293SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,294bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,295bool IsLoad, MVT *IndexVT) {296SDValue Chain = Node->getOperand(0);297SDValue Glue;298299Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.300301if (IsStridedOrIndexed) {302Operands.push_back(Node->getOperand(CurOp++)); // Index.303if (IndexVT)304*IndexVT = Operands.back()->getSimpleValueType(0);305}306307if (IsMasked) {308// Mask needs to be copied to V0.309SDValue Mask = Node->getOperand(CurOp++);310Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());311Glue = Chain.getValue(1);312Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));313}314SDValue VL;315selectVLOp(Node->getOperand(CurOp++), VL);316Operands.push_back(VL);317318MVT XLenVT = Subtarget->getXLenVT();319SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);320Operands.push_back(SEWOp);321322// At the IR layer, all the masked load intrinsics have policy operands,323// none of the others do. All have passthru operands. 
For our pseudos,324// all loads have policy operands.325if (IsLoad) {326uint64_t Policy = RISCVII::MASK_AGNOSTIC;327if (IsMasked)328Policy = Node->getConstantOperandVal(CurOp++);329SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);330Operands.push_back(PolicyOp);331}332333Operands.push_back(Chain); // Chain.334if (Glue)335Operands.push_back(Glue);336}337338void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,339bool IsStrided) {340SDLoc DL(Node);341unsigned NF = Node->getNumValues() - 1;342MVT VT = Node->getSimpleValueType(0);343unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());344RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);345346unsigned CurOp = 2;347SmallVector<SDValue, 8> Operands;348349SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,350Node->op_begin() + CurOp + NF);351SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);352Operands.push_back(Merge);353CurOp += NF;354355addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,356Operands, /*IsLoad=*/true);357358const RISCV::VLSEGPseudo *P =359RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,360static_cast<unsigned>(LMUL));361MachineSDNode *Load =362CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);363364if (auto *MemOp = dyn_cast<MemSDNode>(Node))365CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});366367SDValue SuperReg = SDValue(Load, 0);368for (unsigned I = 0; I < NF; ++I) {369unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);370ReplaceUses(SDValue(Node, I),371CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));372}373374ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));375CurDAG->RemoveDeadNode(Node);376}377378void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {379SDLoc DL(Node);380unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.381MVT VT = Node->getSimpleValueType(0);382MVT XLenVT = Subtarget->getXLenVT();383unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());384RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);385386unsigned CurOp = 2;387SmallVector<SDValue, 7> Operands;388389SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,390Node->op_begin() + CurOp + NF);391SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);392Operands.push_back(MaskedOff);393CurOp += NF;394395addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,396/*IsStridedOrIndexed*/ false, Operands,397/*IsLoad=*/true);398399const RISCV::VLSEGPseudo *P =400RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,401Log2SEW, static_cast<unsigned>(LMUL));402MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,403XLenVT, MVT::Other, Operands);404405if (auto *MemOp = dyn_cast<MemSDNode>(Node))406CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});407408SDValue SuperReg = SDValue(Load, 0);409for (unsigned I = 0; I < NF; ++I) {410unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);411ReplaceUses(SDValue(Node, I),412CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));413}414415ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL416ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain417CurDAG->RemoveDeadNode(Node);418}419420void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,421bool IsOrdered) {422SDLoc DL(Node);423unsigned NF = Node->getNumValues() - 1;424MVT VT = Node->getSimpleValueType(0);425unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());426RISCVII::VLMUL LMUL = 
RISCVTargetLowering::getLMUL(VT);427428unsigned CurOp = 2;429SmallVector<SDValue, 8> Operands;430431SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,432Node->op_begin() + CurOp + NF);433SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);434Operands.push_back(MaskedOff);435CurOp += NF;436437MVT IndexVT;438addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,439/*IsStridedOrIndexed*/ true, Operands,440/*IsLoad=*/true, &IndexVT);441442assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&443"Element count mismatch");444445RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);446unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());447if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {448report_fatal_error("The V extension does not support EEW=64 for index "449"values when XLEN=32");450}451const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(452NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),453static_cast<unsigned>(IndexLMUL));454MachineSDNode *Load =455CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);456457if (auto *MemOp = dyn_cast<MemSDNode>(Node))458CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});459460SDValue SuperReg = SDValue(Load, 0);461for (unsigned I = 0; I < NF; ++I) {462unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);463ReplaceUses(SDValue(Node, I),464CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));465}466467ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));468CurDAG->RemoveDeadNode(Node);469}470471void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,472bool IsStrided) {473SDLoc DL(Node);474unsigned NF = Node->getNumOperands() - 4;475if (IsStrided)476NF--;477if (IsMasked)478NF--;479MVT VT = Node->getOperand(2)->getSimpleValueType(0);480unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());481RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);482SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);483SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);484485SmallVector<SDValue, 8> Operands;486Operands.push_back(StoreVal);487unsigned CurOp = 2 + NF;488489addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,490Operands);491492const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(493NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));494MachineSDNode *Store =495CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);496497if (auto *MemOp = dyn_cast<MemSDNode>(Node))498CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});499500ReplaceNode(Node, Store);501}502503void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,504bool IsOrdered) {505SDLoc DL(Node);506unsigned NF = Node->getNumOperands() - 5;507if (IsMasked)508--NF;509MVT VT = Node->getOperand(2)->getSimpleValueType(0);510unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());511RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);512SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);513SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);514515SmallVector<SDValue, 8> Operands;516Operands.push_back(StoreVal);517unsigned CurOp = 2 + NF;518519MVT IndexVT;520addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,521/*IsStridedOrIndexed*/ true, Operands,522/*IsLoad=*/false, &IndexVT);523524assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&525"Element count mismatch");526527RISCVII::VLMUL IndexLMUL = 
RISCVTargetLowering::getLMUL(IndexVT);528unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());529if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {530report_fatal_error("The V extension does not support EEW=64 for index "531"values when XLEN=32");532}533const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(534NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),535static_cast<unsigned>(IndexLMUL));536MachineSDNode *Store =537CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);538539if (auto *MemOp = dyn_cast<MemSDNode>(Node))540CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});541542ReplaceNode(Node, Store);543}544545void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {546if (!Subtarget->hasVInstructions())547return;548549assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");550551SDLoc DL(Node);552MVT XLenVT = Subtarget->getXLenVT();553554unsigned IntNo = Node->getConstantOperandVal(0);555556assert((IntNo == Intrinsic::riscv_vsetvli ||557IntNo == Intrinsic::riscv_vsetvlimax) &&558"Unexpected vsetvli intrinsic");559560bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;561unsigned Offset = (VLMax ? 1 : 2);562563assert(Node->getNumOperands() == Offset + 2 &&564"Unexpected number of operands");565566unsigned SEW =567RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);568RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(569Node->getConstantOperandVal(Offset + 1) & 0x7);570571unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,572/*MaskAgnostic*/ true);573SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);574575SDValue VLOperand;576unsigned Opcode = RISCV::PseudoVSETVLI;577if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {578if (auto VLEN = Subtarget->getRealVLen())579if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())580VLMax = true;581}582if (VLMax || isAllOnesConstant(Node->getOperand(1))) {583VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);584Opcode = RISCV::PseudoVSETVLIX0;585} else {586VLOperand = Node->getOperand(1);587588if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {589uint64_t AVL = C->getZExtValue();590if (isUInt<5>(AVL)) {591SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);592ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,593XLenVT, VLImm, VTypeIOp));594return;595}596}597}598599ReplaceNode(Node,600CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));601}602603bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {604MVT VT = Node->getSimpleValueType(0);605unsigned Opcode = Node->getOpcode();606assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&607"Unexpected opcode");608SDLoc DL(Node);609610// For operations of the form (x << C1) op C2, check if we can use611// ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.612SDValue N0 = Node->getOperand(0);613SDValue N1 = Node->getOperand(1);614615ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);616if (!Cst)617return false;618619int64_t Val = Cst->getSExtValue();620621// Check if immediate can already use ANDI/ORI/XORI.622if (isInt<12>(Val))623return false;624625SDValue Shift = N0;626627// If Val is simm32 and we have a sext_inreg from i32, then the binop628// produces at least 33 sign bits. 
We can peek through the sext_inreg and use629// a SLLIW at the end.630bool SignExt = false;631if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&632N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {633SignExt = true;634Shift = N0.getOperand(0);635}636637if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())638return false;639640ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));641if (!ShlCst)642return false;643644uint64_t ShAmt = ShlCst->getZExtValue();645646// Make sure that we don't change the operation by removing bits.647// This only matters for OR and XOR, AND is unaffected.648uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);649if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)650return false;651652int64_t ShiftedVal = Val >> ShAmt;653if (!isInt<12>(ShiftedVal))654return false;655656// If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.657if (SignExt && ShAmt >= 32)658return false;659660// Ok, we can reorder to get a smaller immediate.661unsigned BinOpc;662switch (Opcode) {663default: llvm_unreachable("Unexpected opcode");664case ISD::AND: BinOpc = RISCV::ANDI; break;665case ISD::OR: BinOpc = RISCV::ORI; break;666case ISD::XOR: BinOpc = RISCV::XORI; break;667}668669unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;670671SDNode *BinOp =672CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),673CurDAG->getTargetConstant(ShiftedVal, DL, VT));674SDNode *SLLI =675CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),676CurDAG->getTargetConstant(ShAmt, DL, VT));677ReplaceNode(Node, SLLI);678return true;679}680681bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {682// Only supported with XTHeadBb at the moment.683if (!Subtarget->hasVendorXTHeadBb())684return false;685686auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));687if (!N1C)688return false;689690SDValue N0 = Node->getOperand(0);691if (!N0.hasOneUse())692return false;693694auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,695MVT VT) {696return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),697CurDAG->getTargetConstant(Msb, DL, VT),698CurDAG->getTargetConstant(Lsb, DL, VT));699};700701SDLoc DL(Node);702MVT VT = Node->getSimpleValueType(0);703const unsigned RightShAmt = N1C->getZExtValue();704705// Transform (sra (shl X, C1) C2) with C1 < C2706// -> (TH.EXT X, msb, lsb)707if (N0.getOpcode() == ISD::SHL) {708auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));709if (!N01C)710return false;711712const unsigned LeftShAmt = N01C->getZExtValue();713// Make sure that this is a bitfield extraction (i.e., the shift-right714// amount can not be less than the left-shift).715if (LeftShAmt > RightShAmt)716return false;717718const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;719const unsigned Msb = MsbPlusOne - 1;720const unsigned Lsb = RightShAmt - LeftShAmt;721722SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);723ReplaceNode(Node, TH_EXT);724return true;725}726727// Transform (sra (sext_inreg X, _), C) ->728// (TH.EXT X, msb, lsb)729if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {730unsigned ExtSize =731cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();732733// ExtSize of 32 should use sraiw via tablegen pattern.734if (ExtSize == 32)735return false;736737const unsigned Msb = ExtSize - 1;738const unsigned Lsb = RightShAmt;739740SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);741ReplaceNode(Node, TH_EXT);742return true;743}744745return 
false;746}747748bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {749// Target does not support indexed loads.750if (!Subtarget->hasVendorXTHeadMemIdx())751return false;752753LoadSDNode *Ld = cast<LoadSDNode>(Node);754ISD::MemIndexedMode AM = Ld->getAddressingMode();755if (AM == ISD::UNINDEXED)756return false;757758const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());759if (!C)760return false;761762EVT LoadVT = Ld->getMemoryVT();763assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&764"Unexpected addressing mode");765bool IsPre = AM == ISD::PRE_INC;766bool IsPost = AM == ISD::POST_INC;767int64_t Offset = C->getSExtValue();768769// The constants that can be encoded in the THeadMemIdx instructions770// are of the form (sign_extend(imm5) << imm2).771int64_t Shift;772for (Shift = 0; Shift < 4; Shift++)773if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))774break;775776// Constant cannot be encoded.777if (Shift == 4)778return false;779780bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);781unsigned Opcode;782if (LoadVT == MVT::i8 && IsPre)783Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;784else if (LoadVT == MVT::i8 && IsPost)785Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;786else if (LoadVT == MVT::i16 && IsPre)787Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;788else if (LoadVT == MVT::i16 && IsPost)789Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;790else if (LoadVT == MVT::i32 && IsPre)791Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;792else if (LoadVT == MVT::i32 && IsPost)793Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;794else if (LoadVT == MVT::i64 && IsPre)795Opcode = RISCV::TH_LDIB;796else if (LoadVT == MVT::i64 && IsPost)797Opcode = RISCV::TH_LDIA;798else799return false;800801EVT Ty = Ld->getOffset().getValueType();802SDValue Ops[] = {Ld->getBasePtr(),803CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),804CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),805Ld->getChain()};806SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),807Ld->getValueType(1), MVT::Other, Ops);808809MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();810CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});811812ReplaceNode(Node, New);813814return true;815}816817void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {818if (!Subtarget->hasVInstructions())819return;820821assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");822823SDLoc DL(Node);824unsigned IntNo = Node->getConstantOperandVal(1);825826assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||827IntNo == Intrinsic::riscv_sf_vc_i_se) &&828"Unexpected vsetvli intrinsic");829830// imm, imm, imm, simm5/scalar, sew, log2lmul, vl831unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));832SDValue SEWOp =833CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());834SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),835Node->getOperand(4), Node->getOperand(5),836Node->getOperand(8), SEWOp,837Node->getOperand(0)};838839unsigned Opcode;840auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));841switch (LMulSDNode->getSExtValue()) {842case 5:843Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8844: RISCV::PseudoVC_I_SE_MF8;845break;846case 6:847Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4848: RISCV::PseudoVC_I_SE_MF4;849break;850case 7:851Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? 
RISCV::PseudoVC_X_SE_MF2852: RISCV::PseudoVC_I_SE_MF2;853break;854case 0:855Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1856: RISCV::PseudoVC_I_SE_M1;857break;858case 1:859Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2860: RISCV::PseudoVC_I_SE_M2;861break;862case 2:863Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4864: RISCV::PseudoVC_I_SE_M4;865break;866case 3:867Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8868: RISCV::PseudoVC_I_SE_M8;869break;870}871872ReplaceNode(Node, CurDAG->getMachineNode(873Opcode, DL, Node->getSimpleValueType(0), Operands));874}875876void RISCVDAGToDAGISel::Select(SDNode *Node) {877// If we have a custom node, we have already selected.878if (Node->isMachineOpcode()) {879LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");880Node->setNodeId(-1);881return;882}883884// Instruction Selection not handled by the auto-generated tablegen selection885// should be handled here.886unsigned Opcode = Node->getOpcode();887MVT XLenVT = Subtarget->getXLenVT();888SDLoc DL(Node);889MVT VT = Node->getSimpleValueType(0);890891bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();892893switch (Opcode) {894case ISD::Constant: {895assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");896auto *ConstNode = cast<ConstantSDNode>(Node);897if (ConstNode->isZero()) {898SDValue New =899CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);900ReplaceNode(Node, New.getNode());901return;902}903int64_t Imm = ConstNode->getSExtValue();904// If only the lower 8 bits are used, try to convert this to a simm6 by905// sign-extending bit 7. This is neutral without the C extension, and906// allows C.LI to be used if C is present.907if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))908Imm = SignExtend64<8>(Imm);909// If the upper XLen-16 bits are not used, try to convert this to a simm12910// by sign extending bit 15.911if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&912hasAllHUsers(Node))913Imm = SignExtend64<16>(Imm);914// If the upper 32-bits are not used try to convert this into a simm32 by915// sign extending bit 32.916if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))917Imm = SignExtend64<32>(Imm);918919ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());920return;921}922case ISD::ConstantFP: {923const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();924auto [FPImm, NeedsFNeg] =925static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,926VT);927if (FPImm >= 0) {928unsigned Opc;929unsigned FNegOpc;930switch (VT.SimpleTy) {931default:932llvm_unreachable("Unexpected size");933case MVT::f16:934Opc = RISCV::FLI_H;935FNegOpc = RISCV::FSGNJN_H;936break;937case MVT::f32:938Opc = RISCV::FLI_S;939FNegOpc = RISCV::FSGNJN_S;940break;941case MVT::f64:942Opc = RISCV::FLI_D;943FNegOpc = RISCV::FSGNJN_D;944break;945}946SDNode *Res = CurDAG->getMachineNode(947Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));948if (NeedsFNeg)949Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),950SDValue(Res, 0));951952ReplaceNode(Node, Res);953return;954}955956bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;957SDValue Imm;958// For +0.0 or f64 -0.0 we need to start from X0. 
For all others, we will959// create an integer immediate.960if (APF.isPosZero() || NegZeroF64)961Imm = CurDAG->getRegister(RISCV::X0, XLenVT);962else963Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),964*Subtarget);965966bool HasZdinx = Subtarget->hasStdExtZdinx();967bool Is64Bit = Subtarget->is64Bit();968unsigned Opc;969switch (VT.SimpleTy) {970default:971llvm_unreachable("Unexpected size");972case MVT::bf16:973assert(Subtarget->hasStdExtZfbfmin());974Opc = RISCV::FMV_H_X;975break;976case MVT::f16:977Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;978break;979case MVT::f32:980Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;981break;982case MVT::f64:983// For RV32, we can't move from a GPR, we need to convert instead. This984// should only happen for +0.0 and -0.0.985assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");986if (Is64Bit)987Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;988else989Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;990break;991}992993SDNode *Res;994if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)995Res = CurDAG->getMachineNode(996Opc, DL, VT, Imm,997CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));998else999Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);10001001// For f64 -0.0, we need to insert a fneg.d idiom.1002if (NegZeroF64) {1003Opc = RISCV::FSGNJN_D;1004if (HasZdinx)1005Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;1006Res =1007CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));1008}10091010ReplaceNode(Node, Res);1011return;1012}1013case RISCVISD::BuildPairF64: {1014if (!Subtarget->hasStdExtZdinx())1015break;10161017assert(!Subtarget->is64Bit() && "Unexpected subtarget");10181019SDValue Ops[] = {1020CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),1021Node->getOperand(0),1022CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),1023Node->getOperand(1),1024CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};10251026SDNode *N =1027CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops);1028ReplaceNode(Node, N);1029return;1030}1031case RISCVISD::SplitF64: {1032if (Subtarget->hasStdExtZdinx()) {1033assert(!Subtarget->is64Bit() && "Unexpected subtarget");10341035if (!SDValue(Node, 0).use_empty()) {1036SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT,1037Node->getOperand(0));1038ReplaceUses(SDValue(Node, 0), Lo);1039}10401041if (!SDValue(Node, 1).use_empty()) {1042SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, VT,1043Node->getOperand(0));1044ReplaceUses(SDValue(Node, 1), Hi);1045}10461047CurDAG->RemoveDeadNode(Node);1048return;1049}10501051if (!Subtarget->hasStdExtZfa())1052break;1053assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&1054"Unexpected subtarget");10551056// With Zfa, lower to fmv.x.w and fmvh.x.d.1057if (!SDValue(Node, 0).use_empty()) {1058SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,1059Node->getOperand(0));1060ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));1061}1062if (!SDValue(Node, 1).use_empty()) {1063SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,1064Node->getOperand(0));1065ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));1066}10671068CurDAG->RemoveDeadNode(Node);1069return;1070}1071case ISD::SHL: {1072auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));1073if (!N1C)1074break;1075SDValue N0 = Node->getOperand(0);1076if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() 
||1077!isa<ConstantSDNode>(N0.getOperand(1)))1078break;1079unsigned ShAmt = N1C->getZExtValue();1080uint64_t Mask = N0.getConstantOperandVal(1);10811082// Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has1083// 32 leading zeros and C3 trailing zeros.1084if (ShAmt <= 32 && isShiftedMask_64(Mask)) {1085unsigned XLen = Subtarget->getXLen();1086unsigned LeadingZeros = XLen - llvm::bit_width(Mask);1087unsigned TrailingZeros = llvm::countr_zero(Mask);1088if (TrailingZeros > 0 && LeadingZeros == 32) {1089SDNode *SRLIW = CurDAG->getMachineNode(1090RISCV::SRLIW, DL, VT, N0->getOperand(0),1091CurDAG->getTargetConstant(TrailingZeros, DL, VT));1092SDNode *SLLI = CurDAG->getMachineNode(1093RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),1094CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));1095ReplaceNode(Node, SLLI);1096return;1097}1098}1099break;1100}1101case ISD::SRL: {1102auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));1103if (!N1C)1104break;1105SDValue N0 = Node->getOperand(0);1106if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))1107break;1108unsigned ShAmt = N1C->getZExtValue();1109uint64_t Mask = N0.getConstantOperandVal(1);11101111// Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has1112// 32 leading zeros and C3 trailing zeros.1113if (isShiftedMask_64(Mask) && N0.hasOneUse()) {1114unsigned XLen = Subtarget->getXLen();1115unsigned LeadingZeros = XLen - llvm::bit_width(Mask);1116unsigned TrailingZeros = llvm::countr_zero(Mask);1117if (LeadingZeros == 32 && TrailingZeros > ShAmt) {1118SDNode *SRLIW = CurDAG->getMachineNode(1119RISCV::SRLIW, DL, VT, N0->getOperand(0),1120CurDAG->getTargetConstant(TrailingZeros, DL, VT));1121SDNode *SLLI = CurDAG->getMachineNode(1122RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),1123CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));1124ReplaceNode(Node, SLLI);1125return;1126}1127}11281129// Optimize (srl (and X, C2), C) ->1130// (srli (slli X, (XLen-C3), (XLen-C3) + C)1131// Where C2 is a mask with C3 trailing ones.1132// Taking into account that the C2 may have had lower bits unset by1133// SimplifyDemandedBits. This avoids materializing the C2 immediate.1134// This pattern occurs when type legalizing right shifts for types with1135// less than XLen bits.1136Mask |= maskTrailingOnes<uint64_t>(ShAmt);1137if (!isMask_64(Mask))1138break;1139unsigned TrailingOnes = llvm::countr_one(Mask);1140if (ShAmt >= TrailingOnes)1141break;1142// If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.1143if (TrailingOnes == 32) {1144SDNode *SRLI = CurDAG->getMachineNode(1145Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,1146N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));1147ReplaceNode(Node, SRLI);1148return;1149}11501151// Only do the remaining transforms if the AND has one use.1152if (!N0.hasOneUse())1153break;11541155// If C2 is (1 << ShAmt) use bexti or th.tst if possible.1156if (HasBitTest && ShAmt + 1 == TrailingOnes) {1157SDNode *BEXTI = CurDAG->getMachineNode(1158Subtarget->hasStdExtZbs() ? 
RISCV::BEXTI : RISCV::TH_TST, DL, VT,1159N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));1160ReplaceNode(Node, BEXTI);1161return;1162}11631164unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;1165SDNode *SLLI =1166CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),1167CurDAG->getTargetConstant(LShAmt, DL, VT));1168SDNode *SRLI = CurDAG->getMachineNode(1169RISCV::SRLI, DL, VT, SDValue(SLLI, 0),1170CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));1171ReplaceNode(Node, SRLI);1172return;1173}1174case ISD::SRA: {1175if (trySignedBitfieldExtract(Node))1176return;11771178// Optimize (sra (sext_inreg X, i16), C) ->1179// (srai (slli X, (XLen-16), (XLen-16) + C)1180// And (sra (sext_inreg X, i8), C) ->1181// (srai (slli X, (XLen-8), (XLen-8) + C)1182// This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.1183// This transform matches the code we get without Zbb. The shifts are more1184// compressible, and this can help expose CSE opportunities in the sdiv by1185// constant optimization.1186auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));1187if (!N1C)1188break;1189SDValue N0 = Node->getOperand(0);1190if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())1191break;1192unsigned ShAmt = N1C->getZExtValue();1193unsigned ExtSize =1194cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();1195// ExtSize of 32 should use sraiw via tablegen pattern.1196if (ExtSize >= 32 || ShAmt >= ExtSize)1197break;1198unsigned LShAmt = Subtarget->getXLen() - ExtSize;1199SDNode *SLLI =1200CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),1201CurDAG->getTargetConstant(LShAmt, DL, VT));1202SDNode *SRAI = CurDAG->getMachineNode(1203RISCV::SRAI, DL, VT, SDValue(SLLI, 0),1204CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));1205ReplaceNode(Node, SRAI);1206return;1207}1208case ISD::OR:1209case ISD::XOR:1210if (tryShrinkShlLogicImm(Node))1211return;12121213break;1214case ISD::AND: {1215auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));1216if (!N1C)1217break;1218uint64_t C1 = N1C->getZExtValue();1219const bool isC1Mask = isMask_64(C1);1220const bool isC1ANDI = isInt<12>(C1);12211222SDValue N0 = Node->getOperand(0);12231224auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,1225SDValue X, unsigned Msb,1226unsigned Lsb) {1227if (!Subtarget->hasVendorXTHeadBb())1228return false;12291230SDNode *TH_EXTU = CurDAG->getMachineNode(1231RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),1232CurDAG->getTargetConstant(Lsb, DL, VT));1233ReplaceNode(Node, TH_EXTU);1234return true;1235};12361237bool LeftShift = N0.getOpcode() == ISD::SHL;1238if (LeftShift || N0.getOpcode() == ISD::SRL) {1239auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));1240if (!C)1241break;1242unsigned C2 = C->getZExtValue();1243unsigned XLen = Subtarget->getXLen();1244assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");12451246// Keep track of whether this is a c.andi. 
If we can't use c.andi, the1247// shift pair might offer more compression opportunities.1248// TODO: We could check for C extension here, but we don't have many lit1249// tests with the C extension enabled so not checking gets better1250// coverage.1251// TODO: What if ANDI faster than shift?1252bool IsCANDI = isInt<6>(N1C->getSExtValue());12531254// Clear irrelevant bits in the mask.1255if (LeftShift)1256C1 &= maskTrailingZeros<uint64_t>(C2);1257else1258C1 &= maskTrailingOnes<uint64_t>(XLen - C2);12591260// Some transforms should only be done if the shift has a single use or1261// the AND would become (srli (slli X, 32), 32)1262bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);12631264SDValue X = N0.getOperand(0);12651266// Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask1267// with c3 leading zeros.1268if (!LeftShift && isC1Mask) {1269unsigned Leading = XLen - llvm::bit_width(C1);1270if (C2 < Leading) {1271// If the number of leading zeros is C2+32 this can be SRLIW.1272if (C2 + 32 == Leading) {1273SDNode *SRLIW = CurDAG->getMachineNode(1274RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));1275ReplaceNode(Node, SRLIW);1276return;1277}12781279// (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)1280// if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.1281//1282// This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type1283// legalized and goes through DAG combine.1284if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&1285X.getOpcode() == ISD::SIGN_EXTEND_INREG &&1286cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {1287SDNode *SRAIW =1288CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),1289CurDAG->getTargetConstant(31, DL, VT));1290SDNode *SRLIW = CurDAG->getMachineNode(1291RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),1292CurDAG->getTargetConstant(Leading - 32, DL, VT));1293ReplaceNode(Node, SRLIW);1294return;1295}12961297// Try to use an unsigned bitfield extract (e.g., th.extu) if1298// available.1299// Transform (and (srl x, C2), C1)1300// -> (<bfextract> x, msb, lsb)1301//1302// Make sure to keep this below the SRLIW cases, as we always want to1303// prefer the more common instruction.1304const unsigned Msb = llvm::bit_width(C1) + C2 - 1;1305const unsigned Lsb = C2;1306if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))1307return;13081309// (srli (slli x, c3-c2), c3).1310// Skip if we could use (zext.w (sraiw X, C2)).1311bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&1312X.getOpcode() == ISD::SIGN_EXTEND_INREG &&1313cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;1314// Also Skip if we can use bexti or th.tst.1315Skip |= HasBitTest && Leading == XLen - 1;1316if (OneUseOrZExtW && !Skip) {1317SDNode *SLLI = CurDAG->getMachineNode(1318RISCV::SLLI, DL, VT, X,1319CurDAG->getTargetConstant(Leading - C2, DL, VT));1320SDNode *SRLI = CurDAG->getMachineNode(1321RISCV::SRLI, DL, VT, SDValue(SLLI, 0),1322CurDAG->getTargetConstant(Leading, DL, VT));1323ReplaceNode(Node, SRLI);1324return;1325}1326}1327}13281329// Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask1330// shifted by c2 bits with c3 leading zeros.1331if (LeftShift && isShiftedMask_64(C1)) {1332unsigned Leading = XLen - llvm::bit_width(C1);13331334if (C2 + Leading < XLen &&1335C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {1336// Use slli.uw when possible.1337if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {1338SDNode *SLLI_UW 
=1339CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,1340CurDAG->getTargetConstant(C2, DL, VT));1341ReplaceNode(Node, SLLI_UW);1342return;1343}13441345// (srli (slli c2+c3), c3)1346if (OneUseOrZExtW && !IsCANDI) {1347SDNode *SLLI = CurDAG->getMachineNode(1348RISCV::SLLI, DL, VT, X,1349CurDAG->getTargetConstant(C2 + Leading, DL, VT));1350SDNode *SRLI = CurDAG->getMachineNode(1351RISCV::SRLI, DL, VT, SDValue(SLLI, 0),1352CurDAG->getTargetConstant(Leading, DL, VT));1353ReplaceNode(Node, SRLI);1354return;1355}1356}1357}13581359// Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a1360// shifted mask with c2 leading zeros and c3 trailing zeros.1361if (!LeftShift && isShiftedMask_64(C1)) {1362unsigned Leading = XLen - llvm::bit_width(C1);1363unsigned Trailing = llvm::countr_zero(C1);1364if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&1365!IsCANDI) {1366unsigned SrliOpc = RISCV::SRLI;1367// If the input is zexti32 we should use SRLIW.1368if (X.getOpcode() == ISD::AND &&1369isa<ConstantSDNode>(X.getOperand(1)) &&1370X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {1371SrliOpc = RISCV::SRLIW;1372X = X.getOperand(0);1373}1374SDNode *SRLI = CurDAG->getMachineNode(1375SrliOpc, DL, VT, X,1376CurDAG->getTargetConstant(C2 + Trailing, DL, VT));1377SDNode *SLLI = CurDAG->getMachineNode(1378RISCV::SLLI, DL, VT, SDValue(SRLI, 0),1379CurDAG->getTargetConstant(Trailing, DL, VT));1380ReplaceNode(Node, SLLI);1381return;1382}1383// If the leading zero count is C2+32, we can use SRLIW instead of SRLI.1384if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&1385OneUseOrZExtW && !IsCANDI) {1386SDNode *SRLIW = CurDAG->getMachineNode(1387RISCV::SRLIW, DL, VT, X,1388CurDAG->getTargetConstant(C2 + Trailing, DL, VT));1389SDNode *SLLI = CurDAG->getMachineNode(1390RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),1391CurDAG->getTargetConstant(Trailing, DL, VT));1392ReplaceNode(Node, SLLI);1393return;1394}1395}13961397// Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a1398// shifted mask with no leading zeros and c3 trailing zeros.1399if (LeftShift && isShiftedMask_64(C1)) {1400unsigned Leading = XLen - llvm::bit_width(C1);1401unsigned Trailing = llvm::countr_zero(C1);1402if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {1403SDNode *SRLI = CurDAG->getMachineNode(1404RISCV::SRLI, DL, VT, X,1405CurDAG->getTargetConstant(Trailing - C2, DL, VT));1406SDNode *SLLI = CurDAG->getMachineNode(1407RISCV::SLLI, DL, VT, SDValue(SRLI, 0),1408CurDAG->getTargetConstant(Trailing, DL, VT));1409ReplaceNode(Node, SLLI);1410return;1411}1412// If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.1413if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {1414SDNode *SRLIW = CurDAG->getMachineNode(1415RISCV::SRLIW, DL, VT, X,1416CurDAG->getTargetConstant(Trailing - C2, DL, VT));1417SDNode *SLLI = CurDAG->getMachineNode(1418RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),1419CurDAG->getTargetConstant(Trailing, DL, VT));1420ReplaceNode(Node, SLLI);1421return;1422}14231424// If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.1425if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&1426Subtarget->hasStdExtZba()) {1427SDNode *SRLI = CurDAG->getMachineNode(1428RISCV::SRLI, DL, VT, X,1429CurDAG->getTargetConstant(Trailing - C2, DL, VT));1430SDNode *SLLI_UW = CurDAG->getMachineNode(1431RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),1432CurDAG->getTargetConstant(Trailing, DL, VT));1433ReplaceNode(Node, 
SLLI_UW);1434return;1435}1436}1437}14381439// If C1 masks off the upper bits only (but can't be formed as an1440// ANDI), use an unsigned bitfield extract (e.g., th.extu), if1441// available.1442// Transform (and x, C1)1443// -> (<bfextract> x, msb, lsb)1444if (isC1Mask && !isC1ANDI) {1445const unsigned Msb = llvm::bit_width(C1) - 1;1446if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))1447return;1448}14491450if (tryShrinkShlLogicImm(Node))1451return;14521453break;1454}1455case ISD::MUL: {1456// Special case for calculating (mul (and X, C2), C1) where the full product1457// fits in XLen bits. We can shift X left by the number of leading zeros in1458// C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final1459// product has XLen trailing zeros, putting it in the output of MULHU. This1460// can avoid materializing a constant in a register for C2.14611462// RHS should be a constant.1463auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));1464if (!N1C || !N1C->hasOneUse())1465break;14661467// LHS should be an AND with constant.1468SDValue N0 = Node->getOperand(0);1469if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))1470break;14711472uint64_t C2 = N0.getConstantOperandVal(1);14731474// Constant should be a mask.1475if (!isMask_64(C2))1476break;14771478// If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has1479// multiple users or the constant is a simm12. This prevents inserting a1480// shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely1481// make it more costly to materialize. Otherwise, using a SLLI might allow1482// it to be compressed.1483bool IsANDIOrZExt =1484isInt<12>(C2) ||1485(C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());1486// With XTHeadBb, we can use TH.EXTU.1487IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();1488if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))1489break;1490// If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or1491// the constant is a simm32.1492bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();1493// With XTHeadBb, we can use TH.EXTU.1494IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();1495if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))1496break;14971498// We need to shift left the AND input and C1 by a total of XLen bits.14991500// How far left do we need to shift the AND input?1501unsigned XLen = Subtarget->getXLen();1502unsigned LeadingZeros = XLen - llvm::bit_width(C2);15031504// The constant gets shifted by the remaining amount unless that would1505// shift bits out.1506uint64_t C1 = N1C->getZExtValue();1507unsigned ConstantShift = XLen - LeadingZeros;1508if (ConstantShift > (XLen - llvm::bit_width(C1)))1509break;15101511uint64_t ShiftedC1 = C1 << ConstantShift;1512// If this RV32, we need to sign extend the constant.1513if (XLen == 32)1514ShiftedC1 = SignExtend64<32>(ShiftedC1);15151516// Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).1517SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();1518SDNode *SLLI =1519CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),1520CurDAG->getTargetConstant(LeadingZeros, DL, VT));1521SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,1522SDValue(SLLI, 0), SDValue(Imm, 0));1523ReplaceNode(Node, MULHU);1524return;1525}1526case ISD::LOAD: {1527if (tryIndexedLoad(Node))1528return;15291530if (Subtarget->hasVendorXCVmem()) {1531// We match 
post-incrementing load here1532LoadSDNode *Load = cast<LoadSDNode>(Node);1533if (Load->getAddressingMode() != ISD::POST_INC)1534break;15351536SDValue Chain = Node->getOperand(0);1537SDValue Base = Node->getOperand(1);1538SDValue Offset = Node->getOperand(2);15391540bool Simm12 = false;1541bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;15421543if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {1544int ConstantVal = ConstantOffset->getSExtValue();1545Simm12 = isInt<12>(ConstantVal);1546if (Simm12)1547Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),1548Offset.getValueType());1549}15501551unsigned Opcode = 0;1552switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {1553case MVT::i8:1554if (Simm12 && SignExtend)1555Opcode = RISCV::CV_LB_ri_inc;1556else if (Simm12 && !SignExtend)1557Opcode = RISCV::CV_LBU_ri_inc;1558else if (!Simm12 && SignExtend)1559Opcode = RISCV::CV_LB_rr_inc;1560else1561Opcode = RISCV::CV_LBU_rr_inc;1562break;1563case MVT::i16:1564if (Simm12 && SignExtend)1565Opcode = RISCV::CV_LH_ri_inc;1566else if (Simm12 && !SignExtend)1567Opcode = RISCV::CV_LHU_ri_inc;1568else if (!Simm12 && SignExtend)1569Opcode = RISCV::CV_LH_rr_inc;1570else1571Opcode = RISCV::CV_LHU_rr_inc;1572break;1573case MVT::i32:1574if (Simm12)1575Opcode = RISCV::CV_LW_ri_inc;1576else1577Opcode = RISCV::CV_LW_rr_inc;1578break;1579default:1580break;1581}1582if (!Opcode)1583break;15841585ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,1586Chain.getSimpleValueType(), Base,1587Offset, Chain));1588return;1589}1590break;1591}1592case ISD::INTRINSIC_WO_CHAIN: {1593unsigned IntNo = Node->getConstantOperandVal(0);1594switch (IntNo) {1595// By default we do not custom select any intrinsic.1596default:1597break;1598case Intrinsic::riscv_vmsgeu:1599case Intrinsic::riscv_vmsge: {1600SDValue Src1 = Node->getOperand(1);1601SDValue Src2 = Node->getOperand(2);1602bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;1603bool IsCmpUnsignedZero = false;1604// Only custom select scalar second operand.1605if (Src2.getValueType() != XLenVT)1606break;1607// Small constants are handled with patterns.1608if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {1609int64_t CVal = C->getSExtValue();1610if (CVal >= -15 && CVal <= 16) {1611if (!IsUnsigned || CVal != 0)1612break;1613IsCmpUnsignedZero = true;1614}1615}1616MVT Src1VT = Src1.getSimpleValueType();1617unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;1618switch (RISCVTargetLowering::getLMUL(Src1VT)) {1619default:1620llvm_unreachable("Unexpected LMUL!");1621#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \1622case RISCVII::VLMUL::lmulenum: \1623VMSLTOpcode = IsUnsigned ? 
RISCV::PseudoVMSLTU_VX_##suffix \1624: RISCV::PseudoVMSLT_VX_##suffix; \1625VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \1626VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \1627break;1628CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)1629CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)1630CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)1631CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)1632CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)1633CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)1634CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)1635#undef CASE_VMSLT_VMNAND_VMSET_OPCODES1636}1637SDValue SEW = CurDAG->getTargetConstant(1638Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);1639SDValue VL;1640selectVLOp(Node->getOperand(3), VL);16411642// If vmsgeu with 0 immediate, expand it to vmset.1643if (IsCmpUnsignedZero) {1644ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));1645return;1646}16471648// Expand to1649// vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd1650SDValue Cmp = SDValue(1651CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),16520);1653ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,1654{Cmp, Cmp, VL, SEW}));1655return;1656}1657case Intrinsic::riscv_vmsgeu_mask:1658case Intrinsic::riscv_vmsge_mask: {1659SDValue Src1 = Node->getOperand(2);1660SDValue Src2 = Node->getOperand(3);1661bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;1662bool IsCmpUnsignedZero = false;1663// Only custom select scalar second operand.1664if (Src2.getValueType() != XLenVT)1665break;1666// Small constants are handled with patterns.1667if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {1668int64_t CVal = C->getSExtValue();1669if (CVal >= -15 && CVal <= 16) {1670if (!IsUnsigned || CVal != 0)1671break;1672IsCmpUnsignedZero = true;1673}1674}1675MVT Src1VT = Src1.getSimpleValueType();1676unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,1677VMOROpcode;1678switch (RISCVTargetLowering::getLMUL(Src1VT)) {1679default:1680llvm_unreachable("Unexpected LMUL!");1681#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \1682case RISCVII::VLMUL::lmulenum: \1683VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \1684: RISCV::PseudoVMSLT_VX_##suffix; \1685VMSLTMaskOpcode = IsUnsigned ? 
RISCV::PseudoVMSLTU_VX_##suffix##_MASK \1686: RISCV::PseudoVMSLT_VX_##suffix##_MASK; \1687break;1688CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)1689CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)1690CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)1691CASE_VMSLT_OPCODES(LMUL_1, M1, B8)1692CASE_VMSLT_OPCODES(LMUL_2, M2, B16)1693CASE_VMSLT_OPCODES(LMUL_4, M4, B32)1694CASE_VMSLT_OPCODES(LMUL_8, M8, B64)1695#undef CASE_VMSLT_OPCODES1696}1697// Mask operations use the LMUL from the mask type.1698switch (RISCVTargetLowering::getLMUL(VT)) {1699default:1700llvm_unreachable("Unexpected LMUL!");1701#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \1702case RISCVII::VLMUL::lmulenum: \1703VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \1704VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \1705VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \1706break;1707CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)1708CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)1709CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)1710CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)1711CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)1712CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)1713CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)1714#undef CASE_VMXOR_VMANDN_VMOR_OPCODES1715}1716SDValue SEW = CurDAG->getTargetConstant(1717Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);1718SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);1719SDValue VL;1720selectVLOp(Node->getOperand(5), VL);1721SDValue MaskedOff = Node->getOperand(1);1722SDValue Mask = Node->getOperand(4);17231724// If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.1725if (IsCmpUnsignedZero) {1726// We don't need vmor if the MaskedOff and the Mask are the same1727// value.1728if (Mask == MaskedOff) {1729ReplaceUses(Node, Mask.getNode());1730return;1731}1732ReplaceNode(Node,1733CurDAG->getMachineNode(VMOROpcode, DL, VT,1734{Mask, MaskedOff, VL, MaskSEW}));1735return;1736}17371738// If the MaskedOff value and the Mask are the same value use1739// vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt1740// This avoids needing to copy v0 to vd before starting the next sequence.1741if (Mask == MaskedOff) {1742SDValue Cmp = SDValue(1743CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),17440);1745ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,1746{Mask, Cmp, VL, MaskSEW}));1747return;1748}17491750// Mask needs to be copied to V0.1751SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,1752RISCV::V0, Mask, SDValue());1753SDValue Glue = Chain.getValue(1);1754SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);17551756// Otherwise use1757// vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v01758// The result is mask undisturbed.1759// We use the same instructions to emulate mask agnostic behavior, because1760// the agnostic result can be either undisturbed or all 1.1761SDValue Cmp = SDValue(1762CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,1763{MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),17640);1765// vmxor.mm vd, vd, v0 is used to update active value.1766ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,1767{Cmp, Mask, VL, MaskSEW}));1768return;1769}1770case Intrinsic::riscv_vsetvli:1771case Intrinsic::riscv_vsetvlimax:1772return selectVSETVLI(Node);1773}1774break;1775}1776case ISD::INTRINSIC_W_CHAIN: {1777unsigned IntNo = Node->getConstantOperandVal(1);1778switch (IntNo) {1779// By default we do not custom select any intrinsic.1780default:1781break;1782case Intrinsic::riscv_vlseg2:1783case Intrinsic::riscv_vlseg3:1784case Intrinsic::riscv_vlseg4:1785case 
Intrinsic::riscv_vlseg5:1786case Intrinsic::riscv_vlseg6:1787case Intrinsic::riscv_vlseg7:1788case Intrinsic::riscv_vlseg8: {1789selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);1790return;1791}1792case Intrinsic::riscv_vlseg2_mask:1793case Intrinsic::riscv_vlseg3_mask:1794case Intrinsic::riscv_vlseg4_mask:1795case Intrinsic::riscv_vlseg5_mask:1796case Intrinsic::riscv_vlseg6_mask:1797case Intrinsic::riscv_vlseg7_mask:1798case Intrinsic::riscv_vlseg8_mask: {1799selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);1800return;1801}1802case Intrinsic::riscv_vlsseg2:1803case Intrinsic::riscv_vlsseg3:1804case Intrinsic::riscv_vlsseg4:1805case Intrinsic::riscv_vlsseg5:1806case Intrinsic::riscv_vlsseg6:1807case Intrinsic::riscv_vlsseg7:1808case Intrinsic::riscv_vlsseg8: {1809selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);1810return;1811}1812case Intrinsic::riscv_vlsseg2_mask:1813case Intrinsic::riscv_vlsseg3_mask:1814case Intrinsic::riscv_vlsseg4_mask:1815case Intrinsic::riscv_vlsseg5_mask:1816case Intrinsic::riscv_vlsseg6_mask:1817case Intrinsic::riscv_vlsseg7_mask:1818case Intrinsic::riscv_vlsseg8_mask: {1819selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);1820return;1821}1822case Intrinsic::riscv_vloxseg2:1823case Intrinsic::riscv_vloxseg3:1824case Intrinsic::riscv_vloxseg4:1825case Intrinsic::riscv_vloxseg5:1826case Intrinsic::riscv_vloxseg6:1827case Intrinsic::riscv_vloxseg7:1828case Intrinsic::riscv_vloxseg8:1829selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);1830return;1831case Intrinsic::riscv_vluxseg2:1832case Intrinsic::riscv_vluxseg3:1833case Intrinsic::riscv_vluxseg4:1834case Intrinsic::riscv_vluxseg5:1835case Intrinsic::riscv_vluxseg6:1836case Intrinsic::riscv_vluxseg7:1837case Intrinsic::riscv_vluxseg8:1838selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);1839return;1840case Intrinsic::riscv_vloxseg2_mask:1841case Intrinsic::riscv_vloxseg3_mask:1842case Intrinsic::riscv_vloxseg4_mask:1843case Intrinsic::riscv_vloxseg5_mask:1844case Intrinsic::riscv_vloxseg6_mask:1845case Intrinsic::riscv_vloxseg7_mask:1846case Intrinsic::riscv_vloxseg8_mask:1847selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);1848return;1849case Intrinsic::riscv_vluxseg2_mask:1850case Intrinsic::riscv_vluxseg3_mask:1851case Intrinsic::riscv_vluxseg4_mask:1852case Intrinsic::riscv_vluxseg5_mask:1853case Intrinsic::riscv_vluxseg6_mask:1854case Intrinsic::riscv_vluxseg7_mask:1855case Intrinsic::riscv_vluxseg8_mask:1856selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);1857return;1858case Intrinsic::riscv_vlseg8ff:1859case Intrinsic::riscv_vlseg7ff:1860case Intrinsic::riscv_vlseg6ff:1861case Intrinsic::riscv_vlseg5ff:1862case Intrinsic::riscv_vlseg4ff:1863case Intrinsic::riscv_vlseg3ff:1864case Intrinsic::riscv_vlseg2ff: {1865selectVLSEGFF(Node, /*IsMasked*/ false);1866return;1867}1868case Intrinsic::riscv_vlseg8ff_mask:1869case Intrinsic::riscv_vlseg7ff_mask:1870case Intrinsic::riscv_vlseg6ff_mask:1871case Intrinsic::riscv_vlseg5ff_mask:1872case Intrinsic::riscv_vlseg4ff_mask:1873case Intrinsic::riscv_vlseg3ff_mask:1874case Intrinsic::riscv_vlseg2ff_mask: {1875selectVLSEGFF(Node, /*IsMasked*/ true);1876return;1877}1878case Intrinsic::riscv_vloxei:1879case Intrinsic::riscv_vloxei_mask:1880case Intrinsic::riscv_vluxei:1881case Intrinsic::riscv_vluxei_mask: {1882bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||1883IntNo == Intrinsic::riscv_vluxei_mask;1884bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||1885IntNo == 
                         Intrinsic::riscv_vloxei_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      Operands.push_back(Node->getOperand(CurOp++));

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/true, &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        report_fatal_error("The V extension does not support EEW=64 for index "
                           "values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
          IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
          static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vlm:
    case Intrinsic::riscv_vle:
    case Intrinsic::riscv_vle_mask:
    case Intrinsic::riscv_vlse:
    case Intrinsic::riscv_vlse_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
                      IntNo == Intrinsic::riscv_vlse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      // The riscv_vlm intrinsic is always tail agnostic and has no passthru
      // operand at the IR level. The pseudos have both a policy and a
      // passthru operand.
      // The passthru operand is needed to track the "tail undefined" state,
      // and the policy is there just for consistency - it will always be
      // "don't care" for the unmasked form.
      bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      if (HasPassthruOperand)
        Operands.push_back(Node->getOperand(CurOp++));
      else {
        // We eagerly lower to implicit_def (instead of undef), as we
        // otherwise fail to select nodes such as: nxv1i1 = undef
        SDNode *Passthru =
            CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
        Operands.push_back(SDValue(Passthru, 0));
      }
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                                 Operands, /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
                              static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vleff:
    case Intrinsic::riscv_vleff_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 7> Operands;
      Operands.push_back(Node->getOperand(CurOp++));
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ false, Operands,
                                 /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
                              Log2SEW, static_cast<unsigned>(LMUL));
      MachineSDNode *Load = CurDAG->getMachineNode(
          P->Pseudo, DL, Node->getVTList(), Operands);
      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = Node->getConstantOperandVal(1);
    switch (IntNo) {
    case Intrinsic::riscv_vsseg2:
    case Intrinsic::riscv_vsseg3:
    case Intrinsic::riscv_vsseg4:
    case Intrinsic::riscv_vsseg5:
    case Intrinsic::riscv_vsseg6:
    case Intrinsic::riscv_vsseg7:
    case Intrinsic::riscv_vsseg8: {
      selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vsseg2_mask:
    case Intrinsic::riscv_vsseg3_mask:
    case Intrinsic::riscv_vsseg4_mask:
    case Intrinsic::riscv_vsseg5_mask:
    case Intrinsic::riscv_vsseg6_mask:
    case Intrinsic::riscv_vsseg7_mask:
    case Intrinsic::riscv_vsseg8_mask: {
      selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vssseg2:
    case Intrinsic::riscv_vssseg3:
    case Intrinsic::riscv_vssseg4:
    case Intrinsic::riscv_vssseg5:
    case Intrinsic::riscv_vssseg6:
    case Intrinsic::riscv_vssseg7:
    case Intrinsic::riscv_vssseg8: {
      selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vssseg2_mask:
    case Intrinsic::riscv_vssseg3_mask:
    case Intrinsic::riscv_vssseg4_mask:
    case Intrinsic::riscv_vssseg5_mask:
    case Intrinsic::riscv_vssseg6_mask:
    case Intrinsic::riscv_vssseg7_mask:
    case Intrinsic::riscv_vssseg8_mask: {
      selectVSSEG(Node, /*IsMasked*/
true, /*IsStrided*/ true);2038return;2039}2040case Intrinsic::riscv_vsoxseg2:2041case Intrinsic::riscv_vsoxseg3:2042case Intrinsic::riscv_vsoxseg4:2043case Intrinsic::riscv_vsoxseg5:2044case Intrinsic::riscv_vsoxseg6:2045case Intrinsic::riscv_vsoxseg7:2046case Intrinsic::riscv_vsoxseg8:2047selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);2048return;2049case Intrinsic::riscv_vsuxseg2:2050case Intrinsic::riscv_vsuxseg3:2051case Intrinsic::riscv_vsuxseg4:2052case Intrinsic::riscv_vsuxseg5:2053case Intrinsic::riscv_vsuxseg6:2054case Intrinsic::riscv_vsuxseg7:2055case Intrinsic::riscv_vsuxseg8:2056selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);2057return;2058case Intrinsic::riscv_vsoxseg2_mask:2059case Intrinsic::riscv_vsoxseg3_mask:2060case Intrinsic::riscv_vsoxseg4_mask:2061case Intrinsic::riscv_vsoxseg5_mask:2062case Intrinsic::riscv_vsoxseg6_mask:2063case Intrinsic::riscv_vsoxseg7_mask:2064case Intrinsic::riscv_vsoxseg8_mask:2065selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);2066return;2067case Intrinsic::riscv_vsuxseg2_mask:2068case Intrinsic::riscv_vsuxseg3_mask:2069case Intrinsic::riscv_vsuxseg4_mask:2070case Intrinsic::riscv_vsuxseg5_mask:2071case Intrinsic::riscv_vsuxseg6_mask:2072case Intrinsic::riscv_vsuxseg7_mask:2073case Intrinsic::riscv_vsuxseg8_mask:2074selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);2075return;2076case Intrinsic::riscv_vsoxei:2077case Intrinsic::riscv_vsoxei_mask:2078case Intrinsic::riscv_vsuxei:2079case Intrinsic::riscv_vsuxei_mask: {2080bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||2081IntNo == Intrinsic::riscv_vsuxei_mask;2082bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||2083IntNo == Intrinsic::riscv_vsoxei_mask;20842085MVT VT = Node->getOperand(2)->getSimpleValueType(0);2086unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());20872088unsigned CurOp = 2;2089SmallVector<SDValue, 8> Operands;2090Operands.push_back(Node->getOperand(CurOp++)); // Store value.20912092MVT IndexVT;2093addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,2094/*IsStridedOrIndexed*/ true, Operands,2095/*IsLoad=*/false, &IndexVT);20962097assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&2098"Element count mismatch");20992100RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);2101RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);2102unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());2103if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {2104report_fatal_error("The V extension does not support EEW=64 for index "2105"values when XLEN=32");2106}2107const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(2108IsMasked, IsOrdered, IndexLog2EEW,2109static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));2110MachineSDNode *Store =2111CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);21122113if (auto *MemOp = dyn_cast<MemSDNode>(Node))2114CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});21152116ReplaceNode(Node, Store);2117return;2118}2119case Intrinsic::riscv_vsm:2120case Intrinsic::riscv_vse:2121case Intrinsic::riscv_vse_mask:2122case Intrinsic::riscv_vsse:2123case Intrinsic::riscv_vsse_mask: {2124bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||2125IntNo == Intrinsic::riscv_vsse_mask;2126bool IsStrided =2127IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;21282129MVT VT = Node->getOperand(2)->getSimpleValueType(0);2130unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());21312132unsigned CurOp = 2;2133SmallVector<SDValue, 8> 
Operands;2134Operands.push_back(Node->getOperand(CurOp++)); // Store value.21352136addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,2137Operands);21382139RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);2140const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(2141IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));2142MachineSDNode *Store =2143CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);2144if (auto *MemOp = dyn_cast<MemSDNode>(Node))2145CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});21462147ReplaceNode(Node, Store);2148return;2149}2150case Intrinsic::riscv_sf_vc_x_se:2151case Intrinsic::riscv_sf_vc_i_se:2152selectSF_VC_X_SE(Node);2153return;2154}2155break;2156}2157case ISD::BITCAST: {2158MVT SrcVT = Node->getOperand(0).getSimpleValueType();2159// Just drop bitcasts between vectors if both are fixed or both are2160// scalable.2161if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||2162(VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {2163ReplaceUses(SDValue(Node, 0), Node->getOperand(0));2164CurDAG->RemoveDeadNode(Node);2165return;2166}2167break;2168}2169case ISD::INSERT_SUBVECTOR: {2170SDValue V = Node->getOperand(0);2171SDValue SubV = Node->getOperand(1);2172SDLoc DL(SubV);2173auto Idx = Node->getConstantOperandVal(2);2174MVT SubVecVT = SubV.getSimpleValueType();21752176const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();2177MVT SubVecContainerVT = SubVecVT;2178// Establish the correct scalable-vector types for any fixed-length type.2179if (SubVecVT.isFixedLengthVector()) {2180SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);2181TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);2182[[maybe_unused]] bool ExactlyVecRegSized =2183Subtarget->expandVScale(SubVecVT.getSizeInBits())2184.isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));2185assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())2186.getKnownMinValue()));2187assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));2188}2189MVT ContainerVT = VT;2190if (VT.isFixedLengthVector())2191ContainerVT = TLI.getContainerForFixedLengthVector(VT);21922193const auto *TRI = Subtarget->getRegisterInfo();2194unsigned SubRegIdx;2195std::tie(SubRegIdx, Idx) =2196RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(2197ContainerVT, SubVecContainerVT, Idx, TRI);21982199// If the Idx hasn't been completely eliminated then this is a subvector2200// insert which doesn't naturally align to a vector register. These must2201// be handled using instructions to manipulate the vector registers.2202if (Idx != 0)2203break;22042205RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);2206[[maybe_unused]] bool IsSubVecPartReg =2207SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||2208SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||2209SubVecLMUL == RISCVII::VLMUL::LMUL_F8;2210assert((!IsSubVecPartReg || V.isUndef()) &&2211"Expecting lowering to have created legal INSERT_SUBVECTORs when "2212"the subvector is smaller than a full-sized register");22132214// If we haven't set a SubRegIdx, then we must be going between2215// equally-sized LMUL groups (e.g. VR -> VR). 
This can be done as a copy.2216if (SubRegIdx == RISCV::NoSubRegister) {2217unsigned InRegClassID =2218RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);2219assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==2220InRegClassID &&2221"Unexpected subvector extraction");2222SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);2223SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,2224DL, VT, SubV, RC);2225ReplaceNode(Node, NewNode);2226return;2227}22282229SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);2230ReplaceNode(Node, Insert.getNode());2231return;2232}2233case ISD::EXTRACT_SUBVECTOR: {2234SDValue V = Node->getOperand(0);2235auto Idx = Node->getConstantOperandVal(1);2236MVT InVT = V.getSimpleValueType();2237SDLoc DL(V);22382239const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();2240MVT SubVecContainerVT = VT;2241// Establish the correct scalable-vector types for any fixed-length type.2242if (VT.isFixedLengthVector()) {2243assert(Idx == 0);2244SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);2245}2246if (InVT.isFixedLengthVector())2247InVT = TLI.getContainerForFixedLengthVector(InVT);22482249const auto *TRI = Subtarget->getRegisterInfo();2250unsigned SubRegIdx;2251std::tie(SubRegIdx, Idx) =2252RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(2253InVT, SubVecContainerVT, Idx, TRI);22542255// If the Idx hasn't been completely eliminated then this is a subvector2256// extract which doesn't naturally align to a vector register. These must2257// be handled using instructions to manipulate the vector registers.2258if (Idx != 0)2259break;22602261// If we haven't set a SubRegIdx, then we must be going between2262// equally-sized LMUL types (e.g. VR -> VR). 
This can be done as a copy.2263if (SubRegIdx == RISCV::NoSubRegister) {2264unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);2265assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==2266InRegClassID &&2267"Unexpected subvector extraction");2268SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);2269SDNode *NewNode =2270CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);2271ReplaceNode(Node, NewNode);2272return;2273}22742275SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);2276ReplaceNode(Node, Extract.getNode());2277return;2278}2279case RISCVISD::VMV_S_X_VL:2280case RISCVISD::VFMV_S_F_VL:2281case RISCVISD::VMV_V_X_VL:2282case RISCVISD::VFMV_V_F_VL: {2283// Try to match splat of a scalar load to a strided load with stride of x0.2284bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||2285Node->getOpcode() == RISCVISD::VFMV_S_F_VL;2286if (!Node->getOperand(0).isUndef())2287break;2288SDValue Src = Node->getOperand(1);2289auto *Ld = dyn_cast<LoadSDNode>(Src);2290// Can't fold load update node because the second2291// output is used so that load update node can't be removed.2292if (!Ld || Ld->isIndexed())2293break;2294EVT MemVT = Ld->getMemoryVT();2295// The memory VT should be the same size as the element type.2296if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())2297break;2298if (!IsProfitableToFold(Src, Node, Node) ||2299!IsLegalToFold(Src, Node, Node, TM.getOptLevel()))2300break;23012302SDValue VL;2303if (IsScalarMove) {2304// We could deal with more VL if we update the VSETVLI insert pass to2305// avoid introducing more VSETVLI.2306if (!isOneConstant(Node->getOperand(2)))2307break;2308selectVLOp(Node->getOperand(2), VL);2309} else2310selectVLOp(Node->getOperand(2), VL);23112312unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());2313SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);23142315// If VL=1, then we don't need to do a strided load and can just do a2316// regular load.2317bool IsStrided = !isOneConstant(VL);23182319// Only do a strided load if we have optimized zero-stride vector load.2320if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())2321break;23222323SmallVector<SDValue> Operands = {2324SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),2325Ld->getBasePtr()};2326if (IsStrided)2327Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));2328uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;2329SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);2330Operands.append({VL, SEW, PolicyOp, Ld->getChain()});23312332RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);2333const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(2334/*IsMasked*/ false, IsStrided, /*FF*/ false,2335Log2SEW, static_cast<unsigned>(LMUL));2336MachineSDNode *Load =2337CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);2338// Update the chain.2339ReplaceUses(Src.getValue(1), SDValue(Load, 1));2340// Record the mem-refs2341CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});2342// Replace the splat with the vlse.2343ReplaceNode(Node, Load);2344return;2345}2346case ISD::PREFETCH:2347unsigned Locality = Node->getConstantOperandVal(3);2348if (Locality > 2)2349break;23502351if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {2352MachineMemOperand *MMO = LoadStoreMem->getMemOperand();2353MMO->setFlags(MachineMemOperand::MONonTemporal);23542355int NontemporalLevel = 0;2356switch (Locality) {2357case 
0:2358NontemporalLevel = 3; // NTL.ALL2359break;2360case 1:2361NontemporalLevel = 1; // NTL.PALL2362break;2363case 2:2364NontemporalLevel = 0; // NTL.P12365break;2366default:2367llvm_unreachable("unexpected locality value.");2368}23692370if (NontemporalLevel & 0b1)2371MMO->setFlags(MONontemporalBit0);2372if (NontemporalLevel & 0b10)2373MMO->setFlags(MONontemporalBit1);2374}2375break;2376}23772378// Select the default instruction.2379SelectCode(Node);2380}23812382bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(2383const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,2384std::vector<SDValue> &OutOps) {2385// Always produce a register and immediate operand, as expected by2386// RISCVAsmPrinter::PrintAsmMemoryOperand.2387switch (ConstraintID) {2388case InlineAsm::ConstraintCode::o:2389case InlineAsm::ConstraintCode::m: {2390SDValue Op0, Op1;2391[[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);2392assert(Found && "SelectAddrRegImm should always succeed");2393OutOps.push_back(Op0);2394OutOps.push_back(Op1);2395return false;2396}2397case InlineAsm::ConstraintCode::A:2398OutOps.push_back(Op);2399OutOps.push_back(2400CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));2401return false;2402default:2403report_fatal_error("Unexpected asm memory constraint " +2404InlineAsm::getMemConstraintName(ConstraintID));2405}24062407return true;2408}24092410bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,2411SDValue &Offset) {2412if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {2413Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());2414Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());2415return true;2416}24172418return false;2419}24202421// Select a frame index and an optional immediate offset from an ADD or OR.2422bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,2423SDValue &Offset) {2424if (SelectAddrFrameIndex(Addr, Base, Offset))2425return true;24262427if (!CurDAG->isBaseWithConstantOffset(Addr))2428return false;24292430if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {2431int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();2432if (isInt<12>(CVal)) {2433Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),2434Subtarget->getXLenVT());2435Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),2436Subtarget->getXLenVT());2437return true;2438}2439}24402441return false;2442}24432444// Fold constant addresses.2445static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,2446const MVT VT, const RISCVSubtarget *Subtarget,2447SDValue Addr, SDValue &Base, SDValue &Offset,2448bool IsPrefetch = false) {2449if (!isa<ConstantSDNode>(Addr))2450return false;24512452int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();24532454// If the constant is a simm12, we can fold the whole constant and use X0 as2455// the base. If the constant can be materialized with LUI+simm12, use LUI as2456// the base. 
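  // For example (an illustrative value, not taken from a specific test):
  // CVal = 0x12345800 splits into Lo12 = -2048 and Hi = 0x12346000, so the
  // base becomes LUI 0x12346 and the folded offset is -2048.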
We can't use generateInstSeq because it favors LUI+ADDIW.2457int64_t Lo12 = SignExtend64<12>(CVal);2458int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;2459if (!Subtarget->is64Bit() || isInt<32>(Hi)) {2460if (IsPrefetch && (Lo12 & 0b11111) != 0)2461return false;24622463if (Hi) {2464int64_t Hi20 = (Hi >> 12) & 0xfffff;2465Base = SDValue(2466CurDAG->getMachineNode(RISCV::LUI, DL, VT,2467CurDAG->getTargetConstant(Hi20, DL, VT)),24680);2469} else {2470Base = CurDAG->getRegister(RISCV::X0, VT);2471}2472Offset = CurDAG->getTargetConstant(Lo12, DL, VT);2473return true;2474}24752476// Ask how constant materialization would handle this constant.2477RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);24782479// If the last instruction would be an ADDI, we can fold its immediate and2480// emit the rest of the sequence as the base.2481if (Seq.back().getOpcode() != RISCV::ADDI)2482return false;2483Lo12 = Seq.back().getImm();2484if (IsPrefetch && (Lo12 & 0b11111) != 0)2485return false;24862487// Drop the last instruction.2488Seq.pop_back();2489assert(!Seq.empty() && "Expected more instructions in sequence");24902491Base = selectImmSeq(CurDAG, DL, VT, Seq);2492Offset = CurDAG->getTargetConstant(Lo12, DL, VT);2493return true;2494}24952496// Is this ADD instruction only used as the base pointer of scalar loads and2497// stores?2498static bool isWorthFoldingAdd(SDValue Add) {2499for (auto *Use : Add->uses()) {2500if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&2501Use->getOpcode() != ISD::ATOMIC_LOAD &&2502Use->getOpcode() != ISD::ATOMIC_STORE)2503return false;2504EVT VT = cast<MemSDNode>(Use)->getMemoryVT();2505if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&2506VT != MVT::f64)2507return false;2508// Don't allow stores of the value. 
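    // (That is, reject a user such as (store (add X, C), Ptr), where the add
    // is the value being stored rather than the address.)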
It must be used as the address.2509if (Use->getOpcode() == ISD::STORE &&2510cast<StoreSDNode>(Use)->getValue() == Add)2511return false;2512if (Use->getOpcode() == ISD::ATOMIC_STORE &&2513cast<AtomicSDNode>(Use)->getVal() == Add)2514return false;2515}25162517return true;2518}25192520bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,2521unsigned MaxShiftAmount,2522SDValue &Base, SDValue &Index,2523SDValue &Scale) {2524EVT VT = Addr.getSimpleValueType();2525auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,2526SDValue &Shift) {2527uint64_t ShiftAmt = 0;2528Index = N;25292530if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {2531// Only match shifts by a value in range [0, MaxShiftAmount].2532if (N.getConstantOperandVal(1) <= MaxShiftAmount) {2533Index = N.getOperand(0);2534ShiftAmt = N.getConstantOperandVal(1);2535}2536}25372538Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);2539return ShiftAmt != 0;2540};25412542if (Addr.getOpcode() == ISD::ADD) {2543if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {2544SDValue AddrB = Addr.getOperand(0);2545if (AddrB.getOpcode() == ISD::ADD &&2546UnwrapShl(AddrB.getOperand(0), Index, Scale) &&2547!isa<ConstantSDNode>(AddrB.getOperand(1)) &&2548isInt<12>(C1->getSExtValue())) {2549// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))2550SDValue C1Val =2551CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);2552Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,2553AddrB.getOperand(1), C1Val),25540);2555return true;2556}2557} else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {2558Base = Addr.getOperand(1);2559return true;2560} else {2561UnwrapShl(Addr.getOperand(1), Index, Scale);2562Base = Addr.getOperand(0);2563return true;2564}2565} else if (UnwrapShl(Addr, Index, Scale)) {2566EVT VT = Addr.getValueType();2567Base = CurDAG->getRegister(RISCV::X0, VT);2568return true;2569}25702571return false;2572}25732574bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,2575SDValue &Offset, bool IsINX) {2576if (SelectAddrFrameIndex(Addr, Base, Offset))2577return true;25782579SDLoc DL(Addr);2580MVT VT = Addr.getSimpleValueType();25812582if (Addr.getOpcode() == RISCVISD::ADD_LO) {2583Base = Addr.getOperand(0);2584Offset = Addr.getOperand(1);2585return true;2586}25872588int64_t RV32ZdinxRange = IsINX ? 4 : 0;2589if (CurDAG->isBaseWithConstantOffset(Addr)) {2590int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();2591if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {2592Base = Addr.getOperand(0);2593if (Base.getOpcode() == RISCVISD::ADD_LO) {2594SDValue LoOperand = Base.getOperand(1);2595if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {2596// If the Lo in (ADD_LO hi, lo) is a global variable's address2597// (its low part, really), then we can rely on the alignment of that2598// variable to provide a margin of safety before low part can overflow2599// the 12 bits of the load/store offset. 
Check if CVal falls within2600// that margin; if so (low part + CVal) can't overflow.2601const DataLayout &DL = CurDAG->getDataLayout();2602Align Alignment = commonAlignment(2603GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());2604if (CVal == 0 || Alignment > CVal) {2605int64_t CombinedOffset = CVal + GA->getOffset();2606Base = Base.getOperand(0);2607Offset = CurDAG->getTargetGlobalAddress(2608GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),2609CombinedOffset, GA->getTargetFlags());2610return true;2611}2612}2613}26142615if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))2616Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);2617Offset = CurDAG->getTargetConstant(CVal, DL, VT);2618return true;2619}2620}26212622// Handle ADD with large immediates.2623if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {2624int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();2625assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&2626"simm12 not already handled?");26272628// Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use2629// an ADDI for part of the offset and fold the rest into the load/store.2630// This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.2631if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {2632int64_t Adj = CVal < 0 ? -2048 : 2047;2633Base = SDValue(2634CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),2635CurDAG->getTargetConstant(Adj, DL, VT)),26360);2637Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);2638return true;2639}26402641// For larger immediates, we might be able to save one instruction from2642// constant materialization by folding the Lo12 bits of the immediate into2643// the address. We should only do this if the ADD is only used by loads and2644// stores that can fold the lo12 bits. 
    // Otherwise, the ADD will get selected separately with the full
    // materialized immediate, creating extra instructions.
    if (isWorthFoldingAdd(Addr) &&
        selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
                           Offset)) {
      // Insert an ADD instruction with the materialized Hi52 bits.
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
          0);
      return true;
    }
  }

  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
    return true;

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, VT);
  return true;
}

/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
/// Offset should be all zeros.
bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
                                                 SDValue &Offset) {
  if (SelectAddrFrameIndex(Addr, Base, Offset))
    return true;

  SDLoc DL(Addr);
  MVT VT = Addr.getSimpleValueType();

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    if (isInt<12>(CVal)) {
      Base = Addr.getOperand(0);

      // Early-out if not a valid offset.
      if ((CVal & 0b11111) != 0) {
        Base = Addr;
        Offset = CurDAG->getTargetConstant(0, DL, VT);
        return true;
      }

      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
      Offset = CurDAG->getTargetConstant(CVal, DL, VT);
      return true;
    }
  }

  // Handle ADD with large immediates.
  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    assert(!isInt<12>(CVal) && "simm12 not already handled?");

    // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can
    // save one instruction by folding adjustment (-2048 or 2016) into the
    // address.
    if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
      int64_t Adj = CVal < 0 ? -2048 : 2016;
      int64_t AdjustedOffset = CVal - Adj;
      Base = SDValue(CurDAG->getMachineNode(
                         RISCV::ADDI, DL, VT, Addr.getOperand(0),
                         CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
                     0);
      Offset = CurDAG->getTargetConstant(Adj, DL, VT);
      return true;
    }

    if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
                           Offset, true)) {
      // Insert an ADD instruction with the materialized Hi52 bits.
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
          0);
      return true;
    }
  }

  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
    return true;

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, VT);
  return true;
}

bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
                                         SDValue &Offset) {
  if (Addr.getOpcode() != ISD::ADD)
    return false;

  if (isa<ConstantSDNode>(Addr.getOperand(1)))
    return false;

  Base = Addr.getOperand(1);
  Offset = Addr.getOperand(0);
  return true;
}

bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                        SDValue &ShAmt) {
  ShAmt = N;

  // Peek through zext.
  if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
    ShAmt = ShAmt.getOperand(0);

  // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
  // amount.
If there is an AND on the shift amount, we can bypass it if it2755// doesn't affect any of those bits.2756if (ShAmt.getOpcode() == ISD::AND &&2757isa<ConstantSDNode>(ShAmt.getOperand(1))) {2758const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);27592760// Since the max shift amount is a power of 2 we can subtract 1 to make a2761// mask that covers the bits needed to represent all shift amounts.2762assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");2763APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);27642765if (ShMask.isSubsetOf(AndMask)) {2766ShAmt = ShAmt.getOperand(0);2767} else {2768// SimplifyDemandedBits may have optimized the mask so try restoring any2769// bits that are known zero.2770KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));2771if (!ShMask.isSubsetOf(AndMask | Known.Zero))2772return true;2773ShAmt = ShAmt.getOperand(0);2774}2775}27762777if (ShAmt.getOpcode() == ISD::ADD &&2778isa<ConstantSDNode>(ShAmt.getOperand(1))) {2779uint64_t Imm = ShAmt.getConstantOperandVal(1);2780// If we are shifting by X+N where N == 0 mod Size, then just shift by X2781// to avoid the ADD.2782if (Imm != 0 && Imm % ShiftWidth == 0) {2783ShAmt = ShAmt.getOperand(0);2784return true;2785}2786} else if (ShAmt.getOpcode() == ISD::SUB &&2787isa<ConstantSDNode>(ShAmt.getOperand(0))) {2788uint64_t Imm = ShAmt.getConstantOperandVal(0);2789// If we are shifting by N-X where N == 0 mod Size, then just shift by -X to2790// generate a NEG instead of a SUB of a constant.2791if (Imm != 0 && Imm % ShiftWidth == 0) {2792SDLoc DL(ShAmt);2793EVT VT = ShAmt.getValueType();2794SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);2795unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;2796MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,2797ShAmt.getOperand(1));2798ShAmt = SDValue(Neg, 0);2799return true;2800}2801// If we are shifting by N-X where N == -1 mod Size, then just shift by ~X2802// to generate a NOT instead of a SUB of a constant.2803if (Imm % ShiftWidth == ShiftWidth - 1) {2804SDLoc DL(ShAmt);2805EVT VT = ShAmt.getValueType();2806MachineSDNode *Not =2807CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),2808CurDAG->getTargetConstant(-1, DL, VT));2809ShAmt = SDValue(Not, 0);2810return true;2811}2812}28132814return true;2815}28162817/// RISC-V doesn't have general instructions for integer setne/seteq, but we can2818/// check for equality with 0. 
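/// For example, (seteq X, 5) can be lowered to (addi tmp, X, -5), which is
/// zero exactly when X == 5.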
This function emits instructions that convert the2819/// seteq/setne into something that can be compared with 0.2820/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.2821/// ISD::SETNE).2822bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,2823SDValue &Val) {2824assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&2825"Unexpected condition code!");28262827// We're looking for a setcc.2828if (N->getOpcode() != ISD::SETCC)2829return false;28302831// Must be an equality comparison.2832ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();2833if (CCVal != ExpectedCCVal)2834return false;28352836SDValue LHS = N->getOperand(0);2837SDValue RHS = N->getOperand(1);28382839if (!LHS.getValueType().isScalarInteger())2840return false;28412842// If the RHS side is 0, we don't need any extra instructions, return the LHS.2843if (isNullConstant(RHS)) {2844Val = LHS;2845return true;2846}28472848SDLoc DL(N);28492850if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {2851int64_t CVal = C->getSExtValue();2852// If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and2853// non-zero otherwise.2854if (CVal == -2048) {2855Val =2856SDValue(CurDAG->getMachineNode(2857RISCV::XORI, DL, N->getValueType(0), LHS,2858CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),28590);2860return true;2861}2862// If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the2863// LHS is equal to the RHS and non-zero otherwise.2864if (isInt<12>(CVal) || CVal == 2048) {2865Val =2866SDValue(CurDAG->getMachineNode(2867RISCV::ADDI, DL, N->getValueType(0), LHS,2868CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),28690);2870return true;2871}2872}28732874// If nothing else we can XOR the LHS and RHS to produce zero if they are2875// equal and a non-zero value if they aren't.2876Val = SDValue(2877CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);2878return true;2879}28802881bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {2882if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&2883cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {2884Val = N.getOperand(0);2885return true;2886}28872888auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {2889if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))2890return N;28912892SDValue N0 = N.getOperand(0);2893if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&2894N.getConstantOperandVal(1) == ShiftAmt &&2895N0.getConstantOperandVal(1) == ShiftAmt)2896return N0.getOperand(0);28972898return N;2899};29002901MVT VT = N.getSimpleValueType();2902if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {2903Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);2904return true;2905}29062907return false;2908}29092910bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {2911if (N.getOpcode() == ISD::AND) {2912auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));2913if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {2914Val = N.getOperand(0);2915return true;2916}2917}2918MVT VT = N.getSimpleValueType();2919APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);2920if (CurDAG->MaskedValueIsZero(N, Mask)) {2921Val = N;2922return true;2923}29242925return false;2926}29272928/// Look for various patterns that can be done with a SHL that can be folded2929/// into a SHXADD. 
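/// (The Zba shXadd instructions compute (rs1 << X) + rs2; for example,
/// ShAmt == 2 corresponds to sh2add.)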
\p ShAmt contains 1, 2, or 3 and is set based on which2930/// SHXADD we are trying to match.2931bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,2932SDValue &Val) {2933if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {2934SDValue N0 = N.getOperand(0);29352936bool LeftShift = N0.getOpcode() == ISD::SHL;2937if ((LeftShift || N0.getOpcode() == ISD::SRL) &&2938isa<ConstantSDNode>(N0.getOperand(1))) {2939uint64_t Mask = N.getConstantOperandVal(1);2940unsigned C2 = N0.getConstantOperandVal(1);29412942unsigned XLen = Subtarget->getXLen();2943if (LeftShift)2944Mask &= maskTrailingZeros<uint64_t>(C2);2945else2946Mask &= maskTrailingOnes<uint64_t>(XLen - C2);29472948// Look for (and (shl y, c2), c1) where c1 is a shifted mask with no2949// leading zeros and c3 trailing zeros. We can use an SRLI by c2+c32950// followed by a SHXADD with c3 for the X amount.2951if (isShiftedMask_64(Mask)) {2952unsigned Leading = XLen - llvm::bit_width(Mask);2953unsigned Trailing = llvm::countr_zero(Mask);2954if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {2955SDLoc DL(N);2956EVT VT = N.getValueType();2957Val = SDValue(CurDAG->getMachineNode(2958RISCV::SRLI, DL, VT, N0.getOperand(0),2959CurDAG->getTargetConstant(Trailing - C2, DL, VT)),29600);2961return true;2962}2963// Look for (and (shr y, c2), c1) where c1 is a shifted mask with c22964// leading zeros and c3 trailing zeros. We can use an SRLI by C32965// followed by a SHXADD using c3 for the X amount.2966if (!LeftShift && Leading == C2 && Trailing == ShAmt) {2967SDLoc DL(N);2968EVT VT = N.getValueType();2969Val = SDValue(2970CurDAG->getMachineNode(2971RISCV::SRLI, DL, VT, N0.getOperand(0),2972CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),29730);2974return true;2975}2976}2977}2978}29792980bool LeftShift = N.getOpcode() == ISD::SHL;2981if ((LeftShift || N.getOpcode() == ISD::SRL) &&2982isa<ConstantSDNode>(N.getOperand(1))) {2983SDValue N0 = N.getOperand(0);2984if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&2985isa<ConstantSDNode>(N0.getOperand(1))) {2986uint64_t Mask = N0.getConstantOperandVal(1);2987if (isShiftedMask_64(Mask)) {2988unsigned C1 = N.getConstantOperandVal(1);2989unsigned XLen = Subtarget->getXLen();2990unsigned Leading = XLen - llvm::bit_width(Mask);2991unsigned Trailing = llvm::countr_zero(Mask);2992// Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and2993// C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.2994if (LeftShift && Leading == 32 && Trailing > 0 &&2995(Trailing + C1) == ShAmt) {2996SDLoc DL(N);2997EVT VT = N.getValueType();2998Val = SDValue(CurDAG->getMachineNode(2999RISCV::SRLIW, DL, VT, N0.getOperand(0),3000CurDAG->getTargetConstant(Trailing, DL, VT)),30010);3002return true;3003}3004// Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and3005// C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.3006if (!LeftShift && Leading == 32 && Trailing > C1 &&3007(Trailing - C1) == ShAmt) {3008SDLoc DL(N);3009EVT VT = N.getValueType();3010Val = SDValue(CurDAG->getMachineNode(3011RISCV::SRLIW, DL, VT, N0.getOperand(0),3012CurDAG->getTargetConstant(Trailing, DL, VT)),30130);3014return true;3015}3016}3017}3018}30193020return false;3021}30223023/// Look for various patterns that can be done with a SHL that can be folded3024/// into a SHXADD_UW. 
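/// (shXadd.uw first zero-extends rs1 from 32 bits, computing
/// (zext(rs1[31:0]) << X) + rs2.)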
\p ShAmt contains 1, 2, or 3 and is set based on which3025/// SHXADD_UW we are trying to match.3026bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,3027SDValue &Val) {3028if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&3029N.hasOneUse()) {3030SDValue N0 = N.getOperand(0);3031if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&3032N0.hasOneUse()) {3033uint64_t Mask = N.getConstantOperandVal(1);3034unsigned C2 = N0.getConstantOperandVal(1);30353036Mask &= maskTrailingZeros<uint64_t>(C2);30373038// Look for (and (shl y, c2), c1) where c1 is a shifted mask with3039// 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by3040// c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.3041if (isShiftedMask_64(Mask)) {3042unsigned Leading = llvm::countl_zero(Mask);3043unsigned Trailing = llvm::countr_zero(Mask);3044if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {3045SDLoc DL(N);3046EVT VT = N.getValueType();3047Val = SDValue(CurDAG->getMachineNode(3048RISCV::SLLI, DL, VT, N0.getOperand(0),3049CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),30500);3051return true;3052}3053}3054}3055}30563057return false;3058}30593060static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,3061unsigned Bits,3062const TargetInstrInfo *TII) {3063unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());30643065if (!MCOpcode)3066return false;30673068const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());3069const uint64_t TSFlags = MCID.TSFlags;3070if (!RISCVII::hasSEWOp(TSFlags))3071return false;3072assert(RISCVII::hasVLOp(TSFlags));30733074bool HasGlueOp = User->getGluedNode() != nullptr;3075unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;3076bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;3077bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);3078unsigned VLIdx =3079User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;3080const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);30813082if (UserOpNo == VLIdx)3083return false;30843085auto NumDemandedBits =3086RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);3087return NumDemandedBits && Bits >= *NumDemandedBits;3088}30893090// Return true if all users of this SDNode* only consume the lower \p Bits.3091// This can be used to form W instructions for add/sub/mul/shl even when the3092// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if3093// SimplifyDemandedBits has made it so some users see a sext_inreg and some3094// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave3095// the add/sub/mul/shl to become non-W instructions. By checking the users we3096// may be able to use a W instruction and CSE with the other instruction if3097// this has happened. 
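// For example (illustrative), if one user of an (add X, Y) sees it through a
// sext_inreg and another does not, selecting ADDW for the plain use as well
// lets MachineCSE merge the two.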
We could try to detect that the CSE opportunity exists3098// before doing this, but that would be more complicated.3099bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,3100const unsigned Depth) const {3101assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||3102Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||3103Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||3104Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||3105Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||3106isa<ConstantSDNode>(Node) || Depth != 0) &&3107"Unexpected opcode");31083109if (Depth >= SelectionDAG::MaxRecursionDepth)3110return false;31113112// The PatFrags that call this may run before RISCVGenDAGISel.inc has checked3113// the VT. Ensure the type is scalar to avoid wasting time on vectors.3114if (Depth == 0 && !Node->getValueType(0).isScalarInteger())3115return false;31163117for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {3118SDNode *User = *UI;3119// Users of this node should have already been instruction selected3120if (!User->isMachineOpcode())3121return false;31223123// TODO: Add more opcodes?3124switch (User->getMachineOpcode()) {3125default:3126if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))3127break;3128return false;3129case RISCV::ADDW:3130case RISCV::ADDIW:3131case RISCV::SUBW:3132case RISCV::MULW:3133case RISCV::SLLW:3134case RISCV::SLLIW:3135case RISCV::SRAW:3136case RISCV::SRAIW:3137case RISCV::SRLW:3138case RISCV::SRLIW:3139case RISCV::DIVW:3140case RISCV::DIVUW:3141case RISCV::REMW:3142case RISCV::REMUW:3143case RISCV::ROLW:3144case RISCV::RORW:3145case RISCV::RORIW:3146case RISCV::CLZW:3147case RISCV::CTZW:3148case RISCV::CPOPW:3149case RISCV::SLLI_UW:3150case RISCV::FMV_W_X:3151case RISCV::FCVT_H_W:3152case RISCV::FCVT_H_WU:3153case RISCV::FCVT_S_W:3154case RISCV::FCVT_S_WU:3155case RISCV::FCVT_D_W:3156case RISCV::FCVT_D_WU:3157case RISCV::TH_REVW:3158case RISCV::TH_SRRIW:3159if (Bits < 32)3160return false;3161break;3162case RISCV::SLL:3163case RISCV::SRA:3164case RISCV::SRL:3165case RISCV::ROL:3166case RISCV::ROR:3167case RISCV::BSET:3168case RISCV::BCLR:3169case RISCV::BINV:3170// Shift amount operands only use log2(Xlen) bits.3171if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))3172return false;3173break;3174case RISCV::SLLI:3175// SLLI only uses the lower (XLen - ShAmt) bits.3176if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))3177return false;3178break;3179case RISCV::ANDI:3180if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))3181break;3182goto RecCheck;3183case RISCV::ORI: {3184uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();3185if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))3186break;3187[[fallthrough]];3188}3189case RISCV::AND:3190case RISCV::OR:3191case RISCV::XOR:3192case RISCV::XORI:3193case RISCV::ANDN:3194case RISCV::ORN:3195case RISCV::XNOR:3196case RISCV::SH1ADD:3197case RISCV::SH2ADD:3198case RISCV::SH3ADD:3199RecCheck:3200if (hasAllNBitUsers(User, Bits, Depth + 1))3201break;3202return false;3203case RISCV::SRLI: {3204unsigned ShAmt = User->getConstantOperandVal(1);3205// If we are shifting right by less than Bits, and users don't demand any3206// bits that were shifted into [Bits-1:0], then we can consider this as an3207// N-Bit user.3208if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))3209break;3210return false;3211}3212case RISCV::SEXT_B:3213case 
RISCV::PACKH:
      if (Bits < 8)
        return false;
      break;
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
    case RISCV::PACKW:
      if (Bits < 16)
        return false;
      break;
    case RISCV::PACK:
      if (Bits < (Subtarget->getXLen() / 2))
        return false;
      break;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    case RISCV::SB:
      if (UI.getOperandNo() != 0 || Bits < 8)
        return false;
      break;
    case RISCV::SH:
      if (UI.getOperandNo() != 0 || Bits < 16)
        return false;
      break;
    case RISCV::SW:
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    }
  }

  return true;
}

// Select a constant that can be represented as (sign_extend(imm5) << imm2).
bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
                                        SDValue &Shl2) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t Offset = C->getSExtValue();
    int64_t Shift;
    for (Shift = 0; Shift < 4; Shift++)
      if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
        break;

    // Constant cannot be encoded.
    if (Shift == 4)
      return false;

    EVT Ty = N->getValueType(0);
    Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
    Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
    return true;
  }

  return false;
}

// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && isUInt<5>(C->getZExtValue())) {
    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                   N->getValueType(0));
  } else if (C && C->isAllOnes()) {
    // Treat all ones as VLMax.
    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                   N->getValueType(0));
  } else if (isa<RegisterSDNode>(N) &&
             cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
    // All our VL operands use an operand class that allows GPRNoX0 or an
    // immediate. Convert X0 to a special immediate to pass the
    // MachineVerifier.
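    // (This reuses VLMaxSentinel, the same encoding chosen for the all-ones
    // VLMax case above.)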
This is recognized specially by the vsetvli insertion3295// pass.3296VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),3297N->getValueType(0));3298} else {3299VL = N;3300}33013302return true;3303}33043305static SDValue findVSplat(SDValue N) {3306if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {3307if (!N.getOperand(0).isUndef())3308return SDValue();3309N = N.getOperand(1);3310}3311SDValue Splat = N;3312if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&3313Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||3314!Splat.getOperand(0).isUndef())3315return SDValue();3316assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");3317return Splat;3318}33193320bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {3321SDValue Splat = findVSplat(N);3322if (!Splat)3323return false;33243325SplatVal = Splat.getOperand(1);3326return true;3327}33283329static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,3330SelectionDAG &DAG,3331const RISCVSubtarget &Subtarget,3332std::function<bool(int64_t)> ValidateImm) {3333SDValue Splat = findVSplat(N);3334if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))3335return false;33363337const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();3338assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&3339"Unexpected splat operand type");33403341// The semantics of RISCVISD::VMV_V_X_VL is that when the operand3342// type is wider than the resulting vector element type: an implicit3343// truncation first takes place. Therefore, perform a manual3344// truncation/sign-extension in order to ignore any truncated bits and catch3345// any zero-extended immediate.3346// For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first3347// sign-extending to (XLenVT -1).3348APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);33493350int64_t SplatImm = SplatConst.getSExtValue();33513352if (!ValidateImm(SplatImm))3353return false;33543355SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());3356return true;3357}33583359bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {3360return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,3361[](int64_t Imm) { return isInt<5>(Imm); });3362}33633364bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {3365return selectVSplatImmHelper(3366N, SplatVal, *CurDAG, *Subtarget,3367[](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });3368}33693370bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,3371SDValue &SplatVal) {3372return selectVSplatImmHelper(3373N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {3374return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);3375});3376}33773378bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,3379SDValue &SplatVal) {3380return selectVSplatImmHelper(3381N, SplatVal, *CurDAG, *Subtarget,3382[Bits](int64_t Imm) { return isUIntN(Bits, Imm); });3383}33843385bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {3386auto IsExtOrTrunc = [](SDValue N) {3387switch (N->getOpcode()) {3388case ISD::SIGN_EXTEND:3389case ISD::ZERO_EXTEND:3390// There's no passthru on these _VL nodes so any VL/mask is ok, since any3391// inactive elements will be undef.3392case RISCVISD::TRUNCATE_VECTOR_VL:3393case RISCVISD::VSEXT_VL:3394case RISCVISD::VZEXT_VL:3395return true;3396default:3397return false;3398}3399};34003401// We can have multiple nested nodes, so unravel them all if 
  while (IsExtOrTrunc(N)) {
    if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
      return false;
    N = N->getOperand(0);
  }

  return selectVSplat(N, SplatVal);
}

bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
  if (!CFP)
    return false;
  const APFloat &APF = CFP->getValueAPF();
  // td can handle +0.0 already.
  if (APF.isPosZero())
    return false;

  MVT VT = CFP->getSimpleValueType(0);

  // Even if this FPImm requires an additional FNEG (i.e. the second element of
  // the returned pair is true) we still prefer FLI + FNEG over immediate
  // materialization as the latter might generate a longer instruction sequence.
  if (static_cast<const RISCVTargetLowering *>(TLI)
          ->getLegalZfaFPImm(APF, VT)
          .first >= 0)
    return false;

  MVT XLenVT = Subtarget->getXLenVT();
  if (VT == MVT::f64 && !Subtarget->is64Bit()) {
    assert(APF.isNegZero() && "Unexpected constant.");
    return false;
  }
  SDLoc DL(N);
  Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                  *Subtarget);
  return true;
}

bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
                                       SDValue &Imm) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);

    if (!isInt<5>(ImmVal))
      return false;

    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
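    // For example, (ADDIW (ADD rs1, rs2), 0) becomes (ADDW rs1, rs2), folding
    // the sign extension into the W-form arithmetic instruction.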
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD:  Opc = RISCV::ADDW;  break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB:  Opc = RISCV::SUBW;  break;
    case RISCV::MUL:  Opc = RISCV::MULW;  break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                               N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::PACKW:
  case RISCV::TH_MULAW:
  case RISCV::TH_MULAH:
  case RISCV::TH_MULSW:
  case RISCV::TH_MULSH:
    if (N0.getValueType() == MVT::i32)
      break;

    // Result is already sign extended, just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  return false;
}

// After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg
// that's glued to the pseudo. This tries to look up the value that was copied
// to V0.
static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) {
  // Check that we're using V0 as a mask register.
  if (!isa<RegisterSDNode>(MaskOp) ||
      cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
    return SDValue();

  // The glued user defines V0.
  const auto *Glued = GlueOp.getNode();

  if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
    return SDValue();

  // Check that we're defining V0 as a mask register.
  if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
      cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
    return SDValue();

  SDValue MaskSetter = Glued->getOperand(2);

  // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
  // from an extract_subvector or insert_subvector.
  if (MaskSetter->isMachineOpcode() &&
      MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
    MaskSetter = MaskSetter->getOperand(0);

  return MaskSetter;
}

static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
  // Check the instruction defining V0; it needs to be a VMSET pseudo.
  SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp);
  if (!MaskSetter)
    return false;

  const auto IsVMSet = [](unsigned Opc) {
    return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
           Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
           Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
           Opc == RISCV::PseudoVMSET_M_B8;
  };

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  return MaskSetter->isMachineOpcode() &&
         IsVMSet(MaskSetter.getMachineOpcode());
}

// Return true if we can prove that the mask operand of N is an all-ones mask.
static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
  return usesAllOnesMask(N->getOperand(MaskOpIdx),
                         N->getOperand(N->getNumOperands() - 1));
}

static bool isImplicitDef(SDValue V) {
  if (!V.isMachineOpcode())
    return false;
  if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
    for (unsigned I = 1; I < V.getNumOperands(); I += 2)
      if (!isImplicitDef(V.getOperand(I)))
        return false;
    return true;
  }
  return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
}

// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;
  if (!usesAllOnesMask(N, MaskOpIdx))
    return false;

  // There are two classes of pseudos in the table - compares and
  // everything else. See the comment on RISCVMaskedPseudo for details.
  const unsigned Opc = I->UnmaskedPseudo;
  const MCInstrDesc &MCID = TII->get(Opc);
  const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
#ifndef NDEBUG
  const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
             RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
         "Masked and unmasked pseudos are inconsistent");
  const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
  assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
#endif

  SmallVector<SDValue, 8> Ops;
  // Skip the merge operand at index 0 if !UseTUPseudo.
  for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask, and the Glue.
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
      continue;
    Ops.push_back(Op);
  }

  // Transitively apply any node glued to our new node.
  const auto *Glued = N->getGluedNode();
  if (auto *TGlued = Glued->getGluedNode())
    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));

  MachineSDNode *Result =
      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);

  if (!N->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, N->memoperands());

  Result->setFlags(N->getFlags());
  ReplaceUses(N, Result);

  return true;
}

static bool IsVMerge(SDNode *N) {
  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
}

static bool IsVMv(SDNode *N) {
  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
}

static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
  switch (LMUL) {
  case RISCVII::LMUL_F8:
    return RISCV::PseudoVMSET_M_B1;
  case RISCVII::LMUL_F4:
    return RISCV::PseudoVMSET_M_B2;
  case RISCVII::LMUL_F2:
    return RISCV::PseudoVMSET_M_B4;
  case RISCVII::LMUL_1:
    return RISCV::PseudoVMSET_M_B8;
  case RISCVII::LMUL_2:
    return RISCV::PseudoVMSET_M_B16;
  case RISCVII::LMUL_4:
    return RISCV::PseudoVMSET_M_B32;
  case RISCVII::LMUL_8:
    return RISCV::PseudoVMSET_M_B64;
  case RISCVII::LMUL_RESERVED:
    llvm_unreachable("Unexpected LMUL");
  }
  llvm_unreachable("Unknown VLMUL enum");
}

// Try to fold away VMERGE_VVM instructions into their true operands:
//
// %true = PseudoVADD_VV ...
// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
// ->
// %x = PseudoVADD_VV_MASK %false, ..., %mask
//
// We can only fold if vmerge's merge operand, vmerge's false operand and
// %true's merge operand (if it has one) are the same. This is because we have
// to consolidate them into one merge operand in the result.
//
// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
// mask is all ones.
//
// We can also fold a VMV_V_V into its true operand, since it is equivalent to a
// VMERGE_VVM with an all ones mask.
//
// The resulting VL is the minimum of the two VLs.
//
// The resulting policy is the effective policy the vmerge would have had,
// i.e. whether or not its merge operand was implicit-def.
bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
  SDValue Merge, False, True, VL, Mask, Glue;
  // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
  if (IsVMv(N)) {
    Merge = N->getOperand(0);
    False = N->getOperand(0);
    True = N->getOperand(1);
    VL = N->getOperand(2);
    // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
    // mask later below.
  } else {
    assert(IsVMerge(N));
    Merge = N->getOperand(0);
    False = N->getOperand(1);
    True = N->getOperand(2);
    Mask = N->getOperand(3);
    VL = N->getOperand(4);
    // We always have a glue node for the mask at v0.
    Glue = N->getOperand(N->getNumOperands() - 1);
  }
  assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
  assert(!Glue || Glue.getValueType() == MVT::Glue);

  // If the EEW of True is different from vmerge's SEW, then we can't fold.
  if (True.getSimpleValueType() != N->getSimpleValueType(0))
    return false;

  // We require that either merge and false are the same, or that merge
  // is undefined.
  if (Merge != False && !isImplicitDef(Merge))
    return false;

  assert(True.getResNo() == 0 &&
         "Expect True is the first output of an instruction.");

  // N must be the only user of True.
  if (!True.hasOneUse())
    return false;

  if (!True.isMachineOpcode())
    return false;

  unsigned TrueOpc = True.getMachineOpcode();
  const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
  uint64_t TrueTSFlags = TrueMCID.TSFlags;
  bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);

  bool IsMasked = false;
  const RISCV::RISCVMaskedPseudoInfo *Info =
      RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
  if (!Info && HasTiedDest) {
    Info = RISCV::getMaskedPseudoInfo(TrueOpc);
    IsMasked = true;
  }
  assert(!(IsMasked && !HasTiedDest) && "Expected tied dest");

  if (!Info)
    return false;

  // If True has a merge operand then it needs to be the same as vmerge's False,
  // since False will be used for the result's merge operand.
  if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
    SDValue MergeOpTrue = True->getOperand(0);
    if (False != MergeOpTrue)
      return false;
  }

  // If True is masked then the vmerge must have either the same mask or an all
  // 1s mask, since we're going to keep the mask from True.
  if (IsMasked && Mask) {
    // FIXME: Support mask agnostic True instruction which would have an
    // undef merge operand.
    SDValue TrueMask =
        getMaskSetter(True->getOperand(Info->MaskOpIdx),
                      True->getOperand(True->getNumOperands() - 1));
    assert(TrueMask);
    if (!usesAllOnesMask(Mask, Glue) && getMaskSetter(Mask, Glue) != TrueMask)
      return false;
  }

  // Skip if True has side effects.
  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
    return false;

  // The last operand of a masked instruction may be glued.
  bool HasGlueOp = True->getGluedNode() != nullptr;

  // The chain operand may exist either before the glued operands or in the last
  // position.
  unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
  bool HasChainOp =
      True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;

  if (HasChainOp) {
    // Avoid creating cycles in the DAG. We must ensure that none of the other
    // operands depend on True through its Chain.
    SmallVector<const SDNode *, 4> LoopWorklist;
    SmallPtrSet<const SDNode *, 16> Visited;
    LoopWorklist.push_back(False.getNode());
    if (Mask)
      LoopWorklist.push_back(Mask.getNode());
    LoopWorklist.push_back(VL.getNode());
    if (Glue)
      LoopWorklist.push_back(Glue.getNode());
    if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
      return false;
  }

  // The vector policy operand may be present for masked intrinsics.
  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
  unsigned TrueVLIndex =
      True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
  SDValue TrueVL = True.getOperand(TrueVLIndex);
  SDValue SEW = True.getOperand(TrueVLIndex + 1);

  auto GetMinVL = [](SDValue LHS, SDValue RHS) {
    if (LHS == RHS)
      return LHS;
    if (isAllOnesConstant(LHS))
      return RHS;
    if (isAllOnesConstant(RHS))
      return LHS;
    auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
    auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
    if (!CLHS || !CRHS)
      return SDValue();
    return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
  };

  // Because N and True must have the same merge operand (or True's operand is
  // implicit_def), the "effective" body is the minimum of their VLs.
  SDValue OrigVL = VL;
  VL = GetMinVL(TrueVL, VL);
  if (!VL)
    return false;

  // Some operations produce different elementwise results depending on the
  // active elements, like viota.m or vredsum. This transformation is illegal
  // for these if we change the active elements (i.e. mask or VL).
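  // For example, vredsum sums only the active elements and viota.m counts set
  // mask bits among the active elements, so a different mask or a smaller VL
  // would change even the results that are kept.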
  if (Info->ActiveElementsAffectResult) {
    if (Mask && !usesAllOnesMask(Mask, Glue))
      return false;
    if (TrueVL != VL)
      return false;
  }

  // If we end up changing the VL or mask of True, then we need to make sure it
  // doesn't raise any observable fp exceptions, since changing the active
  // elements will affect how fflags is set.
  if (TrueVL != VL || !IsMasked)
    if (mayRaiseFPException(True.getNode()) &&
        !True->getFlags().hasNoFPExcept())
      return false;

  SDLoc DL(N);

  // From the preconditions we checked above, we know the mask and thus glue
  // for the result node will be taken from True.
  if (IsMasked) {
    Mask = True->getOperand(Info->MaskOpIdx);
    Glue = True->getOperand(True->getNumOperands() - 1);
    assert(Glue.getValueType() == MVT::Glue);
  }
  // If the vmerge is actually a vmv.v.v, there is no vmerge mask to reuse, so
  // create an all-ones mask to use.
  else if (IsVMv(N)) {
    unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
    unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
    ElementCount EC = N->getValueType(0).getVectorElementCount();
    MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);

    SDValue AllOnesMask =
        SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
    SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                            RISCV::V0, AllOnesMask, SDValue());
    Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
    Glue = MaskCopy.getValue(1);
  }

  unsigned MaskedOpc = Info->MaskedPseudo;
#ifndef NDEBUG
  const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
         "Expected instructions with mask have policy operand.");
  assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
                                         MCOI::TIED_TO) == 0 &&
         "Expected instructions with mask have a tied dest.");
#endif

  // Use a tumu policy, relaxing it to tail agnostic provided that the merge
  // operand is undefined.
  //
  // However, if the VL became smaller than what the vmerge had originally, then
  // elements past VL that were previously in the vmerge's body will have moved
  // to the tail. In that case we always need to use tail undisturbed to
  // preserve them.
  bool MergeVLShrunk = VL != OrigVL;
  uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
                        ? RISCVII::TAIL_AGNOSTIC
                        : /*TUMU*/ 0;
  SDValue PolicyOp =
      CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(False);

  const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
  const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
  assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
  Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);

  Ops.push_back(Mask);

  // For an unmasked "VOp" with a rounding mode operand, the operands look like
  // (..., rm, vl) or (..., rm, vl, policy).
  // Its masked version is (..., vm, rm, vl, policy).
  // See the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td.
  if (HasRoundingMode)
    Ops.push_back(True->getOperand(TrueVLIndex - 1));

  Ops.append({VL, SEW, PolicyOp});

  // Result node should have chain operand of True.
  if (HasChainOp)
    Ops.push_back(True.getOperand(TrueChainOpIdx));

  // Add the glue for the CopyToReg of mask->v0.
  Ops.push_back(Glue);

  MachineSDNode *Result =
      CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
  Result->setFlags(True->getFlags());

  if (!cast<MachineSDNode>(True)->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());

  // Replace the vmerge.vvm node with Result.
  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));

  // Replace the other values of True, e.g. its chain and VL outputs.
  for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
    ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));

  return true;
}

bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (IsVMerge(N) || IsVMv(N))
      MadeChange |= performCombineVMergeAndVOps(N);
  }
  return MadeChange;
}

/// If our passthru is an implicit_def, use noreg instead. This side-steps
/// issues with MachineCSE not being able to CSE expressions with IMPLICIT_DEF
/// operands while preserving the semantic intent. See pr64282 for context.
/// Note that this transform is the last one performed at ISel DAG to DAG.
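/// For example, an RVV pseudo whose tied passthru operand (operand 0) is an
/// IMPLICIT_DEF is rebuilt with NoRegister as the passthru, with all other
/// operands copied over unchanged.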
bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    const unsigned Opc = N->getMachineOpcode();
    if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
        !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
        !isImplicitDef(N->getOperand(0)))
      continue;

    SmallVector<SDValue> Ops;
    Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
    for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
      SDValue Op = N->getOperand(I);
      Ops.push_back(Op);
    }

    MachineSDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
    Result->setFlags(N->getFlags());
    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
    ReplaceUses(N, Result);
    MadeChange = true;
  }
  return MadeChange;
}

// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
                                       CodeGenOptLevel OptLevel) {
  return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
}

char RISCVDAGToDAGISelLegacy::ID = 0;

RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
                                                 CodeGenOptLevel OptLevel)
    : SelectionDAGISelLegacy(
          ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}

INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)