Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
35294 views
1
//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines an instruction selector for the RISC-V target.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "RISCVISelDAGToDAG.h"
14
#include "MCTargetDesc/RISCVBaseInfo.h"
15
#include "MCTargetDesc/RISCVMCTargetDesc.h"
16
#include "MCTargetDesc/RISCVMatInt.h"
17
#include "RISCVISelLowering.h"
18
#include "RISCVMachineFunctionInfo.h"
19
#include "llvm/CodeGen/MachineFrameInfo.h"
20
#include "llvm/IR/IntrinsicsRISCV.h"
21
#include "llvm/Support/Alignment.h"
22
#include "llvm/Support/Debug.h"
23
#include "llvm/Support/MathExtras.h"
24
#include "llvm/Support/raw_ostream.h"
25
26
using namespace llvm;
27
28
#define DEBUG_TYPE "riscv-isel"
29
#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
30
31
static cl::opt<bool> UsePseudoMovImm(
32
"riscv-use-rematerializable-movimm", cl::Hidden,
33
cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
34
"constant materialization"),
35
cl::init(false));
36
37
namespace llvm::RISCV {
38
#define GET_RISCVVSSEGTable_IMPL
39
#define GET_RISCVVLSEGTable_IMPL
40
#define GET_RISCVVLXSEGTable_IMPL
41
#define GET_RISCVVSXSEGTable_IMPL
42
#define GET_RISCVVLETable_IMPL
43
#define GET_RISCVVSETable_IMPL
44
#define GET_RISCVVLXTable_IMPL
45
#define GET_RISCVVSXTable_IMPL
46
#include "RISCVGenSearchableTables.inc"
47
} // namespace llvm::RISCV
48
49
void RISCVDAGToDAGISel::PreprocessISelDAG() {
50
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
51
52
bool MadeChange = false;
53
while (Position != CurDAG->allnodes_begin()) {
54
SDNode *N = &*--Position;
55
if (N->use_empty())
56
continue;
57
58
SDValue Result;
59
switch (N->getOpcode()) {
60
case ISD::SPLAT_VECTOR: {
61
// Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
62
// SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
63
MVT VT = N->getSimpleValueType(0);
64
unsigned Opc =
65
VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
66
SDLoc DL(N);
67
SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
68
SDValue Src = N->getOperand(0);
69
if (VT.isInteger())
70
Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
71
N->getOperand(0));
72
Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
73
break;
74
}
75
case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
76
// Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
77
// load. Done after lowering and combining so that we have a chance to
78
// optimize this to VMV_V_X_VL when the upper bits aren't needed.
79
assert(N->getNumOperands() == 4 && "Unexpected number of operands");
80
MVT VT = N->getSimpleValueType(0);
81
SDValue Passthru = N->getOperand(0);
82
SDValue Lo = N->getOperand(1);
83
SDValue Hi = N->getOperand(2);
84
SDValue VL = N->getOperand(3);
85
assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
86
Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
87
"Unexpected VTs!");
88
MachineFunction &MF = CurDAG->getMachineFunction();
89
SDLoc DL(N);
90
91
// Create temporary stack for each expanding node.
92
SDValue StackSlot =
93
CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
94
int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
95
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
96
97
SDValue Chain = CurDAG->getEntryNode();
98
Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
99
100
SDValue OffsetSlot =
101
CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
102
Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
103
Align(8));
104
105
Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
106
107
SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
108
SDValue IntID =
109
CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
110
SDValue Ops[] = {Chain,
111
IntID,
112
Passthru,
113
StackSlot,
114
CurDAG->getRegister(RISCV::X0, MVT::i64),
115
VL};
116
117
Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
118
MVT::i64, MPI, Align(8),
119
MachineMemOperand::MOLoad);
120
break;
121
}
122
}
123
124
if (Result) {
125
LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
126
LLVM_DEBUG(N->dump(CurDAG));
127
LLVM_DEBUG(dbgs() << "\nNew: ");
128
LLVM_DEBUG(Result->dump(CurDAG));
129
LLVM_DEBUG(dbgs() << "\n");
130
131
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
132
MadeChange = true;
133
}
134
}
135
136
if (MadeChange)
137
CurDAG->RemoveDeadNodes();
138
}
139
140
void RISCVDAGToDAGISel::PostprocessISelDAG() {
141
HandleSDNode Dummy(CurDAG->getRoot());
142
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
143
144
bool MadeChange = false;
145
while (Position != CurDAG->allnodes_begin()) {
146
SDNode *N = &*--Position;
147
// Skip dead nodes and any non-machine opcodes.
148
if (N->use_empty() || !N->isMachineOpcode())
149
continue;
150
151
MadeChange |= doPeepholeSExtW(N);
152
153
// FIXME: This is here only because the VMerge transform doesn't
154
// know how to handle masked true inputs. Once that has been moved
155
// to post-ISEL, this can be deleted as well.
156
MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
157
}
158
159
CurDAG->setRoot(Dummy.getValue());
160
161
MadeChange |= doPeepholeMergeVVMFold();
162
163
// After we're done with everything else, convert IMPLICIT_DEF
164
// passthru operands to NoRegister. This is required to workaround
165
// an optimization deficiency in MachineCSE. This really should
166
// be merged back into each of the patterns (i.e. there's no good
167
// reason not to go directly to NoReg), but is being done this way
168
// to allow easy backporting.
169
MadeChange |= doPeepholeNoRegPassThru();
170
171
if (MadeChange)
172
CurDAG->RemoveDeadNodes();
173
}
174
175
static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
176
RISCVMatInt::InstSeq &Seq) {
177
SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
178
for (const RISCVMatInt::Inst &Inst : Seq) {
179
SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
180
SDNode *Result = nullptr;
181
switch (Inst.getOpndKind()) {
182
case RISCVMatInt::Imm:
183
Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
184
break;
185
case RISCVMatInt::RegX0:
186
Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
187
CurDAG->getRegister(RISCV::X0, VT));
188
break;
189
case RISCVMatInt::RegReg:
190
Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
191
break;
192
case RISCVMatInt::RegImm:
193
Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
194
break;
195
}
196
197
// Only the first instruction has X0 as its source.
198
SrcReg = SDValue(Result, 0);
199
}
200
201
return SrcReg;
202
}
203
204
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
205
int64_t Imm, const RISCVSubtarget &Subtarget) {
206
RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
207
208
// Use a rematerializable pseudo instruction for short sequences if enabled.
209
if (Seq.size() == 2 && UsePseudoMovImm)
210
return SDValue(
211
CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
212
CurDAG->getTargetConstant(Imm, DL, VT)),
213
0);
214
215
// See if we can create this constant as (ADD (SLLI X, C), X) where X is at
216
// worst an LUI+ADDIW. This will require an extra register, but avoids a
217
// constant pool.
218
// If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
219
// low and high 32 bits are the same and bit 31 and 63 are set.
220
if (Seq.size() > 3) {
221
unsigned ShiftAmt, AddOpc;
222
RISCVMatInt::InstSeq SeqLo =
223
RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
224
if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
225
SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
226
227
SDValue SLLI = SDValue(
228
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
229
CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
230
0);
231
return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
232
}
233
}
234
235
// Otherwise, use the original sequence.
236
return selectImmSeq(CurDAG, DL, VT, Seq);
237
}
238
239
static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
240
unsigned NF, RISCVII::VLMUL LMUL) {
241
static const unsigned M1TupleRegClassIDs[] = {
242
RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
243
RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
244
RISCV::VRN8M1RegClassID};
245
static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
246
RISCV::VRN3M2RegClassID,
247
RISCV::VRN4M2RegClassID};
248
249
assert(Regs.size() >= 2 && Regs.size() <= 8);
250
251
unsigned RegClassID;
252
unsigned SubReg0;
253
switch (LMUL) {
254
default:
255
llvm_unreachable("Invalid LMUL.");
256
case RISCVII::VLMUL::LMUL_F8:
257
case RISCVII::VLMUL::LMUL_F4:
258
case RISCVII::VLMUL::LMUL_F2:
259
case RISCVII::VLMUL::LMUL_1:
260
static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
261
"Unexpected subreg numbering");
262
SubReg0 = RISCV::sub_vrm1_0;
263
RegClassID = M1TupleRegClassIDs[NF - 2];
264
break;
265
case RISCVII::VLMUL::LMUL_2:
266
static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
267
"Unexpected subreg numbering");
268
SubReg0 = RISCV::sub_vrm2_0;
269
RegClassID = M2TupleRegClassIDs[NF - 2];
270
break;
271
case RISCVII::VLMUL::LMUL_4:
272
static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
273
"Unexpected subreg numbering");
274
SubReg0 = RISCV::sub_vrm4_0;
275
RegClassID = RISCV::VRN2M4RegClassID;
276
break;
277
}
278
279
SDLoc DL(Regs[0]);
280
SmallVector<SDValue, 8> Ops;
281
282
Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
283
284
for (unsigned I = 0; I < Regs.size(); ++I) {
285
Ops.push_back(Regs[I]);
286
Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
287
}
288
SDNode *N =
289
CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
290
return SDValue(N, 0);
291
}
292
293
void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
294
SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
295
bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
296
bool IsLoad, MVT *IndexVT) {
297
SDValue Chain = Node->getOperand(0);
298
SDValue Glue;
299
300
Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
301
302
if (IsStridedOrIndexed) {
303
Operands.push_back(Node->getOperand(CurOp++)); // Index.
304
if (IndexVT)
305
*IndexVT = Operands.back()->getSimpleValueType(0);
306
}
307
308
if (IsMasked) {
309
// Mask needs to be copied to V0.
310
SDValue Mask = Node->getOperand(CurOp++);
311
Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
312
Glue = Chain.getValue(1);
313
Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
314
}
315
SDValue VL;
316
selectVLOp(Node->getOperand(CurOp++), VL);
317
Operands.push_back(VL);
318
319
MVT XLenVT = Subtarget->getXLenVT();
320
SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
321
Operands.push_back(SEWOp);
322
323
// At the IR layer, all the masked load intrinsics have policy operands,
324
// none of the others do. All have passthru operands. For our pseudos,
325
// all loads have policy operands.
326
if (IsLoad) {
327
uint64_t Policy = RISCVII::MASK_AGNOSTIC;
328
if (IsMasked)
329
Policy = Node->getConstantOperandVal(CurOp++);
330
SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
331
Operands.push_back(PolicyOp);
332
}
333
334
Operands.push_back(Chain); // Chain.
335
if (Glue)
336
Operands.push_back(Glue);
337
}
338
339
void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
340
bool IsStrided) {
341
SDLoc DL(Node);
342
unsigned NF = Node->getNumValues() - 1;
343
MVT VT = Node->getSimpleValueType(0);
344
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
345
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
346
347
unsigned CurOp = 2;
348
SmallVector<SDValue, 8> Operands;
349
350
SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
351
Node->op_begin() + CurOp + NF);
352
SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
353
Operands.push_back(Merge);
354
CurOp += NF;
355
356
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
357
Operands, /*IsLoad=*/true);
358
359
const RISCV::VLSEGPseudo *P =
360
RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
361
static_cast<unsigned>(LMUL));
362
MachineSDNode *Load =
363
CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
364
365
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
366
CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
367
368
SDValue SuperReg = SDValue(Load, 0);
369
for (unsigned I = 0; I < NF; ++I) {
370
unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
371
ReplaceUses(SDValue(Node, I),
372
CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
373
}
374
375
ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
376
CurDAG->RemoveDeadNode(Node);
377
}
378
379
void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
380
SDLoc DL(Node);
381
unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
382
MVT VT = Node->getSimpleValueType(0);
383
MVT XLenVT = Subtarget->getXLenVT();
384
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
385
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
386
387
unsigned CurOp = 2;
388
SmallVector<SDValue, 7> Operands;
389
390
SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
391
Node->op_begin() + CurOp + NF);
392
SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
393
Operands.push_back(MaskedOff);
394
CurOp += NF;
395
396
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
397
/*IsStridedOrIndexed*/ false, Operands,
398
/*IsLoad=*/true);
399
400
const RISCV::VLSEGPseudo *P =
401
RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
402
Log2SEW, static_cast<unsigned>(LMUL));
403
MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
404
XLenVT, MVT::Other, Operands);
405
406
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
407
CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
408
409
SDValue SuperReg = SDValue(Load, 0);
410
for (unsigned I = 0; I < NF; ++I) {
411
unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
412
ReplaceUses(SDValue(Node, I),
413
CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
414
}
415
416
ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
417
ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
418
CurDAG->RemoveDeadNode(Node);
419
}
420
421
void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
422
bool IsOrdered) {
423
SDLoc DL(Node);
424
unsigned NF = Node->getNumValues() - 1;
425
MVT VT = Node->getSimpleValueType(0);
426
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
427
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
428
429
unsigned CurOp = 2;
430
SmallVector<SDValue, 8> Operands;
431
432
SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
433
Node->op_begin() + CurOp + NF);
434
SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
435
Operands.push_back(MaskedOff);
436
CurOp += NF;
437
438
MVT IndexVT;
439
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
440
/*IsStridedOrIndexed*/ true, Operands,
441
/*IsLoad=*/true, &IndexVT);
442
443
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
444
"Element count mismatch");
445
446
RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
447
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
448
if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449
report_fatal_error("The V extension does not support EEW=64 for index "
450
"values when XLEN=32");
451
}
452
const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
453
NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
454
static_cast<unsigned>(IndexLMUL));
455
MachineSDNode *Load =
456
CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
457
458
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
459
CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
460
461
SDValue SuperReg = SDValue(Load, 0);
462
for (unsigned I = 0; I < NF; ++I) {
463
unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
464
ReplaceUses(SDValue(Node, I),
465
CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
466
}
467
468
ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
469
CurDAG->RemoveDeadNode(Node);
470
}
471
472
void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
473
bool IsStrided) {
474
SDLoc DL(Node);
475
unsigned NF = Node->getNumOperands() - 4;
476
if (IsStrided)
477
NF--;
478
if (IsMasked)
479
NF--;
480
MVT VT = Node->getOperand(2)->getSimpleValueType(0);
481
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
482
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
483
SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
484
SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
485
486
SmallVector<SDValue, 8> Operands;
487
Operands.push_back(StoreVal);
488
unsigned CurOp = 2 + NF;
489
490
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
491
Operands);
492
493
const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
494
NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
495
MachineSDNode *Store =
496
CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
497
498
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
499
CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
500
501
ReplaceNode(Node, Store);
502
}
503
504
void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
505
bool IsOrdered) {
506
SDLoc DL(Node);
507
unsigned NF = Node->getNumOperands() - 5;
508
if (IsMasked)
509
--NF;
510
MVT VT = Node->getOperand(2)->getSimpleValueType(0);
511
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
512
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
513
SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
514
SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
515
516
SmallVector<SDValue, 8> Operands;
517
Operands.push_back(StoreVal);
518
unsigned CurOp = 2 + NF;
519
520
MVT IndexVT;
521
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
522
/*IsStridedOrIndexed*/ true, Operands,
523
/*IsLoad=*/false, &IndexVT);
524
525
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
526
"Element count mismatch");
527
528
RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
529
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
530
if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
531
report_fatal_error("The V extension does not support EEW=64 for index "
532
"values when XLEN=32");
533
}
534
const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
535
NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
536
static_cast<unsigned>(IndexLMUL));
537
MachineSDNode *Store =
538
CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
539
540
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
541
CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
542
543
ReplaceNode(Node, Store);
544
}
545
546
void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
547
if (!Subtarget->hasVInstructions())
548
return;
549
550
assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
551
552
SDLoc DL(Node);
553
MVT XLenVT = Subtarget->getXLenVT();
554
555
unsigned IntNo = Node->getConstantOperandVal(0);
556
557
assert((IntNo == Intrinsic::riscv_vsetvli ||
558
IntNo == Intrinsic::riscv_vsetvlimax) &&
559
"Unexpected vsetvli intrinsic");
560
561
bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
562
unsigned Offset = (VLMax ? 1 : 2);
563
564
assert(Node->getNumOperands() == Offset + 2 &&
565
"Unexpected number of operands");
566
567
unsigned SEW =
568
RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
569
RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
570
Node->getConstantOperandVal(Offset + 1) & 0x7);
571
572
unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
573
/*MaskAgnostic*/ true);
574
SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
575
576
SDValue VLOperand;
577
unsigned Opcode = RISCV::PseudoVSETVLI;
578
if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
579
if (auto VLEN = Subtarget->getRealVLen())
580
if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
581
VLMax = true;
582
}
583
if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
584
VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
585
Opcode = RISCV::PseudoVSETVLIX0;
586
} else {
587
VLOperand = Node->getOperand(1);
588
589
if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
590
uint64_t AVL = C->getZExtValue();
591
if (isUInt<5>(AVL)) {
592
SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
593
ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
594
XLenVT, VLImm, VTypeIOp));
595
return;
596
}
597
}
598
}
599
600
ReplaceNode(Node,
601
CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
602
}
603
604
bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
605
MVT VT = Node->getSimpleValueType(0);
606
unsigned Opcode = Node->getOpcode();
607
assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
608
"Unexpected opcode");
609
SDLoc DL(Node);
610
611
// For operations of the form (x << C1) op C2, check if we can use
612
// ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
613
SDValue N0 = Node->getOperand(0);
614
SDValue N1 = Node->getOperand(1);
615
616
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
617
if (!Cst)
618
return false;
619
620
int64_t Val = Cst->getSExtValue();
621
622
// Check if immediate can already use ANDI/ORI/XORI.
623
if (isInt<12>(Val))
624
return false;
625
626
SDValue Shift = N0;
627
628
// If Val is simm32 and we have a sext_inreg from i32, then the binop
629
// produces at least 33 sign bits. We can peek through the sext_inreg and use
630
// a SLLIW at the end.
631
bool SignExt = false;
632
if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
633
N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
634
SignExt = true;
635
Shift = N0.getOperand(0);
636
}
637
638
if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
639
return false;
640
641
ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
642
if (!ShlCst)
643
return false;
644
645
uint64_t ShAmt = ShlCst->getZExtValue();
646
647
// Make sure that we don't change the operation by removing bits.
648
// This only matters for OR and XOR, AND is unaffected.
649
uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
650
if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
651
return false;
652
653
int64_t ShiftedVal = Val >> ShAmt;
654
if (!isInt<12>(ShiftedVal))
655
return false;
656
657
// If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
658
if (SignExt && ShAmt >= 32)
659
return false;
660
661
// Ok, we can reorder to get a smaller immediate.
662
unsigned BinOpc;
663
switch (Opcode) {
664
default: llvm_unreachable("Unexpected opcode");
665
case ISD::AND: BinOpc = RISCV::ANDI; break;
666
case ISD::OR: BinOpc = RISCV::ORI; break;
667
case ISD::XOR: BinOpc = RISCV::XORI; break;
668
}
669
670
unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
671
672
SDNode *BinOp =
673
CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
674
CurDAG->getTargetConstant(ShiftedVal, DL, VT));
675
SDNode *SLLI =
676
CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
677
CurDAG->getTargetConstant(ShAmt, DL, VT));
678
ReplaceNode(Node, SLLI);
679
return true;
680
}
681
682
bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
683
// Only supported with XTHeadBb at the moment.
684
if (!Subtarget->hasVendorXTHeadBb())
685
return false;
686
687
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
688
if (!N1C)
689
return false;
690
691
SDValue N0 = Node->getOperand(0);
692
if (!N0.hasOneUse())
693
return false;
694
695
auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
696
MVT VT) {
697
return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
698
CurDAG->getTargetConstant(Msb, DL, VT),
699
CurDAG->getTargetConstant(Lsb, DL, VT));
700
};
701
702
SDLoc DL(Node);
703
MVT VT = Node->getSimpleValueType(0);
704
const unsigned RightShAmt = N1C->getZExtValue();
705
706
// Transform (sra (shl X, C1) C2) with C1 < C2
707
// -> (TH.EXT X, msb, lsb)
708
if (N0.getOpcode() == ISD::SHL) {
709
auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
710
if (!N01C)
711
return false;
712
713
const unsigned LeftShAmt = N01C->getZExtValue();
714
// Make sure that this is a bitfield extraction (i.e., the shift-right
715
// amount can not be less than the left-shift).
716
if (LeftShAmt > RightShAmt)
717
return false;
718
719
const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
720
const unsigned Msb = MsbPlusOne - 1;
721
const unsigned Lsb = RightShAmt - LeftShAmt;
722
723
SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
724
ReplaceNode(Node, TH_EXT);
725
return true;
726
}
727
728
// Transform (sra (sext_inreg X, _), C) ->
729
// (TH.EXT X, msb, lsb)
730
if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
731
unsigned ExtSize =
732
cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
733
734
// ExtSize of 32 should use sraiw via tablegen pattern.
735
if (ExtSize == 32)
736
return false;
737
738
const unsigned Msb = ExtSize - 1;
739
const unsigned Lsb = RightShAmt;
740
741
SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
742
ReplaceNode(Node, TH_EXT);
743
return true;
744
}
745
746
return false;
747
}
748
749
bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
750
// Target does not support indexed loads.
751
if (!Subtarget->hasVendorXTHeadMemIdx())
752
return false;
753
754
LoadSDNode *Ld = cast<LoadSDNode>(Node);
755
ISD::MemIndexedMode AM = Ld->getAddressingMode();
756
if (AM == ISD::UNINDEXED)
757
return false;
758
759
const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
760
if (!C)
761
return false;
762
763
EVT LoadVT = Ld->getMemoryVT();
764
assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
765
"Unexpected addressing mode");
766
bool IsPre = AM == ISD::PRE_INC;
767
bool IsPost = AM == ISD::POST_INC;
768
int64_t Offset = C->getSExtValue();
769
770
// The constants that can be encoded in the THeadMemIdx instructions
771
// are of the form (sign_extend(imm5) << imm2).
772
int64_t Shift;
773
for (Shift = 0; Shift < 4; Shift++)
774
if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
775
break;
776
777
// Constant cannot be encoded.
778
if (Shift == 4)
779
return false;
780
781
bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
782
unsigned Opcode;
783
if (LoadVT == MVT::i8 && IsPre)
784
Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
785
else if (LoadVT == MVT::i8 && IsPost)
786
Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
787
else if (LoadVT == MVT::i16 && IsPre)
788
Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
789
else if (LoadVT == MVT::i16 && IsPost)
790
Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
791
else if (LoadVT == MVT::i32 && IsPre)
792
Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
793
else if (LoadVT == MVT::i32 && IsPost)
794
Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
795
else if (LoadVT == MVT::i64 && IsPre)
796
Opcode = RISCV::TH_LDIB;
797
else if (LoadVT == MVT::i64 && IsPost)
798
Opcode = RISCV::TH_LDIA;
799
else
800
return false;
801
802
EVT Ty = Ld->getOffset().getValueType();
803
SDValue Ops[] = {Ld->getBasePtr(),
804
CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
805
CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
806
Ld->getChain()};
807
SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
808
Ld->getValueType(1), MVT::Other, Ops);
809
810
MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
811
CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
812
813
ReplaceNode(Node, New);
814
815
return true;
816
}
817
818
void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
819
if (!Subtarget->hasVInstructions())
820
return;
821
822
assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
823
824
SDLoc DL(Node);
825
unsigned IntNo = Node->getConstantOperandVal(1);
826
827
assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
828
IntNo == Intrinsic::riscv_sf_vc_i_se) &&
829
"Unexpected vsetvli intrinsic");
830
831
// imm, imm, imm, simm5/scalar, sew, log2lmul, vl
832
unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
833
SDValue SEWOp =
834
CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
835
SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
836
Node->getOperand(4), Node->getOperand(5),
837
Node->getOperand(8), SEWOp,
838
Node->getOperand(0)};
839
840
unsigned Opcode;
841
auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
842
switch (LMulSDNode->getSExtValue()) {
843
case 5:
844
Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
845
: RISCV::PseudoVC_I_SE_MF8;
846
break;
847
case 6:
848
Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
849
: RISCV::PseudoVC_I_SE_MF4;
850
break;
851
case 7:
852
Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
853
: RISCV::PseudoVC_I_SE_MF2;
854
break;
855
case 0:
856
Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
857
: RISCV::PseudoVC_I_SE_M1;
858
break;
859
case 1:
860
Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
861
: RISCV::PseudoVC_I_SE_M2;
862
break;
863
case 2:
864
Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
865
: RISCV::PseudoVC_I_SE_M4;
866
break;
867
case 3:
868
Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
869
: RISCV::PseudoVC_I_SE_M8;
870
break;
871
}
872
873
ReplaceNode(Node, CurDAG->getMachineNode(
874
Opcode, DL, Node->getSimpleValueType(0), Operands));
875
}
876
877
void RISCVDAGToDAGISel::Select(SDNode *Node) {
878
// If we have a custom node, we have already selected.
879
if (Node->isMachineOpcode()) {
880
LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
881
Node->setNodeId(-1);
882
return;
883
}
884
885
// Instruction Selection not handled by the auto-generated tablegen selection
886
// should be handled here.
887
unsigned Opcode = Node->getOpcode();
888
MVT XLenVT = Subtarget->getXLenVT();
889
SDLoc DL(Node);
890
MVT VT = Node->getSimpleValueType(0);
891
892
bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
893
894
switch (Opcode) {
895
case ISD::Constant: {
896
assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
897
auto *ConstNode = cast<ConstantSDNode>(Node);
898
if (ConstNode->isZero()) {
899
SDValue New =
900
CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
901
ReplaceNode(Node, New.getNode());
902
return;
903
}
904
int64_t Imm = ConstNode->getSExtValue();
905
// If only the lower 8 bits are used, try to convert this to a simm6 by
906
// sign-extending bit 7. This is neutral without the C extension, and
907
// allows C.LI to be used if C is present.
908
if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
909
Imm = SignExtend64<8>(Imm);
910
// If the upper XLen-16 bits are not used, try to convert this to a simm12
911
// by sign extending bit 15.
912
if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
913
hasAllHUsers(Node))
914
Imm = SignExtend64<16>(Imm);
915
// If the upper 32-bits are not used try to convert this into a simm32 by
916
// sign extending bit 32.
917
if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
918
Imm = SignExtend64<32>(Imm);
919
920
ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
921
return;
922
}
923
case ISD::ConstantFP: {
924
const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
925
auto [FPImm, NeedsFNeg] =
926
static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
927
VT);
928
if (FPImm >= 0) {
929
unsigned Opc;
930
unsigned FNegOpc;
931
switch (VT.SimpleTy) {
932
default:
933
llvm_unreachable("Unexpected size");
934
case MVT::f16:
935
Opc = RISCV::FLI_H;
936
FNegOpc = RISCV::FSGNJN_H;
937
break;
938
case MVT::f32:
939
Opc = RISCV::FLI_S;
940
FNegOpc = RISCV::FSGNJN_S;
941
break;
942
case MVT::f64:
943
Opc = RISCV::FLI_D;
944
FNegOpc = RISCV::FSGNJN_D;
945
break;
946
}
947
SDNode *Res = CurDAG->getMachineNode(
948
Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
949
if (NeedsFNeg)
950
Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
951
SDValue(Res, 0));
952
953
ReplaceNode(Node, Res);
954
return;
955
}
956
957
bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
958
SDValue Imm;
959
// For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
960
// create an integer immediate.
961
if (APF.isPosZero() || NegZeroF64)
962
Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
963
else
964
Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
965
*Subtarget);
966
967
bool HasZdinx = Subtarget->hasStdExtZdinx();
968
bool Is64Bit = Subtarget->is64Bit();
969
unsigned Opc;
970
switch (VT.SimpleTy) {
971
default:
972
llvm_unreachable("Unexpected size");
973
case MVT::bf16:
974
assert(Subtarget->hasStdExtZfbfmin());
975
Opc = RISCV::FMV_H_X;
976
break;
977
case MVT::f16:
978
Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
979
break;
980
case MVT::f32:
981
Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
982
break;
983
case MVT::f64:
984
// For RV32, we can't move from a GPR, we need to convert instead. This
985
// should only happen for +0.0 and -0.0.
986
assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
987
if (Is64Bit)
988
Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
989
else
990
Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
991
break;
992
}
993
994
SDNode *Res;
995
if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
996
Res = CurDAG->getMachineNode(
997
Opc, DL, VT, Imm,
998
CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
999
else
1000
Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1001
1002
// For f64 -0.0, we need to insert a fneg.d idiom.
1003
if (NegZeroF64) {
1004
Opc = RISCV::FSGNJN_D;
1005
if (HasZdinx)
1006
Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1007
Res =
1008
CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1009
}
1010
1011
ReplaceNode(Node, Res);
1012
return;
1013
}
1014
case RISCVISD::BuildPairF64: {
1015
if (!Subtarget->hasStdExtZdinx())
1016
break;
1017
1018
assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1019
1020
SDValue Ops[] = {
1021
CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1022
Node->getOperand(0),
1023
CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1024
Node->getOperand(1),
1025
CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1026
1027
SDNode *N =
1028
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops);
1029
ReplaceNode(Node, N);
1030
return;
1031
}
1032
case RISCVISD::SplitF64: {
1033
if (Subtarget->hasStdExtZdinx()) {
1034
assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1035
1036
if (!SDValue(Node, 0).use_empty()) {
1037
SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT,
1038
Node->getOperand(0));
1039
ReplaceUses(SDValue(Node, 0), Lo);
1040
}
1041
1042
if (!SDValue(Node, 1).use_empty()) {
1043
SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, VT,
1044
Node->getOperand(0));
1045
ReplaceUses(SDValue(Node, 1), Hi);
1046
}
1047
1048
CurDAG->RemoveDeadNode(Node);
1049
return;
1050
}
1051
1052
if (!Subtarget->hasStdExtZfa())
1053
break;
1054
assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1055
"Unexpected subtarget");
1056
1057
// With Zfa, lower to fmv.x.w and fmvh.x.d.
1058
if (!SDValue(Node, 0).use_empty()) {
1059
SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1060
Node->getOperand(0));
1061
ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1062
}
1063
if (!SDValue(Node, 1).use_empty()) {
1064
SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1065
Node->getOperand(0));
1066
ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1067
}
1068
1069
CurDAG->RemoveDeadNode(Node);
1070
return;
1071
}
1072
case ISD::SHL: {
1073
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1074
if (!N1C)
1075
break;
1076
SDValue N0 = Node->getOperand(0);
1077
if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1078
!isa<ConstantSDNode>(N0.getOperand(1)))
1079
break;
1080
unsigned ShAmt = N1C->getZExtValue();
1081
uint64_t Mask = N0.getConstantOperandVal(1);
1082
1083
// Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
1084
// 32 leading zeros and C3 trailing zeros.
1085
if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1086
unsigned XLen = Subtarget->getXLen();
1087
unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1088
unsigned TrailingZeros = llvm::countr_zero(Mask);
1089
if (TrailingZeros > 0 && LeadingZeros == 32) {
1090
SDNode *SRLIW = CurDAG->getMachineNode(
1091
RISCV::SRLIW, DL, VT, N0->getOperand(0),
1092
CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1093
SDNode *SLLI = CurDAG->getMachineNode(
1094
RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1095
CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1096
ReplaceNode(Node, SLLI);
1097
return;
1098
}
1099
}
1100
break;
1101
}
1102
case ISD::SRL: {
1103
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1104
if (!N1C)
1105
break;
1106
SDValue N0 = Node->getOperand(0);
1107
if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1108
break;
1109
unsigned ShAmt = N1C->getZExtValue();
1110
uint64_t Mask = N0.getConstantOperandVal(1);
1111
1112
// Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1113
// 32 leading zeros and C3 trailing zeros.
1114
if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1115
unsigned XLen = Subtarget->getXLen();
1116
unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1117
unsigned TrailingZeros = llvm::countr_zero(Mask);
1118
if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1119
SDNode *SRLIW = CurDAG->getMachineNode(
1120
RISCV::SRLIW, DL, VT, N0->getOperand(0),
1121
CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1122
SDNode *SLLI = CurDAG->getMachineNode(
1123
RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1124
CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1125
ReplaceNode(Node, SLLI);
1126
return;
1127
}
1128
}
1129
1130
// Optimize (srl (and X, C2), C) ->
1131
// (srli (slli X, (XLen-C3), (XLen-C3) + C)
1132
// Where C2 is a mask with C3 trailing ones.
1133
// Taking into account that the C2 may have had lower bits unset by
1134
// SimplifyDemandedBits. This avoids materializing the C2 immediate.
1135
// This pattern occurs when type legalizing right shifts for types with
1136
// less than XLen bits.
1137
Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1138
if (!isMask_64(Mask))
1139
break;
1140
unsigned TrailingOnes = llvm::countr_one(Mask);
1141
if (ShAmt >= TrailingOnes)
1142
break;
1143
// If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1144
if (TrailingOnes == 32) {
1145
SDNode *SRLI = CurDAG->getMachineNode(
1146
Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1147
N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1148
ReplaceNode(Node, SRLI);
1149
return;
1150
}
1151
1152
// Only do the remaining transforms if the AND has one use.
1153
if (!N0.hasOneUse())
1154
break;
1155
1156
// If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1157
if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1158
SDNode *BEXTI = CurDAG->getMachineNode(
1159
Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1160
N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1161
ReplaceNode(Node, BEXTI);
1162
return;
1163
}
1164
1165
unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1166
SDNode *SLLI =
1167
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1168
CurDAG->getTargetConstant(LShAmt, DL, VT));
1169
SDNode *SRLI = CurDAG->getMachineNode(
1170
RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1171
CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1172
ReplaceNode(Node, SRLI);
1173
return;
1174
}
1175
case ISD::SRA: {
1176
if (trySignedBitfieldExtract(Node))
1177
return;
1178
1179
// Optimize (sra (sext_inreg X, i16), C) ->
1180
// (srai (slli X, (XLen-16), (XLen-16) + C)
1181
// And (sra (sext_inreg X, i8), C) ->
1182
// (srai (slli X, (XLen-8), (XLen-8) + C)
1183
// This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1184
// This transform matches the code we get without Zbb. The shifts are more
1185
// compressible, and this can help expose CSE opportunities in the sdiv by
1186
// constant optimization.
1187
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1188
if (!N1C)
1189
break;
1190
SDValue N0 = Node->getOperand(0);
1191
if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1192
break;
1193
unsigned ShAmt = N1C->getZExtValue();
1194
unsigned ExtSize =
1195
cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1196
// ExtSize of 32 should use sraiw via tablegen pattern.
1197
if (ExtSize >= 32 || ShAmt >= ExtSize)
1198
break;
1199
unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1200
SDNode *SLLI =
1201
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1202
CurDAG->getTargetConstant(LShAmt, DL, VT));
1203
SDNode *SRAI = CurDAG->getMachineNode(
1204
RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1205
CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1206
ReplaceNode(Node, SRAI);
1207
return;
1208
}
1209
case ISD::OR:
1210
case ISD::XOR:
1211
if (tryShrinkShlLogicImm(Node))
1212
return;
1213
1214
break;
1215
case ISD::AND: {
1216
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1217
if (!N1C)
1218
break;
1219
uint64_t C1 = N1C->getZExtValue();
1220
const bool isC1Mask = isMask_64(C1);
1221
const bool isC1ANDI = isInt<12>(C1);
1222
1223
SDValue N0 = Node->getOperand(0);
1224
1225
auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1226
SDValue X, unsigned Msb,
1227
unsigned Lsb) {
1228
if (!Subtarget->hasVendorXTHeadBb())
1229
return false;
1230
1231
SDNode *TH_EXTU = CurDAG->getMachineNode(
1232
RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1233
CurDAG->getTargetConstant(Lsb, DL, VT));
1234
ReplaceNode(Node, TH_EXTU);
1235
return true;
1236
};
1237
1238
bool LeftShift = N0.getOpcode() == ISD::SHL;
1239
if (LeftShift || N0.getOpcode() == ISD::SRL) {
1240
auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1241
if (!C)
1242
break;
1243
unsigned C2 = C->getZExtValue();
1244
unsigned XLen = Subtarget->getXLen();
1245
assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1246
1247
// Keep track of whether this is a c.andi. If we can't use c.andi, the
1248
// shift pair might offer more compression opportunities.
1249
// TODO: We could check for C extension here, but we don't have many lit
1250
// tests with the C extension enabled so not checking gets better
1251
// coverage.
1252
// TODO: What if ANDI faster than shift?
1253
bool IsCANDI = isInt<6>(N1C->getSExtValue());
1254
1255
// Clear irrelevant bits in the mask.
1256
if (LeftShift)
1257
C1 &= maskTrailingZeros<uint64_t>(C2);
1258
else
1259
C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1260
1261
// Some transforms should only be done if the shift has a single use or
1262
// the AND would become (srli (slli X, 32), 32)
1263
bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1264
1265
SDValue X = N0.getOperand(0);
1266
1267
// Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1268
// with c3 leading zeros.
1269
if (!LeftShift && isC1Mask) {
1270
unsigned Leading = XLen - llvm::bit_width(C1);
1271
if (C2 < Leading) {
1272
// If the number of leading zeros is C2+32 this can be SRLIW.
1273
if (C2 + 32 == Leading) {
1274
SDNode *SRLIW = CurDAG->getMachineNode(
1275
RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1276
ReplaceNode(Node, SRLIW);
1277
return;
1278
}
1279
1280
// (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1281
// if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1282
//
1283
// This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1284
// legalized and goes through DAG combine.
1285
if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1286
X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1287
cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1288
SDNode *SRAIW =
1289
CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1290
CurDAG->getTargetConstant(31, DL, VT));
1291
SDNode *SRLIW = CurDAG->getMachineNode(
1292
RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1293
CurDAG->getTargetConstant(Leading - 32, DL, VT));
1294
ReplaceNode(Node, SRLIW);
1295
return;
1296
}
1297
1298
// Try to use an unsigned bitfield extract (e.g., th.extu) if
1299
// available.
1300
// Transform (and (srl x, C2), C1)
1301
// -> (<bfextract> x, msb, lsb)
1302
//
1303
// Make sure to keep this below the SRLIW cases, as we always want to
1304
// prefer the more common instruction.
1305
const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1306
const unsigned Lsb = C2;
1307
if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1308
return;
1309
1310
// (srli (slli x, c3-c2), c3).
1311
// Skip if we could use (zext.w (sraiw X, C2)).
1312
bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1313
X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1314
cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1315
// Also Skip if we can use bexti or th.tst.
1316
Skip |= HasBitTest && Leading == XLen - 1;
1317
if (OneUseOrZExtW && !Skip) {
1318
SDNode *SLLI = CurDAG->getMachineNode(
1319
RISCV::SLLI, DL, VT, X,
1320
CurDAG->getTargetConstant(Leading - C2, DL, VT));
1321
SDNode *SRLI = CurDAG->getMachineNode(
1322
RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1323
CurDAG->getTargetConstant(Leading, DL, VT));
1324
ReplaceNode(Node, SRLI);
1325
return;
1326
}
1327
}
1328
}
1329
1330
// Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1331
// shifted by c2 bits with c3 leading zeros.
1332
if (LeftShift && isShiftedMask_64(C1)) {
1333
unsigned Leading = XLen - llvm::bit_width(C1);
1334
1335
if (C2 + Leading < XLen &&
1336
C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1337
// Use slli.uw when possible.
1338
if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1339
SDNode *SLLI_UW =
1340
CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1341
CurDAG->getTargetConstant(C2, DL, VT));
1342
ReplaceNode(Node, SLLI_UW);
1343
return;
1344
}
1345
1346
// (srli (slli c2+c3), c3)
1347
if (OneUseOrZExtW && !IsCANDI) {
1348
SDNode *SLLI = CurDAG->getMachineNode(
1349
RISCV::SLLI, DL, VT, X,
1350
CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1351
SDNode *SRLI = CurDAG->getMachineNode(
1352
RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1353
CurDAG->getTargetConstant(Leading, DL, VT));
1354
ReplaceNode(Node, SRLI);
1355
return;
1356
}
1357
}
1358
}
1359
1360
// Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1361
// shifted mask with c2 leading zeros and c3 trailing zeros.
1362
if (!LeftShift && isShiftedMask_64(C1)) {
1363
unsigned Leading = XLen - llvm::bit_width(C1);
1364
unsigned Trailing = llvm::countr_zero(C1);
1365
if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1366
!IsCANDI) {
1367
unsigned SrliOpc = RISCV::SRLI;
1368
// If the input is zexti32 we should use SRLIW.
1369
if (X.getOpcode() == ISD::AND &&
1370
isa<ConstantSDNode>(X.getOperand(1)) &&
1371
X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1372
SrliOpc = RISCV::SRLIW;
1373
X = X.getOperand(0);
1374
}
1375
SDNode *SRLI = CurDAG->getMachineNode(
1376
SrliOpc, DL, VT, X,
1377
CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1378
SDNode *SLLI = CurDAG->getMachineNode(
1379
RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1380
CurDAG->getTargetConstant(Trailing, DL, VT));
1381
ReplaceNode(Node, SLLI);
1382
return;
1383
}
1384
// If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1385
if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1386
OneUseOrZExtW && !IsCANDI) {
1387
SDNode *SRLIW = CurDAG->getMachineNode(
1388
RISCV::SRLIW, DL, VT, X,
1389
CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1390
SDNode *SLLI = CurDAG->getMachineNode(
1391
RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1392
CurDAG->getTargetConstant(Trailing, DL, VT));
1393
ReplaceNode(Node, SLLI);
1394
return;
1395
}
1396
}
1397
1398
// Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1399
// shifted mask with no leading zeros and c3 trailing zeros.
1400
if (LeftShift && isShiftedMask_64(C1)) {
1401
unsigned Leading = XLen - llvm::bit_width(C1);
1402
unsigned Trailing = llvm::countr_zero(C1);
1403
if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1404
SDNode *SRLI = CurDAG->getMachineNode(
1405
RISCV::SRLI, DL, VT, X,
1406
CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1407
SDNode *SLLI = CurDAG->getMachineNode(
1408
RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1409
CurDAG->getTargetConstant(Trailing, DL, VT));
1410
ReplaceNode(Node, SLLI);
1411
return;
1412
}
1413
// If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1414
if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1415
SDNode *SRLIW = CurDAG->getMachineNode(
1416
RISCV::SRLIW, DL, VT, X,
1417
CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1418
SDNode *SLLI = CurDAG->getMachineNode(
1419
RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1420
CurDAG->getTargetConstant(Trailing, DL, VT));
1421
ReplaceNode(Node, SLLI);
1422
return;
1423
}
1424
1425
// If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1426
if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1427
Subtarget->hasStdExtZba()) {
1428
SDNode *SRLI = CurDAG->getMachineNode(
1429
RISCV::SRLI, DL, VT, X,
1430
CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1431
SDNode *SLLI_UW = CurDAG->getMachineNode(
1432
RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1433
CurDAG->getTargetConstant(Trailing, DL, VT));
1434
ReplaceNode(Node, SLLI_UW);
1435
return;
1436
}
1437
}
1438
}
1439
1440
// If C1 masks off the upper bits only (but can't be formed as an
1441
// ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1442
// available.
1443
// Transform (and x, C1)
1444
// -> (<bfextract> x, msb, lsb)
1445
if (isC1Mask && !isC1ANDI) {
1446
const unsigned Msb = llvm::bit_width(C1) - 1;
1447
if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1448
return;
1449
}
1450
1451
if (tryShrinkShlLogicImm(Node))
1452
return;
1453
1454
break;
1455
}
1456
case ISD::MUL: {
1457
// Special case for calculating (mul (and X, C2), C1) where the full product
1458
// fits in XLen bits. We can shift X left by the number of leading zeros in
1459
// C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1460
// product has XLen trailing zeros, putting it in the output of MULHU. This
1461
// can avoid materializing a constant in a register for C2.
1462
1463
// RHS should be a constant.
1464
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1465
if (!N1C || !N1C->hasOneUse())
1466
break;
1467
1468
// LHS should be an AND with constant.
1469
SDValue N0 = Node->getOperand(0);
1470
if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1471
break;
1472
1473
uint64_t C2 = N0.getConstantOperandVal(1);
1474
1475
// Constant should be a mask.
1476
if (!isMask_64(C2))
1477
break;
1478
1479
// If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1480
// multiple users or the constant is a simm12. This prevents inserting a
1481
// shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1482
// make it more costly to materialize. Otherwise, using a SLLI might allow
1483
// it to be compressed.
1484
bool IsANDIOrZExt =
1485
isInt<12>(C2) ||
1486
(C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1487
// With XTHeadBb, we can use TH.EXTU.
1488
IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1489
if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1490
break;
1491
// If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1492
// the constant is a simm32.
1493
bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1494
// With XTHeadBb, we can use TH.EXTU.
1495
IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1496
if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1497
break;
1498
1499
// We need to shift left the AND input and C1 by a total of XLen bits.
1500
1501
// How far left do we need to shift the AND input?
1502
unsigned XLen = Subtarget->getXLen();
1503
unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1504
1505
// The constant gets shifted by the remaining amount unless that would
1506
// shift bits out.
1507
uint64_t C1 = N1C->getZExtValue();
1508
unsigned ConstantShift = XLen - LeadingZeros;
1509
if (ConstantShift > (XLen - llvm::bit_width(C1)))
1510
break;
1511
1512
uint64_t ShiftedC1 = C1 << ConstantShift;
1513
// If this RV32, we need to sign extend the constant.
1514
if (XLen == 32)
1515
ShiftedC1 = SignExtend64<32>(ShiftedC1);
1516
1517
// Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1518
SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1519
SDNode *SLLI =
1520
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1521
CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1522
SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1523
SDValue(SLLI, 0), SDValue(Imm, 0));
1524
ReplaceNode(Node, MULHU);
1525
return;
1526
}
1527
case ISD::LOAD: {
1528
if (tryIndexedLoad(Node))
1529
return;
1530
1531
if (Subtarget->hasVendorXCVmem()) {
1532
// We match post-incrementing load here
1533
LoadSDNode *Load = cast<LoadSDNode>(Node);
1534
if (Load->getAddressingMode() != ISD::POST_INC)
1535
break;
1536
1537
SDValue Chain = Node->getOperand(0);
1538
SDValue Base = Node->getOperand(1);
1539
SDValue Offset = Node->getOperand(2);
1540
1541
bool Simm12 = false;
1542
bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1543
1544
if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1545
int ConstantVal = ConstantOffset->getSExtValue();
1546
Simm12 = isInt<12>(ConstantVal);
1547
if (Simm12)
1548
Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1549
Offset.getValueType());
1550
}
1551
1552
unsigned Opcode = 0;
1553
switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1554
case MVT::i8:
1555
if (Simm12 && SignExtend)
1556
Opcode = RISCV::CV_LB_ri_inc;
1557
else if (Simm12 && !SignExtend)
1558
Opcode = RISCV::CV_LBU_ri_inc;
1559
else if (!Simm12 && SignExtend)
1560
Opcode = RISCV::CV_LB_rr_inc;
1561
else
1562
Opcode = RISCV::CV_LBU_rr_inc;
1563
break;
1564
case MVT::i16:
1565
if (Simm12 && SignExtend)
1566
Opcode = RISCV::CV_LH_ri_inc;
1567
else if (Simm12 && !SignExtend)
1568
Opcode = RISCV::CV_LHU_ri_inc;
1569
else if (!Simm12 && SignExtend)
1570
Opcode = RISCV::CV_LH_rr_inc;
1571
else
1572
Opcode = RISCV::CV_LHU_rr_inc;
1573
break;
1574
case MVT::i32:
1575
if (Simm12)
1576
Opcode = RISCV::CV_LW_ri_inc;
1577
else
1578
Opcode = RISCV::CV_LW_rr_inc;
1579
break;
1580
default:
1581
break;
1582
}
1583
if (!Opcode)
1584
break;
1585
1586
ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1587
Chain.getSimpleValueType(), Base,
1588
Offset, Chain));
1589
return;
1590
}
1591
break;
1592
}
1593
case ISD::INTRINSIC_WO_CHAIN: {
1594
unsigned IntNo = Node->getConstantOperandVal(0);
1595
switch (IntNo) {
1596
// By default we do not custom select any intrinsic.
1597
default:
1598
break;
1599
case Intrinsic::riscv_vmsgeu:
1600
case Intrinsic::riscv_vmsge: {
1601
SDValue Src1 = Node->getOperand(1);
1602
SDValue Src2 = Node->getOperand(2);
1603
bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1604
bool IsCmpUnsignedZero = false;
1605
// Only custom select scalar second operand.
1606
if (Src2.getValueType() != XLenVT)
1607
break;
1608
// Small constants are handled with patterns.
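// (vmsge{u}.vx with a constant c in [-15, 16] is matched as vmsgt{u}.vi with
// c - 1, which fits the 5-bit signed immediate range [-16, 15].)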
1609
if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1610
int64_t CVal = C->getSExtValue();
1611
if (CVal >= -15 && CVal <= 16) {
1612
if (!IsUnsigned || CVal != 0)
1613
break;
1614
IsCmpUnsignedZero = true;
1615
}
1616
}
1617
MVT Src1VT = Src1.getSimpleValueType();
1618
unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1619
switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1620
default:
1621
llvm_unreachable("Unexpected LMUL!");
1622
#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
1623
case RISCVII::VLMUL::lmulenum: \
1624
VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1625
: RISCV::PseudoVMSLT_VX_##suffix; \
1626
VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1627
VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
1628
break;
1629
CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1630
CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1631
CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1632
CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1633
CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1634
CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1635
CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1636
#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1637
}
1638
SDValue SEW = CurDAG->getTargetConstant(
1639
Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1640
SDValue VL;
1641
selectVLOp(Node->getOperand(3), VL);
1642
1643
// If vmsgeu with 0 immediate, expand it to vmset.
1644
if (IsCmpUnsignedZero) {
1645
ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1646
return;
1647
}
1648
1649
// Expand to
1650
// vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
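// vmsge{u}(va, x) is the negation of vmslt{u}(va, x), and
// vmnand.mm vd, vd, vd computes NOT vd.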
1651
SDValue Cmp = SDValue(
1652
CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1653
0);
1654
ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1655
{Cmp, Cmp, VL, SEW}));
1656
return;
1657
}
1658
case Intrinsic::riscv_vmsgeu_mask:
1659
case Intrinsic::riscv_vmsge_mask: {
1660
SDValue Src1 = Node->getOperand(2);
1661
SDValue Src2 = Node->getOperand(3);
1662
bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1663
bool IsCmpUnsignedZero = false;
1664
// Only custom select scalar second operand.
1665
if (Src2.getValueType() != XLenVT)
1666
break;
1667
// Small constants are handled with patterns.
1668
if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1669
int64_t CVal = C->getSExtValue();
1670
if (CVal >= -15 && CVal <= 16) {
1671
if (!IsUnsigned || CVal != 0)
1672
break;
1673
IsCmpUnsignedZero = true;
1674
}
1675
}
1676
MVT Src1VT = Src1.getSimpleValueType();
1677
unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1678
VMOROpcode;
1679
switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1680
default:
1681
llvm_unreachable("Unexpected LMUL!");
1682
#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
1683
case RISCVII::VLMUL::lmulenum: \
1684
VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1685
: RISCV::PseudoVMSLT_VX_##suffix; \
1686
VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1687
: RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1688
break;
1689
CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1690
CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1691
CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1692
CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1693
CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1694
CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1695
CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1696
#undef CASE_VMSLT_OPCODES
1697
}
1698
// Mask operations use the LMUL from the mask type.
1699
switch (RISCVTargetLowering::getLMUL(VT)) {
1700
default:
1701
llvm_unreachable("Unexpected LMUL!");
1702
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1703
case RISCVII::VLMUL::lmulenum: \
1704
VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1705
VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1706
VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1707
break;
1708
CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1709
CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1710
CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1711
CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1712
CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1713
CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1714
CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1715
#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1716
}
1717
SDValue SEW = CurDAG->getTargetConstant(
1718
Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1719
SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1720
SDValue VL;
1721
selectVLOp(Node->getOperand(5), VL);
1722
SDValue MaskedOff = Node->getOperand(1);
1723
SDValue Mask = Node->getOperand(4);
1724
1725
// If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1726
if (IsCmpUnsignedZero) {
1727
// We don't need vmor if the MaskedOff and the Mask are the same
1728
// value.
1729
if (Mask == MaskedOff) {
1730
ReplaceUses(Node, Mask.getNode());
1731
return;
1732
}
1733
ReplaceNode(Node,
1734
CurDAG->getMachineNode(VMOROpcode, DL, VT,
1735
{Mask, MaskedOff, VL, MaskSEW}));
1736
return;
1737
}
1738
1739
// If the MaskedOff value and the Mask are the same value use
1740
// vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1741
// This avoids needing to copy v0 to vd before starting the next sequence.
1742
if (Mask == MaskedOff) {
1743
SDValue Cmp = SDValue(
1744
CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1745
0);
1746
ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1747
{Mask, Cmp, VL, MaskSEW}));
1748
return;
1749
}
1750
1751
// Mask needs to be copied to V0.
1752
SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1753
RISCV::V0, Mask, SDValue());
1754
SDValue Glue = Chain.getValue(1);
1755
SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1756
1757
// Otherwise use
1758
// vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1759
// The result is mask undisturbed.
1760
// We use the same instructions to emulate mask agnostic behavior, because
1761
// the agnostic result can be either undisturbed or all 1.
1762
SDValue Cmp = SDValue(
1763
CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1764
{MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1765
0);
1766
// vmxor.mm vd, vd, v0 is used to update active value.
1767
ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1768
{Cmp, Mask, VL, MaskSEW}));
1769
return;
1770
}
1771
case Intrinsic::riscv_vsetvli:
1772
case Intrinsic::riscv_vsetvlimax:
1773
return selectVSETVLI(Node);
1774
}
1775
break;
1776
}
1777
case ISD::INTRINSIC_W_CHAIN: {
1778
unsigned IntNo = Node->getConstantOperandVal(1);
1779
switch (IntNo) {
1780
// By default we do not custom select any intrinsic.
1781
default:
1782
break;
1783
case Intrinsic::riscv_vlseg2:
1784
case Intrinsic::riscv_vlseg3:
1785
case Intrinsic::riscv_vlseg4:
1786
case Intrinsic::riscv_vlseg5:
1787
case Intrinsic::riscv_vlseg6:
1788
case Intrinsic::riscv_vlseg7:
1789
case Intrinsic::riscv_vlseg8: {
1790
selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1791
return;
1792
}
1793
case Intrinsic::riscv_vlseg2_mask:
1794
case Intrinsic::riscv_vlseg3_mask:
1795
case Intrinsic::riscv_vlseg4_mask:
1796
case Intrinsic::riscv_vlseg5_mask:
1797
case Intrinsic::riscv_vlseg6_mask:
1798
case Intrinsic::riscv_vlseg7_mask:
1799
case Intrinsic::riscv_vlseg8_mask: {
1800
selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1801
return;
1802
}
1803
case Intrinsic::riscv_vlsseg2:
1804
case Intrinsic::riscv_vlsseg3:
1805
case Intrinsic::riscv_vlsseg4:
1806
case Intrinsic::riscv_vlsseg5:
1807
case Intrinsic::riscv_vlsseg6:
1808
case Intrinsic::riscv_vlsseg7:
1809
case Intrinsic::riscv_vlsseg8: {
1810
selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1811
return;
1812
}
1813
case Intrinsic::riscv_vlsseg2_mask:
1814
case Intrinsic::riscv_vlsseg3_mask:
1815
case Intrinsic::riscv_vlsseg4_mask:
1816
case Intrinsic::riscv_vlsseg5_mask:
1817
case Intrinsic::riscv_vlsseg6_mask:
1818
case Intrinsic::riscv_vlsseg7_mask:
1819
case Intrinsic::riscv_vlsseg8_mask: {
1820
selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1821
return;
1822
}
1823
case Intrinsic::riscv_vloxseg2:
1824
case Intrinsic::riscv_vloxseg3:
1825
case Intrinsic::riscv_vloxseg4:
1826
case Intrinsic::riscv_vloxseg5:
1827
case Intrinsic::riscv_vloxseg6:
1828
case Intrinsic::riscv_vloxseg7:
1829
case Intrinsic::riscv_vloxseg8:
1830
selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1831
return;
1832
case Intrinsic::riscv_vluxseg2:
1833
case Intrinsic::riscv_vluxseg3:
1834
case Intrinsic::riscv_vluxseg4:
1835
case Intrinsic::riscv_vluxseg5:
1836
case Intrinsic::riscv_vluxseg6:
1837
case Intrinsic::riscv_vluxseg7:
1838
case Intrinsic::riscv_vluxseg8:
1839
selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1840
return;
1841
case Intrinsic::riscv_vloxseg2_mask:
1842
case Intrinsic::riscv_vloxseg3_mask:
1843
case Intrinsic::riscv_vloxseg4_mask:
1844
case Intrinsic::riscv_vloxseg5_mask:
1845
case Intrinsic::riscv_vloxseg6_mask:
1846
case Intrinsic::riscv_vloxseg7_mask:
1847
case Intrinsic::riscv_vloxseg8_mask:
1848
selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1849
return;
1850
case Intrinsic::riscv_vluxseg2_mask:
1851
case Intrinsic::riscv_vluxseg3_mask:
1852
case Intrinsic::riscv_vluxseg4_mask:
1853
case Intrinsic::riscv_vluxseg5_mask:
1854
case Intrinsic::riscv_vluxseg6_mask:
1855
case Intrinsic::riscv_vluxseg7_mask:
1856
case Intrinsic::riscv_vluxseg8_mask:
1857
selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1858
return;
1859
case Intrinsic::riscv_vlseg8ff:
1860
case Intrinsic::riscv_vlseg7ff:
1861
case Intrinsic::riscv_vlseg6ff:
1862
case Intrinsic::riscv_vlseg5ff:
1863
case Intrinsic::riscv_vlseg4ff:
1864
case Intrinsic::riscv_vlseg3ff:
1865
case Intrinsic::riscv_vlseg2ff: {
1866
selectVLSEGFF(Node, /*IsMasked*/ false);
1867
return;
1868
}
1869
case Intrinsic::riscv_vlseg8ff_mask:
1870
case Intrinsic::riscv_vlseg7ff_mask:
1871
case Intrinsic::riscv_vlseg6ff_mask:
1872
case Intrinsic::riscv_vlseg5ff_mask:
1873
case Intrinsic::riscv_vlseg4ff_mask:
1874
case Intrinsic::riscv_vlseg3ff_mask:
1875
case Intrinsic::riscv_vlseg2ff_mask: {
1876
selectVLSEGFF(Node, /*IsMasked*/ true);
1877
return;
1878
}
1879
case Intrinsic::riscv_vloxei:
1880
case Intrinsic::riscv_vloxei_mask:
1881
case Intrinsic::riscv_vluxei:
1882
case Intrinsic::riscv_vluxei_mask: {
1883
bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1884
IntNo == Intrinsic::riscv_vluxei_mask;
1885
bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1886
IntNo == Intrinsic::riscv_vloxei_mask;
1887
1888
MVT VT = Node->getSimpleValueType(0);
1889
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1890
1891
unsigned CurOp = 2;
1892
SmallVector<SDValue, 8> Operands;
1893
Operands.push_back(Node->getOperand(CurOp++));
1894
1895
MVT IndexVT;
1896
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1897
/*IsStridedOrIndexed*/ true, Operands,
1898
/*IsLoad=*/true, &IndexVT);
1899
1900
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1901
"Element count mismatch");
1902
1903
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1904
RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1905
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1906
if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1907
report_fatal_error("The V extension does not support EEW=64 for index "
1908
"values when XLEN=32");
1909
}
1910
const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1911
IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1912
static_cast<unsigned>(IndexLMUL));
1913
MachineSDNode *Load =
1914
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1915
1916
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1917
CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1918
1919
ReplaceNode(Node, Load);
1920
return;
1921
}
1922
case Intrinsic::riscv_vlm:
1923
case Intrinsic::riscv_vle:
1924
case Intrinsic::riscv_vle_mask:
1925
case Intrinsic::riscv_vlse:
1926
case Intrinsic::riscv_vlse_mask: {
1927
bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1928
IntNo == Intrinsic::riscv_vlse_mask;
1929
bool IsStrided =
1930
IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1931
1932
MVT VT = Node->getSimpleValueType(0);
1933
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1934
1935
// The riscv_vlm intrinsic is always tail agnostic and has no passthru
1936
// operand at the IR level. In pseudos, they have both policy and
1937
// passthru operand. The passthru operand is needed to track the
1938
// "tail undefined" state, and the policy is there just for
1939
// consistency - it will always be "don't care" for the
1940
// unmasked form.
1941
bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1942
unsigned CurOp = 2;
1943
SmallVector<SDValue, 8> Operands;
1944
if (HasPassthruOperand)
1945
Operands.push_back(Node->getOperand(CurOp++));
1946
else {
1947
// We eagerly lower to implicit_def (instead of undef), as we
1948
// otherwise fail to select nodes such as: nxv1i1 = undef
1949
SDNode *Passthru =
1950
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
1951
Operands.push_back(SDValue(Passthru, 0));
1952
}
1953
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1954
Operands, /*IsLoad=*/true);
1955
1956
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1957
const RISCV::VLEPseudo *P =
1958
RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
1959
static_cast<unsigned>(LMUL));
1960
MachineSDNode *Load =
1961
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1962
1963
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1964
CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1965
1966
ReplaceNode(Node, Load);
1967
return;
1968
}
1969
case Intrinsic::riscv_vleff:
1970
case Intrinsic::riscv_vleff_mask: {
1971
bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1972
1973
MVT VT = Node->getSimpleValueType(0);
1974
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1975
1976
unsigned CurOp = 2;
1977
SmallVector<SDValue, 7> Operands;
1978
Operands.push_back(Node->getOperand(CurOp++));
1979
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1980
/*IsStridedOrIndexed*/ false, Operands,
1981
/*IsLoad=*/true);
1982
1983
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1984
const RISCV::VLEPseudo *P =
1985
RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
1986
Log2SEW, static_cast<unsigned>(LMUL));
1987
MachineSDNode *Load = CurDAG->getMachineNode(
1988
P->Pseudo, DL, Node->getVTList(), Operands);
1989
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1990
CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1991
1992
ReplaceNode(Node, Load);
1993
return;
1994
}
1995
}
1996
break;
1997
}
1998
case ISD::INTRINSIC_VOID: {
1999
unsigned IntNo = Node->getConstantOperandVal(1);
2000
switch (IntNo) {
2001
case Intrinsic::riscv_vsseg2:
2002
case Intrinsic::riscv_vsseg3:
2003
case Intrinsic::riscv_vsseg4:
2004
case Intrinsic::riscv_vsseg5:
2005
case Intrinsic::riscv_vsseg6:
2006
case Intrinsic::riscv_vsseg7:
2007
case Intrinsic::riscv_vsseg8: {
2008
selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
2009
return;
2010
}
2011
case Intrinsic::riscv_vsseg2_mask:
2012
case Intrinsic::riscv_vsseg3_mask:
2013
case Intrinsic::riscv_vsseg4_mask:
2014
case Intrinsic::riscv_vsseg5_mask:
2015
case Intrinsic::riscv_vsseg6_mask:
2016
case Intrinsic::riscv_vsseg7_mask:
2017
case Intrinsic::riscv_vsseg8_mask: {
2018
selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
2019
return;
2020
}
2021
case Intrinsic::riscv_vssseg2:
2022
case Intrinsic::riscv_vssseg3:
2023
case Intrinsic::riscv_vssseg4:
2024
case Intrinsic::riscv_vssseg5:
2025
case Intrinsic::riscv_vssseg6:
2026
case Intrinsic::riscv_vssseg7:
2027
case Intrinsic::riscv_vssseg8: {
2028
selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
2029
return;
2030
}
2031
case Intrinsic::riscv_vssseg2_mask:
2032
case Intrinsic::riscv_vssseg3_mask:
2033
case Intrinsic::riscv_vssseg4_mask:
2034
case Intrinsic::riscv_vssseg5_mask:
2035
case Intrinsic::riscv_vssseg6_mask:
2036
case Intrinsic::riscv_vssseg7_mask:
2037
case Intrinsic::riscv_vssseg8_mask: {
2038
selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
2039
return;
2040
}
2041
case Intrinsic::riscv_vsoxseg2:
2042
case Intrinsic::riscv_vsoxseg3:
2043
case Intrinsic::riscv_vsoxseg4:
2044
case Intrinsic::riscv_vsoxseg5:
2045
case Intrinsic::riscv_vsoxseg6:
2046
case Intrinsic::riscv_vsoxseg7:
2047
case Intrinsic::riscv_vsoxseg8:
2048
selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
2049
return;
2050
case Intrinsic::riscv_vsuxseg2:
2051
case Intrinsic::riscv_vsuxseg3:
2052
case Intrinsic::riscv_vsuxseg4:
2053
case Intrinsic::riscv_vsuxseg5:
2054
case Intrinsic::riscv_vsuxseg6:
2055
case Intrinsic::riscv_vsuxseg7:
2056
case Intrinsic::riscv_vsuxseg8:
2057
selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
2058
return;
2059
case Intrinsic::riscv_vsoxseg2_mask:
2060
case Intrinsic::riscv_vsoxseg3_mask:
2061
case Intrinsic::riscv_vsoxseg4_mask:
2062
case Intrinsic::riscv_vsoxseg5_mask:
2063
case Intrinsic::riscv_vsoxseg6_mask:
2064
case Intrinsic::riscv_vsoxseg7_mask:
2065
case Intrinsic::riscv_vsoxseg8_mask:
2066
selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
2067
return;
2068
case Intrinsic::riscv_vsuxseg2_mask:
2069
case Intrinsic::riscv_vsuxseg3_mask:
2070
case Intrinsic::riscv_vsuxseg4_mask:
2071
case Intrinsic::riscv_vsuxseg5_mask:
2072
case Intrinsic::riscv_vsuxseg6_mask:
2073
case Intrinsic::riscv_vsuxseg7_mask:
2074
case Intrinsic::riscv_vsuxseg8_mask:
2075
selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
2076
return;
2077
case Intrinsic::riscv_vsoxei:
2078
case Intrinsic::riscv_vsoxei_mask:
2079
case Intrinsic::riscv_vsuxei:
2080
case Intrinsic::riscv_vsuxei_mask: {
2081
bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2082
IntNo == Intrinsic::riscv_vsuxei_mask;
2083
bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2084
IntNo == Intrinsic::riscv_vsoxei_mask;
2085
2086
MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2087
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2088
2089
unsigned CurOp = 2;
2090
SmallVector<SDValue, 8> Operands;
2091
Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2092
2093
MVT IndexVT;
2094
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2095
/*IsStridedOrIndexed*/ true, Operands,
2096
/*IsLoad=*/false, &IndexVT);
2097
2098
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2099
"Element count mismatch");
2100
2101
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2102
RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2103
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2104
if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2105
report_fatal_error("The V extension does not support EEW=64 for index "
2106
"values when XLEN=32");
2107
}
2108
const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2109
IsMasked, IsOrdered, IndexLog2EEW,
2110
static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2111
MachineSDNode *Store =
2112
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2113
2114
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2115
CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2116
2117
ReplaceNode(Node, Store);
2118
return;
2119
}
2120
case Intrinsic::riscv_vsm:
2121
case Intrinsic::riscv_vse:
2122
case Intrinsic::riscv_vse_mask:
2123
case Intrinsic::riscv_vsse:
2124
case Intrinsic::riscv_vsse_mask: {
2125
bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2126
IntNo == Intrinsic::riscv_vsse_mask;
2127
bool IsStrided =
2128
IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2129
2130
MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2131
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2132
2133
unsigned CurOp = 2;
2134
SmallVector<SDValue, 8> Operands;
2135
Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2136
2137
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2138
Operands);
2139
2140
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2141
const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2142
IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2143
MachineSDNode *Store =
2144
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2145
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2146
CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2147
2148
ReplaceNode(Node, Store);
2149
return;
2150
}
2151
case Intrinsic::riscv_sf_vc_x_se:
2152
case Intrinsic::riscv_sf_vc_i_se:
2153
selectSF_VC_X_SE(Node);
2154
return;
2155
}
2156
break;
2157
}
2158
case ISD::BITCAST: {
2159
MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2160
// Just drop bitcasts between vectors if both are fixed or both are
2161
// scalable.
2162
if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2163
(VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2164
ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2165
CurDAG->RemoveDeadNode(Node);
2166
return;
2167
}
2168
break;
2169
}
2170
case ISD::INSERT_SUBVECTOR: {
2171
SDValue V = Node->getOperand(0);
2172
SDValue SubV = Node->getOperand(1);
2173
SDLoc DL(SubV);
2174
auto Idx = Node->getConstantOperandVal(2);
2175
MVT SubVecVT = SubV.getSimpleValueType();
2176
2177
const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2178
MVT SubVecContainerVT = SubVecVT;
2179
// Establish the correct scalable-vector types for any fixed-length type.
2180
if (SubVecVT.isFixedLengthVector()) {
2181
SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2182
TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2183
[[maybe_unused]] bool ExactlyVecRegSized =
2184
Subtarget->expandVScale(SubVecVT.getSizeInBits())
2185
.isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2186
assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2187
.getKnownMinValue()));
2188
assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2189
}
2190
MVT ContainerVT = VT;
2191
if (VT.isFixedLengthVector())
2192
ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2193
2194
const auto *TRI = Subtarget->getRegisterInfo();
2195
unsigned SubRegIdx;
2196
std::tie(SubRegIdx, Idx) =
2197
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2198
ContainerVT, SubVecContainerVT, Idx, TRI);
2199
2200
// If the Idx hasn't been completely eliminated then this is a subvector
2201
// insert which doesn't naturally align to a vector register. These must
2202
// be handled using instructions to manipulate the vector registers.
2203
if (Idx != 0)
2204
break;
2205
2206
RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2207
[[maybe_unused]] bool IsSubVecPartReg =
2208
SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2209
SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2210
SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2211
assert((!IsSubVecPartReg || V.isUndef()) &&
2212
"Expecting lowering to have created legal INSERT_SUBVECTORs when "
2213
"the subvector is smaller than a full-sized register");
2214
2215
// If we haven't set a SubRegIdx, then we must be going between
2216
// equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2217
if (SubRegIdx == RISCV::NoSubRegister) {
2218
unsigned InRegClassID =
2219
RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2220
assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2221
InRegClassID &&
2222
"Unexpected subvector extraction");
2223
SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2224
SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2225
DL, VT, SubV, RC);
2226
ReplaceNode(Node, NewNode);
2227
return;
2228
}
2229
2230
SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2231
ReplaceNode(Node, Insert.getNode());
2232
return;
2233
}
2234
case ISD::EXTRACT_SUBVECTOR: {
2235
SDValue V = Node->getOperand(0);
2236
auto Idx = Node->getConstantOperandVal(1);
2237
MVT InVT = V.getSimpleValueType();
2238
SDLoc DL(V);
2239
2240
const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2241
MVT SubVecContainerVT = VT;
2242
// Establish the correct scalable-vector types for any fixed-length type.
2243
if (VT.isFixedLengthVector()) {
2244
assert(Idx == 0);
2245
SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2246
}
2247
if (InVT.isFixedLengthVector())
2248
InVT = TLI.getContainerForFixedLengthVector(InVT);
2249
2250
const auto *TRI = Subtarget->getRegisterInfo();
2251
unsigned SubRegIdx;
2252
std::tie(SubRegIdx, Idx) =
2253
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2254
InVT, SubVecContainerVT, Idx, TRI);
2255
2256
// If the Idx hasn't been completely eliminated then this is a subvector
2257
// extract which doesn't naturally align to a vector register. These must
2258
// be handled using instructions to manipulate the vector registers.
2259
if (Idx != 0)
2260
break;
2261
2262
// If we haven't set a SubRegIdx, then we must be going between
2263
// equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2264
if (SubRegIdx == RISCV::NoSubRegister) {
2265
unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2266
assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2267
InRegClassID &&
2268
"Unexpected subvector extraction");
2269
SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2270
SDNode *NewNode =
2271
CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2272
ReplaceNode(Node, NewNode);
2273
return;
2274
}
2275
2276
SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2277
ReplaceNode(Node, Extract.getNode());
2278
return;
2279
}
2280
case RISCVISD::VMV_S_X_VL:
2281
case RISCVISD::VFMV_S_F_VL:
2282
case RISCVISD::VMV_V_X_VL:
2283
case RISCVISD::VFMV_V_F_VL: {
2284
// Try to match splat of a scalar load to a strided load with stride of x0.
2285
bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2286
Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2287
if (!Node->getOperand(0).isUndef())
2288
break;
2289
SDValue Src = Node->getOperand(1);
2290
auto *Ld = dyn_cast<LoadSDNode>(Src);
2291
// Can't fold an indexed (pre/post-increment) load because its second
2292
// output (the updated address) is used, so the load node can't be removed.
2293
if (!Ld || Ld->isIndexed())
2294
break;
2295
EVT MemVT = Ld->getMemoryVT();
2296
// The memory VT should be the same size as the element type.
2297
if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2298
break;
2299
if (!IsProfitableToFold(Src, Node, Node) ||
2300
!IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2301
break;
2302
2303
SDValue VL;
2304
if (IsScalarMove) {
2305
// We could deal with more VL if we update the VSETVLI insert pass to
2306
// avoid introducing more VSETVLI.
2307
if (!isOneConstant(Node->getOperand(2)))
2308
break;
2309
selectVLOp(Node->getOperand(2), VL);
2310
} else
2311
selectVLOp(Node->getOperand(2), VL);
2312
2313
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2314
SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2315
2316
// If VL=1, then we don't need to do a strided load and can just do a
2317
// regular load.
2318
bool IsStrided = !isOneConstant(VL);
2319
2320
// Only do a strided load if we have optimized zero-stride vector load.
2321
if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2322
break;
2323
2324
SmallVector<SDValue> Operands = {
2325
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2326
Ld->getBasePtr()};
2327
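// With a stride of x0 (zero), the vlse reloads the same scalar for every
// element, which implements the splat.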
if (IsStrided)
2328
Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2329
uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
2330
SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2331
Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2332
2333
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2334
const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2335
/*IsMasked*/ false, IsStrided, /*FF*/ false,
2336
Log2SEW, static_cast<unsigned>(LMUL));
2337
MachineSDNode *Load =
2338
CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2339
// Update the chain.
2340
ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2341
// Record the mem-refs
2342
CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2343
// Replace the splat with the vlse.
2344
ReplaceNode(Node, Load);
2345
return;
2346
}
2347
case ISD::PREFETCH:
2348
unsigned Locality = Node->getConstantOperandVal(3);
2349
if (Locality > 2)
2350
break;
2351
2352
if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2353
MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2354
MMO->setFlags(MachineMemOperand::MONonTemporal);
2355
2356
int NontemporalLevel = 0;
2357
switch (Locality) {
2358
case 0:
2359
NontemporalLevel = 3; // NTL.ALL
2360
break;
2361
case 1:
2362
NontemporalLevel = 1; // NTL.PALL
2363
break;
2364
case 2:
2365
NontemporalLevel = 0; // NTL.P1
2366
break;
2367
default:
2368
llvm_unreachable("unexpected locality value.");
2369
}
2370
2371
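// Encode the chosen nontemporal level in the two target-specific MMO flag
// bits.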
if (NontemporalLevel & 0b1)
2372
MMO->setFlags(MONontemporalBit0);
2373
if (NontemporalLevel & 0b10)
2374
MMO->setFlags(MONontemporalBit1);
2375
}
2376
break;
2377
}
2378
2379
// Select the default instruction.
2380
SelectCode(Node);
2381
}
2382
2383
bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2384
const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2385
std::vector<SDValue> &OutOps) {
2386
// Always produce a register and immediate operand, as expected by
2387
// RISCVAsmPrinter::PrintAsmMemoryOperand.
2388
switch (ConstraintID) {
2389
case InlineAsm::ConstraintCode::o:
2390
case InlineAsm::ConstraintCode::m: {
2391
SDValue Op0, Op1;
2392
[[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2393
assert(Found && "SelectAddrRegImm should always succeed");
2394
OutOps.push_back(Op0);
2395
OutOps.push_back(Op1);
2396
return false;
2397
}
2398
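// The 'A' constraint is an address held in a single register; emit it with a
// zero offset.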
case InlineAsm::ConstraintCode::A:
2399
OutOps.push_back(Op);
2400
OutOps.push_back(
2401
CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2402
return false;
2403
default:
2404
report_fatal_error("Unexpected asm memory constraint " +
2405
InlineAsm::getMemConstraintName(ConstraintID));
2406
}
2407
2408
return true;
2409
}
2410
2411
bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2412
SDValue &Offset) {
2413
if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2414
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2415
Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2416
return true;
2417
}
2418
2419
return false;
2420
}
2421
2422
// Select a frame index and an optional immediate offset from an ADD or OR.
2423
bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2424
SDValue &Offset) {
2425
if (SelectAddrFrameIndex(Addr, Base, Offset))
2426
return true;
2427
2428
if (!CurDAG->isBaseWithConstantOffset(Addr))
2429
return false;
2430
2431
if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2432
int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2433
if (isInt<12>(CVal)) {
2434
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2435
Subtarget->getXLenVT());
2436
Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
2437
Subtarget->getXLenVT());
2438
return true;
2439
}
2440
}
2441
2442
return false;
2443
}
2444
2445
// Fold constant addresses.
2446
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2447
const MVT VT, const RISCVSubtarget *Subtarget,
2448
SDValue Addr, SDValue &Base, SDValue &Offset,
2449
bool IsPrefetch = false) {
2450
if (!isa<ConstantSDNode>(Addr))
2451
return false;
2452
2453
int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2454
2455
// If the constant is a simm12, we can fold the whole constant and use X0 as
2456
// the base. If the constant can be materialized with LUI+simm12, use LUI as
2457
// the base. We can't use generateInstSeq because it favors LUI+ADDIW.
2458
int64_t Lo12 = SignExtend64<12>(CVal);
2459
int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2460
if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2461
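// Prefetch instructions require the low 5 bits of the offset to be zero.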
if (IsPrefetch && (Lo12 & 0b11111) != 0)
2462
return false;
2463
2464
if (Hi) {
2465
int64_t Hi20 = (Hi >> 12) & 0xfffff;
2466
Base = SDValue(
2467
CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2468
CurDAG->getTargetConstant(Hi20, DL, VT)),
2469
0);
2470
} else {
2471
Base = CurDAG->getRegister(RISCV::X0, VT);
2472
}
2473
Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2474
return true;
2475
}
2476
2477
// Ask how constant materialization would handle this constant.
2478
RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2479
2480
// If the last instruction would be an ADDI, we can fold its immediate and
2481
// emit the rest of the sequence as the base.
2482
if (Seq.back().getOpcode() != RISCV::ADDI)
2483
return false;
2484
Lo12 = Seq.back().getImm();
2485
if (IsPrefetch && (Lo12 & 0b11111) != 0)
2486
return false;
2487
2488
// Drop the last instruction.
2489
Seq.pop_back();
2490
assert(!Seq.empty() && "Expected more instructions in sequence");
2491
2492
Base = selectImmSeq(CurDAG, DL, VT, Seq);
2493
Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2494
return true;
2495
}
2496
2497
// Is this ADD instruction only used as the base pointer of scalar loads and
2498
// stores?
2499
static bool isWorthFoldingAdd(SDValue Add) {
2500
for (auto *Use : Add->uses()) {
2501
if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2502
Use->getOpcode() != ISD::ATOMIC_LOAD &&
2503
Use->getOpcode() != ISD::ATOMIC_STORE)
2504
return false;
2505
EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2506
if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2507
VT != MVT::f64)
2508
return false;
2509
// Don't allow stores of the value. It must be used as the address.
2510
if (Use->getOpcode() == ISD::STORE &&
2511
cast<StoreSDNode>(Use)->getValue() == Add)
2512
return false;
2513
if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2514
cast<AtomicSDNode>(Use)->getVal() == Add)
2515
return false;
2516
}
2517
2518
return true;
2519
}
2520
2521
bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2522
unsigned MaxShiftAmount,
2523
SDValue &Base, SDValue &Index,
2524
SDValue &Scale) {
2525
EVT VT = Addr.getSimpleValueType();
2526
auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2527
SDValue &Shift) {
2528
uint64_t ShiftAmt = 0;
2529
Index = N;
2530
2531
if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2532
// Only match shifts by a value in range [0, MaxShiftAmount].
2533
if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2534
Index = N.getOperand(0);
2535
ShiftAmt = N.getConstantOperandVal(1);
2536
}
2537
}
2538
2539
Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2540
return ShiftAmt != 0;
2541
};
2542
2543
if (Addr.getOpcode() == ISD::ADD) {
2544
if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2545
SDValue AddrB = Addr.getOperand(0);
2546
if (AddrB.getOpcode() == ISD::ADD &&
2547
UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2548
!isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2549
isInt<12>(C1->getSExtValue())) {
2550
// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2551
SDValue C1Val =
2552
CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2553
Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2554
AddrB.getOperand(1), C1Val),
2555
0);
2556
return true;
2557
}
2558
} else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2559
Base = Addr.getOperand(1);
2560
return true;
2561
} else {
2562
UnwrapShl(Addr.getOperand(1), Index, Scale);
2563
Base = Addr.getOperand(0);
2564
return true;
2565
}
2566
} else if (UnwrapShl(Addr, Index, Scale)) {
2567
EVT VT = Addr.getValueType();
2568
Base = CurDAG->getRegister(RISCV::X0, VT);
2569
return true;
2570
}
2571
2572
return false;
2573
}
2574
2575
bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2576
SDValue &Offset, bool IsINX) {
2577
if (SelectAddrFrameIndex(Addr, Base, Offset))
2578
return true;
2579
2580
SDLoc DL(Addr);
2581
MVT VT = Addr.getSimpleValueType();
2582
2583
if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2584
Base = Addr.getOperand(0);
2585
Offset = Addr.getOperand(1);
2586
return true;
2587
}
2588
2589
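// For RV32 Zdinx (IsINX), a 64-bit access is split into two 32-bit accesses,
// so the offset of the second word (CVal + 4) must also fit in a simm12.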
int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2590
if (CurDAG->isBaseWithConstantOffset(Addr)) {
2591
int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2592
if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2593
Base = Addr.getOperand(0);
2594
if (Base.getOpcode() == RISCVISD::ADD_LO) {
2595
SDValue LoOperand = Base.getOperand(1);
2596
if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2597
// If the Lo in (ADD_LO hi, lo) is a global variable's address
2598
// (its low part, really), then we can rely on the alignment of that
2599
// variable to provide a margin of safety before low part can overflow
2600
// the 12 bits of the load/store offset. Check if CVal falls within
2601
// that margin; if so (low part + CVal) can't overflow.
2602
const DataLayout &DL = CurDAG->getDataLayout();
2603
Align Alignment = commonAlignment(
2604
GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2605
if (CVal == 0 || Alignment > CVal) {
2606
int64_t CombinedOffset = CVal + GA->getOffset();
2607
Base = Base.getOperand(0);
2608
Offset = CurDAG->getTargetGlobalAddress(
2609
GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2610
CombinedOffset, GA->getTargetFlags());
2611
return true;
2612
}
2613
}
2614
}
2615
2616
if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2617
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2618
Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2619
return true;
2620
}
2621
}
2622
2623
// Handle ADD with large immediates.
2624
if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2625
int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2626
assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2627
"simm12 not already handled?");
2628
2629
// Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2630
// an ADDI for part of the offset and fold the rest into the load/store.
2631
// This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
2632
if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2633
int64_t Adj = CVal < 0 ? -2048 : 2047;
2634
Base = SDValue(
2635
CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2636
CurDAG->getTargetConstant(Adj, DL, VT)),
2637
0);
2638
Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2639
return true;
2640
}
2641
2642
// For larger immediates, we might be able to save one instruction from
2643
// constant materialization by folding the Lo12 bits of the immediate into
2644
// the address. We should only do this if the ADD is only used by loads and
2645
// stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2646
// separately with the full materialized immediate creating extra
2647
// instructions.
2648
if (isWorthFoldingAdd(Addr) &&
2649
selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2650
Offset)) {
2651
// Insert an ADD instruction with the materialized Hi52 bits.
2652
Base = SDValue(
2653
CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2654
0);
2655
return true;
2656
}
2657
}
2658
2659
if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2660
return true;
2661
2662
Base = Addr;
2663
Offset = CurDAG->getTargetConstant(0, DL, VT);
2664
return true;
2665
}
2666
2667
/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2668
/// Offset should be all zeros.
2669
bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2670
SDValue &Offset) {
2671
if (SelectAddrFrameIndex(Addr, Base, Offset))
2672
return true;
2673
2674
SDLoc DL(Addr);
2675
MVT VT = Addr.getSimpleValueType();
2676
2677
if (CurDAG->isBaseWithConstantOffset(Addr)) {
2678
int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2679
if (isInt<12>(CVal)) {
2680
Base = Addr.getOperand(0);
2681
2682
// Early-out if not a valid offset.
2683
if ((CVal & 0b11111) != 0) {
2684
Base = Addr;
2685
Offset = CurDAG->getTargetConstant(0, DL, VT);
2686
return true;
2687
}
2688
2689
if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2690
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2691
Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2692
return true;
2693
}
2694
}
2695
2696
// Handle ADD with large immediates.
2697
if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2698
int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2699
assert(!(isInt<12>(CVal) && isInt<12>(CVal)) &&
2700
"simm12 not already handled?");
2701
2702
// Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2703
// one instruction by folding an adjustment (-2048 or 2016) into the address.
2704
if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2705
int64_t Adj = CVal < 0 ? -2048 : 2016;
2706
int64_t AdjustedOffset = CVal - Adj;
2707
Base = SDValue(CurDAG->getMachineNode(
2708
RISCV::ADDI, DL, VT, Addr.getOperand(0),
2709
CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
2710
0);
2711
Offset = CurDAG->getTargetConstant(Adj, DL, VT);
2712
return true;
2713
}
2714
2715
if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2716
Offset, true)) {
2717
// Insert an ADD instruction with the materialized Hi52 bits.
2718
Base = SDValue(
2719
CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2720
0);
2721
return true;
2722
}
2723
}
2724
2725
if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
2726
return true;
2727
2728
Base = Addr;
2729
Offset = CurDAG->getTargetConstant(0, DL, VT);
2730
return true;
2731
}
2732
2733
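// Match a register+register address: a plain ADD whose second operand is not
// a constant (constant offsets are handled by the register+immediate forms).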
bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
2734
SDValue &Offset) {
2735
if (Addr.getOpcode() != ISD::ADD)
2736
return false;
2737
2738
if (isa<ConstantSDNode>(Addr.getOperand(1)))
2739
return false;
2740
2741
Base = Addr.getOperand(1);
2742
Offset = Addr.getOperand(0);
2743
return true;
2744
}
2745
2746
bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2747
SDValue &ShAmt) {
2748
ShAmt = N;
2749
2750
// Peek through zext.
2751
if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2752
ShAmt = ShAmt.getOperand(0);
2753
2754
// Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2755
// amount. If there is an AND on the shift amount, we can bypass it if it
2756
// doesn't affect any of those bits.
2757
if (ShAmt.getOpcode() == ISD::AND &&
2758
isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2759
const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2760
2761
// Since the max shift amount is a power of 2 we can subtract 1 to make a
2762
// mask that covers the bits needed to represent all shift amounts.
2763
assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2764
APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2765
2766
if (ShMask.isSubsetOf(AndMask)) {
2767
ShAmt = ShAmt.getOperand(0);
2768
} else {
2769
// SimplifyDemandedBits may have optimized the mask so try restoring any
2770
// bits that are known zero.
2771
KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2772
if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2773
return true;
2774
ShAmt = ShAmt.getOperand(0);
2775
}
2776
}
2777
2778
if (ShAmt.getOpcode() == ISD::ADD &&
2779
isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2780
uint64_t Imm = ShAmt.getConstantOperandVal(1);
2781
// If we are shifting by X+N where N == 0 mod Size, then just shift by X
2782
// to avoid the ADD.
2783
if (Imm != 0 && Imm % ShiftWidth == 0) {
2784
ShAmt = ShAmt.getOperand(0);
2785
return true;
2786
}
2787
} else if (ShAmt.getOpcode() == ISD::SUB &&
2788
isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2789
uint64_t Imm = ShAmt.getConstantOperandVal(0);
2790
// If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2791
// generate a NEG instead of a SUB of a constant.
2792
if (Imm != 0 && Imm % ShiftWidth == 0) {
2793
SDLoc DL(ShAmt);
2794
EVT VT = ShAmt.getValueType();
2795
SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2796
unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2797
MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2798
ShAmt.getOperand(1));
2799
ShAmt = SDValue(Neg, 0);
2800
return true;
2801
}
2802
// If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2803
// to generate a NOT instead of a SUB of a constant.
2804
if (Imm % ShiftWidth == ShiftWidth - 1) {
2805
SDLoc DL(ShAmt);
2806
EVT VT = ShAmt.getValueType();
2807
MachineSDNode *Not =
2808
CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2809
CurDAG->getTargetConstant(-1, DL, VT));
2810
ShAmt = SDValue(Not, 0);
2811
return true;
2812
}
2813
}
2814
2815
return true;
2816
}
2817
2818
/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2819
/// check for equality with 0. This function emits instructions that convert the
2820
/// seteq/setne into something that can be compared with 0.
2821
/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2822
/// ISD::SETNE).
2823
bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2824
SDValue &Val) {
2825
assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2826
"Unexpected condition code!");
2827
2828
// We're looking for a setcc.
2829
if (N->getOpcode() != ISD::SETCC)
2830
return false;
2831
2832
// Must be an equality comparison.
2833
ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2834
if (CCVal != ExpectedCCVal)
2835
return false;
2836
2837
SDValue LHS = N->getOperand(0);
2838
SDValue RHS = N->getOperand(1);
2839
2840
if (!LHS.getValueType().isScalarInteger())
2841
return false;
2842
2843
// If the RHS side is 0, we don't need any extra instructions, return the LHS.
2844
if (isNullConstant(RHS)) {
2845
Val = LHS;
2846
return true;
2847
}
2848
2849
SDLoc DL(N);
2850
2851
if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2852
int64_t CVal = C->getSExtValue();
2853
// If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2854
// non-zero otherwise.
2855
if (CVal == -2048) {
2856
Val =
2857
SDValue(CurDAG->getMachineNode(
2858
RISCV::XORI, DL, N->getValueType(0), LHS,
2859
CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2860
0);
2861
return true;
2862
}
2863
// If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2864
// LHS is equal to the RHS and non-zero otherwise.
2865
if (isInt<12>(CVal) || CVal == 2048) {
2866
Val =
2867
SDValue(CurDAG->getMachineNode(
2868
RISCV::ADDI, DL, N->getValueType(0), LHS,
2869
CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2870
0);
2871
return true;
2872
}
2873
}
2874
2875
// If nothing else we can XOR the LHS and RHS to produce zero if they are
2876
// equal and a non-zero value if they aren't.
2877
Val = SDValue(
2878
CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2879
return true;
2880
}
2881
2882
bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2883
if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2884
cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2885
Val = N.getOperand(0);
2886
return true;
2887
}
2888
2889
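// Peel off a (sra (shl X, ShiftAmt), ShiftAmt) pair, which sign extends X
// from the low (bit width - ShiftAmt) bits.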
auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2890
if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2891
return N;
2892
2893
SDValue N0 = N.getOperand(0);
2894
if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2895
N.getConstantOperandVal(1) == ShiftAmt &&
2896
N0.getConstantOperandVal(1) == ShiftAmt)
2897
return N0.getOperand(0);
2898
2899
return N;
2900
};
2901
2902
MVT VT = N.getSimpleValueType();
2903
if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2904
Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2905
return true;
2906
}
2907
2908
return false;
2909
}
2910
2911
bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2912
if (N.getOpcode() == ISD::AND) {
2913
auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2914
if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2915
Val = N.getOperand(0);
2916
return true;
2917
}
2918
}
2919
MVT VT = N.getSimpleValueType();
2920
APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2921
if (CurDAG->MaskedValueIsZero(N, Mask)) {
2922
Val = N;
2923
return true;
2924
}
2925
2926
return false;
2927
}
2928
2929
/// Look for various patterns that can be done with a SHL that can be folded
2930
/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2931
/// SHXADD we are trying to match.
2932
bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2933
SDValue &Val) {
2934
if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2935
SDValue N0 = N.getOperand(0);
2936
2937
bool LeftShift = N0.getOpcode() == ISD::SHL;
2938
if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2939
isa<ConstantSDNode>(N0.getOperand(1))) {
2940
uint64_t Mask = N.getConstantOperandVal(1);
2941
unsigned C2 = N0.getConstantOperandVal(1);
2942
2943
unsigned XLen = Subtarget->getXLen();
2944
if (LeftShift)
2945
Mask &= maskTrailingZeros<uint64_t>(C2);
2946
else
2947
Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2948
2949
// Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2950
// leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
2951
// followed by a SHXADD with c3 for the X amount.
2952
if (isShiftedMask_64(Mask)) {
2953
unsigned Leading = XLen - llvm::bit_width(Mask);
2954
unsigned Trailing = llvm::countr_zero(Mask);
2955
if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2956
SDLoc DL(N);
2957
EVT VT = N.getValueType();
2958
Val = SDValue(CurDAG->getMachineNode(
2959
RISCV::SRLI, DL, VT, N0.getOperand(0),
2960
CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2961
0);
2962
return true;
2963
}
2964
// Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2965
// leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2966
// followed by a SHXADD using c3 for the X amount.
2967
if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
2968
SDLoc DL(N);
2969
EVT VT = N.getValueType();
2970
Val = SDValue(
2971
CurDAG->getMachineNode(
2972
RISCV::SRLI, DL, VT, N0.getOperand(0),
2973
CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
2974
0);
2975
return true;
2976
}
2977
}
2978
}
2979
}
2980
2981
bool LeftShift = N.getOpcode() == ISD::SHL;
2982
if ((LeftShift || N.getOpcode() == ISD::SRL) &&
2983
isa<ConstantSDNode>(N.getOperand(1))) {
2984
SDValue N0 = N.getOperand(0);
2985
if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
2986
isa<ConstantSDNode>(N0.getOperand(1))) {
2987
uint64_t Mask = N0.getConstantOperandVal(1);
2988
if (isShiftedMask_64(Mask)) {
2989
unsigned C1 = N.getConstantOperandVal(1);
2990
unsigned XLen = Subtarget->getXLen();
2991
unsigned Leading = XLen - llvm::bit_width(Mask);
2992
unsigned Trailing = llvm::countr_zero(Mask);
2993
// Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
2994
// C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
2995
if (LeftShift && Leading == 32 && Trailing > 0 &&
2996
(Trailing + C1) == ShAmt) {
2997
SDLoc DL(N);
2998
EVT VT = N.getValueType();
2999
Val = SDValue(CurDAG->getMachineNode(
3000
RISCV::SRLIW, DL, VT, N0.getOperand(0),
3001
CurDAG->getTargetConstant(Trailing, DL, VT)),
3002
0);
3003
return true;
3004
}
3005
// Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3006
// C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3007
if (!LeftShift && Leading == 32 && Trailing > C1 &&
3008
(Trailing - C1) == ShAmt) {
3009
SDLoc DL(N);
3010
EVT VT = N.getValueType();
3011
Val = SDValue(CurDAG->getMachineNode(
3012
RISCV::SRLIW, DL, VT, N0.getOperand(0),
3013
CurDAG->getTargetConstant(Trailing, DL, VT)),
3014
0);
3015
return true;
3016
}
3017
}
3018
}
3019
}
3020
3021
return false;
3022
}
3023
3024
/// Look for various patterns that can be done with a SHL that can be folded
3025
/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3026
/// SHXADD_UW we are trying to match.
3027
bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3028
SDValue &Val) {
3029
if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3030
N.hasOneUse()) {
3031
SDValue N0 = N.getOperand(0);
3032
if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3033
N0.hasOneUse()) {
3034
uint64_t Mask = N.getConstantOperandVal(1);
3035
unsigned C2 = N0.getConstantOperandVal(1);
3036
3037
Mask &= maskTrailingZeros<uint64_t>(C2);
3038
3039
// Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3040
// 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3041
// c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3042
if (isShiftedMask_64(Mask)) {
3043
unsigned Leading = llvm::countl_zero(Mask);
3044
unsigned Trailing = llvm::countr_zero(Mask);
3045
if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3046
SDLoc DL(N);
3047
EVT VT = N.getValueType();
3048
Val = SDValue(CurDAG->getMachineNode(
3049
RISCV::SLLI, DL, VT, N0.getOperand(0),
3050
CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3051
0);
3052
return true;
3053
}
3054
}
3055
}
3056
}
3057
3058
return false;
3059
}
3060
3061
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3062
unsigned Bits,
3063
const TargetInstrInfo *TII) {
3064
unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3065
3066
if (!MCOpcode)
3067
return false;
3068
3069
const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3070
const uint64_t TSFlags = MCID.TSFlags;
3071
if (!RISCVII::hasSEWOp(TSFlags))
3072
return false;
3073
assert(RISCVII::hasVLOp(TSFlags));
3074
3075
bool HasGlueOp = User->getGluedNode() != nullptr;
3076
unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
3077
bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3078
bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3079
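// Vector pseudo operands end with [..., VL, SEW, policy?, chain?, glue?], so
// walk back from the end to locate VL; SEW is the operand right after it.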
unsigned VLIdx =
3080
User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3081
const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3082
3083
if (UserOpNo == VLIdx)
3084
return false;
3085
3086
auto NumDemandedBits =
3087
RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3088
return NumDemandedBits && Bits >= *NumDemandedBits;
3089
}
3090
3091
// Return true if all users of this SDNode* only consume the lower \p Bits.
3092
// This can be used to form W instructions for add/sub/mul/shl even when the
3093
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3094
// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3095
// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3096
// the add/sub/mul/shl to become non-W instructions. By checking the users we
3097
// may be able to use a W instruction and CSE with the other instruction if
3098
// this has happened. We could try to detect that the CSE opportunity exists
3099
// before doing this, but that would be more complicated.
3100
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
                                        const unsigned Depth) const {
  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
          Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
          Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
          Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
          isa<ConstantSDNode>(Node) || Depth != 0) &&
         "Unexpected opcode");

  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
  // the VT. Ensure the type is scalar to avoid wasting time on vectors.
  if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
    return false;

  for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Users of this node should have already been instruction selected
    if (!User->isMachineOpcode())
      return false;

    // TODO: Add more opcodes?
    switch (User->getMachineOpcode()) {
    default:
      if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
        break;
      return false;
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLI_UW:
    case RISCV::FMV_W_X:
    case RISCV::FCVT_H_W:
    case RISCV::FCVT_H_WU:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_WU:
    case RISCV::TH_REVW:
    case RISCV::TH_SRRIW:
      if (Bits < 32)
        return false;
      break;
    case RISCV::SLL:
    case RISCV::SRA:
    case RISCV::SRL:
    case RISCV::ROL:
    case RISCV::ROR:
    case RISCV::BSET:
    case RISCV::BCLR:
    case RISCV::BINV:
      // Shift amount operands only use log2(Xlen) bits.
      if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))
        return false;
      break;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
        return false;
      break;
    case RISCV::ANDI:
      if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
        break;
      goto RecCheck;
    case RISCV::ORI: {
      uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
      if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
        break;
      [[fallthrough]];
    }
    case RISCV::AND:
    case RISCV::OR:
    case RISCV::XOR:
    case RISCV::XORI:
    case RISCV::ANDN:
    case RISCV::ORN:
    case RISCV::XNOR:
    case RISCV::SH1ADD:
    case RISCV::SH2ADD:
    case RISCV::SH3ADD:
    RecCheck:
      if (hasAllNBitUsers(User, Bits, Depth + 1))
        break;
      return false;
    case RISCV::SRLI: {
      unsigned ShAmt = User->getConstantOperandVal(1);
      // If we are shifting right by less than Bits, and users don't demand any
      // bits that were shifted into [Bits-1:0], then we can consider this as an
      // N-Bit user.
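      // For example (illustrative): with Bits == 32 and a shift amount of 8,
      // users that only consume the low 24 bits of this SRLI only ever observe
      // bits [31:8] of the shifted node, so it still counts as a 32-bit user.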
      if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
        break;
      return false;
    }
    case RISCV::SEXT_B:
    case RISCV::PACKH:
      if (Bits < 8)
        return false;
      break;
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
    case RISCV::PACKW:
      if (Bits < 16)
        return false;
      break;
    case RISCV::PACK:
      if (Bits < (Subtarget->getXLen() / 2))
        return false;
      break;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    case RISCV::SB:
      if (UI.getOperandNo() != 0 || Bits < 8)
        return false;
      break;
    case RISCV::SH:
      if (UI.getOperandNo() != 0 || Bits < 16)
        return false;
      break;
    case RISCV::SW:
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    }
  }

  return true;
}

// Select a constant that can be represented as (sign_extend(imm5) << imm2).
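// For example (illustrative): 40 is encodable as Simm5 = 10 with Shl2 = 2 (the
// smallest valid shift is chosen), while 100 has no encoding because 25 does
// not fit in a signed 5-bit immediate and 100 is not divisible by 8.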
bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
                                        SDValue &Shl2) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t Offset = C->getSExtValue();
    int64_t Shift;
    for (Shift = 0; Shift < 4; Shift++)
      if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
        break;

    // Constant cannot be encoded.
    if (Shift == 4)
      return false;

    EVT Ty = N->getValueType(0);
    Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
    Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
    return true;
  }

  return false;
}

// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
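// For example (illustrative): a constant VL of 4 becomes the immediate 4
// (suitable for VSETIVLI), an all-ones constant or the X0 register becomes the
// VLMaxSentinel immediate, and anything else is passed through so it can be
// used as the register operand of a VSETVLI.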
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && isUInt<5>(C->getZExtValue())) {
    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                   N->getValueType(0));
  } else if (C && C->isAllOnes()) {
    // Treat all ones as VLMax.
    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                   N->getValueType(0));
  } else if (isa<RegisterSDNode>(N) &&
             cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
    // All our VL operands use an operand that allows GPRNoX0 or an immediate
    // as the register class. Convert X0 to a special immediate to pass the
    // MachineVerifier. This is recognized specially by the vsetvli insertion
    // pass.
    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                   N->getValueType(0));
  } else {
    VL = N;
  }

  return true;
}

static SDValue findVSplat(SDValue N) {
  if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
    if (!N.getOperand(0).isUndef())
      return SDValue();
    N = N.getOperand(1);
  }
  SDValue Splat = N;
  if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
       Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
      !Splat.getOperand(0).isUndef())
    return SDValue();
  assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
  return Splat;
}

bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
  SDValue Splat = findVSplat(N);
  if (!Splat)
    return false;

  SplatVal = Splat.getOperand(1);
  return true;
}

static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
                                  SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget,
                                  std::function<bool(int64_t)> ValidateImm) {
  SDValue Splat = findVSplat(N);
  if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
    return false;

  const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
  assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
         "Unexpected splat operand type");

  // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
  // type is wider than the resulting vector element type: an implicit
  // truncation first takes place. Therefore, perform a manual
  // truncation/sign-extension in order to ignore any truncated bits and catch
  // any zero-extended immediate.
  // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
  // sign-extending to (XLenVT -1).
  APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);

  int64_t SplatImm = SplatConst.getSExtValue();

  if (!ValidateImm(SplatImm))
    return false;

  SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
  return true;
}

bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
  return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
                               [](int64_t Imm) { return isInt<5>(Imm); });
}

bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
}

bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
                                                      SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
        return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
      });
}

bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
                                         SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
}

bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
  auto IsExtOrTrunc = [](SDValue N) {
    switch (N->getOpcode()) {
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    // There's no passthru on these _VL nodes so any VL/mask is ok, since any
    // inactive elements will be undef.
    case RISCVISD::TRUNCATE_VECTOR_VL:
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
      return true;
    default:
      return false;
    }
  };

  // We can have multiple nested nodes, so unravel them all if needed.
  while (IsExtOrTrunc(N)) {
    if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
      return false;
    N = N->getOperand(0);
  }

  return selectVSplat(N, SplatVal);
}

bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
  if (!CFP)
    return false;
  const APFloat &APF = CFP->getValueAPF();
  // td can handle +0.0 already.
  if (APF.isPosZero())
    return false;

  MVT VT = CFP->getSimpleValueType(0);

  // Even if this FPImm requires an additional FNEG (i.e. the second element of
  // the returned pair is true) we still prefer FLI + FNEG over immediate
  // materialization as the latter might generate a longer instruction sequence.
  if (static_cast<const RISCVTargetLowering *>(TLI)
          ->getLegalZfaFPImm(APF, VT)
          .first >= 0)
    return false;

  MVT XLenVT = Subtarget->getXLenVT();
  if (VT == MVT::f64 && !Subtarget->is64Bit()) {
    assert(APF.isNegZero() && "Unexpected constant.");
    return false;
  }
  SDLoc DL(N);
  Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                  *Subtarget);
  return true;
}

bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
                                       SDValue &Imm) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);

    if (!isInt<5>(ImmVal))
      return false;

    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
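// For example (illustrative): "add a0, a1, a2" followed by "sext.w a0, a0"
// becomes a single "addw a0, a1, a2", while a sext.w fed directly by an
// existing ADDW is simply removed since its result is already sign extended.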
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD: Opc = RISCV::ADDW; break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB: Opc = RISCV::SUBW; break;
    case RISCV::MUL: Opc = RISCV::MULW; break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                               N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::PACKW:
  case RISCV::TH_MULAW:
  case RISCV::TH_MULAH:
  case RISCV::TH_MULSW:
  case RISCV::TH_MULSH:
    if (N0.getValueType() == MVT::i32)
      break;

    // Result is already sign extended just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  return false;
}

// After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg
// that's glued to the pseudo. This tries to look up the value that was copied
// to V0.
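// For example (illustrative): given a masked pseudo glued to
// (CopyToReg V0, %m), passing its V0 register operand and that glue here
// returns %m, looking through a wrapping COPY_TO_REGCLASS if present.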
static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) {
  // Check that we're using V0 as a mask register.
  if (!isa<RegisterSDNode>(MaskOp) ||
      cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
    return SDValue();

  // The glued user defines V0.
  const auto *Glued = GlueOp.getNode();

  if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
    return SDValue();

  // Check that we're defining V0 as a mask register.
  if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
      cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
    return SDValue();

  SDValue MaskSetter = Glued->getOperand(2);

  // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
  // from an extract_subvector or insert_subvector.
  if (MaskSetter->isMachineOpcode() &&
      MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
    MaskSetter = MaskSetter->getOperand(0);

  return MaskSetter;
}

static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
  // Check the instruction defining V0; it needs to be a VMSET pseudo.
  SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp);
  if (!MaskSetter)
    return false;

  const auto IsVMSet = [](unsigned Opc) {
    return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
           Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
           Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
           Opc == RISCV::PseudoVMSET_M_B8;
  };

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  return MaskSetter->isMachineOpcode() &&
         IsVMSet(MaskSetter.getMachineOpcode());
}

// Return true if we can make sure mask of N is all-ones mask.
static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
  return usesAllOnesMask(N->getOperand(MaskOpIdx),
                         N->getOperand(N->getNumOperands() - 1));
}

static bool isImplicitDef(SDValue V) {
  if (!V.isMachineOpcode())
    return false;
  if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
    for (unsigned I = 1; I < V.getNumOperands(); I += 2)
      if (!isImplicitDef(V.getOperand(I)))
        return false;
    return true;
  }
  return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
}

// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
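// For example (illustrative): a PseudoVADD_VV_M1_MASK whose V0 mask was set
// (via the glued CopyToReg) from a PseudoVMSET_M_B8 can be rewritten as the
// unmasked PseudoVADD_VV_M1, dropping the mask and glue operands.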
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;
  if (!usesAllOnesMask(N, MaskOpIdx))
    return false;

  // There are two classes of pseudos in the table - compares and
  // everything else. See the comment on RISCVMaskedPseudo for details.
  const unsigned Opc = I->UnmaskedPseudo;
  const MCInstrDesc &MCID = TII->get(Opc);
  const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
#ifndef NDEBUG
  const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
             RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
         "Masked and unmasked pseudos are inconsistent");
  const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
  assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
#endif

  SmallVector<SDValue, 8> Ops;
  // Skip the merge operand at index 0 if !UseTUPseudo.
  for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask, and the Glue.
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
      continue;
    Ops.push_back(Op);
  }

  // Transitively apply any node glued to our new node.
  const auto *Glued = N->getGluedNode();
  if (auto *TGlued = Glued->getGluedNode())
    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));

  MachineSDNode *Result =
      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);

  if (!N->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, N->memoperands());

  Result->setFlags(N->getFlags());
  ReplaceUses(N, Result);

  return true;
}

static bool IsVMerge(SDNode *N) {
  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
}

static bool IsVMv(SDNode *N) {
  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
}

static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
  switch (LMUL) {
  case RISCVII::LMUL_F8:
    return RISCV::PseudoVMSET_M_B1;
  case RISCVII::LMUL_F4:
    return RISCV::PseudoVMSET_M_B2;
  case RISCVII::LMUL_F2:
    return RISCV::PseudoVMSET_M_B4;
  case RISCVII::LMUL_1:
    return RISCV::PseudoVMSET_M_B8;
  case RISCVII::LMUL_2:
    return RISCV::PseudoVMSET_M_B16;
  case RISCVII::LMUL_4:
    return RISCV::PseudoVMSET_M_B32;
  case RISCVII::LMUL_8:
    return RISCV::PseudoVMSET_M_B64;
  case RISCVII::LMUL_RESERVED:
    llvm_unreachable("Unexpected LMUL");
  }
  llvm_unreachable("Unknown VLMUL enum");
}

// Try to fold away VMERGE_VVM instructions into their true operands:
//
// %true = PseudoVADD_VV ...
// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
// ->
// %x = PseudoVADD_VV_MASK %false, ..., %mask
//
// We can only fold if vmerge's merge operand, vmerge's false operand and
// %true's merge operand (if it has one) are the same. This is because we have
// to consolidate them into one merge operand in the result.
//
// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
// mask is all ones.
//
// We can also fold a VMV_V_V into its true operand, since it is equivalent to a
// VMERGE_VVM with an all ones mask.
//
// The resulting VL is the minimum of the two VLs.
//
// The resulting policy is the effective policy the vmerge would have had,
// i.e. whether or not its merge operand was implicit-def.
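// For example (illustrative): if the vmerge's VL is 4 and %true's VL is 8, the
// folded masked pseudo uses VL 4; and when the vmerge's merge operand was
// IMPLICIT_DEF and the VL did not shrink, the tail policy can be relaxed to
// tail agnostic.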
bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
  SDValue Merge, False, True, VL, Mask, Glue;
  // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
  if (IsVMv(N)) {
    Merge = N->getOperand(0);
    False = N->getOperand(0);
    True = N->getOperand(1);
    VL = N->getOperand(2);
    // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
    // mask later below.
  } else {
    assert(IsVMerge(N));
    Merge = N->getOperand(0);
    False = N->getOperand(1);
    True = N->getOperand(2);
    Mask = N->getOperand(3);
    VL = N->getOperand(4);
    // We always have a glue node for the mask at v0.
    Glue = N->getOperand(N->getNumOperands() - 1);
  }
  assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
  assert(!Glue || Glue.getValueType() == MVT::Glue);

  // If the EEW of True is different from vmerge's SEW, then we can't fold.
  if (True.getSimpleValueType() != N->getSimpleValueType(0))
    return false;

  // We require that either merge and false are the same, or that merge
  // is undefined.
  if (Merge != False && !isImplicitDef(Merge))
    return false;

  assert(True.getResNo() == 0 &&
         "Expect True is the first output of an instruction.");

  // N needs to be the only user of True.
  if (!True.hasOneUse())
    return false;

  if (!True.isMachineOpcode())
    return false;

  unsigned TrueOpc = True.getMachineOpcode();
  const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
  uint64_t TrueTSFlags = TrueMCID.TSFlags;
  bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);

  bool IsMasked = false;
  const RISCV::RISCVMaskedPseudoInfo *Info =
      RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
  if (!Info && HasTiedDest) {
    Info = RISCV::getMaskedPseudoInfo(TrueOpc);
    IsMasked = true;
  }
  assert(!(IsMasked && !HasTiedDest) && "Expected tied dest");

  if (!Info)
    return false;

  // If True has a merge operand then it needs to be the same as vmerge's False,
  // since False will be used for the result's merge operand.
  if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
    SDValue MergeOpTrue = True->getOperand(0);
    if (False != MergeOpTrue)
      return false;
  }

  // If True is masked then the vmerge must have either the same mask or an all
  // 1s mask, since we're going to keep the mask from True.
  if (IsMasked && Mask) {
    // FIXME: Support mask agnostic True instruction which would have an
    // undef merge operand.
    SDValue TrueMask =
        getMaskSetter(True->getOperand(Info->MaskOpIdx),
                      True->getOperand(True->getNumOperands() - 1));
    assert(TrueMask);
    if (!usesAllOnesMask(Mask, Glue) && getMaskSetter(Mask, Glue) != TrueMask)
      return false;
  }

  // Skip if True has side effect.
  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
    return false;

  // The last operand of a masked instruction may be glued.
  bool HasGlueOp = True->getGluedNode() != nullptr;

  // The chain operand may exist either before the glued operands or in the last
  // position.
  unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
  bool HasChainOp =
      True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;

  if (HasChainOp) {
    // Avoid creating cycles in the DAG. We must ensure that none of the other
    // operands depend on True through its Chain.
    SmallVector<const SDNode *, 4> LoopWorklist;
    SmallPtrSet<const SDNode *, 16> Visited;
    LoopWorklist.push_back(False.getNode());
    if (Mask)
      LoopWorklist.push_back(Mask.getNode());
    LoopWorklist.push_back(VL.getNode());
    if (Glue)
      LoopWorklist.push_back(Glue.getNode());
    if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
      return false;
  }

  // The vector policy operand may be present for masked intrinsics
  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
  unsigned TrueVLIndex =
      True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
  SDValue TrueVL = True.getOperand(TrueVLIndex);
  SDValue SEW = True.getOperand(TrueVLIndex + 1);

  auto GetMinVL = [](SDValue LHS, SDValue RHS) {
    if (LHS == RHS)
      return LHS;
    if (isAllOnesConstant(LHS))
      return RHS;
    if (isAllOnesConstant(RHS))
      return LHS;
    auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
    auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
    if (!CLHS || !CRHS)
      return SDValue();
    return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
  };

  // Because N and True must have the same merge operand (or True's operand is
  // implicit_def), the "effective" body is the minimum of their VLs.
  SDValue OrigVL = VL;
  VL = GetMinVL(TrueVL, VL);
  if (!VL)
    return false;

  // Some operations produce different elementwise results depending on the
  // active elements, like viota.m or vredsum. This transformation is illegal
  // for these if we change the active elements (i.e. mask or VL).
  if (Info->ActiveElementsAffectResult) {
    if (Mask && !usesAllOnesMask(Mask, Glue))
      return false;
    if (TrueVL != VL)
      return false;
  }

  // If we end up changing the VL or mask of True, then we need to make sure it
  // doesn't raise any observable fp exceptions, since changing the active
  // elements will affect how fflags is set.
  if (TrueVL != VL || !IsMasked)
    if (mayRaiseFPException(True.getNode()) &&
        !True->getFlags().hasNoFPExcept())
      return false;

  SDLoc DL(N);

  // From the preconditions we checked above, we know the mask and thus glue
  // for the result node will be taken from True.
  if (IsMasked) {
    Mask = True->getOperand(Info->MaskOpIdx);
    Glue = True->getOperand(True->getNumOperands() - 1);
    assert(Glue.getValueType() == MVT::Glue);
  }
  // If we end up using the vmerge mask but the vmerge is actually a vmv.v.v,
  // create an all-ones mask to use.
  else if (IsVMv(N)) {
    unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
    unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
    ElementCount EC = N->getValueType(0).getVectorElementCount();
    MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);

    SDValue AllOnesMask =
        SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
    SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                            RISCV::V0, AllOnesMask, SDValue());
    Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
    Glue = MaskCopy.getValue(1);
  }

  unsigned MaskedOpc = Info->MaskedPseudo;
#ifndef NDEBUG
  const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
         "Expected instructions with mask have policy operand.");
  assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
                                         MCOI::TIED_TO) == 0 &&
         "Expected instructions with mask have a tied dest.");
#endif

  // Use a tumu policy, relaxing it to tail agnostic provided that the merge
  // operand is undefined.
  //
  // However, if the VL became smaller than what the vmerge had originally, then
  // elements past VL that were previously in the vmerge's body will have moved
  // to the tail. In that case we always need to use tail undisturbed to
  // preserve them.
  bool MergeVLShrunk = VL != OrigVL;
  uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
                        ? RISCVII::TAIL_AGNOSTIC
                        : /*TUMU*/ 0;
  SDValue PolicyOp =
      CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(False);

  const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
  const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
  assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
  Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);

  Ops.push_back(Mask);

  // For unmasked "VOp" with rounding mode operand, that is, interfaces like
  // (..., rm, vl) or (..., rm, vl, policy).
  // Its masked version is (..., vm, rm, vl, policy).
  // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
  if (HasRoundingMode)
    Ops.push_back(True->getOperand(TrueVLIndex - 1));

  Ops.append({VL, SEW, PolicyOp});

  // Result node should have chain operand of True.
  if (HasChainOp)
    Ops.push_back(True.getOperand(TrueChainOpIdx));

  // Add the glue for the CopyToReg of mask->v0.
  Ops.push_back(Glue);

  MachineSDNode *Result =
      CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
  Result->setFlags(True->getFlags());

  if (!cast<MachineSDNode>(True)->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());

  // Replace the vmerge.vvm node by Result.
  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));

  // Replace the other values of True, e.g. its chain and VL.
  for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
    ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));

  return true;
}

bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (IsVMerge(N) || IsVMv(N))
      MadeChange |= performCombineVMergeAndVOps(N);
  }
  return MadeChange;
}

/// If our passthru is an implicit_def, use noreg instead. This side
/// steps issues with MachineCSE not being able to CSE expressions with
/// IMPLICIT_DEF operands while preserving the semantic intent. See
/// pr64282 for context. Note that this transform is the last one
/// performed at ISEL DAG to DAG.
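/// For example (illustrative): an unmasked pseudo such as PseudoVADD_VV_M1
/// whose tied passthru operand is an IMPLICIT_DEF is re-created with $noreg as
/// the passthru, letting MachineCSE treat otherwise-identical nodes as equal.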
bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    const unsigned Opc = N->getMachineOpcode();
    if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
        !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
        !isImplicitDef(N->getOperand(0)))
      continue;

    SmallVector<SDValue> Ops;
    Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
    for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
      SDValue Op = N->getOperand(I);
      Ops.push_back(Op);
    }

    MachineSDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
    Result->setFlags(N->getFlags());
    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
    ReplaceUses(N, Result);
    MadeChange = true;
  }
  return MadeChange;
}

// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
                                       CodeGenOptLevel OptLevel) {
  return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
}

char RISCVDAGToDAGISelLegacy::ID = 0;

RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
                                                 CodeGenOptLevel OptLevel)
    : SelectionDAGISelLegacy(
          ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}

INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)