GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains the declarations of the Vectorization Plan base classes:
/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
///    VPBlockBase, together implementing a Hierarchical CFG;
/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
///    within VPBasicBlocks;
/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
///    also inherit from VPValue;
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
///    instruction;
/// 5. The VPlan class holding a candidate for vectorization;
/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H

#include "VPlanAnalysis.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/InstructionCost.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>

namespace llvm {

class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class IRBuilderBase;
class LoopInfo;
class raw_ostream;
class RecurrenceDescriptor;
class SCEV;
class Type;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
class VPReplicateRecipe;
class VPlanSlp;
class Value;
class LoopVectorizationCostModel;
class LoopVersioning;

struct VPCostContext;

namespace Intrinsic {
typedef unsigned ID;
}

/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
/// vectors it is an expression determined at runtime.
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);

/// Return a value for Step multiplied by VF.
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
                       int64_t Step);

const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
                                Loop *CurLoop = nullptr);

/// A helper function that returns the reciprocal of the block probability of
/// predicated blocks. If we return X, we are assuming the predicated block
/// will execute once for every X iterations of the loop header.
///
/// TODO: We should use actual block probability here, if available. Currently,
/// we always assume predicated blocks have a 50% chance of executing.
inline unsigned getReciprocalPredBlockProb() { return 2; }

/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.:
/// [1, 16) = {1, 2, 4, 8}
struct VFRange {
  // A power of 2.
  const ElementCount Start;

  // A power of 2. If End <= Start, the range is empty.
  ElementCount End;

  bool isEmpty() const {
    return End.getKnownMinValue() <= Start.getKnownMinValue();
  }

  VFRange(const ElementCount &Start, const ElementCount &End)
      : Start(Start), End(End) {
    assert(Start.isScalable() == End.isScalable() &&
           "Both Start and End should have the same scalable flag");
    assert(isPowerOf2_32(Start.getKnownMinValue()) &&
           "Expected Start to be a power of 2");
    assert(isPowerOf2_32(End.getKnownMinValue()) &&
           "Expected End to be a power of 2");
  }

  /// Iterator to iterate over vectorization factors in a VFRange.
  class iterator
      : public iterator_facade_base<iterator, std::forward_iterator_tag,
                                    ElementCount> {
    ElementCount VF;

  public:
    iterator(ElementCount VF) : VF(VF) {}

    bool operator==(const iterator &Other) const { return VF == Other.VF; }

    ElementCount operator*() const { return VF; }

    iterator &operator++() {
      VF *= 2;
      return *this;
    }
  };

  iterator begin() { return iterator(Start); }
  iterator end() {
    assert(isPowerOf2_32(End.getKnownMinValue()));
    return iterator(End);
  }
};

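// Illustrative usage sketch (not part of the original header): a range-based
// for over a VFRange visits the candidate factors, doubling on each step;
// e.g., assuming fixed-width factors:
//
//   VFRange Range(ElementCount::getFixed(1), ElementCount::getFixed(16));
//   for (ElementCount VF : Range)
//     ; // visits VF = 1, 2, 4, 8
//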
using VPlanPtr = std::unique_ptr<VPlan>;

/// In what follows, the term "input IR" refers to code that is fed into the
/// vectorizer whereas the term "output IR" refers to code that is generated by
/// the vectorizer.

/// VPLane provides a way to access lanes in both fixed width and scalable
/// vectors, where for the latter the lane index sometimes needs calculating
/// as a runtime expression.
class VPLane {
public:
  /// Kind describes how to interpret Lane.
  enum class Kind : uint8_t {
    /// For First, Lane is the index into the first N elements of a
    /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
    First,
    /// For ScalableLast, Lane is the offset from the start of the last
    /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
    /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
    /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
    ScalableLast
  };

private:
  /// in [0..VF)
  unsigned Lane;

  /// Indicates how the Lane should be interpreted, as described above.
  Kind LaneKind;

public:
  VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}

  static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }

  static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
    assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
           "trying to extract with invalid offset");
    unsigned LaneOffset = VF.getKnownMinValue() - Offset;
    Kind LaneKind;
    if (VF.isScalable())
      // In this case 'LaneOffset' refers to the offset from the start of the
      // last subvector with VF.getKnownMinValue() elements.
      LaneKind = VPLane::Kind::ScalableLast;
    else
      LaneKind = VPLane::Kind::First;
    return VPLane(LaneOffset, LaneKind);
  }

  static VPLane getLastLaneForVF(const ElementCount &VF) {
    return getLaneFromEnd(VF, 1);
  }

  /// Returns a compile-time known value for the lane index and asserts if the
  /// lane can only be calculated at runtime.
  unsigned getKnownLane() const {
    assert(LaneKind == Kind::First);
    return Lane;
  }

  /// Returns an expression describing the lane index that can be used at
  /// runtime.
  Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;

  /// Returns the Kind of lane offset.
  Kind getKind() const { return LaneKind; }

  /// Returns true if this is the first lane of the whole vector.
  bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }

  /// Maps the lane to a cache index based on \p VF.
  unsigned mapToCacheIndex(const ElementCount &VF) const {
    switch (LaneKind) {
    case VPLane::Kind::ScalableLast:
      assert(VF.isScalable() && Lane < VF.getKnownMinValue());
      return VF.getKnownMinValue() + Lane;
    default:
      assert(Lane < VF.getKnownMinValue());
      return Lane;
    }
  }

  /// Returns the maximum number of lanes that we are able to consider
  /// caching for \p VF.
  static unsigned getNumCachedLanes(const ElementCount &VF) {
    return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
  }
};

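// Illustrative sketch (not part of the original header): for a scalable
// VF = vscale x 4 the last lane has no compile-time number, so getLaneFromEnd
// switches to the ScalableLast encoding:
//
//   VPLane L = VPLane::getLastLaneForVF(ElementCount::getScalable(4));
//   // L.getKind() == VPLane::Kind::ScalableLast with offset 3, i.e. runtime
//   // lane (vscale - 1) * 4 + 3; for ElementCount::getFixed(4) the same call
//   // yields Kind::First with compile-time lane 3.
//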
/// VPIteration represents a single point in the iteration space of the output
/// (vectorized and/or unrolled) IR loop.
struct VPIteration {
  /// in [0..UF)
  unsigned Part;

  VPLane Lane;

  VPIteration(unsigned Part, unsigned Lane,
              VPLane::Kind Kind = VPLane::Kind::First)
      : Part(Part), Lane(Lane, Kind) {}

  VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}

  bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
};

/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
  VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
                   DominatorTree *DT, IRBuilderBase &Builder,
                   InnerLoopVectorizer *ILV, VPlan *Plan, LLVMContext &Ctx);

  /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
  ElementCount VF;
  unsigned UF;

  /// Hold the indices to generate specific scalar instructions. Null indicates
  /// that all instances are to be generated, using either scalar or vector
  /// instructions.
  std::optional<VPIteration> Instance;

  struct DataState {
    /// A type for vectorized values in the new loop. Each value from the
    /// original loop, when vectorized, is represented by UF vector values in
    /// the new unrolled loop, where UF is the unroll factor.
    typedef SmallVector<Value *, 2> PerPartValuesTy;

    DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;

    using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;
    DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
  } Data;

  /// Get the generated vector Value for a given VPValue \p Def and a given \p
  /// Part if \p IsScalar is false, otherwise return the generated scalar
  /// for \p Part. \see set.
  Value *get(VPValue *Def, unsigned Part, bool IsScalar = false);

  /// Get the generated Value for a given VPValue and given Part and Lane.
  Value *get(VPValue *Def, const VPIteration &Instance);

  bool hasVectorValue(VPValue *Def, unsigned Part) {
    auto I = Data.PerPartOutput.find(Def);
    return I != Data.PerPartOutput.end() && Part < I->second.size() &&
           I->second[Part];
  }

  bool hasScalarValue(VPValue *Def, VPIteration Instance) {
    auto I = Data.PerPartScalars.find(Def);
    if (I == Data.PerPartScalars.end())
      return false;
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    return Instance.Part < I->second.size() &&
           CacheIdx < I->second[Instance.Part].size() &&
           I->second[Instance.Part][CacheIdx];
  }

  /// Set the generated vector Value for a given VPValue and a given Part, if \p
  /// IsScalar is false. If \p IsScalar is true, set the scalar in (Part, 0).
  void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar = false) {
    if (IsScalar) {
      set(Def, V, VPIteration(Part, 0));
      return;
    }
    assert((VF.isScalar() || V->getType()->isVectorTy()) &&
           "scalar values must be stored as (Part, 0)");
    if (!Data.PerPartOutput.count(Def)) {
      DataState::PerPartValuesTy Entry(UF);
      Data.PerPartOutput[Def] = Entry;
    }
    Data.PerPartOutput[Def][Part] = V;
  }

  /// Reset an existing vector value for \p Def and a given \p Part.
  void reset(VPValue *Def, Value *V, unsigned Part) {
    auto Iter = Data.PerPartOutput.find(Def);
    assert(Iter != Data.PerPartOutput.end() &&
           "need to overwrite existing value");
    Iter->second[Part] = V;
  }

  /// Set the generated scalar \p V for \p Def and the given \p Instance.
  void set(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.insert({Def, {}});
    auto &PerPartVec = Iter.first->second;
    if (PerPartVec.size() <= Instance.Part)
      PerPartVec.resize(Instance.Part + 1);
    auto &Scalars = PerPartVec[Instance.Part];
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    if (Scalars.size() <= CacheIdx)
      Scalars.resize(CacheIdx + 1);
    assert(!Scalars[CacheIdx] && "should overwrite existing value");
    Scalars[CacheIdx] = V;
  }

  /// Reset an existing scalar value for \p Def and a given \p Instance.
  void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
    auto Iter = Data.PerPartScalars.find(Def);
    assert(Iter != Data.PerPartScalars.end() &&
           "need to overwrite existing value");
    assert(Instance.Part < Iter->second.size() &&
           "need to overwrite existing value");
    unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
    assert(CacheIdx < Iter->second[Instance.Part].size() &&
           "need to overwrite existing value");
    Iter->second[Instance.Part][CacheIdx] = V;
  }

  /// Add additional metadata to \p To that was not present on \p Orig.
  ///
  /// Currently this is used to add the noalias annotations based on the
  /// inserted memchecks. Use this for instructions that are *cloned* into the
  /// vector loop.
  void addNewMetadata(Instruction *To, const Instruction *Orig);

  /// Add metadata from one instruction to another.
  ///
  /// This includes both the original MDs from \p From and additional ones (\see
  /// addNewMetadata). Use this for *newly created* instructions in the vector
  /// loop.
  void addMetadata(Value *To, Instruction *From);

  /// Set the debug location in the builder using the debug location \p DL.
  void setDebugLocFrom(DebugLoc DL);

  /// Construct the vector value of a scalarized value \p V one lane at a time.
  void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance);

  /// Hold state information used when constructing the CFG of the output IR,
  /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
  struct CFGState {
    /// The previous VPBasicBlock visited. Initially set to null.
    VPBasicBlock *PrevVPBB = nullptr;

    /// The previous IR BasicBlock created or used. Initially set to the new
    /// header BasicBlock.
    BasicBlock *PrevBB = nullptr;

    /// The last IR BasicBlock in the output IR. Set to the exit block of the
    /// vector loop.
    BasicBlock *ExitBB = nullptr;

    /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
    /// of replication, maps the BasicBlock of the last replica created.
    SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;

    /// Updater for the DominatorTree.
    DomTreeUpdater DTU;

    CFGState(DominatorTree *DT)
        : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}

    /// Returns the BasicBlock* mapped to the pre-header of the loop region
    /// containing \p R.
    BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
  } CFG;

  /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
  LoopInfo *LI;

  /// Hold a reference to the IRBuilder used to generate output IR code.
  IRBuilderBase &Builder;

  /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
  InnerLoopVectorizer *ILV;

  /// Pointer to the VPlan for which code is generated.
  VPlan *Plan;

  /// The loop object for the current parent region, or nullptr.
  Loop *CurrentVectorLoop = nullptr;

  /// LoopVersioning. It's only set up (non-null) if memchecks were
  /// used.
  ///
  /// This is currently only used to add no-alias metadata based on the
  /// memchecks. The actual versioning is performed manually.
  LoopVersioning *LVer = nullptr;

  /// Map SCEVs to their expanded values. Populated when executing
  /// VPExpandSCEVRecipes.
  DenseMap<const SCEV *, Value *> ExpandedSCEVs;

  /// VPlan-based type analysis.
  VPTypeAnalysis TypeAnalysis;
};

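// Illustrative sketch (not part of the original header): a recipe's execute()
// typically reads operands and publishes results per unroll part through the
// state; the names below are placeholders:
//
//   for (unsigned Part = 0; Part < State.UF; ++Part) {
//     Value *Op = State.get(getOperand(0), Part); // operand's vector for Part
//     Value *Res = State.Builder.CreateFNeg(Op);  // emit some output IR
//     State.set(this, Res, Part);                 // make it visible to users
//   }
//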
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {
  friend class VPBlockUtils;

  const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).

  /// An optional name for the block.
  std::string Name;

  /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
  /// it is a topmost VPBlockBase.
  VPRegionBlock *Parent = nullptr;

  /// List of predecessor blocks.
  SmallVector<VPBlockBase *, 1> Predecessors;

  /// List of successor blocks.
  SmallVector<VPBlockBase *, 1> Successors;

  /// VPlan containing the block. Can only be set on the entry block of the
  /// plan.
  VPlan *Plan = nullptr;

  /// Add \p Successor as the last successor to this block.
  void appendSuccessor(VPBlockBase *Successor) {
    assert(Successor && "Cannot add nullptr successor!");
    Successors.push_back(Successor);
  }

  /// Add \p Predecessor as the last predecessor to this block.
  void appendPredecessor(VPBlockBase *Predecessor) {
    assert(Predecessor && "Cannot add nullptr predecessor!");
    Predecessors.push_back(Predecessor);
  }

  /// Remove \p Predecessor from the predecessors of this block.
  void removePredecessor(VPBlockBase *Predecessor) {
    auto Pos = find(Predecessors, Predecessor);
    assert(Pos && "Predecessor does not exist");
    Predecessors.erase(Pos);
  }

  /// Remove \p Successor from the successors of this block.
  void removeSuccessor(VPBlockBase *Successor) {
    auto Pos = find(Successors, Successor);
    assert(Pos && "Successor does not exist");
    Successors.erase(Pos);
  }

protected:
  VPBlockBase(const unsigned char SC, const std::string &N)
      : SubclassID(SC), Name(N) {}

public:
  /// An enumeration for keeping track of the concrete subclasses of
  /// VPBlockBase that are actually instantiated. Values of this enumeration
  /// are kept in the SubclassID field of the VPBlockBase objects. They are
  /// used for concrete type identification.
  using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };

  using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;

  virtual ~VPBlockBase() = default;

  const std::string &getName() const { return Name; }

  void setName(const Twine &newName) { Name = newName.str(); }

  /// \return an ID for the concrete type of this object.
  /// This is used to implement the classof checks. This should not be used
  /// for any other purpose, as the values may change as LLVM evolves.
  unsigned getVPBlockID() const { return SubclassID; }

  VPRegionBlock *getParent() { return Parent; }
  const VPRegionBlock *getParent() const { return Parent; }

  /// \return A pointer to the plan containing the current block.
  VPlan *getPlan();
  const VPlan *getPlan() const;

  /// Sets the pointer of the plan containing the block. The block must be the
  /// entry block into the VPlan.
  void setPlan(VPlan *ParentPlan);

  void setParent(VPRegionBlock *P) { Parent = P; }

  /// \return the VPBasicBlock that is the entry of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getEntryBasicBlock() const;
  VPBasicBlock *getEntryBasicBlock();

  /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
  /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
  /// VPBlockBase is a VPBasicBlock, it is returned.
  const VPBasicBlock *getExitingBasicBlock() const;
  VPBasicBlock *getExitingBasicBlock();

  const VPBlocksTy &getSuccessors() const { return Successors; }
  VPBlocksTy &getSuccessors() { return Successors; }

  iterator_range<VPBlockBase **> successors() { return Successors; }

  const VPBlocksTy &getPredecessors() const { return Predecessors; }
  VPBlocksTy &getPredecessors() { return Predecessors; }

  /// \return the successor of this VPBlockBase if it has a single successor.
  /// Otherwise return a null pointer.
  VPBlockBase *getSingleSuccessor() const {
    return (Successors.size() == 1 ? *Successors.begin() : nullptr);
  }

  /// \return the predecessor of this VPBlockBase if it has a single
  /// predecessor. Otherwise return a null pointer.
  VPBlockBase *getSinglePredecessor() const {
    return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
  }

  size_t getNumSuccessors() const { return Successors.size(); }
  size_t getNumPredecessors() const { return Predecessors.size(); }

  /// An Enclosing Block of a block B is any block containing B, including B
  /// itself. \return the closest enclosing block starting from "this", which
  /// has successors. \return the root enclosing block if all enclosing blocks
  /// have no successors.
  VPBlockBase *getEnclosingBlockWithSuccessors();

  /// \return the closest enclosing block starting from "this", which has
  /// predecessors. \return the root enclosing block if all enclosing blocks
  /// have no predecessors.
  VPBlockBase *getEnclosingBlockWithPredecessors();

  /// \return the successors either attached directly to this VPBlockBase or, if
  /// this VPBlockBase is the exit block of a VPRegionBlock and has no
  /// successors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has successors and return them. If no such
  /// VPRegionBlock exists, return the (empty) successors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalSuccessors() {
    return getEnclosingBlockWithSuccessors()->getSuccessors();
  }

  /// \return the hierarchical successor of this VPBlockBase if it has a single
  /// hierarchical successor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalSuccessor() {
    return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
  }

  /// \return the predecessors either attached directly to this VPBlockBase or,
  /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
  /// predecessors of its own, search recursively for the first enclosing
  /// VPRegionBlock that has predecessors and return them. If no such
  /// VPRegionBlock exists, return the (empty) predecessors of the topmost
  /// VPBlockBase reached.
  const VPBlocksTy &getHierarchicalPredecessors() {
    return getEnclosingBlockWithPredecessors()->getPredecessors();
  }

  /// \return the hierarchical predecessor of this VPBlockBase if it has a
  /// single hierarchical predecessor. Otherwise return a null pointer.
  VPBlockBase *getSingleHierarchicalPredecessor() {
    return getEnclosingBlockWithPredecessors()->getSinglePredecessor();
  }

  /// Set a given VPBlockBase \p Successor as the single successor of this
  /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
  /// This VPBlockBase must have no successors.
  void setOneSuccessor(VPBlockBase *Successor) {
    assert(Successors.empty() && "Setting one successor when others exist.");
    assert(Successor->getParent() == getParent() &&
           "connected blocks must have the same parent");
    appendSuccessor(Successor);
  }

  /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
  /// successors of this VPBlockBase. This VPBlockBase is not added as
  /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
  /// successors.
  void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
    assert(Successors.empty() && "Setting two successors when others exist.");
    appendSuccessor(IfTrue);
    appendSuccessor(IfFalse);
  }

  /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
  /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
  /// as successor of any VPBasicBlock in \p NewPreds.
  void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {
    assert(Predecessors.empty() && "Block predecessors already set.");
    for (auto *Pred : NewPreds)
      appendPredecessor(Pred);
  }

  /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
  /// This VPBlockBase must have no successors. This VPBlockBase is not added
  /// as predecessor of any VPBasicBlock in \p NewSuccs.
  void setSuccessors(ArrayRef<VPBlockBase *> NewSuccs) {
    assert(Successors.empty() && "Block successors already set.");
    for (auto *Succ : NewSuccs)
      appendSuccessor(Succ);
  }

  /// Remove all the predecessors of this block.
  void clearPredecessors() { Predecessors.clear(); }

  /// Remove all the successors of this block.
  void clearSuccessors() { Successors.clear(); }

  /// The method which generates the output IR that corresponds to this
  /// VPBlockBase, thereby "executing" the VPlan.
  virtual void execute(VPTransformState *State) = 0;

  /// Return the cost of the block.
  virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;

  /// Delete all blocks reachable from a given VPBlockBase, inclusive.
  static void deleteCFG(VPBlockBase *Entry);

  /// Return true if it is legal to hoist instructions into this block.
  bool isLegalToHoistInto() {
    // There are currently no constraints that prevent an instruction from
    // being hoisted into a VPBlockBase.
    return true;
  }

  /// Replace all operands of VPUsers in the block with \p NewValue and also
  /// replaces all uses of VPValues defined in the block with NewValue.
  virtual void dropAllReferences(VPValue *NewValue) = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printAsOperand(raw_ostream &OS, bool PrintType) const {
    OS << getName();
  }

  /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
  /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  virtual void print(raw_ostream &O, const Twine &Indent,
                     VPSlotTracker &SlotTracker) const = 0;

  /// Print plain-text dump of this VPlan to \p O.
  void print(raw_ostream &O) const {
    VPSlotTracker SlotTracker(getPlan());
    print(O, "", SlotTracker);
  }

  /// Print the successors of this block to \p O, prefixing all lines with \p
  /// Indent.
  void printSuccessors(raw_ostream &O, const Twine &Indent) const;

  /// Dump this VPBlockBase to dbgs().
  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif

  /// Clone the current block and its recipes without updating the operands of
  /// the cloned recipes, including all blocks in the single-entry single-exit
  /// region for VPRegionBlocks.
  virtual VPBlockBase *clone() = 0;
};

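// Illustrative sketch (not part of the original header): successor and
// predecessor lists are maintained separately, so wiring a diamond requires
// setting both directions explicitly (Entry/Then/Else/Merge are placeholder
// blocks sharing the same parent region):
//
//   Entry->setTwoSuccessors(Then, Else);
//   Then->setPredecessors({Entry});
//   Else->setPredecessors({Entry});
//   Then->setOneSuccessor(Merge);
//   Else->setOneSuccessor(Merge);
//   Merge->setPredecessors({Then, Else});
//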
/// A value that is used outside the VPlan. The operand of the user needs to be
/// added to the associated phi node. The incoming block from VPlan is
/// determined by where the VPValue is defined: if it is defined by a recipe
/// outside a region, its parent block is used, otherwise the middle block is
/// used.
class VPLiveOut : public VPUser {
  PHINode *Phi;

public:
  VPLiveOut(PHINode *Phi, VPValue *Op)
      : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}

  static inline bool classof(const VPUser *U) {
    return U->getVPUserID() == VPUser::VPUserID::LiveOut;
  }

  /// Fix the wrapped phi node. This means adding an incoming value to exit
  /// block phi's from the vector loop via middle block (values from scalar loop
  /// already reach these phi's), and updating the value to scalar header phi's
  /// from the scalar preheader.
  void fixPhi(VPlan &Plan, VPTransformState &State);

  /// Returns true if the VPLiveOut uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  PHINode *getPhi() const { return Phi; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPLiveOut to \p O.
  void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
#endif
};

/// Struct to hold various analyses needed for cost computations.
struct VPCostContext {
  const TargetTransformInfo &TTI;
  VPTypeAnalysis Types;
  LLVMContext &LLVMCtx;
  LoopVectorizationCostModel &CM;
  SmallPtrSet<Instruction *, 8> SkipCostComputation;

  VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy,
                LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM)
      : TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}

  /// Return the cost for \p UI with \p VF using the legacy cost model as
  /// fallback until computing the cost of all recipes migrates to VPlan.
  InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const;

  /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
  /// has already been pre-computed.
  bool skipCostComputation(Instruction *UI, bool IsVector) const;
};

/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
/// VPRecipeBase and VPValue separately.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
                     public VPDef,
                     public VPUser {
  friend VPBasicBlock;
  friend class VPBlockUtils;

  /// Each VPRecipe belongs to a single VPBasicBlock.
  VPBasicBlock *Parent = nullptr;

  /// The debug location for the recipe.
  DebugLoc DL;

public:
  VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}

  template <typename IterT>
  VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands,
               DebugLoc DL = {})
      : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}
  virtual ~VPRecipeBase() = default;

  /// Clone the current recipe.
  virtual VPRecipeBase *clone() = 0;

  /// \return the VPBasicBlock which this VPRecipe belongs to.
  VPBasicBlock *getParent() { return Parent; }
  const VPBasicBlock *getParent() const { return Parent; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRecipe, thereby "executing" the VPlan.
  virtual void execute(VPTransformState &State) = 0;

  /// Return the cost of this recipe, taking into account if the cost
  /// computation should be skipped and the ForceTargetInstructionCost flag.
  /// Also takes care of printing the cost for debugging.
  virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx);

  /// Insert an unlinked recipe into a basic block immediately before
  /// the specified recipe.
  void insertBefore(VPRecipeBase *InsertPos);
  /// Insert an unlinked recipe into \p BB immediately before the insertion
  /// point \p IP.
  void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);

  /// Insert an unlinked Recipe into a basic block immediately after
  /// the specified Recipe.
  void insertAfter(VPRecipeBase *InsertPos);

  /// Unlink this recipe from its current VPBasicBlock and insert it into
  /// the VPBasicBlock that MovePos lives in, right after MovePos.
  void moveAfter(VPRecipeBase *MovePos);

  /// Unlink this recipe and insert into BB before I.
  ///
  /// \pre I is a valid iterator into BB.
  void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);

  /// This method unlinks 'this' from the containing basic block, but does not
  /// delete it.
  void removeFromParent();

  /// This method unlinks 'this' from the containing basic block and deletes it.
  ///
  /// \returns an iterator pointing to the element after the erased one
  iplist<VPRecipeBase>::iterator eraseFromParent();

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPDef *D) {
    // All VPDefs are also VPRecipeBases.
    return true;
  }

  static inline bool classof(const VPUser *U) {
    return U->getVPUserID() == VPUser::VPUserID::Recipe;
  }

  /// Returns true if the recipe may have side-effects.
  bool mayHaveSideEffects() const;

  /// Returns true for PHI-like recipes.
  bool isPhi() const {
    return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
  }

  /// Returns true if the recipe may read from memory.
  bool mayReadFromMemory() const;

  /// Returns true if the recipe may write to memory.
  bool mayWriteToMemory() const;

  /// Returns true if the recipe may read from or write to memory.
  bool mayReadOrWriteMemory() const {
    return mayReadFromMemory() || mayWriteToMemory();
  }

  /// Returns the debug location of the recipe.
  DebugLoc getDebugLoc() const { return DL; }

protected:
  /// Compute the cost of this recipe using the legacy cost model and the
  /// underlying instructions.
  InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
};

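// Illustrative sketch (not part of the original header): recipes can be
// repositioned within and across VPBasicBlocks much like IR instructions;
// R and Def are placeholder recipes:
//
//   R->moveAfter(Def);               // unlink R and relink it after Def
//   auto It = R->eraseFromParent();  // or unlink and delete R, keeping an
//                                    // iterator to continue a walk
//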
// Helper macro to define common classof implementations for recipes.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPDefID() == VPDefID;                                         \
  }

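// Illustrative sketch (not part of the original header): the overloads defined
// by VP_CLASSOF_IMPL let the usual isa/dyn_cast idiom work uniformly on the
// VPDef, VPValue, VPUser and VPRecipeBase views of a recipe:
//
//   if (auto *VPI = dyn_cast<VPInstruction>(&R)) // R is some VPRecipeBase
//     visit(VPI->getOpcode());                   // visit is a placeholder
//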
/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
/// more output IR instructions that define a single result VPValue.
/// Note that VPRecipeBase must be inherited from before VPValue.
class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
public:
  template <typename IterT>
  VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this) {}

  VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
                    DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this) {}

  template <typename IterT>
  VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
                    DebugLoc DL = {})
      : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}

  static inline bool classof(const VPRecipeBase *R) {
    switch (R->getVPDefID()) {
    case VPRecipeBase::VPDerivedIVSC:
    case VPRecipeBase::VPEVLBasedIVPHISC:
    case VPRecipeBase::VPExpandSCEVSC:
    case VPRecipeBase::VPInstructionSC:
    case VPRecipeBase::VPReductionEVLSC:
    case VPRecipeBase::VPReductionSC:
    case VPRecipeBase::VPReplicateSC:
    case VPRecipeBase::VPScalarIVStepsSC:
    case VPRecipeBase::VPVectorPointerSC:
    case VPRecipeBase::VPWidenCallSC:
    case VPRecipeBase::VPWidenCanonicalIVSC:
    case VPRecipeBase::VPWidenCastSC:
    case VPRecipeBase::VPWidenGEPSC:
    case VPRecipeBase::VPWidenSC:
    case VPRecipeBase::VPWidenSelectSC:
    case VPRecipeBase::VPBlendSC:
    case VPRecipeBase::VPPredInstPHISC:
    case VPRecipeBase::VPCanonicalIVPHISC:
    case VPRecipeBase::VPActiveLaneMaskPHISC:
    case VPRecipeBase::VPFirstOrderRecurrencePHISC:
    case VPRecipeBase::VPWidenPHISC:
    case VPRecipeBase::VPWidenIntOrFpInductionSC:
    case VPRecipeBase::VPWidenPointerInductionSC:
    case VPRecipeBase::VPReductionPHISC:
    case VPRecipeBase::VPScalarCastSC:
      return true;
    case VPRecipeBase::VPInterleaveSC:
    case VPRecipeBase::VPBranchOnMaskSC:
    case VPRecipeBase::VPWidenLoadEVLSC:
    case VPRecipeBase::VPWidenLoadSC:
    case VPRecipeBase::VPWidenStoreEVLSC:
    case VPRecipeBase::VPWidenStoreSC:
      // TODO: Widened stores don't define a value, but widened loads do. Split
      // the recipes to be able to make widened loads VPSingleDefRecipes.
      return false;
    }
    llvm_unreachable("Unhandled VPDefID");
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  virtual VPSingleDefRecipe *clone() override = 0;

  /// Returns the underlying instruction.
  Instruction *getUnderlyingInstr() {
    return cast<Instruction>(getUnderlyingValue());
  }
  const Instruction *getUnderlyingInstr() const {
    return cast<Instruction>(getUnderlyingValue());
  }
};

/// Class to record LLVM IR flags for a recipe along with it.
class VPRecipeWithIRFlags : public VPSingleDefRecipe {
  enum class OperationType : unsigned char {
    Cmp,
    OverflowingBinOp,
    DisjointOp,
    PossiblyExactOp,
    GEPOp,
    FPMathOp,
    NonNegOp,
    Other
  };

public:
  struct WrapFlagsTy {
    char HasNUW : 1;
    char HasNSW : 1;

    WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
  };

  struct DisjointFlagsTy {
    char IsDisjoint : 1;
    DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
  };

protected:
  struct GEPFlagsTy {
    char IsInBounds : 1;
    GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
  };

private:
  struct ExactFlagsTy {
    char IsExact : 1;
  };
  struct NonNegFlagsTy {
    char NonNeg : 1;
  };
  struct FastMathFlagsTy {
    char AllowReassoc : 1;
    char NoNaNs : 1;
    char NoInfs : 1;
    char NoSignedZeros : 1;
    char AllowReciprocal : 1;
    char AllowContract : 1;
    char ApproxFunc : 1;

    FastMathFlagsTy(const FastMathFlags &FMF);
  };

  OperationType OpType;

  union {
    CmpInst::Predicate CmpPredicate;
    WrapFlagsTy WrapFlags;
    DisjointFlagsTy DisjointFlags;
    ExactFlagsTy ExactFlags;
    GEPFlagsTy GEPFlags;
    NonNegFlagsTy NonNegFlags;
    FastMathFlagsTy FMFs;
    unsigned AllFlags;
  };

protected:
  void transferFlags(VPRecipeWithIRFlags &Other) {
    OpType = Other.OpType;
    AllFlags = Other.AllFlags;
  }

public:
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL) {
    OpType = OperationType::Other;
    AllFlags = 0;
  }

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
      : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()) {
    if (auto *Op = dyn_cast<CmpInst>(&I)) {
      OpType = OperationType::Cmp;
      CmpPredicate = Op->getPredicate();
    } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
      OpType = OperationType::DisjointOp;
      DisjointFlags.IsDisjoint = Op->isDisjoint();
    } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
      OpType = OperationType::OverflowingBinOp;
      WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
    } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
      OpType = OperationType::PossiblyExactOp;
      ExactFlags.IsExact = Op->isExact();
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      OpType = OperationType::GEPOp;
      GEPFlags.IsInBounds = GEP->isInBounds();
    } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
      OpType = OperationType::NonNegOp;
      NonNegFlags.NonNeg = PNNI->hasNonNeg();
    } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
      OpType = OperationType::FPMathOp;
      FMFs = Op->getFastMathFlags();
    } else {
      OpType = OperationType::Other;
      AllFlags = 0;
    }
  }

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      CmpInst::Predicate Pred, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
        CmpPredicate(Pred) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      WrapFlagsTy WrapFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL),
        OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      FastMathFlags FMFs, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
        FMFs(FMFs) {}

  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      DisjointFlagsTy DisjointFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
        DisjointFlags(DisjointFlags) {}

protected:
  template <typename IterT>
  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
                      GEPFlagsTy GEPFlags, DebugLoc DL = {})
      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
        GEPFlags(GEPFlags) {}

public:
  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
           R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
           R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  /// Drop all poison-generating flags.
  void dropPoisonGeneratingFlags() {
    // NOTE: This needs to be kept in-sync with
    // Instruction::dropPoisonGeneratingFlags.
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      WrapFlags.HasNUW = false;
      WrapFlags.HasNSW = false;
      break;
    case OperationType::DisjointOp:
      DisjointFlags.IsDisjoint = false;
      break;
    case OperationType::PossiblyExactOp:
      ExactFlags.IsExact = false;
      break;
    case OperationType::GEPOp:
      GEPFlags.IsInBounds = false;
      break;
    case OperationType::FPMathOp:
      FMFs.NoNaNs = false;
      FMFs.NoInfs = false;
      break;
    case OperationType::NonNegOp:
      NonNegFlags.NonNeg = false;
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  /// Set the IR flags for \p I.
  void setFlags(Instruction *I) const {
    switch (OpType) {
    case OperationType::OverflowingBinOp:
      I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
      I->setHasNoSignedWrap(WrapFlags.HasNSW);
      break;
    case OperationType::DisjointOp:
      cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
      break;
    case OperationType::PossiblyExactOp:
      I->setIsExact(ExactFlags.IsExact);
      break;
    case OperationType::GEPOp:
      // TODO(gep_nowrap): Track the full GEPNoWrapFlags in VPlan.
      cast<GetElementPtrInst>(I)->setNoWrapFlags(
          GEPFlags.IsInBounds ? GEPNoWrapFlags::inBounds()
                              : GEPNoWrapFlags::none());
      break;
    case OperationType::FPMathOp:
      I->setHasAllowReassoc(FMFs.AllowReassoc);
      I->setHasNoNaNs(FMFs.NoNaNs);
      I->setHasNoInfs(FMFs.NoInfs);
      I->setHasNoSignedZeros(FMFs.NoSignedZeros);
      I->setHasAllowReciprocal(FMFs.AllowReciprocal);
      I->setHasAllowContract(FMFs.AllowContract);
      I->setHasApproxFunc(FMFs.ApproxFunc);
      break;
    case OperationType::NonNegOp:
      I->setNonNeg(NonNegFlags.NonNeg);
      break;
    case OperationType::Cmp:
    case OperationType::Other:
      break;
    }
  }

  CmpInst::Predicate getPredicate() const {
    assert(OpType == OperationType::Cmp &&
           "recipe doesn't have a compare predicate");
    return CmpPredicate;
  }

  bool isInBounds() const {
    assert(OpType == OperationType::GEPOp &&
           "recipe doesn't have inbounds flag");
    return GEPFlags.IsInBounds;
  }

  /// Returns true if the recipe has fast-math flags.
  bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }

  FastMathFlags getFastMathFlags() const;

  bool hasNoUnsignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NUW flag");
    return WrapFlags.HasNUW;
  }

  bool hasNoSignedWrap() const {
    assert(OpType == OperationType::OverflowingBinOp &&
           "recipe doesn't have a NSW flag");
    return WrapFlags.HasNSW;
  }

  bool isDisjoint() const {
    assert(OpType == OperationType::DisjointOp &&
           "recipe cannot have a disjoint flag");
    return DisjointFlags.IsDisjoint;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void printFlags(raw_ostream &O) const;
#endif
};

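// Illustrative sketch (not part of the original header): flags recorded from
// the original ingredient are re-applied to, or dropped from, the emitted
// output IR; NewI and MaySpeculate are placeholders:
//
//   if (MaySpeculate)               // e.g. the recipe is being hoisted
//     dropPoisonGeneratingFlags();  // clears nuw/nsw, exact, inbounds, ...
//   setFlags(NewI);                 // apply the (remaining) recorded flags
//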
/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While, as any Recipe, it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPRecipeWithIRFlags {
  friend class VPlanSlp;

public:
  /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
  enum {
    FirstOrderRecurrenceSplice =
        Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
                                      // values of a first-order recurrence.
    Not,
    SLPLoad,
    SLPStore,
    ActiveLaneMask,
    ExplicitVectorLength,
    /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
    /// The first operand is the incoming value from the predecessor in VPlan,
    /// the second operand is the incoming value for all other predecessors
    /// (which are currently not modeled in VPlan).
    ResumePhi,
    CalculateTripCountMinusVF,
    // Increment the canonical IV separately for each unrolled part.
    CanonicalIVIncrementForPart,
    BranchOnCount,
    BranchOnCond,
    ComputeReductionResult,
    // Takes the VPValue to extract from as first operand and the lane or part
    // to extract as second operand, counting from the end starting with 1 for
    // last. The second operand must be a positive constant and <= VF when
    // extracting from a vector or <= UF when extracting from an unrolled
    // scalar.
    ExtractFromEnd,
    LogicalAnd, // Non-poison propagating logical And.
    // Add an offset in bytes (second operand) to a base pointer (first
    // operand). Only generates scalar values (either for the first lane only
    // or for all lanes, depending on its uses).
    PtrAdd,
  };

private:
  typedef unsigned char OpcodeTy;
  OpcodeTy Opcode;

  /// An optional name that can be used for the generated IR instruction.
  const std::string Name;

  /// Returns true if this VPInstruction generates scalar values for all lanes.
  /// Most VPInstructions generate a single value per part, either vector or
  /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
  /// values per all lanes, stemming from an original ingredient. This method
  /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
  /// underlying ingredient.
  bool doesGeneratePerAllLanes() const;

  /// Returns true if we can generate a scalar for the first lane only if
  /// needed.
  bool canGenerateScalarForFirstLane() const;

  /// Utility methods serving execute(): generates a single instance of the
  /// modeled instruction for a given part. \returns the generated value for \p
  /// Part. In some cases an existing value is returned rather than a generated
  /// one.
  Value *generatePerPart(VPTransformState &State, unsigned Part);

  /// Utility methods serving execute(): generates a scalar single instance of
  /// the modeled instruction for a given lane. \returns the scalar generated
  /// value for lane \p Lane.
  Value *generatePerLane(VPTransformState &State, const VPIteration &Lane);

#if !defined(NDEBUG)
  /// Return true if the VPInstruction is a floating point math operation, i.e.
  /// has fast-math flags.
  bool isFPMathOp() const;
#endif

public:
  VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
                const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
        Opcode(Opcode), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                DebugLoc DL = {}, const Twine &Name = "")
      : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}

  VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
                VPValue *B, DebugLoc DL = {}, const Twine &Name = "");

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
        Opcode(Opcode), Name(Name.str()) {}

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
                const Twine &Name = "")
      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
        Opcode(Opcode), Name(Name.str()) {
    assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
  }

  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");

  VP_CLASSOF_IMPL(VPDef::VPInstructionSC)

  VPInstruction *clone() override {
    SmallVector<VPValue *, 2> Operands(operands());
    auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
    New->transferFlags(*this);
    return New;
  }

  unsigned getOpcode() const { return Opcode; }

  /// Generate the instruction.
  /// TODO: We currently execute only per-part unless a specific instance is
  /// provided.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the VPInstruction to \p O.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;

  /// Print the VPInstruction to dbgs() (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Return true if this instruction may modify memory.
  bool mayWriteToMemory() const {
    // TODO: we can use attributes of the called function to rule out memory
    // modifications.
    return Opcode == Instruction::Store || Opcode == Instruction::Call ||
           Opcode == Instruction::Invoke || Opcode == SLPStore;
  }

  bool hasResult() const {
    // CallInst may or may not have a result, depending on the called function.
    // Conservatively assume that calls have results for now.
    switch (getOpcode()) {
    case Instruction::Ret:
    case Instruction::Br:
    case Instruction::Store:
    case Instruction::Switch:
    case Instruction::IndirectBr:
    case Instruction::Resume:
    case Instruction::CatchRet:
    case Instruction::Unreachable:
    case Instruction::Fence:
    case Instruction::AtomicRMW:
    case VPInstruction::BranchOnCond:
    case VPInstruction::BranchOnCount:
      return false;
    default:
      return true;
    }
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override;

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override;

  /// Returns true if this VPInstruction produces a scalar value from a vector,
  /// e.g. by performing a reduction or extracting a lane.
  bool isVectorToScalar() const;

  /// Returns true if this VPInstruction's operands are single scalars and the
  /// result is also a single scalar.
  bool isSingleScalar() const;
};

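// Illustrative sketch (not part of the original header): VPlan construction
// and transforms create VPInstructions directly; IV, Step and TC are
// placeholder VPValues and DL a DebugLoc:
//
//   auto *Inc = new VPInstruction(Instruction::Add, {IV, Step},
//                                 WrapFlagsTy(true, false), DL, "index.next");
//   auto *Br = new VPInstruction(VPInstruction::BranchOnCount, {Inc, TC}, DL);
//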
/// VPWidenRecipe is a recipe for producing a widened instruction using the
/// opcode and operands of the recipe. This recipe covers most of the
/// traditional vectorization cases where each recipe transforms into a
/// vectorized version of itself.
class VPWidenRecipe : public VPRecipeWithIRFlags {
  unsigned Opcode;

public:
  template <typename IterT>
  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
      : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
        Opcode(I.getOpcode()) {}

  ~VPWidenRecipe() override = default;

  VPWidenRecipe *clone() override {
    auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
    R->transferFlags(*this);
    return R;
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenSC)

  /// Produce a widened instruction using the opcode and operands of the
  /// recipe, processing State.VF elements.
  void execute(VPTransformState &State) override;

  unsigned getOpcode() const { return Opcode; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
  /// Cast instruction opcode.
  Instruction::CastOps Opcode;

  /// Result type for the cast.
  Type *ResultTy;

public:
  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
                    CastInst &UI)
      : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
        ResultTy(ResultTy) {
    assert(UI.getOpcode() == Opcode &&
           "opcode of underlying cast doesn't match");
  }

  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
      : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
        ResultTy(ResultTy) {}

  ~VPWidenCastRecipe() override = default;

  VPWidenCastRecipe *clone() override {
    if (auto *UV = getUnderlyingValue())
      return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
                                   *cast<CastInst>(UV));

    return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)

  /// Produce widened copies of the cast.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  Instruction::CastOps getOpcode() const { return Opcode; }

  /// Returns the result type of the cast.
  Type *getResultType() const { return ResultTy; }
};

/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
class VPScalarCastRecipe : public VPSingleDefRecipe {
  Instruction::CastOps Opcode;

  Type *ResultTy;

  Value *generate(VPTransformState &State, unsigned Part);

public:
  VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
      : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
        ResultTy(ResultTy) {}

  ~VPScalarCastRecipe() override = default;

  VPScalarCastRecipe *clone() override {
    return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy);
  }

  VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)

  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the result type of the cast.
  Type *getResultType() const { return ResultTy; }

  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    // At the moment, only uniform codegen is implemented.
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// A recipe for widening Call instructions.
1526
class VPWidenCallRecipe : public VPSingleDefRecipe {
1527
/// ID of the vector intrinsic to call when widening the call. If set the
1528
/// Intrinsic::not_intrinsic, a library call will be used instead.
1529
Intrinsic::ID VectorIntrinsicID;
1530
/// If this recipe represents a library call, Variant stores a pointer to
1531
/// the chosen function. There is a 1:1 mapping between a given VF and the
1532
/// chosen vectorized variant, so there will be a different vplan for each
1533
/// VF with a valid variant.
1534
Function *Variant;
1535
1536
public:
1537
template <typename IterT>
1538
VPWidenCallRecipe(Value *UV, iterator_range<IterT> CallArguments,
1539
Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
1540
Function *Variant = nullptr)
1541
: VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, UV, DL),
1542
VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {
1543
assert(
1544
isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1545
"last operand must be the called function");
1546
}
1547
1548
~VPWidenCallRecipe() override = default;
1549
1550
VPWidenCallRecipe *clone() override {
1551
return new VPWidenCallRecipe(getUnderlyingValue(), operands(),
1552
VectorIntrinsicID, getDebugLoc(), Variant);
1553
}
1554
1555
VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1556
1557
/// Produce a widened version of the call instruction.
1558
void execute(VPTransformState &State) override;
1559
1560
Function *getCalledScalarFunction() const {
1561
return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
1562
}
1563
1564
operand_range arg_operands() {
1565
return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1566
}
1567
const_operand_range arg_operands() const {
1568
return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1569
}
1570
1571
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1572
/// Print the recipe.
1573
void print(raw_ostream &O, const Twine &Indent,
1574
VPSlotTracker &SlotTracker) const override;
1575
#endif
1576
};
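
// Editor's note: illustrative sketch, not part of the original header. It
// shows the operand layout of a two-argument widened call: the callee is
// always the last operand, and arg_operands() covers everything before it.
// `CI` (a CallInst), the argument VPValues `A0` and `A1`, and `CalleeVPV` (a
// live-in VPValue wrapping the called Function) are hypothetical.
//
// \code
//   VPValue *Ops[] = {A0, A1, CalleeVPV};
//   VPWidenCallRecipe Call(CI, make_range(std::begin(Ops), std::end(Ops)),
//                          Intrinsic::not_intrinsic);
//   assert(std::distance(Call.arg_operands().begin(),
//                        Call.arg_operands().end()) == 2);
//   assert(Call.getCalledScalarFunction() ==
//          cast<Function>(CalleeVPV->getLiveInIRValue()));
// \endcode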

/// A recipe for widening select instructions.
struct VPWidenSelectRecipe : public VPSingleDefRecipe {
  template <typename IterT>
  VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
      : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
                          I.getDebugLoc()) {}

  ~VPWidenSelectRecipe() override = default;

  VPWidenSelectRecipe *clone() override {
    return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
                                   operands());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)

  /// Produce a widened version of the select instruction.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getCond() const {
    return getOperand(0);
  }

  bool isInvariantCond() const {
    return getCond()->isDefinedOutsideVectorRegions();
  }
};

/// A recipe for handling GEP instructions.
class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
  bool isPointerLoopInvariant() const {
    return getOperand(0)->isDefinedOutsideVectorRegions();
  }

  bool isIndexLoopInvariant(unsigned I) const {
    return getOperand(I + 1)->isDefinedOutsideVectorRegions();
  }

  bool areAllOperandsInvariant() const {
    return all_of(operands(), [](VPValue *Op) {
      return Op->isDefinedOutsideVectorRegions();
    });
  }

public:
  template <typename IterT>
  VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
      : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}

  ~VPWidenGEPRecipe() override = default;

  VPWidenGEPRecipe *clone() override {
    return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
                                operands());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)

  /// Generate the gep nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe to compute the pointers for widened memory accesses of IndexedTy
/// for all parts. If IsReverse is true, compute pointers for accessing the
/// input in reverse order per part.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
  Type *IndexedTy;
  bool IsReverse;

public:
  VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
                        bool IsInBounds, DebugLoc DL)
      : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
                            GEPFlagsTy(IsInBounds), DL),
        IndexedTy(IndexedTy), IsReverse(IsReverse) {}

  VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)

  void execute(VPTransformState &State) override;

  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  VPVectorPointerRecipe *clone() override {
    return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
                                     isInBounds(), getDebugLoc());
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A pure virtual base class for all recipes modeling header phis, including
/// phis for first order recurrences, pointer inductions and reductions. The
/// start value is the first operand of the recipe and the incoming value from
/// the backedge is the second operand.
///
/// Inductions are modeled using the following sub-classes:
///  * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
///    starting at a specified value (zero for the main vector loop, the resume
///    value for the epilogue vector loop) and stepping by 1. The induction
///    controls exiting of the vector loop by comparing against the vector trip
///    count. Produces a single scalar PHI for the induction value per
///    iteration.
///  * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
///    floating point inductions with arbitrary start and step values. Produces
///    a vector PHI per-part.
///  * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
///    value of an IV with different start and step values. Produces a single
///    scalar value per iteration.
///  * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
///    canonical or derived induction.
///  * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
///    pointer induction. Produces either a vector PHI per-part or scalar
///    values per-lane based on the canonical induction.
class VPHeaderPHIRecipe : public VPSingleDefRecipe {
protected:
  VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
                    VPValue *Start = nullptr, DebugLoc DL = {})
      : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
    if (Start)
      addOperand(Start);
  }

public:
  ~VPHeaderPHIRecipe() override = default;

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPRecipeBase *B) {
    return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
  }
  static inline bool classof(const VPValue *V) {
    auto *B = V->getDefiningRecipe();
    return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
           B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
  }

  /// Generate the phi nodes.
  void execute(VPTransformState &State) override = 0;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override = 0;
#endif

  /// Returns the start value of the phi, if one is set.
  VPValue *getStartValue() {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }
  VPValue *getStartValue() const {
    return getNumOperands() == 0 ? nullptr : getOperand(0);
  }

  /// Update the start value of the recipe.
  void setStartValue(VPValue *V) { setOperand(0, V); }

  /// Returns the incoming value from the loop backedge.
  virtual VPValue *getBackedgeValue() {
    return getOperand(1);
  }

  /// Returns the backedge value as a recipe. The backedge value is guaranteed
  /// to be a recipe.
  virtual VPRecipeBase &getBackedgeRecipe() {
    return *getBackedgeValue()->getDefiningRecipe();
  }
};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector values.
class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
  PHINode *IV;
  TruncInst *Trunc;
  const InductionDescriptor &IndDesc;

public:
  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc)
      : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
        Trunc(nullptr), IndDesc(IndDesc) {
    addOperand(Step);
  }

  VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc,
                                TruncInst *Trunc)
      : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
        IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
    addOperand(Step);
  }

  ~VPWidenIntOrFpInductionRecipe() override = default;

  VPWidenIntOrFpInductionRecipe *clone() override {
    return new VPWidenIntOrFpInductionRecipe(IV, getStartValue(),
                                             getStepValue(), IndDesc, Trunc);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)

  /// Generate the vectorized and scalarized versions of the phi node as
  /// needed by their users.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getBackedgeValue() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  VPRecipeBase &getBackedgeRecipe() override {
    // TODO: All operands of base recipe must exist and be at same index in
    // derived recipe.
    llvm_unreachable(
        "VPWidenIntOrFpInductionRecipe generates its own backedge value");
  }

  /// Returns the step value of the induction.
  VPValue *getStepValue() { return getOperand(1); }
  const VPValue *getStepValue() const { return getOperand(1); }

  /// Returns the first defined value as a TruncInst, if it is one, or nullptr
  /// otherwise.
  TruncInst *getTruncInst() { return Trunc; }
  const TruncInst *getTruncInst() const { return Trunc; }

  PHINode *getPHINode() { return IV; }

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

  /// Returns true if the induction is canonical, i.e. starting at 0 and
  /// incremented by UF * VF (= the original IV is incremented by 1) and has
  /// the same type as the canonical induction.
  bool isCanonical() const;

  /// Returns the scalar type of the induction.
  Type *getScalarType() const {
    return Trunc ? Trunc->getType() : IV->getType();
  }
};
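
// Editor's note: illustrative sketch, not part of the original header. The
// operand order after construction is {Start, Step}; semantically, for an
// integer induction with start S and step T at VF = 4, execute() emits a
// vector phi starting at <S, S+T, S+2*T, S+3*T> that advances by a splat of
// VF * T per part. `Phi`, `Start`, `Step` and `IndDesc` are hypothetical.
//
// \code
//   VPWidenIntOrFpInductionRecipe WidenIV(Phi, Start, Step, IndDesc);
//   assert(WidenIV.getStartValue() == Start); // operand 0
//   assert(WidenIV.getStepValue() == Step);   // operand 1
//   assert(!WidenIV.getTruncInst());          // no truncation requested
// \endcode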

class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
  const InductionDescriptor &IndDesc;

  bool IsScalarAfterVectorization;

public:
  /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
  /// Start.
  VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,
                                const InductionDescriptor &IndDesc,
                                bool IsScalarAfterVectorization)
      : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
        IndDesc(IndDesc),
        IsScalarAfterVectorization(IsScalarAfterVectorization) {
    addOperand(Start);
    addOperand(Step);
  }

  ~VPWidenPointerInductionRecipe() override = default;

  VPWidenPointerInductionRecipe *clone() override {
    return new VPWidenPointerInductionRecipe(
        cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
        IndDesc, IsScalarAfterVectorization);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)

  /// Generate vector values for the pointer induction.
  void execute(VPTransformState &State) override;

  /// Returns true if only scalar values will be generated.
  bool onlyScalarsGenerated(bool IsScalable);

  /// Returns the induction descriptor for the recipe.
  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for handling phis that are widened in the vector loop.
/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
/// managed in the recipe directly.
class VPWidenPHIRecipe : public VPSingleDefRecipe {
  /// List of incoming blocks. Only used in the VPlan native path.
  SmallVector<VPBasicBlock *, 2> IncomingBlocks;

public:
  /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
  VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
      : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
    if (Start)
      addOperand(Start);
  }

  VPWidenPHIRecipe *clone() override {
    llvm_unreachable("cloning not implemented yet");
  }

  ~VPWidenPHIRecipe() override = default;

  VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
  void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
    addOperand(IncomingV);
    IncomingBlocks.push_back(IncomingBlock);
  }

  /// Returns the \p I th incoming VPBasicBlock.
  VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }

  /// Returns the \p I th incoming VPValue.
  VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
};

/// A recipe for handling first-order recurrence phis. The start value is the
/// first operand of the recipe and the incoming value from the backedge is the
/// second operand.
struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
  VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
      : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}

  VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
  }

  VPFirstOrderRecurrencePHIRecipe *clone() override {
    return new VPFirstOrderRecurrencePHIRecipe(
        cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
  }

  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for handling reduction phis. The start value is the first operand
/// of the recipe and the incoming value from the backedge is the second
/// operand.
class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
  /// Descriptor for the reduction.
  const RecurrenceDescriptor &RdxDesc;

  /// The phi is part of an in-loop reduction.
  bool IsInLoop;

  /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
  bool IsOrdered;

public:
  /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by
  /// \p RdxDesc.
  VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
                       VPValue &Start, bool IsInLoop = false,
                       bool IsOrdered = false)
      : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
        RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
    assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
  }

  ~VPReductionPHIRecipe() override = default;

  VPReductionPHIRecipe *clone() override {
    auto *R =
        new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
                                 *getOperand(0), IsInLoop, IsOrdered);
    R->addOperand(getBackedgeValue());
    return R;
  }

  VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *R) {
    return R->getVPDefID() == VPDef::VPReductionPHISC;
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const RecurrenceDescriptor &getRecurrenceDescriptor() const {
    return RdxDesc;
  }

  /// Returns true, if the phi is part of an ordered reduction.
  bool isOrdered() const { return IsOrdered; }

  /// Returns true, if the phi is part of an in-loop reduction.
  bool isInLoop() const { return IsInLoop; }
};

/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class VPBlendRecipe : public VPSingleDefRecipe {
public:
  /// The blend operation is a User of the incoming values and of their
  /// respective masks, ordered [I0, I1, M1, I2, M2, ...]. Note that the first
  /// incoming value does not have a mask associated.
  VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
      : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
    assert((Operands.size() + 1) % 2 == 0 &&
           "Expected an odd number of operands");
  }

  VPBlendRecipe *clone() override {
    SmallVector<VPValue *> Ops(operands());
    return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
  }

  VP_CLASSOF_IMPL(VPDef::VPBlendSC)

  /// Return the number of incoming values, taking into account that the first
  /// incoming value has no mask.
  unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }

  /// Return incoming value number \p Idx.
  VPValue *getIncomingValue(unsigned Idx) const {
    return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - 1);
  }

  /// Return mask number \p Idx.
  VPValue *getMask(unsigned Idx) const {
    assert(Idx > 0 && "First index has no mask associated.");
    return getOperand(Idx * 2);
  }

  /// Generate the phi/select nodes.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Recursing through Blend recipes only; the recursion must terminate at
    // header phis at the latest.
    return all_of(users(),
                  [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
  }
};
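
// Editor's note: illustrative sketch, not part of the original header. It
// spells out the [I0, I1, M1, I2, M2] operand layout and the index
// arithmetic above for a blend of three incoming values; `Phi` and the
// VPValues `I0`, `I1`, `M1`, `I2`, `M2` are hypothetical.
//
// \code
//   VPValue *Ops[] = {I0, I1, M1, I2, M2};
//   VPBlendRecipe Blend(Phi, Ops);
//   assert(Blend.getNumIncomingValues() == 3); // (5 + 1) / 2
//   assert(Blend.getIncomingValue(2) == I2);   // operand 2 * 2 - 1 == 3
//   assert(Blend.getMask(2) == M2);            // operand 2 * 2 == 4
// \endcode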

/// VPInterleaveRecipe is a recipe for transforming an interleave group of
/// loads or stores into one wide load/store and shuffles. The first operand of
/// a VPInterleave recipe is the address, followed by the stored values,
/// followed by an optional mask.
class VPInterleaveRecipe : public VPRecipeBase {
  const InterleaveGroup<Instruction> *IG;

  /// Indicates if the interleave group is in a conditional block and requires
  /// a mask.
  bool HasMask = false;

  /// Indicates if gaps between members of the group need to be masked out or
  /// if unused gaps can be loaded speculatively.
  bool NeedsMaskForGaps = false;

public:
  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
                     ArrayRef<VPValue *> StoredValues, VPValue *Mask,
                     bool NeedsMaskForGaps)
      : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
        NeedsMaskForGaps(NeedsMaskForGaps) {
    for (unsigned i = 0; i < IG->getFactor(); ++i)
      if (Instruction *I = IG->getMember(i)) {
        if (I->getType()->isVoidTy())
          continue;
        new VPValue(I, this);
      }

    for (auto *SV : StoredValues)
      addOperand(SV);
    if (Mask) {
      HasMask = true;
      addOperand(Mask);
    }
  }
  ~VPInterleaveRecipe() override = default;

  VPInterleaveRecipe *clone() override {
    return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
                                  NeedsMaskForGaps);
  }

  VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const {
    return getOperand(0); // Address is the 1st, mandatory operand.
  }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last, currently 2nd operand.
    return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Return the VPValues stored by this interleave group. If it is a load
  /// interleave group, return an empty ArrayRef.
  ArrayRef<VPValue *> getStoredValues() const {
    // The first operand is the address, followed by the stored values,
    // followed by an optional mask.
    return ArrayRef<VPValue *>(op_begin(), getNumOperands())
        .slice(1, getNumStoreOperands());
  }

  /// Generate the wide load or store, and shuffles.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }

  /// Returns the number of stored operands of this interleave group. Returns 0
  /// for load interleave groups.
  unsigned getNumStoreOperands() const {
    return getNumOperands() - (HasMask ? 2 : 1);
  }

  /// The recipe only uses the first lane of the address.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
  }

  Instruction *getInsertPos() const { return IG->getInsertPos(); }
};
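
// Editor's note: illustrative sketch, not part of the original header. It
// shows the operand layout of a masked store interleave group with two
// members, {Addr, SV0, SV1, Mask}. `Addr`, `SV0`, `SV1` and `Mask` are
// hypothetical VPValues and `IG` a hypothetical InterleaveGroup of factor 2.
//
// \code
//   VPValue *Stored[] = {SV0, SV1};
//   VPInterleaveRecipe Rec(IG, Addr, Stored, Mask, /*NeedsMaskForGaps=*/false);
//   assert(Rec.getNumStoreOperands() == 2);    // 4 operands - (addr + mask)
//   assert(Rec.getStoredValues().size() == 2); // slice(1, 2) of the operands
//   assert(Rec.getMask() == Mask);             // mask is the last operand
// \endcode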

/// A recipe to represent in-loop reduction operations, performing a reduction
/// on a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPSingleDefRecipe {
  /// The recurrence descriptor for the reduction in question.
  const RecurrenceDescriptor &RdxDesc;
  bool IsOrdered;
  /// Whether the reduction is conditional.
  bool IsConditional = false;

protected:
  VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
                    Instruction *I, ArrayRef<VPValue *> Operands,
                    VPValue *CondOp, bool IsOrdered)
      : VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) {
    if (CondOp) {
      IsConditional = true;
      addOperand(CondOp);
    }
  }

public:
  VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
                    bool IsOrdered)
      : VPReductionRecipe(VPDef::VPReductionSC, R, I,
                          ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
                          IsOrdered) {}

  ~VPReductionRecipe() override = default;

  VPReductionRecipe *clone() override {
    return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
                                 getVecOp(), getCondOp(), IsOrdered);
  }

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
           R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  /// Generate the reduction in the loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Return the recurrence descriptor for the in-loop reduction.
  const RecurrenceDescriptor &getRecurrenceDescriptor() const {
    return RdxDesc;
  }
  /// Return true if the in-loop reduction is ordered.
  bool isOrdered() const { return IsOrdered; }
  /// Return true if the in-loop reduction is conditional.
  bool isConditional() const { return IsConditional; }
  /// The VPValue of the scalar Chain being accumulated.
  VPValue *getChainOp() const { return getOperand(0); }
  /// The VPValue of the vector value to be reduced.
  VPValue *getVecOp() const { return getOperand(1); }
  /// The VPValue of the condition for the block.
  VPValue *getCondOp() const {
    return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
  }
};
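
// Editor's note: illustrative sketch, not part of the original header. It
// shows the {ChainOp, VecOp, [Condition]} operand layout for a conditional
// in-loop reduction; `AddInst` (the scalar reduction Instruction), the
// VPValues `Chain`, `Vec` and `Cond`, and `RdxDesc` are hypothetical.
//
// \code
//   VPReductionRecipe Rdx(RdxDesc, AddInst, Chain, Vec, Cond,
//                         /*IsOrdered=*/false);
//   assert(Rdx.getChainOp() == Chain); // operand 0
//   assert(Rdx.getVecOp() == Vec);     // operand 1
//   assert(Rdx.isConditional() && Rdx.getCondOp() == Cond); // last operand
// \endcode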

/// A recipe to represent in-loop reduction operations with vector-predication
/// intrinsics, performing a reduction on a vector operand with the explicit
/// vector length (EVL) into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
class VPReductionEVLRecipe : public VPReductionRecipe {
public:
  VPReductionEVLRecipe(VPReductionRecipe *R, VPValue *EVL, VPValue *CondOp)
      : VPReductionRecipe(
            VPDef::VPReductionEVLSC, R->getRecurrenceDescriptor(),
            cast_or_null<Instruction>(R->getUnderlyingValue()),
            ArrayRef<VPValue *>({R->getChainOp(), R->getVecOp(), EVL}), CondOp,
            R->isOrdered()) {}

  ~VPReductionEVLRecipe() override = default;

  VPReductionEVLRecipe *clone() override {
    llvm_unreachable("cloning not implemented yet");
  }

  VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)

  /// Generate the reduction in the loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// The VPValue of the explicit vector length.
  VPValue *getEVL() const { return getOperand(2); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return Op == getEVL();
  }
};

/// VPReplicateRecipe replicates a given instruction producing multiple scalar
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to
/// be uniform, only one copy, for lane zero, will be generated.
class VPReplicateRecipe : public VPRecipeWithIRFlags {
  /// Indicator if only a single replica per lane is needed.
  bool IsUniform;

  /// Indicator if the replicas are also predicated.
  bool IsPredicated;

public:
  template <typename IterT>
  VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
                    bool IsUniform, VPValue *Mask = nullptr)
      : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
        IsUniform(IsUniform), IsPredicated(Mask) {
    if (Mask)
      addOperand(Mask);
  }

  ~VPReplicateRecipe() override = default;

  VPReplicateRecipe *clone() override {
    auto *Copy =
        new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
                              isPredicated() ? getMask() : nullptr);
    Copy->transferFlags(*this);
    return Copy;
  }

  VP_CLASSOF_IMPL(VPDef::VPReplicateSC)

  /// Generate replicas of the desired Ingredient. Replicas will be generated
  /// for all parts and lanes unless a specific part and lane are specified in
  /// the \p State.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  bool isUniform() const { return IsUniform; }

  bool isPredicated() const { return IsPredicated; }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return isUniform();
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe is used by a widened recipe via an intervening
  /// VPPredInstPHIRecipe. In this case, the scalar values should also be
  /// packed in a vector.
  bool shouldPack() const;

  /// Return the mask of a predicated VPReplicateRecipe.
  VPValue *getMask() {
    assert(isPredicated() &&
           "Trying to get the mask of an unpredicated recipe");
    return getOperand(getNumOperands() - 1);
  }

  unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
};

/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase {
public:
  VPBranchOnMaskRecipe(VPValue *BlockInMask)
      : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
    if (BlockInMask) // nullptr means all-one mask.
      addOperand(BlockInMask);
  }

  VPBranchOnMaskRecipe *clone() override {
    return new VPBranchOnMaskRecipe(getOperand(0));
  }

  VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)

  /// Generate the extraction of the appropriate bit from the block mask and
  /// the conditional branch.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override {
    O << Indent << "BRANCH-ON-MASK ";
    if (VPValue *Mask = getMask())
      Mask->printAsOperand(O, SlotTracker);
    else
      O << " All-One";
  }
#endif

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
    // Mask is optional.
    return getNumOperands() == 1 ? getOperand(0) : nullptr;
  }

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
/// control converges back from a Branch-on-Mask. The phi nodes are needed in
/// order to merge values that are set under such a branch and feed their uses.
/// The phi nodes can be scalar or vector depending on the users of the value.
/// This recipe works in concert with VPBranchOnMaskRecipe.
class VPPredInstPHIRecipe : public VPSingleDefRecipe {
public:
  /// Construct a VPPredInstPHIRecipe given \p PredV, whose value needs phi
  /// nodes after merging back from a Branch-on-Mask.
  VPPredInstPHIRecipe(VPValue *PredV)
      : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
  ~VPPredInstPHIRecipe() override = default;

  VPPredInstPHIRecipe *clone() override {
    return new VPPredInstPHIRecipe(getOperand(0));
  }

  VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)

  /// Generates phi nodes for live-outs as needed to retain SSA form.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe uses scalars of operand \p Op.
  bool usesScalars(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// A common base class for widening memory operations. An optional mask can be
/// provided as the last operand.
class VPWidenMemoryRecipe : public VPRecipeBase {
protected:
  Instruction &Ingredient;

  /// Whether the accessed addresses are consecutive.
  bool Consecutive;

  /// Whether the consecutive accessed addresses are in reverse order.
  bool Reverse;

  /// Whether the memory access is masked.
  bool IsMasked = false;

  void setMask(VPValue *Mask) {
    assert(!IsMasked && "cannot re-set mask");
    if (!Mask)
      return;
    addOperand(Mask);
    IsMasked = true;
  }

  VPWidenMemoryRecipe(const unsigned char SC, Instruction &I,
                      std::initializer_list<VPValue *> Operands,
                      bool Consecutive, bool Reverse, DebugLoc DL)
      : VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive),
        Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
  }

public:
  VPWidenMemoryRecipe *clone() override {
    llvm_unreachable("cloning not supported");
  }

  static inline bool classof(const VPRecipeBase *R) {
    return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
  }

  static inline bool classof(const VPUser *U) {
    auto *R = dyn_cast<VPRecipeBase>(U);
    return R && classof(R);
  }

  /// Return whether the loaded-from / stored-to addresses are consecutive.
  bool isConsecutive() const { return Consecutive; }

  /// Return whether the consecutive loaded/stored addresses are in reverse
  /// order.
  bool isReverse() const { return Reverse; }

  /// Return the address accessed by this recipe.
  VPValue *getAddr() const { return getOperand(0); }

  /// Returns true if the recipe is masked.
  bool isMasked() const { return IsMasked; }

  /// Return the mask used by this recipe. Note that a full mask is represented
  /// by a nullptr.
  VPValue *getMask() const {
    // Mask is optional and therefore the last operand.
    return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
  }

  /// Generate the wide load/store.
  void execute(VPTransformState &State) override {
    llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
  }

  Instruction &getIngredient() const { return Ingredient; }
};

/// A recipe for widening load operations, using the address to load from and
/// an optional mask.
struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
  VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
                    bool Consecutive, bool Reverse, DebugLoc DL)
      : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
                            Reverse, DL),
        VPValue(this, &Load) {
    setMask(Mask);
  }

  VPWidenLoadRecipe *clone() override {
    return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
                                 getMask(), Consecutive, Reverse,
                                 getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);

  /// Generate a wide load or gather.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Widened, consecutive load operations only demand the first lane of
    // their address.
    return Op == getAddr() && isConsecutive();
  }
};
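
// Editor's note: illustrative sketch, not part of the original header. The
// optional mask always goes last: subclasses first pass their fixed operands
// to VPWidenMemoryRecipe and then call setMask(), so a masked consecutive
// load is laid out as {Addr, Mask}. `Load` (a LoadInst *), `Addr` and `Mask`
// are hypothetical.
//
// \code
//   VPWidenLoadRecipe Rec(*Load, Addr, Mask, /*Consecutive=*/true,
//                         /*Reverse=*/false, Load->getDebugLoc());
//   assert(Rec.getAddr() == Addr);                   // operand 0
//   assert(Rec.isMasked() && Rec.getMask() == Mask); // last operand
//   assert(Rec.onlyFirstLaneUsed(Addr));             // consecutive load
// \endcode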

/// A recipe for widening load operations with vector-predication intrinsics,
/// using the address to load from, the explicit vector length and an optional
/// mask.
struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
  VPWidenLoadEVLRecipe(VPWidenLoadRecipe *L, VPValue *EVL, VPValue *Mask)
      : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L->getIngredient(),
                            {L->getAddr(), EVL}, L->isConsecutive(),
                            L->isReverse(), L->getDebugLoc()),
        VPValue(this, &getIngredient()) {
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)

  /// Return the EVL operand.
  VPValue *getEVL() const { return getOperand(1); }

  /// Generate the wide load or gather.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Widened loads only demand the first lane of EVL and consecutive loads
    // only demand the first lane of their address.
    return Op == getEVL() || (Op == getAddr() && isConsecutive());
  }
};

/// A recipe for widening store operations, using the stored value, the address
/// to store to and an optional mask.
struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
  VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
                     VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
      : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
                            Consecutive, Reverse, DL) {
    setMask(Mask);
  }

  VPWidenStoreRecipe *clone() override {
    return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
                                  getStoredValue(), getMask(), Consecutive,
                                  Reverse, getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const { return getOperand(1); }

  /// Generate a wide store or scatter.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    // Widened, consecutive stores only demand the first lane of their address,
    // unless the same operand is also stored.
    return Op == getAddr() && isConsecutive() && Op != getStoredValue();
  }
};

/// A recipe for widening store operations with vector-predication intrinsics,
/// using the value to store, the address to store to, the explicit vector
/// length and an optional mask.
struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
  VPWidenStoreEVLRecipe(VPWidenStoreRecipe *S, VPValue *EVL, VPValue *Mask)
      : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S->getIngredient(),
                            {S->getAddr(), S->getStoredValue(), EVL},
                            S->isConsecutive(), S->isReverse(),
                            S->getDebugLoc()) {
    setMask(Mask);
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)

  /// Return the value stored by this recipe.
  VPValue *getStoredValue() const { return getOperand(1); }

  /// Return the EVL operand.
  VPValue *getEVL() const { return getOperand(2); }

  /// Generate the wide store or scatter.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    if (Op == getEVL()) {
      assert(getStoredValue() != Op && "unexpected store of EVL");
      return true;
    }
    // Widened, consecutive memory operations only demand the first lane of
    // their address, unless the same operand is also stored. The latter can
    // happen with opaque pointers.
    return Op == getAddr() && isConsecutive() && Op != getStoredValue();
  }
};

/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPSingleDefRecipe {
  const SCEV *Expr;
  ScalarEvolution &SE;

public:
  VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
      : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}

  ~VPExpandSCEVRecipe() override = default;

  VPExpandSCEVRecipe *clone() override {
    return new VPExpandSCEVRecipe(Expr, SE);
  }

  VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)

  /// Expand the SCEV expression into a runtime value.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  const SCEV *getSCEV() const { return Expr; }
};

/// Canonical scalar induction phi of the vector loop. It starts at the
/// specified start value (either 0 or the resume value when vectorizing the
/// epilogue loop). VPWidenCanonicalIVRecipe represents the vector version of
/// the canonical induction variable.
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}

  ~VPCanonicalIVPHIRecipe() override = default;

  VPCanonicalIVPHIRecipe *clone() override {
    auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
    R->addOperand(getBackedgeValue());
    return R;
  }

  VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
  }

  /// Generate the canonical scalar induction phi of the vector loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  /// Returns the scalar type of the induction.
  Type *getScalarType() const {
    return getStartValue()->getLiveInIRValue()->getType();
  }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Returns true if the recipe only uses the first part of operand \p Op.
  bool onlyFirstPartUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

  /// Check if the induction described by \p Kind, \p Start and \p Step is
  /// canonical, i.e. has the same start and step (of 1) as the canonical IV.
  bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,
                   VPValue *Step) const;
};

/// A recipe for generating the active lane mask for the vector loop that is
/// used to predicate the vector operations.
/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
/// remove VPActiveLaneMaskPHIRecipe.
class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
                          DL) {}

  ~VPActiveLaneMaskPHIRecipe() override = default;

  VPActiveLaneMaskPHIRecipe *clone() override {
    return new VPActiveLaneMaskPHIRecipe(getOperand(0), getDebugLoc());
  }

  VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
  }

  /// Generate the active lane mask phi of the vector loop.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for generating the phi node for the current index of elements,
/// adjusted in accordance with the EVL value. It starts at the start value of
/// the canonical induction and gets incremented by EVL in each iteration of
/// the vector loop.
class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
public:
  VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
      : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}

  ~VPEVLBasedIVPHIRecipe() override = default;

  VPEVLBasedIVPHIRecipe *clone() override {
    llvm_unreachable("cloning not implemented yet");
  }

  VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)

  static inline bool classof(const VPHeaderPHIRecipe *D) {
    return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
  }

  /// Generate the phi handling the EVL-based IV correctly across iterations.
  /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe.
  void execute(VPTransformState &State) override;

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
public:
  VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
      : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}

  ~VPWidenCanonicalIVRecipe() override = default;

  VPWidenCanonicalIVRecipe *clone() override {
    return new VPWidenCanonicalIVRecipe(
        cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
  }

  VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)

  /// Generate a canonical vector induction variable of the vector loop, with
  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
  /// step = <VF*UF, VF*UF, ..., VF*UF>.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};
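
// Editor's note: worked example, not part of the original header. For VF = 4
// and UF = 2 the formula above yields two vector parts,
//   Part 0: <0, 1, 2, 3>
//   Part 1: <4, 5, 6, 7>
// and both parts advance by the splat step <8, 8, 8, 8> (VF * UF = 8) on
// each iteration of the vector loop.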

/// A recipe for converting the input \p IV value to the corresponding value of
/// an IV with different start and step values, using Start + IV * Step.
class VPDerivedIVRecipe : public VPSingleDefRecipe {
  /// Kind of the induction.
  const InductionDescriptor::InductionKind Kind;
  /// If not nullptr, the floating point induction binary operator. Must be set
  /// for floating point inductions.
  const FPMathOperator *FPBinOp;

public:
  VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
                    VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
      : VPDerivedIVRecipe(
            IndDesc.getKind(),
            dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
            Start, CanonicalIV, Step) {}

  VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
                    const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
                    VPValue *Step)
      : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
        FPBinOp(FPBinOp) {}

  ~VPDerivedIVRecipe() override = default;

  VPDerivedIVRecipe *clone() override {
    return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
                                 getStepValue());
  }

  VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)

  /// Generate the transformed value of the induction at offset StartValue (1st
  /// operand) + IV (2nd operand) * StepValue (3rd operand).
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  Type *getScalarType() const {
    return getStartValue()->getLiveInIRValue()->getType();
  }

  VPValue *getStartValue() const { return getOperand(0); }
  VPValue *getStepValue() const { return getOperand(2); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};
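
// Editor's note: worked example, not part of the original header. Deriving an
// IV that starts at 3 with step 2 from the canonical IV: at canonical IV
// value 5, execute() computes Start + IV * Step = 3 + 5 * 2 = 13. The operand
// order is {Start, IV, Step}, matching getStartValue() and getStepValue()
// above.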

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
  Instruction::BinaryOps InductionOpcode;

public:
  VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step,
                        Instruction::BinaryOps Opcode, FastMathFlags FMFs)
      : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
                            ArrayRef<VPValue *>({IV, Step}), FMFs),
        InductionOpcode(Opcode) {}

  VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
                        VPValue *Step)
      : VPScalarIVStepsRecipe(
            IV, Step, IndDesc.getInductionOpcode(),
            dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
                ? IndDesc.getInductionBinOp()->getFastMathFlags()
                : FastMathFlags()) {}

  ~VPScalarIVStepsRecipe() override = default;

  VPScalarIVStepsRecipe *clone() override {
    return new VPScalarIVStepsRecipe(
        getOperand(0), getOperand(1), InductionOpcode,
        hasFastMathFlags() ? getFastMathFlags() : FastMathFlags());
  }

  VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)

  /// Generate the scalarized versions of the phi node as needed by its users.
  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif

  VPValue *getStepValue() const { return getOperand(1); }

  /// Returns true if the recipe only uses the first lane of operand \p Op.
  bool onlyFirstLaneUsed(const VPValue *Op) const override {
    assert(is_contained(operands(), Op) &&
           "Op must be an operand of the recipe");
    return true;
  }
};

/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
/// holds a sequence of zero or more VPRecipe's each representing a sequence of
/// output IR instructions. All PHI-like recipes must come before any non-PHI
/// recipes.
class VPBasicBlock : public VPBlockBase {
public:
  using RecipeListTy = iplist<VPRecipeBase>;

protected:
  /// The VPRecipes held in the order of output instructions to generate.
  RecipeListTy Recipes;

  VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
      : VPBlockBase(BlockSC, Name.str()) {}

public:
  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
      : VPBlockBase(VPBasicBlockSC, Name.str()) {
    if (Recipe)
      appendRecipe(Recipe);
  }

  ~VPBasicBlock() override {
    while (!Recipes.empty())
      Recipes.pop_back();
  }

  /// Instruction iterators...
  using iterator = RecipeListTy::iterator;
  using const_iterator = RecipeListTy::const_iterator;
  using reverse_iterator = RecipeListTy::reverse_iterator;
  using const_reverse_iterator = RecipeListTy::const_reverse_iterator;

  //===--------------------------------------------------------------------===//
  /// Recipe iterator methods
  ///
  inline iterator begin() { return Recipes.begin(); }
  inline const_iterator begin() const { return Recipes.begin(); }
  inline iterator end() { return Recipes.end(); }
  inline const_iterator end() const { return Recipes.end(); }

  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
  inline reverse_iterator rend() { return Recipes.rend(); }
  inline const_reverse_iterator rend() const { return Recipes.rend(); }

  inline size_t size() const { return Recipes.size(); }
  inline bool empty() const { return Recipes.empty(); }
  inline const VPRecipeBase &front() const { return Recipes.front(); }
  inline VPRecipeBase &front() { return Recipes.front(); }
  inline const VPRecipeBase &back() const { return Recipes.back(); }
  inline VPRecipeBase &back() { return Recipes.back(); }

  /// Returns a reference to the list of recipes.
  RecipeListTy &getRecipeList() { return Recipes; }

  /// Returns a pointer to a member of the recipe list.
  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
    return &VPBasicBlock::Recipes;
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
           V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
  }

  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
    assert(Recipe && "No recipe to insert.");
    assert(!Recipe->Parent && "Recipe already in VPlan");
    Recipe->Parent = this;
    Recipes.insert(InsertPt, Recipe);
  }

  /// Augment the existing recipes of a VPBasicBlock with an additional
  /// \p Recipe as the last recipe.
  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  /// Return the cost of this VPBasicBlock.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;

  /// Return the position of the first non-phi node recipe in the block.
  iterator getFirstNonPhi();

  /// Returns an iterator range over the PHI-like recipes in the block.
  iterator_range<iterator> phis() {
    return make_range(begin(), getFirstNonPhi());
  }

  void dropAllReferences(VPValue *NewValue) override;

  /// Split current block at \p SplitAt by inserting a new block between the
  /// current block and its successors and moving all recipes starting at
  /// SplitAt to the new block. Returns the new block.
  VPBasicBlock *splitAt(iterator SplitAt);

  VPRegionBlock *getEnclosingLoopRegion();

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
  /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual blocks is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_ostream &O) version.
#endif

  /// If the block has multiple successors, return the branch recipe that
  /// terminates the block. If the block has no successors or only a single
  /// successor, return nullptr.
  VPRecipeBase *getTerminator();
  const VPRecipeBase *getTerminator() const;

  /// Returns true if the block is exiting its parent region.
  bool isExiting() const;

  /// Clone the current block and its recipes, without updating the operands of
  /// the cloned recipes.
  VPBasicBlock *clone() override {
    auto *NewBlock = new VPBasicBlock(getName());
    for (VPRecipeBase &R : *this)
      NewBlock->appendRecipe(R.clone());
    return NewBlock;
  }

protected:
  /// Execute the recipes in the IR basic block \p BB.
  void executeRecipes(VPTransformState *State, BasicBlock *BB);

private:
  /// Create an IR BasicBlock to hold the output instructions generated by this
  /// VPBasicBlock, and return it. Update the CFGState accordingly.
  BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
};
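
// Illustrative sketch of common VPBasicBlock manipulation (VPBB, R0, and R1
// are placeholders): recipes are kept in output order, with all PHI-like
// recipes forming a prefix of the block.
//
//   VPBasicBlock *VPBB = new VPBasicBlock("loop.body");
//   VPBB->appendRecipe(R0);                    // R0: some VPRecipeBase *
//   VPBB->insert(R1, VPBB->getFirstNonPhi());  // insert after the PHI prefix
//   for (VPRecipeBase &PhiR : VPBB->phis())
//     (void)PhiR;                              // visit only PHI-like recipes
//   VPBasicBlock *Tail = VPBB->splitAt(VPBB->getFirstNonPhi());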

/// A special type of VPBasicBlock that wraps an existing IR basic block.
/// Recipes of the block get added before the first non-phi instruction in the
/// wrapped block.
/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
/// preheader block.
class VPIRBasicBlock : public VPBasicBlock {
  BasicBlock *IRBB;

public:
  VPIRBasicBlock(BasicBlock *IRBB)
      : VPBasicBlock(VPIRBasicBlockSC,
                     (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
        IRBB(IRBB) {}

  ~VPIRBasicBlock() override {}

  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
  }

  /// The method which generates the output IR instructions that correspond to
  /// this VPBasicBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  VPIRBasicBlock *clone() override {
    auto *NewBlock = new VPIRBasicBlock(IRBB);
    for (VPRecipeBase &R : Recipes)
      NewBlock->appendRecipe(R.clone());
    return NewBlock;
  }

  BasicBlock *getIRBasicBlock() const { return IRBB; }
};
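
// Illustrative sketch (ScalarPH is a placeholder for an existing IR
// preheader): wrapping leaves the IR block in place while allowing VPlan to
// add recipes to it during execution.
//
//   auto *VPPH = new VPIRBasicBlock(ScalarPH); // named "ir-bb<...>"
//   assert(VPPH->getIRBasicBlock() == ScalarPH);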

/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
/// A VPRegionBlock may indicate that its contents are to be replicated several
/// times. This is designed to support predicated scalarization, in which a
/// scalar if-then code structure needs to be generated VF * UF times. Having
/// this replication indicator helps to keep a single model for multiple
/// candidate VF's. The actual replication takes place only once the desired VF
/// and UF have been determined.
class VPRegionBlock : public VPBlockBase {
  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
  VPBlockBase *Entry;

  /// Hold the Single Exiting block of the SESE region modelled by the
  /// VPRegionBlock.
  VPBlockBase *Exiting;

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool IsReplicator;

public:
  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
                const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
        IsReplicator(IsReplicator) {
    assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
    assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
    Entry->setParent(this);
    Exiting->setParent(this);
  }
  VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
      : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
        IsReplicator(IsReplicator) {}

  ~VPRegionBlock() override {
    if (Entry) {
      VPValue DummyValue;
      Entry->dropAllReferences(&DummyValue);
      deleteCFG(Entry);
    }
  }

  /// Method to support type inquiry through isa, cast, and dyn_cast.
  static inline bool classof(const VPBlockBase *V) {
    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
  }

  const VPBlockBase *getEntry() const { return Entry; }
  VPBlockBase *getEntry() { return Entry; }

  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
  /// EntryBlock must have no predecessors.
  void setEntry(VPBlockBase *EntryBlock) {
    assert(EntryBlock->getPredecessors().empty() &&
           "Entry block cannot have predecessors.");
    Entry = EntryBlock;
    EntryBlock->setParent(this);
  }

  const VPBlockBase *getExiting() const { return Exiting; }
  VPBlockBase *getExiting() { return Exiting; }

  /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
  /// ExitingBlock must have no successors.
  void setExiting(VPBlockBase *ExitingBlock) {
    assert(ExitingBlock->getSuccessors().empty() &&
           "Exit block cannot have successors.");
    Exiting = ExitingBlock;
    ExitingBlock->setParent(this);
  }

  /// Returns the pre-header VPBasicBlock of the loop region.
  VPBasicBlock *getPreheaderVPBB() {
    assert(!isReplicator() && "should only get pre-header of loop regions");
    return getSinglePredecessor()->getExitingBasicBlock();
  }

  /// An indicator whether this region is to generate multiple replicated
  /// instances of output IR corresponding to its VPBlockBases.
  bool isReplicator() const { return IsReplicator; }

  /// The method which generates the output IR instructions that correspond to
  /// this VPRegionBlock, thereby "executing" the VPlan.
  void execute(VPTransformState *State) override;

  /// Return the cost of this region.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;

  void dropAllReferences(VPValue *NewValue) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
  /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
  /// consecutive numbers.
  ///
  /// Note that the numbering is applied to the whole VPlan, so printing
  /// individual regions is consistent with the whole VPlan printing.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
  using VPBlockBase::print; // Get the print(raw_ostream &O) version.
#endif

  /// Clone all blocks in the single-entry single-exit region of the block and
  /// their recipes without updating the operands of the cloned recipes.
  VPRegionBlock *clone() override;
};
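
// Illustrative sketch of forming a replicate region for a predicated scalar
// if-then construct (Entry and Exiting are placeholder blocks whose internal
// edges are already set up; Entry has no predecessors, Exiting no successors):
//
//   auto *PredRegion = new VPRegionBlock(Entry, Exiting, "pred.store",
//                                        /*IsReplicator=*/true);
//   // On execute(), a replicate region is emitted once per lane and part,
//   // i.e. VF * UF times.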

/// VPlan models a candidate for vectorization, encoding various decisions
/// taken to produce efficient output IR, including which branches,
/// basic-blocks and output IR instructions to generate, and their cost. VPlan
/// holds a Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an
/// Entry VPBasicBlock.
class VPlan {
  friend class VPlanPrinter;
  friend class VPSlotTracker;

  /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the
  /// preheader of the vector loop.
  VPBasicBlock *Entry;

  /// VPBasicBlock corresponding to the original preheader. Used to place
  /// VPExpandSCEV recipes for expressions used during skeleton creation and the
  /// rest of VPlan execution.
  VPBasicBlock *Preheader;

  /// Holds the VFs applicable to this VPlan.
  SmallSetVector<ElementCount, 2> VFs;

  /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
  /// any UF.
  SmallSetVector<unsigned, 2> UFs;

  /// Holds the name of the VPlan, for printing.
  std::string Name;

  /// Represents the trip count of the original loop, for folding
  /// the tail.
  VPValue *TripCount = nullptr;

  /// Represents the backedge taken count of the original loop, for folding
  /// the tail. It equals TripCount - 1.
  VPValue *BackedgeTakenCount = nullptr;

  /// Represents the vector trip count.
  VPValue VectorTripCount;

  /// Represents the loop-invariant VF * UF of the vector loop region.
  VPValue VFxUF;

  /// Holds a mapping between Values and their corresponding VPValue inside
  /// VPlan.
  Value2VPValueTy Value2VPValue;

  /// Contains all the external definitions created for this VPlan. External
  /// definitions are VPValues that hold a pointer to their underlying IR.
  SmallVector<VPValue *, 16> VPLiveInsToFree;

  /// Values used outside the plan. It contains live-outs that need fixing. Any
  /// live-out that is fixed outside VPlan needs to be removed. The remaining
  /// live-outs are fixed via VPLiveOut::fixPhi.
  MapVector<PHINode *, VPLiveOut *> LiveOuts;

  /// Mapping from SCEVs to the VPValues representing their expansions.
  /// NOTE: This mapping is temporary and will be removed once all users have
  /// been modeled in VPlan directly.
  DenseMap<const SCEV *, VPValue *> SCEVToExpansion;

public:
  /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
  /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
  /// be disconnected, as the bypass blocks between them are not yet modeled in
  /// VPlan.
  VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
      : VPlan(Preheader, Entry) {
    TripCount = TC;
  }

  /// Construct a VPlan with original preheader \p Preheader and \p Entry to
  /// the plan. At the moment, \p Preheader and \p Entry need to be
  /// disconnected, as the bypass blocks between them are not yet modeled in
  /// VPlan.
  VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
      : Entry(Entry), Preheader(Preheader) {
    Entry->setPlan(this);
    Preheader->setPlan(this);
    assert(Preheader->getNumSuccessors() == 0 &&
           Preheader->getNumPredecessors() == 0 &&
           "preheader must be disconnected");
  }

  ~VPlan();

  /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping the
  /// original scalar pre-header) which contains SCEV expansions that need
  /// to happen before the CFG is modified; a VPBasicBlock for the vector
  /// pre-header, followed by a region for the vector loop, followed by the
  /// middle VPBasicBlock. If a check is needed to guard executing the scalar
  /// epilogue loop, it will be added to the middle block, together with
  /// VPBasicBlocks for the scalar preheader and exit blocks.
  static VPlanPtr createInitialVPlan(const SCEV *TripCount,
                                     ScalarEvolution &PSE,
                                     bool RequiresScalarEpilogueCheck,
                                     bool TailFolded, Loop *TheLoop);

  /// Prepare the plan for execution, setting up the required live-in values.
  void prepareToExecute(Value *TripCount, Value *VectorTripCount,
                        Value *CanonicalIVStartValue, VPTransformState &State);

  /// Generate the IR code for this VPlan.
  void execute(VPTransformState *State);

  /// Return the cost of this plan.
  InstructionCost cost(ElementCount VF, VPCostContext &Ctx);

  VPBasicBlock *getEntry() { return Entry; }
  const VPBasicBlock *getEntry() const { return Entry; }

  /// The trip count of the original loop.
  VPValue *getTripCount() const {
    assert(TripCount && "trip count needs to be set before accessing it");
    return TripCount;
  }

  /// Resets the trip count for the VPlan. The caller must make sure all uses of
  /// the original trip count have been replaced.
  void resetTripCount(VPValue *NewTripCount) {
    assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
           "TripCount always must be set");
    TripCount = NewTripCount;
  }

  /// The backedge taken count of the original loop.
  VPValue *getOrCreateBackedgeTakenCount() {
    if (!BackedgeTakenCount)
      BackedgeTakenCount = new VPValue();
    return BackedgeTakenCount;
  }

  /// The vector trip count.
  VPValue &getVectorTripCount() { return VectorTripCount; }

  /// Returns VF * UF of the vector loop region.
  VPValue &getVFxUF() { return VFxUF; }

  void addVF(ElementCount VF) { VFs.insert(VF); }

  void setVF(ElementCount VF) {
    assert(hasVF(VF) && "Cannot set VF not already in plan");
    VFs.clear();
    VFs.insert(VF);
  }

  bool hasVF(ElementCount VF) { return VFs.count(VF); }
  bool hasScalableVF() {
    return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
  }

  /// Returns an iterator range over all VFs of the plan.
  iterator_range<SmallSetVector<ElementCount, 2>::iterator>
  vectorFactors() const {
    return {VFs.begin(), VFs.end()};
  }

  bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }

  bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }

  void setUF(unsigned UF) {
    assert(hasUF(UF) && "Cannot set the UF not already in plan");
    UFs.clear();
    UFs.insert(UF);
  }

  /// Return a string with the name of the plan and the applicable VFs and UFs.
  std::string getName() const;

  void setName(const Twine &newName) { Name = newName.str(); }

  /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
  /// yet) for \p V.
  VPValue *getOrAddLiveIn(Value *V) {
    assert(V && "Trying to get or add the VPValue of a null Value");
    if (!Value2VPValue.count(V)) {
      VPValue *VPV = new VPValue(V);
      VPLiveInsToFree.push_back(VPV);
      assert(VPV->isLiveIn() && "VPV must be a live-in.");
      assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
      Value2VPValue[V] = VPV;
    }

    assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
    assert(Value2VPValue[V]->isLiveIn() &&
           "Only live-ins should be in mapping");
    return Value2VPValue[V];
  }

  /// Return the live-in VPValue for \p V, if there is one, or nullptr
  /// otherwise.
  VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the live-ins of this VPlan to \p O.
  void printLiveIns(raw_ostream &O) const;

  /// Print this VPlan to \p O.
  void print(raw_ostream &O) const;

  /// Print this VPlan in DOT format to \p O.
  void printDOT(raw_ostream &O) const;

  /// Dump the plan to stderr (for debugging).
  LLVM_DUMP_METHOD void dump() const;
#endif

  /// Returns the VPRegionBlock of the vector loop.
  VPRegionBlock *getVectorLoopRegion() {
    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
  }
  const VPRegionBlock *getVectorLoopRegion() const {
    return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
  }

  /// Returns the canonical induction recipe of the vector loop.
  VPCanonicalIVPHIRecipe *getCanonicalIV() {
    VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
    if (EntryVPBB->empty()) {
      // VPlan native path.
      EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
    }
    return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
  }

  void addLiveOut(PHINode *PN, VPValue *V);

  void removeLiveOut(PHINode *PN) {
    delete LiveOuts[PN];
    LiveOuts.erase(PN);
  }

  const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
    return LiveOuts;
  }

  VPValue *getSCEVExpansion(const SCEV *S) const {
    return SCEVToExpansion.lookup(S);
  }

  void addSCEVExpansion(const SCEV *S, VPValue *V) {
    assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
    SCEVToExpansion[S] = V;
  }

  /// \return The block corresponding to the original preheader.
  VPBasicBlock *getPreheader() { return Preheader; }
  const VPBasicBlock *getPreheader() const { return Preheader; }

  /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
  /// recipes to refer to the clones, and return it.
  VPlan *duplicate();
};
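
// Illustrative sketch of narrowing a plan to the chosen vectorization and
// unroll factors before code generation (Plan, BestVF, BestUF, and IRV are
// placeholders). A plan may initially be valid for several VFs and UFs;
// setVF/setUF restrict it to a single choice:
//
//   if (Plan.hasVF(BestVF) && Plan.hasUF(BestUF)) {
//     Plan.setVF(BestVF);
//     Plan.setUF(BestUF);
//   }
//   VPValue *LiveIn = Plan.getOrAddLiveIn(IRV); // IRV: a llvm::Value *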

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
/// indented and follows the dot format.
class VPlanPrinter {
  raw_ostream &OS;
  const VPlan &Plan;
  unsigned Depth = 0;
  unsigned TabWidth = 2;
  std::string Indent;
  unsigned BID = 0;
  SmallDenseMap<const VPBlockBase *, unsigned> BlockID;

  VPSlotTracker SlotTracker;

  /// Handle indentation.
  void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }

  /// Print a given \p Block of the Plan.
  void dumpBlock(const VPBlockBase *Block);

  /// Print the information related to the CFG edges going out of a given
  /// \p Block, followed by printing the successor blocks themselves.
  void dumpEdges(const VPBlockBase *Block);

  /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
  /// its successor blocks.
  void dumpBasicBlock(const VPBasicBlock *BasicBlock);

  /// Print a given \p Region of the Plan.
  void dumpRegion(const VPRegionBlock *Region);

  unsigned getOrCreateBID(const VPBlockBase *Block) {
    return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
  }

  Twine getOrCreateName(const VPBlockBase *Block);

  Twine getUID(const VPBlockBase *Block);

  /// Print the information related to a CFG edge between two VPBlockBases.
  void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
                const Twine &Label);

public:
  VPlanPrinter(raw_ostream &O, const VPlan &P)
      : OS(O), Plan(P), SlotTracker(&P) {}

  LLVM_DUMP_METHOD void dump();
};

struct VPlanIngredient {
  const Value *V;

  VPlanIngredient(const Value *V) : V(V) {}

  void print(raw_ostream &O) const;
};

inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {
  I.print(OS);
  return OS;
}

inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
  Plan.print(OS);
  return OS;
}
#endif
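
// Illustrative sketch (Plan is a placeholder): in builds with asserts enabled
// or LLVM_ENABLE_DUMP, a plan can be printed directly or in dot format:
//
//   errs() << Plan;        // human-readable form, via VPlan::print
//   Plan.printDOT(errs()); // dot format, produced by VPlanPrinter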

//===----------------------------------------------------------------------===//
// VPlan Utilities
//===----------------------------------------------------------------------===//

/// Class that provides utilities for VPBlockBases in VPlan.
class VPBlockUtils {
public:
  VPBlockUtils() = delete;

  /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
  /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
  /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
  /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
  /// have neither successors nor predecessors.
  static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
    assert(NewBlock->getSuccessors().empty() &&
           NewBlock->getPredecessors().empty() &&
           "Can't insert new block with predecessors or successors.");
    NewBlock->setParent(BlockPtr->getParent());
    SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
    for (VPBlockBase *Succ : Succs) {
      disconnectBlocks(BlockPtr, Succ);
      connectBlocks(NewBlock, Succ);
    }
    connectBlocks(BlockPtr, NewBlock);
  }

  /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
  /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
  /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
  /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
  /// and \p IfTrue and \p IfFalse must have neither successors nor
  /// predecessors.
  static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
                                   VPBlockBase *BlockPtr) {
    assert(IfTrue->getSuccessors().empty() &&
           "Can't insert IfTrue with successors.");
    assert(IfFalse->getSuccessors().empty() &&
           "Can't insert IfFalse with successors.");
    BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
    IfTrue->setPredecessors({BlockPtr});
    IfFalse->setPredecessors({BlockPtr});
    IfTrue->setParent(BlockPtr->getParent());
    IfFalse->setParent(BlockPtr->getParent());
  }

  /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
  /// the successors of \p From and \p From to the predecessors of \p To. Both
  /// VPBlockBases must have the same parent, which can be null. Both
  /// VPBlockBases can be already connected to other VPBlockBases.
  static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {
    assert((From->getParent() == To->getParent()) &&
           "Can't connect two blocks with different parents");
    assert(From->getNumSuccessors() < 2 &&
           "Blocks can't have more than two successors.");
    From->appendSuccessor(To);
    To->appendPredecessor(From);
  }

  /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
  /// from the successors of \p From and \p From from the predecessors of \p To.
  static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {
    assert(To && "Successor to disconnect is null.");
    From->removeSuccessor(To);
    To->removePredecessor(From);
  }

  /// Return an iterator range over \p Range which only includes \p BlockTy
  /// blocks. The accesses are cast to \p BlockTy.
  template <typename BlockTy, typename T>
  static auto blocksOnly(const T &Range) {
    // Create BaseTy with correct const-ness based on BlockTy.
    using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
                                      const VPBlockBase, VPBlockBase>;

    // We need to first create an iterator range over (const) BlockTy & instead
    // of (const) BlockTy * for filter_range to work properly.
    auto Mapped =
        map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
    auto Filter = make_filter_range(
        Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
    return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
      return cast<BlockTy>(&Block);
    });
  }
};
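
// Illustrative sketch: stitching a diamond out of otherwise disconnected
// blocks (Cond, Then, Else, and Merge are placeholder VPBasicBlocks):
//
//   VPBlockUtils::insertTwoBlocksAfter(Then, Else, Cond); // Cond -> Then/Else
//   VPBlockUtils::connectBlocks(Then, Merge);
//   VPBlockUtils::connectBlocks(Else, Merge);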

class VPInterleavedAccessInfo {
  DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
      InterleaveGroupMap;

  /// Type for mapping of instruction based interleave groups to VPInstruction
  /// interleave groups
  using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
                             InterleaveGroup<VPInstruction> *>;

  /// Recursively traverse \p Region and populate VPlan based interleave groups
  /// based on \p IAI.
  void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
                   InterleavedAccessInfo &IAI);
  /// Recursively traverse \p Block and populate VPlan based interleave groups
  /// based on \p IAI.
  void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
                  InterleavedAccessInfo &IAI);

public:
  VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);

  ~VPInterleavedAccessInfo() {
    SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
    // Avoid releasing a pointer twice.
    for (auto &I : InterleaveGroupMap)
      DelSet.insert(I.second);
    for (auto *Ptr : DelSet)
      delete Ptr;
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr does not belong to such a group.
  InterleaveGroup<VPInstruction> *
  getInterleaveGroup(VPInstruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }
};
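
// Illustrative sketch (Plan, IAI, and VPInst are placeholders): groups are
// derived once from the IR-level InterleavedAccessInfo, then queried per
// VPInstruction:
//
//   VPInterleavedAccessInfo VPIAI(Plan, IAI);
//   if (auto *IG = VPIAI.getInterleaveGroup(VPInst))
//     (void)IG->getFactor(); // e.g. query the group's interleave factor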

/// Class that maps (parts of) an existing VPlan to trees of combined
/// VPInstructions.
class VPlanSlp {
  enum class OpMode { Failed, Load, Opcode };

  /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
  /// DenseMap keys.
  struct BundleDenseMapInfo {
    static SmallVector<VPValue *, 4> getEmptyKey() {
      return {reinterpret_cast<VPValue *>(-1)};
    }

    static SmallVector<VPValue *, 4> getTombstoneKey() {
      return {reinterpret_cast<VPValue *>(-2)};
    }

    static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
    }

    static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
                        const SmallVector<VPValue *, 4> &RHS) {
      return LHS == RHS;
    }
  };

  /// Mapping of values in the original VPlan to a combined VPInstruction.
  DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
      BundleToCombined;

  VPInterleavedAccessInfo &IAI;

  /// Basic block to operate on. For now, only instructions in a single BB are
  /// considered.
  const VPBasicBlock &BB;

  /// Indicates whether we managed to combine all visited instructions or not.
  bool CompletelySLP = true;

  /// Width of the widest combined bundle in bits.
  unsigned WidestBundleBits = 0;

  using MultiNodeOpTy =
      typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;

  // Input operand bundles for the current multi node. Each multi node operand
  // bundle contains values not matching the multi node's opcode. They will
  // be reordered in reorderMultiNodeOps, once we have completed building a
  // multi node.
  SmallVector<MultiNodeOpTy, 4> MultiNodeOps;

  /// Indicates whether we are currently building a multi node.
  bool MultiNodeActive = false;

  /// Check if we can vectorize \p Operands together.
  bool areVectorizable(ArrayRef<VPValue *> Operands) const;

  /// Add combined instruction \p New for the bundle \p Operands.
  void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);

  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
  VPInstruction *markFailed();

  /// Reorder operands in the multi node to maximize sequential memory access
  /// and commutative operations.
  SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();

  /// Choose the best candidate to use for the lane after \p Last. The set of
  /// candidates to choose from are values with an opcode matching \p Last's
  /// or loads consecutive to \p Last.
  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
                                       SmallPtrSetImpl<VPValue *> &Candidates,
                                       VPInterleavedAccessInfo &IAI);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print bundle \p Values to dbgs().
  void dumpBundle(ArrayRef<VPValue *> Values);
#endif

public:
  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}

  ~VPlanSlp() = default;

  /// Tries to build an SLP tree rooted at \p Operands and returns a
  /// VPInstruction combining \p Operands, if they can be combined.
  VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);

  /// Return the width of the widest combined bundle in bits.
  unsigned getWidestBundleBits() const { return WidestBundleBits; }

  /// Return true if all visited instructions can be combined.
  bool isCompletelySLP() const { return CompletelySLP; }
};
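
// Illustrative sketch (VPIAI, VPBB, and StoreBundle are placeholders): SLP
// combining starts from a bundle of seed values, typically a group of
// consecutive stores, and returns the combined root on success:
//
//   VPlanSlp Slp(VPIAI, *VPBB);
//   if (VPInstruction *Root = Slp.buildGraph(StoreBundle))
//     (void)Slp.getWidestBundleBits(); // width of the widest combined bundle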

namespace vputils {

/// Returns true if only the first lane of \p Def is used.
bool onlyFirstLaneUsed(const VPValue *Def);

/// Returns true if only the first part of \p Def is used.
bool onlyFirstPartUsed(const VPValue *Def);

/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
/// pre-header already contains a recipe expanding \p Expr, return it. If not,
/// create a new one.
VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
                                       ScalarEvolution &SE);

/// Returns true if \p VPV is uniform after vectorization.
inline bool isUniformAfterVectorization(VPValue *VPV) {
  // A value defined outside the vector region must be uniform after
  // vectorization inside a vector region.
  if (VPV->isDefinedOutsideVectorRegions())
    return true;
  VPRecipeBase *Def = VPV->getDefiningRecipe();
  assert(Def && "Must have definition for value defined inside vector region");
  if (auto *Rep = dyn_cast<VPReplicateRecipe>(Def))
    return Rep->isUniform();
  if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
    return all_of(GEP->operands(), isUniformAfterVectorization);
  if (auto *VPI = dyn_cast<VPInstruction>(Def))
    return VPI->isSingleScalar() || VPI->isVectorToScalar();
  return false;
}

/// Return true if \p V is a header mask in \p Plan.
bool isHeaderMask(VPValue *V, VPlan &Plan);
} // end namespace vputils

} // end namespace llvm

#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H