Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
213799 views
1
//===- VPlanHelpers.h - VPlan-related auxiliary helpers -------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// This file contains the declarations of different VPlan-related auxiliary
11
/// helpers.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H
16
#define LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H
17
18
#include "VPlanAnalysis.h"
19
#include "VPlanDominatorTree.h"
20
#include "llvm/ADT/DenseMap.h"
21
#include "llvm/ADT/SmallPtrSet.h"
22
#include "llvm/ADT/SmallVector.h"
23
#include "llvm/Analysis/DomTreeUpdater.h"
24
#include "llvm/Analysis/TargetTransformInfo.h"
25
#include "llvm/IR/DebugLoc.h"
26
#include "llvm/IR/ModuleSlotTracker.h"
27
#include "llvm/Support/InstructionCost.h"
28
29
namespace llvm {
30
31
class AssumptionCache;
32
class BasicBlock;
33
class DominatorTree;
34
class InnerLoopVectorizer;
35
class IRBuilderBase;
36
class LoopInfo;
37
class SCEV;
38
class Type;
39
class VPBasicBlock;
40
class VPRegionBlock;
41
class VPlan;
42
class Value;
43
44
/// Returns a calculation for the total number of elements for a given \p VF.
45
/// For fixed width vectors this value is a constant, whereas for scalable
46
/// vectors it is an expression determined at runtime.
47
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
48
49
/// Return a value for Step multiplied by VF.
50
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
51
int64_t Step);
52
53
/// A helper function that returns how much we should divide the cost of a
54
/// predicated block by. Typically this is the reciprocal of the block
55
/// probability, i.e. if we return X we are assuming the predicated block will
56
/// execute once for every X iterations of the loop header so the block should
57
/// only contribute 1/X of its cost to the total cost calculation, but when
58
/// optimizing for code size it will just be 1 as code size costs don't depend
59
/// on execution probabilities.
60
///
61
/// TODO: We should use actual block probability here, if available. Currently,
62
/// we always assume predicated blocks have a 50% chance of executing.
63
inline unsigned
64
getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind) {
65
return CostKind == TTI::TCK_CodeSize ? 1 : 2;
66
}
67
68
/// A range of powers-of-2 vectorization factors with fixed start and
69
/// adjustable end. The range includes start and excludes end, e.g.,:
70
/// [1, 16) = {1, 2, 4, 8}
71
struct VFRange {
72
// A power of 2.
73
const ElementCount Start;
74
75
// A power of 2. If End <= Start range is empty.
76
ElementCount End;
77
78
bool isEmpty() const {
79
return End.getKnownMinValue() <= Start.getKnownMinValue();
80
}
81
82
VFRange(const ElementCount &Start, const ElementCount &End)
83
: Start(Start), End(End) {
84
assert(Start.isScalable() == End.isScalable() &&
85
"Both Start and End should have the same scalable flag");
86
assert(isPowerOf2_32(Start.getKnownMinValue()) &&
87
"Expected Start to be a power of 2");
88
assert(isPowerOf2_32(End.getKnownMinValue()) &&
89
"Expected End to be a power of 2");
90
}
91
92
/// Iterator to iterate over vectorization factors in a VFRange.
93
class iterator
94
: public iterator_facade_base<iterator, std::forward_iterator_tag,
95
ElementCount> {
96
ElementCount VF;
97
98
public:
99
iterator(ElementCount VF) : VF(VF) {}
100
101
bool operator==(const iterator &Other) const { return VF == Other.VF; }
102
103
ElementCount operator*() const { return VF; }
104
105
iterator &operator++() {
106
VF *= 2;
107
return *this;
108
}
109
};
110
111
iterator begin() { return iterator(Start); }
112
iterator end() {
113
assert(isPowerOf2_32(End.getKnownMinValue()));
114
return iterator(End);
115
}
116
};
117
118
/// In what follows, the term "input IR" refers to code that is fed into the
119
/// vectorizer whereas the term "output IR" refers to code that is generated by
120
/// the vectorizer.
121
122
/// VPLane provides a way to access lanes in both fixed width and scalable
123
/// vectors, where for the latter the lane index sometimes needs calculating
124
/// as a runtime expression.
125
class VPLane {
126
public:
127
/// Kind describes how to interpret Lane.
128
enum class Kind : uint8_t {
129
/// For First, Lane is the index into the first N elements of a
130
/// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
131
First,
132
/// For ScalableLast, Lane is the offset from the start of the last
133
/// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
134
/// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
135
/// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
136
ScalableLast
137
};
138
139
private:
140
/// in [0..VF)
141
unsigned Lane;
142
143
/// Indicates how the Lane should be interpreted, as described above.
144
Kind LaneKind = Kind::First;
145
146
public:
147
VPLane(unsigned Lane) : Lane(Lane) {}
148
VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
149
150
static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
151
152
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
153
assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
154
"trying to extract with invalid offset");
155
unsigned LaneOffset = VF.getKnownMinValue() - Offset;
156
Kind LaneKind;
157
if (VF.isScalable())
158
// In this case 'LaneOffset' refers to the offset from the start of the
159
// last subvector with VF.getKnownMinValue() elements.
160
LaneKind = VPLane::Kind::ScalableLast;
161
else
162
LaneKind = VPLane::Kind::First;
163
return VPLane(LaneOffset, LaneKind);
164
}
165
166
static VPLane getLastLaneForVF(const ElementCount &VF) {
167
return getLaneFromEnd(VF, 1);
168
}
169
170
/// Returns a compile-time known value for the lane index and asserts if the
171
/// lane can only be calculated at runtime.
172
unsigned getKnownLane() const {
173
assert(LaneKind == Kind::First &&
174
"can only get known lane from the beginning");
175
return Lane;
176
}
177
178
/// Returns an expression describing the lane index that can be used at
179
/// runtime.
180
Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
181
182
/// Returns the Kind of lane offset.
183
Kind getKind() const { return LaneKind; }
184
185
/// Returns true if this is the first lane of the whole vector.
186
bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
187
188
/// Maps the lane to a cache index based on \p VF.
189
unsigned mapToCacheIndex(const ElementCount &VF) const {
190
switch (LaneKind) {
191
case VPLane::Kind::ScalableLast:
192
assert(VF.isScalable() && Lane < VF.getKnownMinValue() &&
193
"ScalableLast can only be used with scalable VFs");
194
return VF.getKnownMinValue() + Lane;
195
default:
196
assert(Lane < VF.getKnownMinValue() &&
197
"Cannot extract lane larger than VF");
198
return Lane;
199
}
200
}
201
};
202
203
/// VPTransformState holds information passed down when "executing" a VPlan,
204
/// needed for generating the output IR.
205
struct VPTransformState {
206
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF,
207
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
208
IRBuilderBase &Builder, VPlan *Plan, Loop *CurrentParentLoop,
209
Type *CanonicalIVTy);
210
/// Target Transform Info.
211
const TargetTransformInfo *TTI;
212
213
/// The chosen Vectorization Factor of the loop being vectorized.
214
ElementCount VF;
215
216
/// Hold the index to generate specific scalar instructions. Null indicates
217
/// that all instances are to be generated, using either scalar or vector
218
/// instructions.
219
std::optional<VPLane> Lane;
220
221
struct DataState {
222
// Each value from the original loop, when vectorized, is represented by a
223
// vector value in the map.
224
DenseMap<const VPValue *, Value *> VPV2Vector;
225
226
DenseMap<const VPValue *, SmallVector<Value *, 4>> VPV2Scalars;
227
} Data;
228
229
/// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
230
/// is false, otherwise return the generated scalar. \See set.
231
Value *get(const VPValue *Def, bool IsScalar = false);
232
233
/// Get the generated Value for a given VPValue and given Part and Lane.
234
Value *get(const VPValue *Def, const VPLane &Lane);
235
236
bool hasVectorValue(const VPValue *Def) {
237
return Data.VPV2Vector.contains(Def);
238
}
239
240
bool hasScalarValue(const VPValue *Def, VPLane Lane) {
241
auto I = Data.VPV2Scalars.find(Def);
242
if (I == Data.VPV2Scalars.end())
243
return false;
244
unsigned CacheIdx = Lane.mapToCacheIndex(VF);
245
return CacheIdx < I->second.size() && I->second[CacheIdx];
246
}
247
248
/// Set the generated vector Value for a given VPValue, if \p
249
/// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
250
void set(const VPValue *Def, Value *V, bool IsScalar = false) {
251
if (IsScalar) {
252
set(Def, V, VPLane(0));
253
return;
254
}
255
assert((VF.isScalar() || isVectorizedTy(V->getType())) &&
256
"scalar values must be stored as (0, 0)");
257
Data.VPV2Vector[Def] = V;
258
}
259
260
/// Reset an existing vector value for \p Def and a given \p Part.
261
void reset(const VPValue *Def, Value *V) {
262
assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
263
Data.VPV2Vector[Def] = V;
264
}
265
266
/// Set the generated scalar \p V for \p Def and the given \p Lane.
267
void set(const VPValue *Def, Value *V, const VPLane &Lane) {
268
auto &Scalars = Data.VPV2Scalars[Def];
269
unsigned CacheIdx = Lane.mapToCacheIndex(VF);
270
if (Scalars.size() <= CacheIdx)
271
Scalars.resize(CacheIdx + 1);
272
assert(!Scalars[CacheIdx] && "should overwrite existing value");
273
Scalars[CacheIdx] = V;
274
}
275
276
/// Reset an existing scalar value for \p Def and a given \p Lane.
277
void reset(const VPValue *Def, Value *V, const VPLane &Lane) {
278
auto Iter = Data.VPV2Scalars.find(Def);
279
assert(Iter != Data.VPV2Scalars.end() &&
280
"need to overwrite existing value");
281
unsigned CacheIdx = Lane.mapToCacheIndex(VF);
282
assert(CacheIdx < Iter->second.size() &&
283
"need to overwrite existing value");
284
Iter->second[CacheIdx] = V;
285
}
286
287
/// Set the debug location in the builder using the debug location \p DL.
288
void setDebugLocFrom(DebugLoc DL);
289
290
/// Insert the scalar value of \p Def at \p Lane into \p Lane of \p WideValue
291
/// and return the resulting value.
292
Value *packScalarIntoVectorizedValue(const VPValue *Def, Value *WideValue,
293
const VPLane &Lane);
294
295
/// Hold state information used when constructing the CFG of the output IR,
296
/// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
297
struct CFGState {
298
/// The previous VPBasicBlock visited. Initially set to null.
299
VPBasicBlock *PrevVPBB = nullptr;
300
301
/// The previous IR BasicBlock created or used. Initially set to the new
302
/// header BasicBlock.
303
BasicBlock *PrevBB = nullptr;
304
305
/// The last IR BasicBlock in the output IR. Set to the exit block of the
306
/// vector loop.
307
BasicBlock *ExitBB = nullptr;
308
309
/// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
310
/// of replication, maps the BasicBlock of the last replica created.
311
SmallDenseMap<const VPBasicBlock *, BasicBlock *> VPBB2IRBB;
312
313
/// Updater for the DominatorTree.
314
DomTreeUpdater DTU;
315
316
CFGState(DominatorTree *DT)
317
: DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
318
} CFG;
319
320
/// Hold a pointer to LoopInfo to register new basic blocks in the loop.
321
LoopInfo *LI;
322
323
/// Hold a pointer to AssumptionCache to register new assumptions after
324
/// replicating assume calls.
325
AssumptionCache *AC;
326
327
/// Hold a reference to the IRBuilder used to generate output IR code.
328
IRBuilderBase &Builder;
329
330
/// Pointer to the VPlan code is generated for.
331
VPlan *Plan;
332
333
/// The parent loop object for the current scope, or nullptr.
334
Loop *CurrentParentLoop = nullptr;
335
336
/// VPlan-based type analysis.
337
VPTypeAnalysis TypeAnalysis;
338
339
/// VPlan-based dominator tree.
340
VPDominatorTree VPDT;
341
};
342
343
/// Struct to hold various analysis needed for cost computations.
344
struct VPCostContext {
345
const TargetTransformInfo &TTI;
346
const TargetLibraryInfo &TLI;
347
VPTypeAnalysis Types;
348
LLVMContext &LLVMCtx;
349
LoopVectorizationCostModel &CM;
350
SmallPtrSet<Instruction *, 8> SkipCostComputation;
351
TargetTransformInfo::TargetCostKind CostKind;
352
353
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
354
Type *CanIVTy, LoopVectorizationCostModel &CM,
355
TargetTransformInfo::TargetCostKind CostKind)
356
: TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
357
CM(CM), CostKind(CostKind) {}
358
359
/// Return the cost for \p UI with \p VF using the legacy cost model as
360
/// fallback until computing the cost of all recipes migrates to VPlan.
361
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const;
362
363
/// Return true if the cost for \p UI shouldn't be computed, e.g. because it
364
/// has already been pre-computed.
365
bool skipCostComputation(Instruction *UI, bool IsVector) const;
366
367
/// Returns the OperandInfo for \p V, if it is a live-in.
368
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const;
369
370
/// Return true if \p I is considered uniform-after-vectorization in the
371
/// legacy cost model for \p VF. Only used to check for additional VPlan
372
/// simplifications.
373
bool isLegacyUniformAfterVectorization(Instruction *I, ElementCount VF) const;
374
};
375
376
/// This class can be used to assign names to VPValues. For VPValues without
377
/// underlying value, assign consecutive numbers and use those as names (wrapped
378
/// in vp<>). Otherwise, use the name from the underlying value (wrapped in
379
/// ir<>), appending a .V version number if there are multiple uses of the same
380
/// name. Allows querying names for VPValues for printing, similar to the
381
/// ModuleSlotTracker for IR values.
382
class VPSlotTracker {
383
/// Keep track of versioned names assigned to VPValues with underlying IR
384
/// values.
385
DenseMap<const VPValue *, std::string> VPValue2Name;
386
/// Keep track of the next number to use to version the base name.
387
StringMap<unsigned> BaseName2Version;
388
389
/// Number to assign to the next VPValue without underlying value.
390
unsigned NextSlot = 0;
391
392
/// Lazily created ModuleSlotTracker, used only when unnamed IR instructions
393
/// require slot tracking.
394
std::unique_ptr<ModuleSlotTracker> MST;
395
396
void assignName(const VPValue *V);
397
void assignNames(const VPlan &Plan);
398
void assignNames(const VPBasicBlock *VPBB);
399
std::string getName(const Value *V);
400
401
public:
402
VPSlotTracker(const VPlan *Plan = nullptr) {
403
if (Plan)
404
assignNames(*Plan);
405
}
406
407
/// Returns the name assigned to \p V, if there is one, otherwise try to
408
/// construct one from the underlying value, if there's one; else return
409
/// <badref>.
410
std::string getOrCreateName(const VPValue *V) const;
411
};
412
413
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
414
/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
415
/// indented and follows the dot format.
416
class VPlanPrinter {
417
raw_ostream &OS;
418
const VPlan &Plan;
419
unsigned Depth = 0;
420
unsigned TabWidth = 2;
421
std::string Indent;
422
unsigned BID = 0;
423
SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
424
425
VPSlotTracker SlotTracker;
426
427
/// Handle indentation.
428
void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
429
430
/// Print a given \p Block of the Plan.
431
void dumpBlock(const VPBlockBase *Block);
432
433
/// Print the information related to the CFG edges going out of a given
434
/// \p Block, followed by printing the successor blocks themselves.
435
void dumpEdges(const VPBlockBase *Block);
436
437
/// Print a given \p BasicBlock, including its VPRecipes, followed by printing
438
/// its successor blocks.
439
void dumpBasicBlock(const VPBasicBlock *BasicBlock);
440
441
/// Print a given \p Region of the Plan.
442
void dumpRegion(const VPRegionBlock *Region);
443
444
unsigned getOrCreateBID(const VPBlockBase *Block) {
445
return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
446
}
447
448
Twine getOrCreateName(const VPBlockBase *Block);
449
450
Twine getUID(const VPBlockBase *Block);
451
452
/// Print the information related to a CFG edge between two VPBlockBases.
453
void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
454
const Twine &Label);
455
456
public:
457
VPlanPrinter(raw_ostream &O, const VPlan &P)
458
: OS(O), Plan(P), SlotTracker(&P) {}
459
460
LLVM_DUMP_METHOD void dump();
461
};
462
#endif
463
464
} // end namespace llvm
465
466
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
467
468