Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnroll.cpp
35271 views
1
//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements some loop unrolling utilities. It does not define any
10
// actual pass or policy, but provides a single function to perform loop
11
// unrolling.
12
//
13
// The process of unrolling can produce extraneous basic blocks linked with
14
// unconditional branches. This will be corrected in the future.
15
//
16
//===----------------------------------------------------------------------===//
17
18
#include "llvm/ADT/ArrayRef.h"
19
#include "llvm/ADT/DenseMap.h"
20
#include "llvm/ADT/STLExtras.h"
21
#include "llvm/ADT/ScopedHashTable.h"
22
#include "llvm/ADT/SetVector.h"
23
#include "llvm/ADT/SmallVector.h"
24
#include "llvm/ADT/Statistic.h"
25
#include "llvm/ADT/StringRef.h"
26
#include "llvm/ADT/Twine.h"
27
#include "llvm/ADT/ilist_iterator.h"
28
#include "llvm/Analysis/AliasAnalysis.h"
29
#include "llvm/Analysis/AssumptionCache.h"
30
#include "llvm/Analysis/DomTreeUpdater.h"
31
#include "llvm/Analysis/InstructionSimplify.h"
32
#include "llvm/Analysis/LoopInfo.h"
33
#include "llvm/Analysis/LoopIterator.h"
34
#include "llvm/Analysis/MemorySSA.h"
35
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
36
#include "llvm/Analysis/ScalarEvolution.h"
37
#include "llvm/IR/BasicBlock.h"
38
#include "llvm/IR/CFG.h"
39
#include "llvm/IR/Constants.h"
40
#include "llvm/IR/DebugInfoMetadata.h"
41
#include "llvm/IR/DebugLoc.h"
42
#include "llvm/IR/DiagnosticInfo.h"
43
#include "llvm/IR/Dominators.h"
44
#include "llvm/IR/Function.h"
45
#include "llvm/IR/Instruction.h"
46
#include "llvm/IR/Instructions.h"
47
#include "llvm/IR/IntrinsicInst.h"
48
#include "llvm/IR/Metadata.h"
49
#include "llvm/IR/Module.h"
50
#include "llvm/IR/PatternMatch.h"
51
#include "llvm/IR/Use.h"
52
#include "llvm/IR/User.h"
53
#include "llvm/IR/ValueHandle.h"
54
#include "llvm/IR/ValueMap.h"
55
#include "llvm/Support/Casting.h"
56
#include "llvm/Support/CommandLine.h"
57
#include "llvm/Support/Debug.h"
58
#include "llvm/Support/GenericDomTree.h"
59
#include "llvm/Support/MathExtras.h"
60
#include "llvm/Support/raw_ostream.h"
61
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
62
#include "llvm/Transforms/Utils/Cloning.h"
63
#include "llvm/Transforms/Utils/Local.h"
64
#include "llvm/Transforms/Utils/LoopSimplify.h"
65
#include "llvm/Transforms/Utils/LoopUtils.h"
66
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
67
#include "llvm/Transforms/Utils/UnrollLoop.h"
68
#include "llvm/Transforms/Utils/ValueMapper.h"
69
#include <algorithm>
70
#include <assert.h>
71
#include <numeric>
72
#include <type_traits>
73
#include <vector>
74
75
namespace llvm {
76
class DataLayout;
77
class Value;
78
} // namespace llvm
79
80
using namespace llvm;
81
82
#define DEBUG_TYPE "loop-unroll"
83
84
// TODO: Should these be here or in LoopUnroll?
85
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
86
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
87
STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional "
88
"latch (completely or otherwise)");
89
90
static cl::opt<bool>
91
UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
92
cl::desc("Allow runtime unrolled loops to be unrolled "
93
"with epilog instead of prolog."));
94
95
static cl::opt<bool>
96
UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
97
cl::desc("Verify domtree after unrolling"),
98
#ifdef EXPENSIVE_CHECKS
99
cl::init(true)
100
#else
101
cl::init(false)
102
#endif
103
);
104
105
static cl::opt<bool>
106
UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden,
107
cl::desc("Verify loopinfo after unrolling"),
108
#ifdef EXPENSIVE_CHECKS
109
cl::init(true)
110
#else
111
cl::init(false)
112
#endif
113
);
114
115
116
/// Check if unrolling created a situation where we need to insert phi nodes to
117
/// preserve LCSSA form.
118
/// \param Blocks is a vector of basic blocks representing unrolled loop.
119
/// \param L is the outer loop.
120
/// It's possible that some of the blocks are in L, and some are not. In this
121
/// case, if there is a use is outside L, and definition is inside L, we need to
122
/// insert a phi-node, otherwise LCSSA will be broken.
123
/// The function is just a helper function for llvm::UnrollLoop that returns
124
/// true if this situation occurs, indicating that LCSSA needs to be fixed.
125
static bool needToInsertPhisForLCSSA(Loop *L,
126
const std::vector<BasicBlock *> &Blocks,
127
LoopInfo *LI) {
128
for (BasicBlock *BB : Blocks) {
129
if (LI->getLoopFor(BB) == L)
130
continue;
131
for (Instruction &I : *BB) {
132
for (Use &U : I.operands()) {
133
if (const auto *Def = dyn_cast<Instruction>(U)) {
134
Loop *DefLoop = LI->getLoopFor(Def->getParent());
135
if (!DefLoop)
136
continue;
137
if (DefLoop->contains(L))
138
return true;
139
}
140
}
141
}
142
}
143
return false;
144
}
145
146
/// Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary
147
/// and adds a mapping from the original loop to the new loop to NewLoops.
148
/// Returns nullptr if no new loop was created and a pointer to the
149
/// original loop OriginalBB was part of otherwise.
150
const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
151
BasicBlock *ClonedBB, LoopInfo *LI,
152
NewLoopsMap &NewLoops) {
153
// Figure out which loop New is in.
154
const Loop *OldLoop = LI->getLoopFor(OriginalBB);
155
assert(OldLoop && "Should (at least) be in the loop being unrolled!");
156
157
Loop *&NewLoop = NewLoops[OldLoop];
158
if (!NewLoop) {
159
// Found a new sub-loop.
160
assert(OriginalBB == OldLoop->getHeader() &&
161
"Header should be first in RPO");
162
163
NewLoop = LI->AllocateLoop();
164
Loop *NewLoopParent = NewLoops.lookup(OldLoop->getParentLoop());
165
166
if (NewLoopParent)
167
NewLoopParent->addChildLoop(NewLoop);
168
else
169
LI->addTopLevelLoop(NewLoop);
170
171
NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
172
return OldLoop;
173
} else {
174
NewLoop->addBasicBlockToLoop(ClonedBB, *LI);
175
return nullptr;
176
}
177
}
178
179
/// The function chooses which type of unroll (epilog or prolog) is more
180
/// profitabale.
181
/// Epilog unroll is more profitable when there is PHI that starts from
182
/// constant. In this case epilog will leave PHI start from constant,
183
/// but prolog will convert it to non-constant.
184
///
185
/// loop:
186
/// PN = PHI [I, Latch], [CI, PreHeader]
187
/// I = foo(PN)
188
/// ...
189
///
190
/// Epilog unroll case.
191
/// loop:
192
/// PN = PHI [I2, Latch], [CI, PreHeader]
193
/// I1 = foo(PN)
194
/// I2 = foo(I1)
195
/// ...
196
/// Prolog unroll case.
197
/// NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
198
/// loop:
199
/// PN = PHI [I2, Latch], [NewPN, PreHeader]
200
/// I1 = foo(PN)
201
/// I2 = foo(I1)
202
/// ...
203
///
204
static bool isEpilogProfitable(Loop *L) {
205
BasicBlock *PreHeader = L->getLoopPreheader();
206
BasicBlock *Header = L->getHeader();
207
assert(PreHeader && Header);
208
for (const PHINode &PN : Header->phis()) {
209
if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader)))
210
return true;
211
}
212
return false;
213
}
214
215
struct LoadValue {
216
Instruction *DefI = nullptr;
217
unsigned Generation = 0;
218
LoadValue() = default;
219
LoadValue(Instruction *Inst, unsigned Generation)
220
: DefI(Inst), Generation(Generation) {}
221
};
222
223
class StackNode {
224
ScopedHashTable<const SCEV *, LoadValue>::ScopeTy LoadScope;
225
unsigned CurrentGeneration;
226
unsigned ChildGeneration;
227
DomTreeNode *Node;
228
DomTreeNode::const_iterator ChildIter;
229
DomTreeNode::const_iterator EndIter;
230
bool Processed = false;
231
232
public:
233
StackNode(ScopedHashTable<const SCEV *, LoadValue> &AvailableLoads,
234
unsigned cg, DomTreeNode *N, DomTreeNode::const_iterator Child,
235
DomTreeNode::const_iterator End)
236
: LoadScope(AvailableLoads), CurrentGeneration(cg), ChildGeneration(cg),
237
Node(N), ChildIter(Child), EndIter(End) {}
238
// Accessors.
239
unsigned currentGeneration() const { return CurrentGeneration; }
240
unsigned childGeneration() const { return ChildGeneration; }
241
void childGeneration(unsigned generation) { ChildGeneration = generation; }
242
DomTreeNode *node() { return Node; }
243
DomTreeNode::const_iterator childIter() const { return ChildIter; }
244
245
DomTreeNode *nextChild() {
246
DomTreeNode *Child = *ChildIter;
247
++ChildIter;
248
return Child;
249
}
250
251
DomTreeNode::const_iterator end() const { return EndIter; }
252
bool isProcessed() const { return Processed; }
253
void process() { Processed = true; }
254
};
255
256
Value *getMatchingValue(LoadValue LV, LoadInst *LI, unsigned CurrentGeneration,
257
BatchAAResults &BAA,
258
function_ref<MemorySSA *()> GetMSSA) {
259
if (!LV.DefI)
260
return nullptr;
261
if (LV.DefI->getType() != LI->getType())
262
return nullptr;
263
if (LV.Generation != CurrentGeneration) {
264
MemorySSA *MSSA = GetMSSA();
265
if (!MSSA)
266
return nullptr;
267
auto *EarlierMA = MSSA->getMemoryAccess(LV.DefI);
268
MemoryAccess *LaterDef =
269
MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA);
270
if (!MSSA->dominates(LaterDef, EarlierMA))
271
return nullptr;
272
}
273
return LV.DefI;
274
}
275
276
void loadCSE(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI,
277
BatchAAResults &BAA, function_ref<MemorySSA *()> GetMSSA) {
278
ScopedHashTable<const SCEV *, LoadValue> AvailableLoads;
279
SmallVector<std::unique_ptr<StackNode>> NodesToProcess;
280
DomTreeNode *HeaderD = DT.getNode(L->getHeader());
281
NodesToProcess.emplace_back(new StackNode(AvailableLoads, 0, HeaderD,
282
HeaderD->begin(), HeaderD->end()));
283
284
unsigned CurrentGeneration = 0;
285
while (!NodesToProcess.empty()) {
286
StackNode *NodeToProcess = &*NodesToProcess.back();
287
288
CurrentGeneration = NodeToProcess->currentGeneration();
289
290
if (!NodeToProcess->isProcessed()) {
291
// Process the node.
292
293
// If this block has a single predecessor, then the predecessor is the
294
// parent
295
// of the domtree node and all of the live out memory values are still
296
// current in this block. If this block has multiple predecessors, then
297
// they could have invalidated the live-out memory values of our parent
298
// value. For now, just be conservative and invalidate memory if this
299
// block has multiple predecessors.
300
if (!NodeToProcess->node()->getBlock()->getSinglePredecessor())
301
++CurrentGeneration;
302
for (auto &I : make_early_inc_range(*NodeToProcess->node()->getBlock())) {
303
304
auto *Load = dyn_cast<LoadInst>(&I);
305
if (!Load || !Load->isSimple()) {
306
if (I.mayWriteToMemory())
307
CurrentGeneration++;
308
continue;
309
}
310
311
const SCEV *PtrSCEV = SE.getSCEV(Load->getPointerOperand());
312
LoadValue LV = AvailableLoads.lookup(PtrSCEV);
313
if (Value *M =
314
getMatchingValue(LV, Load, CurrentGeneration, BAA, GetMSSA)) {
315
if (LI.replacementPreservesLCSSAForm(Load, M)) {
316
Load->replaceAllUsesWith(M);
317
Load->eraseFromParent();
318
}
319
} else {
320
AvailableLoads.insert(PtrSCEV, LoadValue(Load, CurrentGeneration));
321
}
322
}
323
NodeToProcess->childGeneration(CurrentGeneration);
324
NodeToProcess->process();
325
} else if (NodeToProcess->childIter() != NodeToProcess->end()) {
326
// Push the next child onto the stack.
327
DomTreeNode *Child = NodeToProcess->nextChild();
328
if (!L->contains(Child->getBlock()))
329
continue;
330
NodesToProcess.emplace_back(
331
new StackNode(AvailableLoads, NodeToProcess->childGeneration(), Child,
332
Child->begin(), Child->end()));
333
} else {
334
// It has been processed, and there are no more children to process,
335
// so delete it and pop it off the stack.
336
NodesToProcess.pop_back();
337
}
338
}
339
}
340
341
/// Perform some cleanup and simplifications on loops after unrolling. It is
342
/// useful to simplify the IV's in the new loop, as well as do a quick
343
/// simplify/dce pass of the instructions.
344
void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
345
ScalarEvolution *SE, DominatorTree *DT,
346
AssumptionCache *AC,
347
const TargetTransformInfo *TTI,
348
AAResults *AA) {
349
using namespace llvm::PatternMatch;
350
351
// Simplify any new induction variables in the partially unrolled loop.
352
if (SE && SimplifyIVs) {
353
SmallVector<WeakTrackingVH, 16> DeadInsts;
354
simplifyLoopIVs(L, SE, DT, LI, TTI, DeadInsts);
355
356
// Aggressively clean up dead instructions that simplifyLoopIVs already
357
// identified. Any remaining should be cleaned up below.
358
while (!DeadInsts.empty()) {
359
Value *V = DeadInsts.pop_back_val();
360
if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
361
RecursivelyDeleteTriviallyDeadInstructions(Inst);
362
}
363
364
if (AA) {
365
std::unique_ptr<MemorySSA> MSSA = nullptr;
366
BatchAAResults BAA(*AA);
367
loadCSE(L, *DT, *SE, *LI, BAA, [L, AA, DT, &MSSA]() -> MemorySSA * {
368
if (!MSSA)
369
MSSA.reset(new MemorySSA(*L, AA, DT));
370
return &*MSSA;
371
});
372
}
373
}
374
375
// At this point, the code is well formed. Perform constprop, instsimplify,
376
// and dce.
377
const DataLayout &DL = L->getHeader()->getDataLayout();
378
SmallVector<WeakTrackingVH, 16> DeadInsts;
379
for (BasicBlock *BB : L->getBlocks()) {
380
// Remove repeated debug instructions after loop unrolling.
381
if (BB->getParent()->getSubprogram())
382
RemoveRedundantDbgInstrs(BB);
383
384
for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
385
if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
386
if (LI->replacementPreservesLCSSAForm(&Inst, V))
387
Inst.replaceAllUsesWith(V);
388
if (isInstructionTriviallyDead(&Inst))
389
DeadInsts.emplace_back(&Inst);
390
391
// Fold ((add X, C1), C2) to (add X, C1+C2). This is very common in
392
// unrolled loops, and handling this early allows following code to
393
// identify the IV as a "simple recurrence" without first folding away
394
// a long chain of adds.
395
{
396
Value *X;
397
const APInt *C1, *C2;
398
if (match(&Inst, m_Add(m_Add(m_Value(X), m_APInt(C1)), m_APInt(C2)))) {
399
auto *InnerI = dyn_cast<Instruction>(Inst.getOperand(0));
400
auto *InnerOBO = cast<OverflowingBinaryOperator>(Inst.getOperand(0));
401
bool SignedOverflow;
402
APInt NewC = C1->sadd_ov(*C2, SignedOverflow);
403
Inst.setOperand(0, X);
404
Inst.setOperand(1, ConstantInt::get(Inst.getType(), NewC));
405
Inst.setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap() &&
406
InnerOBO->hasNoUnsignedWrap());
407
Inst.setHasNoSignedWrap(Inst.hasNoSignedWrap() &&
408
InnerOBO->hasNoSignedWrap() &&
409
!SignedOverflow);
410
if (InnerI && isInstructionTriviallyDead(InnerI))
411
DeadInsts.emplace_back(InnerI);
412
}
413
}
414
}
415
// We can't do recursive deletion until we're done iterating, as we might
416
// have a phi which (potentially indirectly) uses instructions later in
417
// the block we're iterating through.
418
RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
419
}
420
}
421
422
// Loops containing convergent instructions that are uncontrolled or controlled
423
// from outside the loop must have a count that divides their TripMultiple.
424
LLVM_ATTRIBUTE_USED
425
static bool canHaveUnrollRemainder(const Loop *L) {
426
if (getLoopConvergenceHeart(L))
427
return false;
428
429
// Check for uncontrolled convergent operations.
430
for (auto &BB : L->blocks()) {
431
for (auto &I : *BB) {
432
if (isa<ConvergenceControlInst>(I))
433
return true;
434
if (auto *CB = dyn_cast<CallBase>(&I))
435
if (CB->isConvergent())
436
return CB->getConvergenceControlToken();
437
}
438
}
439
return true;
440
}
441
442
/// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling
443
/// can only fail when the loop's latch block is not terminated by a conditional
444
/// branch instruction. However, if the trip count (and multiple) are not known,
445
/// loop unrolling will mostly produce more code that is no faster.
446
///
447
/// If Runtime is true then UnrollLoop will try to insert a prologue or
448
/// epilogue that ensures the latch has a trip multiple of Count. UnrollLoop
449
/// will not runtime-unroll the loop if computing the run-time trip count will
450
/// be expensive and AllowExpensiveTripCount is false.
451
///
452
/// The LoopInfo Analysis that is passed will be kept consistent.
453
///
454
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
455
/// DominatorTree if they are non-null.
456
///
457
/// If RemainderLoop is non-null, it will receive the remainder loop (if
458
/// required and not fully unrolled).
459
LoopUnrollResult
460
llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
461
ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
462
const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE,
463
bool PreserveLCSSA, Loop **RemainderLoop, AAResults *AA) {
464
assert(DT && "DomTree is required");
465
466
if (!L->getLoopPreheader()) {
467
LLVM_DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
468
return LoopUnrollResult::Unmodified;
469
}
470
471
if (!L->getLoopLatch()) {
472
LLVM_DEBUG(dbgs() << " Can't unroll; loop exit-block-insertion failed.\n");
473
return LoopUnrollResult::Unmodified;
474
}
475
476
// Loops with indirectbr cannot be cloned.
477
if (!L->isSafeToClone()) {
478
LLVM_DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
479
return LoopUnrollResult::Unmodified;
480
}
481
482
if (L->getHeader()->hasAddressTaken()) {
483
// The loop-rotate pass can be helpful to avoid this in many cases.
484
LLVM_DEBUG(
485
dbgs() << " Won't unroll loop: address of header block is taken.\n");
486
return LoopUnrollResult::Unmodified;
487
}
488
489
assert(ULO.Count > 0);
490
491
// All these values should be taken only after peeling because they might have
492
// changed.
493
BasicBlock *Preheader = L->getLoopPreheader();
494
BasicBlock *Header = L->getHeader();
495
BasicBlock *LatchBlock = L->getLoopLatch();
496
SmallVector<BasicBlock *, 4> ExitBlocks;
497
L->getExitBlocks(ExitBlocks);
498
std::vector<BasicBlock *> OriginalLoopBlocks = L->getBlocks();
499
500
const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
501
const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
502
unsigned EstimatedLoopInvocationWeight = 0;
503
std::optional<unsigned> OriginalTripCount =
504
llvm::getLoopEstimatedTripCount(L, &EstimatedLoopInvocationWeight);
505
506
// Effectively "DCE" unrolled iterations that are beyond the max tripcount
507
// and will never be executed.
508
if (MaxTripCount && ULO.Count > MaxTripCount)
509
ULO.Count = MaxTripCount;
510
511
struct ExitInfo {
512
unsigned TripCount;
513
unsigned TripMultiple;
514
unsigned BreakoutTrip;
515
bool ExitOnTrue;
516
BasicBlock *FirstExitingBlock = nullptr;
517
SmallVector<BasicBlock *> ExitingBlocks;
518
};
519
DenseMap<BasicBlock *, ExitInfo> ExitInfos;
520
SmallVector<BasicBlock *, 4> ExitingBlocks;
521
L->getExitingBlocks(ExitingBlocks);
522
for (auto *ExitingBlock : ExitingBlocks) {
523
// The folding code is not prepared to deal with non-branch instructions
524
// right now.
525
auto *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
526
if (!BI)
527
continue;
528
529
ExitInfo &Info = ExitInfos.try_emplace(ExitingBlock).first->second;
530
Info.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
531
Info.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
532
if (Info.TripCount != 0) {
533
Info.BreakoutTrip = Info.TripCount % ULO.Count;
534
Info.TripMultiple = 0;
535
} else {
536
Info.BreakoutTrip = Info.TripMultiple =
537
(unsigned)std::gcd(ULO.Count, Info.TripMultiple);
538
}
539
Info.ExitOnTrue = !L->contains(BI->getSuccessor(0));
540
Info.ExitingBlocks.push_back(ExitingBlock);
541
LLVM_DEBUG(dbgs() << " Exiting block %" << ExitingBlock->getName()
542
<< ": TripCount=" << Info.TripCount
543
<< ", TripMultiple=" << Info.TripMultiple
544
<< ", BreakoutTrip=" << Info.BreakoutTrip << "\n");
545
}
546
547
// Are we eliminating the loop control altogether? Note that we can know
548
// we're eliminating the backedge without knowing exactly which iteration
549
// of the unrolled body exits.
550
const bool CompletelyUnroll = ULO.Count == MaxTripCount;
551
552
const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero;
553
554
// There's no point in performing runtime unrolling if this unroll count
555
// results in a full unroll.
556
if (CompletelyUnroll)
557
ULO.Runtime = false;
558
559
// Go through all exits of L and see if there are any phi-nodes there. We just
560
// conservatively assume that they're inserted to preserve LCSSA form, which
561
// means that complete unrolling might break this form. We need to either fix
562
// it in-place after the transformation, or entirely rebuild LCSSA. TODO: For
563
// now we just recompute LCSSA for the outer loop, but it should be possible
564
// to fix it in-place.
565
bool NeedToFixLCSSA =
566
PreserveLCSSA && CompletelyUnroll &&
567
any_of(ExitBlocks,
568
[](const BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
569
570
// The current loop unroll pass can unroll loops that have
571
// (1) single latch; and
572
// (2a) latch is unconditional; or
573
// (2b) latch is conditional and is an exiting block
574
// FIXME: The implementation can be extended to work with more complicated
575
// cases, e.g. loops with multiple latches.
576
BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
577
578
// A conditional branch which exits the loop, which can be optimized to an
579
// unconditional branch in the unrolled loop in some cases.
580
bool LatchIsExiting = L->isLoopExiting(LatchBlock);
581
if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
582
LLVM_DEBUG(
583
dbgs() << "Can't unroll; a conditional latch must exit the loop");
584
return LoopUnrollResult::Unmodified;
585
}
586
587
assert((!ULO.Runtime || canHaveUnrollRemainder(L)) &&
588
"Can't runtime unroll if loop contains a convergent operation.");
589
590
bool EpilogProfitability =
591
UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
592
: isEpilogProfitable(L);
593
594
if (ULO.Runtime &&
595
!UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
596
EpilogProfitability, ULO.UnrollRemainder,
597
ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI,
598
PreserveLCSSA, RemainderLoop)) {
599
if (ULO.Force)
600
ULO.Runtime = false;
601
else {
602
LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
603
"generated when assuming runtime trip count\n");
604
return LoopUnrollResult::Unmodified;
605
}
606
}
607
608
using namespace ore;
609
// Report the unrolling decision.
610
if (CompletelyUnroll) {
611
LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
612
<< " with trip count " << ULO.Count << "!\n");
613
if (ORE)
614
ORE->emit([&]() {
615
return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
616
L->getHeader())
617
<< "completely unrolled loop with "
618
<< NV("UnrollCount", ULO.Count) << " iterations";
619
});
620
} else {
621
LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
622
<< ULO.Count);
623
if (ULO.Runtime)
624
LLVM_DEBUG(dbgs() << " with run-time trip count");
625
LLVM_DEBUG(dbgs() << "!\n");
626
627
if (ORE)
628
ORE->emit([&]() {
629
OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
630
L->getHeader());
631
Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count);
632
if (ULO.Runtime)
633
Diag << " with run-time trip count";
634
return Diag;
635
});
636
}
637
638
// We are going to make changes to this loop. SCEV may be keeping cached info
639
// about it, in particular about backedge taken count. The changes we make
640
// are guaranteed to invalidate this information for our loop. It is tempting
641
// to only invalidate the loop being unrolled, but it is incorrect as long as
642
// all exiting branches from all inner loops have impact on the outer loops,
643
// and if something changes inside them then any of outer loops may also
644
// change. When we forget outermost loop, we also forget all contained loops
645
// and this is what we need here.
646
if (SE) {
647
if (ULO.ForgetAllSCEV)
648
SE->forgetAllLoops();
649
else {
650
SE->forgetTopmostLoop(L);
651
SE->forgetBlockAndLoopDispositions();
652
}
653
}
654
655
if (!LatchIsExiting)
656
++NumUnrolledNotLatch;
657
658
// For the first iteration of the loop, we should use the precloned values for
659
// PHI nodes. Insert associations now.
660
ValueToValueMapTy LastValueMap;
661
std::vector<PHINode*> OrigPHINode;
662
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
663
OrigPHINode.push_back(cast<PHINode>(I));
664
}
665
666
std::vector<BasicBlock *> Headers;
667
std::vector<BasicBlock *> Latches;
668
Headers.push_back(Header);
669
Latches.push_back(LatchBlock);
670
671
// The current on-the-fly SSA update requires blocks to be processed in
672
// reverse postorder so that LastValueMap contains the correct value at each
673
// exit.
674
LoopBlocksDFS DFS(L);
675
DFS.perform(LI);
676
677
// Stash the DFS iterators before adding blocks to the loop.
678
LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
679
LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
680
681
std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks();
682
683
// Loop Unrolling might create new loops. While we do preserve LoopInfo, we
684
// might break loop-simplified form for these loops (as they, e.g., would
685
// share the same exit blocks). We'll keep track of loops for which we can
686
// break this so that later we can re-simplify them.
687
SmallSetVector<Loop *, 4> LoopsToSimplify;
688
for (Loop *SubLoop : *L)
689
LoopsToSimplify.insert(SubLoop);
690
691
// When a FSDiscriminator is enabled, we don't need to add the multiply
692
// factors to the discriminators.
693
if (Header->getParent()->shouldEmitDebugInfoForProfiling() &&
694
!EnableFSDiscriminator)
695
for (BasicBlock *BB : L->getBlocks())
696
for (Instruction &I : *BB)
697
if (!I.isDebugOrPseudoInst())
698
if (const DILocation *DIL = I.getDebugLoc()) {
699
auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count);
700
if (NewDIL)
701
I.setDebugLoc(*NewDIL);
702
else
703
LLVM_DEBUG(dbgs()
704
<< "Failed to create new discriminator: "
705
<< DIL->getFilename() << " Line: " << DIL->getLine());
706
}
707
708
// Identify what noalias metadata is inside the loop: if it is inside the
709
// loop, the associated metadata must be cloned for each iteration.
710
SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
711
identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
712
713
// We place the unrolled iterations immediately after the original loop
714
// latch. This is a reasonable default placement if we don't have block
715
// frequencies, and if we do, well the layout will be adjusted later.
716
auto BlockInsertPt = std::next(LatchBlock->getIterator());
717
for (unsigned It = 1; It != ULO.Count; ++It) {
718
SmallVector<BasicBlock *, 8> NewBlocks;
719
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
720
NewLoops[L] = L;
721
722
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
723
ValueToValueMapTy VMap;
724
BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
725
Header->getParent()->insert(BlockInsertPt, New);
726
727
assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
728
"Header should not be in a sub-loop");
729
// Tell LI about New.
730
const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
731
if (OldLoop)
732
LoopsToSimplify.insert(NewLoops[OldLoop]);
733
734
if (*BB == Header) {
735
// Loop over all of the PHI nodes in the block, changing them to use
736
// the incoming values from the previous block.
737
for (PHINode *OrigPHI : OrigPHINode) {
738
PHINode *NewPHI = cast<PHINode>(VMap[OrigPHI]);
739
Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
740
if (Instruction *InValI = dyn_cast<Instruction>(InVal))
741
if (It > 1 && L->contains(InValI))
742
InVal = LastValueMap[InValI];
743
VMap[OrigPHI] = InVal;
744
NewPHI->eraseFromParent();
745
}
746
747
// Eliminate copies of the loop heart intrinsic, if any.
748
if (ULO.Heart) {
749
auto it = VMap.find(ULO.Heart);
750
assert(it != VMap.end());
751
Instruction *heartCopy = cast<Instruction>(it->second);
752
heartCopy->eraseFromParent();
753
VMap.erase(it);
754
}
755
}
756
757
// Update our running map of newest clones
758
LastValueMap[*BB] = New;
759
for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
760
VI != VE; ++VI)
761
LastValueMap[VI->first] = VI->second;
762
763
// Add phi entries for newly created values to all exit blocks.
764
for (BasicBlock *Succ : successors(*BB)) {
765
if (L->contains(Succ))
766
continue;
767
for (PHINode &PHI : Succ->phis()) {
768
Value *Incoming = PHI.getIncomingValueForBlock(*BB);
769
ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
770
if (It != LastValueMap.end())
771
Incoming = It->second;
772
PHI.addIncoming(Incoming, New);
773
SE->forgetValue(&PHI);
774
}
775
}
776
// Keep track of new headers and latches as we create them, so that
777
// we can insert the proper branches later.
778
if (*BB == Header)
779
Headers.push_back(New);
780
if (*BB == LatchBlock)
781
Latches.push_back(New);
782
783
// Keep track of the exiting block and its successor block contained in
784
// the loop for the current iteration.
785
auto ExitInfoIt = ExitInfos.find(*BB);
786
if (ExitInfoIt != ExitInfos.end())
787
ExitInfoIt->second.ExitingBlocks.push_back(New);
788
789
NewBlocks.push_back(New);
790
UnrolledLoopBlocks.push_back(New);
791
792
// Update DomTree: since we just copy the loop body, and each copy has a
793
// dedicated entry block (copy of the header block), this header's copy
794
// dominates all copied blocks. That means, dominance relations in the
795
// copied body are the same as in the original body.
796
if (*BB == Header)
797
DT->addNewBlock(New, Latches[It - 1]);
798
else {
799
auto BBDomNode = DT->getNode(*BB);
800
auto BBIDom = BBDomNode->getIDom();
801
BasicBlock *OriginalBBIDom = BBIDom->getBlock();
802
DT->addNewBlock(
803
New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
804
}
805
}
806
807
// Remap all instructions in the most recent iteration
808
remapInstructionsInBlocks(NewBlocks, LastValueMap);
809
for (BasicBlock *NewBlock : NewBlocks)
810
for (Instruction &I : *NewBlock)
811
if (auto *II = dyn_cast<AssumeInst>(&I))
812
AC->registerAssumption(II);
813
814
{
815
// Identify what other metadata depends on the cloned version. After
816
// cloning, replace the metadata with the corrected version for both
817
// memory instructions and noalias intrinsics.
818
std::string ext = (Twine("It") + Twine(It)).str();
819
cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
820
Header->getContext(), ext);
821
}
822
}
823
824
// Loop over the PHI nodes in the original block, setting incoming values.
825
for (PHINode *PN : OrigPHINode) {
826
if (CompletelyUnroll) {
827
PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
828
PN->eraseFromParent();
829
} else if (ULO.Count > 1) {
830
Value *InVal = PN->removeIncomingValue(LatchBlock, false);
831
// If this value was defined in the loop, take the value defined by the
832
// last iteration of the loop.
833
if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
834
if (L->contains(InValI))
835
InVal = LastValueMap[InVal];
836
}
837
assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
838
PN->addIncoming(InVal, Latches.back());
839
}
840
}
841
842
// Connect latches of the unrolled iterations to the headers of the next
843
// iteration. Currently they point to the header of the same iteration.
844
for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
845
unsigned j = (i + 1) % e;
846
Latches[i]->getTerminator()->replaceSuccessorWith(Headers[i], Headers[j]);
847
}
848
849
// Update dominators of blocks we might reach through exits.
850
// Immediate dominator of such block might change, because we add more
851
// routes which can lead to the exit: we can now reach it from the copied
852
// iterations too.
853
if (ULO.Count > 1) {
854
for (auto *BB : OriginalLoopBlocks) {
855
auto *BBDomNode = DT->getNode(BB);
856
SmallVector<BasicBlock *, 16> ChildrenToUpdate;
857
for (auto *ChildDomNode : BBDomNode->children()) {
858
auto *ChildBB = ChildDomNode->getBlock();
859
if (!L->contains(ChildBB))
860
ChildrenToUpdate.push_back(ChildBB);
861
}
862
// The new idom of the block will be the nearest common dominator
863
// of all copies of the previous idom. This is equivalent to the
864
// nearest common dominator of the previous idom and the first latch,
865
// which dominates all copies of the previous idom.
866
BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
867
for (auto *ChildBB : ChildrenToUpdate)
868
DT->changeImmediateDominator(ChildBB, NewIDom);
869
}
870
}
871
872
assert(!UnrollVerifyDomtree ||
873
DT->verify(DominatorTree::VerificationLevel::Fast));
874
875
SmallVector<DominatorTree::UpdateType> DTUpdates;
876
auto SetDest = [&](BasicBlock *Src, bool WillExit, bool ExitOnTrue) {
877
auto *Term = cast<BranchInst>(Src->getTerminator());
878
const unsigned Idx = ExitOnTrue ^ WillExit;
879
BasicBlock *Dest = Term->getSuccessor(Idx);
880
BasicBlock *DeadSucc = Term->getSuccessor(1-Idx);
881
882
// Remove predecessors from all non-Dest successors.
883
DeadSucc->removePredecessor(Src, /* KeepOneInputPHIs */ true);
884
885
// Replace the conditional branch with an unconditional one.
886
BranchInst::Create(Dest, Term->getIterator());
887
Term->eraseFromParent();
888
889
DTUpdates.emplace_back(DominatorTree::Delete, Src, DeadSucc);
890
};
891
892
auto WillExit = [&](const ExitInfo &Info, unsigned i, unsigned j,
893
bool IsLatch) -> std::optional<bool> {
894
if (CompletelyUnroll) {
895
if (PreserveOnlyFirst) {
896
if (i == 0)
897
return std::nullopt;
898
return j == 0;
899
}
900
// Complete (but possibly inexact) unrolling
901
if (j == 0)
902
return true;
903
if (Info.TripCount && j != Info.TripCount)
904
return false;
905
return std::nullopt;
906
}
907
908
if (ULO.Runtime) {
909
// If runtime unrolling inserts a prologue, information about non-latch
910
// exits may be stale.
911
if (IsLatch && j != 0)
912
return false;
913
return std::nullopt;
914
}
915
916
if (j != Info.BreakoutTrip &&
917
(Info.TripMultiple == 0 || j % Info.TripMultiple != 0)) {
918
// If we know the trip count or a multiple of it, we can safely use an
919
// unconditional branch for some iterations.
920
return false;
921
}
922
return std::nullopt;
923
};
924
925
// Fold branches for iterations where we know that they will exit or not
926
// exit.
927
for (auto &Pair : ExitInfos) {
928
ExitInfo &Info = Pair.second;
929
for (unsigned i = 0, e = Info.ExitingBlocks.size(); i != e; ++i) {
930
// The branch destination.
931
unsigned j = (i + 1) % e;
932
bool IsLatch = Pair.first == LatchBlock;
933
std::optional<bool> KnownWillExit = WillExit(Info, i, j, IsLatch);
934
if (!KnownWillExit) {
935
if (!Info.FirstExitingBlock)
936
Info.FirstExitingBlock = Info.ExitingBlocks[i];
937
continue;
938
}
939
940
// We don't fold known-exiting branches for non-latch exits here,
941
// because this ensures that both all loop blocks and all exit blocks
942
// remain reachable in the CFG.
943
// TODO: We could fold these branches, but it would require much more
944
// sophisticated updates to LoopInfo.
945
if (*KnownWillExit && !IsLatch) {
946
if (!Info.FirstExitingBlock)
947
Info.FirstExitingBlock = Info.ExitingBlocks[i];
948
continue;
949
}
950
951
SetDest(Info.ExitingBlocks[i], *KnownWillExit, Info.ExitOnTrue);
952
}
953
}
954
955
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
956
DomTreeUpdater *DTUToUse = &DTU;
957
if (ExitingBlocks.size() == 1 && ExitInfos.size() == 1) {
958
// Manually update the DT if there's a single exiting node. In that case
959
// there's a single exit node and it is sufficient to update the nodes
960
// immediately dominated by the original exiting block. They will become
961
// dominated by the first exiting block that leaves the loop after
962
// unrolling. Note that the CFG inside the loop does not change, so there's
963
// no need to update the DT inside the unrolled loop.
964
DTUToUse = nullptr;
965
auto &[OriginalExit, Info] = *ExitInfos.begin();
966
if (!Info.FirstExitingBlock)
967
Info.FirstExitingBlock = Info.ExitingBlocks.back();
968
for (auto *C : to_vector(DT->getNode(OriginalExit)->children())) {
969
if (L->contains(C->getBlock()))
970
continue;
971
C->setIDom(DT->getNode(Info.FirstExitingBlock));
972
}
973
} else {
974
DTU.applyUpdates(DTUpdates);
975
}
976
977
// When completely unrolling, the last latch becomes unreachable.
978
if (!LatchIsExiting && CompletelyUnroll) {
979
// There is no need to update the DT here, because there must be a unique
980
// latch. Hence if the latch is not exiting it must directly branch back to
981
// the original loop header and does not dominate any nodes.
982
assert(LatchBlock->getSingleSuccessor() && "Loop with multiple latches?");
983
changeToUnreachable(Latches.back()->getTerminator(), PreserveLCSSA);
984
}
985
986
// Merge adjacent basic blocks, if possible.
987
for (BasicBlock *Latch : Latches) {
988
BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
989
assert((Term ||
990
(CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) &&
991
"Need a branch as terminator, except when fully unrolling with "
992
"unconditional latch");
993
if (Term && Term->isUnconditional()) {
994
BasicBlock *Dest = Term->getSuccessor(0);
995
BasicBlock *Fold = Dest->getUniquePredecessor();
996
if (MergeBlockIntoPredecessor(Dest, /*DTU=*/DTUToUse, LI,
997
/*MSSAU=*/nullptr, /*MemDep=*/nullptr,
998
/*PredecessorWithTwoSuccessors=*/false,
999
DTUToUse ? nullptr : DT)) {
1000
// Dest has been folded into Fold. Update our worklists accordingly.
1001
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
1002
llvm::erase(UnrolledLoopBlocks, Dest);
1003
}
1004
}
1005
}
1006
1007
if (DTUToUse) {
1008
// Apply updates to the DomTree.
1009
DT = &DTU.getDomTree();
1010
}
1011
assert(!UnrollVerifyDomtree ||
1012
DT->verify(DominatorTree::VerificationLevel::Fast));
1013
1014
// At this point, the code is well formed. We now simplify the unrolled loop,
1015
// doing constant propagation and dead code elimination as we go.
1016
simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC,
1017
TTI, AA);
1018
1019
NumCompletelyUnrolled += CompletelyUnroll;
1020
++NumUnrolled;
1021
1022
Loop *OuterL = L->getParentLoop();
1023
// Update LoopInfo if the loop is completely removed.
1024
if (CompletelyUnroll) {
1025
LI->erase(L);
1026
// We shouldn't try to use `L` anymore.
1027
L = nullptr;
1028
} else if (OriginalTripCount) {
1029
// Update the trip count. Note that the remainder has already logic
1030
// computing it in `UnrollRuntimeLoopRemainder`.
1031
setLoopEstimatedTripCount(L, *OriginalTripCount / ULO.Count,
1032
EstimatedLoopInvocationWeight);
1033
}
1034
1035
// LoopInfo should not be valid, confirm that.
1036
if (UnrollVerifyLoopInfo)
1037
LI->verify(*DT);
1038
1039
// After complete unrolling most of the blocks should be contained in OuterL.
1040
// However, some of them might happen to be out of OuterL (e.g. if they
1041
// precede a loop exit). In this case we might need to insert PHI nodes in
1042
// order to preserve LCSSA form.
1043
// We don't need to check this if we already know that we need to fix LCSSA
1044
// form.
1045
// TODO: For now we just recompute LCSSA for the outer loop in this case, but
1046
// it should be possible to fix it in-place.
1047
if (PreserveLCSSA && OuterL && CompletelyUnroll && !NeedToFixLCSSA)
1048
NeedToFixLCSSA |= ::needToInsertPhisForLCSSA(OuterL, UnrolledLoopBlocks, LI);
1049
1050
// Make sure that loop-simplify form is preserved. We want to simplify
1051
// at least one layer outside of the loop that was unrolled so that any
1052
// changes to the parent loop exposed by the unrolling are considered.
1053
if (OuterL) {
1054
// OuterL includes all loops for which we can break loop-simplify, so
1055
// it's sufficient to simplify only it (it'll recursively simplify inner
1056
// loops too).
1057
if (NeedToFixLCSSA) {
1058
// LCSSA must be performed on the outermost affected loop. The unrolled
1059
// loop's last loop latch is guaranteed to be in the outermost loop
1060
// after LoopInfo's been updated by LoopInfo::erase.
1061
Loop *LatchLoop = LI->getLoopFor(Latches.back());
1062
Loop *FixLCSSALoop = OuterL;
1063
if (!FixLCSSALoop->contains(LatchLoop))
1064
while (FixLCSSALoop->getParentLoop() != LatchLoop)
1065
FixLCSSALoop = FixLCSSALoop->getParentLoop();
1066
1067
formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
1068
} else if (PreserveLCSSA) {
1069
assert(OuterL->isLCSSAForm(*DT) &&
1070
"Loops should be in LCSSA form after loop-unroll.");
1071
}
1072
1073
// TODO: That potentially might be compile-time expensive. We should try
1074
// to fix the loop-simplified form incrementally.
1075
simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
1076
} else {
1077
// Simplify loops for which we might've broken loop-simplify form.
1078
for (Loop *SubLoop : LoopsToSimplify)
1079
simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
1080
}
1081
1082
return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
1083
: LoopUnrollResult::PartiallyUnrolled;
1084
}
1085
1086
/// Given an llvm.loop loop id metadata node, returns the loop hint metadata
1087
/// node with the given name (for example, "llvm.loop.unroll.count"). If no
1088
/// such metadata node exists, then nullptr is returned.
1089
MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) {
1090
// First operand should refer to the loop id itself.
1091
assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
1092
assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
1093
1094
for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
1095
MDNode *MD = dyn_cast<MDNode>(MDO);
1096
if (!MD)
1097
continue;
1098
1099
MDString *S = dyn_cast<MDString>(MD->getOperand(0));
1100
if (!S)
1101
continue;
1102
1103
if (Name == S->getString())
1104
return MD;
1105
}
1106
return nullptr;
1107
}
1108
1109