GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
//===- LoopPeel.cpp -------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Loop Peeling Utilities.
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "loop-peel"

STATISTIC(NumPeeled, "Number of loops peeled");

static cl::opt<unsigned> UnrollPeelCount(
    "unroll-peel-count", cl::Hidden,
    cl::desc("Set the unroll peeling count, for testing purposes"));

static cl::opt<bool>
    UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
                       cl::desc("Allows loops to be peeled when the dynamic "
                                "trip count is known to be low."));

static cl::opt<bool>
    UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
                                cl::init(false), cl::Hidden,
                                cl::desc("Allows loop nests to be peeled."));

static cl::opt<unsigned> UnrollPeelMaxCount(
    "unroll-peel-max-count", cl::init(7), cl::Hidden,
    cl::desc("Max average trip count which will cause loop peeling."));

static cl::opt<unsigned> UnrollForcePeelCount(
    "unroll-force-peel-count", cl::init(0), cl::Hidden,
    cl::desc("Force a peel count regardless of profiling information."));

static cl::opt<bool> DisableAdvancedPeeling(
    "disable-advanced-peeling", cl::init(false), cl::Hidden,
    cl::desc(
        "Disable advanced peeling. Issues for convergent targets (D134803)."));

static const char *PeeledCountMetaData = "llvm.loop.peeled.count";

// Check whether we are capable of peeling this loop.
bool llvm::canPeel(const Loop *L) {
  // Make sure the loop is in simplified form.
  if (!L->isLoopSimplifyForm())
    return false;
  if (!DisableAdvancedPeeling)
    return true;

  SmallVector<BasicBlock *, 4> Exits;
  L->getUniqueNonLatchExitBlocks(Exits);
  // The latch must either be the only exiting block, or all non-latch exit
  // blocks must have either a deopt or an unreachable terminator, or compose
  // a chain of blocks where the last one is deopt- or unreachable-terminated.
  // Both deopt and unreachable terminators are a strong indication that the
  // exits are not taken. Note that this is a profitability check, not a
  // legality check. Also note that loop peeling currently can only update the
  // branch weights of latch blocks, and branch weights to blocks with deopt or
  // unreachable do not need updating.
  return llvm::all_of(Exits, IsBlockFollowedByDeoptOrUnreachable);
}

namespace {

// As a loop is peeled, it may be the case that Phi nodes become
// loop-invariant (i.e., known because there is only one choice).
// For example, consider the following function:
//   void g(int);
//   void binary() {
//     int x = 0;
//     int y = 0;
//     int a = 0;
//     for (int i = 0; i < 100000; ++i) {
//       g(x);
//       x = y;
//       g(a);
//       y = a + 1;
//       a = 5;
//     }
//   }
// Peeling 3 iterations is beneficial because the values for x, y and a
// become known. The IR for this loop looks something like the following:
//
//   %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
//   %a = phi i32 [ 0, %entry ], [ 5, %if.end ]
//   %y = phi i32 [ 0, %entry ], [ %add, %if.end ]
//   %x = phi i32 [ 0, %entry ], [ %y, %if.end ]
//   ...
//   tail call void @_Z1gi(i32 signext %x)
//   tail call void @_Z1gi(i32 signext %a)
//   %add = add nuw nsw i32 %a, 1
//   %inc = add nuw nsw i32 %i, 1
//   %exitcond = icmp eq i32 %inc, 100000
//   br i1 %exitcond, label %for.cond.cleanup, label %for.body
//
// The arguments for the calls to g will become known after 3 iterations
// of the loop, because the phi nodes' values become known after 3 iterations
// of the loop (i.e., they are known on the 4th iteration, so peel 3
// iterations).
// The first iteration has g(0), g(0); the second has g(0), g(5); the
// third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5).
// Now consider the phi nodes:
//   %a is a phi with constants so it is determined after iteration 1.
//   %y is a phi based on a constant and %a so it is determined on
//     the iteration after %a is determined, so iteration 2.
//   %x is a phi based on a constant and %y so it is determined on
//     the iteration after %y, so iteration 3.
//   %i is based on itself (and is an induction variable) so it is
//     never determined.
// This means that peeling off 3 iterations will result in being able to
// remove the phi nodes for %a, %y, and %x. The arguments for the
// corresponding calls to g are determined and the code for computing
// x, y, and a can be removed.
//
// The PhiAnalyzer class calculates how many times a loop should be
// peeled based on the above analysis of the phi nodes in the loop while
// respecting the maximum specified.
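// (Usage in this file: computePeelCount() below drives the analysis via
// PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel().)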
class PhiAnalyzer {
public:
  PhiAnalyzer(const Loop &L, unsigned MaxIterations);

  // Calculate the sufficient minimum number of iterations of the loop to peel
  // such that phi instructions become determined (subject to allowable
  // limits).
  std::optional<unsigned> calculateIterationsToPeel();

protected:
  using PeelCounter = std::optional<unsigned>;
  const PeelCounter Unknown = std::nullopt;

  // Add 1, respecting Unknown; return Unknown if the result would exceed
  // MaxIterations.
  PeelCounter addOne(PeelCounter PC) const {
    if (PC == Unknown)
      return Unknown;
    return (*PC + 1 <= MaxIterations) ? PeelCounter{*PC + 1} : Unknown;
  }
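  // E.g. with MaxIterations == 7: addOne(3) == 4, but addOne(7) == Unknown.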

  // Calculate the number of iterations after which the given value
  // becomes an invariant.
  PeelCounter calculate(const Value &);

  const Loop &L;
  const unsigned MaxIterations;

  // Map of Values to number of iterations to invariance.
  SmallDenseMap<const Value *, PeelCounter> IterationsToInvariance;
};

PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
    : L(L), MaxIterations(MaxIterations) {
  assert(canPeel(&L) && "loop is not suitable for peeling");
  assert(MaxIterations > 0 && "no peeling is allowed?");
}

// This function calculates the number of iterations after which the value
// becomes an invariant. The pre-calculated values are memoized in a map.
// N.B. This number will be Unknown or <= MaxIterations.
// The function is calculated according to the following definition:
// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
//   F(%x) = G(%y) + 1 (N.B. [MaxIterations | Unknown] + 1 => Unknown)
//   G(%y) = 0 if %y is a loop invariant
//   G(%y) = G(%BackEdgeValue) if %y is a phi in the header block
//   G(%y) = TODO: if %y is an expression based on phis and loop invariants
//     The example looks like:
//       %x = phi(0, %a)  <-- becomes invariant starting from the 3rd iteration.
//       %y = phi(0, 5)
//       %a = %y + 1
//   G(%y) = Unknown otherwise (including phi not in header block)
PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
  // If we already know the answer, take it from the map.
  auto I = IterationsToInvariance.find(&V);
  if (I != IterationsToInvariance.end())
    return I->second;

  // Place Unknown in the map to avoid infinite recursion. Such
  // cycles can never stop on an invariant.
  IterationsToInvariance[&V] = Unknown;

  if (L.isLoopInvariant(&V))
    // Loop invariant so known at start.
    return (IterationsToInvariance[&V] = 0);
  if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
    if (Phi->getParent() != L.getHeader()) {
      // Phi is not in header block so Unknown.
      assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
      return Unknown;
    }
    // We need to analyze the input from the back edge and add 1.
    Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
    PeelCounter Iterations = calculate(*Input);
    assert(IterationsToInvariance[Input] == Iterations &&
           "unexpected value saved");
    return (IterationsToInvariance[Phi] = addOne(Iterations));
  }
  if (const Instruction *I = dyn_cast<Instruction>(&V)) {
    if (isa<CmpInst>(I) || I->isBinaryOp()) {
      // Binary instructions get the max of the operands.
      PeelCounter LHS = calculate(*I->getOperand(0));
      if (LHS == Unknown)
        return Unknown;
      PeelCounter RHS = calculate(*I->getOperand(1));
      if (RHS == Unknown)
        return Unknown;
      return (IterationsToInvariance[I] = {std::max(*LHS, *RHS)});
    }
    if (I->isCast())
      // Cast instructions get the value of the operand.
      return (IterationsToInvariance[I] = calculate(*I->getOperand(0)));
  }
  // TODO: handle more expressions

  // Everything else is Unknown.
  assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
  return Unknown;
}

std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
  unsigned Iterations = 0;
  for (auto &PHI : L.getHeader()->phis()) {
    PeelCounter ToInvariance = calculate(PHI);
    if (ToInvariance != Unknown) {
      assert(*ToInvariance <= MaxIterations && "bad result in phi analysis");
      Iterations = std::max(Iterations, *ToInvariance);
      if (Iterations == MaxIterations)
        break;
    }
  }
  assert((Iterations <= MaxIterations) && "bad result in phi analysis");
  return Iterations ? std::optional<unsigned>(Iterations) : std::nullopt;
}

} // unnamed namespace

// Try to find any invariant memory reads that will become dereferenceable in
// the remainder loop after peeling. The load must also be used (transitively)
// by an exit condition. Returns the number of iterations to peel off (at the
// moment either 0 or 1).
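// An illustrative sketch (not from the source) of the shape this targets:
//
//   loop:                                  ; no stores anywhere in the loop
//     br i1 %c, label %trap, label %body   ; %trap ends in unreachable
//   body:                                  ; dominates the latch
//     %v = load i32, ptr %p                ; %p invariant, deref not known
//     ... exit condition (transitively) uses %v ...
//
// Peeling one iteration executes the load once up front; afterwards the
// remainder loop may assume %p is dereferenceable.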
static unsigned peelToTurnInvariantLoadsDereferenceable(Loop &L,
                                                        DominatorTree &DT,
                                                        AssumptionCache *AC) {
  // Skip loops with a single exiting block, because there should be no benefit
  // for the heuristic below.
  if (L.getExitingBlock())
    return 0;

  // All non-latch exit blocks must have an UnreachableInst terminator.
  // Otherwise the heuristic below may not be profitable.
  SmallVector<BasicBlock *, 4> Exits;
  L.getUniqueNonLatchExitBlocks(Exits);
  if (any_of(Exits, [](const BasicBlock *BB) {
        return !isa<UnreachableInst>(BB->getTerminator());
      }))
    return 0;

  // Now look for invariant loads that dominate the latch and are not known to
  // be dereferenceable. If there are such loads and no writes, they will become
  // dereferenceable in the loop if the first iteration is peeled off. Also
  // collect the set of instructions controlled by such loads. Only peel if an
  // exit condition uses (transitively) such a load.
  BasicBlock *Header = L.getHeader();
  BasicBlock *Latch = L.getLoopLatch();
  SmallPtrSet<Value *, 8> LoadUsers;
  const DataLayout &DL = L.getHeader()->getDataLayout();
  for (BasicBlock *BB : L.blocks()) {
    for (Instruction &I : *BB) {
      if (I.mayWriteToMemory())
        return 0;

      auto Iter = LoadUsers.find(&I);
      if (Iter != LoadUsers.end()) {
        for (Value *U : I.users())
          LoadUsers.insert(U);
      }
      // Do not look for reads in the header; they can already be hoisted
      // without peeling.
      if (BB == Header)
        continue;
      if (auto *LI = dyn_cast<LoadInst>(&I)) {
        Value *Ptr = LI->getPointerOperand();
        if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
            !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, AC, &DT))
          for (Value *U : I.users())
            LoadUsers.insert(U);
      }
    }
  }
  SmallVector<BasicBlock *> ExitingBlocks;
  L.getExitingBlocks(ExitingBlocks);
  if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
        return LoadUsers.contains(Exiting->getTerminator());
      }))
    return 1;
  return 0;
}

// Return the number of iterations to peel off that make conditions in the
// body true/false. For example, if we peel 2 iterations off the loop below,
// the condition i < 2 can be evaluated at compile time.
//   for (i = 0; i < n; i++) {
//     if (i < 2)
//       ..
//     else
//       ..
//   }
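// For instance (an illustrative sketch), peeling 2 iterations off the loop
// above yields copies specialized for i == 0 and i == 1, where "i < 2" folds
// to true, while in the remainder loop "i < 2" is known false, so the branch
// folds away in all three places.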
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
                                         ScalarEvolution &SE) {
  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
  unsigned DesiredPeelCount = 0;

  // Do not peel the entire loop.
  const SCEV *BE = SE.getConstantMaxBackedgeTakenCount(&L);
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(BE))
    MaxPeelCount =
        std::min((unsigned)SC->getAPInt().getLimitedValue() - 1, MaxPeelCount);

  // Increase PeelCount while the (IterVal Pred BoundSCEV) condition is
  // satisfied; return true if the inverted condition becomes known before
  // reaching the MaxPeelCount limit.
  auto PeelWhilePredicateIsKnown =
      [&](unsigned &PeelCount, const SCEV *&IterVal, const SCEV *BoundSCEV,
          const SCEV *Step, ICmpInst::Predicate Pred) {
        while (PeelCount < MaxPeelCount &&
               SE.isKnownPredicate(Pred, IterVal, BoundSCEV)) {
          IterVal = SE.getAddExpr(IterVal, Step);
          ++PeelCount;
        }
        return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
                                   BoundSCEV);
      };
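  // E.g. (illustrative) with IterVal = {0,+,1}, BoundSCEV = 2 and Pred = ULT:
  // PeelCount advances to 2, after which the inverse predicate (2 u>= 2) is
  // known and the lambda returns true.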

  const unsigned MaxDepth = 4;
  std::function<void(Value *, unsigned)> ComputePeelCount =
      [&](Value *Condition, unsigned Depth) -> void {
    if (!Condition->getType()->isIntegerTy() || Depth >= MaxDepth)
      return;

    Value *LeftVal, *RightVal;
    if (match(Condition, m_And(m_Value(LeftVal), m_Value(RightVal))) ||
        match(Condition, m_Or(m_Value(LeftVal), m_Value(RightVal)))) {
      ComputePeelCount(LeftVal, Depth + 1);
      ComputePeelCount(RightVal, Depth + 1);
      return;
    }

    CmpInst::Predicate Pred;
    if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
      return;

    const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
    const SCEV *RightSCEV = SE.getSCEV(RightVal);

    // Do not consider predicates that are known to be true or false
    // independently of the loop iteration.
    if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
      return;

    // Check if we have a condition with one AddRec and one non-AddRec
    // expression. Normalize LeftSCEV to be the AddRec.
    if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
      if (isa<SCEVAddRecExpr>(RightSCEV)) {
        std::swap(LeftSCEV, RightSCEV);
        Pred = ICmpInst::getSwappedPredicate(Pred);
      } else
        return;
    }

    const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);

    // Avoid huge SCEV computations in the loop below; make sure we only
    // consider AddRecs of the loop we are trying to peel.
    if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
      return;
    if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
        !SE.getMonotonicPredicateType(LeftAR, Pred))
      return;

    // Check if extending the current DesiredPeelCount lets us evaluate Pred
    // or !Pred in the loop body statically.
    unsigned NewPeelCount = DesiredPeelCount;

    const SCEV *IterVal = LeftAR->evaluateAtIteration(
        SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);

    // If the original condition is not known, get the negated predicate
    // (which holds on the else branch) and check if it is known. This allows
    // us to peel off iterations that make the original condition false.
    if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
      Pred = ICmpInst::getInversePredicate(Pred);

    const SCEV *Step = LeftAR->getStepRecurrence(SE);
    if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
                                   Pred))
      return;

    // However, for equality comparisons, that isn't always sufficient to
    // eliminate the comparison in the loop body; we may need to peel one more
    // iteration. See if that makes !Pred become unknown again.
    const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
    if (ICmpInst::isEquality(Pred) &&
        !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
                             RightSCEV) &&
        !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
        SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
      if (NewPeelCount >= MaxPeelCount)
        return; // Need to peel one more iteration, but can't. Give up.
      ++NewPeelCount; // Great!
    }

    DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
  };

  auto ComputePeelCountMinMax = [&](MinMaxIntrinsic *MinMax) {
    if (!MinMax->getType()->isIntegerTy())
      return;
    Value *LHS = MinMax->getLHS(), *RHS = MinMax->getRHS();
    const SCEV *BoundSCEV, *IterSCEV;
    if (L.isLoopInvariant(LHS)) {
      BoundSCEV = SE.getSCEV(LHS);
      IterSCEV = SE.getSCEV(RHS);
    } else if (L.isLoopInvariant(RHS)) {
      BoundSCEV = SE.getSCEV(RHS);
      IterSCEV = SE.getSCEV(LHS);
    } else
      return;
    const auto *AddRec = dyn_cast<SCEVAddRecExpr>(IterSCEV);
    // For simplicity, we support only affine recurrences.
    if (!AddRec || !AddRec->isAffine() || AddRec->getLoop() != &L)
      return;
    const SCEV *Step = AddRec->getStepRecurrence(SE);
    bool IsSigned = MinMax->isSigned();
    // To minimize the number of peeled iterations, we use strict relational
    // predicates here.
    ICmpInst::Predicate Pred;
    if (SE.isKnownPositive(Step))
      Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    else if (SE.isKnownNegative(Step))
      Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
    else
      return;
    // Check that AddRec is not wrapping.
    if (!(IsSigned ? AddRec->hasNoSignedWrap() : AddRec->hasNoUnsignedWrap()))
      return;
    unsigned NewPeelCount = DesiredPeelCount;
    const SCEV *IterVal = AddRec->evaluateAtIteration(
        SE.getConstant(AddRec->getType(), NewPeelCount), SE);
    if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, BoundSCEV, Step,
                                   Pred))
      return;
    DesiredPeelCount = NewPeelCount;
  };

  for (BasicBlock *BB : L.blocks()) {
    for (Instruction &I : *BB) {
      if (SelectInst *SI = dyn_cast<SelectInst>(&I))
        ComputePeelCount(SI->getCondition(), 0);
      if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(&I))
        ComputePeelCountMinMax(MinMax);
    }

    auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || BI->isUnconditional())
      continue;

    // Ignore loop exit condition.
    if (L.getLoopLatch() == BB)
      continue;

    ComputePeelCount(BI->getCondition(), 0);
  }

  return DesiredPeelCount;
}

/// This "heuristic" exactly matches implicit behavior which used to exist
/// inside getLoopEstimatedTripCount. It was added here to keep an
/// improvement inside that API from causing peeling to become more aggressive.
/// This should probably be removed.
static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return true;

  BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
    return true;

  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
          LatchBR->getSuccessor(1) == L->getHeader()) &&
         "At least one edge out of the latch must go to the header");

  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getUniqueNonLatchExitBlocks(ExitBlocks);
  return any_of(ExitBlocks, [](const BasicBlock *EB) {
    return !EB->getTerminatingDeoptimizeCall();
  });
}

// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
                            TargetTransformInfo::PeelingPreferences &PP,
                            unsigned TripCount, DominatorTree &DT,
                            ScalarEvolution &SE, AssumptionCache *AC,
                            unsigned Threshold) {
  assert(LoopSize > 0 && "Zero loop size is not allowed!");
  // Save the PP.PeelCount value set by the target in
  // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
  unsigned TargetPeelCount = PP.PeelCount;
  PP.PeelCount = 0;
  if (!canPeel(L))
    return;

  // Only try to peel innermost loops by default.
  // The constraint can be relaxed by the target in TTI.getPeelingPreferences
  // or by the flag -unroll-allow-loop-nests-peeling.
  if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
    return;

  // If the user provided a peel count, use that.
  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
  if (UserPeelCount) {
    LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
                      << " iterations.\n");
    PP.PeelCount = UnrollForcePeelCount;
    PP.PeelProfiledIterations = true;
    return;
  }

  // Skip peeling if it's disabled.
  if (!PP.AllowPeeling)
    return;

  // Check that we can peel at least one iteration: peeling a single iteration
  // duplicates the loop body once, roughly doubling the code size.
  if (2 * LoopSize > Threshold)
    return;

  unsigned AlreadyPeeled = 0;
  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
    AlreadyPeeled = *Peeled;
  // Stop if we already peeled off the maximum number of iterations.
  if (AlreadyPeeled >= UnrollPeelMaxCount)
    return;

  // Pay respect to limitations implied by loop size and the max peel count.
  unsigned MaxPeelCount = UnrollPeelMaxCount;
  MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);

  // Start the max computation with the PP.PeelCount value set by the target
  // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
  unsigned DesiredPeelCount = TargetPeelCount;

  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
  // iterations of the loop. For this we compute the number of iterations after
  // which every Phi is guaranteed to become an invariant, and try to peel the
  // maximum number of iterations among these values, thus turning all those
  // Phis into invariants.
  if (MaxPeelCount > DesiredPeelCount) {
    // Check how many iterations are useful for resolving Phis.
    auto NumPeels = PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel();
    if (NumPeels)
      DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels);
  }

  DesiredPeelCount = std::max(DesiredPeelCount,
                              countToEliminateCompares(*L, MaxPeelCount, SE));

  if (DesiredPeelCount == 0)
    DesiredPeelCount = peelToTurnInvariantLoadsDereferenceable(*L, DT, AC);

  if (DesiredPeelCount > 0) {
    DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
    // Consider max peel count limitation.
    assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
    if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
      LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
                        << " iteration(s) to turn"
                        << " some Phis into invariants.\n");
      PP.PeelCount = DesiredPeelCount;
      PP.PeelProfiledIterations = false;
      return;
    }
  }

  // Bail if we know the statically calculated trip count.
  // In that case we prefer partial unrolling instead.
  if (TripCount)
    return;

  // Do not apply profile-based peeling if it is disabled.
  if (!PP.PeelProfiledIterations)
    return;
  // If we don't know the trip count, but have reason to believe the average
  // trip count is low, peeling should be beneficial, since we will usually
  // hit the peeled section.
  // We only do this in the presence of profile information, since otherwise
  // our estimates of the trip count are not reliable enough.
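  // E.g. (illustrative): an estimated trip count of 3 with nothing peeled yet
  // peels 3 iterations below, so the common case executes entirely in the
  // specialized, straight-line peeled copies.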
  if (L->getHeader()->getParent()->hasProfileData()) {
    if (violatesLegacyMultiExitLoopCheck(L))
      return;
    std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
    if (!EstimatedTripCount)
      return;

    LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is "
                      << *EstimatedTripCount << "\n");

    if (*EstimatedTripCount) {
      if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) {
        unsigned PeelCount = *EstimatedTripCount;
        LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n");
        PP.PeelCount = PeelCount;
        return;
      }
      LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
      LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
      LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n");
      LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
      LLVM_DEBUG(dbgs() << "Max peel count by cost: "
                        << (Threshold / LoopSize - 1) << "\n");
    }
  }
}

struct WeightInfo {
  // Weights for current iteration.
  SmallVector<uint32_t> Weights;
  // Weights to subtract after each iteration.
  const SmallVector<uint32_t> SubWeights;
};
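// E.g. (illustrative): a latch whose (fallthrough, exit) branch weights are
// (100, 1) gets SubWeights (1, 0) from initBranchWeights() below; each
// peeled-off copy then lowers the fallthrough weight by the exit weight
// while the exit weight stays fixed.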

/// Update the branch weights of an exiting block of a peeled-off loop
/// iteration.
/// Let F be the weight of the edge that continues (falls through) into the
/// loop, and E the weight of the edge to an exit.
/// F/(F+E) is the probability of staying in the loop and E/(F+E) the
/// probability of exiting, so the estimated ExitCount is F / E.
/// For the I-th (counting from 0) peeled-off iteration we set the weights for
/// the peeled exit as (EC - I, 1). This gives a reasonable distribution: the
/// probability of exiting, 1/(EC-I), increases, while at the same time the
/// estimated exit count in the remainder loop decreases by I.
/// To avoid dealing with division rounding, we can just multiply both parts
/// of the fraction by E and use the weights (F - I * E, E).
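/// Worked example (illustrative): latch weights (F, E) = (99, 1) give an
/// estimated exit count of 99. The 0th peeled copy keeps weights (99, 1),
/// the next gets (98, 1), then (97, 1), and so on: each peeled-off iteration
/// that did not exit lowers the expected remaining trip count by one.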
682
static void updateBranchWeights(Instruction *Term, WeightInfo &Info) {
683
setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false);
684
for (auto [Idx, SubWeight] : enumerate(Info.SubWeights))
685
if (SubWeight != 0)
686
// Don't set the probability of taking the edge from latch to loop header
687
// to less than 1:1 ratio (meaning Weight should not be lower than
688
// SubWeight), as this could significantly reduce the loop's hotness,
689
// which would be incorrect in the case of underestimating the trip count.
690
Info.Weights[Idx] =
691
Info.Weights[Idx] > SubWeight
692
? std::max(Info.Weights[Idx] - SubWeight, SubWeight)
693
: SubWeight;
694
}

/// Initialize the weights for all exiting blocks.
static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
                              Loop *L) {
  SmallVector<BasicBlock *> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  for (BasicBlock *ExitingBlock : ExitingBlocks) {
    Instruction *Term = ExitingBlock->getTerminator();
    SmallVector<uint32_t> Weights;
    if (!extractBranchWeights(*Term, Weights))
      continue;

    // See the comment on updateBranchWeights() for an explanation of what we
    // do here.
    uint32_t FallThroughWeights = 0;
    uint32_t ExitWeights = 0;
    for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
      if (L->contains(Succ))
        FallThroughWeights += Weight;
      else
        ExitWeights += Weight;
    }

    // Don't try to update weights for degenerate case.
    if (FallThroughWeights == 0)
      continue;

    SmallVector<uint32_t> SubWeights;
    for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
      if (!L->contains(Succ)) {
        // Exit weights stay the same.
        SubWeights.push_back(0);
        continue;
      }

      // Subtract exit weights on each iteration, distributed across all
      // fallthrough edges.
      double W = (double)Weight / (double)FallThroughWeights;
      SubWeights.push_back((uint32_t)(ExitWeights * W));
    }

    WeightInfos.insert({Term, {std::move(Weights), std::move(SubWeights)}});
  }
}

/// Clones the body of the loop L, putting it between \p InsertTop and \p
/// InsertBot.
/// \param IterNumber The serial number of the iteration currently being
/// peeled off.
/// \param ExitEdges The exit edges of the original loop.
/// \param[out] NewBlocks A list of the blocks in the newly created clone.
/// \param[out] VMap The value map between the loop and the new clone.
/// \param LoopBlocks A helper for DFS-traversal of the loop.
/// \param LVMap A value-map that maps instructions from the original loop to
/// instructions in the last peeled-off iteration.
static void cloneLoopBlocks(
    Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
    SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
    SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
    ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
    LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes,
    ScalarEvolution &SE) {
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *PreHeader = L->getLoopPreheader();

  Function *F = Header->getParent();
  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
  Loop *ParentLoop = L->getParentLoop();

  // For each block in the original loop, create a new copy,
  // and update the value map with the newly created values.
  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
    NewBlocks.push_back(NewBB);

    // If an original block is an immediate child of the loop L, its copy
    // is a child of a ParentLoop after peeling. If a block is a child of
    // a nested loop, it is handled in the cloneLoop() call below.
    if (ParentLoop && LI->getLoopFor(*BB) == L)
      ParentLoop->addBasicBlockToLoop(NewBB, *LI);

    VMap[*BB] = NewBB;

    // If dominator tree is available, insert nodes to represent cloned blocks.
    if (DT) {
      if (Header == *BB)
        DT->addNewBlock(NewBB, InsertTop);
      else {
        DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
        // VMap must contain entry for IDom, as the iteration order is RPO.
        DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
      }
    }
  }

  {
    // Identify what other metadata depends on the cloned version. After
    // cloning, replace the metadata with the corrected version for both
    // memory instructions and noalias intrinsics.
    std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
    cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
                               Header->getContext(), Ext);
  }

  // Recursively create the new Loop objects for nested loops, if any,
  // to preserve LoopInfo.
  for (Loop *ChildLoop : *L) {
    cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
  }

  // Hook up the control flow for the newly inserted blocks.
  // The new header is hooked up directly to the "top", which is either
  // the original loop preheader (for the first iteration) or the previous
  // iteration's exiting block (for every other iteration).
  InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));

  // Similarly, for the latch:
  // The original exiting edge is still hooked up to the loop exit.
  // The backedge now goes to the "bottom", which is either the loop's real
  // header (for the last peeled iteration) or the copied header of the next
  // iteration (for every other iteration).
  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
  auto *LatchTerm = cast<Instruction>(NewLatch->getTerminator());
  for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx)
    if (LatchTerm->getSuccessor(idx) == Header) {
      LatchTerm->setSuccessor(idx, InsertBot);
      break;
    }
  if (DT)
    DT->changeImmediateDominator(InsertBot, NewLatch);

  // The new copy of the loop body starts with a bunch of PHI nodes
  // that pick an incoming value from either the preheader, or the previous
  // loop iteration. Since this copy is no longer part of the loop, we
  // resolve this statically:
  // For the first iteration, we use the value from the preheader directly.
  // For any other iteration, we replace the phi with the value generated by
  // the immediately preceding clone of the loop body (which represents
  // the previous iteration).
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
    if (IterNumber == 0) {
      VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
    } else {
      Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
      Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
      if (LatchInst && L->contains(LatchInst))
        VMap[&*I] = LVMap[LatchInst];
      else
        VMap[&*I] = LatchVal;
    }
    NewPHI->eraseFromParent();
  }

  // Fix up the outgoing values - we need to add a value for the iteration
  // we've just created. Note that this must happen *after* the incoming
  // values are adjusted, since the value going out of the latch may also be
  // a value coming into the header.
  for (auto Edge : ExitEdges)
    for (PHINode &PHI : Edge.second->phis()) {
      Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
      Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
      if (LatchInst && L->contains(LatchInst))
        LatchVal = VMap[LatchVal];
      PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
      SE.forgetLcssaPhiWithNewPredecessor(L, &PHI);
    }

  // LastValueMap is updated with the values for the current loop
  // which are used the next time this function is called.
  for (auto KV : VMap)
    LVMap[KV.first] = KV.second;
}

TargetTransformInfo::PeelingPreferences
llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
                               const TargetTransformInfo &TTI,
                               std::optional<bool> UserAllowPeeling,
                               std::optional<bool> UserAllowProfileBasedPeeling,
                               bool UnrollingSpecificValues) {
  TargetTransformInfo::PeelingPreferences PP;

  // Set the default values.
  PP.PeelCount = 0;
  PP.AllowPeeling = true;
  PP.AllowLoopNestsPeeling = false;
  PP.PeelProfiledIterations = true;

  // Get the target-specific values.
  TTI.getPeelingPreferences(L, SE, PP);

  // User-specified values using cl::opt.
  if (UnrollingSpecificValues) {
    if (UnrollPeelCount.getNumOccurrences() > 0)
      PP.PeelCount = UnrollPeelCount;
    if (UnrollAllowPeeling.getNumOccurrences() > 0)
      PP.AllowPeeling = UnrollAllowPeeling;
    if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
      PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
  }

  // User-specified values provided by argument.
  if (UserAllowPeeling)
    PP.AllowPeeling = *UserAllowPeeling;
  if (UserAllowProfileBasedPeeling)
    PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;

  return PP;
}

/// Peel off the first \p PeelCount iterations of loop \p L.
///
/// Note that this does not peel them off as a single straight-line block.
/// Rather, each iteration is peeled off separately, and needs to check the
/// exit condition.
/// For loops that dynamically execute \p PeelCount iterations or less
/// this provides a benefit, since the peeled off iterations, which account
/// for the bulk of dynamic execution, can be further simplified by scalar
/// optimizations.
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
                    ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC,
                    bool PreserveLCSSA, ValueToValueMapTy &LVMap) {
  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");

  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  BasicBlock *Header = L->getHeader();
  BasicBlock *PreHeader = L->getLoopPreheader();
  BasicBlock *Latch = L->getLoopLatch();
  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
  L->getExitEdges(ExitEdges);

  // Remember dominators of blocks we might reach through exits so we can
  // change them later. The immediate dominator of such a block might change,
  // because we add more routes which can lead to the exit: we can reach it
  // from the peeled iterations too.
  DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
  for (auto *BB : L->blocks()) {
    auto *BBDomNode = DT.getNode(BB);
    SmallVector<BasicBlock *, 16> ChildrenToUpdate;
    for (auto *ChildDomNode : BBDomNode->children()) {
      auto *ChildBB = ChildDomNode->getBlock();
      if (!L->contains(ChildBB))
        ChildrenToUpdate.push_back(ChildBB);
    }
    // The new idom of the block will be the nearest common dominator
    // of all copies of the previous idom. This is equivalent to the
    // nearest common dominator of the previous idom and the first latch,
    // which dominates all copies of the previous idom.
    BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
    for (auto *ChildBB : ChildrenToUpdate)
      NonLoopBlocksIDom[ChildBB] = NewIDom;
  }

  Function *F = Header->getParent();

  // Set up all the necessary basic blocks. It is convenient to split the
  // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
  // body, and a new preheader for the "real" loop.

  // Peeling the first iteration transforms
  //
  // PreHeader:
  //   ...
  // Header:
  //   LoopBody
  //   If (cond) goto Header
  // Exit:
  //
  // into
  //
  // InsertTop:
  //   LoopBody
  //   If (!cond) goto Exit
  // InsertBot:
  // NewPreHeader:
  //   ...
  // Header:
  //   LoopBody
  //   If (cond) goto Header
  // Exit:
  //
  // Each following iteration will split the current bottom anchor in two,
  // and put the new copy of the loop body between these two blocks. That is,
  // after peeling another iteration from the example above, we'll split
  // InsertBot, and get:
  //
  // InsertTop:
  //   LoopBody
  //   If (!cond) goto Exit
  // InsertBot:
  //   LoopBody
  //   If (!cond) goto Exit
  // InsertBot.next:
  // NewPreHeader:
  //   ...
  // Header:
  //   LoopBody
  //   If (cond) goto Header
  // Exit:

  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
  BasicBlock *InsertBot =
      SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
  BasicBlock *NewPreHeader =
      SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);

  InsertTop->setName(Header->getName() + ".peel.begin");
  InsertBot->setName(Header->getName() + ".peel.next");
  NewPreHeader->setName(PreHeader->getName() + ".peel.newph");

  Instruction *LatchTerm = Latch->getTerminator();

  // If we have branch weight information, we'll want to update it for the
  // newly created branches.
  DenseMap<Instruction *, WeightInfo> Weights;
  initBranchWeights(Weights, L);

  // Identify what noalias metadata is inside the loop: if it is inside the
  // loop, the associated metadata must be cloned for each iteration.
  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);

  // For each peeled-off iteration, make a copy of the loop.
  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
    SmallVector<BasicBlock *, 8> NewBlocks;
    ValueToValueMapTy VMap;

    cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
                    LoopBlocks, VMap, LVMap, &DT, LI,
                    LoopLocalNoAliasDeclScopes, *SE);

    // Remap to use values from the current iteration instead of the
    // previous one.
    remapInstructionsInBlocks(NewBlocks, VMap);

    // Update IDoms of the blocks reachable through exits.
    if (Iter == 0)
      for (auto BBIDom : NonLoopBlocksIDom)
        DT.changeImmediateDominator(BBIDom.first,
                                    cast<BasicBlock>(LVMap[BBIDom.second]));
#ifdef EXPENSIVE_CHECKS
    assert(DT.verify(DominatorTree::VerificationLevel::Fast));
#endif

    for (auto &[Term, Info] : Weights) {
      auto *TermCopy = cast<Instruction>(VMap[Term]);
      updateBranchWeights(TermCopy, Info);
    }

    // Remove loop metadata from the latch branch instruction
    // because it is not the loop's latch branch anymore.
    auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
    LatchTermCopy->setMetadata(LLVMContext::MD_loop, nullptr);

    InsertTop = InsertBot;
    InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
    InsertBot->setName(Header->getName() + ".peel.next");

    F->splice(InsertTop->getIterator(), F, NewBlocks[0]->getIterator(),
              F->end());
  }

  // Now adjust the phi nodes in the loop header to get their initial values
  // from the last peeled-off iteration instead of the preheader.
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *PHI = cast<PHINode>(I);
    Value *NewVal = PHI->getIncomingValueForBlock(Latch);
    Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
    if (LatchInst && L->contains(LatchInst))
      NewVal = LVMap[LatchInst];

    PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
  }

  for (const auto &[Term, Info] : Weights) {
    setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false);
  }

  // Update metadata for the count of peeled-off iterations.
  unsigned AlreadyPeeled = 0;
  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
    AlreadyPeeled = *Peeled;
  addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);

  if (Loop *ParentLoop = L->getParentLoop())
    L = ParentLoop;

  // We modified the loop, update SE.
  SE->forgetTopmostLoop(L);
  SE->forgetBlockAndLoopDispositions();

#ifdef EXPENSIVE_CHECKS
  // Finally, the DomTree must be correct.
  assert(DT.verify(DominatorTree::VerificationLevel::Fast));
#endif

  // FIXME: Incrementally update loop-simplify.
  simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);

  NumPeeled++;

  return true;
}