CoCalc -- LoopPeel.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopPeel.cpp
³⁵²⁷¹ views
1
//===- LoopPeel.cpp -------------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Loop Peeling Utilities.
10
//===----------------------------------------------------------------------===//
11

12
#include "llvm/Transforms/Utils/LoopPeel.h"
13
#include "llvm/ADT/DenseMap.h"
14
#include "llvm/ADT/SmallVector.h"
15
#include "llvm/ADT/Statistic.h"
16
#include "llvm/Analysis/Loads.h"
17
#include "llvm/Analysis/LoopInfo.h"
18
#include "llvm/Analysis/LoopIterator.h"
19
#include "llvm/Analysis/ScalarEvolution.h"
20
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
21
#include "llvm/Analysis/TargetTransformInfo.h"
22
#include "llvm/IR/BasicBlock.h"
23
#include "llvm/IR/Dominators.h"
24
#include "llvm/IR/Function.h"
25
#include "llvm/IR/InstrTypes.h"
26
#include "llvm/IR/Instruction.h"
27
#include "llvm/IR/Instructions.h"
28
#include "llvm/IR/LLVMContext.h"
29
#include "llvm/IR/MDBuilder.h"
30
#include "llvm/IR/PatternMatch.h"
31
#include "llvm/IR/ProfDataUtils.h"
32
#include "llvm/Support/Casting.h"
33
#include "llvm/Support/CommandLine.h"
34
#include "llvm/Support/Debug.h"
35
#include "llvm/Support/raw_ostream.h"
36
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
37
#include "llvm/Transforms/Utils/Cloning.h"
38
#include "llvm/Transforms/Utils/LoopSimplify.h"
39
#include "llvm/Transforms/Utils/LoopUtils.h"
40
#include "llvm/Transforms/Utils/ValueMapper.h"
41
#include <algorithm>
42
#include <cassert>
43
#include <cstdint>
44
#include <optional>
45

46
using namespace llvm;
47
using namespace llvm::PatternMatch;
48

49
#define DEBUG_TYPE "loop-peel"
50

51
STATISTIC(NumPeeled, "Number of loops peeled");
52

53
static cl::opt<unsigned> UnrollPeelCount(
54
    "unroll-peel-count", cl::Hidden,
55
    cl::desc("Set the unroll peeling count, for testing purposes"));
56

57
static cl::opt<bool>
58
    UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
59
                       cl::desc("Allows loops to be peeled when the dynamic "
60
                                "trip count is known to be low."));
61

62
static cl::opt<bool>
63
    UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling",
64
                                cl::init(false), cl::Hidden,
65
                                cl::desc("Allows loop nests to be peeled."));
66

67
static cl::opt<unsigned> UnrollPeelMaxCount(
68
    "unroll-peel-max-count", cl::init(7), cl::Hidden,
69
    cl::desc("Max average trip count which will cause loop peeling."));
70

71
static cl::opt<unsigned> UnrollForcePeelCount(
72
    "unroll-force-peel-count", cl::init(0), cl::Hidden,
73
    cl::desc("Force a peel count regardless of profiling information."));
74

75
static cl::opt<bool> DisableAdvancedPeeling(
76
    "disable-advanced-peeling", cl::init(false), cl::Hidden,
77
    cl::desc(
78
        "Disable advance peeling. Issues for convergent targets (D134803)."));
79

80
static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
81

82
// Check whether we are capable of peeling this loop.
83
bool llvm::canPeel(const Loop *L) {
84
  // Make sure the loop is in simplified form
85
  if (!L->isLoopSimplifyForm())
86
    return false;
87
  if (!DisableAdvancedPeeling)
88
    return true;
89

90
  SmallVector<BasicBlock *, 4> Exits;
91
  L->getUniqueNonLatchExitBlocks(Exits);
92
  // The latch must either be the only exiting block or all non-latch exit
93
  // blocks have either a deopt or unreachable terminator or compose a chain of
94
  // blocks where the last one is either deopt or unreachable terminated. Both
95
  // deopt and unreachable terminators are a strong indication they are not
96
  // taken. Note that this is a profitability check, not a legality check. Also
97
  // note that LoopPeeling currently can only update the branch weights of latch
98
  // blocks and branch weights to blocks with deopt or unreachable do not need
99
  // updating.
100
  return llvm::all_of(Exits, IsBlockFollowedByDeoptOrUnreachable);
101
}
102

103
namespace {
104

105
// As a loop is peeled, it may be the case that Phi nodes become
106
// loop-invariant (ie, known because there is only one choice).
107
// For example, consider the following function:
108
//   void g(int);
109
//   void binary() {
110
//     int x = 0;
111
//     int y = 0;
112
//     int a = 0;
113
//     for(int i = 0; i <100000; ++i) {
114
//       g(x);
115
//       x = y;
116
//       g(a);
117
//       y = a + 1;
118
//       a = 5;
119
//     }
120
//   }
121
// Peeling 3 iterations is beneficial because the values for x, y and a
122
// become known.  The IR for this loop looks something like the following:
123
//
124
//   %i = phi i32 [ 0, %entry ], [ %inc, %if.end ]
125
//   %a = phi i32 [ 0, %entry ], [ 5, %if.end ]
126
//   %y = phi i32 [ 0, %entry ], [ %add, %if.end ]
127
//   %x = phi i32 [ 0, %entry ], [ %y, %if.end ]
128
//   ...
129
//   tail call void @_Z1gi(i32 signext %x)
130
//   tail call void @_Z1gi(i32 signext %a)
131
//   %add = add nuw nsw i32 %a, 1
132
//   %inc = add nuw nsw i32 %i, 1
133
//   %exitcond = icmp eq i32 %inc, 100000
134
//   br i1 %exitcond, label %for.cond.cleanup, label %for.body
135
//
136
// The arguments for the calls to g will become known after 3 iterations
137
// of the loop, because the phi nodes values become known after 3 iterations
138
// of the loop (ie, they are known on the 4th iteration, so peel 3 iterations).
139
// The first iteration has g(0), g(0); the second has g(0), g(5); the
140
// third has g(1), g(5) and the fourth (and all subsequent) have g(6), g(5).
141
// Now consider the phi nodes:
142
//   %a is a phi with constants so it is determined after iteration 1.
143
//   %y is a phi based on a constant and %a so it is determined on
144
//     the iteration after %a is determined, so iteration 2.
145
//   %x is a phi based on a constant and %y so it is determined on
146
//     the iteration after %y, so iteration 3.
147
//   %i is based on itself (and is an induction variable) so it is
148
//     never determined.
149
// This means that peeling off 3 iterations will result in being able to
150
// remove the phi nodes for %a, %y, and %x.  The arguments for the
151
// corresponding calls to g are determined and the code for computing
152
// x, y, and a can be removed.
153
//
154
// The PhiAnalyzer class calculates how many times a loop should be
155
// peeled based on the above analysis of the phi nodes in the loop while
156
// respecting the maximum specified.
157
class PhiAnalyzer {
158
public:
159
  PhiAnalyzer(const Loop &L, unsigned MaxIterations);
160

161
  // Calculate the sufficient minimum number of iterations of the loop to peel
162
  // such that phi instructions become determined (subject to allowable limits)
163
  std::optional<unsigned> calculateIterationsToPeel();
164

165
protected:
166
  using PeelCounter = std::optional<unsigned>;
167
  const PeelCounter Unknown = std::nullopt;
168

169
  // Add 1 respecting Unknown and return Unknown if result over MaxIterations
170
  PeelCounter addOne(PeelCounter PC) const {
171
    if (PC == Unknown)
172
      return Unknown;
173
    return (*PC + 1 <= MaxIterations) ? PeelCounter{*PC + 1} : Unknown;
174
  }
175

176
  // Calculate the number of iterations after which the given value
177
  // becomes an invariant.
178
  PeelCounter calculate(const Value &);
179

180
  const Loop &L;
181
  const unsigned MaxIterations;
182

183
  // Map of Values to number of iterations to invariance
184
  SmallDenseMap<const Value *, PeelCounter> IterationsToInvariance;
185
};
186

187
PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
188
    : L(L), MaxIterations(MaxIterations) {
189
  assert(canPeel(&L) && "loop is not suitable for peeling");
190
  assert(MaxIterations > 0 && "no peeling is allowed?");
191
}
192

193
// This function calculates the number of iterations after which the value
194
// becomes an invariant. The pre-calculated values are memorized in a map.
195
// N.B. This number will be Unknown or <= MaxIterations.
196
// The function is calculated according to the following definition:
197
// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
198
//   F(%x) = G(%y) + 1 (N.B. [MaxIterations | Unknown] + 1 => Unknown)
199
//   G(%y) = 0 if %y is a loop invariant
200
//   G(%y) = G(%BackEdgeValue) if %y is a phi in the header block
201
//   G(%y) = TODO: if %y is an expression based on phis and loop invariants
202
//           The example looks like:
203
//           %x = phi(0, %a) <-- becomes invariant starting from 3rd iteration.
204
//           %y = phi(0, 5)
205
//           %a = %y + 1
206
//   G(%y) = Unknown otherwise (including phi not in header block)
207
PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
208
  // If we already know the answer, take it from the map.
209
  auto I = IterationsToInvariance.find(&V);
210
  if (I != IterationsToInvariance.end())
211
    return I->second;
212

213
  // Place Unknown to map to avoid infinite recursion. Such
214
  // cycles can never stop on an invariant.
215
  IterationsToInvariance[&V] = Unknown;
216

217
  if (L.isLoopInvariant(&V))
218
    // Loop invariant so known at start.
219
    return (IterationsToInvariance[&V] = 0);
220
  if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
221
    if (Phi->getParent() != L.getHeader()) {
222
      // Phi is not in header block so Unknown.
223
      assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
224
      return Unknown;
225
    }
226
    // We need to analyze the input from the back edge and add 1.
227
    Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
228
    PeelCounter Iterations = calculate(*Input);
229
    assert(IterationsToInvariance[Input] == Iterations &&
230
           "unexpected value saved");
231
    return (IterationsToInvariance[Phi] = addOne(Iterations));
232
  }
233
  if (const Instruction *I = dyn_cast<Instruction>(&V)) {
234
    if (isa<CmpInst>(I) || I->isBinaryOp()) {
235
      // Binary instructions get the max of the operands.
236
      PeelCounter LHS = calculate(*I->getOperand(0));
237
      if (LHS == Unknown)
238
        return Unknown;
239
      PeelCounter RHS = calculate(*I->getOperand(1));
240
      if (RHS == Unknown)
241
        return Unknown;
242
      return (IterationsToInvariance[I] = {std::max(*LHS, *RHS)});
243
    }
244
    if (I->isCast())
245
      // Cast instructions get the value of the operand.
246
      return (IterationsToInvariance[I] = calculate(*I->getOperand(0)));
247
  }
248
  // TODO: handle more expressions
249

250
  // Everything else is Unknown.
251
  assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
252
  return Unknown;
253
}
254

255
std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
256
  unsigned Iterations = 0;
257
  for (auto &PHI : L.getHeader()->phis()) {
258
    PeelCounter ToInvariance = calculate(PHI);
259
    if (ToInvariance != Unknown) {
260
      assert(*ToInvariance <= MaxIterations && "bad result in phi analysis");
261
      Iterations = std::max(Iterations, *ToInvariance);
262
      if (Iterations == MaxIterations)
263
        break;
264
    }
265
  }
266
  assert((Iterations <= MaxIterations) && "bad result in phi analysis");
267
  return Iterations ? std::optional<unsigned>(Iterations) : std::nullopt;
268
}
269

270
} // unnamed namespace
271

272
// Try to find any invariant memory reads that will become dereferenceable in
273
// the remainder loop after peeling. The load must also be used (transitively)
274
// by an exit condition. Returns the number of iterations to peel off (at the
275
// moment either 0 or 1).
276
static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
277
                                                      DominatorTree &DT,
278
                                                      AssumptionCache *AC) {
279
  // Skip loops with a single exiting block, because there should be no benefit
280
  // for the heuristic below.
281
  if (L.getExitingBlock())
282
    return 0;
283

284
  // All non-latch exit blocks must have an UnreachableInst terminator.
285
  // Otherwise the heuristic below may not be profitable.
286
  SmallVector<BasicBlock *, 4> Exits;
287
  L.getUniqueNonLatchExitBlocks(Exits);
288
  if (any_of(Exits, [](const BasicBlock *BB) {
289
        return !isa<UnreachableInst>(BB->getTerminator());
290
      }))
291
    return 0;
292

293
  // Now look for invariant loads that dominate the latch and are not known to
294
  // be dereferenceable. If there are such loads and no writes, they will become
295
  // dereferenceable in the loop if the first iteration is peeled off. Also
296
  // collect the set of instructions controlled by such loads. Only peel if an
297
  // exit condition uses (transitively) such a load.
298
  BasicBlock *Header = L.getHeader();
299
  BasicBlock *Latch = L.getLoopLatch();
300
  SmallPtrSet<Value *, 8> LoadUsers;
301
  const DataLayout &DL = L.getHeader()->getDataLayout();
302
  for (BasicBlock *BB : L.blocks()) {
303
    for (Instruction &I : *BB) {
304
      if (I.mayWriteToMemory())
305
        return 0;
306

307
      auto Iter = LoadUsers.find(&I);
308
      if (Iter != LoadUsers.end()) {
309
        for (Value *U : I.users())
310
          LoadUsers.insert(U);
311
      }
312
      // Do not look for reads in the header; they can already be hoisted
313
      // without peeling.
314
      if (BB == Header)
315
        continue;
316
      if (auto *LI = dyn_cast<LoadInst>(&I)) {
317
        Value *Ptr = LI->getPointerOperand();
318
        if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
319
            !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, AC, &DT))
320
          for (Value *U : I.users())
321
            LoadUsers.insert(U);
322
      }
323
    }
324
  }
325
  SmallVector<BasicBlock *> ExitingBlocks;
326
  L.getExitingBlocks(ExitingBlocks);
327
  if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
328
        return LoadUsers.contains(Exiting->getTerminator());
329
      }))
330
    return 1;
331
  return 0;
332
}
333

334
// Return the number of iterations to peel off that make conditions in the
335
// body true/false. For example, if we peel 2 iterations off the loop below,
336
// the condition i < 2 can be evaluated at compile time.
337
//  for (i = 0; i < n; i++)
338
//    if (i < 2)
339
//      ..
340
//    else
341
//      ..
342
//   }
343
static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
344
                                         ScalarEvolution &SE) {
345
  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
346
  unsigned DesiredPeelCount = 0;
347

348
  // Do not peel the entire loop.
349
  const SCEV *BE = SE.getConstantMaxBackedgeTakenCount(&L);
350
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(BE))
351
    MaxPeelCount =
352
        std::min((unsigned)SC->getAPInt().getLimitedValue() - 1, MaxPeelCount);
353

354
  // Increase PeelCount while (IterVal Pred BoundSCEV) condition is satisfied;
355
  // return true if inversed condition become known before reaching the
356
  // MaxPeelCount limit.
357
  auto PeelWhilePredicateIsKnown =
358
      [&](unsigned &PeelCount, const SCEV *&IterVal, const SCEV *BoundSCEV,
359
          const SCEV *Step, ICmpInst::Predicate Pred) {
360
        while (PeelCount < MaxPeelCount &&
361
               SE.isKnownPredicate(Pred, IterVal, BoundSCEV)) {
362
          IterVal = SE.getAddExpr(IterVal, Step);
363
          ++PeelCount;
364
        }
365
        return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
366
                                   BoundSCEV);
367
      };
368

369
  const unsigned MaxDepth = 4;
370
  std::function<void(Value *, unsigned)> ComputePeelCount =
371
      [&](Value *Condition, unsigned Depth) -> void {
372
    if (!Condition->getType()->isIntegerTy() || Depth >= MaxDepth)
373
      return;
374

375
    Value *LeftVal, *RightVal;
376
    if (match(Condition, m_And(m_Value(LeftVal), m_Value(RightVal))) ||
377
        match(Condition, m_Or(m_Value(LeftVal), m_Value(RightVal)))) {
378
      ComputePeelCount(LeftVal, Depth + 1);
379
      ComputePeelCount(RightVal, Depth + 1);
380
      return;
381
    }
382

383
    CmpInst::Predicate Pred;
384
    if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
385
      return;
386

387
    const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
388
    const SCEV *RightSCEV = SE.getSCEV(RightVal);
389

390
    // Do not consider predicates that are known to be true or false
391
    // independently of the loop iteration.
392
    if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
393
      return;
394

395
    // Check if we have a condition with one AddRec and one non AddRec
396
    // expression. Normalize LeftSCEV to be the AddRec.
397
    if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
398
      if (isa<SCEVAddRecExpr>(RightSCEV)) {
399
        std::swap(LeftSCEV, RightSCEV);
400
        Pred = ICmpInst::getSwappedPredicate(Pred);
401
      } else
402
        return;
403
    }
404

405
    const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
406

407
    // Avoid huge SCEV computations in the loop below, make sure we only
408
    // consider AddRecs of the loop we are trying to peel.
409
    if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
410
      return;
411
    if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
412
        !SE.getMonotonicPredicateType(LeftAR, Pred))
413
      return;
414

415
    // Check if extending the current DesiredPeelCount lets us evaluate Pred
416
    // or !Pred in the loop body statically.
417
    unsigned NewPeelCount = DesiredPeelCount;
418

419
    const SCEV *IterVal = LeftAR->evaluateAtIteration(
420
        SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
421

422
    // If the original condition is not known, get the negated predicate
423
    // (which holds on the else branch) and check if it is known. This allows
424
    // us to peel of iterations that make the original condition false.
425
    if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
426
      Pred = ICmpInst::getInversePredicate(Pred);
427

428
    const SCEV *Step = LeftAR->getStepRecurrence(SE);
429
    if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
430
                                   Pred))
431
      return;
432

433
    // However, for equality comparisons, that isn't always sufficient to
434
    // eliminate the comparsion in loop body, we may need to peel one more
435
    // iteration. See if that makes !Pred become unknown again.
436
    const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step);
437
    if (ICmpInst::isEquality(Pred) &&
438
        !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal,
439
                             RightSCEV) &&
440
        !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
441
        SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
442
      if (NewPeelCount >= MaxPeelCount)
443
        return; // Need to peel one more iteration, but can't. Give up.
444
      ++NewPeelCount; // Great!
445
    }
446

447
    DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
448
  };
449

450
  auto ComputePeelCountMinMax = [&](MinMaxIntrinsic *MinMax) {
451
    if (!MinMax->getType()->isIntegerTy())
452
      return;
453
    Value *LHS = MinMax->getLHS(), *RHS = MinMax->getRHS();
454
    const SCEV *BoundSCEV, *IterSCEV;
455
    if (L.isLoopInvariant(LHS)) {
456
      BoundSCEV = SE.getSCEV(LHS);
457
      IterSCEV = SE.getSCEV(RHS);
458
    } else if (L.isLoopInvariant(RHS)) {
459
      BoundSCEV = SE.getSCEV(RHS);
460
      IterSCEV = SE.getSCEV(LHS);
461
    } else
462
      return;
463
    const auto *AddRec = dyn_cast<SCEVAddRecExpr>(IterSCEV);
464
    // For simplicity, we support only affine recurrences.
465
    if (!AddRec || !AddRec->isAffine() || AddRec->getLoop() != &L)
466
      return;
467
    const SCEV *Step = AddRec->getStepRecurrence(SE);
468
    bool IsSigned = MinMax->isSigned();
469
    // To minimize number of peeled iterations, we use strict relational
470
    // predicates here.
471
    ICmpInst::Predicate Pred;
472
    if (SE.isKnownPositive(Step))
473
      Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
474
    else if (SE.isKnownNegative(Step))
475
      Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
476
    else
477
      return;
478
    // Check that AddRec is not wrapping.
479
    if (!(IsSigned ? AddRec->hasNoSignedWrap() : AddRec->hasNoUnsignedWrap()))
480
      return;
481
    unsigned NewPeelCount = DesiredPeelCount;
482
    const SCEV *IterVal = AddRec->evaluateAtIteration(
483
        SE.getConstant(AddRec->getType(), NewPeelCount), SE);
484
    if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, BoundSCEV, Step,
485
                                   Pred))
486
      return;
487
    DesiredPeelCount = NewPeelCount;
488
  };
489

490
  for (BasicBlock *BB : L.blocks()) {
491
    for (Instruction &I : *BB) {
492
      if (SelectInst *SI = dyn_cast<SelectInst>(&I))
493
        ComputePeelCount(SI->getCondition(), 0);
494
      if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(&I))
495
        ComputePeelCountMinMax(MinMax);
496
    }
497

498
    auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
499
    if (!BI || BI->isUnconditional())
500
      continue;
501

502
    // Ignore loop exit condition.
503
    if (L.getLoopLatch() == BB)
504
      continue;
505

506
    ComputePeelCount(BI->getCondition(), 0);
507
  }
508

509
  return DesiredPeelCount;
510
}
511

512
/// This "heuristic" exactly matches implicit behavior which used to exist
513
/// inside getLoopEstimatedTripCount.  It was added here to keep an
514
/// improvement inside that API from causing peeling to become more aggressive.
515
/// This should probably be removed.
516
static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
517
  BasicBlock *Latch = L->getLoopLatch();
518
  if (!Latch)
519
    return true;
520

521
  BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
522
  if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
523
    return true;
524

525
  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
526
          LatchBR->getSuccessor(1) == L->getHeader()) &&
527
         "At least one edge out of the latch must go to the header");
528

529
  SmallVector<BasicBlock *, 4> ExitBlocks;
530
  L->getUniqueNonLatchExitBlocks(ExitBlocks);
531
  return any_of(ExitBlocks, [](const BasicBlock *EB) {
532
      return !EB->getTerminatingDeoptimizeCall();
533
    });
534
}
535

536

537
// Return the number of iterations we want to peel off.
538
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
539
                            TargetTransformInfo::PeelingPreferences &PP,
540
                            unsigned TripCount, DominatorTree &DT,
541
                            ScalarEvolution &SE, AssumptionCache *AC,
542
                            unsigned Threshold) {
543
  assert(LoopSize > 0 && "Zero loop size is not allowed!");
544
  // Save the PP.PeelCount value set by the target in
545
  // TTI.getPeelingPreferences or by the flag -unroll-peel-count.
546
  unsigned TargetPeelCount = PP.PeelCount;
547
  PP.PeelCount = 0;
548
  if (!canPeel(L))
549
    return;
550

551
  // Only try to peel innermost loops by default.
552
  // The constraint can be relaxed by the target in TTI.getPeelingPreferences
553
  // or by the flag -unroll-allow-loop-nests-peeling.
554
  if (!PP.AllowLoopNestsPeeling && !L->isInnermost())
555
    return;
556

557
  // If the user provided a peel count, use that.
558
  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
559
  if (UserPeelCount) {
560
    LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
561
                      << " iterations.\n");
562
    PP.PeelCount = UnrollForcePeelCount;
563
    PP.PeelProfiledIterations = true;
564
    return;
565
  }
566

567
  // Skip peeling if it's disabled.
568
  if (!PP.AllowPeeling)
569
    return;
570

571
  // Check that we can peel at least one iteration.
572
  if (2 * LoopSize > Threshold)
573
    return;
574

575
  unsigned AlreadyPeeled = 0;
576
  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
577
    AlreadyPeeled = *Peeled;
578
  // Stop if we already peeled off the maximum number of iterations.
579
  if (AlreadyPeeled >= UnrollPeelMaxCount)
580
    return;
581

582
  // Pay respect to limitations implied by loop size and the max peel count.
583
  unsigned MaxPeelCount = UnrollPeelMaxCount;
584
  MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1);
585

586
  // Start the max computation with the PP.PeelCount value set by the target
587
  // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
588
  unsigned DesiredPeelCount = TargetPeelCount;
589

590
  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
591
  // iterations of the loop. For this we compute the number for iterations after
592
  // which every Phi is guaranteed to become an invariant, and try to peel the
593
  // maximum number of iterations among these values, thus turning all those
594
  // Phis into invariants.
595
  if (MaxPeelCount > DesiredPeelCount) {
596
    // Check how many iterations are useful for resolving Phis
597
    auto NumPeels = PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel();
598
    if (NumPeels)
599
      DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels);
600
  }
601

602
  DesiredPeelCount = std::max(DesiredPeelCount,
603
                              countToEliminateCompares(*L, MaxPeelCount, SE));
604

605
  if (DesiredPeelCount == 0)
606
    DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT, AC);
607

608
  if (DesiredPeelCount > 0) {
609
    DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
610
    // Consider max peel count limitation.
611
    assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
612
    if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
613
      LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
614
                        << " iteration(s) to turn"
615
                        << " some Phis into invariants.\n");
616
      PP.PeelCount = DesiredPeelCount;
617
      PP.PeelProfiledIterations = false;
618
      return;
619
    }
620
  }
621

622
  // Bail if we know the statically calculated trip count.
623
  // In this case we rather prefer partial unrolling.
624
  if (TripCount)
625
    return;
626

627
  // Do not apply profile base peeling if it is disabled.
628
  if (!PP.PeelProfiledIterations)
629
    return;
630
  // If we don't know the trip count, but have reason to believe the average
631
  // trip count is low, peeling should be beneficial, since we will usually
632
  // hit the peeled section.
633
  // We only do this in the presence of profile information, since otherwise
634
  // our estimates of the trip count are not reliable enough.
635
  if (L->getHeader()->getParent()->hasProfileData()) {
636
    if (violatesLegacyMultiExitLoopCheck(L))
637
      return;
638
    std::optional<unsigned> EstimatedTripCount = getLoopEstimatedTripCount(L);
639
    if (!EstimatedTripCount)
640
      return;
641

642
    LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is "
643
                      << *EstimatedTripCount << "\n");
644

645
    if (*EstimatedTripCount) {
646
      if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) {
647
        unsigned PeelCount = *EstimatedTripCount;
648
        LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n");
649
        PP.PeelCount = PeelCount;
650
        return;
651
      }
652
      LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
653
      LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
654
      LLVM_DEBUG(dbgs() << "Loop cost: " << LoopSize << "\n");
655
      LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n");
656
      LLVM_DEBUG(dbgs() << "Max peel count by cost: "
657
                        << (Threshold / LoopSize - 1) << "\n");
658
    }
659
  }
660
}
661

662
struct WeightInfo {
663
  // Weights for current iteration.
664
  SmallVector<uint32_t> Weights;
665
  // Weights to subtract after each iteration.
666
  const SmallVector<uint32_t> SubWeights;
667
};
668

669
/// Update the branch weights of an exiting block of a peeled-off loop
670
/// iteration.
671
/// Let F is a weight of the edge to continue (fallthrough) into the loop.
672
/// Let E is a weight of the edge to an exit.
673
/// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to
674
/// go to exit.
675
/// Then, Estimated ExitCount = F / E.
676
/// For I-th (counting from 0) peeled off iteration we set the weights for
677
/// the peeled exit as (EC - I, 1). It gives us reasonable distribution,
678
/// The probability to go to exit 1/(EC-I) increases. At the same time
679
/// the estimated exit count in the remainder loop reduces by I.
680
/// To avoid dealing with division rounding we can just multiple both part
681
/// of weights to E and use weight as (F - I * E, E).
682
static void updateBranchWeights(Instruction *Term, WeightInfo &Info) {
683
  setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false);
684
  for (auto [Idx, SubWeight] : enumerate(Info.SubWeights))
685
    if (SubWeight != 0)
686
      // Don't set the probability of taking the edge from latch to loop header
687
      // to less than 1:1 ratio (meaning Weight should not be lower than
688
      // SubWeight), as this could significantly reduce the loop's hotness,
689
      // which would be incorrect in the case of underestimating the trip count.
690
      Info.Weights[Idx] =
691
          Info.Weights[Idx] > SubWeight
692
              ? std::max(Info.Weights[Idx] - SubWeight, SubWeight)
693
              : SubWeight;
694
}
695

696
/// Initialize the weights for all exiting blocks.
697
static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
698
                              Loop *L) {
699
  SmallVector<BasicBlock *> ExitingBlocks;
700
  L->getExitingBlocks(ExitingBlocks);
701
  for (BasicBlock *ExitingBlock : ExitingBlocks) {
702
    Instruction *Term = ExitingBlock->getTerminator();
703
    SmallVector<uint32_t> Weights;
704
    if (!extractBranchWeights(*Term, Weights))
705
      continue;
706

707
    // See the comment on updateBranchWeights() for an explanation of what we
708
    // do here.
709
    uint32_t FallThroughWeights = 0;
710
    uint32_t ExitWeights = 0;
711
    for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
712
      if (L->contains(Succ))
713
        FallThroughWeights += Weight;
714
      else
715
        ExitWeights += Weight;
716
    }
717

718
    // Don't try to update weights for degenerate case.
719
    if (FallThroughWeights == 0)
720
      continue;
721

722
    SmallVector<uint32_t> SubWeights;
723
    for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
724
      if (!L->contains(Succ)) {
725
        // Exit weights stay the same.
726
        SubWeights.push_back(0);
727
        continue;
728
      }
729

730
      // Subtract exit weights on each iteration, distributed across all
731
      // fallthrough edges.
732
      double W = (double)Weight / (double)FallThroughWeights;
733
      SubWeights.push_back((uint32_t)(ExitWeights * W));
734
    }
735

736
    WeightInfos.insert({Term, {std::move(Weights), std::move(SubWeights)}});
737
  }
738
}
739

740
/// Clones the body of the loop L, putting it between \p InsertTop and \p
741
/// InsertBot.
742
/// \param IterNumber The serial number of the iteration currently being
743
/// peeled off.
744
/// \param ExitEdges The exit edges of the original loop.
745
/// \param[out] NewBlocks A list of the blocks in the newly created clone
746
/// \param[out] VMap The value map between the loop and the new clone.
747
/// \param LoopBlocks A helper for DFS-traversal of the loop.
748
/// \param LVMap A value-map that maps instructions from the original loop to
749
/// instructions in the last peeled-off iteration.
750
static void cloneLoopBlocks(
751
    Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
752
    SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
753
    SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
754
    ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
755
    LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes,
756
    ScalarEvolution &SE) {
757
  BasicBlock *Header = L->getHeader();
758
  BasicBlock *Latch = L->getLoopLatch();
759
  BasicBlock *PreHeader = L->getLoopPreheader();
760

761
  Function *F = Header->getParent();
762
  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
763
  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
764
  Loop *ParentLoop = L->getParentLoop();
765

766
  // For each block in the original loop, create a new copy,
767
  // and update the value map with the newly created values.
768
  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
769
    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
770
    NewBlocks.push_back(NewBB);
771

772
    // If an original block is an immediate child of the loop L, its copy
773
    // is a child of a ParentLoop after peeling. If a block is a child of
774
    // a nested loop, it is handled in the cloneLoop() call below.
775
    if (ParentLoop && LI->getLoopFor(*BB) == L)
776
      ParentLoop->addBasicBlockToLoop(NewBB, *LI);
777

778
    VMap[*BB] = NewBB;
779

780
    // If dominator tree is available, insert nodes to represent cloned blocks.
781
    if (DT) {
782
      if (Header == *BB)
783
        DT->addNewBlock(NewBB, InsertTop);
784
      else {
785
        DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
786
        // VMap must contain entry for IDom, as the iteration order is RPO.
787
        DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
788
      }
789
    }
790
  }
791

792
  {
793
    // Identify what other metadata depends on the cloned version. After
794
    // cloning, replace the metadata with the corrected version for both
795
    // memory instructions and noalias intrinsics.
796
    std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
797
    cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
798
                               Header->getContext(), Ext);
799
  }
800

801
  // Recursively create the new Loop objects for nested loops, if any,
802
  // to preserve LoopInfo.
803
  for (Loop *ChildLoop : *L) {
804
    cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
805
  }
806

807
  // Hook-up the control flow for the newly inserted blocks.
808
  // The new header is hooked up directly to the "top", which is either
809
  // the original loop preheader (for the first iteration) or the previous
810
  // iteration's exiting block (for every other iteration)
811
  InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
812

813
  // Similarly, for the latch:
814
  // The original exiting edge is still hooked up to the loop exit.
815
  // The backedge now goes to the "bottom", which is either the loop's real
816
  // header (for the last peeled iteration) or the copied header of the next
817
  // iteration (for every other iteration)
818
  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
819
  auto *LatchTerm = cast<Instruction>(NewLatch->getTerminator());
820
  for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx)
821
    if (LatchTerm->getSuccessor(idx) == Header) {
822
      LatchTerm->setSuccessor(idx, InsertBot);
823
      break;
824
    }
825
  if (DT)
826
    DT->changeImmediateDominator(InsertBot, NewLatch);
827

828
  // The new copy of the loop body starts with a bunch of PHI nodes
829
  // that pick an incoming value from either the preheader, or the previous
830
  // loop iteration. Since this copy is no longer part of the loop, we
831
  // resolve this statically:
832
  // For the first iteration, we use the value from the preheader directly.
833
  // For any other iteration, we replace the phi with the value generated by
834
  // the immediately preceding clone of the loop body (which represents
835
  // the previous iteration).
836
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
837
    PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
838
    if (IterNumber == 0) {
839
      VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
840
    } else {
841
      Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
842
      Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
843
      if (LatchInst && L->contains(LatchInst))
844
        VMap[&*I] = LVMap[LatchInst];
845
      else
846
        VMap[&*I] = LatchVal;
847
    }
848
    NewPHI->eraseFromParent();
849
  }
850

851
  // Fix up the outgoing values - we need to add a value for the iteration
852
  // we've just created. Note that this must happen *after* the incoming
853
  // values are adjusted, since the value going out of the latch may also be
854
  // a value coming into the header.
855
  for (auto Edge : ExitEdges)
856
    for (PHINode &PHI : Edge.second->phis()) {
857
      Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
858
      Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
859
      if (LatchInst && L->contains(LatchInst))
860
        LatchVal = VMap[LatchVal];
861
      PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
862
      SE.forgetLcssaPhiWithNewPredecessor(L, &PHI);
863
    }
864

865
  // LastValueMap is updated with the values for the current loop
866
  // which are used the next time this function is called.
867
  for (auto KV : VMap)
868
    LVMap[KV.first] = KV.second;
869
}
870

871
TargetTransformInfo::PeelingPreferences
872
llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
873
                               const TargetTransformInfo &TTI,
874
                               std::optional<bool> UserAllowPeeling,
875
                               std::optional<bool> UserAllowProfileBasedPeeling,
876
                               bool UnrollingSpecficValues) {
877
  TargetTransformInfo::PeelingPreferences PP;
878

879
  // Set the default values.
880
  PP.PeelCount = 0;
881
  PP.AllowPeeling = true;
882
  PP.AllowLoopNestsPeeling = false;
883
  PP.PeelProfiledIterations = true;
884

885
  // Get the target specifc values.
886
  TTI.getPeelingPreferences(L, SE, PP);
887

888
  // User specified values using cl::opt.
889
  if (UnrollingSpecficValues) {
890
    if (UnrollPeelCount.getNumOccurrences() > 0)
891
      PP.PeelCount = UnrollPeelCount;
892
    if (UnrollAllowPeeling.getNumOccurrences() > 0)
893
      PP.AllowPeeling = UnrollAllowPeeling;
894
    if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
895
      PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
896
  }
897

898
  // User specifed values provided by argument.
899
  if (UserAllowPeeling)
900
    PP.AllowPeeling = *UserAllowPeeling;
901
  if (UserAllowProfileBasedPeeling)
902
    PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling;
903

904
  return PP;
905
}
906

907
/// Peel off the first \p PeelCount iterations of loop \p L.
908
///
909
/// Note that this does not peel them off as a single straight-line block.
910
/// Rather, each iteration is peeled off separately, and needs to check the
911
/// exit condition.
912
/// For loops that dynamically execute \p PeelCount iterations or less
913
/// this provides a benefit, since the peeled off iterations, which account
914
/// for the bulk of dynamic execution, can be further simplified by scalar
915
/// optimizations.
916
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
917
                    ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC,
918
                    bool PreserveLCSSA, ValueToValueMapTy &LVMap) {
919
  assert(PeelCount > 0 && "Attempt to peel out zero iterations?");
920
  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");
921

922
  LoopBlocksDFS LoopBlocks(L);
923
  LoopBlocks.perform(LI);
924

925
  BasicBlock *Header = L->getHeader();
926
  BasicBlock *PreHeader = L->getLoopPreheader();
927
  BasicBlock *Latch = L->getLoopLatch();
928
  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
929
  L->getExitEdges(ExitEdges);
930

931
  // Remember dominators of blocks we might reach through exits to change them
932
  // later. Immediate dominator of such block might change, because we add more
933
  // routes which can lead to the exit: we can reach it from the peeled
934
  // iterations too.
935
  DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
936
  for (auto *BB : L->blocks()) {
937
    auto *BBDomNode = DT.getNode(BB);
938
    SmallVector<BasicBlock *, 16> ChildrenToUpdate;
939
    for (auto *ChildDomNode : BBDomNode->children()) {
940
      auto *ChildBB = ChildDomNode->getBlock();
941
      if (!L->contains(ChildBB))
942
        ChildrenToUpdate.push_back(ChildBB);
943
    }
944
    // The new idom of the block will be the nearest common dominator
945
    // of all copies of the previous idom. This is equivalent to the
946
    // nearest common dominator of the previous idom and the first latch,
947
    // which dominates all copies of the previous idom.
948
    BasicBlock *NewIDom = DT.findNearestCommonDominator(BB, Latch);
949
    for (auto *ChildBB : ChildrenToUpdate)
950
      NonLoopBlocksIDom[ChildBB] = NewIDom;
951
  }
952

953
  Function *F = Header->getParent();
954

955
  // Set up all the necessary basic blocks. It is convenient to split the
956
  // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
957
  // body, and a new preheader for the "real" loop.
958

959
  // Peeling the first iteration transforms.
960
  //
961
  // PreHeader:
962
  // ...
963
  // Header:
964
  //   LoopBody
965
  //   If (cond) goto Header
966
  // Exit:
967
  //
968
  // into
969
  //
970
  // InsertTop:
971
  //   LoopBody
972
  //   If (!cond) goto Exit
973
  // InsertBot:
974
  // NewPreHeader:
975
  // ...
976
  // Header:
977
  //  LoopBody
978
  //  If (cond) goto Header
979
  // Exit:
980
  //
981
  // Each following iteration will split the current bottom anchor in two,
982
  // and put the new copy of the loop body between these two blocks. That is,
983
  // after peeling another iteration from the example above, we'll split
984
  // InsertBot, and get:
985
  //
986
  // InsertTop:
987
  //   LoopBody
988
  //   If (!cond) goto Exit
989
  // InsertBot:
990
  //   LoopBody
991
  //   If (!cond) goto Exit
992
  // InsertBot.next:
993
  // NewPreHeader:
994
  // ...
995
  // Header:
996
  //  LoopBody
997
  //  If (cond) goto Header
998
  // Exit:
999

1000
  BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI);
1001
  BasicBlock *InsertBot =
1002
      SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI);
1003
  BasicBlock *NewPreHeader =
1004
      SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
1005

1006
  InsertTop->setName(Header->getName() + ".peel.begin");
1007
  InsertBot->setName(Header->getName() + ".peel.next");
1008
  NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
1009

1010
  Instruction *LatchTerm =
1011
      cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator());
1012

1013
  // If we have branch weight information, we'll want to update it for the
1014
  // newly created branches.
1015
  DenseMap<Instruction *, WeightInfo> Weights;
1016
  initBranchWeights(Weights, L);
1017

1018
  // Identify what noalias metadata is inside the loop: if it is inside the
1019
  // loop, the associated metadata must be cloned for each iteration.
1020
  SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
1021
  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
1022

1023
  // For each peeled-off iteration, make a copy of the loop.
1024
  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
1025
    SmallVector<BasicBlock *, 8> NewBlocks;
1026
    ValueToValueMapTy VMap;
1027

1028
    cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
1029
                    LoopBlocks, VMap, LVMap, &DT, LI,
1030
                    LoopLocalNoAliasDeclScopes, *SE);
1031

1032
    // Remap to use values from the current iteration instead of the
1033
    // previous one.
1034
    remapInstructionsInBlocks(NewBlocks, VMap);
1035

1036
    // Update IDoms of the blocks reachable through exits.
1037
    if (Iter == 0)
1038
      for (auto BBIDom : NonLoopBlocksIDom)
1039
        DT.changeImmediateDominator(BBIDom.first,
1040
                                     cast<BasicBlock>(LVMap[BBIDom.second]));
1041
#ifdef EXPENSIVE_CHECKS
1042
    assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1043
#endif
1044

1045
    for (auto &[Term, Info] : Weights) {
1046
      auto *TermCopy = cast<Instruction>(VMap[Term]);
1047
      updateBranchWeights(TermCopy, Info);
1048
    }
1049

1050
    // Remove Loop metadata from the latch branch instruction
1051
    // because it is not the Loop's latch branch anymore.
1052
    auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
1053
    LatchTermCopy->setMetadata(LLVMContext::MD_loop, nullptr);
1054

1055
    InsertTop = InsertBot;
1056
    InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI);
1057
    InsertBot->setName(Header->getName() + ".peel.next");
1058

1059
    F->splice(InsertTop->getIterator(), F, NewBlocks[0]->getIterator(),
1060
              F->end());
1061
  }
1062

1063
  // Now adjust the phi nodes in the loop header to get their initial values
1064
  // from the last peeled-off iteration instead of the preheader.
1065
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
1066
    PHINode *PHI = cast<PHINode>(I);
1067
    Value *NewVal = PHI->getIncomingValueForBlock(Latch);
1068
    Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
1069
    if (LatchInst && L->contains(LatchInst))
1070
      NewVal = LVMap[LatchInst];
1071

1072
    PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
1073
  }
1074

1075
  for (const auto &[Term, Info] : Weights) {
1076
    setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false);
1077
  }
1078

1079
  // Update Metadata for count of peeled off iterations.
1080
  unsigned AlreadyPeeled = 0;
1081
  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
1082
    AlreadyPeeled = *Peeled;
1083
  addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
1084

1085
  if (Loop *ParentLoop = L->getParentLoop())
1086
    L = ParentLoop;
1087

1088
  // We modified the loop, update SE.
1089
  SE->forgetTopmostLoop(L);
1090
  SE->forgetBlockAndLoopDispositions();
1091

1092
#ifdef EXPENSIVE_CHECKS
1093
  // Finally DomtTree must be correct.
1094
  assert(DT.verify(DominatorTree::VerificationLevel::Fast));
1095
#endif
1096

1097
  // FIXME: Incrementally update loop-simplify
1098
  simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA);
1099

1100
  NumPeeled++;
1101

1102
  return true;
1103
}
1104

1105
Product

Resources

Company