CoCalc -- IndVarSimplify.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
³⁵²⁶⁹ views
1
//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This transformation analyzes and transforms the induction variables (and
10
// computations derived from them) into simpler forms suitable for subsequent
11
// analysis and transformation.
12
//
13
// If the trip count of a loop is computable, this pass also makes the following
14
// changes:
15
//   1. The exit condition for the loop is canonicalized to compare the
16
//      induction value against the exit value.  This turns loops like:
17
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18
//   2. Any use outside of the loop of an expression derived from the indvar
19
//      is changed to compute the derived value outside of the loop, eliminating
20
//      the dependence on the exit value of the induction variable.  If the only
21
//      purpose of the loop is to compute the exit value of some derived
22
//      expression, this transformation will make the loop dead.
23
//
24
//===----------------------------------------------------------------------===//
25

26
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
27
#include "llvm/ADT/APFloat.h"
28
#include "llvm/ADT/ArrayRef.h"
29
#include "llvm/ADT/STLExtras.h"
30
#include "llvm/ADT/SmallPtrSet.h"
31
#include "llvm/ADT/SmallSet.h"
32
#include "llvm/ADT/SmallVector.h"
33
#include "llvm/ADT/Statistic.h"
34
#include "llvm/ADT/iterator_range.h"
35
#include "llvm/Analysis/LoopInfo.h"
36
#include "llvm/Analysis/LoopPass.h"
37
#include "llvm/Analysis/MemorySSA.h"
38
#include "llvm/Analysis/MemorySSAUpdater.h"
39
#include "llvm/Analysis/ScalarEvolution.h"
40
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
41
#include "llvm/Analysis/TargetLibraryInfo.h"
42
#include "llvm/Analysis/TargetTransformInfo.h"
43
#include "llvm/Analysis/ValueTracking.h"
44
#include "llvm/IR/BasicBlock.h"
45
#include "llvm/IR/Constant.h"
46
#include "llvm/IR/ConstantRange.h"
47
#include "llvm/IR/Constants.h"
48
#include "llvm/IR/DataLayout.h"
49
#include "llvm/IR/DerivedTypes.h"
50
#include "llvm/IR/Dominators.h"
51
#include "llvm/IR/Function.h"
52
#include "llvm/IR/IRBuilder.h"
53
#include "llvm/IR/InstrTypes.h"
54
#include "llvm/IR/Instruction.h"
55
#include "llvm/IR/Instructions.h"
56
#include "llvm/IR/IntrinsicInst.h"
57
#include "llvm/IR/Intrinsics.h"
58
#include "llvm/IR/Module.h"
59
#include "llvm/IR/Operator.h"
60
#include "llvm/IR/PassManager.h"
61
#include "llvm/IR/PatternMatch.h"
62
#include "llvm/IR/Type.h"
63
#include "llvm/IR/Use.h"
64
#include "llvm/IR/User.h"
65
#include "llvm/IR/Value.h"
66
#include "llvm/IR/ValueHandle.h"
67
#include "llvm/Support/Casting.h"
68
#include "llvm/Support/CommandLine.h"
69
#include "llvm/Support/Compiler.h"
70
#include "llvm/Support/Debug.h"
71
#include "llvm/Support/MathExtras.h"
72
#include "llvm/Support/raw_ostream.h"
73
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
74
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
75
#include "llvm/Transforms/Utils/Local.h"
76
#include "llvm/Transforms/Utils/LoopUtils.h"
77
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
78
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
79
#include <cassert>
80
#include <cstdint>
81
#include <utility>
82

83
using namespace llvm;
84
using namespace PatternMatch;
85

86
#define DEBUG_TYPE "indvars"
87

88
STATISTIC(NumWidened     , "Number of indvars widened");
89
STATISTIC(NumReplaced    , "Number of exit values replaced");
90
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
91
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
92
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
93

94
static cl::opt<ReplaceExitVal> ReplaceExitValue(
95
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
96
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
97
    cl::values(
98
        clEnumValN(NeverRepl, "never", "never replace exit value"),
99
        clEnumValN(OnlyCheapRepl, "cheap",
100
                   "only replace exit value when the cost is cheap"),
101
        clEnumValN(
102
            UnusedIndVarInLoop, "unusedindvarinloop",
103
            "only replace exit value when it is an unused "
104
            "induction variable in the loop and has cheap replacement cost"),
105
        clEnumValN(NoHardUse, "noharduse",
106
                   "only replace exit values when loop def likely dead"),
107
        clEnumValN(AlwaysRepl, "always",
108
                   "always replace exit value whenever possible")));
109

110
static cl::opt<bool> UsePostIncrementRanges(
111
  "indvars-post-increment-ranges", cl::Hidden,
112
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
113
  cl::init(true));
114

115
static cl::opt<bool>
116
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
117
            cl::desc("Disable Linear Function Test Replace optimization"));
118

119
static cl::opt<bool>
120
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
121
                cl::desc("Predicate conditions in read only loops"));
122

123
static cl::opt<bool>
124
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
125
                cl::desc("Allow widening of indvars to eliminate s/zext"));
126

127
namespace {
128

129
class IndVarSimplify {
130
  LoopInfo *LI;
131
  ScalarEvolution *SE;
132
  DominatorTree *DT;
133
  const DataLayout &DL;
134
  TargetLibraryInfo *TLI;
135
  const TargetTransformInfo *TTI;
136
  std::unique_ptr<MemorySSAUpdater> MSSAU;
137

138
  SmallVector<WeakTrackingVH, 16> DeadInsts;
139
  bool WidenIndVars;
140

141
  bool RunUnswitching = false;
142

143
  bool handleFloatingPointIV(Loop *L, PHINode *PH);
144
  bool rewriteNonIntegerIVs(Loop *L);
145

146
  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
147
  /// Try to improve our exit conditions by converting condition from signed
148
  /// to unsigned or rotating computation out of the loop.
149
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
150
  bool canonicalizeExitCondition(Loop *L);
151
  /// Try to eliminate loop exits based on analyzeable exit counts
152
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
153
  /// Try to form loop invariant tests for loop exits by changing how many
154
  /// iterations of the loop run when that is unobservable.
155
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
156

157
  bool rewriteFirstIterationLoopExitValues(Loop *L);
158

159
  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
160
                                 const SCEV *ExitCount,
161
                                 PHINode *IndVar, SCEVExpander &Rewriter);
162

163
  bool sinkUnusedInvariants(Loop *L);
164

165
public:
166
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
167
                 const DataLayout &DL, TargetLibraryInfo *TLI,
168
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
169
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
170
        WidenIndVars(WidenIndVars) {
171
    if (MSSA)
172
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
173
  }
174

175
  bool run(Loop *L);
176

177
  bool runUnswitching() const { return RunUnswitching; }
178
};
179

180
} // end anonymous namespace
181

182
//===----------------------------------------------------------------------===//
183
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
184
//===----------------------------------------------------------------------===//
185

186
/// Convert APF to an integer, if possible.
187
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
188
  bool isExact = false;
189
  // See if we can convert this to an int64_t
190
  uint64_t UIntVal;
191
  if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
192
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
193
      !isExact)
194
    return false;
195
  IntVal = UIntVal;
196
  return true;
197
}
198

199
/// If the loop has floating induction variable then insert corresponding
200
/// integer induction variable if possible.
201
/// For example,
202
/// for(double i = 0; i < 10000; ++i)
203
///   bar(i)
204
/// is converted into
205
/// for(int i = 0; i < 10000; ++i)
206
///   bar((double)i);
207
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
208
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
209
  unsigned BackEdge     = IncomingEdge^1;
210

211
  // Check incoming value.
212
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
213

214
  int64_t InitValue;
215
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
216
    return false;
217

218
  // Check IV increment. Reject this PN if increment operation is not
219
  // an add or increment value can not be represented by an integer.
220
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
221
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
222

223
  // If this is not an add of the PHI with a constantfp, or if the constant fp
224
  // is not an integer, bail out.
225
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
226
  int64_t IncValue;
227
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
228
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
229
    return false;
230

231
  // Check Incr uses. One user is PN and the other user is an exit condition
232
  // used by the conditional terminator.
233
  Value::user_iterator IncrUse = Incr->user_begin();
234
  Instruction *U1 = cast<Instruction>(*IncrUse++);
235
  if (IncrUse == Incr->user_end()) return false;
236
  Instruction *U2 = cast<Instruction>(*IncrUse++);
237
  if (IncrUse != Incr->user_end()) return false;
238

239
  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
240
  // only used by a branch, we can't transform it.
241
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
242
  if (!Compare)
243
    Compare = dyn_cast<FCmpInst>(U2);
244
  if (!Compare || !Compare->hasOneUse() ||
245
      !isa<BranchInst>(Compare->user_back()))
246
    return false;
247

248
  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
249

250
  // We need to verify that the branch actually controls the iteration count
251
  // of the loop.  If not, the new IV can overflow and no one will notice.
252
  // The branch block must be in the loop and one of the successors must be out
253
  // of the loop.
254
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
255
  if (!L->contains(TheBr->getParent()) ||
256
      (L->contains(TheBr->getSuccessor(0)) &&
257
       L->contains(TheBr->getSuccessor(1))))
258
    return false;
259

260
  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
261
  // transform it.
262
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
263
  int64_t ExitValue;
264
  if (ExitValueVal == nullptr ||
265
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
266
    return false;
267

268
  // Find new predicate for integer comparison.
269
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
270
  switch (Compare->getPredicate()) {
271
  default: return false;  // Unknown comparison.
272
  case CmpInst::FCMP_OEQ:
273
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
274
  case CmpInst::FCMP_ONE:
275
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
276
  case CmpInst::FCMP_OGT:
277
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
278
  case CmpInst::FCMP_OGE:
279
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
280
  case CmpInst::FCMP_OLT:
281
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
282
  case CmpInst::FCMP_OLE:
283
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
284
  }
285

286
  // We convert the floating point induction variable to a signed i32 value if
287
  // we can.  This is only safe if the comparison will not overflow in a way
288
  // that won't be trapped by the integer equivalent operations.  Check for this
289
  // now.
290
  // TODO: We could use i64 if it is native and the range requires it.
291

292
  // The start/stride/exit values must all fit in signed i32.
293
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
294
    return false;
295

296
  // If not actually striding (add x, 0.0), avoid touching the code.
297
  if (IncValue == 0)
298
    return false;
299

300
  // Positive and negative strides have different safety conditions.
301
  if (IncValue > 0) {
302
    // If we have a positive stride, we require the init to be less than the
303
    // exit value.
304
    if (InitValue >= ExitValue)
305
      return false;
306

307
    uint32_t Range = uint32_t(ExitValue-InitValue);
308
    // Check for infinite loop, either:
309
    // while (i <= Exit) or until (i > Exit)
310
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
311
      if (++Range == 0) return false;  // Range overflows.
312
    }
313

314
    unsigned Leftover = Range % uint32_t(IncValue);
315

316
    // If this is an equality comparison, we require that the strided value
317
    // exactly land on the exit value, otherwise the IV condition will wrap
318
    // around and do things the fp IV wouldn't.
319
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
320
        Leftover != 0)
321
      return false;
322

323
    // If the stride would wrap around the i32 before exiting, we can't
324
    // transform the IV.
325
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
326
      return false;
327
  } else {
328
    // If we have a negative stride, we require the init to be greater than the
329
    // exit value.
330
    if (InitValue <= ExitValue)
331
      return false;
332

333
    uint32_t Range = uint32_t(InitValue-ExitValue);
334
    // Check for infinite loop, either:
335
    // while (i >= Exit) or until (i < Exit)
336
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
337
      if (++Range == 0) return false;  // Range overflows.
338
    }
339

340
    unsigned Leftover = Range % uint32_t(-IncValue);
341

342
    // If this is an equality comparison, we require that the strided value
343
    // exactly land on the exit value, otherwise the IV condition will wrap
344
    // around and do things the fp IV wouldn't.
345
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
346
        Leftover != 0)
347
      return false;
348

349
    // If the stride would wrap around the i32 before exiting, we can't
350
    // transform the IV.
351
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
352
      return false;
353
  }
354

355
  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
356

357
  // Insert new integer induction variable.
358
  PHINode *NewPHI =
359
      PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
360
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
361
                      PN->getIncomingBlock(IncomingEdge));
362
  NewPHI->setDebugLoc(PN->getDebugLoc());
363

364
  Instruction *NewAdd =
365
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
366
                                Incr->getName() + ".int", Incr->getIterator());
367
  NewAdd->setDebugLoc(Incr->getDebugLoc());
368
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
369

370
  ICmpInst *NewCompare =
371
      new ICmpInst(TheBr->getIterator(), NewPred, NewAdd,
372
                   ConstantInt::get(Int32Ty, ExitValue), Compare->getName());
373
  NewCompare->setDebugLoc(Compare->getDebugLoc());
374

375
  // In the following deletions, PN may become dead and may be deleted.
376
  // Use a WeakTrackingVH to observe whether this happens.
377
  WeakTrackingVH WeakPH = PN;
378

379
  // Delete the old floating point exit comparison.  The branch starts using the
380
  // new comparison.
381
  NewCompare->takeName(Compare);
382
  Compare->replaceAllUsesWith(NewCompare);
383
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
384

385
  // Delete the old floating point increment.
386
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
387
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
388

389
  // If the FP induction variable still has uses, this is because something else
390
  // in the loop uses its value.  In order to canonicalize the induction
391
  // variable, we chose to eliminate the IV and rewrite it in terms of an
392
  // int->fp cast.
393
  //
394
  // We give preference to sitofp over uitofp because it is faster on most
395
  // platforms.
396
  if (WeakPH) {
397
    Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
398
                                       PN->getParent()->getFirstInsertionPt());
399
    Conv->setDebugLoc(PN->getDebugLoc());
400
    PN->replaceAllUsesWith(Conv);
401
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
402
  }
403
  return true;
404
}
405

406
bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
407
  // First step.  Check to see if there are any floating-point recurrences.
408
  // If there are, change them into integer recurrences, permitting analysis by
409
  // the SCEV routines.
410
  BasicBlock *Header = L->getHeader();
411

412
  SmallVector<WeakTrackingVH, 8> PHIs;
413
  for (PHINode &PN : Header->phis())
414
    PHIs.push_back(&PN);
415

416
  bool Changed = false;
417
  for (WeakTrackingVH &PHI : PHIs)
418
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
419
      Changed |= handleFloatingPointIV(L, PN);
420

421
  // If the loop previously had floating-point IV, ScalarEvolution
422
  // may not have been able to compute a trip count. Now that we've done some
423
  // re-writing, the trip count may be computable.
424
  if (Changed)
425
    SE->forgetLoop(L);
426
  return Changed;
427
}
428

429
//===---------------------------------------------------------------------===//
430
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
431
// they will exit at the first iteration.
432
//===---------------------------------------------------------------------===//
433

434
/// Check to see if this loop has loop invariant conditions which lead to loop
435
/// exits. If so, we know that if the exit path is taken, it is at the first
436
/// loop iteration. This lets us predict exit values of PHI nodes that live in
437
/// loop header.
438
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
439
  // Verify the input to the pass is already in LCSSA form.
440
  assert(L->isLCSSAForm(*DT));
441

442
  SmallVector<BasicBlock *, 8> ExitBlocks;
443
  L->getUniqueExitBlocks(ExitBlocks);
444

445
  bool MadeAnyChanges = false;
446
  for (auto *ExitBB : ExitBlocks) {
447
    // If there are no more PHI nodes in this exit block, then no more
448
    // values defined inside the loop are used on this path.
449
    for (PHINode &PN : ExitBB->phis()) {
450
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
451
           IncomingValIdx != E; ++IncomingValIdx) {
452
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
453

454
        // Can we prove that the exit must run on the first iteration if it
455
        // runs at all?  (i.e. early exits are fine for our purposes, but
456
        // traces which lead to this exit being taken on the 2nd iteration
457
        // aren't.)  Note that this is about whether the exit branch is
458
        // executed, not about whether it is taken.
459
        if (!L->getLoopLatch() ||
460
            !DT->dominates(IncomingBB, L->getLoopLatch()))
461
          continue;
462

463
        // Get condition that leads to the exit path.
464
        auto *TermInst = IncomingBB->getTerminator();
465

466
        Value *Cond = nullptr;
467
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
468
          // Must be a conditional branch, otherwise the block
469
          // should not be in the loop.
470
          Cond = BI->getCondition();
471
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
472
          Cond = SI->getCondition();
473
        else
474
          continue;
475

476
        if (!L->isLoopInvariant(Cond))
477
          continue;
478

479
        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
480

481
        // Only deal with PHIs in the loop header.
482
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
483
          continue;
484

485
        // If ExitVal is a PHI on the loop header, then we know its
486
        // value along this exit because the exit can only be taken
487
        // on the first iteration.
488
        auto *LoopPreheader = L->getLoopPreheader();
489
        assert(LoopPreheader && "Invalid loop");
490
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
491
        if (PreheaderIdx != -1) {
492
          assert(ExitVal->getParent() == L->getHeader() &&
493
                 "ExitVal must be in loop header");
494
          MadeAnyChanges = true;
495
          PN.setIncomingValue(IncomingValIdx,
496
                              ExitVal->getIncomingValue(PreheaderIdx));
497
          SE->forgetValue(&PN);
498
        }
499
      }
500
    }
501
  }
502
  return MadeAnyChanges;
503
}
504

505
//===----------------------------------------------------------------------===//
506
//  IV Widening - Extend the width of an IV to cover its widest uses.
507
//===----------------------------------------------------------------------===//
508

509
/// Update information about the induction variable that is extended by this
510
/// sign or zero extend operation. This is used to determine the final width of
511
/// the IV before actually widening it.
512
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
513
                        ScalarEvolution *SE,
514
                        const TargetTransformInfo *TTI) {
515
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
516
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
517
    return;
518

519
  Type *Ty = Cast->getType();
520
  uint64_t Width = SE->getTypeSizeInBits(Ty);
521
  if (!Cast->getDataLayout().isLegalInteger(Width))
522
    return;
523

524
  // Check that `Cast` actually extends the induction variable (we rely on this
525
  // later).  This takes care of cases where `Cast` is extending a truncation of
526
  // the narrow induction variable, and thus can end up being narrower than the
527
  // "narrow" induction variable.
528
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
529
  if (NarrowIVWidth >= Width)
530
    return;
531

532
  // Cast is either an sext or zext up to this point.
533
  // We should not widen an indvar if arithmetics on the wider indvar are more
534
  // expensive than those on the narrower indvar. We check only the cost of ADD
535
  // because at least an ADD is required to increment the induction variable. We
536
  // could compute more comprehensively the cost of all instructions on the
537
  // induction variable when necessary.
538
  if (TTI &&
539
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
540
          TTI->getArithmeticInstrCost(Instruction::Add,
541
                                      Cast->getOperand(0)->getType())) {
542
    return;
543
  }
544

545
  if (!WI.WidestNativeType ||
546
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
547
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
548
    WI.IsSigned = IsSigned;
549
    return;
550
  }
551

552
  // We extend the IV to satisfy the sign of its user(s), or 'signed'
553
  // if there are multiple users with both sign- and zero extensions,
554
  // in order not to introduce nondeterministic behaviour based on the
555
  // unspecified order of a PHI nodes' users-iterator.
556
  WI.IsSigned |= IsSigned;
557
}
558

559
//===----------------------------------------------------------------------===//
560
//  Live IV Reduction - Minimize IVs live across the loop.
561
//===----------------------------------------------------------------------===//
562

563
//===----------------------------------------------------------------------===//
564
//  Simplification of IV users based on SCEV evaluation.
565
//===----------------------------------------------------------------------===//
566

567
namespace {
568

569
class IndVarSimplifyVisitor : public IVVisitor {
570
  ScalarEvolution *SE;
571
  const TargetTransformInfo *TTI;
572
  PHINode *IVPhi;
573

574
public:
575
  WideIVInfo WI;
576

577
  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
578
                        const TargetTransformInfo *TTI,
579
                        const DominatorTree *DTree)
580
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
581
    DT = DTree;
582
    WI.NarrowIV = IVPhi;
583
  }
584

585
  // Implement the interface used by simplifyUsersOfIV.
586
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
587
};
588

589
} // end anonymous namespace
590

591
/// Iteratively perform simplification on a worklist of IV users. Each
592
/// successive simplification may push more users which may themselves be
593
/// candidates for simplification.
594
///
595
/// Sign/Zero extend elimination is interleaved with IV simplification.
596
bool IndVarSimplify::simplifyAndExtend(Loop *L,
597
                                       SCEVExpander &Rewriter,
598
                                       LoopInfo *LI) {
599
  SmallVector<WideIVInfo, 8> WideIVs;
600

601
  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
602
          Intrinsic::getName(Intrinsic::experimental_guard));
603
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
604

605
  SmallVector<PHINode *, 8> LoopPhis;
606
  for (PHINode &PN : L->getHeader()->phis())
607
    LoopPhis.push_back(&PN);
608

609
  // Each round of simplification iterates through the SimplifyIVUsers worklist
610
  // for all current phis, then determines whether any IVs can be
611
  // widened. Widening adds new phis to LoopPhis, inducing another round of
612
  // simplification on the wide IVs.
613
  bool Changed = false;
614
  while (!LoopPhis.empty()) {
615
    // Evaluate as many IV expressions as possible before widening any IVs. This
616
    // forces SCEV to set no-wrap flags before evaluating sign/zero
617
    // extension. The first time SCEV attempts to normalize sign/zero extension,
618
    // the result becomes final. So for the most predictable results, we delay
619
    // evaluation of sign/zero extend evaluation until needed, and avoid running
620
    // other SCEV based analysis prior to simplifyAndExtend.
621
    do {
622
      PHINode *CurrIV = LoopPhis.pop_back_val();
623

624
      // Information about sign/zero extensions of CurrIV.
625
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
626

627
      const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
628
                                             Rewriter, &Visitor);
629

630
      Changed |= C;
631
      RunUnswitching |= U;
632
      if (Visitor.WI.WidestNativeType) {
633
        WideIVs.push_back(Visitor.WI);
634
      }
635
    } while(!LoopPhis.empty());
636

637
    // Continue if we disallowed widening.
638
    if (!WidenIndVars)
639
      continue;
640

641
    for (; !WideIVs.empty(); WideIVs.pop_back()) {
642
      unsigned ElimExt;
643
      unsigned Widened;
644
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
645
                                          DT, DeadInsts, ElimExt, Widened,
646
                                          HasGuards, UsePostIncrementRanges)) {
647
        NumElimExt += ElimExt;
648
        NumWidened += Widened;
649
        Changed = true;
650
        LoopPhis.push_back(WidePhi);
651
      }
652
    }
653
  }
654
  return Changed;
655
}
656

657
//===----------------------------------------------------------------------===//
658
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
659
//===----------------------------------------------------------------------===//
660

661
/// Given an Value which is hoped to be part of an add recurance in the given
662
/// loop, return the associated Phi node if so.  Otherwise, return null.  Note
663
/// that this is less general than SCEVs AddRec checking.
664
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
665
  Instruction *IncI = dyn_cast<Instruction>(IncV);
666
  if (!IncI)
667
    return nullptr;
668

669
  switch (IncI->getOpcode()) {
670
  case Instruction::Add:
671
  case Instruction::Sub:
672
    break;
673
  case Instruction::GetElementPtr:
674
    // An IV counter must preserve its type.
675
    if (IncI->getNumOperands() == 2)
676
      break;
677
    [[fallthrough]];
678
  default:
679
    return nullptr;
680
  }
681

682
  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
683
  if (Phi && Phi->getParent() == L->getHeader()) {
684
    if (L->isLoopInvariant(IncI->getOperand(1)))
685
      return Phi;
686
    return nullptr;
687
  }
688
  if (IncI->getOpcode() == Instruction::GetElementPtr)
689
    return nullptr;
690

691
  // Allow add/sub to be commuted.
692
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
693
  if (Phi && Phi->getParent() == L->getHeader()) {
694
    if (L->isLoopInvariant(IncI->getOperand(0)))
695
      return Phi;
696
  }
697
  return nullptr;
698
}
699

700
/// Whether the current loop exit test is based on this value.  Currently this
701
/// is limited to a direct use in the loop condition.
702
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
703
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
704
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
705
  // TODO: Allow non-icmp loop test.
706
  if (!ICmp)
707
    return false;
708

709
  // TODO: Allow indirect use.
710
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
711
}
712

713
/// linearFunctionTestReplace policy. Return true unless we can show that the
714
/// current exit test is already sufficiently canonical.
715
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
716
  assert(L->getLoopLatch() && "Must be in simplified form");
717

718
  // Avoid converting a constant or loop invariant test back to a runtime
719
  // test.  This is critical for when SCEV's cached ExitCount is less precise
720
  // than the current IR (such as after we've proven a particular exit is
721
  // actually dead and thus the BE count never reaches our ExitCount.)
722
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
723
  if (L->isLoopInvariant(BI->getCondition()))
724
    return false;
725

726
  // Do LFTR to simplify the exit condition to an ICMP.
727
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
728
  if (!Cond)
729
    return true;
730

731
  // Do LFTR to simplify the exit ICMP to EQ/NE
732
  ICmpInst::Predicate Pred = Cond->getPredicate();
733
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
734
    return true;
735

736
  // Look for a loop invariant RHS
737
  Value *LHS = Cond->getOperand(0);
738
  Value *RHS = Cond->getOperand(1);
739
  if (!L->isLoopInvariant(RHS)) {
740
    if (!L->isLoopInvariant(LHS))
741
      return true;
742
    std::swap(LHS, RHS);
743
  }
744
  // Look for a simple IV counter LHS
745
  PHINode *Phi = dyn_cast<PHINode>(LHS);
746
  if (!Phi)
747
    Phi = getLoopPhiForCounter(LHS, L);
748

749
  if (!Phi)
750
    return true;
751

752
  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
753
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
754
  if (Idx < 0)
755
    return true;
756

757
  // Do LFTR if the exit condition's IV is *not* a simple counter.
758
  Value *IncV = Phi->getIncomingValue(Idx);
759
  return Phi != getLoopPhiForCounter(IncV, L);
760
}
761

762
/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
763
/// down to checking that all operands are constant and listing instructions
764
/// that may hide undef.
765
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
766
                               unsigned Depth) {
767
  if (isa<Constant>(V))
768
    return !isa<UndefValue>(V);
769

770
  if (Depth >= 6)
771
    return false;
772

773
  // Conservatively handle non-constant non-instructions. For example, Arguments
774
  // may be undef.
775
  Instruction *I = dyn_cast<Instruction>(V);
776
  if (!I)
777
    return false;
778

779
  // Load and return values may be undef.
780
  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
781
    return false;
782

783
  // Optimistically handle other instructions.
784
  for (Value *Op : I->operands()) {
785
    if (!Visited.insert(Op).second)
786
      continue;
787
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
788
      return false;
789
  }
790
  return true;
791
}
792

793
/// Return true if the given value is concrete. We must prove that undef can
794
/// never reach it.
795
///
796
/// TODO: If we decide that this is a good approach to checking for undef, we
797
/// may factor it into a common location.
798
static bool hasConcreteDef(Value *V) {
799
  SmallPtrSet<Value*, 8> Visited;
800
  Visited.insert(V);
801
  return hasConcreteDefImpl(V, Visited, 0);
802
}
803

804
/// Return true if the given phi is a "counter" in L.  A counter is an
805
/// add recurance (of integer or pointer type) with an arbitrary start, and a
806
/// step of 1.  Note that L must have exactly one latch.
807
static bool isLoopCounter(PHINode* Phi, Loop *L,
808
                          ScalarEvolution *SE) {
809
  assert(Phi->getParent() == L->getHeader());
810
  assert(L->getLoopLatch());
811

812
  if (!SE->isSCEVable(Phi->getType()))
813
    return false;
814

815
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
816
  if (!AR || AR->getLoop() != L || !AR->isAffine())
817
    return false;
818

819
  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
820
  if (!Step || !Step->isOne())
821
    return false;
822

823
  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
824
  Value *IncV = Phi->getIncomingValue(LatchIdx);
825
  return (getLoopPhiForCounter(IncV, L) == Phi &&
826
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
827
}
828

829
/// Search the loop header for a loop counter (anadd rec w/step of one)
830
/// suitable for use by LFTR.  If multiple counters are available, select the
831
/// "best" one based profitable heuristics.
832
///
833
/// BECount may be an i8* pointer type. The pointer difference is already
834
/// valid count without scaling the address stride, so it remains a pointer
835
/// expression as far as SCEV is concerned.
836
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
837
                                const SCEV *BECount,
838
                                ScalarEvolution *SE, DominatorTree *DT) {
839
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
840

841
  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
842

843
  // Loop over all of the PHI nodes, looking for a simple counter.
844
  PHINode *BestPhi = nullptr;
845
  const SCEV *BestInit = nullptr;
846
  BasicBlock *LatchBlock = L->getLoopLatch();
847
  assert(LatchBlock && "Must be in simplified form");
848
  const DataLayout &DL = L->getHeader()->getDataLayout();
849

850
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
851
    PHINode *Phi = cast<PHINode>(I);
852
    if (!isLoopCounter(Phi, L, SE))
853
      continue;
854

855
    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
856

857
    // AR may be a pointer type, while BECount is an integer type.
858
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
859
    // AR may not be a narrower type, or we may never exit.
860
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
861
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
862
      continue;
863

864
    // Avoid reusing a potentially undef value to compute other values that may
865
    // have originally had a concrete definition.
866
    if (!hasConcreteDef(Phi)) {
867
      // We explicitly allow unknown phis as long as they are already used by
868
      // the loop exit test.  This is legal since performing LFTR could not
869
      // increase the number of undef users.
870
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
871
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
872
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
873
        continue;
874
    }
875

876
    // Avoid introducing undefined behavior due to poison which didn't exist in
877
    // the original program.  (Annoyingly, the rules for poison and undef
878
    // propagation are distinct, so this does NOT cover the undef case above.)
879
    // We have to ensure that we don't introduce UB by introducing a use on an
880
    // iteration where said IV produces poison.  Our strategy here differs for
881
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
882
    // see code in linearFunctionTestReplace.  For pointers, we restrict
883
    // transforms as there is no good way to reinfer inbounds once lost.
884
    if (!Phi->getType()->isIntegerTy() &&
885
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
886
      continue;
887

888
    const SCEV *Init = AR->getStart();
889

890
    if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
891
      // Don't force a live loop counter if another IV can be used.
892
      if (isAlmostDeadIV(Phi, LatchBlock, Cond))
893
        continue;
894

895
      // Prefer to count-from-zero. This is a more "canonical" counter form. It
896
      // also prefers integer to pointer IVs.
897
      if (BestInit->isZero() != Init->isZero()) {
898
        if (BestInit->isZero())
899
          continue;
900
      }
901
      // If two IVs both count from zero or both count from nonzero then the
902
      // narrower is likely a dead phi that has been widened. Use the wider phi
903
      // to allow the other to be eliminated.
904
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
905
        continue;
906
    }
907
    BestPhi = Phi;
908
    BestInit = Init;
909
  }
910
  return BestPhi;
911
}
912

913
/// Insert an IR expression which computes the value held by the IV IndVar
914
/// (which must be an loop counter w/unit stride) after the backedge of loop L
915
/// is taken ExitCount times.
916
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
917
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
918
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
919
  assert(isLoopCounter(IndVar, L, SE));
920
  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
921
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
922
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
923

924
  // For integer IVs, truncate the IV before computing the limit unless we
925
  // know apriori that the limit must be a constant when evaluated in the
926
  // bitwidth of the IV.  We prefer (potentially) keeping a truncate of the
927
  // IV in the loop over a (potentially) expensive expansion of the widened
928
  // exit count add(zext(add)) expression.
929
  if (IndVar->getType()->isIntegerTy() &&
930
      SE->getTypeSizeInBits(AR->getType()) >
931
      SE->getTypeSizeInBits(ExitCount->getType())) {
932
    const SCEV *IVInit = AR->getStart();
933
    if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
934
      AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
935
  }
936

937
  const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
938
  const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
939
  assert(SE->isLoopInvariant(IVLimit, L) &&
940
         "Computed iteration count is not loop invariant!");
941
  return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
942
                                ExitingBB->getTerminator());
943
}
944

945
/// This method rewrites the exit condition of the loop to be a canonical !=
946
/// comparison against the incremented loop induction variable.  This pass is
947
/// able to rewrite the exit tests of any loop where the SCEV analysis can
948
/// determine a loop-invariant trip count of the loop, which is actually a much
949
/// broader range than just linear tests.
950
bool IndVarSimplify::
951
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
952
                          const SCEV *ExitCount,
953
                          PHINode *IndVar, SCEVExpander &Rewriter) {
954
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
955
  assert(isLoopCounter(IndVar, L, SE));
956
  Instruction * const IncVar =
957
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
958

959
  // Initialize CmpIndVar to the preincremented IV.
960
  Value *CmpIndVar = IndVar;
961
  bool UsePostInc = false;
962

963
  // If the exiting block is the same as the backedge block, we prefer to
964
  // compare against the post-incremented value, otherwise we must compare
965
  // against the preincremented value.
966
  if (ExitingBB == L->getLoopLatch()) {
967
    // For pointer IVs, we chose to not strip inbounds which requires us not
968
    // to add a potentially UB introducing use.  We need to either a) show
969
    // the loop test we're modifying is already in post-inc form, or b) show
970
    // that adding a use must not introduce UB.
971
    bool SafeToPostInc =
972
        IndVar->getType()->isIntegerTy() ||
973
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
974
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
975
    if (SafeToPostInc) {
976
      UsePostInc = true;
977
      CmpIndVar = IncVar;
978
    }
979
  }
980

981
  // It may be necessary to drop nowrap flags on the incrementing instruction
982
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
983
  // case the increment might have previously been poison on the last iteration
984
  // only) or if LFTR switches to a different IV that was previously dynamically
985
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
986
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
987
  // check), because the pre-inc addrec flags may be adopted from the original
988
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
989
  // TODO: This handling is inaccurate for one case: If we switch to a
990
  // dynamically dead IV that wraps on the first loop iteration only, which is
991
  // not covered by the post-inc addrec. (If the new IV was not dynamically
992
  // dead, it could not be poison on the first iteration in the first place.)
993
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
994
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
995
    if (BO->hasNoUnsignedWrap())
996
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
997
    if (BO->hasNoSignedWrap())
998
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
999
  }
1000

1001
  Value *ExitCnt = genLoopLimit(
1002
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
1003
  assert(ExitCnt->getType()->isPointerTy() ==
1004
             IndVar->getType()->isPointerTy() &&
1005
         "genLoopLimit missed a cast");
1006

1007
  // Insert a new icmp_ne or icmp_eq instruction before the branch.
1008
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1009
  ICmpInst::Predicate P;
1010
  if (L->contains(BI->getSuccessor(0)))
1011
    P = ICmpInst::ICMP_NE;
1012
  else
1013
    P = ICmpInst::ICMP_EQ;
1014

1015
  IRBuilder<> Builder(BI);
1016

1017
  // The new loop exit condition should reuse the debug location of the
1018
  // original loop exit condition.
1019
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1020
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1021

1022
  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1023
  // avoid the expensive expansion of the limit expression in the wider type,
1024
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
1025
  // since we know (from the exit count bitwidth), that we can't self-wrap in
1026
  // the narrower type.
1027
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1028
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1029
  if (CmpIndVarSize > ExitCntSize) {
1030
    assert(!CmpIndVar->getType()->isPointerTy() &&
1031
           !ExitCnt->getType()->isPointerTy());
1032

1033
    // Before resorting to actually inserting the truncate, use the same
1034
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1035
    // the other side of the comparison instead.  We still evaluate the limit
1036
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1037
    // a truncate within in.
1038
    bool Extended = false;
1039
    const SCEV *IV = SE->getSCEV(CmpIndVar);
1040
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
1041
    const SCEV *ZExtTrunc =
1042
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1043

1044
    if (ZExtTrunc == IV) {
1045
      Extended = true;
1046
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1047
                                   "wide.trip.count");
1048
    } else {
1049
      const SCEV *SExtTrunc =
1050
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1051
      if (SExtTrunc == IV) {
1052
        Extended = true;
1053
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1054
                                     "wide.trip.count");
1055
      }
1056
    }
1057

1058
    if (Extended) {
1059
      bool Discard;
1060
      L->makeLoopInvariant(ExitCnt, Discard);
1061
    } else
1062
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1063
                                      "lftr.wideiv");
1064
  }
1065
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1066
                    << "      LHS:" << *CmpIndVar << '\n'
1067
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1068
                    << "\n"
1069
                    << "      RHS:\t" << *ExitCnt << "\n"
1070
                    << "ExitCount:\t" << *ExitCount << "\n"
1071
                    << "  was: " << *BI->getCondition() << "\n");
1072

1073
  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1074
  Value *OrigCond = BI->getCondition();
1075
  // It's tempting to use replaceAllUsesWith here to fully replace the old
1076
  // comparison, but that's not immediately safe, since users of the old
1077
  // comparison may not be dominated by the new comparison. Instead, just
1078
  // update the branch to use the new comparison; in the common case this
1079
  // will make old comparison dead.
1080
  BI->setCondition(Cond);
1081
  DeadInsts.emplace_back(OrigCond);
1082

1083
  ++NumLFTR;
1084
  return true;
1085
}
1086

1087
//===----------------------------------------------------------------------===//
1088
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1089
//===----------------------------------------------------------------------===//
1090

1091
/// If there's a single exit block, sink any loop-invariant values that
1092
/// were defined in the preheader but not used inside the loop into the
1093
/// exit block to reduce register pressure in the loop.
1094
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1095
  BasicBlock *ExitBlock = L->getExitBlock();
1096
  if (!ExitBlock) return false;
1097

1098
  BasicBlock *Preheader = L->getLoopPreheader();
1099
  if (!Preheader) return false;
1100

1101
  bool MadeAnyChanges = false;
1102
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1103
  BasicBlock::iterator I(Preheader->getTerminator());
1104
  while (I != Preheader->begin()) {
1105
    --I;
1106
    // New instructions were inserted at the end of the preheader.
1107
    if (isa<PHINode>(I))
1108
      break;
1109

1110
    // Don't move instructions which might have side effects, since the side
1111
    // effects need to complete before instructions inside the loop.  Also don't
1112
    // move instructions which might read memory, since the loop may modify
1113
    // memory. Note that it's okay if the instruction might have undefined
1114
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
1115
    // block.
1116
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1117
      continue;
1118

1119
    // Skip debug info intrinsics.
1120
    if (isa<DbgInfoIntrinsic>(I))
1121
      continue;
1122

1123
    // Skip eh pad instructions.
1124
    if (I->isEHPad())
1125
      continue;
1126

1127
    // Don't sink alloca: we never want to sink static alloca's out of the
1128
    // entry block, and correctly sinking dynamic alloca's requires
1129
    // checks for stacksave/stackrestore intrinsics.
1130
    // FIXME: Refactor this check somehow?
1131
    if (isa<AllocaInst>(I))
1132
      continue;
1133

1134
    // Determine if there is a use in or before the loop (direct or
1135
    // otherwise).
1136
    bool UsedInLoop = false;
1137
    for (Use &U : I->uses()) {
1138
      Instruction *User = cast<Instruction>(U.getUser());
1139
      BasicBlock *UseBB = User->getParent();
1140
      if (PHINode *P = dyn_cast<PHINode>(User)) {
1141
        unsigned i =
1142
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1143
        UseBB = P->getIncomingBlock(i);
1144
      }
1145
      if (UseBB == Preheader || L->contains(UseBB)) {
1146
        UsedInLoop = true;
1147
        break;
1148
      }
1149
    }
1150

1151
    // If there is, the def must remain in the preheader.
1152
    if (UsedInLoop)
1153
      continue;
1154

1155
    // Otherwise, sink it to the exit block.
1156
    Instruction *ToMove = &*I;
1157
    bool Done = false;
1158

1159
    if (I != Preheader->begin()) {
1160
      // Skip debug info intrinsics.
1161
      do {
1162
        --I;
1163
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1164

1165
      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1166
        Done = true;
1167
    } else {
1168
      Done = true;
1169
    }
1170

1171
    MadeAnyChanges = true;
1172
    ToMove->moveBefore(*ExitBlock, InsertPt);
1173
    SE->forgetValue(ToMove);
1174
    if (Done) break;
1175
    InsertPt = ToMove->getIterator();
1176
  }
1177

1178
  return MadeAnyChanges;
1179
}
1180

1181
static void replaceExitCond(BranchInst *BI, Value *NewCond,
1182
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1183
  auto *OldCond = BI->getCondition();
1184
  LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1185
                    << " with " << *NewCond << "\n");
1186
  BI->setCondition(NewCond);
1187
  if (OldCond->use_empty())
1188
    DeadInsts.emplace_back(OldCond);
1189
}
1190

1191
static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1192
                                      bool IsTaken) {
1193
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1194
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1195
  auto *OldCond = BI->getCondition();
1196
  return ConstantInt::get(OldCond->getType(),
1197
                          IsTaken ? ExitIfTrue : !ExitIfTrue);
1198
}
1199

1200
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1201
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1202
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1203
  auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1204
  replaceExitCond(BI, NewCond, DeadInsts);
1205
}
1206

1207
static void replaceLoopPHINodesWithPreheaderValues(
1208
    LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1209
    ScalarEvolution &SE) {
1210
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1211
  auto *LoopPreheader = L->getLoopPreheader();
1212
  auto *LoopHeader = L->getHeader();
1213
  SmallVector<Instruction *> Worklist;
1214
  for (auto &PN : LoopHeader->phis()) {
1215
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1216
    for (User *U : PN.users())
1217
      Worklist.push_back(cast<Instruction>(U));
1218
    SE.forgetValue(&PN);
1219
    PN.replaceAllUsesWith(PreheaderIncoming);
1220
    DeadInsts.emplace_back(&PN);
1221
  }
1222

1223
  // Replacing with the preheader value will often allow IV users to simplify
1224
  // (especially if the preheader value is a constant).
1225
  SmallPtrSet<Instruction *, 16> Visited;
1226
  while (!Worklist.empty()) {
1227
    auto *I = cast<Instruction>(Worklist.pop_back_val());
1228
    if (!Visited.insert(I).second)
1229
      continue;
1230

1231
    // Don't simplify instructions outside the loop.
1232
    if (!L->contains(I))
1233
      continue;
1234

1235
    Value *Res = simplifyInstruction(I, I->getDataLayout());
1236
    if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1237
      for (User *U : I->users())
1238
        Worklist.push_back(cast<Instruction>(U));
1239
      I->replaceAllUsesWith(Res);
1240
      DeadInsts.emplace_back(I);
1241
    }
1242
  }
1243
}
1244

1245
static Value *
1246
createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
1247
                    const ScalarEvolution::LoopInvariantPredicate &LIP,
1248
                    SCEVExpander &Rewriter) {
1249
  ICmpInst::Predicate InvariantPred = LIP.Pred;
1250
  BasicBlock *Preheader = L->getLoopPreheader();
1251
  assert(Preheader && "Preheader doesn't exist");
1252
  Rewriter.setInsertPoint(Preheader->getTerminator());
1253
  auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1254
  auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1255
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1256
  if (ExitIfTrue)
1257
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1258
  IRBuilder<> Builder(Preheader->getTerminator());
1259
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1260
  return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1261
                            BI->getCondition()->getName());
1262
}
1263

1264
static std::optional<Value *>
1265
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1266
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1267
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
1268
  ICmpInst::Predicate Pred = ICmp->getPredicate();
1269
  Value *LHS = ICmp->getOperand(0);
1270
  Value *RHS = ICmp->getOperand(1);
1271

1272
  // 'LHS pred RHS' should now mean that we stay in loop.
1273
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1274
  if (Inverted)
1275
    Pred = CmpInst::getInversePredicate(Pred);
1276

1277
  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1278
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1279
  // Can we prove it to be trivially true or false?
1280
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1281
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1282

1283
  auto *ARTy = LHSS->getType();
1284
  auto *MaxIterTy = MaxIter->getType();
1285
  // If possible, adjust types.
1286
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1287
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1288
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1289
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
1290
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1291
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1292
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1293
  }
1294

1295
  if (SkipLastIter) {
1296
    // Semantically skip last iter is "subtract 1, do not bother about unsigned
1297
    // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1298
    // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1299
    // So we manually construct umin(a - 1, b - 1).
1300
    SmallVector<const SCEV *, 4> Elements;
1301
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1302
      for (auto *Op : UMin->operands())
1303
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1304
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1305
    } else
1306
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1307
  }
1308

1309
  // Check if there is a loop-invariant predicate equivalent to our check.
1310
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1311
                                                               L, BI, MaxIter);
1312
  if (!LIP)
1313
    return std::nullopt;
1314

1315
  // Can we prove it to be trivially true?
1316
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1317
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1318
  else
1319
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1320
}
1321

1322
static bool optimizeLoopExitWithUnknownExitCount(
1323
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1324
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1325
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1326
  assert(
1327
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1328
      "Not a loop exit!");
1329

1330
  // For branch that stays in loop by TRUE condition, go through AND. For branch
1331
  // that stays in loop by FALSE condition, go through OR. Both gives the
1332
  // similar logic: "stay in loop iff all conditions are true(false)".
1333
  bool Inverted = L->contains(BI->getSuccessor(1));
1334
  SmallVector<ICmpInst *, 4> LeafConditions;
1335
  SmallVector<Value *, 4> Worklist;
1336
  SmallPtrSet<Value *, 4> Visited;
1337
  Value *OldCond = BI->getCondition();
1338
  Visited.insert(OldCond);
1339
  Worklist.push_back(OldCond);
1340

1341
  auto GoThrough = [&](Value *V) {
1342
    Value *LHS = nullptr, *RHS = nullptr;
1343
    if (Inverted) {
1344
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1345
        return false;
1346
    } else {
1347
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1348
        return false;
1349
    }
1350
    if (Visited.insert(LHS).second)
1351
      Worklist.push_back(LHS);
1352
    if (Visited.insert(RHS).second)
1353
      Worklist.push_back(RHS);
1354
    return true;
1355
  };
1356

1357
  do {
1358
    Value *Curr = Worklist.pop_back_val();
1359
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1360
    // those with one use, to avoid instruction duplication.
1361
    if (Curr->hasOneUse())
1362
      if (!GoThrough(Curr))
1363
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1364
          LeafConditions.push_back(ICmp);
1365
  } while (!Worklist.empty());
1366

1367
  // If the current basic block has the same exit count as the whole loop, and
1368
  // it consists of multiple icmp's, try to collect all icmp's that give exact
1369
  // same exit count. For all other icmp's, we could use one less iteration,
1370
  // because their value on the last iteration doesn't really matter.
1371
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1372
  if (!SkipLastIter && LeafConditions.size() > 1 &&
1373
      SE->getExitCount(L, ExitingBB,
1374
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1375
          MaxIter)
1376
    for (auto *ICmp : LeafConditions) {
1377
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1378
                                             /*ControlsExit*/ false);
1379
      auto *ExitMax = EL.SymbolicMaxNotTaken;
1380
      if (isa<SCEVCouldNotCompute>(ExitMax))
1381
        continue;
1382
      // They could be of different types (specifically this happens after
1383
      // IV widening).
1384
      auto *WiderType =
1385
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1386
      auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1387
      auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1388
      if (WideExitMax == WideMaxIter)
1389
        ICmpsFailingOnLastIter.insert(ICmp);
1390
    }
1391

1392
  bool Changed = false;
1393
  for (auto *OldCond : LeafConditions) {
1394
    // Skip last iteration for this icmp under one of two conditions:
1395
    // - We do it for all conditions;
1396
    // - There is another ICmp that would fail on last iter, so this one doesn't
1397
    // really matter.
1398
    bool OptimisticSkipLastIter = SkipLastIter;
1399
    if (!OptimisticSkipLastIter) {
1400
      if (ICmpsFailingOnLastIter.size() > 1)
1401
        OptimisticSkipLastIter = true;
1402
      else if (ICmpsFailingOnLastIter.size() == 1)
1403
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1404
    }
1405
    if (auto Replaced =
1406
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1407
                              OptimisticSkipLastIter, SE, Rewriter)) {
1408
      Changed = true;
1409
      auto *NewCond = *Replaced;
1410
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1411
        NCI->setName(OldCond->getName() + ".first_iter");
1412
      }
1413
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1414
                        << " with " << *NewCond << "\n");
1415
      assert(OldCond->hasOneUse() && "Must be!");
1416
      OldCond->replaceAllUsesWith(NewCond);
1417
      DeadInsts.push_back(OldCond);
1418
      // Make sure we no longer consider this condition as failing on last
1419
      // iteration.
1420
      ICmpsFailingOnLastIter.erase(OldCond);
1421
    }
1422
  }
1423
  return Changed;
1424
}
1425

1426
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1427
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1428
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1429
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
1430
  // it already has flags.  The alternative to this would be to extending the
1431
  // set of "interesting" IV users to include the icmp, but doing that
1432
  // regresses results in practice by querying SCEVs before trip counts which
1433
  // rely on them which results in SCEV caching sub-optimal answers.  The
1434
  // concern about caching sub-optimal results is why we only query SCEVs of
1435
  // the loop invariant RHS here.
1436
  SmallVector<BasicBlock*, 16> ExitingBlocks;
1437
  L->getExitingBlocks(ExitingBlocks);
1438
  bool Changed = false;
1439
  for (auto *ExitingBB : ExitingBlocks) {
1440
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1441
    if (!BI)
1442
      continue;
1443
    assert(BI->isConditional() && "exit branch must be conditional");
1444

1445
    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1446
    if (!ICmp || !ICmp->hasOneUse())
1447
      continue;
1448

1449
    auto *LHS = ICmp->getOperand(0);
1450
    auto *RHS = ICmp->getOperand(1);
1451
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
1452
    // poisoning cache with sub-optimal results.  For the must-execute case,
1453
    // this is a neccessary precondition for correctness.
1454
    if (!L->isLoopInvariant(RHS)) {
1455
      if (!L->isLoopInvariant(LHS))
1456
        continue;
1457
      // Same logic applies for the inverse case
1458
      std::swap(LHS, RHS);
1459
    }
1460

1461
    // Match (icmp signed-cond zext, RHS)
1462
    Value *LHSOp = nullptr;
1463
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1464
      continue;
1465

1466
    const DataLayout &DL = ExitingBB->getDataLayout();
1467
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1468
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1469
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
1470
    FullCR = FullCR.zeroExtend(OuterBitWidth);
1471
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1472
    if (FullCR.contains(RHSCR)) {
1473
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1474
      // replace the signed condition with the unsigned version.
1475
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
1476
      Changed = true;
1477
      // Note: No SCEV invalidation needed.  We've changed the predicate, but
1478
      // have not changed exit counts, or the values produced by the compare.
1479
      continue;
1480
    }
1481
  }
1482

1483
  // Now that we've canonicalized the condition to match the extend,
1484
  // see if we can rotate the extend out of the loop.
1485
  for (auto *ExitingBB : ExitingBlocks) {
1486
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1487
    if (!BI)
1488
      continue;
1489
    assert(BI->isConditional() && "exit branch must be conditional");
1490

1491
    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1492
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1493
      continue;
1494

1495
    bool Swapped = false;
1496
    auto *LHS = ICmp->getOperand(0);
1497
    auto *RHS = ICmp->getOperand(1);
1498
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1499
      // Nothing to rotate
1500
      continue;
1501
    if (L->isLoopInvariant(LHS)) {
1502
      // Same logic applies for the inverse case until we actually pick
1503
      // which operand of the compare to update.
1504
      Swapped = true;
1505
      std::swap(LHS, RHS);
1506
    }
1507
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1508

1509
    // Match (icmp unsigned-cond zext, RHS)
1510
    // TODO: Extend to handle corresponding sext/signed-cmp case
1511
    // TODO: Extend to other invertible functions
1512
    Value *LHSOp = nullptr;
1513
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1514
      continue;
1515

1516
    // In general, we only rotate if we can do so without increasing the number
1517
    // of instructions.  The exception is when we have an zext(add-rec).  The
1518
    // reason for allowing this exception is that we know we need to get rid
1519
    // of the zext for SCEV to be able to compute a trip count for said loops;
1520
    // we consider the new trip count valuable enough to increase instruction
1521
    // count by one.
1522
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1523
      continue;
1524

1525
    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1526
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1527
    // when zext is loop varying and RHS is loop invariant.  This converts
1528
    // loop varying work to loop-invariant work.
1529
    auto doRotateTransform = [&]() {
1530
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
1531
      auto *NewRHS = CastInst::Create(
1532
          Instruction::Trunc, RHS, LHSOp->getType(), "",
1533
          L->getLoopPreheader()->getTerminator()->getIterator());
1534
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1535
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1536
      if (LHS->use_empty())
1537
        DeadInsts.push_back(LHS);
1538
    };
1539

1540

1541
    const DataLayout &DL = ExitingBB->getDataLayout();
1542
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1543
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1544
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
1545
    FullCR = FullCR.zeroExtend(OuterBitWidth);
1546
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1547
    if (FullCR.contains(RHSCR)) {
1548
      doRotateTransform();
1549
      Changed = true;
1550
      // Note, we are leaving SCEV in an unfortunately imprecise case here
1551
      // as rotation tends to reveal information about trip counts not
1552
      // previously visible.
1553
      continue;
1554
    }
1555
  }
1556

1557
  return Changed;
1558
}
1559

1560
bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1561
  SmallVector<BasicBlock*, 16> ExitingBlocks;
1562
  L->getExitingBlocks(ExitingBlocks);
1563

1564
  // Remove all exits which aren't both rewriteable and execute on every
1565
  // iteration.
1566
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1567
    // If our exitting block exits multiple loops, we can only rewrite the
1568
    // innermost one.  Otherwise, we're changing how many times the innermost
1569
    // loop runs before it exits.
1570
    if (LI->getLoopFor(ExitingBB) != L)
1571
      return true;
1572

1573
    // Can't rewrite non-branch yet.
1574
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1575
    if (!BI)
1576
      return true;
1577

1578
    // Likewise, the loop latch must be dominated by the exiting BB.
1579
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1580
      return true;
1581

1582
    if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1583
      // If already constant, nothing to do. However, if this is an
1584
      // unconditional exit, we can still replace header phis with their
1585
      // preheader value.
1586
      if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1587
        replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1588
      return true;
1589
    }
1590

1591
    return false;
1592
  });
1593

1594
  if (ExitingBlocks.empty())
1595
    return false;
1596

1597
  // Get a symbolic upper bound on the loop backedge taken count.
1598
  const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1599
  if (isa<SCEVCouldNotCompute>(MaxBECount))
1600
    return false;
1601

1602
  // Visit our exit blocks in order of dominance. We know from the fact that
1603
  // all exits must dominate the latch, so there is a total dominance order
1604
  // between them.
1605
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1606
               // std::sort sorts in ascending order, so we want the inverse of
1607
               // the normal dominance relation.
1608
               if (A == B) return false;
1609
               if (DT->properlyDominates(A, B))
1610
                 return true;
1611
               else {
1612
                 assert(DT->properlyDominates(B, A) &&
1613
                        "expected total dominance order!");
1614
                 return false;
1615
               }
1616
  });
1617
#ifdef ASSERT
1618
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1619
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1620
  }
1621
#endif
1622

1623
  bool Changed = false;
1624
  bool SkipLastIter = false;
1625
  const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1626
  auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1627
    if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1628
      return;
1629
    if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1630
      CurrMaxExit = MaxExitCount;
1631
    else
1632
      CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1633
    // If the loop has more than 1 iteration, all further checks will be
1634
    // executed 1 iteration less.
1635
    if (CurrMaxExit == MaxBECount)
1636
      SkipLastIter = true;
1637
  };
1638
  SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1639
  for (BasicBlock *ExitingBB : ExitingBlocks) {
1640
    const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1641
    const SCEV *MaxExitCount = SE->getExitCount(
1642
        L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1643
    if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1644
      // Okay, we do not know the exit count here. Can we at least prove that it
1645
      // will remain the same within iteration space?
1646
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1647
      auto OptimizeCond = [&](bool SkipLastIter) {
1648
        return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1649
                                                    MaxBECount, SkipLastIter,
1650
                                                    SE, Rewriter, DeadInsts);
1651
      };
1652

1653
      // TODO: We might have proved that we can skip the last iteration for
1654
      // this check. In this case, we only want to check the condition on the
1655
      // pre-last iteration (MaxBECount - 1). However, there is a nasty
1656
      // corner case:
1657
      //
1658
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1659
      //
1660
      // If we could not prove that len != 0, then we also could not prove that
1661
      // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1662
      // OptimizeCond will likely not prove anything for it, even if it could
1663
      // prove the same fact for len.
1664
      //
1665
      // As a temporary solution, we query both last and pre-last iterations in
1666
      // hope that we will be able to prove triviality for at least one of
1667
      // them. We can stop querying MaxBECount for this case once SCEV
1668
      // understands that (MaxBECount - 1) will not overflow here.
1669
      if (OptimizeCond(false))
1670
        Changed = true;
1671
      else if (SkipLastIter && OptimizeCond(true))
1672
        Changed = true;
1673
      UpdateSkipLastIter(MaxExitCount);
1674
      continue;
1675
    }
1676

1677
    UpdateSkipLastIter(ExactExitCount);
1678

1679
    // If we know we'd exit on the first iteration, rewrite the exit to
1680
    // reflect this.  This does not imply the loop must exit through this
1681
    // exit; there may be an earlier one taken on the first iteration.
1682
    // We know that the backedge can't be taken, so we replace all
1683
    // the header PHIs with values coming from the preheader.
1684
    if (ExactExitCount->isZero()) {
1685
      foldExit(L, ExitingBB, true, DeadInsts);
1686
      replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1687
      Changed = true;
1688
      continue;
1689
    }
1690

1691
    assert(ExactExitCount->getType()->isIntegerTy() &&
1692
           MaxBECount->getType()->isIntegerTy() &&
1693
           "Exit counts must be integers");
1694

1695
    Type *WiderType =
1696
        SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1697
    ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1698
    MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1699
    assert(MaxBECount->getType() == ExactExitCount->getType());
1700

1701
    // Can we prove that some other exit must be taken strictly before this
1702
    // one?
1703
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1704
                                     ExactExitCount)) {
1705
      foldExit(L, ExitingBB, false, DeadInsts);
1706
      Changed = true;
1707
      continue;
1708
    }
1709

1710
    // As we run, keep track of which exit counts we've encountered.  If we
1711
    // find a duplicate, we've found an exit which would have exited on the
1712
    // exiting iteration, but (from the visit order) strictly follows another
1713
    // which does the same and is thus dead.
1714
    if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1715
      foldExit(L, ExitingBB, false, DeadInsts);
1716
      Changed = true;
1717
      continue;
1718
    }
1719

1720
    // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1721
    // here.  If we kept track of the min of dominanting exits so far, we could
1722
    // discharge exits with EC >= MDEC. This is less powerful than the existing
1723
    // transform (since later exits aren't considered), but potentially more
1724
    // powerful for any case where SCEV can prove a >=u b, but neither a == b
1725
    // or a >u b.  Such a case is not currently known.
1726
  }
1727
  return Changed;
1728
}
1729

1730
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1731
  SmallVector<BasicBlock*, 16> ExitingBlocks;
1732
  L->getExitingBlocks(ExitingBlocks);
1733

1734
  // Finally, see if we can rewrite our exit conditions into a loop invariant
1735
  // form. If we have a read-only loop, and we can tell that we must exit down
1736
  // a path which does not need any of the values computed within the loop, we
1737
  // can rewrite the loop to exit on the first iteration.  Note that this
1738
  // doesn't either a) tell us the loop exits on the first iteration (unless
1739
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1740
  // This transformation looks a lot like a restricted form of dead loop
1741
  // elimination, but restricted to read-only loops and without neccesssarily
1742
  // needing to kill the loop entirely.
1743
  if (!LoopPredication)
1744
    return false;
1745

1746
  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1747
  // through *explicit* control flow.  We have to eliminate the possibility of
1748
  // implicit exits (see below) before we know it's truly exact.
1749
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1750
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1751
    return false;
1752

1753
  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1754
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1755

1756
  auto BadExit = [&](BasicBlock *ExitingBB) {
1757
    // If our exiting block exits multiple loops, we can only rewrite the
1758
    // innermost one.  Otherwise, we're changing how many times the innermost
1759
    // loop runs before it exits.
1760
    if (LI->getLoopFor(ExitingBB) != L)
1761
      return true;
1762

1763
    // Can't rewrite non-branch yet.
1764
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1765
    if (!BI)
1766
      return true;
1767

1768
    // If already constant, nothing to do.
1769
    if (isa<Constant>(BI->getCondition()))
1770
      return true;
1771

1772
    // If the exit block has phis, we need to be able to compute the values
1773
    // within the loop which contains them.  This assumes trivially lcssa phis
1774
    // have already been removed; TODO: generalize
1775
    BasicBlock *ExitBlock =
1776
    BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1777
    if (!ExitBlock->phis().empty())
1778
      return true;
1779

1780
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1781
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
1782
        !Rewriter.isSafeToExpand(ExitCount))
1783
      return true;
1784

1785
    assert(SE->isLoopInvariant(ExitCount, L) &&
1786
           "Exit count must be loop invariant");
1787
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1788
    return false;
1789
  };
1790

1791
  // If we have any exits which can't be predicated themselves, than we can't
1792
  // predicate any exit which isn't guaranteed to execute before it.  Consider
1793
  // two exits (a) and (b) which would both exit on the same iteration.  If we
1794
  // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1795
  // we could convert a loop from exiting through (a) to one exiting through
1796
  // (b).  Note that this problem exists only for exits with the same exit
1797
  // count, and we could be more aggressive when exit counts are known inequal.
1798
  llvm::sort(ExitingBlocks,
1799
            [&](BasicBlock *A, BasicBlock *B) {
1800
              // std::sort sorts in ascending order, so we want the inverse of
1801
              // the normal dominance relation, plus a tie breaker for blocks
1802
              // unordered by dominance.
1803
              if (DT->properlyDominates(A, B)) return true;
1804
              if (DT->properlyDominates(B, A)) return false;
1805
              return A->getName() < B->getName();
1806
            });
1807
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
1808
  // exits before the backedge).  If they aren't, reasoning about reachability
1809
  // is complicated and we choose not to for now.
1810
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1811
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1812
      return false;
1813

1814
  // Given our sorted total order, we know that exit[j] must be evaluated
1815
  // after all exit[i] such j > i.
1816
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1817
    if (BadExit(ExitingBlocks[i])) {
1818
      ExitingBlocks.resize(i);
1819
      break;
1820
    }
1821

1822
  if (ExitingBlocks.empty())
1823
    return false;
1824

1825
  // We rely on not being able to reach an exiting block on a later iteration
1826
  // then it's statically compute exit count.  The implementaton of
1827
  // getExitCount currently has this invariant, but assert it here so that
1828
  // breakage is obvious if this ever changes..
1829
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1830
        return DT->dominates(ExitingBB, L->getLoopLatch());
1831
      }));
1832

1833
  // At this point, ExitingBlocks consists of only those blocks which are
1834
  // predicatable.  Given that, we know we have at least one exit we can
1835
  // predicate if the loop is doesn't have side effects and doesn't have any
1836
  // implicit exits (because then our exact BTC isn't actually exact).
1837
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
1838
  // suggestions on how to improve this?  I can obviously bail out for outer
1839
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
1840
  // is that enough for *all* side effects?
1841
  for (BasicBlock *BB : L->blocks())
1842
    for (auto &I : *BB)
1843
      // TODO:isGuaranteedToTransfer
1844
      if (I.mayHaveSideEffects())
1845
        return false;
1846

1847
  bool Changed = false;
1848
  // Finally, do the actual predication for all predicatable blocks.  A couple
1849
  // of notes here:
1850
  // 1) We don't bother to constant fold dominated exits with identical exit
1851
  //    counts; that's simply a form of CSE/equality propagation and we leave
1852
  //    it for dedicated passes.
1853
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
1854
  //    legality constraints and we leave that to dedicated logic.  We want to
1855
  //    predicate even if we can't insert a loop invariant expression as
1856
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
1857
  //    varying check.
1858
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1859
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1860
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
1861
  for (BasicBlock *ExitingBB : ExitingBlocks) {
1862
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1863

1864
    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1865
    Value *NewCond;
1866
    if (ExitCount == ExactBTC) {
1867
      NewCond = L->contains(BI->getSuccessor(0)) ?
1868
        B.getFalse() : B.getTrue();
1869
    } else {
1870
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
1871
      if (!ExactBTCV)
1872
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1873
      Value *RHS = ExactBTCV;
1874
      if (ECV->getType() != RHS->getType()) {
1875
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1876
        ECV = B.CreateZExt(ECV, WiderTy);
1877
        RHS = B.CreateZExt(RHS, WiderTy);
1878
      }
1879
      auto Pred = L->contains(BI->getSuccessor(0)) ?
1880
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1881
      NewCond = B.CreateICmp(Pred, ECV, RHS);
1882
    }
1883
    Value *OldCond = BI->getCondition();
1884
    BI->setCondition(NewCond);
1885
    if (OldCond->use_empty())
1886
      DeadInsts.emplace_back(OldCond);
1887
    Changed = true;
1888
    RunUnswitching = true;
1889
  }
1890

1891
  return Changed;
1892
}
1893

1894
//===----------------------------------------------------------------------===//
1895
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
1896
//===----------------------------------------------------------------------===//
1897

1898
bool IndVarSimplify::run(Loop *L) {
1899
  // We need (and expect!) the incoming loop to be in LCSSA.
1900
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1901
         "LCSSA required to run indvars!");
1902

1903
  // If LoopSimplify form is not available, stay out of trouble. Some notes:
1904
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
1905
  //    canonicalization can be a pessimization without LSR to "clean up"
1906
  //    afterwards.
1907
  //  - We depend on having a preheader; in particular,
1908
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
1909
  //    and we're in trouble if we can't find the induction variable even when
1910
  //    we've manually inserted one.
1911
  //  - LFTR relies on having a single backedge.
1912
  if (!L->isLoopSimplifyForm())
1913
    return false;
1914

1915
  bool Changed = false;
1916
  // If there are any floating-point recurrences, attempt to
1917
  // transform them to use integer recurrences.
1918
  Changed |= rewriteNonIntegerIVs(L);
1919

1920
  // Create a rewriter object which we'll use to transform the code with.
1921
  SCEVExpander Rewriter(*SE, DL, "indvars");
1922
#ifndef NDEBUG
1923
  Rewriter.setDebugType(DEBUG_TYPE);
1924
#endif
1925

1926
  // Eliminate redundant IV users.
1927
  //
1928
  // Simplification works best when run before other consumers of SCEV. We
1929
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1930
  // other expressions involving loop IVs have been evaluated. This helps SCEV
1931
  // set no-wrap flags before normalizing sign/zero extension.
1932
  Rewriter.disableCanonicalMode();
1933
  Changed |= simplifyAndExtend(L, Rewriter, LI);
1934

1935
  // Check to see if we can compute the final value of any expressions
1936
  // that are recurrent in the loop, and substitute the exit values from the
1937
  // loop into any instructions outside of the loop that use the final values
1938
  // of the current expressions.
1939
  if (ReplaceExitValue != NeverRepl) {
1940
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1941
                                             ReplaceExitValue, DeadInsts)) {
1942
      NumReplaced += Rewrites;
1943
      Changed = true;
1944
    }
1945
  }
1946

1947
  // Eliminate redundant IV cycles.
1948
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1949

1950
  // Try to convert exit conditions to unsigned and rotate computation
1951
  // out of the loop.  Note: Handles invalidation internally if needed.
1952
  Changed |= canonicalizeExitCondition(L);
1953

1954
  // Try to eliminate loop exits based on analyzeable exit counts
1955
  if (optimizeLoopExits(L, Rewriter))  {
1956
    Changed = true;
1957
    // Given we've changed exit counts, notify SCEV
1958
    // Some nested loops may share same folded exit basic block,
1959
    // thus we need to notify top most loop.
1960
    SE->forgetTopmostLoop(L);
1961
  }
1962

1963
  // Try to form loop invariant tests for loop exits by changing how many
1964
  // iterations of the loop run when that is unobservable.
1965
  if (predicateLoopExits(L, Rewriter)) {
1966
    Changed = true;
1967
    // Given we've changed exit counts, notify SCEV
1968
    SE->forgetLoop(L);
1969
  }
1970

1971
  // If we have a trip count expression, rewrite the loop's exit condition
1972
  // using it.
1973
  if (!DisableLFTR) {
1974
    BasicBlock *PreHeader = L->getLoopPreheader();
1975

1976
    SmallVector<BasicBlock*, 16> ExitingBlocks;
1977
    L->getExitingBlocks(ExitingBlocks);
1978
    for (BasicBlock *ExitingBB : ExitingBlocks) {
1979
      // Can't rewrite non-branch yet.
1980
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
1981
        continue;
1982

1983
      // If our exitting block exits multiple loops, we can only rewrite the
1984
      // innermost one.  Otherwise, we're changing how many times the innermost
1985
      // loop runs before it exits.
1986
      if (LI->getLoopFor(ExitingBB) != L)
1987
        continue;
1988

1989
      if (!needsLFTR(L, ExitingBB))
1990
        continue;
1991

1992
      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1993
      if (isa<SCEVCouldNotCompute>(ExitCount))
1994
        continue;
1995

1996
      // This was handled above, but as we form SCEVs, we can sometimes refine
1997
      // existing ones; this allows exit counts to be folded to zero which
1998
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
1999
      // until stable to handle cases like this better.
2000
      if (ExitCount->isZero())
2001
        continue;
2002

2003
      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2004
      if (!IndVar)
2005
        continue;
2006

2007
      // Avoid high cost expansions.  Note: This heuristic is questionable in
2008
      // that our definition of "high cost" is not exactly principled.
2009
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2010
                                       TTI, PreHeader->getTerminator()))
2011
        continue;
2012

2013
      if (!Rewriter.isSafeToExpand(ExitCount))
2014
        continue;
2015

2016
      Changed |= linearFunctionTestReplace(L, ExitingBB,
2017
                                           ExitCount, IndVar,
2018
                                           Rewriter);
2019
    }
2020
  }
2021
  // Clear the rewriter cache, because values that are in the rewriter's cache
2022
  // can be deleted in the loop below, causing the AssertingVH in the cache to
2023
  // trigger.
2024
  Rewriter.clear();
2025

2026
  // Now that we're done iterating through lists, clean up any instructions
2027
  // which are now dead.
2028
  while (!DeadInsts.empty()) {
2029
    Value *V = DeadInsts.pop_back_val();
2030

2031
    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2032
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2033
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2034
      Changed |=
2035
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2036
  }
2037

2038
  // The Rewriter may not be used from this point on.
2039

2040
  // Loop-invariant instructions in the preheader that aren't used in the
2041
  // loop may be sunk below the loop to reduce register pressure.
2042
  Changed |= sinkUnusedInvariants(L);
2043

2044
  // rewriteFirstIterationLoopExitValues does not rely on the computation of
2045
  // trip count and therefore can further simplify exit values in addition to
2046
  // rewriteLoopExitValues.
2047
  Changed |= rewriteFirstIterationLoopExitValues(L);
2048

2049
  // Clean up dead instructions.
2050
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2051

2052
  // Check a post-condition.
2053
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2054
         "Indvars did not preserve LCSSA!");
2055
  if (VerifyMemorySSA && MSSAU)
2056
    MSSAU->getMemorySSA()->verifyMemorySSA();
2057

2058
  return Changed;
2059
}
2060

2061
PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2062
                                          LoopStandardAnalysisResults &AR,
2063
                                          LPMUpdater &) {
2064
  Function *F = L.getHeader()->getParent();
2065
  const DataLayout &DL = F->getDataLayout();
2066

2067
  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2068
                     WidenIndVars && AllowIVWidening);
2069
  if (!IVS.run(&L))
2070
    return PreservedAnalyses::all();
2071

2072
  auto PA = getLoopPassPreservedAnalyses();
2073
  PA.preserveSet<CFGAnalyses>();
2074
  if (IVS.runUnswitching()) {
2075
    AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
2076
    PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2077
  }
2078

2079
  if (AR.MSSA)
2080
    PA.preserve<MemorySSAAnalysis>();
2081
  return PA;
2082
}
2083

2084
Product

Resources

Company