Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
35269 views
1
//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This transformation analyzes and transforms the induction variables (and
10
// computations derived from them) into simpler forms suitable for subsequent
11
// analysis and transformation.
12
//
13
// If the trip count of a loop is computable, this pass also makes the following
14
// changes:
15
// 1. The exit condition for the loop is canonicalized to compare the
16
// induction value against the exit value. This turns loops like:
17
// 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18
// 2. Any use outside of the loop of an expression derived from the indvar
19
// is changed to compute the derived value outside of the loop, eliminating
20
// the dependence on the exit value of the induction variable. If the only
21
// purpose of the loop is to compute the exit value of some derived
22
// expression, this transformation will make the loop dead.
23
//
24
//===----------------------------------------------------------------------===//
25
26
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
27
#include "llvm/ADT/APFloat.h"
28
#include "llvm/ADT/ArrayRef.h"
29
#include "llvm/ADT/STLExtras.h"
30
#include "llvm/ADT/SmallPtrSet.h"
31
#include "llvm/ADT/SmallSet.h"
32
#include "llvm/ADT/SmallVector.h"
33
#include "llvm/ADT/Statistic.h"
34
#include "llvm/ADT/iterator_range.h"
35
#include "llvm/Analysis/LoopInfo.h"
36
#include "llvm/Analysis/LoopPass.h"
37
#include "llvm/Analysis/MemorySSA.h"
38
#include "llvm/Analysis/MemorySSAUpdater.h"
39
#include "llvm/Analysis/ScalarEvolution.h"
40
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
41
#include "llvm/Analysis/TargetLibraryInfo.h"
42
#include "llvm/Analysis/TargetTransformInfo.h"
43
#include "llvm/Analysis/ValueTracking.h"
44
#include "llvm/IR/BasicBlock.h"
45
#include "llvm/IR/Constant.h"
46
#include "llvm/IR/ConstantRange.h"
47
#include "llvm/IR/Constants.h"
48
#include "llvm/IR/DataLayout.h"
49
#include "llvm/IR/DerivedTypes.h"
50
#include "llvm/IR/Dominators.h"
51
#include "llvm/IR/Function.h"
52
#include "llvm/IR/IRBuilder.h"
53
#include "llvm/IR/InstrTypes.h"
54
#include "llvm/IR/Instruction.h"
55
#include "llvm/IR/Instructions.h"
56
#include "llvm/IR/IntrinsicInst.h"
57
#include "llvm/IR/Intrinsics.h"
58
#include "llvm/IR/Module.h"
59
#include "llvm/IR/Operator.h"
60
#include "llvm/IR/PassManager.h"
61
#include "llvm/IR/PatternMatch.h"
62
#include "llvm/IR/Type.h"
63
#include "llvm/IR/Use.h"
64
#include "llvm/IR/User.h"
65
#include "llvm/IR/Value.h"
66
#include "llvm/IR/ValueHandle.h"
67
#include "llvm/Support/Casting.h"
68
#include "llvm/Support/CommandLine.h"
69
#include "llvm/Support/Compiler.h"
70
#include "llvm/Support/Debug.h"
71
#include "llvm/Support/MathExtras.h"
72
#include "llvm/Support/raw_ostream.h"
73
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
74
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
75
#include "llvm/Transforms/Utils/Local.h"
76
#include "llvm/Transforms/Utils/LoopUtils.h"
77
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
78
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
79
#include <cassert>
80
#include <cstdint>
81
#include <utility>
82
83
using namespace llvm;
84
using namespace PatternMatch;
85
86
#define DEBUG_TYPE "indvars"
87
88
STATISTIC(NumWidened , "Number of indvars widened");
89
STATISTIC(NumReplaced , "Number of exit values replaced");
90
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
91
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
92
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
93
94
static cl::opt<ReplaceExitVal> ReplaceExitValue(
95
"replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
96
cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
97
cl::values(
98
clEnumValN(NeverRepl, "never", "never replace exit value"),
99
clEnumValN(OnlyCheapRepl, "cheap",
100
"only replace exit value when the cost is cheap"),
101
clEnumValN(
102
UnusedIndVarInLoop, "unusedindvarinloop",
103
"only replace exit value when it is an unused "
104
"induction variable in the loop and has cheap replacement cost"),
105
clEnumValN(NoHardUse, "noharduse",
106
"only replace exit values when loop def likely dead"),
107
clEnumValN(AlwaysRepl, "always",
108
"always replace exit value whenever possible")));
109
110
static cl::opt<bool> UsePostIncrementRanges(
111
"indvars-post-increment-ranges", cl::Hidden,
112
cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
113
cl::init(true));
114
115
static cl::opt<bool>
116
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
117
cl::desc("Disable Linear Function Test Replace optimization"));
118
119
static cl::opt<bool>
120
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
121
cl::desc("Predicate conditions in read only loops"));
122
123
static cl::opt<bool>
124
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
125
cl::desc("Allow widening of indvars to eliminate s/zext"));
126
127
namespace {
128
129
class IndVarSimplify {
130
LoopInfo *LI;
131
ScalarEvolution *SE;
132
DominatorTree *DT;
133
const DataLayout &DL;
134
TargetLibraryInfo *TLI;
135
const TargetTransformInfo *TTI;
136
std::unique_ptr<MemorySSAUpdater> MSSAU;
137
138
SmallVector<WeakTrackingVH, 16> DeadInsts;
139
bool WidenIndVars;
140
141
bool RunUnswitching = false;
142
143
bool handleFloatingPointIV(Loop *L, PHINode *PH);
144
bool rewriteNonIntegerIVs(Loop *L);
145
146
bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
147
/// Try to improve our exit conditions by converting condition from signed
148
/// to unsigned or rotating computation out of the loop.
149
/// (See inline comment about why this is duplicated from simplifyAndExtend)
150
bool canonicalizeExitCondition(Loop *L);
151
/// Try to eliminate loop exits based on analyzeable exit counts
152
bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
153
/// Try to form loop invariant tests for loop exits by changing how many
154
/// iterations of the loop run when that is unobservable.
155
bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
156
157
bool rewriteFirstIterationLoopExitValues(Loop *L);
158
159
bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
160
const SCEV *ExitCount,
161
PHINode *IndVar, SCEVExpander &Rewriter);
162
163
bool sinkUnusedInvariants(Loop *L);
164
165
public:
166
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
167
const DataLayout &DL, TargetLibraryInfo *TLI,
168
TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
169
: LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
170
WidenIndVars(WidenIndVars) {
171
if (MSSA)
172
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
173
}
174
175
bool run(Loop *L);
176
177
bool runUnswitching() const { return RunUnswitching; }
178
};
179
180
} // end anonymous namespace
181
182
//===----------------------------------------------------------------------===//
183
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
184
//===----------------------------------------------------------------------===//
185
186
/// Convert APF to an integer, if possible.
187
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
188
bool isExact = false;
189
// See if we can convert this to an int64_t
190
uint64_t UIntVal;
191
if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
192
APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
193
!isExact)
194
return false;
195
IntVal = UIntVal;
196
return true;
197
}
198
199
/// If the loop has floating induction variable then insert corresponding
200
/// integer induction variable if possible.
201
/// For example,
202
/// for(double i = 0; i < 10000; ++i)
203
/// bar(i)
204
/// is converted into
205
/// for(int i = 0; i < 10000; ++i)
206
/// bar((double)i);
207
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
208
unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
209
unsigned BackEdge = IncomingEdge^1;
210
211
// Check incoming value.
212
auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
213
214
int64_t InitValue;
215
if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
216
return false;
217
218
// Check IV increment. Reject this PN if increment operation is not
219
// an add or increment value can not be represented by an integer.
220
auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
221
if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
222
223
// If this is not an add of the PHI with a constantfp, or if the constant fp
224
// is not an integer, bail out.
225
ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
226
int64_t IncValue;
227
if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
228
!ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
229
return false;
230
231
// Check Incr uses. One user is PN and the other user is an exit condition
232
// used by the conditional terminator.
233
Value::user_iterator IncrUse = Incr->user_begin();
234
Instruction *U1 = cast<Instruction>(*IncrUse++);
235
if (IncrUse == Incr->user_end()) return false;
236
Instruction *U2 = cast<Instruction>(*IncrUse++);
237
if (IncrUse != Incr->user_end()) return false;
238
239
// Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
240
// only used by a branch, we can't transform it.
241
FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
242
if (!Compare)
243
Compare = dyn_cast<FCmpInst>(U2);
244
if (!Compare || !Compare->hasOneUse() ||
245
!isa<BranchInst>(Compare->user_back()))
246
return false;
247
248
BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
249
250
// We need to verify that the branch actually controls the iteration count
251
// of the loop. If not, the new IV can overflow and no one will notice.
252
// The branch block must be in the loop and one of the successors must be out
253
// of the loop.
254
assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
255
if (!L->contains(TheBr->getParent()) ||
256
(L->contains(TheBr->getSuccessor(0)) &&
257
L->contains(TheBr->getSuccessor(1))))
258
return false;
259
260
// If it isn't a comparison with an integer-as-fp (the exit value), we can't
261
// transform it.
262
ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
263
int64_t ExitValue;
264
if (ExitValueVal == nullptr ||
265
!ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
266
return false;
267
268
// Find new predicate for integer comparison.
269
CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
270
switch (Compare->getPredicate()) {
271
default: return false; // Unknown comparison.
272
case CmpInst::FCMP_OEQ:
273
case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
274
case CmpInst::FCMP_ONE:
275
case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
276
case CmpInst::FCMP_OGT:
277
case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
278
case CmpInst::FCMP_OGE:
279
case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
280
case CmpInst::FCMP_OLT:
281
case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
282
case CmpInst::FCMP_OLE:
283
case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
284
}
285
286
// We convert the floating point induction variable to a signed i32 value if
287
// we can. This is only safe if the comparison will not overflow in a way
288
// that won't be trapped by the integer equivalent operations. Check for this
289
// now.
290
// TODO: We could use i64 if it is native and the range requires it.
291
292
// The start/stride/exit values must all fit in signed i32.
293
if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
294
return false;
295
296
// If not actually striding (add x, 0.0), avoid touching the code.
297
if (IncValue == 0)
298
return false;
299
300
// Positive and negative strides have different safety conditions.
301
if (IncValue > 0) {
302
// If we have a positive stride, we require the init to be less than the
303
// exit value.
304
if (InitValue >= ExitValue)
305
return false;
306
307
uint32_t Range = uint32_t(ExitValue-InitValue);
308
// Check for infinite loop, either:
309
// while (i <= Exit) or until (i > Exit)
310
if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
311
if (++Range == 0) return false; // Range overflows.
312
}
313
314
unsigned Leftover = Range % uint32_t(IncValue);
315
316
// If this is an equality comparison, we require that the strided value
317
// exactly land on the exit value, otherwise the IV condition will wrap
318
// around and do things the fp IV wouldn't.
319
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
320
Leftover != 0)
321
return false;
322
323
// If the stride would wrap around the i32 before exiting, we can't
324
// transform the IV.
325
if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
326
return false;
327
} else {
328
// If we have a negative stride, we require the init to be greater than the
329
// exit value.
330
if (InitValue <= ExitValue)
331
return false;
332
333
uint32_t Range = uint32_t(InitValue-ExitValue);
334
// Check for infinite loop, either:
335
// while (i >= Exit) or until (i < Exit)
336
if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
337
if (++Range == 0) return false; // Range overflows.
338
}
339
340
unsigned Leftover = Range % uint32_t(-IncValue);
341
342
// If this is an equality comparison, we require that the strided value
343
// exactly land on the exit value, otherwise the IV condition will wrap
344
// around and do things the fp IV wouldn't.
345
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
346
Leftover != 0)
347
return false;
348
349
// If the stride would wrap around the i32 before exiting, we can't
350
// transform the IV.
351
if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
352
return false;
353
}
354
355
IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
356
357
// Insert new integer induction variable.
358
PHINode *NewPHI =
359
PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
360
NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
361
PN->getIncomingBlock(IncomingEdge));
362
NewPHI->setDebugLoc(PN->getDebugLoc());
363
364
Instruction *NewAdd =
365
BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
366
Incr->getName() + ".int", Incr->getIterator());
367
NewAdd->setDebugLoc(Incr->getDebugLoc());
368
NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
369
370
ICmpInst *NewCompare =
371
new ICmpInst(TheBr->getIterator(), NewPred, NewAdd,
372
ConstantInt::get(Int32Ty, ExitValue), Compare->getName());
373
NewCompare->setDebugLoc(Compare->getDebugLoc());
374
375
// In the following deletions, PN may become dead and may be deleted.
376
// Use a WeakTrackingVH to observe whether this happens.
377
WeakTrackingVH WeakPH = PN;
378
379
// Delete the old floating point exit comparison. The branch starts using the
380
// new comparison.
381
NewCompare->takeName(Compare);
382
Compare->replaceAllUsesWith(NewCompare);
383
RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
384
385
// Delete the old floating point increment.
386
Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
387
RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
388
389
// If the FP induction variable still has uses, this is because something else
390
// in the loop uses its value. In order to canonicalize the induction
391
// variable, we chose to eliminate the IV and rewrite it in terms of an
392
// int->fp cast.
393
//
394
// We give preference to sitofp over uitofp because it is faster on most
395
// platforms.
396
if (WeakPH) {
397
Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
398
PN->getParent()->getFirstInsertionPt());
399
Conv->setDebugLoc(PN->getDebugLoc());
400
PN->replaceAllUsesWith(Conv);
401
RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
402
}
403
return true;
404
}
405
406
bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
407
// First step. Check to see if there are any floating-point recurrences.
408
// If there are, change them into integer recurrences, permitting analysis by
409
// the SCEV routines.
410
BasicBlock *Header = L->getHeader();
411
412
SmallVector<WeakTrackingVH, 8> PHIs;
413
for (PHINode &PN : Header->phis())
414
PHIs.push_back(&PN);
415
416
bool Changed = false;
417
for (WeakTrackingVH &PHI : PHIs)
418
if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
419
Changed |= handleFloatingPointIV(L, PN);
420
421
// If the loop previously had floating-point IV, ScalarEvolution
422
// may not have been able to compute a trip count. Now that we've done some
423
// re-writing, the trip count may be computable.
424
if (Changed)
425
SE->forgetLoop(L);
426
return Changed;
427
}
428
429
//===---------------------------------------------------------------------===//
430
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
431
// they will exit at the first iteration.
432
//===---------------------------------------------------------------------===//
433
434
/// Check to see if this loop has loop invariant conditions which lead to loop
435
/// exits. If so, we know that if the exit path is taken, it is at the first
436
/// loop iteration. This lets us predict exit values of PHI nodes that live in
437
/// loop header.
438
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
439
// Verify the input to the pass is already in LCSSA form.
440
assert(L->isLCSSAForm(*DT));
441
442
SmallVector<BasicBlock *, 8> ExitBlocks;
443
L->getUniqueExitBlocks(ExitBlocks);
444
445
bool MadeAnyChanges = false;
446
for (auto *ExitBB : ExitBlocks) {
447
// If there are no more PHI nodes in this exit block, then no more
448
// values defined inside the loop are used on this path.
449
for (PHINode &PN : ExitBB->phis()) {
450
for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
451
IncomingValIdx != E; ++IncomingValIdx) {
452
auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
453
454
// Can we prove that the exit must run on the first iteration if it
455
// runs at all? (i.e. early exits are fine for our purposes, but
456
// traces which lead to this exit being taken on the 2nd iteration
457
// aren't.) Note that this is about whether the exit branch is
458
// executed, not about whether it is taken.
459
if (!L->getLoopLatch() ||
460
!DT->dominates(IncomingBB, L->getLoopLatch()))
461
continue;
462
463
// Get condition that leads to the exit path.
464
auto *TermInst = IncomingBB->getTerminator();
465
466
Value *Cond = nullptr;
467
if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
468
// Must be a conditional branch, otherwise the block
469
// should not be in the loop.
470
Cond = BI->getCondition();
471
} else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
472
Cond = SI->getCondition();
473
else
474
continue;
475
476
if (!L->isLoopInvariant(Cond))
477
continue;
478
479
auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
480
481
// Only deal with PHIs in the loop header.
482
if (!ExitVal || ExitVal->getParent() != L->getHeader())
483
continue;
484
485
// If ExitVal is a PHI on the loop header, then we know its
486
// value along this exit because the exit can only be taken
487
// on the first iteration.
488
auto *LoopPreheader = L->getLoopPreheader();
489
assert(LoopPreheader && "Invalid loop");
490
int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
491
if (PreheaderIdx != -1) {
492
assert(ExitVal->getParent() == L->getHeader() &&
493
"ExitVal must be in loop header");
494
MadeAnyChanges = true;
495
PN.setIncomingValue(IncomingValIdx,
496
ExitVal->getIncomingValue(PreheaderIdx));
497
SE->forgetValue(&PN);
498
}
499
}
500
}
501
}
502
return MadeAnyChanges;
503
}
504
505
//===----------------------------------------------------------------------===//
506
// IV Widening - Extend the width of an IV to cover its widest uses.
507
//===----------------------------------------------------------------------===//
508
509
/// Update information about the induction variable that is extended by this
510
/// sign or zero extend operation. This is used to determine the final width of
511
/// the IV before actually widening it.
512
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
513
ScalarEvolution *SE,
514
const TargetTransformInfo *TTI) {
515
bool IsSigned = Cast->getOpcode() == Instruction::SExt;
516
if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
517
return;
518
519
Type *Ty = Cast->getType();
520
uint64_t Width = SE->getTypeSizeInBits(Ty);
521
if (!Cast->getDataLayout().isLegalInteger(Width))
522
return;
523
524
// Check that `Cast` actually extends the induction variable (we rely on this
525
// later). This takes care of cases where `Cast` is extending a truncation of
526
// the narrow induction variable, and thus can end up being narrower than the
527
// "narrow" induction variable.
528
uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
529
if (NarrowIVWidth >= Width)
530
return;
531
532
// Cast is either an sext or zext up to this point.
533
// We should not widen an indvar if arithmetics on the wider indvar are more
534
// expensive than those on the narrower indvar. We check only the cost of ADD
535
// because at least an ADD is required to increment the induction variable. We
536
// could compute more comprehensively the cost of all instructions on the
537
// induction variable when necessary.
538
if (TTI &&
539
TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
540
TTI->getArithmeticInstrCost(Instruction::Add,
541
Cast->getOperand(0)->getType())) {
542
return;
543
}
544
545
if (!WI.WidestNativeType ||
546
Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
547
WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
548
WI.IsSigned = IsSigned;
549
return;
550
}
551
552
// We extend the IV to satisfy the sign of its user(s), or 'signed'
553
// if there are multiple users with both sign- and zero extensions,
554
// in order not to introduce nondeterministic behaviour based on the
555
// unspecified order of a PHI nodes' users-iterator.
556
WI.IsSigned |= IsSigned;
557
}
558
559
//===----------------------------------------------------------------------===//
560
// Live IV Reduction - Minimize IVs live across the loop.
561
//===----------------------------------------------------------------------===//
562
563
//===----------------------------------------------------------------------===//
564
// Simplification of IV users based on SCEV evaluation.
565
//===----------------------------------------------------------------------===//
566
567
namespace {
568
569
class IndVarSimplifyVisitor : public IVVisitor {
570
ScalarEvolution *SE;
571
const TargetTransformInfo *TTI;
572
PHINode *IVPhi;
573
574
public:
575
WideIVInfo WI;
576
577
IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
578
const TargetTransformInfo *TTI,
579
const DominatorTree *DTree)
580
: SE(SCEV), TTI(TTI), IVPhi(IV) {
581
DT = DTree;
582
WI.NarrowIV = IVPhi;
583
}
584
585
// Implement the interface used by simplifyUsersOfIV.
586
void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
587
};
588
589
} // end anonymous namespace
590
591
/// Iteratively perform simplification on a worklist of IV users. Each
592
/// successive simplification may push more users which may themselves be
593
/// candidates for simplification.
594
///
595
/// Sign/Zero extend elimination is interleaved with IV simplification.
596
bool IndVarSimplify::simplifyAndExtend(Loop *L,
597
SCEVExpander &Rewriter,
598
LoopInfo *LI) {
599
SmallVector<WideIVInfo, 8> WideIVs;
600
601
auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
602
Intrinsic::getName(Intrinsic::experimental_guard));
603
bool HasGuards = GuardDecl && !GuardDecl->use_empty();
604
605
SmallVector<PHINode *, 8> LoopPhis;
606
for (PHINode &PN : L->getHeader()->phis())
607
LoopPhis.push_back(&PN);
608
609
// Each round of simplification iterates through the SimplifyIVUsers worklist
610
// for all current phis, then determines whether any IVs can be
611
// widened. Widening adds new phis to LoopPhis, inducing another round of
612
// simplification on the wide IVs.
613
bool Changed = false;
614
while (!LoopPhis.empty()) {
615
// Evaluate as many IV expressions as possible before widening any IVs. This
616
// forces SCEV to set no-wrap flags before evaluating sign/zero
617
// extension. The first time SCEV attempts to normalize sign/zero extension,
618
// the result becomes final. So for the most predictable results, we delay
619
// evaluation of sign/zero extend evaluation until needed, and avoid running
620
// other SCEV based analysis prior to simplifyAndExtend.
621
do {
622
PHINode *CurrIV = LoopPhis.pop_back_val();
623
624
// Information about sign/zero extensions of CurrIV.
625
IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
626
627
const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
628
Rewriter, &Visitor);
629
630
Changed |= C;
631
RunUnswitching |= U;
632
if (Visitor.WI.WidestNativeType) {
633
WideIVs.push_back(Visitor.WI);
634
}
635
} while(!LoopPhis.empty());
636
637
// Continue if we disallowed widening.
638
if (!WidenIndVars)
639
continue;
640
641
for (; !WideIVs.empty(); WideIVs.pop_back()) {
642
unsigned ElimExt;
643
unsigned Widened;
644
if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
645
DT, DeadInsts, ElimExt, Widened,
646
HasGuards, UsePostIncrementRanges)) {
647
NumElimExt += ElimExt;
648
NumWidened += Widened;
649
Changed = true;
650
LoopPhis.push_back(WidePhi);
651
}
652
}
653
}
654
return Changed;
655
}
656
657
//===----------------------------------------------------------------------===//
658
// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
659
//===----------------------------------------------------------------------===//
660
661
/// Given an Value which is hoped to be part of an add recurance in the given
662
/// loop, return the associated Phi node if so. Otherwise, return null. Note
663
/// that this is less general than SCEVs AddRec checking.
664
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
665
Instruction *IncI = dyn_cast<Instruction>(IncV);
666
if (!IncI)
667
return nullptr;
668
669
switch (IncI->getOpcode()) {
670
case Instruction::Add:
671
case Instruction::Sub:
672
break;
673
case Instruction::GetElementPtr:
674
// An IV counter must preserve its type.
675
if (IncI->getNumOperands() == 2)
676
break;
677
[[fallthrough]];
678
default:
679
return nullptr;
680
}
681
682
PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
683
if (Phi && Phi->getParent() == L->getHeader()) {
684
if (L->isLoopInvariant(IncI->getOperand(1)))
685
return Phi;
686
return nullptr;
687
}
688
if (IncI->getOpcode() == Instruction::GetElementPtr)
689
return nullptr;
690
691
// Allow add/sub to be commuted.
692
Phi = dyn_cast<PHINode>(IncI->getOperand(1));
693
if (Phi && Phi->getParent() == L->getHeader()) {
694
if (L->isLoopInvariant(IncI->getOperand(0)))
695
return Phi;
696
}
697
return nullptr;
698
}
699
700
/// Whether the current loop exit test is based on this value. Currently this
701
/// is limited to a direct use in the loop condition.
702
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
703
BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
704
ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
705
// TODO: Allow non-icmp loop test.
706
if (!ICmp)
707
return false;
708
709
// TODO: Allow indirect use.
710
return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
711
}
712
713
/// linearFunctionTestReplace policy. Return true unless we can show that the
714
/// current exit test is already sufficiently canonical.
715
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
716
assert(L->getLoopLatch() && "Must be in simplified form");
717
718
// Avoid converting a constant or loop invariant test back to a runtime
719
// test. This is critical for when SCEV's cached ExitCount is less precise
720
// than the current IR (such as after we've proven a particular exit is
721
// actually dead and thus the BE count never reaches our ExitCount.)
722
BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
723
if (L->isLoopInvariant(BI->getCondition()))
724
return false;
725
726
// Do LFTR to simplify the exit condition to an ICMP.
727
ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
728
if (!Cond)
729
return true;
730
731
// Do LFTR to simplify the exit ICMP to EQ/NE
732
ICmpInst::Predicate Pred = Cond->getPredicate();
733
if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
734
return true;
735
736
// Look for a loop invariant RHS
737
Value *LHS = Cond->getOperand(0);
738
Value *RHS = Cond->getOperand(1);
739
if (!L->isLoopInvariant(RHS)) {
740
if (!L->isLoopInvariant(LHS))
741
return true;
742
std::swap(LHS, RHS);
743
}
744
// Look for a simple IV counter LHS
745
PHINode *Phi = dyn_cast<PHINode>(LHS);
746
if (!Phi)
747
Phi = getLoopPhiForCounter(LHS, L);
748
749
if (!Phi)
750
return true;
751
752
// Do LFTR if PHI node is defined in the loop, but is *not* a counter.
753
int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
754
if (Idx < 0)
755
return true;
756
757
// Do LFTR if the exit condition's IV is *not* a simple counter.
758
Value *IncV = Phi->getIncomingValue(Idx);
759
return Phi != getLoopPhiForCounter(IncV, L);
760
}
761
762
/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
763
/// down to checking that all operands are constant and listing instructions
764
/// that may hide undef.
765
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
766
unsigned Depth) {
767
if (isa<Constant>(V))
768
return !isa<UndefValue>(V);
769
770
if (Depth >= 6)
771
return false;
772
773
// Conservatively handle non-constant non-instructions. For example, Arguments
774
// may be undef.
775
Instruction *I = dyn_cast<Instruction>(V);
776
if (!I)
777
return false;
778
779
// Load and return values may be undef.
780
if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
781
return false;
782
783
// Optimistically handle other instructions.
784
for (Value *Op : I->operands()) {
785
if (!Visited.insert(Op).second)
786
continue;
787
if (!hasConcreteDefImpl(Op, Visited, Depth+1))
788
return false;
789
}
790
return true;
791
}
792
793
/// Return true if the given value is concrete. We must prove that undef can
794
/// never reach it.
795
///
796
/// TODO: If we decide that this is a good approach to checking for undef, we
797
/// may factor it into a common location.
798
static bool hasConcreteDef(Value *V) {
799
SmallPtrSet<Value*, 8> Visited;
800
Visited.insert(V);
801
return hasConcreteDefImpl(V, Visited, 0);
802
}
803
804
/// Return true if the given phi is a "counter" in L. A counter is an
805
/// add recurance (of integer or pointer type) with an arbitrary start, and a
806
/// step of 1. Note that L must have exactly one latch.
807
static bool isLoopCounter(PHINode* Phi, Loop *L,
808
ScalarEvolution *SE) {
809
assert(Phi->getParent() == L->getHeader());
810
assert(L->getLoopLatch());
811
812
if (!SE->isSCEVable(Phi->getType()))
813
return false;
814
815
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
816
if (!AR || AR->getLoop() != L || !AR->isAffine())
817
return false;
818
819
const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
820
if (!Step || !Step->isOne())
821
return false;
822
823
int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
824
Value *IncV = Phi->getIncomingValue(LatchIdx);
825
return (getLoopPhiForCounter(IncV, L) == Phi &&
826
isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
827
}
828
829
/// Search the loop header for a loop counter (anadd rec w/step of one)
830
/// suitable for use by LFTR. If multiple counters are available, select the
831
/// "best" one based profitable heuristics.
832
///
833
/// BECount may be an i8* pointer type. The pointer difference is already
834
/// valid count without scaling the address stride, so it remains a pointer
835
/// expression as far as SCEV is concerned.
836
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
837
const SCEV *BECount,
838
ScalarEvolution *SE, DominatorTree *DT) {
839
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
840
841
Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
842
843
// Loop over all of the PHI nodes, looking for a simple counter.
844
PHINode *BestPhi = nullptr;
845
const SCEV *BestInit = nullptr;
846
BasicBlock *LatchBlock = L->getLoopLatch();
847
assert(LatchBlock && "Must be in simplified form");
848
const DataLayout &DL = L->getHeader()->getDataLayout();
849
850
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
851
PHINode *Phi = cast<PHINode>(I);
852
if (!isLoopCounter(Phi, L, SE))
853
continue;
854
855
const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
856
857
// AR may be a pointer type, while BECount is an integer type.
858
// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
859
// AR may not be a narrower type, or we may never exit.
860
uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
861
if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
862
continue;
863
864
// Avoid reusing a potentially undef value to compute other values that may
865
// have originally had a concrete definition.
866
if (!hasConcreteDef(Phi)) {
867
// We explicitly allow unknown phis as long as they are already used by
868
// the loop exit test. This is legal since performing LFTR could not
869
// increase the number of undef users.
870
Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
871
if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
872
!isLoopExitTestBasedOn(IncPhi, ExitingBB))
873
continue;
874
}
875
876
// Avoid introducing undefined behavior due to poison which didn't exist in
877
// the original program. (Annoyingly, the rules for poison and undef
878
// propagation are distinct, so this does NOT cover the undef case above.)
879
// We have to ensure that we don't introduce UB by introducing a use on an
880
// iteration where said IV produces poison. Our strategy here differs for
881
// pointers and integer IVs. For integers, we strip and reinfer as needed,
882
// see code in linearFunctionTestReplace. For pointers, we restrict
883
// transforms as there is no good way to reinfer inbounds once lost.
884
if (!Phi->getType()->isIntegerTy() &&
885
!mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
886
continue;
887
888
const SCEV *Init = AR->getStart();
889
890
if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
891
// Don't force a live loop counter if another IV can be used.
892
if (isAlmostDeadIV(Phi, LatchBlock, Cond))
893
continue;
894
895
// Prefer to count-from-zero. This is a more "canonical" counter form. It
896
// also prefers integer to pointer IVs.
897
if (BestInit->isZero() != Init->isZero()) {
898
if (BestInit->isZero())
899
continue;
900
}
901
// If two IVs both count from zero or both count from nonzero then the
902
// narrower is likely a dead phi that has been widened. Use the wider phi
903
// to allow the other to be eliminated.
904
else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
905
continue;
906
}
907
BestPhi = Phi;
908
BestInit = Init;
909
}
910
return BestPhi;
911
}
912
913
/// Insert an IR expression which computes the value held by the IV IndVar
914
/// (which must be an loop counter w/unit stride) after the backedge of loop L
915
/// is taken ExitCount times.
916
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
917
const SCEV *ExitCount, bool UsePostInc, Loop *L,
918
SCEVExpander &Rewriter, ScalarEvolution *SE) {
919
assert(isLoopCounter(IndVar, L, SE));
920
assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
921
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
922
assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
923
924
// For integer IVs, truncate the IV before computing the limit unless we
925
// know apriori that the limit must be a constant when evaluated in the
926
// bitwidth of the IV. We prefer (potentially) keeping a truncate of the
927
// IV in the loop over a (potentially) expensive expansion of the widened
928
// exit count add(zext(add)) expression.
929
if (IndVar->getType()->isIntegerTy() &&
930
SE->getTypeSizeInBits(AR->getType()) >
931
SE->getTypeSizeInBits(ExitCount->getType())) {
932
const SCEV *IVInit = AR->getStart();
933
if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
934
AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
935
}
936
937
const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
938
const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
939
assert(SE->isLoopInvariant(IVLimit, L) &&
940
"Computed iteration count is not loop invariant!");
941
return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
942
ExitingBB->getTerminator());
943
}
944
945
/// This method rewrites the exit condition of the loop to be a canonical !=
946
/// comparison against the incremented loop induction variable. This pass is
947
/// able to rewrite the exit tests of any loop where the SCEV analysis can
948
/// determine a loop-invariant trip count of the loop, which is actually a much
949
/// broader range than just linear tests.
950
bool IndVarSimplify::
951
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
952
const SCEV *ExitCount,
953
PHINode *IndVar, SCEVExpander &Rewriter) {
954
assert(L->getLoopLatch() && "Loop no longer in simplified form?");
955
assert(isLoopCounter(IndVar, L, SE));
956
Instruction * const IncVar =
957
cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
958
959
// Initialize CmpIndVar to the preincremented IV.
960
Value *CmpIndVar = IndVar;
961
bool UsePostInc = false;
962
963
// If the exiting block is the same as the backedge block, we prefer to
964
// compare against the post-incremented value, otherwise we must compare
965
// against the preincremented value.
966
if (ExitingBB == L->getLoopLatch()) {
967
// For pointer IVs, we chose to not strip inbounds which requires us not
968
// to add a potentially UB introducing use. We need to either a) show
969
// the loop test we're modifying is already in post-inc form, or b) show
970
// that adding a use must not introduce UB.
971
bool SafeToPostInc =
972
IndVar->getType()->isIntegerTy() ||
973
isLoopExitTestBasedOn(IncVar, ExitingBB) ||
974
mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
975
if (SafeToPostInc) {
976
UsePostInc = true;
977
CmpIndVar = IncVar;
978
}
979
}
980
981
// It may be necessary to drop nowrap flags on the incrementing instruction
982
// if either LFTR moves from a pre-inc check to a post-inc check (in which
983
// case the increment might have previously been poison on the last iteration
984
// only) or if LFTR switches to a different IV that was previously dynamically
985
// dead (and as such may be arbitrarily poison). We remove any nowrap flags
986
// that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
987
// check), because the pre-inc addrec flags may be adopted from the original
988
// instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
989
// TODO: This handling is inaccurate for one case: If we switch to a
990
// dynamically dead IV that wraps on the first loop iteration only, which is
991
// not covered by the post-inc addrec. (If the new IV was not dynamically
992
// dead, it could not be poison on the first iteration in the first place.)
993
if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
994
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
995
if (BO->hasNoUnsignedWrap())
996
BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
997
if (BO->hasNoSignedWrap())
998
BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
999
}
1000
1001
Value *ExitCnt = genLoopLimit(
1002
IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
1003
assert(ExitCnt->getType()->isPointerTy() ==
1004
IndVar->getType()->isPointerTy() &&
1005
"genLoopLimit missed a cast");
1006
1007
// Insert a new icmp_ne or icmp_eq instruction before the branch.
1008
BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1009
ICmpInst::Predicate P;
1010
if (L->contains(BI->getSuccessor(0)))
1011
P = ICmpInst::ICMP_NE;
1012
else
1013
P = ICmpInst::ICMP_EQ;
1014
1015
IRBuilder<> Builder(BI);
1016
1017
// The new loop exit condition should reuse the debug location of the
1018
// original loop exit condition.
1019
if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1020
Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1021
1022
// For integer IVs, if we evaluated the limit in the narrower bitwidth to
1023
// avoid the expensive expansion of the limit expression in the wider type,
1024
// emit a truncate to narrow the IV to the ExitCount type. This is safe
1025
// since we know (from the exit count bitwidth), that we can't self-wrap in
1026
// the narrower type.
1027
unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1028
unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1029
if (CmpIndVarSize > ExitCntSize) {
1030
assert(!CmpIndVar->getType()->isPointerTy() &&
1031
!ExitCnt->getType()->isPointerTy());
1032
1033
// Before resorting to actually inserting the truncate, use the same
1034
// reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1035
// the other side of the comparison instead. We still evaluate the limit
1036
// in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1037
// a truncate within in.
1038
bool Extended = false;
1039
const SCEV *IV = SE->getSCEV(CmpIndVar);
1040
const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
1041
const SCEV *ZExtTrunc =
1042
SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1043
1044
if (ZExtTrunc == IV) {
1045
Extended = true;
1046
ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1047
"wide.trip.count");
1048
} else {
1049
const SCEV *SExtTrunc =
1050
SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1051
if (SExtTrunc == IV) {
1052
Extended = true;
1053
ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1054
"wide.trip.count");
1055
}
1056
}
1057
1058
if (Extended) {
1059
bool Discard;
1060
L->makeLoopInvariant(ExitCnt, Discard);
1061
} else
1062
CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1063
"lftr.wideiv");
1064
}
1065
LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1066
<< " LHS:" << *CmpIndVar << '\n'
1067
<< " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1068
<< "\n"
1069
<< " RHS:\t" << *ExitCnt << "\n"
1070
<< "ExitCount:\t" << *ExitCount << "\n"
1071
<< " was: " << *BI->getCondition() << "\n");
1072
1073
Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1074
Value *OrigCond = BI->getCondition();
1075
// It's tempting to use replaceAllUsesWith here to fully replace the old
1076
// comparison, but that's not immediately safe, since users of the old
1077
// comparison may not be dominated by the new comparison. Instead, just
1078
// update the branch to use the new comparison; in the common case this
1079
// will make old comparison dead.
1080
BI->setCondition(Cond);
1081
DeadInsts.emplace_back(OrigCond);
1082
1083
++NumLFTR;
1084
return true;
1085
}
1086
1087
//===----------------------------------------------------------------------===//
1088
// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1089
//===----------------------------------------------------------------------===//
1090
1091
/// If there's a single exit block, sink any loop-invariant values that
1092
/// were defined in the preheader but not used inside the loop into the
1093
/// exit block to reduce register pressure in the loop.
1094
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1095
BasicBlock *ExitBlock = L->getExitBlock();
1096
if (!ExitBlock) return false;
1097
1098
BasicBlock *Preheader = L->getLoopPreheader();
1099
if (!Preheader) return false;
1100
1101
bool MadeAnyChanges = false;
1102
BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1103
BasicBlock::iterator I(Preheader->getTerminator());
1104
while (I != Preheader->begin()) {
1105
--I;
1106
// New instructions were inserted at the end of the preheader.
1107
if (isa<PHINode>(I))
1108
break;
1109
1110
// Don't move instructions which might have side effects, since the side
1111
// effects need to complete before instructions inside the loop. Also don't
1112
// move instructions which might read memory, since the loop may modify
1113
// memory. Note that it's okay if the instruction might have undefined
1114
// behavior: LoopSimplify guarantees that the preheader dominates the exit
1115
// block.
1116
if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1117
continue;
1118
1119
// Skip debug info intrinsics.
1120
if (isa<DbgInfoIntrinsic>(I))
1121
continue;
1122
1123
// Skip eh pad instructions.
1124
if (I->isEHPad())
1125
continue;
1126
1127
// Don't sink alloca: we never want to sink static alloca's out of the
1128
// entry block, and correctly sinking dynamic alloca's requires
1129
// checks for stacksave/stackrestore intrinsics.
1130
// FIXME: Refactor this check somehow?
1131
if (isa<AllocaInst>(I))
1132
continue;
1133
1134
// Determine if there is a use in or before the loop (direct or
1135
// otherwise).
1136
bool UsedInLoop = false;
1137
for (Use &U : I->uses()) {
1138
Instruction *User = cast<Instruction>(U.getUser());
1139
BasicBlock *UseBB = User->getParent();
1140
if (PHINode *P = dyn_cast<PHINode>(User)) {
1141
unsigned i =
1142
PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1143
UseBB = P->getIncomingBlock(i);
1144
}
1145
if (UseBB == Preheader || L->contains(UseBB)) {
1146
UsedInLoop = true;
1147
break;
1148
}
1149
}
1150
1151
// If there is, the def must remain in the preheader.
1152
if (UsedInLoop)
1153
continue;
1154
1155
// Otherwise, sink it to the exit block.
1156
Instruction *ToMove = &*I;
1157
bool Done = false;
1158
1159
if (I != Preheader->begin()) {
1160
// Skip debug info intrinsics.
1161
do {
1162
--I;
1163
} while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1164
1165
if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1166
Done = true;
1167
} else {
1168
Done = true;
1169
}
1170
1171
MadeAnyChanges = true;
1172
ToMove->moveBefore(*ExitBlock, InsertPt);
1173
SE->forgetValue(ToMove);
1174
if (Done) break;
1175
InsertPt = ToMove->getIterator();
1176
}
1177
1178
return MadeAnyChanges;
1179
}
1180
1181
static void replaceExitCond(BranchInst *BI, Value *NewCond,
1182
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1183
auto *OldCond = BI->getCondition();
1184
LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1185
<< " with " << *NewCond << "\n");
1186
BI->setCondition(NewCond);
1187
if (OldCond->use_empty())
1188
DeadInsts.emplace_back(OldCond);
1189
}
1190
1191
static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1192
bool IsTaken) {
1193
BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1194
bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1195
auto *OldCond = BI->getCondition();
1196
return ConstantInt::get(OldCond->getType(),
1197
IsTaken ? ExitIfTrue : !ExitIfTrue);
1198
}
1199
1200
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1201
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1202
BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1203
auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1204
replaceExitCond(BI, NewCond, DeadInsts);
1205
}
1206
1207
static void replaceLoopPHINodesWithPreheaderValues(
1208
LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1209
ScalarEvolution &SE) {
1210
assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1211
auto *LoopPreheader = L->getLoopPreheader();
1212
auto *LoopHeader = L->getHeader();
1213
SmallVector<Instruction *> Worklist;
1214
for (auto &PN : LoopHeader->phis()) {
1215
auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1216
for (User *U : PN.users())
1217
Worklist.push_back(cast<Instruction>(U));
1218
SE.forgetValue(&PN);
1219
PN.replaceAllUsesWith(PreheaderIncoming);
1220
DeadInsts.emplace_back(&PN);
1221
}
1222
1223
// Replacing with the preheader value will often allow IV users to simplify
1224
// (especially if the preheader value is a constant).
1225
SmallPtrSet<Instruction *, 16> Visited;
1226
while (!Worklist.empty()) {
1227
auto *I = cast<Instruction>(Worklist.pop_back_val());
1228
if (!Visited.insert(I).second)
1229
continue;
1230
1231
// Don't simplify instructions outside the loop.
1232
if (!L->contains(I))
1233
continue;
1234
1235
Value *Res = simplifyInstruction(I, I->getDataLayout());
1236
if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1237
for (User *U : I->users())
1238
Worklist.push_back(cast<Instruction>(U));
1239
I->replaceAllUsesWith(Res);
1240
DeadInsts.emplace_back(I);
1241
}
1242
}
1243
}
1244
1245
static Value *
1246
createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
1247
const ScalarEvolution::LoopInvariantPredicate &LIP,
1248
SCEVExpander &Rewriter) {
1249
ICmpInst::Predicate InvariantPred = LIP.Pred;
1250
BasicBlock *Preheader = L->getLoopPreheader();
1251
assert(Preheader && "Preheader doesn't exist");
1252
Rewriter.setInsertPoint(Preheader->getTerminator());
1253
auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1254
auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1255
bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1256
if (ExitIfTrue)
1257
InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1258
IRBuilder<> Builder(Preheader->getTerminator());
1259
BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1260
return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1261
BI->getCondition()->getName());
1262
}
1263
1264
static std::optional<Value *>
1265
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1266
const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1267
ScalarEvolution *SE, SCEVExpander &Rewriter) {
1268
ICmpInst::Predicate Pred = ICmp->getPredicate();
1269
Value *LHS = ICmp->getOperand(0);
1270
Value *RHS = ICmp->getOperand(1);
1271
1272
// 'LHS pred RHS' should now mean that we stay in loop.
1273
auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1274
if (Inverted)
1275
Pred = CmpInst::getInversePredicate(Pred);
1276
1277
const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1278
const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1279
// Can we prove it to be trivially true or false?
1280
if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1281
return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1282
1283
auto *ARTy = LHSS->getType();
1284
auto *MaxIterTy = MaxIter->getType();
1285
// If possible, adjust types.
1286
if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1287
MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1288
else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1289
const SCEV *MinusOne = SE->getMinusOne(ARTy);
1290
auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1291
if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1292
MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1293
}
1294
1295
if (SkipLastIter) {
1296
// Semantically skip last iter is "subtract 1, do not bother about unsigned
1297
// wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1298
// with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1299
// So we manually construct umin(a - 1, b - 1).
1300
SmallVector<const SCEV *, 4> Elements;
1301
if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1302
for (auto *Op : UMin->operands())
1303
Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1304
MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1305
} else
1306
MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1307
}
1308
1309
// Check if there is a loop-invariant predicate equivalent to our check.
1310
auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1311
L, BI, MaxIter);
1312
if (!LIP)
1313
return std::nullopt;
1314
1315
// Can we prove it to be trivially true?
1316
if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1317
return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1318
else
1319
return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1320
}
1321
1322
static bool optimizeLoopExitWithUnknownExitCount(
1323
const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1324
bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1325
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1326
assert(
1327
(L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1328
"Not a loop exit!");
1329
1330
// For branch that stays in loop by TRUE condition, go through AND. For branch
1331
// that stays in loop by FALSE condition, go through OR. Both gives the
1332
// similar logic: "stay in loop iff all conditions are true(false)".
1333
bool Inverted = L->contains(BI->getSuccessor(1));
1334
SmallVector<ICmpInst *, 4> LeafConditions;
1335
SmallVector<Value *, 4> Worklist;
1336
SmallPtrSet<Value *, 4> Visited;
1337
Value *OldCond = BI->getCondition();
1338
Visited.insert(OldCond);
1339
Worklist.push_back(OldCond);
1340
1341
auto GoThrough = [&](Value *V) {
1342
Value *LHS = nullptr, *RHS = nullptr;
1343
if (Inverted) {
1344
if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1345
return false;
1346
} else {
1347
if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1348
return false;
1349
}
1350
if (Visited.insert(LHS).second)
1351
Worklist.push_back(LHS);
1352
if (Visited.insert(RHS).second)
1353
Worklist.push_back(RHS);
1354
return true;
1355
};
1356
1357
do {
1358
Value *Curr = Worklist.pop_back_val();
1359
// Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1360
// those with one use, to avoid instruction duplication.
1361
if (Curr->hasOneUse())
1362
if (!GoThrough(Curr))
1363
if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1364
LeafConditions.push_back(ICmp);
1365
} while (!Worklist.empty());
1366
1367
// If the current basic block has the same exit count as the whole loop, and
1368
// it consists of multiple icmp's, try to collect all icmp's that give exact
1369
// same exit count. For all other icmp's, we could use one less iteration,
1370
// because their value on the last iteration doesn't really matter.
1371
SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1372
if (!SkipLastIter && LeafConditions.size() > 1 &&
1373
SE->getExitCount(L, ExitingBB,
1374
ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1375
MaxIter)
1376
for (auto *ICmp : LeafConditions) {
1377
auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1378
/*ControlsExit*/ false);
1379
auto *ExitMax = EL.SymbolicMaxNotTaken;
1380
if (isa<SCEVCouldNotCompute>(ExitMax))
1381
continue;
1382
// They could be of different types (specifically this happens after
1383
// IV widening).
1384
auto *WiderType =
1385
SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1386
auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1387
auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1388
if (WideExitMax == WideMaxIter)
1389
ICmpsFailingOnLastIter.insert(ICmp);
1390
}
1391
1392
bool Changed = false;
1393
for (auto *OldCond : LeafConditions) {
1394
// Skip last iteration for this icmp under one of two conditions:
1395
// - We do it for all conditions;
1396
// - There is another ICmp that would fail on last iter, so this one doesn't
1397
// really matter.
1398
bool OptimisticSkipLastIter = SkipLastIter;
1399
if (!OptimisticSkipLastIter) {
1400
if (ICmpsFailingOnLastIter.size() > 1)
1401
OptimisticSkipLastIter = true;
1402
else if (ICmpsFailingOnLastIter.size() == 1)
1403
OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1404
}
1405
if (auto Replaced =
1406
createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1407
OptimisticSkipLastIter, SE, Rewriter)) {
1408
Changed = true;
1409
auto *NewCond = *Replaced;
1410
if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1411
NCI->setName(OldCond->getName() + ".first_iter");
1412
}
1413
LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1414
<< " with " << *NewCond << "\n");
1415
assert(OldCond->hasOneUse() && "Must be!");
1416
OldCond->replaceAllUsesWith(NewCond);
1417
DeadInsts.push_back(OldCond);
1418
// Make sure we no longer consider this condition as failing on last
1419
// iteration.
1420
ICmpsFailingOnLastIter.erase(OldCond);
1421
}
1422
}
1423
return Changed;
1424
}
1425
1426
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1427
// Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1428
// We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1429
// never reaches the icmp since the zext doesn't fold to an AddRec unless
1430
// it already has flags. The alternative to this would be to extending the
1431
// set of "interesting" IV users to include the icmp, but doing that
1432
// regresses results in practice by querying SCEVs before trip counts which
1433
// rely on them which results in SCEV caching sub-optimal answers. The
1434
// concern about caching sub-optimal results is why we only query SCEVs of
1435
// the loop invariant RHS here.
1436
SmallVector<BasicBlock*, 16> ExitingBlocks;
1437
L->getExitingBlocks(ExitingBlocks);
1438
bool Changed = false;
1439
for (auto *ExitingBB : ExitingBlocks) {
1440
auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1441
if (!BI)
1442
continue;
1443
assert(BI->isConditional() && "exit branch must be conditional");
1444
1445
auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1446
if (!ICmp || !ICmp->hasOneUse())
1447
continue;
1448
1449
auto *LHS = ICmp->getOperand(0);
1450
auto *RHS = ICmp->getOperand(1);
1451
// For the range reasoning, avoid computing SCEVs in the loop to avoid
1452
// poisoning cache with sub-optimal results. For the must-execute case,
1453
// this is a neccessary precondition for correctness.
1454
if (!L->isLoopInvariant(RHS)) {
1455
if (!L->isLoopInvariant(LHS))
1456
continue;
1457
// Same logic applies for the inverse case
1458
std::swap(LHS, RHS);
1459
}
1460
1461
// Match (icmp signed-cond zext, RHS)
1462
Value *LHSOp = nullptr;
1463
if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1464
continue;
1465
1466
const DataLayout &DL = ExitingBB->getDataLayout();
1467
const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1468
const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1469
auto FullCR = ConstantRange::getFull(InnerBitWidth);
1470
FullCR = FullCR.zeroExtend(OuterBitWidth);
1471
auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1472
if (FullCR.contains(RHSCR)) {
1473
// We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1474
// replace the signed condition with the unsigned version.
1475
ICmp->setPredicate(ICmp->getUnsignedPredicate());
1476
Changed = true;
1477
// Note: No SCEV invalidation needed. We've changed the predicate, but
1478
// have not changed exit counts, or the values produced by the compare.
1479
continue;
1480
}
1481
}
1482
1483
// Now that we've canonicalized the condition to match the extend,
1484
// see if we can rotate the extend out of the loop.
1485
for (auto *ExitingBB : ExitingBlocks) {
1486
auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1487
if (!BI)
1488
continue;
1489
assert(BI->isConditional() && "exit branch must be conditional");
1490
1491
auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1492
if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1493
continue;
1494
1495
bool Swapped = false;
1496
auto *LHS = ICmp->getOperand(0);
1497
auto *RHS = ICmp->getOperand(1);
1498
if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1499
// Nothing to rotate
1500
continue;
1501
if (L->isLoopInvariant(LHS)) {
1502
// Same logic applies for the inverse case until we actually pick
1503
// which operand of the compare to update.
1504
Swapped = true;
1505
std::swap(LHS, RHS);
1506
}
1507
assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1508
1509
// Match (icmp unsigned-cond zext, RHS)
1510
// TODO: Extend to handle corresponding sext/signed-cmp case
1511
// TODO: Extend to other invertible functions
1512
Value *LHSOp = nullptr;
1513
if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1514
continue;
1515
1516
// In general, we only rotate if we can do so without increasing the number
1517
// of instructions. The exception is when we have an zext(add-rec). The
1518
// reason for allowing this exception is that we know we need to get rid
1519
// of the zext for SCEV to be able to compute a trip count for said loops;
1520
// we consider the new trip count valuable enough to increase instruction
1521
// count by one.
1522
if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1523
continue;
1524
1525
// Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1526
// replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1527
// when zext is loop varying and RHS is loop invariant. This converts
1528
// loop varying work to loop-invariant work.
1529
auto doRotateTransform = [&]() {
1530
assert(ICmp->isUnsigned() && "must have proven unsigned already");
1531
auto *NewRHS = CastInst::Create(
1532
Instruction::Trunc, RHS, LHSOp->getType(), "",
1533
L->getLoopPreheader()->getTerminator()->getIterator());
1534
ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1535
ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1536
if (LHS->use_empty())
1537
DeadInsts.push_back(LHS);
1538
};
1539
1540
1541
const DataLayout &DL = ExitingBB->getDataLayout();
1542
const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1543
const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1544
auto FullCR = ConstantRange::getFull(InnerBitWidth);
1545
FullCR = FullCR.zeroExtend(OuterBitWidth);
1546
auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1547
if (FullCR.contains(RHSCR)) {
1548
doRotateTransform();
1549
Changed = true;
1550
// Note, we are leaving SCEV in an unfortunately imprecise case here
1551
// as rotation tends to reveal information about trip counts not
1552
// previously visible.
1553
continue;
1554
}
1555
}
1556
1557
return Changed;
1558
}
1559
1560
bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1561
SmallVector<BasicBlock*, 16> ExitingBlocks;
1562
L->getExitingBlocks(ExitingBlocks);
1563
1564
// Remove all exits which aren't both rewriteable and execute on every
1565
// iteration.
1566
llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1567
// If our exitting block exits multiple loops, we can only rewrite the
1568
// innermost one. Otherwise, we're changing how many times the innermost
1569
// loop runs before it exits.
1570
if (LI->getLoopFor(ExitingBB) != L)
1571
return true;
1572
1573
// Can't rewrite non-branch yet.
1574
BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1575
if (!BI)
1576
return true;
1577
1578
// Likewise, the loop latch must be dominated by the exiting BB.
1579
if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1580
return true;
1581
1582
if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1583
// If already constant, nothing to do. However, if this is an
1584
// unconditional exit, we can still replace header phis with their
1585
// preheader value.
1586
if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1587
replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1588
return true;
1589
}
1590
1591
return false;
1592
});
1593
1594
if (ExitingBlocks.empty())
1595
return false;
1596
1597
// Get a symbolic upper bound on the loop backedge taken count.
1598
const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1599
if (isa<SCEVCouldNotCompute>(MaxBECount))
1600
return false;
1601
1602
// Visit our exit blocks in order of dominance. We know from the fact that
1603
// all exits must dominate the latch, so there is a total dominance order
1604
// between them.
1605
llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1606
// std::sort sorts in ascending order, so we want the inverse of
1607
// the normal dominance relation.
1608
if (A == B) return false;
1609
if (DT->properlyDominates(A, B))
1610
return true;
1611
else {
1612
assert(DT->properlyDominates(B, A) &&
1613
"expected total dominance order!");
1614
return false;
1615
}
1616
});
1617
#ifdef ASSERT
1618
for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1619
assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1620
}
1621
#endif
1622
1623
bool Changed = false;
1624
bool SkipLastIter = false;
1625
const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1626
auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1627
if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1628
return;
1629
if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1630
CurrMaxExit = MaxExitCount;
1631
else
1632
CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1633
// If the loop has more than 1 iteration, all further checks will be
1634
// executed 1 iteration less.
1635
if (CurrMaxExit == MaxBECount)
1636
SkipLastIter = true;
1637
};
1638
SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1639
for (BasicBlock *ExitingBB : ExitingBlocks) {
1640
const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1641
const SCEV *MaxExitCount = SE->getExitCount(
1642
L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1643
if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1644
// Okay, we do not know the exit count here. Can we at least prove that it
1645
// will remain the same within iteration space?
1646
auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1647
auto OptimizeCond = [&](bool SkipLastIter) {
1648
return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1649
MaxBECount, SkipLastIter,
1650
SE, Rewriter, DeadInsts);
1651
};
1652
1653
// TODO: We might have proved that we can skip the last iteration for
1654
// this check. In this case, we only want to check the condition on the
1655
// pre-last iteration (MaxBECount - 1). However, there is a nasty
1656
// corner case:
1657
//
1658
// for (i = len; i != 0; i--) { ... check (i ult X) ... }
1659
//
1660
// If we could not prove that len != 0, then we also could not prove that
1661
// (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1662
// OptimizeCond will likely not prove anything for it, even if it could
1663
// prove the same fact for len.
1664
//
1665
// As a temporary solution, we query both last and pre-last iterations in
1666
// hope that we will be able to prove triviality for at least one of
1667
// them. We can stop querying MaxBECount for this case once SCEV
1668
// understands that (MaxBECount - 1) will not overflow here.
1669
if (OptimizeCond(false))
1670
Changed = true;
1671
else if (SkipLastIter && OptimizeCond(true))
1672
Changed = true;
1673
UpdateSkipLastIter(MaxExitCount);
1674
continue;
1675
}
1676
1677
UpdateSkipLastIter(ExactExitCount);
1678
1679
// If we know we'd exit on the first iteration, rewrite the exit to
1680
// reflect this. This does not imply the loop must exit through this
1681
// exit; there may be an earlier one taken on the first iteration.
1682
// We know that the backedge can't be taken, so we replace all
1683
// the header PHIs with values coming from the preheader.
1684
if (ExactExitCount->isZero()) {
1685
foldExit(L, ExitingBB, true, DeadInsts);
1686
replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1687
Changed = true;
1688
continue;
1689
}
1690
1691
assert(ExactExitCount->getType()->isIntegerTy() &&
1692
MaxBECount->getType()->isIntegerTy() &&
1693
"Exit counts must be integers");
1694
1695
Type *WiderType =
1696
SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1697
ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1698
MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1699
assert(MaxBECount->getType() == ExactExitCount->getType());
1700
1701
// Can we prove that some other exit must be taken strictly before this
1702
// one?
1703
if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1704
ExactExitCount)) {
1705
foldExit(L, ExitingBB, false, DeadInsts);
1706
Changed = true;
1707
continue;
1708
}
1709
1710
// As we run, keep track of which exit counts we've encountered. If we
1711
// find a duplicate, we've found an exit which would have exited on the
1712
// exiting iteration, but (from the visit order) strictly follows another
1713
// which does the same and is thus dead.
1714
if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1715
foldExit(L, ExitingBB, false, DeadInsts);
1716
Changed = true;
1717
continue;
1718
}
1719
1720
// TODO: There might be another oppurtunity to leverage SCEV's reasoning
1721
// here. If we kept track of the min of dominanting exits so far, we could
1722
// discharge exits with EC >= MDEC. This is less powerful than the existing
1723
// transform (since later exits aren't considered), but potentially more
1724
// powerful for any case where SCEV can prove a >=u b, but neither a == b
1725
// or a >u b. Such a case is not currently known.
1726
}
1727
return Changed;
1728
}
1729
1730
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1731
SmallVector<BasicBlock*, 16> ExitingBlocks;
1732
L->getExitingBlocks(ExitingBlocks);
1733
1734
// Finally, see if we can rewrite our exit conditions into a loop invariant
1735
// form. If we have a read-only loop, and we can tell that we must exit down
1736
// a path which does not need any of the values computed within the loop, we
1737
// can rewrite the loop to exit on the first iteration. Note that this
1738
// doesn't either a) tell us the loop exits on the first iteration (unless
1739
// *all* exits are predicateable) or b) tell us *which* exit might be taken.
1740
// This transformation looks a lot like a restricted form of dead loop
1741
// elimination, but restricted to read-only loops and without neccesssarily
1742
// needing to kill the loop entirely.
1743
if (!LoopPredication)
1744
return false;
1745
1746
// Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1747
// through *explicit* control flow. We have to eliminate the possibility of
1748
// implicit exits (see below) before we know it's truly exact.
1749
const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1750
if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1751
return false;
1752
1753
assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1754
assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1755
1756
auto BadExit = [&](BasicBlock *ExitingBB) {
1757
// If our exiting block exits multiple loops, we can only rewrite the
1758
// innermost one. Otherwise, we're changing how many times the innermost
1759
// loop runs before it exits.
1760
if (LI->getLoopFor(ExitingBB) != L)
1761
return true;
1762
1763
// Can't rewrite non-branch yet.
1764
BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1765
if (!BI)
1766
return true;
1767
1768
// If already constant, nothing to do.
1769
if (isa<Constant>(BI->getCondition()))
1770
return true;
1771
1772
// If the exit block has phis, we need to be able to compute the values
1773
// within the loop which contains them. This assumes trivially lcssa phis
1774
// have already been removed; TODO: generalize
1775
BasicBlock *ExitBlock =
1776
BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1777
if (!ExitBlock->phis().empty())
1778
return true;
1779
1780
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1781
if (isa<SCEVCouldNotCompute>(ExitCount) ||
1782
!Rewriter.isSafeToExpand(ExitCount))
1783
return true;
1784
1785
assert(SE->isLoopInvariant(ExitCount, L) &&
1786
"Exit count must be loop invariant");
1787
assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1788
return false;
1789
};
1790
1791
// If we have any exits which can't be predicated themselves, than we can't
1792
// predicate any exit which isn't guaranteed to execute before it. Consider
1793
// two exits (a) and (b) which would both exit on the same iteration. If we
1794
// can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1795
// we could convert a loop from exiting through (a) to one exiting through
1796
// (b). Note that this problem exists only for exits with the same exit
1797
// count, and we could be more aggressive when exit counts are known inequal.
1798
llvm::sort(ExitingBlocks,
1799
[&](BasicBlock *A, BasicBlock *B) {
1800
// std::sort sorts in ascending order, so we want the inverse of
1801
// the normal dominance relation, plus a tie breaker for blocks
1802
// unordered by dominance.
1803
if (DT->properlyDominates(A, B)) return true;
1804
if (DT->properlyDominates(B, A)) return false;
1805
return A->getName() < B->getName();
1806
});
1807
// Check to see if our exit blocks are a total order (i.e. a linear chain of
1808
// exits before the backedge). If they aren't, reasoning about reachability
1809
// is complicated and we choose not to for now.
1810
for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1811
if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1812
return false;
1813
1814
// Given our sorted total order, we know that exit[j] must be evaluated
1815
// after all exit[i] such j > i.
1816
for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1817
if (BadExit(ExitingBlocks[i])) {
1818
ExitingBlocks.resize(i);
1819
break;
1820
}
1821
1822
if (ExitingBlocks.empty())
1823
return false;
1824
1825
// We rely on not being able to reach an exiting block on a later iteration
1826
// then it's statically compute exit count. The implementaton of
1827
// getExitCount currently has this invariant, but assert it here so that
1828
// breakage is obvious if this ever changes..
1829
assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1830
return DT->dominates(ExitingBB, L->getLoopLatch());
1831
}));
1832
1833
// At this point, ExitingBlocks consists of only those blocks which are
1834
// predicatable. Given that, we know we have at least one exit we can
1835
// predicate if the loop is doesn't have side effects and doesn't have any
1836
// implicit exits (because then our exact BTC isn't actually exact).
1837
// @Reviewers - As structured, this is O(I^2) for loop nests. Any
1838
// suggestions on how to improve this? I can obviously bail out for outer
1839
// loops, but that seems less than ideal. MemorySSA can find memory writes,
1840
// is that enough for *all* side effects?
1841
for (BasicBlock *BB : L->blocks())
1842
for (auto &I : *BB)
1843
// TODO:isGuaranteedToTransfer
1844
if (I.mayHaveSideEffects())
1845
return false;
1846
1847
bool Changed = false;
1848
// Finally, do the actual predication for all predicatable blocks. A couple
1849
// of notes here:
1850
// 1) We don't bother to constant fold dominated exits with identical exit
1851
// counts; that's simply a form of CSE/equality propagation and we leave
1852
// it for dedicated passes.
1853
// 2) We insert the comparison at the branch. Hoisting introduces additional
1854
// legality constraints and we leave that to dedicated logic. We want to
1855
// predicate even if we can't insert a loop invariant expression as
1856
// peeling or unrolling will likely reduce the cost of the otherwise loop
1857
// varying check.
1858
Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1859
IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1860
Value *ExactBTCV = nullptr; // Lazily generated if needed.
1861
for (BasicBlock *ExitingBB : ExitingBlocks) {
1862
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1863
1864
auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1865
Value *NewCond;
1866
if (ExitCount == ExactBTC) {
1867
NewCond = L->contains(BI->getSuccessor(0)) ?
1868
B.getFalse() : B.getTrue();
1869
} else {
1870
Value *ECV = Rewriter.expandCodeFor(ExitCount);
1871
if (!ExactBTCV)
1872
ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1873
Value *RHS = ExactBTCV;
1874
if (ECV->getType() != RHS->getType()) {
1875
Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1876
ECV = B.CreateZExt(ECV, WiderTy);
1877
RHS = B.CreateZExt(RHS, WiderTy);
1878
}
1879
auto Pred = L->contains(BI->getSuccessor(0)) ?
1880
ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1881
NewCond = B.CreateICmp(Pred, ECV, RHS);
1882
}
1883
Value *OldCond = BI->getCondition();
1884
BI->setCondition(NewCond);
1885
if (OldCond->use_empty())
1886
DeadInsts.emplace_back(OldCond);
1887
Changed = true;
1888
RunUnswitching = true;
1889
}
1890
1891
return Changed;
1892
}
1893
1894
//===----------------------------------------------------------------------===//
1895
// IndVarSimplify driver. Manage several subpasses of IV simplification.
1896
//===----------------------------------------------------------------------===//
1897
1898
bool IndVarSimplify::run(Loop *L) {
1899
// We need (and expect!) the incoming loop to be in LCSSA.
1900
assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1901
"LCSSA required to run indvars!");
1902
1903
// If LoopSimplify form is not available, stay out of trouble. Some notes:
1904
// - LSR currently only supports LoopSimplify-form loops. Indvars'
1905
// canonicalization can be a pessimization without LSR to "clean up"
1906
// afterwards.
1907
// - We depend on having a preheader; in particular,
1908
// Loop::getCanonicalInductionVariable only supports loops with preheaders,
1909
// and we're in trouble if we can't find the induction variable even when
1910
// we've manually inserted one.
1911
// - LFTR relies on having a single backedge.
1912
if (!L->isLoopSimplifyForm())
1913
return false;
1914
1915
bool Changed = false;
1916
// If there are any floating-point recurrences, attempt to
1917
// transform them to use integer recurrences.
1918
Changed |= rewriteNonIntegerIVs(L);
1919
1920
// Create a rewriter object which we'll use to transform the code with.
1921
SCEVExpander Rewriter(*SE, DL, "indvars");
1922
#ifndef NDEBUG
1923
Rewriter.setDebugType(DEBUG_TYPE);
1924
#endif
1925
1926
// Eliminate redundant IV users.
1927
//
1928
// Simplification works best when run before other consumers of SCEV. We
1929
// attempt to avoid evaluating SCEVs for sign/zero extend operations until
1930
// other expressions involving loop IVs have been evaluated. This helps SCEV
1931
// set no-wrap flags before normalizing sign/zero extension.
1932
Rewriter.disableCanonicalMode();
1933
Changed |= simplifyAndExtend(L, Rewriter, LI);
1934
1935
// Check to see if we can compute the final value of any expressions
1936
// that are recurrent in the loop, and substitute the exit values from the
1937
// loop into any instructions outside of the loop that use the final values
1938
// of the current expressions.
1939
if (ReplaceExitValue != NeverRepl) {
1940
if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1941
ReplaceExitValue, DeadInsts)) {
1942
NumReplaced += Rewrites;
1943
Changed = true;
1944
}
1945
}
1946
1947
// Eliminate redundant IV cycles.
1948
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1949
1950
// Try to convert exit conditions to unsigned and rotate computation
1951
// out of the loop. Note: Handles invalidation internally if needed.
1952
Changed |= canonicalizeExitCondition(L);
1953
1954
// Try to eliminate loop exits based on analyzeable exit counts
1955
if (optimizeLoopExits(L, Rewriter)) {
1956
Changed = true;
1957
// Given we've changed exit counts, notify SCEV
1958
// Some nested loops may share same folded exit basic block,
1959
// thus we need to notify top most loop.
1960
SE->forgetTopmostLoop(L);
1961
}
1962
1963
// Try to form loop invariant tests for loop exits by changing how many
1964
// iterations of the loop run when that is unobservable.
1965
if (predicateLoopExits(L, Rewriter)) {
1966
Changed = true;
1967
// Given we've changed exit counts, notify SCEV
1968
SE->forgetLoop(L);
1969
}
1970
1971
// If we have a trip count expression, rewrite the loop's exit condition
1972
// using it.
1973
if (!DisableLFTR) {
1974
BasicBlock *PreHeader = L->getLoopPreheader();
1975
1976
SmallVector<BasicBlock*, 16> ExitingBlocks;
1977
L->getExitingBlocks(ExitingBlocks);
1978
for (BasicBlock *ExitingBB : ExitingBlocks) {
1979
// Can't rewrite non-branch yet.
1980
if (!isa<BranchInst>(ExitingBB->getTerminator()))
1981
continue;
1982
1983
// If our exitting block exits multiple loops, we can only rewrite the
1984
// innermost one. Otherwise, we're changing how many times the innermost
1985
// loop runs before it exits.
1986
if (LI->getLoopFor(ExitingBB) != L)
1987
continue;
1988
1989
if (!needsLFTR(L, ExitingBB))
1990
continue;
1991
1992
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1993
if (isa<SCEVCouldNotCompute>(ExitCount))
1994
continue;
1995
1996
// This was handled above, but as we form SCEVs, we can sometimes refine
1997
// existing ones; this allows exit counts to be folded to zero which
1998
// weren't when optimizeLoopExits saw them. Arguably, we should iterate
1999
// until stable to handle cases like this better.
2000
if (ExitCount->isZero())
2001
continue;
2002
2003
PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2004
if (!IndVar)
2005
continue;
2006
2007
// Avoid high cost expansions. Note: This heuristic is questionable in
2008
// that our definition of "high cost" is not exactly principled.
2009
if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2010
TTI, PreHeader->getTerminator()))
2011
continue;
2012
2013
if (!Rewriter.isSafeToExpand(ExitCount))
2014
continue;
2015
2016
Changed |= linearFunctionTestReplace(L, ExitingBB,
2017
ExitCount, IndVar,
2018
Rewriter);
2019
}
2020
}
2021
// Clear the rewriter cache, because values that are in the rewriter's cache
2022
// can be deleted in the loop below, causing the AssertingVH in the cache to
2023
// trigger.
2024
Rewriter.clear();
2025
2026
// Now that we're done iterating through lists, clean up any instructions
2027
// which are now dead.
2028
while (!DeadInsts.empty()) {
2029
Value *V = DeadInsts.pop_back_val();
2030
2031
if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2032
Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2033
else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2034
Changed |=
2035
RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2036
}
2037
2038
// The Rewriter may not be used from this point on.
2039
2040
// Loop-invariant instructions in the preheader that aren't used in the
2041
// loop may be sunk below the loop to reduce register pressure.
2042
Changed |= sinkUnusedInvariants(L);
2043
2044
// rewriteFirstIterationLoopExitValues does not rely on the computation of
2045
// trip count and therefore can further simplify exit values in addition to
2046
// rewriteLoopExitValues.
2047
Changed |= rewriteFirstIterationLoopExitValues(L);
2048
2049
// Clean up dead instructions.
2050
Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2051
2052
// Check a post-condition.
2053
assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2054
"Indvars did not preserve LCSSA!");
2055
if (VerifyMemorySSA && MSSAU)
2056
MSSAU->getMemorySSA()->verifyMemorySSA();
2057
2058
return Changed;
2059
}
2060
2061
PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2062
LoopStandardAnalysisResults &AR,
2063
LPMUpdater &) {
2064
Function *F = L.getHeader()->getParent();
2065
const DataLayout &DL = F->getDataLayout();
2066
2067
IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2068
WidenIndVars && AllowIVWidening);
2069
if (!IVS.run(&L))
2070
return PreservedAnalyses::all();
2071
2072
auto PA = getLoopPassPreservedAnalyses();
2073
PA.preserveSet<CFGAnalyses>();
2074
if (IVS.runUnswitching()) {
2075
AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
2076
PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2077
}
2078
2079
if (AR.MSSA)
2080
PA.preserve<MemorySSAAnalysis>();
2081
return PA;
2082
}
2083
2084