Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp
35266 views
//===-- LoopPredication.cpp - Guard based loop predication pass -----------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// The LoopPredication pass tries to convert loop variant range checks to loop9// invariant by widening checks across loop iterations. For example, it will10// convert11//12// for (i = 0; i < n; i++) {13// guard(i < len);14// ...15// }16//17// to18//19// for (i = 0; i < n; i++) {20// guard(n - 1 < len);21// ...22// }23//24// After this transformation the condition of the guard is loop invariant, so25// loop-unswitch can later unswitch the loop by this condition which basically26// predicates the loop by the widened condition:27//28// if (n - 1 < len)29// for (i = 0; i < n; i++) {30// ...31// }32// else33// deoptimize34//35// It's tempting to rely on SCEV here, but it has proven to be problematic.36// Generally the facts SCEV provides about the increment step of add37// recurrences are true if the backedge of the loop is taken, which implicitly38// assumes that the guard doesn't fail. Using these facts to optimize the39// guard results in a circular logic where the guard is optimized under the40// assumption that it never fails.41//42// For example, in the loop below the induction variable will be marked as nuw43// basing on the guard. Basing on nuw the guard predicate will be considered44// monotonic. Given a monotonic condition it's tempting to replace the induction45// variable in the condition with its value on the last iteration. But this46// transformation is not correct, e.g. e = 4, b = 5 breaks the loop.47//48// for (int i = b; i != e; i++)49// guard(i u< len)50//51// One of the ways to reason about this problem is to use an inductive proof52// approach. Given the loop:53//54// if (B(0)) {55// do {56// I = PHI(0, I.INC)57// I.INC = I + Step58// guard(G(I));59// } while (B(I));60// }61//62// where B(x) and G(x) are predicates that map integers to booleans, we want a63// loop invariant expression M such the following program has the same semantics64// as the above:65//66// if (B(0)) {67// do {68// I = PHI(0, I.INC)69// I.INC = I + Step70// guard(G(0) && M);71// } while (B(I));72// }73//74// One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step)75//76// Informal proof that the transformation above is correct:77//78// By the definition of guards we can rewrite the guard condition to:79// G(I) && G(0) && M80//81// Let's prove that for each iteration of the loop:82// G(0) && M => G(I)83// And the condition above can be simplified to G(Start) && M.84//85// Induction base.86// G(0) && M => G(0)87//88// Induction step. Assuming G(0) && M => G(I) on the subsequent89// iteration:90//91// B(I) is true because it's the backedge condition.92// G(I) is true because the backedge is guarded by this condition.93//94// So M = forall X . (G(X) && B(X)) => G(X + Step) implies G(I + Step).95//96// Note that we can use anything stronger than M, i.e. any condition which97// implies M.98//99// When S = 1 (i.e. forward iterating loop), the transformation is supported100// when:101// * The loop has a single latch with the condition of the form:102// B(X) = latchStart + X <pred> latchLimit,103// where <pred> is u<, u<=, s<, or s<=.104// * The guard condition is of the form105// G(X) = guardStart + X u< guardLimit106//107// For the ult latch comparison case M is:108// forall X . guardStart + X u< guardLimit && latchStart + X <u latchLimit =>109// guardStart + X + 1 u< guardLimit110//111// The only way the antecedent can be true and the consequent can be false is112// if113// X == guardLimit - 1 - guardStart114// (and guardLimit is non-zero, but we won't use this latter fact).115// If X == guardLimit - 1 - guardStart then the second half of the antecedent is116// latchStart + guardLimit - 1 - guardStart u< latchLimit117// and its negation is118// latchStart + guardLimit - 1 - guardStart u>= latchLimit119//120// In other words, if121// latchLimit u<= latchStart + guardLimit - 1 - guardStart122// then:123// (the ranges below are written in ConstantRange notation, where [A, B) is the124// set for (I = A; I != B; I++ /*maywrap*/) yield(I);)125//126// forall X . guardStart + X u< guardLimit &&127// latchStart + X u< latchLimit =>128// guardStart + X + 1 u< guardLimit129// == forall X . guardStart + X u< guardLimit &&130// latchStart + X u< latchStart + guardLimit - 1 - guardStart =>131// guardStart + X + 1 u< guardLimit132// == forall X . (guardStart + X) in [0, guardLimit) &&133// (latchStart + X) in [0, latchStart + guardLimit - 1 - guardStart) =>134// (guardStart + X + 1) in [0, guardLimit)135// == forall X . X in [-guardStart, guardLimit - guardStart) &&136// X in [-latchStart, guardLimit - 1 - guardStart) =>137// X in [-guardStart - 1, guardLimit - guardStart - 1)138// == true139//140// So the widened condition is:141// guardStart u< guardLimit &&142// latchStart + guardLimit - 1 - guardStart u>= latchLimit143// Similarly for ule condition the widened condition is:144// guardStart u< guardLimit &&145// latchStart + guardLimit - 1 - guardStart u> latchLimit146// For slt condition the widened condition is:147// guardStart u< guardLimit &&148// latchStart + guardLimit - 1 - guardStart s>= latchLimit149// For sle condition the widened condition is:150// guardStart u< guardLimit &&151// latchStart + guardLimit - 1 - guardStart s> latchLimit152//153// When S = -1 (i.e. reverse iterating loop), the transformation is supported154// when:155// * The loop has a single latch with the condition of the form:156// B(X) = X <pred> latchLimit, where <pred> is u>, u>=, s>, or s>=.157// * The guard condition is of the form158// G(X) = X - 1 u< guardLimit159//160// For the ugt latch comparison case M is:161// forall X. X-1 u< guardLimit and X u> latchLimit => X-2 u< guardLimit162//163// The only way the antecedent can be true and the consequent can be false is if164// X == 1.165// If X == 1 then the second half of the antecedent is166// 1 u> latchLimit, and its negation is latchLimit u>= 1.167//168// So the widened condition is:169// guardStart u< guardLimit && latchLimit u>= 1.170// Similarly for sgt condition the widened condition is:171// guardStart u< guardLimit && latchLimit s>= 1.172// For uge condition the widened condition is:173// guardStart u< guardLimit && latchLimit u> 1.174// For sge condition the widened condition is:175// guardStart u< guardLimit && latchLimit s> 1.176//===----------------------------------------------------------------------===//177178#include "llvm/Transforms/Scalar/LoopPredication.h"179#include "llvm/ADT/Statistic.h"180#include "llvm/Analysis/AliasAnalysis.h"181#include "llvm/Analysis/BranchProbabilityInfo.h"182#include "llvm/Analysis/GuardUtils.h"183#include "llvm/Analysis/LoopInfo.h"184#include "llvm/Analysis/LoopPass.h"185#include "llvm/Analysis/MemorySSA.h"186#include "llvm/Analysis/MemorySSAUpdater.h"187#include "llvm/Analysis/ScalarEvolution.h"188#include "llvm/Analysis/ScalarEvolutionExpressions.h"189#include "llvm/IR/Function.h"190#include "llvm/IR/IntrinsicInst.h"191#include "llvm/IR/Module.h"192#include "llvm/IR/PatternMatch.h"193#include "llvm/IR/ProfDataUtils.h"194#include "llvm/InitializePasses.h"195#include "llvm/Pass.h"196#include "llvm/Support/CommandLine.h"197#include "llvm/Support/Debug.h"198#include "llvm/Transforms/Scalar.h"199#include "llvm/Transforms/Utils/GuardUtils.h"200#include "llvm/Transforms/Utils/Local.h"201#include "llvm/Transforms/Utils/LoopUtils.h"202#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"203#include <optional>204205#define DEBUG_TYPE "loop-predication"206207STATISTIC(TotalConsidered, "Number of guards considered");208STATISTIC(TotalWidened, "Number of checks widened");209210using namespace llvm;211212static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation",213cl::Hidden, cl::init(true));214215static cl::opt<bool> EnableCountDownLoop("loop-predication-enable-count-down-loop",216cl::Hidden, cl::init(true));217218static cl::opt<bool>219SkipProfitabilityChecks("loop-predication-skip-profitability-checks",220cl::Hidden, cl::init(false));221222// This is the scale factor for the latch probability. We use this during223// profitability analysis to find other exiting blocks that have a much higher224// probability of exiting the loop instead of loop exiting via latch.225// This value should be greater than 1 for a sane profitability check.226static cl::opt<float> LatchExitProbabilityScale(227"loop-predication-latch-probability-scale", cl::Hidden, cl::init(2.0),228cl::desc("scale factor for the latch probability. Value should be greater "229"than 1. Lower values are ignored"));230231static cl::opt<bool> PredicateWidenableBranchGuards(232"loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden,233cl::desc("Whether or not we should predicate guards "234"expressed as widenable branches to deoptimize blocks"),235cl::init(true));236237static cl::opt<bool> InsertAssumesOfPredicatedGuardsConditions(238"loop-predication-insert-assumes-of-predicated-guards-conditions",239cl::Hidden,240cl::desc("Whether or not we should insert assumes of conditions of "241"predicated guards"),242cl::init(true));243244namespace {245/// Represents an induction variable check:246/// icmp Pred, <induction variable>, <loop invariant limit>247struct LoopICmp {248ICmpInst::Predicate Pred;249const SCEVAddRecExpr *IV;250const SCEV *Limit;251LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,252const SCEV *Limit)253: Pred(Pred), IV(IV), Limit(Limit) {}254LoopICmp() = default;255void dump() {256dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV257<< ", Limit = " << *Limit << "\n";258}259};260261class LoopPredication {262AliasAnalysis *AA;263DominatorTree *DT;264ScalarEvolution *SE;265LoopInfo *LI;266MemorySSAUpdater *MSSAU;267268Loop *L;269const DataLayout *DL;270BasicBlock *Preheader;271LoopICmp LatchCheck;272273bool isSupportedStep(const SCEV* Step);274std::optional<LoopICmp> parseLoopICmp(ICmpInst *ICI);275std::optional<LoopICmp> parseLoopLatchICmp();276277/// Return an insertion point suitable for inserting a safe to speculate278/// instruction whose only user will be 'User' which has operands 'Ops'. A279/// trivial result would be the at the User itself, but we try to return a280/// loop invariant location if possible.281Instruction *findInsertPt(Instruction *User, ArrayRef<Value*> Ops);282/// Same as above, *except* that this uses the SCEV definition of invariant283/// which is that an expression *can be made* invariant via SCEVExpander.284/// Thus, this version is only suitable for finding an insert point to be285/// passed to SCEVExpander!286Instruction *findInsertPt(const SCEVExpander &Expander, Instruction *User,287ArrayRef<const SCEV *> Ops);288289/// Return true if the value is known to produce a single fixed value across290/// all iterations on which it executes. Note that this does not imply291/// speculation safety. That must be established separately.292bool isLoopInvariantValue(const SCEV* S);293294Value *expandCheck(SCEVExpander &Expander, Instruction *Guard,295ICmpInst::Predicate Pred, const SCEV *LHS,296const SCEV *RHS);297298std::optional<Value *> widenICmpRangeCheck(ICmpInst *ICI,299SCEVExpander &Expander,300Instruction *Guard);301std::optional<Value *>302widenICmpRangeCheckIncrementingLoop(LoopICmp LatchCheck, LoopICmp RangeCheck,303SCEVExpander &Expander,304Instruction *Guard);305std::optional<Value *>306widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck, LoopICmp RangeCheck,307SCEVExpander &Expander,308Instruction *Guard);309void widenChecks(SmallVectorImpl<Value *> &Checks,310SmallVectorImpl<Value *> &WidenedChecks,311SCEVExpander &Expander, Instruction *Guard);312bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);313bool widenWidenableBranchGuardConditions(BranchInst *Guard, SCEVExpander &Expander);314// If the loop always exits through another block in the loop, we should not315// predicate based on the latch check. For example, the latch check can be a316// very coarse grained check and there can be more fine grained exit checks317// within the loop.318bool isLoopProfitableToPredicate();319320bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);321322public:323LoopPredication(AliasAnalysis *AA, DominatorTree *DT, ScalarEvolution *SE,324LoopInfo *LI, MemorySSAUpdater *MSSAU)325: AA(AA), DT(DT), SE(SE), LI(LI), MSSAU(MSSAU){};326bool runOnLoop(Loop *L);327};328329} // end namespace330331PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,332LoopStandardAnalysisResults &AR,333LPMUpdater &U) {334std::unique_ptr<MemorySSAUpdater> MSSAU;335if (AR.MSSA)336MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);337LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI,338MSSAU ? MSSAU.get() : nullptr);339if (!LP.runOnLoop(&L))340return PreservedAnalyses::all();341342auto PA = getLoopPassPreservedAnalyses();343if (AR.MSSA)344PA.preserve<MemorySSAAnalysis>();345return PA;346}347348std::optional<LoopICmp> LoopPredication::parseLoopICmp(ICmpInst *ICI) {349auto Pred = ICI->getPredicate();350auto *LHS = ICI->getOperand(0);351auto *RHS = ICI->getOperand(1);352353const SCEV *LHSS = SE->getSCEV(LHS);354if (isa<SCEVCouldNotCompute>(LHSS))355return std::nullopt;356const SCEV *RHSS = SE->getSCEV(RHS);357if (isa<SCEVCouldNotCompute>(RHSS))358return std::nullopt;359360// Canonicalize RHS to be loop invariant bound, LHS - a loop computable IV361if (SE->isLoopInvariant(LHSS, L)) {362std::swap(LHS, RHS);363std::swap(LHSS, RHSS);364Pred = ICmpInst::getSwappedPredicate(Pred);365}366367const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHSS);368if (!AR || AR->getLoop() != L)369return std::nullopt;370371return LoopICmp(Pred, AR, RHSS);372}373374Value *LoopPredication::expandCheck(SCEVExpander &Expander,375Instruction *Guard,376ICmpInst::Predicate Pred, const SCEV *LHS,377const SCEV *RHS) {378Type *Ty = LHS->getType();379assert(Ty == RHS->getType() && "expandCheck operands have different types?");380381if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L)) {382IRBuilder<> Builder(Guard);383if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))384return Builder.getTrue();385if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred),386LHS, RHS))387return Builder.getFalse();388}389390Value *LHSV =391Expander.expandCodeFor(LHS, Ty, findInsertPt(Expander, Guard, {LHS}));392Value *RHSV =393Expander.expandCodeFor(RHS, Ty, findInsertPt(Expander, Guard, {RHS}));394IRBuilder<> Builder(findInsertPt(Guard, {LHSV, RHSV}));395return Builder.CreateICmp(Pred, LHSV, RHSV);396}397398// Returns true if its safe to truncate the IV to RangeCheckType.399// When the IV type is wider than the range operand type, we can still do loop400// predication, by generating SCEVs for the range and latch that are of the401// same type. We achieve this by generating a SCEV truncate expression for the402// latch IV. This is done iff truncation of the IV is a safe operation,403// without loss of information.404// Another way to achieve this is by generating a wider type SCEV for the405// range check operand, however, this needs a more involved check that406// operands do not overflow. This can lead to loss of information when the407// range operand is of the form: add i32 %offset, %iv. We need to prove that408// sext(x + y) is same as sext(x) + sext(y).409// This function returns true if we can safely represent the IV type in410// the RangeCheckType without loss of information.411static bool isSafeToTruncateWideIVType(const DataLayout &DL,412ScalarEvolution &SE,413const LoopICmp LatchCheck,414Type *RangeCheckType) {415if (!EnableIVTruncation)416return false;417assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()).getFixedValue() >418DL.getTypeSizeInBits(RangeCheckType).getFixedValue() &&419"Expected latch check IV type to be larger than range check operand "420"type!");421// The start and end values of the IV should be known. This is to guarantee422// that truncating the wide type will not lose information.423auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);424auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());425if (!Limit || !Start)426return false;427// This check makes sure that the IV does not change sign during loop428// iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,429// LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the430// IV wraps around, and the truncation of the IV would lose the range of431// iterations between 2^32 and 2^64.432if (!SE.getMonotonicPredicateType(LatchCheck.IV, LatchCheck.Pred))433return false;434// The active bits should be less than the bits in the RangeCheckType. This435// guarantees that truncating the latch check to RangeCheckType is a safe436// operation.437auto RangeCheckTypeBitSize =438DL.getTypeSizeInBits(RangeCheckType).getFixedValue();439return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&440Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;441}442443444// Return an LoopICmp describing a latch check equivlent to LatchCheck but with445// the requested type if safe to do so. May involve the use of a new IV.446static std::optional<LoopICmp> generateLoopLatchCheck(const DataLayout &DL,447ScalarEvolution &SE,448const LoopICmp LatchCheck,449Type *RangeCheckType) {450451auto *LatchType = LatchCheck.IV->getType();452if (RangeCheckType == LatchType)453return LatchCheck;454// For now, bail out if latch type is narrower than range type.455if (DL.getTypeSizeInBits(LatchType).getFixedValue() <456DL.getTypeSizeInBits(RangeCheckType).getFixedValue())457return std::nullopt;458if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType))459return std::nullopt;460// We can now safely identify the truncated version of the IV and limit for461// RangeCheckType.462LoopICmp NewLatchCheck;463NewLatchCheck.Pred = LatchCheck.Pred;464NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>(465SE.getTruncateExpr(LatchCheck.IV, RangeCheckType));466if (!NewLatchCheck.IV)467return std::nullopt;468NewLatchCheck.Limit = SE.getTruncateExpr(LatchCheck.Limit, RangeCheckType);469LLVM_DEBUG(dbgs() << "IV of type: " << *LatchType470<< "can be represented as range check type:"471<< *RangeCheckType << "\n");472LLVM_DEBUG(dbgs() << "LatchCheck.IV: " << *NewLatchCheck.IV << "\n");473LLVM_DEBUG(dbgs() << "LatchCheck.Limit: " << *NewLatchCheck.Limit << "\n");474return NewLatchCheck;475}476477bool LoopPredication::isSupportedStep(const SCEV* Step) {478return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop);479}480481Instruction *LoopPredication::findInsertPt(Instruction *Use,482ArrayRef<Value*> Ops) {483for (Value *Op : Ops)484if (!L->isLoopInvariant(Op))485return Use;486return Preheader->getTerminator();487}488489Instruction *LoopPredication::findInsertPt(const SCEVExpander &Expander,490Instruction *Use,491ArrayRef<const SCEV *> Ops) {492// Subtlety: SCEV considers things to be invariant if the value produced is493// the same across iterations. This is not the same as being able to494// evaluate outside the loop, which is what we actually need here.495for (const SCEV *Op : Ops)496if (!SE->isLoopInvariant(Op, L) ||497!Expander.isSafeToExpandAt(Op, Preheader->getTerminator()))498return Use;499return Preheader->getTerminator();500}501502bool LoopPredication::isLoopInvariantValue(const SCEV* S) {503// Handling expressions which produce invariant results, but *haven't* yet504// been removed from the loop serves two important purposes.505// 1) Most importantly, it resolves a pass ordering cycle which would506// otherwise need us to iteration licm, loop-predication, and either507// loop-unswitch or loop-peeling to make progress on examples with lots of508// predicable range checks in a row. (Since, in the general case, we can't509// hoist the length checks until the dominating checks have been discharged510// as we can't prove doing so is safe.)511// 2) As a nice side effect, this exposes the value of peeling or unswitching512// much more obviously in the IR. Otherwise, the cost modeling for other513// transforms would end up needing to duplicate all of this logic to model a514// check which becomes predictable based on a modeled peel or unswitch.515//516// The cost of doing so in the worst case is an extra fill from the stack in517// the loop to materialize the loop invariant test value instead of checking518// against the original IV which is presumable in a register inside the loop.519// Such cases are presumably rare, and hint at missing oppurtunities for520// other passes.521522if (SE->isLoopInvariant(S, L))523// Note: This the SCEV variant, so the original Value* may be within the524// loop even though SCEV has proven it is loop invariant.525return true;526527// Handle a particular important case which SCEV doesn't yet know about which528// shows up in range checks on arrays with immutable lengths.529// TODO: This should be sunk inside SCEV.530if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))531if (const auto *LI = dyn_cast<LoadInst>(U->getValue()))532if (LI->isUnordered() && L->hasLoopInvariantOperands(LI))533if (!isModSet(AA->getModRefInfoMask(LI->getOperand(0))) ||534LI->hasMetadata(LLVMContext::MD_invariant_load))535return true;536return false;537}538539std::optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(540LoopICmp LatchCheck, LoopICmp RangeCheck, SCEVExpander &Expander,541Instruction *Guard) {542auto *Ty = RangeCheck.IV->getType();543// Generate the widened condition for the forward loop:544// guardStart u< guardLimit &&545// latchLimit <pred> guardLimit - 1 - guardStart + latchStart546// where <pred> depends on the latch condition predicate. See the file547// header comment for the reasoning.548// guardLimit - guardStart + latchStart - 1549const SCEV *GuardStart = RangeCheck.IV->getStart();550const SCEV *GuardLimit = RangeCheck.Limit;551const SCEV *LatchStart = LatchCheck.IV->getStart();552const SCEV *LatchLimit = LatchCheck.Limit;553// Subtlety: We need all the values to be *invariant* across all iterations,554// but we only need to check expansion safety for those which *aren't*555// already guaranteed to dominate the guard.556if (!isLoopInvariantValue(GuardStart) ||557!isLoopInvariantValue(GuardLimit) ||558!isLoopInvariantValue(LatchStart) ||559!isLoopInvariantValue(LatchLimit)) {560LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");561return std::nullopt;562}563if (!Expander.isSafeToExpandAt(LatchStart, Guard) ||564!Expander.isSafeToExpandAt(LatchLimit, Guard)) {565LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");566return std::nullopt;567}568569// guardLimit - guardStart + latchStart - 1570const SCEV *RHS =571SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart),572SE->getMinusSCEV(LatchStart, SE->getOne(Ty)));573auto LimitCheckPred =574ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred);575576LLVM_DEBUG(dbgs() << "LHS: " << *LatchLimit << "\n");577LLVM_DEBUG(dbgs() << "RHS: " << *RHS << "\n");578LLVM_DEBUG(dbgs() << "Pred: " << LimitCheckPred << "\n");579580auto *LimitCheck =581expandCheck(Expander, Guard, LimitCheckPred, LatchLimit, RHS);582auto *FirstIterationCheck = expandCheck(Expander, Guard, RangeCheck.Pred,583GuardStart, GuardLimit);584IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));585return Builder.CreateFreeze(586Builder.CreateAnd(FirstIterationCheck, LimitCheck));587}588589std::optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(590LoopICmp LatchCheck, LoopICmp RangeCheck, SCEVExpander &Expander,591Instruction *Guard) {592auto *Ty = RangeCheck.IV->getType();593const SCEV *GuardStart = RangeCheck.IV->getStart();594const SCEV *GuardLimit = RangeCheck.Limit;595const SCEV *LatchStart = LatchCheck.IV->getStart();596const SCEV *LatchLimit = LatchCheck.Limit;597// Subtlety: We need all the values to be *invariant* across all iterations,598// but we only need to check expansion safety for those which *aren't*599// already guaranteed to dominate the guard.600if (!isLoopInvariantValue(GuardStart) ||601!isLoopInvariantValue(GuardLimit) ||602!isLoopInvariantValue(LatchStart) ||603!isLoopInvariantValue(LatchLimit)) {604LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");605return std::nullopt;606}607if (!Expander.isSafeToExpandAt(LatchStart, Guard) ||608!Expander.isSafeToExpandAt(LatchLimit, Guard)) {609LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");610return std::nullopt;611}612// The decrement of the latch check IV should be the same as the613// rangeCheckIV.614auto *PostDecLatchCheckIV = LatchCheck.IV->getPostIncExpr(*SE);615if (RangeCheck.IV != PostDecLatchCheckIV) {616LLVM_DEBUG(dbgs() << "Not the same. PostDecLatchCheckIV: "617<< *PostDecLatchCheckIV618<< " and RangeCheckIV: " << *RangeCheck.IV << "\n");619return std::nullopt;620}621622// Generate the widened condition for CountDownLoop:623// guardStart u< guardLimit &&624// latchLimit <pred> 1.625// See the header comment for reasoning of the checks.626auto LimitCheckPred =627ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred);628auto *FirstIterationCheck = expandCheck(Expander, Guard,629ICmpInst::ICMP_ULT,630GuardStart, GuardLimit);631auto *LimitCheck = expandCheck(Expander, Guard, LimitCheckPred, LatchLimit,632SE->getOne(Ty));633IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));634return Builder.CreateFreeze(635Builder.CreateAnd(FirstIterationCheck, LimitCheck));636}637638static void normalizePredicate(ScalarEvolution *SE, Loop *L,639LoopICmp& RC) {640// LFTR canonicalizes checks to the ICMP_NE/EQ form; normalize back to the641// ULT/UGE form for ease of handling by our caller.642if (ICmpInst::isEquality(RC.Pred) &&643RC.IV->getStepRecurrence(*SE)->isOne() &&644SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit))645RC.Pred = RC.Pred == ICmpInst::ICMP_NE ?646ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;647}648649/// If ICI can be widened to a loop invariant condition emits the loop650/// invariant condition in the loop preheader and return it, otherwise651/// returns std::nullopt.652std::optional<Value *>653LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,654Instruction *Guard) {655LLVM_DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");656LLVM_DEBUG(ICI->dump());657658// parseLoopStructure guarantees that the latch condition is:659// ++i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=.660// We are looking for the range checks of the form:661// i u< guardLimit662auto RangeCheck = parseLoopICmp(ICI);663if (!RangeCheck) {664LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");665return std::nullopt;666}667LLVM_DEBUG(dbgs() << "Guard check:\n");668LLVM_DEBUG(RangeCheck->dump());669if (RangeCheck->Pred != ICmpInst::ICMP_ULT) {670LLVM_DEBUG(dbgs() << "Unsupported range check predicate("671<< RangeCheck->Pred << ")!\n");672return std::nullopt;673}674auto *RangeCheckIV = RangeCheck->IV;675if (!RangeCheckIV->isAffine()) {676LLVM_DEBUG(dbgs() << "Range check IV is not affine!\n");677return std::nullopt;678}679auto *Step = RangeCheckIV->getStepRecurrence(*SE);680// We cannot just compare with latch IV step because the latch and range IVs681// may have different types.682if (!isSupportedStep(Step)) {683LLVM_DEBUG(dbgs() << "Range check and latch have IVs different steps!\n");684return std::nullopt;685}686auto *Ty = RangeCheckIV->getType();687auto CurrLatchCheckOpt = generateLoopLatchCheck(*DL, *SE, LatchCheck, Ty);688if (!CurrLatchCheckOpt) {689LLVM_DEBUG(dbgs() << "Failed to generate a loop latch check "690"corresponding to range type: "691<< *Ty << "\n");692return std::nullopt;693}694695LoopICmp CurrLatchCheck = *CurrLatchCheckOpt;696// At this point, the range and latch step should have the same type, but need697// not have the same value (we support both 1 and -1 steps).698assert(Step->getType() ==699CurrLatchCheck.IV->getStepRecurrence(*SE)->getType() &&700"Range and latch steps should be of same type!");701if (Step != CurrLatchCheck.IV->getStepRecurrence(*SE)) {702LLVM_DEBUG(dbgs() << "Range and latch have different step values!\n");703return std::nullopt;704}705706if (Step->isOne())707return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck,708Expander, Guard);709else {710assert(Step->isAllOnesValue() && "Step should be -1!");711return widenICmpRangeCheckDecrementingLoop(CurrLatchCheck, *RangeCheck,712Expander, Guard);713}714}715716void LoopPredication::widenChecks(SmallVectorImpl<Value *> &Checks,717SmallVectorImpl<Value *> &WidenedChecks,718SCEVExpander &Expander, Instruction *Guard) {719for (auto &Check : Checks)720if (ICmpInst *ICI = dyn_cast<ICmpInst>(Check))721if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Guard)) {722WidenedChecks.push_back(Check);723Check = *NewRangeCheck;724}725}726727bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,728SCEVExpander &Expander) {729LLVM_DEBUG(dbgs() << "Processing guard:\n");730LLVM_DEBUG(Guard->dump());731732TotalConsidered++;733SmallVector<Value *, 4> Checks;734SmallVector<Value *> WidenedChecks;735parseWidenableGuard(Guard, Checks);736widenChecks(Checks, WidenedChecks, Expander, Guard);737if (WidenedChecks.empty())738return false;739740TotalWidened += WidenedChecks.size();741742// Emit the new guard condition743IRBuilder<> Builder(findInsertPt(Guard, Checks));744Value *AllChecks = Builder.CreateAnd(Checks);745auto *OldCond = Guard->getOperand(0);746Guard->setOperand(0, AllChecks);747if (InsertAssumesOfPredicatedGuardsConditions) {748Builder.SetInsertPoint(&*++BasicBlock::iterator(Guard));749Builder.CreateAssumption(OldCond);750}751RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);752753LLVM_DEBUG(dbgs() << "Widened checks = " << WidenedChecks.size() << "\n");754return true;755}756757bool LoopPredication::widenWidenableBranchGuardConditions(758BranchInst *BI, SCEVExpander &Expander) {759assert(isGuardAsWidenableBranch(BI) && "Must be!");760LLVM_DEBUG(dbgs() << "Processing guard:\n");761LLVM_DEBUG(BI->dump());762763TotalConsidered++;764SmallVector<Value *, 4> Checks;765SmallVector<Value *> WidenedChecks;766parseWidenableGuard(BI, Checks);767// At the moment, our matching logic for wideable conditions implicitly768// assumes we preserve the form: (br (and Cond, WC())). FIXME769auto WC = extractWidenableCondition(BI);770Checks.push_back(WC);771widenChecks(Checks, WidenedChecks, Expander, BI);772if (WidenedChecks.empty())773return false;774775TotalWidened += WidenedChecks.size();776777// Emit the new guard condition778IRBuilder<> Builder(findInsertPt(BI, Checks));779Value *AllChecks = Builder.CreateAnd(Checks);780auto *OldCond = BI->getCondition();781BI->setCondition(AllChecks);782if (InsertAssumesOfPredicatedGuardsConditions) {783BasicBlock *IfTrueBB = BI->getSuccessor(0);784Builder.SetInsertPoint(IfTrueBB, IfTrueBB->getFirstInsertionPt());785// If this block has other predecessors, we might not be able to use Cond.786// In this case, create a Phi where every other input is `true` and input787// from guard block is Cond.788Value *AssumeCond = Builder.CreateAnd(WidenedChecks);789if (!IfTrueBB->getUniquePredecessor()) {790auto *GuardBB = BI->getParent();791auto *PN = Builder.CreatePHI(AssumeCond->getType(), pred_size(IfTrueBB),792"assume.cond");793for (auto *Pred : predecessors(IfTrueBB))794PN->addIncoming(Pred == GuardBB ? AssumeCond : Builder.getTrue(), Pred);795AssumeCond = PN;796}797Builder.CreateAssumption(AssumeCond);798}799RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);800assert(isGuardAsWidenableBranch(BI) &&801"Stopped being a guard after transform?");802803LLVM_DEBUG(dbgs() << "Widened checks = " << WidenedChecks.size() << "\n");804return true;805}806807std::optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {808using namespace PatternMatch;809810BasicBlock *LoopLatch = L->getLoopLatch();811if (!LoopLatch) {812LLVM_DEBUG(dbgs() << "The loop doesn't have a single latch!\n");813return std::nullopt;814}815816auto *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());817if (!BI || !BI->isConditional()) {818LLVM_DEBUG(dbgs() << "Failed to match the latch terminator!\n");819return std::nullopt;820}821BasicBlock *TrueDest = BI->getSuccessor(0);822assert(823(TrueDest == L->getHeader() || BI->getSuccessor(1) == L->getHeader()) &&824"One of the latch's destinations must be the header");825826auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());827if (!ICI) {828LLVM_DEBUG(dbgs() << "Failed to match the latch condition!\n");829return std::nullopt;830}831auto Result = parseLoopICmp(ICI);832if (!Result) {833LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");834return std::nullopt;835}836837if (TrueDest != L->getHeader())838Result->Pred = ICmpInst::getInversePredicate(Result->Pred);839840// Check affine first, so if it's not we don't try to compute the step841// recurrence.842if (!Result->IV->isAffine()) {843LLVM_DEBUG(dbgs() << "The induction variable is not affine!\n");844return std::nullopt;845}846847auto *Step = Result->IV->getStepRecurrence(*SE);848if (!isSupportedStep(Step)) {849LLVM_DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n");850return std::nullopt;851}852853auto IsUnsupportedPredicate = [](const SCEV *Step, ICmpInst::Predicate Pred) {854if (Step->isOne()) {855return Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_SLT &&856Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE;857} else {858assert(Step->isAllOnesValue() && "Step should be -1!");859return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT &&860Pred != ICmpInst::ICMP_UGE && Pred != ICmpInst::ICMP_SGE;861}862};863864normalizePredicate(SE, L, *Result);865if (IsUnsupportedPredicate(Step, Result->Pred)) {866LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred867<< ")!\n");868return std::nullopt;869}870871return Result;872}873874bool LoopPredication::isLoopProfitableToPredicate() {875if (SkipProfitabilityChecks)876return true;877878SmallVector<std::pair<BasicBlock *, BasicBlock *>, 8> ExitEdges;879L->getExitEdges(ExitEdges);880// If there is only one exiting edge in the loop, it is always profitable to881// predicate the loop.882if (ExitEdges.size() == 1)883return true;884885// Calculate the exiting probabilities of all exiting edges from the loop,886// starting with the LatchExitProbability.887// Heuristic for profitability: If any of the exiting blocks' probability of888// exiting the loop is larger than exiting through the latch block, it's not889// profitable to predicate the loop.890auto *LatchBlock = L->getLoopLatch();891assert(LatchBlock && "Should have a single latch at this point!");892auto *LatchTerm = LatchBlock->getTerminator();893assert(LatchTerm->getNumSuccessors() == 2 &&894"expected to be an exiting block with 2 succs!");895unsigned LatchBrExitIdx =896LatchTerm->getSuccessor(0) == L->getHeader() ? 1 : 0;897// We compute branch probabilities without BPI. We do not rely on BPI since898// Loop predication is usually run in an LPM and BPI is only preserved899// lossily within loop pass managers, while BPI has an inherent notion of900// being complete for an entire function.901902// If the latch exits into a deoptimize or an unreachable block, do not903// predicate on that latch check.904auto *LatchExitBlock = LatchTerm->getSuccessor(LatchBrExitIdx);905if (isa<UnreachableInst>(LatchTerm) ||906LatchExitBlock->getTerminatingDeoptimizeCall())907return false;908909// Latch terminator has no valid profile data, so nothing to check910// profitability on.911if (!hasValidBranchWeightMD(*LatchTerm))912return true;913914auto ComputeBranchProbability =915[&](const BasicBlock *ExitingBlock,916const BasicBlock *ExitBlock) -> BranchProbability {917auto *Term = ExitingBlock->getTerminator();918unsigned NumSucc = Term->getNumSuccessors();919if (MDNode *ProfileData = getValidBranchWeightMDNode(*Term)) {920SmallVector<uint32_t> Weights;921extractBranchWeights(ProfileData, Weights);922uint64_t Numerator = 0, Denominator = 0;923for (auto [i, Weight] : llvm::enumerate(Weights)) {924if (Term->getSuccessor(i) == ExitBlock)925Numerator += Weight;926Denominator += Weight;927}928// If all weights are zero act as if there was no profile data929if (Denominator == 0)930return BranchProbability::getBranchProbability(1, NumSucc);931return BranchProbability::getBranchProbability(Numerator, Denominator);932} else {933assert(LatchBlock != ExitingBlock &&934"Latch term should always have profile data!");935// No profile data, so we choose the weight as 1/num_of_succ(Src)936return BranchProbability::getBranchProbability(1, NumSucc);937}938};939940BranchProbability LatchExitProbability =941ComputeBranchProbability(LatchBlock, LatchExitBlock);942943// Protect against degenerate inputs provided by the user. Providing a value944// less than one, can invert the definition of profitable loop predication.945float ScaleFactor = LatchExitProbabilityScale;946if (ScaleFactor < 1) {947LLVM_DEBUG(948dbgs()949<< "Ignored user setting for loop-predication-latch-probability-scale: "950<< LatchExitProbabilityScale << "\n");951LLVM_DEBUG(dbgs() << "The value is set to 1.0\n");952ScaleFactor = 1.0;953}954const auto LatchProbabilityThreshold = LatchExitProbability * ScaleFactor;955956for (const auto &ExitEdge : ExitEdges) {957BranchProbability ExitingBlockProbability =958ComputeBranchProbability(ExitEdge.first, ExitEdge.second);959// Some exiting edge has higher probability than the latch exiting edge.960// No longer profitable to predicate.961if (ExitingBlockProbability > LatchProbabilityThreshold)962return false;963}964965// We have concluded that the most probable way to exit from the966// loop is through the latch (or there's no profile information and all967// exits are equally likely).968return true;969}970971/// If we can (cheaply) find a widenable branch which controls entry into the972/// loop, return it.973static BranchInst *FindWidenableTerminatorAboveLoop(Loop *L, LoopInfo &LI) {974// Walk back through any unconditional executed blocks and see if we can find975// a widenable condition which seems to control execution of this loop. Note976// that we predict that maythrow calls are likely untaken and thus that it's977// profitable to widen a branch before a maythrow call with a condition978// afterwards even though that may cause the slow path to run in a case where979// it wouldn't have otherwise.980BasicBlock *BB = L->getLoopPreheader();981if (!BB)982return nullptr;983do {984if (BasicBlock *Pred = BB->getSinglePredecessor())985if (BB == Pred->getSingleSuccessor()) {986BB = Pred;987continue;988}989break;990} while (true);991992if (BasicBlock *Pred = BB->getSinglePredecessor()) {993if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator()))994if (BI->getSuccessor(0) == BB && isWidenableBranch(BI))995return BI;996}997return nullptr;998}9991000/// Return the minimum of all analyzeable exit counts. This is an upper bound1001/// on the actual exit count. If there are not at least two analyzeable exits,1002/// returns SCEVCouldNotCompute.1003static const SCEV *getMinAnalyzeableBackedgeTakenCount(ScalarEvolution &SE,1004DominatorTree &DT,1005Loop *L) {1006SmallVector<BasicBlock *, 16> ExitingBlocks;1007L->getExitingBlocks(ExitingBlocks);10081009SmallVector<const SCEV *, 4> ExitCounts;1010for (BasicBlock *ExitingBB : ExitingBlocks) {1011const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);1012if (isa<SCEVCouldNotCompute>(ExitCount))1013continue;1014assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&1015"We should only have known counts for exiting blocks that "1016"dominate latch!");1017ExitCounts.push_back(ExitCount);1018}1019if (ExitCounts.size() < 2)1020return SE.getCouldNotCompute();1021return SE.getUMinFromMismatchedTypes(ExitCounts);1022}10231024/// This implements an analogous, but entirely distinct transform from the main1025/// loop predication transform. This one is phrased in terms of using a1026/// widenable branch *outside* the loop to allow us to simplify loop exits in a1027/// following loop. This is close in spirit to the IndVarSimplify transform1028/// of the same name, but is materially different widening loosens legality1029/// sharply.1030bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {1031// The transformation performed here aims to widen a widenable condition1032// above the loop such that all analyzeable exit leading to deopt are dead.1033// It assumes that the latch is the dominant exit for profitability and that1034// exits branching to deoptimizing blocks are rarely taken. It relies on the1035// semantics of widenable expressions for legality. (i.e. being able to fall1036// down the widenable path spuriously allows us to ignore exit order,1037// unanalyzeable exits, side effects, exceptional exits, and other challenges1038// which restrict the applicability of the non-WC based version of this1039// transform in IndVarSimplify.)1040//1041// NOTE ON POISON/UNDEF - We're hoisting an expression above guards which may1042// imply flags on the expression being hoisted and inserting new uses (flags1043// are only correct for current uses). The result is that we may be1044// inserting a branch on the value which can be either poison or undef. In1045// this case, the branch can legally go either way; we just need to avoid1046// introducing UB. This is achieved through the use of the freeze1047// instruction.10481049SmallVector<BasicBlock *, 16> ExitingBlocks;1050L->getExitingBlocks(ExitingBlocks);10511052if (ExitingBlocks.empty())1053return false; // Nothing to do.10541055auto *Latch = L->getLoopLatch();1056if (!Latch)1057return false;10581059auto *WidenableBR = FindWidenableTerminatorAboveLoop(L, *LI);1060if (!WidenableBR)1061return false;10621063const SCEV *LatchEC = SE->getExitCount(L, Latch);1064if (isa<SCEVCouldNotCompute>(LatchEC))1065return false; // profitability - want hot exit in analyzeable set10661067// At this point, we have found an analyzeable latch, and a widenable1068// condition above the loop. If we have a widenable exit within the loop1069// (for which we can't compute exit counts), drop the ability to further1070// widen so that we gain ability to analyze it's exit count and perform this1071// transform. TODO: It'd be nice to know for sure the exit became1072// analyzeable after dropping widenability.1073bool ChangedLoop = false;10741075for (auto *ExitingBB : ExitingBlocks) {1076if (LI->getLoopFor(ExitingBB) != L)1077continue;10781079auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());1080if (!BI)1081continue;10821083if (auto WC = extractWidenableCondition(BI))1084if (L->contains(BI->getSuccessor(0))) {1085assert(WC->hasOneUse() && "Not appropriate widenable branch!");1086WC->user_back()->replaceUsesOfWith(1087WC, ConstantInt::getTrue(BI->getContext()));1088ChangedLoop = true;1089}1090}1091if (ChangedLoop)1092SE->forgetLoop(L);10931094// The insertion point for the widening should be at the widenably call, not1095// at the WidenableBR. If we do this at the widenableBR, we can incorrectly1096// change a loop-invariant condition to a loop-varying one.1097auto *IP = cast<Instruction>(WidenableBR->getCondition());10981099// The use of umin(all analyzeable exits) instead of latch is subtle, but1100// important for profitability. We may have a loop which hasn't been fully1101// canonicalized just yet. If the exit we chose to widen is provably never1102// taken, we want the widened form to *also* be provably never taken. We1103// can't guarantee this as a current unanalyzeable exit may later become1104// analyzeable, but we can at least avoid the obvious cases.1105const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L);1106if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() ||1107!SE->isLoopInvariant(MinEC, L) ||1108!Rewriter.isSafeToExpandAt(MinEC, IP))1109return ChangedLoop;11101111Rewriter.setInsertPoint(IP);1112IRBuilder<> B(IP);11131114bool InvalidateLoop = false;1115Value *MinECV = nullptr; // lazily generated if needed1116for (BasicBlock *ExitingBB : ExitingBlocks) {1117// If our exiting block exits multiple loops, we can only rewrite the1118// innermost one. Otherwise, we're changing how many times the innermost1119// loop runs before it exits.1120if (LI->getLoopFor(ExitingBB) != L)1121continue;11221123// Can't rewrite non-branch yet.1124auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());1125if (!BI)1126continue;11271128// If already constant, nothing to do.1129if (isa<Constant>(BI->getCondition()))1130continue;11311132const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);1133if (isa<SCEVCouldNotCompute>(ExitCount) ||1134ExitCount->getType()->isPointerTy() ||1135!Rewriter.isSafeToExpandAt(ExitCount, WidenableBR))1136continue;11371138const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));1139BasicBlock *ExitBB = BI->getSuccessor(ExitIfTrue ? 0 : 1);1140if (!ExitBB->getPostdominatingDeoptimizeCall())1141continue;11421143/// Here we can be fairly sure that executing this exit will most likely1144/// lead to executing llvm.experimental.deoptimize.1145/// This is a profitability heuristic, not a legality constraint.11461147// If we found a widenable exit condition, do two things:1148// 1) fold the widened exit test into the widenable condition1149// 2) fold the branch to untaken - avoids infinite looping11501151Value *ECV = Rewriter.expandCodeFor(ExitCount);1152if (!MinECV)1153MinECV = Rewriter.expandCodeFor(MinEC);1154Value *RHS = MinECV;1155if (ECV->getType() != RHS->getType()) {1156Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());1157ECV = B.CreateZExt(ECV, WiderTy);1158RHS = B.CreateZExt(RHS, WiderTy);1159}1160assert(!Latch || DT->dominates(ExitingBB, Latch));1161Value *NewCond = B.CreateICmp(ICmpInst::ICMP_UGT, ECV, RHS);1162// Freeze poison or undef to an arbitrary bit pattern to ensure we can1163// branch without introducing UB. See NOTE ON POISON/UNDEF above for1164// context.1165NewCond = B.CreateFreeze(NewCond);11661167widenWidenableBranch(WidenableBR, NewCond);11681169Value *OldCond = BI->getCondition();1170BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue));1171InvalidateLoop = true;1172}11731174if (InvalidateLoop)1175// We just mutated a bunch of loop exits changing there exit counts1176// widely. We need to force recomputation of the exit counts given these1177// changes. Note that all of the inserted exits are never taken, and1178// should be removed next time the CFG is modified.1179SE->forgetLoop(L);11801181// Always return `true` since we have moved the WidenableBR's condition.1182return true;1183}11841185bool LoopPredication::runOnLoop(Loop *Loop) {1186L = Loop;11871188LLVM_DEBUG(dbgs() << "Analyzing ");1189LLVM_DEBUG(L->dump());11901191Module *M = L->getHeader()->getModule();11921193// There is nothing to do if the module doesn't use guards1194auto *GuardDecl =1195M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard));1196bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty();1197auto *WCDecl = M->getFunction(1198Intrinsic::getName(Intrinsic::experimental_widenable_condition));1199bool HasWidenableConditions =1200PredicateWidenableBranchGuards && WCDecl && !WCDecl->use_empty();1201if (!HasIntrinsicGuards && !HasWidenableConditions)1202return false;12031204DL = &M->getDataLayout();12051206Preheader = L->getLoopPreheader();1207if (!Preheader)1208return false;12091210auto LatchCheckOpt = parseLoopLatchICmp();1211if (!LatchCheckOpt)1212return false;1213LatchCheck = *LatchCheckOpt;12141215LLVM_DEBUG(dbgs() << "Latch check:\n");1216LLVM_DEBUG(LatchCheck.dump());12171218if (!isLoopProfitableToPredicate()) {1219LLVM_DEBUG(dbgs() << "Loop not profitable to predicate!\n");1220return false;1221}1222// Collect all the guards into a vector and process later, so as not1223// to invalidate the instruction iterator.1224SmallVector<IntrinsicInst *, 4> Guards;1225SmallVector<BranchInst *, 4> GuardsAsWidenableBranches;1226for (const auto BB : L->blocks()) {1227for (auto &I : *BB)1228if (isGuard(&I))1229Guards.push_back(cast<IntrinsicInst>(&I));1230if (PredicateWidenableBranchGuards &&1231isGuardAsWidenableBranch(BB->getTerminator()))1232GuardsAsWidenableBranches.push_back(1233cast<BranchInst>(BB->getTerminator()));1234}12351236SCEVExpander Expander(*SE, *DL, "loop-predication");1237bool Changed = false;1238for (auto *Guard : Guards)1239Changed |= widenGuardConditions(Guard, Expander);1240for (auto *Guard : GuardsAsWidenableBranches)1241Changed |= widenWidenableBranchGuardConditions(Guard, Expander);1242Changed |= predicateLoopExits(L, Expander);12431244if (MSSAU && VerifyMemorySSA)1245MSSAU->getMemorySSA()->verifyMemorySSA();1246return Changed;1247}124812491250