Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
35271 views
//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines common loop utility functions.9//10//===----------------------------------------------------------------------===//1112#include "llvm/Transforms/Utils/LoopUtils.h"13#include "llvm/ADT/DenseSet.h"14#include "llvm/ADT/PriorityWorklist.h"15#include "llvm/ADT/ScopeExit.h"16#include "llvm/ADT/SetVector.h"17#include "llvm/ADT/SmallPtrSet.h"18#include "llvm/ADT/SmallVector.h"19#include "llvm/Analysis/AliasAnalysis.h"20#include "llvm/Analysis/BasicAliasAnalysis.h"21#include "llvm/Analysis/DomTreeUpdater.h"22#include "llvm/Analysis/GlobalsModRef.h"23#include "llvm/Analysis/InstSimplifyFolder.h"24#include "llvm/Analysis/LoopAccessAnalysis.h"25#include "llvm/Analysis/LoopInfo.h"26#include "llvm/Analysis/LoopPass.h"27#include "llvm/Analysis/MemorySSA.h"28#include "llvm/Analysis/MemorySSAUpdater.h"29#include "llvm/Analysis/ScalarEvolution.h"30#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"31#include "llvm/Analysis/ScalarEvolutionExpressions.h"32#include "llvm/IR/DIBuilder.h"33#include "llvm/IR/Dominators.h"34#include "llvm/IR/Instructions.h"35#include "llvm/IR/IntrinsicInst.h"36#include "llvm/IR/MDBuilder.h"37#include "llvm/IR/Module.h"38#include "llvm/IR/PatternMatch.h"39#include "llvm/IR/ProfDataUtils.h"40#include "llvm/IR/ValueHandle.h"41#include "llvm/InitializePasses.h"42#include "llvm/Pass.h"43#include "llvm/Support/Debug.h"44#include "llvm/Transforms/Utils/BasicBlockUtils.h"45#include "llvm/Transforms/Utils/Local.h"46#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"4748using namespace llvm;49using namespace llvm::PatternMatch;5051#define DEBUG_TYPE "loop-utils"5253static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";54static const char *LLVMLoopDisableLICM = "llvm.licm.disable";5556bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,57MemorySSAUpdater *MSSAU,58bool PreserveLCSSA) {59bool Changed = false;6061// We re-use a vector for the in-loop predecesosrs.62SmallVector<BasicBlock *, 4> InLoopPredecessors;6364auto RewriteExit = [&](BasicBlock *BB) {65assert(InLoopPredecessors.empty() &&66"Must start with an empty predecessors list!");67auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });6869// See if there are any non-loop predecessors of this exit block and70// keep track of the in-loop predecessors.71bool IsDedicatedExit = true;72for (auto *PredBB : predecessors(BB))73if (L->contains(PredBB)) {74if (isa<IndirectBrInst>(PredBB->getTerminator()))75// We cannot rewrite exiting edges from an indirectbr.76return false;7778InLoopPredecessors.push_back(PredBB);79} else {80IsDedicatedExit = false;81}8283assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");8485// Nothing to do if this is already a dedicated exit.86if (IsDedicatedExit)87return false;8889auto *NewExitBB = SplitBlockPredecessors(90BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);9192if (!NewExitBB)93LLVM_DEBUG(94dbgs() << "WARNING: Can't create a dedicated exit block for loop: "95<< *L << "\n");96else97LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "98<< NewExitBB->getName() << "\n");99return true;100};101102// Walk the exit blocks directly rather than building up a data structure for103// them, but only visit each one once.104SmallPtrSet<BasicBlock *, 4> Visited;105for (auto *BB : L->blocks())106for (auto *SuccBB : successors(BB)) {107// We're looking for exit blocks so skip in-loop successors.108if (L->contains(SuccBB))109continue;110111// Visit each exit block exactly once.112if (!Visited.insert(SuccBB).second)113continue;114115Changed |= RewriteExit(SuccBB);116}117118return Changed;119}120121/// Returns the instructions that use values defined in the loop.122SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {123SmallVector<Instruction *, 8> UsedOutside;124125for (auto *Block : L->getBlocks())126// FIXME: I believe that this could use copy_if if the Inst reference could127// be adapted into a pointer.128for (auto &Inst : *Block) {129auto Users = Inst.users();130if (any_of(Users, [&](User *U) {131auto *Use = cast<Instruction>(U);132return !L->contains(Use->getParent());133}))134UsedOutside.push_back(&Inst);135}136137return UsedOutside;138}139140void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {141// By definition, all loop passes need the LoopInfo analysis and the142// Dominator tree it depends on. Because they all participate in the loop143// pass manager, they must also preserve these.144AU.addRequired<DominatorTreeWrapperPass>();145AU.addPreserved<DominatorTreeWrapperPass>();146AU.addRequired<LoopInfoWrapperPass>();147AU.addPreserved<LoopInfoWrapperPass>();148149// We must also preserve LoopSimplify and LCSSA. We locally access their IDs150// here because users shouldn't directly get them from this header.151extern char &LoopSimplifyID;152extern char &LCSSAID;153AU.addRequiredID(LoopSimplifyID);154AU.addPreservedID(LoopSimplifyID);155AU.addRequiredID(LCSSAID);156AU.addPreservedID(LCSSAID);157// This is used in the LPPassManager to perform LCSSA verification on passes158// which preserve lcssa form159AU.addRequired<LCSSAVerificationPass>();160AU.addPreserved<LCSSAVerificationPass>();161162// Loop passes are designed to run inside of a loop pass manager which means163// that any function analyses they require must be required by the first loop164// pass in the manager (so that it is computed before the loop pass manager165// runs) and preserved by all loop pasess in the manager. To make this166// reasonably robust, the set needed for most loop passes is maintained here.167// If your loop pass requires an analysis not listed here, you will need to168// carefully audit the loop pass manager nesting structure that results.169AU.addRequired<AAResultsWrapperPass>();170AU.addPreserved<AAResultsWrapperPass>();171AU.addPreserved<BasicAAWrapperPass>();172AU.addPreserved<GlobalsAAWrapperPass>();173AU.addPreserved<SCEVAAWrapperPass>();174AU.addRequired<ScalarEvolutionWrapperPass>();175AU.addPreserved<ScalarEvolutionWrapperPass>();176// FIXME: When all loop passes preserve MemorySSA, it can be required and177// preserved here instead of the individual handling in each pass.178}179180/// Manually defined generic "LoopPass" dependency initialization. This is used181/// to initialize the exact set of passes from above in \c182/// getLoopAnalysisUsage. It can be used within a loop pass's initialization183/// with:184///185/// INITIALIZE_PASS_DEPENDENCY(LoopPass)186///187/// As-if "LoopPass" were a pass.188void llvm::initializeLoopPassPass(PassRegistry &Registry) {189INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)190INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)191INITIALIZE_PASS_DEPENDENCY(LoopSimplify)192INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)193INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)194INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)195INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)196INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)197INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)198INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)199}200201/// Create MDNode for input string.202static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) {203LLVMContext &Context = TheLoop->getHeader()->getContext();204Metadata *MDs[] = {205MDString::get(Context, Name),206ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};207return MDNode::get(Context, MDs);208}209210/// Set input string into loop metadata by keeping other values intact.211/// If the string is already in loop metadata update value if it is212/// different.213void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD,214unsigned V) {215SmallVector<Metadata *, 4> MDs(1);216// If the loop already has metadata, retain it.217MDNode *LoopID = TheLoop->getLoopID();218if (LoopID) {219for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {220MDNode *Node = cast<MDNode>(LoopID->getOperand(i));221// If it is of form key = value, try to parse it.222if (Node->getNumOperands() == 2) {223MDString *S = dyn_cast<MDString>(Node->getOperand(0));224if (S && S->getString() == StringMD) {225ConstantInt *IntMD =226mdconst::extract_or_null<ConstantInt>(Node->getOperand(1));227if (IntMD && IntMD->getSExtValue() == V)228// It is already in place. Do nothing.229return;230// We need to update the value, so just skip it here and it will231// be added after copying other existed nodes.232continue;233}234}235MDs.push_back(Node);236}237}238// Add new metadata.239MDs.push_back(createStringMetadata(TheLoop, StringMD, V));240// Replace current metadata node with new one.241LLVMContext &Context = TheLoop->getHeader()->getContext();242MDNode *NewLoopID = MDNode::get(Context, MDs);243// Set operand 0 to refer to the loop id itself.244NewLoopID->replaceOperandWith(0, NewLoopID);245TheLoop->setLoopID(NewLoopID);246}247248std::optional<ElementCount>249llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {250std::optional<int> Width =251getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");252253if (Width) {254std::optional<int> IsScalable = getOptionalIntLoopAttribute(255TheLoop, "llvm.loop.vectorize.scalable.enable");256return ElementCount::get(*Width, IsScalable.value_or(false));257}258259return std::nullopt;260}261262std::optional<MDNode *> llvm::makeFollowupLoopID(263MDNode *OrigLoopID, ArrayRef<StringRef> FollowupOptions,264const char *InheritOptionsExceptPrefix, bool AlwaysNew) {265if (!OrigLoopID) {266if (AlwaysNew)267return nullptr;268return std::nullopt;269}270271assert(OrigLoopID->getOperand(0) == OrigLoopID);272273bool InheritAllAttrs = !InheritOptionsExceptPrefix;274bool InheritSomeAttrs =275InheritOptionsExceptPrefix && InheritOptionsExceptPrefix[0] != '\0';276SmallVector<Metadata *, 8> MDs;277MDs.push_back(nullptr);278279bool Changed = false;280if (InheritAllAttrs || InheritSomeAttrs) {281for (const MDOperand &Existing : drop_begin(OrigLoopID->operands())) {282MDNode *Op = cast<MDNode>(Existing.get());283284auto InheritThisAttribute = [InheritSomeAttrs,285InheritOptionsExceptPrefix](MDNode *Op) {286if (!InheritSomeAttrs)287return false;288289// Skip malformatted attribute metadata nodes.290if (Op->getNumOperands() == 0)291return true;292Metadata *NameMD = Op->getOperand(0).get();293if (!isa<MDString>(NameMD))294return true;295StringRef AttrName = cast<MDString>(NameMD)->getString();296297// Do not inherit excluded attributes.298return !AttrName.starts_with(InheritOptionsExceptPrefix);299};300301if (InheritThisAttribute(Op))302MDs.push_back(Op);303else304Changed = true;305}306} else {307// Modified if we dropped at least one attribute.308Changed = OrigLoopID->getNumOperands() > 1;309}310311bool HasAnyFollowup = false;312for (StringRef OptionName : FollowupOptions) {313MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);314if (!FollowupNode)315continue;316317HasAnyFollowup = true;318for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {319MDs.push_back(Option.get());320Changed = true;321}322}323324// Attributes of the followup loop not specified explicity, so signal to the325// transformation pass to add suitable attributes.326if (!AlwaysNew && !HasAnyFollowup)327return std::nullopt;328329// If no attributes were added or remove, the previous loop Id can be reused.330if (!AlwaysNew && !Changed)331return OrigLoopID;332333// No attributes is equivalent to having no !llvm.loop metadata at all.334if (MDs.size() == 1)335return nullptr;336337// Build the new loop ID.338MDTuple *FollowupLoopID = MDNode::get(OrigLoopID->getContext(), MDs);339FollowupLoopID->replaceOperandWith(0, FollowupLoopID);340return FollowupLoopID;341}342343bool llvm::hasDisableAllTransformsHint(const Loop *L) {344return getBooleanLoopAttribute(L, LLVMLoopDisableNonforced);345}346347bool llvm::hasDisableLICMTransformsHint(const Loop *L) {348return getBooleanLoopAttribute(L, LLVMLoopDisableLICM);349}350351TransformationMode llvm::hasUnrollTransformation(const Loop *L) {352if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))353return TM_SuppressedByUser;354355std::optional<int> Count =356getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");357if (Count)358return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;359360if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))361return TM_ForcedByUser;362363if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full"))364return TM_ForcedByUser;365366if (hasDisableAllTransformsHint(L))367return TM_Disable;368369return TM_Unspecified;370}371372TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {373if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))374return TM_SuppressedByUser;375376std::optional<int> Count =377getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");378if (Count)379return *Count == 1 ? TM_SuppressedByUser : TM_ForcedByUser;380381if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))382return TM_ForcedByUser;383384if (hasDisableAllTransformsHint(L))385return TM_Disable;386387return TM_Unspecified;388}389390TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {391std::optional<bool> Enable =392getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");393394if (Enable == false)395return TM_SuppressedByUser;396397std::optional<ElementCount> VectorizeWidth =398getOptionalElementCountLoopAttribute(L);399std::optional<int> InterleaveCount =400getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");401402// 'Forcing' vector width and interleave count to one effectively disables403// this tranformation.404if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&405InterleaveCount == 1)406return TM_SuppressedByUser;407408if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))409return TM_Disable;410411if (Enable == true)412return TM_ForcedByUser;413414if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)415return TM_Disable;416417if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)418return TM_Enable;419420if (hasDisableAllTransformsHint(L))421return TM_Disable;422423return TM_Unspecified;424}425426TransformationMode llvm::hasDistributeTransformation(const Loop *L) {427if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))428return TM_ForcedByUser;429430if (hasDisableAllTransformsHint(L))431return TM_Disable;432433return TM_Unspecified;434}435436TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {437if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))438return TM_SuppressedByUser;439440if (hasDisableAllTransformsHint(L))441return TM_Disable;442443return TM_Unspecified;444}445446/// Does a BFS from a given node to all of its children inside a given loop.447/// The returned vector of nodes includes the starting point.448SmallVector<DomTreeNode *, 16>449llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {450SmallVector<DomTreeNode *, 16> Worklist;451auto AddRegionToWorklist = [&](DomTreeNode *DTN) {452// Only include subregions in the top level loop.453BasicBlock *BB = DTN->getBlock();454if (CurLoop->contains(BB))455Worklist.push_back(DTN);456};457458AddRegionToWorklist(N);459460for (size_t I = 0; I < Worklist.size(); I++) {461for (DomTreeNode *Child : Worklist[I]->children())462AddRegionToWorklist(Child);463}464465return Worklist;466}467468bool llvm::isAlmostDeadIV(PHINode *PN, BasicBlock *LatchBlock, Value *Cond) {469int LatchIdx = PN->getBasicBlockIndex(LatchBlock);470assert(LatchIdx != -1 && "LatchBlock is not a case in this PHINode");471Value *IncV = PN->getIncomingValue(LatchIdx);472473for (User *U : PN->users())474if (U != Cond && U != IncV) return false;475476for (User *U : IncV->users())477if (U != Cond && U != PN) return false;478return true;479}480481482void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,483LoopInfo *LI, MemorySSA *MSSA) {484assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");485auto *Preheader = L->getLoopPreheader();486assert(Preheader && "Preheader should exist!");487488std::unique_ptr<MemorySSAUpdater> MSSAU;489if (MSSA)490MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);491492// Now that we know the removal is safe, remove the loop by changing the493// branch from the preheader to go to the single exit block.494//495// Because we're deleting a large chunk of code at once, the sequence in which496// we remove things is very important to avoid invalidation issues.497498// Tell ScalarEvolution that the loop is deleted. Do this before499// deleting the loop so that ScalarEvolution can look at the loop500// to determine what it needs to clean up.501if (SE) {502SE->forgetLoop(L);503SE->forgetBlockAndLoopDispositions();504}505506Instruction *OldTerm = Preheader->getTerminator();507assert(!OldTerm->mayHaveSideEffects() &&508"Preheader must end with a side-effect-free terminator");509assert(OldTerm->getNumSuccessors() == 1 &&510"Preheader must have a single successor");511// Connect the preheader to the exit block. Keep the old edge to the header512// around to perform the dominator tree update in two separate steps513// -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge514// preheader -> header.515//516//517// 0. Preheader 1. Preheader 2. Preheader518// | | | |519// V | V |520// Header <--\ | Header <--\ | Header <--\521// | | | | | | | | | | |522// | V | | | V | | | V |523// | Body --/ | | Body --/ | | Body --/524// V V V V V525// Exit Exit Exit526//527// By doing this is two separate steps we can perform the dominator tree528// update without using the batch update API.529//530// Even when the loop is never executed, we cannot remove the edge from the531// source block to the exit block. Consider the case where the unexecuted loop532// branches back to an outer loop. If we deleted the loop and removed the edge533// coming to this inner loop, this will break the outer loop structure (by534// deleting the backedge of the outer loop). If the outer loop is indeed a535// non-loop, it will be deleted in a future iteration of loop deletion pass.536IRBuilder<> Builder(OldTerm);537538auto *ExitBlock = L->getUniqueExitBlock();539DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);540if (ExitBlock) {541assert(ExitBlock && "Should have a unique exit block!");542assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");543544Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);545// Remove the old branch. The conditional branch becomes a new terminator.546OldTerm->eraseFromParent();547548// Rewrite phis in the exit block to get their inputs from the Preheader549// instead of the exiting block.550for (PHINode &P : ExitBlock->phis()) {551// Set the zero'th element of Phi to be from the preheader and remove all552// other incoming values. Given the loop has dedicated exits, all other553// incoming values must be from the exiting blocks.554int PredIndex = 0;555P.setIncomingBlock(PredIndex, Preheader);556// Removes all incoming values from all other exiting blocks (including557// duplicate values from an exiting block).558// Nuke all entries except the zero'th entry which is the preheader entry.559P.removeIncomingValueIf([](unsigned Idx) { return Idx != 0; },560/* DeletePHIIfEmpty */ false);561562assert((P.getNumIncomingValues() == 1 &&563P.getIncomingBlock(PredIndex) == Preheader) &&564"Should have exactly one value and that's from the preheader!");565}566567if (DT) {568DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}});569if (MSSA) {570MSSAU->applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock}},571*DT);572if (VerifyMemorySSA)573MSSA->verifyMemorySSA();574}575}576577// Disconnect the loop body by branching directly to its exit.578Builder.SetInsertPoint(Preheader->getTerminator());579Builder.CreateBr(ExitBlock);580// Remove the old branch.581Preheader->getTerminator()->eraseFromParent();582} else {583assert(L->hasNoExitBlocks() &&584"Loop should have either zero or one exit blocks.");585586Builder.SetInsertPoint(OldTerm);587Builder.CreateUnreachable();588Preheader->getTerminator()->eraseFromParent();589}590591if (DT) {592DTU.applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}});593if (MSSA) {594MSSAU->applyUpdates({{DominatorTree::Delete, Preheader, L->getHeader()}},595*DT);596SmallSetVector<BasicBlock *, 8> DeadBlockSet(L->block_begin(),597L->block_end());598MSSAU->removeBlocks(DeadBlockSet);599if (VerifyMemorySSA)600MSSA->verifyMemorySSA();601}602}603604// Use a map to unique and a vector to guarantee deterministic ordering.605llvm::SmallDenseSet<DebugVariable, 4> DeadDebugSet;606llvm::SmallVector<DbgVariableIntrinsic *, 4> DeadDebugInst;607llvm::SmallVector<DbgVariableRecord *, 4> DeadDbgVariableRecords;608609if (ExitBlock) {610// Given LCSSA form is satisfied, we should not have users of instructions611// within the dead loop outside of the loop. However, LCSSA doesn't take612// unreachable uses into account. We handle them here.613// We could do it after drop all references (in this case all users in the614// loop will be already eliminated and we have less work to do but according615// to API doc of User::dropAllReferences only valid operation after dropping616// references, is deletion. So let's substitute all usages of617// instruction from the loop with poison value of corresponding type first.618for (auto *Block : L->blocks())619for (Instruction &I : *Block) {620auto *Poison = PoisonValue::get(I.getType());621for (Use &U : llvm::make_early_inc_range(I.uses())) {622if (auto *Usr = dyn_cast<Instruction>(U.getUser()))623if (L->contains(Usr->getParent()))624continue;625// If we have a DT then we can check that uses outside a loop only in626// unreachable block.627if (DT)628assert(!DT->isReachableFromEntry(U) &&629"Unexpected user in reachable block");630U.set(Poison);631}632633// RemoveDIs: do the same as below for DbgVariableRecords.634if (Block->IsNewDbgInfoFormat) {635for (DbgVariableRecord &DVR : llvm::make_early_inc_range(636filterDbgVars(I.getDbgRecordRange()))) {637DebugVariable Key(DVR.getVariable(), DVR.getExpression(),638DVR.getDebugLoc().get());639if (!DeadDebugSet.insert(Key).second)640continue;641// Unlinks the DVR from it's container, for later insertion.642DVR.removeFromParent();643DeadDbgVariableRecords.push_back(&DVR);644}645}646647// For one of each variable encountered, preserve a debug intrinsic (set648// to Poison) and transfer it to the loop exit. This terminates any649// variable locations that were set during the loop.650auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);651if (!DVI)652continue;653if (!DeadDebugSet.insert(DebugVariable(DVI)).second)654continue;655DeadDebugInst.push_back(DVI);656}657658// After the loop has been deleted all the values defined and modified659// inside the loop are going to be unavailable. Values computed in the660// loop will have been deleted, automatically causing their debug uses661// be be replaced with undef. Loop invariant values will still be available.662// Move dbg.values out the loop so that earlier location ranges are still663// terminated and loop invariant assignments are preserved.664DIBuilder DIB(*ExitBlock->getModule());665BasicBlock::iterator InsertDbgValueBefore =666ExitBlock->getFirstInsertionPt();667assert(InsertDbgValueBefore != ExitBlock->end() &&668"There should be a non-PHI instruction in exit block, else these "669"instructions will have no parent.");670671for (auto *DVI : DeadDebugInst)672DVI->moveBefore(*ExitBlock, InsertDbgValueBefore);673674// Due to the "head" bit in BasicBlock::iterator, we're going to insert675// each DbgVariableRecord right at the start of the block, wheras dbg.values676// would be repeatedly inserted before the first instruction. To replicate677// this behaviour, do it backwards.678for (DbgVariableRecord *DVR : llvm::reverse(DeadDbgVariableRecords))679ExitBlock->insertDbgRecordBefore(DVR, InsertDbgValueBefore);680}681682// Remove the block from the reference counting scheme, so that we can683// delete it freely later.684for (auto *Block : L->blocks())685Block->dropAllReferences();686687if (MSSA && VerifyMemorySSA)688MSSA->verifyMemorySSA();689690if (LI) {691// Erase the instructions and the blocks without having to worry692// about ordering because we already dropped the references.693// NOTE: This iteration is safe because erasing the block does not remove694// its entry from the loop's block list. We do that in the next section.695for (BasicBlock *BB : L->blocks())696BB->eraseFromParent();697698// Finally, the blocks from loopinfo. This has to happen late because699// otherwise our loop iterators won't work.700701SmallPtrSet<BasicBlock *, 8> blocks;702blocks.insert(L->block_begin(), L->block_end());703for (BasicBlock *BB : blocks)704LI->removeBlock(BB);705706// The last step is to update LoopInfo now that we've eliminated this loop.707// Note: LoopInfo::erase remove the given loop and relink its subloops with708// its parent. While removeLoop/removeChildLoop remove the given loop but709// not relink its subloops, which is what we want.710if (Loop *ParentLoop = L->getParentLoop()) {711Loop::iterator I = find(*ParentLoop, L);712assert(I != ParentLoop->end() && "Couldn't find loop");713ParentLoop->removeChildLoop(I);714} else {715Loop::iterator I = find(*LI, L);716assert(I != LI->end() && "Couldn't find loop");717LI->removeLoop(I);718}719LI->destroy(L);720}721}722723void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,724LoopInfo &LI, MemorySSA *MSSA) {725auto *Latch = L->getLoopLatch();726assert(Latch && "multiple latches not yet supported");727auto *Header = L->getHeader();728Loop *OutermostLoop = L->getOutermostLoop();729730SE.forgetLoop(L);731SE.forgetBlockAndLoopDispositions();732733std::unique_ptr<MemorySSAUpdater> MSSAU;734if (MSSA)735MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);736737// Update the CFG and domtree. We chose to special case a couple of738// of common cases for code quality and test readability reasons.739[&]() -> void {740if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {741if (!BI->isConditional()) {742DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);743(void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU,744MSSAU.get());745return;746}747748// Conditional latch/exit - note that latch can be shared by inner749// and outer loop so the other target doesn't need to an exit750if (L->isLoopExiting(Latch)) {751// TODO: Generalize ConstantFoldTerminator so that it can be used752// here without invalidating LCSSA or MemorySSA. (Tricky case for753// LCSSA: header is an exit block of a preceeding sibling loop w/o754// dedicated exits.)755const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;756BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);757758DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);759Header->removePredecessor(Latch, true);760761IRBuilder<> Builder(BI);762auto *NewBI = Builder.CreateBr(ExitBB);763// Transfer the metadata to the new branch instruction (minus the764// loop info since this is no longer a loop)765NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg,766LLVMContext::MD_annotation});767768BI->eraseFromParent();769DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}});770if (MSSA)771MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);772return;773}774}775776// General case. By splitting the backedge, and then explicitly making it777// unreachable we gracefully handle corner cases such as switch and invoke778// termiantors.779auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());780781DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);782(void)changeToUnreachable(BackedgeBB->getTerminator(),783/*PreserveLCSSA*/ true, &DTU, MSSAU.get());784}();785786// Erase (and destroy) this loop instance. Handles relinking sub-loops787// and blocks within the loop as needed.788LI.erase(L);789790// If the loop we broke had a parent, then changeToUnreachable might have791// caused a block to be removed from the parent loop (see loop_nest_lcssa792// test case in zero-btc.ll for an example), thus changing the parent's793// exit blocks. If that happened, we need to rebuild LCSSA on the outermost794// loop which might have a had a block removed.795if (OutermostLoop != L)796formLCSSARecursively(*OutermostLoop, DT, &LI, &SE);797}798799800/// Checks if \p L has an exiting latch branch. There may also be other801/// exiting blocks. Returns branch instruction terminating the loop802/// latch if above check is successful, nullptr otherwise.803static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) {804BasicBlock *Latch = L->getLoopLatch();805if (!Latch)806return nullptr;807808BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());809if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))810return nullptr;811812assert((LatchBR->getSuccessor(0) == L->getHeader() ||813LatchBR->getSuccessor(1) == L->getHeader()) &&814"At least one edge out of the latch must go to the header");815816return LatchBR;817}818819/// Return the estimated trip count for any exiting branch which dominates820/// the loop latch.821static std::optional<uint64_t> getEstimatedTripCount(BranchInst *ExitingBranch,822Loop *L,823uint64_t &OrigExitWeight) {824// To estimate the number of times the loop body was executed, we want to825// know the number of times the backedge was taken, vs. the number of times826// we exited the loop.827uint64_t LoopWeight, ExitWeight;828if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight))829return std::nullopt;830831if (L->contains(ExitingBranch->getSuccessor(1)))832std::swap(LoopWeight, ExitWeight);833834if (!ExitWeight)835// Don't have a way to return predicated infinite836return std::nullopt;837838OrigExitWeight = ExitWeight;839840// Estimated exit count is a ratio of the loop weight by the weight of the841// edge exiting the loop, rounded to nearest.842uint64_t ExitCount = llvm::divideNearest(LoopWeight, ExitWeight);843// Estimated trip count is one plus estimated exit count.844return ExitCount + 1;845}846847std::optional<unsigned>848llvm::getLoopEstimatedTripCount(Loop *L,849unsigned *EstimatedLoopInvocationWeight) {850// Currently we take the estimate exit count only from the loop latch,851// ignoring other exiting blocks. This can overestimate the trip count852// if we exit through another exit, but can never underestimate it.853// TODO: incorporate information from other exits854if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) {855uint64_t ExitWeight;856if (std::optional<uint64_t> EstTripCount =857getEstimatedTripCount(LatchBranch, L, ExitWeight)) {858if (EstimatedLoopInvocationWeight)859*EstimatedLoopInvocationWeight = ExitWeight;860return *EstTripCount;861}862}863return std::nullopt;864}865866bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,867unsigned EstimatedloopInvocationWeight) {868// At the moment, we currently support changing the estimate trip count of869// the latch branch only. We could extend this API to manipulate estimated870// trip counts for any exit.871BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L);872if (!LatchBranch)873return false;874875// Calculate taken and exit weights.876unsigned LatchExitWeight = 0;877unsigned BackedgeTakenWeight = 0;878879if (EstimatedTripCount > 0) {880LatchExitWeight = EstimatedloopInvocationWeight;881BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight;882}883884// Make a swap if back edge is taken when condition is "false".885if (LatchBranch->getSuccessor(0) != L->getHeader())886std::swap(BackedgeTakenWeight, LatchExitWeight);887888MDBuilder MDB(LatchBranch->getContext());889890// Set/Update profile metadata.891LatchBranch->setMetadata(892LLVMContext::MD_prof,893MDB.createBranchWeights(BackedgeTakenWeight, LatchExitWeight));894895return true;896}897898bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,899ScalarEvolution &SE) {900Loop *OuterL = InnerLoop->getParentLoop();901if (!OuterL)902return true;903904// Get the backedge taken count for the inner loop905BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();906const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);907if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||908!InnerLoopBECountSC->getType()->isIntegerTy())909return false;910911// Get whether count is invariant to the outer loop912ScalarEvolution::LoopDisposition LD =913SE.getLoopDisposition(InnerLoopBECountSC, OuterL);914if (LD != ScalarEvolution::LoopInvariant)915return false;916917return true;918}919920constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {921switch (RK) {922default:923llvm_unreachable("Unexpected recurrence kind");924case RecurKind::Add:925return Intrinsic::vector_reduce_add;926case RecurKind::Mul:927return Intrinsic::vector_reduce_mul;928case RecurKind::And:929return Intrinsic::vector_reduce_and;930case RecurKind::Or:931return Intrinsic::vector_reduce_or;932case RecurKind::Xor:933return Intrinsic::vector_reduce_xor;934case RecurKind::FMulAdd:935case RecurKind::FAdd:936return Intrinsic::vector_reduce_fadd;937case RecurKind::FMul:938return Intrinsic::vector_reduce_fmul;939case RecurKind::SMax:940return Intrinsic::vector_reduce_smax;941case RecurKind::SMin:942return Intrinsic::vector_reduce_smin;943case RecurKind::UMax:944return Intrinsic::vector_reduce_umax;945case RecurKind::UMin:946return Intrinsic::vector_reduce_umin;947case RecurKind::FMax:948return Intrinsic::vector_reduce_fmax;949case RecurKind::FMin:950return Intrinsic::vector_reduce_fmin;951case RecurKind::FMaximum:952return Intrinsic::vector_reduce_fmaximum;953case RecurKind::FMinimum:954return Intrinsic::vector_reduce_fminimum;955}956}957958unsigned llvm::getArithmeticReductionInstruction(Intrinsic::ID RdxID) {959switch (RdxID) {960case Intrinsic::vector_reduce_fadd:961return Instruction::FAdd;962case Intrinsic::vector_reduce_fmul:963return Instruction::FMul;964case Intrinsic::vector_reduce_add:965return Instruction::Add;966case Intrinsic::vector_reduce_mul:967return Instruction::Mul;968case Intrinsic::vector_reduce_and:969return Instruction::And;970case Intrinsic::vector_reduce_or:971return Instruction::Or;972case Intrinsic::vector_reduce_xor:973return Instruction::Xor;974case Intrinsic::vector_reduce_smax:975case Intrinsic::vector_reduce_smin:976case Intrinsic::vector_reduce_umax:977case Intrinsic::vector_reduce_umin:978return Instruction::ICmp;979case Intrinsic::vector_reduce_fmax:980case Intrinsic::vector_reduce_fmin:981return Instruction::FCmp;982default:983llvm_unreachable("Unexpected ID");984}985}986987Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID) {988switch (RdxID) {989default:990llvm_unreachable("Unknown min/max recurrence kind");991case Intrinsic::vector_reduce_umin:992return Intrinsic::umin;993case Intrinsic::vector_reduce_umax:994return Intrinsic::umax;995case Intrinsic::vector_reduce_smin:996return Intrinsic::smin;997case Intrinsic::vector_reduce_smax:998return Intrinsic::smax;999case Intrinsic::vector_reduce_fmin:1000return Intrinsic::minnum;1001case Intrinsic::vector_reduce_fmax:1002return Intrinsic::maxnum;1003case Intrinsic::vector_reduce_fminimum:1004return Intrinsic::minimum;1005case Intrinsic::vector_reduce_fmaximum:1006return Intrinsic::maximum;1007}1008}10091010Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {1011switch (RK) {1012default:1013llvm_unreachable("Unknown min/max recurrence kind");1014case RecurKind::UMin:1015return Intrinsic::umin;1016case RecurKind::UMax:1017return Intrinsic::umax;1018case RecurKind::SMin:1019return Intrinsic::smin;1020case RecurKind::SMax:1021return Intrinsic::smax;1022case RecurKind::FMin:1023return Intrinsic::minnum;1024case RecurKind::FMax:1025return Intrinsic::maxnum;1026case RecurKind::FMinimum:1027return Intrinsic::minimum;1028case RecurKind::FMaximum:1029return Intrinsic::maximum;1030}1031}10321033RecurKind llvm::getMinMaxReductionRecurKind(Intrinsic::ID RdxID) {1034switch (RdxID) {1035case Intrinsic::vector_reduce_smax:1036return RecurKind::SMax;1037case Intrinsic::vector_reduce_smin:1038return RecurKind::SMin;1039case Intrinsic::vector_reduce_umax:1040return RecurKind::UMax;1041case Intrinsic::vector_reduce_umin:1042return RecurKind::UMin;1043case Intrinsic::vector_reduce_fmax:1044return RecurKind::FMax;1045case Intrinsic::vector_reduce_fmin:1046return RecurKind::FMin;1047default:1048return RecurKind::None;1049}1050}10511052CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {1053switch (RK) {1054default:1055llvm_unreachable("Unknown min/max recurrence kind");1056case RecurKind::UMin:1057return CmpInst::ICMP_ULT;1058case RecurKind::UMax:1059return CmpInst::ICMP_UGT;1060case RecurKind::SMin:1061return CmpInst::ICMP_SLT;1062case RecurKind::SMax:1063return CmpInst::ICMP_SGT;1064case RecurKind::FMin:1065return CmpInst::FCMP_OLT;1066case RecurKind::FMax:1067return CmpInst::FCMP_OGT;1068// We do not add FMinimum/FMaximum recurrence kind here since there is no1069// equivalent predicate which compares signed zeroes according to the1070// semantics of the intrinsics (llvm.minimum/maximum).1071}1072}10731074Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,1075Value *Right) {1076Type *Ty = Left->getType();1077if (Ty->isIntOrIntVectorTy() ||1078(RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {1079// TODO: Add float minnum/maxnum support when FMF nnan is set.1080Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);1081return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,1082"rdx.minmax");1083}1084CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK);1085Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");1086Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");1087return Select;1088}10891090// Helper to generate an ordered reduction.1091Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,1092unsigned Op, RecurKind RdxKind) {1093unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();10941095// Extract and apply reduction ops in ascending order:1096// e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]1097Value *Result = Acc;1098for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {1099Value *Ext =1100Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));11011102if (Op != Instruction::ICmp && Op != Instruction::FCmp) {1103Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,1104"bin.rdx");1105} else {1106assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&1107"Invalid min/max");1108Result = createMinMaxOp(Builder, RdxKind, Result, Ext);1109}1110}11111112return Result;1113}11141115// Helper to generate a log2 shuffle reduction.1116Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,1117unsigned Op,1118TargetTransformInfo::ReductionShuffle RS,1119RecurKind RdxKind) {1120unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();1121// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles1122// and vector ops, reducing the set of values being computed by half each1123// round.1124assert(isPowerOf2_32(VF) &&1125"Reduction emission only supported for pow2 vectors!");1126// Note: fast-math-flags flags are controlled by the builder configuration1127// and are assumed to apply to all generated arithmetic instructions. Other1128// poison generating flags (nsw/nuw/inbounds/inrange/exact) are not part1129// of the builder configuration, and since they're not passed explicitly,1130// will never be relevant here. Note that it would be generally unsound to1131// propagate these from an intrinsic call to the expansion anyways as we/1132// change the order of operations.1133auto BuildShuffledOp = [&Builder, &Op,1134&RdxKind](SmallVectorImpl<int> &ShuffleMask,1135Value *&TmpVec) -> void {1136Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");1137if (Op != Instruction::ICmp && Op != Instruction::FCmp) {1138TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,1139"bin.rdx");1140} else {1141assert(RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&1142"Invalid min/max");1143TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);1144}1145};11461147Value *TmpVec = Src;1148if (TargetTransformInfo::ReductionShuffle::Pairwise == RS) {1149SmallVector<int, 32> ShuffleMask(VF);1150for (unsigned stride = 1; stride < VF; stride <<= 1) {1151// Initialise the mask with undef.1152std::fill(ShuffleMask.begin(), ShuffleMask.end(), -1);1153for (unsigned j = 0; j < VF; j += stride << 1) {1154ShuffleMask[j] = j + stride;1155}1156BuildShuffledOp(ShuffleMask, TmpVec);1157}1158} else {1159SmallVector<int, 32> ShuffleMask(VF);1160for (unsigned i = VF; i != 1; i >>= 1) {1161// Move the upper half of the vector to the lower half.1162for (unsigned j = 0; j != i / 2; ++j)1163ShuffleMask[j] = i / 2 + j;11641165// Fill the rest of the mask with undef.1166std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);1167BuildShuffledOp(ShuffleMask, TmpVec);1168}1169}1170// The result is in the first element of the vector.1171return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));1172}11731174Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src,1175const RecurrenceDescriptor &Desc,1176PHINode *OrigPhi) {1177assert(1178RecurrenceDescriptor::isAnyOfRecurrenceKind(Desc.getRecurrenceKind()) &&1179"Unexpected reduction kind");1180Value *InitVal = Desc.getRecurrenceStartValue();1181Value *NewVal = nullptr;11821183// First use the original phi to determine the new value we're trying to1184// select from in the loop.1185SelectInst *SI = nullptr;1186for (auto *U : OrigPhi->users()) {1187if ((SI = dyn_cast<SelectInst>(U)))1188break;1189}1190assert(SI && "One user of the original phi should be a select");11911192if (SI->getTrueValue() == OrigPhi)1193NewVal = SI->getFalseValue();1194else {1195assert(SI->getFalseValue() == OrigPhi &&1196"At least one input to the select should be the original Phi");1197NewVal = SI->getTrueValue();1198}11991200// If any predicate is true it means that we want to select the new value.1201Value *AnyOf =1202Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src;1203// The compares in the loop may yield poison, which propagates through the1204// bitwise ORs. Freeze it here before the condition is used.1205AnyOf = Builder.CreateFreeze(AnyOf);1206return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select");1207}12081209Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src,1210RecurKind RdxKind) {1211auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();1212switch (RdxKind) {1213case RecurKind::Add:1214return Builder.CreateAddReduce(Src);1215case RecurKind::Mul:1216return Builder.CreateMulReduce(Src);1217case RecurKind::And:1218return Builder.CreateAndReduce(Src);1219case RecurKind::Or:1220return Builder.CreateOrReduce(Src);1221case RecurKind::Xor:1222return Builder.CreateXorReduce(Src);1223case RecurKind::FMulAdd:1224case RecurKind::FAdd:1225return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),1226Src);1227case RecurKind::FMul:1228return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src);1229case RecurKind::SMax:1230return Builder.CreateIntMaxReduce(Src, true);1231case RecurKind::SMin:1232return Builder.CreateIntMinReduce(Src, true);1233case RecurKind::UMax:1234return Builder.CreateIntMaxReduce(Src, false);1235case RecurKind::UMin:1236return Builder.CreateIntMinReduce(Src, false);1237case RecurKind::FMax:1238return Builder.CreateFPMaxReduce(Src);1239case RecurKind::FMin:1240return Builder.CreateFPMinReduce(Src);1241case RecurKind::FMinimum:1242return Builder.CreateFPMinimumReduce(Src);1243case RecurKind::FMaximum:1244return Builder.CreateFPMaximumReduce(Src);1245default:1246llvm_unreachable("Unhandled opcode");1247}1248}12491250Value *llvm::createSimpleTargetReduction(VectorBuilder &VBuilder, Value *Src,1251const RecurrenceDescriptor &Desc) {1252RecurKind Kind = Desc.getRecurrenceKind();1253assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&1254"AnyOf reduction is not supported.");1255Intrinsic::ID Id = getReductionIntrinsicID(Kind);1256auto *SrcTy = cast<VectorType>(Src->getType());1257Type *SrcEltTy = SrcTy->getElementType();1258Value *Iden =1259Desc.getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());1260Value *Ops[] = {Iden, Src};1261return VBuilder.createSimpleTargetReduction(Id, SrcTy, Ops);1262}12631264Value *llvm::createTargetReduction(IRBuilderBase &B,1265const RecurrenceDescriptor &Desc, Value *Src,1266PHINode *OrigPhi) {1267// TODO: Support in-order reductions based on the recurrence descriptor.1268// All ops in the reduction inherit fast-math-flags from the recurrence1269// descriptor.1270IRBuilderBase::FastMathFlagGuard FMFGuard(B);1271B.setFastMathFlags(Desc.getFastMathFlags());12721273RecurKind RK = Desc.getRecurrenceKind();1274if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))1275return createAnyOfTargetReduction(B, Src, Desc, OrigPhi);12761277return createSimpleTargetReduction(B, Src, RK);1278}12791280Value *llvm::createOrderedReduction(IRBuilderBase &B,1281const RecurrenceDescriptor &Desc,1282Value *Src, Value *Start) {1283assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||1284Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&1285"Unexpected reduction kind");1286assert(Src->getType()->isVectorTy() && "Expected a vector type");1287assert(!Start->getType()->isVectorTy() && "Expected a scalar type");12881289return B.CreateFAddReduce(Start, Src);1290}12911292Value *llvm::createOrderedReduction(VectorBuilder &VBuilder,1293const RecurrenceDescriptor &Desc,1294Value *Src, Value *Start) {1295assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||1296Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&1297"Unexpected reduction kind");1298assert(Src->getType()->isVectorTy() && "Expected a vector type");1299assert(!Start->getType()->isVectorTy() && "Expected a scalar type");13001301Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd);1302auto *SrcTy = cast<VectorType>(Src->getType());1303Value *Ops[] = {Start, Src};1304return VBuilder.createSimpleTargetReduction(Id, SrcTy, Ops);1305}13061307void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,1308bool IncludeWrapFlags) {1309auto *VecOp = dyn_cast<Instruction>(I);1310if (!VecOp)1311return;1312auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])1313: dyn_cast<Instruction>(OpValue);1314if (!Intersection)1315return;1316const unsigned Opcode = Intersection->getOpcode();1317VecOp->copyIRFlags(Intersection, IncludeWrapFlags);1318for (auto *V : VL) {1319auto *Instr = dyn_cast<Instruction>(V);1320if (!Instr)1321continue;1322if (OpValue == nullptr || Opcode == Instr->getOpcode())1323VecOp->andIRFlags(V);1324}1325}13261327bool llvm::isKnownNegativeInLoop(const SCEV *S, const Loop *L,1328ScalarEvolution &SE) {1329const SCEV *Zero = SE.getZero(S->getType());1330return SE.isAvailableAtLoopEntry(S, L) &&1331SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, S, Zero);1332}13331334bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,1335ScalarEvolution &SE) {1336const SCEV *Zero = SE.getZero(S->getType());1337return SE.isAvailableAtLoopEntry(S, L) &&1338SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);1339}13401341bool llvm::isKnownPositiveInLoop(const SCEV *S, const Loop *L,1342ScalarEvolution &SE) {1343const SCEV *Zero = SE.getZero(S->getType());1344return SE.isAvailableAtLoopEntry(S, L) &&1345SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, S, Zero);1346}13471348bool llvm::isKnownNonPositiveInLoop(const SCEV *S, const Loop *L,1349ScalarEvolution &SE) {1350const SCEV *Zero = SE.getZero(S->getType());1351return SE.isAvailableAtLoopEntry(S, L) &&1352SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLE, S, Zero);1353}13541355bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,1356bool Signed) {1357unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();1358APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) :1359APInt::getMinValue(BitWidth);1360auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;1361return SE.isAvailableAtLoopEntry(S, L) &&1362SE.isLoopEntryGuardedByCond(L, Predicate, S,1363SE.getConstant(Min));1364}13651366bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,1367bool Signed) {1368unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();1369APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) :1370APInt::getMaxValue(BitWidth);1371auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;1372return SE.isAvailableAtLoopEntry(S, L) &&1373SE.isLoopEntryGuardedByCond(L, Predicate, S,1374SE.getConstant(Max));1375}13761377//===----------------------------------------------------------------------===//1378// rewriteLoopExitValues - Optimize IV users outside the loop.1379// As a side effect, reduces the amount of IV processing within the loop.1380//===----------------------------------------------------------------------===//13811382static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {1383SmallPtrSet<const Instruction *, 8> Visited;1384SmallVector<const Instruction *, 8> WorkList;1385Visited.insert(I);1386WorkList.push_back(I);1387while (!WorkList.empty()) {1388const Instruction *Curr = WorkList.pop_back_val();1389// This use is outside the loop, nothing to do.1390if (!L->contains(Curr))1391continue;1392// Do we assume it is a "hard" use which will not be eliminated easily?1393if (Curr->mayHaveSideEffects())1394return true;1395// Otherwise, add all its users to worklist.1396for (const auto *U : Curr->users()) {1397auto *UI = cast<Instruction>(U);1398if (Visited.insert(UI).second)1399WorkList.push_back(UI);1400}1401}1402return false;1403}14041405// Collect information about PHI nodes which can be transformed in1406// rewriteLoopExitValues.1407struct RewritePhi {1408PHINode *PN; // For which PHI node is this replacement?1409unsigned Ith; // For which incoming value?1410const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.1411Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?1412bool HighCost; // Is this expansion a high-cost?14131414RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,1415bool H)1416: PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),1417HighCost(H) {}1418};14191420// Check whether it is possible to delete the loop after rewriting exit1421// value. If it is possible, ignore ReplaceExitValue and do rewriting1422// aggressively.1423static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {1424BasicBlock *Preheader = L->getLoopPreheader();1425// If there is no preheader, the loop will not be deleted.1426if (!Preheader)1427return false;14281429// In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.1430// We obviate multiple ExitingBlocks case for simplicity.1431// TODO: If we see testcase with multiple ExitingBlocks can be deleted1432// after exit value rewriting, we can enhance the logic here.1433SmallVector<BasicBlock *, 4> ExitingBlocks;1434L->getExitingBlocks(ExitingBlocks);1435SmallVector<BasicBlock *, 8> ExitBlocks;1436L->getUniqueExitBlocks(ExitBlocks);1437if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)1438return false;14391440BasicBlock *ExitBlock = ExitBlocks[0];1441BasicBlock::iterator BI = ExitBlock->begin();1442while (PHINode *P = dyn_cast<PHINode>(BI)) {1443Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);14441445// If the Incoming value of P is found in RewritePhiSet, we know it1446// could be rewritten to use a loop invariant value in transformation1447// phase later. Skip it in the loop invariant check below.1448bool found = false;1449for (const RewritePhi &Phi : RewritePhiSet) {1450unsigned i = Phi.Ith;1451if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {1452found = true;1453break;1454}1455}14561457Instruction *I;1458if (!found && (I = dyn_cast<Instruction>(Incoming)))1459if (!L->hasLoopInvariantOperands(I))1460return false;14611462++BI;1463}14641465for (auto *BB : L->blocks())1466if (llvm::any_of(*BB, [](Instruction &I) {1467return I.mayHaveSideEffects();1468}))1469return false;14701471return true;1472}14731474/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,1475/// and returns true if this Phi is an induction phi in the loop. When1476/// isInductionPHI returns true, \p ID will be also be set by isInductionPHI.1477static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE,1478InductionDescriptor &ID) {1479if (!Phi)1480return false;1481if (!L->getLoopPreheader())1482return false;1483if (Phi->getParent() != L->getHeader())1484return false;1485return InductionDescriptor::isInductionPHI(Phi, L, SE, ID);1486}14871488int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,1489ScalarEvolution *SE,1490const TargetTransformInfo *TTI,1491SCEVExpander &Rewriter, DominatorTree *DT,1492ReplaceExitVal ReplaceExitValue,1493SmallVector<WeakTrackingVH, 16> &DeadInsts) {1494// Check a pre-condition.1495assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&1496"Indvars did not preserve LCSSA!");14971498SmallVector<BasicBlock*, 8> ExitBlocks;1499L->getUniqueExitBlocks(ExitBlocks);15001501SmallVector<RewritePhi, 8> RewritePhiSet;1502// Find all values that are computed inside the loop, but used outside of it.1503// Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan1504// the exit blocks of the loop to find them.1505for (BasicBlock *ExitBB : ExitBlocks) {1506// If there are no PHI nodes in this exit block, then no values defined1507// inside the loop are used on this path, skip it.1508PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());1509if (!PN) continue;15101511unsigned NumPreds = PN->getNumIncomingValues();15121513// Iterate over all of the PHI nodes.1514BasicBlock::iterator BBI = ExitBB->begin();1515while ((PN = dyn_cast<PHINode>(BBI++))) {1516if (PN->use_empty())1517continue; // dead use, don't replace it15181519if (!SE->isSCEVable(PN->getType()))1520continue;15211522// Iterate over all of the values in all the PHI nodes.1523for (unsigned i = 0; i != NumPreds; ++i) {1524// If the value being merged in is not integer or is not defined1525// in the loop, skip it.1526Value *InVal = PN->getIncomingValue(i);1527if (!isa<Instruction>(InVal))1528continue;15291530// If this pred is for a subloop, not L itself, skip it.1531if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)1532continue; // The Block is in a subloop, skip it.15331534// Check that InVal is defined in the loop.1535Instruction *Inst = cast<Instruction>(InVal);1536if (!L->contains(Inst))1537continue;15381539// Find exit values which are induction variables in the loop, and are1540// unused in the loop, with the only use being the exit block PhiNode,1541// and the induction variable update binary operator.1542// The exit value can be replaced with the final value when it is cheap1543// to do so.1544if (ReplaceExitValue == UnusedIndVarInLoop) {1545InductionDescriptor ID;1546PHINode *IndPhi = dyn_cast<PHINode>(Inst);1547if (IndPhi) {1548if (!checkIsIndPhi(IndPhi, L, SE, ID))1549continue;1550// This is an induction PHI. Check that the only users are PHI1551// nodes, and induction variable update binary operators.1552if (llvm::any_of(Inst->users(), [&](User *U) {1553if (!isa<PHINode>(U) && !isa<BinaryOperator>(U))1554return true;1555BinaryOperator *B = dyn_cast<BinaryOperator>(U);1556if (B && B != ID.getInductionBinOp())1557return true;1558return false;1559}))1560continue;1561} else {1562// If it is not an induction phi, it must be an induction update1563// binary operator with an induction phi user.1564BinaryOperator *B = dyn_cast<BinaryOperator>(Inst);1565if (!B)1566continue;1567if (llvm::any_of(Inst->users(), [&](User *U) {1568PHINode *Phi = dyn_cast<PHINode>(U);1569if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))1570return true;1571return false;1572}))1573continue;1574if (B != ID.getInductionBinOp())1575continue;1576}1577}15781579// Okay, this instruction has a user outside of the current loop1580// and varies predictably *inside* the loop. Evaluate the value it1581// contains when the loop exits, if possible. We prefer to start with1582// expressions which are true for all exits (so as to maximize1583// expression reuse by the SCEVExpander), but resort to per-exit1584// evaluation if that fails.1585const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());1586if (isa<SCEVCouldNotCompute>(ExitValue) ||1587!SE->isLoopInvariant(ExitValue, L) ||1588!Rewriter.isSafeToExpand(ExitValue)) {1589// TODO: This should probably be sunk into SCEV in some way; maybe a1590// getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for1591// most SCEV expressions and other recurrence types (e.g. shift1592// recurrences). Is there existing code we can reuse?1593const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));1594if (isa<SCEVCouldNotCompute>(ExitCount))1595continue;1596if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))1597if (AddRec->getLoop() == L)1598ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);1599if (isa<SCEVCouldNotCompute>(ExitValue) ||1600!SE->isLoopInvariant(ExitValue, L) ||1601!Rewriter.isSafeToExpand(ExitValue))1602continue;1603}16041605// Computing the value outside of the loop brings no benefit if it is1606// definitely used inside the loop in a way which can not be optimized1607// away. Avoid doing so unless we know we have a value which computes1608// the ExitValue already. TODO: This should be merged into SCEV1609// expander to leverage its knowledge of existing expressions.1610if (ReplaceExitValue != AlwaysRepl && !isa<SCEVConstant>(ExitValue) &&1611!isa<SCEVUnknown>(ExitValue) && hasHardUserWithinLoop(L, Inst))1612continue;16131614// Check if expansions of this SCEV would count as being high cost.1615bool HighCost = Rewriter.isHighCostExpansion(1616ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst);16171618// Note that we must not perform expansions until after1619// we query *all* the costs, because if we perform temporary expansion1620// inbetween, one that we might not intend to keep, said expansion1621// *may* affect cost calculation of the next SCEV's we'll query,1622// and next SCEV may errneously get smaller cost.16231624// Collect all the candidate PHINodes to be rewritten.1625Instruction *InsertPt =1626(isa<PHINode>(Inst) || isa<LandingPadInst>(Inst)) ?1627&*Inst->getParent()->getFirstInsertionPt() : Inst;1628RewritePhiSet.emplace_back(PN, i, ExitValue, InsertPt, HighCost);1629}1630}1631}16321633// TODO: evaluate whether it is beneficial to change how we calculate1634// high-cost: if we have SCEV 'A' which we know we will expand, should we1635// calculate the cost of other SCEV's after expanding SCEV 'A', thus1636// potentially giving cost bonus to those other SCEV's?16371638bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);1639int NumReplaced = 0;16401641// Transformation.1642for (const RewritePhi &Phi : RewritePhiSet) {1643PHINode *PN = Phi.PN;16441645// Only do the rewrite when the ExitValue can be expanded cheaply.1646// If LoopCanBeDel is true, rewrite exit value aggressively.1647if ((ReplaceExitValue == OnlyCheapRepl ||1648ReplaceExitValue == UnusedIndVarInLoop) &&1649!LoopCanBeDel && Phi.HighCost)1650continue;16511652Value *ExitVal = Rewriter.expandCodeFor(1653Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);16541655LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal1656<< '\n'1657<< " LoopVal = " << *(Phi.ExpansionPoint) << "\n");16581659#ifndef NDEBUG1660// If we reuse an instruction from a loop which is neither L nor one of1661// its containing loops, we end up breaking LCSSA form for this loop by1662// creating a new use of its instruction.1663if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))1664if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))1665if (EVL != L)1666assert(EVL->contains(L) && "LCSSA breach detected!");1667#endif16681669NumReplaced++;1670Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));1671PN->setIncomingValue(Phi.Ith, ExitVal);1672// It's necessary to tell ScalarEvolution about this explicitly so that1673// it can walk the def-use list and forget all SCEVs, as it may not be1674// watching the PHI itself. Once the new exit value is in place, there1675// may not be a def-use connection between the loop and every instruction1676// which got a SCEVAddRecExpr for that loop.1677SE->forgetValue(PN);16781679// If this instruction is dead now, delete it. Don't do it now to avoid1680// invalidating iterators.1681if (isInstructionTriviallyDead(Inst, TLI))1682DeadInsts.push_back(Inst);16831684// Replace PN with ExitVal if that is legal and does not break LCSSA.1685if (PN->getNumIncomingValues() == 1 &&1686LI->replacementPreservesLCSSAForm(PN, ExitVal)) {1687PN->replaceAllUsesWith(ExitVal);1688PN->eraseFromParent();1689}1690}16911692// The insertion point instruction may have been deleted; clear it out1693// so that the rewriter doesn't trip over it later.1694Rewriter.clearInsertPoint();1695return NumReplaced;1696}16971698/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for1699/// \p OrigLoop.1700void llvm::setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,1701Loop *RemainderLoop, uint64_t UF) {1702assert(UF > 0 && "Zero unrolled factor is not supported");1703assert(UnrolledLoop != RemainderLoop &&1704"Unrolled and Remainder loops are expected to distinct");17051706// Get number of iterations in the original scalar loop.1707unsigned OrigLoopInvocationWeight = 0;1708std::optional<unsigned> OrigAverageTripCount =1709getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);1710if (!OrigAverageTripCount)1711return;17121713// Calculate number of iterations in unrolled loop.1714unsigned UnrolledAverageTripCount = *OrigAverageTripCount / UF;1715// Calculate number of iterations for remainder loop.1716unsigned RemainderAverageTripCount = *OrigAverageTripCount % UF;17171718setLoopEstimatedTripCount(UnrolledLoop, UnrolledAverageTripCount,1719OrigLoopInvocationWeight);1720setLoopEstimatedTripCount(RemainderLoop, RemainderAverageTripCount,1721OrigLoopInvocationWeight);1722}17231724/// Utility that implements appending of loops onto a worklist.1725/// Loops are added in preorder (analogous for reverse postorder for trees),1726/// and the worklist is processed LIFO.1727template <typename RangeT>1728void llvm::appendReversedLoopsToWorklist(1729RangeT &&Loops, SmallPriorityWorklist<Loop *, 4> &Worklist) {1730// We use an internal worklist to build up the preorder traversal without1731// recursion.1732SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;17331734// We walk the initial sequence of loops in reverse because we generally want1735// to visit defs before uses and the worklist is LIFO.1736for (Loop *RootL : Loops) {1737assert(PreOrderLoops.empty() && "Must start with an empty preorder walk.");1738assert(PreOrderWorklist.empty() &&1739"Must start with an empty preorder walk worklist.");1740PreOrderWorklist.push_back(RootL);1741do {1742Loop *L = PreOrderWorklist.pop_back_val();1743PreOrderWorklist.append(L->begin(), L->end());1744PreOrderLoops.push_back(L);1745} while (!PreOrderWorklist.empty());17461747Worklist.insert(std::move(PreOrderLoops));1748PreOrderLoops.clear();1749}1750}17511752template <typename RangeT>1753void llvm::appendLoopsToWorklist(RangeT &&Loops,1754SmallPriorityWorklist<Loop *, 4> &Worklist) {1755appendReversedLoopsToWorklist(reverse(Loops), Worklist);1756}17571758template void llvm::appendLoopsToWorklist<ArrayRef<Loop *> &>(1759ArrayRef<Loop *> &Loops, SmallPriorityWorklist<Loop *, 4> &Worklist);17601761template void1762llvm::appendLoopsToWorklist<Loop &>(Loop &L,1763SmallPriorityWorklist<Loop *, 4> &Worklist);17641765void llvm::appendLoopsToWorklist(LoopInfo &LI,1766SmallPriorityWorklist<Loop *, 4> &Worklist) {1767appendReversedLoopsToWorklist(LI, Worklist);1768}17691770Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,1771LoopInfo *LI, LPPassManager *LPM) {1772Loop &New = *LI->AllocateLoop();1773if (PL)1774PL->addChildLoop(&New);1775else1776LI->addTopLevelLoop(&New);17771778if (LPM)1779LPM->addLoop(New);17801781// Add all of the blocks in L to the new loop.1782for (BasicBlock *BB : L->blocks())1783if (LI->getLoopFor(BB) == L)1784New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), *LI);17851786// Add all of the subloops to the new loop.1787for (Loop *I : *L)1788cloneLoop(I, &New, VM, LI, LPM);17891790return &New;1791}17921793/// IR Values for the lower and upper bounds of a pointer evolution. We1794/// need to use value-handles because SCEV expansion can invalidate previously1795/// expanded values. Thus expansion of a pointer can invalidate the bounds for1796/// a previous one.1797struct PointerBounds {1798TrackingVH<Value> Start;1799TrackingVH<Value> End;1800Value *StrideToCheck;1801};18021803/// Expand code for the lower and upper bound of the pointer group \p CG1804/// in \p TheLoop. \return the values for the bounds.1805static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,1806Loop *TheLoop, Instruction *Loc,1807SCEVExpander &Exp, bool HoistRuntimeChecks) {1808LLVMContext &Ctx = Loc->getContext();1809Type *PtrArithTy = PointerType::get(Ctx, CG->AddressSpace);18101811Value *Start = nullptr, *End = nullptr;1812LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");1813const SCEV *Low = CG->Low, *High = CG->High, *Stride = nullptr;18141815// If the Low and High values are themselves loop-variant, then we may want1816// to expand the range to include those covered by the outer loop as well.1817// There is a trade-off here with the advantage being that creating checks1818// using the expanded range permits the runtime memory checks to be hoisted1819// out of the outer loop. This reduces the cost of entering the inner loop,1820// which can be significant for low trip counts. The disadvantage is that1821// there is a chance we may now never enter the vectorized inner loop,1822// whereas using a restricted range check could have allowed us to enter at1823// least once. This is why the behaviour is not currently the default and is1824// controlled by the parameter 'HoistRuntimeChecks'.1825if (HoistRuntimeChecks && TheLoop->getParentLoop() &&1826isa<SCEVAddRecExpr>(High) && isa<SCEVAddRecExpr>(Low)) {1827auto *HighAR = cast<SCEVAddRecExpr>(High);1828auto *LowAR = cast<SCEVAddRecExpr>(Low);1829const Loop *OuterLoop = TheLoop->getParentLoop();1830ScalarEvolution &SE = *Exp.getSE();1831const SCEV *Recur = LowAR->getStepRecurrence(SE);1832if (Recur == HighAR->getStepRecurrence(SE) &&1833HighAR->getLoop() == OuterLoop && LowAR->getLoop() == OuterLoop) {1834BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();1835const SCEV *OuterExitCount = SE.getExitCount(OuterLoop, OuterLoopLatch);1836if (!isa<SCEVCouldNotCompute>(OuterExitCount) &&1837OuterExitCount->getType()->isIntegerTy()) {1838const SCEV *NewHigh =1839cast<SCEVAddRecExpr>(High)->evaluateAtIteration(OuterExitCount, SE);1840if (!isa<SCEVCouldNotCompute>(NewHigh)) {1841LLVM_DEBUG(dbgs() << "LAA: Expanded RT check for range to include "1842"outer loop in order to permit hoisting\n");1843High = NewHigh;1844Low = cast<SCEVAddRecExpr>(Low)->getStart();1845// If there is a possibility that the stride is negative then we have1846// to generate extra checks to ensure the stride is positive.1847if (!SE.isKnownNonNegative(1848SE.applyLoopGuards(Recur, HighAR->getLoop()))) {1849Stride = Recur;1850LLVM_DEBUG(dbgs() << "LAA: ... but need to check stride is "1851"positive: "1852<< *Stride << '\n');1853}1854}1855}1856}1857}18581859Start = Exp.expandCodeFor(Low, PtrArithTy, Loc);1860End = Exp.expandCodeFor(High, PtrArithTy, Loc);1861if (CG->NeedsFreeze) {1862IRBuilder<> Builder(Loc);1863Start = Builder.CreateFreeze(Start, Start->getName() + ".fr");1864End = Builder.CreateFreeze(End, End->getName() + ".fr");1865}1866Value *StrideVal =1867Stride ? Exp.expandCodeFor(Stride, Stride->getType(), Loc) : nullptr;1868LLVM_DEBUG(dbgs() << "Start: " << *Low << " End: " << *High << "\n");1869return {Start, End, StrideVal};1870}18711872/// Turns a collection of checks into a collection of expanded upper and1873/// lower bounds for both pointers in the check.1874static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>1875expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,1876Instruction *Loc, SCEVExpander &Exp, bool HoistRuntimeChecks) {1877SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;18781879// Here we're relying on the SCEV Expander's cache to only emit code for the1880// same bounds once.1881transform(PointerChecks, std::back_inserter(ChecksWithBounds),1882[&](const RuntimePointerCheck &Check) {1883PointerBounds First = expandBounds(Check.first, L, Loc, Exp,1884HoistRuntimeChecks),1885Second = expandBounds(Check.second, L, Loc, Exp,1886HoistRuntimeChecks);1887return std::make_pair(First, Second);1888});18891890return ChecksWithBounds;1891}18921893Value *llvm::addRuntimeChecks(1894Instruction *Loc, Loop *TheLoop,1895const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,1896SCEVExpander &Exp, bool HoistRuntimeChecks) {1897// TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.1898// TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible1899auto ExpandedChecks =1900expandBounds(PointerChecks, TheLoop, Loc, Exp, HoistRuntimeChecks);19011902LLVMContext &Ctx = Loc->getContext();1903IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,1904Loc->getDataLayout());1905ChkBuilder.SetInsertPoint(Loc);1906// Our instructions might fold to a constant.1907Value *MemoryRuntimeCheck = nullptr;19081909for (const auto &[A, B] : ExpandedChecks) {1910// Check if two pointers (A and B) conflict where conflict is computed as:1911// start(A) <= end(B) && start(B) <= end(A)19121913assert((A.Start->getType()->getPointerAddressSpace() ==1914B.End->getType()->getPointerAddressSpace()) &&1915(B.Start->getType()->getPointerAddressSpace() ==1916A.End->getType()->getPointerAddressSpace()) &&1917"Trying to bounds check pointers with different address spaces");19181919// [A|B].Start points to the first accessed byte under base [A|B].1920// [A|B].End points to the last accessed byte, plus one.1921// There is no conflict when the intervals are disjoint:1922// NoConflict = (B.Start >= A.End) || (A.Start >= B.End)1923//1924// bound0 = (B.Start < A.End)1925// bound1 = (A.Start < B.End)1926// IsConflict = bound0 & bound11927Value *Cmp0 = ChkBuilder.CreateICmpULT(A.Start, B.End, "bound0");1928Value *Cmp1 = ChkBuilder.CreateICmpULT(B.Start, A.End, "bound1");1929Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");1930if (A.StrideToCheck) {1931Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(1932A.StrideToCheck, ConstantInt::get(A.StrideToCheck->getType(), 0),1933"stride.check");1934IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);1935}1936if (B.StrideToCheck) {1937Value *IsNegativeStride = ChkBuilder.CreateICmpSLT(1938B.StrideToCheck, ConstantInt::get(B.StrideToCheck->getType(), 0),1939"stride.check");1940IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);1941}1942if (MemoryRuntimeCheck) {1943IsConflict =1944ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");1945}1946MemoryRuntimeCheck = IsConflict;1947}19481949return MemoryRuntimeCheck;1950}19511952Value *llvm::addDiffRuntimeChecks(1953Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,1954function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {19551956LLVMContext &Ctx = Loc->getContext();1957IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx,1958Loc->getDataLayout());1959ChkBuilder.SetInsertPoint(Loc);1960// Our instructions might fold to a constant.1961Value *MemoryRuntimeCheck = nullptr;19621963auto &SE = *Expander.getSE();1964// Map to keep track of created compares, The key is the pair of operands for1965// the compare, to allow detecting and re-using redundant compares.1966DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares;1967for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {1968Type *Ty = SinkStart->getType();1969// Compute VF * IC * AccessSize.1970auto *VFTimesUFTimesSize =1971ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),1972ConstantInt::get(Ty, IC * AccessSize));1973Value *Diff =1974Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);19751976// Check if the same compare has already been created earlier. In that case,1977// there is no need to check it again.1978Value *IsConflict = SeenCompares.lookup({Diff, VFTimesUFTimesSize});1979if (IsConflict)1980continue;19811982IsConflict =1983ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");1984SeenCompares.insert({{Diff, VFTimesUFTimesSize}, IsConflict});1985if (NeedsFreeze)1986IsConflict =1987ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");1988if (MemoryRuntimeCheck) {1989IsConflict =1990ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");1991}1992MemoryRuntimeCheck = IsConflict;1993}19941995return MemoryRuntimeCheck;1996}19971998std::optional<IVConditionInfo>1999llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold,2000const MemorySSA &MSSA, AAResults &AA) {2001auto *TI = dyn_cast<BranchInst>(L.getHeader()->getTerminator());2002if (!TI || !TI->isConditional())2003return {};20042005auto *CondI = dyn_cast<Instruction>(TI->getCondition());2006// The case with the condition outside the loop should already be handled2007// earlier.2008// Allow CmpInst and TruncInsts as they may be users of load instructions2009// and have potential for partial unswitching2010if (!CondI || !isa<CmpInst, TruncInst>(CondI) || !L.contains(CondI))2011return {};20122013SmallVector<Instruction *> InstToDuplicate;2014InstToDuplicate.push_back(CondI);20152016SmallVector<Value *, 4> WorkList;2017WorkList.append(CondI->op_begin(), CondI->op_end());20182019SmallVector<MemoryAccess *, 4> AccessesToCheck;2020SmallVector<MemoryLocation, 4> AccessedLocs;2021while (!WorkList.empty()) {2022Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());2023if (!I || !L.contains(I))2024continue;20252026// TODO: support additional instructions.2027if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))2028return {};20292030// Do not duplicate volatile and atomic loads.2031if (auto *LI = dyn_cast<LoadInst>(I))2032if (LI->isVolatile() || LI->isAtomic())2033return {};20342035InstToDuplicate.push_back(I);2036if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {2037if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MA)) {2038// Queue the defining access to check for alias checks.2039AccessesToCheck.push_back(MemUse->getDefiningAccess());2040AccessedLocs.push_back(MemoryLocation::get(I));2041} else {2042// MemoryDefs may clobber the location or may be atomic memory2043// operations. Bail out.2044return {};2045}2046}2047WorkList.append(I->op_begin(), I->op_end());2048}20492050if (InstToDuplicate.empty())2051return {};20522053SmallVector<BasicBlock *, 4> ExitingBlocks;2054L.getExitingBlocks(ExitingBlocks);2055auto HasNoClobbersOnPath =2056[&L, &AA, &AccessedLocs, &ExitingBlocks, &InstToDuplicate,2057MSSAThreshold](BasicBlock *Succ, BasicBlock *Header,2058SmallVector<MemoryAccess *, 4> AccessesToCheck)2059-> std::optional<IVConditionInfo> {2060IVConditionInfo Info;2061// First, collect all blocks in the loop that are on a patch from Succ2062// to the header.2063SmallVector<BasicBlock *, 4> WorkList;2064WorkList.push_back(Succ);2065WorkList.push_back(Header);2066SmallPtrSet<BasicBlock *, 4> Seen;2067Seen.insert(Header);2068Info.PathIsNoop &=2069all_of(*Header, [](Instruction &I) { return !I.mayHaveSideEffects(); });20702071while (!WorkList.empty()) {2072BasicBlock *Current = WorkList.pop_back_val();2073if (!L.contains(Current))2074continue;2075const auto &SeenIns = Seen.insert(Current);2076if (!SeenIns.second)2077continue;20782079Info.PathIsNoop &= all_of(2080*Current, [](Instruction &I) { return !I.mayHaveSideEffects(); });2081WorkList.append(succ_begin(Current), succ_end(Current));2082}20832084// Require at least 2 blocks on a path through the loop. This skips2085// paths that directly exit the loop.2086if (Seen.size() < 2)2087return {};20882089// Next, check if there are any MemoryDefs that are on the path through2090// the loop (in the Seen set) and they may-alias any of the locations in2091// AccessedLocs. If that is the case, they may modify the condition and2092// partial unswitching is not possible.2093SmallPtrSet<MemoryAccess *, 4> SeenAccesses;2094while (!AccessesToCheck.empty()) {2095MemoryAccess *Current = AccessesToCheck.pop_back_val();2096auto SeenI = SeenAccesses.insert(Current);2097if (!SeenI.second || !Seen.contains(Current->getBlock()))2098continue;20992100// Bail out if exceeded the threshold.2101if (SeenAccesses.size() >= MSSAThreshold)2102return {};21032104// MemoryUse are read-only accesses.2105if (isa<MemoryUse>(Current))2106continue;21072108// For a MemoryDef, check if is aliases any of the location feeding2109// the original condition.2110if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {2111if (any_of(AccessedLocs, [&AA, CurrentDef](MemoryLocation &Loc) {2112return isModSet(2113AA.getModRefInfo(CurrentDef->getMemoryInst(), Loc));2114}))2115return {};2116}21172118for (Use &U : Current->uses())2119AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));2120}21212122// We could also allow loops with known trip counts without mustprogress,2123// but ScalarEvolution may not be available.2124Info.PathIsNoop &= isMustProgress(&L);21252126// If the path is considered a no-op so far, check if it reaches a2127// single exit block without any phis. This ensures no values from the2128// loop are used outside of the loop.2129if (Info.PathIsNoop) {2130for (auto *Exiting : ExitingBlocks) {2131if (!Seen.contains(Exiting))2132continue;2133for (auto *Succ : successors(Exiting)) {2134if (L.contains(Succ))2135continue;21362137Info.PathIsNoop &= Succ->phis().empty() &&2138(!Info.ExitForPath || Info.ExitForPath == Succ);2139if (!Info.PathIsNoop)2140break;2141assert((!Info.ExitForPath || Info.ExitForPath == Succ) &&2142"cannot have multiple exit blocks");2143Info.ExitForPath = Succ;2144}2145}2146}2147if (!Info.ExitForPath)2148Info.PathIsNoop = false;21492150Info.InstToDuplicate = InstToDuplicate;2151return Info;2152};21532154// If we branch to the same successor, partial unswitching will not be2155// beneficial.2156if (TI->getSuccessor(0) == TI->getSuccessor(1))2157return {};21582159if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(0), L.getHeader(),2160AccessesToCheck)) {2161Info->KnownValue = ConstantInt::getTrue(TI->getContext());2162return Info;2163}2164if (auto Info = HasNoClobbersOnPath(TI->getSuccessor(1), L.getHeader(),2165AccessesToCheck)) {2166Info->KnownValue = ConstantInt::getFalse(TI->getContext());2167return Info;2168}21692170return {};2171}217221732174