Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
35269 views
//===- LoopUnrollAndJam.cpp - Loop unroll and jam pass --------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This pass implements an unroll and jam pass. Most of the work is done by9// Utils/UnrollLoopAndJam.cpp.10//===----------------------------------------------------------------------===//1112#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"13#include "llvm/ADT/ArrayRef.h"14#include "llvm/ADT/PriorityWorklist.h"15#include "llvm/ADT/SmallPtrSet.h"16#include "llvm/ADT/StringRef.h"17#include "llvm/Analysis/AssumptionCache.h"18#include "llvm/Analysis/CodeMetrics.h"19#include "llvm/Analysis/DependenceAnalysis.h"20#include "llvm/Analysis/LoopAnalysisManager.h"21#include "llvm/Analysis/LoopInfo.h"22#include "llvm/Analysis/LoopNestAnalysis.h"23#include "llvm/Analysis/LoopPass.h"24#include "llvm/Analysis/OptimizationRemarkEmitter.h"25#include "llvm/Analysis/ScalarEvolution.h"26#include "llvm/Analysis/TargetTransformInfo.h"27#include "llvm/IR/BasicBlock.h"28#include "llvm/IR/Constants.h"29#include "llvm/IR/Dominators.h"30#include "llvm/IR/Function.h"31#include "llvm/IR/Instructions.h"32#include "llvm/IR/Metadata.h"33#include "llvm/IR/PassManager.h"34#include "llvm/Support/Casting.h"35#include "llvm/Support/CommandLine.h"36#include "llvm/Support/Compiler.h"37#include "llvm/Support/Debug.h"38#include "llvm/Support/raw_ostream.h"39#include "llvm/Transforms/Scalar/LoopPassManager.h"40#include "llvm/Transforms/Utils/LoopPeel.h"41#include "llvm/Transforms/Utils/LoopUtils.h"42#include "llvm/Transforms/Utils/UnrollLoop.h"43#include <cassert>44#include <cstdint>4546namespace llvm {47class Instruction;48class Value;49} // namespace llvm5051using namespace llvm;5253#define DEBUG_TYPE "loop-unroll-and-jam"5455/// @{56/// Metadata attribute names57static const char *const LLVMLoopUnrollAndJamFollowupAll =58"llvm.loop.unroll_and_jam.followup_all";59static const char *const LLVMLoopUnrollAndJamFollowupInner =60"llvm.loop.unroll_and_jam.followup_inner";61static const char *const LLVMLoopUnrollAndJamFollowupOuter =62"llvm.loop.unroll_and_jam.followup_outer";63static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner =64"llvm.loop.unroll_and_jam.followup_remainder_inner";65static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter =66"llvm.loop.unroll_and_jam.followup_remainder_outer";67/// @}6869static cl::opt<bool>70AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden,71cl::desc("Allows loops to be unroll-and-jammed."));7273static cl::opt<unsigned> UnrollAndJamCount(74"unroll-and-jam-count", cl::Hidden,75cl::desc("Use this unroll count for all loops including those with "76"unroll_and_jam_count pragma values, for testing purposes"));7778static cl::opt<unsigned> UnrollAndJamThreshold(79"unroll-and-jam-threshold", cl::init(60), cl::Hidden,80cl::desc("Threshold to use for inner loop when doing unroll and jam."));8182static cl::opt<unsigned> PragmaUnrollAndJamThreshold(83"pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden,84cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or "85"unroll_count pragma."));8687// Returns the loop hint metadata node with the given name (for example,88// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is89// returned.90static MDNode *getUnrollMetadataForLoop(const Loop *L, StringRef Name) {91if (MDNode *LoopID = L->getLoopID())92return GetUnrollMetadata(LoopID, Name);93return nullptr;94}9596// Returns true if the loop has any metadata starting with Prefix. For example a97// Prefix of "llvm.loop.unroll." returns true if we have any unroll metadata.98static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix) {99if (MDNode *LoopID = L->getLoopID()) {100// First operand should refer to the loop id itself.101assert(LoopID->getNumOperands() > 0 && "requires at least one operand");102assert(LoopID->getOperand(0) == LoopID && "invalid loop id");103104for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) {105MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I));106if (!MD)107continue;108109MDString *S = dyn_cast<MDString>(MD->getOperand(0));110if (!S)111continue;112113if (S->getString().starts_with(Prefix))114return true;115}116}117return false;118}119120// Returns true if the loop has an unroll_and_jam(enable) pragma.121static bool hasUnrollAndJamEnablePragma(const Loop *L) {122return getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable");123}124125// If loop has an unroll_and_jam_count pragma return the (necessarily126// positive) value from the pragma. Otherwise return 0.127static unsigned unrollAndJamCountPragmaValue(const Loop *L) {128MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.count");129if (MD) {130assert(MD->getNumOperands() == 2 &&131"Unroll count hint metadata should have two operands.");132unsigned Count =133mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();134assert(Count >= 1 && "Unroll count must be positive.");135return Count;136}137return 0;138}139140// Returns loop size estimation for unrolled loop.141static uint64_t142getUnrollAndJammedLoopSize(unsigned LoopSize,143TargetTransformInfo::UnrollingPreferences &UP) {144assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");145return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;146}147148// Calculates unroll and jam count and writes it to UP.Count. Returns true if149// unroll count was set explicitly.150static bool computeUnrollAndJamCount(151Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT,152LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE,153const SmallPtrSetImpl<const Value *> &EphValues,154OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,155unsigned OuterTripMultiple, const UnrollCostEstimator &OuterUCE,156unsigned InnerTripCount, unsigned InnerLoopSize,157TargetTransformInfo::UnrollingPreferences &UP,158TargetTransformInfo::PeelingPreferences &PP) {159unsigned OuterLoopSize = OuterUCE.getRolledLoopSize();160// First up use computeUnrollCount from the loop unroller to get a count161// for unrolling the outer loop, plus any loops requiring explicit162// unrolling we leave to the unroller. This uses UP.Threshold /163// UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.164// We have already checked that the loop has no unroll.* pragmas.165unsigned MaxTripCount = 0;166bool UseUpperBound = false;167bool ExplicitUnroll = computeUnrollCount(168L, TTI, DT, LI, AC, SE, EphValues, ORE, OuterTripCount, MaxTripCount,169/*MaxOrZero*/ false, OuterTripMultiple, OuterUCE, UP, PP,170UseUpperBound);171if (ExplicitUnroll || UseUpperBound) {172// If the user explicitly set the loop as unrolled, dont UnJ it. Leave it173// for the unroller instead.174LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "175"computeUnrollCount\n");176UP.Count = 0;177return false;178}179180// Override with any explicit Count from the "unroll-and-jam-count" option.181bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;182if (UserUnrollCount) {183UP.Count = UnrollAndJamCount;184UP.Force = true;185if (UP.AllowRemainder &&186getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&187getUnrollAndJammedLoopSize(InnerLoopSize, UP) <188UP.UnrollAndJamInnerLoopThreshold)189return true;190}191192// Check for unroll_and_jam pragmas193unsigned PragmaCount = unrollAndJamCountPragmaValue(L);194if (PragmaCount > 0) {195UP.Count = PragmaCount;196UP.Runtime = true;197UP.Force = true;198if ((UP.AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&199getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&200getUnrollAndJammedLoopSize(InnerLoopSize, UP) <201UP.UnrollAndJamInnerLoopThreshold)202return true;203}204205bool PragmaEnableUnroll = hasUnrollAndJamEnablePragma(L);206bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;207bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;208209// If the loop has an unrolling pragma, we want to be more aggressive with210// unrolling limits.211if (ExplicitUnrollAndJam)212UP.UnrollAndJamInnerLoopThreshold = PragmaUnrollAndJamThreshold;213214if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=215UP.UnrollAndJamInnerLoopThreshold) {216LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "217"inner loop too large\n");218UP.Count = 0;219return false;220}221222// We have a sensible limit for the outer loop, now adjust it for the inner223// loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set224// explicitly, we want to stick to it.225if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {226while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=227UP.UnrollAndJamInnerLoopThreshold)228UP.Count--;229}230231// If we are explicitly unroll and jamming, we are done. Otherwise there are a232// number of extra performance heuristics to check.233if (ExplicitUnrollAndJam)234return true;235236// If the inner loop count is known and small, leave the entire loop nest to237// be the unroller238if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) {239LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "240"being left for the unroller\n");241UP.Count = 0;242return false;243}244245// Check for situations where UnJ is likely to be unprofitable. Including246// subloops with more than 1 block.247if (SubLoop->getBlocks().size() != 1) {248LLVM_DEBUG(249dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");250UP.Count = 0;251return false;252}253254// Limit to loops where there is something to gain from unrolling and255// jamming the loop. In this case, look for loads that are invariant in the256// outer loop and can become shared.257unsigned NumInvariant = 0;258for (BasicBlock *BB : SubLoop->getBlocks()) {259for (Instruction &I : *BB) {260if (auto *Ld = dyn_cast<LoadInst>(&I)) {261Value *V = Ld->getPointerOperand();262const SCEV *LSCEV = SE.getSCEVAtScope(V, L);263if (SE.isLoopInvariant(LSCEV, L))264NumInvariant++;265}266}267}268if (NumInvariant == 0) {269LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");270UP.Count = 0;271return false;272}273274return false;275}276277static LoopUnrollResult278tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,279ScalarEvolution &SE, const TargetTransformInfo &TTI,280AssumptionCache &AC, DependenceInfo &DI,281OptimizationRemarkEmitter &ORE, int OptLevel) {282TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(283L, SE, TTI, nullptr, nullptr, ORE, OptLevel, std::nullopt, std::nullopt,284std::nullopt, std::nullopt, std::nullopt, std::nullopt);285TargetTransformInfo::PeelingPreferences PP =286gatherPeelingPreferences(L, SE, TTI, std::nullopt, std::nullopt);287288TransformationMode EnableMode = hasUnrollAndJamTransformation(L);289if (EnableMode & TM_Disable)290return LoopUnrollResult::Unmodified;291if (EnableMode & TM_ForcedByUser)292UP.UnrollAndJam = true;293294if (AllowUnrollAndJam.getNumOccurrences() > 0)295UP.UnrollAndJam = AllowUnrollAndJam;296if (UnrollAndJamThreshold.getNumOccurrences() > 0)297UP.UnrollAndJamInnerLoopThreshold = UnrollAndJamThreshold;298// Exit early if unrolling is disabled.299if (!UP.UnrollAndJam || UP.UnrollAndJamInnerLoopThreshold == 0)300return LoopUnrollResult::Unmodified;301302LLVM_DEBUG(dbgs() << "Loop Unroll and Jam: F["303<< L->getHeader()->getParent()->getName() << "] Loop %"304<< L->getHeader()->getName() << "\n");305306// A loop with any unroll pragma (enabling/disabling/count/etc) is left for307// the unroller, so long as it does not explicitly have unroll_and_jam308// metadata. This means #pragma nounroll will disable unroll and jam as well309// as unrolling310if (hasAnyUnrollPragma(L, "llvm.loop.unroll.") &&311!hasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) {312LLVM_DEBUG(dbgs() << " Disabled due to pragma.\n");313return LoopUnrollResult::Unmodified;314}315316if (!isSafeToUnrollAndJam(L, SE, DT, DI, *LI)) {317LLVM_DEBUG(dbgs() << " Disabled due to not being safe.\n");318return LoopUnrollResult::Unmodified;319}320321// Approximate the loop size and collect useful info322SmallPtrSet<const Value *, 32> EphValues;323CodeMetrics::collectEphemeralValues(L, &AC, EphValues);324Loop *SubLoop = L->getSubLoops()[0];325UnrollCostEstimator InnerUCE(SubLoop, TTI, EphValues, UP.BEInsns);326UnrollCostEstimator OuterUCE(L, TTI, EphValues, UP.BEInsns);327328if (!InnerUCE.canUnroll() || !OuterUCE.canUnroll()) {329LLVM_DEBUG(dbgs() << " Loop not considered unrollable\n");330return LoopUnrollResult::Unmodified;331}332333unsigned InnerLoopSize = InnerUCE.getRolledLoopSize();334LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterUCE.getRolledLoopSize()335<< "\n");336LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n");337338if (InnerUCE.NumInlineCandidates != 0 || OuterUCE.NumInlineCandidates != 0) {339LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");340return LoopUnrollResult::Unmodified;341}342// FIXME: The call to canUnroll() allows some controlled convergent343// operations, but we block them here for future changes.344if (InnerUCE.Convergence != ConvergenceKind::None ||345OuterUCE.Convergence != ConvergenceKind::None) {346LLVM_DEBUG(347dbgs() << " Not unrolling loop with convergent instructions.\n");348return LoopUnrollResult::Unmodified;349}350351// Save original loop IDs for after the transformation.352MDNode *OrigOuterLoopID = L->getLoopID();353MDNode *OrigSubLoopID = SubLoop->getLoopID();354355// To assign the loop id of the epilogue, assign it before unrolling it so it356// is applied to every inner loop of the epilogue. We later apply the loop ID357// for the jammed inner loop.358std::optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(359OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,360LLVMLoopUnrollAndJamFollowupRemainderInner});361if (NewInnerEpilogueLoopID)362SubLoop->setLoopID(*NewInnerEpilogueLoopID);363364// Find trip count and trip multiple365BasicBlock *Latch = L->getLoopLatch();366BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();367unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);368unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);369unsigned InnerTripCount = SE.getSmallConstantTripCount(SubLoop, SubLoopLatch);370371// Decide if, and by how much, to unroll372bool IsCountSetExplicitly = computeUnrollAndJamCount(373L, SubLoop, TTI, DT, LI, &AC, SE, EphValues, &ORE, OuterTripCount,374OuterTripMultiple, OuterUCE, InnerTripCount, InnerLoopSize, UP, PP);375if (UP.Count <= 1)376return LoopUnrollResult::Unmodified;377// Unroll factor (Count) must be less or equal to TripCount.378if (OuterTripCount && UP.Count > OuterTripCount)379UP.Count = OuterTripCount;380381Loop *EpilogueOuterLoop = nullptr;382LoopUnrollResult UnrollResult = UnrollAndJamLoop(383L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI,384&SE, &DT, &AC, &TTI, &ORE, &EpilogueOuterLoop);385386// Assign new loop attributes.387if (EpilogueOuterLoop) {388std::optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(389OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,390LLVMLoopUnrollAndJamFollowupRemainderOuter});391if (NewOuterEpilogueLoopID)392EpilogueOuterLoop->setLoopID(*NewOuterEpilogueLoopID);393}394395std::optional<MDNode *> NewInnerLoopID =396makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,397LLVMLoopUnrollAndJamFollowupInner});398if (NewInnerLoopID)399SubLoop->setLoopID(*NewInnerLoopID);400else401SubLoop->setLoopID(OrigSubLoopID);402403if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) {404std::optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(405OrigOuterLoopID,406{LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter});407if (NewOuterLoopID) {408L->setLoopID(*NewOuterLoopID);409410// Do not setLoopAlreadyUnrolled if a followup was given.411return UnrollResult;412}413}414415// If loop has an unroll count pragma or unrolled by explicitly set count416// mark loop as unrolled to prevent unrolling beyond that requested.417if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)418L->setLoopAlreadyUnrolled();419420return UnrollResult;421}422423static bool tryToUnrollAndJamLoop(LoopNest &LN, DominatorTree &DT, LoopInfo &LI,424ScalarEvolution &SE,425const TargetTransformInfo &TTI,426AssumptionCache &AC, DependenceInfo &DI,427OptimizationRemarkEmitter &ORE, int OptLevel,428LPMUpdater &U) {429bool DidSomething = false;430ArrayRef<Loop *> Loops = LN.getLoops();431Loop *OutmostLoop = &LN.getOutermostLoop();432433// Add the loop nests in the reverse order of LN. See method434// declaration.435SmallPriorityWorklist<Loop *, 4> Worklist;436appendLoopsToWorklist(Loops, Worklist);437while (!Worklist.empty()) {438Loop *L = Worklist.pop_back_val();439std::string LoopName = std::string(L->getName());440LoopUnrollResult Result =441tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel);442if (Result != LoopUnrollResult::Unmodified)443DidSomething = true;444if (L == OutmostLoop && Result == LoopUnrollResult::FullyUnrolled)445U.markLoopAsDeleted(*L, LoopName);446}447448return DidSomething;449}450451PreservedAnalyses LoopUnrollAndJamPass::run(LoopNest &LN,452LoopAnalysisManager &AM,453LoopStandardAnalysisResults &AR,454LPMUpdater &U) {455Function &F = *LN.getParent();456457DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);458OptimizationRemarkEmitter ORE(&F);459460if (!tryToUnrollAndJamLoop(LN, AR.DT, AR.LI, AR.SE, AR.TTI, AR.AC, DI, ORE,461OptLevel, U))462return PreservedAnalyses::all();463464auto PA = getLoopPassPreservedAnalyses();465PA.preserve<LoopNestAnalysis>();466return PA;467}468469470