Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
35266 views
//===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// Transform each threading path to effectively jump thread the DFA. For9// example, the CFG below could be transformed as follows, where the cloned10// blocks unconditionally branch to the next correct case based on what is11// identified in the analysis.12//13// sw.bb sw.bb14// / | \ / | \15// case1 case2 case3 case1 case2 case316// \ | / | | |17// determinator det.2 det.3 det.118// br sw.bb / | \19// sw.bb.2 sw.bb.3 sw.bb.120// br case2 br case3 br case1ยง21//22// Definitions and Terminology:23//24// * Threading path:25// a list of basic blocks, the exit state, and the block that determines26// the next state, for which the following notation will be used:27// < path of BBs that form a cycle > [ state, determinator ]28//29// * Predictable switch:30// The switch variable is always a known constant so that all conditional31// jumps based on switch variable can be converted to unconditional jump.32//33// * Determinator:34// The basic block that determines the next state of the DFA.35//36// Representing the optimization in C-like pseudocode: the code pattern on the37// left could functionally be transformed to the right pattern if the switch38// condition is predictable.39//40// X = A goto A41// for (...) A:42// switch (X) ...43// case A goto B44// X = B B:45// case B ...46// X = C goto C47//48// The pass first checks that switch variable X is decided by the control flow49// path taken in the loop; for example, in case B, the next value of X is50// decided to be C. It then enumerates through all paths in the loop and labels51// the basic blocks where the next state is decided.52//53// Using this information it creates new paths that unconditionally branch to54// the next case. This involves cloning code, so it only gets triggered if the55// amount of code duplicated is below a threshold.56//57//===----------------------------------------------------------------------===//5859#include "llvm/Transforms/Scalar/DFAJumpThreading.h"60#include "llvm/ADT/APInt.h"61#include "llvm/ADT/DenseMap.h"62#include "llvm/ADT/SmallSet.h"63#include "llvm/ADT/Statistic.h"64#include "llvm/Analysis/AssumptionCache.h"65#include "llvm/Analysis/CodeMetrics.h"66#include "llvm/Analysis/DomTreeUpdater.h"67#include "llvm/Analysis/LoopInfo.h"68#include "llvm/Analysis/OptimizationRemarkEmitter.h"69#include "llvm/Analysis/TargetTransformInfo.h"70#include "llvm/IR/CFG.h"71#include "llvm/IR/Constants.h"72#include "llvm/IR/IntrinsicInst.h"73#include "llvm/Support/CommandLine.h"74#include "llvm/Support/Debug.h"75#include "llvm/Transforms/Utils/Cloning.h"76#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"77#include "llvm/Transforms/Utils/ValueMapper.h"78#include <algorithm>79#include <deque>8081#ifdef EXPENSIVE_CHECKS82#include "llvm/IR/Verifier.h"83#endif8485using namespace llvm;8687#define DEBUG_TYPE "dfa-jump-threading"8889STATISTIC(NumTransforms, "Number of transformations done");90STATISTIC(NumCloned, "Number of blocks cloned");91STATISTIC(NumPaths, "Number of individual paths threaded");9293static cl::opt<bool>94ClViewCfgBefore("dfa-jump-view-cfg-before",95cl::desc("View the CFG before DFA Jump Threading"),96cl::Hidden, cl::init(false));9798static cl::opt<bool> EarlyExitHeuristic(99"dfa-early-exit-heuristic",100cl::desc("Exit early if an unpredictable value come from the same loop"),101cl::Hidden, cl::init(true));102103static cl::opt<unsigned> MaxPathLength(104"dfa-max-path-length",105cl::desc("Max number of blocks searched to find a threading path"),106cl::Hidden, cl::init(20));107108static cl::opt<unsigned>109MaxNumPaths("dfa-max-num-paths",110cl::desc("Max number of paths enumerated around a switch"),111cl::Hidden, cl::init(200));112113static cl::opt<unsigned>114CostThreshold("dfa-cost-threshold",115cl::desc("Maximum cost accepted for the transformation"),116cl::Hidden, cl::init(50));117118namespace {119120class SelectInstToUnfold {121SelectInst *SI;122PHINode *SIUse;123124public:125SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {}126127SelectInst *getInst() { return SI; }128PHINode *getUse() { return SIUse; }129130explicit operator bool() const { return SI && SIUse; }131};132133void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,134std::vector<SelectInstToUnfold> *NewSIsToUnfold,135std::vector<BasicBlock *> *NewBBs);136137class DFAJumpThreading {138public:139DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,140TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)141: AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}142143bool run(Function &F);144bool LoopInfoBroken;145146private:147void148unfoldSelectInstrs(DominatorTree *DT,149const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {150DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);151SmallVector<SelectInstToUnfold, 4> Stack;152for (SelectInstToUnfold SIToUnfold : SelectInsts)153Stack.push_back(SIToUnfold);154155while (!Stack.empty()) {156SelectInstToUnfold SIToUnfold = Stack.pop_back_val();157158std::vector<SelectInstToUnfold> NewSIsToUnfold;159std::vector<BasicBlock *> NewBBs;160unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs);161162// Put newly discovered select instructions into the work list.163for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold)164Stack.push_back(NewSIToUnfold);165}166}167168AssumptionCache *AC;169DominatorTree *DT;170LoopInfo *LI;171TargetTransformInfo *TTI;172OptimizationRemarkEmitter *ORE;173};174175} // end anonymous namespace176177namespace {178179/// Create a new basic block and sink \p SIToSink into it.180void createBasicBlockAndSinkSelectInst(181DomTreeUpdater *DTU, SelectInst *SI, PHINode *SIUse, SelectInst *SIToSink,182BasicBlock *EndBlock, StringRef NewBBName, BasicBlock **NewBlock,183BranchInst **NewBranch, std::vector<SelectInstToUnfold> *NewSIsToUnfold,184std::vector<BasicBlock *> *NewBBs) {185assert(SIToSink->hasOneUse());186assert(NewBlock);187assert(NewBranch);188*NewBlock = BasicBlock::Create(SI->getContext(), NewBBName,189EndBlock->getParent(), EndBlock);190NewBBs->push_back(*NewBlock);191*NewBranch = BranchInst::Create(EndBlock, *NewBlock);192SIToSink->moveBefore(*NewBranch);193NewSIsToUnfold->push_back(SelectInstToUnfold(SIToSink, SIUse));194DTU->applyUpdates({{DominatorTree::Insert, *NewBlock, EndBlock}});195}196197/// Unfold the select instruction held in \p SIToUnfold by replacing it with198/// control flow.199///200/// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly201/// created basic blocks into \p NewBBs.202///203/// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.204void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,205std::vector<SelectInstToUnfold> *NewSIsToUnfold,206std::vector<BasicBlock *> *NewBBs) {207SelectInst *SI = SIToUnfold.getInst();208PHINode *SIUse = SIToUnfold.getUse();209BasicBlock *StartBlock = SI->getParent();210BasicBlock *EndBlock = SIUse->getParent();211BranchInst *StartBlockTerm =212dyn_cast<BranchInst>(StartBlock->getTerminator());213214assert(StartBlockTerm && StartBlockTerm->isUnconditional());215assert(SI->hasOneUse());216217// These are the new basic blocks for the conditional branch.218// At least one will become an actual new basic block.219BasicBlock *TrueBlock = nullptr;220BasicBlock *FalseBlock = nullptr;221BranchInst *TrueBranch = nullptr;222BranchInst *FalseBranch = nullptr;223224// Sink select instructions to be able to unfold them later.225if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getTrueValue())) {226createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock,227"si.unfold.true", &TrueBlock, &TrueBranch,228NewSIsToUnfold, NewBBs);229}230if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getFalseValue())) {231createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock,232"si.unfold.false", &FalseBlock,233&FalseBranch, NewSIsToUnfold, NewBBs);234}235236// If there was nothing to sink, then arbitrarily choose the 'false' side237// for a new input value to the PHI.238if (!TrueBlock && !FalseBlock) {239FalseBlock = BasicBlock::Create(SI->getContext(), "si.unfold.false",240EndBlock->getParent(), EndBlock);241NewBBs->push_back(FalseBlock);242BranchInst::Create(EndBlock, FalseBlock);243DTU->applyUpdates({{DominatorTree::Insert, FalseBlock, EndBlock}});244}245246// Insert the real conditional branch based on the original condition.247// If we did not create a new block for one of the 'true' or 'false' paths248// of the condition, it means that side of the branch goes to the end block249// directly and the path originates from the start block from the point of250// view of the new PHI.251BasicBlock *TT = EndBlock;252BasicBlock *FT = EndBlock;253if (TrueBlock && FalseBlock) {254// A diamond.255TT = TrueBlock;256FT = FalseBlock;257258// Update the phi node of SI.259SIUse->addIncoming(SI->getTrueValue(), TrueBlock);260SIUse->addIncoming(SI->getFalseValue(), FalseBlock);261262// Update any other PHI nodes in EndBlock.263for (PHINode &Phi : EndBlock->phis()) {264if (&Phi != SIUse) {265Value *OrigValue = Phi.getIncomingValueForBlock(StartBlock);266Phi.addIncoming(OrigValue, TrueBlock);267Phi.addIncoming(OrigValue, FalseBlock);268}269270// Remove incoming place of original StartBlock, which comes in a indirect271// way (through TrueBlock and FalseBlock) now.272Phi.removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false);273}274} else {275BasicBlock *NewBlock = nullptr;276Value *SIOp1 = SI->getTrueValue();277Value *SIOp2 = SI->getFalseValue();278279// A triangle pointing right.280if (!TrueBlock) {281NewBlock = FalseBlock;282FT = FalseBlock;283}284// A triangle pointing left.285else {286NewBlock = TrueBlock;287TT = TrueBlock;288std::swap(SIOp1, SIOp2);289}290291// Update the phi node of SI.292for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) {293if (SIUse->getIncomingBlock(Idx) == StartBlock)294SIUse->setIncomingValue(Idx, SIOp1);295}296SIUse->addIncoming(SIOp2, NewBlock);297298// Update any other PHI nodes in EndBlock.299for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast<PHINode>(II);300++II) {301if (Phi != SIUse)302Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), NewBlock);303}304}305StartBlockTerm->eraseFromParent();306BranchInst::Create(TT, FT, SI->getCondition(), StartBlock);307DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT},308{DominatorTree::Insert, StartBlock, FT}});309310// Preserve loop info311if (Loop *L = LI->getLoopFor(SI->getParent())) {312for (BasicBlock *NewBB : *NewBBs)313L->addBasicBlockToLoop(NewBB, *LI);314}315316// The select is now dead.317assert(SI->use_empty() && "Select must be dead now");318SI->eraseFromParent();319}320321struct ClonedBlock {322BasicBlock *BB;323APInt State; ///< \p State corresponds to the next value of a switch stmnt.324};325326typedef std::deque<BasicBlock *> PathType;327typedef std::vector<PathType> PathsType;328typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;329typedef std::vector<ClonedBlock> CloneList;330331// This data structure keeps track of all blocks that have been cloned. If two332// different ThreadingPaths clone the same block for a certain state it should333// be reused, and it can be looked up in this map.334typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;335336// This map keeps track of all the new definitions for an instruction. This337// information is needed when restoring SSA form after cloning blocks.338typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap;339340inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {341OS << "< ";342for (const BasicBlock *BB : Path) {343std::string BBName;344if (BB->hasName())345raw_string_ostream(BBName) << BB->getName();346else347raw_string_ostream(BBName) << BB;348OS << BBName << " ";349}350OS << ">";351return OS;352}353354/// ThreadingPath is a path in the control flow of a loop that can be threaded355/// by cloning necessary basic blocks and replacing conditional branches with356/// unconditional ones. A threading path includes a list of basic blocks, the357/// exit state, and the block that determines the next state.358struct ThreadingPath {359/// Exit value is DFA's exit state for the given path.360APInt getExitValue() const { return ExitVal; }361void setExitValue(const ConstantInt *V) {362ExitVal = V->getValue();363IsExitValSet = true;364}365bool isExitValueSet() const { return IsExitValSet; }366367/// Determinator is the basic block that determines the next state of the DFA.368const BasicBlock *getDeterminatorBB() const { return DBB; }369void setDeterminator(const BasicBlock *BB) { DBB = BB; }370371/// Path is a list of basic blocks.372const PathType &getPath() const { return Path; }373void setPath(const PathType &NewPath) { Path = NewPath; }374375void print(raw_ostream &OS) const {376OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]";377}378379private:380PathType Path;381APInt ExitVal;382const BasicBlock *DBB = nullptr;383bool IsExitValSet = false;384};385386#ifndef NDEBUG387inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {388TPath.print(OS);389return OS;390}391#endif392393struct MainSwitch {394MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE)395: LI(LI) {396if (isCandidate(SI)) {397Instr = SI;398} else {399ORE->emit([&]() {400return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI)401<< "Switch instruction is not predictable.";402});403}404}405406virtual ~MainSwitch() = default;407408SwitchInst *getInstr() const { return Instr; }409const SmallVector<SelectInstToUnfold, 4> getSelectInsts() {410return SelectInsts;411}412413private:414/// Do a use-def chain traversal starting from the switch condition to see if415/// \p SI is a potential condidate.416///417/// Also, collect select instructions to unfold.418bool isCandidate(const SwitchInst *SI) {419std::deque<std::pair<Value *, BasicBlock *>> Q;420SmallSet<Value *, 16> SeenValues;421SelectInsts.clear();422423Value *SICond = SI->getCondition();424LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n");425if (!isa<PHINode>(SICond))426return false;427428// The switch must be in a loop.429const Loop *L = LI->getLoopFor(SI->getParent());430if (!L)431return false;432433addToQueue(SICond, nullptr, Q, SeenValues);434435while (!Q.empty()) {436Value *Current = Q.front().first;437BasicBlock *CurrentIncomingBB = Q.front().second;438Q.pop_front();439440if (auto *Phi = dyn_cast<PHINode>(Current)) {441for (BasicBlock *IncomingBB : Phi->blocks()) {442Value *Incoming = Phi->getIncomingValueForBlock(IncomingBB);443addToQueue(Incoming, IncomingBB, Q, SeenValues);444}445LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");446} else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {447if (!isValidSelectInst(SelI))448return false;449addToQueue(SelI->getTrueValue(), CurrentIncomingBB, Q, SeenValues);450addToQueue(SelI->getFalseValue(), CurrentIncomingBB, Q, SeenValues);451LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");452if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))453SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));454} else if (isa<Constant>(Current)) {455LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n");456continue;457} else {458LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n");459// Allow unpredictable values. The hope is that those will be the460// initial switch values that can be ignored (they will hit the461// unthreaded switch) but this assumption will get checked later after462// paths have been enumerated (in function getStateDefMap).463464// If the unpredictable value comes from the same inner loop it is465// likely that it will also be on the enumerated paths, causing us to466// exit after we have enumerated all the paths. This heuristic save467// compile time because a search for all the paths can become expensive.468if (EarlyExitHeuristic &&469L->contains(LI->getLoopFor(CurrentIncomingBB))) {470LLVM_DEBUG(dbgs()471<< "\tExiting early due to unpredictability heuristic.\n");472return false;473}474475continue;476}477}478479return true;480}481482void addToQueue(Value *Val, BasicBlock *BB,483std::deque<std::pair<Value *, BasicBlock *>> &Q,484SmallSet<Value *, 16> &SeenValues) {485if (SeenValues.contains(Val))486return;487Q.push_back({Val, BB});488SeenValues.insert(Val);489}490491bool isValidSelectInst(SelectInst *SI) {492if (!SI->hasOneUse())493return false;494495Instruction *SIUse = dyn_cast<Instruction>(SI->user_back());496// The use of the select inst should be either a phi or another select.497if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse)))498return false;499500BasicBlock *SIBB = SI->getParent();501502// Currently, we can only expand select instructions in basic blocks with503// one successor.504BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator());505if (!SITerm || !SITerm->isUnconditional())506return false;507508// Only fold the select coming from directly where it is defined.509PHINode *PHIUser = dyn_cast<PHINode>(SIUse);510if (PHIUser && PHIUser->getIncomingBlock(*SI->use_begin()) != SIBB)511return false;512513// If select will not be sunk during unfolding, and it is in the same basic514// block as another state defining select, then cannot unfold both.515for (SelectInstToUnfold SIToUnfold : SelectInsts) {516SelectInst *PrevSI = SIToUnfold.getInst();517if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI &&518PrevSI->getParent() == SI->getParent())519return false;520}521522return true;523}524525LoopInfo *LI;526SwitchInst *Instr = nullptr;527SmallVector<SelectInstToUnfold, 4> SelectInsts;528};529530struct AllSwitchPaths {531AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE,532LoopInfo *LI)533: Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE),534LI(LI) {}535536std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }537unsigned getNumThreadingPaths() { return TPaths.size(); }538SwitchInst *getSwitchInst() { return Switch; }539BasicBlock *getSwitchBlock() { return SwitchBlock; }540541void run() {542VisitedBlocks Visited;543PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1);544StateDefMap StateDef = getStateDefMap(LoopPaths);545546if (StateDef.empty()) {547ORE->emit([&]() {548return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",549Switch)550<< "Switch instruction is not predictable.";551});552return;553}554555for (const PathType &Path : LoopPaths) {556ThreadingPath TPath;557558const BasicBlock *PrevBB = Path.back();559for (const BasicBlock *BB : Path) {560if (StateDef.contains(BB)) {561const PHINode *Phi = dyn_cast<PHINode>(StateDef[BB]);562assert(Phi && "Expected a state-defining instr to be a phi node.");563564const Value *V = Phi->getIncomingValueForBlock(PrevBB);565if (const ConstantInt *C = dyn_cast<const ConstantInt>(V)) {566TPath.setExitValue(C);567TPath.setDeterminator(BB);568TPath.setPath(Path);569}570}571572// Switch block is the determinator, this is the final exit value.573if (TPath.isExitValueSet() && BB == Path.front())574break;575576PrevBB = BB;577}578579if (TPath.isExitValueSet() && isSupported(TPath))580TPaths.push_back(TPath);581}582}583584private:585// Value: an instruction that defines a switch state;586// Key: the parent basic block of that instruction.587typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap;588589PathsType paths(BasicBlock *BB, VisitedBlocks &Visited,590unsigned PathDepth) const {591PathsType Res;592593// Stop exploring paths after visiting MaxPathLength blocks594if (PathDepth > MaxPathLength) {595ORE->emit([&]() {596return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached",597Switch)598<< "Exploration stopped after visiting MaxPathLength="599<< ore::NV("MaxPathLength", MaxPathLength) << " blocks.";600});601return Res;602}603604Visited.insert(BB);605606// Stop if we have reached the BB out of loop, since its successors have no607// impact on the DFA.608// TODO: Do we need to stop exploring if BB is the outer loop of the switch?609if (!LI->getLoopFor(BB))610return Res;611612// Some blocks have multiple edges to the same successor, and this set613// is used to prevent a duplicate path from being generated614SmallSet<BasicBlock *, 4> Successors;615for (BasicBlock *Succ : successors(BB)) {616if (!Successors.insert(Succ).second)617continue;618619// Found a cycle through the SwitchBlock620if (Succ == SwitchBlock) {621Res.push_back({BB});622continue;623}624625// We have encountered a cycle, do not get caught in it626if (Visited.contains(Succ))627continue;628629PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1);630for (const PathType &Path : SuccPaths) {631PathType NewPath(Path);632NewPath.push_front(BB);633Res.push_back(NewPath);634if (Res.size() >= MaxNumPaths) {635return Res;636}637}638}639// This block could now be visited again from a different predecessor. Note640// that this will result in exponential runtime. Subpaths could possibly be641// cached but it takes a lot of memory to store them.642Visited.erase(BB);643return Res;644}645646/// Walk the use-def chain and collect all the state-defining instructions.647///648/// Return an empty map if unpredictable values encountered inside the basic649/// blocks of \p LoopPaths.650StateDefMap getStateDefMap(const PathsType &LoopPaths) const {651StateDefMap Res;652653// Basic blocks belonging to any of the loops around the switch statement.654SmallPtrSet<BasicBlock *, 16> LoopBBs;655for (const PathType &Path : LoopPaths) {656for (BasicBlock *BB : Path)657LoopBBs.insert(BB);658}659660Value *FirstDef = Switch->getOperand(0);661662assert(isa<PHINode>(FirstDef) && "The first definition must be a phi.");663664SmallVector<PHINode *, 8> Stack;665Stack.push_back(dyn_cast<PHINode>(FirstDef));666SmallSet<Value *, 16> SeenValues;667668while (!Stack.empty()) {669PHINode *CurPhi = Stack.pop_back_val();670671Res[CurPhi->getParent()] = CurPhi;672SeenValues.insert(CurPhi);673674for (BasicBlock *IncomingBB : CurPhi->blocks()) {675Value *Incoming = CurPhi->getIncomingValueForBlock(IncomingBB);676bool IsOutsideLoops = LoopBBs.count(IncomingBB) == 0;677if (Incoming == FirstDef || isa<ConstantInt>(Incoming) ||678SeenValues.contains(Incoming) || IsOutsideLoops) {679continue;680}681682// Any unpredictable value inside the loops means we must bail out.683if (!isa<PHINode>(Incoming))684return StateDefMap();685686Stack.push_back(cast<PHINode>(Incoming));687}688}689690return Res;691}692693/// The determinator BB should precede the switch-defining BB.694///695/// Otherwise, it is possible that the state defined in the determinator block696/// defines the state for the next iteration of the loop, rather than for the697/// current one.698///699/// Currently supported paths:700/// \code701/// < switch bb1 determ def > [ 42, determ ]702/// < switch_and_def bb1 determ > [ 42, determ ]703/// < switch_and_def_and_determ bb1 > [ 42, switch_and_def_and_determ ]704/// \endcode705///706/// Unsupported paths:707/// \code708/// < switch bb1 def determ > [ 43, determ ]709/// < switch_and_determ bb1 def > [ 43, switch_and_determ ]710/// \endcode711bool isSupported(const ThreadingPath &TPath) {712Instruction *SwitchCondI = dyn_cast<Instruction>(Switch->getCondition());713assert(SwitchCondI);714if (!SwitchCondI)715return false;716717const BasicBlock *SwitchCondDefBB = SwitchCondI->getParent();718const BasicBlock *SwitchCondUseBB = Switch->getParent();719const BasicBlock *DeterminatorBB = TPath.getDeterminatorBB();720721assert(722SwitchCondUseBB == TPath.getPath().front() &&723"The first BB in a threading path should have the switch instruction");724if (SwitchCondUseBB != TPath.getPath().front())725return false;726727// Make DeterminatorBB the first element in Path.728PathType Path = TPath.getPath();729auto ItDet = llvm::find(Path, DeterminatorBB);730std::rotate(Path.begin(), ItDet, Path.end());731732bool IsDetBBSeen = false;733bool IsDefBBSeen = false;734bool IsUseBBSeen = false;735for (BasicBlock *BB : Path) {736if (BB == DeterminatorBB)737IsDetBBSeen = true;738if (BB == SwitchCondDefBB)739IsDefBBSeen = true;740if (BB == SwitchCondUseBB)741IsUseBBSeen = true;742if (IsDetBBSeen && IsUseBBSeen && !IsDefBBSeen)743return false;744}745746return true;747}748749SwitchInst *Switch;750BasicBlock *SwitchBlock;751OptimizationRemarkEmitter *ORE;752std::vector<ThreadingPath> TPaths;753LoopInfo *LI;754};755756struct TransformDFA {757TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT,758AssumptionCache *AC, TargetTransformInfo *TTI,759OptimizationRemarkEmitter *ORE,760SmallPtrSet<const Value *, 32> EphValues)761: SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),762EphValues(EphValues) {}763764void run() {765if (isLegalAndProfitableToTransform()) {766createAllExitPaths();767NumTransforms++;768}769}770771private:772/// This function performs both a legality check and profitability check at773/// the same time since it is convenient to do so. It iterates through all774/// blocks that will be cloned, and keeps track of the duplication cost. It775/// also returns false if it is illegal to clone some required block.776bool isLegalAndProfitableToTransform() {777CodeMetrics Metrics;778SwitchInst *Switch = SwitchPaths->getSwitchInst();779780// Don't thread switch without multiple successors.781if (Switch->getNumSuccessors() <= 1)782return false;783784// Note that DuplicateBlockMap is not being used as intended here. It is785// just being used to ensure (BB, State) pairs are only counted once.786DuplicateBlockMap DuplicateMap;787788for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {789PathType PathBBs = TPath.getPath();790APInt NextState = TPath.getExitValue();791const BasicBlock *Determinator = TPath.getDeterminatorBB();792793// Update Metrics for the Switch block, this is always cloned794BasicBlock *BB = SwitchPaths->getSwitchBlock();795BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);796if (!VisitedBB) {797Metrics.analyzeBasicBlock(BB, *TTI, EphValues);798DuplicateMap[BB].push_back({BB, NextState});799}800801// If the Switch block is the Determinator, then we can continue since802// this is the only block that is cloned and we already counted for it.803if (PathBBs.front() == Determinator)804continue;805806// Otherwise update Metrics for all blocks that will be cloned. If any807// block is already cloned and would be reused, don't double count it.808auto DetIt = llvm::find(PathBBs, Determinator);809for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {810BB = *BBIt;811VisitedBB = getClonedBB(BB, NextState, DuplicateMap);812if (VisitedBB)813continue;814Metrics.analyzeBasicBlock(BB, *TTI, EphValues);815DuplicateMap[BB].push_back({BB, NextState});816}817818if (Metrics.notDuplicatable) {819LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "820<< "non-duplicatable instructions.\n");821ORE->emit([&]() {822return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst",823Switch)824<< "Contains non-duplicatable instructions.";825});826return false;827}828829// FIXME: Allow jump threading with controlled convergence.830if (Metrics.Convergence != ConvergenceKind::None) {831LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "832<< "convergent instructions.\n");833ORE->emit([&]() {834return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)835<< "Contains convergent instructions.";836});837return false;838}839840if (!Metrics.NumInsts.isValid()) {841LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "842<< "instructions with invalid cost.\n");843ORE->emit([&]() {844return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)845<< "Contains instructions with invalid cost.";846});847return false;848}849}850851InstructionCost DuplicationCost = 0;852853unsigned JumpTableSize = 0;854TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr,855nullptr);856if (JumpTableSize == 0) {857// Factor in the number of conditional branches reduced from jump858// threading. Assume that lowering the switch block is implemented by859// using binary search, hence the LogBase2().860unsigned CondBranches =861APInt(32, Switch->getNumSuccessors()).ceilLogBase2();862assert(CondBranches > 0 &&863"The threaded switch must have multiple branches");864DuplicationCost = Metrics.NumInsts / CondBranches;865} else {866// Compared with jump tables, the DFA optimizer removes an indirect branch867// on each loop iteration, thus making branch prediction more precise. The868// more branch targets there are, the more likely it is for the branch869// predictor to make a mistake, and the more benefit there is in the DFA870// optimizer. Thus, the more branch targets there are, the lower is the871// cost of the DFA opt.872DuplicationCost = Metrics.NumInsts / JumpTableSize;873}874875LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "876<< SwitchPaths->getSwitchBlock()->getName()877<< " is: " << DuplicationCost << "\n\n");878879if (DuplicationCost > CostThreshold) {880LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the "881<< "cost threshold.\n");882ORE->emit([&]() {883return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)884<< "Duplication cost exceeds the cost threshold (cost="885<< ore::NV("Cost", DuplicationCost)886<< ", threshold=" << ore::NV("Threshold", CostThreshold) << ").";887});888return false;889}890891ORE->emit([&]() {892return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch)893<< "Switch statement jump-threaded.";894});895896return true;897}898899/// Transform each threading path to effectively jump thread the DFA.900void createAllExitPaths() {901DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);902903// Move the switch block to the end of the path, since it will be duplicated904BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();905for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {906LLVM_DEBUG(dbgs() << TPath << "\n");907PathType NewPath(TPath.getPath());908NewPath.push_back(SwitchBlock);909TPath.setPath(NewPath);910}911912// Transform the ThreadingPaths and keep track of the cloned values913DuplicateBlockMap DuplicateMap;914DefMap NewDefs;915916SmallSet<BasicBlock *, 16> BlocksToClean;917for (BasicBlock *BB : successors(SwitchBlock))918BlocksToClean.insert(BB);919920for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {921createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);922NumPaths++;923}924925// After all paths are cloned, now update the last successor of the cloned926// path so it skips over the switch statement927for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())928updateLastSuccessor(TPath, DuplicateMap, &DTU);929930// For each instruction that was cloned and used outside, update its uses931updateSSA(NewDefs);932933// Clean PHI Nodes for the newly created blocks934for (BasicBlock *BB : BlocksToClean)935cleanPhiNodes(BB);936}937938/// For a specific ThreadingPath \p Path, create an exit path starting from939/// the determinator block.940///941/// To remember the correct destination, we have to duplicate blocks942/// corresponding to each state. Also update the terminating instruction of943/// the predecessors, and phis in the successor blocks.944void createExitPath(DefMap &NewDefs, ThreadingPath &Path,945DuplicateBlockMap &DuplicateMap,946SmallSet<BasicBlock *, 16> &BlocksToClean,947DomTreeUpdater *DTU) {948APInt NextState = Path.getExitValue();949const BasicBlock *Determinator = Path.getDeterminatorBB();950PathType PathBBs = Path.getPath();951952// Don't select the placeholder block in front953if (PathBBs.front() == Determinator)954PathBBs.pop_front();955956auto DetIt = llvm::find(PathBBs, Determinator);957// When there is only one BB in PathBBs, the determinator takes itself as a958// direct predecessor.959BasicBlock *PrevBB = PathBBs.size() == 1 ? *DetIt : *std::prev(DetIt);960for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {961BasicBlock *BB = *BBIt;962BlocksToClean.insert(BB);963964// We already cloned BB for this NextState, now just update the branch965// and continue.966BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap);967if (NextBB) {968updatePredecessor(PrevBB, BB, NextBB, DTU);969PrevBB = NextBB;970continue;971}972973// Clone the BB and update the successor of Prev to jump to the new block974BasicBlock *NewBB = cloneBlockAndUpdatePredecessor(975BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU);976DuplicateMap[BB].push_back({NewBB, NextState});977BlocksToClean.insert(NewBB);978PrevBB = NewBB;979}980}981982/// Restore SSA form after cloning blocks.983///984/// Each cloned block creates new defs for a variable, and the uses need to be985/// updated to reflect this. The uses may be replaced with a cloned value, or986/// some derived phi instruction. Note that all uses of a value defined in the987/// same block were already remapped when cloning the block.988void updateSSA(DefMap &NewDefs) {989SSAUpdaterBulk SSAUpdate;990SmallVector<Use *, 16> UsesToRename;991992for (const auto &KV : NewDefs) {993Instruction *I = KV.first;994BasicBlock *BB = I->getParent();995std::vector<Instruction *> Cloned = KV.second;996997// Scan all uses of this instruction to see if it is used outside of its998// block, and if so, record them in UsesToRename.999for (Use &U : I->uses()) {1000Instruction *User = cast<Instruction>(U.getUser());1001if (PHINode *UserPN = dyn_cast<PHINode>(User)) {1002if (UserPN->getIncomingBlock(U) == BB)1003continue;1004} else if (User->getParent() == BB) {1005continue;1006}10071008UsesToRename.push_back(&U);1009}10101011// If there are no uses outside the block, we're done with this1012// instruction.1013if (UsesToRename.empty())1014continue;1015LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I1016<< "\n");10171018// We found a use of I outside of BB. Rename all uses of I that are1019// outside its block to be uses of the appropriate PHI node etc. See1020// ValuesInBlocks with the values we know.1021unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType());1022SSAUpdate.AddAvailableValue(VarNum, BB, I);1023for (Instruction *New : Cloned)1024SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New);10251026while (!UsesToRename.empty())1027SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val());10281029LLVM_DEBUG(dbgs() << "\n");1030}1031// SSAUpdater handles phi placement and renaming uses with the appropriate1032// value.1033SSAUpdate.RewriteAllUses(DT);1034}10351036/// Clones a basic block, and adds it to the CFG.1037///1038/// This function also includes updating phi nodes in the successors of the1039/// BB, and remapping uses that were defined locally in the cloned BB.1040BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB,1041const APInt &NextState,1042DuplicateBlockMap &DuplicateMap,1043DefMap &NewDefs,1044DomTreeUpdater *DTU) {1045ValueToValueMapTy VMap;1046BasicBlock *NewBB = CloneBasicBlock(1047BB, VMap, ".jt" + std::to_string(NextState.getLimitedValue()),1048BB->getParent());1049NewBB->moveAfter(BB);1050NumCloned++;10511052for (Instruction &I : *NewBB) {1053// Do not remap operands of PHINode in case a definition in BB is an1054// incoming value to a phi in the same block. This incoming value will1055// be renamed later while restoring SSA.1056if (isa<PHINode>(&I))1057continue;1058RemapInstruction(&I, VMap,1059RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);1060if (AssumeInst *II = dyn_cast<AssumeInst>(&I))1061AC->registerAssumption(II);1062}10631064updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap);1065updatePredecessor(PrevBB, BB, NewBB, DTU);1066updateDefMap(NewDefs, VMap);10671068// Add all successors to the DominatorTree1069SmallPtrSet<BasicBlock *, 4> SuccSet;1070for (auto *SuccBB : successors(NewBB)) {1071if (SuccSet.insert(SuccBB).second)1072DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}});1073}1074SuccSet.clear();1075return NewBB;1076}10771078/// Update the phi nodes in BB's successors.1079///1080/// This means creating a new incoming value from NewBB with the new1081/// instruction wherever there is an incoming value from BB.1082void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB,1083const APInt &NextState, ValueToValueMapTy &VMap,1084DuplicateBlockMap &DuplicateMap) {1085std::vector<BasicBlock *> BlocksToUpdate;10861087// If BB is the last block in the path, we can simply update the one case1088// successor that will be reached.1089if (BB == SwitchPaths->getSwitchBlock()) {1090SwitchInst *Switch = SwitchPaths->getSwitchInst();1091BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);1092BlocksToUpdate.push_back(NextCase);1093BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap);1094if (ClonedSucc)1095BlocksToUpdate.push_back(ClonedSucc);1096}1097// Otherwise update phis in all successors.1098else {1099for (BasicBlock *Succ : successors(BB)) {1100BlocksToUpdate.push_back(Succ);11011102// Check if a successor has already been cloned for the particular exit1103// value. In this case if a successor was already cloned, the phi nodes1104// in the cloned block should be updated directly.1105BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap);1106if (ClonedSucc)1107BlocksToUpdate.push_back(ClonedSucc);1108}1109}11101111// If there is a phi with an incoming value from BB, create a new incoming1112// value for the new predecessor ClonedBB. The value will either be the same1113// value from BB or a cloned value.1114for (BasicBlock *Succ : BlocksToUpdate) {1115for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II);1116++II) {1117Value *Incoming = Phi->getIncomingValueForBlock(BB);1118if (Incoming) {1119if (isa<Constant>(Incoming)) {1120Phi->addIncoming(Incoming, ClonedBB);1121continue;1122}1123Value *ClonedVal = VMap[Incoming];1124if (ClonedVal)1125Phi->addIncoming(ClonedVal, ClonedBB);1126else1127Phi->addIncoming(Incoming, ClonedBB);1128}1129}1130}1131}11321133/// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all1134/// other successors are kept as well.1135void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB,1136BasicBlock *NewBB, DomTreeUpdater *DTU) {1137// When a path is reused, there is a chance that predecessors were already1138// updated before. Check if the predecessor needs to be updated first.1139if (!isPredecessor(OldBB, PrevBB))1140return;11411142Instruction *PrevTerm = PrevBB->getTerminator();1143for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) {1144if (PrevTerm->getSuccessor(Idx) == OldBB) {1145OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true);1146PrevTerm->setSuccessor(Idx, NewBB);1147}1148}1149DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB},1150{DominatorTree::Insert, PrevBB, NewBB}});1151}11521153/// Add new value mappings to the DefMap to keep track of all new definitions1154/// for a particular instruction. These will be used while updating SSA form.1155void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {1156SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector;1157NewDefsVector.reserve(VMap.size());11581159for (auto Entry : VMap) {1160Instruction *Inst =1161dyn_cast<Instruction>(const_cast<Value *>(Entry.first));1162if (!Inst || !Entry.second || isa<BranchInst>(Inst) ||1163isa<SwitchInst>(Inst)) {1164continue;1165}11661167Instruction *Cloned = dyn_cast<Instruction>(Entry.second);1168if (!Cloned)1169continue;11701171NewDefsVector.push_back({Inst, Cloned});1172}11731174// Sort the defs to get deterministic insertion order into NewDefs.1175sort(NewDefsVector, [](const auto &LHS, const auto &RHS) {1176if (LHS.first == RHS.first)1177return LHS.second->comesBefore(RHS.second);1178return LHS.first->comesBefore(RHS.first);1179});11801181for (const auto &KV : NewDefsVector)1182NewDefs[KV.first].push_back(KV.second);1183}11841185/// Update the last branch of a particular cloned path to point to the correct1186/// case successor.1187///1188/// Note that this is an optional step and would have been done in later1189/// optimizations, but it makes the CFG significantly easier to work with.1190void updateLastSuccessor(ThreadingPath &TPath,1191DuplicateBlockMap &DuplicateMap,1192DomTreeUpdater *DTU) {1193APInt NextState = TPath.getExitValue();1194BasicBlock *BB = TPath.getPath().back();1195BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);11961197// Note multiple paths can end at the same block so check that it is not1198// updated yet1199if (!isa<SwitchInst>(LastBlock->getTerminator()))1200return;1201SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator());1202BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);12031204std::vector<DominatorTree::UpdateType> DTUpdates;1205SmallPtrSet<BasicBlock *, 4> SuccSet;1206for (BasicBlock *Succ : successors(LastBlock)) {1207if (Succ != NextCase && SuccSet.insert(Succ).second)1208DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ});1209}12101211Switch->eraseFromParent();1212BranchInst::Create(NextCase, LastBlock);12131214DTU->applyUpdates(DTUpdates);1215}12161217/// After cloning blocks, some of the phi nodes have extra incoming values1218/// that are no longer used. This function removes them.1219void cleanPhiNodes(BasicBlock *BB) {1220// If BB is no longer reachable, remove any remaining phi nodes1221if (pred_empty(BB)) {1222std::vector<PHINode *> PhiToRemove;1223for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {1224PhiToRemove.push_back(Phi);1225}1226for (PHINode *PN : PhiToRemove) {1227PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));1228PN->eraseFromParent();1229}1230return;1231}12321233// Remove any incoming values that come from an invalid predecessor1234for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {1235std::vector<BasicBlock *> BlocksToRemove;1236for (BasicBlock *IncomingBB : Phi->blocks()) {1237if (!isPredecessor(BB, IncomingBB))1238BlocksToRemove.push_back(IncomingBB);1239}1240for (BasicBlock *BB : BlocksToRemove)1241Phi->removeIncomingValue(BB);1242}1243}12441245/// Checks if BB was already cloned for a particular next state value. If it1246/// was then it returns this cloned block, and otherwise null.1247BasicBlock *getClonedBB(BasicBlock *BB, const APInt &NextState,1248DuplicateBlockMap &DuplicateMap) {1249CloneList ClonedBBs = DuplicateMap[BB];12501251// Find an entry in the CloneList with this NextState. If it exists then1252// return the corresponding BB1253auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) {1254return C.State == NextState;1255});1256return It != ClonedBBs.end() ? (*It).BB : nullptr;1257}12581259/// Helper to get the successor corresponding to a particular case value for1260/// a switch statement.1261BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) {1262BasicBlock *NextCase = nullptr;1263for (auto Case : Switch->cases()) {1264if (Case.getCaseValue()->getValue() == NextState) {1265NextCase = Case.getCaseSuccessor();1266break;1267}1268}1269if (!NextCase)1270NextCase = Switch->getDefaultDest();1271return NextCase;1272}12731274/// Returns true if IncomingBB is a predecessor of BB.1275bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {1276return llvm::is_contained(predecessors(BB), IncomingBB);1277}12781279AllSwitchPaths *SwitchPaths;1280DominatorTree *DT;1281AssumptionCache *AC;1282TargetTransformInfo *TTI;1283OptimizationRemarkEmitter *ORE;1284SmallPtrSet<const Value *, 32> EphValues;1285std::vector<ThreadingPath> TPaths;1286};12871288bool DFAJumpThreading::run(Function &F) {1289LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");12901291if (F.hasOptSize()) {1292LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n");1293return false;1294}12951296if (ClViewCfgBefore)1297F.viewCFG();12981299SmallVector<AllSwitchPaths, 2> ThreadableLoops;1300bool MadeChanges = false;1301LoopInfoBroken = false;13021303for (BasicBlock &BB : F) {1304auto *SI = dyn_cast<SwitchInst>(BB.getTerminator());1305if (!SI)1306continue;13071308LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()1309<< " is a candidate\n");1310MainSwitch Switch(SI, LI, ORE);13111312if (!Switch.getInstr())1313continue;13141315LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a "1316<< "candidate for jump threading\n");1317LLVM_DEBUG(SI->dump());13181319unfoldSelectInstrs(DT, Switch.getSelectInsts());1320if (!Switch.getSelectInsts().empty())1321MadeChanges = true;13221323AllSwitchPaths SwitchPaths(&Switch, ORE, LI);1324SwitchPaths.run();13251326if (SwitchPaths.getNumThreadingPaths() > 0) {1327ThreadableLoops.push_back(SwitchPaths);13281329// For the time being limit this optimization to occurring once in a1330// function since it can change the CFG significantly. This is not a1331// strict requirement but it can cause buggy behavior if there is an1332// overlap of blocks in different opportunities. There is a lot of room to1333// experiment with catching more opportunities here.1334// NOTE: To release this contraint, we must handle LoopInfo invalidation1335break;1336}1337}13381339#ifdef NDEBUG1340LI->verify(*DT);1341#endif13421343SmallPtrSet<const Value *, 32> EphValues;1344if (ThreadableLoops.size() > 0)1345CodeMetrics::collectEphemeralValues(&F, AC, EphValues);13461347for (AllSwitchPaths SwitchPaths : ThreadableLoops) {1348TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);1349Transform.run();1350MadeChanges = true;1351LoopInfoBroken = true;1352}13531354#ifdef EXPENSIVE_CHECKS1355assert(DT->verify(DominatorTree::VerificationLevel::Full));1356verifyFunction(F, &dbgs());1357#endif13581359return MadeChanges;1360}13611362} // end anonymous namespace13631364/// Integrate with the new Pass Manager1365PreservedAnalyses DFAJumpThreadingPass::run(Function &F,1366FunctionAnalysisManager &AM) {1367AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);1368DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);1369LoopInfo &LI = AM.getResult<LoopAnalysis>(F);1370TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);1371OptimizationRemarkEmitter ORE(&F);1372DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE);1373if (!ThreadImpl.run(F))1374return PreservedAnalyses::all();13751376PreservedAnalyses PA;1377PA.preserve<DominatorTreeAnalysis>();1378if (!ThreadImpl.LoopInfoBroken)1379PA.preserve<LoopAnalysis>();1380return PA;1381}138213831384