Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
35266 views
//===- JumpThreading.cpp - Thread control through conditional blocks ------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements the Jump Threading pass.9//10//===----------------------------------------------------------------------===//1112#include "llvm/Transforms/Scalar/JumpThreading.h"13#include "llvm/ADT/DenseMap.h"14#include "llvm/ADT/DenseSet.h"15#include "llvm/ADT/MapVector.h"16#include "llvm/ADT/STLExtras.h"17#include "llvm/ADT/SmallPtrSet.h"18#include "llvm/ADT/SmallVector.h"19#include "llvm/ADT/Statistic.h"20#include "llvm/Analysis/AliasAnalysis.h"21#include "llvm/Analysis/BlockFrequencyInfo.h"22#include "llvm/Analysis/BranchProbabilityInfo.h"23#include "llvm/Analysis/CFG.h"24#include "llvm/Analysis/ConstantFolding.h"25#include "llvm/Analysis/GlobalsModRef.h"26#include "llvm/Analysis/GuardUtils.h"27#include "llvm/Analysis/InstructionSimplify.h"28#include "llvm/Analysis/LazyValueInfo.h"29#include "llvm/Analysis/Loads.h"30#include "llvm/Analysis/LoopInfo.h"31#include "llvm/Analysis/MemoryLocation.h"32#include "llvm/Analysis/PostDominators.h"33#include "llvm/Analysis/TargetLibraryInfo.h"34#include "llvm/Analysis/TargetTransformInfo.h"35#include "llvm/Analysis/ValueTracking.h"36#include "llvm/IR/BasicBlock.h"37#include "llvm/IR/CFG.h"38#include "llvm/IR/Constant.h"39#include "llvm/IR/ConstantRange.h"40#include "llvm/IR/Constants.h"41#include "llvm/IR/DataLayout.h"42#include "llvm/IR/DebugInfo.h"43#include "llvm/IR/Dominators.h"44#include "llvm/IR/Function.h"45#include "llvm/IR/InstrTypes.h"46#include "llvm/IR/Instruction.h"47#include "llvm/IR/Instructions.h"48#include "llvm/IR/IntrinsicInst.h"49#include "llvm/IR/Intrinsics.h"50#include "llvm/IR/LLVMContext.h"51#include "llvm/IR/MDBuilder.h"52#include "llvm/IR/Metadata.h"53#include "llvm/IR/Module.h"54#include "llvm/IR/PassManager.h"55#include "llvm/IR/PatternMatch.h"56#include "llvm/IR/ProfDataUtils.h"57#include "llvm/IR/Type.h"58#include "llvm/IR/Use.h"59#include "llvm/IR/Value.h"60#include "llvm/Support/BlockFrequency.h"61#include "llvm/Support/BranchProbability.h"62#include "llvm/Support/Casting.h"63#include "llvm/Support/CommandLine.h"64#include "llvm/Support/Debug.h"65#include "llvm/Support/raw_ostream.h"66#include "llvm/Transforms/Utils/BasicBlockUtils.h"67#include "llvm/Transforms/Utils/Cloning.h"68#include "llvm/Transforms/Utils/Local.h"69#include "llvm/Transforms/Utils/SSAUpdater.h"70#include "llvm/Transforms/Utils/ValueMapper.h"71#include <algorithm>72#include <cassert>73#include <cstdint>74#include <iterator>75#include <memory>76#include <utility>7778using namespace llvm;79using namespace jumpthreading;8081#define DEBUG_TYPE "jump-threading"8283STATISTIC(NumThreads, "Number of jumps threaded");84STATISTIC(NumFolds, "Number of terminators folded");85STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");8687static cl::opt<unsigned>88BBDuplicateThreshold("jump-threading-threshold",89cl::desc("Max block size to duplicate for jump threading"),90cl::init(6), cl::Hidden);9192static cl::opt<unsigned>93ImplicationSearchThreshold(94"jump-threading-implication-search-threshold",95cl::desc("The number of predecessors to search for a stronger "96"condition to use to thread over a weaker condition"),97cl::init(3), cl::Hidden);9899static cl::opt<unsigned> PhiDuplicateThreshold(100"jump-threading-phi-threshold",101cl::desc("Max PHIs in BB to duplicate for jump threading"), cl::init(76),102cl::Hidden);103104static cl::opt<bool> ThreadAcrossLoopHeaders(105"jump-threading-across-loop-headers",106cl::desc("Allow JumpThreading to thread across loop headers, for testing"),107cl::init(false), cl::Hidden);108109JumpThreadingPass::JumpThreadingPass(int T) {110DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);111}112113// Update branch probability information according to conditional114// branch probability. This is usually made possible for cloned branches115// in inline instances by the context specific profile in the caller.116// For instance,117//118// [Block PredBB]119// [Branch PredBr]120// if (t) {121// Block A;122// } else {123// Block B;124// }125//126// [Block BB]127// cond = PN([true, %A], [..., %B]); // PHI node128// [Branch CondBr]129// if (cond) {130// ... // P(cond == true) = 1%131// }132//133// Here we know that when block A is taken, cond must be true, which means134// P(cond == true | A) = 1135//136// Given that P(cond == true) = P(cond == true | A) * P(A) +137// P(cond == true | B) * P(B)138// we get:139// P(cond == true ) = P(A) + P(cond == true | B) * P(B)140//141// which gives us:142// P(A) is less than P(cond == true), i.e.143// P(t == true) <= P(cond == true)144//145// In other words, if we know P(cond == true) is unlikely, we know146// that P(t == true) is also unlikely.147//148static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {149BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());150if (!CondBr)151return;152153uint64_t TrueWeight, FalseWeight;154if (!extractBranchWeights(*CondBr, TrueWeight, FalseWeight))155return;156157if (TrueWeight + FalseWeight == 0)158// Zero branch_weights do not give a hint for getting branch probabilities.159// Technically it would result in division by zero denominator, which is160// TrueWeight + FalseWeight.161return;162163// Returns the outgoing edge of the dominating predecessor block164// that leads to the PhiNode's incoming block:165auto GetPredOutEdge =166[](BasicBlock *IncomingBB,167BasicBlock *PhiBB) -> std::pair<BasicBlock *, BasicBlock *> {168auto *PredBB = IncomingBB;169auto *SuccBB = PhiBB;170SmallPtrSet<BasicBlock *, 16> Visited;171while (true) {172BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());173if (PredBr && PredBr->isConditional())174return {PredBB, SuccBB};175Visited.insert(PredBB);176auto *SinglePredBB = PredBB->getSinglePredecessor();177if (!SinglePredBB)178return {nullptr, nullptr};179180// Stop searching when SinglePredBB has been visited. It means we see181// an unreachable loop.182if (Visited.count(SinglePredBB))183return {nullptr, nullptr};184185SuccBB = PredBB;186PredBB = SinglePredBB;187}188};189190for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {191Value *PhiOpnd = PN->getIncomingValue(i);192ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);193194if (!CI || !CI->getType()->isIntegerTy(1))195continue;196197BranchProbability BP =198(CI->isOne() ? BranchProbability::getBranchProbability(199TrueWeight, TrueWeight + FalseWeight)200: BranchProbability::getBranchProbability(201FalseWeight, TrueWeight + FalseWeight));202203auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);204if (!PredOutEdge.first)205return;206207BasicBlock *PredBB = PredOutEdge.first;208BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());209if (!PredBr)210return;211212uint64_t PredTrueWeight, PredFalseWeight;213// FIXME: We currently only set the profile data when it is missing.214// With PGO, this can be used to refine even existing profile data with215// context information. This needs to be done after more performance216// testing.217if (extractBranchWeights(*PredBr, PredTrueWeight, PredFalseWeight))218continue;219220// We can not infer anything useful when BP >= 50%, because BP is the221// upper bound probability value.222if (BP >= BranchProbability(50, 100))223continue;224225uint32_t Weights[2];226if (PredBr->getSuccessor(0) == PredOutEdge.second) {227Weights[0] = BP.getNumerator();228Weights[1] = BP.getCompl().getNumerator();229} else {230Weights[0] = BP.getCompl().getNumerator();231Weights[1] = BP.getNumerator();232}233setBranchWeights(*PredBr, Weights, hasBranchWeightOrigin(*PredBr));234}235}236237PreservedAnalyses JumpThreadingPass::run(Function &F,238FunctionAnalysisManager &AM) {239auto &TTI = AM.getResult<TargetIRAnalysis>(F);240// Jump Threading has no sense for the targets with divergent CF241if (TTI.hasBranchDivergence(&F))242return PreservedAnalyses::all();243auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);244auto &LVI = AM.getResult<LazyValueAnalysis>(F);245auto &AA = AM.getResult<AAManager>(F);246auto &DT = AM.getResult<DominatorTreeAnalysis>(F);247248bool Changed =249runImpl(F, &AM, &TLI, &TTI, &LVI, &AA,250std::make_unique<DomTreeUpdater>(251&DT, nullptr, DomTreeUpdater::UpdateStrategy::Lazy),252std::nullopt, std::nullopt);253254if (!Changed)255return PreservedAnalyses::all();256257258getDomTreeUpdater()->flush();259260#if defined(EXPENSIVE_CHECKS)261assert(getDomTreeUpdater()->getDomTree().verify(262DominatorTree::VerificationLevel::Full) &&263"DT broken after JumpThreading");264assert((!getDomTreeUpdater()->hasPostDomTree() ||265getDomTreeUpdater()->getPostDomTree().verify(266PostDominatorTree::VerificationLevel::Full)) &&267"PDT broken after JumpThreading");268#else269assert(getDomTreeUpdater()->getDomTree().verify(270DominatorTree::VerificationLevel::Fast) &&271"DT broken after JumpThreading");272assert((!getDomTreeUpdater()->hasPostDomTree() ||273getDomTreeUpdater()->getPostDomTree().verify(274PostDominatorTree::VerificationLevel::Fast)) &&275"PDT broken after JumpThreading");276#endif277278return getPreservedAnalysis();279}280281bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_,282TargetLibraryInfo *TLI_,283TargetTransformInfo *TTI_, LazyValueInfo *LVI_,284AliasAnalysis *AA_,285std::unique_ptr<DomTreeUpdater> DTU_,286std::optional<BlockFrequencyInfo *> BFI_,287std::optional<BranchProbabilityInfo *> BPI_) {288LLVM_DEBUG(dbgs() << "Jump threading on function '" << F_.getName() << "'\n");289F = &F_;290FAM = FAM_;291TLI = TLI_;292TTI = TTI_;293LVI = LVI_;294AA = AA_;295DTU = std::move(DTU_);296BFI = BFI_;297BPI = BPI_;298auto *GuardDecl = F->getParent()->getFunction(299Intrinsic::getName(Intrinsic::experimental_guard));300HasGuards = GuardDecl && !GuardDecl->use_empty();301302// Reduce the number of instructions duplicated when optimizing strictly for303// size.304if (BBDuplicateThreshold.getNumOccurrences())305BBDupThreshold = BBDuplicateThreshold;306else if (F->hasFnAttribute(Attribute::MinSize))307BBDupThreshold = 3;308else309BBDupThreshold = DefaultBBDupThreshold;310311// JumpThreading must not processes blocks unreachable from entry. It's a312// waste of compute time and can potentially lead to hangs.313SmallPtrSet<BasicBlock *, 16> Unreachable;314assert(DTU && "DTU isn't passed into JumpThreading before using it.");315assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");316DominatorTree &DT = DTU->getDomTree();317for (auto &BB : *F)318if (!DT.isReachableFromEntry(&BB))319Unreachable.insert(&BB);320321if (!ThreadAcrossLoopHeaders)322findLoopHeaders(*F);323324bool EverChanged = false;325bool Changed;326do {327Changed = false;328for (auto &BB : *F) {329if (Unreachable.count(&BB))330continue;331while (processBlock(&BB)) // Thread all of the branches we can over BB.332Changed = ChangedSinceLastAnalysisUpdate = true;333334// Jump threading may have introduced redundant debug values into BB335// which should be removed.336if (Changed)337RemoveRedundantDbgInstrs(&BB);338339// Stop processing BB if it's the entry or is now deleted. The following340// routines attempt to eliminate BB and locating a suitable replacement341// for the entry is non-trivial.342if (&BB == &F->getEntryBlock() || DTU->isBBPendingDeletion(&BB))343continue;344345if (pred_empty(&BB)) {346// When processBlock makes BB unreachable it doesn't bother to fix up347// the instructions in it. We must remove BB to prevent invalid IR.348LLVM_DEBUG(dbgs() << " JT: Deleting dead block '" << BB.getName()349<< "' with terminator: " << *BB.getTerminator()350<< '\n');351LoopHeaders.erase(&BB);352LVI->eraseBlock(&BB);353DeleteDeadBlock(&BB, DTU.get());354Changed = ChangedSinceLastAnalysisUpdate = true;355continue;356}357358// processBlock doesn't thread BBs with unconditional TIs. However, if BB359// is "almost empty", we attempt to merge BB with its sole successor.360auto *BI = dyn_cast<BranchInst>(BB.getTerminator());361if (BI && BI->isUnconditional()) {362BasicBlock *Succ = BI->getSuccessor(0);363if (364// The terminator must be the only non-phi instruction in BB.365BB.getFirstNonPHIOrDbg(true)->isTerminator() &&366// Don't alter Loop headers and latches to ensure another pass can367// detect and transform nested loops later.368!LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&369TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU.get())) {370RemoveRedundantDbgInstrs(Succ);371// BB is valid for cleanup here because we passed in DTU. F remains372// BB's parent until a DTU->getDomTree() event.373LVI->eraseBlock(&BB);374Changed = ChangedSinceLastAnalysisUpdate = true;375}376}377}378EverChanged |= Changed;379} while (Changed);380381LoopHeaders.clear();382return EverChanged;383}384385// Replace uses of Cond with ToVal when safe to do so. If all uses are386// replaced, we can remove Cond. We cannot blindly replace all uses of Cond387// because we may incorrectly replace uses when guards/assumes are uses of388// of `Cond` and we used the guards/assume to reason about the `Cond` value389// at the end of block. RAUW unconditionally replaces all uses390// including the guards/assumes themselves and the uses before the391// guard/assume.392static bool replaceFoldableUses(Instruction *Cond, Value *ToVal,393BasicBlock *KnownAtEndOfBB) {394bool Changed = false;395assert(Cond->getType() == ToVal->getType());396// We can unconditionally replace all uses in non-local blocks (i.e. uses397// strictly dominated by BB), since LVI information is true from the398// terminator of BB.399if (Cond->getParent() == KnownAtEndOfBB)400Changed |= replaceNonLocalUsesWith(Cond, ToVal);401for (Instruction &I : reverse(*KnownAtEndOfBB)) {402// Replace any debug-info record users of Cond with ToVal.403for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))404DVR.replaceVariableLocationOp(Cond, ToVal, true);405406// Reached the Cond whose uses we are trying to replace, so there are no407// more uses.408if (&I == Cond)409break;410// We only replace uses in instructions that are guaranteed to reach the end411// of BB, where we know Cond is ToVal.412if (!isGuaranteedToTransferExecutionToSuccessor(&I))413break;414Changed |= I.replaceUsesOfWith(Cond, ToVal);415}416if (Cond->use_empty() && !Cond->mayHaveSideEffects()) {417Cond->eraseFromParent();418Changed = true;419}420return Changed;421}422423/// Return the cost of duplicating a piece of this block from first non-phi424/// and before StopAt instruction to thread across it. Stop scanning the block425/// when exceeding the threshold. If duplication is impossible, returns ~0U.426static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI,427BasicBlock *BB,428Instruction *StopAt,429unsigned Threshold) {430assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");431432// Do not duplicate the BB if it has a lot of PHI nodes.433// If a threadable chain is too long then the number of PHI nodes can add up,434// leading to a substantial increase in compile time when rewriting the SSA.435unsigned PhiCount = 0;436Instruction *FirstNonPHI = nullptr;437for (Instruction &I : *BB) {438if (!isa<PHINode>(&I)) {439FirstNonPHI = &I;440break;441}442if (++PhiCount > PhiDuplicateThreshold)443return ~0U;444}445446/// Ignore PHI nodes, these will be flattened when duplication happens.447BasicBlock::const_iterator I(FirstNonPHI);448449// FIXME: THREADING will delete values that are just used to compute the450// branch, so they shouldn't count against the duplication cost.451452unsigned Bonus = 0;453if (BB->getTerminator() == StopAt) {454// Threading through a switch statement is particularly profitable. If this455// block ends in a switch, decrease its cost to make it more likely to456// happen.457if (isa<SwitchInst>(StopAt))458Bonus = 6;459460// The same holds for indirect branches, but slightly more so.461if (isa<IndirectBrInst>(StopAt))462Bonus = 8;463}464465// Bump the threshold up so the early exit from the loop doesn't skip the466// terminator-based Size adjustment at the end.467Threshold += Bonus;468469// Sum up the cost of each instruction until we get to the terminator. Don't470// include the terminator because the copy won't include it.471unsigned Size = 0;472for (; &*I != StopAt; ++I) {473474// Stop scanning the block if we've reached the threshold.475if (Size > Threshold)476return Size;477478// Bail out if this instruction gives back a token type, it is not possible479// to duplicate it if it is used outside this BB.480if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))481return ~0U;482483// Blocks with NoDuplicate are modelled as having infinite cost, so they484// are never duplicated.485if (const CallInst *CI = dyn_cast<CallInst>(I))486if (CI->cannotDuplicate() || CI->isConvergent())487return ~0U;488489if (TTI->getInstructionCost(&*I, TargetTransformInfo::TCK_SizeAndLatency) ==490TargetTransformInfo::TCC_Free)491continue;492493// All other instructions count for at least one unit.494++Size;495496// Calls are more expensive. If they are non-intrinsic calls, we model them497// as having cost of 4. If they are a non-vector intrinsic, we model them498// as having cost of 2 total, and if they are a vector intrinsic, we model499// them as having cost 1.500if (const CallInst *CI = dyn_cast<CallInst>(I)) {501if (!isa<IntrinsicInst>(CI))502Size += 3;503else if (!CI->getType()->isVectorTy())504Size += 1;505}506}507508return Size > Bonus ? Size - Bonus : 0;509}510511/// findLoopHeaders - We do not want jump threading to turn proper loop512/// structures into irreducible loops. Doing this breaks up the loop nesting513/// hierarchy and pessimizes later transformations. To prevent this from514/// happening, we first have to find the loop headers. Here we approximate this515/// by finding targets of backedges in the CFG.516///517/// Note that there definitely are cases when we want to allow threading of518/// edges across a loop header. For example, threading a jump from outside the519/// loop (the preheader) to an exit block of the loop is definitely profitable.520/// It is also almost always profitable to thread backedges from within the loop521/// to exit blocks, and is often profitable to thread backedges to other blocks522/// within the loop (forming a nested loop). This simple analysis is not rich523/// enough to track all of these properties and keep it up-to-date as the CFG524/// mutates, so we don't allow any of these transformations.525void JumpThreadingPass::findLoopHeaders(Function &F) {526SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;527FindFunctionBackedges(F, Edges);528529for (const auto &Edge : Edges)530LoopHeaders.insert(Edge.second);531}532533/// getKnownConstant - Helper method to determine if we can thread over a534/// terminator with the given value as its condition, and if so what value to535/// use for that. What kind of value this is depends on whether we want an536/// integer or a block address, but an undef is always accepted.537/// Returns null if Val is null or not an appropriate constant.538static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {539if (!Val)540return nullptr;541542// Undef is "known" enough.543if (UndefValue *U = dyn_cast<UndefValue>(Val))544return U;545546if (Preference == WantBlockAddress)547return dyn_cast<BlockAddress>(Val->stripPointerCasts());548549return dyn_cast<ConstantInt>(Val);550}551552/// computeValueKnownInPredecessors - Given a basic block BB and a value V, see553/// if we can infer that the value is a known ConstantInt/BlockAddress or undef554/// in any of our predecessors. If so, return the known list of value and pred555/// BB in the result vector.556///557/// This returns true if there were any known values.558bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(559Value *V, BasicBlock *BB, PredValueInfo &Result,560ConstantPreference Preference, SmallPtrSet<Value *, 4> &RecursionSet,561Instruction *CxtI) {562const DataLayout &DL = BB->getDataLayout();563564// This method walks up use-def chains recursively. Because of this, we could565// get into an infinite loop going around loops in the use-def chain. To566// prevent this, keep track of what (value, block) pairs we've already visited567// and terminate the search if we loop back to them568if (!RecursionSet.insert(V).second)569return false;570571// If V is a constant, then it is known in all predecessors.572if (Constant *KC = getKnownConstant(V, Preference)) {573for (BasicBlock *Pred : predecessors(BB))574Result.emplace_back(KC, Pred);575576return !Result.empty();577}578579// If V is a non-instruction value, or an instruction in a different block,580// then it can't be derived from a PHI.581Instruction *I = dyn_cast<Instruction>(V);582if (!I || I->getParent() != BB) {583584// Okay, if this is a live-in value, see if it has a known value at the any585// edge from our predecessors.586for (BasicBlock *P : predecessors(BB)) {587using namespace PatternMatch;588// If the value is known by LazyValueInfo to be a constant in a589// predecessor, use that information to try to thread this block.590Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);591// If I is a non-local compare-with-constant instruction, use more-rich592// 'getPredicateOnEdge' method. This would be able to handle value593// inequalities better, for example if the compare is "X < 4" and "X < 3"594// is known true but "X < 4" itself is not available.595CmpInst::Predicate Pred;596Value *Val;597Constant *Cst;598if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst))))599PredCst = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI);600if (Constant *KC = getKnownConstant(PredCst, Preference))601Result.emplace_back(KC, P);602}603604return !Result.empty();605}606607/// If I is a PHI node, then we know the incoming values for any constants.608if (PHINode *PN = dyn_cast<PHINode>(I)) {609for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {610Value *InVal = PN->getIncomingValue(i);611if (Constant *KC = getKnownConstant(InVal, Preference)) {612Result.emplace_back(KC, PN->getIncomingBlock(i));613} else {614Constant *CI = LVI->getConstantOnEdge(InVal,615PN->getIncomingBlock(i),616BB, CxtI);617if (Constant *KC = getKnownConstant(CI, Preference))618Result.emplace_back(KC, PN->getIncomingBlock(i));619}620}621622return !Result.empty();623}624625// Handle Cast instructions.626if (CastInst *CI = dyn_cast<CastInst>(I)) {627Value *Source = CI->getOperand(0);628PredValueInfoTy Vals;629computeValueKnownInPredecessorsImpl(Source, BB, Vals, Preference,630RecursionSet, CxtI);631if (Vals.empty())632return false;633634// Convert the known values.635for (auto &Val : Vals)636if (Constant *Folded = ConstantFoldCastOperand(CI->getOpcode(), Val.first,637CI->getType(), DL))638Result.emplace_back(Folded, Val.second);639640return !Result.empty();641}642643if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {644Value *Source = FI->getOperand(0);645computeValueKnownInPredecessorsImpl(Source, BB, Result, Preference,646RecursionSet, CxtI);647648erase_if(Result, [](auto &Pair) {649return !isGuaranteedNotToBeUndefOrPoison(Pair.first);650});651652return !Result.empty();653}654655// Handle some boolean conditions.656if (I->getType()->getPrimitiveSizeInBits() == 1) {657using namespace PatternMatch;658if (Preference != WantInteger)659return false;660// X | true -> true661// X & false -> false662Value *Op0, *Op1;663if (match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) ||664match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {665PredValueInfoTy LHSVals, RHSVals;666667computeValueKnownInPredecessorsImpl(Op0, BB, LHSVals, WantInteger,668RecursionSet, CxtI);669computeValueKnownInPredecessorsImpl(Op1, BB, RHSVals, WantInteger,670RecursionSet, CxtI);671672if (LHSVals.empty() && RHSVals.empty())673return false;674675ConstantInt *InterestingVal;676if (match(I, m_LogicalOr()))677InterestingVal = ConstantInt::getTrue(I->getContext());678else679InterestingVal = ConstantInt::getFalse(I->getContext());680681SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;682683// Scan for the sentinel. If we find an undef, force it to the684// interesting value: x|undef -> true and x&undef -> false.685for (const auto &LHSVal : LHSVals)686if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {687Result.emplace_back(InterestingVal, LHSVal.second);688LHSKnownBBs.insert(LHSVal.second);689}690for (const auto &RHSVal : RHSVals)691if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {692// If we already inferred a value for this block on the LHS, don't693// re-add it.694if (!LHSKnownBBs.count(RHSVal.second))695Result.emplace_back(InterestingVal, RHSVal.second);696}697698return !Result.empty();699}700701// Handle the NOT form of XOR.702if (I->getOpcode() == Instruction::Xor &&703isa<ConstantInt>(I->getOperand(1)) &&704cast<ConstantInt>(I->getOperand(1))->isOne()) {705computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, Result,706WantInteger, RecursionSet, CxtI);707if (Result.empty())708return false;709710// Invert the known values.711for (auto &R : Result)712R.first = ConstantExpr::getNot(R.first);713714return true;715}716717// Try to simplify some other binary operator values.718} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {719if (Preference != WantInteger)720return false;721if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {722PredValueInfoTy LHSVals;723computeValueKnownInPredecessorsImpl(BO->getOperand(0), BB, LHSVals,724WantInteger, RecursionSet, CxtI);725726// Try to use constant folding to simplify the binary operator.727for (const auto &LHSVal : LHSVals) {728Constant *V = LHSVal.first;729Constant *Folded =730ConstantFoldBinaryOpOperands(BO->getOpcode(), V, CI, DL);731732if (Constant *KC = getKnownConstant(Folded, WantInteger))733Result.emplace_back(KC, LHSVal.second);734}735}736737return !Result.empty();738}739740// Handle compare with phi operand, where the PHI is defined in this block.741if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {742if (Preference != WantInteger)743return false;744Type *CmpType = Cmp->getType();745Value *CmpLHS = Cmp->getOperand(0);746Value *CmpRHS = Cmp->getOperand(1);747CmpInst::Predicate Pred = Cmp->getPredicate();748749PHINode *PN = dyn_cast<PHINode>(CmpLHS);750if (!PN)751PN = dyn_cast<PHINode>(CmpRHS);752// Do not perform phi translation across a loop header phi, because this753// may result in comparison of values from two different loop iterations.754// FIXME: This check is broken if LoopHeaders is not populated.755if (PN && PN->getParent() == BB && !LoopHeaders.contains(BB)) {756const DataLayout &DL = PN->getDataLayout();757// We can do this simplification if any comparisons fold to true or false.758// See if any do.759for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {760BasicBlock *PredBB = PN->getIncomingBlock(i);761Value *LHS, *RHS;762if (PN == CmpLHS) {763LHS = PN->getIncomingValue(i);764RHS = CmpRHS->DoPHITranslation(BB, PredBB);765} else {766LHS = CmpLHS->DoPHITranslation(BB, PredBB);767RHS = PN->getIncomingValue(i);768}769Value *Res = simplifyCmpInst(Pred, LHS, RHS, {DL});770if (!Res) {771if (!isa<Constant>(RHS))772continue;773774// getPredicateOnEdge call will make no sense if LHS is defined in BB.775auto LHSInst = dyn_cast<Instruction>(LHS);776if (LHSInst && LHSInst->getParent() == BB)777continue;778779Res = LVI->getPredicateOnEdge(Pred, LHS, cast<Constant>(RHS), PredBB,780BB, CxtI ? CxtI : Cmp);781}782783if (Constant *KC = getKnownConstant(Res, WantInteger))784Result.emplace_back(KC, PredBB);785}786787return !Result.empty();788}789790// If comparing a live-in value against a constant, see if we know the791// live-in value on any predecessors.792if (isa<Constant>(CmpRHS) && !CmpType->isVectorTy()) {793Constant *CmpConst = cast<Constant>(CmpRHS);794795if (!isa<Instruction>(CmpLHS) ||796cast<Instruction>(CmpLHS)->getParent() != BB) {797for (BasicBlock *P : predecessors(BB)) {798// If the value is known by LazyValueInfo to be a constant in a799// predecessor, use that information to try to thread this block.800Constant *Res = LVI->getPredicateOnEdge(Pred, CmpLHS, CmpConst, P, BB,801CxtI ? CxtI : Cmp);802if (Constant *KC = getKnownConstant(Res, WantInteger))803Result.emplace_back(KC, P);804}805806return !Result.empty();807}808809// InstCombine can fold some forms of constant range checks into810// (icmp (add (x, C1)), C2). See if we have we have such a thing with811// x as a live-in.812{813using namespace PatternMatch;814815Value *AddLHS;816ConstantInt *AddConst;817if (isa<ConstantInt>(CmpConst) &&818match(CmpLHS, m_Add(m_Value(AddLHS), m_ConstantInt(AddConst)))) {819if (!isa<Instruction>(AddLHS) ||820cast<Instruction>(AddLHS)->getParent() != BB) {821for (BasicBlock *P : predecessors(BB)) {822// If the value is known by LazyValueInfo to be a ConstantRange in823// a predecessor, use that information to try to thread this824// block.825ConstantRange CR = LVI->getConstantRangeOnEdge(826AddLHS, P, BB, CxtI ? CxtI : cast<Instruction>(CmpLHS));827// Propagate the range through the addition.828CR = CR.add(AddConst->getValue());829830// Get the range where the compare returns true.831ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(832Pred, cast<ConstantInt>(CmpConst)->getValue());833834Constant *ResC;835if (CmpRange.contains(CR))836ResC = ConstantInt::getTrue(CmpType);837else if (CmpRange.inverse().contains(CR))838ResC = ConstantInt::getFalse(CmpType);839else840continue;841842Result.emplace_back(ResC, P);843}844845return !Result.empty();846}847}848}849850// Try to find a constant value for the LHS of a comparison,851// and evaluate it statically if we can.852PredValueInfoTy LHSVals;853computeValueKnownInPredecessorsImpl(I->getOperand(0), BB, LHSVals,854WantInteger, RecursionSet, CxtI);855856for (const auto &LHSVal : LHSVals) {857Constant *V = LHSVal.first;858Constant *Folded =859ConstantFoldCompareInstOperands(Pred, V, CmpConst, DL);860if (Constant *KC = getKnownConstant(Folded, WantInteger))861Result.emplace_back(KC, LHSVal.second);862}863864return !Result.empty();865}866}867868if (SelectInst *SI = dyn_cast<SelectInst>(I)) {869// Handle select instructions where at least one operand is a known constant870// and we can figure out the condition value for any predecessor block.871Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);872Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);873PredValueInfoTy Conds;874if ((TrueVal || FalseVal) &&875computeValueKnownInPredecessorsImpl(SI->getCondition(), BB, Conds,876WantInteger, RecursionSet, CxtI)) {877for (auto &C : Conds) {878Constant *Cond = C.first;879880// Figure out what value to use for the condition.881bool KnownCond;882if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {883// A known boolean.884KnownCond = CI->isOne();885} else {886assert(isa<UndefValue>(Cond) && "Unexpected condition value");887// Either operand will do, so be sure to pick the one that's a known888// constant.889// FIXME: Do this more cleverly if both values are known constants?890KnownCond = (TrueVal != nullptr);891}892893// See if the select has a known constant value for this predecessor.894if (Constant *Val = KnownCond ? TrueVal : FalseVal)895Result.emplace_back(Val, C.second);896}897898return !Result.empty();899}900}901902// If all else fails, see if LVI can figure out a constant value for us.903assert(CxtI->getParent() == BB && "CxtI should be in BB");904Constant *CI = LVI->getConstant(V, CxtI);905if (Constant *KC = getKnownConstant(CI, Preference)) {906for (BasicBlock *Pred : predecessors(BB))907Result.emplace_back(KC, Pred);908}909910return !Result.empty();911}912913/// GetBestDestForBranchOnUndef - If we determine that the specified block ends914/// in an undefined jump, decide which block is best to revector to.915///916/// Since we can pick an arbitrary destination, we pick the successor with the917/// fewest predecessors. This should reduce the in-degree of the others.918static unsigned getBestDestForJumpOnUndef(BasicBlock *BB) {919Instruction *BBTerm = BB->getTerminator();920unsigned MinSucc = 0;921BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc);922// Compute the successor with the minimum number of predecessors.923unsigned MinNumPreds = pred_size(TestBB);924for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {925TestBB = BBTerm->getSuccessor(i);926unsigned NumPreds = pred_size(TestBB);927if (NumPreds < MinNumPreds) {928MinSucc = i;929MinNumPreds = NumPreds;930}931}932933return MinSucc;934}935936static bool hasAddressTakenAndUsed(BasicBlock *BB) {937if (!BB->hasAddressTaken()) return false;938939// If the block has its address taken, it may be a tree of dead constants940// hanging off of it. These shouldn't keep the block alive.941BlockAddress *BA = BlockAddress::get(BB);942BA->removeDeadConstantUsers();943return !BA->use_empty();944}945946/// processBlock - If there are any predecessors whose control can be threaded947/// through to a successor, transform them now.948bool JumpThreadingPass::processBlock(BasicBlock *BB) {949// If the block is trivially dead, just return and let the caller nuke it.950// This simplifies other transformations.951if (DTU->isBBPendingDeletion(BB) ||952(pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()))953return false;954955// If this block has a single predecessor, and if that pred has a single956// successor, merge the blocks. This encourages recursive jump threading957// because now the condition in this block can be threaded through958// predecessors of our predecessor block.959if (maybeMergeBasicBlockIntoOnlyPred(BB))960return true;961962if (tryToUnfoldSelectInCurrBB(BB))963return true;964965// Look if we can propagate guards to predecessors.966if (HasGuards && processGuards(BB))967return true;968969// What kind of constant we're looking for.970ConstantPreference Preference = WantInteger;971972// Look to see if the terminator is a conditional branch, switch or indirect973// branch, if not we can't thread it.974Value *Condition;975Instruction *Terminator = BB->getTerminator();976if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {977// Can't thread an unconditional jump.978if (BI->isUnconditional()) return false;979Condition = BI->getCondition();980} else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {981Condition = SI->getCondition();982} else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {983// Can't thread indirect branch with no successors.984if (IB->getNumSuccessors() == 0) return false;985Condition = IB->getAddress()->stripPointerCasts();986Preference = WantBlockAddress;987} else {988return false; // Must be an invoke or callbr.989}990991// Keep track if we constant folded the condition in this invocation.992bool ConstantFolded = false;993994// Run constant folding to see if we can reduce the condition to a simple995// constant.996if (Instruction *I = dyn_cast<Instruction>(Condition)) {997Value *SimpleVal =998ConstantFoldInstruction(I, BB->getDataLayout(), TLI);999if (SimpleVal) {1000I->replaceAllUsesWith(SimpleVal);1001if (isInstructionTriviallyDead(I, TLI))1002I->eraseFromParent();1003Condition = SimpleVal;1004ConstantFolded = true;1005}1006}10071008// If the terminator is branching on an undef or freeze undef, we can pick any1009// of the successors to branch to. Let getBestDestForJumpOnUndef decide.1010auto *FI = dyn_cast<FreezeInst>(Condition);1011if (isa<UndefValue>(Condition) ||1012(FI && isa<UndefValue>(FI->getOperand(0)) && FI->hasOneUse())) {1013unsigned BestSucc = getBestDestForJumpOnUndef(BB);1014std::vector<DominatorTree::UpdateType> Updates;10151016// Fold the branch/switch.1017Instruction *BBTerm = BB->getTerminator();1018Updates.reserve(BBTerm->getNumSuccessors());1019for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {1020if (i == BestSucc) continue;1021BasicBlock *Succ = BBTerm->getSuccessor(i);1022Succ->removePredecessor(BB, true);1023Updates.push_back({DominatorTree::Delete, BB, Succ});1024}10251026LLVM_DEBUG(dbgs() << " In block '" << BB->getName()1027<< "' folding undef terminator: " << *BBTerm << '\n');1028Instruction *NewBI = BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm->getIterator());1029NewBI->setDebugLoc(BBTerm->getDebugLoc());1030++NumFolds;1031BBTerm->eraseFromParent();1032DTU->applyUpdatesPermissive(Updates);1033if (FI)1034FI->eraseFromParent();1035return true;1036}10371038// If the terminator of this block is branching on a constant, simplify the1039// terminator to an unconditional branch. This can occur due to threading in1040// other blocks.1041if (getKnownConstant(Condition, Preference)) {1042LLVM_DEBUG(dbgs() << " In block '" << BB->getName()1043<< "' folding terminator: " << *BB->getTerminator()1044<< '\n');1045++NumFolds;1046ConstantFoldTerminator(BB, true, nullptr, DTU.get());1047if (auto *BPI = getBPI())1048BPI->eraseBlock(BB);1049return true;1050}10511052Instruction *CondInst = dyn_cast<Instruction>(Condition);10531054// All the rest of our checks depend on the condition being an instruction.1055if (!CondInst) {1056// FIXME: Unify this with code below.1057if (processThreadableEdges(Condition, BB, Preference, Terminator))1058return true;1059return ConstantFolded;1060}10611062// Some of the following optimization can safely work on the unfrozen cond.1063Value *CondWithoutFreeze = CondInst;1064if (auto *FI = dyn_cast<FreezeInst>(CondInst))1065CondWithoutFreeze = FI->getOperand(0);10661067if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondWithoutFreeze)) {1068// If we're branching on a conditional, LVI might be able to determine1069// it's value at the branch instruction. We only handle comparisons1070// against a constant at this time.1071if (Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {1072Constant *Res =1073LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),1074CondConst, BB->getTerminator(),1075/*UseBlockValue=*/false);1076if (Res) {1077// We can safely replace *some* uses of the CondInst if it has1078// exactly one value as returned by LVI. RAUW is incorrect in the1079// presence of guards and assumes, that have the `Cond` as the use. This1080// is because we use the guards/assume to reason about the `Cond` value1081// at the end of block, but RAUW unconditionally replaces all uses1082// including the guards/assumes themselves and the uses before the1083// guard/assume.1084if (replaceFoldableUses(CondCmp, Res, BB))1085return true;1086}10871088// We did not manage to simplify this branch, try to see whether1089// CondCmp depends on a known phi-select pattern.1090if (tryToUnfoldSelect(CondCmp, BB))1091return true;1092}1093}10941095if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))1096if (tryToUnfoldSelect(SI, BB))1097return true;10981099// Check for some cases that are worth simplifying. Right now we want to look1100// for loads that are used by a switch or by the condition for the branch. If1101// we see one, check to see if it's partially redundant. If so, insert a PHI1102// which can then be used to thread the values.1103Value *SimplifyValue = CondWithoutFreeze;11041105if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))1106if (isa<Constant>(CondCmp->getOperand(1)))1107SimplifyValue = CondCmp->getOperand(0);11081109// TODO: There are other places where load PRE would be profitable, such as1110// more complex comparisons.1111if (LoadInst *LoadI = dyn_cast<LoadInst>(SimplifyValue))1112if (simplifyPartiallyRedundantLoad(LoadI))1113return true;11141115// Before threading, try to propagate profile data backwards:1116if (PHINode *PN = dyn_cast<PHINode>(CondInst))1117if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))1118updatePredecessorProfileMetadata(PN, BB);11191120// Handle a variety of cases where we are branching on something derived from1121// a PHI node in the current block. If we can prove that any predecessors1122// compute a predictable value based on a PHI node, thread those predecessors.1123if (processThreadableEdges(CondInst, BB, Preference, Terminator))1124return true;11251126// If this is an otherwise-unfoldable branch on a phi node or freeze(phi) in1127// the current block, see if we can simplify.1128PHINode *PN = dyn_cast<PHINode>(CondWithoutFreeze);1129if (PN && PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))1130return processBranchOnPHI(PN);11311132// If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.1133if (CondInst->getOpcode() == Instruction::Xor &&1134CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))1135return processBranchOnXOR(cast<BinaryOperator>(CondInst));11361137// Search for a stronger dominating condition that can be used to simplify a1138// conditional branch leaving BB.1139if (processImpliedCondition(BB))1140return true;11411142return false;1143}11441145bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {1146auto *BI = dyn_cast<BranchInst>(BB->getTerminator());1147if (!BI || !BI->isConditional())1148return false;11491150Value *Cond = BI->getCondition();1151// Assuming that predecessor's branch was taken, if pred's branch condition1152// (V) implies Cond, Cond can be either true, undef, or poison. In this case,1153// freeze(Cond) is either true or a nondeterministic value.1154// If freeze(Cond) has only one use, we can freely fold freeze(Cond) to true1155// without affecting other instructions.1156auto *FICond = dyn_cast<FreezeInst>(Cond);1157if (FICond && FICond->hasOneUse())1158Cond = FICond->getOperand(0);1159else1160FICond = nullptr;11611162BasicBlock *CurrentBB = BB;1163BasicBlock *CurrentPred = BB->getSinglePredecessor();1164unsigned Iter = 0;11651166auto &DL = BB->getDataLayout();11671168while (CurrentPred && Iter++ < ImplicationSearchThreshold) {1169auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());1170if (!PBI || !PBI->isConditional())1171return false;1172if (PBI->getSuccessor(0) != CurrentBB && PBI->getSuccessor(1) != CurrentBB)1173return false;11741175bool CondIsTrue = PBI->getSuccessor(0) == CurrentBB;1176std::optional<bool> Implication =1177isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue);11781179// If the branch condition of BB (which is Cond) and CurrentPred are1180// exactly the same freeze instruction, Cond can be folded into CondIsTrue.1181if (!Implication && FICond && isa<FreezeInst>(PBI->getCondition())) {1182if (cast<FreezeInst>(PBI->getCondition())->getOperand(0) ==1183FICond->getOperand(0))1184Implication = CondIsTrue;1185}11861187if (Implication) {1188BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);1189BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);1190RemoveSucc->removePredecessor(BB);1191BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI->getIterator());1192UncondBI->setDebugLoc(BI->getDebugLoc());1193++NumFolds;1194BI->eraseFromParent();1195if (FICond)1196FICond->eraseFromParent();11971198DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});1199if (auto *BPI = getBPI())1200BPI->eraseBlock(BB);1201return true;1202}1203CurrentBB = CurrentPred;1204CurrentPred = CurrentBB->getSinglePredecessor();1205}12061207return false;1208}12091210/// Return true if Op is an instruction defined in the given block.1211static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) {1212if (Instruction *OpInst = dyn_cast<Instruction>(Op))1213if (OpInst->getParent() == BB)1214return true;1215return false;1216}12171218/// simplifyPartiallyRedundantLoad - If LoadI is an obviously partially1219/// redundant load instruction, eliminate it by replacing it with a PHI node.1220/// This is an important optimization that encourages jump threading, and needs1221/// to be run interlaced with other jump threading tasks.1222bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {1223// Don't hack volatile and ordered loads.1224if (!LoadI->isUnordered()) return false;12251226// If the load is defined in a block with exactly one predecessor, it can't be1227// partially redundant.1228BasicBlock *LoadBB = LoadI->getParent();1229if (LoadBB->getSinglePredecessor())1230return false;12311232// If the load is defined in an EH pad, it can't be partially redundant,1233// because the edges between the invoke and the EH pad cannot have other1234// instructions between them.1235if (LoadBB->isEHPad())1236return false;12371238Value *LoadedPtr = LoadI->getOperand(0);12391240// If the loaded operand is defined in the LoadBB and its not a phi,1241// it can't be available in predecessors.1242if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))1243return false;12441245// Scan a few instructions up from the load, to see if it is obviously live at1246// the entry to its block.1247BasicBlock::iterator BBIt(LoadI);1248bool IsLoadCSE;1249BatchAAResults BatchAA(*AA);1250// The dominator tree is updated lazily and may not be valid at this point.1251BatchAA.disableDominatorTree();1252if (Value *AvailableVal = FindAvailableLoadedValue(1253LoadI, LoadBB, BBIt, DefMaxInstsToScan, &BatchAA, &IsLoadCSE)) {1254// If the value of the load is locally available within the block, just use1255// it. This frequently occurs for reg2mem'd allocas.12561257if (IsLoadCSE) {1258LoadInst *NLoadI = cast<LoadInst>(AvailableVal);1259combineMetadataForCSE(NLoadI, LoadI, false);1260LVI->forgetValue(NLoadI);1261};12621263// If the returned value is the load itself, replace with poison. This can1264// only happen in dead loops.1265if (AvailableVal == LoadI)1266AvailableVal = PoisonValue::get(LoadI->getType());1267if (AvailableVal->getType() != LoadI->getType()) {1268AvailableVal = CastInst::CreateBitOrPointerCast(1269AvailableVal, LoadI->getType(), "", LoadI->getIterator());1270cast<Instruction>(AvailableVal)->setDebugLoc(LoadI->getDebugLoc());1271}1272LoadI->replaceAllUsesWith(AvailableVal);1273LoadI->eraseFromParent();1274return true;1275}12761277// Otherwise, if we scanned the whole block and got to the top of the block,1278// we know the block is locally transparent to the load. If not, something1279// might clobber its value.1280if (BBIt != LoadBB->begin())1281return false;12821283// If all of the loads and stores that feed the value have the same AA tags,1284// then we can propagate them onto any newly inserted loads.1285AAMDNodes AATags = LoadI->getAAMetadata();12861287SmallPtrSet<BasicBlock*, 8> PredsScanned;12881289using AvailablePredsTy = SmallVector<std::pair<BasicBlock *, Value *>, 8>;12901291AvailablePredsTy AvailablePreds;1292BasicBlock *OneUnavailablePred = nullptr;1293SmallVector<LoadInst*, 8> CSELoads;12941295// If we got here, the loaded value is transparent through to the start of the1296// block. Check to see if it is available in any of the predecessor blocks.1297for (BasicBlock *PredBB : predecessors(LoadBB)) {1298// If we already scanned this predecessor, skip it.1299if (!PredsScanned.insert(PredBB).second)1300continue;13011302BBIt = PredBB->end();1303unsigned NumScanedInst = 0;1304Value *PredAvailable = nullptr;1305// NOTE: We don't CSE load that is volatile or anything stronger than1306// unordered, that should have been checked when we entered the function.1307assert(LoadI->isUnordered() &&1308"Attempting to CSE volatile or atomic loads");1309// If this is a load on a phi pointer, phi-translate it and search1310// for available load/store to the pointer in predecessors.1311Type *AccessTy = LoadI->getType();1312const auto &DL = LoadI->getDataLayout();1313MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),1314LocationSize::precise(DL.getTypeStoreSize(AccessTy)),1315AATags);1316PredAvailable = findAvailablePtrLoadStore(1317Loc, AccessTy, LoadI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,1318&BatchAA, &IsLoadCSE, &NumScanedInst);13191320// If PredBB has a single predecessor, continue scanning through the1321// single predecessor.1322BasicBlock *SinglePredBB = PredBB;1323while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&1324NumScanedInst < DefMaxInstsToScan) {1325SinglePredBB = SinglePredBB->getSinglePredecessor();1326if (SinglePredBB) {1327BBIt = SinglePredBB->end();1328PredAvailable = findAvailablePtrLoadStore(1329Loc, AccessTy, LoadI->isAtomic(), SinglePredBB, BBIt,1330(DefMaxInstsToScan - NumScanedInst), &BatchAA, &IsLoadCSE,1331&NumScanedInst);1332}1333}13341335if (!PredAvailable) {1336OneUnavailablePred = PredBB;1337continue;1338}13391340if (IsLoadCSE)1341CSELoads.push_back(cast<LoadInst>(PredAvailable));13421343// If so, this load is partially redundant. Remember this info so that we1344// can create a PHI node.1345AvailablePreds.emplace_back(PredBB, PredAvailable);1346}13471348// If the loaded value isn't available in any predecessor, it isn't partially1349// redundant.1350if (AvailablePreds.empty()) return false;13511352// Okay, the loaded value is available in at least one (and maybe all!)1353// predecessors. If the value is unavailable in more than one unique1354// predecessor, we want to insert a merge block for those common predecessors.1355// This ensures that we only have to insert one reload, thus not increasing1356// code size.1357BasicBlock *UnavailablePred = nullptr;13581359// If the value is unavailable in one of predecessors, we will end up1360// inserting a new instruction into them. It is only valid if all the1361// instructions before LoadI are guaranteed to pass execution to its1362// successor, or if LoadI is safe to speculate.1363// TODO: If this logic becomes more complex, and we will perform PRE insertion1364// farther than to a predecessor, we need to reuse the code from GVN's PRE.1365// It requires domination tree analysis, so for this simple case it is an1366// overkill.1367if (PredsScanned.size() != AvailablePreds.size() &&1368!isSafeToSpeculativelyExecute(LoadI))1369for (auto I = LoadBB->begin(); &*I != LoadI; ++I)1370if (!isGuaranteedToTransferExecutionToSuccessor(&*I))1371return false;13721373// If there is exactly one predecessor where the value is unavailable, the1374// already computed 'OneUnavailablePred' block is it. If it ends in an1375// unconditional branch, we know that it isn't a critical edge.1376if (PredsScanned.size() == AvailablePreds.size()+1 &&1377OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) {1378UnavailablePred = OneUnavailablePred;1379} else if (PredsScanned.size() != AvailablePreds.size()) {1380// Otherwise, we had multiple unavailable predecessors or we had a critical1381// edge from the one.1382SmallVector<BasicBlock*, 8> PredsToSplit;1383SmallPtrSet<BasicBlock*, 8> AvailablePredSet;13841385for (const auto &AvailablePred : AvailablePreds)1386AvailablePredSet.insert(AvailablePred.first);13871388// Add all the unavailable predecessors to the PredsToSplit list.1389for (BasicBlock *P : predecessors(LoadBB)) {1390// If the predecessor is an indirect goto, we can't split the edge.1391if (isa<IndirectBrInst>(P->getTerminator()))1392return false;13931394if (!AvailablePredSet.count(P))1395PredsToSplit.push_back(P);1396}13971398// Split them out to their own block.1399UnavailablePred = splitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split");1400}14011402// If the value isn't available in all predecessors, then there will be1403// exactly one where it isn't available. Insert a load on that edge and add1404// it to the AvailablePreds list.1405if (UnavailablePred) {1406assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&1407"Can't handle critical edge here!");1408LoadInst *NewVal = new LoadInst(1409LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),1410LoadI->getName() + ".pr", false, LoadI->getAlign(),1411LoadI->getOrdering(), LoadI->getSyncScopeID(),1412UnavailablePred->getTerminator()->getIterator());1413NewVal->setDebugLoc(LoadI->getDebugLoc());1414if (AATags)1415NewVal->setAAMetadata(AATags);14161417AvailablePreds.emplace_back(UnavailablePred, NewVal);1418}14191420// Now we know that each predecessor of this block has a value in1421// AvailablePreds, sort them for efficient access as we're walking the preds.1422array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());14231424// Create a PHI node at the start of the block for the PRE'd load value.1425PHINode *PN = PHINode::Create(LoadI->getType(), pred_size(LoadBB), "");1426PN->insertBefore(LoadBB->begin());1427PN->takeName(LoadI);1428PN->setDebugLoc(LoadI->getDebugLoc());14291430// Insert new entries into the PHI for each predecessor. A single block may1431// have multiple entries here.1432for (BasicBlock *P : predecessors(LoadBB)) {1433AvailablePredsTy::iterator I =1434llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));14351436assert(I != AvailablePreds.end() && I->first == P &&1437"Didn't find entry for predecessor!");14381439// If we have an available predecessor but it requires casting, insert the1440// cast in the predecessor and use the cast. Note that we have to update the1441// AvailablePreds vector as we go so that all of the PHI entries for this1442// predecessor use the same bitcast.1443Value *&PredV = I->second;1444if (PredV->getType() != LoadI->getType())1445PredV = CastInst::CreateBitOrPointerCast(1446PredV, LoadI->getType(), "", P->getTerminator()->getIterator());14471448PN->addIncoming(PredV, I->first);1449}14501451for (LoadInst *PredLoadI : CSELoads) {1452combineMetadataForCSE(PredLoadI, LoadI, true);1453LVI->forgetValue(PredLoadI);1454}14551456LoadI->replaceAllUsesWith(PN);1457LoadI->eraseFromParent();14581459return true;1460}14611462/// findMostPopularDest - The specified list contains multiple possible1463/// threadable destinations. Pick the one that occurs the most frequently in1464/// the list.1465static BasicBlock *1466findMostPopularDest(BasicBlock *BB,1467const SmallVectorImpl<std::pair<BasicBlock *,1468BasicBlock *>> &PredToDestList) {1469assert(!PredToDestList.empty());14701471// Determine popularity. If there are multiple possible destinations, we1472// explicitly choose to ignore 'undef' destinations. We prefer to thread1473// blocks with known and real destinations to threading undef. We'll handle1474// them later if interesting.1475MapVector<BasicBlock *, unsigned> DestPopularity;14761477// Populate DestPopularity with the successors in the order they appear in the1478// successor list. This way, we ensure determinism by iterating it in the1479// same order in llvm::max_element below. We map nullptr to 0 so that we can1480// return nullptr when PredToDestList contains nullptr only.1481DestPopularity[nullptr] = 0;1482for (auto *SuccBB : successors(BB))1483DestPopularity[SuccBB] = 0;14841485for (const auto &PredToDest : PredToDestList)1486if (PredToDest.second)1487DestPopularity[PredToDest.second]++;14881489// Find the most popular dest.1490auto MostPopular = llvm::max_element(DestPopularity, llvm::less_second());14911492// Okay, we have finally picked the most popular destination.1493return MostPopular->first;1494}14951496// Try to evaluate the value of V when the control flows from PredPredBB to1497// BB->getSinglePredecessor() and then on to BB.1498Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,1499BasicBlock *PredPredBB,1500Value *V,1501const DataLayout &DL) {1502BasicBlock *PredBB = BB->getSinglePredecessor();1503assert(PredBB && "Expected a single predecessor");15041505if (Constant *Cst = dyn_cast<Constant>(V)) {1506return Cst;1507}15081509// Consult LVI if V is not an instruction in BB or PredBB.1510Instruction *I = dyn_cast<Instruction>(V);1511if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {1512return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);1513}15141515// Look into a PHI argument.1516if (PHINode *PHI = dyn_cast<PHINode>(V)) {1517if (PHI->getParent() == PredBB)1518return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));1519return nullptr;1520}15211522// If we have a CmpInst, try to fold it for each incoming edge into PredBB.1523if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {1524if (CondCmp->getParent() == BB) {1525Constant *Op0 =1526evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0), DL);1527Constant *Op1 =1528evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1), DL);1529if (Op0 && Op1) {1530return ConstantFoldCompareInstOperands(CondCmp->getPredicate(), Op0,1531Op1, DL);1532}1533}1534return nullptr;1535}15361537return nullptr;1538}15391540bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,1541ConstantPreference Preference,1542Instruction *CxtI) {1543// If threading this would thread across a loop header, don't even try to1544// thread the edge.1545if (LoopHeaders.count(BB))1546return false;15471548PredValueInfoTy PredValues;1549if (!computeValueKnownInPredecessors(Cond, BB, PredValues, Preference,1550CxtI)) {1551// We don't have known values in predecessors. See if we can thread through1552// BB and its sole predecessor.1553return maybethreadThroughTwoBasicBlocks(BB, Cond);1554}15551556assert(!PredValues.empty() &&1557"computeValueKnownInPredecessors returned true with no values");15581559LLVM_DEBUG(dbgs() << "IN BB: " << *BB;1560for (const auto &PredValue : PredValues) {1561dbgs() << " BB '" << BB->getName()1562<< "': FOUND condition = " << *PredValue.first1563<< " for pred '" << PredValue.second->getName() << "'.\n";1564});15651566// Decide what we want to thread through. Convert our list of known values to1567// a list of known destinations for each pred. This also discards duplicate1568// predecessors and keeps track of the undefined inputs (which are represented1569// as a null dest in the PredToDestList).1570SmallPtrSet<BasicBlock*, 16> SeenPreds;1571SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;15721573BasicBlock *OnlyDest = nullptr;1574BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;1575Constant *OnlyVal = nullptr;1576Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;15771578for (const auto &PredValue : PredValues) {1579BasicBlock *Pred = PredValue.second;1580if (!SeenPreds.insert(Pred).second)1581continue; // Duplicate predecessor entry.15821583Constant *Val = PredValue.first;15841585BasicBlock *DestBB;1586if (isa<UndefValue>(Val))1587DestBB = nullptr;1588else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {1589assert(isa<ConstantInt>(Val) && "Expecting a constant integer");1590DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());1591} else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {1592assert(isa<ConstantInt>(Val) && "Expecting a constant integer");1593DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();1594} else {1595assert(isa<IndirectBrInst>(BB->getTerminator())1596&& "Unexpected terminator");1597assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");1598DestBB = cast<BlockAddress>(Val)->getBasicBlock();1599}16001601// If we have exactly one destination, remember it for efficiency below.1602if (PredToDestList.empty()) {1603OnlyDest = DestBB;1604OnlyVal = Val;1605} else {1606if (OnlyDest != DestBB)1607OnlyDest = MultipleDestSentinel;1608// It possible we have same destination, but different value, e.g. default1609// case in switchinst.1610if (Val != OnlyVal)1611OnlyVal = MultipleVal;1612}16131614// If the predecessor ends with an indirect goto, we can't change its1615// destination.1616if (isa<IndirectBrInst>(Pred->getTerminator()))1617continue;16181619PredToDestList.emplace_back(Pred, DestBB);1620}16211622// If all edges were unthreadable, we fail.1623if (PredToDestList.empty())1624return false;16251626// If all the predecessors go to a single known successor, we want to fold,1627// not thread. By doing so, we do not need to duplicate the current block and1628// also miss potential opportunities in case we dont/cant duplicate.1629if (OnlyDest && OnlyDest != MultipleDestSentinel) {1630if (BB->hasNPredecessors(PredToDestList.size())) {1631bool SeenFirstBranchToOnlyDest = false;1632std::vector <DominatorTree::UpdateType> Updates;1633Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);1634for (BasicBlock *SuccBB : successors(BB)) {1635if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) {1636SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.1637} else {1638SuccBB->removePredecessor(BB, true); // This is unreachable successor.1639Updates.push_back({DominatorTree::Delete, BB, SuccBB});1640}1641}16421643// Finally update the terminator.1644Instruction *Term = BB->getTerminator();1645Instruction *NewBI = BranchInst::Create(OnlyDest, Term->getIterator());1646NewBI->setDebugLoc(Term->getDebugLoc());1647++NumFolds;1648Term->eraseFromParent();1649DTU->applyUpdatesPermissive(Updates);1650if (auto *BPI = getBPI())1651BPI->eraseBlock(BB);16521653// If the condition is now dead due to the removal of the old terminator,1654// erase it.1655if (auto *CondInst = dyn_cast<Instruction>(Cond)) {1656if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())1657CondInst->eraseFromParent();1658// We can safely replace *some* uses of the CondInst if it has1659// exactly one value as returned by LVI. RAUW is incorrect in the1660// presence of guards and assumes, that have the `Cond` as the use. This1661// is because we use the guards/assume to reason about the `Cond` value1662// at the end of block, but RAUW unconditionally replaces all uses1663// including the guards/assumes themselves and the uses before the1664// guard/assume.1665else if (OnlyVal && OnlyVal != MultipleVal)1666replaceFoldableUses(CondInst, OnlyVal, BB);1667}1668return true;1669}1670}16711672// Determine which is the most common successor. If we have many inputs and1673// this block is a switch, we want to start by threading the batch that goes1674// to the most popular destination first. If we only know about one1675// threadable destination (the common case) we can avoid this.1676BasicBlock *MostPopularDest = OnlyDest;16771678if (MostPopularDest == MultipleDestSentinel) {1679// Remove any loop headers from the Dest list, threadEdge conservatively1680// won't process them, but we might have other destination that are eligible1681// and we still want to process.1682erase_if(PredToDestList,1683[&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {1684return LoopHeaders.contains(PredToDest.second);1685});16861687if (PredToDestList.empty())1688return false;16891690MostPopularDest = findMostPopularDest(BB, PredToDestList);1691}16921693// Now that we know what the most popular destination is, factor all1694// predecessors that will jump to it into a single predecessor.1695SmallVector<BasicBlock*, 16> PredsToFactor;1696for (const auto &PredToDest : PredToDestList)1697if (PredToDest.second == MostPopularDest) {1698BasicBlock *Pred = PredToDest.first;16991700// This predecessor may be a switch or something else that has multiple1701// edges to the block. Factor each of these edges by listing them1702// according to # occurrences in PredsToFactor.1703for (BasicBlock *Succ : successors(Pred))1704if (Succ == BB)1705PredsToFactor.push_back(Pred);1706}17071708// If the threadable edges are branching on an undefined value, we get to pick1709// the destination that these predecessors should get to.1710if (!MostPopularDest)1711MostPopularDest = BB->getTerminator()->1712getSuccessor(getBestDestForJumpOnUndef(BB));17131714// Ok, try to thread it!1715return tryThreadEdge(BB, PredsToFactor, MostPopularDest);1716}17171718/// processBranchOnPHI - We have an otherwise unthreadable conditional branch on1719/// a PHI node (or freeze PHI) in the current block. See if there are any1720/// simplifications we can do based on inputs to the phi node.1721bool JumpThreadingPass::processBranchOnPHI(PHINode *PN) {1722BasicBlock *BB = PN->getParent();17231724// TODO: We could make use of this to do it once for blocks with common PHI1725// values.1726SmallVector<BasicBlock*, 1> PredBBs;1727PredBBs.resize(1);17281729// If any of the predecessor blocks end in an unconditional branch, we can1730// *duplicate* the conditional branch into that block in order to further1731// encourage jump threading and to eliminate cases where we have branch on a1732// phi of an icmp (branch on icmp is much better).1733// This is still beneficial when a frozen phi is used as the branch condition1734// because it allows CodeGenPrepare to further canonicalize br(freeze(icmp))1735// to br(icmp(freeze ...)).1736for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {1737BasicBlock *PredBB = PN->getIncomingBlock(i);1738if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))1739if (PredBr->isUnconditional()) {1740PredBBs[0] = PredBB;1741// Try to duplicate BB into PredBB.1742if (duplicateCondBranchOnPHIIntoPred(BB, PredBBs))1743return true;1744}1745}17461747return false;1748}17491750/// processBranchOnXOR - We have an otherwise unthreadable conditional branch on1751/// a xor instruction in the current block. See if there are any1752/// simplifications we can do based on inputs to the xor.1753bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {1754BasicBlock *BB = BO->getParent();17551756// If either the LHS or RHS of the xor is a constant, don't do this1757// optimization.1758if (isa<ConstantInt>(BO->getOperand(0)) ||1759isa<ConstantInt>(BO->getOperand(1)))1760return false;17611762// If the first instruction in BB isn't a phi, we won't be able to infer1763// anything special about any particular predecessor.1764if (!isa<PHINode>(BB->front()))1765return false;17661767// If this BB is a landing pad, we won't be able to split the edge into it.1768if (BB->isEHPad())1769return false;17701771// If we have a xor as the branch input to this block, and we know that the1772// LHS or RHS of the xor in any predecessor is true/false, then we can clone1773// the condition into the predecessor and fix that value to true, saving some1774// logical ops on that path and encouraging other paths to simplify.1775//1776// This copies something like this:1777//1778// BB:1779// %X = phi i1 [1], [%X']1780// %Y = icmp eq i32 %A, %B1781// %Z = xor i1 %X, %Y1782// br i1 %Z, ...1783//1784// Into:1785// BB':1786// %Y = icmp ne i32 %A, %B1787// br i1 %Y, ...17881789PredValueInfoTy XorOpValues;1790bool isLHS = true;1791if (!computeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,1792WantInteger, BO)) {1793assert(XorOpValues.empty());1794if (!computeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,1795WantInteger, BO))1796return false;1797isLHS = false;1798}17991800assert(!XorOpValues.empty() &&1801"computeValueKnownInPredecessors returned true with no values");18021803// Scan the information to see which is most popular: true or false. The1804// predecessors can be of the set true, false, or undef.1805unsigned NumTrue = 0, NumFalse = 0;1806for (const auto &XorOpValue : XorOpValues) {1807if (isa<UndefValue>(XorOpValue.first))1808// Ignore undefs for the count.1809continue;1810if (cast<ConstantInt>(XorOpValue.first)->isZero())1811++NumFalse;1812else1813++NumTrue;1814}18151816// Determine which value to split on, true, false, or undef if neither.1817ConstantInt *SplitVal = nullptr;1818if (NumTrue > NumFalse)1819SplitVal = ConstantInt::getTrue(BB->getContext());1820else if (NumTrue != 0 || NumFalse != 0)1821SplitVal = ConstantInt::getFalse(BB->getContext());18221823// Collect all of the blocks that this can be folded into so that we can1824// factor this once and clone it once.1825SmallVector<BasicBlock*, 8> BlocksToFoldInto;1826for (const auto &XorOpValue : XorOpValues) {1827if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))1828continue;18291830BlocksToFoldInto.push_back(XorOpValue.second);1831}18321833// If we inferred a value for all of the predecessors, then duplication won't1834// help us. However, we can just replace the LHS or RHS with the constant.1835if (BlocksToFoldInto.size() ==1836cast<PHINode>(BB->front()).getNumIncomingValues()) {1837if (!SplitVal) {1838// If all preds provide undef, just nuke the xor, because it is undef too.1839BO->replaceAllUsesWith(UndefValue::get(BO->getType()));1840BO->eraseFromParent();1841} else if (SplitVal->isZero() && BO != BO->getOperand(isLHS)) {1842// If all preds provide 0, replace the xor with the other input.1843BO->replaceAllUsesWith(BO->getOperand(isLHS));1844BO->eraseFromParent();1845} else {1846// If all preds provide 1, set the computed value to 1.1847BO->setOperand(!isLHS, SplitVal);1848}18491850return true;1851}18521853// If any of predecessors end with an indirect goto, we can't change its1854// destination.1855if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {1856return isa<IndirectBrInst>(Pred->getTerminator());1857}))1858return false;18591860// Try to duplicate BB into PredBB.1861return duplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);1862}18631864/// addPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new1865/// predecessor to the PHIBB block. If it has PHI nodes, add entries for1866/// NewPred using the entries from OldPred (suitably mapped).1867static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,1868BasicBlock *OldPred,1869BasicBlock *NewPred,1870ValueToValueMapTy &ValueMap) {1871for (PHINode &PN : PHIBB->phis()) {1872// Ok, we have a PHI node. Figure out what the incoming value was for the1873// DestBlock.1874Value *IV = PN.getIncomingValueForBlock(OldPred);18751876// Remap the value if necessary.1877if (Instruction *Inst = dyn_cast<Instruction>(IV)) {1878ValueToValueMapTy::iterator I = ValueMap.find(Inst);1879if (I != ValueMap.end())1880IV = I->second;1881}18821883PN.addIncoming(IV, NewPred);1884}1885}18861887/// Merge basic block BB into its sole predecessor if possible.1888bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {1889BasicBlock *SinglePred = BB->getSinglePredecessor();1890if (!SinglePred)1891return false;18921893const Instruction *TI = SinglePred->getTerminator();1894if (TI->isSpecialTerminator() || TI->getNumSuccessors() != 1 ||1895SinglePred == BB || hasAddressTakenAndUsed(BB))1896return false;18971898// If SinglePred was a loop header, BB becomes one.1899if (LoopHeaders.erase(SinglePred))1900LoopHeaders.insert(BB);19011902LVI->eraseBlock(SinglePred);1903MergeBasicBlockIntoOnlyPred(BB, DTU.get());19041905// Now that BB is merged into SinglePred (i.e. SinglePred code followed by1906// BB code within one basic block `BB`), we need to invalidate the LVI1907// information associated with BB, because the LVI information need not be1908// true for all of BB after the merge. For example,1909// Before the merge, LVI info and code is as follows:1910// SinglePred: <LVI info1 for %p val>1911// %y = use of %p1912// call @exit() // need not transfer execution to successor.1913// assume(%p) // from this point on %p is true1914// br label %BB1915// BB: <LVI info2 for %p val, i.e. %p is true>1916// %x = use of %p1917// br label exit1918//1919// Note that this LVI info for blocks BB and SinglPred is correct for %p1920// (info2 and info1 respectively). After the merge and the deletion of the1921// LVI info1 for SinglePred. We have the following code:1922// BB: <LVI info2 for %p val>1923// %y = use of %p1924// call @exit()1925// assume(%p)1926// %x = use of %p <-- LVI info2 is correct from here onwards.1927// br label exit1928// LVI info2 for BB is incorrect at the beginning of BB.19291930// Invalidate LVI information for BB if the LVI is not provably true for1931// all of BB.1932if (!isGuaranteedToTransferExecutionToSuccessor(BB))1933LVI->eraseBlock(BB);1934return true;1935}19361937/// Update the SSA form. NewBB contains instructions that are copied from BB.1938/// ValueMapping maps old values in BB to new ones in NewBB.1939void JumpThreadingPass::updateSSA(BasicBlock *BB, BasicBlock *NewBB,1940ValueToValueMapTy &ValueMapping) {1941// If there were values defined in BB that are used outside the block, then we1942// now have to update all uses of the value to use either the original value,1943// the cloned value, or some PHI derived value. This can require arbitrary1944// PHI insertion, of which we are prepared to do, clean these up now.1945SSAUpdater SSAUpdate;1946SmallVector<Use *, 16> UsesToRename;1947SmallVector<DbgValueInst *, 4> DbgValues;1948SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;19491950for (Instruction &I : *BB) {1951// Scan all uses of this instruction to see if it is used outside of its1952// block, and if so, record them in UsesToRename.1953for (Use &U : I.uses()) {1954Instruction *User = cast<Instruction>(U.getUser());1955if (PHINode *UserPN = dyn_cast<PHINode>(User)) {1956if (UserPN->getIncomingBlock(U) == BB)1957continue;1958} else if (User->getParent() == BB)1959continue;19601961UsesToRename.push_back(&U);1962}19631964// Find debug values outside of the block1965findDbgValues(DbgValues, &I, &DbgVariableRecords);1966llvm::erase_if(DbgValues, [&](const DbgValueInst *DbgVal) {1967return DbgVal->getParent() == BB;1968});1969llvm::erase_if(DbgVariableRecords, [&](const DbgVariableRecord *DbgVarRec) {1970return DbgVarRec->getParent() == BB;1971});19721973// If there are no uses outside the block, we're done with this instruction.1974if (UsesToRename.empty() && DbgValues.empty() && DbgVariableRecords.empty())1975continue;1976LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");19771978// We found a use of I outside of BB. Rename all uses of I that are outside1979// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks1980// with the two values we know.1981SSAUpdate.Initialize(I.getType(), I.getName());1982SSAUpdate.AddAvailableValue(BB, &I);1983SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);19841985while (!UsesToRename.empty())1986SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());1987if (!DbgValues.empty() || !DbgVariableRecords.empty()) {1988SSAUpdate.UpdateDebugValues(&I, DbgValues);1989SSAUpdate.UpdateDebugValues(&I, DbgVariableRecords);1990DbgValues.clear();1991DbgVariableRecords.clear();1992}19931994LLVM_DEBUG(dbgs() << "\n");1995}1996}19971998/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone1999/// arguments that come from PredBB. Return the map from the variables in the2000/// source basic block to the variables in the newly created basic block.20012002void JumpThreadingPass::cloneInstructions(ValueToValueMapTy &ValueMapping,2003BasicBlock::iterator BI,2004BasicBlock::iterator BE,2005BasicBlock *NewBB,2006BasicBlock *PredBB) {2007// We are going to have to map operands from the source basic block to the new2008// copy of the block 'NewBB'. If there are PHI nodes in the source basic2009// block, evaluate them to account for entry from PredBB.20102011// Retargets llvm.dbg.value to any renamed variables.2012auto RetargetDbgValueIfPossible = [&](Instruction *NewInst) -> bool {2013auto DbgInstruction = dyn_cast<DbgValueInst>(NewInst);2014if (!DbgInstruction)2015return false;20162017SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;2018for (auto DbgOperand : DbgInstruction->location_ops()) {2019auto DbgOperandInstruction = dyn_cast<Instruction>(DbgOperand);2020if (!DbgOperandInstruction)2021continue;20222023auto I = ValueMapping.find(DbgOperandInstruction);2024if (I != ValueMapping.end()) {2025OperandsToRemap.insert(2026std::pair<Value *, Value *>(DbgOperand, I->second));2027}2028}20292030for (auto &[OldOp, MappedOp] : OperandsToRemap)2031DbgInstruction->replaceVariableLocationOp(OldOp, MappedOp);2032return true;2033};20342035// Duplicate implementation of the above dbg.value code, using2036// DbgVariableRecords instead.2037auto RetargetDbgVariableRecordIfPossible = [&](DbgVariableRecord *DVR) {2038SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;2039for (auto *Op : DVR->location_ops()) {2040Instruction *OpInst = dyn_cast<Instruction>(Op);2041if (!OpInst)2042continue;20432044auto I = ValueMapping.find(OpInst);2045if (I != ValueMapping.end())2046OperandsToRemap.insert({OpInst, I->second});2047}20482049for (auto &[OldOp, MappedOp] : OperandsToRemap)2050DVR->replaceVariableLocationOp(OldOp, MappedOp);2051};20522053BasicBlock *RangeBB = BI->getParent();20542055// Clone the phi nodes of the source basic block into NewBB. The resulting2056// phi nodes are trivial since NewBB only has one predecessor, but SSAUpdater2057// might need to rewrite the operand of the cloned phi.2058for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {2059PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);2060NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);2061ValueMapping[PN] = NewPN;2062}20632064// Clone noalias scope declarations in the threaded block. When threading a2065// loop exit, we would otherwise end up with two idential scope declarations2066// visible at the same time.2067SmallVector<MDNode *> NoAliasScopes;2068DenseMap<MDNode *, MDNode *> ClonedScopes;2069LLVMContext &Context = PredBB->getContext();2070identifyNoAliasScopesToClone(BI, BE, NoAliasScopes);2071cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);20722073auto CloneAndRemapDbgInfo = [&](Instruction *NewInst, Instruction *From) {2074auto DVRRange = NewInst->cloneDebugInfoFrom(From);2075for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))2076RetargetDbgVariableRecordIfPossible(&DVR);2077};20782079// Clone the non-phi instructions of the source basic block into NewBB,2080// keeping track of the mapping and using it to remap operands in the cloned2081// instructions.2082for (; BI != BE; ++BI) {2083Instruction *New = BI->clone();2084New->setName(BI->getName());2085New->insertInto(NewBB, NewBB->end());2086ValueMapping[&*BI] = New;2087adaptNoAliasScopes(New, ClonedScopes, Context);20882089CloneAndRemapDbgInfo(New, &*BI);20902091if (RetargetDbgValueIfPossible(New))2092continue;20932094// Remap operands to patch up intra-block references.2095for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)2096if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {2097ValueToValueMapTy::iterator I = ValueMapping.find(Inst);2098if (I != ValueMapping.end())2099New->setOperand(i, I->second);2100}2101}21022103// There may be DbgVariableRecords on the terminator, clone directly from2104// marker to marker as there isn't an instruction there.2105if (BE != RangeBB->end() && BE->hasDbgRecords()) {2106// Dump them at the end.2107DbgMarker *Marker = RangeBB->getMarker(BE);2108DbgMarker *EndMarker = NewBB->createMarker(NewBB->end());2109auto DVRRange = EndMarker->cloneDebugInfoFrom(Marker, std::nullopt);2110for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))2111RetargetDbgVariableRecordIfPossible(&DVR);2112}21132114return;2115}21162117/// Attempt to thread through two successive basic blocks.2118bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,2119Value *Cond) {2120// Consider:2121//2122// PredBB:2123// %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]2124// %tobool = icmp eq i32 %cond, 02125// br i1 %tobool, label %BB, label ...2126//2127// BB:2128// %cmp = icmp eq i32* %var, null2129// br i1 %cmp, label ..., label ...2130//2131// We don't know the value of %var at BB even if we know which incoming edge2132// we take to BB. However, once we duplicate PredBB for each of its incoming2133// edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of2134// PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.21352136// Require that BB end with a Branch for simplicity.2137BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());2138if (!CondBr)2139return false;21402141// BB must have exactly one predecessor.2142BasicBlock *PredBB = BB->getSinglePredecessor();2143if (!PredBB)2144return false;21452146// Require that PredBB end with a conditional Branch. If PredBB ends with an2147// unconditional branch, we should be merging PredBB and BB instead. For2148// simplicity, we don't deal with a switch.2149BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());2150if (!PredBBBranch || PredBBBranch->isUnconditional())2151return false;21522153// If PredBB has exactly one incoming edge, we don't gain anything by copying2154// PredBB.2155if (PredBB->getSinglePredecessor())2156return false;21572158// Don't thread through PredBB if it contains a successor edge to itself, in2159// which case we would infinite loop. Suppose we are threading an edge from2160// PredPredBB through PredBB and BB to SuccBB with PredBB containing a2161// successor edge to itself. If we allowed jump threading in this case, we2162// could duplicate PredBB and BB as, say, PredBB.thread and BB.thread. Since2163// PredBB.thread has a successor edge to PredBB, we would immediately come up2164// with another jump threading opportunity from PredBB.thread through PredBB2165// and BB to SuccBB. This jump threading would repeatedly occur. That is, we2166// would keep peeling one iteration from PredBB.2167if (llvm::is_contained(successors(PredBB), PredBB))2168return false;21692170// Don't thread across a loop header.2171if (LoopHeaders.count(PredBB))2172return false;21732174// Avoid complication with duplicating EH pads.2175if (PredBB->isEHPad())2176return false;21772178// Find a predecessor that we can thread. For simplicity, we only consider a2179// successor edge out of BB to which we thread exactly one incoming edge into2180// PredBB.2181unsigned ZeroCount = 0;2182unsigned OneCount = 0;2183BasicBlock *ZeroPred = nullptr;2184BasicBlock *OnePred = nullptr;2185const DataLayout &DL = BB->getDataLayout();2186for (BasicBlock *P : predecessors(PredBB)) {2187// If PredPred ends with IndirectBrInst, we can't handle it.2188if (isa<IndirectBrInst>(P->getTerminator()))2189continue;2190if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(2191evaluateOnPredecessorEdge(BB, P, Cond, DL))) {2192if (CI->isZero()) {2193ZeroCount++;2194ZeroPred = P;2195} else if (CI->isOne()) {2196OneCount++;2197OnePred = P;2198}2199}2200}22012202// Disregard complicated cases where we have to thread multiple edges.2203BasicBlock *PredPredBB;2204if (ZeroCount == 1) {2205PredPredBB = ZeroPred;2206} else if (OneCount == 1) {2207PredPredBB = OnePred;2208} else {2209return false;2210}22112212BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);22132214// If threading to the same block as we come from, we would infinite loop.2215if (SuccBB == BB) {2216LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()2217<< "' - would thread to self!\n");2218return false;2219}22202221// If threading this would thread across a loop header, don't thread the edge.2222// See the comments above findLoopHeaders for justifications and caveats.2223if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {2224LLVM_DEBUG({2225bool BBIsHeader = LoopHeaders.count(BB);2226bool SuccIsHeader = LoopHeaders.count(SuccBB);2227dbgs() << " Not threading across "2228<< (BBIsHeader ? "loop header BB '" : "block BB '")2229<< BB->getName() << "' to dest "2230<< (SuccIsHeader ? "loop header BB '" : "block BB '")2231<< SuccBB->getName()2232<< "' - it might create an irreducible loop!\n";2233});2234return false;2235}22362237// Compute the cost of duplicating BB and PredBB.2238unsigned BBCost = getJumpThreadDuplicationCost(2239TTI, BB, BB->getTerminator(), BBDupThreshold);2240unsigned PredBBCost = getJumpThreadDuplicationCost(2241TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);22422243// Give up if costs are too high. We need to check BBCost and PredBBCost2244// individually before checking their sum because getJumpThreadDuplicationCost2245// return (unsigned)~0 for those basic blocks that cannot be duplicated.2246if (BBCost > BBDupThreshold || PredBBCost > BBDupThreshold ||2247BBCost + PredBBCost > BBDupThreshold) {2248LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()2249<< "' - Cost is too high: " << PredBBCost2250<< " for PredBB, " << BBCost << "for BB\n");2251return false;2252}22532254// Now we are ready to duplicate PredBB.2255threadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);2256return true;2257}22582259void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,2260BasicBlock *PredBB,2261BasicBlock *BB,2262BasicBlock *SuccBB) {2263LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"2264<< BB->getName() << "'\n");22652266// Build BPI/BFI before any changes are made to IR.2267bool HasProfile = doesBlockHaveProfileData(BB);2268auto *BFI = getOrCreateBFI(HasProfile);2269auto *BPI = getOrCreateBPI(BFI != nullptr);22702271BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());2272BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());22732274BasicBlock *NewBB =2275BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",2276PredBB->getParent(), PredBB);2277NewBB->moveAfter(PredBB);22782279// Set the block frequency of NewBB.2280if (BFI) {2281assert(BPI && "It's expected BPI to exist along with BFI");2282auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *2283BPI->getEdgeProbability(PredPredBB, PredBB);2284BFI->setBlockFreq(NewBB, NewBBFreq);2285}22862287// We are going to have to map operands from the original BB block to the new2288// copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them2289// to account for entry from PredPredBB.2290ValueToValueMapTy ValueMapping;2291cloneInstructions(ValueMapping, PredBB->begin(), PredBB->end(), NewBB,2292PredPredBB);22932294// Copy the edge probabilities from PredBB to NewBB.2295if (BPI)2296BPI->copyEdgeProbabilities(PredBB, NewBB);22972298// Update the terminator of PredPredBB to jump to NewBB instead of PredBB.2299// This eliminates predecessors from PredPredBB, which requires us to simplify2300// any PHI nodes in PredBB.2301Instruction *PredPredTerm = PredPredBB->getTerminator();2302for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)2303if (PredPredTerm->getSuccessor(i) == PredBB) {2304PredBB->removePredecessor(PredPredBB, true);2305PredPredTerm->setSuccessor(i, NewBB);2306}23072308addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,2309ValueMapping);2310addPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,2311ValueMapping);23122313DTU->applyUpdatesPermissive(2314{{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},2315{DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},2316{DominatorTree::Insert, PredPredBB, NewBB},2317{DominatorTree::Delete, PredPredBB, PredBB}});23182319updateSSA(PredBB, NewBB, ValueMapping);23202321// Clean up things like PHI nodes with single operands, dead instructions,2322// etc.2323SimplifyInstructionsInBlock(NewBB, TLI);2324SimplifyInstructionsInBlock(PredBB, TLI);23252326SmallVector<BasicBlock *, 1> PredsToFactor;2327PredsToFactor.push_back(NewBB);2328threadEdge(BB, PredsToFactor, SuccBB);2329}23302331/// tryThreadEdge - Thread an edge if it's safe and profitable to do so.2332bool JumpThreadingPass::tryThreadEdge(2333BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,2334BasicBlock *SuccBB) {2335// If threading to the same block as we come from, we would infinite loop.2336if (SuccBB == BB) {2337LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()2338<< "' - would thread to self!\n");2339return false;2340}23412342// If threading this would thread across a loop header, don't thread the edge.2343// See the comments above findLoopHeaders for justifications and caveats.2344if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {2345LLVM_DEBUG({2346bool BBIsHeader = LoopHeaders.count(BB);2347bool SuccIsHeader = LoopHeaders.count(SuccBB);2348dbgs() << " Not threading across "2349<< (BBIsHeader ? "loop header BB '" : "block BB '") << BB->getName()2350<< "' to dest " << (SuccIsHeader ? "loop header BB '" : "block BB '")2351<< SuccBB->getName() << "' - it might create an irreducible loop!\n";2352});2353return false;2354}23552356unsigned JumpThreadCost = getJumpThreadDuplicationCost(2357TTI, BB, BB->getTerminator(), BBDupThreshold);2358if (JumpThreadCost > BBDupThreshold) {2359LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()2360<< "' - Cost is too high: " << JumpThreadCost << "\n");2361return false;2362}23632364threadEdge(BB, PredBBs, SuccBB);2365return true;2366}23672368/// threadEdge - We have decided that it is safe and profitable to factor the2369/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB2370/// across BB. Transform the IR to reflect this change.2371void JumpThreadingPass::threadEdge(BasicBlock *BB,2372const SmallVectorImpl<BasicBlock *> &PredBBs,2373BasicBlock *SuccBB) {2374assert(SuccBB != BB && "Don't create an infinite loop");23752376assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&2377"Don't thread across loop headers");23782379// Build BPI/BFI before any changes are made to IR.2380bool HasProfile = doesBlockHaveProfileData(BB);2381auto *BFI = getOrCreateBFI(HasProfile);2382auto *BPI = getOrCreateBPI(BFI != nullptr);23832384// And finally, do it! Start by factoring the predecessors if needed.2385BasicBlock *PredBB;2386if (PredBBs.size() == 1)2387PredBB = PredBBs[0];2388else {2389LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()2390<< " common predecessors.\n");2391PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");2392}23932394// And finally, do it!2395LLVM_DEBUG(dbgs() << " Threading edge from '" << PredBB->getName()2396<< "' to '" << SuccBB->getName()2397<< ", across block:\n " << *BB << "\n");23982399LVI->threadEdge(PredBB, BB, SuccBB);24002401BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),2402BB->getName()+".thread",2403BB->getParent(), BB);2404NewBB->moveAfter(PredBB);24052406// Set the block frequency of NewBB.2407if (BFI) {2408assert(BPI && "It's expected BPI to exist along with BFI");2409auto NewBBFreq =2410BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);2411BFI->setBlockFreq(NewBB, NewBBFreq);2412}24132414// Copy all the instructions from BB to NewBB except the terminator.2415ValueToValueMapTy ValueMapping;2416cloneInstructions(ValueMapping, BB->begin(), std::prev(BB->end()), NewBB,2417PredBB);24182419// We didn't copy the terminator from BB over to NewBB, because there is now2420// an unconditional jump to SuccBB. Insert the unconditional jump.2421BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB);2422NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());24232424// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the2425// PHI nodes for NewBB now.2426addPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);24272428// Update the terminator of PredBB to jump to NewBB instead of BB. This2429// eliminates predecessors from BB, which requires us to simplify any PHI2430// nodes in BB.2431Instruction *PredTerm = PredBB->getTerminator();2432for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)2433if (PredTerm->getSuccessor(i) == BB) {2434BB->removePredecessor(PredBB, true);2435PredTerm->setSuccessor(i, NewBB);2436}24372438// Enqueue required DT updates.2439DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},2440{DominatorTree::Insert, PredBB, NewBB},2441{DominatorTree::Delete, PredBB, BB}});24422443updateSSA(BB, NewBB, ValueMapping);24442445// At this point, the IR is fully up to date and consistent. Do a quick scan2446// over the new instructions and zap any that are constants or dead. This2447// frequently happens because of phi translation.2448SimplifyInstructionsInBlock(NewBB, TLI);24492450// Update the edge weight from BB to SuccBB, which should be less than before.2451updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB, BFI, BPI, HasProfile);24522453// Threaded an edge!2454++NumThreads;2455}24562457/// Create a new basic block that will be the predecessor of BB and successor of2458/// all blocks in Preds. When profile data is available, update the frequency of2459/// this new block.2460BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,2461ArrayRef<BasicBlock *> Preds,2462const char *Suffix) {2463SmallVector<BasicBlock *, 2> NewBBs;24642465// Collect the frequencies of all predecessors of BB, which will be used to2466// update the edge weight of the result of splitting predecessors.2467DenseMap<BasicBlock *, BlockFrequency> FreqMap;2468auto *BFI = getBFI();2469if (BFI) {2470auto *BPI = getOrCreateBPI(true);2471for (auto *Pred : Preds)2472FreqMap.insert(std::make_pair(2473Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));2474}24752476// In the case when BB is a LandingPad block we create 2 new predecessors2477// instead of just one.2478if (BB->isLandingPad()) {2479std::string NewName = std::string(Suffix) + ".split-lp";2480SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs);2481} else {2482NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix));2483}24842485std::vector<DominatorTree::UpdateType> Updates;2486Updates.reserve((2 * Preds.size()) + NewBBs.size());2487for (auto *NewBB : NewBBs) {2488BlockFrequency NewBBFreq(0);2489Updates.push_back({DominatorTree::Insert, NewBB, BB});2490for (auto *Pred : predecessors(NewBB)) {2491Updates.push_back({DominatorTree::Delete, Pred, BB});2492Updates.push_back({DominatorTree::Insert, Pred, NewBB});2493if (BFI) // Update frequencies between Pred -> NewBB.2494NewBBFreq += FreqMap.lookup(Pred);2495}2496if (BFI) // Apply the summed frequency to NewBB.2497BFI->setBlockFreq(NewBB, NewBBFreq);2498}24992500DTU->applyUpdatesPermissive(Updates);2501return NewBBs[0];2502}25032504bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {2505const Instruction *TI = BB->getTerminator();2506if (!TI || TI->getNumSuccessors() < 2)2507return false;25082509return hasValidBranchWeightMD(*TI);2510}25112512/// Update the block frequency of BB and branch weight and the metadata on the2513/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -2514/// Freq(PredBB->BB) / Freq(BB->SuccBB).2515void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,2516BasicBlock *BB,2517BasicBlock *NewBB,2518BasicBlock *SuccBB,2519BlockFrequencyInfo *BFI,2520BranchProbabilityInfo *BPI,2521bool HasProfile) {2522assert(((BFI && BPI) || (!BFI && !BFI)) &&2523"Both BFI & BPI should either be set or unset");25242525if (!BFI) {2526assert(!HasProfile &&2527"It's expected to have BFI/BPI when profile info exists");2528return;2529}25302531// As the edge from PredBB to BB is deleted, we have to update the block2532// frequency of BB.2533auto BBOrigFreq = BFI->getBlockFreq(BB);2534auto NewBBFreq = BFI->getBlockFreq(NewBB);2535auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB);2536auto BBNewFreq = BBOrigFreq - NewBBFreq;2537BFI->setBlockFreq(BB, BBNewFreq);25382539// Collect updated outgoing edges' frequencies from BB and use them to update2540// edge probabilities.2541SmallVector<uint64_t, 4> BBSuccFreq;2542for (BasicBlock *Succ : successors(BB)) {2543auto SuccFreq = (Succ == SuccBB)2544? BB2SuccBBFreq - NewBBFreq2545: BBOrigFreq * BPI->getEdgeProbability(BB, Succ);2546BBSuccFreq.push_back(SuccFreq.getFrequency());2547}25482549uint64_t MaxBBSuccFreq = *llvm::max_element(BBSuccFreq);25502551SmallVector<BranchProbability, 4> BBSuccProbs;2552if (MaxBBSuccFreq == 0)2553BBSuccProbs.assign(BBSuccFreq.size(),2554{1, static_cast<unsigned>(BBSuccFreq.size())});2555else {2556for (uint64_t Freq : BBSuccFreq)2557BBSuccProbs.push_back(2558BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq));2559// Normalize edge probabilities so that they sum up to one.2560BranchProbability::normalizeProbabilities(BBSuccProbs.begin(),2561BBSuccProbs.end());2562}25632564// Update edge probabilities in BPI.2565BPI->setEdgeProbability(BB, BBSuccProbs);25662567// Update the profile metadata as well.2568//2569// Don't do this if the profile of the transformed blocks was statically2570// estimated. (This could occur despite the function having an entry2571// frequency in completely cold parts of the CFG.)2572//2573// In this case we don't want to suggest to subsequent passes that the2574// calculated weights are fully consistent. Consider this graph:2575//2576// check_12577// 50% / |2578// eq_1 | 50%2579// \ |2580// check_22581// 50% / |2582// eq_2 | 50%2583// \ |2584// check_32585// 50% / |2586// eq_3 | 50%2587// \ |2588//2589// Assuming the blocks check_* all compare the same value against 1, 2 and 3,2590// the overall probabilities are inconsistent; the total probability that the2591// value is either 1, 2 or 3 is 150%.2592//2593// As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_32594// becomes 0%. This is even worse if the edge whose probability becomes 0% is2595// the loop exit edge. Then based solely on static estimation we would assume2596// the loop was extremely hot.2597//2598// FIXME this locally as well so that BPI and BFI are consistent as well. We2599// shouldn't make edges extremely likely or unlikely based solely on static2600// estimation.2601if (BBSuccProbs.size() >= 2 && HasProfile) {2602SmallVector<uint32_t, 4> Weights;2603for (auto Prob : BBSuccProbs)2604Weights.push_back(Prob.getNumerator());26052606auto TI = BB->getTerminator();2607setBranchWeights(*TI, Weights, hasBranchWeightOrigin(*TI));2608}2609}26102611/// duplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch2612/// to BB which contains an i1 PHI node and a conditional branch on that PHI.2613/// If we can duplicate the contents of BB up into PredBB do so now, this2614/// improves the odds that the branch will be on an analyzable instruction like2615/// a compare.2616bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(2617BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs) {2618assert(!PredBBs.empty() && "Can't handle an empty set");26192620// If BB is a loop header, then duplicating this block outside the loop would2621// cause us to transform this into an irreducible loop, don't do this.2622// See the comments above findLoopHeaders for justifications and caveats.2623if (LoopHeaders.count(BB)) {2624LLVM_DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()2625<< "' into predecessor block '" << PredBBs[0]->getName()2626<< "' - it might create an irreducible loop!\n");2627return false;2628}26292630unsigned DuplicationCost = getJumpThreadDuplicationCost(2631TTI, BB, BB->getTerminator(), BBDupThreshold);2632if (DuplicationCost > BBDupThreshold) {2633LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()2634<< "' - Cost is too high: " << DuplicationCost << "\n");2635return false;2636}26372638// And finally, do it! Start by factoring the predecessors if needed.2639std::vector<DominatorTree::UpdateType> Updates;2640BasicBlock *PredBB;2641if (PredBBs.size() == 1)2642PredBB = PredBBs[0];2643else {2644LLVM_DEBUG(dbgs() << " Factoring out " << PredBBs.size()2645<< " common predecessors.\n");2646PredBB = splitBlockPreds(BB, PredBBs, ".thr_comm");2647}2648Updates.push_back({DominatorTree::Delete, PredBB, BB});26492650// Okay, we decided to do this! Clone all the instructions in BB onto the end2651// of PredBB.2652LLVM_DEBUG(dbgs() << " Duplicating block '" << BB->getName()2653<< "' into end of '" << PredBB->getName()2654<< "' to eliminate branch on phi. Cost: "2655<< DuplicationCost << " block is:" << *BB << "\n");26562657// Unless PredBB ends with an unconditional branch, split the edge so that we2658// can just clone the bits from BB into the end of the new PredBB.2659BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());26602661if (!OldPredBranch || !OldPredBranch->isUnconditional()) {2662BasicBlock *OldPredBB = PredBB;2663PredBB = SplitEdge(OldPredBB, BB);2664Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB});2665Updates.push_back({DominatorTree::Insert, PredBB, BB});2666Updates.push_back({DominatorTree::Delete, OldPredBB, BB});2667OldPredBranch = cast<BranchInst>(PredBB->getTerminator());2668}26692670// We are going to have to map operands from the original BB block into the2671// PredBB block. Evaluate PHI nodes in BB.2672ValueToValueMapTy ValueMapping;26732674BasicBlock::iterator BI = BB->begin();2675for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)2676ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);2677// Clone the non-phi instructions of BB into PredBB, keeping track of the2678// mapping and using it to remap operands in the cloned instructions.2679for (; BI != BB->end(); ++BI) {2680Instruction *New = BI->clone();2681New->insertInto(PredBB, OldPredBranch->getIterator());26822683// Remap operands to patch up intra-block references.2684for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)2685if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {2686ValueToValueMapTy::iterator I = ValueMapping.find(Inst);2687if (I != ValueMapping.end())2688New->setOperand(i, I->second);2689}26902691// Remap debug variable operands.2692remapDebugVariable(ValueMapping, New);26932694// If this instruction can be simplified after the operands are updated,2695// just use the simplified value instead. This frequently happens due to2696// phi translation.2697if (Value *IV = simplifyInstruction(2698New,2699{BB->getDataLayout(), TLI, nullptr, nullptr, New})) {2700ValueMapping[&*BI] = IV;2701if (!New->mayHaveSideEffects()) {2702New->eraseFromParent();2703New = nullptr;2704// Clone debug-info on the elided instruction to the destination2705// position.2706OldPredBranch->cloneDebugInfoFrom(&*BI, std::nullopt, true);2707}2708} else {2709ValueMapping[&*BI] = New;2710}2711if (New) {2712// Otherwise, insert the new instruction into the block.2713New->setName(BI->getName());2714// Clone across any debug-info attached to the old instruction.2715New->cloneDebugInfoFrom(&*BI);2716// Update Dominance from simplified New instruction operands.2717for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)2718if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))2719Updates.push_back({DominatorTree::Insert, PredBB, SuccBB});2720}2721}27222723// Check to see if the targets of the branch had PHI nodes. If so, we need to2724// add entries to the PHI nodes for branch from PredBB now.2725BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());2726addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,2727ValueMapping);2728addPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,2729ValueMapping);27302731updateSSA(BB, PredBB, ValueMapping);27322733// PredBB no longer jumps to BB, remove entries in the PHI node for the edge2734// that we nuked.2735BB->removePredecessor(PredBB, true);27362737// Remove the unconditional branch at the end of the PredBB block.2738OldPredBranch->eraseFromParent();2739if (auto *BPI = getBPI())2740BPI->copyEdgeProbabilities(BB, PredBB);2741DTU->applyUpdatesPermissive(Updates);27422743++NumDupes;2744return true;2745}27462747// Pred is a predecessor of BB with an unconditional branch to BB. SI is2748// a Select instruction in Pred. BB has other predecessors and SI is used in2749// a PHI node in BB. SI has no other use.2750// A new basic block, NewBB, is created and SI is converted to compare and2751// conditional branch. SI is erased from parent.2752void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,2753SelectInst *SI, PHINode *SIUse,2754unsigned Idx) {2755// Expand the select.2756//2757// Pred --2758// | v2759// | NewBB2760// | |2761// |-----2762// v2763// BB2764BranchInst *PredTerm = cast<BranchInst>(Pred->getTerminator());2765BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold",2766BB->getParent(), BB);2767// Move the unconditional branch to NewBB.2768PredTerm->removeFromParent();2769PredTerm->insertInto(NewBB, NewBB->end());2770// Create a conditional branch and update PHI nodes.2771auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred);2772BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc());2773BI->copyMetadata(*SI, {LLVMContext::MD_prof});2774SIUse->setIncomingValue(Idx, SI->getFalseValue());2775SIUse->addIncoming(SI->getTrueValue(), NewBB);27762777uint64_t TrueWeight = 1;2778uint64_t FalseWeight = 1;2779// Copy probabilities from 'SI' to created conditional branch in 'Pred'.2780if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&2781(TrueWeight + FalseWeight) != 0) {2782SmallVector<BranchProbability, 2> BP;2783BP.emplace_back(BranchProbability::getBranchProbability(2784TrueWeight, TrueWeight + FalseWeight));2785BP.emplace_back(BranchProbability::getBranchProbability(2786FalseWeight, TrueWeight + FalseWeight));2787// Update BPI if exists.2788if (auto *BPI = getBPI())2789BPI->setEdgeProbability(Pred, BP);2790}2791// Set the block frequency of NewBB.2792if (auto *BFI = getBFI()) {2793if ((TrueWeight + FalseWeight) == 0) {2794TrueWeight = 1;2795FalseWeight = 1;2796}2797BranchProbability PredToNewBBProb = BranchProbability::getBranchProbability(2798TrueWeight, TrueWeight + FalseWeight);2799auto NewBBFreq = BFI->getBlockFreq(Pred) * PredToNewBBProb;2800BFI->setBlockFreq(NewBB, NewBBFreq);2801}28022803// The select is now dead.2804SI->eraseFromParent();2805DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},2806{DominatorTree::Insert, Pred, NewBB}});28072808// Update any other PHI nodes in BB.2809for (BasicBlock::iterator BI = BB->begin();2810PHINode *Phi = dyn_cast<PHINode>(BI); ++BI)2811if (Phi != SIUse)2812Phi->addIncoming(Phi->getIncomingValueForBlock(Pred), NewBB);2813}28142815bool JumpThreadingPass::tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB) {2816PHINode *CondPHI = dyn_cast<PHINode>(SI->getCondition());28172818if (!CondPHI || CondPHI->getParent() != BB)2819return false;28202821for (unsigned I = 0, E = CondPHI->getNumIncomingValues(); I != E; ++I) {2822BasicBlock *Pred = CondPHI->getIncomingBlock(I);2823SelectInst *PredSI = dyn_cast<SelectInst>(CondPHI->getIncomingValue(I));28242825// The second and third condition can be potentially relaxed. Currently2826// the conditions help to simplify the code and allow us to reuse existing2827// code, developed for tryToUnfoldSelect(CmpInst *, BasicBlock *)2828if (!PredSI || PredSI->getParent() != Pred || !PredSI->hasOneUse())2829continue;28302831BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());2832if (!PredTerm || !PredTerm->isUnconditional())2833continue;28342835unfoldSelectInstr(Pred, BB, PredSI, CondPHI, I);2836return true;2837}2838return false;2839}28402841/// tryToUnfoldSelect - Look for blocks of the form2842/// bb1:2843/// %a = select2844/// br bb22845///2846/// bb2:2847/// %p = phi [%a, %bb1] ...2848/// %c = icmp %p2849/// br i1 %c2850///2851/// And expand the select into a branch structure if one of its arms allows %c2852/// to be folded. This later enables threading from bb1 over bb2.2853bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {2854BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());2855PHINode *CondLHS = dyn_cast<PHINode>(CondCmp->getOperand(0));2856Constant *CondRHS = cast<Constant>(CondCmp->getOperand(1));28572858if (!CondBr || !CondBr->isConditional() || !CondLHS ||2859CondLHS->getParent() != BB)2860return false;28612862for (unsigned I = 0, E = CondLHS->getNumIncomingValues(); I != E; ++I) {2863BasicBlock *Pred = CondLHS->getIncomingBlock(I);2864SelectInst *SI = dyn_cast<SelectInst>(CondLHS->getIncomingValue(I));28652866// Look if one of the incoming values is a select in the corresponding2867// predecessor.2868if (!SI || SI->getParent() != Pred || !SI->hasOneUse())2869continue;28702871BranchInst *PredTerm = dyn_cast<BranchInst>(Pred->getTerminator());2872if (!PredTerm || !PredTerm->isUnconditional())2873continue;28742875// Now check if one of the select values would allow us to constant fold the2876// terminator in BB. We don't do the transform if both sides fold, those2877// cases will be threaded in any case.2878Constant *LHSRes =2879LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),2880CondRHS, Pred, BB, CondCmp);2881Constant *RHSRes =2882LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),2883CondRHS, Pred, BB, CondCmp);2884if ((LHSRes || RHSRes) && LHSRes != RHSRes) {2885unfoldSelectInstr(Pred, BB, SI, CondLHS, I);2886return true;2887}2888}2889return false;2890}28912892/// tryToUnfoldSelectInCurrBB - Look for PHI/Select or PHI/CMP/Select in the2893/// same BB in the form2894/// bb:2895/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...2896/// %s = select %p, trueval, falseval2897///2898/// or2899///2900/// bb:2901/// %p = phi [0, %bb1], [1, %bb2], [0, %bb3], [1, %bb4], ...2902/// %c = cmp %p, 02903/// %s = select %c, trueval, falseval2904///2905/// And expand the select into a branch structure. This later enables2906/// jump-threading over bb in this pass.2907///2908/// Using the similar approach of SimplifyCFG::FoldCondBranchOnPHI(), unfold2909/// select if the associated PHI has at least one constant. If the unfolded2910/// select is not jump-threaded, it will be folded again in the later2911/// optimizations.2912bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {2913// This transform would reduce the quality of msan diagnostics.2914// Disable this transform under MemorySanitizer.2915if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))2916return false;29172918// If threading this would thread across a loop header, don't thread the edge.2919// See the comments above findLoopHeaders for justifications and caveats.2920if (LoopHeaders.count(BB))2921return false;29222923for (BasicBlock::iterator BI = BB->begin();2924PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {2925// Look for a Phi having at least one constant incoming value.2926if (llvm::all_of(PN->incoming_values(),2927[](Value *V) { return !isa<ConstantInt>(V); }))2928continue;29292930auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) {2931using namespace PatternMatch;29322933// Check if SI is in BB and use V as condition.2934if (SI->getParent() != BB)2935return false;2936Value *Cond = SI->getCondition();2937bool IsAndOr = match(SI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));2938return Cond && Cond == V && Cond->getType()->isIntegerTy(1) && !IsAndOr;2939};29402941SelectInst *SI = nullptr;2942for (Use &U : PN->uses()) {2943if (ICmpInst *Cmp = dyn_cast<ICmpInst>(U.getUser())) {2944// Look for a ICmp in BB that compares PN with a constant and is the2945// condition of a Select.2946if (Cmp->getParent() == BB && Cmp->hasOneUse() &&2947isa<ConstantInt>(Cmp->getOperand(1 - U.getOperandNo())))2948if (SelectInst *SelectI = dyn_cast<SelectInst>(Cmp->user_back()))2949if (isUnfoldCandidate(SelectI, Cmp->use_begin()->get())) {2950SI = SelectI;2951break;2952}2953} else if (SelectInst *SelectI = dyn_cast<SelectInst>(U.getUser())) {2954// Look for a Select in BB that uses PN as condition.2955if (isUnfoldCandidate(SelectI, U.get())) {2956SI = SelectI;2957break;2958}2959}2960}29612962if (!SI)2963continue;2964// Expand the select.2965Value *Cond = SI->getCondition();2966if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI))2967Cond = new FreezeInst(Cond, "cond.fr", SI->getIterator());2968MDNode *BranchWeights = getBranchWeightMDNode(*SI);2969Instruction *Term =2970SplitBlockAndInsertIfThen(Cond, SI, false, BranchWeights);2971BasicBlock *SplitBB = SI->getParent();2972BasicBlock *NewBB = Term->getParent();2973PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI->getIterator());2974NewPN->addIncoming(SI->getTrueValue(), Term->getParent());2975NewPN->addIncoming(SI->getFalseValue(), BB);2976NewPN->setDebugLoc(SI->getDebugLoc());2977SI->replaceAllUsesWith(NewPN);2978SI->eraseFromParent();2979// NewBB and SplitBB are newly created blocks which require insertion.2980std::vector<DominatorTree::UpdateType> Updates;2981Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3);2982Updates.push_back({DominatorTree::Insert, BB, SplitBB});2983Updates.push_back({DominatorTree::Insert, BB, NewBB});2984Updates.push_back({DominatorTree::Insert, NewBB, SplitBB});2985// BB's successors were moved to SplitBB, update DTU accordingly.2986for (auto *Succ : successors(SplitBB)) {2987Updates.push_back({DominatorTree::Delete, BB, Succ});2988Updates.push_back({DominatorTree::Insert, SplitBB, Succ});2989}2990DTU->applyUpdatesPermissive(Updates);2991return true;2992}2993return false;2994}29952996/// Try to propagate a guard from the current BB into one of its predecessors2997/// in case if another branch of execution implies that the condition of this2998/// guard is always true. Currently we only process the simplest case that2999/// looks like:3000///3001/// Start:3002/// %cond = ...3003/// br i1 %cond, label %T1, label %F13004/// T1:3005/// br label %Merge3006/// F1:3007/// br label %Merge3008/// Merge:3009/// %condGuard = ...3010/// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]3011///3012/// And cond either implies condGuard or !condGuard. In this case all the3013/// instructions before the guard can be duplicated in both branches, and the3014/// guard is then threaded to one of them.3015bool JumpThreadingPass::processGuards(BasicBlock *BB) {3016using namespace PatternMatch;30173018// We only want to deal with two predecessors.3019BasicBlock *Pred1, *Pred2;3020auto PI = pred_begin(BB), PE = pred_end(BB);3021if (PI == PE)3022return false;3023Pred1 = *PI++;3024if (PI == PE)3025return false;3026Pred2 = *PI++;3027if (PI != PE)3028return false;3029if (Pred1 == Pred2)3030return false;30313032// Try to thread one of the guards of the block.3033// TODO: Look up deeper than to immediate predecessor?3034auto *Parent = Pred1->getSinglePredecessor();3035if (!Parent || Parent != Pred2->getSinglePredecessor())3036return false;30373038if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))3039for (auto &I : *BB)3040if (isGuard(&I) && threadGuard(BB, cast<IntrinsicInst>(&I), BI))3041return true;30423043return false;3044}30453046/// Try to propagate the guard from BB which is the lower block of a diamond3047/// to one of its branches, in case if diamond's condition implies guard's3048/// condition.3049bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,3050BranchInst *BI) {3051assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");3052assert(BI->isConditional() && "Unconditional branch has 2 successors?");3053Value *GuardCond = Guard->getArgOperand(0);3054Value *BranchCond = BI->getCondition();3055BasicBlock *TrueDest = BI->getSuccessor(0);3056BasicBlock *FalseDest = BI->getSuccessor(1);30573058auto &DL = BB->getDataLayout();3059bool TrueDestIsSafe = false;3060bool FalseDestIsSafe = false;30613062// True dest is safe if BranchCond => GuardCond.3063auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);3064if (Impl && *Impl)3065TrueDestIsSafe = true;3066else {3067// False dest is safe if !BranchCond => GuardCond.3068Impl = isImpliedCondition(BranchCond, GuardCond, DL, /* LHSIsTrue */ false);3069if (Impl && *Impl)3070FalseDestIsSafe = true;3071}30723073if (!TrueDestIsSafe && !FalseDestIsSafe)3074return false;30753076BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;3077BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;30783079ValueToValueMapTy UnguardedMapping, GuardedMapping;3080Instruction *AfterGuard = Guard->getNextNode();3081unsigned Cost =3082getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);3083if (Cost > BBDupThreshold)3084return false;3085// Duplicate all instructions before the guard and the guard itself to the3086// branch where implication is not proved.3087BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween(3088BB, PredGuardedBlock, AfterGuard, GuardedMapping, *DTU);3089assert(GuardedBlock && "Could not create the guarded block?");3090// Duplicate all instructions before the guard in the unguarded branch.3091// Since we have successfully duplicated the guarded block and this block3092// has fewer instructions, we expect it to succeed.3093BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween(3094BB, PredUnguardedBlock, Guard, UnguardedMapping, *DTU);3095assert(UnguardedBlock && "Could not create the unguarded block?");3096LLVM_DEBUG(dbgs() << "Moved guard " << *Guard << " to block "3097<< GuardedBlock->getName() << "\n");3098// Some instructions before the guard may still have uses. For them, we need3099// to create Phi nodes merging their copies in both guarded and unguarded3100// branches. Those instructions that have no uses can be just removed.3101SmallVector<Instruction *, 4> ToRemove;3102for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)3103if (!isa<PHINode>(&*BI))3104ToRemove.push_back(&*BI);31053106BasicBlock::iterator InsertionPoint = BB->getFirstInsertionPt();3107assert(InsertionPoint != BB->end() && "Empty block?");3108// Substitute with Phis & remove.3109for (auto *Inst : reverse(ToRemove)) {3110if (!Inst->use_empty()) {3111PHINode *NewPN = PHINode::Create(Inst->getType(), 2);3112NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);3113NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);3114NewPN->setDebugLoc(Inst->getDebugLoc());3115NewPN->insertBefore(InsertionPoint);3116Inst->replaceAllUsesWith(NewPN);3117}3118Inst->dropDbgRecords();3119Inst->eraseFromParent();3120}3121return true;3122}31233124PreservedAnalyses JumpThreadingPass::getPreservedAnalysis() const {3125PreservedAnalyses PA;3126PA.preserve<LazyValueAnalysis>();3127PA.preserve<DominatorTreeAnalysis>();31283129// TODO: We would like to preserve BPI/BFI. Enable once all paths update them.3130// TODO: Would be nice to verify BPI/BFI consistency as well.3131return PA;3132}31333134template <typename AnalysisT>3135typename AnalysisT::Result *JumpThreadingPass::runExternalAnalysis() {3136assert(FAM && "Can't run external analysis without FunctionAnalysisManager");31373138// If there were no changes since last call to 'runExternalAnalysis' then all3139// analysis is either up to date or explicitly invalidated. Just go ahead and3140// run the "external" analysis.3141if (!ChangedSinceLastAnalysisUpdate) {3142assert(!DTU->hasPendingUpdates() &&3143"Lost update of 'ChangedSinceLastAnalysisUpdate'?");3144// Run the "external" analysis.3145return &FAM->getResult<AnalysisT>(*F);3146}3147ChangedSinceLastAnalysisUpdate = false;31483149auto PA = getPreservedAnalysis();3150// TODO: This shouldn't be needed once 'getPreservedAnalysis' reports BPI/BFI3151// as preserved.3152PA.preserve<BranchProbabilityAnalysis>();3153PA.preserve<BlockFrequencyAnalysis>();3154// Report everything except explicitly preserved as invalid.3155FAM->invalidate(*F, PA);3156// Update DT/PDT.3157DTU->flush();3158// Make sure DT/PDT are valid before running "external" analysis.3159assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Fast));3160assert((!DTU->hasPostDomTree() ||3161DTU->getPostDomTree().verify(3162PostDominatorTree::VerificationLevel::Fast)));3163// Run the "external" analysis.3164auto *Result = &FAM->getResult<AnalysisT>(*F);3165// Update analysis JumpThreading depends on and not explicitly preserved.3166TTI = &FAM->getResult<TargetIRAnalysis>(*F);3167TLI = &FAM->getResult<TargetLibraryAnalysis>(*F);3168AA = &FAM->getResult<AAManager>(*F);31693170return Result;3171}31723173BranchProbabilityInfo *JumpThreadingPass::getBPI() {3174if (!BPI) {3175assert(FAM && "Can't create BPI without FunctionAnalysisManager");3176BPI = FAM->getCachedResult<BranchProbabilityAnalysis>(*F);3177}3178return *BPI;3179}31803181BlockFrequencyInfo *JumpThreadingPass::getBFI() {3182if (!BFI) {3183assert(FAM && "Can't create BFI without FunctionAnalysisManager");3184BFI = FAM->getCachedResult<BlockFrequencyAnalysis>(*F);3185}3186return *BFI;3187}31883189// Important note on validity of BPI/BFI. JumpThreading tries to preserve3190// BPI/BFI as it goes. Thus if cached instance exists it will be updated.3191// Otherwise, new instance of BPI/BFI is created (up to date by definition).3192BranchProbabilityInfo *JumpThreadingPass::getOrCreateBPI(bool Force) {3193auto *Res = getBPI();3194if (Res)3195return Res;31963197if (Force)3198BPI = runExternalAnalysis<BranchProbabilityAnalysis>();31993200return *BPI;3201}32023203BlockFrequencyInfo *JumpThreadingPass::getOrCreateBFI(bool Force) {3204auto *Res = getBFI();3205if (Res)3206return Res;32073208if (Force)3209BFI = runExternalAnalysis<BlockFrequencyAnalysis>();32103211return *BFI;3212}321332143215