Path: blob/main/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
35233 views
//===- BranchFolding.cpp - Fold machine code branch instructions ----------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This pass forwards branches to unconditional branches to make them branch9// directly to the target block. This pass often results in dead MBB's, which10// it then removes.11//12// Note that this pass must be run after register allocation, it cannot handle13// SSA form. It also must handle virtual registers for targets that emit virtual14// ISA (e.g. NVPTX).15//16//===----------------------------------------------------------------------===//1718#include "BranchFolding.h"19#include "llvm/ADT/BitVector.h"20#include "llvm/ADT/STLExtras.h"21#include "llvm/ADT/SmallSet.h"22#include "llvm/ADT/SmallVector.h"23#include "llvm/ADT/Statistic.h"24#include "llvm/Analysis/ProfileSummaryInfo.h"25#include "llvm/CodeGen/Analysis.h"26#include "llvm/CodeGen/MBFIWrapper.h"27#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"28#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"29#include "llvm/CodeGen/MachineFunction.h"30#include "llvm/CodeGen/MachineFunctionPass.h"31#include "llvm/CodeGen/MachineInstr.h"32#include "llvm/CodeGen/MachineInstrBuilder.h"33#include "llvm/CodeGen/MachineJumpTableInfo.h"34#include "llvm/CodeGen/MachineLoopInfo.h"35#include "llvm/CodeGen/MachineOperand.h"36#include "llvm/CodeGen/MachineRegisterInfo.h"37#include "llvm/CodeGen/MachineSizeOpts.h"38#include "llvm/CodeGen/TargetInstrInfo.h"39#include "llvm/CodeGen/TargetOpcodes.h"40#include "llvm/CodeGen/TargetPassConfig.h"41#include "llvm/CodeGen/TargetRegisterInfo.h"42#include "llvm/CodeGen/TargetSubtargetInfo.h"43#include "llvm/IR/DebugInfoMetadata.h"44#include "llvm/IR/DebugLoc.h"45#include "llvm/IR/Function.h"46#include "llvm/InitializePasses.h"47#include "llvm/MC/LaneBitmask.h"48#include "llvm/MC/MCRegisterInfo.h"49#include "llvm/Pass.h"50#include "llvm/Support/BlockFrequency.h"51#include "llvm/Support/BranchProbability.h"52#include "llvm/Support/CommandLine.h"53#include "llvm/Support/Debug.h"54#include "llvm/Support/ErrorHandling.h"55#include "llvm/Support/raw_ostream.h"56#include "llvm/Target/TargetMachine.h"57#include <cassert>58#include <cstddef>59#include <iterator>60#include <numeric>6162using namespace llvm;6364#define DEBUG_TYPE "branch-folder"6566STATISTIC(NumDeadBlocks, "Number of dead blocks removed");67STATISTIC(NumBranchOpts, "Number of branches optimized");68STATISTIC(NumTailMerge , "Number of block tails merged");69STATISTIC(NumHoist , "Number of times common instructions are hoisted");70STATISTIC(NumTailCalls, "Number of tail calls optimized");7172static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",73cl::init(cl::BOU_UNSET), cl::Hidden);7475// Throttle for huge numbers of predecessors (compile speed problems)76static cl::opt<unsigned>77TailMergeThreshold("tail-merge-threshold",78cl::desc("Max number of predecessors to consider tail merging"),79cl::init(150), cl::Hidden);8081// Heuristic for tail merging (and, inversely, tail duplication).82static cl::opt<unsigned>83TailMergeSize("tail-merge-size",84cl::desc("Min number of instructions to consider tail merging"),85cl::init(3), cl::Hidden);8687namespace {8889/// BranchFolderPass - Wrap branch folder in a machine function pass.90class BranchFolderPass : public MachineFunctionPass {91public:92static char ID;9394explicit BranchFolderPass(): MachineFunctionPass(ID) {}9596bool runOnMachineFunction(MachineFunction &MF) override;9798void getAnalysisUsage(AnalysisUsage &AU) const override {99AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();100AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();101AU.addRequired<ProfileSummaryInfoWrapperPass>();102AU.addRequired<TargetPassConfig>();103MachineFunctionPass::getAnalysisUsage(AU);104}105106MachineFunctionProperties getRequiredProperties() const override {107return MachineFunctionProperties().set(108MachineFunctionProperties::Property::NoPHIs);109}110};111112} // end anonymous namespace113114char BranchFolderPass::ID = 0;115116char &llvm::BranchFolderPassID = BranchFolderPass::ID;117118INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE,119"Control Flow Optimizer", false, false)120121bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {122if (skipFunction(MF.getFunction()))123return false;124125TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();126// TailMerge can create jump into if branches that make CFG irreducible for127// HW that requires structurized CFG.128bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&129PassConfig->getEnableTailMerge();130MBFIWrapper MBBFreqInfo(131getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());132BranchFolder Folder(133EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,134getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(),135&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());136return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),137MF.getSubtarget().getRegisterInfo());138}139140BranchFolder::BranchFolder(bool DefaultEnableTailMerge, bool CommonHoist,141MBFIWrapper &FreqInfo,142const MachineBranchProbabilityInfo &ProbInfo,143ProfileSummaryInfo *PSI, unsigned MinTailLength)144: EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength),145MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) {146switch (FlagEnableTailMerge) {147case cl::BOU_UNSET:148EnableTailMerge = DefaultEnableTailMerge;149break;150case cl::BOU_TRUE: EnableTailMerge = true; break;151case cl::BOU_FALSE: EnableTailMerge = false; break;152}153}154155void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {156assert(MBB->pred_empty() && "MBB must be dead!");157LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);158159MachineFunction *MF = MBB->getParent();160// drop all successors.161while (!MBB->succ_empty())162MBB->removeSuccessor(MBB->succ_end()-1);163164// Avoid matching if this pointer gets reused.165TriedMerging.erase(MBB);166167// Update call site info.168for (const MachineInstr &MI : *MBB)169if (MI.shouldUpdateCallSiteInfo())170MF->eraseCallSiteInfo(&MI);171172// Remove the block.173MF->erase(MBB);174EHScopeMembership.erase(MBB);175if (MLI)176MLI->removeBlock(MBB);177}178179bool BranchFolder::OptimizeFunction(MachineFunction &MF,180const TargetInstrInfo *tii,181const TargetRegisterInfo *tri,182MachineLoopInfo *mli, bool AfterPlacement) {183if (!tii) return false;184185TriedMerging.clear();186187MachineRegisterInfo &MRI = MF.getRegInfo();188AfterBlockPlacement = AfterPlacement;189TII = tii;190TRI = tri;191MLI = mli;192this->MRI = &MRI;193194if (MinCommonTailLength == 0) {195MinCommonTailLength = TailMergeSize.getNumOccurrences() > 0196? TailMergeSize197: TII->getTailMergeSize(MF);198}199200UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF);201if (!UpdateLiveIns)202MRI.invalidateLiveness();203204bool MadeChange = false;205206// Recalculate EH scope membership.207EHScopeMembership = getEHScopeMembership(MF);208209bool MadeChangeThisIteration = true;210while (MadeChangeThisIteration) {211MadeChangeThisIteration = TailMergeBlocks(MF);212// No need to clean up if tail merging does not change anything after the213// block placement.214if (!AfterBlockPlacement || MadeChangeThisIteration)215MadeChangeThisIteration |= OptimizeBranches(MF);216if (EnableHoistCommonCode)217MadeChangeThisIteration |= HoistCommonCode(MF);218MadeChange |= MadeChangeThisIteration;219}220221// See if any jump tables have become dead as the code generator222// did its thing.223MachineJumpTableInfo *JTI = MF.getJumpTableInfo();224if (!JTI)225return MadeChange;226227// Walk the function to find jump tables that are live.228BitVector JTIsLive(JTI->getJumpTables().size());229for (const MachineBasicBlock &BB : MF) {230for (const MachineInstr &I : BB)231for (const MachineOperand &Op : I.operands()) {232if (!Op.isJTI()) continue;233234// Remember that this JT is live.235JTIsLive.set(Op.getIndex());236}237}238239// Finally, remove dead jump tables. This happens when the240// indirect jump was unreachable (and thus deleted).241for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)242if (!JTIsLive.test(i)) {243JTI->RemoveJumpTable(i);244MadeChange = true;245}246247return MadeChange;248}249250//===----------------------------------------------------------------------===//251// Tail Merging of Blocks252//===----------------------------------------------------------------------===//253254/// HashMachineInstr - Compute a hash value for MI and its operands.255static unsigned HashMachineInstr(const MachineInstr &MI) {256unsigned Hash = MI.getOpcode();257for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {258const MachineOperand &Op = MI.getOperand(i);259260// Merge in bits from the operand if easy. We can't use MachineOperand's261// hash_code here because it's not deterministic and we sort by hash value262// later.263unsigned OperandHash = 0;264switch (Op.getType()) {265case MachineOperand::MO_Register:266OperandHash = Op.getReg();267break;268case MachineOperand::MO_Immediate:269OperandHash = Op.getImm();270break;271case MachineOperand::MO_MachineBasicBlock:272OperandHash = Op.getMBB()->getNumber();273break;274case MachineOperand::MO_FrameIndex:275case MachineOperand::MO_ConstantPoolIndex:276case MachineOperand::MO_JumpTableIndex:277OperandHash = Op.getIndex();278break;279case MachineOperand::MO_GlobalAddress:280case MachineOperand::MO_ExternalSymbol:281// Global address / external symbol are too hard, don't bother, but do282// pull in the offset.283OperandHash = Op.getOffset();284break;285default:286break;287}288289Hash += ((OperandHash << 3) | Op.getType()) << (i & 31);290}291return Hash;292}293294/// HashEndOfMBB - Hash the last instruction in the MBB.295static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {296MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(false);297if (I == MBB.end())298return 0;299300return HashMachineInstr(*I);301}302303/// Whether MI should be counted as an instruction when calculating common tail.304static bool countsAsInstruction(const MachineInstr &MI) {305return !(MI.isDebugInstr() || MI.isCFIInstruction());306}307308/// Iterate backwards from the given iterator \p I, towards the beginning of the309/// block. If a MI satisfying 'countsAsInstruction' is found, return an iterator310/// pointing to that MI. If no such MI is found, return the end iterator.311static MachineBasicBlock::iterator312skipBackwardPastNonInstructions(MachineBasicBlock::iterator I,313MachineBasicBlock *MBB) {314while (I != MBB->begin()) {315--I;316if (countsAsInstruction(*I))317return I;318}319return MBB->end();320}321322/// Given two machine basic blocks, return the number of instructions they323/// actually have in common together at their end. If a common tail is found (at324/// least by one instruction), then iterators for the first shared instruction325/// in each block are returned as well.326///327/// Non-instructions according to countsAsInstruction are ignored.328static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,329MachineBasicBlock *MBB2,330MachineBasicBlock::iterator &I1,331MachineBasicBlock::iterator &I2) {332MachineBasicBlock::iterator MBBI1 = MBB1->end();333MachineBasicBlock::iterator MBBI2 = MBB2->end();334335unsigned TailLen = 0;336while (true) {337MBBI1 = skipBackwardPastNonInstructions(MBBI1, MBB1);338MBBI2 = skipBackwardPastNonInstructions(MBBI2, MBB2);339if (MBBI1 == MBB1->end() || MBBI2 == MBB2->end())340break;341if (!MBBI1->isIdenticalTo(*MBBI2) ||342// FIXME: This check is dubious. It's used to get around a problem where343// people incorrectly expect inline asm directives to remain in the same344// relative order. This is untenable because normal compiler345// optimizations (like this one) may reorder and/or merge these346// directives.347MBBI1->isInlineAsm()) {348break;349}350if (MBBI1->getFlag(MachineInstr::NoMerge) ||351MBBI2->getFlag(MachineInstr::NoMerge))352break;353++TailLen;354I1 = MBBI1;355I2 = MBBI2;356}357358return TailLen;359}360361void BranchFolder::replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,362MachineBasicBlock &NewDest) {363if (UpdateLiveIns) {364// OldInst should always point to an instruction.365MachineBasicBlock &OldMBB = *OldInst->getParent();366LiveRegs.clear();367LiveRegs.addLiveOuts(OldMBB);368// Move backward to the place where will insert the jump.369MachineBasicBlock::iterator I = OldMBB.end();370do {371--I;372LiveRegs.stepBackward(*I);373} while (I != OldInst);374375// Merging the tails may have switched some undef operand to non-undef ones.376// Add IMPLICIT_DEFS into OldMBB as necessary to have a definition of the377// register.378for (MachineBasicBlock::RegisterMaskPair P : NewDest.liveins()) {379// We computed the liveins with computeLiveIn earlier and should only see380// full registers:381assert(P.LaneMask == LaneBitmask::getAll() &&382"Can only handle full register.");383MCPhysReg Reg = P.PhysReg;384if (!LiveRegs.available(*MRI, Reg))385continue;386DebugLoc DL;387BuildMI(OldMBB, OldInst, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Reg);388}389}390391TII->ReplaceTailWithBranchTo(OldInst, &NewDest);392++NumTailMerge;393}394395MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,396MachineBasicBlock::iterator BBI1,397const BasicBlock *BB) {398if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1))399return nullptr;400401MachineFunction &MF = *CurMBB.getParent();402403// Create the fall-through block.404MachineFunction::iterator MBBI = CurMBB.getIterator();405MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(BB);406CurMBB.getParent()->insert(++MBBI, NewMBB);407408// Move all the successors of this block to the specified block.409NewMBB->transferSuccessors(&CurMBB);410411// Add an edge from CurMBB to NewMBB for the fall-through.412CurMBB.addSuccessor(NewMBB);413414// Splice the code over.415NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());416417// NewMBB belongs to the same loop as CurMBB.418if (MLI)419if (MachineLoop *ML = MLI->getLoopFor(&CurMBB))420ML->addBasicBlockToLoop(NewMBB, *MLI);421422// NewMBB inherits CurMBB's block frequency.423MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));424425if (UpdateLiveIns)426computeAndAddLiveIns(LiveRegs, *NewMBB);427428// Add the new block to the EH scope.429const auto &EHScopeI = EHScopeMembership.find(&CurMBB);430if (EHScopeI != EHScopeMembership.end()) {431auto n = EHScopeI->second;432EHScopeMembership[NewMBB] = n;433}434435return NewMBB;436}437438/// EstimateRuntime - Make a rough estimate for how long it will take to run439/// the specified code.440static unsigned EstimateRuntime(MachineBasicBlock::iterator I,441MachineBasicBlock::iterator E) {442unsigned Time = 0;443for (; I != E; ++I) {444if (!countsAsInstruction(*I))445continue;446if (I->isCall())447Time += 10;448else if (I->mayLoadOrStore())449Time += 2;450else451++Time;452}453return Time;454}455456// CurMBB needs to add an unconditional branch to SuccMBB (we removed these457// branches temporarily for tail merging). In the case where CurMBB ends458// with a conditional branch to the next block, optimize by reversing the459// test and conditionally branching to SuccMBB instead.460static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,461const TargetInstrInfo *TII, const DebugLoc &BranchDL) {462MachineFunction *MF = CurMBB->getParent();463MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB));464MachineBasicBlock *TBB = nullptr, *FBB = nullptr;465SmallVector<MachineOperand, 4> Cond;466DebugLoc dl = CurMBB->findBranchDebugLoc();467if (!dl)468dl = BranchDL;469if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {470MachineBasicBlock *NextBB = &*I;471if (TBB == NextBB && !Cond.empty() && !FBB) {472if (!TII->reverseBranchCondition(Cond)) {473TII->removeBranch(*CurMBB);474TII->insertBranch(*CurMBB, SuccBB, nullptr, Cond, dl);475return;476}477}478}479TII->insertBranch(*CurMBB, SuccBB, nullptr,480SmallVector<MachineOperand, 0>(), dl);481}482483bool484BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {485if (getHash() < o.getHash())486return true;487if (getHash() > o.getHash())488return false;489if (getBlock()->getNumber() < o.getBlock()->getNumber())490return true;491if (getBlock()->getNumber() > o.getBlock()->getNumber())492return false;493return false;494}495496/// CountTerminators - Count the number of terminators in the given497/// block and set I to the position of the first non-terminator, if there498/// is one, or MBB->end() otherwise.499static unsigned CountTerminators(MachineBasicBlock *MBB,500MachineBasicBlock::iterator &I) {501I = MBB->end();502unsigned NumTerms = 0;503while (true) {504if (I == MBB->begin()) {505I = MBB->end();506break;507}508--I;509if (!I->isTerminator()) break;510++NumTerms;511}512return NumTerms;513}514515/// A no successor, non-return block probably ends in unreachable and is cold.516/// Also consider a block that ends in an indirect branch to be a return block,517/// since many targets use plain indirect branches to return.518static bool blockEndsInUnreachable(const MachineBasicBlock *MBB) {519if (!MBB->succ_empty())520return false;521if (MBB->empty())522return true;523return !(MBB->back().isReturn() || MBB->back().isIndirectBranch());524}525526/// ProfitableToMerge - Check if two machine basic blocks have a common tail527/// and decide if it would be profitable to merge those tails. Return the528/// length of the common tail and iterators to the first common instruction529/// in each block.530/// MBB1, MBB2 The blocks to check531/// MinCommonTailLength Minimum size of tail block to be merged.532/// CommonTailLen Out parameter to record the size of the shared tail between533/// MBB1 and MBB2534/// I1, I2 Iterator references that will be changed to point to the first535/// instruction in the common tail shared by MBB1,MBB2536/// SuccBB A common successor of MBB1, MBB2 which are in a canonical form537/// relative to SuccBB538/// PredBB The layout predecessor of SuccBB, if any.539/// EHScopeMembership map from block to EH scope #.540/// AfterPlacement True if we are merging blocks after layout. Stricter541/// thresholds apply to prevent undoing tail-duplication.542static bool543ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,544unsigned MinCommonTailLength, unsigned &CommonTailLen,545MachineBasicBlock::iterator &I1,546MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,547MachineBasicBlock *PredBB,548DenseMap<const MachineBasicBlock *, int> &EHScopeMembership,549bool AfterPlacement,550MBFIWrapper &MBBFreqInfo,551ProfileSummaryInfo *PSI) {552// It is never profitable to tail-merge blocks from two different EH scopes.553if (!EHScopeMembership.empty()) {554auto EHScope1 = EHScopeMembership.find(MBB1);555assert(EHScope1 != EHScopeMembership.end());556auto EHScope2 = EHScopeMembership.find(MBB2);557assert(EHScope2 != EHScopeMembership.end());558if (EHScope1->second != EHScope2->second)559return false;560}561562CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);563if (CommonTailLen == 0)564return false;565LLVM_DEBUG(dbgs() << "Common tail length of " << printMBBReference(*MBB1)566<< " and " << printMBBReference(*MBB2) << " is "567<< CommonTailLen << '\n');568569// Move the iterators to the beginning of the MBB if we only got debug570// instructions before the tail. This is to avoid splitting a block when we571// only got debug instructions before the tail (to be invariant on -g).572if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end(), false) == I1)573I1 = MBB1->begin();574if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end(), false) == I2)575I2 = MBB2->begin();576577bool FullBlockTail1 = I1 == MBB1->begin();578bool FullBlockTail2 = I2 == MBB2->begin();579580// It's almost always profitable to merge any number of non-terminator581// instructions with the block that falls through into the common successor.582// This is true only for a single successor. For multiple successors, we are583// trading a conditional branch for an unconditional one.584// TODO: Re-visit successor size for non-layout tail merging.585if ((MBB1 == PredBB || MBB2 == PredBB) &&586(!AfterPlacement || MBB1->succ_size() == 1)) {587MachineBasicBlock::iterator I;588unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);589if (CommonTailLen > NumTerms)590return true;591}592593// If these are identical non-return blocks with no successors, merge them.594// Such blocks are typically cold calls to noreturn functions like abort, and595// are unlikely to become a fallthrough target after machine block placement.596// Tail merging these blocks is unlikely to create additional unconditional597// branches, and will reduce the size of this cold code.598if (FullBlockTail1 && FullBlockTail2 &&599blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2))600return true;601602// If one of the blocks can be completely merged and happens to be in603// a position where the other could fall through into it, merge any number604// of instructions, because it can be done without a branch.605// TODO: If the blocks are not adjacent, move one of them so that they are?606if (MBB1->isLayoutSuccessor(MBB2) && FullBlockTail2)607return true;608if (MBB2->isLayoutSuccessor(MBB1) && FullBlockTail1)609return true;610611// If both blocks are identical and end in a branch, merge them unless they612// both have a fallthrough predecessor and successor.613// We can only do this after block placement because it depends on whether614// there are fallthroughs, and we don't know until after layout.615if (AfterPlacement && FullBlockTail1 && FullBlockTail2) {616auto BothFallThrough = [](MachineBasicBlock *MBB) {617if (!MBB->succ_empty() && !MBB->canFallThrough())618return false;619MachineFunction::iterator I(MBB);620MachineFunction *MF = MBB->getParent();621return (MBB != &*MF->begin()) && std::prev(I)->canFallThrough();622};623if (!BothFallThrough(MBB1) || !BothFallThrough(MBB2))624return true;625}626627// If both blocks have an unconditional branch temporarily stripped out,628// count that as an additional common instruction for the following629// heuristics. This heuristic is only accurate for single-succ blocks, so to630// make sure that during layout merging and duplicating don't crash, we check631// for that when merging during layout.632unsigned EffectiveTailLen = CommonTailLen;633if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&634(MBB1->succ_size() == 1 || !AfterPlacement) &&635!MBB1->back().isBarrier() &&636!MBB2->back().isBarrier())637++EffectiveTailLen;638639// Check if the common tail is long enough to be worthwhile.640if (EffectiveTailLen >= MinCommonTailLength)641return true;642643// If we are optimizing for code size, 2 instructions in common is enough if644// we don't have to split a block. At worst we will be introducing 1 new645// branch instruction, which is likely to be smaller than the 2646// instructions that would be deleted in the merge.647MachineFunction *MF = MBB1->getParent();648bool OptForSize =649MF->getFunction().hasOptSize() ||650(llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) &&651llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo));652return EffectiveTailLen >= 2 && OptForSize &&653(FullBlockTail1 || FullBlockTail2);654}655656unsigned BranchFolder::ComputeSameTails(unsigned CurHash,657unsigned MinCommonTailLength,658MachineBasicBlock *SuccBB,659MachineBasicBlock *PredBB) {660unsigned maxCommonTailLength = 0U;661SameTails.clear();662MachineBasicBlock::iterator TrialBBI1, TrialBBI2;663MPIterator HighestMPIter = std::prev(MergePotentials.end());664for (MPIterator CurMPIter = std::prev(MergePotentials.end()),665B = MergePotentials.begin();666CurMPIter != B && CurMPIter->getHash() == CurHash; --CurMPIter) {667for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) {668unsigned CommonTailLen;669if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),670MinCommonTailLength,671CommonTailLen, TrialBBI1, TrialBBI2,672SuccBB, PredBB,673EHScopeMembership,674AfterBlockPlacement, MBBFreqInfo, PSI)) {675if (CommonTailLen > maxCommonTailLength) {676SameTails.clear();677maxCommonTailLength = CommonTailLen;678HighestMPIter = CurMPIter;679SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1));680}681if (HighestMPIter == CurMPIter &&682CommonTailLen == maxCommonTailLength)683SameTails.push_back(SameTailElt(I, TrialBBI2));684}685if (I == B)686break;687}688}689return maxCommonTailLength;690}691692void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,693MachineBasicBlock *SuccBB,694MachineBasicBlock *PredBB,695const DebugLoc &BranchDL) {696MPIterator CurMPIter, B;697for (CurMPIter = std::prev(MergePotentials.end()),698B = MergePotentials.begin();699CurMPIter->getHash() == CurHash; --CurMPIter) {700// Put the unconditional branch back, if we need one.701MachineBasicBlock *CurMBB = CurMPIter->getBlock();702if (SuccBB && CurMBB != PredBB)703FixTail(CurMBB, SuccBB, TII, BranchDL);704if (CurMPIter == B)705break;706}707if (CurMPIter->getHash() != CurHash)708CurMPIter++;709MergePotentials.erase(CurMPIter, MergePotentials.end());710}711712bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,713MachineBasicBlock *SuccBB,714unsigned maxCommonTailLength,715unsigned &commonTailIndex) {716commonTailIndex = 0;717unsigned TimeEstimate = ~0U;718for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {719// Use PredBB if possible; that doesn't require a new branch.720if (SameTails[i].getBlock() == PredBB) {721commonTailIndex = i;722break;723}724// Otherwise, make a (fairly bogus) choice based on estimate of725// how long it will take the various blocks to execute.726unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(),727SameTails[i].getTailStartPos());728if (t <= TimeEstimate) {729TimeEstimate = t;730commonTailIndex = i;731}732}733734MachineBasicBlock::iterator BBI =735SameTails[commonTailIndex].getTailStartPos();736MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();737738LLVM_DEBUG(dbgs() << "\nSplitting " << printMBBReference(*MBB) << ", size "739<< maxCommonTailLength);740741// If the split block unconditionally falls-thru to SuccBB, it will be742// merged. In control flow terms it should then take SuccBB's name. e.g. If743// SuccBB is an inner loop, the common tail is still part of the inner loop.744const BasicBlock *BB = (SuccBB && MBB->succ_size() == 1) ?745SuccBB->getBasicBlock() : MBB->getBasicBlock();746MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB);747if (!newMBB) {748LLVM_DEBUG(dbgs() << "... failed!");749return false;750}751752SameTails[commonTailIndex].setBlock(newMBB);753SameTails[commonTailIndex].setTailStartPos(newMBB->begin());754755// If we split PredBB, newMBB is the new predecessor.756if (PredBB == MBB)757PredBB = newMBB;758759return true;760}761762static void763mergeOperations(MachineBasicBlock::iterator MBBIStartPos,764MachineBasicBlock &MBBCommon) {765MachineBasicBlock *MBB = MBBIStartPos->getParent();766// Note CommonTailLen does not necessarily matches the size of767// the common BB nor all its instructions because of debug768// instructions differences.769unsigned CommonTailLen = 0;770for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos)771++CommonTailLen;772773MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin();774MachineBasicBlock::reverse_iterator MBBIE = MBB->rend();775MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin();776MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend();777778while (CommonTailLen--) {779assert(MBBI != MBBIE && "Reached BB end within common tail length!");780(void)MBBIE;781782if (!countsAsInstruction(*MBBI)) {783++MBBI;784continue;785}786787while ((MBBICommon != MBBIECommon) && !countsAsInstruction(*MBBICommon))788++MBBICommon;789790assert(MBBICommon != MBBIECommon &&791"Reached BB end within common tail length!");792assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!");793794// Merge MMOs from memory operations in the common block.795if (MBBICommon->mayLoadOrStore())796MBBICommon->cloneMergedMemRefs(*MBB->getParent(), {&*MBBICommon, &*MBBI});797// Drop undef flags if they aren't present in all merged instructions.798for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) {799MachineOperand &MO = MBBICommon->getOperand(I);800if (MO.isReg() && MO.isUndef()) {801const MachineOperand &OtherMO = MBBI->getOperand(I);802if (!OtherMO.isUndef())803MO.setIsUndef(false);804}805}806807++MBBI;808++MBBICommon;809}810}811812void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {813MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();814815std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size());816for (unsigned int i = 0 ; i != SameTails.size() ; ++i) {817if (i != commonTailIndex) {818NextCommonInsts[i] = SameTails[i].getTailStartPos();819mergeOperations(SameTails[i].getTailStartPos(), *MBB);820} else {821assert(SameTails[i].getTailStartPos() == MBB->begin() &&822"MBB is not a common tail only block");823}824}825826for (auto &MI : *MBB) {827if (!countsAsInstruction(MI))828continue;829DebugLoc DL = MI.getDebugLoc();830for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) {831if (i == commonTailIndex)832continue;833834auto &Pos = NextCommonInsts[i];835assert(Pos != SameTails[i].getBlock()->end() &&836"Reached BB end within common tail");837while (!countsAsInstruction(*Pos)) {838++Pos;839assert(Pos != SameTails[i].getBlock()->end() &&840"Reached BB end within common tail");841}842assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!");843DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc());844NextCommonInsts[i] = ++Pos;845}846MI.setDebugLoc(DL);847}848849if (UpdateLiveIns) {850LivePhysRegs NewLiveIns(*TRI);851computeLiveIns(NewLiveIns, *MBB);852LiveRegs.init(*TRI);853854// The flag merging may lead to some register uses no longer using the855// <undef> flag, add IMPLICIT_DEFs in the predecessors as necessary.856for (MachineBasicBlock *Pred : MBB->predecessors()) {857LiveRegs.clear();858LiveRegs.addLiveOuts(*Pred);859MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator();860for (Register Reg : NewLiveIns) {861if (!LiveRegs.available(*MRI, Reg))862continue;863864// Skip the register if we are about to add one of its super registers.865// TODO: Common this up with the same logic in addLineIns().866if (any_of(TRI->superregs(Reg), [&](MCPhysReg SReg) {867return NewLiveIns.contains(SReg) && !MRI->isReserved(SReg);868}))869continue;870871DebugLoc DL;872BuildMI(*Pred, InsertBefore, DL, TII->get(TargetOpcode::IMPLICIT_DEF),873Reg);874}875}876877MBB->clearLiveIns();878addLiveIns(*MBB, NewLiveIns);879}880}881882// See if any of the blocks in MergePotentials (which all have SuccBB as a883// successor, or all have no successor if it is null) can be tail-merged.884// If there is a successor, any blocks in MergePotentials that are not885// tail-merged and are not immediately before Succ must have an unconditional886// branch to Succ added (but the predecessor/successor lists need no887// adjustment). The lone predecessor of Succ that falls through into Succ,888// if any, is given in PredBB.889// MinCommonTailLength - Except for the special cases below, tail-merge if890// there are at least this many instructions in common.891bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,892MachineBasicBlock *PredBB,893unsigned MinCommonTailLength) {894bool MadeChange = false;895896LLVM_DEBUG(897dbgs() << "\nTryTailMergeBlocks: ";898for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs()899<< printMBBReference(*MergePotentials[i].getBlock())900<< (i == e - 1 ? "" : ", ");901dbgs() << "\n"; if (SuccBB) {902dbgs() << " with successor " << printMBBReference(*SuccBB) << '\n';903if (PredBB)904dbgs() << " which has fall-through from "905<< printMBBReference(*PredBB) << "\n";906} dbgs() << "Looking for common tails of at least "907<< MinCommonTailLength << " instruction"908<< (MinCommonTailLength == 1 ? "" : "s") << '\n';);909910// Sort by hash value so that blocks with identical end sequences sort911// together.912array_pod_sort(MergePotentials.begin(), MergePotentials.end());913914// Walk through equivalence sets looking for actual exact matches.915while (MergePotentials.size() > 1) {916unsigned CurHash = MergePotentials.back().getHash();917const DebugLoc &BranchDL = MergePotentials.back().getBranchDebugLoc();918919// Build SameTails, identifying the set of blocks with this hash code920// and with the maximum number of instructions in common.921unsigned maxCommonTailLength = ComputeSameTails(CurHash,922MinCommonTailLength,923SuccBB, PredBB);924925// If we didn't find any pair that has at least MinCommonTailLength926// instructions in common, remove all blocks with this hash code and retry.927if (SameTails.empty()) {928RemoveBlocksWithHash(CurHash, SuccBB, PredBB, BranchDL);929continue;930}931932// If one of the blocks is the entire common tail (and is not the entry933// block/an EH pad, which we can't jump to), we can treat all blocks with934// this same tail at once. Use PredBB if that is one of the possibilities,935// as that will not introduce any extra branches.936MachineBasicBlock *EntryBB =937&MergePotentials.front().getBlock()->getParent()->front();938unsigned commonTailIndex = SameTails.size();939// If there are two blocks, check to see if one can be made to fall through940// into the other.941if (SameTails.size() == 2 &&942SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) &&943SameTails[1].tailIsWholeBlock() && !SameTails[1].getBlock()->isEHPad())944commonTailIndex = 1;945else if (SameTails.size() == 2 &&946SameTails[1].getBlock()->isLayoutSuccessor(947SameTails[0].getBlock()) &&948SameTails[0].tailIsWholeBlock() &&949!SameTails[0].getBlock()->isEHPad())950commonTailIndex = 0;951else {952// Otherwise just pick one, favoring the fall-through predecessor if953// there is one.954for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {955MachineBasicBlock *MBB = SameTails[i].getBlock();956if ((MBB == EntryBB || MBB->isEHPad()) &&957SameTails[i].tailIsWholeBlock())958continue;959if (MBB == PredBB) {960commonTailIndex = i;961break;962}963if (SameTails[i].tailIsWholeBlock())964commonTailIndex = i;965}966}967968if (commonTailIndex == SameTails.size() ||969(SameTails[commonTailIndex].getBlock() == PredBB &&970!SameTails[commonTailIndex].tailIsWholeBlock())) {971// None of the blocks consist entirely of the common tail.972// Split a block so that one does.973if (!CreateCommonTailOnlyBlock(PredBB, SuccBB,974maxCommonTailLength, commonTailIndex)) {975RemoveBlocksWithHash(CurHash, SuccBB, PredBB, BranchDL);976continue;977}978}979980MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();981982// Recompute common tail MBB's edge weights and block frequency.983setCommonTailEdgeWeights(*MBB);984985// Merge debug locations, MMOs and undef flags across identical instructions986// for common tail.987mergeCommonTails(commonTailIndex);988989// MBB is common tail. Adjust all other BB's to jump to this one.990// Traversal must be forwards so erases work.991LLVM_DEBUG(dbgs() << "\nUsing common tail in " << printMBBReference(*MBB)992<< " for ");993for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {994if (commonTailIndex == i)995continue;996LLVM_DEBUG(dbgs() << printMBBReference(*SameTails[i].getBlock())997<< (i == e - 1 ? "" : ", "));998// Hack the end off BB i, making it jump to BB commonTailIndex instead.999replaceTailWithBranchTo(SameTails[i].getTailStartPos(), *MBB);1000// BB i is no longer a predecessor of SuccBB; remove it from the worklist.1001MergePotentials.erase(SameTails[i].getMPIter());1002}1003LLVM_DEBUG(dbgs() << "\n");1004// We leave commonTailIndex in the worklist in case there are other blocks1005// that match it with a smaller number of instructions.1006MadeChange = true;1007}1008return MadeChange;1009}10101011bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {1012bool MadeChange = false;1013if (!EnableTailMerge)1014return MadeChange;10151016// First find blocks with no successors.1017// Block placement may create new tail merging opportunities for these blocks.1018MergePotentials.clear();1019for (MachineBasicBlock &MBB : MF) {1020if (MergePotentials.size() == TailMergeThreshold)1021break;1022if (!TriedMerging.count(&MBB) && MBB.succ_empty())1023MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB,1024MBB.findBranchDebugLoc()));1025}10261027// If this is a large problem, avoid visiting the same basic blocks1028// multiple times.1029if (MergePotentials.size() == TailMergeThreshold)1030for (const MergePotentialsElt &Elt : MergePotentials)1031TriedMerging.insert(Elt.getBlock());10321033// See if we can do any tail merging on those.1034if (MergePotentials.size() >= 2)1035MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength);10361037// Look at blocks (IBB) with multiple predecessors (PBB).1038// We change each predecessor to a canonical form, by1039// (1) temporarily removing any unconditional branch from the predecessor1040// to IBB, and1041// (2) alter conditional branches so they branch to the other block1042// not IBB; this may require adding back an unconditional branch to IBB1043// later, where there wasn't one coming in. E.g.1044// Bcc IBB1045// fallthrough to QBB1046// here becomes1047// Bncc QBB1048// with a conceptual B to IBB after that, which never actually exists.1049// With those changes, we see whether the predecessors' tails match,1050// and merge them if so. We change things out of canonical form and1051// back to the way they were later in the process. (OptimizeBranches1052// would undo some of this, but we can't use it, because we'd get into1053// a compile-time infinite loop repeatedly doing and undoing the same1054// transformations.)10551056for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();1057I != E; ++I) {1058if (I->pred_size() < 2) continue;1059SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;1060MachineBasicBlock *IBB = &*I;1061MachineBasicBlock *PredBB = &*std::prev(I);1062MergePotentials.clear();1063MachineLoop *ML;10641065// Bail if merging after placement and IBB is the loop header because1066// -- If merging predecessors that belong to the same loop as IBB, the1067// common tail of merged predecessors may become the loop top if block1068// placement is called again and the predecessors may branch to this common1069// tail and require more branches. This can be relaxed if1070// MachineBlockPlacement::findBestLoopTop is more flexible.1071// --If merging predecessors that do not belong to the same loop as IBB, the1072// loop info of IBB's loop and the other loops may be affected. Calling the1073// block placement again may make big change to the layout and eliminate the1074// reason to do tail merging here.1075if (AfterBlockPlacement && MLI) {1076ML = MLI->getLoopFor(IBB);1077if (ML && IBB == ML->getHeader())1078continue;1079}10801081for (MachineBasicBlock *PBB : I->predecessors()) {1082if (MergePotentials.size() == TailMergeThreshold)1083break;10841085if (TriedMerging.count(PBB))1086continue;10871088// Skip blocks that loop to themselves, can't tail merge these.1089if (PBB == IBB)1090continue;10911092// Visit each predecessor only once.1093if (!UniquePreds.insert(PBB).second)1094continue;10951096// Skip blocks which may jump to a landing pad or jump from an asm blob.1097// Can't tail merge these.1098if (PBB->hasEHPadSuccessor() || PBB->mayHaveInlineAsmBr())1099continue;11001101// After block placement, only consider predecessors that belong to the1102// same loop as IBB. The reason is the same as above when skipping loop1103// header.1104if (AfterBlockPlacement && MLI)1105if (ML != MLI->getLoopFor(PBB))1106continue;11071108MachineBasicBlock *TBB = nullptr, *FBB = nullptr;1109SmallVector<MachineOperand, 4> Cond;1110if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond, true)) {1111// Failing case: IBB is the target of a cbr, and we cannot reverse the1112// branch.1113SmallVector<MachineOperand, 4> NewCond(Cond);1114if (!Cond.empty() && TBB == IBB) {1115if (TII->reverseBranchCondition(NewCond))1116continue;1117// This is the QBB case described above1118if (!FBB) {1119auto Next = ++PBB->getIterator();1120if (Next != MF.end())1121FBB = &*Next;1122}1123}11241125// Remove the unconditional branch at the end, if any.1126DebugLoc dl = PBB->findBranchDebugLoc();1127if (TBB && (Cond.empty() || FBB)) {1128TII->removeBranch(*PBB);1129if (!Cond.empty())1130// reinsert conditional branch only, for now1131TII->insertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr,1132NewCond, dl);1133}11341135MergePotentials.push_back(1136MergePotentialsElt(HashEndOfMBB(*PBB), PBB, dl));1137}1138}11391140// If this is a large problem, avoid visiting the same basic blocks multiple1141// times.1142if (MergePotentials.size() == TailMergeThreshold)1143for (MergePotentialsElt &Elt : MergePotentials)1144TriedMerging.insert(Elt.getBlock());11451146if (MergePotentials.size() >= 2)1147MadeChange |= TryTailMergeBlocks(IBB, PredBB, MinCommonTailLength);11481149// Reinsert an unconditional branch if needed. The 1 below can occur as a1150// result of removing blocks in TryTailMergeBlocks.1151PredBB = &*std::prev(I); // this may have been changed in TryTailMergeBlocks1152if (MergePotentials.size() == 1 &&1153MergePotentials.begin()->getBlock() != PredBB)1154FixTail(MergePotentials.begin()->getBlock(), IBB, TII,1155MergePotentials.begin()->getBranchDebugLoc());1156}11571158return MadeChange;1159}11601161void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) {1162SmallVector<BlockFrequency, 2> EdgeFreqLs(TailMBB.succ_size());1163BlockFrequency AccumulatedMBBFreq;11641165// Aggregate edge frequency of successor edge j:1166// edgeFreq(j) = sum (freq(bb) * edgeProb(bb, j)),1167// where bb is a basic block that is in SameTails.1168for (const auto &Src : SameTails) {1169const MachineBasicBlock *SrcMBB = Src.getBlock();1170BlockFrequency BlockFreq = MBBFreqInfo.getBlockFreq(SrcMBB);1171AccumulatedMBBFreq += BlockFreq;11721173// It is not necessary to recompute edge weights if TailBB has less than two1174// successors.1175if (TailMBB.succ_size() <= 1)1176continue;11771178auto EdgeFreq = EdgeFreqLs.begin();11791180for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();1181SuccI != SuccE; ++SuccI, ++EdgeFreq)1182*EdgeFreq += BlockFreq * MBPI.getEdgeProbability(SrcMBB, *SuccI);1183}11841185MBBFreqInfo.setBlockFreq(&TailMBB, AccumulatedMBBFreq);11861187if (TailMBB.succ_size() <= 1)1188return;11891190auto SumEdgeFreq =1191std::accumulate(EdgeFreqLs.begin(), EdgeFreqLs.end(), BlockFrequency(0))1192.getFrequency();1193auto EdgeFreq = EdgeFreqLs.begin();11941195if (SumEdgeFreq > 0) {1196for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();1197SuccI != SuccE; ++SuccI, ++EdgeFreq) {1198auto Prob = BranchProbability::getBranchProbability(1199EdgeFreq->getFrequency(), SumEdgeFreq);1200TailMBB.setSuccProbability(SuccI, Prob);1201}1202}1203}12041205//===----------------------------------------------------------------------===//1206// Branch Optimization1207//===----------------------------------------------------------------------===//12081209bool BranchFolder::OptimizeBranches(MachineFunction &MF) {1210bool MadeChange = false;12111212// Make sure blocks are numbered in order1213MF.RenumberBlocks();1214// Renumbering blocks alters EH scope membership, recalculate it.1215EHScopeMembership = getEHScopeMembership(MF);12161217for (MachineBasicBlock &MBB :1218llvm::make_early_inc_range(llvm::drop_begin(MF))) {1219MadeChange |= OptimizeBlock(&MBB);12201221// If it is dead, remove it.1222if (MBB.pred_empty() && !MBB.isMachineBlockAddressTaken()) {1223RemoveDeadBlock(&MBB);1224MadeChange = true;1225++NumDeadBlocks;1226}1227}12281229return MadeChange;1230}12311232// Blocks should be considered empty if they contain only debug info;1233// else the debug info would affect codegen.1234static bool IsEmptyBlock(MachineBasicBlock *MBB) {1235return MBB->getFirstNonDebugInstr(true) == MBB->end();1236}12371238// Blocks with only debug info and branches should be considered the same1239// as blocks with only branches.1240static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {1241MachineBasicBlock::iterator I = MBB->getFirstNonDebugInstr();1242assert(I != MBB->end() && "empty block!");1243return I->isBranch();1244}12451246/// IsBetterFallthrough - Return true if it would be clearly better to1247/// fall-through to MBB1 than to fall through into MBB2. This has to return1248/// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will1249/// result in infinite loops.1250static bool IsBetterFallthrough(MachineBasicBlock *MBB1,1251MachineBasicBlock *MBB2) {1252assert(MBB1 && MBB2 && "Unknown MachineBasicBlock");12531254// Right now, we use a simple heuristic. If MBB2 ends with a call, and1255// MBB1 doesn't, we prefer to fall through into MBB1. This allows us to1256// optimize branches that branch to either a return block or an assert block1257// into a fallthrough to the return.1258MachineBasicBlock::iterator MBB1I = MBB1->getLastNonDebugInstr();1259MachineBasicBlock::iterator MBB2I = MBB2->getLastNonDebugInstr();1260if (MBB1I == MBB1->end() || MBB2I == MBB2->end())1261return false;12621263// If there is a clear successor ordering we make sure that one block1264// will fall through to the next1265if (MBB1->isSuccessor(MBB2)) return true;1266if (MBB2->isSuccessor(MBB1)) return false;12671268return MBB2I->isCall() && !MBB1I->isCall();1269}12701271/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch1272/// instructions on the block.1273static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {1274MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();1275if (I != MBB.end() && I->isBranch())1276return I->getDebugLoc();1277return DebugLoc();1278}12791280static void copyDebugInfoToPredecessor(const TargetInstrInfo *TII,1281MachineBasicBlock &MBB,1282MachineBasicBlock &PredMBB) {1283auto InsertBefore = PredMBB.getFirstTerminator();1284for (MachineInstr &MI : MBB.instrs())1285if (MI.isDebugInstr()) {1286TII->duplicate(PredMBB, InsertBefore, MI);1287LLVM_DEBUG(dbgs() << "Copied debug entity from empty block to pred: "1288<< MI);1289}1290}12911292static void copyDebugInfoToSuccessor(const TargetInstrInfo *TII,1293MachineBasicBlock &MBB,1294MachineBasicBlock &SuccMBB) {1295auto InsertBefore = SuccMBB.SkipPHIsAndLabels(SuccMBB.begin());1296for (MachineInstr &MI : MBB.instrs())1297if (MI.isDebugInstr()) {1298TII->duplicate(SuccMBB, InsertBefore, MI);1299LLVM_DEBUG(dbgs() << "Copied debug entity from empty block to succ: "1300<< MI);1301}1302}13031304// Try to salvage DBG_VALUE instructions from an otherwise empty block. If such1305// a basic block is removed we would lose the debug information unless we have1306// copied the information to a predecessor/successor.1307//1308// TODO: This function only handles some simple cases. An alternative would be1309// to run a heavier analysis, such as the LiveDebugValues pass, before we do1310// branch folding.1311static void salvageDebugInfoFromEmptyBlock(const TargetInstrInfo *TII,1312MachineBasicBlock &MBB) {1313assert(IsEmptyBlock(&MBB) && "Expected an empty block (except debug info).");1314// If this MBB is the only predecessor of a successor it is legal to copy1315// DBG_VALUE instructions to the beginning of the successor.1316for (MachineBasicBlock *SuccBB : MBB.successors())1317if (SuccBB->pred_size() == 1)1318copyDebugInfoToSuccessor(TII, MBB, *SuccBB);1319// If this MBB is the only successor of a predecessor it is legal to copy the1320// DBG_VALUE instructions to the end of the predecessor (just before the1321// terminators, assuming that the terminator isn't affecting the DBG_VALUE).1322for (MachineBasicBlock *PredBB : MBB.predecessors())1323if (PredBB->succ_size() == 1)1324copyDebugInfoToPredecessor(TII, MBB, *PredBB);1325}13261327bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {1328bool MadeChange = false;1329MachineFunction &MF = *MBB->getParent();1330ReoptimizeBlock:13311332MachineFunction::iterator FallThrough = MBB->getIterator();1333++FallThrough;13341335// Make sure MBB and FallThrough belong to the same EH scope.1336bool SameEHScope = true;1337if (!EHScopeMembership.empty() && FallThrough != MF.end()) {1338auto MBBEHScope = EHScopeMembership.find(MBB);1339assert(MBBEHScope != EHScopeMembership.end());1340auto FallThroughEHScope = EHScopeMembership.find(&*FallThrough);1341assert(FallThroughEHScope != EHScopeMembership.end());1342SameEHScope = MBBEHScope->second == FallThroughEHScope->second;1343}13441345// Analyze the branch in the current block. As a side-effect, this may cause1346// the block to become empty.1347MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;1348SmallVector<MachineOperand, 4> CurCond;1349bool CurUnAnalyzable =1350TII->analyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);13511352// If this block is empty, make everyone use its fall-through, not the block1353// explicitly. Landing pads should not do this since the landing-pad table1354// points to this block. Blocks with their addresses taken shouldn't be1355// optimized away.1356if (IsEmptyBlock(MBB) && !MBB->isEHPad() && !MBB->hasAddressTaken() &&1357SameEHScope) {1358salvageDebugInfoFromEmptyBlock(TII, *MBB);1359// Dead block? Leave for cleanup later.1360if (MBB->pred_empty()) return MadeChange;13611362if (FallThrough == MF.end()) {1363// TODO: Simplify preds to not branch here if possible!1364} else if (FallThrough->isEHPad()) {1365// Don't rewrite to a landing pad fallthough. That could lead to the case1366// where a BB jumps to more than one landing pad.1367// TODO: Is it ever worth rewriting predecessors which don't already1368// jump to a landing pad, and so can safely jump to the fallthrough?1369} else if (MBB->isSuccessor(&*FallThrough)) {1370// Rewrite all predecessors of the old block to go to the fallthrough1371// instead.1372while (!MBB->pred_empty()) {1373MachineBasicBlock *Pred = *(MBB->pred_end()-1);1374Pred->ReplaceUsesOfBlockWith(MBB, &*FallThrough);1375}1376// Add rest successors of MBB to successors of FallThrough. Those1377// successors are not directly reachable via MBB, so it should be1378// landing-pad.1379for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)1380if (*SI != &*FallThrough && !FallThrough->isSuccessor(*SI)) {1381assert((*SI)->isEHPad() && "Bad CFG");1382FallThrough->copySuccessor(MBB, SI);1383}1384// If MBB was the target of a jump table, update jump tables to go to the1385// fallthrough instead.1386if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())1387MJTI->ReplaceMBBInJumpTables(MBB, &*FallThrough);1388MadeChange = true;1389}1390return MadeChange;1391}13921393// Check to see if we can simplify the terminator of the block before this1394// one.1395MachineBasicBlock &PrevBB = *std::prev(MachineFunction::iterator(MBB));13961397MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;1398SmallVector<MachineOperand, 4> PriorCond;1399bool PriorUnAnalyzable =1400TII->analyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);1401if (!PriorUnAnalyzable) {1402// If the previous branch is conditional and both conditions go to the same1403// destination, remove the branch, replacing it with an unconditional one or1404// a fall-through.1405if (PriorTBB && PriorTBB == PriorFBB) {1406DebugLoc dl = getBranchDebugLoc(PrevBB);1407TII->removeBranch(PrevBB);1408PriorCond.clear();1409if (PriorTBB != MBB)1410TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);1411MadeChange = true;1412++NumBranchOpts;1413goto ReoptimizeBlock;1414}14151416// If the previous block unconditionally falls through to this block and1417// this block has no other predecessors, move the contents of this block1418// into the prior block. This doesn't usually happen when SimplifyCFG1419// has been used, but it can happen if tail merging splits a fall-through1420// predecessor of a block.1421// This has to check PrevBB->succ_size() because EH edges are ignored by1422// analyzeBranch.1423if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&1424PrevBB.succ_size() == 1 && PrevBB.isSuccessor(MBB) &&1425!MBB->hasAddressTaken() && !MBB->isEHPad()) {1426LLVM_DEBUG(dbgs() << "\nMerging into block: " << PrevBB1427<< "From MBB: " << *MBB);1428// Remove redundant DBG_VALUEs first.1429if (!PrevBB.empty()) {1430MachineBasicBlock::iterator PrevBBIter = PrevBB.end();1431--PrevBBIter;1432MachineBasicBlock::iterator MBBIter = MBB->begin();1433// Check if DBG_VALUE at the end of PrevBB is identical to the1434// DBG_VALUE at the beginning of MBB.1435while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()1436&& PrevBBIter->isDebugInstr() && MBBIter->isDebugInstr()) {1437if (!MBBIter->isIdenticalTo(*PrevBBIter))1438break;1439MachineInstr &DuplicateDbg = *MBBIter;1440++MBBIter; -- PrevBBIter;1441DuplicateDbg.eraseFromParent();1442}1443}1444PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());1445PrevBB.removeSuccessor(PrevBB.succ_begin());1446assert(PrevBB.succ_empty());1447PrevBB.transferSuccessors(MBB);1448MadeChange = true;1449return MadeChange;1450}14511452// If the previous branch *only* branches to *this* block (conditional or1453// not) remove the branch.1454if (PriorTBB == MBB && !PriorFBB) {1455TII->removeBranch(PrevBB);1456MadeChange = true;1457++NumBranchOpts;1458goto ReoptimizeBlock;1459}14601461// If the prior block branches somewhere else on the condition and here if1462// the condition is false, remove the uncond second branch.1463if (PriorFBB == MBB) {1464DebugLoc dl = getBranchDebugLoc(PrevBB);1465TII->removeBranch(PrevBB);1466TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);1467MadeChange = true;1468++NumBranchOpts;1469goto ReoptimizeBlock;1470}14711472// If the prior block branches here on true and somewhere else on false, and1473// if the branch condition is reversible, reverse the branch to create a1474// fall-through.1475if (PriorTBB == MBB) {1476SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);1477if (!TII->reverseBranchCondition(NewPriorCond)) {1478DebugLoc dl = getBranchDebugLoc(PrevBB);1479TII->removeBranch(PrevBB);1480TII->insertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl);1481MadeChange = true;1482++NumBranchOpts;1483goto ReoptimizeBlock;1484}1485}14861487// If this block has no successors (e.g. it is a return block or ends with1488// a call to a no-return function like abort or __cxa_throw) and if the pred1489// falls through into this block, and if it would otherwise fall through1490// into the block after this, move this block to the end of the function.1491//1492// We consider it more likely that execution will stay in the function (e.g.1493// due to loops) than it is to exit it. This asserts in loops etc, moving1494// the assert condition out of the loop body.1495if (MBB->succ_empty() && !PriorCond.empty() && !PriorFBB &&1496MachineFunction::iterator(PriorTBB) == FallThrough &&1497!MBB->canFallThrough()) {1498bool DoTransform = true;14991500// We have to be careful that the succs of PredBB aren't both no-successor1501// blocks. If neither have successors and if PredBB is the second from1502// last block in the function, we'd just keep swapping the two blocks for1503// last. Only do the swap if one is clearly better to fall through than1504// the other.1505if (FallThrough == --MF.end() &&1506!IsBetterFallthrough(PriorTBB, MBB))1507DoTransform = false;15081509if (DoTransform) {1510// Reverse the branch so we will fall through on the previous true cond.1511SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);1512if (!TII->reverseBranchCondition(NewPriorCond)) {1513LLVM_DEBUG(dbgs() << "\nMoving MBB: " << *MBB1514<< "To make fallthrough to: " << *PriorTBB << "\n");15151516DebugLoc dl = getBranchDebugLoc(PrevBB);1517TII->removeBranch(PrevBB);1518TII->insertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl);15191520// Move this block to the end of the function.1521MBB->moveAfter(&MF.back());1522MadeChange = true;1523++NumBranchOpts;1524return MadeChange;1525}1526}1527}1528}15291530if (!IsEmptyBlock(MBB)) {1531MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();1532if (TII->isUnconditionalTailCall(TailCall)) {1533SmallVector<MachineBasicBlock *> PredsChanged;1534for (auto &Pred : MBB->predecessors()) {1535MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;1536SmallVector<MachineOperand, 4> PredCond;1537bool PredAnalyzable =1538!TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);15391540// Only eliminate if MBB == TBB (Taken Basic Block)1541if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB &&1542PredTBB != PredFBB) {1543// The predecessor has a conditional branch to this block which1544// consists of only a tail call. Try to fold the tail call into the1545// conditional branch.1546if (TII->canMakeTailCallConditional(PredCond, TailCall)) {1547// TODO: It would be nice if analyzeBranch() could provide a pointer1548// to the branch instruction so replaceBranchWithTailCall() doesn't1549// have to search for it.1550TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);1551PredsChanged.push_back(Pred);1552}1553}1554// If the predecessor is falling through to this block, we could reverse1555// the branch condition and fold the tail call into that. However, after1556// that we might have to re-arrange the CFG to fall through to the other1557// block and there is a high risk of regressing code size rather than1558// improving it.1559}1560if (!PredsChanged.empty()) {1561NumTailCalls += PredsChanged.size();1562for (auto &Pred : PredsChanged)1563Pred->removeSuccessor(MBB);15641565return true;1566}1567}1568}15691570if (!CurUnAnalyzable) {1571// If this is a two-way branch, and the FBB branches to this block, reverse1572// the condition so the single-basic-block loop is faster. Instead of:1573// Loop: xxx; jcc Out; jmp Loop1574// we want:1575// Loop: xxx; jncc Loop; jmp Out1576if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {1577SmallVector<MachineOperand, 4> NewCond(CurCond);1578if (!TII->reverseBranchCondition(NewCond)) {1579DebugLoc dl = getBranchDebugLoc(*MBB);1580TII->removeBranch(*MBB);1581TII->insertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);1582MadeChange = true;1583++NumBranchOpts;1584goto ReoptimizeBlock;1585}1586}15871588// If this branch is the only thing in its block, see if we can forward1589// other blocks across it.1590if (CurTBB && CurCond.empty() && !CurFBB &&1591IsBranchOnlyBlock(MBB) && CurTBB != MBB &&1592!MBB->hasAddressTaken() && !MBB->isEHPad()) {1593DebugLoc dl = getBranchDebugLoc(*MBB);1594// This block may contain just an unconditional branch. Because there can1595// be 'non-branch terminators' in the block, try removing the branch and1596// then seeing if the block is empty.1597TII->removeBranch(*MBB);1598// If the only things remaining in the block are debug info, remove these1599// as well, so this will behave the same as an empty block in non-debug1600// mode.1601if (IsEmptyBlock(MBB)) {1602// Make the block empty, losing the debug info (we could probably1603// improve this in some cases.)1604MBB->erase(MBB->begin(), MBB->end());1605}1606// If this block is just an unconditional branch to CurTBB, we can1607// usually completely eliminate the block. The only case we cannot1608// completely eliminate the block is when the block before this one1609// falls through into MBB and we can't understand the prior block's branch1610// condition.1611if (MBB->empty()) {1612bool PredHasNoFallThrough = !PrevBB.canFallThrough();1613if (PredHasNoFallThrough || !PriorUnAnalyzable ||1614!PrevBB.isSuccessor(MBB)) {1615// If the prior block falls through into us, turn it into an1616// explicit branch to us to make updates simpler.1617if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&1618PriorTBB != MBB && PriorFBB != MBB) {1619if (!PriorTBB) {1620assert(PriorCond.empty() && !PriorFBB &&1621"Bad branch analysis");1622PriorTBB = MBB;1623} else {1624assert(!PriorFBB && "Machine CFG out of date!");1625PriorFBB = MBB;1626}1627DebugLoc pdl = getBranchDebugLoc(PrevBB);1628TII->removeBranch(PrevBB);1629TII->insertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);1630}16311632// Iterate through all the predecessors, revectoring each in-turn.1633size_t PI = 0;1634bool DidChange = false;1635bool HasBranchToSelf = false;1636while(PI != MBB->pred_size()) {1637MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);1638if (PMBB == MBB) {1639// If this block has an uncond branch to itself, leave it.1640++PI;1641HasBranchToSelf = true;1642} else {1643DidChange = true;1644PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);1645// Add rest successors of MBB to successors of CurTBB. Those1646// successors are not directly reachable via MBB, so it should be1647// landing-pad.1648for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE;1649++SI)1650if (*SI != CurTBB && !CurTBB->isSuccessor(*SI)) {1651assert((*SI)->isEHPad() && "Bad CFG");1652CurTBB->copySuccessor(MBB, SI);1653}1654// If this change resulted in PMBB ending in a conditional1655// branch where both conditions go to the same destination,1656// change this to an unconditional branch.1657MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr;1658SmallVector<MachineOperand, 4> NewCurCond;1659bool NewCurUnAnalyzable = TII->analyzeBranch(1660*PMBB, NewCurTBB, NewCurFBB, NewCurCond, true);1661if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {1662DebugLoc pdl = getBranchDebugLoc(*PMBB);1663TII->removeBranch(*PMBB);1664NewCurCond.clear();1665TII->insertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl);1666MadeChange = true;1667++NumBranchOpts;1668}1669}1670}16711672// Change any jumptables to go to the new MBB.1673if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())1674MJTI->ReplaceMBBInJumpTables(MBB, CurTBB);1675if (DidChange) {1676++NumBranchOpts;1677MadeChange = true;1678if (!HasBranchToSelf) return MadeChange;1679}1680}1681}16821683// Add the branch back if the block is more than just an uncond branch.1684TII->insertBranch(*MBB, CurTBB, nullptr, CurCond, dl);1685}1686}16871688// If the prior block doesn't fall through into this block, and if this1689// block doesn't fall through into some other block, see if we can find a1690// place to move this block where a fall-through will happen.1691if (!PrevBB.canFallThrough()) {1692// Now we know that there was no fall-through into this block, check to1693// see if it has a fall-through into its successor.1694bool CurFallsThru = MBB->canFallThrough();16951696if (!MBB->isEHPad()) {1697// Check all the predecessors of this block. If one of them has no fall1698// throughs, and analyzeBranch thinks it _could_ fallthrough to this1699// block, move this block right after it.1700for (MachineBasicBlock *PredBB : MBB->predecessors()) {1701// Analyze the branch at the end of the pred.1702MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;1703SmallVector<MachineOperand, 4> PredCond;1704if (PredBB != MBB && !PredBB->canFallThrough() &&1705!TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) &&1706(PredTBB == MBB || PredFBB == MBB) &&1707(!CurFallsThru || !CurTBB || !CurFBB) &&1708(!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {1709// If the current block doesn't fall through, just move it.1710// If the current block can fall through and does not end with a1711// conditional branch, we need to append an unconditional jump to1712// the (current) next block. To avoid a possible compile-time1713// infinite loop, move blocks only backward in this case.1714// Also, if there are already 2 branches here, we cannot add a third;1715// this means we have the case1716// Bcc next1717// B elsewhere1718// next:1719if (CurFallsThru) {1720MachineBasicBlock *NextBB = &*std::next(MBB->getIterator());1721CurCond.clear();1722TII->insertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc());1723}1724MBB->moveAfter(PredBB);1725MadeChange = true;1726goto ReoptimizeBlock;1727}1728}1729}17301731if (!CurFallsThru) {1732// Check analyzable branch-successors to see if we can move this block1733// before one.1734if (!CurUnAnalyzable) {1735for (MachineBasicBlock *SuccBB : {CurFBB, CurTBB}) {1736if (!SuccBB)1737continue;1738// Analyze the branch at the end of the block before the succ.1739MachineFunction::iterator SuccPrev = --SuccBB->getIterator();17401741// If this block doesn't already fall-through to that successor, and1742// if the succ doesn't already have a block that can fall through into1743// it, we can arrange for the fallthrough to happen.1744if (SuccBB != MBB && &*SuccPrev != MBB &&1745!SuccPrev->canFallThrough()) {1746MBB->moveBefore(SuccBB);1747MadeChange = true;1748goto ReoptimizeBlock;1749}1750}1751}17521753// Okay, there is no really great place to put this block. If, however,1754// the block before this one would be a fall-through if this block were1755// removed, move this block to the end of the function. There is no real1756// advantage in "falling through" to an EH block, so we don't want to1757// perform this transformation for that case.1758//1759// Also, Windows EH introduced the possibility of an arbitrary number of1760// successors to a given block. The analyzeBranch call does not consider1761// exception handling and so we can get in a state where a block1762// containing a call is followed by multiple EH blocks that would be1763// rotated infinitely at the end of the function if the transformation1764// below were performed for EH "FallThrough" blocks. Therefore, even if1765// that appears not to be happening anymore, we should assume that it is1766// possible and not remove the "!FallThrough()->isEHPad" condition below.1767MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;1768SmallVector<MachineOperand, 4> PrevCond;1769if (FallThrough != MF.end() &&1770!FallThrough->isEHPad() &&1771!TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&1772PrevBB.isSuccessor(&*FallThrough)) {1773MBB->moveAfter(&MF.back());1774MadeChange = true;1775return MadeChange;1776}1777}1778}17791780return MadeChange;1781}17821783//===----------------------------------------------------------------------===//1784// Hoist Common Code1785//===----------------------------------------------------------------------===//17861787bool BranchFolder::HoistCommonCode(MachineFunction &MF) {1788bool MadeChange = false;1789for (MachineBasicBlock &MBB : llvm::make_early_inc_range(MF))1790MadeChange |= HoistCommonCodeInSuccs(&MBB);17911792return MadeChange;1793}17941795/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given1796/// its 'true' successor.1797static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,1798MachineBasicBlock *TrueBB) {1799for (MachineBasicBlock *SuccBB : BB->successors())1800if (SuccBB != TrueBB)1801return SuccBB;1802return nullptr;1803}18041805template <class Container>1806static void addRegAndItsAliases(Register Reg, const TargetRegisterInfo *TRI,1807Container &Set) {1808if (Reg.isPhysical()) {1809for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)1810Set.insert(*AI);1811} else {1812Set.insert(Reg);1813}1814}18151816/// findHoistingInsertPosAndDeps - Find the location to move common instructions1817/// in successors to. The location is usually just before the terminator,1818/// however if the terminator is a conditional branch and its previous1819/// instruction is the flag setting instruction, the previous instruction is1820/// the preferred location. This function also gathers uses and defs of the1821/// instructions from the insertion point to the end of the block. The data is1822/// used by HoistCommonCodeInSuccs to ensure safety.1823static1824MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,1825const TargetInstrInfo *TII,1826const TargetRegisterInfo *TRI,1827SmallSet<Register, 4> &Uses,1828SmallSet<Register, 4> &Defs) {1829MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();1830if (!TII->isUnpredicatedTerminator(*Loc))1831return MBB->end();18321833for (const MachineOperand &MO : Loc->operands()) {1834if (!MO.isReg())1835continue;1836Register Reg = MO.getReg();1837if (!Reg)1838continue;1839if (MO.isUse()) {1840addRegAndItsAliases(Reg, TRI, Uses);1841} else {1842if (!MO.isDead())1843// Don't try to hoist code in the rare case the terminator defines a1844// register that is later used.1845return MBB->end();18461847// If the terminator defines a register, make sure we don't hoist1848// the instruction whose def might be clobbered by the terminator.1849addRegAndItsAliases(Reg, TRI, Defs);1850}1851}18521853if (Uses.empty())1854return Loc;1855// If the terminator is the only instruction in the block and Uses is not1856// empty (or we would have returned above), we can still safely hoist1857// instructions just before the terminator as long as the Defs/Uses are not1858// violated (which is checked in HoistCommonCodeInSuccs).1859if (Loc == MBB->begin())1860return Loc;18611862// The terminator is probably a conditional branch, try not to separate the1863// branch from condition setting instruction.1864MachineBasicBlock::iterator PI = prev_nodbg(Loc, MBB->begin());18651866bool IsDef = false;1867for (const MachineOperand &MO : PI->operands()) {1868// If PI has a regmask operand, it is probably a call. Separate away.1869if (MO.isRegMask())1870return Loc;1871if (!MO.isReg() || MO.isUse())1872continue;1873Register Reg = MO.getReg();1874if (!Reg)1875continue;1876if (Uses.count(Reg)) {1877IsDef = true;1878break;1879}1880}1881if (!IsDef)1882// The condition setting instruction is not just before the conditional1883// branch.1884return Loc;18851886// Be conservative, don't insert instruction above something that may have1887// side-effects. And since it's potentially bad to separate flag setting1888// instruction from the conditional branch, just abort the optimization1889// completely.1890// Also avoid moving code above predicated instruction since it's hard to1891// reason about register liveness with predicated instruction.1892bool DontMoveAcrossStore = true;1893if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(*PI))1894return MBB->end();18951896// Find out what registers are live. Note this routine is ignoring other live1897// registers which are only used by instructions in successor blocks.1898for (const MachineOperand &MO : PI->operands()) {1899if (!MO.isReg())1900continue;1901Register Reg = MO.getReg();1902if (!Reg)1903continue;1904if (MO.isUse()) {1905addRegAndItsAliases(Reg, TRI, Uses);1906} else {1907if (Uses.erase(Reg)) {1908if (Reg.isPhysical()) {1909for (MCPhysReg SubReg : TRI->subregs(Reg))1910Uses.erase(SubReg); // Use sub-registers to be conservative1911}1912}1913addRegAndItsAliases(Reg, TRI, Defs);1914}1915}19161917return PI;1918}19191920bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {1921MachineBasicBlock *TBB = nullptr, *FBB = nullptr;1922SmallVector<MachineOperand, 4> Cond;1923if (TII->analyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())1924return false;19251926if (!FBB) FBB = findFalseBlock(MBB, TBB);1927if (!FBB)1928// Malformed bcc? True and false blocks are the same?1929return false;19301931// Restrict the optimization to cases where MBB is the only predecessor,1932// it is an obvious win.1933if (TBB->pred_size() > 1 || FBB->pred_size() > 1)1934return false;19351936// Find a suitable position to hoist the common instructions to. Also figure1937// out which registers are used or defined by instructions from the insertion1938// point to the end of the block.1939SmallSet<Register, 4> Uses, Defs;1940MachineBasicBlock::iterator Loc =1941findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);1942if (Loc == MBB->end())1943return false;19441945bool HasDups = false;1946SmallSet<Register, 4> ActiveDefsSet, AllDefsSet;1947MachineBasicBlock::iterator TIB = TBB->begin();1948MachineBasicBlock::iterator FIB = FBB->begin();1949MachineBasicBlock::iterator TIE = TBB->end();1950MachineBasicBlock::iterator FIE = FBB->end();1951while (TIB != TIE && FIB != FIE) {1952// Skip dbg_value instructions. These do not count.1953TIB = skipDebugInstructionsForward(TIB, TIE, false);1954FIB = skipDebugInstructionsForward(FIB, FIE, false);1955if (TIB == TIE || FIB == FIE)1956break;19571958if (!TIB->isIdenticalTo(*FIB, MachineInstr::CheckKillDead))1959break;19601961if (TII->isPredicated(*TIB))1962// Hard to reason about register liveness with predicated instruction.1963break;19641965bool IsSafe = true;1966for (MachineOperand &MO : TIB->operands()) {1967// Don't attempt to hoist instructions with register masks.1968if (MO.isRegMask()) {1969IsSafe = false;1970break;1971}1972if (!MO.isReg())1973continue;1974Register Reg = MO.getReg();1975if (!Reg)1976continue;1977if (MO.isDef()) {1978if (Uses.count(Reg)) {1979// Avoid clobbering a register that's used by the instruction at1980// the point of insertion.1981IsSafe = false;1982break;1983}19841985if (Defs.count(Reg) && !MO.isDead()) {1986// Don't hoist the instruction if the def would be clobber by the1987// instruction at the point insertion. FIXME: This is overly1988// conservative. It should be possible to hoist the instructions1989// in BB2 in the following example:1990// BB1:1991// r1, eflag = op1 r2, r31992// brcc eflag1993//1994// BB2:1995// r1 = op2, ...1996// = op3, killed r11997IsSafe = false;1998break;1999}2000} else if (!ActiveDefsSet.count(Reg)) {2001if (Defs.count(Reg)) {2002// Use is defined by the instruction at the point of insertion.2003IsSafe = false;2004break;2005}20062007if (MO.isKill() && Uses.count(Reg))2008// Kills a register that's read by the instruction at the point of2009// insertion. Remove the kill marker.2010MO.setIsKill(false);2011}2012}2013if (!IsSafe)2014break;20152016bool DontMoveAcrossStore = true;2017if (!TIB->isSafeToMove(nullptr, DontMoveAcrossStore))2018break;20192020// Remove kills from ActiveDefsSet, these registers had short live ranges.2021for (const MachineOperand &MO : TIB->all_uses()) {2022if (!MO.isKill())2023continue;2024Register Reg = MO.getReg();2025if (!Reg)2026continue;2027if (!AllDefsSet.count(Reg)) {2028continue;2029}2030if (Reg.isPhysical()) {2031for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)2032ActiveDefsSet.erase(*AI);2033} else {2034ActiveDefsSet.erase(Reg);2035}2036}20372038// Track local defs so we can update liveins.2039for (const MachineOperand &MO : TIB->all_defs()) {2040if (MO.isDead())2041continue;2042Register Reg = MO.getReg();2043if (!Reg || Reg.isVirtual())2044continue;2045addRegAndItsAliases(Reg, TRI, ActiveDefsSet);2046addRegAndItsAliases(Reg, TRI, AllDefsSet);2047}20482049HasDups = true;2050++TIB;2051++FIB;2052}20532054if (!HasDups)2055return false;20562057MBB->splice(Loc, TBB, TBB->begin(), TIB);2058FBB->erase(FBB->begin(), FIB);20592060if (UpdateLiveIns)2061fullyRecomputeLiveIns({TBB, FBB});20622063++NumHoist;2064return true;2065}206620672068