Path: blob/main/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
35233 views
//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// Early if-conversion is for out-of-order CPUs that don't have a lot of9// predicable instructions. The goal is to eliminate conditional branches that10// may mispredict.11//12// Instructions from both sides of the branch are executed specutatively, and a13// cmov instruction selects the result.14//15//===----------------------------------------------------------------------===//1617#include "llvm/ADT/BitVector.h"18#include "llvm/ADT/PostOrderIterator.h"19#include "llvm/ADT/SmallPtrSet.h"20#include "llvm/ADT/SparseSet.h"21#include "llvm/ADT/Statistic.h"22#include "llvm/Analysis/OptimizationRemarkEmitter.h"23#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"24#include "llvm/CodeGen/MachineDominators.h"25#include "llvm/CodeGen/MachineFunction.h"26#include "llvm/CodeGen/MachineFunctionPass.h"27#include "llvm/CodeGen/MachineInstr.h"28#include "llvm/CodeGen/MachineLoopInfo.h"29#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"30#include "llvm/CodeGen/MachineRegisterInfo.h"31#include "llvm/CodeGen/MachineTraceMetrics.h"32#include "llvm/CodeGen/TargetInstrInfo.h"33#include "llvm/CodeGen/TargetRegisterInfo.h"34#include "llvm/CodeGen/TargetSubtargetInfo.h"35#include "llvm/InitializePasses.h"36#include "llvm/Support/CommandLine.h"37#include "llvm/Support/Debug.h"38#include "llvm/Support/raw_ostream.h"3940using namespace llvm;4142#define DEBUG_TYPE "early-ifcvt"4344// Absolute maximum number of instructions allowed per speculated block.45// This bypasses all other heuristics, so it should be set fairly high.46static cl::opt<unsigned>47BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden,48cl::desc("Maximum number of instructions per speculated block."));4950// Stress testing mode - disable heuristics.51static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden,52cl::desc("Turn all knobs to 11"));5354STATISTIC(NumDiamondsSeen, "Number of diamonds");55STATISTIC(NumDiamondsConv, "Number of diamonds converted");56STATISTIC(NumTrianglesSeen, "Number of triangles");57STATISTIC(NumTrianglesConv, "Number of triangles converted");5859//===----------------------------------------------------------------------===//60// SSAIfConv61//===----------------------------------------------------------------------===//62//63// The SSAIfConv class performs if-conversion on SSA form machine code after64// determining if it is possible. The class contains no heuristics; external65// code should be used to determine when if-conversion is a good idea.66//67// SSAIfConv can convert both triangles and diamonds:68//69// Triangle: Head Diamond: Head70// | \ / \_71// | \ / |72// | [TF]BB FBB TBB73// | / \ /74// | / \ /75// Tail Tail76//77// Instructions in the conditional blocks TBB and/or FBB are spliced into the78// Head block, and phis in the Tail block are converted to select instructions.79//80namespace {81class SSAIfConv {82const TargetInstrInfo *TII;83const TargetRegisterInfo *TRI;84MachineRegisterInfo *MRI;8586public:87/// The block containing the conditional branch.88MachineBasicBlock *Head;8990/// The block containing phis after the if-then-else.91MachineBasicBlock *Tail;9293/// The 'true' conditional block as determined by analyzeBranch.94MachineBasicBlock *TBB;9596/// The 'false' conditional block as determined by analyzeBranch.97MachineBasicBlock *FBB;9899/// isTriangle - When there is no 'else' block, either TBB or FBB will be100/// equal to Tail.101bool isTriangle() const { return TBB == Tail || FBB == Tail; }102103/// Returns the Tail predecessor for the True side.104MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; }105106/// Returns the Tail predecessor for the False side.107MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; }108109/// Information about each phi in the Tail block.110struct PHIInfo {111MachineInstr *PHI;112unsigned TReg = 0, FReg = 0;113// Latencies from Cond+Branch, TReg, and FReg to DstReg.114int CondCycles = 0, TCycles = 0, FCycles = 0;115116PHIInfo(MachineInstr *phi) : PHI(phi) {}117};118119SmallVector<PHIInfo, 8> PHIs;120121/// The branch condition determined by analyzeBranch.122SmallVector<MachineOperand, 4> Cond;123124private:125/// Instructions in Head that define values used by the conditional blocks.126/// The hoisted instructions must be inserted after these instructions.127SmallPtrSet<MachineInstr*, 8> InsertAfter;128129/// Register units clobbered by the conditional blocks.130BitVector ClobberedRegUnits;131132// Scratch pad for findInsertionPoint.133SparseSet<unsigned> LiveRegUnits;134135/// Insertion point in Head for speculatively executed instructions form TBB136/// and FBB.137MachineBasicBlock::iterator InsertionPoint;138139/// Return true if all non-terminator instructions in MBB can be safely140/// speculated.141bool canSpeculateInstrs(MachineBasicBlock *MBB);142143/// Return true if all non-terminator instructions in MBB can be safely144/// predicated.145bool canPredicateInstrs(MachineBasicBlock *MBB);146147/// Scan through instruction dependencies and update InsertAfter array.148/// Return false if any dependency is incompatible with if conversion.149bool InstrDependenciesAllowIfConv(MachineInstr *I);150151/// Predicate all instructions of the basic block with current condition152/// except for terminators. Reverse the condition if ReversePredicate is set.153void PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate);154155/// Find a valid insertion point in Head.156bool findInsertionPoint();157158/// Replace PHI instructions in Tail with selects.159void replacePHIInstrs();160161/// Insert selects and rewrite PHI operands to use them.162void rewritePHIOperands();163164public:165/// runOnMachineFunction - Initialize per-function data structures.166void runOnMachineFunction(MachineFunction &MF) {167TII = MF.getSubtarget().getInstrInfo();168TRI = MF.getSubtarget().getRegisterInfo();169MRI = &MF.getRegInfo();170LiveRegUnits.clear();171LiveRegUnits.setUniverse(TRI->getNumRegUnits());172ClobberedRegUnits.clear();173ClobberedRegUnits.resize(TRI->getNumRegUnits());174}175176/// canConvertIf - If the sub-CFG headed by MBB can be if-converted,177/// initialize the internal state, and return true.178/// If predicate is set try to predicate the block otherwise try to179/// speculatively execute it.180bool canConvertIf(MachineBasicBlock *MBB, bool Predicate = false);181182/// convertIf - If-convert the last block passed to canConvertIf(), assuming183/// it is possible. Add any erased blocks to RemovedBlocks.184void convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks,185bool Predicate = false);186};187} // end anonymous namespace188189190/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely191/// be speculated. The terminators are not considered.192///193/// If instructions use any values that are defined in the head basic block,194/// the defining instructions are added to InsertAfter.195///196/// Any clobbered regunits are added to ClobberedRegUnits.197///198bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {199// Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to200// get right.201if (!MBB->livein_empty()) {202LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");203return false;204}205206unsigned InstrCount = 0;207208// Check all instructions, except the terminators. It is assumed that209// terminators never have side effects or define any used register values.210for (MachineInstr &MI :211llvm::make_range(MBB->begin(), MBB->getFirstTerminator())) {212if (MI.isDebugInstr())213continue;214215if (++InstrCount > BlockInstrLimit && !Stress) {216LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "217<< BlockInstrLimit << " instructions.\n");218return false;219}220221// There shouldn't normally be any phis in a single-predecessor block.222if (MI.isPHI()) {223LLVM_DEBUG(dbgs() << "Can't hoist: " << MI);224return false;225}226227// Don't speculate loads. Note that it may be possible and desirable to228// speculate GOT or constant pool loads that are guaranteed not to trap,229// but we don't support that for now.230if (MI.mayLoad()) {231LLVM_DEBUG(dbgs() << "Won't speculate load: " << MI);232return false;233}234235// We never speculate stores, so an AA pointer isn't necessary.236bool DontMoveAcrossStore = true;237if (!MI.isSafeToMove(nullptr, DontMoveAcrossStore)) {238LLVM_DEBUG(dbgs() << "Can't speculate: " << MI);239return false;240}241242// Check for any dependencies on Head instructions.243if (!InstrDependenciesAllowIfConv(&MI))244return false;245}246return true;247}248249/// Check that there is no dependencies preventing if conversion.250///251/// If instruction uses any values that are defined in the head basic block,252/// the defining instructions are added to InsertAfter.253bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) {254for (const MachineOperand &MO : I->operands()) {255if (MO.isRegMask()) {256LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I);257return false;258}259if (!MO.isReg())260continue;261Register Reg = MO.getReg();262263// Remember clobbered regunits.264if (MO.isDef() && Reg.isPhysical())265for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))266ClobberedRegUnits.set(Unit);267268if (!MO.readsReg() || !Reg.isVirtual())269continue;270MachineInstr *DefMI = MRI->getVRegDef(Reg);271if (!DefMI || DefMI->getParent() != Head)272continue;273if (InsertAfter.insert(DefMI).second)274LLVM_DEBUG(dbgs() << printMBBReference(*I->getParent()) << " depends on "275<< *DefMI);276if (DefMI->isTerminator()) {277LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n");278return false;279}280}281return true;282}283284/// canPredicateInstrs - Returns true if all the instructions in MBB can safely285/// be predicates. The terminators are not considered.286///287/// If instructions use any values that are defined in the head basic block,288/// the defining instructions are added to InsertAfter.289///290/// Any clobbered regunits are added to ClobberedRegUnits.291///292bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) {293// Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to294// get right.295if (!MBB->livein_empty()) {296LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");297return false;298}299300unsigned InstrCount = 0;301302// Check all instructions, except the terminators. It is assumed that303// terminators never have side effects or define any used register values.304for (MachineBasicBlock::iterator I = MBB->begin(),305E = MBB->getFirstTerminator();306I != E; ++I) {307if (I->isDebugInstr())308continue;309310if (++InstrCount > BlockInstrLimit && !Stress) {311LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "312<< BlockInstrLimit << " instructions.\n");313return false;314}315316// There shouldn't normally be any phis in a single-predecessor block.317if (I->isPHI()) {318LLVM_DEBUG(dbgs() << "Can't predicate: " << *I);319return false;320}321322// Check that instruction is predicable323if (!TII->isPredicable(*I)) {324LLVM_DEBUG(dbgs() << "Isn't predicable: " << *I);325return false;326}327328// Check that instruction is not already predicated.329if (TII->isPredicated(*I) && !TII->canPredicatePredicatedInstr(*I)) {330LLVM_DEBUG(dbgs() << "Is already predicated: " << *I);331return false;332}333334// Check for any dependencies on Head instructions.335if (!InstrDependenciesAllowIfConv(&(*I)))336return false;337}338return true;339}340341// Apply predicate to all instructions in the machine block.342void SSAIfConv::PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate) {343auto Condition = Cond;344if (ReversePredicate) {345bool CanRevCond = !TII->reverseBranchCondition(Condition);346assert(CanRevCond && "Reversed predicate is not supported");347(void)CanRevCond;348}349// Terminators don't need to be predicated as they will be removed.350for (MachineBasicBlock::iterator I = MBB->begin(),351E = MBB->getFirstTerminator();352I != E; ++I) {353if (I->isDebugInstr())354continue;355TII->PredicateInstruction(*I, Condition);356}357}358359/// Find an insertion point in Head for the speculated instructions. The360/// insertion point must be:361///362/// 1. Before any terminators.363/// 2. After any instructions in InsertAfter.364/// 3. Not have any clobbered regunits live.365///366/// This function sets InsertionPoint and returns true when successful, it367/// returns false if no valid insertion point could be found.368///369bool SSAIfConv::findInsertionPoint() {370// Keep track of live regunits before the current position.371// Only track RegUnits that are also in ClobberedRegUnits.372LiveRegUnits.clear();373SmallVector<MCRegister, 8> Reads;374MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();375MachineBasicBlock::iterator I = Head->end();376MachineBasicBlock::iterator B = Head->begin();377while (I != B) {378--I;379// Some of the conditional code depends in I.380if (InsertAfter.count(&*I)) {381LLVM_DEBUG(dbgs() << "Can't insert code after " << *I);382return false;383}384385// Update live regunits.386for (const MachineOperand &MO : I->operands()) {387// We're ignoring regmask operands. That is conservatively correct.388if (!MO.isReg())389continue;390Register Reg = MO.getReg();391if (!Reg.isPhysical())392continue;393// I clobbers Reg, so it isn't live before I.394if (MO.isDef())395for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))396LiveRegUnits.erase(Unit);397// Unless I reads Reg.398if (MO.readsReg())399Reads.push_back(Reg.asMCReg());400}401// Anything read by I is live before I.402while (!Reads.empty())403for (MCRegUnit Unit : TRI->regunits(Reads.pop_back_val()))404if (ClobberedRegUnits.test(Unit))405LiveRegUnits.insert(Unit);406407// We can't insert before a terminator.408if (I != FirstTerm && I->isTerminator())409continue;410411// Some of the clobbered registers are live before I, not a valid insertion412// point.413if (!LiveRegUnits.empty()) {414LLVM_DEBUG({415dbgs() << "Would clobber";416for (unsigned LRU : LiveRegUnits)417dbgs() << ' ' << printRegUnit(LRU, TRI);418dbgs() << " live before " << *I;419});420continue;421}422423// This is a valid insertion point.424InsertionPoint = I;425LLVM_DEBUG(dbgs() << "Can insert before " << *I);426return true;427}428LLVM_DEBUG(dbgs() << "No legal insertion point found.\n");429return false;430}431432433434/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is435/// a potential candidate for if-conversion. Fill out the internal state.436///437bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) {438Head = MBB;439TBB = FBB = Tail = nullptr;440441if (Head->succ_size() != 2)442return false;443MachineBasicBlock *Succ0 = Head->succ_begin()[0];444MachineBasicBlock *Succ1 = Head->succ_begin()[1];445446// Canonicalize so Succ0 has MBB as its single predecessor.447if (Succ0->pred_size() != 1)448std::swap(Succ0, Succ1);449450if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1)451return false;452453Tail = Succ0->succ_begin()[0];454455// This is not a triangle.456if (Tail != Succ1) {457// Check for a diamond. We won't deal with any critical edges.458if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 ||459Succ1->succ_begin()[0] != Tail)460return false;461LLVM_DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> "462<< printMBBReference(*Succ0) << "/"463<< printMBBReference(*Succ1) << " -> "464<< printMBBReference(*Tail) << '\n');465466// Live-in physregs are tricky to get right when speculating code.467if (!Tail->livein_empty()) {468LLVM_DEBUG(dbgs() << "Tail has live-ins.\n");469return false;470}471} else {472LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "473<< printMBBReference(*Succ0) << " -> "474<< printMBBReference(*Tail) << '\n');475}476477// This is a triangle or a diamond.478// Skip if we cannot predicate and there are no phis skip as there must be479// side effects that can only be handled with predication.480if (!Predicate && (Tail->empty() || !Tail->front().isPHI())) {481LLVM_DEBUG(dbgs() << "No phis in tail.\n");482return false;483}484485// The branch we're looking to eliminate must be analyzable.486Cond.clear();487if (TII->analyzeBranch(*Head, TBB, FBB, Cond)) {488LLVM_DEBUG(dbgs() << "Branch not analyzable.\n");489return false;490}491492// This is weird, probably some sort of degenerate CFG.493if (!TBB) {494LLVM_DEBUG(dbgs() << "analyzeBranch didn't find conditional branch.\n");495return false;496}497498// Make sure the analyzed branch is conditional; one of the successors499// could be a landing pad. (Empty landing pads can be generated on Windows.)500if (Cond.empty()) {501LLVM_DEBUG(dbgs() << "analyzeBranch found an unconditional branch.\n");502return false;503}504505// analyzeBranch doesn't set FBB on a fall-through branch.506// Make sure it is always set.507FBB = TBB == Succ0 ? Succ1 : Succ0;508509// Any phis in the tail block must be convertible to selects.510PHIs.clear();511MachineBasicBlock *TPred = getTPred();512MachineBasicBlock *FPred = getFPred();513for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();514I != E && I->isPHI(); ++I) {515PHIs.push_back(&*I);516PHIInfo &PI = PHIs.back();517// Find PHI operands corresponding to TPred and FPred.518for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) {519if (PI.PHI->getOperand(i+1).getMBB() == TPred)520PI.TReg = PI.PHI->getOperand(i).getReg();521if (PI.PHI->getOperand(i+1).getMBB() == FPred)522PI.FReg = PI.PHI->getOperand(i).getReg();523}524assert(Register::isVirtualRegister(PI.TReg) && "Bad PHI");525assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI");526527// Get target information.528if (!TII->canInsertSelect(*Head, Cond, PI.PHI->getOperand(0).getReg(),529PI.TReg, PI.FReg, PI.CondCycles, PI.TCycles,530PI.FCycles)) {531LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI);532return false;533}534}535536// Check that the conditional instructions can be speculated.537InsertAfter.clear();538ClobberedRegUnits.reset();539if (Predicate) {540if (TBB != Tail && !canPredicateInstrs(TBB))541return false;542if (FBB != Tail && !canPredicateInstrs(FBB))543return false;544} else {545if (TBB != Tail && !canSpeculateInstrs(TBB))546return false;547if (FBB != Tail && !canSpeculateInstrs(FBB))548return false;549}550551// Try to find a valid insertion point for the speculated instructions in the552// head basic block.553if (!findInsertionPoint())554return false;555556if (isTriangle())557++NumTrianglesSeen;558else559++NumDiamondsSeen;560return true;561}562563/// \return true iff the two registers are known to have the same value.564static bool hasSameValue(const MachineRegisterInfo &MRI,565const TargetInstrInfo *TII, Register TReg,566Register FReg) {567if (TReg == FReg)568return true;569570if (!TReg.isVirtual() || !FReg.isVirtual())571return false;572573const MachineInstr *TDef = MRI.getUniqueVRegDef(TReg);574const MachineInstr *FDef = MRI.getUniqueVRegDef(FReg);575if (!TDef || !FDef)576return false;577578// If there are side-effects, all bets are off.579if (TDef->hasUnmodeledSideEffects())580return false;581582// If the instruction could modify memory, or there may be some intervening583// store between the two, we can't consider them to be equal.584if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad())585return false;586587// We also can't guarantee that they are the same if, for example, the588// instructions are both a copy from a physical reg, because some other589// instruction may have modified the value in that reg between the two590// defining insts.591if (any_of(TDef->uses(), [](const MachineOperand &MO) {592return MO.isReg() && MO.getReg().isPhysical();593}))594return false;595596// Check whether the two defining instructions produce the same value(s).597if (!TII->produceSameValue(*TDef, *FDef, &MRI))598return false;599600// Further, check that the two defs come from corresponding operands.601int TIdx = TDef->findRegisterDefOperandIdx(TReg, /*TRI=*/nullptr);602int FIdx = FDef->findRegisterDefOperandIdx(FReg, /*TRI=*/nullptr);603if (TIdx == -1 || FIdx == -1)604return false;605606return TIdx == FIdx;607}608609/// replacePHIInstrs - Completely replace PHI instructions with selects.610/// This is possible when the only Tail predecessors are the if-converted611/// blocks.612void SSAIfConv::replacePHIInstrs() {613assert(Tail->pred_size() == 2 && "Cannot replace PHIs");614MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();615assert(FirstTerm != Head->end() && "No terminators");616DebugLoc HeadDL = FirstTerm->getDebugLoc();617618// Convert all PHIs to select instructions inserted before FirstTerm.619for (PHIInfo &PI : PHIs) {620LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);621Register DstReg = PI.PHI->getOperand(0).getReg();622if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {623// We do not need the select instruction if both incoming values are624// equal, but we do need a COPY.625BuildMI(*Head, FirstTerm, HeadDL, TII->get(TargetOpcode::COPY), DstReg)626.addReg(PI.TReg);627} else {628TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg,629PI.FReg);630}631LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));632PI.PHI->eraseFromParent();633PI.PHI = nullptr;634}635}636637/// rewritePHIOperands - When there are additional Tail predecessors, insert638/// select instructions in Head and rewrite PHI operands to use the selects.639/// Keep the PHI instructions in Tail to handle the other predecessors.640void SSAIfConv::rewritePHIOperands() {641MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();642assert(FirstTerm != Head->end() && "No terminators");643DebugLoc HeadDL = FirstTerm->getDebugLoc();644645// Convert all PHIs to select instructions inserted before FirstTerm.646for (PHIInfo &PI : PHIs) {647unsigned DstReg = 0;648649LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);650if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {651// We do not need the select instruction if both incoming values are652// equal.653DstReg = PI.TReg;654} else {655Register PHIDst = PI.PHI->getOperand(0).getReg();656DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));657TII->insertSelect(*Head, FirstTerm, HeadDL,658DstReg, Cond, PI.TReg, PI.FReg);659LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));660}661662// Rewrite PHI operands TPred -> (DstReg, Head), remove FPred.663for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) {664MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB();665if (MBB == getTPred()) {666PI.PHI->getOperand(i-1).setMBB(Head);667PI.PHI->getOperand(i-2).setReg(DstReg);668} else if (MBB == getFPred()) {669PI.PHI->removeOperand(i-1);670PI.PHI->removeOperand(i-2);671}672}673LLVM_DEBUG(dbgs() << " --> " << *PI.PHI);674}675}676677/// convertIf - Execute the if conversion after canConvertIf has determined the678/// feasibility.679///680/// Any basic blocks erased will be added to RemovedBlocks.681///682void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks,683bool Predicate) {684assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");685686// Update statistics.687if (isTriangle())688++NumTrianglesConv;689else690++NumDiamondsConv;691692// Move all instructions into Head, except for the terminators.693if (TBB != Tail) {694if (Predicate)695PredicateBlock(TBB, /*ReversePredicate=*/false);696Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());697}698if (FBB != Tail) {699if (Predicate)700PredicateBlock(FBB, /*ReversePredicate=*/true);701Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());702}703// Are there extra Tail predecessors?704bool ExtraPreds = Tail->pred_size() != 2;705if (ExtraPreds)706rewritePHIOperands();707else708replacePHIInstrs();709710// Fix up the CFG, temporarily leave Head without any successors.711Head->removeSuccessor(TBB);712Head->removeSuccessor(FBB, true);713if (TBB != Tail)714TBB->removeSuccessor(Tail, true);715if (FBB != Tail)716FBB->removeSuccessor(Tail, true);717718// Fix up Head's terminators.719// It should become a single branch or a fallthrough.720DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc();721TII->removeBranch(*Head);722723// Erase the now empty conditional blocks. It is likely that Head can fall724// through to Tail, and we can join the two blocks.725if (TBB != Tail) {726RemovedBlocks.push_back(TBB);727TBB->eraseFromParent();728}729if (FBB != Tail) {730RemovedBlocks.push_back(FBB);731FBB->eraseFromParent();732}733734assert(Head->succ_empty() && "Additional head successors?");735if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {736// Splice Tail onto the end of Head.737LLVM_DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail)738<< " into head " << printMBBReference(*Head) << '\n');739Head->splice(Head->end(), Tail,740Tail->begin(), Tail->end());741Head->transferSuccessorsAndUpdatePHIs(Tail);742RemovedBlocks.push_back(Tail);743Tail->eraseFromParent();744} else {745// We need a branch to Tail, let code placement work it out later.746LLVM_DEBUG(dbgs() << "Converting to unconditional branch.\n");747SmallVector<MachineOperand, 0> EmptyCond;748TII->insertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL);749Head->addSuccessor(Tail);750}751LLVM_DEBUG(dbgs() << *Head);752}753754//===----------------------------------------------------------------------===//755// EarlyIfConverter Pass756//===----------------------------------------------------------------------===//757758namespace {759class EarlyIfConverter : public MachineFunctionPass {760const TargetInstrInfo *TII = nullptr;761const TargetRegisterInfo *TRI = nullptr;762MCSchedModel SchedModel;763MachineRegisterInfo *MRI = nullptr;764MachineDominatorTree *DomTree = nullptr;765MachineLoopInfo *Loops = nullptr;766MachineTraceMetrics *Traces = nullptr;767MachineTraceMetrics::Ensemble *MinInstr = nullptr;768SSAIfConv IfConv;769770public:771static char ID;772EarlyIfConverter() : MachineFunctionPass(ID) {}773void getAnalysisUsage(AnalysisUsage &AU) const override;774bool runOnMachineFunction(MachineFunction &MF) override;775StringRef getPassName() const override { return "Early If-Conversion"; }776777private:778bool tryConvertIf(MachineBasicBlock*);779void invalidateTraces();780bool shouldConvertIf();781};782} // end anonymous namespace783784char EarlyIfConverter::ID = 0;785char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;786787INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE,788"Early If Converter", false, false)789INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)790INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)791INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)792INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE,793"Early If Converter", false, false)794795void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {796AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();797AU.addRequired<MachineDominatorTreeWrapperPass>();798AU.addPreserved<MachineDominatorTreeWrapperPass>();799AU.addRequired<MachineLoopInfoWrapperPass>();800AU.addPreserved<MachineLoopInfoWrapperPass>();801AU.addRequired<MachineTraceMetrics>();802AU.addPreserved<MachineTraceMetrics>();803MachineFunctionPass::getAnalysisUsage(AU);804}805806namespace {807/// Update the dominator tree after if-conversion erased some blocks.808void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv,809ArrayRef<MachineBasicBlock *> Removed) {810// convertIf can remove TBB, FBB, and Tail can be merged into Head.811// TBB and FBB should not dominate any blocks.812// Tail children should be transferred to Head.813MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head);814for (auto *B : Removed) {815MachineDomTreeNode *Node = DomTree->getNode(B);816assert(Node != HeadNode && "Cannot erase the head node");817while (Node->getNumChildren()) {818assert(Node->getBlock() == IfConv.Tail && "Unexpected children");819DomTree->changeImmediateDominator(Node->back(), HeadNode);820}821DomTree->eraseNode(B);822}823}824825/// Update LoopInfo after if-conversion.826void updateLoops(MachineLoopInfo *Loops,827ArrayRef<MachineBasicBlock *> Removed) {828// If-conversion doesn't change loop structure, and it doesn't mess with back829// edges, so updating LoopInfo is simply removing the dead blocks.830for (auto *B : Removed)831Loops->removeBlock(B);832}833} // namespace834835/// Invalidate MachineTraceMetrics before if-conversion.836void EarlyIfConverter::invalidateTraces() {837Traces->verifyAnalysis();838Traces->invalidate(IfConv.Head);839Traces->invalidate(IfConv.Tail);840Traces->invalidate(IfConv.TBB);841Traces->invalidate(IfConv.FBB);842Traces->verifyAnalysis();843}844845// Adjust cycles with downward saturation.846static unsigned adjCycles(unsigned Cyc, int Delta) {847if (Delta < 0 && Cyc + Delta > Cyc)848return 0;849return Cyc + Delta;850}851852namespace {853/// Helper class to simplify emission of cycle counts into optimization remarks.854struct Cycles {855const char *Key;856unsigned Value;857};858template <typename Remark> Remark &operator<<(Remark &R, Cycles C) {859return R << ore::NV(C.Key, C.Value) << (C.Value == 1 ? " cycle" : " cycles");860}861} // anonymous namespace862863/// Apply cost model and heuristics to the if-conversion in IfConv.864/// Return true if the conversion is a good idea.865///866bool EarlyIfConverter::shouldConvertIf() {867// Stress testing mode disables all cost considerations.868if (Stress)869return true;870871// Do not try to if-convert if the condition has a high chance of being872// predictable.873MachineLoop *CurrentLoop = Loops->getLoopFor(IfConv.Head);874// If the condition is in a loop, consider it predictable if the condition875// itself or all its operands are loop-invariant. E.g. this considers a load876// from a loop-invariant address predictable; we were unable to prove that it877// doesn't alias any of the memory-writes in the loop, but it is likely to878// read to same value multiple times.879if (CurrentLoop && any_of(IfConv.Cond, [&](MachineOperand &MO) {880if (!MO.isReg() || !MO.isUse())881return false;882Register Reg = MO.getReg();883if (Register::isPhysicalRegister(Reg))884return false;885886MachineInstr *Def = MRI->getVRegDef(Reg);887return CurrentLoop->isLoopInvariant(*Def) ||888all_of(Def->operands(), [&](MachineOperand &Op) {889if (Op.isImm())890return true;891if (!MO.isReg() || !MO.isUse())892return false;893Register Reg = MO.getReg();894if (Register::isPhysicalRegister(Reg))895return false;896897MachineInstr *Def = MRI->getVRegDef(Reg);898return CurrentLoop->isLoopInvariant(*Def);899});900}))901return false;902903if (!MinInstr)904MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);905906MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());907MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred());908LLVM_DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace);909unsigned MinCrit = std::min(TBBTrace.getCriticalPath(),910FBBTrace.getCriticalPath());911912// Set a somewhat arbitrary limit on the critical path extension we accept.913unsigned CritLimit = SchedModel.MispredictPenalty/2;914915MachineBasicBlock &MBB = *IfConv.Head;916MachineOptimizationRemarkEmitter MORE(*MBB.getParent(), nullptr);917918// If-conversion only makes sense when there is unexploited ILP. Compute the919// maximum-ILP resource length of the trace after if-conversion. Compare it920// to the shortest critical path.921SmallVector<const MachineBasicBlock*, 1> ExtraBlocks;922if (IfConv.TBB != IfConv.Tail)923ExtraBlocks.push_back(IfConv.TBB);924unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks);925LLVM_DEBUG(dbgs() << "Resource length " << ResLength926<< ", minimal critical path " << MinCrit << '\n');927if (ResLength > MinCrit + CritLimit) {928LLVM_DEBUG(dbgs() << "Not enough available ILP.\n");929MORE.emit([&]() {930MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion",931MBB.findDebugLoc(MBB.back()), &MBB);932R << "did not if-convert branch: the resulting critical path ("933<< Cycles{"ResLength", ResLength}934<< ") would extend the shorter leg's critical path ("935<< Cycles{"MinCrit", MinCrit} << ") by more than the threshold of "936<< Cycles{"CritLimit", CritLimit}937<< ", which cannot be hidden by available ILP.";938return R;939});940return false;941}942943// Assume that the depth of the first head terminator will also be the depth944// of the select instruction inserted, as determined by the flag dependency.945// TBB / FBB data dependencies may delay the select even more.946MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head);947unsigned BranchDepth =948HeadTrace.getInstrCycles(*IfConv.Head->getFirstTerminator()).Depth;949LLVM_DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');950951// Look at all the tail phis, and compute the critical path extension caused952// by inserting select instructions.953MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail);954struct CriticalPathInfo {955unsigned Extra; // Count of extra cycles that the component adds.956unsigned Depth; // Absolute depth of the component in cycles.957};958CriticalPathInfo Cond{};959CriticalPathInfo TBlock{};960CriticalPathInfo FBlock{};961bool ShouldConvert = true;962for (SSAIfConv::PHIInfo &PI : IfConv.PHIs) {963unsigned Slack = TailTrace.getInstrSlack(*PI.PHI);964unsigned MaxDepth = Slack + TailTrace.getInstrCycles(*PI.PHI).Depth;965LLVM_DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);966967// The condition is pulled into the critical path.968unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles);969if (CondDepth > MaxDepth) {970unsigned Extra = CondDepth - MaxDepth;971LLVM_DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");972if (Extra > Cond.Extra)973Cond = {Extra, CondDepth};974if (Extra > CritLimit) {975LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');976ShouldConvert = false;977}978}979980// The TBB value is pulled into the critical path.981unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(*PI.PHI), PI.TCycles);982if (TDepth > MaxDepth) {983unsigned Extra = TDepth - MaxDepth;984LLVM_DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");985if (Extra > TBlock.Extra)986TBlock = {Extra, TDepth};987if (Extra > CritLimit) {988LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');989ShouldConvert = false;990}991}992993// The FBB value is pulled into the critical path.994unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(*PI.PHI), PI.FCycles);995if (FDepth > MaxDepth) {996unsigned Extra = FDepth - MaxDepth;997LLVM_DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");998if (Extra > FBlock.Extra)999FBlock = {Extra, FDepth};1000if (Extra > CritLimit) {1001LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');1002ShouldConvert = false;1003}1004}1005}10061007// Organize by "short" and "long" legs, since the diagnostics get confusing1008// when referring to the "true" and "false" sides of the branch, given that1009// those don't always correlate with what the user wrote in source-terms.1010const CriticalPathInfo Short = TBlock.Extra > FBlock.Extra ? FBlock : TBlock;1011const CriticalPathInfo Long = TBlock.Extra > FBlock.Extra ? TBlock : FBlock;10121013if (ShouldConvert) {1014MORE.emit([&]() {1015MachineOptimizationRemark R(DEBUG_TYPE, "IfConversion",1016MBB.back().getDebugLoc(), &MBB);1017R << "performing if-conversion on branch: the condition adds "1018<< Cycles{"CondCycles", Cond.Extra} << " to the critical path";1019if (Short.Extra > 0)1020R << ", and the short leg adds another "1021<< Cycles{"ShortCycles", Short.Extra};1022if (Long.Extra > 0)1023R << ", and the long leg adds another "1024<< Cycles{"LongCycles", Long.Extra};1025R << ", each staying under the threshold of "1026<< Cycles{"CritLimit", CritLimit} << ".";1027return R;1028});1029} else {1030MORE.emit([&]() {1031MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion",1032MBB.back().getDebugLoc(), &MBB);1033R << "did not if-convert branch: the condition would add "1034<< Cycles{"CondCycles", Cond.Extra} << " to the critical path";1035if (Cond.Extra > CritLimit)1036R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};1037if (Short.Extra > 0) {1038R << ", and the short leg would add another "1039<< Cycles{"ShortCycles", Short.Extra};1040if (Short.Extra > CritLimit)1041R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};1042}1043if (Long.Extra > 0) {1044R << ", and the long leg would add another "1045<< Cycles{"LongCycles", Long.Extra};1046if (Long.Extra > CritLimit)1047R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};1048}1049R << ".";1050return R;1051});1052}10531054return ShouldConvert;1055}10561057/// Attempt repeated if-conversion on MBB, return true if successful.1058///1059bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {1060bool Changed = false;1061while (IfConv.canConvertIf(MBB) && shouldConvertIf()) {1062// If-convert MBB and update analyses.1063invalidateTraces();1064SmallVector<MachineBasicBlock*, 4> RemovedBlocks;1065IfConv.convertIf(RemovedBlocks);1066Changed = true;1067updateDomTree(DomTree, IfConv, RemovedBlocks);1068updateLoops(Loops, RemovedBlocks);1069}1070return Changed;1071}10721073bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {1074LLVM_DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"1075<< "********** Function: " << MF.getName() << '\n');1076if (skipFunction(MF.getFunction()))1077return false;10781079// Only run if conversion if the target wants it.1080const TargetSubtargetInfo &STI = MF.getSubtarget();1081if (!STI.enableEarlyIfConversion())1082return false;10831084TII = STI.getInstrInfo();1085TRI = STI.getRegisterInfo();1086SchedModel = STI.getSchedModel();1087MRI = &MF.getRegInfo();1088DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();1089Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();1090Traces = &getAnalysis<MachineTraceMetrics>();1091MinInstr = nullptr;10921093bool Changed = false;1094IfConv.runOnMachineFunction(MF);10951096// Visit blocks in dominator tree post-order. The post-order enables nested1097// if-conversion in a single pass. The tryConvertIf() function may erase1098// blocks, but only blocks dominated by the head block. This makes it safe to1099// update the dominator tree while the post-order iterator is still active.1100for (auto *DomNode : post_order(DomTree))1101if (tryConvertIf(DomNode->getBlock()))1102Changed = true;11031104return Changed;1105}11061107//===----------------------------------------------------------------------===//1108// EarlyIfPredicator Pass1109//===----------------------------------------------------------------------===//11101111namespace {1112class EarlyIfPredicator : public MachineFunctionPass {1113const TargetInstrInfo *TII = nullptr;1114const TargetRegisterInfo *TRI = nullptr;1115TargetSchedModel SchedModel;1116MachineRegisterInfo *MRI = nullptr;1117MachineDominatorTree *DomTree = nullptr;1118MachineBranchProbabilityInfo *MBPI = nullptr;1119MachineLoopInfo *Loops = nullptr;1120SSAIfConv IfConv;11211122public:1123static char ID;1124EarlyIfPredicator() : MachineFunctionPass(ID) {}1125void getAnalysisUsage(AnalysisUsage &AU) const override;1126bool runOnMachineFunction(MachineFunction &MF) override;1127StringRef getPassName() const override { return "Early If-predicator"; }11281129protected:1130bool tryConvertIf(MachineBasicBlock *);1131bool shouldConvertIf();1132};1133} // end anonymous namespace11341135#undef DEBUG_TYPE1136#define DEBUG_TYPE "early-if-predicator"11371138char EarlyIfPredicator::ID = 0;1139char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID;11401141INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator",1142false, false)1143INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)1144INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)1145INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false,1146false)11471148void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const {1149AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();1150AU.addRequired<MachineDominatorTreeWrapperPass>();1151AU.addPreserved<MachineDominatorTreeWrapperPass>();1152AU.addRequired<MachineLoopInfoWrapperPass>();1153AU.addPreserved<MachineLoopInfoWrapperPass>();1154MachineFunctionPass::getAnalysisUsage(AU);1155}11561157/// Apply the target heuristic to decide if the transformation is profitable.1158bool EarlyIfPredicator::shouldConvertIf() {1159auto TrueProbability = MBPI->getEdgeProbability(IfConv.Head, IfConv.TBB);1160if (IfConv.isTriangle()) {1161MachineBasicBlock &IfBlock =1162(IfConv.TBB == IfConv.Tail) ? *IfConv.FBB : *IfConv.TBB;11631164unsigned ExtraPredCost = 0;1165unsigned Cycles = 0;1166for (MachineInstr &I : IfBlock) {1167unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);1168if (NumCycles > 1)1169Cycles += NumCycles - 1;1170ExtraPredCost += TII->getPredicationCost(I);1171}11721173return TII->isProfitableToIfCvt(IfBlock, Cycles, ExtraPredCost,1174TrueProbability);1175}1176unsigned TExtra = 0;1177unsigned FExtra = 0;1178unsigned TCycle = 0;1179unsigned FCycle = 0;1180for (MachineInstr &I : *IfConv.TBB) {1181unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);1182if (NumCycles > 1)1183TCycle += NumCycles - 1;1184TExtra += TII->getPredicationCost(I);1185}1186for (MachineInstr &I : *IfConv.FBB) {1187unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);1188if (NumCycles > 1)1189FCycle += NumCycles - 1;1190FExtra += TII->getPredicationCost(I);1191}1192return TII->isProfitableToIfCvt(*IfConv.TBB, TCycle, TExtra, *IfConv.FBB,1193FCycle, FExtra, TrueProbability);1194}11951196/// Attempt repeated if-conversion on MBB, return true if successful.1197///1198bool EarlyIfPredicator::tryConvertIf(MachineBasicBlock *MBB) {1199bool Changed = false;1200while (IfConv.canConvertIf(MBB, /*Predicate*/ true) && shouldConvertIf()) {1201// If-convert MBB and update analyses.1202SmallVector<MachineBasicBlock *, 4> RemovedBlocks;1203IfConv.convertIf(RemovedBlocks, /*Predicate*/ true);1204Changed = true;1205updateDomTree(DomTree, IfConv, RemovedBlocks);1206updateLoops(Loops, RemovedBlocks);1207}1208return Changed;1209}12101211bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) {1212LLVM_DEBUG(dbgs() << "********** EARLY IF-PREDICATOR **********\n"1213<< "********** Function: " << MF.getName() << '\n');1214if (skipFunction(MF.getFunction()))1215return false;12161217const TargetSubtargetInfo &STI = MF.getSubtarget();1218TII = STI.getInstrInfo();1219TRI = STI.getRegisterInfo();1220MRI = &MF.getRegInfo();1221SchedModel.init(&STI);1222DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();1223Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();1224MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();12251226bool Changed = false;1227IfConv.runOnMachineFunction(MF);12281229// Visit blocks in dominator tree post-order. The post-order enables nested1230// if-conversion in a single pass. The tryConvertIf() function may erase1231// blocks, but only blocks dominated by the head block. This makes it safe to1232// update the dominator tree while the post-order iterator is still active.1233for (auto *DomNode : post_order(DomTree))1234if (tryConvertIf(DomNode->getBlock()))1235Changed = true;12361237return Changed;1238}123912401241