Path: blob/main/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
35234 views
//==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8/// \file Break False Dependency pass.9///10/// Some instructions have false dependencies which cause unnecessary stalls.11/// For example, instructions may write part of a register and implicitly12/// need to read the other parts of the register. This may cause unwanted13/// stalls preventing otherwise unrelated instructions from executing in14/// parallel in an out-of-order CPU.15/// This pass is aimed at identifying and avoiding these dependencies.16//17//===----------------------------------------------------------------------===//1819#include "llvm/ADT/DepthFirstIterator.h"20#include "llvm/CodeGen/LivePhysRegs.h"21#include "llvm/CodeGen/MachineFunctionPass.h"22#include "llvm/CodeGen/ReachingDefAnalysis.h"23#include "llvm/CodeGen/RegisterClassInfo.h"24#include "llvm/CodeGen/TargetInstrInfo.h"25#include "llvm/InitializePasses.h"26#include "llvm/MC/MCInstrDesc.h"27#include "llvm/MC/MCRegister.h"28#include "llvm/MC/MCRegisterInfo.h"29#include "llvm/Support/Debug.h"3031using namespace llvm;3233namespace llvm {3435class BreakFalseDeps : public MachineFunctionPass {36private:37MachineFunction *MF = nullptr;38const TargetInstrInfo *TII = nullptr;39const TargetRegisterInfo *TRI = nullptr;40RegisterClassInfo RegClassInfo;4142/// List of undefined register reads in this block in forward order.43std::vector<std::pair<MachineInstr *, unsigned>> UndefReads;4445/// Storage for register unit liveness.46LivePhysRegs LiveRegSet;4748ReachingDefAnalysis *RDA = nullptr;4950public:51static char ID; // Pass identification, replacement for typeid5253BreakFalseDeps() : MachineFunctionPass(ID) {54initializeBreakFalseDepsPass(*PassRegistry::getPassRegistry());55}5657void getAnalysisUsage(AnalysisUsage &AU) const override {58AU.setPreservesAll();59AU.addRequired<ReachingDefAnalysis>();60MachineFunctionPass::getAnalysisUsage(AU);61}6263bool runOnMachineFunction(MachineFunction &MF) override;6465MachineFunctionProperties getRequiredProperties() const override {66return MachineFunctionProperties().set(67MachineFunctionProperties::Property::NoVRegs);68}6970private:71/// Process he given basic block.72void processBasicBlock(MachineBasicBlock *MBB);7374/// Update def-ages for registers defined by MI.75/// Also break dependencies on partial defs and undef uses.76void processDefs(MachineInstr *MI);7778/// Helps avoid false dependencies on undef registers by updating the79/// machine instructions' undef operand to use a register that the instruction80/// is truly dependent on, or use a register with clearance higher than Pref.81/// Returns true if it was able to find a true dependency, thus not requiring82/// a dependency breaking instruction regardless of clearance.83bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,84unsigned Pref);8586/// Return true to if it makes sense to break dependence on a partial87/// def or undef use.88bool shouldBreakDependence(MachineInstr *, unsigned OpIdx, unsigned Pref);8990/// Break false dependencies on undefined register reads.91/// Walk the block backward computing precise liveness. This is expensive, so92/// we only do it on demand. Note that the occurrence of undefined register93/// reads that should be broken is very rare, but when they occur we may have94/// many in a single block.95void processUndefReads(MachineBasicBlock *);96};9798} // namespace llvm99100#define DEBUG_TYPE "break-false-deps"101102char BreakFalseDeps::ID = 0;103INITIALIZE_PASS_BEGIN(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false)104INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)105INITIALIZE_PASS_END(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false)106107FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); }108109bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,110unsigned Pref) {111112// We can't change tied operands.113if (MI->isRegTiedToDefOperand(OpIdx))114return false;115116MachineOperand &MO = MI->getOperand(OpIdx);117assert(MO.isUndef() && "Expected undef machine operand");118119// We can't change registers that aren't renamable.120if (!MO.isRenamable())121return false;122123MCRegister OriginalReg = MO.getReg().asMCReg();124125// Update only undef operands that have reg units that are mapped to one root.126for (MCRegUnit Unit : TRI->regunits(OriginalReg)) {127unsigned NumRoots = 0;128for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {129NumRoots++;130if (NumRoots > 1)131return false;132}133}134135// Get the undef operand's register class136const TargetRegisterClass *OpRC =137TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);138assert(OpRC && "Not a valid register class");139140// If the instruction has a true dependency, we can hide the false depdency141// behind it.142for (MachineOperand &CurrMO : MI->all_uses()) {143if (CurrMO.isUndef() || !OpRC->contains(CurrMO.getReg()))144continue;145// We found a true dependency - replace the undef register with the true146// dependency.147MO.setReg(CurrMO.getReg());148return true;149}150151// Go over all registers in the register class and find the register with152// max clearance or clearance higher than Pref.153unsigned MaxClearance = 0;154unsigned MaxClearanceReg = OriginalReg;155ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC);156for (MCPhysReg Reg : Order) {157unsigned Clearance = RDA->getClearance(MI, Reg);158if (Clearance <= MaxClearance)159continue;160MaxClearance = Clearance;161MaxClearanceReg = Reg;162163if (MaxClearance > Pref)164break;165}166167// Update the operand if we found a register with better clearance.168if (MaxClearanceReg != OriginalReg)169MO.setReg(MaxClearanceReg);170171return false;172}173174bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,175unsigned Pref) {176MCRegister Reg = MI->getOperand(OpIdx).getReg().asMCReg();177unsigned Clearance = RDA->getClearance(MI, Reg);178LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);179180if (Pref > Clearance) {181LLVM_DEBUG(dbgs() << ": Break dependency.\n");182return true;183}184LLVM_DEBUG(dbgs() << ": OK .\n");185return false;186}187188void BreakFalseDeps::processDefs(MachineInstr *MI) {189assert(!MI->isDebugInstr() && "Won't process debug values");190191const MCInstrDesc &MCID = MI->getDesc();192193// Break dependence on undef uses. Do this before updating LiveRegs below.194// This can remove a false dependence with no additional instructions.195for (unsigned i = MCID.getNumDefs(), e = MCID.getNumOperands(); i != e; ++i) {196MachineOperand &MO = MI->getOperand(i);197if (!MO.isReg() || !MO.getReg() || !MO.isUse() || !MO.isUndef())198continue;199200unsigned Pref = TII->getUndefRegClearance(*MI, i, TRI);201if (Pref) {202bool HadTrueDependency = pickBestRegisterForUndef(MI, i, Pref);203// We don't need to bother trying to break a dependency if this204// instruction has a true dependency on that register through another205// operand - we'll have to wait for it to be available regardless.206if (!HadTrueDependency && shouldBreakDependence(MI, i, Pref))207UndefReads.push_back(std::make_pair(MI, i));208}209}210211// The code below allows the target to create a new instruction to break the212// dependence. That opposes the goal of minimizing size, so bail out now.213if (MF->getFunction().hasMinSize())214return;215216for (unsigned i = 0,217e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();218i != e; ++i) {219MachineOperand &MO = MI->getOperand(i);220if (!MO.isReg() || !MO.getReg())221continue;222if (MO.isUse())223continue;224// Check clearance before partial register updates.225unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);226if (Pref && shouldBreakDependence(MI, i, Pref))227TII->breakPartialRegDependency(*MI, i, TRI);228}229}230231void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) {232if (UndefReads.empty())233return;234235// The code below allows the target to create a new instruction to break the236// dependence. That opposes the goal of minimizing size, so bail out now.237if (MF->getFunction().hasMinSize())238return;239240// Collect this block's live out register units.241LiveRegSet.init(*TRI);242// We do not need to care about pristine registers as they are just preserved243// but not actually used in the function.244LiveRegSet.addLiveOutsNoPristines(*MBB);245246MachineInstr *UndefMI = UndefReads.back().first;247unsigned OpIdx = UndefReads.back().second;248249for (MachineInstr &I : llvm::reverse(*MBB)) {250// Update liveness, including the current instruction's defs.251LiveRegSet.stepBackward(I);252253if (UndefMI == &I) {254if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))255TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI);256257UndefReads.pop_back();258if (UndefReads.empty())259return;260261UndefMI = UndefReads.back().first;262OpIdx = UndefReads.back().second;263}264}265}266267void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) {268UndefReads.clear();269// If this block is not done, it makes little sense to make any decisions270// based on clearance information. We need to make a second pass anyway,271// and by then we'll have better information, so we can avoid doing the work272// to try and break dependencies now.273for (MachineInstr &MI : *MBB) {274if (!MI.isDebugInstr())275processDefs(&MI);276}277processUndefReads(MBB);278}279280bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) {281if (skipFunction(mf.getFunction()))282return false;283MF = &mf;284TII = MF->getSubtarget().getInstrInfo();285TRI = MF->getSubtarget().getRegisterInfo();286RDA = &getAnalysis<ReachingDefAnalysis>();287288RegClassInfo.runOnMachineFunction(mf);289290LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n");291292// Skip Dead blocks due to ReachingDefAnalysis has no idea about instructions293// in them.294df_iterator_default_set<MachineBasicBlock *> Reachable;295for (MachineBasicBlock *MBB : depth_first_ext(&mf, Reachable))296(void)MBB /* Mark all reachable blocks */;297298// Traverse the basic blocks.299for (MachineBasicBlock &MBB : mf)300if (Reachable.count(&MBB))301processBasicBlock(&MBB);302303return false;304}305306307