Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp
//===- GVNSink.cpp - sink expressions into successors ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file GVNSink.cpp
/// This pass attempts to sink instructions into successors, reducing static
/// instruction count and enabling if-conversion.
///
/// We use a variant of global value numbering to decide what can be sunk.
/// Consider:
///
/// [ %a1 = add i32 %b, 1 ]   [ %c1 = add i32 %d, 1 ]
/// [ %a2 = xor i32 %a1, 1 ]  [ %c2 = xor i32 %c1, 1 ]
///                     \         /
///               [ %e = phi i32 %a2, %c2 ]
///               [ add i32 %e, 4         ]
///
///
/// GVN would number %a1 and %c1 differently because they compute different
/// results - the VN of an instruction is a function of its opcode and the
/// transitive closure of its operands. This is the key property for hoisting
/// and CSE.
///
/// What we want when sinking however is for a numbering that is a function of
/// the *uses* of an instruction, which allows us to answer the question "if I
/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The PostValueTable class in GVN provides this
/// mapping.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "gvn-sink"

STATISTIC(NumRemoved, "Number of instructions removed");

namespace llvm {
namespace GVNExpression {

LLVM_DUMP_METHOD void Expression::dump() const {
  print(dbgs());
  dbgs() << "\n";
}

} // end namespace GVNExpression
} // end namespace llvm

namespace {

static bool isMemoryInst(const Instruction *I) {
  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
         (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
         (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
}

/// Iterates through instructions in a set of blocks in reverse order from the
/// first non-terminator. For example (assume all blocks have size n):
///   LockstepReverseIterator I([B1, B2, B3]);
///   *I-- = [B1[n], B2[n], B3[n]];
///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
///   ...
///
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
/// to determine which blocks are still going and the order they appear in the
/// list returned by operator*.
class LockstepReverseIterator {
  ArrayRef<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 4> ActiveBlocks;
  SmallVector<Instruction *, 4> Insts;
  bool Fail;

public:
  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
    reset();
  }

  void reset() {
    Fail = false;
    ActiveBlocks.clear();
    for (BasicBlock *BB : Blocks)
      ActiveBlocks.insert(BB);
    Insts.clear();
    for (BasicBlock *BB : Blocks) {
      if (BB->size() <= 1) {
        // Block wasn't big enough - only contained a terminator.
        ActiveBlocks.remove(BB);
        continue;
      }
      Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
    }
    if (Insts.empty())
      Fail = true;
  }

  bool isValid() const { return !Fail; }
  ArrayRef<Instruction *> operator*() const { return Insts; }

  // Note: This needs to return a SmallSetVector as the elements of
  // ActiveBlocks will be later copied to Blocks using std::copy. The
  // resultant order of elements in Blocks needs to be deterministic.
  // Using SmallPtrSet instead causes non-deterministic order while
  // copying. And we cannot simply sort Blocks as they need to match the
  // corresponding Values.
  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }

  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
    for (auto II = Insts.begin(); II != Insts.end();) {
      if (!Blocks.contains((*II)->getParent())) {
        ActiveBlocks.remove((*II)->getParent());
        II = Insts.erase(II);
      } else {
        ++II;
      }
    }
  }

  void operator--() {
    if (Fail)
      return;
    SmallVector<Instruction *, 4> NewInsts;
    for (auto *Inst : Insts) {
      if (Inst == &Inst->getParent()->front())
        ActiveBlocks.remove(Inst->getParent());
      else
        NewInsts.push_back(Inst->getPrevNonDebugInstruction());
    }
    if (NewInsts.empty()) {
      Fail = true;
      return;
    }
    Insts = NewInsts;
  }
};

//===----------------------------------------------------------------------===//

/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
/// required.
struct SinkingInstructionCandidate {
  unsigned NumBlocks;
  unsigned NumInstructions;
  unsigned NumPHIs;
  unsigned NumMemoryInsts;
  int Cost = -1;
  SmallVector<BasicBlock *, 4> Blocks;

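  // The cost below rewards sinking more instructions from more blocks, while
  // penalising (quadratically) any extra PHIs that have to be created and any
  // edge that has to be split. For example, sinking 3 instructions from 2
  // blocks at the price of 1 extra PHI and no split edge gives
  // Cost = 3 * (2 - 1) - 1 * 1 - 0 = 2.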
  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
    Cost = (NumInstructions * (NumBlocks - 1)) -
           (NumExtraPHIs *
            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
           - SplitEdgeCost;
  }

  bool operator>(const SinkingInstructionCandidate &Other) const {
    return Cost > Other.Cost;
  }
};

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
  return OS;
}
#endif

//===----------------------------------------------------------------------===//

/// Describes a PHI node that may or may not exist. These track the PHIs
/// that must be created if we sunk a sequence of instructions. It provides
/// a hash function for efficient equality comparisons.
class ModelledPHI {
  SmallVector<Value *, 4> Values;
  SmallVector<BasicBlock *, 4> Blocks;

public:
  ModelledPHI() = default;

  ModelledPHI(const PHINode *PN,
              const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
    // BasicBlock comes first so we sort by basic block pointer order,
    // then by value pointer order. No need to call `verifyModelledPHI`
    // As the Values and Blocks are populated in a deterministic order.
    using OpsType = std::pair<BasicBlock *, Value *>;
    SmallVector<OpsType, 4> Ops;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});

    auto ComesBefore = [BlockOrder](OpsType O1, OpsType O2) {
      return BlockOrder.lookup(O1.first) < BlockOrder.lookup(O2.first);
    };
    // Sort in a deterministic order.
    llvm::sort(Ops, ComesBefore);

    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }

  /// Create a dummy ModelledPHI that will compare unequal to any other
  /// ModelledPHI without the same ID.
  /// \note This is specifically for DenseMapInfo - do not use this!
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    M.Values.push_back(reinterpret_cast<Value*>(ID));
    return M;
  }

  void
  verifyModelledPHI(const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
    assert(Values.size() > 1 && Blocks.size() > 1 &&
           "Modelling PHI with less than 2 values");
    auto ComesBefore = [BlockOrder](const BasicBlock *BB1,
                                    const BasicBlock *BB2) {
      return BlockOrder.lookup(BB1) < BlockOrder.lookup(BB2);
    };
    assert(llvm::is_sorted(Blocks, ComesBefore));
    int C = 0;
    for (const Value *V : Values) {
      if (!isa<UndefValue>(V)) {
        assert(cast<Instruction>(V)->getParent() == Blocks[C]);
        (void)C;
      }
      C++;
    }
  }

  /// Create a PHI from an array of incoming values and incoming blocks.
  ModelledPHI(SmallVectorImpl<Instruction *> &V,
              SmallSetVector<BasicBlock *, 4> &B,
              const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
    // The order of Values and Blocks are already ordered by the caller.
    llvm::copy(V, std::back_inserter(Values));
    llvm::copy(B, std::back_inserter(Blocks));
    verifyModelledPHI(BlockOrder);
  }

  /// Create a PHI from [I[OpNum] for I in Insts].
  /// TODO: Figure out a way to verifyModelledPHI in this constructor.
  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum,
              SmallSetVector<BasicBlock *, 4> &B) {
    llvm::copy(B, std::back_inserter(Blocks));
    for (auto *I : Insts)
      Values.push_back(I->getOperand(OpNum));
  }

  /// Restrict the PHI's contents down to only \c NewBlocks.
  /// \c NewBlocks must be a subset of \c this->Blocks.
  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
    auto BI = Blocks.begin();
    auto VI = Values.begin();
    while (BI != Blocks.end()) {
      assert(VI != Values.end());
      if (!NewBlocks.contains(*BI)) {
        BI = Blocks.erase(BI);
        VI = Values.erase(VI);
      } else {
        ++BI;
        ++VI;
      }
    }
    assert(Blocks.size() == NewBlocks.size());
  }

  ArrayRef<Value *> getValues() const { return Values; }

  bool areAllIncomingValuesSame() const {
    return llvm::all_equal(Values);
  }

  bool areAllIncomingValuesSameType() const {
    return llvm::all_of(
        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
  }

  bool areAnyIncomingValuesConstant() const {
    return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
  }

  // Hash functor
  unsigned hash() const {
    // Is deterministic because Values are saved in a specific order.
    return (unsigned)hash_combine_range(Values.begin(), Values.end());
  }

  bool operator==(const ModelledPHI &Other) const {
    return Values == Other.Values && Blocks == Other.Blocks;
  }
};

template <typename ModelledPHI> struct DenseMapInfo {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }

  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }

  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }

  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};

using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;

//===----------------------------------------------------------------------===//
//                             ValueTable
//===----------------------------------------------------------------------===//
// This is a value number table where the value number is a function of the
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
// that the program would be equivalent if we replaced A with PHI(A, B).
//===----------------------------------------------------------------------===//

/// A GVN expression describing how an instruction is used. The operands
/// field of BasicExpression is used to store uses, not operands.
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with sideeffects.
class InstructionUseExpr : public GVNExpression::BasicExpression {
  unsigned MemoryUseOrder = -1;
  bool Volatile = false;
  ArrayRef<int> ShuffleMask;

public:
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());

    if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
      ShuffleMask = SVI->getShuffleMask().copy(A);

    for (auto &U : I->uses())
      op_push_back(U.getUser());
    llvm::sort(op_begin(), op_end());
  }

  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }

  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile, ShuffleMask);
  }

  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
                               ShuffleMask);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};

using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;

class ValueTable {
  DenseMap<Value *, uint32_t> ValueNumbering;
  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  uint32_t nextValueNumber = 1;
  BasicBlocksSet ReachableBBs;

  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));

    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      CmpInst::Predicate Predicate = C->getPredicate();
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }

  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }

public:
  ValueTable() = default;

  /// Set basic blocks reachable from entry block.
  void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
    this->ReachableBBs = ReachableBBs;
  }

  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    Instruction *I = cast<Instruction>(V);
    if (!ReachableBBs.contains(I->getParent()))
      return ~0U;

    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }

    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto I = HashNumbering.find(H);
      if (I != HashNumbering.end()) {
        e = I->second;
      } else {
        e = nextValueNumber++;
        HashNumbering[H] = e;
        ExpressionNumbering[exp] = e;
      }
    }
    ValueNumbering[V] = e;
    return e;
  }

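  // Note: lookupOrAdd hashes an instruction from its opcode, type, memory
  // discriminators and the value numbers of its *users* (the lambda passed to
  // getHashValue above recurses back into lookupOrAdd), so two instructions in
  // different blocks receive the same number when they are used in equivalent
  // ways by instructions that have themselves been numbered equal.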

  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }

  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }

  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge that we only ever check for equivalence between blocks sharing a
  /// single immediate successor that is common, and when determining if I1 ==
  /// I2 we will have already determined that next(I1) == next(I2). This
  /// inductive property allows us to simply return the value number of the next
  /// instruction that defines memory.
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      if (!isMemoryInst(&*I))
        continue;
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
    return 0;
  }
};

//===----------------------------------------------------------------------===//

class GVNSink {
public:
  GVNSink() {}

  bool run(Function &F) {
    LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
                      << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function*> RPOT(&F);
    VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
    // Populate the reverse post-order so that basic blocks are numbered in a
    // deterministic order. Any arbitrary ordering will do as long as it is
    // deterministic. The ordering of newly created basic blocks is irrelevant
    // because RPOT (used for computing the sinkable candidates) is obtained
    // ahead of time, and only the order of the original blocks matters to
    // this pass.
    unsigned NodeOrdering = 0;
    RPOTOrder[*RPOT.begin()] = ++NodeOrdering;
    for (auto *BB : RPOT)
      if (!pred_empty(BB))
        RPOTOrder[BB] = ++NodeOrdering;
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }

private:
  ValueTable VN;
  DenseMap<const BasicBlock *, unsigned> RPOTOrder;

  bool shouldAvoidSinkingInstruction(Instruction *I) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }

  /// The main heuristic function. Analyze the set of instructions pointed to
  /// by LRI and return a candidate solution if these instructions can be
  /// sunk, or std::nullopt otherwise.
  std::optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
      LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
      ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);

  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (PHINode &PN : BB->phis()) {
      auto MPHI = ModelledPHI(&PN, RPOTOrder);
      PHIs.insert(MPHI);
      for (auto *V : MPHI.getValues())
        PHIContents.insert(V);
    }
  }

  /// The main instruction sinking driver. Set up state and try and sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);

  /// Remove PHIs that all have the same incoming value.
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};

std::optional<SinkingInstructionCandidate>
GVNSink::analyzeInstructionForSinking(LockstepReverseIterator &LRI,
                                      unsigned &InstNum,
                                      unsigned &MemoryInstNum,
                                      ModelledPHISet &NeededPHIs,
                                      SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
                                                                  : Insts) {
    I->dump();
  } dbgs() << " ]\n";);

  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return std::nullopt;
    VNums[N]++;
  }
  unsigned VNumToSink = llvm::max_element(VNums, llvm::less_second())->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return std::nullopt;

  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (shouldAvoidSinkingInstruction(I))
      return std::nullopt;

  // If we've restricted the incoming blocks, restrict all needed PHIs also
  // to that set.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }

  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds, RPOTOrder);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.erase(NewPHI))
    RecomputePHIContents = true;

  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  // Is this instruction required by a later PHI that doesn't match this PHI?
  // If so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different to NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return std::nullopt;

  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];

  auto isNotSameOperation = [&I0](Instruction *I) {
    return !I0->isSameOperationAs(I);
  };

  if (any_of(NewInsts, isNotSameOperation))
    return std::nullopt;

  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return std::nullopt;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return std::nullopt;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return std::nullopt;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  append_range(Cand.Blocks, ActivePreds);

  return Cand;
}

unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
             BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
  SmallVector<BasicBlock *, 4> Preds;
  for (auto *B : predecessors(BBEnd)) {
    // Bail out on basic blocks without a predecessor (PR42346).
    if (!RPOTOrder.count(B))
      return 0;
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  if (Preds.size() < 2)
    return 0;
  auto ComesBefore = [this](const BasicBlock *BB1, const BasicBlock *BB2) {
    return RPOTOrder.lookup(BB1) < RPOTOrder.lookup(BB2);
  };
  // Sort in a deterministic order.
  llvm::sort(Preds, ComesBefore);

  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  llvm::erase_if(Preds, [](BasicBlock *BB) {
    return BB->getTerminator()->getNumSuccessors() != 1;
  });

  LockstepReverseIterator LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();

  while (LRI.isValid()) {
    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
                                             NeededPHIs, PHIContents);
    if (!Cand)
      break;
    Cand->calculateCost(NumOrigPHIs, Preds.size());
    Candidates.emplace_back(*Cand);
    --LRI;
  }

  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
                                                         : Candidates) dbgs()
                                                    << "  " << C << "\n";);

  // Pick the top candidate, as long as it is positive!
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;
  auto C = Candidates.front();

  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
  BasicBlock *InsertBB = BBEnd;
  if (C.Blocks.size() < NumOrigPreds) {
    LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
               BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
    if (!InsertBB) {
      LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
      // Edge couldn't be split.
      return 0;
    }
  }

  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, InsertBB);

  return C.NumInstructions;
}

void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
  Instruction *I0 = Insts.front();

  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  for (auto *I : Insts)
    if (I != I0) {
      I->replaceAllUsesWith(I0);
      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
    }
  foldPointlessPHINodes(BBEnd);

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}

} // end anonymous namespace

PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
  GVNSink G;
  if (!G.run(F))
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}