Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs a simple dominator tree walk that eliminates trivially
// redundant instructions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <deque>
#include <memory>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "early-cse"

STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE, "Number of instructions CSE'd");
STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
STATISTIC(NumCSECall, "Number of call instructions CSE'd");
STATISTIC(NumCSEGEP, "Number of GEP instructions CSE'd");
STATISTIC(NumDSE, "Number of trivial dead stores removed");

DEBUG_COUNTER(CSECounter, "early-cse",
              "Controls which instructions are removed");

static cl::opt<unsigned> EarlyCSEMssaOptCap(
    "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
    cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
             "for faster compile. Caps the MemorySSA clobbering calls."));

static cl::opt<bool> EarlyCSEDebugHash(
    "earlycse-debug-hash", cl::init(false), cl::Hidden,
    cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
             "function is well-behaved w.r.t. its isEqual predicate"));

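// Illustrative usage (not part of the original source): these hidden options
// can be exercised when running the pass in isolation through `opt`, for
// example along the lines of
//
//   opt -passes=early-cse -earlycse-debug-hash -S input.ll
//   opt -passes='early-cse<memssa>' -earlycse-mssa-optimization-cap=250 -S input.ll
//
// The exact pass-pipeline spelling may differ between LLVM versions; this is a
// sketch of how the flags defined above are reached, not a statement about the
// supported command-line surface.
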
//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//

namespace {

/// Struct representing the available values in the scoped hash table.
struct SimpleValue {
  Instruction *Inst;

  SimpleValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // This can only handle non-void readnone functions.
    // Also handled are constrained intrinsics that look like the types
    // of instruction handled below (UnaryOperator, etc.).
    if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
      if (Function *F = CI->getCalledFunction()) {
        switch ((Intrinsic::ID)F->getIntrinsicID()) {
        case Intrinsic::experimental_constrained_fadd:
        case Intrinsic::experimental_constrained_fsub:
        case Intrinsic::experimental_constrained_fmul:
        case Intrinsic::experimental_constrained_fdiv:
        case Intrinsic::experimental_constrained_frem:
        case Intrinsic::experimental_constrained_fptosi:
        case Intrinsic::experimental_constrained_sitofp:
        case Intrinsic::experimental_constrained_fptoui:
        case Intrinsic::experimental_constrained_uitofp:
        case Intrinsic::experimental_constrained_fcmp:
        case Intrinsic::experimental_constrained_fcmps: {
          auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
          if (CFP->getExceptionBehavior() &&
              CFP->getExceptionBehavior() == fp::ebStrict)
            return false;
          // Since we CSE across function calls we must not allow
          // the rounding mode to change.
          if (CFP->getRoundingMode() &&
              CFP->getRoundingMode() == RoundingMode::Dynamic)
            return false;
          return true;
        }
        }
      }
      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy() &&
             // FIXME: Currently the calls which may access the thread id may
             // be considered as not accessing the memory. But this is
             // problematic for coroutines, since coroutines may resume in a
             // different thread. So we disable the optimization here for
             // correctness. However, it may block many other correct
             // optimizations. Revert this one when we detect the memory
             // accessing kind more precisely.
             !CI->getFunction()->isPresplitCoroutine();
    }
    return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
           isa<BinaryOperator>(Inst) || isa<CmpInst>(Inst) ||
           isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
           isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst) ||
           isa<FreezeInst>(Inst);
  }
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<SimpleValue> {
  static inline SimpleValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline SimpleValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(SimpleValue Val);
  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
};

} // end namespace llvm

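// Illustrative example (not from the original source): two syntactically
// identical "simple values", e.g.
//
//   %a1 = add i32 %x, %y
//   ...
//   %a2 = add i32 %x, %y   ; dominated by %a1
//
// are merged by looking %a2 up in the scoped hash table keyed on SimpleValue.
// The hashing and equality below additionally canonicalize commuted and
// swapped-predicate forms, so "add i32 %y, %x" would find the same entry.
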
/// Match a 'select' including an optional 'not' of the condition.
static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
                                           Value *&B,
                                           SelectPatternFlavor &Flavor) {
  // Return false if V is not even a select.
  if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
    return false;

  // Look through a 'not' of the condition operand by swapping A/B.
  Value *CondNot;
  if (match(Cond, m_Not(m_Value(CondNot)))) {
    Cond = CondNot;
    std::swap(A, B);
  }

  // Match canonical forms of min/max. We are not using ValueTracking's
  // more powerful matchSelectPattern() because it may rely on instruction flags
  // such as "nsw". That would be incompatible with the current hashing
  // mechanism that may remove flags to increase the likelihood of CSE.

  Flavor = SPF_UNKNOWN;
  CmpInst::Predicate Pred;

  if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
    // Check for commuted variants of min/max by swapping the predicate.
    // If we do not match the standard or commuted patterns, this is not a
    // recognized form of min/max, but it is still a select, so return true.
    if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
      return true;
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }

  switch (Pred) {
  case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
  case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
  case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
  case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
  // Non-strict inequalities.
  case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
  case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
  case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
  case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
  default: break;
  }

  return true;
}

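// Illustrative example (not from the original source): both of the following
// forms are reported as SPF_SMIN by the matcher above, the second one via the
// swapped-predicate path:
//
//   %c = icmp slt i32 %x, %y
//   %m = select i1 %c, i32 %x, i32 %y
//
//   %c = icmp sgt i32 %y, %x
//   %m = select i1 %c, i32 %x, i32 %y
//
// A 'not' of the condition is looked through by first swapping the select arms.
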
static unsigned hashCallInst(CallInst *CI) {
  // Don't CSE convergent calls in different basic blocks, because they
  // implicitly depend on the set of threads that is currently executing.
  if (CI->isConvergent()) {
    return hash_combine(
        CI->getOpcode(), CI->getParent(),
        hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
  }
  return hash_combine(
      CI->getOpcode(),
      hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
}

static unsigned getHashValueImpl(SimpleValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash in all of the operands as pointers.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
    Value *LHS = BinOp->getOperand(0);
    Value *RHS = BinOp->getOperand(1);
    if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
      std::swap(LHS, RHS);

    return hash_combine(BinOp->getOpcode(), LHS, RHS);
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
    // Compares can be commuted by swapping the comparands and
    // updating the predicate. Choose the form that has the
    // comparands in sorted order, or in the case of a tie, the
    // one with the lower predicate.
    Value *LHS = CI->getOperand(0);
    Value *RHS = CI->getOperand(1);
    CmpInst::Predicate Pred = CI->getPredicate();
    CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
    if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
      std::swap(LHS, RHS);
      Pred = SwappedPred;
    }
    return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
  }

  // Hash general selects to allow matching commuted true/false operands.
  SelectPatternFlavor SPF;
  Value *Cond, *A, *B;
  if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
    // Hash min/max (cmp + select) to allow for commuted operands.
    // Min/max may also have a non-canonical compare predicate (e.g., the
    // compare for smin may use 'sgt' rather than 'slt'), and non-canonical
    // operands in the compare.
    // TODO: We should also detect FP min/max.
    if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
        SPF == SPF_UMIN || SPF == SPF_UMAX) {
      if (A > B)
        std::swap(A, B);
      return hash_combine(Inst->getOpcode(), SPF, A, B);
    }

    // Hash general selects to allow matching commuted true/false operands.

    // If we do not have a compare as the condition, just hash in the condition.
    CmpInst::Predicate Pred;
    Value *X, *Y;
    if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
      return hash_combine(Inst->getOpcode(), Cond, A, B);

    // Similar to cmp normalization (above) - canonicalize the predicate value:
    // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
    if (CmpInst::getInversePredicate(Pred) < Pred) {
      Pred = CmpInst::getInversePredicate(Pred);
      std::swap(A, B);
    }
    return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
  }

  if (CastInst *CI = dyn_cast<CastInst>(Inst))
    return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));

  if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
    return hash_combine(FI->getOpcode(), FI->getOperand(0));

  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
    return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
                        hash_combine_range(EVI->idx_begin(), EVI->idx_end()));

  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
    return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
                        IVI->getOperand(1),
                        hash_combine_range(IVI->idx_begin(), IVI->idx_end()));

  assert((isa<CallInst>(Inst) || isa<ExtractElementInst>(Inst) ||
          isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
          isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
         "Invalid/unknown instruction");

  // Handle intrinsics with commutative operands.
  auto *II = dyn_cast<IntrinsicInst>(Inst);
  if (II && II->isCommutative() && II->arg_size() >= 2) {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    if (LHS > RHS)
      std::swap(LHS, RHS);
    return hash_combine(
        II->getOpcode(), LHS, RHS,
        hash_combine_range(II->value_op_begin() + 2, II->value_op_end()));
  }

  // gc.relocate is a 'special' call: its second and third operands are
  // not real values, but indices into the statepoint's argument list.
  // Get the values they point to.
  if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Inst))
    return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
                        GCR->getBasePtr(), GCR->getDerivedPtr());

  // Don't CSE convergent calls in different basic blocks, because they
  // implicitly depend on the set of threads that is currently executing.
  if (CallInst *CI = dyn_cast<CallInst>(Inst))
    return hashCallInst(CI);

  // Mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

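// Illustrative example (not from the original source): because commutative
// operands are sorted by pointer identity before hashing (and isEqual below
// accepts the swapped form), the pair
//
//   %s1 = add i32 %x, %y
//   %s2 = add i32 %y, %x
//
// hashes to the same bucket and is treated as one value. The same idea is
// applied to compares via predicate swapping and to commutative intrinsics.
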
unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
#ifndef NDEBUG
  // If -earlycse-debug-hash was specified, return a constant -- this
  // will force all hashing to collide, so we'll exhaustively search
  // the table for a match, and the assertion in isEqual will fire if
  // there's a bug causing equal keys to hash differently.
  if (EarlyCSEDebugHash)
    return 0;
#endif
  return getHashValueImpl(Val);
}

static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;

  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;

  if (LHSI->getOpcode() != RHSI->getOpcode())
    return false;
  if (LHSI->isIdenticalToWhenDefined(RHSI)) {
    // Convergent calls implicitly depend on the set of threads that is
    // currently executing, so conservatively return false if they are in
    // different basic blocks.
    if (CallInst *CI = dyn_cast<CallInst>(LHSI);
        CI && CI->isConvergent() && LHSI->getParent() != RHSI->getParent())
      return false;

    return true;
  }

  // If we're not strictly identical, we still might be a commutable instruction
  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
    if (!LHSBinOp->isCommutative())
      return false;

    assert(isa<BinaryOperator>(RHSI) &&
           "same opcode, but different instruction type?");
    BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);

    // Commuted equality
    return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
           LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
  }
  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
    assert(isa<CmpInst>(RHSI) &&
           "same opcode, but different instruction type?");
    CmpInst *RHSCmp = cast<CmpInst>(RHSI);
    // Commuted equality
    return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
           LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
           LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
  }

  auto *LII = dyn_cast<IntrinsicInst>(LHSI);
  auto *RII = dyn_cast<IntrinsicInst>(RHSI);
  if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
      LII->isCommutative() && LII->arg_size() >= 2) {
    return LII->getArgOperand(0) == RII->getArgOperand(1) &&
           LII->getArgOperand(1) == RII->getArgOperand(0) &&
           std::equal(LII->arg_begin() + 2, LII->arg_end(),
                      RII->arg_begin() + 2, RII->arg_end());
  }

  // See comment above in `getHashValue()`.
  if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
    if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
      return GCR1->getOperand(0) == GCR2->getOperand(0) &&
             GCR1->getBasePtr() == GCR2->getBasePtr() &&
             GCR1->getDerivedPtr() == GCR2->getDerivedPtr();

  // Min/max can occur with commuted operands, non-canonical predicates,
  // and/or non-canonical operands.
  // Selects can be non-trivially equivalent via inverted conditions and swaps.
  SelectPatternFlavor LSPF, RSPF;
  Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
  if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
      matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
    if (LSPF == RSPF) {
      // TODO: We should also detect FP min/max.
      if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
          LSPF == SPF_UMIN || LSPF == SPF_UMAX)
        return ((LHSA == RHSA && LHSB == RHSB) ||
                (LHSA == RHSB && LHSB == RHSA));

      // select Cond, A, B <--> select not(Cond), B, A
      if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
        return true;
    }

    // If the true/false operands are swapped and the conditions are compares
    // with inverted predicates, the selects are equal:
    // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
    //
    // This also handles patterns with a double-negation in the sense of not +
    // inverse, because we looked through a 'not' in the matching function and
    // swapped A/B:
    // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
    //
    // This intentionally does NOT handle patterns with a double-negation in
    // the sense of not + not, because doing so could result in values comparing
    // as equal that hash differently in the min/max cases like:
    // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
    //   ^ hashes as min                  ^ would not hash as min
    // In the context of the EarlyCSE pass, however, such cases never reach
    // this code, as we simplify the double-negation before hashing the second
    // select (and so still succeed at CSEing them).
    if (LHSA == RHSB && LHSB == RHSA) {
      CmpInst::Predicate PredL, PredR;
      Value *X, *Y;
      if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
          match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
          CmpInst::getInversePredicate(PredL) == PredR)
        return true;
    }
  }

  return false;
}

bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
  // These comparisons are nontrivial, so assert that equality implies
  // hash equality (DenseMap demands this as an invariant).
  bool Result = isEqualImpl(LHS, RHS);
  assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
         getHashValueImpl(LHS) == getHashValueImpl(RHS));
  return Result;
}

//===----------------------------------------------------------------------===//
// CallValue
//===----------------------------------------------------------------------===//

namespace {

/// Struct representing the available call values in the scoped hash
/// table.
struct CallValue {
  Instruction *Inst;

  CallValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // Don't value number anything that returns void.
    if (Inst->getType()->isVoidTy())
      return false;

    CallInst *CI = dyn_cast<CallInst>(Inst);
    if (!CI || !CI->onlyReadsMemory() ||
        // FIXME: Currently the calls which may access the thread id may
        // be considered as not accessing the memory. But this is
        // problematic for coroutines, since coroutines may resume in a
        // different thread. So we disable the optimization here for
        // correctness. However, it may block many other correct
        // optimizations. Revert this one when we detect the memory
        // accessing kind more precisely.
        CI->getFunction()->isPresplitCoroutine())
      return false;
    return true;
  }
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<CallValue> {
  static inline CallValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline CallValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(CallValue Val);
  static bool isEqual(CallValue LHS, CallValue RHS);
};

} // end namespace llvm

unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
  Instruction *Inst = Val.Inst;

  // Hash all of the operands as pointers and mix in the opcode.
  return hashCallInst(cast<CallInst>(Inst));
}

bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHS.Inst == RHS.Inst;

  CallInst *LHSI = cast<CallInst>(LHS.Inst);
  CallInst *RHSI = cast<CallInst>(RHS.Inst);

  // Convergent calls implicitly depend on the set of threads that is
  // currently executing, so conservatively return false if they are in
  // different basic blocks.
  if (LHSI->isConvergent() && LHSI->getParent() != RHSI->getParent())
    return false;

  return LHSI->isIdenticalTo(RHSI);
}

//===----------------------------------------------------------------------===//
// GEPValue
//===----------------------------------------------------------------------===//

namespace {

struct GEPValue {
  Instruction *Inst;
  std::optional<int64_t> ConstantOffset;

  GEPValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  GEPValue(Instruction *I, std::optional<int64_t> ConstantOffset)
      : Inst(I), ConstantOffset(ConstantOffset) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    return isa<GetElementPtrInst>(Inst);
  }
};

} // namespace

namespace llvm {

template <> struct DenseMapInfo<GEPValue> {
  static inline GEPValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline GEPValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(const GEPValue &Val);
  static bool isEqual(const GEPValue &LHS, const GEPValue &RHS);
};

} // end namespace llvm

unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) {
  auto *GEP = cast<GetElementPtrInst>(Val.Inst);
  if (Val.ConstantOffset.has_value())
    return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(),
                        Val.ConstantOffset.value());
  return hash_combine(
      GEP->getOpcode(),
      hash_combine_range(GEP->value_op_begin(), GEP->value_op_end()));
}

bool DenseMapInfo<GEPValue>::isEqual(const GEPValue &LHS, const GEPValue &RHS) {
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHS.Inst == RHS.Inst;
  auto *LGEP = cast<GetElementPtrInst>(LHS.Inst);
  auto *RGEP = cast<GetElementPtrInst>(RHS.Inst);
  if (LGEP->getPointerOperand() != RGEP->getPointerOperand())
    return false;
  if (LHS.ConstantOffset.has_value() && RHS.ConstantOffset.has_value())
    return LHS.ConstantOffset.value() == RHS.ConstantOffset.value();
  return
LGEP->isIdenticalToWhenDefined(RGEP);624}625626//===----------------------------------------------------------------------===//627// EarlyCSE implementation628//===----------------------------------------------------------------------===//629630namespace {631632/// A simple and fast domtree-based CSE pass.633///634/// This pass does a simple depth-first walk over the dominator tree,635/// eliminating trivially redundant instructions and using instsimplify to636/// canonicalize things as it goes. It is intended to be fast and catch obvious637/// cases so that instcombine and other passes are more effective. It is638/// expected that a later pass of GVN will catch the interesting/hard cases.639class EarlyCSE {640public:641const TargetLibraryInfo &TLI;642const TargetTransformInfo &TTI;643DominatorTree &DT;644AssumptionCache &AC;645const SimplifyQuery SQ;646MemorySSA *MSSA;647std::unique_ptr<MemorySSAUpdater> MSSAUpdater;648649using AllocatorTy =650RecyclingAllocator<BumpPtrAllocator,651ScopedHashTableVal<SimpleValue, Value *>>;652using ScopedHTType =653ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,654AllocatorTy>;655656/// A scoped hash table of the current values of all of our simple657/// scalar expressions.658///659/// As we walk down the domtree, we look to see if instructions are in this:660/// if so, we replace them with what we find, otherwise we insert them so661/// that dominated values can succeed in their lookup.662ScopedHTType AvailableValues;663664/// A scoped hash table of the current values of previously encountered665/// memory locations.666///667/// This allows us to get efficient access to dominating loads or stores when668/// we have a fully redundant load. In addition to the most recent load, we669/// keep track of a generation count of the read, which is compared against670/// the current generation count. The current generation count is incremented671/// after every possibly writing memory operation, which ensures that we only672/// CSE loads with other loads that have no intervening store. Ordering673/// events (such as fences or atomic instructions) increment the generation674/// count as well; essentially, we model these as writes to all possible675/// locations. 
Note that atomic and/or volatile loads and stores can be676/// present the table; it is the responsibility of the consumer to inspect677/// the atomicity/volatility if needed.678struct LoadValue {679Instruction *DefInst = nullptr;680unsigned Generation = 0;681int MatchingId = -1;682bool IsAtomic = false;683bool IsLoad = false;684685LoadValue() = default;686LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,687bool IsAtomic, bool IsLoad)688: DefInst(Inst), Generation(Generation), MatchingId(MatchingId),689IsAtomic(IsAtomic), IsLoad(IsLoad) {}690};691692using LoadMapAllocator =693RecyclingAllocator<BumpPtrAllocator,694ScopedHashTableVal<Value *, LoadValue>>;695using LoadHTType =696ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,697LoadMapAllocator>;698699LoadHTType AvailableLoads;700701// A scoped hash table mapping memory locations (represented as typed702// addresses) to generation numbers at which that memory location became703// (henceforth indefinitely) invariant.704using InvariantMapAllocator =705RecyclingAllocator<BumpPtrAllocator,706ScopedHashTableVal<MemoryLocation, unsigned>>;707using InvariantHTType =708ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,709InvariantMapAllocator>;710InvariantHTType AvailableInvariants;711712/// A scoped hash table of the current values of read-only call713/// values.714///715/// It uses the same generation count as loads.716using CallHTType =717ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;718CallHTType AvailableCalls;719720using GEPMapAllocatorTy =721RecyclingAllocator<BumpPtrAllocator,722ScopedHashTableVal<GEPValue, Value *>>;723using GEPHTType = ScopedHashTable<GEPValue, Value *, DenseMapInfo<GEPValue>,724GEPMapAllocatorTy>;725GEPHTType AvailableGEPs;726727/// This is the current generation of the memory value.728unsigned CurrentGeneration = 0;729730/// Set up the EarlyCSE runner for a particular function.731EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,732const TargetTransformInfo &TTI, DominatorTree &DT,733AssumptionCache &AC, MemorySSA *MSSA)734: TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),735MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}736737bool run();738739private:740unsigned ClobberCounter = 0;741// Almost a POD, but needs to call the constructors for the scoped hash742// tables so that a new scope gets pushed on. These are RAII so that the743// scope gets popped when the NodeScope is destroyed.744class NodeScope {745public:746NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,747InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,748GEPHTType &AvailableGEPs)749: Scope(AvailableValues), LoadScope(AvailableLoads),750InvariantScope(AvailableInvariants), CallScope(AvailableCalls),751GEPScope(AvailableGEPs) {}752NodeScope(const NodeScope &) = delete;753NodeScope &operator=(const NodeScope &) = delete;754755private:756ScopedHTType::ScopeTy Scope;757LoadHTType::ScopeTy LoadScope;758InvariantHTType::ScopeTy InvariantScope;759CallHTType::ScopeTy CallScope;760GEPHTType::ScopeTy GEPScope;761};762763// Contains all the needed information to create a stack for doing a depth764// first traversal of the tree. This includes scopes for values, loads, and765// calls as well as the generation. 
There is a child iterator so that the766// children do not need to be store separately.767class StackNode {768public:769StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,770InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,771GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,772DomTreeNode::const_iterator child,773DomTreeNode::const_iterator end)774: CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),775EndIter(end),776Scopes(AvailableValues, AvailableLoads, AvailableInvariants,777AvailableCalls, AvailableGEPs) {}778StackNode(const StackNode &) = delete;779StackNode &operator=(const StackNode &) = delete;780781// Accessors.782unsigned currentGeneration() const { return CurrentGeneration; }783unsigned childGeneration() const { return ChildGeneration; }784void childGeneration(unsigned generation) { ChildGeneration = generation; }785DomTreeNode *node() { return Node; }786DomTreeNode::const_iterator childIter() const { return ChildIter; }787788DomTreeNode *nextChild() {789DomTreeNode *child = *ChildIter;790++ChildIter;791return child;792}793794DomTreeNode::const_iterator end() const { return EndIter; }795bool isProcessed() const { return Processed; }796void process() { Processed = true; }797798private:799unsigned CurrentGeneration;800unsigned ChildGeneration;801DomTreeNode *Node;802DomTreeNode::const_iterator ChildIter;803DomTreeNode::const_iterator EndIter;804NodeScope Scopes;805bool Processed = false;806};807808/// Wrapper class to handle memory instructions, including loads,809/// stores and intrinsic loads and stores defined by the target.810class ParseMemoryInst {811public:812ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)813: Inst(Inst) {814if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {815IntrID = II->getIntrinsicID();816if (TTI.getTgtMemIntrinsic(II, Info))817return;818if (isHandledNonTargetIntrinsic(IntrID)) {819switch (IntrID) {820case Intrinsic::masked_load:821Info.PtrVal = Inst->getOperand(0);822Info.MatchingId = Intrinsic::masked_load;823Info.ReadMem = true;824Info.WriteMem = false;825Info.IsVolatile = false;826break;827case Intrinsic::masked_store:828Info.PtrVal = Inst->getOperand(1);829// Use the ID of masked load as the "matching id". 
This will830// prevent matching non-masked loads/stores with masked ones831// (which could be done), but at the moment, the code here832// does not support matching intrinsics with non-intrinsics,833// so keep the MatchingIds specific to masked instructions834// for now (TODO).835Info.MatchingId = Intrinsic::masked_load;836Info.ReadMem = false;837Info.WriteMem = true;838Info.IsVolatile = false;839break;840}841}842}843}844845Instruction *get() { return Inst; }846const Instruction *get() const { return Inst; }847848bool isLoad() const {849if (IntrID != 0)850return Info.ReadMem;851return isa<LoadInst>(Inst);852}853854bool isStore() const {855if (IntrID != 0)856return Info.WriteMem;857return isa<StoreInst>(Inst);858}859860bool isAtomic() const {861if (IntrID != 0)862return Info.Ordering != AtomicOrdering::NotAtomic;863return Inst->isAtomic();864}865866bool isUnordered() const {867if (IntrID != 0)868return Info.isUnordered();869870if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {871return LI->isUnordered();872} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {873return SI->isUnordered();874}875// Conservative answer876return !Inst->isAtomic();877}878879bool isVolatile() const {880if (IntrID != 0)881return Info.IsVolatile;882883if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {884return LI->isVolatile();885} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {886return SI->isVolatile();887}888// Conservative answer889return true;890}891892bool isInvariantLoad() const {893if (auto *LI = dyn_cast<LoadInst>(Inst))894return LI->hasMetadata(LLVMContext::MD_invariant_load);895return false;896}897898bool isValid() const { return getPointerOperand() != nullptr; }899900// For regular (non-intrinsic) loads/stores, this is set to -1. For901// intrinsic loads/stores, the id is retrieved from the corresponding902// field in the MemIntrinsicInfo structure. 
That field contains903// non-negative values only.904int getMatchingId() const {905if (IntrID != 0)906return Info.MatchingId;907return -1;908}909910Value *getPointerOperand() const {911if (IntrID != 0)912return Info.PtrVal;913return getLoadStorePointerOperand(Inst);914}915916Type *getValueType() const {917// TODO: handle target-specific intrinsics.918return Inst->getAccessType();919}920921bool mayReadFromMemory() const {922if (IntrID != 0)923return Info.ReadMem;924return Inst->mayReadFromMemory();925}926927bool mayWriteToMemory() const {928if (IntrID != 0)929return Info.WriteMem;930return Inst->mayWriteToMemory();931}932933private:934Intrinsic::ID IntrID = 0;935MemIntrinsicInfo Info;936Instruction *Inst;937};938939// This function is to prevent accidentally passing a non-target940// intrinsic ID to TargetTransformInfo.941static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {942switch (ID) {943case Intrinsic::masked_load:944case Intrinsic::masked_store:945return true;946}947return false;948}949static bool isHandledNonTargetIntrinsic(const Value *V) {950if (auto *II = dyn_cast<IntrinsicInst>(V))951return isHandledNonTargetIntrinsic(II->getIntrinsicID());952return false;953}954955bool processNode(DomTreeNode *Node);956957bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,958const BasicBlock *BB, const BasicBlock *Pred);959960Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,961unsigned CurrentGeneration);962963bool overridingStores(const ParseMemoryInst &Earlier,964const ParseMemoryInst &Later);965966Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {967// TODO: We could insert relevant casts on type mismatch here.968if (auto *LI = dyn_cast<LoadInst>(Inst))969return LI->getType() == ExpectedType ? LI : nullptr;970if (auto *SI = dyn_cast<StoreInst>(Inst)) {971Value *V = SI->getValueOperand();972return V->getType() == ExpectedType ? V : nullptr;973}974assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");975auto *II = cast<IntrinsicInst>(Inst);976if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))977return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);978return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);979}980981Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,982Type *ExpectedType) const {983// TODO: We could insert relevant casts on type mismatch here.984switch (II->getIntrinsicID()) {985case Intrinsic::masked_load:986return II->getType() == ExpectedType ? II : nullptr;987case Intrinsic::masked_store: {988Value *V = II->getOperand(0);989return V->getType() == ExpectedType ? 
V : nullptr;990}991}992return nullptr;993}994995/// Return true if the instruction is known to only operate on memory996/// provably invariant in the given "generation".997bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);998999bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,1000Instruction *EarlierInst, Instruction *LaterInst);10011002bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,1003const IntrinsicInst *Later) {1004auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {1005// Is Mask0 a submask of Mask1?1006if (Mask0 == Mask1)1007return true;1008if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))1009return false;1010auto *Vec0 = dyn_cast<ConstantVector>(Mask0);1011auto *Vec1 = dyn_cast<ConstantVector>(Mask1);1012if (!Vec0 || !Vec1)1013return false;1014if (Vec0->getType() != Vec1->getType())1015return false;1016for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {1017Constant *Elem0 = Vec0->getOperand(i);1018Constant *Elem1 = Vec1->getOperand(i);1019auto *Int0 = dyn_cast<ConstantInt>(Elem0);1020if (Int0 && Int0->isZero())1021continue;1022auto *Int1 = dyn_cast<ConstantInt>(Elem1);1023if (Int1 && !Int1->isZero())1024continue;1025if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))1026return false;1027if (Elem0 == Elem1)1028continue;1029return false;1030}1031return true;1032};1033auto PtrOp = [](const IntrinsicInst *II) {1034if (II->getIntrinsicID() == Intrinsic::masked_load)1035return II->getOperand(0);1036if (II->getIntrinsicID() == Intrinsic::masked_store)1037return II->getOperand(1);1038llvm_unreachable("Unexpected IntrinsicInst");1039};1040auto MaskOp = [](const IntrinsicInst *II) {1041if (II->getIntrinsicID() == Intrinsic::masked_load)1042return II->getOperand(2);1043if (II->getIntrinsicID() == Intrinsic::masked_store)1044return II->getOperand(3);1045llvm_unreachable("Unexpected IntrinsicInst");1046};1047auto ThruOp = [](const IntrinsicInst *II) {1048if (II->getIntrinsicID() == Intrinsic::masked_load)1049return II->getOperand(3);1050llvm_unreachable("Unexpected IntrinsicInst");1051};10521053if (PtrOp(Earlier) != PtrOp(Later))1054return false;10551056Intrinsic::ID IDE = Earlier->getIntrinsicID();1057Intrinsic::ID IDL = Later->getIntrinsicID();1058// We could really use specific intrinsic classes for masked loads1059// and stores in IntrinsicInst.h.1060if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {1061// Trying to replace later masked load with the earlier one.1062// Check that the pointers are the same, and1063// - masks and pass-throughs are the same, or1064// - replacee's pass-through is "undef" and replacer's mask is a1065// super-set of the replacee's mask.1066if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))1067return true;1068if (!isa<UndefValue>(ThruOp(Later)))1069return false;1070return IsSubmask(MaskOp(Later), MaskOp(Earlier));1071}1072if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {1073// Trying to replace a load of a stored value with the store's value.1074// Check that the pointers are the same, and1075// - load's mask is a subset of store's mask, and1076// - load's pass-through is "undef".1077if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))1078return false;1079return isa<UndefValue>(ThruOp(Later));1080}1081if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {1082// Trying to remove a store of the loaded value.1083// Check that the pointers are the same, and1084// - store's mask is a subset of the load's mask.1085return 
IsSubmask(MaskOp(Later), MaskOp(Earlier));1086}1087if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {1088// Trying to remove a dead store (earlier).1089// Check that the pointers are the same,1090// - the to-be-removed store's mask is a subset of the other store's1091// mask.1092return IsSubmask(MaskOp(Earlier), MaskOp(Later));1093}1094return false;1095}10961097void removeMSSA(Instruction &Inst) {1098if (!MSSA)1099return;1100if (VerifyMemorySSA)1101MSSA->verifyMemorySSA();1102// Removing a store here can leave MemorySSA in an unoptimized state by1103// creating MemoryPhis that have identical arguments and by creating1104// MemoryUses whose defining access is not an actual clobber. The phi case1105// is handled by MemorySSA when passing OptimizePhis = true to1106// removeMemoryAccess. The non-optimized MemoryUse case is lazily updated1107// by MemorySSA's getClobberingMemoryAccess.1108MSSAUpdater->removeMemoryAccess(&Inst, true);1109}1110};11111112} // end anonymous namespace11131114/// Determine if the memory referenced by LaterInst is from the same heap1115/// version as EarlierInst.1116/// This is currently called in two scenarios:1117///1118/// load p1119/// ...1120/// load p1121///1122/// and1123///1124/// x = load p1125/// ...1126/// store x, p1127///1128/// in both cases we want to verify that there are no possible writes to the1129/// memory referenced by p between the earlier and later instruction.1130bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,1131unsigned LaterGeneration,1132Instruction *EarlierInst,1133Instruction *LaterInst) {1134// Check the simple memory generation tracking first.1135if (EarlierGeneration == LaterGeneration)1136return true;11371138if (!MSSA)1139return false;11401141// If MemorySSA has determined that one of EarlierInst or LaterInst does not1142// read/write memory, then we can safely return true here.1143// FIXME: We could be more aggressive when checking doesNotAccessMemory(),1144// onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass1145// by also checking the MemorySSA MemoryAccess on the instruction. Initial1146// experiments suggest this isn't worthwhile, at least for C/C++ code compiled1147// with the default optimization pipeline.1148auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);1149if (!EarlierMA)1150return true;1151auto *LaterMA = MSSA->getMemoryAccess(LaterInst);1152if (!LaterMA)1153return true;11541155// Since we know LaterDef dominates LaterInst and EarlierInst dominates1156// LaterInst, if LaterDef dominates EarlierInst then it can't occur between1157// EarlierInst and LaterInst and neither can any other write that potentially1158// clobbers LaterInst.1159MemoryAccess *LaterDef;1160if (ClobberCounter < EarlyCSEMssaOptCap) {1161LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);1162ClobberCounter++;1163} else1164LaterDef = LaterMA->getDefiningAccess();11651166return MSSA->dominates(LaterDef, EarlierMA);1167}11681169bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {1170// A location loaded from with an invariant_load is assumed to *never* change1171// within the visible scope of the compilation.1172if (auto *LI = dyn_cast<LoadInst>(I))1173if (LI->hasMetadata(LLVMContext::MD_invariant_load))1174return true;11751176auto MemLocOpt = MemoryLocation::getOrNone(I);1177if (!MemLocOpt)1178// "target" intrinsic forms of loads aren't currently known to1179// MemoryLocation::get. 
TODO1180return false;1181MemoryLocation MemLoc = *MemLocOpt;1182if (!AvailableInvariants.count(MemLoc))1183return false;11841185// Is the generation at which this became invariant older than the1186// current one?1187return AvailableInvariants.lookup(MemLoc) <= GenAt;1188}11891190bool EarlyCSE::handleBranchCondition(Instruction *CondInst,1191const BranchInst *BI, const BasicBlock *BB,1192const BasicBlock *Pred) {1193assert(BI->isConditional() && "Should be a conditional branch!");1194assert(BI->getCondition() == CondInst && "Wrong condition?");1195assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);1196auto *TorF = (BI->getSuccessor(0) == BB)1197? ConstantInt::getTrue(BB->getContext())1198: ConstantInt::getFalse(BB->getContext());1199auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,1200Value *&RHS) {1201if (Opcode == Instruction::And &&1202match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))1203return true;1204else if (Opcode == Instruction::Or &&1205match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))1206return true;1207return false;1208};1209// If the condition is AND operation, we can propagate its operands into the1210// true branch. If it is OR operation, we can propagate them into the false1211// branch.1212unsigned PropagateOpcode =1213(BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;12141215bool MadeChanges = false;1216SmallVector<Instruction *, 4> WorkList;1217SmallPtrSet<Instruction *, 4> Visited;1218WorkList.push_back(CondInst);1219while (!WorkList.empty()) {1220Instruction *Curr = WorkList.pop_back_val();12211222AvailableValues.insert(Curr, TorF);1223LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"1224<< Curr->getName() << "' as " << *TorF << " in "1225<< BB->getName() << "\n");1226if (!DebugCounter::shouldExecute(CSECounter)) {1227LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");1228} else {1229// Replace all dominated uses with the known value.1230if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,1231BasicBlockEdge(Pred, BB))) {1232NumCSECVP += Count;1233MadeChanges = true;1234}1235}12361237Value *LHS, *RHS;1238if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))1239for (auto *Op : { LHS, RHS })1240if (Instruction *OPI = dyn_cast<Instruction>(Op))1241if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)1242WorkList.push_back(OPI);1243}12441245return MadeChanges;1246}12471248Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,1249unsigned CurrentGeneration) {1250if (InVal.DefInst == nullptr)1251return nullptr;1252if (InVal.MatchingId != MemInst.getMatchingId())1253return nullptr;1254// We don't yet handle removing loads with ordering of any kind.1255if (MemInst.isVolatile() || !MemInst.isUnordered())1256return nullptr;1257// We can't replace an atomic load with one which isn't also atomic.1258if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())1259return nullptr;1260// The value V returned from this function is used differently depending1261// on whether MemInst is a load or a store. If it's a load, we will replace1262// MemInst with V, if it's a store, we will check if V is the same as the1263// available value.1264bool MemInstMatching = !MemInst.isLoad();1265Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;1266Instruction *Other = MemInstMatching ? 
InVal.DefInst : MemInst.get();12671268// For stores check the result values before checking memory generation1269// (otherwise isSameMemGeneration may crash).1270Value *Result = MemInst.isStore()1271? getOrCreateResult(Matching, Other->getType())1272: nullptr;1273if (MemInst.isStore() && InVal.DefInst != Result)1274return nullptr;12751276// Deal with non-target memory intrinsics.1277bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);1278bool OtherNTI = isHandledNonTargetIntrinsic(Other);1279if (OtherNTI != MatchingNTI)1280return nullptr;1281if (OtherNTI && MatchingNTI) {1282if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),1283cast<IntrinsicInst>(MemInst.get())))1284return nullptr;1285}12861287if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&1288!isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,1289MemInst.get()))1290return nullptr;12911292if (!Result)1293Result = getOrCreateResult(Matching, Other->getType());1294return Result;1295}12961297static void combineIRFlags(Instruction &From, Value *To) {1298if (auto *I = dyn_cast<Instruction>(To)) {1299// If I being poison triggers UB, there is no need to drop those1300// flags. Otherwise, only retain flags present on both I and Inst.1301// TODO: Currently some fast-math flags are not treated as1302// poison-generating even though they should. Until this is fixed,1303// always retain flags present on both I and Inst for floating point1304// instructions.1305if (isa<FPMathOperator>(I) ||1306(I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))1307I->andIRFlags(&From);1308}1309}13101311bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,1312const ParseMemoryInst &Later) {1313// Can we remove Earlier store because of Later store?13141315assert(Earlier.isUnordered() && !Earlier.isVolatile() &&1316"Violated invariant");1317if (Earlier.getPointerOperand() != Later.getPointerOperand())1318return false;1319if (!Earlier.getValueType() || !Later.getValueType() ||1320Earlier.getValueType() != Later.getValueType())1321return false;1322if (Earlier.getMatchingId() != Later.getMatchingId())1323return false;1324// At the moment, we don't remove ordered stores, but do remove1325// unordered atomic stores. There's no special requirement (for1326// unordered atomics) about removing atomic stores only in favor of1327// other atomic stores since we were going to execute the non-atomic1328// one anyway and the atomic one might never have become visible.1329if (!Earlier.isUnordered() || !Later.isUnordered())1330return false;13311332// Deal with non-target memory intrinsics.1333bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());1334bool LNTI = isHandledNonTargetIntrinsic(Later.get());1335if (ENTI && LNTI)1336return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),1337cast<IntrinsicInst>(Later.get()));13381339// Because of the check above, at least one of them is false.1340// For now disallow matching intrinsics with non-intrinsics,1341// so assume that the stores match if neither is an intrinsic.1342return ENTI == LNTI;1343}13441345bool EarlyCSE::processNode(DomTreeNode *Node) {1346bool Changed = false;1347BasicBlock *BB = Node->getBlock();13481349// If this block has a single predecessor, then the predecessor is the parent1350// of the domtree node and all of the live out memory values are still current1351// in this block. If this block has multiple predecessors, then they could1352// have invalidated the live-out memory values of our parent value. 
For now,1353// just be conservative and invalidate memory if this block has multiple1354// predecessors.1355if (!BB->getSinglePredecessor())1356++CurrentGeneration;13571358// If this node has a single predecessor which ends in a conditional branch,1359// we can infer the value of the branch condition given that we took this1360// path. We need the single predecessor to ensure there's not another path1361// which reaches this block where the condition might hold a different1362// value. Since we're adding this to the scoped hash table (like any other1363// def), it will have been popped if we encounter a future merge block.1364if (BasicBlock *Pred = BB->getSinglePredecessor()) {1365auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());1366if (BI && BI->isConditional()) {1367auto *CondInst = dyn_cast<Instruction>(BI->getCondition());1368if (CondInst && SimpleValue::canHandle(CondInst))1369Changed |= handleBranchCondition(CondInst, BI, BB, Pred);1370}1371}13721373/// LastStore - Keep track of the last non-volatile store that we saw... for1374/// as long as there in no instruction that reads memory. If we see a store1375/// to the same location, we delete the dead store. This zaps trivial dead1376/// stores which can occur in bitfield code among other things.1377Instruction *LastStore = nullptr;13781379// See if any instructions in the block can be eliminated. If so, do it. If1380// not, add them to AvailableValues.1381for (Instruction &Inst : make_early_inc_range(*BB)) {1382// Dead instructions should just be removed.1383if (isInstructionTriviallyDead(&Inst, &TLI)) {1384LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst << '\n');1385if (!DebugCounter::shouldExecute(CSECounter)) {1386LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");1387continue;1388}13891390salvageKnowledge(&Inst, &AC);1391salvageDebugInfo(Inst);1392removeMSSA(Inst);1393Inst.eraseFromParent();1394Changed = true;1395++NumSimplify;1396continue;1397}13981399// Skip assume intrinsics, they don't really have side effects (although1400// they're marked as such to ensure preservation of control dependencies),1401// and this pass will not bother with its removal. However, we should mark1402// its condition as true for all dominated blocks.1403if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {1404auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));1405if (CondI && SimpleValue::canHandle(CondI)) {1406LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst1407<< '\n');1408AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));1409} else1410LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');1411continue;1412}14131414// Likewise, noalias intrinsics don't actually write.1415if (match(&Inst,1416m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {1417LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst1418<< '\n');1419continue;1420}14211422// Skip sideeffect intrinsics, for the same reason as assume intrinsics.1423if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {1424LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');1425continue;1426}14271428// Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.1429if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {1430LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');1431continue;1432}14331434// We can skip all invariant.start intrinsics since they only read memory,1435// and we can forward values across it. 
For invariant starts without1436// invariant ends, we can use the fact that the invariantness never ends to1437// start a scope in the current generaton which is true for all future1438// generations. Also, we dont need to consume the last store since the1439// semantics of invariant.start allow us to perform DSE of the last1440// store, if there was a store following invariant.start. Consider:1441//1442// store 30, i8* p1443// invariant.start(p)1444// store 40, i8* p1445// We can DSE the store to 30, since the store 40 to invariant location p1446// causes undefined behaviour.1447if (match(&Inst, m_Intrinsic<Intrinsic::invariant_start>())) {1448// If there are any uses, the scope might end.1449if (!Inst.use_empty())1450continue;1451MemoryLocation MemLoc =1452MemoryLocation::getForArgument(&cast<CallInst>(Inst), 1, TLI);1453// Don't start a scope if we already have a better one pushed1454if (!AvailableInvariants.count(MemLoc))1455AvailableInvariants.insert(MemLoc, CurrentGeneration);1456continue;1457}14581459if (isGuard(&Inst)) {1460if (auto *CondI =1461dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {1462if (SimpleValue::canHandle(CondI)) {1463// Do we already know the actual value of this condition?1464if (auto *KnownCond = AvailableValues.lookup(CondI)) {1465// Is the condition known to be true?1466if (isa<ConstantInt>(KnownCond) &&1467cast<ConstantInt>(KnownCond)->isOne()) {1468LLVM_DEBUG(dbgs()1469<< "EarlyCSE removing guard: " << Inst << '\n');1470salvageKnowledge(&Inst, &AC);1471removeMSSA(Inst);1472Inst.eraseFromParent();1473Changed = true;1474continue;1475} else1476// Use the known value if it wasn't true.1477cast<CallInst>(Inst).setArgOperand(0, KnownCond);1478}1479// The condition we're on guarding here is true for all dominated1480// locations.1481AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));1482}1483}14841485// Guard intrinsics read all memory, but don't write any memory.1486// Accordingly, don't update the generation but consume the last store (to1487// avoid an incorrect DSE).1488LastStore = nullptr;1489continue;1490}14911492// If the instruction can be simplified (e.g. X+0 = X) then replace it with1493// its simpler value.1494if (Value *V = simplifyInstruction(&Inst, SQ)) {1495LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << " to: " << *V1496<< '\n');1497if (!DebugCounter::shouldExecute(CSECounter)) {1498LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");1499} else {1500bool Killed = false;1501if (!Inst.use_empty()) {1502Inst.replaceAllUsesWith(V);1503Changed = true;1504}1505if (isInstructionTriviallyDead(&Inst, &TLI)) {1506salvageKnowledge(&Inst, &AC);1507removeMSSA(Inst);1508Inst.eraseFromParent();1509Changed = true;1510Killed = true;1511}1512if (Changed)1513++NumSimplify;1514if (Killed)1515continue;1516}1517}15181519// If this is a simple instruction that we can value number, process it.1520if (SimpleValue::canHandle(&Inst)) {1521if ([[maybe_unused]] auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {1522assert(CI->getExceptionBehavior() != fp::ebStrict &&1523"Unexpected ebStrict from SimpleValue::canHandle()");1524assert((!CI->getRoundingMode() ||1525CI->getRoundingMode() != RoundingMode::Dynamic) &&1526"Unexpected dynamic rounding from SimpleValue::canHandle()");1527}1528// See if the instruction has an available value. 
If so, use it.1529if (Value *V = AvailableValues.lookup(&Inst)) {1530LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V1531<< '\n');1532if (!DebugCounter::shouldExecute(CSECounter)) {1533LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");1534continue;1535}1536combineIRFlags(Inst, V);1537Inst.replaceAllUsesWith(V);1538salvageKnowledge(&Inst, &AC);1539removeMSSA(Inst);1540Inst.eraseFromParent();1541Changed = true;1542++NumCSE;1543continue;1544}15451546// Otherwise, just remember that this value is available.1547AvailableValues.insert(&Inst, &Inst);1548continue;1549}15501551ParseMemoryInst MemInst(&Inst, TTI);1552// If this is a non-volatile load, process it.1553if (MemInst.isValid() && MemInst.isLoad()) {1554// (conservatively) we can't peak past the ordering implied by this1555// operation, but we can add this load to our set of available values1556if (MemInst.isVolatile() || !MemInst.isUnordered()) {1557LastStore = nullptr;1558++CurrentGeneration;1559}15601561if (MemInst.isInvariantLoad()) {1562// If we pass an invariant load, we know that memory location is1563// indefinitely constant from the moment of first dereferenceability.1564// We conservatively treat the invariant_load as that moment. If we1565// pass a invariant load after already establishing a scope, don't1566// restart it since we want to preserve the earliest point seen.1567auto MemLoc = MemoryLocation::get(&Inst);1568if (!AvailableInvariants.count(MemLoc))1569AvailableInvariants.insert(MemLoc, CurrentGeneration);1570}15711572// If we have an available version of this load, and if it is the right1573// generation or the load is known to be from an invariant location,1574// replace this instruction.1575//1576// If either the dominating load or the current load are invariant, then1577// we can assume the current load loads the same value as the dominating1578// load.1579LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());1580if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {1581LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst1582<< " to: " << *InVal.DefInst << '\n');1583if (!DebugCounter::shouldExecute(CSECounter)) {1584LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");1585continue;1586}1587if (InVal.IsLoad)1588if (auto *I = dyn_cast<Instruction>(Op))1589combineMetadataForCSE(I, &Inst, false);1590if (!Inst.use_empty())1591Inst.replaceAllUsesWith(Op);1592salvageKnowledge(&Inst, &AC);1593removeMSSA(Inst);1594Inst.eraseFromParent();1595Changed = true;1596++NumCSELoad;1597continue;1598}15991600// Otherwise, remember that we have this instruction.1601AvailableLoads.insert(MemInst.getPointerOperand(),1602LoadValue(&Inst, CurrentGeneration,1603MemInst.getMatchingId(),1604MemInst.isAtomic(),1605MemInst.isLoad()));1606LastStore = nullptr;1607continue;1608}16091610// If this instruction may read from memory or throw (and potentially read1611// from memory in the exception handler), forget LastStore. Load/store1612// intrinsics will indicate both a read and a write to memory. The target1613// may override this (e.g. 

    // If this instruction may read from memory or throw (and potentially read
    // from memory in the exception handler), forget LastStore. Load/store
    // intrinsics will indicate both a read and a write to memory. The target
    // may override this (e.g. so that a store intrinsic does not read from
    // memory, and thus will be treated the same as a regular store for
    // commoning purposes).
    if ((Inst.mayReadFromMemory() || Inst.mayThrow()) &&
        !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
      LastStore = nullptr;

    // If this is a read-only call, process it.
    if (CallValue::canHandle(&Inst)) {
      // If we have an available version of this call, and if it is the right
      // generation, replace this instruction.
      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
      if (InVal.first != nullptr &&
          isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
                              &Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
                          << " to: " << *InVal.first << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (!Inst.use_empty())
          Inst.replaceAllUsesWith(InVal.first);
        salvageKnowledge(&Inst, &AC);
        removeMSSA(Inst);
        Inst.eraseFromParent();
        Changed = true;
        ++NumCSECall;
        continue;
      }

      // Otherwise, remember that we have this instruction.
      AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
      continue;
    }

    // Compare GEP instructions based on offset.
    if (GEPValue::canHandle(&Inst)) {
      auto *GEP = cast<GetElementPtrInst>(&Inst);
      APInt Offset = APInt(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
      GEPValue GEPVal(GEP, GEP->accumulateConstantOffset(SQ.DL, Offset)
                               ? Offset.trySExtValue()
                               : std::nullopt);
      if (Value *V = AvailableGEPs.lookup(GEPVal)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << " to: " << *V
                          << '\n');
        combineIRFlags(Inst, V);
        Inst.replaceAllUsesWith(V);
        salvageKnowledge(&Inst, &AC);
        removeMSSA(Inst);
        Inst.eraseFromParent();
        Changed = true;
        ++NumCSEGEP;
        continue;
      }

      // Otherwise, just remember that we have this GEP.
      AvailableGEPs.insert(GEPVal, &Inst);
      continue;
    }

    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads 'before' the fence. As a
    // result, we don't need to consider it as writing to memory and don't
    // need to advance the generation. We do need to prevent DSE across the
    // fence, but that's handled above.
    if (auto *FI = dyn_cast<FenceInst>(&Inst))
      if (FI->getOrdering() == AtomicOrdering::Release) {
        assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
        continue;
      }

    // Write-back DSE - If we write back the same value we just loaded from
    // the same location and haven't passed any intervening writes or ordering
    // operations, we can remove the write. The primary benefit is in allowing
    // the available load table to remain valid and value forward past where
    // the store originally was.
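    // For example (illustrative IR, not taken from a test): in
    //   %v = load i32, ptr %p
    //   store i32 %v, ptr %p     ; no intervening write or ordering operation
    // the store writes back the value that was just loaded, so it can be
    // removed without invalidating the AvailableLoads entry for %p.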
    if (MemInst.isValid() && MemInst.isStore()) {
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst &&
          InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
        // It is okay to have a LastStore to a different pointer here if
        // MemorySSA tells us that the load and store are from the same memory
        // generation. In that case, LastStore should keep its present value
        // since we're removing the current store.
        assert((!LastStore ||
                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
                    MemInst.getPointerOperand() ||
                MSSA) &&
               "can't have an intervening store if not using MemorySSA!");
        LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        salvageKnowledge(&Inst, &AC);
        removeMSSA(Inst);
        Inst.eraseFromParent();
        Changed = true;
        ++NumDSE;
        // We can avoid incrementing the generation count since we were able
        // to eliminate this store.
        continue;
      }
    }

    // Okay, this isn't something we can CSE at all. Check to see if it is
    // something that could modify memory. If so, our available memory values
    // cannot be used so bump the generation count.
    if (Inst.mayWriteToMemory()) {
      ++CurrentGeneration;

      if (MemInst.isValid() && MemInst.isStore()) {
        // We do a trivial form of DSE if there are two stores to the same
        // location with no intervening loads. Delete the earlier store.
        if (LastStore) {
          if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
            LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                              << " due to: " << Inst << '\n');
            if (!DebugCounter::shouldExecute(CSECounter)) {
              LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
            } else {
              salvageKnowledge(&Inst, &AC);
              removeMSSA(*LastStore);
              LastStore->eraseFromParent();
              Changed = true;
              ++NumDSE;
              LastStore = nullptr;
            }
          }
          // fallthrough - we can exploit information about this store
        }

        // Okay, we just invalidated anything we knew about loaded values. Try
        // to salvage *something* by remembering that the stored value is a
        // live version of the pointer. It is safe to forward from volatile
        // stores to non-volatile loads, so we don't have to check for
        // volatility of the store.
        AvailableLoads.insert(MemInst.getPointerOperand(),
                              LoadValue(&Inst, CurrentGeneration,
                                        MemInst.getMatchingId(),
                                        MemInst.isAtomic(),
                                        MemInst.isLoad()));

        // Remember that this was the last unordered store we saw for DSE. We
        // don't yet handle DSE on ordered or volatile stores since we don't
        // have a good way to model the ordering requirement for following
        // passes once the store is removed. We could insert a fence, but
        // since fences are slightly stronger than stores in their ordering,
        // it's not clear this is a profitable transform. Another option would
        // be to merge the ordering with that of the post-dominating store.
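        // For example (illustrative IR, not taken from a test): in
        //   store i32 1, ptr %p
        //   store i32 2, ptr %p
        // the first store is remembered here as LastStore; when the second
        // store is visited, overridingStores() above lets us delete the first.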
        if (MemInst.isUnordered() && !MemInst.isVolatile())
          LastStore = &Inst;
        else
          LastStore = nullptr;
      }
    }
  }

  return Changed;
}

bool EarlyCSE::run() {
  // Note: a deque is used here because there are significant performance
  // gains over vector when the container becomes very large due to the
  // specific access patterns. For more information see the mailing list
  // discussion on this:
  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
  std::deque<StackNode *> nodesToProcess;

  bool Changed = false;

  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
      AvailableGEPs, CurrentGeneration, DT.getRootNode(),
      DT.getRootNode()->begin(), DT.getRootNode()->end()));

  assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");

  // Process the stack.
  while (!nodesToProcess.empty()) {
    // Grab the first item off the stack. Set the current generation, remove
    // the node from the stack, and process it.
    StackNode *NodeToProcess = nodesToProcess.back();

    // Initialize class members.
    CurrentGeneration = NodeToProcess->currentGeneration();

    // Check if the node needs to be processed.
    if (!NodeToProcess->isProcessed()) {
      // Process the node.
      Changed |= processNode(NodeToProcess->node());
      NodeToProcess->childGeneration(CurrentGeneration);
      NodeToProcess->process();
    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
      // Push the next child onto the stack.
      DomTreeNode *child = NodeToProcess->nextChild();
      nodesToProcess.push_back(new StackNode(
          AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
          AvailableGEPs, NodeToProcess->childGeneration(), child,
          child->begin(), child->end()));
    } else {
      // It has been processed, and there are no more children to process,
      // so delete it and pop it off the stack.
      delete NodeToProcess;
      nodesToProcess.pop_back();
    }
  } // while (!nodes...)

  return Changed;
}

PreservedAnalyses EarlyCSEPass::run(Function &F,
                                    FunctionAnalysisManager &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto *MSSA =
      UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;

  EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);

  if (!CSE.run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  if (UseMemorySSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

void EarlyCSEPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  OS << '<';
  if (UseMemorySSA)
    OS << "memssa";
  OS << '>';
}
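
// Illustrative usage, not taken from this file: with the new pass manager the
// MemorySSA parameter round-trips through a pipeline string, e.g.
//   opt -passes='early-cse' ...           (without MemorySSA)
//   opt -passes='early-cse<memssa>' ...   (with MemorySSA)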

namespace {

/// A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
template <bool UseMemorySSA>
class EarlyCSELegacyCommonPass : public FunctionPass {
public:
  static char ID;

  EarlyCSELegacyCommonPass() : FunctionPass(ID) {
    if (UseMemorySSA)
      initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
    else
      initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    auto *MSSA =
        UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;

    EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);

    return CSE.run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    if (UseMemorySSA) {
      AU.addRequired<AAResultsWrapperPass>();
      AU.addRequired<MemorySSAWrapperPass>();
      AU.addPreserved<MemorySSAWrapperPass>();
    }
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<AAResultsWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace

using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;

template <>
char EarlyCSELegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)

using EarlyCSEMemSSALegacyPass =
    EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;

template <>
char EarlyCSEMemSSALegacyPass::ID = 0;

FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
  if (UseMemorySSA)
    return new EarlyCSEMemSSALegacyPass();
  else
    return new EarlyCSELegacyPass();
}

INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                      "Early CSE w/ MemorySSA", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                    "Early CSE w/ MemorySSA", false, false)