Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The code below implements dead store elimination using MemorySSA. It uses
// the following general approach: given a MemoryDef, walk upwards to find
// clobbering MemoryDefs that may be killed by the starting def. Then check
// that there are no uses that may read the location of the original MemoryDef
// in between both MemoryDefs. A bit more concretely:
//
// For all MemoryDefs StartDef:
// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
//    upwards.
// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
//    checking all uses starting at MaybeDeadAccess and walking until we see
//    StartDef.
// 3. For each found CurrentDef, check that:
//   1. There are no barrier instructions between CurrentDef and StartDef (like
//      throws or stores with ordering constraints).
//   2. StartDef is executed whenever CurrentDef is executed.
//   3. StartDef completely overwrites CurrentDef.
// 4. Erase CurrentDef from the function and MemorySSA.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <map>
#include <optional>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "dse"

"Number of stores remaining after DSE");92STATISTIC(NumRedundantStores, "Number of redundant stores deleted");93STATISTIC(NumFastStores, "Number of stores deleted");94STATISTIC(NumFastOther, "Number of other instrs removed");95STATISTIC(NumCompletePartials, "Number of stores dead by later partials");96STATISTIC(NumModifiedStores, "Number of stores modified");97STATISTIC(NumCFGChecks, "Number of stores modified");98STATISTIC(NumCFGTries, "Number of stores modified");99STATISTIC(NumCFGSuccess, "Number of stores modified");100STATISTIC(NumGetDomMemoryDefPassed,101"Number of times a valid candidate is returned from getDomMemoryDef");102STATISTIC(NumDomMemDefChecks,103"Number iterations check for reads in getDomMemoryDef");104105DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",106"Controls which MemoryDefs are eliminated.");107108static cl::opt<bool>109EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",110cl::init(true), cl::Hidden,111cl::desc("Enable partial-overwrite tracking in DSE"));112113static cl::opt<bool>114EnablePartialStoreMerging("enable-dse-partial-store-merging",115cl::init(true), cl::Hidden,116cl::desc("Enable partial store merging in DSE"));117118static cl::opt<unsigned>119MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,120cl::desc("The number of memory instructions to scan for "121"dead store elimination (default = 150)"));122static cl::opt<unsigned> MemorySSAUpwardsStepLimit(123"dse-memoryssa-walklimit", cl::init(90), cl::Hidden,124cl::desc("The maximum number of steps while walking upwards to find "125"MemoryDefs that may be killed (default = 90)"));126127static cl::opt<unsigned> MemorySSAPartialStoreLimit(128"dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,129cl::desc("The maximum number candidates that only partially overwrite the "130"killing MemoryDef to consider"131" (default = 5)"));132133static cl::opt<unsigned> MemorySSADefsPerBlockLimit(134"dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,135cl::desc("The number of MemoryDefs we consider as candidates to eliminated "136"other stores per basic block (default = 5000)"));137138static cl::opt<unsigned> MemorySSASameBBStepCost(139"dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,140cl::desc(141"The cost of a step in the same basic block as the killing MemoryDef"142"(default = 1)"));143144static cl::opt<unsigned>145MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),146cl::Hidden,147cl::desc("The cost of a step in a different basic "148"block than the killing MemoryDef"149"(default = 5)"));150151static cl::opt<unsigned> MemorySSAPathCheckLimit(152"dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,153cl::desc("The maximum number of blocks to check when trying to prove that "154"all paths to an exit go through a killing block (default = 50)"));155156// This flags allows or disallows DSE to optimize MemorySSA during its157// traversal. Note that DSE optimizing MemorySSA may impact other passes158// downstream of the DSE invocation and can lead to issues not being159// reproducible in isolation (i.e. when MemorySSA is built from scratch). 
// In those cases, the flag can be used to check if DSE's MemorySSA
// optimizations impact follow-up passes.
static cl::opt<bool>
    OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
                      cl::desc("Allow DSE to optimize memory accesses."));

//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
using OverlapIntervalsTy = std::map<int64_t, int64_t>;
using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;

/// Returns true if the end of this instruction can be safely shortened in
/// length.
static bool isShortenableAtTheEnd(Instruction *I) {
  // Don't shorten stores for now
  if (isa<StoreInst>(I))
    return false;

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: return false;
    case Intrinsic::memset:
    case Intrinsic::memcpy:
    case Intrinsic::memcpy_element_unordered_atomic:
    case Intrinsic::memset_element_unordered_atomic:
      // Do shorten memory intrinsics.
      // FIXME: Add memmove if it's also safe to transform.
      return true;
    }
  }

  // Don't shorten libcall calls for now.

  return false;
}

/// Returns true if the beginning of this instruction can be safely shortened
/// in length.
static bool isShortenableAtTheBeginning(Instruction *I) {
  // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
  // easily done by offsetting the source address.
  return isa<AnyMemSetInst>(I);
}

static std::optional<TypeSize> getPointerSize(const Value *V,
                                              const DataLayout &DL,
                                              const TargetLibraryInfo &TLI,
                                              const Function *F) {
  uint64_t Size;
  ObjectSizeOpts Opts;
  Opts.NullIsUnknownSize = NullPointerIsDefined(F);

  if (getObjectSize(V, Size, DL, &TLI, Opts))
    return TypeSize::getFixed(Size);
  return std::nullopt;
}

namespace {

enum OverwriteResult {
  OW_Begin,
  OW_Complete,
  OW_End,
  OW_PartialEarlierWithFullLater,
  OW_MaybePartial,
  OW_None,
  OW_Unknown
};

} // end anonymous namespace

/// Check if two instructions are masked stores that completely
/// overwrite one another.
/// More specifically, \p KillingI has to overwrite \p DeadI.
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
                                              const Instruction *DeadI,
                                              BatchAAResults &AA) {
  const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
  const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
  if (KillingII == nullptr || DeadII == nullptr)
    return OW_Unknown;
  if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
    return OW_Unknown;
  if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
    // Type size.
    VectorType *KillingTy =
        cast<VectorType>(KillingII->getArgOperand(0)->getType());
    VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
    if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
      return OW_Unknown;
    // Element count.
    if (KillingTy->getElementCount() != DeadTy->getElementCount())
      return OW_Unknown;
    // Pointers.
    Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
    Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
    if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
      return OW_Unknown;
    // Masks.
    // TODO: check that KillingII's mask is a superset of the DeadII's mask.
    if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
      return OW_Unknown;
    return OW_Complete;
  }
  return OW_Unknown;
}

/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
/// overwritten by a killing (smaller) store which doesn't write outside the
/// big store's memory locations. Returns 'OW_Unknown' if nothing can be
/// determined.
/// NOTE: This function must only be called if both \p KillingLoc and \p
/// DeadLoc belong to the same underlying object with valid \p KillingOff and
/// \p DeadOff.
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
                                          const MemoryLocation &DeadLoc,
                                          int64_t KillingOff, int64_t DeadOff,
                                          Instruction *DeadI,
                                          InstOverlapIntervalsTy &IOL) {
  const uint64_t KillingSize = KillingLoc.Size.getValue();
  const uint64_t DeadSize = DeadLoc.Size.getValue();
  // We may now overlap, although the overlap is not complete. There might also
  // be other incomplete overlaps, and together, they might cover the complete
  // dead store.
  // Note: The correctness of this logic depends on the fact that this function
  // is not even called providing DepWrite when there are any intervening reads.
  if (EnablePartialOverwriteTracking &&
      KillingOff < int64_t(DeadOff + DeadSize) &&
      int64_t(KillingOff + KillingSize) >= DeadOff) {

    // Insert our part of the overlap into the map.
    auto &IM = IOL[DeadI];
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
                      << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
                      << KillingOff << ", " << int64_t(KillingOff + KillingSize)
                      << ")\n");

    // Make sure that we only insert non-overlapping intervals and combine
    // adjacent intervals.
    // The intervals are stored in the map with the ending offset as the key
    // (in the half-open sense) and the starting offset as the value.
    int64_t KillingIntStart = KillingOff;
    int64_t KillingIntEnd = KillingOff + KillingSize;

    // Find any intervals ending at, or after, KillingIntStart which start
    // before KillingIntEnd.
    auto ILI = IM.lower_bound(KillingIntStart);
    if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
      // This existing interval is overlapped with the current store somewhere
      // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
      // intervals and adjusting our start and end.
      KillingIntStart = std::min(KillingIntStart, ILI->second);
      KillingIntEnd = std::max(KillingIntEnd, ILI->first);
      ILI = IM.erase(ILI);

      // Continue erasing and adjusting our end in case other previous
      // intervals are also overlapped with the current store.
      //
      //     |--- dead 1 ---|  |--- dead 2 ---|
      //         |------- killing ---------|
      //
      while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
        assert(ILI->second > KillingIntStart && "Unexpected interval");
        KillingIntEnd = std::max(KillingIntEnd, ILI->first);
        ILI = IM.erase(ILI);
      }
    }

    IM[KillingIntEnd] = KillingIntStart;

    ILI = IM.begin();
    if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
      LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
                        << DeadOff << ", " << int64_t(DeadOff + DeadSize)
                        << ") Composite KillingLoc [" << ILI->second << ", "
                        << ILI->first << ")\n");
      ++NumCompletePartials;
      return OW_Complete;
    }
  }

  // Check for a dead store which writes to all the memory locations that
  // the killing store writes to.
  if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
      int64_t(DeadOff + DeadSize) > KillingOff &&
      uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite of a dead store [" << DeadOff
                      << ", " << int64_t(DeadOff + DeadSize)
                      << ") by a killing store [" << KillingOff << ", "
                      << int64_t(KillingOff + KillingSize) << ")\n");
    // TODO: Maybe come up with a better name?
    return OW_PartialEarlierWithFullLater;
  }

  // Another interesting case is if the killing store overwrites the end of the
  // dead store.
  //
  //      |--dead--|
  //                |-- killing --|
  //
  // In this case we may want to trim the size of the dead store to avoid
  // generating stores to addresses which will definitely be overwritten by the
  // killing store.
  if (!EnablePartialOverwriteTracking &&
      (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
       int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
    return OW_End;

  // Finally, we also need to check if the killing store overwrites the
  // beginning of the dead store.
  //
  //                |--dead--|
  //      |-- killing --|
  //
  // In this case we may want to move the destination address and trim the size
  // of the dead store to avoid generating stores to addresses which will
  // definitely be overwritten by the killing store.
  if (!EnablePartialOverwriteTracking &&
      (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
    assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
           "Expect to be handled as OW_Complete");
    return OW_Begin;
  }
  // Otherwise, they don't completely overlap.
  return OW_Unknown;
}

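// Illustrative sketch of the interval bookkeeping above (the offsets are made
// up for the example): a dead store covers [0, 8). A first killing store
// covering [0, 4) is recorded as IM[4] = 0. A second killing store covering
// [4, 8) merges with that entry into IM[8] = 0. The composite interval [0, 8)
// now covers the whole dead store, so the "full overwrite from partials" path
// reports OW_Complete.
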
/// Returns true if the memory which is accessed by the second instruction is
/// not modified between the first and the second instruction.
/// Precondition: Second instruction must be dominated by the first
/// instruction.
static bool
memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
                           BatchAAResults &AA, const DataLayout &DL,
                           DominatorTree *DT) {
  // Do a backwards scan through the CFG from SecondI to FirstI. Look for
  // instructions which can modify the memory location accessed by SecondI.
  //
  // While doing the walk keep track of the address to check. It might be
  // different in different basic blocks due to PHI translation.
  using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
  SmallVector<BlockAddressPair, 16> WorkList;
  // Keep track of the address we visited each block with. Bail out if we
  // visit a block with different addresses.
  DenseMap<BasicBlock *, Value *> Visited;

  BasicBlock::iterator FirstBBI(FirstI);
  ++FirstBBI;
  BasicBlock::iterator SecondBBI(SecondI);
  BasicBlock *FirstBB = FirstI->getParent();
  BasicBlock *SecondBB = SecondI->getParent();
  MemoryLocation MemLoc;
  if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
    MemLoc = MemoryLocation::getForDest(MemSet);
  else
    MemLoc = MemoryLocation::get(SecondI);

  auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);

  // Start checking the SecondBB.
  WorkList.push_back(
      std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
  bool isFirstBlock = true;

  // Check all blocks going backward until we reach the FirstBB.
  while (!WorkList.empty()) {
    BlockAddressPair Current = WorkList.pop_back_val();
    BasicBlock *B = Current.first;
    PHITransAddr &Addr = Current.second;
    Value *Ptr = Addr.getAddr();

    // Ignore instructions before FirstI if this is the FirstBB.
    BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());

    BasicBlock::iterator EI;
    if (isFirstBlock) {
      // Ignore instructions after SecondI if this is the first visit of SecondBB.
      assert(B == SecondBB && "first block is not the store block");
      EI = SecondBBI;
      isFirstBlock = false;
    } else {
      // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
      // In this case we also have to look at instructions after SecondI.
      EI = B->end();
    }
    for (; BI != EI; ++BI) {
      Instruction *I = &*BI;
      if (I->mayWriteToMemory() && I != SecondI)
        if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
          return false;
    }
    if (B != FirstBB) {
      assert(B != &FirstBB->getParent()->getEntryBlock() &&
             "Should not hit the entry block because SI must be dominated by LI");
      for (BasicBlock *Pred : predecessors(B)) {
        PHITransAddr PredAddr = Addr;
        if (PredAddr.needsPHITranslationFromBlock(B)) {
          if (!PredAddr.isPotentiallyPHITranslatable())
            return false;
          if (!PredAddr.translateValue(B, Pred, DT, false))
            return false;
        }
        Value *TranslatedPtr = PredAddr.getAddr();
        auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
        if (!Inserted.second) {
          // We already visited this block before. If it was with a different
          // address - bail out!
          if (TranslatedPtr != Inserted.first->second)
            return false;
          // ... otherwise just skip it.
          continue;
        }
        WorkList.push_back(std::make_pair(Pred, PredAddr));
      }
    }
  }
  return true;
}

static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
                              uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
                              uint64_t NewSizeInBits, bool IsOverwriteEnd) {
  const DataLayout &DL = Inst->getDataLayout();
  uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
  uint64_t DeadSliceOffsetInBits =
      OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
  auto SetDeadFragExpr = [](auto *Assign,
                            DIExpression::FragmentInfo DeadFragment) {
    // createFragmentExpression expects an offset relative to the existing
    // fragment offset if there is one.
    uint64_t RelativeOffset = DeadFragment.OffsetInBits -
                              Assign->getExpression()
                                  ->getFragmentInfo()
                                  .value_or(DIExpression::FragmentInfo(0, 0))
                                  .OffsetInBits;
    if (auto NewExpr = DIExpression::createFragmentExpression(
            Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
      Assign->setExpression(*NewExpr);
      return;
    }
    // Failed to create a fragment expression for this so discard the value,
    // making this a kill location.
    auto *Expr = *DIExpression::createFragmentExpression(
        DIExpression::get(Assign->getContext(), std::nullopt),
        DeadFragment.OffsetInBits, DeadFragment.SizeInBits);
    Assign->setExpression(Expr);
    Assign->setKillLocation();
  };

  // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
  // link to any instructions. Created in the loop below (once).
  DIAssignID *LinkToNothing = nullptr;
  LLVMContext &Ctx = Inst->getContext();
  auto GetDeadLink = [&Ctx, &LinkToNothing]() {
    if (!LinkToNothing)
      LinkToNothing = DIAssignID::getDistinct(Ctx);
    return LinkToNothing;
  };

  // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
  // overlapping dbg.assign intrinsic. The loop invalidates the iterators
  // returned by getAssignmentMarkers so save a copy of the markers to iterate
  // over.
  auto LinkedRange = at::getAssignmentMarkers(Inst);
  SmallVector<DbgVariableRecord *> LinkedDVRAssigns =
      at::getDVRAssignmentMarkers(Inst);
  SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(),
                                           LinkedRange.end());
  auto InsertAssignForOverlap = [&](auto *Assign) {
    std::optional<DIExpression::FragmentInfo> NewFragment;
    if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
                                        DeadSliceSizeInBits, Assign,
                                        NewFragment) ||
        !NewFragment) {
      // We couldn't calculate the intersecting fragment for some reason. Be
      // cautious and unlink the whole assignment from the store.
      Assign->setKillAddress();
      Assign->setAssignId(GetDeadLink());
      return;
    }
    // No intersect.
    if (NewFragment->SizeInBits == 0)
      return;

    // Fragments overlap: insert a new dbg.assign for this dead part.
    auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
    NewAssign->insertAfter(Assign);
    NewAssign->setAssignId(GetDeadLink());
    if (NewFragment)
      SetDeadFragExpr(NewAssign, *NewFragment);
    NewAssign->setKillAddress();
  };
  for_each(Linked, InsertAssignForOverlap);
  for_each(LinkedDVRAssigns, InsertAssignForOverlap);
}

static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
                         uint64_t &DeadSize, int64_t KillingStart,
                         uint64_t KillingSize, bool IsOverwriteEnd) {
  auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
  Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();

  // We assume that memset/memcpy operates in chunks of the "largest" native
  // type size and aligned on the same value. That means optimal start and size
  // of memset/memcpy should be modulo of preferred alignment of that type.
  // That is, there is no sense in trying to reduce the store size any further,
  // since any "extra" stores come for free anyway.
  // On the other hand, the maximum alignment we can achieve is limited by the
  // alignment of the initial store.

  // TODO: Limit maximum alignment by preferred (or ABI?) alignment of the
  // "largest" native type.
  // Note: What is the proper way to get that value?
  // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
  // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);

  int64_t ToRemoveStart = 0;
  uint64_t ToRemoveSize = 0;
  // Compute start and size of the region to remove. Make sure 'PrefAlign' is
  // maintained on the remaining store.
  if (IsOverwriteEnd) {
    // Calculate required adjustment for 'KillingStart' in order to keep
    // remaining store size aligned on 'PrefAlign'.
    uint64_t Off =
        offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
    ToRemoveStart = KillingStart + Off;
    if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
      return false;
    ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
  } else {
    ToRemoveStart = DeadStart;
    assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
           "Not overlapping accesses?");
    ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
    // Calculate required adjustment for 'ToRemoveSize' in order to keep
    // start of the remaining store aligned on 'PrefAlign'.
    uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
    if (Off != 0) {
      if (ToRemoveSize <= (PrefAlign.value() - Off))
        return false;
      ToRemoveSize -= PrefAlign.value() - Off;
    }
    assert(isAligned(PrefAlign, ToRemoveSize) &&
           "Should preserve selected alignment");
  }

  assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
  assert(DeadSize > ToRemoveSize && "Can't remove more than original size");

  uint64_t NewSize = DeadSize - ToRemoveSize;
  if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
    // When shortening an atomic memory intrinsic, the newly shortened
    // length must remain an integer multiple of the element size.
    const uint32_t ElementSize = AMI->getElementSizeInBytes();
    if (0 != NewSize % ElementSize)
      return false;
  }

  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
                    << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
                    << "\n  KILLER [" << ToRemoveStart << ", "
                    << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");

  Value *DeadWriteLength = DeadIntrinsic->getLength();
  Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
  DeadIntrinsic->setLength(TrimmedLength);
  DeadIntrinsic->setDestAlignment(PrefAlign);

  Value *OrigDest = DeadIntrinsic->getRawDest();
  if (!IsOverwriteEnd) {
    Value *Indices[1] = {
        ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
    Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
        Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
        DeadI->getIterator());
    NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
    DeadIntrinsic->setDest(NewDestGEP);
  }

  // Update attached dbg.assign intrinsics. Assume 8-bit byte.
  shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
                    IsOverwriteEnd);

  // Finally update start and size of dead access.
  if (!IsOverwriteEnd)
    DeadStart += ToRemoveSize;
  DeadSize = NewSize;

  return true;
}

static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
                            int64_t &DeadStart, uint64_t &DeadSize) {
  if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
    return false;

  OverlapIntervalsTy::iterator OII = --IntervalMap.end();
  int64_t KillingStart = OII->second;
  uint64_t KillingSize = OII->first - KillingStart;

  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");

  if (KillingStart > DeadStart &&
      // Note: "KillingStart - DeadStart" is known to be positive due to
      // preceding check.
      (uint64_t)(KillingStart - DeadStart) < DeadSize &&
      // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
      // be non negative due to preceding checks.
      KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
    if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
                     true)) {
      IntervalMap.erase(OII);
      return true;
    }
  }
  return false;
}

static bool tryToShortenBegin(Instruction *DeadI,
                              OverlapIntervalsTy &IntervalMap,
                              int64_t &DeadStart, uint64_t &DeadSize) {
  if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
    return false;

  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
  int64_t KillingStart = OII->second;
  uint64_t KillingSize = OII->first - KillingStart;

  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");

  if (KillingStart <= DeadStart &&
      // Note: "DeadStart - KillingStart" is known to be non negative due to
      // preceding check.
      KillingSize > (uint64_t)(DeadStart - KillingStart)) {
    // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
    // be positive due to preceding checks.
    assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
           "Should have been handled as OW_Complete");
    if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
                     false)) {
      IntervalMap.erase(OII);
      return true;
    }
  }
  return false;
}

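// Illustrative sketch of the shortening above (sizes are made up for the
// example): for a dead `memset(p, 0, 32)` whose last 16 bytes are later
// overwritten, tryToShortenEnd trims the memset length to 16. If instead the
// first 16 bytes are overwritten, tryToShortenBegin advances the destination
// by 16 bytes via an inbounds GEP and likewise trims the length to 16, subject
// to the alignment checks in tryToShorten.
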
static Constant *
tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
                                   int64_t KillingOffset, int64_t DeadOffset,
                                   const DataLayout &DL, BatchAAResults &AA,
                                   DominatorTree *DT) {

  if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
      DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
      KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
      DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
      memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
    // If the store we find is:
    //   a) partially overwritten by the store to 'Loc'
    //   b) the killing store is fully contained in the dead one and
    //   c) they both have a constant value
    //   d) none of the two stores need padding
    // Merge the two stores, replacing the dead store's value with a
    // merge of both values.
    // TODO: Deal with other constant types (vectors, etc), and probably
    // some mem intrinsics (if needed)

    APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
    APInt KillingValue =
        cast<ConstantInt>(KillingI->getValueOperand())->getValue();
    unsigned KillingBits = KillingValue.getBitWidth();
    assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
    KillingValue = KillingValue.zext(DeadValue.getBitWidth());

    // Offset of the smaller store inside the larger store
    unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
    unsigned LShiftAmount =
        DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
                         : BitOffsetDiff;
    APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
                                   LShiftAmount + KillingBits);
    // Clear the bits we'll be replacing, then OR with the smaller
    // store, shifted appropriately.
    APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
    LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n  Dead: " << *DeadI
                      << "\n  Killing: " << *KillingI
                      << "\n  Merged Value: " << Merged << '\n');
    return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
  }
  return nullptr;
}

namespace {
// Returns true if \p I is an intrinsic that does not read or write memory.
bool isNoopIntrinsic(Instruction *I) {
  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::assume:
      return true;
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_label:
    case Intrinsic::dbg_value:
      llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
    default:
      return false;
    }
  }
  return false;
}

// Check if we can ignore \p D for DSE.
bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
  Instruction *DI = D->getMemoryInst();
  // Calls that only access inaccessible memory cannot read or write any memory
  // locations we consider for elimination.
  if (auto *CB = dyn_cast<CallBase>(DI))
    if (CB->onlyAccessesInaccessibleMemory())
      return true;

  // We can eliminate stores to locations not visible to the caller across
  // throwing instructions.
  if (DI->mayThrow() && !DefVisibleToCaller)
    return true;

  // We can remove the dead stores, irrespective of the fence and its ordering
  // (release/acquire/seq_cst). Fences only constrain the ordering of
  // already visible stores; they do not make a store visible to other
  // threads. So, skipping over a fence does not change a store from being
  // dead.
  if (isa<FenceInst>(DI))
    return true;

  // Skip intrinsics that do not really read or modify memory.
  if (isNoopIntrinsic(DI))
    return true;

  return false;
}

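// For example (sketch): canSkipDef lets the upwards walk step over a fence, an
// @llvm.assume call, or a call that only accesses inaccessible memory, since
// none of these can read, or make visible, the stores considered for
// elimination.
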
struct DSEState {
  Function &F;
  AliasAnalysis &AA;
  EarliestEscapeInfo EI;

  /// The single BatchAA instance that is used to cache AA queries. It will
  /// not be invalidated over the whole run. This is safe, because:
  /// 1. Only memory writes are removed, so the alias cache for memory
  ///    locations remains valid.
  /// 2. No new instructions are added (only instructions removed), so cached
  ///    information for a deleted value cannot be accessed by a re-used new
  ///    value pointer.
  BatchAAResults BatchAA;

  MemorySSA &MSSA;
  DominatorTree &DT;
  PostDominatorTree &PDT;
  const TargetLibraryInfo &TLI;
  const DataLayout &DL;
  const LoopInfo &LI;

  // Whether the function contains any irreducible control flow, useful for
  // being accurately able to detect loops.
  bool ContainsIrreducibleLoops;

  // All MemoryDefs that potentially could kill other MemDefs.
  SmallVector<MemoryDef *, 64> MemDefs;
  // Any that should be skipped as they are already deleted.
  SmallPtrSet<MemoryAccess *, 4> SkipStores;
  // Keep track of whether a given object is captured before return or not.
  DenseMap<const Value *, bool> CapturedBeforeReturn;
  // Keep track of all of the objects that are invisible to the caller after
  // the function returns.
  DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
  // Keep track of blocks with throwing instructions not modeled in MemorySSA.
  SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
  // Post-order numbers for each basic block. Used to figure out if memory
  // accesses are executed before another access.
  DenseMap<BasicBlock *, unsigned> PostOrderNumbers;

  /// Keep track of instructions (partly) overlapping with killing MemoryDefs
  /// per basic block.
  MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
  // Check if there are root nodes that are terminated by UnreachableInst.
  // Those roots pessimize post-dominance queries. If there are such roots,
  // fall back to CFG scan starting from all non-unreachable roots.
  bool AnyUnreachableExit;

  // Whether or not we should iterate on removing dead stores at the end of the
  // function due to removing a store causing a previously captured pointer to
  // no longer be captured.
  bool ShouldIterateEndOfFunctionDSE;

  /// Dead instructions to be removed at the end of DSE.
  SmallVector<Instruction *> ToRemove;

  // Class contains self-reference, make sure it's not copied/moved.
  DSEState(const DSEState &) = delete;
  DSEState &operator=(const DSEState &) = delete;

  DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
           PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
           const LoopInfo &LI)
      : F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
        PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
    // Collect blocks with throwing instructions not modeled in MemorySSA and
    // alloc-like objects.
    unsigned PO = 0;
    for (BasicBlock *BB : post_order(&F)) {
      PostOrderNumbers[BB] = PO++;
      for (Instruction &I : *BB) {
        MemoryAccess *MA = MSSA.getMemoryAccess(&I);
        if (I.mayThrow() && !MA)
          ThrowingBlocks.insert(I.getParent());

        auto *MD = dyn_cast_or_null<MemoryDef>(MA);
        if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
            (getLocForWrite(&I) || isMemTerminatorInst(&I)))
          MemDefs.push_back(MD);
      }
    }

    // Treat byval or inalloca arguments the same as Allocas: stores to them
    // are dead at the end of the function.
    for (Argument &AI : F.args())
      if (AI.hasPassPointeeByValueCopyAttr())
        InvisibleToCallerAfterRet.insert({&AI, true});

    // Collect whether there is any irreducible control flow in the function.
    ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);

    AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
      return isa<UnreachableInst>(E->getTerminator());
    });
  }

  static void pushMemUses(MemoryAccess *Acc,
                          SmallVectorImpl<MemoryAccess *> &WorkList,
                          SmallPtrSetImpl<MemoryAccess *> &Visited) {
    for (Use &U : Acc->uses()) {
      auto *MA = cast<MemoryAccess>(U.getUser());
      if (Visited.insert(MA).second)
        WorkList.push_back(MA);
    }
  };

  LocationSize strengthenLocationSize(const Instruction *I,
                                      LocationSize Size) const {
    if (auto *CB = dyn_cast<CallBase>(I)) {
      LibFunc F;
      if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
          (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
        // Use the precise location size specified by the 3rd argument
        // for determining whether KillingI overwrites DeadLoc if it is a
        // memset_chk instruction. memset_chk will write either the amount
        // specified as 3rd argument or the function will immediately abort and
        // exit the program.
        // NOTE: AA may determine NoAlias if it can prove that the access size
        // is larger than the allocation size due to that being UB. To avoid
        // returning potentially invalid NoAlias results by AA, limit the use of
        // the precise location size to isOverwrite.
        if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
          return LocationSize::precise(Len->getZExtValue());
      }
    }
    return Size;
  }

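  // For example (sketch): for a call to `__memset_chk(p, 0, 64, n)` with a
  // constant third argument, the location size can be strengthened to a
  // precise 64 bytes, because __memset_chk either writes exactly that many
  // bytes or immediately aborts the program.
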
  /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
  /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
  /// location (by \p DeadI instruction).
  /// Return OW_MaybePartial if \p KillingI does not completely overwrite
  /// \p DeadI, but they both write to the same underlying object. In that
  /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
  /// \p DeadI. Returns 'OW_None' if \p KillingI is known not to overwrite
  /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
  OverwriteResult isOverwrite(const Instruction *KillingI,
                              const Instruction *DeadI,
                              const MemoryLocation &KillingLoc,
                              const MemoryLocation &DeadLoc,
                              int64_t &KillingOff, int64_t &DeadOff) {
    // AliasAnalysis does not always account for loops. Limit overwrite checks
    // to dependencies for which we can guarantee they are independent of any
    // loops they are in.
    if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
      return OW_Unknown;

    LocationSize KillingLocSize =
        strengthenLocationSize(KillingI, KillingLoc.Size);
    const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
    const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
    const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
    const Value *KillingUndObj = getUnderlyingObject(KillingPtr);

    // Check whether the killing store overwrites the whole object, in which
    // case the size/offset of the dead store does not matter.
    if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
        isIdentifiedObject(KillingUndObj)) {
      std::optional<TypeSize> KillingUndObjSize =
          getPointerSize(KillingUndObj, DL, TLI, &F);
      if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
        return OW_Complete;
    }

    // FIXME: Vet that this works for size upper-bounds. Seems unlikely that
    // we'll get imprecise values here, though (except for unknown sizes).
    if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
      // In case no constant size is known, try to use the IR values for the
      // number of bytes written and check if they match.
      const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
      const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
      if (KillingMemI && DeadMemI) {
        const Value *KillingV = KillingMemI->getLength();
        const Value *DeadV = DeadMemI->getLength();
        if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
          return OW_Complete;
      }

      // Masked stores have imprecise locations, but we can reason about them
      // to some extent.
      return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
    }

    const TypeSize KillingSize = KillingLocSize.getValue();
    const TypeSize DeadSize = DeadLoc.Size.getValue();
    // Bail on doing Size comparison which depends on AA for now
    // TODO: Remove AnyScalable once Alias Analysis deals with scalable vectors
    const bool AnyScalable =
        DeadSize.isScalable() || KillingLocSize.isScalable();

    if (AnyScalable)
      return OW_Unknown;
    // Query the alias information
    AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);

    // If the start pointers are the same, we just have to compare sizes to see
    // if the killing store was larger than the dead store.
    if (AAR == AliasResult::MustAlias) {
      // Make sure that KillingSize is >= DeadSize.
      if (KillingSize >= DeadSize)
        return OW_Complete;
    }

    // If we hit a partial alias we may have a full overwrite
    if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
      int32_t Off = AAR.getOffset();
      if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
        return OW_Complete;
    }

    // If we can't resolve the same pointers to the same object, then we can't
    // analyze them at all.
    if (DeadUndObj != KillingUndObj) {
      // Non aliasing stores to different objects don't overlap. Note that
      // if the killing store is known to overwrite whole object (out of
      // bounds access overwrites whole object as well) then it is assumed to
      // completely overwrite any store to the same object even if they don't
      // actually alias (see next check).
      if (AAR == AliasResult::NoAlias)
        return OW_None;
      return OW_Unknown;
    }

    // Okay, we have stores to two completely different pointers. Try to
    // decompose the pointer into a "base + constant_offset" form.
    // If the base pointers are equal, then we can reason about the two stores.
    DeadOff = 0;
    KillingOff = 0;
    const Value *DeadBasePtr =
        GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
    const Value *KillingBasePtr =
        GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);

    // If the base pointers still differ, we have two completely different
    // stores.
    if (DeadBasePtr != KillingBasePtr)
      return OW_Unknown;

    // The killing access completely overlaps the dead store if and only if
    // both start and end of the dead one are "inside" the killing one:
    //    |<->|--dead--|<->|
    //    |-----killing------|
    // Accesses may overlap if and only if start of one of them is "inside"
    // another one:
    //    |<->|--dead--|<-------->|
    //    |-------killing--------|
    //           OR
    //    |-------dead-------|
    //    |<->|---killing---|<----->|
    //
    // We have to be careful here as *Off is signed while *.Size is unsigned.

    // Check if the dead access starts "not before" the killing one.
    if (DeadOff >= KillingOff) {
      // If the dead access ends "not after" the killing access then the
      // dead one is completely overwritten by the killing one.
      if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
        return OW_Complete;
      // If start of the dead access is "before" end of the killing access
      // then accesses overlap.
      else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
        return OW_MaybePartial;
    }
    // If start of the killing access is "before" end of the dead access then
    // accesses overlap.
    else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
      return OW_MaybePartial;
    }

    // Can reach here only if accesses are known not to overlap.
    return OW_None;
  }

  bool isInvisibleToCallerAfterRet(const Value *V) {
    if (isa<AllocaInst>(V))
      return true;
    auto I = InvisibleToCallerAfterRet.insert({V, false});
    if (I.second) {
      if (!isInvisibleToCallerOnUnwind(V)) {
        I.first->second = false;
      } else if (isNoAliasCall(V)) {
        I.first->second = !PointerMayBeCaptured(V, true, false);
      }
    }
    return I.first->second;
  }

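  // For example (sketch): an `alloca` is always invisible to the caller once
  // the function returns, so a store to it that is never read again can be
  // dropped. The result of a noalias call such as `malloc` is treated the same
  // way only if the pointer has not been captured before the return.
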
  bool isInvisibleToCallerOnUnwind(const Value *V) {
    bool RequiresNoCaptureBeforeUnwind;
    if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
      return false;
    if (!RequiresNoCaptureBeforeUnwind)
      return true;

    auto I = CapturedBeforeReturn.insert({V, true});
    if (I.second)
      // NOTE: This could be made more precise by PointerMayBeCapturedBefore
      // with the killing MemoryDef. But we refrain from doing so for now to
      // limit compile-time and this does not cause any changes to the number
      // of stores removed on a large test set in practice.
      I.first->second = PointerMayBeCaptured(V, false, true);
    return !I.first->second;
  }

  std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
    if (!I->mayWriteToMemory())
      return std::nullopt;

    if (auto *CB = dyn_cast<CallBase>(I))
      return MemoryLocation::getForDest(CB, TLI);

    return MemoryLocation::getOrNone(I);
  }

  /// Assuming this instruction has a dead analyzable write, can we delete
  /// this instruction?
  bool isRemovable(Instruction *I) {
    assert(getLocForWrite(I) && "Must have analyzable write");

    // Don't remove volatile/atomic stores.
    if (StoreInst *SI = dyn_cast<StoreInst>(I))
      return SI->isUnordered();

    if (auto *CB = dyn_cast<CallBase>(I)) {
      // Don't remove volatile memory intrinsics.
      if (auto *MI = dyn_cast<MemIntrinsic>(CB))
        return !MI->isVolatile();

      // Never remove dead lifetime intrinsics, e.g. because they are followed
      // by a free.
      if (CB->isLifetimeStartOrEnd())
        return false;

      return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
             !CB->isTerminator();
    }

    return false;
  }

  /// Returns true if \p UseInst completely overwrites \p DefLoc
  /// (stored by \p DefInst).
  bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
                           Instruction *UseInst) {
    // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
    // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
    // MemoryDef.
    if (!UseInst->mayWriteToMemory())
      return false;

    if (auto *CB = dyn_cast<CallBase>(UseInst))
      if (CB->onlyAccessesInaccessibleMemory())
        return false;

    int64_t InstWriteOffset, DepWriteOffset;
    if (auto CC = getLocForWrite(UseInst))
      return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
                         DepWriteOffset) == OW_Complete;
    return false;
  }

  /// Returns true if \p Def is not read before returning from the function.
  bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc) {
    LLVM_DEBUG(dbgs() << "  Check if def " << *Def << " ("
                      << *Def->getMemoryInst()
                      << ") is at the end of the function\n");
    SmallVector<MemoryAccess *, 4> WorkList;
    SmallPtrSet<MemoryAccess *, 8> Visited;

    pushMemUses(Def, WorkList, Visited);
    for (unsigned I = 0; I < WorkList.size(); I++) {
      if (WorkList.size() >= MemorySSAScanLimit) {
        LLVM_DEBUG(dbgs() << "  ... hit exploration limit.\n");
        return false;
      }

      MemoryAccess *UseAccess = WorkList[I];
      if (isa<MemoryPhi>(UseAccess)) {
        // AliasAnalysis does not account for loops. Limit elimination to
        // candidates for which we can guarantee they always store to the same
        // memory location.
        if (!isGuaranteedLoopInvariant(DefLoc.Ptr))
          return false;

        pushMemUses(cast<MemoryPhi>(UseAccess), WorkList, Visited);
        continue;
      }
      // TODO: Checking for aliasing is expensive. Consider reducing the amount
      // of times this is called and/or caching it.
      Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
      if (isReadClobber(DefLoc, UseInst)) {
        LLVM_DEBUG(dbgs() << "  ... hit read clobber " << *UseInst << ".\n");
        return false;
      }

      if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
        pushMemUses(UseDef, WorkList, Visited);
    }
    return true;
  }

  /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
  /// pair with the MemoryLocation terminated by \p I and a boolean flag
  /// indicating whether \p I is a free-like call.
  std::optional<std::pair<MemoryLocation, bool>>
  getLocForTerminator(Instruction *I) const {
    uint64_t Len;
    Value *Ptr;
    if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
                                                      m_Value(Ptr))))
      return {std::make_pair(MemoryLocation(Ptr, Len), false)};

    if (auto *CB = dyn_cast<CallBase>(I)) {
      if (Value *FreedOp = getFreedOperand(CB, &TLI))
        return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
    }

    return std::nullopt;
  }

  /// Returns true if \p I is a memory terminator instruction like
  /// llvm.lifetime.end or free.
  bool isMemTerminatorInst(Instruction *I) const {
    auto *CB = dyn_cast<CallBase>(I);
    return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
                  getFreedOperand(CB, &TLI) != nullptr);
  }

  /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
  /// instruction \p AccessI.
  bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
                       Instruction *MaybeTerm) {
    std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
        getLocForTerminator(MaybeTerm);

    if (!MaybeTermLoc)
      return false;

    // If the terminator is a free-like call, all accesses to the underlying
    // object can be considered terminated.
    if (getUnderlyingObject(Loc.Ptr) !=
        getUnderlyingObject(MaybeTermLoc->first.Ptr))
      return false;

    auto TermLoc = MaybeTermLoc->first;
    if (MaybeTermLoc->second) {
      const Value *LocUO = getUnderlyingObject(Loc.Ptr);
      return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
    }
    int64_t InstWriteOffset = 0;
    int64_t DepWriteOffset = 0;
    return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
                       DepWriteOffset) == OW_Complete;
  }

  // Returns true if \p Use may read from \p DefLoc.
  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
    if (isNoopIntrinsic(UseInst))
      return false;

    // Monotonic or weaker atomic stores can be re-ordered and do not need to
    // be treated as read clobbers.
    if (auto SI = dyn_cast<StoreInst>(UseInst))
      return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);

    if (!UseInst->mayReadFromMemory())
      return false;

    if (auto *CB = dyn_cast<CallBase>(UseInst))
      if (CB->onlyAccessesInaccessibleMemory())
        return false;

    return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
  }

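  // For example (sketch): a load from the killed location, or a memcpy that
  // uses it as a source, counts as a read clobber and blocks elimination; a
  // monotonic or unordered store to it does not count as a read; and no-op
  // intrinsics such as @llvm.assume are ignored entirely.
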
  /// Returns true if a dependency between \p Current and \p KillingDef is
  /// guaranteed to be loop invariant for the loops that they are in. Either
  /// because they are known to be in the same block, in the same loop level or
  /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
  /// during execution of the containing function.
  bool isGuaranteedLoopIndependent(const Instruction *Current,
                                   const Instruction *KillingDef,
                                   const MemoryLocation &CurrentLoc) {
    // If the dependency is within the same block or loop level (being careful
    // of irreducible loops), we know that AA will return a valid result for
    // the memory dependency. (Both at the function level, outside of any loop,
    // would also be valid but we currently disable that to limit compile
    // time).
    if (Current->getParent() == KillingDef->getParent())
      return true;
    const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
    if (!ContainsIrreducibleLoops && CurrentLI &&
        CurrentLI == LI.getLoopFor(KillingDef->getParent()))
      return true;
    // Otherwise check the memory location is invariant to any loops.
    return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
  }

  /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
  /// loop. In particular, this guarantees that it only references a single
  /// MemoryLocation during execution of the containing function.
  bool isGuaranteedLoopInvariant(const Value *Ptr) {
    Ptr = Ptr->stripPointerCasts();
    if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
      if (GEP->hasAllConstantIndices())
        Ptr = GEP->getPointerOperand()->stripPointerCasts();

    if (auto *I = dyn_cast<Instruction>(Ptr)) {
      return I->getParent()->isEntryBlock() ||
             (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
    }
    return true;
  }

  // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
  // with no read access between them or on any other path to a function exit
  // block if \p KillingLoc is not accessible after the function returns. If
  // there is no such MemoryDef, return std::nullopt. The returned value may
  // not (completely) overwrite \p KillingLoc. Currently we bail out when we
  // encounter an aliasing MemoryUse (read).
  std::optional<MemoryAccess *>
  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
                  const MemoryLocation &KillingLoc, const Value *KillingUndObj,
                  unsigned &ScanLimit, unsigned &WalkerStepLimit,
                  bool IsMemTerm, unsigned &PartialLimit) {
    if (ScanLimit == 0 || WalkerStepLimit == 0) {
      LLVM_DEBUG(dbgs() << "\n    ...  hit scan limit\n");
      return std::nullopt;
    }

    MemoryAccess *Current = StartAccess;
    Instruction *KillingI = KillingDef->getMemoryInst();
    LLVM_DEBUG(dbgs() << "  trying to get dominating access\n");

    // Only optimize defining access of KillingDef when directly starting at
    // its defining access. The defining access also must only access
    // KillingLoc. At the moment we only support instructions with a single
    // write location, so it should be sufficient to disable optimizations for
    // instructions that also read from memory.
    bool CanOptimize = OptimizeMemorySSA &&
                       KillingDef->getDefiningAccess() == StartAccess &&
                       !KillingI->mayReadFromMemory();

    // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
    std::optional<MemoryLocation> CurrentLoc;
    for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
      LLVM_DEBUG({
        dbgs() << "   visiting " << *Current;
        if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
          dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
                 << ")";
        dbgs() << "\n";
      });

      // Reached TOP.
      if (MSSA.isLiveOnEntryDef(Current)) {
        LLVM_DEBUG(dbgs() << "   ...  found LiveOnEntryDef\n");
        if (CanOptimize && Current != KillingDef->getDefiningAccess())
          // The first clobbering def is... none.
          KillingDef->setOptimized(Current);
        return std::nullopt;
      }

      // Cost of a step. Accesses in the same block are more likely to be valid
      // candidates for elimination, hence consider them cheaper.
      unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
                              ? MemorySSASameBBStepCost
                              : MemorySSAOtherBBStepCost;
      if (WalkerStepLimit <= StepCost) {
        LLVM_DEBUG(dbgs() << "   ...  hit walker step limit\n");
        return std::nullopt;
      }
      WalkerStepLimit -= StepCost;

      // Return for MemoryPhis. They cannot be eliminated directly and the
      // caller is responsible for traversing them.
      if (isa<MemoryPhi>(Current)) {
        LLVM_DEBUG(dbgs() << "   ...  found MemoryPhi\n");
        return Current;
      }

      // Below, check if CurrentDef is a valid candidate to be eliminated by
      // KillingDef. If it is not, check the next candidate.
      MemoryDef *CurrentDef = cast<MemoryDef>(Current);
      Instruction *CurrentI = CurrentDef->getMemoryInst();

      if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
        CanOptimize = false;
        continue;
      }

      // Before we try to remove anything, check for any extra throwing
      // instructions that block us from DSEing
      if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
        LLVM_DEBUG(dbgs() << "  ... skip, may throw!\n");
        return std::nullopt;
      }

      // Check for anything that looks like it will be a barrier to further
      // removal
      if (isDSEBarrier(KillingUndObj, CurrentI)) {
        LLVM_DEBUG(dbgs() << "  ... skip, barrier\n");
        return std::nullopt;
      }

      // If Current is known to be on a path that reads DefLoc or is a read
      // clobber, bail out, as the path is not profitable. We skip this check
      // for intrinsic calls, because the code knows how to handle memcpy
      // intrinsics.
      if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
        return std::nullopt;

      // Quick check if there are direct uses that are read-clobbers.
      if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
            if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
              return !MSSA.dominates(StartAccess, UseOrDef) &&
                     isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
            return false;
          })) {
        LLVM_DEBUG(dbgs() << "   ...  found a read clobber\n");
        return std::nullopt;
      }

      // If Current does not have an analyzable write location or is not
      // removable, skip it.
      CurrentLoc = getLocForWrite(CurrentI);
      if (!CurrentLoc || !isRemovable(CurrentI)) {
        CanOptimize = false;
        continue;
      }

      // AliasAnalysis does not account for loops. Limit elimination to
      // candidates for which we can guarantee they always store to the same
      // memory location and are not located in different loops.
      if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
        LLVM_DEBUG(dbgs() << "  ... not guaranteed loop independent\n");
        CanOptimize = false;
        continue;
      }

      if (IsMemTerm) {
        // If the killing def is a memory terminator (e.g. lifetime.end), check
        // the next candidate if Current does not write the same underlying
        // object as the terminator.
        if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
          CanOptimize = false;
          continue;
        }
      } else {
        int64_t KillingOffset = 0;
        int64_t DeadOffset = 0;
        auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
                              KillingOffset, DeadOffset);
        if (CanOptimize) {
          // CurrentDef is the earliest write clobber of KillingDef. Use it as
          // optimized access.
          if (CurrentDef != KillingDef->getDefiningAccess() &&
              (OR == OW_Complete || OR == OW_MaybePartial))
            KillingDef->setOptimized(CurrentDef);

          // Once a may-aliasing def is encountered do not set an optimized
          // access.
          if (OR != OW_None)
            CanOptimize = false;
        }

        // If Current does not write to the same object as KillingDef, check
        // the next candidate.
        if (OR == OW_Unknown || OR == OW_None)
          continue;
        else if (OR == OW_MaybePartial) {
          // If KillingDef only partially overwrites Current, check the next
          // candidate if the partial step limit is exceeded. This aggressively
          // limits the number of candidates for partial store elimination,
          // which are less likely to be removable in the end.
          if (PartialLimit <= 1) {
            WalkerStepLimit -= 1;
            LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
            continue;
          }
          PartialLimit -= 1;
        }
      }
      break;
    };

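    // Sketch for illustration (assumed example, not from the original source):
    // the scan below gives up as soon as the candidate's value may still be
    // read, e.g.
    //
    //   store i32 0, ptr %p     ; MaybeDeadAccess
    //   %v = load i32, ptr %p   ; aliasing read -> keep the first store
    //   store i32 1, ptr %p     ; KillingDef
    //
    // whereas without the intervening load the first store could be removed.
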
    // Accesses to objects accessible after the function returns can only be
    // eliminated if the access is dead along all paths to the exit. Collect
    // the blocks with killing (=completely overwriting MemoryDefs) and check if
    // they cover all paths from MaybeDeadAccess to any function exit.
    SmallPtrSet<Instruction *, 16> KillingDefs;
    KillingDefs.insert(KillingDef->getMemoryInst());
    MemoryAccess *MaybeDeadAccess = Current;
    MemoryLocation MaybeDeadLoc = *CurrentLoc;
    Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
    LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
                      << *MaybeDeadI << ")\n");

    SmallVector<MemoryAccess *, 32> WorkList;
    SmallPtrSet<MemoryAccess *, 32> Visited;
    pushMemUses(MaybeDeadAccess, WorkList, Visited);

    // Check if DeadDef may be read.
    for (unsigned I = 0; I < WorkList.size(); I++) {
      MemoryAccess *UseAccess = WorkList[I];

      LLVM_DEBUG(dbgs() << " " << *UseAccess);
      // Bail out if the number of accesses to check exceeds the scan limit.
      if (ScanLimit < (WorkList.size() - I)) {
        LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
        return std::nullopt;
      }
      --ScanLimit;
      NumDomMemDefChecks++;

      if (isa<MemoryPhi>(UseAccess)) {
        if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
              return DT.properlyDominates(KI->getParent(),
                                          UseAccess->getBlock());
            })) {
          LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
          continue;
        }
        LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
        pushMemUses(UseAccess, WorkList, Visited);
        continue;
      }

      Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
      LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");

      if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
            return DT.dominates(KI, UseInst);
          })) {
        LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
        continue;
      }

      // A memory terminator kills all preceding MemoryDefs and all succeeding
      // MemoryAccesses. We do not have to check its users.
      if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
        LLVM_DEBUG(
            dbgs()
            << " ... skipping, memterminator invalidates following accesses\n");
        continue;
      }

      if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
        LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
        pushMemUses(UseAccess, WorkList, Visited);
        continue;
      }

      if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
        LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
        return std::nullopt;
      }

      // Uses which may read the original MemoryDef mean we cannot eliminate the
      // original MD. Stop walk.
      if (isReadClobber(MaybeDeadLoc, UseInst)) {
        LLVM_DEBUG(dbgs() << " ... found read clobber\n");
        return std::nullopt;
      }

      // If this worklist walks back to the original memory access (and the
      // pointer is not guaranteed loop invariant) then we cannot assume that a
      // store kills itself.
      if (MaybeDeadAccess == UseAccess &&
          !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
        LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
        return std::nullopt;
      }
      // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
      // if it reads the memory location.
      // TODO: It would probably be better to check for self-reads before
      // calling the function.
      if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
        LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
        continue;
      }

      // Check all uses for MemoryDefs, except for defs completely overwriting
      // the original location. Otherwise we have to check uses of *all*
      // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
      // miss cases like the following
      //   1 = Def(LoE) ; <----- DeadDef stores [0,1]
      //   2 = Def(1)   ; (2, 1) = NoAlias, stores [2,3]
      //   Use(2)       ; MayAlias 2 *and* 1, loads [0, 3].
      //                  (The Use points to the *first* Def it may alias)
      //   3 = Def(1)   ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
      //                  stores [0,1]
      if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
        if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
          BasicBlock *MaybeKillingBlock = UseInst->getParent();
          if (PostOrderNumbers.find(MaybeKillingBlock)->second <
              PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
            if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
              LLVM_DEBUG(dbgs()
                         << " ... found killing def " << *UseInst << "\n");
              KillingDefs.insert(UseInst);
            }
          } else {
            LLVM_DEBUG(dbgs()
                       << " ... found preceding def " << *UseInst << "\n");
            return std::nullopt;
          }
        } else
          pushMemUses(UseDef, WorkList, Visited);
      }
    }

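    // Illustrative sketch (assumed example, not from the original source): for
    // a global @g that remains visible after the function returns,
    //
    //     store i32 0, ptr @g   ; MaybeDeadAccess
    //     br i1 %c, label %then, label %exit
    //   then:
    //     store i32 1, ptr @g   ; killing def
    //     br label %exit
    //   exit:
    //     ret void
    //
    // the first store is still observable on the path that bypasses %then, so
    // the post-dominance/CFG check below has to keep it.
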
    // For accesses to locations visible after the function returns, make sure
    // that the location is dead (=overwritten) along all paths from
    // MaybeDeadAccess to the exit.
    if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
      SmallPtrSet<BasicBlock *, 16> KillingBlocks;
      for (Instruction *KD : KillingDefs)
        KillingBlocks.insert(KD->getParent());
      assert(!KillingBlocks.empty() &&
             "Expected at least a single killing block");

      // Find the common post-dominator of all killing blocks.
      BasicBlock *CommonPred = *KillingBlocks.begin();
      for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
        if (!CommonPred)
          break;
        CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
      }

      // If the common post-dominator does not post-dominate MaybeDeadAccess,
      // there is a path from MaybeDeadAccess to an exit not going through a
      // killing block.
      if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
        if (!AnyUnreachableExit)
          return std::nullopt;

        // Fall back to CFG scan starting at all non-unreachable roots if not
        // all paths to the exit go through CommonPred.
        CommonPred = nullptr;
      }

      // If CommonPred itself is in the set of killing blocks, we're done.
      if (KillingBlocks.count(CommonPred))
        return {MaybeDeadAccess};

      SetVector<BasicBlock *> WorkList;
      // If CommonPred is null, there are multiple exits from the function.
      // They all have to be added to the worklist.
      if (CommonPred)
        WorkList.insert(CommonPred);
      else
        for (BasicBlock *R : PDT.roots()) {
          if (!isa<UnreachableInst>(R->getTerminator()))
            WorkList.insert(R);
        }

      NumCFGTries++;
      // Check if all paths starting from an exit node go through one of the
      // killing blocks before reaching MaybeDeadAccess.
      for (unsigned I = 0; I < WorkList.size(); I++) {
        NumCFGChecks++;
        BasicBlock *Current = WorkList[I];
        if (KillingBlocks.count(Current))
          continue;
        if (Current == MaybeDeadAccess->getBlock())
          return std::nullopt;

        // MaybeDeadAccess is reachable from the entry, so we don't have to
        // explore unreachable blocks further.
        if (!DT.isReachableFromEntry(Current))
          continue;

        for (BasicBlock *Pred : predecessors(Current))
          WorkList.insert(Pred);

        if (WorkList.size() >= MemorySSAPathCheckLimit)
          return std::nullopt;
      }
      NumCFGSuccess++;
    }

    // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
    // potentially dead.
    return {MaybeDeadAccess};
  }

  /// Delete dead memory defs and recursively add their operands to ToRemove if
  /// they become dead.
  void
  deleteDeadInstruction(Instruction *SI,
                        SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr) {
    MemorySSAUpdater Updater(&MSSA);
    SmallVector<Instruction *, 32> NowDeadInsts;
    NowDeadInsts.push_back(SI);
    --NumFastOther;

    while (!NowDeadInsts.empty()) {
      Instruction *DeadInst = NowDeadInsts.pop_back_val();
      ++NumFastOther;

      // Try to preserve debug information attached to the dead instruction.
      salvageDebugInfo(*DeadInst);
      salvageKnowledge(DeadInst);

      // Remove the Instruction from MSSA.
      MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
      bool IsMemDef = MA && isa<MemoryDef>(MA);
      if (MA) {
        if (IsMemDef) {
          auto *MD = cast<MemoryDef>(MA);
          SkipStores.insert(MD);
          if (Deleted)
            Deleted->insert(MD);
          if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
            if (SI->getValueOperand()->getType()->isPointerTy()) {
              const Value *UO = getUnderlyingObject(SI->getValueOperand());
              if (CapturedBeforeReturn.erase(UO))
                ShouldIterateEndOfFunctionDSE = true;
              InvisibleToCallerAfterRet.erase(UO);
            }
          }
        }

        Updater.removeMemoryAccess(MA);
      }

      auto I = IOLs.find(DeadInst->getParent());
      if (I != IOLs.end())
        I->second.erase(DeadInst);
      // Remove its operands.
      for (Use &O : DeadInst->operands())
        if (Instruction *OpI = dyn_cast<Instruction>(O)) {
          O.set(PoisonValue::get(O->getType()));
          if (isInstructionTriviallyDead(OpI, &TLI))
            NowDeadInsts.push_back(OpI);
        }

      EI.removeInstruction(DeadInst);
      // Remove memory defs directly if they don't produce results, but only
      // queue other dead instructions for later removal. They may have been
      // used as memory locations that have been cached by BatchAA. Removing
      // them here may lead to newly created instructions being allocated at
      // the same address, yielding stale cache entries.
      if (IsMemDef && DeadInst->getType()->isVoidTy())
        DeadInst->eraseFromParent();
      else
        ToRemove.push_back(DeadInst);
    }
  }

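  // Illustrative sketch (assumed example, not from the original source): when
  // the store below is deleted, the add feeding it becomes trivially dead and
  // is queued for removal as well:
  //
  //   %add = add i32 %x, 1     ; becomes trivially dead
  //   store i32 %add, ptr %p   ; dead store being deleted
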
  // Check for any extra throws between \p KillingI and \p DeadI that block
  // DSE. This only checks extra maythrows (those that aren't MemoryDefs).
  // MemoryDefs that may throw are handled during the walk from one def to the
  // next.
  bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
                       const Value *KillingUndObj) {
    // First see if we can ignore it by using the fact that the underlying
    // object of KillingI is an alloca/alloca-like object that is not visible
    // to the caller during execution of the function.
    if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
      return false;

    if (KillingI->getParent() == DeadI->getParent())
      return ThrowingBlocks.count(KillingI->getParent());
    return !ThrowingBlocks.empty();
  }

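  // Illustrative sketch (assumed example, not from the original source; @g and
  // @may_throw are hypothetical names): if @g is visible to the caller, the
  // call below may throw and the caller could then still observe the first
  // store, so it must not be treated as dead:
  //
  //   store i32 0, ptr @g
  //   call void @may_throw()
  //   store i32 1, ptr @g
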
  // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
  // instructions act as barriers:
  //  * A memory instruction that may throw and \p KillingI accesses a
  //    non-stack object.
  //  * Atomic stores stronger than monotonic.
  bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
    // If DeadI may throw it acts as a barrier, unless we are writing to an
    // alloca/alloca-like object that does not escape.
    if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
      return true;

    // If DeadI is an atomic load/store stronger than monotonic, do not try to
    // eliminate/reorder it.
    if (DeadI->isAtomic()) {
      if (auto *LI = dyn_cast<LoadInst>(DeadI))
        return isStrongerThanMonotonic(LI->getOrdering());
      if (auto *SI = dyn_cast<StoreInst>(DeadI))
        return isStrongerThanMonotonic(SI->getOrdering());
      if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
        return isStrongerThanMonotonic(ARMW->getOrdering());
      if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
        return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
               isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
      llvm_unreachable("other instructions should be skipped in MemorySSA");
    }
    return false;
  }

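  // Illustrative sketch (assumed example, not from the original source): the
  // release store below is stronger than monotonic, so the upward walk stops
  // at it and the earlier store to @g is conservatively kept:
  //
  //   store i32 0, ptr @g
  //   store atomic i32 1, ptr @flag release, align 4
  //   store i32 2, ptr @g
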
  /// Eliminate writes to objects that are not visible in the caller and are not
  /// accessed before returning from the function.
  bool eliminateDeadWritesAtEndOfFunction() {
    bool MadeChange = false;
    LLVM_DEBUG(
        dbgs()
        << "Trying to eliminate MemoryDefs at the end of the function\n");
    do {
      ShouldIterateEndOfFunctionDSE = false;
      for (MemoryDef *Def : llvm::reverse(MemDefs)) {
        if (SkipStores.contains(Def))
          continue;

        Instruction *DefI = Def->getMemoryInst();
        auto DefLoc = getLocForWrite(DefI);
        if (!DefLoc || !isRemovable(DefI)) {
          LLVM_DEBUG(dbgs() << " ... could not get location for write or "
                               "instruction not removable.\n");
          continue;
        }

        // NOTE: Currently eliminating writes at the end of a function is
        // limited to MemoryDefs with a single underlying object, to save
        // compile-time. In practice it appears the case with multiple
        // underlying objects is very uncommon. If it turns out to be important,
        // we can use getUnderlyingObjects here instead.
        const Value *UO = getUnderlyingObject(DefLoc->Ptr);
        if (!isInvisibleToCallerAfterRet(UO))
          continue;

        if (isWriteAtEndOfFunction(Def, *DefLoc)) {
          // See through pointer-to-pointer bitcasts
          LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
                               "of the function\n");
          deleteDeadInstruction(DefI);
          ++NumFastStores;
          MadeChange = true;
        }
      }
    } while (ShouldIterateEndOfFunctionDSE);
    return MadeChange;
  }

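  // Illustrative sketch (assumed example, not from the original source): the
  // store below writes to a non-escaping local that is never read again before
  // returning, so it is removed by the logic above:
  //
  //   %local = alloca i32
  //   store i32 42, ptr %local
  //   ret void
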
  /// If we have a zero-initializing memset following a call to malloc,
  /// try folding it into a call to calloc.
  bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
    Instruction *DefI = Def->getMemoryInst();
    MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
    if (!MemSet)
      // TODO: Could handle zero store to small allocation as well.
      return false;
    Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
    if (!StoredConstant || !StoredConstant->isNullValue())
      return false;

    if (!isRemovable(DefI))
      // The memset might be volatile.
      return false;

    if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
        F.hasFnAttribute(Attribute::SanitizeAddress) ||
        F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
        F.getName() == "calloc")
      return false;
    auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
    if (!Malloc)
      return false;
    auto *InnerCallee = Malloc->getCalledFunction();
    if (!InnerCallee)
      return false;
    LibFunc Func;
    if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
        Func != LibFunc_malloc)
      return false;
    // Gracefully handle malloc with unexpected memory attributes.
    auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
    if (!MallocDef)
      return false;

    auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
      // Check for a br(icmp(ptr, null), truebb, falsebb) pattern at the end
      // of the malloc block.
      auto *MallocBB = Malloc->getParent(),
           *MemsetBB = Memset->getParent();
      if (MallocBB == MemsetBB)
        return true;
      auto *Ptr = Memset->getArgOperand(0);
      auto *TI = MallocBB->getTerminator();
      ICmpInst::Predicate Pred;
      BasicBlock *TrueBB, *FalseBB;
      if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
                          FalseBB)))
        return false;
      if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
        return false;
      return true;
    };

    if (Malloc->getOperand(0) != MemSet->getLength())
      return false;
    if (!shouldCreateCalloc(Malloc, MemSet) ||
        !DT.dominates(Malloc, MemSet) ||
        !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
      return false;
    IRBuilder<> IRB(Malloc);
    Type *SizeTTy = Malloc->getArgOperand(0)->getType();
    auto *Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1),
                              Malloc->getArgOperand(0), IRB, TLI);
    if (!Calloc)
      return false;

    MemorySSAUpdater Updater(&MSSA);
    auto *NewAccess =
        Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), nullptr,
                                        MallocDef);
    auto *NewAccessMD = cast<MemoryDef>(NewAccess);
    Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
    Malloc->replaceAllUsesWith(Calloc);
    deleteDeadInstruction(Malloc);
    return true;
  }

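  // Illustrative sketch (assumed example, not from the original source) of the
  // fold performed above:
  //
  //   %p = call ptr @malloc(i64 %n)
  //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
  //
  // becomes
  //
  //   %p = call ptr @calloc(i64 1, i64 %n)
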
  // Check if there is a dominating condition that implies that the value
  // being stored to a pointer is already present at that location.
  bool dominatingConditionImpliesValue(MemoryDef *Def) {
    auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
    BasicBlock *StoreBB = StoreI->getParent();
    Value *StorePtr = StoreI->getPointerOperand();
    Value *StoreVal = StoreI->getValueOperand();

    DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
    if (!IDom)
      return false;

    auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
    if (!BI || !BI->isConditional())
      return false;

    // In case both blocks are the same, it is not possible to determine
    // if optimization is possible. (We would not want to optimize a store
    // in the FalseBB if condition is true and vice versa.)
    if (BI->getSuccessor(0) == BI->getSuccessor(1))
      return false;

    Instruction *ICmpL;
    ICmpInst::Predicate Pred;
    if (!match(BI->getCondition(),
               m_c_ICmp(Pred,
                        m_CombineAnd(m_Load(m_Specific(StorePtr)),
                                     m_Instruction(ICmpL)),
                        m_Specific(StoreVal))) ||
        !ICmpInst::isEquality(Pred))
      return false;

    // In case the else block also branches to the if block, or the other way
    // around, it is not possible to determine if the optimization is possible.
    if (Pred == ICmpInst::ICMP_EQ &&
        !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
                      StoreBB))
      return false;

    if (Pred == ICmpInst::ICMP_NE &&
        !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
                      StoreBB))
      return false;

    MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
    MemoryAccess *ClobAcc =
        MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);

    return MSSA.dominates(ClobAcc, LoadAcc);
  }

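  // Illustrative sketch (assumed example, not from the original source), in
  // C-like pseudo code: the guarded store below is a no-op because the
  // dominating condition already proves the value is present:
  //
  //   if (*p == v)
  //     *p = v;   // removable
  //
  // provided nothing may write to *p between the comparison's load and the
  // store, which is what the MemorySSA check above verifies.
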
  /// \returns true if \p Def is a no-op store, either because it
  /// directly stores back a loaded value or stores zero to a calloced object.
  bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
    Instruction *DefI = Def->getMemoryInst();
    StoreInst *Store = dyn_cast<StoreInst>(DefI);
    MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
    Constant *StoredConstant = nullptr;
    if (Store)
      StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
    else if (MemSet)
      StoredConstant = dyn_cast<Constant>(MemSet->getValue());
    else
      return false;

    if (!isRemovable(DefI))
      return false;

    if (StoredConstant) {
      Constant *InitC =
          getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
      // If the clobbering access is LiveOnEntry, no instructions between them
      // can modify the memory location.
      if (InitC && InitC == StoredConstant)
        return MSSA.isLiveOnEntryDef(
            MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
    }

    if (!Store)
      return false;

    if (dominatingConditionImpliesValue(Def))
      return true;

    if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
      if (LoadI->getPointerOperand() == Store->getOperand(1)) {
        // Get the defining access for the load.
        auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
        // Fast path: the defining accesses are the same.
        if (LoadAccess == Def->getDefiningAccess())
          return true;

        // Look through phi accesses. Recursively scan all phi accesses by
        // adding them to a worklist. Bail when we run into a memory def that
        // does not match LoadAccess.
        SetVector<MemoryAccess *> ToCheck;
        MemoryAccess *Current =
            MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
        // We don't want to bail when we run into the store memory def. But,
        // the phi access may point to it. So, pretend like we've already
        // checked it.
        ToCheck.insert(Def);
        ToCheck.insert(Current);
        // Start at current (1) to simulate already having checked Def.
        for (unsigned I = 1; I < ToCheck.size(); ++I) {
          Current = ToCheck[I];
          if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
            // Check all the operands.
            for (auto &Use : PhiAccess->incoming_values())
              ToCheck.insert(cast<MemoryAccess>(&Use));
            continue;
          }

          // If we found a memory def, bail. This happens when we have an
          // unrelated write in between an otherwise noop store.
          assert(isa<MemoryDef>(Current) &&
                 "Only MemoryDefs should reach here.");
          // TODO: Skip no alias MemoryDefs that have no aliasing reads.
          // We are searching for the definition of the store's destination.
          // So, if that is the same definition as the load, then this is a
          // noop. Otherwise, fail.
          if (LoadAccess != Current)
            return false;
        }
        return true;
      }
    }

    return false;
  }

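  // Illustrative sketch (assumed example, not from the original source) of a
  // no-op store detected above: the store writes back the value it just loaded
  // from the same pointer:
  //
  //   %v = load i32, ptr %p
  //   store i32 %v, ptr %p
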
  bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
    bool Changed = false;
    for (auto OI : IOL) {
      Instruction *DeadI = OI.first;
      MemoryLocation Loc = *getLocForWrite(DeadI);
      assert(isRemovable(DeadI) && "Expect only removable instruction");

      const Value *Ptr = Loc.Ptr->stripPointerCasts();
      int64_t DeadStart = 0;
      uint64_t DeadSize = Loc.Size.getValue();
      GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
      OverlapIntervalsTy &IntervalMap = OI.second;
      Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
      if (IntervalMap.empty())
        continue;
      Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
    }
    return Changed;
  }

  /// Eliminates writes to locations where the value that is being written
  /// is already stored at the same location.
  bool eliminateRedundantStoresOfExistingValues() {
    bool MadeChange = false;
    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
                         "already existing value\n");
    for (auto *Def : MemDefs) {
      if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
        continue;

      Instruction *DefInst = Def->getMemoryInst();
      auto MaybeDefLoc = getLocForWrite(DefInst);
      if (!MaybeDefLoc || !isRemovable(DefInst))
        continue;

      MemoryDef *UpperDef;
      // To conserve compile-time, we avoid walking to the next clobbering def.
      // Instead, we just try to get the optimized access, if it exists. DSE
      // will try to optimize defs during the earlier traversal.
      if (Def->isOptimized())
        UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
      else
        UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
      if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
        continue;

      Instruction *UpperInst = UpperDef->getMemoryInst();
      auto IsRedundantStore = [&]() {
        if (DefInst->isIdenticalTo(UpperInst))
          return true;
        if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
          if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
            // MemSetInst must have a write location.
            auto UpperLoc = getLocForWrite(UpperInst);
            if (!UpperLoc)
              return false;
            int64_t InstWriteOffset = 0;
            int64_t DepWriteOffset = 0;
            auto OR = isOverwrite(UpperInst, DefInst, *UpperLoc, *MaybeDefLoc,
                                  InstWriteOffset, DepWriteOffset);
            Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
            return StoredByte && StoredByte == MemSetI->getOperand(1) &&
                   OR == OW_Complete;
          }
        }
        return false;
      };

      if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
        continue;
      LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
                        << '\n');
      deleteDeadInstruction(DefInst);
      NumRedundantStores++;
      MadeChange = true;
    }
    return MadeChange;
  }

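  // Illustrative sketch (assumed example, not from the original source): the
  // store below writes a byte value that the earlier memset already placed at
  // the same location, so it is removed as redundant:
  //
  //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 false)
  //   store i8 0, ptr %p
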
};

static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                                DominatorTree &DT, PostDominatorTree &PDT,
                                const TargetLibraryInfo &TLI,
                                const LoopInfo &LI) {
  bool MadeChange = false;

  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
  // For each store:
  for (unsigned I = 0; I < State.MemDefs.size(); I++) {
    MemoryDef *KillingDef = State.MemDefs[I];
    if (State.SkipStores.count(KillingDef))
      continue;
    Instruction *KillingI = KillingDef->getMemoryInst();

    std::optional<MemoryLocation> MaybeKillingLoc;
    if (State.isMemTerminatorInst(KillingI)) {
      if (auto KillingLoc = State.getLocForTerminator(KillingI))
        MaybeKillingLoc = KillingLoc->first;
    } else {
      MaybeKillingLoc = State.getLocForWrite(KillingI);
    }

    if (!MaybeKillingLoc) {
      LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
                        << *KillingI << "\n");
      continue;
    }
    MemoryLocation KillingLoc = *MaybeKillingLoc;
    assert(KillingLoc.Ptr && "KillingLoc should not be null");
    const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
                      << *KillingDef << " (" << *KillingI << ")\n");

    unsigned ScanLimit = MemorySSAScanLimit;
    unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
    unsigned PartialLimit = MemorySSAPartialStoreLimit;
    // Worklist of MemoryAccesses that may be killed by KillingDef.
    SmallSetVector<MemoryAccess *, 8> ToCheck;
    // Track MemoryAccesses that have been deleted in the loop below, so we can
    // skip them. Don't use SkipStores for this, which may contain reused
    // MemoryAccess addresses.
    SmallPtrSet<MemoryAccess *, 8> Deleted;
    [[maybe_unused]] unsigned OrigNumSkipStores = State.SkipStores.size();
    ToCheck.insert(KillingDef->getDefiningAccess());

    bool Shortend = false;
    bool IsMemTerm = State.isMemTerminatorInst(KillingI);
    // Check if MemoryAccesses in the worklist are killed by KillingDef.
    for (unsigned I = 0; I < ToCheck.size(); I++) {
      MemoryAccess *Current = ToCheck[I];
      if (Deleted.contains(Current))
        continue;

      std::optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
          KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
          WalkerStepLimit, IsMemTerm, PartialLimit);

      if (!MaybeDeadAccess) {
        LLVM_DEBUG(dbgs() << " finished walk\n");
        continue;
      }

      MemoryAccess *DeadAccess = *MaybeDeadAccess;
      LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
      if (isa<MemoryPhi>(DeadAccess)) {
        LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
        for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
          MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
          BasicBlock *IncomingBlock = IncomingAccess->getBlock();
          BasicBlock *PhiBlock = DeadAccess->getBlock();

          // We only consider incoming MemoryAccesses that come before the
          // MemoryPhi. Otherwise we could discover candidates that do not
          // strictly dominate our starting def.
          if (State.PostOrderNumbers[IncomingBlock] >
              State.PostOrderNumbers[PhiBlock])
            ToCheck.insert(IncomingAccess);
        }
        continue;
      }
      auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
      Instruction *DeadI = DeadDefAccess->getMemoryInst();
      LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
      ToCheck.insert(DeadDefAccess->getDefiningAccess());
      NumGetDomMemoryDefPassed++;

      if (!DebugCounter::shouldExecute(MemorySSACounter))
        continue;

      MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);

      if (IsMemTerm) {
        const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
        if (KillingUndObj != DeadUndObj)
          continue;
        LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
                          << "\n KILLER: " << *KillingI << '\n');
        State.deleteDeadInstruction(DeadI, &Deleted);
        ++NumFastStores;
        MadeChange = true;
      } else {
        // Check if DeadI overwrites KillingI.
        int64_t KillingOffset = 0;
        int64_t DeadOffset = 0;
        OverwriteResult OR = State.isOverwrite(
            KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
        if (OR == OW_MaybePartial) {
          auto Iter = State.IOLs.insert(
              std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
                  DeadI->getParent(), InstOverlapIntervalsTy()));
          auto &IOL = Iter.first->second;
          OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
                                  DeadOffset, DeadI, IOL);
        }

        if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
          auto *DeadSI = dyn_cast<StoreInst>(DeadI);
          auto *KillingSI = dyn_cast<StoreInst>(KillingI);
          // We are re-using tryToMergePartialOverlappingStores, which requires
          // DeadSI to dominate KillingSI.
          // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
          if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
            if (Constant *Merged = tryToMergePartialOverlappingStores(
                    KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
                    State.BatchAA, &DT)) {

              // Update stored value of earlier store to merged constant.
              DeadSI->setOperand(0, Merged);
              ++NumModifiedStores;
              MadeChange = true;

              Shortend = true;
              // Remove killing store and remove any outstanding overlap
              // intervals for the updated store.
              State.deleteDeadInstruction(KillingSI, &Deleted);
              auto I = State.IOLs.find(DeadSI->getParent());
              if (I != State.IOLs.end())
                I->second.erase(DeadSI);
              break;
            }
          }
        }

        if (OR == OW_Complete) {
          LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
                            << "\n KILLER: " << *KillingI << '\n');
          State.deleteDeadInstruction(DeadI, &Deleted);
          ++NumFastStores;
          MadeChange = true;
        }
      }
    }

    assert(State.SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
           "SkipStores and Deleted out of sync?");

    // Check if the store is a no-op.
    if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {
      LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
                        << '\n');
      State.deleteDeadInstruction(KillingI);
      NumRedundantStores++;
      MadeChange = true;
      continue;
    }

    // Can we form a calloc from a memset/malloc pair?
    if (!Shortend && State.tryFoldIntoCalloc(KillingDef, KillingUndObj)) {
      LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
                        << " DEAD: " << *KillingI << '\n');
      State.deleteDeadInstruction(KillingI);
      MadeChange = true;
      continue;
    }
  }

  if (EnablePartialOverwriteTracking)
    for (auto &KV : State.IOLs)
      MadeChange |= State.removePartiallyOverlappedStores(KV.second);

  MadeChange |= State.eliminateRedundantStoresOfExistingValues();
  MadeChange |= State.eliminateDeadWritesAtEndOfFunction();

  while (!State.ToRemove.empty()) {
    Instruction *DeadInst = State.ToRemove.pop_back_val();
    DeadInst->eraseFromParent();
  }

  return MadeChange;
}
} // end anonymous namespace

//===----------------------------------------------------------------------===//
// DSE Pass
//===----------------------------------------------------------------------===//
PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);

  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);

#ifdef LLVM_ENABLE_STATS
  if (AreStatisticsEnabled())
    for (auto &I : instructions(F))
      NumRemainingStores += isa<StoreInst>(&I);
#endif

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  PA.preserve<LoopAnalysis>();
  return PA;
}