GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
1
//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// The code below implements dead store elimination using MemorySSA. It uses
10
// the following general approach: given a MemoryDef, walk upwards to find
11
// clobbering MemoryDefs that may be killed by the starting def. Then check
12
// that there are no uses that may read the location of the original MemoryDef
13
// in between both MemoryDefs. A bit more concretely:
14
//
15
// For all MemoryDefs StartDef:
16
// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17
// upwards.
18
// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19
// checking all uses starting at MaybeDeadAccess and walking until we see
20
// StartDef.
21
// 3. For each found CurrentDef, check that:
22
// 1. There are no barrier instructions between CurrentDef and StartDef (like
23
// throws or stores with ordering constraints).
24
// 2. StartDef is executed whenever CurrentDef is executed.
25
// 3. StartDef completely overwrites CurrentDef.
26
// 4. Erase CurrentDef from the function and MemorySSA.
27
//
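// As an illustrative sketch (values chosen for this comment, not taken from
// an actual test), the first store below is dead: the second store completely
// overwrites the same location before any read, so the steps above erase it.
//
//   store i32 0, ptr %p        ; MaybeDeadAccess, removed
//   store i32 1, ptr %p        ; StartDef, the killing store
//   %v = load i32, ptr %p      ; only observes the killing store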
28
//===----------------------------------------------------------------------===//
29
30
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
31
#include "llvm/ADT/APInt.h"
32
#include "llvm/ADT/DenseMap.h"
33
#include "llvm/ADT/MapVector.h"
34
#include "llvm/ADT/PostOrderIterator.h"
35
#include "llvm/ADT/SetVector.h"
36
#include "llvm/ADT/SmallPtrSet.h"
37
#include "llvm/ADT/SmallVector.h"
38
#include "llvm/ADT/Statistic.h"
39
#include "llvm/ADT/StringRef.h"
40
#include "llvm/Analysis/AliasAnalysis.h"
41
#include "llvm/Analysis/CaptureTracking.h"
42
#include "llvm/Analysis/GlobalsModRef.h"
43
#include "llvm/Analysis/LoopInfo.h"
44
#include "llvm/Analysis/MemoryBuiltins.h"
45
#include "llvm/Analysis/MemoryLocation.h"
46
#include "llvm/Analysis/MemorySSA.h"
47
#include "llvm/Analysis/MemorySSAUpdater.h"
48
#include "llvm/Analysis/MustExecute.h"
49
#include "llvm/Analysis/PostDominators.h"
50
#include "llvm/Analysis/TargetLibraryInfo.h"
51
#include "llvm/Analysis/ValueTracking.h"
52
#include "llvm/IR/Argument.h"
53
#include "llvm/IR/BasicBlock.h"
54
#include "llvm/IR/Constant.h"
55
#include "llvm/IR/Constants.h"
56
#include "llvm/IR/DataLayout.h"
57
#include "llvm/IR/DebugInfo.h"
58
#include "llvm/IR/Dominators.h"
59
#include "llvm/IR/Function.h"
60
#include "llvm/IR/IRBuilder.h"
61
#include "llvm/IR/InstIterator.h"
62
#include "llvm/IR/InstrTypes.h"
63
#include "llvm/IR/Instruction.h"
64
#include "llvm/IR/Instructions.h"
65
#include "llvm/IR/IntrinsicInst.h"
66
#include "llvm/IR/Module.h"
67
#include "llvm/IR/PassManager.h"
68
#include "llvm/IR/PatternMatch.h"
69
#include "llvm/IR/Value.h"
70
#include "llvm/Support/Casting.h"
71
#include "llvm/Support/CommandLine.h"
72
#include "llvm/Support/Debug.h"
73
#include "llvm/Support/DebugCounter.h"
74
#include "llvm/Support/ErrorHandling.h"
75
#include "llvm/Support/raw_ostream.h"
76
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
77
#include "llvm/Transforms/Utils/BuildLibCalls.h"
78
#include "llvm/Transforms/Utils/Local.h"
79
#include <algorithm>
80
#include <cassert>
81
#include <cstdint>
82
#include <iterator>
83
#include <map>
84
#include <optional>
85
#include <utility>
86
87
using namespace llvm;
88
using namespace PatternMatch;
89
90
#define DEBUG_TYPE "dse"
91
92
STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
93
STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
94
STATISTIC(NumFastStores, "Number of stores deleted");
95
STATISTIC(NumFastOther, "Number of other instrs removed");
96
STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
97
STATISTIC(NumModifiedStores, "Number of stores modified");
98
STATISTIC(NumCFGChecks, "Number of CFG checks performed");
99
STATISTIC(NumCFGTries, "Number of CFG-based elimination attempts");
100
STATISTIC(NumCFGSuccess, "Number of successful CFG-based eliminations");
101
STATISTIC(NumGetDomMemoryDefPassed,
102
"Number of times a valid candidate is returned from getDomMemoryDef");
103
STATISTIC(NumDomMemDefChecks,
104
"Number iterations check for reads in getDomMemoryDef");
105
106
DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
107
"Controls which MemoryDefs are eliminated.");
108
109
static cl::opt<bool>
110
EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
111
cl::init(true), cl::Hidden,
112
cl::desc("Enable partial-overwrite tracking in DSE"));
113
114
static cl::opt<bool>
115
EnablePartialStoreMerging("enable-dse-partial-store-merging",
116
cl::init(true), cl::Hidden,
117
cl::desc("Enable partial store merging in DSE"));
118
119
static cl::opt<unsigned>
120
MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
121
cl::desc("The number of memory instructions to scan for "
122
"dead store elimination (default = 150)"));
123
static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
124
"dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
125
cl::desc("The maximum number of steps while walking upwards to find "
126
"MemoryDefs that may be killed (default = 90)"));
127
128
static cl::opt<unsigned> MemorySSAPartialStoreLimit(
129
"dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
130
cl::desc("The maximum number candidates that only partially overwrite the "
131
"killing MemoryDef to consider"
132
" (default = 5)"));
133
134
static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
135
"dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
136
cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
137
"other stores per basic block (default = 5000)"));
138
139
static cl::opt<unsigned> MemorySSASameBBStepCost(
140
"dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
141
cl::desc(
142
"The cost of a step in the same basic block as the killing MemoryDef"
143
"(default = 1)"));
144
145
static cl::opt<unsigned>
146
MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
147
cl::Hidden,
148
cl::desc("The cost of a step in a different basic "
149
"block than the killing MemoryDef"
150
"(default = 5)"));
151
152
static cl::opt<unsigned> MemorySSAPathCheckLimit(
153
"dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
154
cl::desc("The maximum number of blocks to check when trying to prove that "
155
"all paths to an exit go through a killing block (default = 50)"));
156
157
// This flag allows or disallows DSE to optimize MemorySSA during its
158
// traversal. Note that DSE optimizing MemorySSA may impact other passes
159
// downstream of the DSE invocation and can lead to issues not being
160
// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
161
// those cases, the flag can be used to check if DSE's MemorySSA optimizations
162
// impact follow-up passes.
163
static cl::opt<bool>
164
OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
165
cl::desc("Allow DSE to optimize memory accesses."));
166
167
//===----------------------------------------------------------------------===//
168
// Helper functions
169
//===----------------------------------------------------------------------===//
170
using OverlapIntervalsTy = std::map<int64_t, int64_t>;
171
using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
172
173
/// Returns true if the end of this instruction can be safely shortened in
174
/// length.
175
static bool isShortenableAtTheEnd(Instruction *I) {
176
// Don't shorten stores for now
177
if (isa<StoreInst>(I))
178
return false;
179
180
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
181
switch (II->getIntrinsicID()) {
182
default: return false;
183
case Intrinsic::memset:
184
case Intrinsic::memcpy:
185
case Intrinsic::memcpy_element_unordered_atomic:
186
case Intrinsic::memset_element_unordered_atomic:
187
// Do shorten memory intrinsics.
188
// FIXME: Add memmove if it's also safe to transform.
189
return true;
190
}
191
}
192
193
// Don't shorten libcalls for now.
194
195
return false;
196
}
197
198
/// Returns true if the beginning of this instruction can be safely shortened
199
/// in length.
200
static bool isShortenableAtTheBeginning(Instruction *I) {
201
// FIXME: Handle only memset for now. Supporting memcpy/memmove should be
202
// easily done by offsetting the source address.
203
return isa<AnyMemSetInst>(I);
204
}
205
206
static std::optional<TypeSize> getPointerSize(const Value *V,
207
const DataLayout &DL,
208
const TargetLibraryInfo &TLI,
209
const Function *F) {
210
uint64_t Size;
211
ObjectSizeOpts Opts;
212
Opts.NullIsUnknownSize = NullPointerIsDefined(F);
213
214
if (getObjectSize(V, Size, DL, &TLI, Opts))
215
return TypeSize::getFixed(Size);
216
return std::nullopt;
217
}
218
219
namespace {
220
221
enum OverwriteResult {
222
OW_Begin,
223
OW_Complete,
224
OW_End,
225
OW_PartialEarlierWithFullLater,
226
OW_MaybePartial,
227
OW_None,
228
OW_Unknown
229
};
230
231
} // end anonymous namespace
232
233
/// Check if two instructions are masked stores that completely
234
/// overwrite one another. More specifically, \p KillingI has to
235
/// overwrite \p DeadI.
236
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
237
const Instruction *DeadI,
238
BatchAAResults &AA) {
239
const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
240
const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
241
if (KillingII == nullptr || DeadII == nullptr)
242
return OW_Unknown;
243
if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
244
return OW_Unknown;
245
if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
246
// Type size.
247
VectorType *KillingTy =
248
cast<VectorType>(KillingII->getArgOperand(0)->getType());
249
VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
250
if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
251
return OW_Unknown;
252
// Element count.
253
if (KillingTy->getElementCount() != DeadTy->getElementCount())
254
return OW_Unknown;
255
// Pointers.
256
Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
257
Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
258
if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
259
return OW_Unknown;
260
// Masks.
261
// TODO: check that KillingII's mask is a superset of the DeadII's mask.
262
if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
263
return OW_Unknown;
264
return OW_Complete;
265
}
266
return OW_Unknown;
267
}
268
269
/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
270
/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
271
/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
272
/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
273
/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
274
/// overwritten by a killing (smaller) store which doesn't write outside the big
275
/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
276
/// NOTE: This function must only be called if both \p KillingLoc and \p
277
/// DeadLoc belong to the same underlying object with valid \p KillingOff and
278
/// \p DeadOff.
279
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
280
const MemoryLocation &DeadLoc,
281
int64_t KillingOff, int64_t DeadOff,
282
Instruction *DeadI,
283
InstOverlapIntervalsTy &IOL) {
284
const uint64_t KillingSize = KillingLoc.Size.getValue();
285
const uint64_t DeadSize = DeadLoc.Size.getValue();
286
// We may now overlap, although the overlap is not complete. There might also
287
// be other incomplete overlaps, and together, they might cover the complete
288
// dead store.
289
// Note: The correctness of this logic depends on the fact that this function
290
// is not even called with DeadI when there are any intervening reads.
291
if (EnablePartialOverwriteTracking &&
292
KillingOff < int64_t(DeadOff + DeadSize) &&
293
int64_t(KillingOff + KillingSize) >= DeadOff) {
294
295
// Insert our part of the overlap into the map.
296
auto &IM = IOL[DeadI];
297
LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
298
<< int64_t(DeadOff + DeadSize) << ") KillingLoc ["
299
<< KillingOff << ", " << int64_t(KillingOff + KillingSize)
300
<< ")\n");
301
302
// Make sure that we only insert non-overlapping intervals and combine
303
// adjacent intervals. The intervals are stored in the map with the ending
304
// offset as the key (in the half-open sense) and the starting offset as
305
// the value.
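// For example (hypothetical offsets): a killing write covering [4, 12) is
// recorded as IM[12] = 4; a later killing write covering [0, 6) overlaps it
// and the two entries are merged below into IM[12] = 0.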
306
int64_t KillingIntStart = KillingOff;
307
int64_t KillingIntEnd = KillingOff + KillingSize;
308
309
// Find any intervals ending at, or after, KillingIntStart which start
310
// before KillingIntEnd.
311
auto ILI = IM.lower_bound(KillingIntStart);
312
if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
313
// This existing interval is overlapped with the current store somewhere
314
// in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
315
// intervals and adjusting our start and end.
316
KillingIntStart = std::min(KillingIntStart, ILI->second);
317
KillingIntEnd = std::max(KillingIntEnd, ILI->first);
318
ILI = IM.erase(ILI);
319
320
// Continue erasing and adjusting our end in case other previous
321
// intervals are also overlapped with the current store.
322
//
323
// |--- dead 1 ---| |--- dead 2 ---|
324
// |------- killing---------|
325
//
326
while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
327
assert(ILI->second > KillingIntStart && "Unexpected interval");
328
KillingIntEnd = std::max(KillingIntEnd, ILI->first);
329
ILI = IM.erase(ILI);
330
}
331
}
332
333
IM[KillingIntEnd] = KillingIntStart;
334
335
ILI = IM.begin();
336
if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
337
LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
338
<< DeadOff << ", " << int64_t(DeadOff + DeadSize)
339
<< ") Composite KillingLoc [" << ILI->second << ", "
340
<< ILI->first << ")\n");
341
++NumCompletePartials;
342
return OW_Complete;
343
}
344
}
345
346
// Check for a dead store which writes to all the memory locations that
347
// the killing store writes to.
348
if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
349
int64_t(DeadOff + DeadSize) > KillingOff &&
350
uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
351
LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
352
<< ", " << int64_t(DeadOff + DeadSize)
353
<< ") by a killing store [" << KillingOff << ", "
354
<< int64_t(KillingOff + KillingSize) << ")\n");
355
// TODO: Maybe come up with a better name?
356
return OW_PartialEarlierWithFullLater;
357
}
358
359
// Another interesting case is if the killing store overwrites the end of the
360
// dead store.
361
//
362
// |--dead--|
363
// |-- killing --|
364
//
365
// In this case we may want to trim the size of dead store to avoid
366
// generating stores to addresses which will definitely be overwritten by the killing
367
// store.
368
if (!EnablePartialOverwriteTracking &&
369
(KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
370
int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
371
return OW_End;
372
373
// Finally, we also need to check if the killing store overwrites the
374
// beginning of the dead store.
375
//
376
// |--dead--|
377
// |-- killing --|
378
//
379
// In this case we may want to move the destination address and trim the size
380
// of dead store to avoid generating stores to addresses which will definitely
381
// be overwritten by the killing store.
382
if (!EnablePartialOverwriteTracking &&
383
(KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
384
assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
385
"Expect to be handled as OW_Complete");
386
return OW_Begin;
387
}
388
// Otherwise, they don't completely overlap.
389
return OW_Unknown;
390
}
391
392
/// Returns true if the memory which is accessed by the second instruction is not
393
/// modified between the first and the second instruction.
394
/// Precondition: Second instruction must be dominated by the first
395
/// instruction.
396
static bool
397
memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
398
BatchAAResults &AA, const DataLayout &DL,
399
DominatorTree *DT) {
400
// Do a backwards scan through the CFG from SecondI to FirstI. Look for
401
// instructions which can modify the memory location accessed by SecondI.
402
//
403
// While doing the walk keep track of the address to check. It might be
404
// different in different basic blocks due to PHI translation.
405
using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
406
SmallVector<BlockAddressPair, 16> WorkList;
407
// Keep track of the address we visited each block with. Bail out if we
408
// visit a block with different addresses.
409
DenseMap<BasicBlock *, Value *> Visited;
410
411
BasicBlock::iterator FirstBBI(FirstI);
412
++FirstBBI;
413
BasicBlock::iterator SecondBBI(SecondI);
414
BasicBlock *FirstBB = FirstI->getParent();
415
BasicBlock *SecondBB = SecondI->getParent();
416
MemoryLocation MemLoc;
417
if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
418
MemLoc = MemoryLocation::getForDest(MemSet);
419
else
420
MemLoc = MemoryLocation::get(SecondI);
421
422
auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
423
424
// Start checking the SecondBB.
425
WorkList.push_back(
426
std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
427
bool isFirstBlock = true;
428
429
// Check all blocks going backward until we reach the FirstBB.
430
while (!WorkList.empty()) {
431
BlockAddressPair Current = WorkList.pop_back_val();
432
BasicBlock *B = Current.first;
433
PHITransAddr &Addr = Current.second;
434
Value *Ptr = Addr.getAddr();
435
436
// Ignore instructions before FirstI if this is the FirstBB.
437
BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
438
439
BasicBlock::iterator EI;
440
if (isFirstBlock) {
441
// Ignore instructions after SecondI if this is the first visit of SecondBB.
442
assert(B == SecondBB && "first block is not the store block");
443
EI = SecondBBI;
444
isFirstBlock = false;
445
} else {
446
// It's not SecondBB or (in case of a loop) the second visit of SecondBB.
447
// In this case we also have to look at instructions after SecondI.
448
EI = B->end();
449
}
450
for (; BI != EI; ++BI) {
451
Instruction *I = &*BI;
452
if (I->mayWriteToMemory() && I != SecondI)
453
if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
454
return false;
455
}
456
if (B != FirstBB) {
457
assert(B != &FirstBB->getParent()->getEntryBlock() &&
458
"Should not hit the entry block because SI must be dominated by LI");
459
for (BasicBlock *Pred : predecessors(B)) {
460
PHITransAddr PredAddr = Addr;
461
if (PredAddr.needsPHITranslationFromBlock(B)) {
462
if (!PredAddr.isPotentiallyPHITranslatable())
463
return false;
464
if (!PredAddr.translateValue(B, Pred, DT, false))
465
return false;
466
}
467
Value *TranslatedPtr = PredAddr.getAddr();
468
auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
469
if (!Inserted.second) {
470
// We already visited this block before. If it was with a different
471
// address - bail out!
472
if (TranslatedPtr != Inserted.first->second)
473
return false;
474
// ... otherwise just skip it.
475
continue;
476
}
477
WorkList.push_back(std::make_pair(Pred, PredAddr));
478
}
479
}
480
}
481
return true;
482
}
483
484
static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
485
uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
486
uint64_t NewSizeInBits, bool IsOverwriteEnd) {
487
const DataLayout &DL = Inst->getDataLayout();
488
uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
489
uint64_t DeadSliceOffsetInBits =
490
OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
491
auto SetDeadFragExpr = [](auto *Assign,
492
DIExpression::FragmentInfo DeadFragment) {
493
// createFragmentExpression expects an offset relative to the existing
494
// fragment offset if there is one.
495
uint64_t RelativeOffset = DeadFragment.OffsetInBits -
496
Assign->getExpression()
497
->getFragmentInfo()
498
.value_or(DIExpression::FragmentInfo(0, 0))
499
.OffsetInBits;
500
if (auto NewExpr = DIExpression::createFragmentExpression(
501
Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
502
Assign->setExpression(*NewExpr);
503
return;
504
}
505
// Failed to create a fragment expression for this so discard the value,
506
// making this a kill location.
507
auto *Expr = *DIExpression::createFragmentExpression(
508
DIExpression::get(Assign->getContext(), std::nullopt),
509
DeadFragment.OffsetInBits, DeadFragment.SizeInBits);
510
Assign->setExpression(Expr);
511
Assign->setKillLocation();
512
};
513
514
// A DIAssignID to use so that the inserted dbg.assign intrinsics do not
515
// link to any instructions. Created in the loop below (once).
516
DIAssignID *LinkToNothing = nullptr;
517
LLVMContext &Ctx = Inst->getContext();
518
auto GetDeadLink = [&Ctx, &LinkToNothing]() {
519
if (!LinkToNothing)
520
LinkToNothing = DIAssignID::getDistinct(Ctx);
521
return LinkToNothing;
522
};
523
524
// Insert an unlinked dbg.assign intrinsic for the dead fragment after each
525
// overlapping dbg.assign intrinsic. The loop invalidates the iterators
526
// returned by getAssignmentMarkers so save a copy of the markers to iterate
527
// over.
528
auto LinkedRange = at::getAssignmentMarkers(Inst);
529
SmallVector<DbgVariableRecord *> LinkedDVRAssigns =
530
at::getDVRAssignmentMarkers(Inst);
531
SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(),
532
LinkedRange.end());
533
auto InsertAssignForOverlap = [&](auto *Assign) {
534
std::optional<DIExpression::FragmentInfo> NewFragment;
535
if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
536
DeadSliceSizeInBits, Assign,
537
NewFragment) ||
538
!NewFragment) {
539
// We couldn't calculate the intersecting fragment for some reason. Be
540
// cautious and unlink the whole assignment from the store.
541
Assign->setKillAddress();
542
Assign->setAssignId(GetDeadLink());
543
return;
544
}
545
// No intersect.
546
if (NewFragment->SizeInBits == 0)
547
return;
548
549
// Fragments overlap: insert a new dbg.assign for this dead part.
550
auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
551
NewAssign->insertAfter(Assign);
552
NewAssign->setAssignId(GetDeadLink());
553
if (NewFragment)
554
SetDeadFragExpr(NewAssign, *NewFragment);
555
NewAssign->setKillAddress();
556
};
557
for_each(Linked, InsertAssignForOverlap);
558
for_each(LinkedDVRAssigns, InsertAssignForOverlap);
559
}
560
561
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
562
uint64_t &DeadSize, int64_t KillingStart,
563
uint64_t KillingSize, bool IsOverwriteEnd) {
564
auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
565
Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
566
567
// We assume that memset/memcpy operates in chunks of the "largest" native
568
// type size and aligned on the same value. That means optimal start and size
569
// of memset/memcpy should be modulo of preferred alignment of that type. That
570
// is, there is no sense in trying to reduce the store size any further
571
// since any "extra" stores comes for free anyway.
572
// On the other hand, the maximum alignment we can achieve is limited by the
573
// alignment of the initial store.
574
575
// TODO: Limit maximum alignment by preferred (or abi?) alignment of the
576
// "largest" native type.
577
// Note: What is the proper way to get that value?
578
// Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
579
// PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
580
581
int64_t ToRemoveStart = 0;
582
uint64_t ToRemoveSize = 0;
583
// Compute start and size of the region to remove. Make sure 'PrefAlign' is
584
// maintained on the remaining store.
585
if (IsOverwriteEnd) {
586
// Calculate required adjustment for 'KillingStart' in order to keep
587
// remaining store size aligned on 'PrefAlign'.
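// Worked example (made-up values): for a 32-byte dead memset starting at
// DeadStart = 0 with PrefAlign = 8, overwritten from KillingStart = 20
// onwards, offsetToAlignment(20, 8) == 4, so ToRemoveStart = 24 and
// ToRemoveSize = 8; the memset is shortened to 24 bytes and bytes [20, 24)
// are simply written by both stores.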
588
uint64_t Off =
589
offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
590
ToRemoveStart = KillingStart + Off;
591
if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
592
return false;
593
ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
594
} else {
595
ToRemoveStart = DeadStart;
596
assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
597
"Not overlapping accesses?");
598
ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
599
// Calculate required adjustment for 'ToRemoveSize' in order to keep
600
// start of the remaining store aligned on 'PrefAlign'.
601
uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
602
if (Off != 0) {
603
if (ToRemoveSize <= (PrefAlign.value() - Off))
604
return false;
605
ToRemoveSize -= PrefAlign.value() - Off;
606
}
607
assert(isAligned(PrefAlign, ToRemoveSize) &&
608
"Should preserve selected alignment");
609
}
610
611
assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
612
assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
613
614
uint64_t NewSize = DeadSize - ToRemoveSize;
615
if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
616
// When shortening an atomic memory intrinsic, the newly shortened
617
// length must remain an integer multiple of the element size.
618
const uint32_t ElementSize = AMI->getElementSizeInBytes();
619
if (0 != NewSize % ElementSize)
620
return false;
621
}
622
623
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
624
<< (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
625
<< "\n KILLER [" << ToRemoveStart << ", "
626
<< int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
627
628
Value *DeadWriteLength = DeadIntrinsic->getLength();
629
Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
630
DeadIntrinsic->setLength(TrimmedLength);
631
DeadIntrinsic->setDestAlignment(PrefAlign);
632
633
Value *OrigDest = DeadIntrinsic->getRawDest();
634
if (!IsOverwriteEnd) {
635
Value *Indices[1] = {
636
ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
637
Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
638
Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
639
DeadI->getIterator());
640
NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
641
DeadIntrinsic->setDest(NewDestGEP);
642
}
643
644
// Update attached dbg.assign intrinsics. Assume 8-bit byte.
645
shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
646
IsOverwriteEnd);
647
648
// Finally update start and size of dead access.
649
if (!IsOverwriteEnd)
650
DeadStart += ToRemoveSize;
651
DeadSize = NewSize;
652
653
return true;
654
}
655
656
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
657
int64_t &DeadStart, uint64_t &DeadSize) {
658
if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
659
return false;
660
661
OverlapIntervalsTy::iterator OII = --IntervalMap.end();
662
int64_t KillingStart = OII->second;
663
uint64_t KillingSize = OII->first - KillingStart;
664
665
assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
666
667
if (KillingStart > DeadStart &&
668
// Note: "KillingStart - KillingStart" is known to be positive due to
669
// preceding check.
670
(uint64_t)(KillingStart - DeadStart) < DeadSize &&
671
// Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
672
// be non negative due to preceding checks.
673
KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
674
if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
675
true)) {
676
IntervalMap.erase(OII);
677
return true;
678
}
679
}
680
return false;
681
}
682
683
static bool tryToShortenBegin(Instruction *DeadI,
684
OverlapIntervalsTy &IntervalMap,
685
int64_t &DeadStart, uint64_t &DeadSize) {
686
if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
687
return false;
688
689
OverlapIntervalsTy::iterator OII = IntervalMap.begin();
690
int64_t KillingStart = OII->second;
691
uint64_t KillingSize = OII->first - KillingStart;
692
693
assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
694
695
if (KillingStart <= DeadStart &&
696
// Note: "DeadStart - KillingStart" is known to be non negative due to
697
// preceding check.
698
KillingSize > (uint64_t)(DeadStart - KillingStart)) {
699
// Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
700
// be positive due to preceding checks.
701
assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
702
"Should have been handled as OW_Complete");
703
if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
704
false)) {
705
IntervalMap.erase(OII);
706
return true;
707
}
708
}
709
return false;
710
}
711
712
static Constant *
713
tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
714
int64_t KillingOffset, int64_t DeadOffset,
715
const DataLayout &DL, BatchAAResults &AA,
716
DominatorTree *DT) {
717
718
if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
719
DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
720
KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
721
DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
722
memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
723
// If the store we find is:
724
// a) partially overwritten by the store to 'Loc'
725
// b) the killing store is fully contained in the dead one and
726
// c) they both have a constant value
727
// d) none of the two stores need padding
728
// Merge the two stores, replacing the dead store's value with a
729
// merge of both values.
730
// TODO: Deal with other constant types (vectors, etc), and probably
731
// some mem intrinsics (if needed)
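// Illustrative example (made-up values, little-endian): merging a dead i32
// store of 0x11223344 at offset 0 with a killing i8 store of 0xFF at offset
// 1 gives BitOffsetDiff = 8, LShiftAmount = 8 and a merged constant of
// 0x1122FF44.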
732
733
APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
734
APInt KillingValue =
735
cast<ConstantInt>(KillingI->getValueOperand())->getValue();
736
unsigned KillingBits = KillingValue.getBitWidth();
737
assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
738
KillingValue = KillingValue.zext(DeadValue.getBitWidth());
739
740
// Offset of the smaller store inside the larger store
741
unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
742
unsigned LShiftAmount =
743
DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
744
: BitOffsetDiff;
745
APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
746
LShiftAmount + KillingBits);
747
// Clear the bits we'll be replacing, then OR with the smaller
748
// store, shifted appropriately.
749
APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
750
LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
751
<< "\n Killing: " << *KillingI
752
<< "\n Merged Value: " << Merged << '\n');
753
return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
754
}
755
return nullptr;
756
}
757
758
namespace {
759
// Returns true if \p I is an intrinsic that does not read or write memory.
760
bool isNoopIntrinsic(Instruction *I) {
761
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
762
switch (II->getIntrinsicID()) {
763
case Intrinsic::lifetime_start:
764
case Intrinsic::lifetime_end:
765
case Intrinsic::invariant_end:
766
case Intrinsic::launder_invariant_group:
767
case Intrinsic::assume:
768
return true;
769
case Intrinsic::dbg_declare:
770
case Intrinsic::dbg_label:
771
case Intrinsic::dbg_value:
772
llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
773
default:
774
return false;
775
}
776
}
777
return false;
778
}
779
780
// Check if we can ignore \p D for DSE.
781
bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
782
Instruction *DI = D->getMemoryInst();
783
// Calls that only access inaccessible memory cannot read or write any memory
784
// locations we consider for elimination.
785
if (auto *CB = dyn_cast<CallBase>(DI))
786
if (CB->onlyAccessesInaccessibleMemory())
787
return true;
788
789
// We can eliminate stores to locations not visible to the caller across
790
// throwing instructions.
791
if (DI->mayThrow() && !DefVisibleToCaller)
792
return true;
793
794
// We can remove the dead stores, irrespective of the fence and its ordering
795
// (release/acquire/seq_cst). Fences only constrain the ordering of
796
// already visible stores; they do not make a store visible to other
797
// threads. So, skipping over a fence does not change a store from being
798
// dead.
799
if (isa<FenceInst>(DI))
800
return true;
801
802
// Skip intrinsics that do not really read or modify memory.
803
if (isNoopIntrinsic(DI))
804
return true;
805
806
return false;
807
}
808
809
struct DSEState {
810
Function &F;
811
AliasAnalysis &AA;
812
EarliestEscapeInfo EI;
813
814
/// The single BatchAA instance that is used to cache AA queries. It will
815
/// not be invalidated over the whole run. This is safe, because:
816
/// 1. Only memory writes are removed, so the alias cache for memory
817
/// locations remains valid.
818
/// 2. No new instructions are added (only instructions removed), so cached
819
/// information for a deleted value cannot be accessed by a re-used new
820
/// value pointer.
821
BatchAAResults BatchAA;
822
823
MemorySSA &MSSA;
824
DominatorTree &DT;
825
PostDominatorTree &PDT;
826
const TargetLibraryInfo &TLI;
827
const DataLayout &DL;
828
const LoopInfo &LI;
829
830
// Whether the function contains any irreducible control flow, useful for
831
// being accurately able to detect loops.
832
bool ContainsIrreducibleLoops;
833
834
// All MemoryDefs that potentially could kill other MemDefs.
835
SmallVector<MemoryDef *, 64> MemDefs;
836
// Any that should be skipped as they are already deleted
837
SmallPtrSet<MemoryAccess *, 4> SkipStores;
838
// Keep track whether a given object is captured before return or not.
839
DenseMap<const Value *, bool> CapturedBeforeReturn;
840
// Keep track of all of the objects that are invisible to the caller after
841
// the function returns.
842
DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
843
// Keep track of blocks with throwing instructions not modeled in MemorySSA.
844
SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
845
// Post-order numbers for each basic block. Used to figure out if memory
846
// accesses are executed before another access.
847
DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
848
849
/// Keep track of instructions (partly) overlapping with killing MemoryDefs per
850
/// basic block.
851
MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
852
// Check if there are root nodes that are terminated by UnreachableInst.
853
// Those roots pessimize post-dominance queries. If there are such roots,
854
// fall back to CFG scan starting from all non-unreachable roots.
855
bool AnyUnreachableExit;
856
857
// Whether or not we should iterate on removing dead stores at the end of the
858
// function due to removing a store causing a previously captured pointer to
859
// no longer be captured.
860
bool ShouldIterateEndOfFunctionDSE;
861
862
/// Dead instructions to be removed at the end of DSE.
863
SmallVector<Instruction *> ToRemove;
864
865
// Class contains self-reference, make sure it's not copied/moved.
866
DSEState(const DSEState &) = delete;
867
DSEState &operator=(const DSEState &) = delete;
868
869
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
870
PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
871
const LoopInfo &LI)
872
: F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
873
PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
874
// Collect blocks with throwing instructions not modeled in MemorySSA and
875
// alloc-like objects.
876
unsigned PO = 0;
877
for (BasicBlock *BB : post_order(&F)) {
878
PostOrderNumbers[BB] = PO++;
879
for (Instruction &I : *BB) {
880
MemoryAccess *MA = MSSA.getMemoryAccess(&I);
881
if (I.mayThrow() && !MA)
882
ThrowingBlocks.insert(I.getParent());
883
884
auto *MD = dyn_cast_or_null<MemoryDef>(MA);
885
if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
886
(getLocForWrite(&I) || isMemTerminatorInst(&I)))
887
MemDefs.push_back(MD);
888
}
889
}
890
891
// Treat byval or inalloca arguments the same as Allocas; stores to them are
892
// dead at the end of the function.
893
for (Argument &AI : F.args())
894
if (AI.hasPassPointeeByValueCopyAttr())
895
InvisibleToCallerAfterRet.insert({&AI, true});
896
897
// Collect whether there is any irreducible control flow in the function.
898
ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
899
900
AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
901
return isa<UnreachableInst>(E->getTerminator());
902
});
903
}
904
905
static void pushMemUses(MemoryAccess *Acc,
906
SmallVectorImpl<MemoryAccess *> &WorkList,
907
SmallPtrSetImpl<MemoryAccess *> &Visited) {
908
for (Use &U : Acc->uses()) {
909
auto *MA = cast<MemoryAccess>(U.getUser());
910
if (Visited.insert(MA).second)
911
WorkList.push_back(MA);
912
}
913
};
914
915
LocationSize strengthenLocationSize(const Instruction *I,
916
LocationSize Size) const {
917
if (auto *CB = dyn_cast<CallBase>(I)) {
918
LibFunc F;
919
if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
920
(F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
921
// Use the precise location size specified by the 3rd argument
922
// for determining whether KillingI overwrites DeadLoc if it is a memset_chk
923
// instruction. memset_chk will write either the amount specified as 3rd
924
// argument or the function will immediately abort and exit the program.
925
// NOTE: AA may determine NoAlias if it can prove that the access size
926
// is larger than the allocation size due to that being UB. To avoid
927
// returning potentially invalid NoAlias results by AA, limit the use of
928
// the precise location size to isOverwrite.
929
if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
930
return LocationSize::precise(Len->getZExtValue());
931
}
932
}
933
return Size;
934
}
935
936
/// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
937
/// KillingI instruction) completely overwrites a store to the 'DeadLoc'
938
/// location (by \p DeadI instruction).
939
/// Return OW_MaybePartial if \p KillingI does not completely overwrite
940
/// \p DeadI, but they both write to the same underlying object. In that
941
/// case, use isPartialOverwrite to check if \p KillingI partially overwrites
942
/// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite the
943
/// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
944
OverwriteResult isOverwrite(const Instruction *KillingI,
945
const Instruction *DeadI,
946
const MemoryLocation &KillingLoc,
947
const MemoryLocation &DeadLoc,
948
int64_t &KillingOff, int64_t &DeadOff) {
949
// AliasAnalysis does not always account for loops. Limit overwrite checks
950
// to dependencies for which we can guarantee they are independent of any
951
// loops they are in.
952
if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
953
return OW_Unknown;
954
955
LocationSize KillingLocSize =
956
strengthenLocationSize(KillingI, KillingLoc.Size);
957
const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
958
const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
959
const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
960
const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
961
962
// Check whether the killing store overwrites the whole object, in which
963
// case the size/offset of the dead store does not matter.
964
if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
965
isIdentifiedObject(KillingUndObj)) {
966
std::optional<TypeSize> KillingUndObjSize =
967
getPointerSize(KillingUndObj, DL, TLI, &F);
968
if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
969
return OW_Complete;
970
}
971
972
// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
973
// get imprecise values here, though (except for unknown sizes).
974
if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
975
// In case no constant size is known, try to use the IR values for the number
976
// of bytes written and check if they match.
977
const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
978
const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
979
if (KillingMemI && DeadMemI) {
980
const Value *KillingV = KillingMemI->getLength();
981
const Value *DeadV = DeadMemI->getLength();
982
if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
983
return OW_Complete;
984
}
985
986
// Masked stores have imprecise locations, but we can reason about them
987
// to some extent.
988
return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
989
}
990
991
const TypeSize KillingSize = KillingLocSize.getValue();
992
const TypeSize DeadSize = DeadLoc.Size.getValue();
993
// Bail on doing Size comparison which depends on AA for now
994
// TODO: Remove AnyScalable once Alias Analysis deals with scalable vectors
995
const bool AnyScalable =
996
DeadSize.isScalable() || KillingLocSize.isScalable();
997
998
if (AnyScalable)
999
return OW_Unknown;
1000
// Query the alias information
1001
AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
1002
1003
// If the start pointers are the same, we just have to compare sizes to see if
1004
// the killing store was larger than the dead store.
1005
if (AAR == AliasResult::MustAlias) {
1006
// Make sure that KillingSize is >= DeadSize.
1007
if (KillingSize >= DeadSize)
1008
return OW_Complete;
1009
}
1010
1011
// If we hit a partial alias we may have a full overwrite
1012
if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
1013
int32_t Off = AAR.getOffset();
1014
if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
1015
return OW_Complete;
1016
}
1017
1018
// If we can't resolve the same pointers to the same object, then we can't
1019
// analyze them at all.
1020
if (DeadUndObj != KillingUndObj) {
1021
// Non aliasing stores to different objects don't overlap. Note that
1022
// if the killing store is known to overwrite whole object (out of
1023
// bounds access overwrites whole object as well) then it is assumed to
1024
// completely overwrite any store to the same object even if they don't
1025
// actually alias (see next check).
1026
if (AAR == AliasResult::NoAlias)
1027
return OW_None;
1028
return OW_Unknown;
1029
}
1030
1031
// Okay, we have stores to two completely different pointers. Try to
1032
// decompose the pointer into a "base + constant_offset" form. If the base
1033
// pointers are equal, then we can reason about the two stores.
1034
DeadOff = 0;
1035
KillingOff = 0;
1036
const Value *DeadBasePtr =
1037
GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
1038
const Value *KillingBasePtr =
1039
GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
1040
1041
// If the base pointers still differ, we have two completely different
1042
// stores.
1043
if (DeadBasePtr != KillingBasePtr)
1044
return OW_Unknown;
1045
1046
// The killing access completely overlaps the dead store if and only if
1047
// both start and end of the dead one is "inside" the killing one:
1048
// |<->|--dead--|<->|
1049
// |-----killing------|
1050
// Accesses may overlap if and only if start of one of them is "inside"
1051
// another one:
1052
// |<->|--dead--|<-------->|
1053
// |-------killing--------|
1054
// OR
1055
// |-------dead-------|
1056
// |<->|---killing---|<----->|
1057
//
1058
// We have to be careful here as *Off is signed while *.Size is unsigned.
1059
1060
// Check if the dead access starts "not before" the killing one.
1061
if (DeadOff >= KillingOff) {
1062
// If the dead access ends "not after" the killing access then the
1063
// dead one is completely overwritten by the killing one.
1064
if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1065
return OW_Complete;
1066
// If start of the dead access is "before" end of the killing access
1067
// then accesses overlap.
1068
else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1069
return OW_MaybePartial;
1070
}
1071
// If start of the killing access is "before" end of the dead access then
1072
// accesses overlap.
1073
else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1074
return OW_MaybePartial;
1075
}
1076
1077
// Can reach here only if accesses are known not to overlap.
1078
return OW_None;
1079
}
1080
1081
bool isInvisibleToCallerAfterRet(const Value *V) {
1082
if (isa<AllocaInst>(V))
1083
return true;
1084
auto I = InvisibleToCallerAfterRet.insert({V, false});
1085
if (I.second) {
1086
if (!isInvisibleToCallerOnUnwind(V)) {
1087
I.first->second = false;
1088
} else if (isNoAliasCall(V)) {
1089
I.first->second = !PointerMayBeCaptured(V, true, false);
1090
}
1091
}
1092
return I.first->second;
1093
}
1094
1095
bool isInvisibleToCallerOnUnwind(const Value *V) {
1096
bool RequiresNoCaptureBeforeUnwind;
1097
if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1098
return false;
1099
if (!RequiresNoCaptureBeforeUnwind)
1100
return true;
1101
1102
auto I = CapturedBeforeReturn.insert({V, true});
1103
if (I.second)
1104
// NOTE: This could be made more precise by PointerMayBeCapturedBefore
1105
// with the killing MemoryDef. But we refrain from doing so for now to
1106
// limit compile-time and this does not cause any changes to the number
1107
// of stores removed on a large test set in practice.
1108
I.first->second = PointerMayBeCaptured(V, false, true);
1109
return !I.first->second;
1110
}
1111
1112
std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
1113
if (!I->mayWriteToMemory())
1114
return std::nullopt;
1115
1116
if (auto *CB = dyn_cast<CallBase>(I))
1117
return MemoryLocation::getForDest(CB, TLI);
1118
1119
return MemoryLocation::getOrNone(I);
1120
}
1121
1122
/// Assuming this instruction has a dead analyzable write, can we delete
1123
/// this instruction?
1124
bool isRemovable(Instruction *I) {
1125
assert(getLocForWrite(I) && "Must have analyzable write");
1126
1127
// Don't remove volatile/atomic stores.
1128
if (StoreInst *SI = dyn_cast<StoreInst>(I))
1129
return SI->isUnordered();
1130
1131
if (auto *CB = dyn_cast<CallBase>(I)) {
1132
// Don't remove volatile memory intrinsics.
1133
if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1134
return !MI->isVolatile();
1135
1136
// Never remove dead lifetime intrinsics, e.g. because they are followed
1137
// by a free.
1138
if (CB->isLifetimeStartOrEnd())
1139
return false;
1140
1141
return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1142
!CB->isTerminator();
1143
}
1144
1145
return false;
1146
}
1147
1148
/// Returns true if \p UseInst completely overwrites \p DefLoc
1149
/// (stored by \p DefInst).
1150
bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1151
Instruction *UseInst) {
1152
// UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1153
// MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1154
// MemoryDef.
1155
if (!UseInst->mayWriteToMemory())
1156
return false;
1157
1158
if (auto *CB = dyn_cast<CallBase>(UseInst))
1159
if (CB->onlyAccessesInaccessibleMemory())
1160
return false;
1161
1162
int64_t InstWriteOffset, DepWriteOffset;
1163
if (auto CC = getLocForWrite(UseInst))
1164
return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1165
DepWriteOffset) == OW_Complete;
1166
return false;
1167
}
1168
1169
/// Returns true if \p Def is not read before returning from the function.
1170
bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc) {
1171
LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1172
<< *Def->getMemoryInst()
1173
<< ") is at the end the function \n");
1174
SmallVector<MemoryAccess *, 4> WorkList;
1175
SmallPtrSet<MemoryAccess *, 8> Visited;
1176
1177
pushMemUses(Def, WorkList, Visited);
1178
for (unsigned I = 0; I < WorkList.size(); I++) {
1179
if (WorkList.size() >= MemorySSAScanLimit) {
1180
LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1181
return false;
1182
}
1183
1184
MemoryAccess *UseAccess = WorkList[I];
1185
if (isa<MemoryPhi>(UseAccess)) {
1186
// AliasAnalysis does not account for loops. Limit elimination to
1187
// candidates for which we can guarantee they always store to the same
1188
// memory location.
1189
if (!isGuaranteedLoopInvariant(DefLoc.Ptr))
1190
return false;
1191
1192
pushMemUses(cast<MemoryPhi>(UseAccess), WorkList, Visited);
1193
continue;
1194
}
1195
// TODO: Checking for aliasing is expensive. Consider reducing the amount
1196
// of times this is called and/or caching it.
1197
Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1198
if (isReadClobber(DefLoc, UseInst)) {
1199
LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1200
return false;
1201
}
1202
1203
if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1204
pushMemUses(UseDef, WorkList, Visited);
1205
}
1206
return true;
1207
}
1208
1209
/// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1210
/// pair with the MemoryLocation terminated by \p I and a boolean flag
1211
/// indicating whether \p I is a free-like call.
1212
std::optional<std::pair<MemoryLocation, bool>>
1213
getLocForTerminator(Instruction *I) const {
1214
uint64_t Len;
1215
Value *Ptr;
1216
if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1217
m_Value(Ptr))))
1218
return {std::make_pair(MemoryLocation(Ptr, Len), false)};
1219
1220
if (auto *CB = dyn_cast<CallBase>(I)) {
1221
if (Value *FreedOp = getFreedOperand(CB, &TLI))
1222
return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1223
}
1224
1225
return std::nullopt;
1226
}
1227
1228
/// Returns true if \p I is a memory terminator instruction like
1229
/// llvm.lifetime.end or free.
1230
bool isMemTerminatorInst(Instruction *I) const {
1231
auto *CB = dyn_cast<CallBase>(I);
1232
return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1233
getFreedOperand(CB, &TLI) != nullptr);
1234
}
1235
1236
/// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1237
/// instruction \p AccessI.
1238
bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1239
Instruction *MaybeTerm) {
1240
std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1241
getLocForTerminator(MaybeTerm);
1242
1243
if (!MaybeTermLoc)
1244
return false;
1245
1246
// If the terminator is a free-like call, all accesses to the underlying
1247
// object can be considered terminated.
1248
if (getUnderlyingObject(Loc.Ptr) !=
1249
getUnderlyingObject(MaybeTermLoc->first.Ptr))
1250
return false;
1251
1252
auto TermLoc = MaybeTermLoc->first;
1253
if (MaybeTermLoc->second) {
1254
const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1255
return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1256
}
1257
int64_t InstWriteOffset = 0;
1258
int64_t DepWriteOffset = 0;
1259
return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1260
DepWriteOffset) == OW_Complete;
1261
}
1262
1263
// Returns true if \p Use may read from \p DefLoc.
1264
bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1265
if (isNoopIntrinsic(UseInst))
1266
return false;
1267
1268
// Monotonic or weaker atomic stores can be re-ordered and do not need to be
1269
// treated as read clobber.
1270
if (auto SI = dyn_cast<StoreInst>(UseInst))
1271
return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1272
1273
if (!UseInst->mayReadFromMemory())
1274
return false;
1275
1276
if (auto *CB = dyn_cast<CallBase>(UseInst))
1277
if (CB->onlyAccessesInaccessibleMemory())
1278
return false;
1279
1280
return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1281
}
1282
1283
/// Returns true if a dependency between \p Current and \p KillingDef is
1284
/// guaranteed to be loop invariant for the loops that they are in. Either
1285
/// because they are known to be in the same block, in the same loop level or
1286
/// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1287
/// during execution of the containing function.
1288
bool isGuaranteedLoopIndependent(const Instruction *Current,
1289
const Instruction *KillingDef,
1290
const MemoryLocation &CurrentLoc) {
1291
// If the dependency is within the same block or loop level (being careful
1292
// of irreducible loops), we know that AA will return a valid result for the
1293
// memory dependency. (Both at the function level, outside of any loop,
1294
// would also be valid but we currently disable that to limit compile time).
1295
if (Current->getParent() == KillingDef->getParent())
1296
return true;
1297
const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1298
if (!ContainsIrreducibleLoops && CurrentLI &&
1299
CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1300
return true;
1301
// Otherwise check the memory location is invariant to any loops.
1302
return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1303
}
1304
1305
/// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1306
/// loop. In particular, this guarantees that it only references a single
1307
/// MemoryLocation during execution of the containing function.
1308
bool isGuaranteedLoopInvariant(const Value *Ptr) {
1309
Ptr = Ptr->stripPointerCasts();
1310
if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1311
if (GEP->hasAllConstantIndices())
1312
Ptr = GEP->getPointerOperand()->stripPointerCasts();
1313
1314
if (auto *I = dyn_cast<Instruction>(Ptr)) {
1315
return I->getParent()->isEntryBlock() ||
1316
(!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
1317
}
1318
return true;
1319
}
1320
1321
// Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1322
// with no read access between them or on any other path to a function exit
1323
// block if \p KillingLoc is not accessible after the function returns. If
1324
// there is no such MemoryDef, return std::nullopt. The returned value may not
1325
// (completely) overwrite \p KillingLoc. Currently we bail out when we
1326
// encounter an aliasing MemoryUse (read).
1327
std::optional<MemoryAccess *>
1328
getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1329
const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1330
unsigned &ScanLimit, unsigned &WalkerStepLimit,
1331
bool IsMemTerm, unsigned &PartialLimit) {
1332
if (ScanLimit == 0 || WalkerStepLimit == 0) {
1333
LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1334
return std::nullopt;
1335
}
1336
1337
MemoryAccess *Current = StartAccess;
1338
Instruction *KillingI = KillingDef->getMemoryInst();
1339
LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1340
1341
// Only optimize defining access of KillingDef when directly starting at its
1342
// defining access. The defining access also must only access KillingLoc. At
1343
// the moment we only support instructions with a single write location, so
1344
// it should be sufficient to disable optimizations for instructions that
1345
// also read from memory.
1346
bool CanOptimize = OptimizeMemorySSA &&
1347
KillingDef->getDefiningAccess() == StartAccess &&
1348
!KillingI->mayReadFromMemory();
1349
1350
// Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1351
std::optional<MemoryLocation> CurrentLoc;
1352
for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1353
LLVM_DEBUG({
1354
dbgs() << " visiting " << *Current;
1355
if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1356
dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1357
<< ")";
1358
dbgs() << "\n";
1359
});
1360
1361
// Reached TOP.
1362
if (MSSA.isLiveOnEntryDef(Current)) {
1363
LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1364
if (CanOptimize && Current != KillingDef->getDefiningAccess())
1365
// The first clobbering def is... none.
1366
KillingDef->setOptimized(Current);
1367
return std::nullopt;
1368
}
1369
1370
// Cost of a step. Accesses in the same block are more likely to be valid
1371
// candidates for elimination, hence consider them cheaper.
1372
unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1373
? MemorySSASameBBStepCost
1374
: MemorySSAOtherBBStepCost;
1375
if (WalkerStepLimit <= StepCost) {
1376
LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1377
return std::nullopt;
1378
}
1379
WalkerStepLimit -= StepCost;
1380
1381
// Return for MemoryPhis. They cannot be eliminated directly and the
1382
// caller is responsible for traversing them.
1383
if (isa<MemoryPhi>(Current)) {
1384
LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1385
return Current;
1386
}
1387
1388
// Below, check if CurrentDef is a valid candidate to be eliminated by
1389
// KillingDef. If it is not, check the next candidate.
1390
MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1391
Instruction *CurrentI = CurrentDef->getMemoryInst();
1392
1393
if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1394
CanOptimize = false;
1395
continue;
1396
}
1397
1398
// Before we try to remove anything, check for any extra throwing
1399
// instructions that block us from DSEing
1400
if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1401
LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1402
return std::nullopt;
1403
}
1404
1405
// Check for anything that looks like it will be a barrier to further
1406
// removal
1407
if (isDSEBarrier(KillingUndObj, CurrentI)) {
1408
LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1409
return std::nullopt;
1410
}
1411
1412
// If Current is known to be on a path that reads DefLoc or is a read
1413
// clobber, bail out, as the path is not profitable. We skip this check
1414
// for intrinsic calls, because the code knows how to handle memcpy
1415
// intrinsics.
1416
if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1417
return std::nullopt;
1418
1419
// Quick check if there are direct uses that are read-clobbers.
1420
if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1421
if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1422
return !MSSA.dominates(StartAccess, UseOrDef) &&
1423
isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1424
return false;
1425
})) {
1426
LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1427
return std::nullopt;
1428
}
1429
1430
// If Current does not have an analyzable write location or is not
1431
// removable, skip it.
1432
CurrentLoc = getLocForWrite(CurrentI);
1433
if (!CurrentLoc || !isRemovable(CurrentI)) {
1434
CanOptimize = false;
1435
continue;
1436
}
1437
1438
// AliasAnalysis does not account for loops. Limit elimination to
1439
// candidates for which we can guarantee they always store to the same
1440
// memory location and are not located in different loops.
1441
if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1442
LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1443
CanOptimize = false;
1444
continue;
1445
}
1446
1447
if (IsMemTerm) {
1448
// If the killing def is a memory terminator (e.g. lifetime.end), check
1449
// the next candidate if Current does not write to the same
1450
// underlying object as the terminator.
1451
if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1452
CanOptimize = false;
1453
continue;
1454
}
1455
} else {
1456
int64_t KillingOffset = 0;
1457
int64_t DeadOffset = 0;
1458
auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1459
KillingOffset, DeadOffset);
1460
if (CanOptimize) {
1461
// CurrentDef is the earliest write clobber of KillingDef. Use it as
1462
// optimized access. Do not optimize if CurrentDef is already the
1463
// defining access of KillingDef.
1464
if (CurrentDef != KillingDef->getDefiningAccess() &&
1465
(OR == OW_Complete || OR == OW_MaybePartial))
1466
KillingDef->setOptimized(CurrentDef);
1467
1468
// Once a may-aliasing def is encountered do not set an optimized
1469
// access.
1470
if (OR != OW_None)
1471
CanOptimize = false;
1472
}
1473
1474
// If Current does not write to the same object as KillingDef, check
1475
// the next candidate.
1476
if (OR == OW_Unknown || OR == OW_None)
1477
continue;
1478
else if (OR == OW_MaybePartial) {
1479
// If KillingDef only partially overwrites Current, check the next
1480
// candidate if the partial step limit is exceeded. This aggressively
1481
// limits the number of candidates for partial store elimination,
1482
// which are less likely to be removable in the end.
1483
if (PartialLimit <= 1) {
1484
WalkerStepLimit -= 1;
1485
LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1486
continue;
1487
}
1488
PartialLimit -= 1;
1489
}
1490
}
1491
break;
1492
};
1493
1494
// Accesses to objects accessible after the function returns can only be
1495
// eliminated if the access is dead along all paths to the exit. Collect
1496
// the blocks with killing (i.e. completely overwriting) MemoryDefs and check if
1497
// they cover all paths from MaybeDeadAccess to any function exit.
1498
SmallPtrSet<Instruction *, 16> KillingDefs;
1499
KillingDefs.insert(KillingDef->getMemoryInst());
1500
MemoryAccess *MaybeDeadAccess = Current;
1501
MemoryLocation MaybeDeadLoc = *CurrentLoc;
1502
Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1503
LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1504
<< *MaybeDeadI << ")\n");
1505
1506
SmallVector<MemoryAccess *, 32> WorkList;
1507
SmallPtrSet<MemoryAccess *, 32> Visited;
1508
pushMemUses(MaybeDeadAccess, WorkList, Visited);
1509
1510
// Check if DeadDef may be read.
1511
for (unsigned I = 0; I < WorkList.size(); I++) {
1512
MemoryAccess *UseAccess = WorkList[I];
1513
1514
LLVM_DEBUG(dbgs() << " " << *UseAccess);
1515
// Bail out if the number of accesses to check exceeds the scan limit.
1516
if (ScanLimit < (WorkList.size() - I)) {
1517
LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1518
return std::nullopt;
1519
}
1520
--ScanLimit;
1521
NumDomMemDefChecks++;
1522
1523
if (isa<MemoryPhi>(UseAccess)) {
1524
if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1525
return DT.properlyDominates(KI->getParent(),
1526
UseAccess->getBlock());
1527
})) {
1528
LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1529
continue;
1530
}
1531
LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1532
pushMemUses(UseAccess, WorkList, Visited);
1533
continue;
1534
}
1535
1536
Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1537
LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1538
1539
if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1540
return DT.dominates(KI, UseInst);
1541
})) {
1542
LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1543
continue;
1544
}
1545
1546
// A memory terminator kills all preceding MemoryDefs and all succeeding
1547
// MemoryAccesses. We do not have to check its users.
1548
if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1549
LLVM_DEBUG(
1550
dbgs()
1551
<< " ... skipping, memterminator invalidates following accesses\n");
1552
continue;
1553
}
1554
1555
if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1556
LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1557
pushMemUses(UseAccess, WorkList, Visited);
1558
continue;
1559
}
1560
1561
if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1562
LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1563
return std::nullopt;
1564
}
1565
1566
// Uses which may read the original MemoryDef mean we cannot eliminate the
1567
// original MD. Stop walk.
1568
if (isReadClobber(MaybeDeadLoc, UseInst)) {
1569
LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1570
return std::nullopt;
1571
}
1572
1573
// If this worklist walks back to the original memory access (and the
1574
// pointer is not guaranteed loop invariant) then we cannot assume that a
1575
// store kills itself.
1576
if (MaybeDeadAccess == UseAccess &&
1577
!isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1578
LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1579
return std::nullopt;
1580
}
1581
// Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1582
// if it reads the memory location.
1583
// TODO: It would probably be better to check for self-reads before
1584
// calling the function.
1585
if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1586
LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1587
continue;
1588
}
1589
1590
// Check all uses for MemoryDefs, except for defs completely overwriting
1591
// the original location. Otherwise we have to check uses of *all*
1592
// MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1593
// miss cases like the following
1594
// 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1595
// 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1596
// Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1597
// (The Use points to the *first* Def it may alias)
1598
// 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1599
// stores [0,1]
1600
if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1601
if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1602
BasicBlock *MaybeKillingBlock = UseInst->getParent();
1603
if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1604
PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1605
if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1606
LLVM_DEBUG(dbgs()
1607
<< " ... found killing def " << *UseInst << "\n");
1608
KillingDefs.insert(UseInst);
1609
}
1610
} else {
1611
LLVM_DEBUG(dbgs()
1612
<< " ... found preceeding def " << *UseInst << "\n");
1613
return std::nullopt;
1614
}
1615
} else
1616
pushMemUses(UseDef, WorkList, Visited);
1617
}
1618
}
1619
1620
// For accesses to locations visible after the function returns, make sure
1621
// that the location is dead (=overwritten) along all paths from
1622
// MaybeDeadAccess to the exit.
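// Schematic example (hypothetical IR): with a global @g, the store in %entry
// is only dead if every path to an exit overwrites it first:
//   entry: store i32 0, ptr @g      <- MaybeDeadAccess
//          br i1 %c, label %a, label %b
//   a:     store i32 1, ptr @g      <- killing block
//          br label %exit
//   b:     br label %exit           <- @g still holds 0 on this path
// The killing blocks do not cover the path through %b, so the candidate is
// rejected by the post-dominance check below.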
1623
if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1624
SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1625
for (Instruction *KD : KillingDefs)
1626
KillingBlocks.insert(KD->getParent());
1627
assert(!KillingBlocks.empty() &&
1628
"Expected at least a single killing block");
1629
1630
// Find the common post-dominator of all killing blocks.
1631
BasicBlock *CommonPred = *KillingBlocks.begin();
1632
for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1633
if (!CommonPred)
1634
break;
1635
CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1636
}
1637
1638
// If the common post-dominator does not post-dominate MaybeDeadAccess,
1639
// there is a path from MaybeDeadAccess to an exit not going through a
1640
// killing block.
1641
if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1642
if (!AnyUnreachableExit)
1643
return std::nullopt;
1644
1645
// Fall back to CFG scan starting at all non-unreachable roots if not
1646
// all paths to the exit go through CommonPred.
1647
CommonPred = nullptr;
1648
}
1649
1650
// If CommonPred itself is in the set of killing blocks, we're done.
1651
if (KillingBlocks.count(CommonPred))
1652
return {MaybeDeadAccess};
1653
1654
SetVector<BasicBlock *> WorkList;
1655
// If CommonPred is null, there are multiple exits from the function.
1656
// They all have to be added to the worklist.
1657
if (CommonPred)
1658
WorkList.insert(CommonPred);
1659
else
1660
for (BasicBlock *R : PDT.roots()) {
1661
if (!isa<UnreachableInst>(R->getTerminator()))
1662
WorkList.insert(R);
1663
}
1664
1665
NumCFGTries++;
1666
// Check if all paths starting from an exit node go through one of the
1667
// killing blocks before reaching MaybeDeadAccess.
1668
for (unsigned I = 0; I < WorkList.size(); I++) {
1669
NumCFGChecks++;
1670
BasicBlock *Current = WorkList[I];
1671
if (KillingBlocks.count(Current))
1672
continue;
1673
if (Current == MaybeDeadAccess->getBlock())
1674
return std::nullopt;
1675
1676
// MaybeDeadAccess is reachable from the entry, so we don't have to
1677
// explore unreachable blocks further.
1678
if (!DT.isReachableFromEntry(Current))
1679
continue;
1680
1681
for (BasicBlock *Pred : predecessors(Current))
1682
WorkList.insert(Pred);
1683
1684
if (WorkList.size() >= MemorySSAPathCheckLimit)
1685
return std::nullopt;
1686
}
1687
NumCFGSuccess++;
1688
}
1689
1690
// No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1691
// potentially dead.
1692
return {MaybeDeadAccess};
1693
}
1694
1695
/// Delete dead memory defs and recursively add their operands to ToRemove if
1696
/// they become dead.
1697
void
1698
deleteDeadInstruction(Instruction *SI,
1699
SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr) {
1700
MemorySSAUpdater Updater(&MSSA);
1701
SmallVector<Instruction *, 32> NowDeadInsts;
1702
NowDeadInsts.push_back(SI);
1703
--NumFastOther;
1704
1705
while (!NowDeadInsts.empty()) {
1706
Instruction *DeadInst = NowDeadInsts.pop_back_val();
1707
++NumFastOther;
1708
1709
// Try to preserve debug information attached to the dead instruction.
1710
salvageDebugInfo(*DeadInst);
1711
salvageKnowledge(DeadInst);
1712
1713
// Remove the Instruction from MSSA.
1714
MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
1715
bool IsMemDef = MA && isa<MemoryDef>(MA);
1716
if (MA) {
1717
if (IsMemDef) {
1718
auto *MD = cast<MemoryDef>(MA);
1719
SkipStores.insert(MD);
1720
if (Deleted)
1721
Deleted->insert(MD);
1722
if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
1723
if (SI->getValueOperand()->getType()->isPointerTy()) {
1724
const Value *UO = getUnderlyingObject(SI->getValueOperand());
1725
if (CapturedBeforeReturn.erase(UO))
1726
ShouldIterateEndOfFunctionDSE = true;
1727
InvisibleToCallerAfterRet.erase(UO);
1728
}
1729
}
1730
}
1731
1732
Updater.removeMemoryAccess(MA);
1733
}
1734
1735
auto I = IOLs.find(DeadInst->getParent());
1736
if (I != IOLs.end())
1737
I->second.erase(DeadInst);
1738
// Remove its operands
1739
for (Use &O : DeadInst->operands())
1740
if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1741
O.set(PoisonValue::get(O->getType()));
1742
if (isInstructionTriviallyDead(OpI, &TLI))
1743
NowDeadInsts.push_back(OpI);
1744
}
1745
1746
EI.removeInstruction(DeadInst);
1747
// Remove memory defs directly if they don't produce results, but only
1748
// queue other dead instructions for later removal. They may have been
1749
// used as memory locations that have been cached by BatchAA. Removing
1750
// them here may lead to newly created instructions being allocated at the
1751
// same address, yielding stale cache entries.
1752
if (IsMemDef && DeadInst->getType()->isVoidTy())
1753
DeadInst->eraseFromParent();
1754
else
1755
ToRemove.push_back(DeadInst);
1756
}
1757
}
1758
1759
// Check for any extra throws between \p KillingI and \p DeadI that block
1760
// DSE. This only checks extra maythrows (those that aren't MemoryDefs).
1761
// MemoryDefs that may throw are handled during the walk from one def to the
1762
// next.
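// Schematic example (hypothetical IR): a potentially throwing call between
// the two stores keeps the first one alive when @g is visible on unwind:
//   store i32 0, ptr @g        ; DeadI - must survive if the call unwinds
//   call void @may_throw()
//   store i32 1, ptr @g        ; KillingI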
1763
bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1764
const Value *KillingUndObj) {
1765
// First see if we can ignore it by using the fact that KillingUndObj is an
// alloca/alloca-like object that is not visible to the caller during
// execution of the function.
1768
if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
1769
return false;
1770
1771
if (KillingI->getParent() == DeadI->getParent())
1772
return ThrowingBlocks.count(KillingI->getParent());
1773
return !ThrowingBlocks.empty();
1774
}
1775
1776
// Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1777
// instructions act as barriers:
1778
// * A memory instruction that may throw and \p KillingI accesses a non-stack
1779
// object.
1780
// * Atomic stores stronger than monotonic.
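// Schematic example (hypothetical IR): the release store below is treated as
// a barrier, so the preceding store to @p is conservatively kept:
//   store i32 0, ptr @p
//   store atomic i32 1, ptr @flag release, align 4   ; barrier
//   store i32 1, ptr @p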
1781
bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1782
// If DeadI may throw it acts as a barrier, unless the killing store's
// underlying object is an alloca/alloca-like object that does not escape.
1784
if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
1785
return true;
1786
1787
// If DeadI is an atomic load/store stronger than monotonic, do not try to
1788
// eliminate/reorder it.
1789
if (DeadI->isAtomic()) {
1790
if (auto *LI = dyn_cast<LoadInst>(DeadI))
1791
return isStrongerThanMonotonic(LI->getOrdering());
1792
if (auto *SI = dyn_cast<StoreInst>(DeadI))
1793
return isStrongerThanMonotonic(SI->getOrdering());
1794
if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
1795
return isStrongerThanMonotonic(ARMW->getOrdering());
1796
if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
1797
return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1798
isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1799
llvm_unreachable("other instructions should be skipped in MemorySSA");
1800
}
1801
return false;
1802
}
1803
1804
/// Eliminate writes to objects that are not visible in the caller and are not
1805
/// accessed before returning from the function.
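/// Illustrative case (hypothetical IR): the store below is dead because the
/// alloca cannot be observed once the function returns:
///   %a = alloca i32
///   store i32 42, ptr %a
///   ret void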
1806
bool eliminateDeadWritesAtEndOfFunction() {
1807
bool MadeChange = false;
1808
LLVM_DEBUG(
1809
dbgs()
1810
<< "Trying to eliminate MemoryDefs at the end of the function\n");
1811
do {
1812
ShouldIterateEndOfFunctionDSE = false;
1813
for (MemoryDef *Def : llvm::reverse(MemDefs)) {
1814
if (SkipStores.contains(Def))
1815
continue;
1816
1817
Instruction *DefI = Def->getMemoryInst();
1818
auto DefLoc = getLocForWrite(DefI);
1819
if (!DefLoc || !isRemovable(DefI)) {
1820
LLVM_DEBUG(dbgs() << " ... could not get location for write or "
1821
"instruction not removable.\n");
1822
continue;
1823
}
1824
1825
// NOTE: Currently eliminating writes at the end of a function is
1826
// limited to MemoryDefs with a single underlying object, to save
1827
// compile-time. In practice it appears the case with multiple
1828
// underlying objects is very uncommon. If it turns out to be important,
1829
// we can use getUnderlyingObjects here instead.
1830
const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1831
if (!isInvisibleToCallerAfterRet(UO))
1832
continue;
1833
1834
if (isWriteAtEndOfFunction(Def, *DefLoc)) {
1835
// See through pointer-to-pointer bitcasts
1836
LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1837
"of the function\n");
1838
deleteDeadInstruction(DefI);
1839
++NumFastStores;
1840
MadeChange = true;
1841
}
1842
}
1843
} while (ShouldIterateEndOfFunctionDSE);
1844
return MadeChange;
1845
}
1846
1847
/// If we have a zero-initializing memset following a call to malloc,
1848
/// try folding it into a call to calloc.
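/// Illustrative fold (hypothetical IR):
///   %p = call ptr @malloc(i64 %n)
///   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
/// becomes
///   %p = call ptr @calloc(i64 1, i64 %n)
/// provided the memset is removable, covers the whole allocation, and the
/// memory is not modified between the two calls.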
1849
bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
1850
Instruction *DefI = Def->getMemoryInst();
1851
MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1852
if (!MemSet)
1853
// TODO: Could handle zero store to small allocation as well.
1854
return false;
1855
Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1856
if (!StoredConstant || !StoredConstant->isNullValue())
1857
return false;
1858
1859
if (!isRemovable(DefI))
1860
// The memset might be volatile..
1861
return false;
1862
1863
if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
1864
F.hasFnAttribute(Attribute::SanitizeAddress) ||
1865
F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1866
F.getName() == "calloc")
1867
return false;
1868
auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
1869
if (!Malloc)
1870
return false;
1871
auto *InnerCallee = Malloc->getCalledFunction();
1872
if (!InnerCallee)
1873
return false;
1874
LibFunc Func;
1875
if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
1876
Func != LibFunc_malloc)
1877
return false;
1878
// Gracefully handle malloc with unexpected memory attributes.
1879
auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
1880
if (!MallocDef)
1881
return false;
1882
1883
auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
1884
// Check for a br(icmp(ptr, null), truebb, falsebb) pattern at the end
1885
// of the malloc block.
1886
auto *MallocBB = Malloc->getParent(),
1887
*MemsetBB = Memset->getParent();
1888
if (MallocBB == MemsetBB)
1889
return true;
1890
auto *Ptr = Memset->getArgOperand(0);
1891
auto *TI = MallocBB->getTerminator();
1892
ICmpInst::Predicate Pred;
1893
BasicBlock *TrueBB, *FalseBB;
1894
if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
1895
FalseBB)))
1896
return false;
1897
if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
1898
return false;
1899
return true;
1900
};
1901
1902
if (Malloc->getOperand(0) != MemSet->getLength())
1903
return false;
1904
if (!shouldCreateCalloc(Malloc, MemSet) ||
1905
!DT.dominates(Malloc, MemSet) ||
1906
!memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
1907
return false;
1908
IRBuilder<> IRB(Malloc);
1909
Type *SizeTTy = Malloc->getArgOperand(0)->getType();
1910
auto *Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1),
1911
Malloc->getArgOperand(0), IRB, TLI);
1912
if (!Calloc)
1913
return false;
1914
1915
MemorySSAUpdater Updater(&MSSA);
1916
auto *NewAccess =
1917
Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), nullptr,
1918
MallocDef);
1919
auto *NewAccessMD = cast<MemoryDef>(NewAccess);
1920
Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
1921
Malloc->replaceAllUsesWith(Calloc);
1922
deleteDeadInstruction(Malloc);
1923
return true;
1924
}
1925
1926
// Check if there is a dominating condition that implies that the value
// being stored to a pointer is already present at that location.
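// Illustrative pattern (hypothetical IR): the store below is a no-op because
// the dominating branch already established that %p holds %v:
//   %old = load i32, ptr %p
//   %cmp = icmp eq i32 %old, %v
//   br i1 %cmp, label %then, label %else
// then:
//   store i32 %v, ptr %p
// assuming nothing clobbers %p between the load and the store.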
1928
bool dominatingConditionImpliesValue(MemoryDef *Def) {
1929
auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
1930
BasicBlock *StoreBB = StoreI->getParent();
1931
Value *StorePtr = StoreI->getPointerOperand();
1932
Value *StoreVal = StoreI->getValueOperand();
1933
1934
DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
1935
if (!IDom)
1936
return false;
1937
1938
auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
1939
if (!BI || !BI->isConditional())
1940
return false;
1941
1942
// In case both successors are the same block, it is not possible to
// determine if the optimization is possible. (We would not want to optimize
// a store in the FalseBB if the condition is true and vice versa.)
1945
if (BI->getSuccessor(0) == BI->getSuccessor(1))
1946
return false;
1947
1948
Instruction *ICmpL;
1949
ICmpInst::Predicate Pred;
1950
if (!match(BI->getCondition(),
1951
m_c_ICmp(Pred,
1952
m_CombineAnd(m_Load(m_Specific(StorePtr)),
1953
m_Instruction(ICmpL)),
1954
m_Specific(StoreVal))) ||
1955
!ICmpInst::isEquality(Pred))
1956
return false;
1957
1958
// In case the else block also branches to the if block, or the other way
// around, it is not possible to determine if the optimization is possible.
1960
if (Pred == ICmpInst::ICMP_EQ &&
1961
!DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
1962
StoreBB))
1963
return false;
1964
1965
if (Pred == ICmpInst::ICMP_NE &&
1966
!DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
1967
StoreBB))
1968
return false;
1969
1970
MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
1971
MemoryAccess *ClobAcc =
1972
MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
1973
1974
return MSSA.dominates(ClobAcc, LoadAcc);
1975
}
1976
1977
/// \returns true if \p Def is a no-op store, either because it
1978
/// directly stores back a loaded value or stores zero to a calloced object.
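/// Illustrative no-op stores (hypothetical IR):
///   %v = load i32, ptr %p
///   store i32 %v, ptr %p            ; stores back the value just loaded
/// or, for a calloc'ed object %c:
///   store i32 0, ptr %c             ; stores the known initial value
/// (in both cases only when nothing has modified the location in between).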
1979
bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
1980
Instruction *DefI = Def->getMemoryInst();
1981
StoreInst *Store = dyn_cast<StoreInst>(DefI);
1982
MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1983
Constant *StoredConstant = nullptr;
1984
if (Store)
1985
StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
1986
else if (MemSet)
1987
StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1988
else
1989
return false;
1990
1991
if (!isRemovable(DefI))
1992
return false;
1993
1994
if (StoredConstant) {
1995
Constant *InitC =
1996
getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
1997
// If the clobbering access is LiveOnEntry, no instructions between them
1998
// can modify the memory location.
1999
if (InitC && InitC == StoredConstant)
2000
return MSSA.isLiveOnEntryDef(
2001
MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
2002
}
2003
2004
if (!Store)
2005
return false;
2006
2007
if (dominatingConditionImpliesValue(Def))
2008
return true;
2009
2010
if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
2011
if (LoadI->getPointerOperand() == Store->getOperand(1)) {
2012
// Get the defining access for the load.
2013
auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
2014
// Fast path: the defining accesses are the same.
2015
if (LoadAccess == Def->getDefiningAccess())
2016
return true;
2017
2018
// Look through phi accesses. Recursively scan all phi accesses by
2019
// adding them to a worklist. Bail when we run into a memory def that
2020
// does not match LoadAccess.
2021
SetVector<MemoryAccess *> ToCheck;
2022
MemoryAccess *Current =
2023
MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
2024
// We don't want to bail when we run into the store memory def. But,
2025
// the phi access may point to it. So, pretend like we've already
2026
// checked it.
2027
ToCheck.insert(Def);
2028
ToCheck.insert(Current);
2029
// Start at current (1) to simulate already having checked Def.
2030
for (unsigned I = 1; I < ToCheck.size(); ++I) {
2031
Current = ToCheck[I];
2032
if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
2033
// Check all the operands.
2034
for (auto &Use : PhiAccess->incoming_values())
2035
ToCheck.insert(cast<MemoryAccess>(&Use));
2036
continue;
2037
}
2038
2039
// If we found a memory def, bail. This happens when we have an
2040
// unrelated write in between an otherwise noop store.
2041
assert(isa<MemoryDef>(Current) &&
2042
"Only MemoryDefs should reach here.");
2043
// TODO: Skip no alias MemoryDefs that have no aliasing reads.
2044
// We are searching for the definition of the store's destination.
2045
// So, if that is the same definition as the load, then this is a
2046
// noop. Otherwise, fail.
2047
if (LoadAccess != Current)
2048
return false;
2049
}
2050
return true;
2051
}
2052
}
2053
2054
return false;
2055
}
2056
2057
bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2058
bool Changed = false;
2059
for (auto OI : IOL) {
2060
Instruction *DeadI = OI.first;
2061
MemoryLocation Loc = *getLocForWrite(DeadI);
2062
assert(isRemovable(DeadI) && "Expect only removable instruction");
2063
2064
const Value *Ptr = Loc.Ptr->stripPointerCasts();
2065
int64_t DeadStart = 0;
2066
uint64_t DeadSize = Loc.Size.getValue();
2067
GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
2068
OverlapIntervalsTy &IntervalMap = OI.second;
2069
Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2070
if (IntervalMap.empty())
2071
continue;
2072
Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2073
}
2074
return Changed;
2075
}
2076
2077
/// Eliminates writes to locations where the value that is being written
2078
/// is already stored at the same location.
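/// Illustrative redundant store (hypothetical IR): the store repeats a byte
/// pattern already written by the dominating memset and can be removed:
///   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 false)
///   store i32 0, ptr %p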
2079
bool eliminateRedundantStoresOfExistingValues() {
2080
bool MadeChange = false;
2081
LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2082
"already existing value\n");
2083
for (auto *Def : MemDefs) {
2084
if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
2085
continue;
2086
2087
Instruction *DefInst = Def->getMemoryInst();
2088
auto MaybeDefLoc = getLocForWrite(DefInst);
2089
if (!MaybeDefLoc || !isRemovable(DefInst))
2090
continue;
2091
2092
MemoryDef *UpperDef;
2093
// To conserve compile-time, we avoid walking to the next clobbering def.
2094
// Instead, we just try to get the optimized access, if it exists. DSE
2095
// will try to optimize defs during the earlier traversal.
2096
if (Def->isOptimized())
2097
UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
2098
else
2099
UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
2100
if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
2101
continue;
2102
2103
Instruction *UpperInst = UpperDef->getMemoryInst();
2104
auto IsRedundantStore = [&]() {
2105
if (DefInst->isIdenticalTo(UpperInst))
2106
return true;
2107
if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
2108
if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
2109
// MemSetInst must have a write location.
2110
auto UpperLoc = getLocForWrite(UpperInst);
2111
if (!UpperLoc)
2112
return false;
2113
int64_t InstWriteOffset = 0;
2114
int64_t DepWriteOffset = 0;
2115
auto OR = isOverwrite(UpperInst, DefInst, *UpperLoc, *MaybeDefLoc,
2116
InstWriteOffset, DepWriteOffset);
2117
Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
2118
return StoredByte && StoredByte == MemSetI->getOperand(1) &&
2119
OR == OW_Complete;
2120
}
2121
}
2122
return false;
2123
};
2124
2125
if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
2126
continue;
2127
LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2128
<< '\n');
2129
deleteDeadInstruction(DefInst);
2130
NumRedundantStores++;
2131
MadeChange = true;
2132
}
2133
return MadeChange;
2134
}
2135
};
2136
2137
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2138
DominatorTree &DT, PostDominatorTree &PDT,
2139
const TargetLibraryInfo &TLI,
2140
const LoopInfo &LI) {
2141
bool MadeChange = false;
2142
2143
DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2144
// For each store:
2145
for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2146
MemoryDef *KillingDef = State.MemDefs[I];
2147
if (State.SkipStores.count(KillingDef))
2148
continue;
2149
Instruction *KillingI = KillingDef->getMemoryInst();
2150
2151
std::optional<MemoryLocation> MaybeKillingLoc;
2152
if (State.isMemTerminatorInst(KillingI)) {
2153
if (auto KillingLoc = State.getLocForTerminator(KillingI))
2154
MaybeKillingLoc = KillingLoc->first;
2155
} else {
2156
MaybeKillingLoc = State.getLocForWrite(KillingI);
2157
}
2158
2159
if (!MaybeKillingLoc) {
2160
LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2161
<< *KillingI << "\n");
2162
continue;
2163
}
2164
MemoryLocation KillingLoc = *MaybeKillingLoc;
2165
assert(KillingLoc.Ptr && "KillingLoc should not be null");
2166
const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
2167
LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2168
<< *KillingDef << " (" << *KillingI << ")\n");
2169
2170
unsigned ScanLimit = MemorySSAScanLimit;
2171
unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2172
unsigned PartialLimit = MemorySSAPartialStoreLimit;
2173
// Worklist of MemoryAccesses that may be killed by KillingDef.
2174
SmallSetVector<MemoryAccess *, 8> ToCheck;
2175
// Track MemoryAccesses that have been deleted in the loop below, so we can
2176
// skip them. Don't use SkipStores for this, which may contain reused
2177
// MemoryAccess addresses.
2178
SmallPtrSet<MemoryAccess *, 8> Deleted;
2179
[[maybe_unused]] unsigned OrigNumSkipStores = State.SkipStores.size();
2180
ToCheck.insert(KillingDef->getDefiningAccess());
2181
2182
bool Shortend = false;
2183
bool IsMemTerm = State.isMemTerminatorInst(KillingI);
2184
// Check if MemoryAccesses in the worklist are killed by KillingDef.
2185
for (unsigned I = 0; I < ToCheck.size(); I++) {
2186
MemoryAccess *Current = ToCheck[I];
2187
if (Deleted.contains(Current))
2188
continue;
2189
2190
std::optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
2191
KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
2192
WalkerStepLimit, IsMemTerm, PartialLimit);
2193
2194
if (!MaybeDeadAccess) {
2195
LLVM_DEBUG(dbgs() << " finished walk\n");
2196
continue;
2197
}
2198
2199
MemoryAccess *DeadAccess = *MaybeDeadAccess;
2200
LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2201
if (isa<MemoryPhi>(DeadAccess)) {
2202
LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2203
for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2204
MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2205
BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2206
BasicBlock *PhiBlock = DeadAccess->getBlock();
2207
2208
// We only consider incoming MemoryAccesses that come before the
2209
// MemoryPhi. Otherwise we could discover candidates that do not
2210
// strictly dominate our starting def.
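// Schematic example: for a loop with
//   header: 3 = MemoryPhi({entry, 1}, {latch, 2})
//   latch:  2 = MemoryDef(3)
// only the incoming access 1 from the entry side is queued; the back-edge
// access 2 has a smaller post-order number than the header and is skipped.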
2211
if (State.PostOrderNumbers[IncomingBlock] >
2212
State.PostOrderNumbers[PhiBlock])
2213
ToCheck.insert(IncomingAccess);
2214
}
2215
continue;
2216
}
2217
auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
2218
Instruction *DeadI = DeadDefAccess->getMemoryInst();
2219
LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
2220
ToCheck.insert(DeadDefAccess->getDefiningAccess());
2221
NumGetDomMemoryDefPassed++;
2222
2223
if (!DebugCounter::shouldExecute(MemorySSACounter))
2224
continue;
2225
2226
MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);
2227
2228
if (IsMemTerm) {
2229
const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
2230
if (KillingUndObj != DeadUndObj)
2231
continue;
2232
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2233
<< "\n KILLER: " << *KillingI << '\n');
2234
State.deleteDeadInstruction(DeadI, &Deleted);
2235
++NumFastStores;
2236
MadeChange = true;
2237
} else {
2238
// Check if DeadI overwrites KillingI.
2239
int64_t KillingOffset = 0;
2240
int64_t DeadOffset = 0;
2241
OverwriteResult OR = State.isOverwrite(
2242
KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
2243
if (OR == OW_MaybePartial) {
2244
auto Iter = State.IOLs.insert(
2245
std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
2246
DeadI->getParent(), InstOverlapIntervalsTy()));
2247
auto &IOL = Iter.first->second;
2248
OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
2249
DeadOffset, DeadI, IOL);
2250
}
2251
2252
if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2253
auto *DeadSI = dyn_cast<StoreInst>(DeadI);
2254
auto *KillingSI = dyn_cast<StoreInst>(KillingI);
2255
// We are re-using tryToMergePartialOverlappingStores, which requires
2256
// DeadSI to dominate KillingSI.
2257
// TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2258
if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2259
if (Constant *Merged = tryToMergePartialOverlappingStores(
2260
KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
2261
State.BatchAA, &DT)) {
2262
2263
// Update stored value of earlier store to merged constant.
2264
DeadSI->setOperand(0, Merged);
2265
++NumModifiedStores;
2266
MadeChange = true;
2267
2268
Shortend = true;
2269
// Remove killing store and remove any outstanding overlap
2270
// intervals for the updated store.
2271
State.deleteDeadInstruction(KillingSI, &Deleted);
2272
auto I = State.IOLs.find(DeadSI->getParent());
2273
if (I != State.IOLs.end())
2274
I->second.erase(DeadSI);
2275
break;
2276
}
2277
}
2278
}
2279
2280
if (OR == OW_Complete) {
2281
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2282
<< "\n KILLER: " << *KillingI << '\n');
2283
State.deleteDeadInstruction(DeadI, &Deleted);
2284
++NumFastStores;
2285
MadeChange = true;
2286
}
2287
}
2288
}
2289
2290
assert(State.SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
2291
"SkipStores and Deleted out of sync?");
2292
2293
// Check if the store is a no-op.
2294
if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {
2295
LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
2296
<< '\n');
2297
State.deleteDeadInstruction(KillingI);
2298
NumRedundantStores++;
2299
MadeChange = true;
2300
continue;
2301
}
2302
2303
// Can we form a calloc from a memset/malloc pair?
2304
if (!Shortend && State.tryFoldIntoCalloc(KillingDef, KillingUndObj)) {
2305
LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2306
<< " DEAD: " << *KillingI << '\n');
2307
State.deleteDeadInstruction(KillingI);
2308
MadeChange = true;
2309
continue;
2310
}
2311
}
2312
2313
if (EnablePartialOverwriteTracking)
2314
for (auto &KV : State.IOLs)
2315
MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2316
2317
MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2318
MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2319
2320
while (!State.ToRemove.empty()) {
2321
Instruction *DeadInst = State.ToRemove.pop_back_val();
2322
DeadInst->eraseFromParent();
2323
}
2324
2325
return MadeChange;
2326
}
2327
} // end anonymous namespace
2328
2329
//===----------------------------------------------------------------------===//
2330
// DSE Pass
2331
//===----------------------------------------------------------------------===//
2332
PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2333
AliasAnalysis &AA = AM.getResult<AAManager>(F);
2334
const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2335
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
2336
MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2337
PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
2338
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2339
2340
bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2341
2342
#ifdef LLVM_ENABLE_STATS
2343
if (AreStatisticsEnabled())
2344
for (auto &I : instructions(F))
2345
NumRemainingStores += isa<StoreInst>(&I);
2346
#endif
2347
2348
if (!Changed)
2349
return PreservedAnalyses::all();
2350
2351
PreservedAnalyses PA;
2352
PA.preserveSet<CFGAnalyses>();
2353
PA.preserve<MemorySSAAnalysis>();
2354
PA.preserve<LoopAnalysis>();
2355
return PA;
2356
}