Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp
35294 views
1
//===- GVNSink.cpp - sink expressions into successors ---------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file GVNSink.cpp
10
/// This pass attempts to sink instructions into successors, reducing static
11
/// instruction count and enabling if-conversion.
12
///
13
/// We use a variant of global value numbering to decide what can be sunk.
14
/// Consider:
15
///
16
/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ]
17
/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ]
18
/// \ /
19
/// [ %e = phi i32 %a2, %c2 ]
20
/// [ add i32 %e, 4 ]
21
///
22
///
23
/// GVN would number %a1 and %c1 differently because they compute different
24
/// results - the VN of an instruction is a function of its opcode and the
25
/// transitive closure of its operands. This is the key property for hoisting
26
/// and CSE.
27
///
28
/// What we want when sinking however is for a numbering that is a function of
29
/// the *uses* of an instruction, which allows us to answer the question "if I
30
/// replace %a1 with %c1, will it contribute in an equivalent way to all
31
/// successive instructions?". The PostValueTable class in GVN provides this
32
/// mapping.
33
//
34
//===----------------------------------------------------------------------===//
35
36
#include "llvm/ADT/ArrayRef.h"
37
#include "llvm/ADT/DenseMap.h"
38
#include "llvm/ADT/DenseSet.h"
39
#include "llvm/ADT/Hashing.h"
40
#include "llvm/ADT/PostOrderIterator.h"
41
#include "llvm/ADT/STLExtras.h"
42
#include "llvm/ADT/SmallPtrSet.h"
43
#include "llvm/ADT/SmallVector.h"
44
#include "llvm/ADT/Statistic.h"
45
#include "llvm/Analysis/GlobalsModRef.h"
46
#include "llvm/IR/BasicBlock.h"
47
#include "llvm/IR/CFG.h"
48
#include "llvm/IR/Constants.h"
49
#include "llvm/IR/Function.h"
50
#include "llvm/IR/InstrTypes.h"
51
#include "llvm/IR/Instruction.h"
52
#include "llvm/IR/Instructions.h"
53
#include "llvm/IR/PassManager.h"
54
#include "llvm/IR/Type.h"
55
#include "llvm/IR/Use.h"
56
#include "llvm/IR/Value.h"
57
#include "llvm/Support/Allocator.h"
58
#include "llvm/Support/ArrayRecycler.h"
59
#include "llvm/Support/AtomicOrdering.h"
60
#include "llvm/Support/Casting.h"
61
#include "llvm/Support/Compiler.h"
62
#include "llvm/Support/Debug.h"
63
#include "llvm/Support/raw_ostream.h"
64
#include "llvm/Transforms/Scalar/GVN.h"
65
#include "llvm/Transforms/Scalar/GVNExpression.h"
66
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
67
#include "llvm/Transforms/Utils/Local.h"
68
#include <algorithm>
69
#include <cassert>
70
#include <cstddef>
71
#include <cstdint>
72
#include <iterator>
73
#include <utility>
74
75
using namespace llvm;
76
77
#define DEBUG_TYPE "gvn-sink"
78
79
STATISTIC(NumRemoved, "Number of instructions removed");
80
81
namespace llvm {
82
namespace GVNExpression {
83
84
LLVM_DUMP_METHOD void Expression::dump() const {
85
print(dbgs());
86
dbgs() << "\n";
87
}
88
89
} // end namespace GVNExpression
90
} // end namespace llvm
91
92
namespace {
93
94
static bool isMemoryInst(const Instruction *I) {
95
return isa<LoadInst>(I) || isa<StoreInst>(I) ||
96
(isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
97
(isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
98
}
99
100
/// Iterates through instructions in a set of blocks in reverse order from the
101
/// first non-terminator. For example (assume all blocks have size n):
102
/// LockstepReverseIterator I([B1, B2, B3]);
103
/// *I-- = [B1[n], B2[n], B3[n]];
104
/// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
105
/// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
106
/// ...
107
///
108
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
109
/// to
110
/// determine which blocks are still going and the order they appear in the
111
/// list returned by operator*.
112
class LockstepReverseIterator {
113
ArrayRef<BasicBlock *> Blocks;
114
SmallSetVector<BasicBlock *, 4> ActiveBlocks;
115
SmallVector<Instruction *, 4> Insts;
116
bool Fail;
117
118
public:
119
LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
120
reset();
121
}
122
123
void reset() {
124
Fail = false;
125
ActiveBlocks.clear();
126
for (BasicBlock *BB : Blocks)
127
ActiveBlocks.insert(BB);
128
Insts.clear();
129
for (BasicBlock *BB : Blocks) {
130
if (BB->size() <= 1) {
131
// Block wasn't big enough - only contained a terminator.
132
ActiveBlocks.remove(BB);
133
continue;
134
}
135
Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
136
}
137
if (Insts.empty())
138
Fail = true;
139
}
140
141
bool isValid() const { return !Fail; }
142
ArrayRef<Instruction *> operator*() const { return Insts; }
143
144
// Note: This needs to return a SmallSetVector as the elements of
145
// ActiveBlocks will be later copied to Blocks using std::copy. The
146
// resultant order of elements in Blocks needs to be deterministic.
147
// Using SmallPtrSet instead causes non-deterministic order while
148
// copying. And we cannot simply sort Blocks as they need to match the
149
// corresponding Values.
150
SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
151
152
void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
153
for (auto II = Insts.begin(); II != Insts.end();) {
154
if (!Blocks.contains((*II)->getParent())) {
155
ActiveBlocks.remove((*II)->getParent());
156
II = Insts.erase(II);
157
} else {
158
++II;
159
}
160
}
161
}
162
163
void operator--() {
164
if (Fail)
165
return;
166
SmallVector<Instruction *, 4> NewInsts;
167
for (auto *Inst : Insts) {
168
if (Inst == &Inst->getParent()->front())
169
ActiveBlocks.remove(Inst->getParent());
170
else
171
NewInsts.push_back(Inst->getPrevNonDebugInstruction());
172
}
173
if (NewInsts.empty()) {
174
Fail = true;
175
return;
176
}
177
Insts = NewInsts;
178
}
179
};
180
181
//===----------------------------------------------------------------------===//
182
183
/// Candidate solution for sinking. There may be different ways to
184
/// sink instructions, differing in the number of instructions sunk,
185
/// the number of predecessors sunk from and the number of PHIs
186
/// required.
187
struct SinkingInstructionCandidate {
188
unsigned NumBlocks;
189
unsigned NumInstructions;
190
unsigned NumPHIs;
191
unsigned NumMemoryInsts;
192
int Cost = -1;
193
SmallVector<BasicBlock *, 4> Blocks;
194
195
void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
196
unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
197
unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
198
Cost = (NumInstructions * (NumBlocks - 1)) -
199
(NumExtraPHIs *
200
NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
201
- SplitEdgeCost;
202
}
203
204
bool operator>(const SinkingInstructionCandidate &Other) const {
205
return Cost > Other.Cost;
206
}
207
};
208
209
#ifndef NDEBUG
210
raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
211
OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
212
<< " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
213
return OS;
214
}
215
#endif
216
217
//===----------------------------------------------------------------------===//
218
219
/// Describes a PHI node that may or may not exist. These track the PHIs
220
/// that must be created if we sunk a sequence of instructions. It provides
221
/// a hash function for efficient equality comparisons.
222
class ModelledPHI {
223
SmallVector<Value *, 4> Values;
224
SmallVector<BasicBlock *, 4> Blocks;
225
226
public:
227
ModelledPHI() = default;
228
229
ModelledPHI(const PHINode *PN,
230
const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
231
// BasicBlock comes first so we sort by basic block pointer order,
232
// then by value pointer order. No need to call `verifyModelledPHI`
233
// As the Values and Blocks are populated in a deterministic order.
234
using OpsType = std::pair<BasicBlock *, Value *>;
235
SmallVector<OpsType, 4> Ops;
236
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
237
Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
238
239
auto ComesBefore = [BlockOrder](OpsType O1, OpsType O2) {
240
return BlockOrder.lookup(O1.first) < BlockOrder.lookup(O2.first);
241
};
242
// Sort in a deterministic order.
243
llvm::sort(Ops, ComesBefore);
244
245
for (auto &P : Ops) {
246
Blocks.push_back(P.first);
247
Values.push_back(P.second);
248
}
249
}
250
251
/// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
252
/// without the same ID.
253
/// \note This is specifically for DenseMapInfo - do not use this!
254
static ModelledPHI createDummy(size_t ID) {
255
ModelledPHI M;
256
M.Values.push_back(reinterpret_cast<Value*>(ID));
257
return M;
258
}
259
260
void
261
verifyModelledPHI(const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
262
assert(Values.size() > 1 && Blocks.size() > 1 &&
263
"Modelling PHI with less than 2 values");
264
auto ComesBefore = [BlockOrder](const BasicBlock *BB1,
265
const BasicBlock *BB2) {
266
return BlockOrder.lookup(BB1) < BlockOrder.lookup(BB2);
267
};
268
assert(llvm::is_sorted(Blocks, ComesBefore));
269
int C = 0;
270
for (const Value *V : Values) {
271
if (!isa<UndefValue>(V)) {
272
assert(cast<Instruction>(V)->getParent() == Blocks[C]);
273
(void)C;
274
}
275
C++;
276
}
277
}
278
/// Create a PHI from an array of incoming values and incoming blocks.
279
ModelledPHI(SmallVectorImpl<Instruction *> &V,
280
SmallSetVector<BasicBlock *, 4> &B,
281
const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
282
// The order of Values and Blocks are already ordered by the caller.
283
llvm::copy(V, std::back_inserter(Values));
284
llvm::copy(B, std::back_inserter(Blocks));
285
verifyModelledPHI(BlockOrder);
286
}
287
288
/// Create a PHI from [I[OpNum] for I in Insts].
289
/// TODO: Figure out a way to verifyModelledPHI in this constructor.
290
ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum,
291
SmallSetVector<BasicBlock *, 4> &B) {
292
llvm::copy(B, std::back_inserter(Blocks));
293
for (auto *I : Insts)
294
Values.push_back(I->getOperand(OpNum));
295
}
296
297
/// Restrict the PHI's contents down to only \c NewBlocks.
298
/// \c NewBlocks must be a subset of \c this->Blocks.
299
void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
300
auto BI = Blocks.begin();
301
auto VI = Values.begin();
302
while (BI != Blocks.end()) {
303
assert(VI != Values.end());
304
if (!NewBlocks.contains(*BI)) {
305
BI = Blocks.erase(BI);
306
VI = Values.erase(VI);
307
} else {
308
++BI;
309
++VI;
310
}
311
}
312
assert(Blocks.size() == NewBlocks.size());
313
}
314
315
ArrayRef<Value *> getValues() const { return Values; }
316
317
bool areAllIncomingValuesSame() const {
318
return llvm::all_equal(Values);
319
}
320
321
bool areAllIncomingValuesSameType() const {
322
return llvm::all_of(
323
Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
324
}
325
326
bool areAnyIncomingValuesConstant() const {
327
return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
328
}
329
330
// Hash functor
331
unsigned hash() const {
332
// Is deterministic because Values are saved in a specific order.
333
return (unsigned)hash_combine_range(Values.begin(), Values.end());
334
}
335
336
bool operator==(const ModelledPHI &Other) const {
337
return Values == Other.Values && Blocks == Other.Blocks;
338
}
339
};
340
341
template <typename ModelledPHI> struct DenseMapInfo {
342
static inline ModelledPHI &getEmptyKey() {
343
static ModelledPHI Dummy = ModelledPHI::createDummy(0);
344
return Dummy;
345
}
346
347
static inline ModelledPHI &getTombstoneKey() {
348
static ModelledPHI Dummy = ModelledPHI::createDummy(1);
349
return Dummy;
350
}
351
352
static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
353
354
static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
355
return LHS == RHS;
356
}
357
};
358
359
using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
360
361
//===----------------------------------------------------------------------===//
362
// ValueTable
363
//===----------------------------------------------------------------------===//
364
// This is a value number table where the value number is a function of the
365
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
366
// that the program would be equivalent if we replaced A with PHI(A, B).
367
//===----------------------------------------------------------------------===//
368
369
/// A GVN expression describing how an instruction is used. The operands
370
/// field of BasicExpression is used to store uses, not operands.
371
///
372
/// This class also contains fields for discriminators used when determining
373
/// equivalence of instructions with sideeffects.
374
class InstructionUseExpr : public GVNExpression::BasicExpression {
375
unsigned MemoryUseOrder = -1;
376
bool Volatile = false;
377
ArrayRef<int> ShuffleMask;
378
379
public:
380
InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
381
BumpPtrAllocator &A)
382
: GVNExpression::BasicExpression(I->getNumUses()) {
383
allocateOperands(R, A);
384
setOpcode(I->getOpcode());
385
setType(I->getType());
386
387
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
388
ShuffleMask = SVI->getShuffleMask().copy(A);
389
390
for (auto &U : I->uses())
391
op_push_back(U.getUser());
392
llvm::sort(op_begin(), op_end());
393
}
394
395
void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
396
void setVolatile(bool V) { Volatile = V; }
397
398
hash_code getHashValue() const override {
399
return hash_combine(GVNExpression::BasicExpression::getHashValue(),
400
MemoryUseOrder, Volatile, ShuffleMask);
401
}
402
403
template <typename Function> hash_code getHashValue(Function MapFn) {
404
hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
405
ShuffleMask);
406
for (auto *V : operands())
407
H = hash_combine(H, MapFn(V));
408
return H;
409
}
410
};
411
412
using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;
413
414
class ValueTable {
415
DenseMap<Value *, uint32_t> ValueNumbering;
416
DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
417
DenseMap<size_t, uint32_t> HashNumbering;
418
BumpPtrAllocator Allocator;
419
ArrayRecycler<Value *> Recycler;
420
uint32_t nextValueNumber = 1;
421
BasicBlocksSet ReachableBBs;
422
423
/// Create an expression for I based on its opcode and its uses. If I
424
/// touches or reads memory, the expression is also based upon its memory
425
/// order - see \c getMemoryUseOrder().
426
InstructionUseExpr *createExpr(Instruction *I) {
427
InstructionUseExpr *E =
428
new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
429
if (isMemoryInst(I))
430
E->setMemoryUseOrder(getMemoryUseOrder(I));
431
432
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
433
CmpInst::Predicate Predicate = C->getPredicate();
434
E->setOpcode((C->getOpcode() << 8) | Predicate);
435
}
436
return E;
437
}
438
439
/// Helper to compute the value number for a memory instruction
440
/// (LoadInst/StoreInst), including checking the memory ordering and
441
/// volatility.
442
template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
443
if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
444
return nullptr;
445
InstructionUseExpr *E = createExpr(I);
446
E->setVolatile(I->isVolatile());
447
return E;
448
}
449
450
public:
451
ValueTable() = default;
452
453
/// Set basic blocks reachable from entry block.
454
void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
455
this->ReachableBBs = ReachableBBs;
456
}
457
458
/// Returns the value number for the specified value, assigning
459
/// it a new number if it did not have one before.
460
uint32_t lookupOrAdd(Value *V) {
461
auto VI = ValueNumbering.find(V);
462
if (VI != ValueNumbering.end())
463
return VI->second;
464
465
if (!isa<Instruction>(V)) {
466
ValueNumbering[V] = nextValueNumber;
467
return nextValueNumber++;
468
}
469
470
Instruction *I = cast<Instruction>(V);
471
if (!ReachableBBs.contains(I->getParent()))
472
return ~0U;
473
474
InstructionUseExpr *exp = nullptr;
475
switch (I->getOpcode()) {
476
case Instruction::Load:
477
exp = createMemoryExpr(cast<LoadInst>(I));
478
break;
479
case Instruction::Store:
480
exp = createMemoryExpr(cast<StoreInst>(I));
481
break;
482
case Instruction::Call:
483
case Instruction::Invoke:
484
case Instruction::FNeg:
485
case Instruction::Add:
486
case Instruction::FAdd:
487
case Instruction::Sub:
488
case Instruction::FSub:
489
case Instruction::Mul:
490
case Instruction::FMul:
491
case Instruction::UDiv:
492
case Instruction::SDiv:
493
case Instruction::FDiv:
494
case Instruction::URem:
495
case Instruction::SRem:
496
case Instruction::FRem:
497
case Instruction::Shl:
498
case Instruction::LShr:
499
case Instruction::AShr:
500
case Instruction::And:
501
case Instruction::Or:
502
case Instruction::Xor:
503
case Instruction::ICmp:
504
case Instruction::FCmp:
505
case Instruction::Trunc:
506
case Instruction::ZExt:
507
case Instruction::SExt:
508
case Instruction::FPToUI:
509
case Instruction::FPToSI:
510
case Instruction::UIToFP:
511
case Instruction::SIToFP:
512
case Instruction::FPTrunc:
513
case Instruction::FPExt:
514
case Instruction::PtrToInt:
515
case Instruction::IntToPtr:
516
case Instruction::BitCast:
517
case Instruction::AddrSpaceCast:
518
case Instruction::Select:
519
case Instruction::ExtractElement:
520
case Instruction::InsertElement:
521
case Instruction::ShuffleVector:
522
case Instruction::InsertValue:
523
case Instruction::GetElementPtr:
524
exp = createExpr(I);
525
break;
526
default:
527
break;
528
}
529
530
if (!exp) {
531
ValueNumbering[V] = nextValueNumber;
532
return nextValueNumber++;
533
}
534
535
uint32_t e = ExpressionNumbering[exp];
536
if (!e) {
537
hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
538
auto I = HashNumbering.find(H);
539
if (I != HashNumbering.end()) {
540
e = I->second;
541
} else {
542
e = nextValueNumber++;
543
HashNumbering[H] = e;
544
ExpressionNumbering[exp] = e;
545
}
546
}
547
ValueNumbering[V] = e;
548
return e;
549
}
550
551
/// Returns the value number of the specified value. Fails if the value has
552
/// not yet been numbered.
553
uint32_t lookup(Value *V) const {
554
auto VI = ValueNumbering.find(V);
555
assert(VI != ValueNumbering.end() && "Value not numbered?");
556
return VI->second;
557
}
558
559
/// Removes all value numberings and resets the value table.
560
void clear() {
561
ValueNumbering.clear();
562
ExpressionNumbering.clear();
563
HashNumbering.clear();
564
Recycler.clear(Allocator);
565
nextValueNumber = 1;
566
}
567
568
/// \c Inst uses or touches memory. Return an ID describing the memory state
569
/// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
570
/// the exact same memory operations happen after I1 and I2.
571
///
572
/// This is a very hard problem in general, so we use domain-specific
573
/// knowledge that we only ever check for equivalence between blocks sharing a
574
/// single immediate successor that is common, and when determining if I1 ==
575
/// I2 we will have already determined that next(I1) == next(I2). This
576
/// inductive property allows us to simply return the value number of the next
577
/// instruction that defines memory.
578
uint32_t getMemoryUseOrder(Instruction *Inst) {
579
auto *BB = Inst->getParent();
580
for (auto I = std::next(Inst->getIterator()), E = BB->end();
581
I != E && !I->isTerminator(); ++I) {
582
if (!isMemoryInst(&*I))
583
continue;
584
if (isa<LoadInst>(&*I))
585
continue;
586
CallInst *CI = dyn_cast<CallInst>(&*I);
587
if (CI && CI->onlyReadsMemory())
588
continue;
589
InvokeInst *II = dyn_cast<InvokeInst>(&*I);
590
if (II && II->onlyReadsMemory())
591
continue;
592
return lookupOrAdd(&*I);
593
}
594
return 0;
595
}
596
};
597
598
//===----------------------------------------------------------------------===//
599
600
class GVNSink {
601
public:
602
GVNSink() {}
603
604
bool run(Function &F) {
605
LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
606
<< "\n");
607
608
unsigned NumSunk = 0;
609
ReversePostOrderTraversal<Function*> RPOT(&F);
610
VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
611
// Populate reverse post-order to order basic blocks in deterministic
612
// order. Any arbitrary ordering will work in this case as long as they are
613
// deterministic. The node ordering of newly created basic blocks
614
// are irrelevant because RPOT(for computing sinkable candidates) is also
615
// obtained ahead of time and only their order are relevant for this pass.
616
unsigned NodeOrdering = 0;
617
RPOTOrder[*RPOT.begin()] = ++NodeOrdering;
618
for (auto *BB : RPOT)
619
if (!pred_empty(BB))
620
RPOTOrder[BB] = ++NodeOrdering;
621
for (auto *N : RPOT)
622
NumSunk += sinkBB(N);
623
624
return NumSunk > 0;
625
}
626
627
private:
628
ValueTable VN;
629
DenseMap<const BasicBlock *, unsigned> RPOTOrder;
630
631
bool shouldAvoidSinkingInstruction(Instruction *I) {
632
// These instructions may change or break semantics if moved.
633
if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
634
I->getType()->isTokenTy())
635
return true;
636
return false;
637
}
638
639
/// The main heuristic function. Analyze the set of instructions pointed to by
640
/// LRI and return a candidate solution if these instructions can be sunk, or
641
/// std::nullopt otherwise.
642
std::optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
643
LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
644
ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
645
646
/// Create a ModelledPHI for each PHI in BB, adding to PHIs.
647
void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
648
SmallPtrSetImpl<Value *> &PHIContents) {
649
for (PHINode &PN : BB->phis()) {
650
auto MPHI = ModelledPHI(&PN, RPOTOrder);
651
PHIs.insert(MPHI);
652
for (auto *V : MPHI.getValues())
653
PHIContents.insert(V);
654
}
655
}
656
657
/// The main instruction sinking driver. Set up state and try and sink
658
/// instructions into BBEnd from its predecessors.
659
unsigned sinkBB(BasicBlock *BBEnd);
660
661
/// Perform the actual mechanics of sinking an instruction from Blocks into
662
/// BBEnd, which is their only successor.
663
void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);
664
665
/// Remove PHIs that all have the same incoming value.
666
void foldPointlessPHINodes(BasicBlock *BB) {
667
auto I = BB->begin();
668
while (PHINode *PN = dyn_cast<PHINode>(I++)) {
669
if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
670
return V == PN->getIncomingValue(0);
671
}))
672
continue;
673
if (PN->getIncomingValue(0) != PN)
674
PN->replaceAllUsesWith(PN->getIncomingValue(0));
675
else
676
PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
677
PN->eraseFromParent();
678
}
679
}
680
};
681
682
std::optional<SinkingInstructionCandidate>
683
GVNSink::analyzeInstructionForSinking(LockstepReverseIterator &LRI,
684
unsigned &InstNum,
685
unsigned &MemoryInstNum,
686
ModelledPHISet &NeededPHIs,
687
SmallPtrSetImpl<Value *> &PHIContents) {
688
auto Insts = *LRI;
689
LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
690
: Insts) {
691
I->dump();
692
} dbgs() << " ]\n";);
693
694
DenseMap<uint32_t, unsigned> VNums;
695
for (auto *I : Insts) {
696
uint32_t N = VN.lookupOrAdd(I);
697
LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
698
if (N == ~0U)
699
return std::nullopt;
700
VNums[N]++;
701
}
702
unsigned VNumToSink = llvm::max_element(VNums, llvm::less_second())->first;
703
704
if (VNums[VNumToSink] == 1)
705
// Can't sink anything!
706
return std::nullopt;
707
708
// Now restrict the number of incoming blocks down to only those with
709
// VNumToSink.
710
auto &ActivePreds = LRI.getActiveBlocks();
711
unsigned InitialActivePredSize = ActivePreds.size();
712
SmallVector<Instruction *, 4> NewInsts;
713
for (auto *I : Insts) {
714
if (VN.lookup(I) != VNumToSink)
715
ActivePreds.remove(I->getParent());
716
else
717
NewInsts.push_back(I);
718
}
719
for (auto *I : NewInsts)
720
if (shouldAvoidSinkingInstruction(I))
721
return std::nullopt;
722
723
// If we've restricted the incoming blocks, restrict all needed PHIs also
724
// to that set.
725
bool RecomputePHIContents = false;
726
if (ActivePreds.size() != InitialActivePredSize) {
727
ModelledPHISet NewNeededPHIs;
728
for (auto P : NeededPHIs) {
729
P.restrictToBlocks(ActivePreds);
730
NewNeededPHIs.insert(P);
731
}
732
NeededPHIs = NewNeededPHIs;
733
LRI.restrictToBlocks(ActivePreds);
734
RecomputePHIContents = true;
735
}
736
737
// The sunk instruction's results.
738
ModelledPHI NewPHI(NewInsts, ActivePreds, RPOTOrder);
739
740
// Does sinking this instruction render previous PHIs redundant?
741
if (NeededPHIs.erase(NewPHI))
742
RecomputePHIContents = true;
743
744
if (RecomputePHIContents) {
745
// The needed PHIs have changed, so recompute the set of all needed
746
// values.
747
PHIContents.clear();
748
for (auto &PHI : NeededPHIs)
749
PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
750
}
751
752
// Is this instruction required by a later PHI that doesn't match this PHI?
753
// if so, we can't sink this instruction.
754
for (auto *V : NewPHI.getValues())
755
if (PHIContents.count(V))
756
// V exists in this PHI, but the whole PHI is different to NewPHI
757
// (else it would have been removed earlier). We cannot continue
758
// because this isn't representable.
759
return std::nullopt;
760
761
// Which operands need PHIs?
762
// FIXME: If any of these fail, we should partition up the candidates to
763
// try and continue making progress.
764
Instruction *I0 = NewInsts[0];
765
766
auto isNotSameOperation = [&I0](Instruction *I) {
767
return !I0->isSameOperationAs(I);
768
};
769
770
if (any_of(NewInsts, isNotSameOperation))
771
return std::nullopt;
772
773
for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
774
ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
775
if (PHI.areAllIncomingValuesSame())
776
continue;
777
if (!canReplaceOperandWithVariable(I0, OpNum))
778
// We can 't create a PHI from this instruction!
779
return std::nullopt;
780
if (NeededPHIs.count(PHI))
781
continue;
782
if (!PHI.areAllIncomingValuesSameType())
783
return std::nullopt;
784
// Don't create indirect calls! The called value is the final operand.
785
if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
786
PHI.areAnyIncomingValuesConstant())
787
return std::nullopt;
788
789
NeededPHIs.reserve(NeededPHIs.size());
790
NeededPHIs.insert(PHI);
791
PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
792
}
793
794
if (isMemoryInst(NewInsts[0]))
795
++MemoryInstNum;
796
797
SinkingInstructionCandidate Cand;
798
Cand.NumInstructions = ++InstNum;
799
Cand.NumMemoryInsts = MemoryInstNum;
800
Cand.NumBlocks = ActivePreds.size();
801
Cand.NumPHIs = NeededPHIs.size();
802
append_range(Cand.Blocks, ActivePreds);
803
804
return Cand;
805
}
806
807
unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
808
LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
809
BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
810
SmallVector<BasicBlock *, 4> Preds;
811
for (auto *B : predecessors(BBEnd)) {
812
// Bailout on basic blocks without predecessor(PR42346).
813
if (!RPOTOrder.count(B))
814
return 0;
815
auto *T = B->getTerminator();
816
if (isa<BranchInst>(T) || isa<SwitchInst>(T))
817
Preds.push_back(B);
818
else
819
return 0;
820
}
821
if (Preds.size() < 2)
822
return 0;
823
auto ComesBefore = [this](const BasicBlock *BB1, const BasicBlock *BB2) {
824
return RPOTOrder.lookup(BB1) < RPOTOrder.lookup(BB2);
825
};
826
// Sort in a deterministic order.
827
llvm::sort(Preds, ComesBefore);
828
829
unsigned NumOrigPreds = Preds.size();
830
// We can only sink instructions through unconditional branches.
831
llvm::erase_if(Preds, [](BasicBlock *BB) {
832
return BB->getTerminator()->getNumSuccessors() != 1;
833
});
834
835
LockstepReverseIterator LRI(Preds);
836
SmallVector<SinkingInstructionCandidate, 4> Candidates;
837
unsigned InstNum = 0, MemoryInstNum = 0;
838
ModelledPHISet NeededPHIs;
839
SmallPtrSet<Value *, 4> PHIContents;
840
analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
841
unsigned NumOrigPHIs = NeededPHIs.size();
842
843
while (LRI.isValid()) {
844
auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
845
NeededPHIs, PHIContents);
846
if (!Cand)
847
break;
848
Cand->calculateCost(NumOrigPHIs, Preds.size());
849
Candidates.emplace_back(*Cand);
850
--LRI;
851
}
852
853
llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
854
LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
855
: Candidates) dbgs()
856
<< " " << C << "\n";);
857
858
// Pick the top candidate, as long it is positive!
859
if (Candidates.empty() || Candidates.front().Cost <= 0)
860
return 0;
861
auto C = Candidates.front();
862
863
LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
864
BasicBlock *InsertBB = BBEnd;
865
if (C.Blocks.size() < NumOrigPreds) {
866
LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
867
BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
868
InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
869
if (!InsertBB) {
870
LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
871
// Edge couldn't be split.
872
return 0;
873
}
874
}
875
876
for (unsigned I = 0; I < C.NumInstructions; ++I)
877
sinkLastInstruction(C.Blocks, InsertBB);
878
879
return C.NumInstructions;
880
}
881
882
void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
883
BasicBlock *BBEnd) {
884
SmallVector<Instruction *, 4> Insts;
885
for (BasicBlock *BB : Blocks)
886
Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
887
Instruction *I0 = Insts.front();
888
889
SmallVector<Value *, 4> NewOperands;
890
for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
891
bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
892
return I->getOperand(O) != I0->getOperand(O);
893
});
894
if (!NeedPHI) {
895
NewOperands.push_back(I0->getOperand(O));
896
continue;
897
}
898
899
// Create a new PHI in the successor block and populate it.
900
auto *Op = I0->getOperand(O);
901
assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
902
auto *PN =
903
PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
904
PN->insertBefore(BBEnd->begin());
905
for (auto *I : Insts)
906
PN->addIncoming(I->getOperand(O), I->getParent());
907
NewOperands.push_back(PN);
908
}
909
910
// Arbitrarily use I0 as the new "common" instruction; remap its operands
911
// and move it to the start of the successor block.
912
for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
913
I0->getOperandUse(O).set(NewOperands[O]);
914
I0->moveBefore(&*BBEnd->getFirstInsertionPt());
915
916
// Update metadata and IR flags.
917
for (auto *I : Insts)
918
if (I != I0) {
919
combineMetadataForCSE(I0, I, true);
920
I0->andIRFlags(I);
921
}
922
923
for (auto *I : Insts)
924
if (I != I0) {
925
I->replaceAllUsesWith(I0);
926
I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
927
}
928
foldPointlessPHINodes(BBEnd);
929
930
// Finally nuke all instructions apart from the common instruction.
931
for (auto *I : Insts)
932
if (I != I0)
933
I->eraseFromParent();
934
935
NumRemoved += Insts.size() - 1;
936
}
937
938
} // end anonymous namespace
939
940
PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
941
GVNSink G;
942
if (!G.run(F))
943
return PreservedAnalyses::all();
944
945
return PreservedAnalyses::none();
946
}
947
948