Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
35266 views
1
//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass merges conditional blocks of code and reduces the number of
10
// conditional branches in the hot paths based on profiles.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
15
#include "llvm/ADT/DenseMap.h"
16
#include "llvm/ADT/DenseSet.h"
17
#include "llvm/ADT/SmallVector.h"
18
#include "llvm/ADT/StringSet.h"
19
#include "llvm/Analysis/BlockFrequencyInfo.h"
20
#include "llvm/Analysis/GlobalsModRef.h"
21
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
22
#include "llvm/Analysis/ProfileSummaryInfo.h"
23
#include "llvm/Analysis/RegionInfo.h"
24
#include "llvm/Analysis/RegionIterator.h"
25
#include "llvm/Analysis/ValueTracking.h"
26
#include "llvm/IR/CFG.h"
27
#include "llvm/IR/Dominators.h"
28
#include "llvm/IR/IRBuilder.h"
29
#include "llvm/IR/IntrinsicInst.h"
30
#include "llvm/IR/MDBuilder.h"
31
#include "llvm/IR/Module.h"
32
#include "llvm/IR/PassManager.h"
33
#include "llvm/IR/ProfDataUtils.h"
34
#include "llvm/Support/BranchProbability.h"
35
#include "llvm/Support/CommandLine.h"
36
#include "llvm/Support/MemoryBuffer.h"
37
#include "llvm/Transforms/Utils.h"
38
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
39
#include "llvm/Transforms/Utils/Cloning.h"
40
#include "llvm/Transforms/Utils/ValueMapper.h"
41
42
#include <optional>
43
#include <set>
44
#include <sstream>
45
46
using namespace llvm;
47
48
#define DEBUG_TYPE "chr"
49
50
#define CHR_DEBUG(X) LLVM_DEBUG(X)
51
52
static cl::opt<bool> DisableCHR("disable-chr", cl::init(false), cl::Hidden,
53
cl::desc("Disable CHR for all functions"));
54
55
static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
56
cl::desc("Apply CHR for all functions"));
57
58
static cl::opt<double> CHRBiasThreshold(
59
"chr-bias-threshold", cl::init(0.99), cl::Hidden,
60
cl::desc("CHR considers a branch bias greater than this ratio as biased"));
61
62
static cl::opt<unsigned> CHRMergeThreshold(
63
"chr-merge-threshold", cl::init(2), cl::Hidden,
64
cl::desc("CHR merges a group of N branches/selects where N >= this value"));
65
66
static cl::opt<std::string> CHRModuleList(
67
"chr-module-list", cl::init(""), cl::Hidden,
68
cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
69
70
static cl::opt<std::string> CHRFunctionList(
71
"chr-function-list", cl::init(""), cl::Hidden,
72
cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
73
74
static cl::opt<unsigned> CHRDupThreshsold(
75
"chr-dup-threshold", cl::init(3), cl::Hidden,
76
cl::desc("Max number of duplications by CHR for a region"));
77
78
static StringSet<> CHRModules;
79
static StringSet<> CHRFunctions;
80
81
static void parseCHRFilterFiles() {
82
if (!CHRModuleList.empty()) {
83
auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
84
if (!FileOrErr) {
85
errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
86
std::exit(1);
87
}
88
StringRef Buf = FileOrErr->get()->getBuffer();
89
SmallVector<StringRef, 0> Lines;
90
Buf.split(Lines, '\n');
91
for (StringRef Line : Lines) {
92
Line = Line.trim();
93
if (!Line.empty())
94
CHRModules.insert(Line);
95
}
96
}
97
if (!CHRFunctionList.empty()) {
98
auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
99
if (!FileOrErr) {
100
errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
101
std::exit(1);
102
}
103
StringRef Buf = FileOrErr->get()->getBuffer();
104
SmallVector<StringRef, 0> Lines;
105
Buf.split(Lines, '\n');
106
for (StringRef Line : Lines) {
107
Line = Line.trim();
108
if (!Line.empty())
109
CHRFunctions.insert(Line);
110
}
111
}
112
}
113
114
namespace {
115
116
struct CHRStats {
117
CHRStats() = default;
118
void print(raw_ostream &OS) const {
119
OS << "CHRStats: NumBranches " << NumBranches
120
<< " NumBranchesDelta " << NumBranchesDelta
121
<< " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
122
}
123
// The original number of conditional branches / selects
124
uint64_t NumBranches = 0;
125
// The decrease of the number of conditional branches / selects in the hot
126
// paths due to CHR.
127
uint64_t NumBranchesDelta = 0;
128
// NumBranchesDelta weighted by the profile count at the scope entry.
129
uint64_t WeightedNumBranchesDelta = 0;
130
};
131
132
// RegInfo - some properties of a Region.
133
struct RegInfo {
134
RegInfo() = default;
135
RegInfo(Region *RegionIn) : R(RegionIn) {}
136
Region *R = nullptr;
137
bool HasBranch = false;
138
SmallVector<SelectInst *, 8> Selects;
139
};
140
141
typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
142
143
// CHRScope - a sequence of regions to CHR together. It corresponds to a
144
// sequence of conditional blocks. It can have subscopes which correspond to
145
// nested conditional blocks. Nested CHRScopes form a tree.
146
class CHRScope {
147
public:
148
CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
149
assert(RI.R && "Null RegionIn");
150
RegInfos.push_back(RI);
151
}
152
153
Region *getParentRegion() {
154
assert(RegInfos.size() > 0 && "Empty CHRScope");
155
Region *Parent = RegInfos[0].R->getParent();
156
assert(Parent && "Unexpected to call this on the top-level region");
157
return Parent;
158
}
159
160
BasicBlock *getEntryBlock() {
161
assert(RegInfos.size() > 0 && "Empty CHRScope");
162
return RegInfos.front().R->getEntry();
163
}
164
165
BasicBlock *getExitBlock() {
166
assert(RegInfos.size() > 0 && "Empty CHRScope");
167
return RegInfos.back().R->getExit();
168
}
169
170
bool appendable(CHRScope *Next) {
171
// The next scope is appendable only if this scope is directly connected to
172
// it (which implies it post-dominates this scope) and this scope dominates
173
// it (no edge to the next scope outside this scope).
174
BasicBlock *NextEntry = Next->getEntryBlock();
175
if (getExitBlock() != NextEntry)
176
// Not directly connected.
177
return false;
178
Region *LastRegion = RegInfos.back().R;
179
for (BasicBlock *Pred : predecessors(NextEntry))
180
if (!LastRegion->contains(Pred))
181
// There's an edge going into the entry of the next scope from outside
182
// of this scope.
183
return false;
184
return true;
185
}
186
187
void append(CHRScope *Next) {
188
assert(RegInfos.size() > 0 && "Empty CHRScope");
189
assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
190
assert(getParentRegion() == Next->getParentRegion() &&
191
"Must be siblings");
192
assert(getExitBlock() == Next->getEntryBlock() &&
193
"Must be adjacent");
194
RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
195
Subs.append(Next->Subs.begin(), Next->Subs.end());
196
}
197
198
void addSub(CHRScope *SubIn) {
199
#ifndef NDEBUG
200
bool IsChild = false;
201
for (RegInfo &RI : RegInfos)
202
if (RI.R == SubIn->getParentRegion()) {
203
IsChild = true;
204
break;
205
}
206
assert(IsChild && "Must be a child");
207
#endif
208
Subs.push_back(SubIn);
209
}
210
211
// Split this scope at the boundary region into two, which will belong to the
212
// tail and returns the tail.
213
CHRScope *split(Region *Boundary) {
214
assert(Boundary && "Boundary null");
215
assert(RegInfos.begin()->R != Boundary &&
216
"Can't be split at beginning");
217
auto BoundaryIt = llvm::find_if(
218
RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
219
if (BoundaryIt == RegInfos.end())
220
return nullptr;
221
ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
222
DenseSet<Region *> TailRegionSet;
223
for (const RegInfo &RI : TailRegInfos)
224
TailRegionSet.insert(RI.R);
225
226
auto TailIt =
227
std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
228
assert(Sub && "null Sub");
229
Region *Parent = Sub->getParentRegion();
230
if (TailRegionSet.count(Parent))
231
return false;
232
233
assert(llvm::any_of(
234
RegInfos,
235
[&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
236
"Must be in head");
237
return true;
238
});
239
ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
240
241
assert(HoistStopMap.empty() && "MapHoistStops must be empty");
242
auto *Scope = new CHRScope(TailRegInfos, TailSubs);
243
RegInfos.erase(BoundaryIt, RegInfos.end());
244
Subs.erase(TailIt, Subs.end());
245
return Scope;
246
}
247
248
bool contains(Instruction *I) const {
249
BasicBlock *Parent = I->getParent();
250
for (const RegInfo &RI : RegInfos)
251
if (RI.R->contains(Parent))
252
return true;
253
return false;
254
}
255
256
void print(raw_ostream &OS) const;
257
258
SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
259
SmallVector<CHRScope *, 8> Subs; // Subscopes.
260
261
// The instruction at which to insert the CHR conditional branch (and hoist
262
// the dependent condition values).
263
Instruction *BranchInsertPoint;
264
265
// True-biased and false-biased regions (conditional blocks),
266
// respectively. Used only for the outermost scope and includes regions in
267
// subscopes. The rest are unbiased.
268
DenseSet<Region *> TrueBiasedRegions;
269
DenseSet<Region *> FalseBiasedRegions;
270
// Among the biased regions, the regions that get CHRed.
271
SmallVector<RegInfo, 8> CHRRegions;
272
273
// True-biased and false-biased selects, respectively. Used only for the
274
// outermost scope and includes ones in subscopes.
275
DenseSet<SelectInst *> TrueBiasedSelects;
276
DenseSet<SelectInst *> FalseBiasedSelects;
277
278
// Map from one of the above regions to the instructions to stop
279
// hoisting instructions at through use-def chains.
280
HoistStopMapTy HoistStopMap;
281
282
private:
283
CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
284
: RegInfos(RegInfosIn.begin(), RegInfosIn.end()),
285
Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {}
286
};
287
288
class CHR {
289
public:
290
CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
291
ProfileSummaryInfo &PSIin, RegionInfo &RIin,
292
OptimizationRemarkEmitter &OREin)
293
: F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
294
295
~CHR() {
296
for (CHRScope *Scope : Scopes) {
297
delete Scope;
298
}
299
}
300
301
bool run();
302
303
private:
304
// See the comments in CHR::run() for the high level flow of the algorithm and
305
// what the following functions do.
306
307
void findScopes(SmallVectorImpl<CHRScope *> &Output) {
308
Region *R = RI.getTopLevelRegion();
309
if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
310
Output.push_back(Scope);
311
}
312
}
313
CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
314
SmallVectorImpl<CHRScope *> &Scopes);
315
CHRScope *findScope(Region *R);
316
void checkScopeHoistable(CHRScope *Scope);
317
318
void splitScopes(SmallVectorImpl<CHRScope *> &Input,
319
SmallVectorImpl<CHRScope *> &Output);
320
SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
321
CHRScope *Outer,
322
DenseSet<Value *> *OuterConditionValues,
323
Instruction *OuterInsertPoint,
324
SmallVectorImpl<CHRScope *> &Output,
325
DenseSet<Instruction *> &Unhoistables);
326
327
void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
328
void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
329
330
void filterScopes(SmallVectorImpl<CHRScope *> &Input,
331
SmallVectorImpl<CHRScope *> &Output);
332
333
void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
334
SmallVectorImpl<CHRScope *> &Output);
335
void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
336
337
void sortScopes(SmallVectorImpl<CHRScope *> &Input,
338
SmallVectorImpl<CHRScope *> &Output);
339
340
void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
341
void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
342
void cloneScopeBlocks(CHRScope *Scope,
343
BasicBlock *PreEntryBlock,
344
BasicBlock *ExitBlock,
345
Region *LastRegion,
346
ValueToValueMapTy &VMap);
347
BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
348
BasicBlock *EntryBlock,
349
BasicBlock *NewEntryBlock,
350
ValueToValueMapTy &VMap);
351
void fixupBranchesAndSelects(CHRScope *Scope, BasicBlock *PreEntryBlock,
352
BranchInst *MergedBR, uint64_t ProfileCount);
353
void fixupBranch(Region *R, CHRScope *Scope, IRBuilder<> &IRB,
354
Value *&MergedCondition, BranchProbability &CHRBranchBias);
355
void fixupSelect(SelectInst *SI, CHRScope *Scope, IRBuilder<> &IRB,
356
Value *&MergedCondition, BranchProbability &CHRBranchBias);
357
void addToMergedCondition(bool IsTrueBiased, Value *Cond,
358
Instruction *BranchOrSelect, CHRScope *Scope,
359
IRBuilder<> &IRB, Value *&MergedCondition);
360
unsigned getRegionDuplicationCount(const Region *R) {
361
unsigned Count = 0;
362
// Find out how many times region R is cloned. Note that if the parent
363
// of R is cloned, R is also cloned, but R's clone count is not updated
364
// from the clone of the parent. We need to accumlate all the counts
365
// from the ancestors to get the clone count.
366
while (R) {
367
Count += DuplicationCount[R];
368
R = R->getParent();
369
}
370
return Count;
371
}
372
373
Function &F;
374
BlockFrequencyInfo &BFI;
375
DominatorTree &DT;
376
ProfileSummaryInfo &PSI;
377
RegionInfo &RI;
378
OptimizationRemarkEmitter &ORE;
379
CHRStats Stats;
380
381
// All the true-biased regions in the function
382
DenseSet<Region *> TrueBiasedRegionsGlobal;
383
// All the false-biased regions in the function
384
DenseSet<Region *> FalseBiasedRegionsGlobal;
385
// All the true-biased selects in the function
386
DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
387
// All the false-biased selects in the function
388
DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
389
// A map from biased regions to their branch bias
390
DenseMap<Region *, BranchProbability> BranchBiasMap;
391
// A map from biased selects to their branch bias
392
DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
393
// All the scopes.
394
DenseSet<CHRScope *> Scopes;
395
// This maps records how many times this region is cloned.
396
DenseMap<const Region *, unsigned> DuplicationCount;
397
};
398
399
} // end anonymous namespace
400
401
static inline
402
raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
403
const CHRStats &Stats) {
404
Stats.print(OS);
405
return OS;
406
}
407
408
static inline
409
raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
410
Scope.print(OS);
411
return OS;
412
}
413
414
static bool shouldApply(Function &F, ProfileSummaryInfo &PSI) {
415
if (DisableCHR)
416
return false;
417
418
if (ForceCHR)
419
return true;
420
421
if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
422
if (CHRModules.count(F.getParent()->getName()))
423
return true;
424
return CHRFunctions.count(F.getName());
425
}
426
427
return PSI.isFunctionEntryHot(&F);
428
}
429
430
static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
431
CHRStats *Stats) {
432
StringRef FuncName = F.getName();
433
StringRef ModuleName = F.getParent()->getName();
434
(void)(FuncName); // Unused in release build.
435
(void)(ModuleName); // Unused in release build.
436
CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
437
<< FuncName);
438
if (Stats)
439
CHR_DEBUG(dbgs() << " " << *Stats);
440
CHR_DEBUG(dbgs() << "\n");
441
CHR_DEBUG(F.dump());
442
}
443
444
void CHRScope::print(raw_ostream &OS) const {
445
assert(RegInfos.size() > 0 && "Empty CHRScope");
446
OS << "CHRScope[";
447
OS << RegInfos.size() << ", Regions[";
448
for (const RegInfo &RI : RegInfos) {
449
OS << RI.R->getNameStr();
450
if (RI.HasBranch)
451
OS << " B";
452
if (RI.Selects.size() > 0)
453
OS << " S" << RI.Selects.size();
454
OS << ", ";
455
}
456
if (RegInfos[0].R->getParent()) {
457
OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
458
} else {
459
// top level region
460
OS << "]";
461
}
462
OS << ", Subs[";
463
for (CHRScope *Sub : Subs) {
464
OS << *Sub << ", ";
465
}
466
OS << "]]";
467
}
468
469
// Return true if the given instruction type can be hoisted by CHR.
470
static bool isHoistableInstructionType(Instruction *I) {
471
return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
472
isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
473
isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
474
isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
475
isa<InsertValueInst>(I);
476
}
477
478
// Return true if the given instruction can be hoisted by CHR.
479
static bool isHoistable(Instruction *I, DominatorTree &DT) {
480
if (!isHoistableInstructionType(I))
481
return false;
482
return isSafeToSpeculativelyExecute(I, nullptr, nullptr, &DT);
483
}
484
485
// Recursively traverse the use-def chains of the given value and return a set
486
// of the unhoistable base values defined within the scope (excluding the
487
// first-region entry block) or the (hoistable or unhoistable) base values that
488
// are defined outside (including the first-region entry block) of the
489
// scope. The returned set doesn't include constants.
490
static const std::set<Value *> &
491
getBaseValues(Value *V, DominatorTree &DT,
492
DenseMap<Value *, std::set<Value *>> &Visited) {
493
auto It = Visited.find(V);
494
if (It != Visited.end()) {
495
return It->second;
496
}
497
std::set<Value *> Result;
498
if (auto *I = dyn_cast<Instruction>(V)) {
499
// We don't stop at a block that's not in the Scope because we would miss
500
// some instructions that are based on the same base values if we stop
501
// there.
502
if (!isHoistable(I, DT)) {
503
Result.insert(I);
504
return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
505
}
506
// I is hoistable above the Scope.
507
for (Value *Op : I->operands()) {
508
const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
509
Result.insert(OpResult.begin(), OpResult.end());
510
}
511
return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
512
}
513
if (isa<Argument>(V)) {
514
Result.insert(V);
515
}
516
// We don't include others like constants because those won't lead to any
517
// chance of folding of conditions (eg two bit checks merged into one check)
518
// after CHR.
519
return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
520
}
521
522
// Return true if V is already hoisted or can be hoisted (along with its
523
// operands) above the insert point. When it returns true and HoistStops is
524
// non-null, the instructions to stop hoisting at through the use-def chains are
525
// inserted into HoistStops.
526
static bool
527
checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
528
DenseSet<Instruction *> &Unhoistables,
529
DenseSet<Instruction *> *HoistStops,
530
DenseMap<Instruction *, bool> &Visited) {
531
assert(InsertPoint && "Null InsertPoint");
532
if (auto *I = dyn_cast<Instruction>(V)) {
533
auto It = Visited.find(I);
534
if (It != Visited.end()) {
535
return It->second;
536
}
537
assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
538
assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
539
if (Unhoistables.count(I)) {
540
// Don't hoist if they are not to be hoisted.
541
Visited[I] = false;
542
return false;
543
}
544
if (DT.dominates(I, InsertPoint)) {
545
// We are already above the insert point. Stop here.
546
if (HoistStops)
547
HoistStops->insert(I);
548
Visited[I] = true;
549
return true;
550
}
551
// We aren't not above the insert point, check if we can hoist it above the
552
// insert point.
553
if (isHoistable(I, DT)) {
554
// Check operands first.
555
DenseSet<Instruction *> OpsHoistStops;
556
bool AllOpsHoisted = true;
557
for (Value *Op : I->operands()) {
558
if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
559
Visited)) {
560
AllOpsHoisted = false;
561
break;
562
}
563
}
564
if (AllOpsHoisted) {
565
CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
566
if (HoistStops)
567
HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
568
Visited[I] = true;
569
return true;
570
}
571
}
572
Visited[I] = false;
573
return false;
574
}
575
// Non-instructions are considered hoistable.
576
return true;
577
}
578
579
// Constructs the true and false branch probabilities if the the instruction has
580
// valid branch weights. Returns true when this was successful, false otherwise.
581
static bool extractBranchProbabilities(Instruction *I,
582
BranchProbability &TrueProb,
583
BranchProbability &FalseProb) {
584
uint64_t TrueWeight;
585
uint64_t FalseWeight;
586
if (!extractBranchWeights(*I, TrueWeight, FalseWeight))
587
return false;
588
uint64_t SumWeight = TrueWeight + FalseWeight;
589
590
assert(SumWeight >= TrueWeight && SumWeight >= FalseWeight &&
591
"Overflow calculating branch probabilities.");
592
593
// Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
594
if (SumWeight == 0)
595
return false;
596
597
TrueProb = BranchProbability::getBranchProbability(TrueWeight, SumWeight);
598
FalseProb = BranchProbability::getBranchProbability(FalseWeight, SumWeight);
599
return true;
600
}
601
602
static BranchProbability getCHRBiasThreshold() {
603
return BranchProbability::getBranchProbability(
604
static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
605
}
606
607
// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
608
// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
609
// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
610
// false.
611
template <typename K, typename S, typename M>
612
static bool checkBias(K *Key, BranchProbability TrueProb,
613
BranchProbability FalseProb, S &TrueSet, S &FalseSet,
614
M &BiasMap) {
615
BranchProbability Threshold = getCHRBiasThreshold();
616
if (TrueProb >= Threshold) {
617
TrueSet.insert(Key);
618
BiasMap[Key] = TrueProb;
619
return true;
620
} else if (FalseProb >= Threshold) {
621
FalseSet.insert(Key);
622
BiasMap[Key] = FalseProb;
623
return true;
624
}
625
return false;
626
}
627
628
// Returns true and insert a region into the right biased set and the map if the
629
// branch of the region is biased.
630
static bool checkBiasedBranch(BranchInst *BI, Region *R,
631
DenseSet<Region *> &TrueBiasedRegionsGlobal,
632
DenseSet<Region *> &FalseBiasedRegionsGlobal,
633
DenseMap<Region *, BranchProbability> &BranchBiasMap) {
634
if (!BI->isConditional())
635
return false;
636
BranchProbability ThenProb, ElseProb;
637
if (!extractBranchProbabilities(BI, ThenProb, ElseProb))
638
return false;
639
BasicBlock *IfThen = BI->getSuccessor(0);
640
BasicBlock *IfElse = BI->getSuccessor(1);
641
assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
642
IfThen != IfElse &&
643
"Invariant from findScopes");
644
if (IfThen == R->getExit()) {
645
// Swap them so that IfThen/ThenProb means going into the conditional code
646
// and IfElse/ElseProb means skipping it.
647
std::swap(IfThen, IfElse);
648
std::swap(ThenProb, ElseProb);
649
}
650
CHR_DEBUG(dbgs() << "BI " << *BI << " ");
651
CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
652
CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
653
return checkBias(R, ThenProb, ElseProb,
654
TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
655
BranchBiasMap);
656
}
657
658
// Returns true and insert a select into the right biased set and the map if the
659
// select is biased.
660
static bool checkBiasedSelect(
661
SelectInst *SI, Region *R,
662
DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
663
DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
664
DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
665
BranchProbability TrueProb, FalseProb;
666
if (!extractBranchProbabilities(SI, TrueProb, FalseProb))
667
return false;
668
CHR_DEBUG(dbgs() << "SI " << *SI << " ");
669
CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
670
CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
671
return checkBias(SI, TrueProb, FalseProb,
672
TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
673
SelectBiasMap);
674
}
675
676
// Returns the instruction at which to hoist the dependent condition values and
677
// insert the CHR branch for a region. This is the terminator branch in the
678
// entry block or the first select in the entry block, if any.
679
static Instruction* getBranchInsertPoint(RegInfo &RI) {
680
Region *R = RI.R;
681
BasicBlock *EntryBB = R->getEntry();
682
// The hoist point is by default the terminator of the entry block, which is
683
// the same as the branch instruction if RI.HasBranch is true.
684
Instruction *HoistPoint = EntryBB->getTerminator();
685
for (SelectInst *SI : RI.Selects) {
686
if (SI->getParent() == EntryBB) {
687
// Pick the first select in Selects in the entry block. Note Selects is
688
// sorted in the instruction order within a block (asserted below).
689
HoistPoint = SI;
690
break;
691
}
692
}
693
assert(HoistPoint && "Null HoistPoint");
694
#ifndef NDEBUG
695
// Check that HoistPoint is the first one in Selects in the entry block,
696
// if any.
697
DenseSet<Instruction *> EntryBlockSelectSet;
698
for (SelectInst *SI : RI.Selects) {
699
if (SI->getParent() == EntryBB) {
700
EntryBlockSelectSet.insert(SI);
701
}
702
}
703
for (Instruction &I : *EntryBB) {
704
if (EntryBlockSelectSet.contains(&I)) {
705
assert(&I == HoistPoint &&
706
"HoistPoint must be the first one in Selects");
707
break;
708
}
709
}
710
#endif
711
return HoistPoint;
712
}
713
714
// Find a CHR scope in the given region.
715
CHRScope * CHR::findScope(Region *R) {
716
CHRScope *Result = nullptr;
717
BasicBlock *Entry = R->getEntry();
718
BasicBlock *Exit = R->getExit(); // null if top level.
719
assert(Entry && "Entry must not be null");
720
assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
721
"Only top level region has a null exit");
722
if (Entry)
723
CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
724
else
725
CHR_DEBUG(dbgs() << "Entry null\n");
726
if (Exit)
727
CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
728
else
729
CHR_DEBUG(dbgs() << "Exit null\n");
730
// Exclude cases where Entry is part of a subregion (hence it doesn't belong
731
// to this region).
732
bool EntryInSubregion = RI.getRegionFor(Entry) != R;
733
if (EntryInSubregion)
734
return nullptr;
735
// Exclude loops
736
for (BasicBlock *Pred : predecessors(Entry))
737
if (R->contains(Pred))
738
return nullptr;
739
// If any of the basic blocks have address taken, we must skip this region
740
// because we cannot clone basic blocks that have address taken.
741
for (BasicBlock *BB : R->blocks()) {
742
if (BB->hasAddressTaken())
743
return nullptr;
744
// If we encounter llvm.coro.id, skip this region because if the basic block
745
// is cloned, we end up inserting a token type PHI node to the block with
746
// llvm.coro.begin.
747
// FIXME: This could lead to less optimal codegen, because the region is
748
// excluded, it can prevent CHR from merging adjacent regions into bigger
749
// scope and hoisting more branches.
750
for (Instruction &I : *BB)
751
if (auto *II = dyn_cast<IntrinsicInst>(&I))
752
if (II->getIntrinsicID() == Intrinsic::coro_id)
753
return nullptr;
754
}
755
756
if (Exit) {
757
// Try to find an if-then block (check if R is an if-then).
758
// if (cond) {
759
// ...
760
// }
761
auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
762
if (BI)
763
CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
764
else
765
CHR_DEBUG(dbgs() << "BI null\n");
766
if (BI && BI->isConditional()) {
767
BasicBlock *S0 = BI->getSuccessor(0);
768
BasicBlock *S1 = BI->getSuccessor(1);
769
CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
770
CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
771
if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
772
RegInfo RI(R);
773
RI.HasBranch = checkBiasedBranch(
774
BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
775
BranchBiasMap);
776
Result = new CHRScope(RI);
777
Scopes.insert(Result);
778
CHR_DEBUG(dbgs() << "Found a region with a branch\n");
779
++Stats.NumBranches;
780
if (!RI.HasBranch) {
781
ORE.emit([&]() {
782
return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
783
<< "Branch not biased";
784
});
785
}
786
}
787
}
788
}
789
{
790
// Try to look for selects in the direct child blocks (as opposed to in
791
// subregions) of R.
792
// ...
793
// if (..) { // Some subregion
794
// ...
795
// }
796
// if (..) { // Some subregion
797
// ...
798
// }
799
// ...
800
// a = cond ? b : c;
801
// ...
802
SmallVector<SelectInst *, 8> Selects;
803
for (RegionNode *E : R->elements()) {
804
if (E->isSubRegion())
805
continue;
806
// This returns the basic block of E if E is a direct child of R (not a
807
// subregion.)
808
BasicBlock *BB = E->getEntry();
809
// Need to push in the order to make it easier to find the first Select
810
// later.
811
for (Instruction &I : *BB) {
812
if (auto *SI = dyn_cast<SelectInst>(&I)) {
813
Selects.push_back(SI);
814
++Stats.NumBranches;
815
}
816
}
817
}
818
if (Selects.size() > 0) {
819
auto AddSelects = [&](RegInfo &RI) {
820
for (auto *SI : Selects)
821
if (checkBiasedSelect(SI, RI.R,
822
TrueBiasedSelectsGlobal,
823
FalseBiasedSelectsGlobal,
824
SelectBiasMap))
825
RI.Selects.push_back(SI);
826
else
827
ORE.emit([&]() {
828
return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
829
<< "Select not biased";
830
});
831
};
832
if (!Result) {
833
CHR_DEBUG(dbgs() << "Found a select-only region\n");
834
RegInfo RI(R);
835
AddSelects(RI);
836
Result = new CHRScope(RI);
837
Scopes.insert(Result);
838
} else {
839
CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
840
AddSelects(Result->RegInfos[0]);
841
}
842
}
843
}
844
845
if (Result) {
846
checkScopeHoistable(Result);
847
}
848
return Result;
849
}
850
851
// Check that any of the branch and the selects in the region could be
852
// hoisted above the the CHR branch insert point (the most dominating of
853
// them, either the branch (at the end of the first block) or the first
854
// select in the first block). If the branch can't be hoisted, drop the
855
// selects in the first blocks.
856
//
857
// For example, for the following scope/region with selects, we want to insert
858
// the merged branch right before the first select in the first/entry block by
859
// hoisting c1, c2, c3, and c4.
860
//
861
// // Branch insert point here.
862
// a = c1 ? b : c; // Select 1
863
// d = c2 ? e : f; // Select 2
864
// if (c3) { // Branch
865
// ...
866
// c4 = foo() // A call.
867
// g = c4 ? h : i; // Select 3
868
// }
869
//
870
// But suppose we can't hoist c4 because it's dependent on the preceding
871
// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
872
// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
873
void CHR::checkScopeHoistable(CHRScope *Scope) {
874
RegInfo &RI = Scope->RegInfos[0];
875
Region *R = RI.R;
876
BasicBlock *EntryBB = R->getEntry();
877
auto *Branch = RI.HasBranch ?
878
cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
879
SmallVector<SelectInst *, 8> &Selects = RI.Selects;
880
if (RI.HasBranch || !Selects.empty()) {
881
Instruction *InsertPoint = getBranchInsertPoint(RI);
882
CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
883
// Avoid a data dependence from a select or a branch to a(nother)
884
// select. Note no instruction can't data-depend on a branch (a branch
885
// instruction doesn't produce a value).
886
DenseSet<Instruction *> Unhoistables;
887
// Initialize Unhoistables with the selects.
888
for (SelectInst *SI : Selects) {
889
Unhoistables.insert(SI);
890
}
891
// Remove Selects that can't be hoisted.
892
for (auto it = Selects.begin(); it != Selects.end(); ) {
893
SelectInst *SI = *it;
894
if (SI == InsertPoint) {
895
++it;
896
continue;
897
}
898
DenseMap<Instruction *, bool> Visited;
899
bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
900
DT, Unhoistables, nullptr, Visited);
901
if (!IsHoistable) {
902
CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
903
ORE.emit([&]() {
904
return OptimizationRemarkMissed(DEBUG_TYPE,
905
"DropUnhoistableSelect", SI)
906
<< "Dropped unhoistable select";
907
});
908
it = Selects.erase(it);
909
// Since we are dropping the select here, we also drop it from
910
// Unhoistables.
911
Unhoistables.erase(SI);
912
} else
913
++it;
914
}
915
// Update InsertPoint after potentially removing selects.
916
InsertPoint = getBranchInsertPoint(RI);
917
CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
918
if (RI.HasBranch && InsertPoint != Branch) {
919
DenseMap<Instruction *, bool> Visited;
920
bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
921
DT, Unhoistables, nullptr, Visited);
922
if (!IsHoistable) {
923
// If the branch isn't hoistable, drop the selects in the entry
924
// block, preferring the branch, which makes the branch the hoist
925
// point.
926
assert(InsertPoint != Branch && "Branch must not be the hoist point");
927
CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
928
CHR_DEBUG(
929
for (SelectInst *SI : Selects) {
930
dbgs() << "SI " << *SI << "\n";
931
});
932
for (SelectInst *SI : Selects) {
933
ORE.emit([&]() {
934
return OptimizationRemarkMissed(DEBUG_TYPE,
935
"DropSelectUnhoistableBranch", SI)
936
<< "Dropped select due to unhoistable branch";
937
});
938
}
939
llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
940
return SI->getParent() == EntryBB;
941
});
942
Unhoistables.clear();
943
InsertPoint = Branch;
944
}
945
}
946
CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
947
#ifndef NDEBUG
948
if (RI.HasBranch) {
949
assert(!DT.dominates(Branch, InsertPoint) &&
950
"Branch can't be already above the hoist point");
951
DenseMap<Instruction *, bool> Visited;
952
assert(checkHoistValue(Branch->getCondition(), InsertPoint,
953
DT, Unhoistables, nullptr, Visited) &&
954
"checkHoistValue for branch");
955
}
956
for (auto *SI : Selects) {
957
assert(!DT.dominates(SI, InsertPoint) &&
958
"SI can't be already above the hoist point");
959
DenseMap<Instruction *, bool> Visited;
960
assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
961
Unhoistables, nullptr, Visited) &&
962
"checkHoistValue for selects");
963
}
964
CHR_DEBUG(dbgs() << "Result\n");
965
if (RI.HasBranch) {
966
CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
967
}
968
for (auto *SI : Selects) {
969
CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
970
}
971
#endif
972
}
973
}
974
975
// Traverse the region tree, find all nested scopes and merge them if possible.
976
CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
977
SmallVectorImpl<CHRScope *> &Scopes) {
978
CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
979
CHRScope *Result = findScope(R);
980
// Visit subscopes.
981
CHRScope *ConsecutiveSubscope = nullptr;
982
SmallVector<CHRScope *, 8> Subscopes;
983
for (auto It = R->begin(); It != R->end(); ++It) {
984
const std::unique_ptr<Region> &SubR = *It;
985
auto NextIt = std::next(It);
986
Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
987
CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
988
<< "\n");
989
CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
990
if (SubCHRScope) {
991
CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
992
} else {
993
CHR_DEBUG(dbgs() << "Subregion Scope null\n");
994
}
995
if (SubCHRScope) {
996
if (!ConsecutiveSubscope)
997
ConsecutiveSubscope = SubCHRScope;
998
else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
999
Subscopes.push_back(ConsecutiveSubscope);
1000
ConsecutiveSubscope = SubCHRScope;
1001
} else
1002
ConsecutiveSubscope->append(SubCHRScope);
1003
} else {
1004
if (ConsecutiveSubscope) {
1005
Subscopes.push_back(ConsecutiveSubscope);
1006
}
1007
ConsecutiveSubscope = nullptr;
1008
}
1009
}
1010
if (ConsecutiveSubscope) {
1011
Subscopes.push_back(ConsecutiveSubscope);
1012
}
1013
for (CHRScope *Sub : Subscopes) {
1014
if (Result) {
1015
// Combine it with the parent.
1016
Result->addSub(Sub);
1017
} else {
1018
// Push Subscopes as they won't be combined with the parent.
1019
Scopes.push_back(Sub);
1020
}
1021
}
1022
return Result;
1023
}
1024
1025
static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
1026
DenseSet<Value *> ConditionValues;
1027
if (RI.HasBranch) {
1028
auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1029
ConditionValues.insert(BI->getCondition());
1030
}
1031
for (SelectInst *SI : RI.Selects) {
1032
ConditionValues.insert(SI->getCondition());
1033
}
1034
return ConditionValues;
1035
}
1036
1037
1038
// Determine whether to split a scope depending on the sets of the branch
1039
// condition values of the previous region and the current region. We split
1040
// (return true) it if 1) the condition values of the inner/lower scope can't be
1041
// hoisted up to the outer/upper scope, or 2) the two sets of the condition
1042
// values have an empty intersection (because the combined branch conditions
1043
// won't probably lead to a simpler combined condition).
1044
static bool shouldSplit(Instruction *InsertPoint,
1045
DenseSet<Value *> &PrevConditionValues,
1046
DenseSet<Value *> &ConditionValues,
1047
DominatorTree &DT,
1048
DenseSet<Instruction *> &Unhoistables) {
1049
assert(InsertPoint && "Null InsertPoint");
1050
CHR_DEBUG(
1051
dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1052
for (Value *V : PrevConditionValues) {
1053
dbgs() << *V << ", ";
1054
}
1055
dbgs() << " ConditionValues ";
1056
for (Value *V : ConditionValues) {
1057
dbgs() << *V << ", ";
1058
}
1059
dbgs() << "\n");
1060
// If any of Bases isn't hoistable to the hoist point, split.
1061
for (Value *V : ConditionValues) {
1062
DenseMap<Instruction *, bool> Visited;
1063
if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
1064
CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1065
return true; // Not hoistable, split.
1066
}
1067
}
1068
// If PrevConditionValues or ConditionValues is empty, don't split to avoid
1069
// unnecessary splits at scopes with no branch/selects. If
1070
// PrevConditionValues and ConditionValues don't intersect at all, split.
1071
if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1072
// Use std::set as DenseSet doesn't work with set_intersection.
1073
std::set<Value *> PrevBases, Bases;
1074
DenseMap<Value *, std::set<Value *>> Visited;
1075
for (Value *V : PrevConditionValues) {
1076
const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1077
PrevBases.insert(BaseValues.begin(), BaseValues.end());
1078
}
1079
for (Value *V : ConditionValues) {
1080
const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1081
Bases.insert(BaseValues.begin(), BaseValues.end());
1082
}
1083
CHR_DEBUG(
1084
dbgs() << "PrevBases ";
1085
for (Value *V : PrevBases) {
1086
dbgs() << *V << ", ";
1087
}
1088
dbgs() << " Bases ";
1089
for (Value *V : Bases) {
1090
dbgs() << *V << ", ";
1091
}
1092
dbgs() << "\n");
1093
std::vector<Value *> Intersection;
1094
std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
1095
Bases.end(), std::back_inserter(Intersection));
1096
if (Intersection.empty()) {
1097
// Empty intersection, split.
1098
CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1099
return true;
1100
}
1101
}
1102
CHR_DEBUG(dbgs() << "No split\n");
1103
return false; // Don't split.
1104
}
1105
1106
static void getSelectsInScope(CHRScope *Scope,
1107
DenseSet<Instruction *> &Output) {
1108
for (RegInfo &RI : Scope->RegInfos)
1109
for (SelectInst *SI : RI.Selects)
1110
Output.insert(SI);
1111
for (CHRScope *Sub : Scope->Subs)
1112
getSelectsInScope(Sub, Output);
1113
}
1114
1115
void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1116
SmallVectorImpl<CHRScope *> &Output) {
1117
for (CHRScope *Scope : Input) {
1118
assert(!Scope->BranchInsertPoint &&
1119
"BranchInsertPoint must not be set");
1120
DenseSet<Instruction *> Unhoistables;
1121
getSelectsInScope(Scope, Unhoistables);
1122
splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1123
}
1124
#ifndef NDEBUG
1125
for (CHRScope *Scope : Output) {
1126
assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1127
}
1128
#endif
1129
}
1130
1131
SmallVector<CHRScope *, 8> CHR::splitScope(
1132
CHRScope *Scope,
1133
CHRScope *Outer,
1134
DenseSet<Value *> *OuterConditionValues,
1135
Instruction *OuterInsertPoint,
1136
SmallVectorImpl<CHRScope *> &Output,
1137
DenseSet<Instruction *> &Unhoistables) {
1138
if (Outer) {
1139
assert(OuterConditionValues && "Null OuterConditionValues");
1140
assert(OuterInsertPoint && "Null OuterInsertPoint");
1141
}
1142
bool PrevSplitFromOuter = true;
1143
DenseSet<Value *> PrevConditionValues;
1144
Instruction *PrevInsertPoint = nullptr;
1145
SmallVector<CHRScope *, 8> Splits;
1146
SmallVector<bool, 8> SplitsSplitFromOuter;
1147
SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1148
SmallVector<Instruction *, 8> SplitsInsertPoints;
1149
SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
1150
for (RegInfo &RI : RegInfos) {
1151
Instruction *InsertPoint = getBranchInsertPoint(RI);
1152
DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
1153
CHR_DEBUG(
1154
dbgs() << "ConditionValues ";
1155
for (Value *V : ConditionValues) {
1156
dbgs() << *V << ", ";
1157
}
1158
dbgs() << "\n");
1159
if (RI.R == RegInfos[0].R) {
1160
// First iteration. Check to see if we should split from the outer.
1161
if (Outer) {
1162
CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1163
CHR_DEBUG(dbgs() << "Should split from outer at "
1164
<< RI.R->getNameStr() << "\n");
1165
if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1166
ConditionValues, DT, Unhoistables)) {
1167
PrevConditionValues = ConditionValues;
1168
PrevInsertPoint = InsertPoint;
1169
ORE.emit([&]() {
1170
return OptimizationRemarkMissed(DEBUG_TYPE,
1171
"SplitScopeFromOuter",
1172
RI.R->getEntry()->getTerminator())
1173
<< "Split scope from outer due to unhoistable branch/select "
1174
<< "and/or lack of common condition values";
1175
});
1176
} else {
1177
// Not splitting from the outer. Use the outer bases and insert
1178
// point. Union the bases.
1179
PrevSplitFromOuter = false;
1180
PrevConditionValues = *OuterConditionValues;
1181
PrevConditionValues.insert(ConditionValues.begin(),
1182
ConditionValues.end());
1183
PrevInsertPoint = OuterInsertPoint;
1184
}
1185
} else {
1186
CHR_DEBUG(dbgs() << "Outer null\n");
1187
PrevConditionValues = ConditionValues;
1188
PrevInsertPoint = InsertPoint;
1189
}
1190
} else {
1191
CHR_DEBUG(dbgs() << "Should split from prev at "
1192
<< RI.R->getNameStr() << "\n");
1193
if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1194
DT, Unhoistables)) {
1195
CHRScope *Tail = Scope->split(RI.R);
1196
Scopes.insert(Tail);
1197
Splits.push_back(Scope);
1198
SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1199
SplitsConditionValues.push_back(PrevConditionValues);
1200
SplitsInsertPoints.push_back(PrevInsertPoint);
1201
Scope = Tail;
1202
PrevConditionValues = ConditionValues;
1203
PrevInsertPoint = InsertPoint;
1204
PrevSplitFromOuter = true;
1205
ORE.emit([&]() {
1206
return OptimizationRemarkMissed(DEBUG_TYPE,
1207
"SplitScopeFromPrev",
1208
RI.R->getEntry()->getTerminator())
1209
<< "Split scope from previous due to unhoistable branch/select "
1210
<< "and/or lack of common condition values";
1211
});
1212
} else {
1213
// Not splitting. Union the bases. Keep the hoist point.
1214
PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1215
}
1216
}
1217
}
1218
Splits.push_back(Scope);
1219
SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1220
SplitsConditionValues.push_back(PrevConditionValues);
1221
assert(PrevInsertPoint && "Null PrevInsertPoint");
1222
SplitsInsertPoints.push_back(PrevInsertPoint);
1223
assert(Splits.size() == SplitsConditionValues.size() &&
1224
Splits.size() == SplitsSplitFromOuter.size() &&
1225
Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1226
for (size_t I = 0; I < Splits.size(); ++I) {
1227
CHRScope *Split = Splits[I];
1228
DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1229
Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1230
SmallVector<CHRScope *, 8> NewSubs;
1231
DenseSet<Instruction *> SplitUnhoistables;
1232
getSelectsInScope(Split, SplitUnhoistables);
1233
for (CHRScope *Sub : Split->Subs) {
1234
SmallVector<CHRScope *, 8> SubSplits = splitScope(
1235
Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1236
SplitUnhoistables);
1237
llvm::append_range(NewSubs, SubSplits);
1238
}
1239
Split->Subs = NewSubs;
1240
}
1241
SmallVector<CHRScope *, 8> Result;
1242
for (size_t I = 0; I < Splits.size(); ++I) {
1243
CHRScope *Split = Splits[I];
1244
if (SplitsSplitFromOuter[I]) {
1245
// Split from the outer.
1246
Output.push_back(Split);
1247
Split->BranchInsertPoint = SplitsInsertPoints[I];
1248
CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1249
<< "\n");
1250
} else {
1251
// Connected to the outer.
1252
Result.push_back(Split);
1253
}
1254
}
1255
if (!Outer)
1256
assert(Result.empty() &&
1257
"If no outer (top-level), must return no nested ones");
1258
return Result;
1259
}
1260
1261
void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1262
for (CHRScope *Scope : Scopes) {
1263
assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1264
classifyBiasedScopes(Scope, Scope);
1265
CHR_DEBUG(
1266
dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1267
dbgs() << "TrueBiasedRegions ";
1268
for (Region *R : Scope->TrueBiasedRegions) {
1269
dbgs() << R->getNameStr() << ", ";
1270
}
1271
dbgs() << "\n";
1272
dbgs() << "FalseBiasedRegions ";
1273
for (Region *R : Scope->FalseBiasedRegions) {
1274
dbgs() << R->getNameStr() << ", ";
1275
}
1276
dbgs() << "\n";
1277
dbgs() << "TrueBiasedSelects ";
1278
for (SelectInst *SI : Scope->TrueBiasedSelects) {
1279
dbgs() << *SI << ", ";
1280
}
1281
dbgs() << "\n";
1282
dbgs() << "FalseBiasedSelects ";
1283
for (SelectInst *SI : Scope->FalseBiasedSelects) {
1284
dbgs() << *SI << ", ";
1285
}
1286
dbgs() << "\n";);
1287
}
1288
}
1289
1290
void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1291
for (RegInfo &RI : Scope->RegInfos) {
1292
if (RI.HasBranch) {
1293
Region *R = RI.R;
1294
if (TrueBiasedRegionsGlobal.contains(R))
1295
OutermostScope->TrueBiasedRegions.insert(R);
1296
else if (FalseBiasedRegionsGlobal.contains(R))
1297
OutermostScope->FalseBiasedRegions.insert(R);
1298
else
1299
llvm_unreachable("Must be biased");
1300
}
1301
for (SelectInst *SI : RI.Selects) {
1302
if (TrueBiasedSelectsGlobal.contains(SI))
1303
OutermostScope->TrueBiasedSelects.insert(SI);
1304
else if (FalseBiasedSelectsGlobal.contains(SI))
1305
OutermostScope->FalseBiasedSelects.insert(SI);
1306
else
1307
llvm_unreachable("Must be biased");
1308
}
1309
}
1310
for (CHRScope *Sub : Scope->Subs) {
1311
classifyBiasedScopes(Sub, OutermostScope);
1312
}
1313
}
1314
1315
static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1316
unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1317
Scope->FalseBiasedRegions.size() +
1318
Scope->TrueBiasedSelects.size() +
1319
Scope->FalseBiasedSelects.size();
1320
return NumBiased >= CHRMergeThreshold;
1321
}
1322
1323
void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1324
SmallVectorImpl<CHRScope *> &Output) {
1325
for (CHRScope *Scope : Input) {
1326
// Filter out the ones with only one region and no subs.
1327
if (!hasAtLeastTwoBiasedBranches(Scope)) {
1328
CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1329
<< Scope->TrueBiasedRegions.size()
1330
<< " falsy-regions " << Scope->FalseBiasedRegions.size()
1331
<< " true-selects " << Scope->TrueBiasedSelects.size()
1332
<< " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1333
ORE.emit([&]() {
1334
return OptimizationRemarkMissed(
1335
DEBUG_TYPE,
1336
"DropScopeWithOneBranchOrSelect",
1337
Scope->RegInfos[0].R->getEntry()->getTerminator())
1338
<< "Drop scope with < "
1339
<< ore::NV("CHRMergeThreshold", CHRMergeThreshold)
1340
<< " biased branch(es) or select(s)";
1341
});
1342
continue;
1343
}
1344
Output.push_back(Scope);
1345
}
1346
}
1347
1348
void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1349
SmallVectorImpl<CHRScope *> &Output) {
1350
for (CHRScope *Scope : Input) {
1351
assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1352
"Empty");
1353
setCHRRegions(Scope, Scope);
1354
Output.push_back(Scope);
1355
CHR_DEBUG(
1356
dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1357
for (auto pair : Scope->HoistStopMap) {
1358
Region *R = pair.first;
1359
dbgs() << "Region " << R->getNameStr() << "\n";
1360
for (Instruction *I : pair.second) {
1361
dbgs() << "HoistStop " << *I << "\n";
1362
}
1363
}
1364
dbgs() << "CHRRegions" << "\n";
1365
for (RegInfo &RI : Scope->CHRRegions) {
1366
dbgs() << RI.R->getNameStr() << "\n";
1367
});
1368
}
1369
}
1370
1371
void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1372
DenseSet<Instruction *> Unhoistables;
1373
// Put the biased selects in Unhoistables because they should stay where they
1374
// are and constant-folded after CHR (in case one biased select or a branch
1375
// can depend on another biased select.)
1376
for (RegInfo &RI : Scope->RegInfos) {
1377
for (SelectInst *SI : RI.Selects) {
1378
Unhoistables.insert(SI);
1379
}
1380
}
1381
Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1382
for (RegInfo &RI : Scope->RegInfos) {
1383
Region *R = RI.R;
1384
DenseSet<Instruction *> HoistStops;
1385
bool IsHoisted = false;
1386
if (RI.HasBranch) {
1387
assert((OutermostScope->TrueBiasedRegions.contains(R) ||
1388
OutermostScope->FalseBiasedRegions.contains(R)) &&
1389
"Must be truthy or falsy");
1390
auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1391
// Note checkHoistValue fills in HoistStops.
1392
DenseMap<Instruction *, bool> Visited;
1393
bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1394
Unhoistables, &HoistStops, Visited);
1395
assert(IsHoistable && "Must be hoistable");
1396
(void)(IsHoistable); // Unused in release build
1397
IsHoisted = true;
1398
}
1399
for (SelectInst *SI : RI.Selects) {
1400
assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
1401
OutermostScope->FalseBiasedSelects.contains(SI)) &&
1402
"Must be true or false biased");
1403
// Note checkHoistValue fills in HoistStops.
1404
DenseMap<Instruction *, bool> Visited;
1405
bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1406
Unhoistables, &HoistStops, Visited);
1407
assert(IsHoistable && "Must be hoistable");
1408
(void)(IsHoistable); // Unused in release build
1409
IsHoisted = true;
1410
}
1411
if (IsHoisted) {
1412
OutermostScope->CHRRegions.push_back(RI);
1413
OutermostScope->HoistStopMap[R] = HoistStops;
1414
}
1415
}
1416
for (CHRScope *Sub : Scope->Subs)
1417
setCHRRegions(Sub, OutermostScope);
1418
}
1419
1420
static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1421
return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1422
}
1423
1424
void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1425
SmallVectorImpl<CHRScope *> &Output) {
1426
Output.resize(Input.size());
1427
llvm::copy(Input, Output.begin());
1428
llvm::stable_sort(Output, CHRScopeSorter);
1429
}
1430
1431
// Return true if V is already hoisted or was hoisted (along with its operands)
1432
// to the insert point.
1433
static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1434
HoistStopMapTy &HoistStopMap,
1435
DenseSet<Instruction *> &HoistedSet,
1436
DenseSet<PHINode *> &TrivialPHIs,
1437
DominatorTree &DT) {
1438
auto IT = HoistStopMap.find(R);
1439
assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1440
DenseSet<Instruction *> &HoistStops = IT->second;
1441
if (auto *I = dyn_cast<Instruction>(V)) {
1442
if (I == HoistPoint)
1443
return;
1444
if (HoistStops.count(I))
1445
return;
1446
if (auto *PN = dyn_cast<PHINode>(I))
1447
if (TrivialPHIs.count(PN))
1448
// The trivial phi inserted by the previous CHR scope could replace a
1449
// non-phi in HoistStops. Note that since this phi is at the exit of a
1450
// previous CHR scope, which dominates this scope, it's safe to stop
1451
// hoisting there.
1452
return;
1453
if (HoistedSet.count(I))
1454
// Already hoisted, return.
1455
return;
1456
assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1457
assert(DT.getNode(I->getParent()) && "DT must contain I's block");
1458
assert(DT.getNode(HoistPoint->getParent()) &&
1459
"DT must contain HoistPoint block");
1460
if (DT.dominates(I, HoistPoint))
1461
// We are already above the hoist point. Stop here. This may be necessary
1462
// when multiple scopes would independently hoist the same
1463
// instruction. Since an outer (dominating) scope would hoist it to its
1464
// entry before an inner (dominated) scope would to its entry, the inner
1465
// scope may see the instruction already hoisted, in which case it
1466
// potentially wrong for the inner scope to hoist it and could cause bad
1467
// IR (non-dominating def), but safe to skip hoisting it instead because
1468
// it's already in a block that dominates the inner scope.
1469
return;
1470
for (Value *Op : I->operands()) {
1471
hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
1472
}
1473
I->moveBefore(HoistPoint);
1474
HoistedSet.insert(I);
1475
CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1476
}
1477
}
1478
1479
// Hoist the dependent condition values of the branches and the selects in the
1480
// scope to the insert point.
1481
static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1482
DenseSet<PHINode *> &TrivialPHIs,
1483
DominatorTree &DT) {
1484
DenseSet<Instruction *> HoistedSet;
1485
for (const RegInfo &RI : Scope->CHRRegions) {
1486
Region *R = RI.R;
1487
bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1488
bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1489
if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1490
auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1491
hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1492
HoistedSet, TrivialPHIs, DT);
1493
}
1494
for (SelectInst *SI : RI.Selects) {
1495
bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1496
bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1497
if (!(IsTrueBiased || IsFalseBiased))
1498
continue;
1499
hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1500
HoistedSet, TrivialPHIs, DT);
1501
}
1502
}
1503
}
1504
1505
// Negate the predicate if an ICmp if it's used only by branches or selects by
1506
// swapping the operands of the branches or the selects. Returns true if success.
1507
static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
1508
Instruction *ExcludedUser,
1509
CHRScope *Scope) {
1510
for (User *U : ICmp->users()) {
1511
if (U == ExcludedUser)
1512
continue;
1513
if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1514
continue;
1515
if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1516
continue;
1517
return false;
1518
}
1519
for (User *U : ICmp->users()) {
1520
if (U == ExcludedUser)
1521
continue;
1522
if (auto *BI = dyn_cast<BranchInst>(U)) {
1523
assert(BI->isConditional() && "Must be conditional");
1524
BI->swapSuccessors();
1525
// Don't need to swap this in terms of
1526
// TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1527
// mean whehter the branch is likely go into the if-then rather than
1528
// successor0/successor1 and because we can tell which edge is the then or
1529
// the else one by comparing the destination to the region exit block.
1530
continue;
1531
}
1532
if (auto *SI = dyn_cast<SelectInst>(U)) {
1533
// Swap operands
1534
SI->swapValues();
1535
SI->swapProfMetadata();
1536
if (Scope->TrueBiasedSelects.count(SI)) {
1537
assert(!Scope->FalseBiasedSelects.contains(SI) &&
1538
"Must not be already in");
1539
Scope->FalseBiasedSelects.insert(SI);
1540
} else if (Scope->FalseBiasedSelects.count(SI)) {
1541
assert(!Scope->TrueBiasedSelects.contains(SI) &&
1542
"Must not be already in");
1543
Scope->TrueBiasedSelects.insert(SI);
1544
}
1545
continue;
1546
}
1547
llvm_unreachable("Must be a branch or a select");
1548
}
1549
ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
1550
return true;
1551
}
1552
1553
// A helper for transformScopes. Insert a trivial phi at the scope exit block
1554
// for a value that's defined in the scope but used outside it (meaning it's
1555
// alive at the exit block).
1556
static void insertTrivialPHIs(CHRScope *Scope,
1557
BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1558
DenseSet<PHINode *> &TrivialPHIs) {
1559
SmallSetVector<BasicBlock *, 8> BlocksInScope;
1560
for (RegInfo &RI : Scope->RegInfos) {
1561
for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1562
// sub-Scopes.
1563
BlocksInScope.insert(BB);
1564
}
1565
}
1566
CHR_DEBUG({
1567
dbgs() << "Inserting redundant phis\n";
1568
for (BasicBlock *BB : BlocksInScope)
1569
dbgs() << "BlockInScope " << BB->getName() << "\n";
1570
});
1571
for (BasicBlock *BB : BlocksInScope) {
1572
for (Instruction &I : *BB) {
1573
SmallVector<Instruction *, 8> Users;
1574
for (User *U : I.users()) {
1575
if (auto *UI = dyn_cast<Instruction>(U)) {
1576
if (!BlocksInScope.contains(UI->getParent()) &&
1577
// Unless there's already a phi for I at the exit block.
1578
!(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1579
CHR_DEBUG(dbgs() << "V " << I << "\n");
1580
CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1581
Users.push_back(UI);
1582
} else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1583
// There's a loop backedge from a block that's dominated by this
1584
// scope to the entry block.
1585
CHR_DEBUG(dbgs() << "V " << I << "\n");
1586
CHR_DEBUG(dbgs()
1587
<< "Used at entry block (for a back edge) by a phi user "
1588
<< *UI << "\n");
1589
Users.push_back(UI);
1590
}
1591
}
1592
}
1593
if (Users.size() > 0) {
1594
// Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1595
// ExitBlock. Replace I with the new phi in UI unless UI is another
1596
// phi at ExitBlock.
1597
PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "");
1598
PN->insertBefore(ExitBlock->begin());
1599
for (BasicBlock *Pred : predecessors(ExitBlock)) {
1600
PN->addIncoming(&I, Pred);
1601
}
1602
TrivialPHIs.insert(PN);
1603
CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1604
for (Instruction *UI : Users) {
1605
for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1606
if (UI->getOperand(J) == &I) {
1607
UI->setOperand(J, PN);
1608
}
1609
}
1610
CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1611
}
1612
}
1613
}
1614
}
1615
}
1616
1617
// Assert that all the CHR regions of the scope have a biased branch or select.
1618
static void LLVM_ATTRIBUTE_UNUSED
1619
assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
1620
#ifndef NDEBUG
1621
auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1622
if (Scope->TrueBiasedRegions.count(RI.R) ||
1623
Scope->FalseBiasedRegions.count(RI.R))
1624
return true;
1625
for (SelectInst *SI : RI.Selects)
1626
if (Scope->TrueBiasedSelects.count(SI) ||
1627
Scope->FalseBiasedSelects.count(SI))
1628
return true;
1629
return false;
1630
};
1631
for (RegInfo &RI : Scope->CHRRegions) {
1632
assert(HasBiasedBranchOrSelect(RI, Scope) &&
1633
"Must have biased branch or select");
1634
}
1635
#endif
1636
}
1637
1638
// Assert that all the condition values of the biased branches and selects have
1639
// been hoisted to the pre-entry block or outside of the scope.
1640
static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
1641
CHRScope *Scope, BasicBlock *PreEntryBlock) {
1642
CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1643
for (RegInfo &RI : Scope->CHRRegions) {
1644
Region *R = RI.R;
1645
bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1646
bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1647
if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1648
auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1649
Value *V = BI->getCondition();
1650
CHR_DEBUG(dbgs() << *V << "\n");
1651
if (auto *I = dyn_cast<Instruction>(V)) {
1652
(void)(I); // Unused in release build.
1653
assert((I->getParent() == PreEntryBlock ||
1654
!Scope->contains(I)) &&
1655
"Must have been hoisted to PreEntryBlock or outside the scope");
1656
}
1657
}
1658
for (SelectInst *SI : RI.Selects) {
1659
bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1660
bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1661
if (!(IsTrueBiased || IsFalseBiased))
1662
continue;
1663
Value *V = SI->getCondition();
1664
CHR_DEBUG(dbgs() << *V << "\n");
1665
if (auto *I = dyn_cast<Instruction>(V)) {
1666
(void)(I); // Unused in release build.
1667
assert((I->getParent() == PreEntryBlock ||
1668
!Scope->contains(I)) &&
1669
"Must have been hoisted to PreEntryBlock or outside the scope");
1670
}
1671
}
1672
}
1673
}
1674
1675
void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1676
CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1677
1678
assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1679
1680
for (RegInfo &RI : Scope->RegInfos) {
1681
const Region *R = RI.R;
1682
unsigned Duplication = getRegionDuplicationCount(R);
1683
CHR_DEBUG(dbgs() << "Dup count for R=" << R << " is " << Duplication
1684
<< "\n");
1685
if (Duplication >= CHRDupThreshsold) {
1686
CHR_DEBUG(dbgs() << "Reached the dup threshold of " << Duplication
1687
<< " for this region");
1688
ORE.emit([&]() {
1689
return OptimizationRemarkMissed(DEBUG_TYPE, "DupThresholdReached",
1690
R->getEntry()->getTerminator())
1691
<< "Reached the duplication threshold for the region";
1692
});
1693
return;
1694
}
1695
}
1696
for (RegInfo &RI : Scope->RegInfos) {
1697
DuplicationCount[RI.R]++;
1698
}
1699
1700
Region *FirstRegion = Scope->RegInfos[0].R;
1701
BasicBlock *EntryBlock = FirstRegion->getEntry();
1702
Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1703
BasicBlock *ExitBlock = LastRegion->getExit();
1704
std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1705
1706
if (ExitBlock) {
1707
// Insert a trivial phi at the exit block (where the CHR hot path and the
1708
// cold path merges) for a value that's defined in the scope but used
1709
// outside it (meaning it's alive at the exit block). We will add the
1710
// incoming values for the CHR cold paths to it below. Without this, we'd
1711
// miss updating phi's for such values unless there happens to already be a
1712
// phi for that value there.
1713
insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1714
}
1715
1716
// Split the entry block of the first region. The new block becomes the new
1717
// entry block of the first region. The old entry block becomes the block to
1718
// insert the CHR branch into. Note DT gets updated. Since DT gets updated
1719
// through the split, we update the entry of the first region after the split,
1720
// and Region only points to the entry and the exit blocks, rather than
1721
// keeping everything in a list or set, the blocks membership and the
1722
// entry/exit blocks of the region are still valid after the split.
1723
CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1724
<< " at " << *Scope->BranchInsertPoint << "\n");
1725
BasicBlock *NewEntryBlock =
1726
SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1727
assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1728
"NewEntryBlock's only pred must be EntryBlock");
1729
FirstRegion->replaceEntryRecursive(NewEntryBlock);
1730
BasicBlock *PreEntryBlock = EntryBlock;
1731
1732
ValueToValueMapTy VMap;
1733
// Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1734
// hot path (originals) and a cold path (clones) and update the PHIs at the
1735
// exit block.
1736
cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1737
1738
// Replace the old (placeholder) branch with the new (merged) conditional
1739
// branch.
1740
BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1741
NewEntryBlock, VMap);
1742
1743
#ifndef NDEBUG
1744
assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
1745
#endif
1746
1747
// Hoist the conditional values of the branches/selects.
1748
hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
1749
1750
#ifndef NDEBUG
1751
assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1752
#endif
1753
1754
// Create the combined branch condition and constant-fold the branches/selects
1755
// in the hot path.
1756
fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1757
ProfileCount.value_or(0));
1758
}
1759
1760
// A helper for transformScopes. Clone the blocks in the scope (excluding the
1761
// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1762
// at the exit block.
1763
void CHR::cloneScopeBlocks(CHRScope *Scope,
1764
BasicBlock *PreEntryBlock,
1765
BasicBlock *ExitBlock,
1766
Region *LastRegion,
1767
ValueToValueMapTy &VMap) {
1768
// Clone all the blocks. The original blocks will be the hot-path
1769
// CHR-optimized code and the cloned blocks will be the original unoptimized
1770
// code. This is so that the block pointers from the
1771
// CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1772
// which CHR should apply to.
1773
SmallVector<BasicBlock*, 8> NewBlocks;
1774
for (RegInfo &RI : Scope->RegInfos)
1775
for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1776
// sub-Scopes.
1777
assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1778
BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1779
NewBlocks.push_back(NewBB);
1780
VMap[BB] = NewBB;
1781
1782
// Unreachable predecessors will not be cloned and will not have an edge
1783
// to the cloned block. As such, also remove them from any phi nodes.
1784
for (PHINode &PN : make_early_inc_range(NewBB->phis()))
1785
PN.removeIncomingValueIf([&](unsigned Idx) {
1786
return !DT.isReachableFromEntry(PN.getIncomingBlock(Idx));
1787
});
1788
}
1789
1790
// Place the cloned blocks right after the original blocks (right before the
1791
// exit block of.)
1792
if (ExitBlock)
1793
F.splice(ExitBlock->getIterator(), &F, NewBlocks[0]->getIterator(),
1794
F.end());
1795
1796
// Update the cloned blocks/instructions to refer to themselves.
1797
for (BasicBlock *NewBB : NewBlocks)
1798
for (Instruction &I : *NewBB)
1799
RemapInstruction(&I, VMap,
1800
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1801
1802
// Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1803
// the top-level region but we don't need to add PHIs. The trivial PHIs
1804
// inserted above will be updated here.
1805
if (ExitBlock)
1806
for (PHINode &PN : ExitBlock->phis())
1807
for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1808
++I) {
1809
BasicBlock *Pred = PN.getIncomingBlock(I);
1810
if (LastRegion->contains(Pred)) {
1811
Value *V = PN.getIncomingValue(I);
1812
auto It = VMap.find(V);
1813
if (It != VMap.end()) V = It->second;
1814
assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1815
PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1816
}
1817
}
1818
}
1819
1820
// A helper for transformScope. Replace the old (placeholder) branch with the
1821
// new (merged) conditional branch.
1822
BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1823
BasicBlock *EntryBlock,
1824
BasicBlock *NewEntryBlock,
1825
ValueToValueMapTy &VMap) {
1826
BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1827
assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1828
"SplitBlock did not work correctly!");
1829
assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1830
"NewEntryBlock's only pred must be EntryBlock");
1831
assert(VMap.find(NewEntryBlock) != VMap.end() &&
1832
"NewEntryBlock must have been copied");
1833
OldBR->dropAllReferences();
1834
OldBR->eraseFromParent();
1835
// The true predicate is a placeholder. It will be replaced later in
1836
// fixupBranchesAndSelects().
1837
BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1838
cast<BasicBlock>(VMap[NewEntryBlock]),
1839
ConstantInt::getTrue(F.getContext()));
1840
NewBR->insertInto(PreEntryBlock, PreEntryBlock->end());
1841
assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1842
"NewEntryBlock's only pred must be EntryBlock");
1843
return NewBR;
1844
}
1845
1846
// A helper for transformScopes. Create the combined branch condition and
1847
// constant-fold the branches/selects in the hot path.
1848
void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1849
BasicBlock *PreEntryBlock,
1850
BranchInst *MergedBR,
1851
uint64_t ProfileCount) {
1852
Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1853
BranchProbability CHRBranchBias(1, 1);
1854
uint64_t NumCHRedBranches = 0;
1855
IRBuilder<> IRB(PreEntryBlock->getTerminator());
1856
for (RegInfo &RI : Scope->CHRRegions) {
1857
Region *R = RI.R;
1858
if (RI.HasBranch) {
1859
fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1860
++NumCHRedBranches;
1861
}
1862
for (SelectInst *SI : RI.Selects) {
1863
fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1864
++NumCHRedBranches;
1865
}
1866
}
1867
Stats.NumBranchesDelta += NumCHRedBranches - 1;
1868
Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1869
ORE.emit([&]() {
1870
return OptimizationRemark(DEBUG_TYPE,
1871
"CHR",
1872
// Refer to the hot (original) path
1873
MergedBR->getSuccessor(0)->getTerminator())
1874
<< "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
1875
<< " branches or selects";
1876
});
1877
MergedBR->setCondition(MergedCondition);
1878
uint32_t Weights[] = {
1879
static_cast<uint32_t>(CHRBranchBias.scale(1000)),
1880
static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
1881
};
1882
setBranchWeights(*MergedBR, Weights, /*IsExpected=*/false);
1883
CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1884
<< "\n");
1885
}
1886
1887
// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1888
// and constant-fold a branch in the hot path.
1889
void CHR::fixupBranch(Region *R, CHRScope *Scope,
1890
IRBuilder<> &IRB,
1891
Value *&MergedCondition,
1892
BranchProbability &CHRBranchBias) {
1893
bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1894
assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1895
"Must be truthy or falsy");
1896
auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1897
assert(BranchBiasMap.contains(R) && "Must be in the bias map");
1898
BranchProbability Bias = BranchBiasMap[R];
1899
assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1900
// Take the min.
1901
if (CHRBranchBias > Bias)
1902
CHRBranchBias = Bias;
1903
BasicBlock *IfThen = BI->getSuccessor(1);
1904
BasicBlock *IfElse = BI->getSuccessor(0);
1905
BasicBlock *RegionExitBlock = R->getExit();
1906
assert(RegionExitBlock && "Null ExitBlock");
1907
assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1908
IfThen != IfElse && "Invariant from findScopes");
1909
if (IfThen == RegionExitBlock) {
1910
// Swap them so that IfThen means going into it and IfElse means skipping
1911
// it.
1912
std::swap(IfThen, IfElse);
1913
}
1914
CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1915
<< " IfElse " << IfElse->getName() << "\n");
1916
Value *Cond = BI->getCondition();
1917
BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1918
bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1919
addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1920
MergedCondition);
1921
// Constant-fold the branch at ClonedEntryBlock.
1922
assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1923
"The successor shouldn't change");
1924
Value *NewCondition = ConditionTrue ?
1925
ConstantInt::getTrue(F.getContext()) :
1926
ConstantInt::getFalse(F.getContext());
1927
BI->setCondition(NewCondition);
1928
}
1929
1930
// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1931
// and constant-fold a select in the hot path.
1932
void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1933
IRBuilder<> &IRB,
1934
Value *&MergedCondition,
1935
BranchProbability &CHRBranchBias) {
1936
bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1937
assert((IsTrueBiased ||
1938
Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1939
assert(SelectBiasMap.contains(SI) && "Must be in the bias map");
1940
BranchProbability Bias = SelectBiasMap[SI];
1941
assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1942
// Take the min.
1943
if (CHRBranchBias > Bias)
1944
CHRBranchBias = Bias;
1945
Value *Cond = SI->getCondition();
1946
addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1947
MergedCondition);
1948
Value *NewCondition = IsTrueBiased ?
1949
ConstantInt::getTrue(F.getContext()) :
1950
ConstantInt::getFalse(F.getContext());
1951
SI->setCondition(NewCondition);
1952
}
1953
1954
// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1955
// condition.
1956
void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1957
Instruction *BranchOrSelect, CHRScope *Scope,
1958
IRBuilder<> &IRB, Value *&MergedCondition) {
1959
if (!IsTrueBiased) {
1960
// If Cond is an icmp and all users of V except for BranchOrSelect is a
1961
// branch, negate the icmp predicate and swap the branch targets and avoid
1962
// inserting an Xor to negate Cond.
1963
auto *ICmp = dyn_cast<ICmpInst>(Cond);
1964
if (!ICmp ||
1965
!negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope))
1966
Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond);
1967
}
1968
1969
// Freeze potentially poisonous conditions.
1970
if (!isGuaranteedNotToBeUndefOrPoison(Cond))
1971
Cond = IRB.CreateFreeze(Cond);
1972
1973
// Use logical and to avoid propagating poison from later conditions.
1974
MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond);
1975
}
1976
1977
void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1978
unsigned I = 0;
1979
DenseSet<PHINode *> TrivialPHIs;
1980
for (CHRScope *Scope : CHRScopes) {
1981
transformScopes(Scope, TrivialPHIs);
1982
CHR_DEBUG(
1983
std::ostringstream oss;
1984
oss << " after transformScopes " << I++;
1985
dumpIR(F, oss.str().c_str(), nullptr));
1986
(void)I;
1987
}
1988
}
1989
1990
static void LLVM_ATTRIBUTE_UNUSED
1991
dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
1992
dbgs() << Label << " " << Scopes.size() << "\n";
1993
for (CHRScope *Scope : Scopes) {
1994
dbgs() << *Scope << "\n";
1995
}
1996
}
1997
1998
bool CHR::run() {
1999
if (!shouldApply(F, PSI))
2000
return false;
2001
2002
CHR_DEBUG(dumpIR(F, "before", nullptr));
2003
2004
bool Changed = false;
2005
{
2006
CHR_DEBUG(
2007
dbgs() << "RegionInfo:\n";
2008
RI.print(dbgs()));
2009
2010
// Recursively traverse the region tree and find regions that have biased
2011
// branches and/or selects and create scopes.
2012
SmallVector<CHRScope *, 8> AllScopes;
2013
findScopes(AllScopes);
2014
CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
2015
2016
// Split the scopes if 1) the conditional values of the biased
2017
// branches/selects of the inner/lower scope can't be hoisted up to the
2018
// outermost/uppermost scope entry, or 2) the condition values of the biased
2019
// branches/selects in a scope (including subscopes) don't share at least
2020
// one common value.
2021
SmallVector<CHRScope *, 8> SplitScopes;
2022
splitScopes(AllScopes, SplitScopes);
2023
CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
2024
2025
// After splitting, set the biased regions and selects of a scope (a tree
2026
// root) that include those of the subscopes.
2027
classifyBiasedScopes(SplitScopes);
2028
CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
2029
2030
// Filter out the scopes that has only one biased region or select (CHR
2031
// isn't useful in such a case).
2032
SmallVector<CHRScope *, 8> FilteredScopes;
2033
filterScopes(SplitScopes, FilteredScopes);
2034
CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
2035
2036
// Set the regions to be CHR'ed and their hoist stops for each scope.
2037
SmallVector<CHRScope *, 8> SetScopes;
2038
setCHRRegions(FilteredScopes, SetScopes);
2039
CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
2040
2041
// Sort CHRScopes by the depth so that outer CHRScopes comes before inner
2042
// ones. We need to apply CHR from outer to inner so that we apply CHR only
2043
// to the hot path, rather than both hot and cold paths.
2044
SmallVector<CHRScope *, 8> SortedScopes;
2045
sortScopes(SetScopes, SortedScopes);
2046
CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
2047
2048
CHR_DEBUG(
2049
dbgs() << "RegionInfo:\n";
2050
RI.print(dbgs()));
2051
2052
// Apply the CHR transformation.
2053
if (!SortedScopes.empty()) {
2054
transformScopes(SortedScopes);
2055
Changed = true;
2056
}
2057
}
2058
2059
if (Changed) {
2060
CHR_DEBUG(dumpIR(F, "after", &Stats));
2061
ORE.emit([&]() {
2062
return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
2063
<< ore::NV("Function", &F) << " "
2064
<< "Reduced the number of branches in hot paths by "
2065
<< ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
2066
<< " (static) and "
2067
<< ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
2068
<< " (weighted by PGO count)";
2069
});
2070
}
2071
2072
return Changed;
2073
}
2074
2075
namespace llvm {
2076
2077
ControlHeightReductionPass::ControlHeightReductionPass() {
2078
parseCHRFilterFiles();
2079
}
2080
2081
PreservedAnalyses ControlHeightReductionPass::run(
2082
Function &F,
2083
FunctionAnalysisManager &FAM) {
2084
auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
2085
auto PPSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2086
// If there is no profile summary, we should not do CHR.
2087
if (!PPSI || !PPSI->hasProfileSummary())
2088
return PreservedAnalyses::all();
2089
auto &PSI = *PPSI;
2090
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2091
auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2092
auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2093
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2094
bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
2095
if (!Changed)
2096
return PreservedAnalyses::all();
2097
return PreservedAnalyses::none();
2098
}
2099
2100
} // namespace llvm
2101
2102