Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
35294 views
1
//===- CallSiteSplitting.cpp ----------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements a transformation that tries to split a call-site to pass
10
// more constrained arguments if its argument is predicated in the control flow
11
// so that we can expose better context to the later passes (e.g, inliner, jump
12
// threading, or IPA-CP based function cloning, etc.).
13
// As of now we support two cases :
14
//
15
// 1) Try to a split call-site with constrained arguments, if any constraints
16
// on any argument can be found by following the single predecessors of the
17
// all site's predecessors. Currently this pass only handles call-sites with 2
18
// predecessors. For example, in the code below, we try to split the call-site
19
// since we can predicate the argument(ptr) based on the OR condition.
20
//
21
// Split from :
22
// if (!ptr || c)
23
// callee(ptr);
24
// to :
25
// if (!ptr)
26
// callee(null) // set the known constant value
27
// else if (c)
28
// callee(nonnull ptr) // set non-null attribute in the argument
29
//
30
// 2) We can also split a call-site based on constant incoming values of a PHI
31
// For example,
32
// from :
33
// Header:
34
// %c = icmp eq i32 %i1, %i2
35
// br i1 %c, label %Tail, label %TBB
36
// TBB:
37
// br label Tail%
38
// Tail:
39
// %p = phi i32 [ 0, %Header], [ 1, %TBB]
40
// call void @bar(i32 %p)
41
// to
42
// Header:
43
// %c = icmp eq i32 %i1, %i2
44
// br i1 %c, label %Tail-split0, label %TBB
45
// TBB:
46
// br label %Tail-split1
47
// Tail-split0:
48
// call void @bar(i32 0)
49
// br label %Tail
50
// Tail-split1:
51
// call void @bar(i32 1)
52
// br label %Tail
53
// Tail:
54
// %p = phi i32 [ 0, %Tail-split0 ], [ 1, %Tail-split1 ]
55
//
56
//===----------------------------------------------------------------------===//
57
58
#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
59
#include "llvm/ADT/Statistic.h"
60
#include "llvm/Analysis/DomTreeUpdater.h"
61
#include "llvm/Analysis/TargetLibraryInfo.h"
62
#include "llvm/Analysis/TargetTransformInfo.h"
63
#include "llvm/IR/IntrinsicInst.h"
64
#include "llvm/IR/PatternMatch.h"
65
#include "llvm/Support/CommandLine.h"
66
#include "llvm/Support/Debug.h"
67
#include "llvm/Transforms/Utils/Cloning.h"
68
#include "llvm/Transforms/Utils/Local.h"
69
70
using namespace llvm;
71
using namespace PatternMatch;
72
73
#define DEBUG_TYPE "callsite-splitting"
74
75
STATISTIC(NumCallSiteSplit, "Number of call-site split");
76
77
/// Only allow instructions before a call, if their CodeSize cost is below
78
/// DuplicationThreshold. Those instructions need to be duplicated in all
79
/// split blocks.
80
static cl::opt<unsigned>
81
DuplicationThreshold("callsite-splitting-duplication-threshold", cl::Hidden,
82
cl::desc("Only allow instructions before a call, if "
83
"their cost is below DuplicationThreshold"),
84
cl::init(5));
85
86
static void addNonNullAttribute(CallBase &CB, Value *Op) {
87
unsigned ArgNo = 0;
88
for (auto &I : CB.args()) {
89
if (&*I == Op)
90
CB.addParamAttr(ArgNo, Attribute::NonNull);
91
++ArgNo;
92
}
93
}
94
95
static void setConstantInArgument(CallBase &CB, Value *Op,
96
Constant *ConstValue) {
97
unsigned ArgNo = 0;
98
for (auto &I : CB.args()) {
99
if (&*I == Op) {
100
// It is possible we have already added the non-null attribute to the
101
// parameter by using an earlier constraining condition.
102
CB.removeParamAttr(ArgNo, Attribute::NonNull);
103
CB.setArgOperand(ArgNo, ConstValue);
104
}
105
++ArgNo;
106
}
107
}
108
109
static bool isCondRelevantToAnyCallArgument(ICmpInst *Cmp, CallBase &CB) {
110
assert(isa<Constant>(Cmp->getOperand(1)) && "Expected a constant operand.");
111
Value *Op0 = Cmp->getOperand(0);
112
unsigned ArgNo = 0;
113
for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I, ++ArgNo) {
114
// Don't consider constant or arguments that are already known non-null.
115
if (isa<Constant>(*I) || CB.paramHasAttr(ArgNo, Attribute::NonNull))
116
continue;
117
118
if (*I == Op0)
119
return true;
120
}
121
return false;
122
}
123
124
using ConditionTy = std::pair<ICmpInst *, unsigned>;
125
using ConditionsTy = SmallVector<ConditionTy, 2>;
126
127
/// If From has a conditional jump to To, add the condition to Conditions,
128
/// if it is relevant to any argument at CB.
129
static void recordCondition(CallBase &CB, BasicBlock *From, BasicBlock *To,
130
ConditionsTy &Conditions) {
131
auto *BI = dyn_cast<BranchInst>(From->getTerminator());
132
if (!BI || !BI->isConditional())
133
return;
134
135
CmpInst::Predicate Pred;
136
Value *Cond = BI->getCondition();
137
if (!match(Cond, m_ICmp(Pred, m_Value(), m_Constant())))
138
return;
139
140
ICmpInst *Cmp = cast<ICmpInst>(Cond);
141
if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)
142
if (isCondRelevantToAnyCallArgument(Cmp, CB))
143
Conditions.push_back({Cmp, From->getTerminator()->getSuccessor(0) == To
144
? Pred
145
: Cmp->getInversePredicate()});
146
}
147
148
/// Record ICmp conditions relevant to any argument in CB following Pred's
149
/// single predecessors. If there are conflicting conditions along a path, like
150
/// x == 1 and x == 0, the first condition will be used. We stop once we reach
151
/// an edge to StopAt.
152
static void recordConditions(CallBase &CB, BasicBlock *Pred,
153
ConditionsTy &Conditions, BasicBlock *StopAt) {
154
BasicBlock *From = Pred;
155
BasicBlock *To = Pred;
156
SmallPtrSet<BasicBlock *, 4> Visited;
157
while (To != StopAt && !Visited.count(From->getSinglePredecessor()) &&
158
(From = From->getSinglePredecessor())) {
159
recordCondition(CB, From, To, Conditions);
160
Visited.insert(From);
161
To = From;
162
}
163
}
164
165
static void addConditions(CallBase &CB, const ConditionsTy &Conditions) {
166
for (const auto &Cond : Conditions) {
167
Value *Arg = Cond.first->getOperand(0);
168
Constant *ConstVal = cast<Constant>(Cond.first->getOperand(1));
169
if (Cond.second == ICmpInst::ICMP_EQ)
170
setConstantInArgument(CB, Arg, ConstVal);
171
else if (ConstVal->getType()->isPointerTy() && ConstVal->isNullValue()) {
172
assert(Cond.second == ICmpInst::ICMP_NE);
173
addNonNullAttribute(CB, Arg);
174
}
175
}
176
}
177
178
static SmallVector<BasicBlock *, 2> getTwoPredecessors(BasicBlock *BB) {
179
SmallVector<BasicBlock *, 2> Preds(predecessors((BB)));
180
assert(Preds.size() == 2 && "Expected exactly 2 predecessors!");
181
return Preds;
182
}
183
184
static bool canSplitCallSite(CallBase &CB, TargetTransformInfo &TTI) {
185
if (CB.isConvergent() || CB.cannotDuplicate())
186
return false;
187
188
// FIXME: As of now we handle only CallInst. InvokeInst could be handled
189
// without too much effort.
190
if (!isa<CallInst>(CB))
191
return false;
192
193
BasicBlock *CallSiteBB = CB.getParent();
194
// Need 2 predecessors and cannot split an edge from an IndirectBrInst.
195
SmallVector<BasicBlock *, 2> Preds(predecessors(CallSiteBB));
196
if (Preds.size() != 2 || isa<IndirectBrInst>(Preds[0]->getTerminator()) ||
197
isa<IndirectBrInst>(Preds[1]->getTerminator()))
198
return false;
199
200
// BasicBlock::canSplitPredecessors is more aggressive, so checking for
201
// BasicBlock::isEHPad as well.
202
if (!CallSiteBB->canSplitPredecessors() || CallSiteBB->isEHPad())
203
return false;
204
205
// Allow splitting a call-site only when the CodeSize cost of the
206
// instructions before the call is less then DuplicationThreshold. The
207
// instructions before the call will be duplicated in the split blocks and
208
// corresponding uses will be updated.
209
InstructionCost Cost = 0;
210
for (auto &InstBeforeCall :
211
llvm::make_range(CallSiteBB->begin(), CB.getIterator())) {
212
Cost += TTI.getInstructionCost(&InstBeforeCall,
213
TargetTransformInfo::TCK_CodeSize);
214
if (Cost >= DuplicationThreshold)
215
return false;
216
}
217
218
return true;
219
}
220
221
static Instruction *cloneInstForMustTail(Instruction *I, Instruction *Before,
222
Value *V) {
223
Instruction *Copy = I->clone();
224
Copy->setName(I->getName());
225
Copy->insertBefore(Before);
226
if (V)
227
Copy->setOperand(0, V);
228
return Copy;
229
}
230
231
/// Copy mandatory `musttail` return sequence that follows original `CI`, and
232
/// link it up to `NewCI` value instead:
233
///
234
/// * (optional) `bitcast NewCI to ...`
235
/// * `ret bitcast or NewCI`
236
///
237
/// Insert this sequence right before `SplitBB`'s terminator, which will be
238
/// cleaned up later in `splitCallSite` below.
239
static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI,
240
Instruction *NewCI) {
241
bool IsVoid = SplitBB->getParent()->getReturnType()->isVoidTy();
242
auto II = std::next(CI->getIterator());
243
244
BitCastInst* BCI = dyn_cast<BitCastInst>(&*II);
245
if (BCI)
246
++II;
247
248
ReturnInst* RI = dyn_cast<ReturnInst>(&*II);
249
assert(RI && "`musttail` call must be followed by `ret` instruction");
250
251
Instruction *TI = SplitBB->getTerminator();
252
Value *V = NewCI;
253
if (BCI)
254
V = cloneInstForMustTail(BCI, TI, V);
255
cloneInstForMustTail(RI, TI, IsVoid ? nullptr : V);
256
257
// FIXME: remove TI here, `DuplicateInstructionsInSplitBetween` has a bug
258
// that prevents doing this now.
259
}
260
261
/// For each (predecessor, conditions from predecessors) pair, it will split the
262
/// basic block containing the call site, hook it up to the predecessor and
263
/// replace the call instruction with new call instructions, which contain
264
/// constraints based on the conditions from their predecessors.
265
/// For example, in the IR below with an OR condition, the call-site can
266
/// be split. In this case, Preds for Tail is [(Header, a == null),
267
/// (TBB, a != null, b == null)]. Tail is replaced by 2 split blocks, containing
268
/// CallInst1, which has constraints based on the conditions from Head and
269
/// CallInst2, which has constraints based on the conditions coming from TBB.
270
///
271
/// From :
272
///
273
/// Header:
274
/// %c = icmp eq i32* %a, null
275
/// br i1 %c %Tail, %TBB
276
/// TBB:
277
/// %c2 = icmp eq i32* %b, null
278
/// br i1 %c %Tail, %End
279
/// Tail:
280
/// %ca = call i1 @callee (i32* %a, i32* %b)
281
///
282
/// to :
283
///
284
/// Header: // PredBB1 is Header
285
/// %c = icmp eq i32* %a, null
286
/// br i1 %c %Tail-split1, %TBB
287
/// TBB: // PredBB2 is TBB
288
/// %c2 = icmp eq i32* %b, null
289
/// br i1 %c %Tail-split2, %End
290
/// Tail-split1:
291
/// %ca1 = call @callee (i32* null, i32* %b) // CallInst1
292
/// br %Tail
293
/// Tail-split2:
294
/// %ca2 = call @callee (i32* nonnull %a, i32* null) // CallInst2
295
/// br %Tail
296
/// Tail:
297
/// %p = phi i1 [%ca1, %Tail-split1],[%ca2, %Tail-split2]
298
///
299
/// Note that in case any arguments at the call-site are constrained by its
300
/// predecessors, new call-sites with more constrained arguments will be
301
/// created in createCallSitesOnPredicatedArgument().
302
static void splitCallSite(CallBase &CB,
303
ArrayRef<std::pair<BasicBlock *, ConditionsTy>> Preds,
304
DomTreeUpdater &DTU) {
305
BasicBlock *TailBB = CB.getParent();
306
bool IsMustTailCall = CB.isMustTailCall();
307
308
PHINode *CallPN = nullptr;
309
310
// `musttail` calls must be followed by optional `bitcast`, and `ret`. The
311
// split blocks will be terminated right after that so there're no users for
312
// this phi in a `TailBB`.
313
if (!IsMustTailCall && !CB.use_empty()) {
314
CallPN = PHINode::Create(CB.getType(), Preds.size(), "phi.call");
315
CallPN->setDebugLoc(CB.getDebugLoc());
316
}
317
318
LLVM_DEBUG(dbgs() << "split call-site : " << CB << " into \n");
319
320
assert(Preds.size() == 2 && "The ValueToValueMaps array has size 2.");
321
// ValueToValueMapTy is neither copy nor moveable, so we use a simple array
322
// here.
323
ValueToValueMapTy ValueToValueMaps[2];
324
for (unsigned i = 0; i < Preds.size(); i++) {
325
BasicBlock *PredBB = Preds[i].first;
326
BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween(
327
TailBB, PredBB, &*std::next(CB.getIterator()), ValueToValueMaps[i],
328
DTU);
329
assert(SplitBlock && "Unexpected new basic block split.");
330
331
auto *NewCI =
332
cast<CallBase>(&*std::prev(SplitBlock->getTerminator()->getIterator()));
333
addConditions(*NewCI, Preds[i].second);
334
335
// Handle PHIs used as arguments in the call-site.
336
for (PHINode &PN : TailBB->phis()) {
337
unsigned ArgNo = 0;
338
for (auto &CI : CB.args()) {
339
if (&*CI == &PN) {
340
NewCI->setArgOperand(ArgNo, PN.getIncomingValueForBlock(SplitBlock));
341
}
342
++ArgNo;
343
}
344
}
345
LLVM_DEBUG(dbgs() << " " << *NewCI << " in " << SplitBlock->getName()
346
<< "\n");
347
if (CallPN)
348
CallPN->addIncoming(NewCI, SplitBlock);
349
350
// Clone and place bitcast and return instructions before `TI`
351
if (IsMustTailCall)
352
copyMustTailReturn(SplitBlock, &CB, NewCI);
353
}
354
355
NumCallSiteSplit++;
356
357
// FIXME: remove TI in `copyMustTailReturn`
358
if (IsMustTailCall) {
359
// Remove superfluous `br` terminators from the end of the Split blocks
360
// NOTE: Removing terminator removes the SplitBlock from the TailBB's
361
// predecessors. Therefore we must get complete list of Splits before
362
// attempting removal.
363
SmallVector<BasicBlock *, 2> Splits(predecessors((TailBB)));
364
assert(Splits.size() == 2 && "Expected exactly 2 splits!");
365
for (BasicBlock *BB : Splits) {
366
BB->getTerminator()->eraseFromParent();
367
DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, TailBB}});
368
}
369
370
// Erase the tail block once done with musttail patching
371
DTU.deleteBB(TailBB);
372
return;
373
}
374
375
BasicBlock::iterator OriginalBegin = TailBB->begin();
376
// Replace users of the original call with a PHI mering call-sites split.
377
if (CallPN) {
378
CallPN->insertBefore(*TailBB, OriginalBegin);
379
CB.replaceAllUsesWith(CallPN);
380
}
381
382
// Remove instructions moved to split blocks from TailBB, from the duplicated
383
// call instruction to the beginning of the basic block. If an instruction
384
// has any uses, add a new PHI node to combine the values coming from the
385
// split blocks. The new PHI nodes are placed before the first original
386
// instruction, so we do not end up deleting them. By using reverse-order, we
387
// do not introduce unnecessary PHI nodes for def-use chains from the call
388
// instruction to the beginning of the block.
389
auto I = CB.getReverseIterator();
390
Instruction *OriginalBeginInst = &*OriginalBegin;
391
while (I != TailBB->rend()) {
392
Instruction *CurrentI = &*I++;
393
if (!CurrentI->use_empty()) {
394
// If an existing PHI has users after the call, there is no need to create
395
// a new one.
396
if (isa<PHINode>(CurrentI))
397
continue;
398
PHINode *NewPN = PHINode::Create(CurrentI->getType(), Preds.size());
399
NewPN->setDebugLoc(CurrentI->getDebugLoc());
400
for (auto &Mapping : ValueToValueMaps)
401
NewPN->addIncoming(Mapping[CurrentI],
402
cast<Instruction>(Mapping[CurrentI])->getParent());
403
NewPN->insertBefore(*TailBB, TailBB->begin());
404
CurrentI->replaceAllUsesWith(NewPN);
405
}
406
CurrentI->dropDbgRecords();
407
CurrentI->eraseFromParent();
408
// We are done once we handled the first original instruction in TailBB.
409
if (CurrentI == OriginalBeginInst)
410
break;
411
}
412
}
413
414
// Return true if the call-site has an argument which is a PHI with only
415
// constant incoming values.
416
static bool isPredicatedOnPHI(CallBase &CB) {
417
BasicBlock *Parent = CB.getParent();
418
if (&CB != Parent->getFirstNonPHIOrDbg())
419
return false;
420
421
for (auto &PN : Parent->phis()) {
422
for (auto &Arg : CB.args()) {
423
if (&*Arg != &PN)
424
continue;
425
assert(PN.getNumIncomingValues() == 2 &&
426
"Unexpected number of incoming values");
427
if (PN.getIncomingBlock(0) == PN.getIncomingBlock(1))
428
return false;
429
if (PN.getIncomingValue(0) == PN.getIncomingValue(1))
430
continue;
431
if (isa<Constant>(PN.getIncomingValue(0)) &&
432
isa<Constant>(PN.getIncomingValue(1)))
433
return true;
434
}
435
}
436
return false;
437
}
438
439
using PredsWithCondsTy = SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2>;
440
441
// Check if any of the arguments in CS are predicated on a PHI node and return
442
// the set of predecessors we should use for splitting.
443
static PredsWithCondsTy shouldSplitOnPHIPredicatedArgument(CallBase &CB) {
444
if (!isPredicatedOnPHI(CB))
445
return {};
446
447
auto Preds = getTwoPredecessors(CB.getParent());
448
return {{Preds[0], {}}, {Preds[1], {}}};
449
}
450
451
// Checks if any of the arguments in CS are predicated in a predecessor and
452
// returns a list of predecessors with the conditions that hold on their edges
453
// to CS.
454
static PredsWithCondsTy shouldSplitOnPredicatedArgument(CallBase &CB,
455
DomTreeUpdater &DTU) {
456
auto Preds = getTwoPredecessors(CB.getParent());
457
if (Preds[0] == Preds[1])
458
return {};
459
460
// We can stop recording conditions once we reached the immediate dominator
461
// for the block containing the call site. Conditions in predecessors of the
462
// that node will be the same for all paths to the call site and splitting
463
// is not beneficial.
464
assert(DTU.hasDomTree() && "We need a DTU with a valid DT!");
465
auto *CSDTNode = DTU.getDomTree().getNode(CB.getParent());
466
BasicBlock *StopAt = CSDTNode ? CSDTNode->getIDom()->getBlock() : nullptr;
467
468
SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS;
469
for (auto *Pred : llvm::reverse(Preds)) {
470
ConditionsTy Conditions;
471
// Record condition on edge BB(CS) <- Pred
472
recordCondition(CB, Pred, CB.getParent(), Conditions);
473
// Record conditions following Pred's single predecessors.
474
recordConditions(CB, Pred, Conditions, StopAt);
475
PredsCS.push_back({Pred, Conditions});
476
}
477
478
if (all_of(PredsCS, [](const std::pair<BasicBlock *, ConditionsTy> &P) {
479
return P.second.empty();
480
}))
481
return {};
482
483
return PredsCS;
484
}
485
486
static bool tryToSplitCallSite(CallBase &CB, TargetTransformInfo &TTI,
487
DomTreeUpdater &DTU) {
488
// Check if we can split the call site.
489
if (!CB.arg_size() || !canSplitCallSite(CB, TTI))
490
return false;
491
492
auto PredsWithConds = shouldSplitOnPredicatedArgument(CB, DTU);
493
if (PredsWithConds.empty())
494
PredsWithConds = shouldSplitOnPHIPredicatedArgument(CB);
495
if (PredsWithConds.empty())
496
return false;
497
498
splitCallSite(CB, PredsWithConds, DTU);
499
return true;
500
}
501
502
static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
503
TargetTransformInfo &TTI, DominatorTree &DT) {
504
505
DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
506
bool Changed = false;
507
for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
508
auto II = BB.getFirstNonPHIOrDbg()->getIterator();
509
auto IE = BB.getTerminator()->getIterator();
510
// Iterate until we reach the terminator instruction. tryToSplitCallSite
511
// can replace BB's terminator in case BB is a successor of itself. In that
512
// case, IE will be invalidated and we also have to check the current
513
// terminator.
514
while (II != IE && &*II != BB.getTerminator()) {
515
CallBase *CB = dyn_cast<CallBase>(&*II++);
516
if (!CB || isa<IntrinsicInst>(CB) || isInstructionTriviallyDead(CB, &TLI))
517
continue;
518
519
Function *Callee = CB->getCalledFunction();
520
if (!Callee || Callee->isDeclaration())
521
continue;
522
523
// Successful musttail call-site splits result in erased CI and erased BB.
524
// Check if such path is possible before attempting the splitting.
525
bool IsMustTail = CB->isMustTailCall();
526
527
Changed |= tryToSplitCallSite(*CB, TTI, DTU);
528
529
// There're no interesting instructions after this. The call site
530
// itself might have been erased on splitting.
531
if (IsMustTail)
532
break;
533
}
534
}
535
return Changed;
536
}
537
538
PreservedAnalyses CallSiteSplittingPass::run(Function &F,
539
FunctionAnalysisManager &AM) {
540
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
541
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
542
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
543
544
if (!doCallSiteSplitting(F, TLI, TTI, DT))
545
return PreservedAnalyses::all();
546
PreservedAnalyses PA;
547
PA.preserve<DominatorTreeAnalysis>();
548
return PA;
549
}
550
551