GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
//===- InlineFunction.cpp - Code to perform function inlining -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inlining of a function into a call site, resolving
// parameters and the return value as appropriate.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#define DEBUG_TYPE "inline-function"

using namespace llvm;
using namespace llvm::memprof;
using ProfileCount = Function::ProfileCount;

static cl::opt<bool>
    EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
                            cl::Hidden,
                            cl::desc("Convert noalias attributes to metadata during inlining."));

static cl::opt<bool>
    UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
                        cl::init(true),
                        cl::desc("Use the llvm.experimental.noalias.scope.decl "
                                 "intrinsic during inlining."));

// Disabled by default, because the added alignment assumptions may increase
// compile-time and block optimizations. This option is not suitable for use
// with frontends that emit comprehensive parameter alignment annotations.
static cl::opt<bool>
    PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
                                 cl::init(false), cl::Hidden,
                                 cl::desc("Convert align attributes to assumptions during inlining."));

static cl::opt<unsigned> InlinerAttributeWindow(
    "max-inst-checked-for-throw-during-inlining", cl::Hidden,
    cl::desc("the maximum number of instructions analyzed for may throw during "
             "attribute inference in inlined body"),
    cl::init(4));
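
// Illustrative usage sketch (not part of the upstream source): these cl::opt
// flags are regular LLVM command-line options, so they can be toggled when
// driving the inliner through `opt`, for example:
//
//   opt -passes='cgscc(inline)' \
//       -enable-noalias-to-md-conversion=false \
//       -preserve-alignment-assumptions-during-inlining=true \
//       -max-inst-checked-for-throw-during-inlining=8 \
//       -S input.ll -o output.ll
//
// The pipeline spelling and values above are only an example; the flag names
// are taken verbatim from the declarations above.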

namespace {

/// A class for recording information about inlining a landing pad.
class LandingPadInliningInfo {
  /// Destination of the invoke's unwind.
  BasicBlock *OuterResumeDest;

  /// Destination for the callee's resume.
  BasicBlock *InnerResumeDest = nullptr;

  /// LandingPadInst associated with the invoke.
  LandingPadInst *CallerLPad = nullptr;

  /// PHI for EH values from landingpad insts.
  PHINode *InnerEHValuesPHI = nullptr;

  SmallVector<Value*, 8> UnwindDestPHIValues;

public:
  LandingPadInliningInfo(InvokeInst *II)
      : OuterResumeDest(II->getUnwindDest()) {
    // If there are PHI nodes in the unwind destination block, we need to keep
    // track of which values came into them from the invoke before removing
    // the edge from this block.
    BasicBlock *InvokeBB = II->getParent();
    BasicBlock::iterator I = OuterResumeDest->begin();
    for (; isa<PHINode>(I); ++I) {
      // Save the value to use for this edge.
      PHINode *PHI = cast<PHINode>(I);
      UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
    }

    CallerLPad = cast<LandingPadInst>(I);
  }

  /// The outer unwind destination is the target of
  /// unwind edges introduced for calls within the inlined function.
  BasicBlock *getOuterResumeDest() const {
    return OuterResumeDest;
  }

  BasicBlock *getInnerResumeDest();

  LandingPadInst *getLandingPadInst() const { return CallerLPad; }

  /// Forward the 'resume' instruction to the caller's landing pad block.
  /// When the landing pad block has only one predecessor, this is
  /// a simple branch. When there is more than one predecessor, we need to
  /// split the landing pad block after the landingpad instruction and jump
  /// to there.
  void forwardResume(ResumeInst *RI,
                     SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);

  /// Add incoming-PHI values to the unwind destination block for the given
  /// basic block, using the values for the original invoke's source block.
  void addIncomingPHIValuesFor(BasicBlock *BB) const {
    addIncomingPHIValuesForInto(BB, OuterResumeDest);
  }

  void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
    BasicBlock::iterator I = dest->begin();
    for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
      PHINode *phi = cast<PHINode>(I);
      phi->addIncoming(UnwindDestPHIValues[i], src);
    }
  }
};

} // end anonymous namespace

/// Get or create a target for the branch from ResumeInsts.
BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
  if (InnerResumeDest) return InnerResumeDest;

  // Split the landing pad.
  BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
  InnerResumeDest =
      OuterResumeDest->splitBasicBlock(SplitPoint,
                                       OuterResumeDest->getName() + ".body");

  // The number of incoming edges we expect to the inner landing pad.
  const unsigned PHICapacity = 2;

  // Create corresponding new PHIs for all the PHIs in the outer landing pad.
  BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
  BasicBlock::iterator I = OuterResumeDest->begin();
  for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
    PHINode *OuterPHI = cast<PHINode>(I);
    PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
                                        OuterPHI->getName() + ".lpad-body");
    InnerPHI->insertBefore(InsertPoint);
    OuterPHI->replaceAllUsesWith(InnerPHI);
    InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
  }

  // Create a PHI for the exception values.
  InnerEHValuesPHI =
      PHINode::Create(CallerLPad->getType(), PHICapacity, "eh.lpad-body");
  InnerEHValuesPHI->insertBefore(InsertPoint);
  CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
  InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);

  // All done.
  return InnerResumeDest;
}

/// Forward the 'resume' instruction to the caller's landing pad block.
/// When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
void LandingPadInliningInfo::forwardResume(
    ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
  BasicBlock *Dest = getInnerResumeDest();
  BasicBlock *Src = RI->getParent();

  BranchInst::Create(Dest, Src);

  // Update the PHIs in the destination. They were inserted in an order which
  // makes this work.
  addIncomingPHIValuesForInto(Src, Dest);

  InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
  RI->eraseFromParent();
}
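
// Illustrative sketch (hypothetical IR, not from this file): after
// getInnerResumeDest() has split the caller's landing pad block,
//
//   lpad:                                 ; outer resume dest
//     %lp = landingpad { ptr, i32 } cleanup
//     br label %lpad.body
//   lpad.body:                            ; inner resume dest
//     %eh.lpad-body = phi { ptr, i32 } [ %lp, %lpad ], ...
//
// forwardResume() rewrites an inlined block that ends in
// `resume { ptr, i32 } %rv` to end in `br label %lpad.body` instead, and adds
// %rv as the incoming value of %eh.lpad-body for that new edge.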

/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
static Value *getParentPad(Value *EHPad) {
  if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
    return FPI->getParentPad();
  return cast<CatchSwitchInst>(EHPad)->getParentPad();
}

using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;

/// Helper for getUnwindDestToken that does the descendant-ward part of
/// the search.
static Value *getUnwindDestTokenHelper(Instruction *EHPad,
                                       UnwindDestMemoTy &MemoMap) {
  SmallVector<Instruction *, 8> Worklist(1, EHPad);

  while (!Worklist.empty()) {
    Instruction *CurrentPad = Worklist.pop_back_val();
    // We only put pads on the worklist that aren't in the MemoMap. When
    // we find an unwind dest for a pad we may update its ancestors, but
    // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
    // so they should never get updated while queued on the worklist.
    assert(!MemoMap.count(CurrentPad));
    Value *UnwindDestToken = nullptr;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) {
      if (CatchSwitch->hasUnwindDest()) {
        UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI();
      } else {
        // Catchswitch doesn't have a 'nounwind' variant, and one might be
        // annotated as "unwinds to caller" when really it's nounwind (see
        // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
        // parent's unwind dest from this. We can check its catchpads'
        // descendants, since they might include a cleanuppad with an
        // "unwinds to caller" cleanupret, which can be trusted.
        for (auto HI = CatchSwitch->handler_begin(),
                  HE = CatchSwitch->handler_end();
             HI != HE && !UnwindDestToken; ++HI) {
          BasicBlock *HandlerBlock = *HI;
          auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI());
          for (User *Child : CatchPad->users()) {
            // Intentionally ignore invokes here -- since the catchswitch is
            // marked "unwind to caller", it would be a verifier error if it
            // contained an invoke which unwinds out of it, so any invoke we'd
            // encounter must unwind to some child of the catch.
            if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child))
              continue;

            Instruction *ChildPad = cast<Instruction>(Child);
            auto Memo = MemoMap.find(ChildPad);
            if (Memo == MemoMap.end()) {
              // Haven't figured out this child pad yet; queue it.
              Worklist.push_back(ChildPad);
              continue;
            }
            // We've already checked this child, but might have found that
            // it offers no proof either way.
            Value *ChildUnwindDestToken = Memo->second;
            if (!ChildUnwindDestToken)
              continue;
            // We already know the child's unwind dest, which can either
            // be ConstantTokenNone to indicate unwind to caller, or can
            // be another child of the catchpad. Only the former indicates
            // the unwind dest of the catchswitch.
            if (isa<ConstantTokenNone>(ChildUnwindDestToken)) {
              UnwindDestToken = ChildUnwindDestToken;
              break;
            }
            assert(getParentPad(ChildUnwindDestToken) == CatchPad);
          }
        }
      }
    } else {
      auto *CleanupPad = cast<CleanupPadInst>(CurrentPad);
      for (User *U : CleanupPad->users()) {
        if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
          if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
            UnwindDestToken = RetUnwindDest->getFirstNonPHI();
          else
            UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext());
          break;
        }
        Value *ChildUnwindDestToken;
        if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
          ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI();
        } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
          Instruction *ChildPad = cast<Instruction>(U);
          auto Memo = MemoMap.find(ChildPad);
          if (Memo == MemoMap.end()) {
            // Haven't resolved this child yet; queue it and keep searching.
            Worklist.push_back(ChildPad);
            continue;
          }
          // We've checked this child, but still need to ignore it if it
          // had no proof either way.
          ChildUnwindDestToken = Memo->second;
          if (!ChildUnwindDestToken)
            continue;
        } else {
          // Not a relevant user of the cleanuppad
          continue;
        }
        // In a well-formed program, the child/invoke must either unwind to
        // an(other) child of the cleanup, or exit the cleanup. In the
        // first case, continue searching.
        if (isa<Instruction>(ChildUnwindDestToken) &&
            getParentPad(ChildUnwindDestToken) == CleanupPad)
          continue;
        UnwindDestToken = ChildUnwindDestToken;
        break;
      }
    }
    // If we haven't found an unwind dest for CurrentPad, we may have queued its
    // children, so move on to the next in the worklist.
    if (!UnwindDestToken)
      continue;

    // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
    // any ancestors of CurrentPad up to but not including UnwindDestToken's
    // parent pad. Record this in the memo map, and check to see if the
    // original EHPad being queried is one of the ones exited.
    Value *UnwindParent;
    if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
      UnwindParent = getParentPad(UnwindPad);
    else
      UnwindParent = nullptr;
    bool ExitedOriginalPad = false;
    for (Instruction *ExitedPad = CurrentPad;
         ExitedPad && ExitedPad != UnwindParent;
         ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
      // Skip over catchpads since they just follow their catchswitches.
      if (isa<CatchPadInst>(ExitedPad))
        continue;
      MemoMap[ExitedPad] = UnwindDestToken;
      ExitedOriginalPad |= (ExitedPad == EHPad);
    }

    if (ExitedOriginalPad)
      return UnwindDestToken;

    // Continue the search.
  }

  // No definitive information is contained within this funclet.
  return nullptr;
}

/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
/// return that pad instruction. If it unwinds to caller, return
/// ConstantTokenNone. If it does not have a definitive unwind destination,
/// return nullptr.
///
/// This routine gets invoked for calls in funclets in inlinees when inlining
/// an invoke. Since many funclets don't have calls inside them, it's queried
/// on-demand rather than building a map of pads to unwind dests up front.
/// Determining a funclet's unwind dest may require recursively searching its
/// descendants, and also ancestors and cousins if the descendants don't provide
/// an answer. Since most funclets will have their unwind dest immediately
/// available as the unwind dest of a catchswitch or cleanupret, this routine
/// searches top-down from the given pad and then up. To avoid worst-case
/// quadratic run-time given that approach, it uses a memo map to avoid
/// re-processing funclet trees. The callers that rewrite the IR as they go
/// take advantage of this, for correctness, by checking/forcing rewritten
/// pads' entries to match the original callee view.
static Value *getUnwindDestToken(Instruction *EHPad,
                                 UnwindDestMemoTy &MemoMap) {
  // Catchpads unwind to the same place as their catchswitch;
  // redirect any queries on catchpads so the code below can
  // deal with just catchswitches and cleanuppads.
  if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
    EHPad = CPI->getCatchSwitch();

  // Check if we've already determined the unwind dest for this pad.
  auto Memo = MemoMap.find(EHPad);
  if (Memo != MemoMap.end())
    return Memo->second;

  // Search EHPad and, if necessary, its descendants.
  Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
  assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
  if (UnwindDestToken)
    return UnwindDestToken;

  // No information is available for this EHPad from itself or any of its
  // descendants. An unwind all the way out to a pad in the caller would
  // need also to agree with the unwind dest of the parent funclet, so
  // search up the chain to try to find a funclet with information. Put
  // null entries in the memo map to avoid re-processing as we go up.
  MemoMap[EHPad] = nullptr;
#ifndef NDEBUG
  SmallPtrSet<Instruction *, 4> TempMemos;
  TempMemos.insert(EHPad);
#endif
  Instruction *LastUselessPad = EHPad;
  Value *AncestorToken;
  for (AncestorToken = getParentPad(EHPad);
       auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
       AncestorToken = getParentPad(AncestorToken)) {
    // Skip over catchpads since they just follow their catchswitches.
    if (isa<CatchPadInst>(AncestorPad))
      continue;
    // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
    // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
    // call to getUnwindDestToken, that would mean that AncestorPad had no
    // information in itself, its descendants, or its ancestors. If that
    // were the case, then we should also have recorded the lack of information
    // for the descendant that we're coming from. So assert that we don't
    // find a null entry in the MemoMap for AncestorPad.
    assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
    auto AncestorMemo = MemoMap.find(AncestorPad);
    if (AncestorMemo == MemoMap.end()) {
      UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
    } else {
      UnwindDestToken = AncestorMemo->second;
    }
    if (UnwindDestToken)
      break;
    LastUselessPad = AncestorPad;
    MemoMap[LastUselessPad] = nullptr;
#ifndef NDEBUG
    TempMemos.insert(LastUselessPad);
#endif
  }

  // We know that getUnwindDestTokenHelper was called on LastUselessPad and
  // returned nullptr (and likewise for EHPad and any of its ancestors up to
  // LastUselessPad), so LastUselessPad has no information from below. Since
  // getUnwindDestTokenHelper must investigate all downward paths through
  // no-information nodes to prove that a node has no information like this,
  // and since any time it finds information it records it in the MemoMap for
  // not just the immediately-containing funclet but also any ancestors also
  // exited, it must be the case that, walking downward from LastUselessPad,
  // visiting just those nodes which have not been mapped to an unwind dest
  // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
  // they are just used to keep getUnwindDestTokenHelper from repeating work),
  // any node visited must have been exhaustively searched with no information
  // for it found.
  SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
  while (!Worklist.empty()) {
    Instruction *UselessPad = Worklist.pop_back_val();
    auto Memo = MemoMap.find(UselessPad);
    if (Memo != MemoMap.end() && Memo->second) {
      // Here the name 'UselessPad' is a bit of a misnomer, because we've found
      // that it is a funclet that does have information about unwinding to
      // a particular destination; its parent was a useless pad.
      // Since its parent has no information, the unwind edge must not escape
      // the parent, and must target a sibling of this pad. This local unwind
      // gives us no information about EHPad. Leave it and the subtree rooted
      // at it alone.
      assert(getParentPad(Memo->second) == getParentPad(UselessPad));
      continue;
    }
    // We know we don't have information for UselessPad. If it has an entry in
    // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
    // added on this invocation of getUnwindDestToken; if a previous invocation
    // recorded nullptr, it would have had to prove that the ancestors of
    // UselessPad, which include LastUselessPad, had no information, and that
    // in turn would have required proving that the descendants of
    // LastUselessPad, which include EHPad, have no information about
    // LastUselessPad, which would imply that EHPad was mapped to nullptr in
    // the MemoMap on that invocation, which isn't the case if we got here.
    assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
    // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
    // information that we'd be contradicting by making a map entry for it
    // (which is something that getUnwindDestTokenHelper must have proved for
    // us to get here). Just assert on its direct users here; the checks in
    // this downward walk at its descendants will verify that they don't have
    // any unwind edges that exit 'UselessPad' either (i.e. they either have no
    // unwind edges or unwind to a sibling).
    MemoMap[UselessPad] = UnwindDestToken;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
      assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
      for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
        auto *CatchPad = HandlerBlock->getFirstNonPHI();
        for (User *U : CatchPad->users()) {
          assert(
              (!isa<InvokeInst>(U) ||
               (getParentPad(
                    cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
                CatchPad)) &&
              "Expected useless pad");
          if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
            Worklist.push_back(cast<Instruction>(U));
        }
      }
    } else {
      assert(isa<CleanupPadInst>(UselessPad));
      for (User *U : UselessPad->users()) {
        assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
        assert((!isa<InvokeInst>(U) ||
                (getParentPad(
                     cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
                 UselessPad)) &&
               "Expected useless pad");
        if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
          Worklist.push_back(cast<Instruction>(U));
      }
    }
  }

  return UnwindDestToken;
}
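
// Illustrative sketch (hypothetical IR, not from this file): for a cleanup
// funclet such as
//
//   cleanup:
//     %cp = cleanuppad within none []
//     ...
//     cleanupret from %cp unwind label %outer.ehpad
//
// getUnwindDestToken(%cp) returns the first non-PHI instruction of
// %outer.ehpad. If the cleanupret instead said `unwind to caller`, it returns
// ConstantTokenNone, and if the funclet offers no evidence either way, the
// search continues through ancestors and may ultimately return nullptr.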

/// When we inline a basic block into an invoke,
/// we have to turn all of the calls that can throw into invokes.
/// This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
/// nodes in that block with the values specified in InvokeDestPHIValues.
static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
    BasicBlock *BB, BasicBlock *UnwindEdge,
    UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
  for (Instruction &I : llvm::make_early_inc_range(*BB)) {
    // We only need to check for function calls: inlined invoke
    // instructions require no special handling.
    CallInst *CI = dyn_cast<CallInst>(&I);

    if (!CI || CI->doesNotThrow())
      continue;

    // We do not need to (and in fact, cannot) convert possibly throwing calls
    // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into
    // invokes. The caller's "segment" of the deoptimization continuation
    // attached to the newly inlined @llvm.experimental_deoptimize
    // (resp. @llvm.experimental.guard) call should contain the exception
    // handling logic, if any.
    if (auto *F = CI->getCalledFunction())
      if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
          F->getIntrinsicID() == Intrinsic::experimental_guard)
        continue;

    if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
      // This call is nested inside a funclet. If that funclet has an unwind
      // destination within the inlinee, then unwinding out of this call would
      // be UB. Rewriting this call to an invoke which targets the inlined
      // invoke's unwind dest would give the call's parent funclet multiple
      // unwind destinations, which is something that subsequent EH table
      // generation can't handle and that the verifier rejects. So when we
      // see such a call, leave it as a call.
      auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
      Value *UnwindDestToken =
          getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
      if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
        continue;
#ifndef NDEBUG
      Instruction *MemoKey;
      if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
        MemoKey = CatchPad->getCatchSwitch();
      else
        MemoKey = FuncletPad;
      assert(FuncletUnwindMap->count(MemoKey) &&
             (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
             "must get memoized to avoid confusing later searches");
#endif // NDEBUG
    }

    changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
    return BB;
  }
  return nullptr;
}
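
// Illustrative sketch (hypothetical IR and block names, not from this file):
// for an inlined block containing a potentially throwing call,
//
//   bb:
//     call void @may_throw()
//     ...rest of bb...
//
// changeToInvokeAndSplitBasicBlock() produces roughly
//
//   bb:
//     invoke void @may_throw()
//             to label %bb.split unwind label %unwind.edge
//   bb.split:
//     ...rest of bb...
//
// where %unwind.edge is the UnwindEdge block supplied by the caller of this
// helper (the outer resume dest, or the inlined invoke's unwind dest).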

/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                                    ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *InvokeDest = II->getUnwindDest();

  Function *Caller = FirstNewBlock->getParent();

  // The inlined code is currently at the end of the function, scan from the
  // start of the inlined code to its end, checking for stuff we need to
  // rewrite.
  LandingPadInliningInfo Invoke(II);

  // Get all of the inlined landing pad instructions.
  SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
  for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
       I != E; ++I)
    if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
      InlinedLPads.insert(II->getLandingPadInst());

  // Append the clauses from the outer landing pad instruction into the inlined
  // landing pad instructions.
  LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
  for (LandingPadInst *InlinedLPad : InlinedLPads) {
    unsigned OuterNum = OuterLPad->getNumClauses();
    InlinedLPad->reserveClauses(OuterNum);
    for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
      InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
    if (OuterLPad->isCleanup())
      InlinedLPad->setCleanup(true);
  }

  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
       BB != E; ++BB) {
    if (InlinedCodeInfo.ContainsCalls)
      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
              &*BB, Invoke.getOuterResumeDest()))
        // Update any PHI nodes in the exceptional block to indicate that there
        // is now a new entry in them.
        Invoke.addIncomingPHIValuesFor(NewBB);

    // Forward any resumes that are remaining here.
    if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
      Invoke.forwardResume(RI, InlinedLPads);
  }

  // Now that everything is happy, we have one final detail. The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction. Eliminate these entries (which might even delete the
  // PHI node) now.
  InvokeDest->removePredecessor(II->getParent());
}
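
// Illustrative sketch (hypothetical IR, not from this file): if the caller's
// landing pad is
//
//   %outer = landingpad { ptr, i32 } catch ptr @_ZTIi cleanup
//
// then a landing pad cloned from the callee such as
//
//   %inner = landingpad { ptr, i32 } catch ptr @_ZTId
//
// is extended by the loop above to
//
//   %inner = landingpad { ptr, i32 } catch ptr @_ZTId catch ptr @_ZTIi cleanup
//
// so exceptions that would have unwound through the original invoke are still
// caught or cleaned up after inlining.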

/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                               ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *UnwindDest = II->getUnwindDest();
  Function *Caller = FirstNewBlock->getParent();

  assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!");

  // If there are PHI nodes in the unwind destination block, we need to keep
  // track of which values came into them from the invoke before removing the
  // edge from this block.
  SmallVector<Value *, 8> UnwindDestPHIValues;
  BasicBlock *InvokeBB = II->getParent();
  for (PHINode &PHI : UnwindDest->phis()) {
    // Save the value to use for this edge.
    UnwindDestPHIValues.push_back(PHI.getIncomingValueForBlock(InvokeBB));
  }

  // Add incoming-PHI values to the unwind destination block for the given basic
  // block, using the values for the original invoke's source block.
  auto UpdatePHINodes = [&](BasicBlock *Src) {
    BasicBlock::iterator I = UnwindDest->begin();
    for (Value *V : UnwindDestPHIValues) {
      PHINode *PHI = cast<PHINode>(I);
      PHI->addIncoming(V, Src);
      ++I;
    }
  };

  // This connects all the instructions which 'unwind to caller' to the invoke
  // destination.
  UnwindDestMemoTy FuncletUnwindMap;
  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
       BB != E; ++BB) {
    if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
      if (CRI->unwindsToCaller()) {
        auto *CleanupPad = CRI->getCleanupPad();
        CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI->getIterator());
        CRI->eraseFromParent();
        UpdatePHINodes(&*BB);
        // Finding a cleanupret with an unwind destination would confuse
        // subsequent calls to getUnwindDestToken, so map the cleanuppad
        // to short-circuit any such calls and recognize this as an "unwind
        // to caller" cleanup.
        assert(!FuncletUnwindMap.count(CleanupPad) ||
               isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
        FuncletUnwindMap[CleanupPad] =
            ConstantTokenNone::get(Caller->getContext());
      }
    }

    Instruction *I = BB->getFirstNonPHI();
    if (!I->isEHPad())
      continue;

    Instruction *Replacement = nullptr;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
      if (CatchSwitch->unwindsToCaller()) {
        Value *UnwindDestToken;
        if (auto *ParentPad =
                dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
          // This catchswitch is nested inside another funclet. If that
          // funclet has an unwind destination within the inlinee, then
          // unwinding out of this catchswitch would be UB. Rewriting this
          // catchswitch to unwind to the inlined invoke's unwind dest would
          // give the parent funclet multiple unwind destinations, which is
          // something that subsequent EH table generation can't handle and
          // that the verifier rejects. So when we see such a call, leave it
          // as "unwind to caller".
          UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
          if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
            continue;
        } else {
          // This catchswitch has no parent to inherit constraints from, and
          // none of its descendants can have an unwind edge that exits it and
          // targets another funclet in the inlinee. It may or may not have a
          // descendant that definitively has an unwind to caller. In either
          // case, we'll have to assume that any unwinds out of it may need to
          // be routed to the caller, so treat it as though it has a definitive
          // unwind to caller.
          UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
        }
        auto *NewCatchSwitch = CatchSwitchInst::Create(
            CatchSwitch->getParentPad(), UnwindDest,
            CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
            CatchSwitch->getIterator());
        for (BasicBlock *PadBB : CatchSwitch->handlers())
          NewCatchSwitch->addHandler(PadBB);
        // Propagate info for the old catchswitch over to the new one in
        // the unwind map. This also serves to short-circuit any subsequent
        // checks for the unwind dest of this catchswitch, which would get
        // confused if they found the outer handler in the callee.
        FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
        Replacement = NewCatchSwitch;
      }
    } else if (!isa<FuncletPadInst>(I)) {
      llvm_unreachable("unexpected EHPad!");
    }

    if (Replacement) {
      Replacement->takeName(I);
      I->replaceAllUsesWith(Replacement);
      I->eraseFromParent();
      UpdatePHINodes(&*BB);
    }
  }

  if (InlinedCodeInfo.ContainsCalls)
    for (Function::iterator BB = FirstNewBlock->getIterator(),
                            E = Caller->end();
         BB != E; ++BB)
      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
              &*BB, UnwindDest, &FuncletUnwindMap))
        // Update any PHI nodes in the exceptional block to indicate that there
        // is now a new entry in them.
        UpdatePHINodes(NewBB);

  // Now that everything is happy, we have one final detail. The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction. Eliminate these entries (which might even delete the
  // PHI node) now.
  UnwindDest->removePredecessor(InvokeBB);
}

static bool haveCommonPrefix(MDNode *MIBStackContext,
                             MDNode *CallsiteStackContext) {
  assert(MIBStackContext->getNumOperands() > 0 &&
         CallsiteStackContext->getNumOperands() > 0);
  // Because of the context trimming performed during matching, the callsite
  // context could have more stack ids than the MIB. We match up to the end of
  // the shortest stack context.
  for (auto MIBStackIter = MIBStackContext->op_begin(),
            CallsiteStackIter = CallsiteStackContext->op_begin();
       MIBStackIter != MIBStackContext->op_end() &&
       CallsiteStackIter != CallsiteStackContext->op_end();
       MIBStackIter++, CallsiteStackIter++) {
    auto *Val1 = mdconst::dyn_extract<ConstantInt>(*MIBStackIter);
    auto *Val2 = mdconst::dyn_extract<ConstantInt>(*CallsiteStackIter);
    assert(Val1 && Val2);
    if (Val1->getZExtValue() != Val2->getZExtValue())
      return false;
  }
  return true;
}
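
// Illustrative sketch (hypothetical metadata, not from this file): MIB stack
// contexts and callsite contexts are lists of stack ids, e.g.
//
//   MIB stack:        !{i64 1, i64 2}
//   callsite context: !{i64 1, i64 2, i64 3, i64 4}
//
// haveCommonPrefix() returns true for this pair because the two agree at every
// position of the shorter list (the callsite context may be longer due to
// context trimming), and false as soon as any compared position differs.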

static void removeMemProfMetadata(CallBase *Call) {
  Call->setMetadata(LLVMContext::MD_memprof, nullptr);
}

static void removeCallsiteMetadata(CallBase *Call) {
  Call->setMetadata(LLVMContext::MD_callsite, nullptr);
}

static void updateMemprofMetadata(CallBase *CI,
                                  const std::vector<Metadata *> &MIBList) {
  assert(!MIBList.empty());
  // Remove existing memprof, which will either be replaced or may not be needed
  // if we are able to use a single allocation type function attribute.
  removeMemProfMetadata(CI);
  CallStackTrie CallStack;
  for (Metadata *MIB : MIBList)
    CallStack.addCallStack(cast<MDNode>(MIB));
  bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI);
  assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof));
  if (!MemprofMDAttached)
    // If we used a function attribute remove the callsite metadata as well.
    removeCallsiteMetadata(CI);
}

// Update the metadata on the inlined copy ClonedCall of a call OrigCall in the
// inlined callee body, based on the callsite metadata InlinedCallsiteMD from
// the call that was inlined.
static void propagateMemProfHelper(const CallBase *OrigCall,
                                   CallBase *ClonedCall,
                                   MDNode *InlinedCallsiteMD) {
  MDNode *OrigCallsiteMD = ClonedCall->getMetadata(LLVMContext::MD_callsite);
  MDNode *ClonedCallsiteMD = nullptr;
  // Check if the call originally had callsite metadata, and update it for the
  // new call in the inlined body.
  if (OrigCallsiteMD) {
    // The cloned call's context is now the concatenation of the original call's
    // callsite metadata and the callsite metadata on the call where it was
    // inlined.
    ClonedCallsiteMD = MDNode::concatenate(OrigCallsiteMD, InlinedCallsiteMD);
    ClonedCall->setMetadata(LLVMContext::MD_callsite, ClonedCallsiteMD);
  }

  // Update any memprof metadata on the cloned call.
  MDNode *OrigMemProfMD = ClonedCall->getMetadata(LLVMContext::MD_memprof);
  if (!OrigMemProfMD)
    return;
  // We currently expect that allocations with memprof metadata also have
  // callsite metadata for the allocation's part of the context.
  assert(OrigCallsiteMD);

  // New call's MIB list.
  std::vector<Metadata *> NewMIBList;

  // For each MIB metadata, check if its call stack context starts with the
  // new clone's callsite metadata. If so, that MIB goes onto the cloned call in
  // the inlined body. If not, it stays on the out-of-line original call.
  for (auto &MIBOp : OrigMemProfMD->operands()) {
    MDNode *MIB = dyn_cast<MDNode>(MIBOp);
    // Stack is first operand of MIB.
    MDNode *StackMD = getMIBStackNode(MIB);
    assert(StackMD);
    // See if the new cloned callsite context matches this profiled context.
    if (haveCommonPrefix(StackMD, ClonedCallsiteMD))
      // Add it to the cloned call's MIB list.
      NewMIBList.push_back(MIB);
  }
  if (NewMIBList.empty()) {
    removeMemProfMetadata(ClonedCall);
    removeCallsiteMetadata(ClonedCall);
    return;
  }
  if (NewMIBList.size() < OrigMemProfMD->getNumOperands())
    updateMemprofMetadata(ClonedCall, NewMIBList);
}

// Update memprof related metadata (!memprof and !callsite) based on the
// inlining of Callee into the callsite at CB. The updates include merging the
// inlined callee's callsite metadata with that of the inlined call,
// and moving the subset of any memprof contexts to the inlined callee
// allocations if they match the new inlined call stack.
static void
propagateMemProfMetadata(Function *Callee, CallBase &CB,
                         bool ContainsMemProfMetadata,
                         const ValueMap<const Value *, WeakTrackingVH> &VMap) {
  MDNode *CallsiteMD = CB.getMetadata(LLVMContext::MD_callsite);
  // Only need to update if the inlined callsite had callsite metadata, or if
  // there was any memprof metadata inlined.
  if (!CallsiteMD && !ContainsMemProfMetadata)
    return;

  // Propagate metadata onto the cloned calls in the inlined callee.
  for (const auto &Entry : VMap) {
    // See if this is a call that has been inlined and remapped, and not
    // simplified away in the process.
    auto *OrigCall = dyn_cast_or_null<CallBase>(Entry.first);
    auto *ClonedCall = dyn_cast_or_null<CallBase>(Entry.second);
    if (!OrigCall || !ClonedCall)
      continue;
    // If the inlined callsite did not have any callsite metadata, then it isn't
    // involved in any profiled call contexts, and we can remove any memprof
    // metadata on the cloned call.
    if (!CallsiteMD) {
      removeMemProfMetadata(ClonedCall);
      removeCallsiteMetadata(ClonedCall);
      continue;
    }
    propagateMemProfHelper(OrigCall, ClonedCall, CallsiteMD);
  }
}

/// When inlining a call site that has !llvm.mem.parallel_loop_access,
/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
/// be propagated to all memory-accessing cloned instructions.
static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
                                      Function::iterator FEnd) {
  MDNode *MemParallelLoopAccess =
      CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
  MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope);
  MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias);
  if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
    return;

  for (BasicBlock &BB : make_range(FStart, FEnd)) {
    for (Instruction &I : BB) {
      // This metadata is only relevant for instructions that access memory.
      if (!I.mayReadOrWriteMemory())
        continue;

      if (MemParallelLoopAccess) {
        // TODO: This probably should not overwrite MemParallelLoopAccess.
        MemParallelLoopAccess = MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_mem_parallel_loop_access),
            MemParallelLoopAccess);
        I.setMetadata(LLVMContext::MD_mem_parallel_loop_access,
                      MemParallelLoopAccess);
      }

      if (AccessGroup)
        I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
            I.getMetadata(LLVMContext::MD_access_group), AccessGroup));

      if (AliasScope)
        I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));

      if (NoAlias)
        I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_noalias), NoAlias));
    }
  }
}

/// Bundle operands of the inlined function must be added to inlined call sites.
static void PropagateOperandBundles(Function::iterator InlinedBB,
                                    Instruction *CallSiteEHPad) {
  for (Instruction &II : llvm::make_early_inc_range(*InlinedBB)) {
    CallBase *I = dyn_cast<CallBase>(&II);
    if (!I)
      continue;
    // Skip call sites which already have a "funclet" bundle.
    if (I->getOperandBundle(LLVMContext::OB_funclet))
      continue;
    // Skip call sites which are nounwind intrinsics (as long as they don't
    // lower into regular function calls in the course of IR transformations).
    auto *CalledFn =
        dyn_cast<Function>(I->getCalledOperand()->stripPointerCasts());
    if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() &&
        !IntrinsicInst::mayLowerToFunctionCall(CalledFn->getIntrinsicID()))
      continue;

    SmallVector<OperandBundleDef, 1> OpBundles;
    I->getOperandBundlesAsDefs(OpBundles);
    OpBundles.emplace_back("funclet", CallSiteEHPad);

    Instruction *NewInst = CallBase::Create(I, OpBundles, I->getIterator());
    NewInst->takeName(I);
    I->replaceAllUsesWith(NewInst);
    I->eraseFromParent();
  }
}

namespace {
/// Utility for cloning !noalias and !alias.scope metadata. When a code region
/// using scoped alias metadata is inlined, the aliasing relationships may not
/// hold between the two versions. It is necessary to create a deep clone of the
/// metadata, putting the two versions in separate scope domains.
class ScopedAliasMetadataDeepCloner {
  using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
  SetVector<const MDNode *> MD;
  MetadataMap MDMap;
  void addRecursiveMetadataUses();

public:
  ScopedAliasMetadataDeepCloner(const Function *F);

  /// Create a new clone of the scoped alias metadata, which will be used by
  /// subsequent remap() calls.
  void clone();

  /// Remap instructions in the given range from the original to the cloned
  /// metadata.
  void remap(Function::iterator FStart, Function::iterator FEnd);
};
} // namespace

ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
    const Function *F) {
  for (const BasicBlock &BB : *F) {
    for (const Instruction &I : BB) {
      if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
        MD.insert(M);
      if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
        MD.insert(M);

      // We also need to clone the metadata in noalias intrinsics.
      if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
        MD.insert(Decl->getScopeList());
    }
  }
  addRecursiveMetadataUses();
}

void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() {
  SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
  while (!Queue.empty()) {
    const MDNode *M = cast<MDNode>(Queue.pop_back_val());
    for (const Metadata *Op : M->operands())
      if (const MDNode *OpMD = dyn_cast<MDNode>(Op))
        if (MD.insert(OpMD))
          Queue.push_back(OpMD);
  }
}

void ScopedAliasMetadataDeepCloner::clone() {
  assert(MDMap.empty() && "clone() already called ?");

  SmallVector<TempMDTuple, 16> DummyNodes;
  for (const MDNode *I : MD) {
    DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), std::nullopt));
    MDMap[I].reset(DummyNodes.back().get());
  }

  // Create new metadata nodes to replace the dummy nodes, replacing old
  // metadata references with either a dummy node or an already-created new
  // node.
  SmallVector<Metadata *, 4> NewOps;
  for (const MDNode *I : MD) {
    for (const Metadata *Op : I->operands()) {
      if (const MDNode *M = dyn_cast<MDNode>(Op))
        NewOps.push_back(MDMap[M]);
      else
        NewOps.push_back(const_cast<Metadata *>(Op));
    }

    MDNode *NewM = MDNode::get(I->getContext(), NewOps);
    MDTuple *TempM = cast<MDTuple>(MDMap[I]);
    assert(TempM->isTemporary() && "Expected temporary node");

    TempM->replaceAllUsesWith(NewM);
    NewOps.clear();
  }
}

void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
                                          Function::iterator FEnd) {
  if (MDMap.empty())
    return; // Nothing to do.

  for (BasicBlock &BB : make_range(FStart, FEnd)) {
    for (Instruction &I : BB) {
      // TODO: The null checks for the MDMap.lookup() results should no longer
      // be necessary.
      if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
        if (MDNode *MNew = MDMap.lookup(M))
          I.setMetadata(LLVMContext::MD_alias_scope, MNew);

      if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
        if (MDNode *MNew = MDMap.lookup(M))
          I.setMetadata(LLVMContext::MD_noalias, MNew);

      if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
        if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
          Decl->setScopeList(MNew);
    }
  }
}
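
// Illustrative sketch (hypothetical metadata, not from this file): if the
// callee's body carries scoped-alias metadata such as
//
//   !0 = distinct !{!0, !"callee domain"}
//   !1 = distinct !{!1, !0, !"callee: %p"}
//   store i32 %v, ptr %p, !alias.scope !{!1}
//   %x = load i32, ptr %q, !noalias !{!1}
//
// then inlining that body twice into the same caller (or into itself) must not
// reuse !1 for both copies, or the no-alias relationship would incorrectly
// extend across the copies. clone() builds fresh self-referential scope nodes
// and remap() retargets the inlined instructions to them, keeping each inlined
// copy in its own scope domain.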

/// If the inlined function has noalias arguments,
/// then add new alias scopes for each noalias argument, tag the mapped noalias
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
                                  const DataLayout &DL, AAResults *CalleeAAR,
                                  ClonedCodeInfo &InlinedFunctionInfo) {
  if (!EnableNoAliasConversion)
    return;

  const Function *CalledFunc = CB.getCalledFunction();
  SmallVector<const Argument *, 4> NoAliasArgs;

  for (const Argument &Arg : CalledFunc->args())
    if (CB.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty())
      NoAliasArgs.push_back(&Arg);

  if (NoAliasArgs.empty())
    return;

  // To do a good job, if a noalias variable is captured, we need to know if
  // the capture point dominates the particular use we're considering.
  DominatorTree DT;
  DT.recalculate(const_cast<Function&>(*CalledFunc));

  // noalias indicates that pointer values based on the argument do not alias
  // pointer values which are not based on it. So we add a new "scope" for each
  // noalias function argument. Accesses using pointers based on that argument
  // become part of that alias scope, accesses using pointers not based on that
  // argument are tagged as noalias with that scope.

  DenseMap<const Argument *, MDNode *> NewScopes;
  MDBuilder MDB(CalledFunc->getContext());

  // Create a new scope domain for this function.
  MDNode *NewDomain =
      MDB.createAnonymousAliasScopeDomain(CalledFunc->getName());
  for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
    const Argument *A = NoAliasArgs[i];

    std::string Name = std::string(CalledFunc->getName());
    if (A->hasName()) {
      Name += ": %";
      Name += A->getName();
    } else {
      Name += ": argument ";
      Name += utostr(i);
    }

    // Note: We always create a new anonymous root here. This is true regardless
    // of the linkage of the callee because the aliasing "scope" is not just a
    // property of the callee, but also all control dependencies in the caller.
    MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
    NewScopes.insert(std::make_pair(A, NewScope));

    if (UseNoAliasIntrinsic) {
      // Introduce a llvm.experimental.noalias.scope.decl for the noalias
      // argument.
      MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope);
      auto *NoAliasDecl =
          IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList);
      // Ignore the result for now. The result will be used when the
      // llvm.noalias intrinsic is introduced.
      (void)NoAliasDecl;
    }
  }

  // Iterate over all new instructions in the map; for all memory-access
  // instructions, add the alias scope metadata.
  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
       VMI != VMIE; ++VMI) {
    if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) {
      if (!VMI->second)
        continue;

      Instruction *NI = dyn_cast<Instruction>(VMI->second);
      if (!NI || InlinedFunctionInfo.isSimplified(I, NI))
        continue;

      bool IsArgMemOnlyCall = false, IsFuncCall = false;
      SmallVector<const Value *, 2> PtrArgs;

      if (const LoadInst *LI = dyn_cast<LoadInst>(I))
        PtrArgs.push_back(LI->getPointerOperand());
      else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
        PtrArgs.push_back(SI->getPointerOperand());
      else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
        PtrArgs.push_back(VAAI->getPointerOperand());
      else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
        PtrArgs.push_back(CXI->getPointerOperand());
      else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
        PtrArgs.push_back(RMWI->getPointerOperand());
      else if (const auto *Call = dyn_cast<CallBase>(I)) {
        // If we know that the call does not access memory, then we'll still
        // know that about the inlined clone of this call site, and we don't
        // need to add metadata.
        if (Call->doesNotAccessMemory())
          continue;

        IsFuncCall = true;
        if (CalleeAAR) {
          MemoryEffects ME = CalleeAAR->getMemoryEffects(Call);

          // We'll retain this knowledge without additional metadata.
          if (ME.onlyAccessesInaccessibleMem())
            continue;

          if (ME.onlyAccessesArgPointees())
            IsArgMemOnlyCall = true;
        }

        for (Value *Arg : Call->args()) {
          // Only care about pointer arguments. If a noalias argument is
          // accessed through a non-pointer argument, it must be captured
          // first (e.g. via ptrtoint), and we protect against captures below.
          if (!Arg->getType()->isPointerTy())
            continue;

          PtrArgs.push_back(Arg);
        }
      }

      // If we found no pointers, then this instruction is not suitable for
      // pairing with an instruction to receive aliasing metadata.
      // However, if this is a call, then we might just alias with none of the
      // noalias arguments.
      if (PtrArgs.empty() && !IsFuncCall)
        continue;

      // It is possible that there is only one underlying object, but you
      // need to go through several PHIs to see it, and thus could be
      // repeated in the Objects list.
      SmallPtrSet<const Value *, 4> ObjSet;
      SmallVector<Metadata *, 4> Scopes, NoAliases;

      for (const Value *V : PtrArgs) {
        SmallVector<const Value *, 4> Objects;
        getUnderlyingObjects(V, Objects, /* LI = */ nullptr);

        for (const Value *O : Objects)
          ObjSet.insert(O);
      }

      // Figure out if we're derived from anything that is not a noalias
      // argument.
      bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false,
           UsesUnknownObject = false;
      for (const Value *V : ObjSet) {
        // Is this value a constant that cannot be derived from any pointer
        // value (we need to exclude constant expressions, for example, that
        // are formed from arithmetic on global symbols).
        bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) ||
                             isa<ConstantPointerNull>(V) ||
                             isa<ConstantDataVector>(V) || isa<UndefValue>(V);
        if (IsNonPtrConst)
          continue;

        // If this is anything other than a noalias argument, then we cannot
        // completely describe the aliasing properties using alias.scope
        // metadata (and, thus, won't add any).
        if (const Argument *A = dyn_cast<Argument>(V)) {
          if (!CB.paramHasAttr(A->getArgNo(), Attribute::NoAlias))
            UsesAliasingPtr = true;
        } else {
          UsesAliasingPtr = true;
        }

        if (isEscapeSource(V)) {
          // An escape source can only alias with a noalias argument if it has
          // been captured beforehand.
          RequiresNoCaptureBefore = true;
        } else if (!isa<Argument>(V) && !isIdentifiedObject(V)) {
          // If this is neither an escape source, nor some identified object
          // (which cannot directly alias a noalias argument), nor some other
          // argument (which, by definition, also cannot alias a noalias
          // argument), conservatively do not make any assumptions.
          UsesUnknownObject = true;
        }
      }

      // Nothing we can do if the used underlying object cannot be reliably
      // determined.
      if (UsesUnknownObject)
        continue;

      // A function call can always get captured noalias pointers (via other
      // parameters, globals, etc.).
      if (IsFuncCall && !IsArgMemOnlyCall)
        RequiresNoCaptureBefore = true;

      // First, we want to figure out all of the sets with which we definitely
      // don't alias. Iterate over all noalias sets, and add those for which:
      //   1. The noalias argument is not in the set of objects from which we
      //      definitely derive.
      //   2. The noalias argument has not yet been captured.
      // An arbitrary function that might load pointers could see captured
      // noalias arguments via other noalias arguments or globals, and so we
      // must always check for prior capture.
      for (const Argument *A : NoAliasArgs) {
        if (ObjSet.contains(A))
          continue; // May be based on a noalias argument.

        // It might be tempting to skip the PointerMayBeCapturedBefore check if
        // A->hasNoCaptureAttr() is true, but this is incorrect because
        // nocapture only guarantees that no copies outlive the function, not
        // that the value cannot be locally captured.
        if (!RequiresNoCaptureBefore ||
            !PointerMayBeCapturedBefore(A, /* ReturnCaptures */ false,
                                        /* StoreCaptures */ false, I, &DT))
          NoAliases.push_back(NewScopes[A]);
      }

      if (!NoAliases.empty())
        NI->setMetadata(LLVMContext::MD_noalias,
                        MDNode::concatenate(
                            NI->getMetadata(LLVMContext::MD_noalias),
                            MDNode::get(CalledFunc->getContext(), NoAliases)));

      // Next, we want to figure out all of the sets to which we might belong.
      // We might belong to a set if the noalias argument is in the set of
      // underlying objects. If there is some non-noalias argument in our list
      // of underlying objects, then we cannot add a scope because the fact
      // that some access does not alias with any set of our noalias arguments
      // cannot itself guarantee that it does not alias with this access
      // (because there is some pointer of unknown origin involved and the
      // other access might also depend on this pointer). We also cannot add
      // scopes to arbitrary functions unless we know they don't access any
      // non-parameter pointer-values.
      bool CanAddScopes = !UsesAliasingPtr;
      if (CanAddScopes && IsFuncCall)
        CanAddScopes = IsArgMemOnlyCall;

      if (CanAddScopes)
        for (const Argument *A : NoAliasArgs) {
          if (ObjSet.count(A))
            Scopes.push_back(NewScopes[A]);
        }

      if (!Scopes.empty())
        NI->setMetadata(
            LLVMContext::MD_alias_scope,
            MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope),
                                MDNode::get(CalledFunc->getContext(), Scopes)));
    }
  }
}
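
// Illustrative sketch (hypothetical IR, not from this file): inlining
//
//   define void @callee(ptr noalias %a, ptr %b) {
//     %v = load i32, ptr %b
//     store i32 %v, ptr %a
//     ret void
//   }
//
// into a caller produces, roughly,
//
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
//   %v.i = load i32, ptr %b.i, !noalias !2       ; not derived from %a
//   store i32 %v.i, ptr %a.i, !alias.scope !2    ; derived from the noalias arg
//
// where !2 is the fresh scope created above for %a: accesses derived from the
// noalias argument are tagged with !alias.scope, and accesses that are
// provably not derived from it are tagged with !noalias for the same scope.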
1337
1338
static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
1339
ReturnInst *End) {
1340
1341
assert(Begin->getParent() == End->getParent() &&
1342
"Expected to be in same basic block!");
1343
auto BeginIt = Begin->getIterator();
1344
assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator");
1345
return !llvm::isGuaranteedToTransferExecutionToSuccessor(
1346
++BeginIt, End->getIterator(), InlinerAttributeWindow + 1);
1347
}
1348
1349
// Add attributes from CB params and Fn attributes that can always be propagated
1350
// to the corresponding argument / inner callbases.
1351
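// Editorial sketch of the propagation performed below (hypothetical IR; all
// names are invented): given an outer call site
//   call void @callee(ptr readonly %p)
// and, inside @callee, an inner call whose argument is based on that
// parameter,
//   call void @inner(ptr %q)
// the cloned inner call may be annotated roughly as
//   call void @inner(ptr readonly %q.i)
// subject to the byval and simplification checks in the loop below.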
static void AddParamAndFnBasicAttributes(const CallBase &CB,
1352
ValueToValueMapTy &VMap,
1353
ClonedCodeInfo &InlinedFunctionInfo) {
1354
auto *CalledFunction = CB.getCalledFunction();
1355
auto &Context = CalledFunction->getContext();
1356
1357
// Collect valid attributes for all params.
1358
SmallVector<AttrBuilder> ValidParamAttrs;
1359
bool HasAttrToPropagate = false;
1360
1361
for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
1362
ValidParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
1363
// Access attributes can be propagated to any param with the same underlying
1364
// object as the argument.
1365
if (CB.paramHasAttr(I, Attribute::ReadNone))
1366
ValidParamAttrs.back().addAttribute(Attribute::ReadNone);
1367
if (CB.paramHasAttr(I, Attribute::ReadOnly))
1368
ValidParamAttrs.back().addAttribute(Attribute::ReadOnly);
1369
HasAttrToPropagate |= ValidParamAttrs.back().hasAttributes();
1370
}
1371
1372
// Won't be able to propagate anything.
1373
if (!HasAttrToPropagate)
1374
return;
1375
1376
for (BasicBlock &BB : *CalledFunction) {
1377
for (Instruction &Ins : BB) {
1378
const auto *InnerCB = dyn_cast<CallBase>(&Ins);
1379
if (!InnerCB)
1380
continue;
1381
auto *NewInnerCB = dyn_cast_or_null<CallBase>(VMap.lookup(InnerCB));
1382
if (!NewInnerCB)
1383
continue;
1384
// The InnerCB might have been simplified during the inlining
// process, which can make propagation incorrect.
1386
if (InlinedFunctionInfo.isSimplified(InnerCB, NewInnerCB))
1387
continue;
1388
1389
AttributeList AL = NewInnerCB->getAttributes();
1390
for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
1391
// Check if the underlying value for the parameter is an argument.
1392
const Value *UnderlyingV =
1393
getUnderlyingObject(InnerCB->getArgOperand(I));
1394
const Argument *Arg = dyn_cast<Argument>(UnderlyingV);
1395
if (!Arg)
1396
continue;
1397
1398
if (NewInnerCB->paramHasAttr(I, Attribute::ByVal))
1399
// It's unsound to propagate memory attributes to byval arguments.
1400
// Even if CalledFunction doesn't e.g. write to the argument,
1401
// the call to NewInnerCB may write to its by-value copy.
1402
continue;
1403
1404
unsigned ArgNo = Arg->getArgNo();
1405
// If so, propagate its access attributes.
1406
AL = AL.addParamAttributes(Context, I, ValidParamAttrs[ArgNo]);
1407
// We can have conflicting attributes from the inner callsite and
1408
// to-be-inlined callsite. In that case, choose the most
1409
// restrictive.
1410
1411
// readonly + writeonly means we can never deref so make readnone.
1412
if (AL.hasParamAttr(I, Attribute::ReadOnly) &&
1413
AL.hasParamAttr(I, Attribute::WriteOnly))
1414
AL = AL.addParamAttribute(Context, I, Attribute::ReadNone);
1415
1416
// If we have readnone, we need to clear readonly/writeonly.
1417
if (AL.hasParamAttr(I, Attribute::ReadNone)) {
1418
AL = AL.removeParamAttribute(Context, I, Attribute::ReadOnly);
1419
AL = AL.removeParamAttribute(Context, I, Attribute::WriteOnly);
1420
}
1421
1422
// Writable cannot exist in conjunction w/ readonly/readnone
1423
if (AL.hasParamAttr(I, Attribute::ReadOnly) ||
1424
AL.hasParamAttr(I, Attribute::ReadNone))
1425
AL = AL.removeParamAttribute(Context, I, Attribute::Writable);
1426
}
1427
NewInnerCB->setAttributes(AL);
1428
}
1429
}
1430
}
1431
1432
// Only allow these whitelisted attributes to be propagated back to the
// callee. This is because other attributes may only be valid on the call
// itself, i.e. attributes such as signext and zeroext.

// Attributes that are always okay to propagate, because violating them is
// immediate UB.
1438
static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) {
1439
AttrBuilder Valid(CB.getContext());
1440
if (auto DerefBytes = CB.getRetDereferenceableBytes())
1441
Valid.addDereferenceableAttr(DerefBytes);
1442
if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes())
1443
Valid.addDereferenceableOrNullAttr(DerefOrNullBytes);
1444
if (CB.hasRetAttr(Attribute::NoAlias))
1445
Valid.addAttribute(Attribute::NoAlias);
1446
if (CB.hasRetAttr(Attribute::NoUndef))
1447
Valid.addAttribute(Attribute::NoUndef);
1448
return Valid;
1449
}
1450
1451
// Attributes that need additional checks as propagating them may change
1452
// behavior or cause new UB.
1453
static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) {
1454
AttrBuilder Valid(CB.getContext());
1455
if (CB.hasRetAttr(Attribute::NonNull))
1456
Valid.addAttribute(Attribute::NonNull);
1457
if (CB.hasRetAttr(Attribute::Alignment))
1458
Valid.addAlignmentAttr(CB.getRetAlign());
1459
if (std::optional<ConstantRange> Range = CB.getRange())
1460
Valid.addRangeAttr(*Range);
1461
return Valid;
1462
}
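// Editorial note on the two buckets above (illustration only; the attribute
// mix is invented): for a call site such as
//   %p = call noundef noalias dereferenceable(16) nonnull align 8 ptr @f()
// noundef/noalias/dereferenceable fall into the "violating them is immediate
// UB" bucket and can always be pushed onto the returned call, while
// nonnull/align/range only yield poison and therefore need the extra checks
// in AddReturnAttributes below.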
1463
1464
static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap,
1465
ClonedCodeInfo &InlinedFunctionInfo) {
1466
AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB);
1467
AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
1468
if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes())
1469
return;
1470
auto *CalledFunction = CB.getCalledFunction();
1471
auto &Context = CalledFunction->getContext();
1472
1473
for (auto &BB : *CalledFunction) {
1474
auto *RI = dyn_cast<ReturnInst>(BB.getTerminator());
1475
if (!RI || !isa<CallBase>(RI->getOperand(0)))
1476
continue;
1477
auto *RetVal = cast<CallBase>(RI->getOperand(0));
1478
// Check that the cloned RetVal exists and is a call; otherwise we cannot
// add the attributes on the cloned RetVal. Simplification during inlining
// could have transformed the cloned instruction.
1481
auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
1482
if (!NewRetVal)
1483
continue;
1484
1485
// The RetVal might have been simplified during the inlining
// process, which can make propagation incorrect.
1487
if (InlinedFunctionInfo.isSimplified(RetVal, NewRetVal))
1488
continue;
1489
// Backward propagation of attributes to the returned value may be incorrect
1490
// if it is control flow dependent.
1491
// Consider:
1492
// @callee {
1493
// %rv = call @foo()
1494
// %rv2 = call @bar()
1495
// if (%rv2 != null)
1496
// return %rv2
1497
// if (%rv == null)
1498
// exit()
1499
// return %rv
1500
// }
1501
// caller() {
1502
// %val = call nonnull @callee()
1503
// }
1504
// Here we cannot add the nonnull attribute on either foo or bar. So, we
// require that RetVal and RI are in the same basic block and that there
// are no throwing/exiting instructions between them.
1507
if (RI->getParent() != RetVal->getParent() ||
1508
MayContainThrowingOrExitingCallAfterCB(RetVal, RI))
1509
continue;
1510
// Add to the existing attributes of NewRetVal, i.e. the cloned call
1511
// instruction.
1512
// NB! When we have the same attribute already existing on NewRetVal, but
1513
// with a differing value, the AttributeList's merge API honours the already
1514
// existing attribute value (i.e. attributes such as dereferenceable,
1515
// dereferenceable_or_null etc). See AttrBuilder::merge for more details.
1516
AttributeList AL = NewRetVal->getAttributes();
1517
if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes())
1518
ValidUB.removeAttribute(Attribute::Dereferenceable);
1519
if (ValidUB.getDereferenceableOrNullBytes() <
1520
AL.getRetDereferenceableOrNullBytes())
1521
ValidUB.removeAttribute(Attribute::DereferenceableOrNull);
1522
AttributeList NewAL = AL.addRetAttributes(Context, ValidUB);
1523
// Attributes that may generate poison returns are a bit tricky. If we
// propagate them, other uses of the callsite might have their behavior
// change or cause UB (if they have noundef) because of the new potential
// poison.
1527
// Take the following three cases:
1528
//
1529
// 1)
1530
// define nonnull ptr @foo() {
1531
// %p = call ptr @bar()
1532
// call void @use(ptr %p) willreturn nounwind
1533
// ret ptr %p
1534
// }
1535
//
1536
// 2)
1537
// define noundef nonnull ptr @foo() {
1538
// %p = call ptr @bar()
1539
// call void @use(ptr %p) willreturn nounwind
1540
// ret ptr %p
1541
// }
1542
//
1543
// 3)
1544
// define nonnull ptr @foo() {
1545
// %p = call noundef ptr @bar()
1546
// ret ptr %p
1547
// }
1548
//
1549
// In case 1, we can't propagate nonnull because a poison value in @use may
// change behavior or trigger UB.
1551
// In case 2, we don't need to be concerned about propagating nonnull, as
1552
// any new poison at @use will trigger UB anyways.
1553
// In case 3, we can never propagate nonnull because it may create UB due to
1554
// the noundef on @bar.
1555
if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne())
1556
ValidPG.removeAttribute(Attribute::Alignment);
1557
if (ValidPG.hasAttributes()) {
1558
Attribute CBRange = ValidPG.getAttribute(Attribute::Range);
1559
if (CBRange.isValid()) {
1560
Attribute NewRange = AL.getRetAttr(Attribute::Range);
1561
if (NewRange.isValid()) {
1562
ValidPG.addRangeAttr(
1563
CBRange.getRange().intersectWith(NewRange.getRange()));
1564
}
1565
}
1566
// Three checks.
1567
// If the callsite has `noundef`, then a poison due to violating the
1568
// return attribute will create UB anyways so we can always propagate.
1569
// Otherwise, if the return value (callee to be inlined) has `noundef`, we
1570
// can't propagate as a new poison return will cause UB.
1571
// Finally, check if the return value has no uses whose behavior may
1572
// change/may cause UB if we potentially return poison. At the moment this
1573
// is implemented overly conservatively with a single-use check.
1574
// TODO: Update the single-use check to iterate through uses and only bail
1575
// if we have a potentially dangerous use.
1576
1577
if (CB.hasRetAttr(Attribute::NoUndef) ||
1578
(RetVal->hasOneUse() && !RetVal->hasRetAttr(Attribute::NoUndef)))
1579
NewAL = NewAL.addRetAttributes(Context, ValidPG);
1580
}
1581
NewRetVal->setAttributes(NewAL);
1582
}
1583
}
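// Editorial illustration of AddReturnAttributes above (hypothetical IR):
// inlining
//   %v = call noundef nonnull ptr @callee()
// where @callee simply forwards the result of an inner call,
//   %rv = call ptr @inner()
//   ret ptr %rv
// may leave the cloned call annotated as
//   %rv.i = call noundef nonnull ptr @inner()
// because the callsite's noundef makes any new poison immediate UB anyway.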
1584
1585
/// If the inlined function has non-byval align arguments, then
1586
/// add @llvm.assume-based alignment assumptions to preserve this information.
1587
static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
1588
if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
1589
return;
1590
1591
AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller());
1592
auto &DL = CB.getDataLayout();
1593
1594
// To avoid inserting redundant assumptions, we should check for assumptions
1595
// already in the caller. To do this, we might need a DT of the caller.
1596
DominatorTree DT;
1597
bool DTCalculated = false;
1598
1599
Function *CalledFunc = CB.getCalledFunction();
1600
for (Argument &Arg : CalledFunc->args()) {
1601
if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() ||
1602
Arg.hasNUses(0))
1603
continue;
1604
MaybeAlign Alignment = Arg.getParamAlign();
1605
if (!Alignment)
1606
continue;
1607
1608
if (!DTCalculated) {
1609
DT.recalculate(*CB.getCaller());
1610
DTCalculated = true;
1611
}
1612
// If we can already prove the asserted alignment in the context of the
1613
// caller, then don't bother inserting the assumption.
1614
Value *ArgVal = CB.getArgOperand(Arg.getArgNo());
1615
if (getKnownAlignment(ArgVal, DL, &CB, AC, &DT) >= *Alignment)
1616
continue;
1617
1618
CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption(
1619
DL, ArgVal, Alignment->value());
1620
AC->registerAssumption(cast<AssumeInst>(NewAsmp));
1621
}
1622
}
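// Editorial illustration of the assumption emitted by AddAlignmentAssumptions
// above (assumed shape; names invented): for a callee parameter declared
// "ptr align 32 %p" whose alignment is not already provable in the caller,
// the inliner inserts roughly
//   call void @llvm.assume(i1 true) [ "align"(ptr %arg, i64 32) ]
// ahead of the call site.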
1623
1624
static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
1625
Module *M, BasicBlock *InsertBlock,
1626
InlineFunctionInfo &IFI,
1627
Function *CalledFunc) {
1628
IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
1629
1630
Value *Size =
1631
Builder.getInt64(M->getDataLayout().getTypeStoreSize(ByValType));
1632
1633
// Always generate a memcpy of alignment 1 here because we don't know
1634
// the alignment of the src pointer. Other optimizations can infer
1635
// better alignment.
1636
CallInst *CI = Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,
1637
/*SrcAlign*/ Align(1), Size);
1638
1639
// The verifier requires that all calls of debug-info-bearing functions
1640
// from debug-info-bearing functions have a debug location (for inlining
1641
// purposes). Assign a dummy location to satisfy the constraint.
1642
if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram())
1643
if (DISubprogram *SP = CalledFunc->getSubprogram())
1644
CI->setDebugLoc(DILocation::get(SP->getContext(), 0, 0, SP));
1645
}
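// Editorial sketch of the initialization emitted above (hypothetical IR; the
// struct name and size are invented): for a call site
//   call void @callee(ptr byval(%struct.S) align 8 %src)
// inlining introduces, roughly,
//   %src.copy = alloca %struct.S, align 8
//   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %src.copy, ptr align 1 %src,
//                                    i64 16, i1 false)
// unless HandleByValArgument below proves the copy unnecessary (readonly
// callee with sufficient alignment).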
1646
1647
/// When inlining a call site that has a byval argument,
1648
/// we have to make the implicit memcpy explicit by adding it.
1649
static Value *HandleByValArgument(Type *ByValType, Value *Arg,
1650
Instruction *TheCall,
1651
const Function *CalledFunc,
1652
InlineFunctionInfo &IFI,
1653
MaybeAlign ByValAlignment) {
1654
Function *Caller = TheCall->getFunction();
1655
const DataLayout &DL = Caller->getDataLayout();
1656
1657
// If the called function is readonly, then it could not mutate the caller's
1658
// copy of the byval'd memory. In this case, it is safe to elide the copy and
1659
// temporary.
1660
if (CalledFunc->onlyReadsMemory()) {
1661
// If the byval argument has a specified alignment that is greater than the
1662
// passed in pointer, then we either have to round up the input pointer or
1663
// give up on this transformation.
1664
if (ByValAlignment.valueOrOne() == 1)
1665
return Arg;
1666
1667
AssumptionCache *AC =
1668
IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
1669
1670
// If the pointer is already known to be sufficiently aligned, or if we can
1671
// round it up to a larger alignment, then we don't need a temporary.
1672
if (getOrEnforceKnownAlignment(Arg, *ByValAlignment, DL, TheCall, AC) >=
1673
*ByValAlignment)
1674
return Arg;
1675
1676
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
1677
// for code quality, but rarely happens and is required for correctness.
1678
}
1679
1680
// Create the alloca. If we have DataLayout, use nice alignment.
1681
Align Alignment = DL.getPrefTypeAlign(ByValType);
1682
1683
// If the byval had an alignment specified, we *must* use at least that
1684
// alignment, as it is required by the byval argument (and uses of the
1685
// pointer inside the callee).
1686
if (ByValAlignment)
1687
Alignment = std::max(Alignment, *ByValAlignment);
1688
1689
AllocaInst *NewAlloca =
1690
new AllocaInst(ByValType, Arg->getType()->getPointerAddressSpace(),
1691
nullptr, Alignment, Arg->getName());
1692
NewAlloca->insertBefore(Caller->begin()->begin());
1693
IFI.StaticAllocas.push_back(NewAlloca);
1694
1695
// Uses of the argument in the function should use our new alloca
1696
// instead.
1697
return NewAlloca;
1698
}
1699
1700
// Check whether this Value is used by a lifetime intrinsic.
1701
static bool isUsedByLifetimeMarker(Value *V) {
1702
for (User *U : V->users())
1703
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U))
1704
if (II->isLifetimeStartOrEnd())
1705
return true;
1706
return false;
1707
}
1708
1709
// Check whether the given alloca already has
1710
// lifetime.start or lifetime.end intrinsics.
1711
static bool hasLifetimeMarkers(AllocaInst *AI) {
1712
Type *Ty = AI->getType();
1713
Type *Int8PtrTy =
1714
PointerType::get(Ty->getContext(), Ty->getPointerAddressSpace());
1715
if (Ty == Int8PtrTy)
1716
return isUsedByLifetimeMarker(AI);
1717
1718
// Do a scan to find all the casts to i8*.
1719
for (User *U : AI->users()) {
1720
if (U->getType() != Int8PtrTy) continue;
1721
if (U->stripPointerCasts() != AI) continue;
1722
if (isUsedByLifetimeMarker(U))
1723
return true;
1724
}
1725
return false;
1726
}
1727
1728
/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
1729
/// block. Allocas used in inalloca calls and allocas of dynamic array size
1730
/// cannot be static.
1731
static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
1732
return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
1733
}
1734
1735
/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
1736
/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
1737
static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
1738
LLVMContext &Ctx,
1739
DenseMap<const MDNode *, MDNode *> &IANodes) {
1740
auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
1741
return DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(),
1742
OrigDL.getScope(), IA);
1743
}
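// Editorial example of the locations produced by inlineDebugLoc above
// (made-up metadata ids): an instruction whose original location was
//   !DILocation(line: 10, column: 3, scope: !7)
// is re-attached as
//   !DILocation(line: 10, column: 3, scope: !7, inlinedAt: !42)
// where !42 is the (distinct) location of the call site being inlined.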
1744
1745
/// Update inlined instructions' line numbers to encode the location where
/// these instructions are inlined.
1747
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
1748
Instruction *TheCall, bool CalleeHasDebugInfo) {
1749
const DebugLoc &TheCallDL = TheCall->getDebugLoc();
1750
if (!TheCallDL)
1751
return;
1752
1753
auto &Ctx = Fn->getContext();
1754
DILocation *InlinedAtNode = TheCallDL;
1755
1756
// Create a unique call site, not to be confused with any other call from the
1757
// same location.
1758
InlinedAtNode = DILocation::getDistinct(
1759
Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(),
1760
InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt());
1761
1762
// Cache the inlined-at nodes as they're built so they are reused; without
// this, every instruction's inlined-at chain would become distinct from
// each other.
1765
DenseMap<const MDNode *, MDNode *> IANodes;
1766
1767
// Check if we are not generating inline line tables and want to use
1768
// the call site location instead.
1769
bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables");
1770
1771
// Helper-util for updating the metadata attached to an instruction.
1772
auto UpdateInst = [&](Instruction &I) {
1773
// Loop metadata needs to be updated so that the start and end locs
1774
// reference inlined-at locations.
1775
auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
1776
&IANodes](Metadata *MD) -> Metadata * {
1777
if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
1778
return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get();
1779
return MD;
1780
};
1781
updateLoopMetadataDebugLocations(I, updateLoopInfoLoc);
1782
1783
if (!NoInlineLineTables)
1784
if (DebugLoc DL = I.getDebugLoc()) {
1785
DebugLoc IDL =
1786
inlineDebugLoc(DL, InlinedAtNode, I.getContext(), IANodes);
1787
I.setDebugLoc(IDL);
1788
return;
1789
}
1790
1791
if (CalleeHasDebugInfo && !NoInlineLineTables)
1792
return;
1793
1794
// If the inlined instruction has no line number, or if inline info
1795
// is not being generated, make it look as if it originates from the call
1796
// location. This is important for ((__always_inline, __nodebug__))
1797
// functions which must use caller location for all instructions in their
1798
// function body.
1799
1800
// Don't update static allocas, as they may get moved later.
1801
if (auto *AI = dyn_cast<AllocaInst>(&I))
1802
if (allocaWouldBeStaticInEntry(AI))
1803
return;
1804
1805
// Do not force a debug loc for pseudo probes, since they do not need to
1806
// be debuggable, and also they are expected to have a zero/null dwarf
1807
// discriminator at this point which could be violated otherwise.
1808
if (isa<PseudoProbeInst>(I))
1809
return;
1810
1811
I.setDebugLoc(TheCallDL);
1812
};
1813
1814
// Helper-util for updating debug-info records attached to instructions.
1815
auto UpdateDVR = [&](DbgRecord *DVR) {
1816
assert(DVR->getDebugLoc() && "Debug Value must have debug loc");
1817
if (NoInlineLineTables) {
1818
DVR->setDebugLoc(TheCallDL);
1819
return;
1820
}
1821
DebugLoc DL = DVR->getDebugLoc();
1822
DebugLoc IDL =
1823
inlineDebugLoc(DL, InlinedAtNode,
1824
DVR->getMarker()->getParent()->getContext(), IANodes);
1825
DVR->setDebugLoc(IDL);
1826
};
1827
1828
// Iterate over all instructions, updating metadata and debug-info records.
1829
for (; FI != Fn->end(); ++FI) {
1830
for (Instruction &I : *FI) {
1831
UpdateInst(I);
1832
for (DbgRecord &DVR : I.getDbgRecordRange()) {
1833
UpdateDVR(&DVR);
1834
}
1835
}
1836
1837
// Remove debug info intrinsics if we're not keeping inline info.
1838
if (NoInlineLineTables) {
1839
BasicBlock::iterator BI = FI->begin();
1840
while (BI != FI->end()) {
1841
if (isa<DbgInfoIntrinsic>(BI)) {
1842
BI = BI->eraseFromParent();
1843
continue;
1844
} else {
1845
BI->dropDbgRecords();
1846
}
1847
++BI;
1848
}
1849
}
1850
}
1851
}
1852
1853
#undef DEBUG_TYPE
1854
#define DEBUG_TYPE "assignment-tracking"
1855
/// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB.
1856
static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL,
1857
const CallBase &CB) {
1858
at::StorageToVarsMap EscapedLocals;
1859
SmallPtrSet<const Value *, 4> SeenBases;
1860
1861
LLVM_DEBUG(
1862
errs() << "# Finding caller local variables escaped by callee\n");
1863
for (const Value *Arg : CB.args()) {
1864
LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n");
1865
if (!Arg->getType()->isPointerTy()) {
1866
LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n");
1867
continue;
1868
}
1869
1870
const Instruction *I = dyn_cast<Instruction>(Arg);
1871
if (!I) {
1872
LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n");
1873
continue;
1874
}
1875
1876
// Walk back to the base storage.
1877
assert(Arg->getType()->isPtrOrPtrVectorTy());
1878
APInt TmpOffset(DL.getIndexTypeSizeInBits(Arg->getType()), 0, false);
1879
const AllocaInst *Base = dyn_cast<AllocaInst>(
1880
Arg->stripAndAccumulateConstantOffsets(DL, TmpOffset, true));
1881
if (!Base) {
1882
LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n");
1883
continue;
1884
}
1885
1886
assert(Base);
1887
LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n");
1888
// We only need to process each base address once - skip any duplicates.
1889
if (!SeenBases.insert(Base).second)
1890
continue;
1891
1892
// Find all local variables associated with the backing storage.
1893
auto CollectAssignsForStorage = [&](auto *DbgAssign) {
1894
// Skip variables from inlined functions - they are not local variables.
1895
if (DbgAssign->getDebugLoc().getInlinedAt())
1896
return;
1897
LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n");
1898
EscapedLocals[Base].insert(at::VarRecord(DbgAssign));
1899
};
1900
for_each(at::getAssignmentMarkers(Base), CollectAssignsForStorage);
1901
for_each(at::getDVRAssignmentMarkers(Base), CollectAssignsForStorage);
1902
}
1903
return EscapedLocals;
1904
}
1905
1906
static void trackInlinedStores(Function::iterator Start, Function::iterator End,
1907
const CallBase &CB) {
1908
LLVM_DEBUG(errs() << "trackInlinedStores into "
1909
<< Start->getParent()->getName() << " from "
1910
<< CB.getCalledFunction()->getName() << "\n");
1911
std::unique_ptr<DataLayout> DL = std::make_unique<DataLayout>(CB.getModule());
1912
at::trackAssignments(Start, End, collectEscapedLocals(*DL, CB), *DL);
1913
}
1914
1915
/// Update inlined instructions' DIAssignID metadata. We need to do this;
/// otherwise a function inlined more than once into the same function
/// will cause DIAssignID to be shared by many instructions.
1918
static void fixupAssignments(Function::iterator Start, Function::iterator End) {
1919
DenseMap<DIAssignID *, DIAssignID *> Map;
1920
// Loop over all the inlined instructions. If we find a DIAssignID
1921
// attachment or use, replace it with a new version.
1922
for (auto BBI = Start; BBI != End; ++BBI) {
1923
for (Instruction &I : *BBI)
1924
at::remapAssignID(Map, I);
1925
}
1926
}
1927
#undef DEBUG_TYPE
1928
#define DEBUG_TYPE "inline-function"
1929
1930
/// Update the block frequencies of the caller after a callee has been inlined.
1931
///
1932
/// Each block cloned into the caller has its block frequency scaled by the
1933
/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
1934
/// callee's entry block gets the same frequency as the callsite block and the
1935
/// relative frequencies of all cloned blocks remain the same after cloning.
1936
static void updateCallerBFI(BasicBlock *CallSiteBlock,
1937
const ValueToValueMapTy &VMap,
1938
BlockFrequencyInfo *CallerBFI,
1939
BlockFrequencyInfo *CalleeBFI,
1940
const BasicBlock &CalleeEntryBlock) {
1941
SmallPtrSet<BasicBlock *, 16> ClonedBBs;
1942
for (auto Entry : VMap) {
1943
if (!isa<BasicBlock>(Entry.first) || !Entry.second)
1944
continue;
1945
auto *OrigBB = cast<BasicBlock>(Entry.first);
1946
auto *ClonedBB = cast<BasicBlock>(Entry.second);
1947
BlockFrequency Freq = CalleeBFI->getBlockFreq(OrigBB);
1948
if (!ClonedBBs.insert(ClonedBB).second) {
1949
// Multiple blocks in the callee might get mapped to one cloned block in
1950
// the caller since we prune the callee as we clone it. When that happens,
1951
// we want to use the maximum among the original blocks' frequencies.
1952
BlockFrequency NewFreq = CallerBFI->getBlockFreq(ClonedBB);
1953
if (NewFreq > Freq)
1954
Freq = NewFreq;
1955
}
1956
CallerBFI->setBlockFreq(ClonedBB, Freq);
1957
}
1958
BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
1959
CallerBFI->setBlockFreqAndScale(
1960
EntryClone, CallerBFI->getBlockFreq(CallSiteBlock), ClonedBBs);
1961
}
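// Editorial example with made-up numbers: if the callsite block has frequency
// 200 and the callee's entry block had frequency 100, every cloned block is
// scaled by 200/100 = 2, so a callee block that executed half as often as the
// entry keeps that 1:2 ratio inside the caller.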
1962
1963
/// Update the branch metadata for cloned call instructions.
1964
static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
1965
const ProfileCount &CalleeEntryCount,
1966
const CallBase &TheCall, ProfileSummaryInfo *PSI,
1967
BlockFrequencyInfo *CallerBFI) {
1968
if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1)
1969
return;
1970
auto CallSiteCount =
1971
PSI ? PSI->getProfileCount(TheCall, CallerBFI) : std::nullopt;
1972
int64_t CallCount =
1973
std::min(CallSiteCount.value_or(0), CalleeEntryCount.getCount());
1974
updateProfileCallee(Callee, -CallCount, &VMap);
1975
}
1976
1977
void llvm::updateProfileCallee(
1978
Function *Callee, int64_t EntryDelta,
1979
const ValueMap<const Value *, WeakTrackingVH> *VMap) {
1980
auto CalleeCount = Callee->getEntryCount();
1981
if (!CalleeCount)
1982
return;
1983
1984
const uint64_t PriorEntryCount = CalleeCount->getCount();
1985
1986
// Since CallSiteCount is an estimate, it could exceed the original callee
// count; in that case the new entry count is clamped to 0 to guard against
// underflow.
1988
const uint64_t NewEntryCount =
1989
(EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount)
1990
? 0
1991
: PriorEntryCount + EntryDelta;
1992
1993
auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount,
1994
const uint64_t PriorEntryCount) {
1995
Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB);
1996
if (VPtr)
1997
scaleProfData(*VPtr, NewEntryCount, PriorEntryCount);
1998
};
1999
2000
// Are we updating the profile as part of inlining (i.e. was a VMap provided)?
2001
if (VMap) {
2002
uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
2003
for (auto Entry : *VMap) {
2004
if (isa<CallInst>(Entry.first))
2005
if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) {
2006
CI->updateProfWeight(CloneEntryCount, PriorEntryCount);
2007
updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount);
2008
}
2009
2010
if (isa<InvokeInst>(Entry.first))
2011
if (auto *II = dyn_cast_or_null<InvokeInst>(Entry.second)) {
2012
II->updateProfWeight(CloneEntryCount, PriorEntryCount);
2013
updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount);
2014
}
2015
}
2016
}
2017
2018
if (EntryDelta) {
2019
Callee->setEntryCount(NewEntryCount);
2020
2021
for (BasicBlock &BB : *Callee)
2022
// No need to update the callsite if it is pruned during inlining.
2023
if (!VMap || VMap->count(&BB))
2024
for (Instruction &I : BB) {
2025
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2026
CI->updateProfWeight(NewEntryCount, PriorEntryCount);
2027
updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount);
2028
}
2029
if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
2030
II->updateProfWeight(NewEntryCount, PriorEntryCount);
2031
updateVTableProfWeight(II, NewEntryCount, PriorEntryCount);
2032
}
2033
}
2034
}
2035
}
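// Editorial example with made-up counts: for a callee entry count of 1000 and
// an estimated callsite count of 300, updateCallProfile above passes
// EntryDelta = -300; the callee keeps an entry count of 700, calls in the
// inlined clone are scaled by 300/1000, and the remaining calls in the callee
// by 700/1000.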
2036
2037
/// An operand bundle "clang.arc.attachedcall" on a call indicates the call
2038
/// result is implicitly consumed by a call to retainRV or claimRV immediately
2039
/// after the call. This function inlines the retainRV/claimRV calls.
2040
///
2041
/// There are three cases to consider:
2042
///
2043
/// 1. If there is a call to autoreleaseRV that takes a pointer to the returned
2044
/// object in the callee return block, the autoreleaseRV call and the
2045
/// retainRV/claimRV call in the caller cancel out. If the call in the caller
2046
/// is a claimRV call, a call to objc_release is emitted.
2047
///
2048
/// 2. If there is a call in the callee return block that doesn't have operand
2049
/// bundle "clang.arc.attachedcall", the operand bundle on the original call
2050
/// is transferred to the call in the callee.
2051
///
2052
/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is
2053
/// a retainRV call.
2054
static void
2055
inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
2056
const SmallVectorImpl<ReturnInst *> &Returns) {
2057
Module *Mod = CB.getModule();
2058
assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
2059
bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
2060
IsUnsafeClaimRV = !IsRetainRV;
2061
2062
for (auto *RI : Returns) {
2063
Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0));
2064
bool InsertRetainCall = IsRetainRV;
2065
IRBuilder<> Builder(RI->getContext());
2066
2067
// Walk backwards through the basic block looking for either a matching
2068
// autoreleaseRV call or an unannotated call.
2069
auto InstRange = llvm::make_range(++(RI->getIterator().getReverse()),
2070
RI->getParent()->rend());
2071
for (Instruction &I : llvm::make_early_inc_range(InstRange)) {
2072
// Ignore casts.
2073
if (isa<CastInst>(I))
2074
continue;
2075
2076
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
2077
if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue ||
2078
!II->hasNUses(0) ||
2079
objcarc::GetRCIdentityRoot(II->getOperand(0)) != RetOpnd)
2080
break;
2081
2082
// If we've found a matching autoreleaseRV call:
2083
// - If claimRV is attached to the call, insert a call to objc_release
2084
// and erase the autoreleaseRV call.
2085
// - If retainRV is attached to the call, just erase the autoreleaseRV
2086
// call.
2087
if (IsUnsafeClaimRV) {
2088
Builder.SetInsertPoint(II);
2089
Function *IFn =
2090
Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);
2091
Builder.CreateCall(IFn, RetOpnd, "");
2092
}
2093
II->eraseFromParent();
2094
InsertRetainCall = false;
2095
break;
2096
}
2097
2098
auto *CI = dyn_cast<CallInst>(&I);
2099
2100
if (!CI)
2101
break;
2102
2103
if (objcarc::GetRCIdentityRoot(CI) != RetOpnd ||
2104
objcarc::hasAttachedCallOpBundle(CI))
2105
break;
2106
2107
// If we've found an unannotated call that defines RetOpnd, add a
2108
// "clang.arc.attachedcall" operand bundle.
2109
Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(&CB)};
2110
OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
2111
auto *NewCall = CallBase::addOperandBundle(
2112
CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI->getIterator());
2113
NewCall->copyMetadata(*CI);
2114
CI->replaceAllUsesWith(NewCall);
2115
CI->eraseFromParent();
2116
InsertRetainCall = false;
2117
break;
2118
}
2119
2120
if (InsertRetainCall) {
2121
// The retainRV is attached to the call and we've failed to find a
2122
// matching autoreleaseRV or an annotated call in the callee. Emit a call
2123
// to objc_retain.
2124
Builder.SetInsertPoint(RI);
2125
Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_retain);
2126
Builder.CreateCall(IFn, RetOpnd, "");
2127
}
2128
}
2129
}
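// Editorial illustration (hypothetical IR) of case 1 handled by
// inlineRetainOrClaimRVCalls above: a callee return block such as
//   %v = call ptr @llvm.objc.autoreleaseReturnValue(ptr %obj)
//   ret ptr %obj
// inlined at a call carrying (roughly) a "clang.arc.attachedcall"(retainRV)
// bundle simply drops the autoreleaseRV call; if claimRV is attached instead,
// a call to @llvm.objc.release(ptr %obj) is emitted in its place.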
2130
2131
/// This function inlines the called function into the basic block of the
2132
/// caller. This returns false if it is not possible to inline this call.
2133
/// The program is still in a well defined state if this occurs though.
2134
///
2135
/// Note that this only does one level of inlining. For example, if the
2136
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
2137
/// exists in the instruction stream. Similarly this will inline a recursive
2138
/// function by one level.
2139
llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
2140
bool MergeAttributes,
2141
AAResults *CalleeAAR,
2142
bool InsertLifetime,
2143
Function *ForwardVarArgsTo) {
2144
assert(CB.getParent() && CB.getFunction() && "Instruction not in function!");
2145
2146
// FIXME: we don't inline callbr yet.
2147
if (isa<CallBrInst>(CB))
2148
return InlineResult::failure("We don't inline callbr yet.");
2149
2150
// If IFI has any state in it, zap it before we fill it in.
2151
IFI.reset();
2152
2153
Function *CalledFunc = CB.getCalledFunction();
2154
if (!CalledFunc || // Can't inline external function or indirect
2155
CalledFunc->isDeclaration()) // call!
2156
return InlineResult::failure("external or indirect");
2157
2158
// The inliner does not know how to inline through calls with operand bundles
2159
// in general ...
2160
Value *ConvergenceControlToken = nullptr;
2161
if (CB.hasOperandBundles()) {
2162
for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
2163
auto OBUse = CB.getOperandBundleAt(i);
2164
uint32_t Tag = OBUse.getTagID();
2165
// ... but it knows how to inline through "deopt" operand bundles ...
2166
if (Tag == LLVMContext::OB_deopt)
2167
continue;
2168
// ... and "funclet" operand bundles.
2169
if (Tag == LLVMContext::OB_funclet)
2170
continue;
2171
if (Tag == LLVMContext::OB_clang_arc_attachedcall)
2172
continue;
2173
if (Tag == LLVMContext::OB_kcfi)
2174
continue;
2175
if (Tag == LLVMContext::OB_convergencectrl) {
2176
ConvergenceControlToken = OBUse.Inputs[0].get();
2177
continue;
2178
}
2179
2180
return InlineResult::failure("unsupported operand bundle");
2181
}
2182
}
2183
2184
// FIXME: The check below is redundant and incomplete. According to spec, if a
2185
// convergent call is missing a token, then the caller is using uncontrolled
2186
// convergence. If the callee has an entry intrinsic, then the callee is using
2187
// controlled convergence, and the call cannot be inlined. A proper
2188
// implementation of this check requires a whole new analysis that identifies
2189
// convergence in every function. For now, we skip that and just do this one
2190
// cursory check. The underlying assumption is that in a compiler flow that
2191
// fully implements convergence control tokens, there is no mixing of
2192
// controlled and uncontrolled convergent operations in the whole program.
2193
if (CB.isConvergent()) {
2194
auto *I = CalledFunc->getEntryBlock().getFirstNonPHI();
2195
if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(I)) {
2196
if (IntrinsicCall->getIntrinsicID() ==
2197
Intrinsic::experimental_convergence_entry) {
2198
if (!ConvergenceControlToken) {
2199
return InlineResult::failure(
2200
"convergent call needs convergencectrl operand");
2201
}
2202
}
2203
}
2204
}
2205
2206
// If the call to the callee cannot throw, set the 'nounwind' flag on any
2207
// calls that we inline.
2208
bool MarkNoUnwind = CB.doesNotThrow();
2209
2210
BasicBlock *OrigBB = CB.getParent();
2211
Function *Caller = OrigBB->getParent();
2212
2213
// GC poses two hazards to inlining, which only occur when the callee has GC:
2214
// 1. If the caller has no GC, then the callee's GC must be propagated to the
2215
// caller.
2216
// 2. If the caller has a differing GC, it is invalid to inline.
2217
if (CalledFunc->hasGC()) {
2218
if (!Caller->hasGC())
2219
Caller->setGC(CalledFunc->getGC());
2220
else if (CalledFunc->getGC() != Caller->getGC())
2221
return InlineResult::failure("incompatible GC");
2222
}
2223
2224
// Get the personality function from the callee if it contains a landing pad.
2225
Constant *CalledPersonality =
2226
CalledFunc->hasPersonalityFn()
2227
? CalledFunc->getPersonalityFn()->stripPointerCasts()
2228
: nullptr;
2229
2230
// Find the personality function used by the landing pads of the caller. If it
2231
// exists, then check to see that it matches the personality function used in
2232
// the callee.
2233
Constant *CallerPersonality =
2234
Caller->hasPersonalityFn()
2235
? Caller->getPersonalityFn()->stripPointerCasts()
2236
: nullptr;
2237
if (CalledPersonality) {
2238
if (!CallerPersonality)
2239
Caller->setPersonalityFn(CalledPersonality);
2240
// If the personality functions match, then we can perform the
2241
// inlining. Otherwise, we can't inline.
2242
// TODO: This isn't 100% true. Some personality functions are proper
2243
// supersets of others and can be used in place of the other.
2244
else if (CalledPersonality != CallerPersonality)
2245
return InlineResult::failure("incompatible personality");
2246
}
2247
2248
// We need to figure out which funclet the callsite was in so that we may
2249
// properly nest the callee.
2250
Instruction *CallSiteEHPad = nullptr;
2251
if (CallerPersonality) {
2252
EHPersonality Personality = classifyEHPersonality(CallerPersonality);
2253
if (isScopedEHPersonality(Personality)) {
2254
std::optional<OperandBundleUse> ParentFunclet =
2255
CB.getOperandBundle(LLVMContext::OB_funclet);
2256
if (ParentFunclet)
2257
CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front());
2258
2259
// OK, the inlining site is legal. What about the target function?
2260
2261
if (CallSiteEHPad) {
2262
if (Personality == EHPersonality::MSVC_CXX) {
2263
// The MSVC personality cannot tolerate catches getting inlined into
2264
// cleanup funclets.
2265
if (isa<CleanupPadInst>(CallSiteEHPad)) {
2266
// Ok, the call site is within a cleanuppad. Let's check the callee
2267
// for catchpads.
2268
for (const BasicBlock &CalledBB : *CalledFunc) {
2269
if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI()))
2270
return InlineResult::failure("catch in cleanup funclet");
2271
}
2272
}
2273
} else if (isAsynchronousEHPersonality(Personality)) {
2274
// SEH is even less tolerant, there may not be any sort of exceptional
2275
// funclet in the callee.
2276
for (const BasicBlock &CalledBB : *CalledFunc) {
2277
if (CalledBB.isEHPad())
2278
return InlineResult::failure("SEH in cleanup funclet");
2279
}
2280
}
2281
}
2282
}
2283
}
2284
2285
// Determine if we are dealing with a call in an EHPad which does not unwind
2286
// to caller.
2287
bool EHPadForCallUnwindsLocally = false;
2288
if (CallSiteEHPad && isa<CallInst>(CB)) {
2289
UnwindDestMemoTy FuncletUnwindMap;
2290
Value *CallSiteUnwindDestToken =
2291
getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap);
2292
2293
EHPadForCallUnwindsLocally =
2294
CallSiteUnwindDestToken &&
2295
!isa<ConstantTokenNone>(CallSiteUnwindDestToken);
2296
}
2297
2298
// Get an iterator to the last basic block in the function, which will have
2299
// the new function inlined after it.
2300
Function::iterator LastBlock = --Caller->end();
2301
2302
// Make sure to capture all of the return instructions from the cloned
2303
// function.
2304
SmallVector<ReturnInst*, 8> Returns;
2305
ClonedCodeInfo InlinedFunctionInfo;
2306
Function::iterator FirstNewBlock;
2307
2308
{ // Scope to destroy VMap after cloning.
2309
ValueToValueMapTy VMap;
2310
struct ByValInit {
2311
Value *Dst;
2312
Value *Src;
2313
Type *Ty;
2314
};
2315
// Keep a list of pair (dst, src) to emit byval initializations.
2316
SmallVector<ByValInit, 4> ByValInits;
2317
2318
// When inlining a function that contains noalias scope metadata,
2319
// this metadata needs to be cloned so that the inlined blocks
2320
// have different "unique scopes" at every call site.
2321
// Track the metadata that must be cloned. Do this before other changes to
2322
// the function, so that we do not get in trouble when inlining caller ==
2323
// callee.
2324
ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());
2325
2326
auto &DL = Caller->getDataLayout();
2327
2328
// Calculate the vector of arguments to pass into the function cloner, which
2329
// matches up the formal to the actual argument values.
2330
auto AI = CB.arg_begin();
2331
unsigned ArgNo = 0;
2332
for (Function::arg_iterator I = CalledFunc->arg_begin(),
2333
E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
2334
Value *ActualArg = *AI;
2335
2336
// When byval arguments are actually inlined, we need to make the copy implied
// by them explicit. However, we don't do this if the callee is readonly
2338
// or readnone, because the copy would be unneeded: the callee doesn't
2339
// modify the struct.
2340
if (CB.isByValArgument(ArgNo)) {
2341
ActualArg = HandleByValArgument(CB.getParamByValType(ArgNo), ActualArg,
2342
&CB, CalledFunc, IFI,
2343
CalledFunc->getParamAlign(ArgNo));
2344
if (ActualArg != *AI)
2345
ByValInits.push_back(
2346
{ActualArg, (Value *)*AI, CB.getParamByValType(ArgNo)});
2347
}
2348
2349
VMap[&*I] = ActualArg;
2350
}
2351
2352
// TODO: Remove this when users have been updated to the assume bundles.
2353
// Add alignment assumptions if necessary. We do this before the inlined
2354
// instructions are actually cloned into the caller so that we can easily
2355
// check what will be known at the start of the inlined code.
2356
AddAlignmentAssumptions(CB, IFI);
2357
2358
AssumptionCache *AC =
2359
IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
2360
2361
/// Preserve all attributes of the call and its parameters.
2362
salvageKnowledge(&CB, AC);
2363
2364
// We want the inliner to prune the code as it copies. We would LOVE to
2365
// have no dead or constant instructions leftover after inlining occurs
2366
// (which can happen, e.g., because an argument was constant), but we'll be
2367
// happy with whatever the cloner can do.
2368
CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
2369
/*ModuleLevelChanges=*/false, Returns, ".i",
2370
&InlinedFunctionInfo);
2371
// Remember the first block that is newly cloned over.
2372
FirstNewBlock = LastBlock; ++FirstNewBlock;
2373
2374
// Insert retainRV/claimRV runtime calls.
2375
objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(&CB);
2376
if (RVCallKind != objcarc::ARCInstKind::None)
2377
inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns);
2378
2379
// Update caller/callee profiles only when requested. For sample loader
// inlining, the context-sensitive inlinee profile doesn't need to be
// subtracted from the callee profile, and the inlined clone also doesn't
// need to be scaled based on call site count.
2383
if (IFI.UpdateProfile) {
2384
if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
2385
// Update the BFI of blocks cloned into the caller.
2386
updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
2387
CalledFunc->front());
2388
2389
if (auto Profile = CalledFunc->getEntryCount())
2390
updateCallProfile(CalledFunc, VMap, *Profile, CB, IFI.PSI,
2391
IFI.CallerBFI);
2392
}
2393
2394
// Inject byval arguments initialization.
2395
for (ByValInit &Init : ByValInits)
2396
HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(),
2397
&*FirstNewBlock, IFI, CalledFunc);
2398
2399
std::optional<OperandBundleUse> ParentDeopt =
2400
CB.getOperandBundle(LLVMContext::OB_deopt);
2401
if (ParentDeopt) {
2402
SmallVector<OperandBundleDef, 2> OpDefs;
2403
2404
for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
2405
CallBase *ICS = dyn_cast_or_null<CallBase>(VH);
2406
if (!ICS)
2407
continue; // instruction was DCE'd or RAUW'ed to undef
2408
2409
OpDefs.clear();
2410
2411
OpDefs.reserve(ICS->getNumOperandBundles());
2412
2413
for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe;
2414
++COBi) {
2415
auto ChildOB = ICS->getOperandBundleAt(COBi);
2416
if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
2417
// If the inlined call has other operand bundles, let them be
2418
OpDefs.emplace_back(ChildOB);
2419
continue;
2420
}
2421
2422
// It may be useful to separate this logic (of handling operand
2423
// bundles) out to a separate "policy" component if this gets crowded.
2424
// Prepend the parent's deoptimization continuation to the newly
2425
// inlined call's deoptimization continuation.
2426
std::vector<Value *> MergedDeoptArgs;
2427
MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() +
2428
ChildOB.Inputs.size());
2429
2430
llvm::append_range(MergedDeoptArgs, ParentDeopt->Inputs);
2431
llvm::append_range(MergedDeoptArgs, ChildOB.Inputs);
2432
2433
OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));
2434
}
2435
2436
Instruction *NewI = CallBase::Create(ICS, OpDefs, ICS->getIterator());
2437
2438
// Note: the RAUW does the appropriate fixup in VMap, so we need to do
2439
// this even if the call returns void.
2440
ICS->replaceAllUsesWith(NewI);
2441
2442
VH = nullptr;
2443
ICS->eraseFromParent();
2444
}
2445
}
2446
2447
// For 'nodebug' functions, the associated DISubprogram is always null.
2448
// Conservatively avoid propagating the callsite debug location to
2449
// instructions inlined from a function whose DISubprogram is not null.
2450
fixupLineNumbers(Caller, FirstNewBlock, &CB,
2451
CalledFunc->getSubprogram() != nullptr);
2452
2453
if (isAssignmentTrackingEnabled(*Caller->getParent())) {
2454
// Interpret inlined stores to caller-local variables as assignments.
2455
trackInlinedStores(FirstNewBlock, Caller->end(), CB);
2456
2457
// Update DIAssignID metadata attachments and uses so that they are
2458
// unique to this inlined instance.
2459
fixupAssignments(FirstNewBlock, Caller->end());
2460
}
2461
2462
// Now clone the inlined noalias scope metadata.
2463
SAMetadataCloner.clone();
2464
SAMetadataCloner.remap(FirstNewBlock, Caller->end());
2465
2466
// Add noalias metadata if necessary.
2467
AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);
2468
2469
// Clone return attributes on the callsite into the calls within the inlined
2470
// function which feed into its return value.
2471
AddReturnAttributes(CB, VMap, InlinedFunctionInfo);
2472
2473
// Clone attributes on the params of the callsite to calls within the
2474
// inlined function which use the same param.
2475
AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo);
2476
2477
propagateMemProfMetadata(CalledFunc, CB,
2478
InlinedFunctionInfo.ContainsMemProfMetadata, VMap);
2479
2480
// Propagate metadata on the callsite if necessary.
2481
PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end());
2482
2483
// Register any cloned assumptions.
2484
if (IFI.GetAssumptionCache)
2485
for (BasicBlock &NewBlock :
2486
make_range(FirstNewBlock->getIterator(), Caller->end()))
2487
for (Instruction &I : NewBlock)
2488
if (auto *II = dyn_cast<AssumeInst>(&I))
2489
IFI.GetAssumptionCache(*Caller).registerAssumption(II);
2490
}
2491
2492
if (ConvergenceControlToken) {
2493
auto *I = FirstNewBlock->getFirstNonPHI();
2494
if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(I)) {
2495
if (IntrinsicCall->getIntrinsicID() ==
2496
Intrinsic::experimental_convergence_entry) {
2497
IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken);
2498
IntrinsicCall->eraseFromParent();
2499
}
2500
}
2501
}
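// Editorial sketch of the replacement above (hypothetical IR): for a call site
//   call void @callee() [ "convergencectrl"(token %tok) ]
// whose callee begins with
//   %entry = call token @llvm.experimental.convergence.entry()
// the cloned entry intrinsic is RAUW'd with %tok and erased.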
2502
2503
// If there are any alloca instructions in the block that used to be the entry
2504
// block for the callee, move them to the entry block of the caller. First
2505
// calculate which instruction they should be inserted before. We insert the
2506
// instructions at the end of the current alloca list.
2507
{
2508
BasicBlock::iterator InsertPoint = Caller->begin()->begin();
2509
for (BasicBlock::iterator I = FirstNewBlock->begin(),
2510
E = FirstNewBlock->end(); I != E; ) {
2511
AllocaInst *AI = dyn_cast<AllocaInst>(I++);
2512
if (!AI) continue;
2513
2514
// If the alloca is now dead, remove it. This often occurs due to code
2515
// specialization.
2516
if (AI->use_empty()) {
2517
AI->eraseFromParent();
2518
continue;
2519
}
2520
2521
if (!allocaWouldBeStaticInEntry(AI))
2522
continue;
2523
2524
// Keep track of the static allocas that we inline into the caller.
2525
IFI.StaticAllocas.push_back(AI);
2526
2527
// Scan for the block of allocas that we can move over, and move them
2528
// all at once.
2529
while (isa<AllocaInst>(I) &&
2530
!cast<AllocaInst>(I)->use_empty() &&
2531
allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {
2532
IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
2533
++I;
2534
}
2535
2536
// Transfer all of the allocas over in a block. Using splice means
2537
// that the instructions aren't removed from the symbol table, then
2538
// reinserted.
2539
I.setTailBit(true);
2540
Caller->getEntryBlock().splice(InsertPoint, &*FirstNewBlock,
2541
AI->getIterator(), I);
2542
}
2543
}
2544
2545
SmallVector<Value*,4> VarArgsToForward;
2546
SmallVector<AttributeSet, 4> VarArgsAttrs;
2547
for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
2548
i < CB.arg_size(); i++) {
2549
VarArgsToForward.push_back(CB.getArgOperand(i));
2550
VarArgsAttrs.push_back(CB.getAttributes().getParamAttrs(i));
2551
}
2552
2553
bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
2554
if (InlinedFunctionInfo.ContainsCalls) {
2555
CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
2556
if (CallInst *CI = dyn_cast<CallInst>(&CB))
2557
CallSiteTailKind = CI->getTailCallKind();
2558
2559
// For inlining purposes, the "notail" marker is the same as no marker.
2560
if (CallSiteTailKind == CallInst::TCK_NoTail)
2561
CallSiteTailKind = CallInst::TCK_None;
2562
2563
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
2564
++BB) {
2565
for (Instruction &I : llvm::make_early_inc_range(*BB)) {
2566
CallInst *CI = dyn_cast<CallInst>(&I);
2567
if (!CI)
2568
continue;
2569
2570
// Forward varargs from inlined call site to calls to the
2571
// ForwardVarArgsTo function, if requested, and to musttail calls.
2572
if (!VarArgsToForward.empty() &&
2573
((ForwardVarArgsTo &&
2574
CI->getCalledFunction() == ForwardVarArgsTo) ||
2575
CI->isMustTailCall())) {
2576
// Collect attributes for non-vararg parameters.
2577
AttributeList Attrs = CI->getAttributes();
2578
SmallVector<AttributeSet, 8> ArgAttrs;
2579
if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
2580
for (unsigned ArgNo = 0;
2581
ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
2582
ArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
2583
}
2584
2585
// Add VarArg attributes.
2586
ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end());
2587
Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttrs(),
2588
Attrs.getRetAttrs(), ArgAttrs);
2589
// Add VarArgs to existing parameters.
2590
SmallVector<Value *, 6> Params(CI->args());
2591
Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
2592
CallInst *NewCI = CallInst::Create(
2593
CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI->getIterator());
2594
NewCI->setDebugLoc(CI->getDebugLoc());
2595
NewCI->setAttributes(Attrs);
2596
NewCI->setCallingConv(CI->getCallingConv());
2597
CI->replaceAllUsesWith(NewCI);
2598
CI->eraseFromParent();
2599
CI = NewCI;
2600
}
2601
2602
if (Function *F = CI->getCalledFunction())
2603
InlinedDeoptimizeCalls |=
2604
F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
2605
2606
// We need to reduce the strength of any inlined tail calls. For
2607
// musttail, we have to avoid introducing potential unbounded stack
2608
// growth. For example, if functions 'f' and 'g' are mutually recursive
2609
// with musttail, we can inline 'g' into 'f' so long as we preserve
2610
// musttail on the cloned call to 'f'. If either the inlined call site
2611
// or the cloned call site is *not* musttail, the program already has
2612
// one frame of stack growth, so it's safe to remove musttail. Here is
2613
// a table of example transformations:
2614
//
2615
// f -> musttail g -> musttail f ==> f -> musttail f
2616
// f -> musttail g -> tail f ==> f -> tail f
2617
// f -> g -> musttail f ==> f -> f
2618
// f -> g -> tail f ==> f -> f
2619
//
2620
// Inlined notail calls should remain notail calls.
2621
CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
2622
if (ChildTCK != CallInst::TCK_NoTail)
2623
ChildTCK = std::min(CallSiteTailKind, ChildTCK);
2624
CI->setTailCallKind(ChildTCK);
2625
InlinedMustTailCalls |= CI->isMustTailCall();
2626
2627
// Call sites inlined through a 'nounwind' call site should be
2628
// 'nounwind' as well. However, avoid marking call sites explicitly
2629
// where possible. This helps expose more opportunities for CSE after
2630
// inlining, commonly when the callee is an intrinsic.
2631
if (MarkNoUnwind && !CI->doesNotThrow())
2632
CI->setDoesNotThrow();
2633
}
2634
}
2635
}
2636
2637
// Leave lifetime markers for the static alloca's, scoping them to the
2638
// function we just inlined.
2639
// We need to insert lifetime intrinsics even at O0 to avoid invalid
2640
// access caused by multithreaded coroutines. The check
2641
// `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.
2642
if ((InsertLifetime || Caller->isPresplitCoroutine()) &&
2643
!IFI.StaticAllocas.empty()) {
2644
IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin());
2645
for (AllocaInst *AI : IFI.StaticAllocas) {
2646
// Don't mark swifterror allocas. They can't have bitcast uses.
2647
if (AI->isSwiftError())
2648
continue;
2649
2650
// If the alloca is already scoped to something smaller than the whole
2651
// function then there's no need to add redundant, less accurate markers.
2652
if (hasLifetimeMarkers(AI))
2653
continue;
2654
2655
// Try to determine the size of the allocation.
2656
ConstantInt *AllocaSize = nullptr;
2657
if (ConstantInt *AIArraySize =
2658
dyn_cast<ConstantInt>(AI->getArraySize())) {
2659
auto &DL = Caller->getDataLayout();
2660
Type *AllocaType = AI->getAllocatedType();
2661
TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
2662
uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
2663
2664
// Don't add markers for zero-sized allocas.
2665
if (AllocaArraySize == 0)
2666
continue;
2667
2668
// Check that array size doesn't saturate uint64_t and doesn't
2669
// overflow when it's multiplied by type size.
2670
if (!AllocaTypeSize.isScalable() &&
2671
AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
2672
std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
2673
AllocaTypeSize.getFixedValue()) {
2674
AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
2675
AllocaArraySize * AllocaTypeSize);
2676
}
2677
}
2678
2679
builder.CreateLifetimeStart(AI, AllocaSize);
2680
for (ReturnInst *RI : Returns) {
2681
// Don't insert llvm.lifetime.end calls between a musttail or deoptimize
2682
// call and a return. The return kills all local allocas.
2683
if (InlinedMustTailCalls &&
2684
RI->getParent()->getTerminatingMustTailCall())
2685
continue;
2686
if (InlinedDeoptimizeCalls &&
2687
RI->getParent()->getTerminatingDeoptimizeCall())
2688
continue;
2689
IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
2690
}
2691
}
2692
}
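// Editorial illustration of the markers inserted above (hypothetical IR; the
// alloca and its size are invented): a static alloca cloned from the callee,
//   %buf.i = alloca [64 x i8], align 16
// becomes bracketed as
//   call void @llvm.lifetime.start.p0(i64 64, ptr %buf.i)
//   ...
//   call void @llvm.lifetime.end.p0(i64 64, ptr %buf.i)
// with the end marker emitted before each return that is not terminated by a
// musttail or deoptimize call.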
2693
2694
// If the inlined code contained dynamic alloca instructions, wrap the inlined
2695
// code with llvm.stacksave/llvm.stackrestore intrinsics.
2696
if (InlinedFunctionInfo.ContainsDynamicAllocas) {
2697
// Insert the llvm.stacksave.
2698
CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
2699
.CreateStackSave("savedstack");
2700
2701
// Insert a call to llvm.stackrestore before any return instructions in the
2702
// inlined function.
2703
for (ReturnInst *RI : Returns) {
2704
// Don't insert llvm.stackrestore calls between a musttail or deoptimize
2705
// call and a return. The return will restore the stack pointer.
2706
if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
2707
continue;
2708
if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
2709
continue;
2710
IRBuilder<>(RI).CreateStackRestore(SavedPtr);
2711
}
2712
}
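// Editorial sketch of the wrapping above (hypothetical IR): dynamic allocas
// from the callee end up guarded roughly as
//   %savedstack = call ptr @llvm.stacksave.p0()
//   %vla.i = alloca i8, i64 %n
//   ...
//   call void @llvm.stackrestore.p0(ptr %savedstack)
// so the caller's stack does not grow by the callee's dynamic allocations on
// every inlined execution.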
2713
2714
  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any call instructions into invoke instructions. This is sensitive to which
  // funclet pads were top-level in the inlinee, so must be done before
  // rewriting the "parent pad" links.
  if (auto *II = dyn_cast<InvokeInst>(&CB)) {
    BasicBlock *UnwindDest = II->getUnwindDest();
    Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
    if (isa<LandingPadInst>(FirstNonPHI)) {
      HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
    } else {
      HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
    }
  }

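  // Conceptually, the handlers above turn inlined calls that may throw into
  // invokes that unwind to the original invoke's unwind destination. A
  // hypothetical before/after (names purely illustrative):
  //
  //   call void @may_throw()
  // becomes
  //   invoke void @may_throw()
  //           to label %normal.cont unwind label %lpad
  //
  // where %lpad is derived from the unwind destination of the invoke being
  // inlined through.
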
  // Update the lexical scopes of the new funclets and callsites.
  // Anything that had 'none' as its parent is now nested inside the callsite's
  // EHPad.
  if (CallSiteEHPad) {
    for (Function::iterator BB = FirstNewBlock->getIterator(),
                            E = Caller->end();
         BB != E; ++BB) {
      // Add bundle operands to inlined call sites.
      PropagateOperandBundles(BB, CallSiteEHPad);

      // It is problematic if the inlinee has a cleanupret which unwinds to
      // caller and we inline it into a call site which doesn't unwind but into
      // an EH pad that does. Such an edge must be dynamically unreachable.
      // As such, we replace the cleanupret with unreachable.
      if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
        if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
          changeToUnreachable(CleanupRet);

      Instruction *I = BB->getFirstNonPHI();
      if (!I->isEHPad())
        continue;

      if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
        if (isa<ConstantTokenNone>(CatchSwitch->getParentPad()))
          CatchSwitch->setParentPad(CallSiteEHPad);
      } else {
        auto *FPI = cast<FuncletPadInst>(I);
        if (isa<ConstantTokenNone>(FPI->getParentPad()))
          FPI->setParentPad(CallSiteEHPad);
      }
    }
  }

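  // Illustrative effect of the loop above on a funclet pad that was top-level
  // in the inlinee (operands abbreviated, names invented for the example):
  //
  //   %cs = catchswitch within none [label %handler] unwind to caller
  // becomes
  //   %cs = catchswitch within %callsite.pad [label %handler] unwind to caller
  //
  // where %callsite.pad stands for CallSiteEHPad, the EH pad enclosing the
  // call site being inlined into.
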
  if (InlinedDeoptimizeCalls) {
    // We need to at least remove the deoptimizing returns from the Return set,
    // so that the control flow from those returns does not get merged into the
    // caller (but terminate it instead). If the caller's return type does not
    // match the callee's return type, we also need to change the return type
    // of the intrinsic.
    if (Caller->getReturnType() == CB.getType()) {
      llvm::erase_if(Returns, [](ReturnInst *RI) {
        return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
      });
    } else {
      SmallVector<ReturnInst *, 8> NormalReturns;
      Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
          Caller->getParent(), Intrinsic::experimental_deoptimize,
          {Caller->getReturnType()});

      for (ReturnInst *RI : Returns) {
        CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
        if (!DeoptCall) {
          NormalReturns.push_back(RI);
          continue;
        }

        // The calling convention on the deoptimize call itself may be bogus,
        // since the code we're inlining may have undefined behavior (and may
        // never actually execute at runtime); but all
        // @llvm.experimental.deoptimize declarations have to have the same
        // calling convention in a well-formed module.
        auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
        NewDeoptIntrinsic->setCallingConv(CallingConv);
        auto *CurBB = RI->getParent();
        RI->eraseFromParent();

        SmallVector<Value *, 4> CallArgs(DeoptCall->args());

        SmallVector<OperandBundleDef, 1> OpBundles;
        DeoptCall->getOperandBundlesAsDefs(OpBundles);
        auto DeoptAttributes = DeoptCall->getAttributes();
        DeoptCall->eraseFromParent();
        assert(!OpBundles.empty() &&
               "Expected at least the deopt operand bundle");

        IRBuilder<> Builder(CurBB);
        CallInst *NewDeoptCall =
            Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
        NewDeoptCall->setCallingConv(CallingConv);
        NewDeoptCall->setAttributes(DeoptAttributes);
        if (NewDeoptCall->getType()->isVoidTy())
          Builder.CreateRetVoid();
        else
          Builder.CreateRet(NewDeoptCall);
        // Since the ret type is changed, remove the incompatible attributes.
        NewDeoptCall->removeRetAttrs(
            AttributeFuncs::typeIncompatible(NewDeoptCall->getType()));
      }

      // Leave behind the normal returns so we can merge control flow.
      std::swap(Returns, NormalReturns);
    }
  }

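  // Sketch of the return-type rewrite above, assuming the inlinee returned i32
  // via @llvm.experimental.deoptimize.i32 while the caller returns i64 (types
  // chosen purely for illustration; deopt state elided):
  //
  //   %v = call i32 @llvm.experimental.deoptimize.i32() [ "deopt"(...) ]
  //   ret i32 %v
  // becomes, once inlined into the caller,
  //   %v = call i64 @llvm.experimental.deoptimize.i64() [ "deopt"(...) ]
  //   ret i64 %v
  //
  // The deoptimize call is expected to remain immediately followed by the
  // return, which is why the old return and call are erased and re-created
  // together above.
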
  // Handle any inlined musttail call sites. In order for a new call site to be
  // musttail, the source of the clone and the inlined call site must have been
  // musttail. Therefore it's safe to return without merging control into the
  // phi below.
  if (InlinedMustTailCalls) {
    // Check if we need to bitcast the result of any musttail calls.
    Type *NewRetTy = Caller->getReturnType();
    bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy;

    // Handle the returns preceded by musttail calls separately.
    SmallVector<ReturnInst *, 8> NormalReturns;
    for (ReturnInst *RI : Returns) {
      CallInst *ReturnedMustTail =
          RI->getParent()->getTerminatingMustTailCall();
      if (!ReturnedMustTail) {
        NormalReturns.push_back(RI);
        continue;
      }
      if (!NeedBitCast)
        continue;

      // Delete the old return and any preceding bitcast.
      BasicBlock *CurBB = RI->getParent();
      auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
      RI->eraseFromParent();
      if (OldCast)
        OldCast->eraseFromParent();

      // Insert a new bitcast and return with the right type.
      IRBuilder<> Builder(CurBB);
      Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
    }

    // Leave behind the normal returns so we can merge control flow.
    std::swap(Returns, NormalReturns);
  }

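  // Schematic view of the musttail fix-up above, assuming the inlinee's
  // musttail call returned a type that differs from, but is bitcast-compatible
  // with, the caller's return type (placeholders in angle brackets):
  //
  //   %r = musttail call <callee ret ty> @tail.callee(...)
  //   ret <callee ret ty> %r
  // becomes
  //   %r = musttail call <callee ret ty> @tail.callee(...)
  //   %r.cast = bitcast <callee ret ty> %r to <caller ret ty>
  //   ret <caller ret ty> %r.cast
  //
  // Returns ending in musttail calls are deliberately left out of `Returns`
  // afterwards, so they are not merged into the AfterCallBB PHI below.
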
  // Now that all of the transforms on the inlined code have taken place but
  // before we splice the inlined code into the CFG and lose track of which
  // blocks were actually inlined, collect the call sites. We only do this if
  // call graph updates weren't requested, as those provide value handle based
  // tracking of inlined call sites instead. Calls to intrinsics are not
  // collected because they are not inlineable.
  if (InlinedFunctionInfo.ContainsCalls) {
    // Otherwise just collect the raw call sites that were inlined.
    for (BasicBlock &NewBB :
         make_range(FirstNewBlock->getIterator(), Caller->end()))
      for (Instruction &I : NewBB)
        if (auto *CB = dyn_cast<CallBase>(&I))
          if (!(CB->getCalledFunction() &&
                CB->getCalledFunction()->isIntrinsic()))
            IFI.InlinedCallSites.push_back(CB);
  }

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->splice(CB.getIterator(), &*FirstNewBlock, FirstNewBlock->begin(),
                   FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->back().eraseFromParent();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      BranchInst *NewBr =
          BranchInst::Create(II->getNormalDest(), CB.getIterator());
      NewBr->setDebugLoc(Returns[0]->getDebugLoc());
    }

    // If the return instruction returned a value, replace uses of the call
    // with uses of the returned value.
    if (!CB.use_empty()) {
      ReturnInst *R = Returns[0];
      if (&CB == R->getReturnValue())
        CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
      else
        CB.replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    CB.eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    if (MergeAttributes)
      AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);

    // We are now done with the inlining.
    return InlineResult::success();
  }

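  // Worked example of the single-block fast path above (caller, callee, and
  // all names are hypothetical). Given
  //
  //   define i32 @callee(i32 %x) {
  //   entry:
  //     %s = add i32 %x, 1
  //     ret i32 %s
  //   }
  //
  // a call `%r = call i32 @callee(i32 %a)` ends up with the cloned
  // `%s = add i32 %a, 1` spliced directly into the calling block, uses of %r
  // rewritten to %s, and the call and return erased; no new basic blocks or
  // PHI nodes are introduced.
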
  // Otherwise, we have the normal case, of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks. How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  BranchInst *CreatedBranchToNormalDest = nullptr;
  if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {

    // Add an unconditional branch to make this look like the CallInst case...
    CreatedBranchToNormalDest =
        BranchInst::Create(II->getNormalDest(), CB.getIterator());

    // Split the basic block. This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and makes the invoke case more
    // symmetric to the call case.
    AfterCallBB =
        OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(),
                                CalledFunc->getName() + ".exit");

  } else { // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    AfterCallBB = OrigBB->splitBasicBlock(CB.getIterator(),
                                          CalledFunc->getName() + ".exit");
  }

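  // After the split above, the caller is shaped roughly like this (block names
  // illustrative; only the ".exit" suffix is produced by the code above):
  //
  //   OrigBB:       code up to the call, ending in an unconditional branch
  //   AfterCallBB:  "<callee>.exit", holding the code that followed the call
  //
  // The branch is retargeted to the first inlined block just below, and the
  // cloned blocks are then spliced in between the two.
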
  if (IFI.CallerBFI) {
    // Copy original BB's block frequency to AfterCallBB
    IFI.CallerBFI->setBlockFreq(AfterCallBB,
                                IFI.CallerBFI->getBlockFreq(OrigBB));
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  Instruction *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, &*FirstNewBlock);

  // Now that the function is correct, make it a little bit nicer. In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->splice(AfterCallBB->getIterator(), Caller, FirstNewBlock,
                 Caller->end());

  // Handle all of the return instructions that we just cloned in, and
  // eliminate any users of the original call/invoke instruction.
  Type *RTy = CalledFunc->getReturnType();

  PHINode *PHI = nullptr;
  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    if (!CB.use_empty()) {
      PHI = PHINode::Create(RTy, Returns.size(), CB.getName());
      PHI->insertBefore(AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as its operand.
      CB.replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (ReturnInst *RI : Returns) {
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }

    // Add a branch to the merge points and remove return instructions.
    DebugLoc Loc;
    for (ReturnInst *RI : Returns) {
      BranchInst *BI = BranchInst::Create(AfterCallBB, RI->getIterator());
      Loc = RI->getDebugLoc();
      BI->setDebugLoc(Loc);
      RI->eraseFromParent();
    }
    // We need to set the debug location to *somewhere* inside the
    // inlined function. The line number may be nonsensical, but the
    // instruction will at least be associated with the right
    // function.
    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Loc);
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!CB.use_empty()) {
      if (&CB == Returns[0]->getReturnValue())
        CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
      else
        CB.replaceAllUsesWith(Returns[0]->getReturnValue());
    }

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    AfterCallBB->splice(AfterCallBB->begin(), ReturnBB);

    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());

    // Delete the return instruction and the now-empty ReturnBB.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!CB.use_empty()) {
    // No returns, but something is using the return value of the call. Just
    // nuke the result.
    CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
  }

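  // Sketch of the multi-return merge handled above, for a hypothetical callee
  // with two return blocks feeding a use of the original call result (names
  // invented for illustration):
  //
  //   callee.exit:                                      ; AfterCallBB
  //     %retval = phi i32 [ %a, %ret.block1 ], [ %b, %ret.block2 ]
  //
  // Each former `ret` becomes an unconditional branch to callee.exit, and all
  // users of the original call/invoke now use %retval instead.
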
  // Since we are now done with the Call/Invoke, we can delete it.
  CB.eraseFromParent();

  // If we inlined any musttail calls and the original return is now
  // unreachable, delete it. It can only contain a bitcast and ret.
  if (InlinedMustTailCalls && pred_empty(AfterCallBB))
    AfterCallBB->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code entry block into calling block, right before the
  // unconditional branch.
  CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
  OrigBB->splice(Br->getIterator(), CalleeEntry);

  // Remove the unconditional branch.
  Br->eraseFromParent();

  // Now we can remove the CalleeEntry block, which is now empty.
  CalleeEntry->eraseFromParent();

  // If we inserted a phi node, check to see if it has a single value (e.g. all
  // the entries are the same or undef). If so, remove the PHI so it doesn't
  // block other optimizations.
  if (PHI) {
    AssumptionCache *AC =
        IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
    auto &DL = Caller->getDataLayout();
    if (Value *V = simplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
      PHI->replaceAllUsesWith(V);
      PHI->eraseFromParent();
    }
  }

  if (MergeAttributes)
    AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);

  return InlineResult::success();
}
