Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
35266 views
1
//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass lowers instrprof_* intrinsics emitted by an instrumentor.
10
// It also builds the data structures and initialization code needed for
11
// updating execution counts and emitting the profile at runtime.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
16
#include "llvm/ADT/ArrayRef.h"
17
#include "llvm/ADT/STLExtras.h"
18
#include "llvm/ADT/SmallVector.h"
19
#include "llvm/ADT/StringRef.h"
20
#include "llvm/ADT/Twine.h"
21
#include "llvm/Analysis/BlockFrequencyInfo.h"
22
#include "llvm/Analysis/BranchProbabilityInfo.h"
23
#include "llvm/Analysis/LoopInfo.h"
24
#include "llvm/Analysis/TargetLibraryInfo.h"
25
#include "llvm/IR/Attributes.h"
26
#include "llvm/IR/BasicBlock.h"
27
#include "llvm/IR/CFG.h"
28
#include "llvm/IR/Constant.h"
29
#include "llvm/IR/Constants.h"
30
#include "llvm/IR/DIBuilder.h"
31
#include "llvm/IR/DerivedTypes.h"
32
#include "llvm/IR/DiagnosticInfo.h"
33
#include "llvm/IR/Dominators.h"
34
#include "llvm/IR/Function.h"
35
#include "llvm/IR/GlobalValue.h"
36
#include "llvm/IR/GlobalVariable.h"
37
#include "llvm/IR/IRBuilder.h"
38
#include "llvm/IR/Instruction.h"
39
#include "llvm/IR/Instructions.h"
40
#include "llvm/IR/IntrinsicInst.h"
41
#include "llvm/IR/MDBuilder.h"
42
#include "llvm/IR/Module.h"
43
#include "llvm/IR/Type.h"
44
#include "llvm/InitializePasses.h"
45
#include "llvm/Pass.h"
46
#include "llvm/ProfileData/InstrProf.h"
47
#include "llvm/ProfileData/InstrProfCorrelator.h"
48
#include "llvm/Support/Casting.h"
49
#include "llvm/Support/CommandLine.h"
50
#include "llvm/Support/Error.h"
51
#include "llvm/Support/ErrorHandling.h"
52
#include "llvm/TargetParser/Triple.h"
53
#include "llvm/Transforms/Instrumentation.h"
54
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
55
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
56
#include "llvm/Transforms/Utils/ModuleUtils.h"
57
#include "llvm/Transforms/Utils/SSAUpdater.h"
58
#include <algorithm>
59
#include <cassert>
60
#include <cstdint>
61
#include <string>
62
63
using namespace llvm;
64
65
#define DEBUG_TYPE "instrprof"
66
67
namespace llvm {
68
// Command line option to enable vtable value profiling. Defined in
69
// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
70
extern cl::opt<bool> EnableVTableValueProfiling;
71
// TODO: Remove -debug-info-correlate in next LLVM release, in favor of
72
// -profile-correlate=debug-info.
73
cl::opt<bool> DebugInfoCorrelate(
74
"debug-info-correlate",
75
cl::desc("Use debug info to correlate profiles. (Deprecated, use "
76
"-profile-correlate=debug-info)"),
77
cl::init(false));
78
79
cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate(
80
"profile-correlate",
81
cl::desc("Use debug info or binary file to correlate profiles."),
82
cl::init(InstrProfCorrelator::NONE),
83
cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
84
"No profile correlation"),
85
clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
86
"Use debug info to correlate"),
87
clEnumValN(InstrProfCorrelator::BINARY, "binary",
88
"Use binary to correlate")));
89
} // namespace llvm
90
91
namespace {
92
93
cl::opt<bool> DoHashBasedCounterSplit(
94
"hash-based-counter-split",
95
cl::desc("Rename counter variable of a comdat function based on cfg hash"),
96
cl::init(true));
97
98
cl::opt<bool>
99
RuntimeCounterRelocation("runtime-counter-relocation",
100
cl::desc("Enable relocating counters at runtime."),
101
cl::init(false));
102
103
cl::opt<bool> ValueProfileStaticAlloc(
104
"vp-static-alloc",
105
cl::desc("Do static counter allocation for value profiler"),
106
cl::init(true));
107
108
cl::opt<double> NumCountersPerValueSite(
109
"vp-counters-per-site",
110
cl::desc("The average number of profile counters allocated "
111
"per value profiling site."),
112
// This is set to a very small value because in real programs, only
113
// a very small percentage of value sites have non-zero targets, e.g, 1/30.
114
// For those sites with non-zero profile, the average number of targets
115
// is usually smaller than 2.
116
cl::init(1.0));
117
118
cl::opt<bool> AtomicCounterUpdateAll(
119
"instrprof-atomic-counter-update-all",
120
cl::desc("Make all profile counter updates atomic (for testing only)"),
121
cl::init(false));
122
123
cl::opt<bool> AtomicCounterUpdatePromoted(
124
"atomic-counter-update-promoted",
125
cl::desc("Do counter update using atomic fetch add "
126
" for promoted counters only"),
127
cl::init(false));
128
129
cl::opt<bool> AtomicFirstCounter(
130
"atomic-first-counter",
131
cl::desc("Use atomic fetch add for first counter in a function (usually "
132
"the entry counter)"),
133
cl::init(false));
134
135
// If the option is not specified, the default behavior about whether
136
// counter promotion is done depends on how instrumentaiton lowering
137
// pipeline is setup, i.e., the default value of true of this option
138
// does not mean the promotion will be done by default. Explicitly
139
// setting this option can override the default behavior.
140
cl::opt<bool> DoCounterPromotion("do-counter-promotion",
141
cl::desc("Do counter register promotion"),
142
cl::init(false));
143
cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
144
"max-counter-promotions-per-loop", cl::init(20),
145
cl::desc("Max number counter promotions per loop to avoid"
146
" increasing register pressure too much"));
147
148
// A debug option
149
cl::opt<int>
150
MaxNumOfPromotions("max-counter-promotions", cl::init(-1),
151
cl::desc("Max number of allowed counter promotions"));
152
153
cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
154
"speculative-counter-promotion-max-exiting", cl::init(3),
155
cl::desc("The max number of exiting blocks of a loop to allow "
156
" speculative counter promotion"));
157
158
cl::opt<bool> SpeculativeCounterPromotionToLoop(
159
"speculative-counter-promotion-to-loop",
160
cl::desc("When the option is false, if the target block is in a loop, "
161
"the promotion will be disallowed unless the promoted counter "
162
" update can be further/iteratively promoted into an acyclic "
163
" region."));
164
165
cl::opt<bool> IterativeCounterPromotion(
166
"iterative-counter-promotion", cl::init(true),
167
cl::desc("Allow counter promotion across the whole loop nest."));
168
169
cl::opt<bool> SkipRetExitBlock(
170
"skip-ret-exit-block", cl::init(true),
171
cl::desc("Suppress counter promotion if exit blocks contain ret."));
172
173
static cl::opt<bool> SampledInstr("sampled-instrumentation", cl::ZeroOrMore,
174
cl::init(false),
175
cl::desc("Do PGO instrumentation sampling"));
176
177
static cl::opt<unsigned> SampledInstrPeriod(
178
"sampled-instr-period",
179
cl::desc("Set the profile instrumentation sample period. For each sample "
180
"period, a fixed number of consecutive samples will be recorded. "
181
"The number is controlled by 'sampled-instr-burst-duration' flag. "
182
"The default sample period of 65535 is optimized for generating "
183
"efficient code that leverages unsigned integer wrapping in "
184
"overflow."),
185
cl::init(65535));
186
187
static cl::opt<unsigned> SampledInstrBurstDuration(
188
"sampled-instr-burst-duration",
189
cl::desc("Set the profile instrumentation burst duration, which can range "
190
"from 0 to one less than the value of 'sampled-instr-period'. "
191
"This number of samples will be recorded for each "
192
"'sampled-instr-period' count update. Setting to 1 enables "
193
"simple sampling, in which case it is recommended to set "
194
"'sampled-instr-period' to a prime number."),
195
cl::init(200));
196
197
using LoadStorePair = std::pair<Instruction *, Instruction *>;
198
199
static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {
200
auto *MD = dyn_cast_or_null<ConstantAsMetadata>(M.getModuleFlag(Flag));
201
if (!MD)
202
return 0;
203
204
// If the flag is a ConstantAsMetadata, it should be an integer representable
205
// in 64-bits.
206
return cast<ConstantInt>(MD->getValue())->getZExtValue();
207
}
208
209
static bool enablesValueProfiling(const Module &M) {
210
return isIRPGOFlagSet(&M) ||
211
getIntModuleFlagOrZero(M, "EnableValueProfiling") != 0;
212
}
213
214
// Conservatively returns true if value profiling is enabled.
215
static bool profDataReferencedByCode(const Module &M) {
216
return enablesValueProfiling(M);
217
}
218
219
class InstrLowerer final {
220
public:
221
InstrLowerer(Module &M, const InstrProfOptions &Options,
222
std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
223
bool IsCS)
224
: M(M), Options(Options), TT(Triple(M.getTargetTriple())), IsCS(IsCS),
225
GetTLI(GetTLI), DataReferencedByCode(profDataReferencedByCode(M)) {}
226
227
bool lower();
228
229
private:
230
Module &M;
231
const InstrProfOptions Options;
232
const Triple TT;
233
// Is this lowering for the context-sensitive instrumentation.
234
const bool IsCS;
235
236
std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
237
238
const bool DataReferencedByCode;
239
240
struct PerFunctionProfileData {
241
uint32_t NumValueSites[IPVK_Last + 1] = {};
242
GlobalVariable *RegionCounters = nullptr;
243
GlobalVariable *DataVar = nullptr;
244
GlobalVariable *RegionBitmaps = nullptr;
245
uint32_t NumBitmapBytes = 0;
246
247
PerFunctionProfileData() = default;
248
};
249
DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
250
// Key is virtual table variable, value is 'VTableProfData' in the form of
251
// GlobalVariable.
252
DenseMap<GlobalVariable *, GlobalVariable *> VTableDataMap;
253
/// If runtime relocation is enabled, this maps functions to the load
254
/// instruction that produces the profile relocation bias.
255
DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
256
std::vector<GlobalValue *> CompilerUsedVars;
257
std::vector<GlobalValue *> UsedVars;
258
std::vector<GlobalVariable *> ReferencedNames;
259
// The list of virtual table variables of which the VTableProfData is
260
// collected.
261
std::vector<GlobalVariable *> ReferencedVTables;
262
GlobalVariable *NamesVar = nullptr;
263
size_t NamesSize = 0;
264
265
/// The instance of [[alwaysinline]] rmw_or(ptr, i8).
266
/// This is name-insensitive.
267
Function *RMWOrFunc = nullptr;
268
269
// vector of counter load/store pairs to be register promoted.
270
std::vector<LoadStorePair> PromotionCandidates;
271
272
int64_t TotalCountersPromoted = 0;
273
274
/// Lower instrumentation intrinsics in the function. Returns true if there
275
/// any lowering.
276
bool lowerIntrinsics(Function *F);
277
278
/// Register-promote counter loads and stores in loops.
279
void promoteCounterLoadStores(Function *F);
280
281
/// Returns true if relocating counters at runtime is enabled.
282
bool isRuntimeCounterRelocationEnabled() const;
283
284
/// Returns true if profile counter update register promotion is enabled.
285
bool isCounterPromotionEnabled() const;
286
287
/// Return true if profile sampling is enabled.
288
bool isSamplingEnabled() const;
289
290
/// Count the number of instrumented value sites for the function.
291
void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
292
293
/// Replace instrprof.value.profile with a call to runtime library.
294
void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
295
296
/// Replace instrprof.cover with a store instruction to the coverage byte.
297
void lowerCover(InstrProfCoverInst *Inc);
298
299
/// Replace instrprof.timestamp with a call to
300
/// INSTR_PROF_PROFILE_SET_TIMESTAMP.
301
void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction);
302
303
/// Replace instrprof.increment with an increment of the appropriate value.
304
void lowerIncrement(InstrProfIncrementInst *Inc);
305
306
/// Force emitting of name vars for unused functions.
307
void lowerCoverageData(GlobalVariable *CoverageNamesVar);
308
309
/// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction
310
/// using the index represented by the a temp value into a bitmap.
311
void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins);
312
313
/// Get the Bias value for data to access mmap-ed area.
314
/// Create it if it hasn't been seen.
315
GlobalVariable *getOrCreateBiasVar(StringRef VarName);
316
317
/// Compute the address of the counter value that this profiling instruction
318
/// acts on.
319
Value *getCounterAddress(InstrProfCntrInstBase *I);
320
321
/// Lower the incremental instructions under profile sampling predicates.
322
void doSampling(Instruction *I);
323
324
/// Get the region counters for an increment, creating them if necessary.
325
///
326
/// If the counter array doesn't yet exist, the profile data variables
327
/// referring to them will also be created.
328
GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc);
329
330
/// Create the region counters.
331
GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc,
332
StringRef Name,
333
GlobalValue::LinkageTypes Linkage);
334
335
/// Create [[alwaysinline]] rmw_or(ptr, i8).
336
/// This doesn't update `RMWOrFunc`.
337
Function *createRMWOrFunc();
338
339
/// Get the call to `rmw_or`.
340
/// Create the instance if it is unknown.
341
CallInst *getRMWOrCall(Value *Addr, Value *Val);
342
343
/// Compute the address of the test vector bitmap that this profiling
344
/// instruction acts on.
345
Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I);
346
347
/// Get the region bitmaps for an increment, creating them if necessary.
348
///
349
/// If the bitmap array doesn't yet exist, the profile data variables
350
/// referring to them will also be created.
351
GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc);
352
353
/// Create the MC/DC bitmap as a byte-aligned array of bytes associated with
354
/// an MC/DC Decision region. The number of bytes required is indicated by
355
/// the intrinsic used (type InstrProfMCDCBitmapInstBase). This is called
356
/// as part of setupProfileSection() and is conceptually very similar to
357
/// what is done for profile data counters in createRegionCounters().
358
GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
359
StringRef Name,
360
GlobalValue::LinkageTypes Linkage);
361
362
/// Set Comdat property of GV, if required.
363
void maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef VarName);
364
365
/// Setup the sections into which counters and bitmaps are allocated.
366
GlobalVariable *setupProfileSection(InstrProfInstBase *Inc,
367
InstrProfSectKind IPSK);
368
369
/// Create INSTR_PROF_DATA variable for counters and bitmaps.
370
void createDataVariable(InstrProfCntrInstBase *Inc);
371
372
/// Get the counters for virtual table values, creating them if necessary.
373
void getOrCreateVTableProfData(GlobalVariable *GV);
374
375
/// Emit the section with compressed function names.
376
void emitNameData();
377
378
/// Emit the section with compressed vtable names.
379
void emitVTableNames();
380
381
/// Emit value nodes section for value profiling.
382
void emitVNodes();
383
384
/// Emit runtime registration functions for each profile data variable.
385
void emitRegistration();
386
387
/// Emit the necessary plumbing to pull in the runtime initialization.
388
/// Returns true if a change was made.
389
bool emitRuntimeHook();
390
391
/// Add uses of our data variables and runtime hook.
392
void emitUses();
393
394
/// Create a static initializer for our data, on platforms that need it,
395
/// and for any profile output file that was specified.
396
void emitInitialization();
397
};
398
399
///
400
/// A helper class to promote one counter RMW operation in the loop
401
/// into register update.
402
///
403
/// RWM update for the counter will be sinked out of the loop after
404
/// the transformation.
405
///
406
class PGOCounterPromoterHelper : public LoadAndStorePromoter {
407
public:
408
PGOCounterPromoterHelper(
409
Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
410
BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
411
ArrayRef<Instruction *> InsertPts,
412
DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
413
LoopInfo &LI)
414
: LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
415
InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
416
assert(isa<LoadInst>(L));
417
assert(isa<StoreInst>(S));
418
SSA.AddAvailableValue(PH, Init);
419
}
420
421
void doExtraRewritesBeforeFinalDeletion() override {
422
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
423
BasicBlock *ExitBlock = ExitBlocks[i];
424
Instruction *InsertPos = InsertPts[i];
425
// Get LiveIn value into the ExitBlock. If there are multiple
426
// predecessors, the value is defined by a PHI node in this
427
// block.
428
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
429
Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
430
Type *Ty = LiveInValue->getType();
431
IRBuilder<> Builder(InsertPos);
432
if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Addr)) {
433
// If isRuntimeCounterRelocationEnabled() is true then the address of
434
// the store instruction is computed with two instructions in
435
// InstrProfiling::getCounterAddress(). We need to copy those
436
// instructions to this block to compute Addr correctly.
437
// %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias>
438
// %Addr = inttoptr i64 %BiasAdd to i64*
439
auto *OrigBiasInst = dyn_cast<BinaryOperator>(AddrInst->getOperand(0));
440
assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add);
441
Value *BiasInst = Builder.Insert(OrigBiasInst->clone());
442
Addr = Builder.CreateIntToPtr(BiasInst,
443
PointerType::getUnqual(Ty->getContext()));
444
}
445
if (AtomicCounterUpdatePromoted)
446
// automic update currently can only be promoted across the current
447
// loop, not the whole loop nest.
448
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
449
MaybeAlign(),
450
AtomicOrdering::SequentiallyConsistent);
451
else {
452
LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
453
auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
454
auto *NewStore = Builder.CreateStore(NewVal, Addr);
455
456
// Now update the parent loop's candidate list:
457
if (IterativeCounterPromotion) {
458
auto *TargetLoop = LI.getLoopFor(ExitBlock);
459
if (TargetLoop)
460
LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
461
}
462
}
463
}
464
}
465
466
private:
467
Instruction *Store;
468
ArrayRef<BasicBlock *> ExitBlocks;
469
ArrayRef<Instruction *> InsertPts;
470
DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
471
LoopInfo &LI;
472
};
473
474
/// A helper class to do register promotion for all profile counter
475
/// updates in a loop.
476
///
477
class PGOCounterPromoter {
478
public:
479
PGOCounterPromoter(
480
DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
481
Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
482
: LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI) {
483
484
// Skip collection of ExitBlocks and InsertPts for loops that will not be
485
// able to have counters promoted.
486
SmallVector<BasicBlock *, 8> LoopExitBlocks;
487
SmallPtrSet<BasicBlock *, 8> BlockSet;
488
489
L.getExitBlocks(LoopExitBlocks);
490
if (!isPromotionPossible(&L, LoopExitBlocks))
491
return;
492
493
for (BasicBlock *ExitBlock : LoopExitBlocks) {
494
if (BlockSet.insert(ExitBlock).second &&
495
llvm::none_of(predecessors(ExitBlock), [&](const BasicBlock *Pred) {
496
return llvm::isPresplitCoroSuspendExitEdge(*Pred, *ExitBlock);
497
})) {
498
ExitBlocks.push_back(ExitBlock);
499
InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
500
}
501
}
502
}
503
504
bool run(int64_t *NumPromoted) {
505
// Skip 'infinite' loops:
506
if (ExitBlocks.size() == 0)
507
return false;
508
509
// Skip if any of the ExitBlocks contains a ret instruction.
510
// This is to prevent dumping of incomplete profile -- if the
511
// the loop is a long running loop and dump is called in the middle
512
// of the loop, the result profile is incomplete.
513
// FIXME: add other heuristics to detect long running loops.
514
if (SkipRetExitBlock) {
515
for (auto *BB : ExitBlocks)
516
if (isa<ReturnInst>(BB->getTerminator()))
517
return false;
518
}
519
520
unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
521
if (MaxProm == 0)
522
return false;
523
524
unsigned Promoted = 0;
525
for (auto &Cand : LoopToCandidates[&L]) {
526
527
SmallVector<PHINode *, 4> NewPHIs;
528
SSAUpdater SSA(&NewPHIs);
529
Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
530
531
// If BFI is set, we will use it to guide the promotions.
532
if (BFI) {
533
auto *BB = Cand.first->getParent();
534
auto InstrCount = BFI->getBlockProfileCount(BB);
535
if (!InstrCount)
536
continue;
537
auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
538
// If the average loop trip count is not greater than 1.5, we skip
539
// promotion.
540
if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2))
541
continue;
542
}
543
544
PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
545
L.getLoopPreheader(), ExitBlocks,
546
InsertPts, LoopToCandidates, LI);
547
Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
548
Promoted++;
549
if (Promoted >= MaxProm)
550
break;
551
552
(*NumPromoted)++;
553
if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
554
break;
555
}
556
557
LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
558
<< L.getLoopDepth() << ")\n");
559
return Promoted != 0;
560
}
561
562
private:
563
bool allowSpeculativeCounterPromotion(Loop *LP) {
564
SmallVector<BasicBlock *, 8> ExitingBlocks;
565
L.getExitingBlocks(ExitingBlocks);
566
// Not considierered speculative.
567
if (ExitingBlocks.size() == 1)
568
return true;
569
if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
570
return false;
571
return true;
572
}
573
574
// Check whether the loop satisfies the basic conditions needed to perform
575
// Counter Promotions.
576
bool
577
isPromotionPossible(Loop *LP,
578
const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
579
// We can't insert into a catchswitch.
580
if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
581
return isa<CatchSwitchInst>(Exit->getTerminator());
582
}))
583
return false;
584
585
if (!LP->hasDedicatedExits())
586
return false;
587
588
BasicBlock *PH = LP->getLoopPreheader();
589
if (!PH)
590
return false;
591
592
return true;
593
}
594
595
// Returns the max number of Counter Promotions for LP.
596
unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
597
SmallVector<BasicBlock *, 8> LoopExitBlocks;
598
LP->getExitBlocks(LoopExitBlocks);
599
if (!isPromotionPossible(LP, LoopExitBlocks))
600
return 0;
601
602
SmallVector<BasicBlock *, 8> ExitingBlocks;
603
LP->getExitingBlocks(ExitingBlocks);
604
605
// If BFI is set, we do more aggressive promotions based on BFI.
606
if (BFI)
607
return (unsigned)-1;
608
609
// Not considierered speculative.
610
if (ExitingBlocks.size() == 1)
611
return MaxNumOfPromotionsPerLoop;
612
613
if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
614
return 0;
615
616
// Whether the target block is in a loop does not matter:
617
if (SpeculativeCounterPromotionToLoop)
618
return MaxNumOfPromotionsPerLoop;
619
620
// Now check the target block:
621
unsigned MaxProm = MaxNumOfPromotionsPerLoop;
622
for (auto *TargetBlock : LoopExitBlocks) {
623
auto *TargetLoop = LI.getLoopFor(TargetBlock);
624
if (!TargetLoop)
625
continue;
626
unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
627
unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
628
MaxProm =
629
std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
630
PendingCandsInTarget);
631
}
632
return MaxProm;
633
}
634
635
DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
636
SmallVector<BasicBlock *, 8> ExitBlocks;
637
SmallVector<Instruction *, 8> InsertPts;
638
Loop &L;
639
LoopInfo &LI;
640
BlockFrequencyInfo *BFI;
641
};
642
643
/// Kinds of value profiling calls.
enum class ValueProfilingCallType {
  /// Each individual value is tracked. Currently used for indirect call
  /// target profiling.
  Default,

  /// Value profiling of memop sizes.
  MemOp
};
651
652
} // end anonymous namespace
653
654
/// New-pass-manager entry point: lower the module's profiling intrinsics.
/// All analyses are reported as preserved when the lowering changed nothing,
/// and none otherwise.
PreservedAnalyses InstrProfilingLoweringPass::run(Module &M,
                                                  ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  // Fetches the target library info of a function through the function
  // analysis manager.
  auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
    return FAM.getResult<TargetLibraryAnalysis>(F);
  };

  InstrLowerer Lowerer(M, Options, GetTLI, IsCS);
  const bool Changed = Lowerer.lower();
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
667
668
//
669
// Perform instrumentation sampling.
670
//
671
// There are 3 flavors of sampling:
672
// (1) Full burst sampling: We transform:
673
// Increment_Instruction;
674
// to:
675
// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) {
676
// Increment_Instruction;
677
// }
678
// __llvm_profile_sampling__ += 1;
679
// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
680
// __llvm_profile_sampling__ = 0;
681
// }
682
//
683
// "__llvm_profile_sampling__" is a thread-local global shared by all PGO
684
// counters (value-instrumentation and edge instrumentation).
685
//
686
// (2) Fast burst sampling:
687
// "__llvm_profile_sampling__" variable is an unsigned type, meaning it will
688
// wrap around to zero when overflows. In this case, the second check is
689
// unnecessary, so we won't generate check2 when the SampledInstrPeriod is
690
// set to 65535 (64K - 1). The code after:
691
// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) {
692
// Increment_Instruction;
693
// }
694
// __llvm_profile_sampling__ += 1;
695
//
696
// (3) Simple sampling:
697
// When SampledInstrBurstDuration is set to 1, we do a simple sampling:
698
// __llvm_profile_sampling__ += 1;
699
// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
700
// __llvm_profile_sampling__ = 0;
701
// Increment_Instruction;
702
// }
703
//
704
// Note that, the code snippet after the transformation can still be counter
705
// promoted. However, with sampling enabled, counter updates are expected to
706
// be infrequent, making the benefits of counter promotion negligible.
707
// Moreover, counter promotion can potentially cause issues in server
708
// applications, particularly when the counters are dumped without a clean
709
// exit. To mitigate this risk, counter promotion is disabled by default when
710
// sampling is enabled. This behavior can be overridden using the internal
711
// option.
712
/// Guard the profiling instruction \p I with the sampling predicates.
///
/// Does nothing when sampling is disabled. Otherwise emits the load,
/// increment and (unless fast sampling applies) periodic reset of the
/// sampling variable around \p I, and moves \p I into the conditionally
/// executed block. Reports a fatal error if the burst duration is not smaller
/// than the period.
void InstrLowerer::doSampling(Instruction *I) {
  if (!isSamplingEnabled())
    return;

  unsigned SampledBurstDuration = SampledInstrBurstDuration.getValue();
  unsigned SampledPeriod = SampledInstrPeriod.getValue();
  if (SampledBurstDuration >= SampledPeriod) {
    report_fatal_error(
        "SampledPeriod needs to be greater than SampledBurstDuration");
  }
  // A 16-bit sampling variable suffices when the period fits in one.
  bool UseShort = (SampledPeriod <= USHRT_MAX);
  bool IsSimpleSampling = (SampledBurstDuration == 1);
  // If (SampledBurstDuration == 1 && SampledPeriod == 65535), generate
  // the simple sampling style code.
  bool IsFastSampling = (!IsSimpleSampling && SampledPeriod == 65535);

  // Builds the constant C in the width of the sampling variable.
  auto GetConstant = [UseShort](IRBuilder<> &Builder, uint32_t C) {
    if (UseShort)
      return Builder.getInt16(C);
    else
      return Builder.getInt32(C);
  };

  IntegerType *SamplingVarTy;
  if (UseShort)
    SamplingVarTy = Type::getInt16Ty(M.getContext());
  else
    SamplingVarTy = Type::getInt32Ty(M.getContext());
  auto *SamplingVar =
      M.getGlobalVariable(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
  assert(SamplingVar && "SamplingVar not set properly");

  // Create the condition for checking the burst duration.
  Instruction *SamplingVarIncr;
  Value *NewSamplingVarVal;
  MDBuilder MDB(I->getContext());
  MDNode *BranchWeight = nullptr;
  IRBuilder<> CondBuilder(I);
  auto *LoadSamplingVar = CondBuilder.CreateLoad(SamplingVarTy, SamplingVar);
  if (IsSimpleSampling) {
    // For the simple sampling, just create the load and increments.
    IRBuilder<> IncBuilder(I);
    NewSamplingVarVal =
        IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
    SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
  } else {
    // For the burst-sampling, create the conditional update:
    //   if (__llvm_profile_sampling__ < SampledInstrBurstDuration)
    // The comparison is strict so that exactly SampledBurstDuration samples
    // are recorded per period, as the option description promises; an
    // inclusive comparison would record one sample too many.
    auto *DurationCond = CondBuilder.CreateICmpULT(
        LoadSamplingVar, GetConstant(CondBuilder, SampledBurstDuration));
    // Branch weights are only a hint and are left unchanged.
    BranchWeight = MDB.createBranchWeights(
        SampledBurstDuration, SampledPeriod + 1 - SampledBurstDuration);
    Instruction *ThenTerm = SplitBlockAndInsertIfThen(
        DurationCond, I, /* Unreachable */ false, BranchWeight);
    IRBuilder<> IncBuilder(I);
    NewSamplingVarVal =
        IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
    SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
    I->moveBefore(ThenTerm);
  }

  // Fast burst sampling relies on the unsigned sampling variable wrapping
  // around, so no explicit period check is generated.
  if (IsFastSampling)
    return;

  // Create the condition for checking the period.
  Instruction *ThenTerm, *ElseTerm;
  IRBuilder<> PeriodCondBuilder(SamplingVarIncr);
  auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE(
      NewSamplingVarVal, GetConstant(PeriodCondBuilder, SampledPeriod));
  BranchWeight = MDB.createBranchWeights(1, SampledPeriod);
  SplitBlockAndInsertIfThenElse(PeriodCond, SamplingVarIncr, &ThenTerm,
                                &ElseTerm, BranchWeight);

  // For the simple sampling, the counter update happens in sampling var reset.
  if (IsSimpleSampling)
    I->moveBefore(ThenTerm);

  // Then-branch: period reached, reset the sampling variable to zero.
  // Else-branch: store the incremented value.
  IRBuilder<> ResetBuilder(ThenTerm);
  ResetBuilder.CreateStore(GetConstant(ResetBuilder, 0), SamplingVar);
  SamplingVarIncr->moveBefore(ElseTerm);
}
792
793
/// Lower every profiling intrinsic in \p F and then run counter promotion.
/// Returns true if at least one intrinsic was lowered or erased.
bool InstrLowerer::lowerIntrinsics(Function *F) {
  PromotionCandidates.clear();

  // Snapshot the intrinsics first. Sampling moves them into different basic
  // blocks, which would otherwise break iteration over the function; keeping
  // them in a buffer stays compatible with sampling.
  SmallVector<InstrProfInstBase *, 8> Pending;
  for (BasicBlock &Block : *F)
    for (Instruction &Inst : llvm::make_early_inc_range(Block))
      if (auto *ProfInst = dyn_cast<InstrProfInstBase>(&Inst))
        Pending.push_back(ProfInst);

  bool Changed = false;
  for (InstrProfInstBase *ProfInst : Pending) {
    doSampling(ProfInst);

    // Dispatch on the concrete intrinsic kind. The order of the checks is
    // significant and must be kept.
    bool Handled = true;
    if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(ProfInst))
      lowerIncrement(Step);
    else if (auto *Incr = dyn_cast<InstrProfIncrementInst>(ProfInst))
      lowerIncrement(Incr);
    else if (auto *Stamp = dyn_cast<InstrProfTimestampInst>(ProfInst))
      lowerTimestamp(Stamp);
    else if (auto *Cover = dyn_cast<InstrProfCoverInst>(ProfInst))
      lowerCover(Cover);
    else if (auto *ValProf = dyn_cast<InstrProfValueProfileInst>(ProfInst))
      lowerValueProfileInst(ValProf);
    else if (auto *Params = dyn_cast<InstrProfMCDCBitmapParameters>(ProfInst))
      Params->eraseFromParent(); // Nothing to lower; just drop it.
    else if (auto *Update = dyn_cast<InstrProfMCDCTVBitmapUpdate>(ProfInst))
      lowerMCDCTestVectorBitmapUpdate(Update);
    else
      Handled = false;

    if (Handled)
      Changed = true;
  }

  if (Changed)
    promoteCounterLoadStores(F);
  return Changed;
}
840
841
bool InstrLowerer::isRuntimeCounterRelocationEnabled() const {
842
// Mach-O don't support weak external references.
843
if (TT.isOSBinFormatMachO())
844
return false;
845
846
if (RuntimeCounterRelocation.getNumOccurrences() > 0)
847
return RuntimeCounterRelocation;
848
849
// Fuchsia uses runtime counter relocation by default.
850
return TT.isOSFuchsia();
851
}
852
853
bool InstrLowerer::isSamplingEnabled() const {
854
if (SampledInstr.getNumOccurrences() > 0)
855
return SampledInstr;
856
return Options.Sampling;
857
}
858
859
bool InstrLowerer::isCounterPromotionEnabled() const {
860
if (DoCounterPromotion.getNumOccurrences() > 0)
861
return DoCounterPromotion;
862
863
return Options.DoCounterPromotion;
864
}
865
866
/// Run counter promotion over the load/store pairs recorded in
/// PromotionCandidates for \p F.
///
/// Does nothing unless counter promotion is enabled. Candidates are grouped
/// by the loop that contains the counter load (pairs outside of any loop are
/// ignored), and a PGOCounterPromoter is then run on every loop of the
/// function.
void InstrLowerer::promoteCounterLoadStores(Function *F) {
  if (!isCounterPromotionEnabled())
    return;

  DominatorTree DT(*F);
  LoopInfo LI(DT);
  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;

  // BFI is constructed from a reference to BPI, so BPI is declared in the
  // same scope (and before BFI) to guarantee it outlives BFI.
  std::unique_ptr<BranchProbabilityInfo> BPI;
  std::unique_ptr<BlockFrequencyInfo> BFI;
  if (Options.UseBFIInPromotion) {
    BPI = std::make_unique<BranchProbabilityInfo>(*F, LI, &GetTLI(*F));
    BFI = std::make_unique<BlockFrequencyInfo>(*F, *BPI, LI);
  }

  // Bucket the candidates by their innermost enclosing loop.
  for (const auto &[CounterLoad, CounterStore] : PromotionCandidates) {
    Loop *ParentLoop = LI.getLoopFor(CounterLoad->getParent());
    if (!ParentLoop)
      continue;
    LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
  }

  SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();

  // Do a post-order traversal of the loops so that counter updates can be
  // iteratively hoisted outside the loop nest.
  for (Loop *L : llvm::reverse(Loops)) {
    PGOCounterPromoter Promoter(LoopPromotionCandidates, *L, LI, BFI.get());
    Promoter.run(&TotalCountersPromoted);
  }
}
900
901
static bool needsRuntimeHookUnconditionally(const Triple &TT) {
902
// On Fuchsia, we only need runtime hook if any counters are present.
903
if (TT.isOSFuchsia())
904
return false;
905
906
return true;
907
}
908
909
/// Check if the module contains uses of any profiling intrinsics.
910
static bool containsProfilingIntrinsics(Module &M) {
911
auto containsIntrinsic = [&](int ID) {
912
if (auto *F = M.getFunction(Intrinsic::getName(ID)))
913
return !F->use_empty();
914
return false;
915
};
916
return containsIntrinsic(llvm::Intrinsic::instrprof_cover) ||
917
containsIntrinsic(llvm::Intrinsic::instrprof_increment) ||
918
containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) ||
919
containsIntrinsic(llvm::Intrinsic::instrprof_timestamp) ||
920
containsIntrinsic(llvm::Intrinsic::instrprof_value_profile);
921
}
922
923
bool InstrLowerer::lower() {
924
bool MadeChange = false;
925
bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT);
926
if (NeedsRuntimeHook)
927
MadeChange = emitRuntimeHook();
928
929
if (!IsCS && isSamplingEnabled())
930
createProfileSamplingVar(M);
931
932
bool ContainsProfiling = containsProfilingIntrinsics(M);
933
GlobalVariable *CoverageNamesVar =
934
M.getNamedGlobal(getCoverageUnusedNamesVarName());
935
// Improve compile time by avoiding linear scans when there is no work.
936
if (!ContainsProfiling && !CoverageNamesVar)
937
return MadeChange;
938
939
// We did not know how many value sites there would be inside
940
// the instrumented function. This is counting the number of instrumented
941
// target value sites to enter it as field in the profile data variable.
942
for (Function &F : M) {
943
InstrProfCntrInstBase *FirstProfInst = nullptr;
944
for (BasicBlock &BB : F) {
945
for (auto I = BB.begin(), E = BB.end(); I != E; I++) {
946
if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
947
computeNumValueSiteCounts(Ind);
948
else {
949
if (FirstProfInst == nullptr &&
950
(isa<InstrProfIncrementInst>(I) || isa<InstrProfCoverInst>(I)))
951
FirstProfInst = dyn_cast<InstrProfCntrInstBase>(I);
952
// If the MCDCBitmapParameters intrinsic seen, create the bitmaps.
953
if (const auto &Params = dyn_cast<InstrProfMCDCBitmapParameters>(I))
954
static_cast<void>(getOrCreateRegionBitmaps(Params));
955
}
956
}
957
}
958
959
// Use a profile intrinsic to create the region counters and data variable.
960
// Also create the data variable based on the MCDCParams.
961
if (FirstProfInst != nullptr) {
962
static_cast<void>(getOrCreateRegionCounters(FirstProfInst));
963
}
964
}
965
966
if (EnableVTableValueProfiling)
967
for (GlobalVariable &GV : M.globals())
968
// Global variables with type metadata are virtual table variables.
969
if (GV.hasMetadata(LLVMContext::MD_type))
970
getOrCreateVTableProfData(&GV);
971
972
for (Function &F : M)
973
MadeChange |= lowerIntrinsics(&F);
974
975
if (CoverageNamesVar) {
976
lowerCoverageData(CoverageNamesVar);
977
MadeChange = true;
978
}
979
980
if (!MadeChange)
981
return false;
982
983
emitVNodes();
984
emitNameData();
985
emitVTableNames();
986
987
// Emit runtime hook for the cases where the target does not unconditionally
988
// require pulling in profile runtime, and coverage is enabled on code that is
989
// not eliminated by the front-end, e.g. unused functions with internal
990
// linkage.
991
if (!NeedsRuntimeHook && ContainsProfiling)
992
emitRuntimeHook();
993
994
emitRegistration();
995
emitUses();
996
emitInitialization();
997
return true;
998
}
999
1000
/// Get (declaring it in \p M if needed) the value-profiling runtime function
/// selected by \p CallType.
///
/// The function returns void; its parameter types come from the
/// VALUE_PROF_FUNC_PARAM entries of InstrProfData.inc. When \p TLI reports an
/// extension attribute for i32 parameters, it is attached to parameter 2.
static FunctionCallee getOrInsertValueProfilingCall(
    Module &M, const TargetLibraryInfo &TLI,
    ValueProfilingCallType CallType = ValueProfilingCallType::Default) {
  // NOTE(review): keep the name `Ctx` -- it is presumably referenced by the
  // parameter-type expressions expanded from InstrProfData.inc below.
  LLVMContext &Ctx = M.getContext();

  AttributeList AL;
  if (auto AK = TLI.getExtAttrForI32Param(false))
    AL = AL.addParamAttribute(Ctx, 2, AK);

  assert((CallType == ValueProfilingCallType::Default ||
          CallType == ValueProfilingCallType::MemOp) &&
         "Must be Default or MemOp");
  Type *ParamTypes[] = {
#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *CalleeTy =
      FunctionType::get(Type::getVoidTy(Ctx), ArrayRef(ParamTypes), false);

  const bool IsDefault = CallType == ValueProfilingCallType::Default;
  StringRef FuncName = IsDefault ? getInstrProfValueProfFuncName()
                                 : getInstrProfValueProfMemOpFuncName();
  return M.getOrInsertFunction(FuncName, CalleeTy, AL);
}
1024
1025
/// Record the value site referenced by \p Ind: the per-kind site count stored
/// in ProfileDataMap for the intrinsic's name variable is raised to at least
/// (site index + 1).
void InstrLowerer::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
  const uint64_t Kind = Ind->getValueKind()->getZExtValue();
  const uint64_t SiteIndex = Ind->getIndex()->getZExtValue();

  auto &NumSites = ProfileDataMap[Ind->getName()].NumValueSites[Kind];
  NumSites = std::max(NumSites, static_cast<uint32_t>(SiteIndex + 1));
}
1033
1034
/// Replace the value-profile intrinsic \p Ind with a call to the matching
/// value-profiling runtime function.
///
/// The call receives the profiled target value, the function's profile data
/// variable and a flattened site index (the intrinsic's index plus the site
/// counts of all value kinds preceding its own kind). The memop-size kind
/// uses the MemOp runtime entry point, all other kinds the default one.
void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
  // TODO: Value profiling heavily depends on the data section which is omitted
  // in lightweight mode. We need to move the value profile pointer to the
  // Counter struct to get this working.
  assert(
      !DebugInfoCorrelate && ProfileCorrelate == InstrProfCorrelator::NONE &&
      "Value profiling is not yet supported with lightweight instrumentation");

  auto It = ProfileDataMap.find(Ind->getName());
  assert(It != ProfileDataMap.end() && It->second.DataVar &&
         "value profiling detected in function with no counter incerement");
  GlobalVariable *DataVar = It->second.DataVar;

  // Flatten (kind, index) into one index across all value kinds.
  const uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
  uint64_t Index = Ind->getIndex()->getZExtValue();
  for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
    Index += It->second.NumValueSites[Kind];

  const bool IsMemOpSize =
      ValueKind == llvm::InstrProfValueKind::IPVK_MemOPSize;
  const auto &TLI = GetTLI(*Ind->getFunction());

  // To support value profiling calls within Windows exception handlers,
  // funclet information contained within operand bundles needs to be copied
  // over to the library call. This is required for the IR to be processed by
  // the WinEHPrepare pass.
  SmallVector<OperandBundleDef, 1> OpBundles;
  Ind->getOperandBundlesAsDefs(OpBundles);

  IRBuilder<> Builder(Ind);
  Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
  FunctionCallee Callee = getOrInsertValueProfilingCall(
      M, TLI,
      IsMemOpSize ? ValueProfilingCallType::MemOp
                  : ValueProfilingCallType::Default);
  CallInst *Call = Builder.CreateCall(Callee, Args, OpBundles);

  if (auto AK = TLI.getExtAttrForI32Param(false))
    Call->addParamAttr(2, AK);
  Ind->replaceAllUsesWith(Call);
  Ind->eraseFromParent();
}
1079
1080
/// Return the module's global named \p VarName, creating it when it does not
/// exist yet.
///
/// A newly created variable is a zero-initialized i64 with linkonce_odr
/// linkage and hidden visibility; on targets supporting COMDAT it is placed
/// in a COMDAT named after the variable.
GlobalVariable *InstrLowerer::getOrCreateBiasVar(StringRef VarName) {
  if (GlobalVariable *Existing = M.getGlobalVariable(VarName))
    return Existing;

  Type *Int64Ty = Type::getInt64Ty(M.getContext());

  // Compiler must define this variable when runtime counter relocation
  // is being used. Runtime has a weak external reference that is used
  // to check whether that's the case or not.
  auto *Bias = new GlobalVariable(M, Int64Ty, /*isConstant=*/false,
                                  GlobalValue::LinkOnceODRLinkage,
                                  Constant::getNullValue(Int64Ty), VarName);
  Bias->setVisibility(GlobalVariable::HiddenVisibility);

  // A definition that's weak (linkonce_odr) without being in a COMDAT
  // section wouldn't lead to link errors, but it would lead to a dead
  // data word from every TU but one. Putting it in COMDAT ensures there
  // will be exactly one data slot in the link.
  if (TT.supportsCOMDAT())
    Bias->setComdat(M.getOrInsertComdat(VarName));

  return Bias;
}
1102
1103
/// Emit, right before \p I, the address of the counter that \p I refers to.
///
/// The address is a GEP into the function's region counters at the
/// intrinsic's index. When runtime counter relocation is enabled, the value
/// of the counter bias variable is added to it; the bias is loaded once per
/// function at the top of the entry block and cached in
/// FunctionToProfileBiasMap.
Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
  auto *Counters = getOrCreateRegionCounters(I);
  IRBuilder<> Builder(I);

  // Counters used by a timestamp intrinsic get 8-byte alignment.
  if (isa<InstrProfTimestampInst>(I))
    Counters->setAlignment(Align(8));

  const uint64_t CounterIdx = I->getIndex()->getZExtValue();
  auto *Addr = Builder.CreateConstInBoundsGEP2_32(Counters->getValueType(),
                                                  Counters, 0, CounterIdx);

  if (!isRuntimeCounterRelocationEnabled())
    return Addr;

  Type *Int64Ty = Type::getInt64Ty(M.getContext());
  Function *Fn = I->getParent()->getParent();
  LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn];
  if (!BiasLI) {
    // First relocated counter of this function: load the bias in the entry
    // block so that later counters of the function can reuse it.
    IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
    auto *Bias = getOrCreateBiasVar(getInstrProfCounterBiasVarName());
    BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias, "profc_bias");
    // Bias doesn't change after startup.
    BiasLI->setMetadata(LLVMContext::MD_invariant_load,
                        MDNode::get(M.getContext(), std::nullopt));
  }

  // Relocated address = inttoptr(ptrtoint(Addr) + bias).
  auto *AddrAsInt = Builder.CreatePtrToInt(Addr, Int64Ty);
  auto *Relocated = Builder.CreateAdd(AddrAsInt, BiasLI);
  return Builder.CreateIntToPtr(Relocated, Addr->getType());
}
1130
1131
/// Create `void [[alwaysinline]] rmw_or(uint8_t *ArgAddr, uint8_t ArgVal)`
1132
/// "Basic" sequence is `*ArgAddr |= ArgVal`
1133
Function *InstrLowerer::createRMWOrFunc() {
1134
auto &Ctx = M.getContext();
1135
auto *Int8Ty = Type::getInt8Ty(Ctx);
1136
Function *Fn = Function::Create(
1137
FunctionType::get(Type::getVoidTy(Ctx),
1138
{PointerType::getUnqual(Ctx), Int8Ty}, false),
1139
Function::LinkageTypes::PrivateLinkage, "rmw_or", M);
1140
Fn->addFnAttr(Attribute::AlwaysInline);
1141
auto *ArgAddr = Fn->getArg(0);
1142
auto *ArgVal = Fn->getArg(1);
1143
IRBuilder<> Builder(BasicBlock::Create(Ctx, "", Fn));
1144
1145
// Load profile bitmap byte.
1146
// %mcdc.bits = load i8, ptr %4, align 1
1147
auto *Bitmap = Builder.CreateLoad(Int8Ty, ArgAddr, "mcdc.bits");
1148
1149
if (Options.Atomic || AtomicCounterUpdateAll) {
1150
// If ((Bitmap & Val) != Val), then execute atomic (Bitmap |= Val).
1151
// Note, just-loaded Bitmap might not be up-to-date. Use it just for
1152
// early testing.
1153
auto *Masked = Builder.CreateAnd(Bitmap, ArgVal);
1154
auto *ShouldStore = Builder.CreateICmpNE(Masked, ArgVal);
1155
auto *ThenTerm = BasicBlock::Create(Ctx, "", Fn);
1156
auto *ElseTerm = BasicBlock::Create(Ctx, "", Fn);
1157
// Assume updating will be rare.
1158
auto *Unlikely = MDBuilder(Ctx).createUnlikelyBranchWeights();
1159
Builder.CreateCondBr(ShouldStore, ThenTerm, ElseTerm, Unlikely);
1160
1161
IRBuilder<> ThenBuilder(ThenTerm);
1162
ThenBuilder.CreateAtomicRMW(AtomicRMWInst::Or, ArgAddr, ArgVal,
1163
MaybeAlign(), AtomicOrdering::Monotonic);
1164
ThenBuilder.CreateRetVoid();
1165
1166
IRBuilder<> ElseBuilder(ElseTerm);
1167
ElseBuilder.CreateRetVoid();
1168
1169
return Fn;
1170
}
1171
1172
// Perform logical OR of profile bitmap byte and shifted bit offset.
1173
// %8 = or i8 %mcdc.bits, %7
1174
auto *Result = Builder.CreateOr(Bitmap, ArgVal);
1175
1176
// Store the updated profile bitmap byte.
1177
// store i8 %8, ptr %3, align 1
1178
Builder.CreateStore(Result, ArgAddr);
1179
1180
// Terminator
1181
Builder.CreateRetVoid();
1182
1183
return Fn;
1184
}
1185
1186
/// Build a call `rmw_or(Addr, Val)`; the helper function is created on first
/// use and cached in RMWOrFunc. The call is created without an insertion
/// point, so the caller has to insert it.
CallInst *InstrLowerer::getRMWOrCall(Value *Addr, Value *Val) {
  if (RMWOrFunc == nullptr)
    RMWOrFunc = createRMWOrFunc();

  CallInst *Call = CallInst::Create(RMWOrFunc, {Addr, Val});
  return Call;
}
1192
1193
/// Return the address of the region bitmaps used by the MC/DC update \p I,
/// creating the bitmaps if necessary.
///
/// Runtime counter relocation is not applied to bitmaps; when it is enabled
/// a warning diagnostic is emitted and the unrelocated address is returned.
Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) {
  auto *Bitmaps = getOrCreateRegionBitmaps(I);

  if (isRuntimeCounterRelocationEnabled()) {
    LLVMContext &Ctx = M.getContext();
    Ctx.diagnose(DiagnosticInfoPGOProfile(
        M.getName().data(),
        Twine("Runtime counter relocation is presently not supported for MC/DC "
              "bitmaps."),
        DS_Warning));
  }

  return Bitmaps;
}
1208
1209
/// Lower a cover intrinsic into a store of i8 0 to its counter, then erase
/// the intrinsic.
void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) {
  Value *CounterAddr = getCounterAddress(CoverInstruction);

  // We store zero to represent that this block is covered.
  IRBuilder<> Builder(CoverInstruction);
  Value *Covered = Builder.getInt8(0);
  Builder.CreateStore(Covered, CounterAddr);

  CoverInstruction->eraseFromParent();
}
1216
1217
void InstrLowerer::lowerTimestamp(
1218
InstrProfTimestampInst *TimestampInstruction) {
1219
assert(TimestampInstruction->getIndex()->isZeroValue() &&
1220
"timestamp probes are always the first probe for a function");
1221
auto &Ctx = M.getContext();
1222
auto *TimestampAddr = getCounterAddress(TimestampInstruction);
1223
IRBuilder<> Builder(TimestampInstruction);
1224
auto *CalleeTy =
1225
FunctionType::get(Type::getVoidTy(Ctx), TimestampAddr->getType(), false);
1226
auto Callee = M.getOrInsertFunction(
1227
INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SET_TIMESTAMP), CalleeTy);
1228
Builder.CreateCall(Callee, {TimestampAddr});
1229
TimestampInstruction->eraseFromParent();
1230
}
1231
1232
/// Lower an increment intrinsic into an update of its counter by the
/// intrinsic's step, then erase the intrinsic.
///
/// The update is a monotonic atomic add when atomic updates are requested
/// (Options.Atomic, AtomicCounterUpdateAll, or AtomicFirstCounter for the
/// counter with index 0). Otherwise it is a load/add/store sequence, which is
/// also recorded in PromotionCandidates when counter promotion is enabled.
void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) {
  auto *Addr = getCounterAddress(Inc);
  IRBuilder<> Builder(Inc);

  const bool UseAtomic =
      Options.Atomic || AtomicCounterUpdateAll ||
      (Inc->getIndex()->isZeroValue() && AtomicFirstCounter);

  if (UseAtomic) {
    Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
                            MaybeAlign(), AtomicOrdering::Monotonic);
    Inc->eraseFromParent();
    return;
  }

  Value *Step = Inc->getStep();
  LoadInst *OldCount = Builder.CreateLoad(Step->getType(), Addr, "pgocount");
  auto *NewCount = Builder.CreateAdd(OldCount, Inc->getStep());
  auto *Store = Builder.CreateStore(NewCount, Addr);
  if (isCounterPromotionEnabled())
    PromotionCandidates.emplace_back(cast<Instruction>(OldCount), Store);

  Inc->eraseFromParent();
}
1250
1251
/// Consume the coverage names variable: every function-name global referenced
/// by its initializer is given private linkage and appended to
/// ReferencedNames, after which \p CoverageNamesVar itself is erased.
void InstrLowerer::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
  auto *Names = cast<ConstantArray>(CoverageNamesVar->getInitializer());

  const unsigned NumNames = Names->getNumOperands();
  for (unsigned Idx = 0; Idx != NumNames; ++Idx) {
    Constant *NC = Names->getOperand(Idx);

    // The operand may wrap the name global in pointer casts; look through
    // them.
    Value *V = NC->stripPointerCasts();
    assert(isa<GlobalVariable>(V) && "Missing reference to function name");
    auto *Name = cast<GlobalVariable>(V);

    Name->setLinkage(GlobalValue::PrivateLinkage);
    ReferencedNames.push_back(Name);

    // Drop the operands of a wrapping constant expression.
    if (isa<ConstantExpr>(NC))
      NC->dropAllReferences();
  }

  CoverageNamesVar->eraseFromParent();
}
1267
1268
void InstrLowerer::lowerMCDCTestVectorBitmapUpdate(
1269
InstrProfMCDCTVBitmapUpdate *Update) {
1270
IRBuilder<> Builder(Update);
1271
auto *Int8Ty = Type::getInt8Ty(M.getContext());
1272
auto *Int32Ty = Type::getInt32Ty(M.getContext());
1273
auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();
1274
auto *BitmapAddr = getBitmapAddress(Update);
1275
1276
// Load Temp Val + BitmapIdx.
1277
// %mcdc.temp = load i32, ptr %mcdc.addr, align 4
1278
auto *Temp = Builder.CreateAdd(
1279
Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"),
1280
Update->getBitmapIndex());
1281
1282
// Calculate byte offset using div8.
1283
// %1 = lshr i32 %mcdc.temp, 3
1284
auto *BitmapByteOffset = Builder.CreateLShr(Temp, 0x3);
1285
1286
// Add byte offset to section base byte address.
1287
// %4 = getelementptr inbounds i8, ptr @__profbm_test, i32 %1
1288
auto *BitmapByteAddr =
1289
Builder.CreateInBoundsPtrAdd(BitmapAddr, BitmapByteOffset);
1290
1291
// Calculate bit offset into bitmap byte by using div8 remainder (AND ~8)
1292
// %5 = and i32 %mcdc.temp, 7
1293
// %6 = trunc i32 %5 to i8
1294
auto *BitToSet = Builder.CreateTrunc(Builder.CreateAnd(Temp, 0x7), Int8Ty);
1295
1296
// Shift bit offset left to form a bitmap.
1297
// %7 = shl i8 1, %6
1298
auto *ShiftedVal = Builder.CreateShl(Builder.getInt8(0x1), BitToSet);
1299
1300
Builder.Insert(getRMWOrCall(BitmapByteAddr, ShiftedVal));
1301
Update->eraseFromParent();
1302
}
1303
1304
/// Get the name of a profiling variable for a particular function.
1305
static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,
1306
bool &Renamed) {
1307
StringRef NamePrefix = getInstrProfNameVarPrefix();
1308
StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
1309
Function *F = Inc->getParent()->getParent();
1310
Module *M = F->getParent();
1311
if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
1312
!canRenameComdatFunc(*F)) {
1313
Renamed = false;
1314
return (Prefix + Name).str();
1315
}
1316
Renamed = true;
1317
uint64_t FuncHash = Inc->getHash()->getZExtValue();
1318
SmallVector<char, 24> HashPostfix;
1319
if (Name.ends_with((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
1320
return (Prefix + Name).str();
1321
return (Prefix + Name + "." + Twine(FuncHash)).str();
1322
}
1323
1324
/// Decide whether the address of \p F should be stored in its profile data.
static inline bool shouldRecordFunctionAddr(Function *F) {
  // Only record function addresses if IR PGO is enabled or if clang value
  // profiling is enabled. Recording function addresses greatly increases
  // object file size, because it prevents the inliner from deleting functions
  // that have been inlined everywhere.
  if (!profDataReferencedByCode(*F->getParent()))
    return false;

  // Check the linkage: anything that is neither linkonce, local nor
  // available_externally always gets its address recorded.
  const bool IsLinkOnce = F->hasLinkOnceLinkage();
  const bool IsLocal = F->hasLocalLinkage();
  const bool IsAvailableExternally = F->hasAvailableExternallyLinkage();
  if (!(IsLinkOnce || IsLocal || IsAvailableExternally))
    return true;

  // A function marked 'alwaysinline' with available_externally linkage can't
  // have its address taken. Doing so would create an undefined external ref
  // to the function, which would fail to link.
  if (IsAvailableExternally && F->hasFnAttribute(Attribute::AlwaysInline))
    return false;

  // Prohibit function address recording if the function is both internal and
  // COMDAT. This avoids the profile data variable referencing internal
  // symbols in COMDAT.
  if (IsLocal && F->hasComdat())
    return false;

  // Check uses of this function for other than direct calls or invokes to it.
  // Inline virtual functions have linkOnceODR linkage. When a key method
  // exists, the vtable will only be emitted in the TU where the key method
  // is defined. In a TU where vtable is not available, the function won't
  // be 'addresstaken'. If its address is not recorded here, the profile data
  // with missing address may be picked by the linker leading to missing
  // indirect call target info.
  return F->hasAddressTaken() || IsLinkOnce;
}
1360
1361
/// Decide whether profile data must reference \p Fn directly instead of
/// going through an alias of it.
///
/// \returns false only when it is OK to use an alias.
static inline bool shouldUsePublicSymbol(Function *Fn) {
  // It isn't legal to make an alias of this function at all.
  if (Fn->isDeclarationForLinker())
    return true;

  // Symbols with local linkage can just use the symbol directly without
  // introducing relocations.
  if (Fn->hasLocalLinkage())
    return true;

  // PGO + ThinLTO + CFI cause duplicate symbols to be introduced due to some
  // unfavorable interaction between the new alias and the alias renaming done
  // in LowerTypeTests under ThinLTO. For comdat functions that would normally
  // be deduplicated, but the renaming scheme ends up preventing renaming,
  // since it creates unique names for each alias, resulting in duplicated
  // symbols. In the future, we should update the CFI related passes to
  // migrate these aliases to the same module as the jump-table they refer to
  // will be defined.
  if (Fn->hasMetadata(LLVMContext::MD_type))
    return true;

  // For comdat functions, an alias would need the same linkage as the
  // original function and hidden visibility. There is no point in adding an
  // alias with identical linkage and visibility to avoid introducing symbolic
  // relocations. In every other case it's OK to use an alias.
  const bool IsHidden =
      Fn->getVisibility() == GlobalValue::VisibilityTypes::HiddenVisibility;
  return Fn->hasComdat() && IsHidden;
}
1391
1392
/// Compute the function-address constant stored in the profile data of
/// \p Fn: a null pointer, the function itself, or a newly created alias named
/// "<function name>.local".
static inline Constant *getFuncAddrForProfData(Function *Fn) {
  // Store a nullptr in __llvm_profd, if we shouldn't use a real address.
  if (!shouldRecordFunctionAddr(Fn)) {
    auto *Int8PtrTy = PointerType::getUnqual(Fn->getContext());
    return ConstantPointerNull::get(Int8PtrTy);
  }

  // If we can't use an alias, we must use the public symbol, even though this
  // may require a symbolic relocation.
  if (shouldUsePublicSymbol(Fn))
    return Fn;

  // When possible use a private alias to avoid symbolic relocations.
  GlobalAlias *Alias = GlobalAlias::create(
      GlobalValue::LinkageTypes::PrivateLinkage, Fn->getName() + ".local", Fn);

  // When the instrumented function is a COMDAT function, we cannot use a
  // private alias. If we did, we would create reference to a local label in
  // this function's section. If this version of the function isn't selected
  // by the linker, then the metadata would introduce a reference to a
  // discarded section. So, for COMDAT functions, we need to adjust the linkage
  // of the alias. Using hidden visibility avoids a dynamic relocation and an
  // entry in the dynamic symbol table.
  //
  // Note that this handles COMDAT functions with visibility other than
  // Hidden, since that case is covered in shouldUsePublicSymbol()
  if (Fn->hasComdat()) {
    Alias->setLinkage(Fn->getLinkage());
    Alias->setVisibility(GlobalValue::VisibilityTypes::HiddenVisibility);
  }

  // appendToCompilerUsed(*Fn->getParent(), {GA});

  return Alias;
}
1426
1427
static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
1428
// compiler-rt uses linker support to get data/counters/name start/end for
1429
// ELF, COFF, Mach-O and XCOFF.
1430
if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() ||
1431
TT.isOSBinFormatMachO() || TT.isOSBinFormatXCOFF())
1432
return false;
1433
1434
return true;
1435
}
1436
1437
void InstrLowerer::maybeSetComdat(GlobalVariable *GV, GlobalObject *GO,
1438
StringRef CounterGroupName) {
1439
// Place lowered global variables in a comdat group if the associated function
1440
// or global variable is a COMDAT. This will make sure that only one copy of
1441
// global variable (e.g. function counters) of the COMDAT function will be
1442
// emitted after linking.
1443
bool NeedComdat = needsComdatForCounter(*GO, M);
1444
bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
1445
1446
if (!UseComdat)
1447
return;
1448
1449
// Keep in mind that this pass may run before the inliner, so we need to
1450
// create a new comdat group (for counters, profiling data, etc). If we use
1451
// the comdat of the parent function, that will result in relocations against
1452
// discarded sections.
1453
//
1454
// If the data variable is referenced by code, non-counter variables (notably
1455
// profiling data) and counters have to be in different comdats for COFF
1456
// because the Visual C++ linker will report duplicate symbol errors if there
1457
// are multiple external symbols with the same name marked
1458
// IMAGE_COMDAT_SELECT_ASSOCIATIVE.
1459
StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode
1460
? GV->getName()
1461
: CounterGroupName;
1462
Comdat *C = M.getOrInsertComdat(GroupName);
1463
1464
if (!NeedComdat) {
1465
// Object file format must be ELF since `UseComdat && !NeedComdat` is true.
1466
//
1467
// For ELF, when not using COMDAT, put counters, data and values into a
1468
// nodeduplicate COMDAT which is lowered to a zero-flag section group. This
1469
// allows -z start-stop-gc to discard the entire group when the function is
1470
// discarded.
1471
C->setSelectionKind(Comdat::NoDeduplicate);
1472
}
1473
GV->setComdat(C);
1474
// COFF doesn't allow the comdat group leader to have private linkage, so
1475
// upgrade private linkage to internal linkage to produce a symbol table
1476
// entry.
1477
if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage())
1478
GV->setLinkage(GlobalValue::InternalLinkage);
1479
}
1480
1481
/// Decide whether the address of the vtable variable \p GV should be stored
/// in its profile data.
///
/// The address is recorded when the profile data is referenced by code,
/// unless the variable has local linkage and is in a COMDAT.
static inline bool shouldRecordVTableAddr(GlobalVariable *GV) {
  if (!profDataReferencedByCode(*GV->getParent()))
    return false;

  // Variables that are neither linkonce, local nor available_externally are
  // never local, so they are covered by the check below as well.
  //
  // This avoids the profile data from referencing internal symbols in
  // COMDAT.
  const bool IsLocalInComdat = GV->hasLocalLinkage() && GV->hasComdat();
  return !IsLocalInComdat;
}
1496
1497
// FIXME: Introduce an internal alias like what's done for functions to reduce
1498
// the number of relocation entries.
1499
static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) {
1500
auto *Int8PtrTy = PointerType::getUnqual(GV->getContext());
1501
1502
// Store a nullptr in __profvt_ if a real address shouldn't be used.
1503
if (!shouldRecordVTableAddr(GV))
1504
return ConstantPointerNull::get(Int8PtrTy);
1505
1506
return ConstantExpr::getBitCast(GV, Int8PtrTy);
1507
}
1508
1509
/// Create the vtable profile data variable for \p GV unless it already
/// exists.
///
/// Declarations, available_externally variables and variables whose name
/// starts with "llvm.", "__llvm" or "__prof" are skipped. The new variable
/// is recorded in VTableDataMap, \p GV is appended to ReferencedVTables, and
/// the data variable is appended to UsedVars.
void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {
  assert(!DebugInfoCorrelate &&
         "Value profiling is not supported with lightweight instrumentation");
  if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
    return;

  // Skip llvm internal global variable or __prof variables.
  const StringRef GVName = GV->getName();
  if (GVName.starts_with("llvm.") || GVName.starts_with("__llvm") ||
      GVName.starts_with("__prof"))
    return;

  // VTableProfData already created
  auto Existing = VTableDataMap.find(GV);
  if (Existing != VTableDataMap.end() && Existing->second)
    return;

  GlobalValue::LinkageTypes Linkage = GV->getLinkage();
  GlobalValue::VisibilityTypes Visibility = GV->getVisibility();

  // This is to keep consistent with per-function profile data
  // for correctness.
  if (TT.isOSBinFormatXCOFF()) {
    Linkage = GlobalValue::InternalLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }

  // NOTE(review): Ctx, VTableAddr, PGOVTableName and VTableSizeVal are
  // presumably referenced by name from the INSTR_PROF_VTABLE_DATA entries of
  // InstrProfData.inc -- do not rename them without checking that file.
  LLVMContext &Ctx = M.getContext();
  Type *DataTypes[] = {
#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
#undef INSTR_PROF_VTABLE_DATA
  };

  auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));

  // Used by INSTR_PROF_VTABLE_DATA MACRO
  Constant *VTableAddr = getVTableAddrForProfData(GV);
  const std::string PGOVTableName = getPGOName(*GV);
  // Record the length of the vtable. This is needed since vtable pointers
  // loaded from C++ objects might be from the middle of a vtable definition.
  uint32_t VTableSizeVal =
      M.getDataLayout().getTypeAllocSize(GV->getValueType());

  Constant *DataVals[] = {
#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init,
#include "llvm/ProfileData/InstrProfData.inc"
#undef INSTR_PROF_VTABLE_DATA
  };

  auto *Data = new GlobalVariable(
      M, DataTy, /*constant=*/false, Linkage,
      ConstantStruct::get(DataTy, DataVals),
      getInstrProfVTableVarPrefix() + PGOVTableName);

  Data->setVisibility(Visibility);
  Data->setSection(getInstrProfSectionName(IPSK_vtab, TT.getObjectFormat()));
  Data->setAlignment(Align(8));

  maybeSetComdat(Data, GV, Data->getName());

  VTableDataMap[GV] = Data;
  ReferencedVTables.push_back(GV);

  // VTable <Hash, Addr> is used by runtime but not referenced by other
  // sections. Conservatively mark it linker retained.
  UsedVars.push_back(Data);
}
1578
1579
/// Create the counters (IPSK_cnts) or bitmaps (IPSK_bitmap) variable for the
/// function containing \p Inc and configure its visibility, section, linkage
/// and comdat.
///
/// \param Inc  the intrinsic the variable is created for. It must be an
///             InstrProfCntrInstBase for IPSK_cnts and an
///             InstrProfMCDCBitmapInstBase for IPSK_bitmap.
/// \param IPSK the section kind; any value other than IPSK_cnts or
///             IPSK_bitmap is unreachable.
/// \returns the newly created variable.
GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
                                                  InstrProfSectKind IPSK) {
  GlobalVariable *NamePtr = Inc->getName();

  // Match the linkage and visibility of the name global.
  Function *Fn = Inc->getParent()->getParent();
  GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
  GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();

  // Use internal rather than private linkage so the counter variable shows up
  // in the symbol table when using debug info for correlation.
  if ((DebugInfoCorrelate ||
       ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) &&
      TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage)
    Linkage = GlobalValue::InternalLinkage;

  // Due to the limitation of binder as of 2021/09/28, the duplicate weak
  // symbols in the same csect won't be discarded. When there are duplicate
  // weak symbols, we can NOT guarantee that the relocations get resolved to
  // the intended weak symbol, so we can not ensure the correctness of the
  // relative CounterPtr, so we have to use private linkage for counter and
  // data symbols.
  if (TT.isOSBinFormatXCOFF()) {
    Linkage = GlobalValue::PrivateLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }

  // Create the variable for the requested section kind. cast<> (rather than
  // dyn_cast<>) is used because the result is dereferenced unconditionally by
  // the create* helpers; a mismatching intrinsic kind is a caller bug.
  bool Renamed = false;
  GlobalVariable *Ptr;
  StringRef VarPrefix;
  std::string VarName;
  if (IPSK == IPSK_cnts) {
    VarPrefix = getInstrProfCountersVarPrefix();
    VarName = getVarName(Inc, VarPrefix, Renamed);
    auto *CntrIncrement = cast<InstrProfCntrInstBase>(Inc);
    Ptr = createRegionCounters(CntrIncrement, VarName, Linkage);
  } else if (IPSK == IPSK_bitmap) {
    VarPrefix = getInstrProfBitmapVarPrefix();
    VarName = getVarName(Inc, VarPrefix, Renamed);
    auto *BitmapUpdate = cast<InstrProfMCDCBitmapInstBase>(Inc);
    Ptr = createRegionBitmaps(BitmapUpdate, VarName, Linkage);
  } else {
    llvm_unreachable("Profile Section must be for Counters or Bitmaps");
  }

  Ptr->setVisibility(Visibility);
  // Put the counters and bitmaps in their own sections so linkers can
  // remove unneeded sections.
  Ptr->setSection(getInstrProfSectionName(IPSK, TT.getObjectFormat()));
  Ptr->setLinkage(Linkage);
  maybeSetComdat(Ptr, Fn, VarName);
  return Ptr;
}
1632
1633
// Allocate the bitmap global for the MC/DC bitmap intrinsic \p Inc.
//
// The variable is a zero-initialized i8 array with Inc->getNumBitmapBytes()
// elements, created in module M under \p Name with \p Linkage and an
// alignment of one byte. Section, visibility and comdat are left to the
// caller.
GlobalVariable *
InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
                                  StringRef Name,
                                  GlobalValue::LinkageTypes Linkage) {
  const uint64_t ByteCount = Inc->getNumBitmapBytes();
  Type *ByteTy = Type::getInt8Ty(M.getContext());
  ArrayType *BitmapArrTy = ArrayType::get(ByteTy, ByteCount);
  Constant *ZeroInit = Constant::getNullValue(BitmapArrTy);

  auto *Bitmap = new GlobalVariable(M, BitmapArrTy, /*isConstant=*/false,
                                    Linkage, ZeroInit, Name);
  Bitmap->setAlignment(Align(1));
  return Bitmap;
}
1644
1645
// Return the bitmap variable associated with the name variable of \p Inc,
// creating it on first use.
//
// On creation the bitmap is set up through setupProfileSection() with the
// IPSK_bitmap section kind, and both the variable and its byte count are
// cached in the ProfileDataMap entry.
GlobalVariable *
InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) {
  auto &PD = ProfileDataMap[Inc->getName()];

  if (!PD.RegionBitmaps) {
    // No bitmap recorded yet for this name: build it and remember its size.
    PD.RegionBitmaps = setupProfileSection(Inc, IPSK_bitmap);
    PD.NumBitmapBytes = Inc->getNumBitmapBytes();
  }
  return PD.RegionBitmaps;
}
1659
1660
// Allocate the counter array global for the counter intrinsic \p Inc.
//
// Coverage instrumentation (InstrProfCoverInst) gets one i8 per counter,
// initialized to all-ones and byte aligned. Every other counter intrinsic gets
// one zero-initialized i64 per counter, aligned to 8 bytes. Section,
// visibility and comdat are left to the caller.
GlobalVariable *
InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name,
                                   GlobalValue::LinkageTypes Linkage) {
  const uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
  auto &Ctx = M.getContext();

  ArrayType *CountersTy;
  Constant *Initializer;
  Align Alignment;
  if (isa<InstrProfCoverInst>(Inc)) {
    auto *ByteTy = Type::getInt8Ty(Ctx);
    CountersTy = ArrayType::get(ByteTy, NumCounters);
    // TODO: `Constant::getAllOnesValue()` does not yet accept an array type,
    // so the all-ones initializer is assembled element by element.
    std::vector<Constant *> AllOnes(NumCounters,
                                    Constant::getAllOnesValue(ByteTy));
    Initializer = ConstantArray::get(CountersTy, AllOnes);
    Alignment = Align(1);
  } else {
    CountersTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
    Initializer = Constant::getNullValue(CountersTy);
    Alignment = Align(8);
  }

  auto *Counters = new GlobalVariable(M, CountersTy, /*isConstant=*/false,
                                      Linkage, Initializer, Name);
  Counters->setAlignment(Alignment);
  return Counters;
}
1684
1685
// Return the counter variable associated with the name variable of \p Inc,
// creating it on first use.
//
// On creation the counters are set up through setupProfileSection() with the
// IPSK_cnts section kind. When profile data is correlated through debug info,
// the counter variable is additionally described by a DIGlobalVariable
// carrying function-name / CFG-hash / counter-count annotations (only if the
// enclosing function has a subprogram) and is queued in CompilerUsedVars.
// Finally createDataVariable() is invoked for \p Inc.
GlobalVariable *
InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
  GlobalVariable *NamePtr = Inc->getName();
  auto &PD = ProfileDataMap[NamePtr];
  if (GlobalVariable *Existing = PD.RegionCounters)
    return Existing;

  // First request for this name: create the counters in their profile
  // section and cache them.
  GlobalVariable *CounterPtr = setupProfileSection(Inc, IPSK_cnts);
  PD.RegionCounters = CounterPtr;

  const bool CorrelateViaDebugInfo =
      DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO;
  if (CorrelateViaDebugInfo) {
    LLVMContext &Ctx = M.getContext();
    Function *Fn = Inc->getParent()->getParent();
    if (auto *SP = Fn->getSubprogram()) {
      DIBuilder DB(M, true, SP->getUnit());

      // Each annotation is a (key, value) metadata pair.
      Metadata *NameKV[] = {
          MDString::get(Ctx, InstrProfCorrelator::FunctionNameAttributeName),
          MDString::get(Ctx, getPGOFuncNameVarInitializer(NamePtr)),
      };
      Metadata *HashKV[] = {
          MDString::get(Ctx, InstrProfCorrelator::CFGHashAttributeName),
          ConstantAsMetadata::get(Inc->getHash()),
      };
      Metadata *CountKV[] = {
          MDString::get(Ctx, InstrProfCorrelator::NumCountersAttributeName),
          ConstantAsMetadata::get(Inc->getNumCounters()),
      };
      Metadata *AnnotationNodes[] = {
          MDNode::get(Ctx, NameKV),
          MDNode::get(Ctx, HashKV),
          MDNode::get(Ctx, CountKV),
      };
      auto Annotations = DB.getOrCreateArray(AnnotationNodes);

      // Describe the counter variable in debug info, scoped to the function's
      // subprogram, and attach the annotations to it.
      auto *DICounter = DB.createGlobalVariableExpression(
          SP, CounterPtr->getName(), /*LinkageName=*/StringRef(), SP->getFile(),
          /*LineNo=*/0, DB.createUnspecifiedType("Profile Data Type"),
          CounterPtr->hasLocalLinkage(), /*IsDefined=*/true, /*Expr=*/nullptr,
          /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0,
          Annotations);
      CounterPtr->addDebugInfo(DICounter);
      DB.finalize();
    }

    // Mark the counter variable as used so that it isn't optimized out.
    CompilerUsedVars.push_back(PD.RegionCounters);
  }

  // Create the data variable (if it doesn't already exist).
  createDataVariable(Inc);

  return PD.RegionCounters;
}
1739
1740
// Build the per-function profile data record for the function containing
// \p Inc and cache it in ProfileDataMap.
//
// Nothing is emitted when profile data is correlated through debug info, or
// when a record already exists for this name variable. Otherwise the record is
// created, placed in the data section (the covdata section under binary
// correlation), queued in CompilerUsedVars, and the name variable is switched
// to private linkage and queued in ReferencedNames.
//
// NOTE(review): several locals below (Ctx, IntPtrTy, Int16ArrayTy,
// Int16ArrayVals, FunctionAddr, ValuesPtrExpr, NumCounters, NumBitmapBytes,
// RelativeCounterPtr, RelativeBitmapPtr) have no other reader in this
// function; presumably the INSTR_PROF_DATA entries in InstrProfData.inc refer
// to them by name. Check that file before renaming any of them.
void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
  // When debug information is correlated to profile data, a data variable
  // is not needed.
  const bool CorrelateViaDebugInfo =
      DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO;
  if (CorrelateViaDebugInfo)
    return;

  GlobalVariable *NamePtr = Inc->getName();
  auto &PD = ProfileDataMap[NamePtr];

  // The record is created at most once per name variable.
  if (PD.DataVar)
    return;

  LLVMContext &Ctx = M.getContext();
  Function *Fn = Inc->getParent()->getParent();

  // Start from the linkage and visibility of the name variable.
  GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
  GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();

  // Due to the limitation of binder as of 2021/09/28, the duplicate weak
  // symbols in the same csect won't be discarded. When there are duplicate weak
  // symbols, we can NOT guarantee that the relocations get resolved to the
  // intended weak symbol, so we can not ensure the correctness of the relative
  // CounterPtr, so we have to use private linkage for counter and data symbols.
  if (TT.isOSBinFormatXCOFF()) {
    Linkage = GlobalValue::PrivateLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }

  const bool NeedComdat = needsComdatForCounter(*Fn, M);
  bool Renamed;

  // The counters name is the comdat key for everything emitted here, so the
  // data variable's section stays anchored to the profile counters.
  std::string CntsVarName =
      getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed);
  std::string DataVarName =
      getVarName(Inc, getInstrProfDataVarPrefix(), Renamed);

  auto *Int8PtrTy = PointerType::getUnqual(Ctx);
  // Stays null unless the value-profile site array is statically allocated
  // for this function below.
  Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);

  // Total number of value sites across all value kinds.
  uint64_t NS = 0;
  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
    NS += PD.NumValueSites[Kind];

  const bool StaticallyAllocateValues =
      NS > 0 && ValueProfileStaticAlloc &&
      !needsRuntimeRegistrationOfSectionRange(TT);
  if (StaticallyAllocateValues) {
    // One i64 slot per value site, zero-initialized, in the vals section.
    ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
    auto *ValuesVar = new GlobalVariable(
        M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),
        getVarName(Inc, getInstrProfValuesVarPrefix(), Renamed));
    ValuesVar->setVisibility(Visibility);
    setGlobalVariableLargeSection(TT, *ValuesVar);
    ValuesVar->setSection(
        getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
    ValuesVar->setAlignment(Align(8));
    maybeSetComdat(ValuesVar, Fn, CntsVarName);
    ValuesPtrExpr = ValuesVar;
  }

  uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
  auto *CounterPtr = PD.RegionCounters;

  uint64_t NumBitmapBytes = PD.NumBitmapBytes;

  // Assemble the struct type of the data record from the field list in
  // InstrProfData.inc.
  auto *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext());
  auto *Int16Ty = Type::getInt16Ty(Ctx);
  auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
  Type *DataTypes[] = {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));

  Constant *FunctionAddr = getFuncAddrForProfData(Fn);

  // Per-kind value site counts, as i16 constants.
  Constant *Int16ArrayVals[IPVK_Last + 1];
  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
    Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);

  // If the data variable is not referenced by code (if we don't emit
  // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
  // data variable live under linker GC, the data variable can be private. This
  // optimization applies to ELF.
  //
  // On COFF, a comdat leader cannot be local so we require DataReferencedByCode
  // to be false.
  //
  // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
  // that other copies must have the same CFG and cannot have value profiling.
  // If no hash suffix, other profd copies may be referenced by code.
  const bool OtherCopiesMayBeReferenced =
      DataReferencedByCode && NeedComdat && !Renamed;
  const bool FormatAllowsPrivateData =
      TT.isOSBinFormatELF() ||
      (!DataReferencedByCode && TT.isOSBinFormatCOFF());
  if (NS == 0 && !OtherCopiesMayBeReferenced && FormatAllowsPrivateData) {
    Linkage = GlobalValue::PrivateLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }

  // The record is created without an initializer because, outside binary
  // correlation, the initializer refers to the record's own address.
  auto *Data =
      new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName);
  GlobalVariable *BitmapPtr = PD.RegionBitmaps;

  // With binary profile correlation, profile data is not loaded into memory.
  // profile data must reference profile counter with an absolute relocation.
  // Otherwise the counter (and bitmap) are referenced with a label difference
  // against the data record (link-time constant).
  const bool IsBinaryCorrelate =
      ProfileCorrelate == InstrProfCorrelator::BINARY;
  const InstrProfSectKind DataSectionKind =
      IsBinaryCorrelate ? IPSK_covdata : IPSK_data;

  auto AddressOf = [&](GlobalVariable *GV) -> Constant * {
    return ConstantExpr::getPtrToInt(GV, IntPtrTy);
  };
  auto EncodeReference = [&](GlobalVariable *Target) -> Constant * {
    Constant *TargetAddr = AddressOf(Target);
    if (IsBinaryCorrelate)
      return TargetAddr;
    return ConstantExpr::getSub(TargetAddr, AddressOf(Data));
  };

  Constant *RelativeCounterPtr = EncodeReference(CounterPtr);
  // A function without a bitmap records zero here.
  Constant *RelativeBitmapPtr = ConstantInt::get(IntPtrTy, 0);
  if (BitmapPtr != nullptr)
    RelativeBitmapPtr = EncodeReference(BitmapPtr);

  // Field initializers, again expanded from InstrProfData.inc.
  Constant *DataVals[] = {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  Data->setInitializer(ConstantStruct::get(DataTy, DataVals));

  Data->setVisibility(Visibility);
  Data->setSection(
      getInstrProfSectionName(DataSectionKind, TT.getObjectFormat()));
  Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
  maybeSetComdat(Data, Fn, CntsVarName);

  PD.DataVar = Data;

  // Mark the data variable as used so that it isn't stripped out.
  CompilerUsedVars.push_back(Data);

  // Now that the linkage set by the FE has been passed to the data and counter
  // variables, reset Name variable's linkage to private so that it can be
  // removed later by the compiler.
  NamePtr->setLinkage(GlobalValue::PrivateLinkage);

  // Collect the referenced names to be used by emitNameData.
  ReferencedNames.push_back(NamePtr);
}
1887
1888
// Emit the statically allocated pool of value-profile nodes.
//
// The pool is a zero-initialized array of the node struct described by the
// INSTR_PROF_VALUE_NODE entries in InstrProfData.inc, placed in the vnodes
// section and queued in UsedVars. Nothing is emitted when static allocation
// is disabled, when the target needs runtime registration of section ranges,
// or when no function recorded any value sites.
void InstrLowerer::emitVNodes() {
  // For now static allocation is only supported on platforms that do not
  // require runtime registration to discover named section start/end.
  if (!ValueProfileStaticAlloc || needsRuntimeRegistrationOfSectionRange(TT))
    return;

  // Count the value sites of every kind across all profiled functions.
  size_t TotalNS = 0;
  for (auto &Entry : ProfileDataMap) {
    const auto &FuncData = Entry.second;
    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
      TotalNS += FuncData.NumValueSites[Kind];
  }
  if (TotalNS == 0)
    return;

  uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
  // Heuristic for small programs with very few total value sites.
  // The default value of vp-counters-per-site is chosen based on
  // the observation that large apps usually have a low percentage
  // of value sites that actually have any profile data, and thus
  // the average number of counters per site is low. For small
  // apps with very few sites, this may not be true. Bump up the
  // number of counters in this case.
#define INSTR_PROF_MIN_VAL_COUNTS 10
  if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
    NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);

  // `Ctx` keeps its name: the LLVMType expressions expanded from
  // InstrProfData.inc presumably refer to it.
  auto &Ctx = M.getContext();
  Type *VNodeTypes[] = {
#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *VNodeTy = StructType::get(Ctx, ArrayRef(VNodeTypes));
  ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);

  auto *VNodesVar = new GlobalVariable(
      M, VNodesTy, /*isConstant=*/false, GlobalValue::PrivateLinkage,
      Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
  setGlobalVariableLargeSection(TT, *VNodesVar);
  VNodesVar->setSection(
      getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
  VNodesVar->setAlignment(M.getDataLayout().getABITypeAlign(VNodesTy));

  // VNodesVar is used by runtime but not referenced via relocation by other
  // sections. Conservatively make it linker retained.
  UsedVars.push_back(VNodesVar);
}
1938
1939
void InstrLowerer::emitNameData() {
1940
std::string UncompressedData;
1941
1942
if (ReferencedNames.empty())
1943
return;
1944
1945
std::string CompressedNameStr;
1946
if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
1947
DoInstrProfNameCompression)) {
1948
report_fatal_error(Twine(toString(std::move(E))), false);
1949
}
1950
1951
auto &Ctx = M.getContext();
1952
auto *NamesVal =
1953
ConstantDataArray::getString(Ctx, StringRef(CompressedNameStr), false);
1954
NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
1955
GlobalValue::PrivateLinkage, NamesVal,
1956
getInstrProfNamesVarName());
1957
NamesSize = CompressedNameStr.size();
1958
setGlobalVariableLargeSection(TT, *NamesVar);
1959
NamesVar->setSection(
1960
ProfileCorrelate == InstrProfCorrelator::BINARY
1961
? getInstrProfSectionName(IPSK_covname, TT.getObjectFormat())
1962
: getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
1963
// On COFF, it's important to reduce the alignment down to 1 to prevent the
1964
// linker from inserting padding before the start of the names section or
1965
// between names entries.
1966
NamesVar->setAlignment(Align(1));
1967
// NamesVar is used by runtime but not referenced via relocation by other
1968
// sections. Conservatively make it linker retained.
1969
UsedVars.push_back(NamesVar);
1970
1971
for (auto *NamePtr : ReferencedNames)
1972
NamePtr->eraseFromParent();
1973
}
1974
1975
void InstrLowerer::emitVTableNames() {
1976
if (!EnableVTableValueProfiling || ReferencedVTables.empty())
1977
return;
1978
1979
// Collect the PGO names of referenced vtables and compress them.
1980
std::string CompressedVTableNames;
1981
if (Error E = collectVTableStrings(ReferencedVTables, CompressedVTableNames,
1982
DoInstrProfNameCompression)) {
1983
report_fatal_error(Twine(toString(std::move(E))), false);
1984
}
1985
1986
auto &Ctx = M.getContext();
1987
auto *VTableNamesVal = ConstantDataArray::getString(
1988
Ctx, StringRef(CompressedVTableNames), false /* AddNull */);
1989
GlobalVariable *VTableNamesVar =
1990
new GlobalVariable(M, VTableNamesVal->getType(), true /* constant */,
1991
GlobalValue::PrivateLinkage, VTableNamesVal,
1992
getInstrProfVTableNamesVarName());
1993
VTableNamesVar->setSection(
1994
getInstrProfSectionName(IPSK_vname, TT.getObjectFormat()));
1995
VTableNamesVar->setAlignment(Align(1));
1996
// Make VTableNames linker retained.
1997
UsedVars.push_back(VTableNamesVar);
1998
}
1999
2000
void InstrLowerer::emitRegistration() {
2001
if (!needsRuntimeRegistrationOfSectionRange(TT))
2002
return;
2003
2004
// Construct the function.
2005
auto *VoidTy = Type::getVoidTy(M.getContext());
2006
auto *VoidPtrTy = PointerType::getUnqual(M.getContext());
2007
auto *Int64Ty = Type::getInt64Ty(M.getContext());
2008
auto *RegisterFTy = FunctionType::get(VoidTy, false);
2009
auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
2010
getInstrProfRegFuncsName(), M);
2011
RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
2012
if (Options.NoRedZone)
2013
RegisterF->addFnAttr(Attribute::NoRedZone);
2014
2015
auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
2016
auto *RuntimeRegisterF =
2017
Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
2018
getInstrProfRegFuncName(), M);
2019
2020
IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF));
2021
for (Value *Data : CompilerUsedVars)
2022
if (!isa<Function>(Data))
2023
IRB.CreateCall(RuntimeRegisterF, Data);
2024
for (Value *Data : UsedVars)
2025
if (Data != NamesVar && !isa<Function>(Data))
2026
IRB.CreateCall(RuntimeRegisterF, Data);
2027
2028
if (NamesVar) {
2029
Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
2030
auto *NamesRegisterTy =
2031
FunctionType::get(VoidTy, ArrayRef(ParamTypes), false);
2032
auto *NamesRegisterF =
2033
Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
2034
getInstrProfNamesRegFuncName(), M);
2035
IRB.CreateCall(NamesRegisterF, {NamesVar, IRB.getInt64(NamesSize)});
2036
}
2037
2038
IRB.CreateRetVoid();
2039
}
2040
2041
bool InstrLowerer::emitRuntimeHook() {
2042
// We expect the linker to be invoked with -u<hook_var> flag for Linux
2043
// in which case there is no need to emit the external variable.
2044
if (TT.isOSLinux() || TT.isOSAIX())
2045
return false;
2046
2047
// If the module's provided its own runtime, we don't need to do anything.
2048
if (M.getGlobalVariable(getInstrProfRuntimeHookVarName()))
2049
return false;
2050
2051
// Declare an external variable that will pull in the runtime initialization.
2052
auto *Int32Ty = Type::getInt32Ty(M.getContext());
2053
auto *Var =
2054
new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
2055
nullptr, getInstrProfRuntimeHookVarName());
2056
Var->setVisibility(GlobalValue::HiddenVisibility);
2057
2058
if (TT.isOSBinFormatELF() && !TT.isPS()) {
2059
// Mark the user variable as used so that it isn't stripped out.
2060
CompilerUsedVars.push_back(Var);
2061
} else {
2062
// Make a function that uses it.
2063
auto *User = Function::Create(FunctionType::get(Int32Ty, false),
2064
GlobalValue::LinkOnceODRLinkage,
2065
getInstrProfRuntimeHookVarUseFuncName(), M);
2066
User->addFnAttr(Attribute::NoInline);
2067
if (Options.NoRedZone)
2068
User->addFnAttr(Attribute::NoRedZone);
2069
User->setVisibility(GlobalValue::HiddenVisibility);
2070
if (TT.supportsCOMDAT())
2071
User->setComdat(M.getOrInsertComdat(User->getName()));
2072
2073
IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", User));
2074
auto *Load = IRB.CreateLoad(Int32Ty, Var);
2075
IRB.CreateRet(Load);
2076
2077
// Mark the function as used so that it isn't stripped out.
2078
CompilerUsedVars.push_back(User);
2079
}
2080
return true;
2081
}
2082
2083
void InstrLowerer::emitUses() {
2084
// The metadata sections are parallel arrays. Optimizers (e.g.
2085
// GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
2086
// we conservatively retain all unconditionally in the compiler.
2087
//
2088
// On ELF and Mach-O, the linker can guarantee the associated sections will be
2089
// retained or discarded as a unit, so llvm.compiler.used is sufficient.
2090
// Similarly on COFF, if prof data is not referenced by code we use one comdat
2091
// and ensure this GC property as well. Otherwise, we have to conservatively
2092
// make all of the sections retained by the linker.
2093
if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
2094
(TT.isOSBinFormatCOFF() && !DataReferencedByCode))
2095
appendToCompilerUsed(M, CompilerUsedVars);
2096
else
2097
appendToUsed(M, CompilerUsedVars);
2098
2099
// We do not add proper references from used metadata sections to NamesVar and
2100
// VNodesVar, so we have to be conservative and place them in llvm.used
2101
// regardless of the target,
2102
appendToUsed(M, UsedVars);
2103
}
2104
2105
void InstrLowerer::emitInitialization() {
2106
// Create ProfileFileName variable. Don't don't this for the
2107
// context-sensitive instrumentation lowering: This lowering is after
2108
// LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
2109
// have already create the variable before LTO/ThinLTO linking.
2110
if (!IsCS)
2111
createProfileFileNameVar(M, Options.InstrProfileOutput);
2112
Function *RegisterF = M.getFunction(getInstrProfRegFuncsName());
2113
if (!RegisterF)
2114
return;
2115
2116
// Create the initialization function.
2117
auto *VoidTy = Type::getVoidTy(M.getContext());
2118
auto *F = Function::Create(FunctionType::get(VoidTy, false),
2119
GlobalValue::InternalLinkage,
2120
getInstrProfInitFuncName(), M);
2121
F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
2122
F->addFnAttr(Attribute::NoInline);
2123
if (Options.NoRedZone)
2124
F->addFnAttr(Attribute::NoRedZone);
2125
2126
// Add the basic block and the necessary calls.
2127
IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", F));
2128
IRB.CreateCall(RegisterF, {});
2129
IRB.CreateRetVoid();
2130
2131
appendToGlobalCtors(M, F, 0);
2132
}
2133
2134
namespace llvm {
2135
// Create the variable for profile sampling.
2136
void createProfileSamplingVar(Module &M) {
2137
const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
2138
IntegerType *SamplingVarTy;
2139
Constant *ValueZero;
2140
if (SampledInstrPeriod.getValue() <= USHRT_MAX) {
2141
SamplingVarTy = Type::getInt16Ty(M.getContext());
2142
ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(16, 0));
2143
} else {
2144
SamplingVarTy = Type::getInt32Ty(M.getContext());
2145
ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(32, 0));
2146
}
2147
auto SamplingVar = new GlobalVariable(
2148
M, SamplingVarTy, false, GlobalValue::WeakAnyLinkage, ValueZero, VarName);
2149
SamplingVar->setVisibility(GlobalValue::DefaultVisibility);
2150
SamplingVar->setThreadLocal(true);
2151
Triple TT(M.getTargetTriple());
2152
if (TT.supportsCOMDAT()) {
2153
SamplingVar->setLinkage(GlobalValue::ExternalLinkage);
2154
SamplingVar->setComdat(M.getOrInsertComdat(VarName));
2155
}
2156
appendToCompilerUsed(M, SamplingVar);
2157
}
2158
} // namespace llvm
2159
2160