//===- MemProfiler.cpp - memory allocation and access profiler ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of MemProfiler. Memory accesses are instrumented
// to increment the access count held in a shadow memory location, or
// alternatively to call into the runtime. Memory intrinsic calls (memmove,
// memcpy, memset) are changed to call the memory profiling runtime version
// instead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <map>
#include <set>

using namespace llvm;
using namespace llvm::memprof;

#define DEBUG_TYPE "memprof"

namespace llvm {
extern cl::opt<bool> PGOWarnMissing;
extern cl::opt<bool> NoPGOWarnMismatch;
extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
} // namespace llvm

constexpr int LLVM_MEM_PROFILER_VERSION = 1;

// Size of memory mapped to a single shadow location.
constexpr uint64_t DefaultMemGranularity = 64;

// Scale from granularity down to shadow size.
constexpr uint64_t DefaultShadowScale = 3;

constexpr char MemProfModuleCtorName[] = "memprof.module_ctor";
constexpr uint64_t MemProfCtorAndDtorPriority = 1;
// On Emscripten, the system needs more than one priority for constructors.
constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50;
constexpr char MemProfInitName[] = "__memprof_init";
constexpr char MemProfVersionCheckNamePrefix[] =
    "__memprof_version_mismatch_check_v";

constexpr char MemProfShadowMemoryDynamicAddress[] =
    "__memprof_shadow_memory_dynamic_address";

constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";

constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram";

// Command-line flags.

static cl::opt<bool> ClInsertVersionCheck(
    "memprof-guard-against-version-mismatch",
    cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
    cl::init(true));

// This flag may need to be replaced with -f[no-]memprof-reads.
static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("memprof-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "memprof-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClUseCalls(
    "memprof-use-callbacks",
    cl::desc("Use callbacks instead of inline instrumentation sequences."),
    cl::Hidden, cl::init(false));

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__memprof_"));

// These flags allow changing the shadow mapping.
// The shadow mapping looks like
// Shadow = ((Mem & mask) >> scale) + offset
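//
// As a worked example (illustrative values only, assuming the defaults of
// Granularity = 64 and Scale = 3): an address 0x1234 maps to
// ((0x1234 & ~0x3F) >> 3) + offset = 0x240 + offset, i.e. every 64-byte
// granule of application memory shares a single 8-byte shadow counter.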

static cl::opt<int> ClMappingScale("memprof-mapping-scale",
                                   cl::desc("scale of memprof shadow mapping"),
                                   cl::Hidden, cl::init(DefaultShadowScale));

static cl::opt<int>
    ClMappingGranularity("memprof-mapping-granularity",
                         cl::desc("granularity of memprof shadow mapping"),
                         cl::Hidden, cl::init(DefaultMemGranularity));

static cl::opt<bool> ClStack("memprof-instrument-stack",
                             cl::desc("Instrument scalar stack variables"),
                             cl::Hidden, cl::init(false));

// Debug flags.

static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
                            cl::init(0));

static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden,
                                        cl::desc("Debug func"));

static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),
                               cl::Hidden, cl::init(-1));

static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
                               cl::Hidden, cl::init(-1));

// By default, disable matching of allocation profiles onto operator new calls
// that already explicitly pass a hot/cold hint, since we don't currently
// override these hints anyway.
static cl::opt<bool> ClMemProfMatchHotColdNew(
    "memprof-match-hot-cold-new",
    cl::desc(
        "Match allocation profiles onto existing hot/cold operator new calls"),
    cl::Hidden, cl::init(false));

static cl::opt<bool> ClHistogram("memprof-histogram",
                                 cl::desc("Collect access count histograms"),
                                 cl::Hidden, cl::init(false));

static cl::opt<bool>
    ClPrintMemProfMatchInfo("memprof-print-match-info",
                            cl::desc("Print matching stats for each allocation "
                                     "context in this module's profiles"),
                            cl::Hidden, cl::init(false));

extern cl::opt<bool> MemProfReportHintedSizes;

// Instrumentation statistics
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");

// Matching statistics
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfMemProfMismatch,
          "Number of functions having mismatched memory profile hash.");
STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
STATISTIC(NumOfMemProfAllocContextProfiles,
          "Number of alloc contexts in memory profile.");
STATISTIC(NumOfMemProfCallSiteProfiles,
          "Number of callsites in memory profile.");
STATISTIC(NumOfMemProfMatchedAllocContexts,
          "Number of matched memory profile alloc contexts.");
STATISTIC(NumOfMemProfMatchedAllocs,
          "Number of matched memory profile allocs.");
STATISTIC(NumOfMemProfMatchedCallSites,
          "Number of matched memory profile callsites.");

namespace {

/// This struct defines the shadow mapping using the rule:
/// shadow = ((mem & mask) >> Scale) + DynamicShadowOffset.
struct ShadowMapping {
  ShadowMapping() {
    Scale = ClMappingScale;
    Granularity = ClMappingGranularity;
    Mask = ~(Granularity - 1);
  }

  int Scale;
  int Granularity;
  uint64_t Mask; // Computed as ~(Granularity-1)
};

static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) {
  return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority
                                       : MemProfCtorAndDtorPriority;
}

struct InterestingMemoryAccess {
  Value *Addr = nullptr;
  bool IsWrite;
  Type *AccessTy;
  Value *MaybeMask = nullptr;
};

/// Instrument the code in module to profile memory accesses.
class MemProfiler {
public:
  MemProfiler(Module &M) {
    C = &(M.getContext());
    LongSize = M.getDataLayout().getPointerSizeInBits();
    IntptrTy = Type::getIntNTy(*C, LongSize);
    PtrTy = PointerType::getUnqual(*C);
  }

  /// If it is an interesting memory access, populate information
  /// about the access and return an InterestingMemoryAccess struct.
  /// Otherwise return std::nullopt.
  std::optional<InterestingMemoryAccess>
  isInterestingMemoryAccess(Instruction *I) const;

  void instrumentMop(Instruction *I, const DataLayout &DL,
                     InterestingMemoryAccess &Access);
  void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
                         Value *Addr, bool IsWrite);
  void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                   Instruction *I, Value *Addr, Type *AccessTy,
                                   bool IsWrite);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFunction(Function &F);
  bool maybeInsertMemProfInitAtFunctionEntry(Function &F);
  bool insertDynamicShadowAtFunctionEntry(Function &F);

private:
  void initializeCallbacks(Module &M);

  LLVMContext *C;
  int LongSize;
  Type *IntptrTy;
  PointerType *PtrTy;
  ShadowMapping Mapping;

  // This array is indexed by AccessIsWrite.
  FunctionCallee MemProfMemoryAccessCallback[2];

  FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset;
  Value *DynamicShadowOffset = nullptr;
};

class ModuleMemProfiler {
public:
  ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); }

  bool instrumentModule(Module &);

private:
  Triple TargetTriple;
  ShadowMapping Mapping;
  Function *MemProfCtorFunction = nullptr;
};

} // end anonymous namespace

MemProfilerPass::MemProfilerPass() = default;

PreservedAnalyses MemProfilerPass::run(Function &F,
                                       AnalysisManager<Function> &AM) {
  Module &M = *F.getParent();
  MemProfiler Profiler(M);
  if (Profiler.instrumentFunction(F))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}

ModuleMemProfilerPass::ModuleMemProfilerPass() = default;

PreservedAnalyses ModuleMemProfilerPass::run(Module &M,
                                             AnalysisManager<Module> &AM) {

  assert((!ClHistogram || ClUseCalls) &&
         "Cannot use -memprof-histogram without callbacks. Set "
         "memprof-use-callbacks");

  ModuleMemProfiler Profiler(M);
  if (Profiler.instrumentModule(M))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}

Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // (Shadow & mask) >> scale
  Shadow = IRB.CreateAnd(Shadow, Mapping.Mask);
  Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
  // (Shadow >> scale) + offset
  assert(DynamicShadowOffset);
  return IRB.CreateAdd(Shadow, DynamicShadowOffset);
}

// Instrument memset/memmove/memcpy
void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  if (isa<MemTransferInst>(MI)) {
    IRB.CreateCall(isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy,
                   {MI->getOperand(0), MI->getOperand(1),
                    IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  } else if (isa<MemSetInst>(MI)) {
    IRB.CreateCall(
        MemProfMemset,
        {MI->getOperand(0),
         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  }
  MI->eraseFromParent();
}
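
// As an illustration of the rewrite above (assuming the default "__memprof_"
// callback prefix), an intrinsic call such as
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 false)
// becomes
//   call ptr @__memprof_memcpy(ptr %dst, ptr %src, i64 %n)
// so the runtime can account for the bytes the intrinsic touches.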

std::optional<InterestingMemoryAccess>
MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
  // Do not instrument the load fetching the dynamic shadow address.
  if (DynamicShadowOffset == I)
    return std::nullopt;

  InterestingMemoryAccess Access;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads)
      return std::nullopt;
    Access.IsWrite = false;
    Access.AccessTy = LI->getType();
    Access.Addr = LI->getPointerOperand();
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = SI->getValueOperand()->getType();
    Access.Addr = SI->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = RMW->getValOperand()->getType();
    Access.Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = XCHG->getCompareOperand()->getType();
    Access.Addr = XCHG->getPointerOperand();
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    auto *F = CI->getCalledFunction();
    if (F && (F->getIntrinsicID() == Intrinsic::masked_load ||
              F->getIntrinsicID() == Intrinsic::masked_store)) {
      unsigned OpOffset = 0;
      if (F->getIntrinsicID() == Intrinsic::masked_store) {
        if (!ClInstrumentWrites)
          return std::nullopt;
        // Masked store has an initial operand for the value.
        OpOffset = 1;
        Access.AccessTy = CI->getArgOperand(0)->getType();
        Access.IsWrite = true;
      } else {
        if (!ClInstrumentReads)
          return std::nullopt;
        Access.AccessTy = CI->getType();
        Access.IsWrite = false;
      }

      auto *BasePtr = CI->getOperand(0 + OpOffset);
      Access.MaybeMask = CI->getOperand(2 + OpOffset);
      Access.Addr = BasePtr;
    }
  }

  if (!Access.Addr)
    return std::nullopt;

  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return std::nullopt;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Access.Addr->isSwiftError())
    return std::nullopt;

  // Peel off GEPs and BitCasts.
  auto *Addr = Access.Addr->stripInBoundsOffsets();

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
    // Do not instrument PGO counter updates.
    if (GV->hasSection()) {
      StringRef SectionName = GV->getSection();
      // Check if the global is in the PGO counters section.
      auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
      if (SectionName.ends_with(
              getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
        return std::nullopt;
    }

    // Do not instrument accesses to LLVM internal variables.
    if (GV->getName().starts_with("__llvm"))
      return std::nullopt;
  }

  return Access;
}

void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                              Instruction *I, Value *Addr,
                                              Type *AccessTy, bool IsWrite) {
  auto *VTy = cast<FixedVectorType>(AccessTy);
  unsigned Num = VTy->getNumElements();
  auto *Zero = ConstantInt::get(IntptrTy, 0);
  for (unsigned Idx = 0; Idx < Num; ++Idx) {
    Value *InstrumentedAddress = nullptr;
    Instruction *InsertBefore = I;
    if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
      // dyn_cast as we might get UndefValue.
      if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
        if (Masked->isZero())
          // Mask is constant false, so no instrumentation needed.
          continue;
        // If we have a true or undef value, fall through to instrumentAddress
        // with InsertBefore == I.
      }
    } else {
      IRBuilder<> IRB(I);
      Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
      Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
      InsertBefore = ThenTerm;
    }

    IRBuilder<> IRB(InsertBefore);
    InstrumentedAddress =
        IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
    instrumentAddress(I, InsertBefore, InstrumentedAddress, IsWrite);
  }
}
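
// For example (hypothetical mask), a masked load of <4 x i32> with the constant
// mask <i1 1, i1 0, i1 undef, i1 1> gets lanes 0, 2 and 3 instrumented (undef
// is conservatively treated as live) while lane 1 is skipped entirely; with a
// non-constant mask, each lane is instead guarded by a per-lane branch on the
// extracted mask element.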

void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
                                InterestingMemoryAccess &Access) {
  // Skip instrumentation of stack accesses unless requested.
  if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) {
    if (Access.IsWrite)
      ++NumSkippedStackWrites;
    else
      ++NumSkippedStackReads;
    return;
  }

  if (Access.IsWrite)
    NumInstrumentedWrites++;
  else
    NumInstrumentedReads++;

  if (Access.MaybeMask) {
    instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
                                Access.AccessTy, Access.IsWrite);
  } else {
    // Since the access counts will be accumulated across the entire allocation,
    // we only update the shadow access count for the first location and thus
    // don't need to worry about alignment and type size.
    instrumentAddress(I, I, Access.Addr, Access.IsWrite);
  }
}

void MemProfiler::instrumentAddress(Instruction *OrigIns,
                                    Instruction *InsertBefore, Value *Addr,
                                    bool IsWrite) {
  IRBuilder<> IRB(InsertBefore);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);

  if (ClUseCalls) {
    IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong);
    return;
  }

  // Create an inline sequence to compute the shadow location and increment
  // its value by one.
  Type *ShadowTy = Type::getInt64Ty(*C);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
  Value *ShadowPtr = memToShadow(AddrLong, IRB);
  Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
  Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
  Value *Inc = ConstantInt::get(Type::getInt64Ty(*C), 1);
  ShadowValue = IRB.CreateAdd(ShadowValue, Inc);
  IRB.CreateStore(ShadowValue, ShadowAddr);
}
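
// A sketch of the inline sequence emitted above for the default mapping
// (value names are illustrative only):
//   %addrint = ptrtoint ptr %addr to i64
//   %masked  = and i64 %addrint, -64            ; Mem & ~(Granularity-1)
//   %scaled  = lshr i64 %masked, 3              ; >> Scale
//   %shadow  = add i64 %scaled, %dynamic_shadow_offset
//   %sptr    = inttoptr i64 %shadow to ptr
//   %count   = load i64, ptr %sptr
//   %inc     = add i64 %count, 1
//   store i64 %inc, ptr %sptr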

// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M) {
  const MDString *MemProfFilename =
      dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename"));
  if (!MemProfFilename)
    return;
  assert(!MemProfFilename->getString().empty() &&
         "Unexpected MemProfProfileFilename metadata with empty string");
  Constant *ProfileNameConst = ConstantDataArray::getString(
      M.getContext(), MemProfFilename->getString(), true);
  GlobalVariable *ProfileNameVar = new GlobalVariable(
      M, ProfileNameConst->getType(), /*isConstant=*/true,
      GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar);
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
    ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar));
  }
}

// Set MemprofHistogramFlag as a global variable in IR. This makes it accessible
// to the runtime, changing shadow count behavior.
void createMemprofHistogramFlagVar(Module &M) {
  const StringRef VarName(MemProfHistogramFlagVar);
  Type *IntTy1 = Type::getInt1Ty(M.getContext());
  auto MemprofHistogramFlag = new GlobalVariable(
      M, IntTy1, true, GlobalValue::WeakAnyLinkage,
      Constant::getIntegerValue(IntTy1, APInt(1, ClHistogram)), VarName);
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    MemprofHistogramFlag->setLinkage(GlobalValue::ExternalLinkage);
    MemprofHistogramFlag->setComdat(M.getOrInsertComdat(VarName));
  }
  appendToCompilerUsed(M, MemprofHistogramFlag);
}

bool ModuleMemProfiler::instrumentModule(Module &M) {

  // Create a module constructor.
  std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION);
  std::string VersionCheckName =
      ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion)
                           : "";
  std::tie(MemProfCtorFunction, std::ignore) =
      createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName,
                                          MemProfInitName, /*InitArgTypes=*/{},
                                          /*InitArgs=*/{}, VersionCheckName);

  const uint64_t Priority = getCtorAndDtorPriority(TargetTriple);
  appendToGlobalCtors(M, MemProfCtorFunction, Priority);

  createProfileFileNameVar(M);

  createMemprofHistogramFlagVar(M);

  return true;
}

void MemProfiler::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);

  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";
    const std::string HistPrefix = ClHistogram ? "hist_" : "";

    SmallVector<Type *, 2> Args1{1, IntptrTy};
    MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction(
        ClMemoryAccessCallbackPrefix + HistPrefix + TypeStr,
        FunctionType::get(IRB.getVoidTy(), Args1, false));
  }
  MemProfMemmove = M.getOrInsertFunction(
      ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
  MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy",
                                        PtrTy, PtrTy, PtrTy, IntptrTy);
  MemProfMemset =
      M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy,
                            PtrTy, IRB.getInt32Ty(), IntptrTy);
}
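
// With the default prefix this declares, e.g., @__memprof_load and
// @__memprof_store (or @__memprof_hist_load / @__memprof_hist_store when
// -memprof-histogram is enabled), plus @__memprof_memmove, @__memprof_memcpy
// and @__memprof_memset for the intrinsic replacements.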

bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) {
  // For each NSObject descendant having a +load method, this method is invoked
  // by the ObjC runtime before any of the static constructors is called.
  // Therefore we need to instrument such methods with a call to __memprof_init
  // at the beginning in order to initialize our runtime before any access to
  // the shadow memory.
  // We cannot just ignore these methods, because they may call other
  // instrumented functions.
  if (F.getName().contains(" load]")) {
    FunctionCallee MemProfInitFunction =
        declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {});
    IRBuilder<> IRB(&F.front(), F.front().begin());
    IRB.CreateCall(MemProfInitFunction, {});
    return true;
  }
  return false;
}

bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {
  IRBuilder<> IRB(&F.front().front());
  Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
      MemProfShadowMemoryDynamicAddress, IntptrTy);
  if (F.getParent()->getPICLevel() == PICLevel::NotPIC)
    cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);
  DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
  return true;
}

bool MemProfiler::instrumentFunction(Function &F) {
  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
    return false;
  if (ClDebugFunc == F.getName())
    return false;
  if (F.getName().starts_with("__memprof_"))
    return false;

  bool FunctionModified = false;

  // If needed, insert __memprof_init.
  // This function needs to be called even if the function body is not
  // instrumented.
  if (maybeInsertMemProfInitAtFunctionEntry(F))
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n");

  initializeCallbacks(*F.getParent());

  SmallVector<Instruction *, 16> ToInstrument;

  // Fill the set of memory operations to instrument.
  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst))
        ToInstrument.push_back(&Inst);
    }
  }

  if (ToInstrument.empty()) {
    LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified
                      << " " << F << "\n");

    return FunctionModified;
  }

  FunctionModified |= insertDynamicShadowAtFunctionEntry(F);

  int NumInstrumented = 0;
  for (auto *Inst : ToInstrument) {
    if (ClDebugMin < 0 || ClDebugMax < 0 ||
        (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
      std::optional<InterestingMemoryAccess> Access =
          isInterestingMemoryAccess(Inst);
      if (Access)
        instrumentMop(Inst, F.getDataLayout(), *Access);
      else
        instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
    }
    NumInstrumented++;
  }

  if (NumInstrumented > 0)
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " "
                    << F << "\n");

  return FunctionModified;
}

static void addCallsiteMetadata(Instruction &I,
                                std::vector<uint64_t> &InlinedCallStack,
                                LLVMContext &Ctx) {
  I.setMetadata(LLVMContext::MD_callsite,
                buildCallstackMetadata(InlinedCallStack, Ctx));
}

static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
                               uint32_t Column) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  HashBuilder.add(Function, LineOffset, Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  std::memcpy(&Id, Hash.data(), sizeof(Hash));
  return Id;
}
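
// For example, a frame for a function with GUID 0x1234, line offset 5 and
// column 10 (hypothetical values) hashes to a stable 64-bit id via BLAKE3
// truncated to 8 bytes; only equality of these ids matters, so the same
// (GUID, offset, column) triple produces the same id in every module.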

static uint64_t computeStackId(const memprof::Frame &Frame) {
  return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
}

// Helper to generate a single hash id for a given callstack, used for emitting
// matching statistics and useful for uniquing such statistics across modules.
static uint64_t
computeFullStackId(const std::vector<memprof::Frame> &CallStack) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  for (auto &F : CallStack)
    HashBuilder.add(F.Function, F.LineOffset, F.Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  std::memcpy(&Id, Hash.data(), sizeof(Hash));
  return Id;
}

static AllocationType addCallStack(CallStackTrie &AllocTrie,
                                   const AllocationInfo *AllocInfo) {
  SmallVector<uint64_t> StackIds;
  for (const auto &StackFrame : AllocInfo->CallStack)
    StackIds.push_back(computeStackId(StackFrame));
  auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
                                AllocInfo->Info.getAllocCount(),
                                AllocInfo->Info.getTotalLifetime());
  uint64_t TotalSize = 0;
  if (MemProfReportHintedSizes) {
    TotalSize = AllocInfo->Info.getTotalSize();
    assert(TotalSize);
  }
  AllocTrie.addCallStack(AllocType, StackIds, TotalSize);
  return AllocType;
}

// Helper to compare the InlinedCallStack computed from an instruction's debug
// info to a list of Frames from profile data (either the allocation data or a
// callsite). For callsites, the StartIndex to use in the Frame array may be
// non-zero.
static bool
stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
                                   ArrayRef<uint64_t> InlinedCallStack,
                                   unsigned StartIndex = 0) {
  auto StackFrame = ProfileCallStack.begin() + StartIndex;
  auto InlCallStackIter = InlinedCallStack.begin();
  for (; StackFrame != ProfileCallStack.end() &&
         InlCallStackIter != InlinedCallStack.end();
       ++StackFrame, ++InlCallStackIter) {
    uint64_t StackId = computeStackId(*StackFrame);
    if (StackId != *InlCallStackIter)
      return false;
  }
  // Return true if we found and matched all stack ids from the call
  // instruction.
  return InlCallStackIter == InlinedCallStack.end();
}
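
// E.g. (hypothetical ids): if the profile frames hash to [s1, s2, s3, s4] and
// the IR's InlinedCallStack is [s1, s2], this returns true, since the profile
// stack includes the inlined stack as a prefix starting at StartIndex; for an
// InlinedCallStack of [s1, s3] it returns false at the second frame.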

static bool isNewWithHotColdVariant(Function *Callee,
                                    const TargetLibraryInfo &TLI) {
  if (!Callee)
    return false;
  LibFunc Func;
  if (!TLI.getLibFunc(*Callee, Func))
    return false;
  switch (Func) {
  case LibFunc_Znwm:
  case LibFunc_ZnwmRKSt9nothrow_t:
  case LibFunc_ZnwmSt11align_val_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
  case LibFunc_Znam:
  case LibFunc_ZnamRKSt9nothrow_t:
  case LibFunc_ZnamSt11align_val_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
    return true;
  case LibFunc_Znwm12__hot_cold_t:
  case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_Znam12__hot_cold_t:
  case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
    return ClMemProfMatchHotColdNew;
  default:
    return false;
  }
}

struct AllocMatchInfo {
  uint64_t TotalSize = 0;
  AllocationType AllocType = AllocationType::None;
  bool Matched = false;
};

static void
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
            const TargetLibraryInfo &TLI,
            std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
  auto &Ctx = M.getContext();
  // Previously we used getIRPGOFuncName() here. If F is local linkage,
  // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
  // llvm-profdata uses FuncName in dwarf to create GUID which doesn't
  // contain FileName's prefix. This caused local-linkage functions to fail
  // to find their MemProfRecord, so we use getName() now.
  // 'unique-internal-linkage-names' can make MemProf work better for local
  // linkage functions.
  auto FuncName = F.getName();
  auto FuncGUID = Function::getGUID(FuncName);
  std::optional<memprof::MemProfRecord> MemProfRec;
  auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
  if (Err) {
    handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
      auto Err = IPE.get();
      bool SkipWarning = false;
      LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
                        << ": ");
      if (Err == instrprof_error::unknown_function) {
        NumOfMemProfMissing++;
        SkipWarning = !PGOWarnMissing;
        LLVM_DEBUG(dbgs() << "unknown function");
      } else if (Err == instrprof_error::hash_mismatch) {
        NumOfMemProfMismatch++;
        SkipWarning =
            NoPGOWarnMismatch ||
            (NoPGOWarnMismatchComdatWeak &&
             (F.hasComdat() ||
              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
      }

      if (SkipWarning)
        return;

      std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
                         Twine(" Hash = ") + std::to_string(FuncGUID))
                            .str();

      Ctx.diagnose(
          DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
    });
    return;
  }

  NumOfMemProfFunc++;

  // Detect if there are non-zero column numbers in the profile. If not,
  // treat all column numbers as 0 when matching (i.e. ignore any non-zero
  // columns in the IR). The profiled binary might have been built with
  // column numbers disabled, for example.
  bool ProfileHasColumns = false;

  // Build maps of the location hash to all profile data with that leaf location
  // (allocation info and the callsites).
  std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
  // For the callsites we need to record the index of the associated frame in
  // the frame array (see comments below where the map entries are added).
  std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *, unsigned>>>
      LocHashToCallSites;
  for (auto &AI : MemProfRec->AllocSites) {
    NumOfMemProfAllocContextProfiles++;
    // Associate the allocation info with the leaf frame. The later matching
    // code will match any inlined call sequences in the IR with a longer prefix
    // of call stack frames.
    uint64_t StackId = computeStackId(AI.CallStack[0]);
    LocHashToAllocInfo[StackId].insert(&AI);
    ProfileHasColumns |= AI.CallStack[0].Column;
  }
  for (auto &CS : MemProfRec->CallSites) {
    NumOfMemProfCallSiteProfiles++;
    // Need to record all frames from leaf up to and including this function,
    // as any of these may or may not have been inlined at this point.
    unsigned Idx = 0;
    for (auto &StackFrame : CS) {
      uint64_t StackId = computeStackId(StackFrame);
      LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
      ProfileHasColumns |= StackFrame.Column;
      // Once we find this function, we can stop recording.
      if (StackFrame.Function == FuncGUID)
        break;
    }
    assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
  }

  auto GetOffset = [](const DILocation *DIL) {
    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
           0xffff;
  };
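
  // For example, if the enclosing subprogram begins at line 100 and the call's
  // debug location is line 107, GetOffset yields 7; masking with 0xffff keeps
  // the offset within the 16 bits used when the profile was encoded.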

  // Now walk the instructions, looking up the associated profile data using
  // debug locations.
  for (auto &BB : F) {
    for (auto &I : BB) {
      if (I.isDebugOrPseudoInst())
        continue;
      // We are only interested in calls (allocation or interior call stack
      // context calls).
      auto *CI = dyn_cast<CallBase>(&I);
      if (!CI)
        continue;
      auto *CalledFunction = CI->getCalledFunction();
      if (CalledFunction && CalledFunction->isIntrinsic())
        continue;
      // List of call stack ids computed from the location hashes on debug
      // locations (leaf to inlined at root).
      std::vector<uint64_t> InlinedCallStack;
      // Was the leaf location found in one of the profile maps?
      bool LeafFound = false;
      // If leaf was found in a map, iterators pointing to its location in both
      // of the maps. It might exist in neither, one, or both (the latter case
      // can happen because we don't currently have discriminators to
      // distinguish the case when a single line/col maps to both an allocation
      // and another callsite).
      std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
          AllocInfoIter;
      std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *,
                                            unsigned>>>::iterator CallSitesIter;
      for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
           DIL = DIL->getInlinedAt()) {
        // Use C++ linkage name if possible. Need to compile with
        // -fdebug-info-for-profiling to get linkage name.
        StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
        if (Name.empty())
          Name = DIL->getScope()->getSubprogram()->getName();
        auto CalleeGUID = Function::getGUID(Name);
        auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
                                      ProfileHasColumns ? DIL->getColumn() : 0);
        // Check if we have found the profile's leaf frame. If yes, collect
        // the rest of the call's inlined context starting here. If not, see if
        // we find a match further up the inlined context (in case the profile
        // was missing debug frames at the leaf).
        if (!LeafFound) {
          AllocInfoIter = LocHashToAllocInfo.find(StackId);
          CallSitesIter = LocHashToCallSites.find(StackId);
          if (AllocInfoIter != LocHashToAllocInfo.end() ||
              CallSitesIter != LocHashToCallSites.end())
            LeafFound = true;
        }
        if (LeafFound)
          InlinedCallStack.push_back(StackId);
      }
      // If leaf not in either of the maps, skip inst.
      if (!LeafFound)
        continue;

      // First add !memprof metadata from allocation info, if we found the
      // instruction's leaf location in that map, and if the rest of the
      // instruction's locations match the prefix Frame locations on an
      // allocation context with the same leaf.
      if (AllocInfoIter != LocHashToAllocInfo.end()) {
        // Only consider allocations via new, to reduce unnecessary metadata,
        // since those are the only allocations that will be targeted initially.
        if (!isNewWithHotColdVariant(CI->getCalledFunction(), TLI))
          continue;
        // We may match this instruction's location list to multiple MIB
        // contexts. Add them to a Trie specialized for trimming the contexts to
        // the minimal needed to disambiguate contexts with unique behavior.
        CallStackTrie AllocTrie;
        for (auto *AllocInfo : AllocInfoIter->second) {
          // Check the full inlined call stack against this one.
          // If we found and thus matched all frames on the call, include
          // this MIB.
          if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
                                                 InlinedCallStack)) {
            NumOfMemProfMatchedAllocContexts++;
            auto AllocType = addCallStack(AllocTrie, AllocInfo);
            // Record information about the allocation if match info printing
            // was requested.
            if (ClPrintMemProfMatchInfo) {
              auto FullStackId = computeFullStackId(AllocInfo->CallStack);
              FullStackIdToAllocMatchInfo[FullStackId] = {
                  AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
            }
          }
        }
        // We might not have matched any to the full inlined call stack.
        // But if we did, create and attach metadata, or a function attribute if
        // all contexts have identical profiled behavior.
        if (!AllocTrie.empty()) {
          NumOfMemProfMatchedAllocs++;
          // MemprofMDAttached will be false if a function attribute was
          // attached.
          bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
          assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
          if (MemprofMDAttached) {
            // Add callsite metadata for the instruction's location list so that
            // it is simpler later on to identify which part of the MIB contexts
            // are from this particular instruction (including during inlining,
            // when the callsite metadata will be updated appropriately).
            // FIXME: can this be changed to strip out the matching stack
            // context ids from the MIB contexts and not add any callsite
            // metadata here to save space?
            addCallsiteMetadata(I, InlinedCallStack, Ctx);
          }
        }
        continue;
      }

      // Otherwise, add callsite metadata. If we reach here then we found the
      // instruction's leaf location in the callsites map and not the allocation
      // map.
      assert(CallSitesIter != LocHashToCallSites.end());
      for (auto CallStackIdx : CallSitesIter->second) {
        // If we found and thus matched all frames on the call, create and
        // attach call stack metadata.
        if (stackFrameIncludesInlinedCallStack(
                *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
          NumOfMemProfMatchedCallSites++;
          addCallsiteMetadata(I, InlinedCallStack, Ctx);
          // Only need to find one with a matching call stack and add a single
          // callsite metadata.
          break;
        }
      }
    }
  }
}

MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
                               IntrusiveRefCntPtr<vfs::FileSystem> FS)
    : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
  if (!FS)
    this->FS = vfs::getRealFileSystem();
}

PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
  LLVM_DEBUG(dbgs() << "Read in memory profile:");
  auto &Ctx = M.getContext();
  auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
  if (Error E = ReaderOrErr.takeError()) {
    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
      Ctx.diagnose(
          DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
    });
    return PreservedAnalyses::all();
  }

  std::unique_ptr<IndexedInstrProfReader> MemProfReader =
      std::move(ReaderOrErr.get());
  if (!MemProfReader) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(
        MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
    return PreservedAnalyses::all();
  }

  if (!MemProfReader->hasMemoryProfile()) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
                                          "Not a memory profile"));
    return PreservedAnalyses::all();
  }

  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  // Map from the stack hash of each allocation context in the function profiles
  // to the total profiled size (bytes), allocation type, and whether we matched
  // it to an allocation in the IR.
  std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;

  for (auto &F : M) {
    if (F.isDeclaration())
      continue;

    const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
    readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo);
  }

  if (ClPrintMemProfMatchInfo) {
    for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
      errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
             << " context with id " << Id << " has total profiled size "
             << Info.TotalSize << (Info.Matched ? " is" : " not")
             << " matched\n";
  }
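
  // Example of a line printed above (values hypothetical):
  //   MemProf cold context with id 1234567890 has total profiled size 4096 is matched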

  return PreservedAnalyses::none();
}