Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp
35233 views
1
//===- InstrProf.cpp - Instrumented profiling format support --------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains support for clang's instrumentation based PGO and
10
// coverage.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/ProfileData/InstrProf.h"
15
#include "llvm/ADT/ArrayRef.h"
16
#include "llvm/ADT/SmallVector.h"
17
#include "llvm/ADT/StringExtras.h"
18
#include "llvm/ADT/StringRef.h"
19
#include "llvm/Config/config.h"
20
#include "llvm/IR/Constant.h"
21
#include "llvm/IR/Constants.h"
22
#include "llvm/IR/Function.h"
23
#include "llvm/IR/GlobalValue.h"
24
#include "llvm/IR/GlobalVariable.h"
25
#include "llvm/IR/Instruction.h"
26
#include "llvm/IR/LLVMContext.h"
27
#include "llvm/IR/MDBuilder.h"
28
#include "llvm/IR/Metadata.h"
29
#include "llvm/IR/Module.h"
30
#include "llvm/IR/Type.h"
31
#include "llvm/ProfileData/InstrProfReader.h"
32
#include "llvm/Support/Casting.h"
33
#include "llvm/Support/CommandLine.h"
34
#include "llvm/Support/Compiler.h"
35
#include "llvm/Support/Compression.h"
36
#include "llvm/Support/Debug.h"
37
#include "llvm/Support/Endian.h"
38
#include "llvm/Support/Error.h"
39
#include "llvm/Support/ErrorHandling.h"
40
#include "llvm/Support/LEB128.h"
41
#include "llvm/Support/MathExtras.h"
42
#include "llvm/Support/Path.h"
43
#include "llvm/Support/SwapByteOrder.h"
44
#include "llvm/Support/VirtualFileSystem.h"
45
#include "llvm/TargetParser/Triple.h"
46
#include <algorithm>
47
#include <cassert>
48
#include <cstddef>
49
#include <cstdint>
50
#include <cstring>
51
#include <memory>
52
#include <string>
53
#include <system_error>
54
#include <type_traits>
55
#include <utility>
56
#include <vector>
57
58
using namespace llvm;
59
60
#define DEBUG_TYPE "instrprof"
61
62
// Selects whether the profile counter names of static functions carry the
// full module build path as their prefix. Enabled by default.
static cl::opt<bool> StaticFuncFullModulePrefix(
    "static-func-full-module-prefix", cl::init(true), cl::Hidden,
    cl::desc("Use full module build paths in the profile counter names "
             "for static functions."));

// This option is tailored to users that have different top-level directory in
// profile-gen and profile-use compilation. Users need to specify the number
// of levels to strip. A value larger than the number of directories in the
// source file will strip all the directory names and only leave the basename.
//
// Note current ThinLTO module importing for the indirect-calls assumes
// the source directory name not being stripped. A non-zero option value here
// can potentially prevent some inter-module indirect-call-promotions.
static cl::opt<unsigned> StaticFuncStripDirNamePrefix(
    "static-func-strip-dirname-prefix", cl::init(0), cl::Hidden,
    cl::desc("Strip specified level of directory name from source path "
             "in the profile counter name for static functions."));
79
80
static std::string getInstrProfErrString(instrprof_error Err,
81
const std::string &ErrMsg = "") {
82
std::string Msg;
83
raw_string_ostream OS(Msg);
84
85
switch (Err) {
86
case instrprof_error::success:
87
OS << "success";
88
break;
89
case instrprof_error::eof:
90
OS << "end of File";
91
break;
92
case instrprof_error::unrecognized_format:
93
OS << "unrecognized instrumentation profile encoding format";
94
break;
95
case instrprof_error::bad_magic:
96
OS << "invalid instrumentation profile data (bad magic)";
97
break;
98
case instrprof_error::bad_header:
99
OS << "invalid instrumentation profile data (file header is corrupt)";
100
break;
101
case instrprof_error::unsupported_version:
102
OS << "unsupported instrumentation profile format version";
103
break;
104
case instrprof_error::unsupported_hash_type:
105
OS << "unsupported instrumentation profile hash type";
106
break;
107
case instrprof_error::too_large:
108
OS << "too much profile data";
109
break;
110
case instrprof_error::truncated:
111
OS << "truncated profile data";
112
break;
113
case instrprof_error::malformed:
114
OS << "malformed instrumentation profile data";
115
break;
116
case instrprof_error::missing_correlation_info:
117
OS << "debug info/binary for correlation is required";
118
break;
119
case instrprof_error::unexpected_correlation_info:
120
OS << "debug info/binary for correlation is not necessary";
121
break;
122
case instrprof_error::unable_to_correlate_profile:
123
OS << "unable to correlate profile";
124
break;
125
case instrprof_error::invalid_prof:
126
OS << "invalid profile created. Please file a bug "
127
"at: " BUG_REPORT_URL
128
" and include the profraw files that caused this error.";
129
break;
130
case instrprof_error::unknown_function:
131
OS << "no profile data available for function";
132
break;
133
case instrprof_error::hash_mismatch:
134
OS << "function control flow change detected (hash mismatch)";
135
break;
136
case instrprof_error::count_mismatch:
137
OS << "function basic block count change detected (counter mismatch)";
138
break;
139
case instrprof_error::bitmap_mismatch:
140
OS << "function bitmap size change detected (bitmap size mismatch)";
141
break;
142
case instrprof_error::counter_overflow:
143
OS << "counter overflow";
144
break;
145
case instrprof_error::value_site_count_mismatch:
146
OS << "function value site count change detected (counter mismatch)";
147
break;
148
case instrprof_error::compress_failed:
149
OS << "failed to compress data (zlib)";
150
break;
151
case instrprof_error::uncompress_failed:
152
OS << "failed to uncompress data (zlib)";
153
break;
154
case instrprof_error::empty_raw_profile:
155
OS << "empty raw profile file";
156
break;
157
case instrprof_error::zlib_unavailable:
158
OS << "profile uses zlib compression but the profile reader was built "
159
"without zlib support";
160
break;
161
case instrprof_error::raw_profile_version_mismatch:
162
OS << "raw profile version mismatch";
163
break;
164
case instrprof_error::counter_value_too_large:
165
OS << "excessively large counter value suggests corrupted profile data";
166
break;
167
}
168
169
// If optional error message is not empty, append it to the message.
170
if (!ErrMsg.empty())
171
OS << ": " << ErrMsg;
172
173
return OS.str();
174
}
175
176
namespace {
177
178
// FIXME: This class is only here to support the transition to llvm::Error. It
179
// will be removed once this transition is complete. Clients should prefer to
180
// deal with the Error value directly, rather than converting to error_code.
181
class InstrProfErrorCategoryType : public std::error_category {
182
const char *name() const noexcept override { return "llvm.instrprof"; }
183
184
std::string message(int IE) const override {
185
return getInstrProfErrString(static_cast<instrprof_error>(IE));
186
}
187
};
188
189
} // end anonymous namespace
190
191
const std::error_category &llvm::instrprof_category() {
192
static InstrProfErrorCategoryType ErrorCategory;
193
return ErrorCategory;
194
}
195
196
namespace {
197
198
// The three tables below are generated from the INSTR_PROF_SECT_ENTRY entries
// of InstrProfData.inc and are indexed by InstrProfSectKind.

// Section names used for every object format other than COFF.
const char *InstrProfSectNameCommon[] = {
#define INSTR_PROF_SECT_ENTRY(Kind, Common, Coff, SegPrefix) Common,
#include "llvm/ProfileData/InstrProfData.inc"
};

// Section names used for COFF.
const char *InstrProfSectNameCoff[] = {
#define INSTR_PROF_SECT_ENTRY(Kind, Common, Coff, SegPrefix) Coff,
#include "llvm/ProfileData/InstrProfData.inc"
};

// Prefixes placed in front of the section name for MachO when segment info
// is requested.
const char *InstrProfSectNamePrefix[] = {
#define INSTR_PROF_SECT_ENTRY(Kind, Common, Coff, SegPrefix) SegPrefix,
#include "llvm/ProfileData/InstrProfData.inc"
};
215
216
} // namespace
217
218
namespace llvm {
219
220
// Name/filename string compression; enabled by default.
cl::opt<bool> DoInstrProfNameCompression(
    "enable-name-compression",
    cl::desc("Enable name/filename string compression"), cl::init(true));

// Instrumentation of virtual table addresses; disabled by default.
cl::opt<bool> EnableVTableValueProfiling(
    "enable-vtable-value-profiling", cl::init(false),
    cl::desc("If true, the virtual table address will be instrumented to "
             "know the types of a C++ pointer. The information is used in "
             "indirect call promotion to do selective vtable-based "
             "comparison."));

// Use of vtable profiles by the ICP pass; disabled by default.
cl::opt<bool> EnableVTableProfileUse(
    "enable-vtable-profile-use", cl::init(false),
    cl::desc("If ThinLTO and WPD is enabled and this option is true, "
             "vtable profiles will be used by ICP pass for more efficient "
             "indirect call sequence. If false, type profiles won't be "
             "used."));
235
236
std::string getInstrProfSectionName(InstrProfSectKind IPSK,
237
Triple::ObjectFormatType OF,
238
bool AddSegmentInfo) {
239
std::string SectName;
240
241
if (OF == Triple::MachO && AddSegmentInfo)
242
SectName = InstrProfSectNamePrefix[IPSK];
243
244
if (OF == Triple::COFF)
245
SectName += InstrProfSectNameCoff[IPSK];
246
else
247
SectName += InstrProfSectNameCommon[IPSK];
248
249
if (OF == Triple::MachO && IPSK == IPSK_data && AddSegmentInfo)
250
SectName += ",regular,live_support";
251
252
return SectName;
253
}
254
255
std::string InstrProfError::message() const {
256
return getInstrProfErrString(Err, Msg);
257
}
258
259
char InstrProfError::ID = 0;
260
261
std::string getPGOFuncName(StringRef Name, GlobalValue::LinkageTypes Linkage,
262
StringRef FileName,
263
uint64_t Version LLVM_ATTRIBUTE_UNUSED) {
264
// Value names may be prefixed with a binary '1' to indicate
265
// that the backend should not modify the symbols due to any platform
266
// naming convention. Do not include that '1' in the PGO profile name.
267
if (Name[0] == '\1')
268
Name = Name.substr(1);
269
270
std::string NewName = std::string(Name);
271
if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
272
// For local symbols, prepend the main file name to distinguish them.
273
// Do not include the full path in the file name since there's no guarantee
274
// that it will stay the same, e.g., if the files are checked out from
275
// version control in different locations.
276
if (FileName.empty())
277
NewName = NewName.insert(0, "<unknown>:");
278
else
279
NewName = NewName.insert(0, FileName.str() + ":");
280
}
281
return NewName;
282
}
283
284
// Strip NumPrefix level of directory name from PathNameStr. If the number of
285
// directory separators is less than NumPrefix, strip all the directories and
286
// leave base file name only.
287
static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) {
  // Zero levels means nothing is stripped. Without this guard the unsigned
  // countdown below would wrap on the first separator, so a path starting
  // with a separator would lose all of its directories.
  if (NumPrefix == 0)
    return PathNameStr;

  uint32_t Remaining = NumPrefix;
  // Index just past the last separator that was counted.
  size_t LastPos = 0;
  for (size_t Idx = 0, End = PathNameStr.size(); Idx != End; ++Idx) {
    if (!llvm::sys::path::is_separator(PathNameStr[Idx]))
      continue;
    LastPos = Idx + 1;
    // Stop as soon as the requested number of levels has been consumed.
    if (--Remaining == 0)
      break;
  }
  return PathNameStr.substr(LastPos);
}
301
302
/// Return the source file name of the module that owns \p GO, with leading
/// directories removed as requested by the two static-func-* options.
static StringRef getStrippedSourceFileName(const GlobalObject &GO) {
  // Without the full-module prefix every directory is stripped, which is
  // expressed as the largest possible level.
  const uint32_t BaseLevel = StaticFuncFullModulePrefix ? 0 : UINT32_MAX;
  const uint32_t RequestedLevel = StaticFuncStripDirNamePrefix;
  const uint32_t StripLevel = std::max(BaseLevel, RequestedLevel);

  StringRef FileName(GO.getParent()->getSourceFileName());
  if (StripLevel == 0)
    return FileName;
  return stripDirPrefix(FileName, StripLevel);
}
311
312
// The PGO name has the format [<filepath>;]<mangled-name> where <filepath>; is
313
// provided if linkage is local and is used to discriminate possibly identical
314
// mangled names. ";" is used because it is unlikely to be found in either
315
// <filepath> or <mangled-name>.
316
//
317
// Older compilers used getPGOFuncName() which has the format
318
// [<filepath>:]<mangled-name>. This caused trouble for Objective-C functions
319
// which commonly have :'s in their names. We still need to compute this name to
320
// lookup functions from profiles built by older compilers.
321
static std::string
322
getIRPGONameForGlobalObject(const GlobalObject &GO,
323
GlobalValue::LinkageTypes Linkage,
324
StringRef FileName) {
325
return GlobalValue::getGlobalIdentifier(GO.getName(), Linkage, FileName);
326
}
327
328
/// Return the string held in the first operand of \p MD, or an empty optional
/// when \p MD is null.
static std::optional<std::string> lookupPGONameFromMetadata(MDNode *MD) {
  if (MD == nullptr)
    return std::nullopt;
  return cast<MDString>(MD->getOperand(0))->getString().str();
}
335
336
// Returns the PGO object name. This function has some special handling
337
// when called in LTO optimization. The following only applies when calling in
338
// LTO passes (when \c InLTO is true): LTO's internalization privatizes many
339
// global linkage symbols. This happens after value profile annotation, but
340
// those internal linkage functions should not have a source prefix.
341
// Additionally, for ThinLTO mode, exported internal functions are promoted
342
// and renamed. We need to ensure that the original internal PGO name is
343
// used when computing the GUID that is compared against the profiled GUIDs.
344
// To differentiate compiler generated internal symbols from original ones,
345
// PGOFuncName meta data are created and attached to the original internal
346
// symbols in the value profile annotation step
347
// (PGOUseFunc::annotateIndirectCallSites). If a symbol does not have the meta
348
// data, its original linkage must be non-internal.
349
static std::string getIRPGOObjectName(const GlobalObject &GO, bool InLTO,
350
MDNode *PGONameMetadata) {
351
if (!InLTO) {
352
auto FileName = getStrippedSourceFileName(GO);
353
return getIRPGONameForGlobalObject(GO, GO.getLinkage(), FileName);
354
}
355
356
// In LTO mode (when InLTO is true), first check if there is a meta data.
357
if (auto IRPGOFuncName = lookupPGONameFromMetadata(PGONameMetadata))
358
return *IRPGOFuncName;
359
360
// If there is no meta data, the function must be a global before the value
361
// profile annotation pass. Its current linkage may be internal if it is
362
// internalized in LTO mode.
363
return getIRPGONameForGlobalObject(GO, GlobalValue::ExternalLinkage, "");
364
}
365
366
// Returns the IRPGO function name and does special handling when called
367
// in LTO optimization. See the comments of `getIRPGOObjectName` for details.
368
std::string getIRPGOFuncName(const Function &F, bool InLTO) {
369
return getIRPGOObjectName(F, InLTO, getPGOFuncNameMetadata(F));
370
}
371
372
// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is
373
// for front-end (Clang, etc) instrumentation.
374
// The implementation is kept for profile matching from older profiles.
375
// This is similar to `getIRPGOFuncName` except that this function calls
376
// 'getPGOFuncName' to get a name and `getIRPGOFuncName` calls
377
// 'getIRPGONameForGlobalObject'. See the difference between two callees in the
378
// comments of `getIRPGONameForGlobalObject`.
379
std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) {
380
if (!InLTO) {
381
auto FileName = getStrippedSourceFileName(F);
382
return getPGOFuncName(F.getName(), F.getLinkage(), FileName, Version);
383
}
384
385
// In LTO mode (when InLTO is true), first check if there is a meta data.
386
if (auto PGOFuncName = lookupPGONameFromMetadata(getPGOFuncNameMetadata(F)))
387
return *PGOFuncName;
388
389
// If there is no meta data, the function must be a global before the value
390
// profile annotation pass. Its current linkage may be internal if it is
391
// internalized in LTO mode.
392
return getPGOFuncName(F.getName(), GlobalValue::ExternalLinkage, "");
393
}
394
395
std::string getPGOName(const GlobalVariable &V, bool InLTO) {
396
// PGONameMetadata should be set by compiler at profile use time
397
// and read by symtab creation to look up symbols corresponding to
398
// a MD5 hash.
399
return getIRPGOObjectName(V, InLTO, V.getMetadata(getPGONameMetadataName()));
400
}
401
402
// See getIRPGOObjectName() for a description of the format.
403
std::pair<StringRef, StringRef> getParsedIRPGOName(StringRef IRPGOName) {
  // Split at the first delimiter: (file name, mangled name).
  std::pair<StringRef, StringRef> Parts =
      IRPGOName.split(GlobalIdentifierDelimiter);

  // An empty second half is treated as "no file name": the whole input is
  // returned as the mangled name.
  if (Parts.second.empty())
    return std::make_pair(StringRef(), IRPGOName);
  return Parts;
}
409
410
/// Return \p PGOFuncName without its leading \p FileName and the separator
/// character after it. The name is returned unchanged when \p FileName is
/// empty or is not a prefix of \p PGOFuncName.
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) {
  const bool HasFilePrefix =
      !FileName.empty() && PGOFuncName.starts_with(FileName);
  if (!HasFilePrefix)
    return PGOFuncName;

  // Drop the file name including ':' or ';'. See getIRPGONameForGlobalObject as
  // well.
  return PGOFuncName.drop_front(FileName.size() + 1);
}
419
420
// \p FuncName is the string used as profile lookup key for the function. A
421
// symbol is created to hold the name. Return the legalized symbol name.
422
std::string getPGOFuncNameVarName(StringRef FuncName,
423
GlobalValue::LinkageTypes Linkage) {
424
std::string VarName = std::string(getInstrProfNameVarPrefix());
425
VarName += FuncName;
426
427
if (!GlobalValue::isLocalLinkage(Linkage))
428
return VarName;
429
430
// Now fix up illegal chars in local VarName that may upset the assembler.
431
const char InvalidChars[] = "-:;<>/\"'";
432
size_t FoundPos = VarName.find_first_of(InvalidChars);
433
while (FoundPos != std::string::npos) {
434
VarName[FoundPos] = '_';
435
FoundPos = VarName.find_first_of(InvalidChars, FoundPos + 1);
436
}
437
return VarName;
438
}
439
440
/// Create in \p M a constant global variable that holds \p PGOFuncName
/// (without a trailing NUL), using a linkage derived from \p Linkage.
GlobalVariable *createPGOFuncNameVar(Module &M,
                                     GlobalValue::LinkageTypes Linkage,
                                     StringRef PGOFuncName) {
  // We generally want to match the function's linkage, but available_externally
  // and extern_weak both have the wrong semantics, and anything that doesn't
  // need to link across compilation units doesn't need to be visible at all.
  switch (Linkage) {
  case GlobalValue::ExternalWeakLinkage:
    Linkage = GlobalValue::LinkOnceAnyLinkage;
    break;
  case GlobalValue::AvailableExternallyLinkage:
    Linkage = GlobalValue::LinkOnceODRLinkage;
    break;
  case GlobalValue::InternalLinkage:
  case GlobalValue::ExternalLinkage:
    Linkage = GlobalValue::PrivateLinkage;
    break;
  default:
    break;
  }

  auto *Initializer =
      ConstantDataArray::getString(M.getContext(), PGOFuncName, false);
  // The variable's symbol name is computed from the adjusted linkage.
  auto *NameVar =
      new GlobalVariable(M, Initializer->getType(), true, Linkage, Initializer,
                         getPGOFuncNameVarName(PGOFuncName, Linkage));

  // Hide the symbol so that we correctly get a copy for each executable.
  const bool IsLocal = GlobalValue::isLocalLinkage(NameVar->getLinkage());
  if (!IsLocal)
    NameVar->setVisibility(GlobalValue::HiddenVisibility);

  return NameVar;
}
466
467
/// Convenience overload: create the name variable in the module that owns
/// \p F, starting from the linkage of \p F.
GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName) {
  Module &OwningModule = *F.getParent();
  return createPGOFuncNameVar(OwningModule, F.getLinkage(), PGOFuncName);
}
470
471
/// Populate the symbol table from module \p M: every named function is added
/// under both its IRPGO name and its legacy PGO name, and every named global
/// variable that carries !type metadata is added as a vtable. \p InLTO is
/// forwarded to the name computation. Returns the first error reported while
/// adding a name, or success.
Error InstrProfSymtab::create(Module &M, bool InLTO) {
  for (Function &F : M) {
    // Function may not have a name: like using asm("") to overwrite the name.
    // Ignore in this case.
    if (!F.hasName())
      continue;
    if (Error E = addFuncWithName(F, getIRPGOFuncName(F, InLTO)))
      return E;
    // Also use getPGOFuncName() so that we can find records from older profiles
    if (Error E = addFuncWithName(F, getPGOFuncName(F, InLTO)))
      return E;
  }

  for (GlobalVariable &G : M.globals()) {
    // Only named globals with !type metadata are registered as vtables.
    if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
      continue;
    if (Error E = addVTableWithName(G, getPGOName(G, InLTO)))
      return E;
  }

  // Mark the table as unsorted so that finalizeSymtab() runs on the newly
  // added entries.
  Sorted = false;
  finalizeSymtab();
  return Error::success();
}
496
497
/// Register \p VTable under \p VTablePGOName and, when it differs, under the
/// canonical form of that name as well.
Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable,
                                         StringRef VTablePGOName) {
  // Record one spelling of the name and map its GUID to VTable. An existing
  // mapping for the same GUID is kept and only reported in debug output.
  auto RegisterName = [&](StringRef Name) -> Error {
    if (Error E = addSymbolName(Name))
      return E;

    const bool IsNewEntry =
        MD5VTableMap.try_emplace(GlobalValue::getGUID(Name), &VTable).second;
    if (!IsNewEntry)
      LLVM_DEBUG(dbgs() << "GUID conflict within one module");
    return Error::success();
  };

  if (Error E = RegisterName(VTablePGOName))
    return E;

  const StringRef CanonicalName = getCanonicalName(VTablePGOName);
  if (CanonicalName == VTablePGOName)
    return Error::success();
  return RegisterName(CanonicalName);
}
519
520
/// \c NameStrings is a string composed of one or more possibly encoded
521
/// sub-strings. The substrings are separated by 0 or more zero bytes. This
522
/// method decodes the string and calls `NameCallback` for each substring.
523
static Error
readAndDecodeStrings(StringRef NameStrings,
                     std::function<Error(StringRef)> NameCallback) {
  const uint8_t *P = NameStrings.bytes_begin();
  const uint8_t *EndP = NameStrings.bytes_end();

  // Decode one ULEB128 field at P and advance P past it. The decoder is given
  // the end of the buffer so that a corrupt length field cannot make it read
  // beyond the data.
  auto ReadULEB128 = [&](uint64_t &Value) -> Error {
    unsigned Len = 0;
    const char *DecodeErr = nullptr;
    Value = decodeULEB128(P, &Len, EndP, &DecodeErr);
    if (DecodeErr)
      return make_error<InstrProfError>(instrprof_error::malformed, DecodeErr);
    P += Len;
    return Error::success();
  };

  while (P < EndP) {
    // Each group starts with two lengths: the uncompressed size and the
    // compressed size. A compressed size of zero marks an uncompressed group.
    uint64_t UncompressedSize = 0;
    if (Error E = ReadULEB128(UncompressedSize))
      return E;
    uint64_t CompressedSize = 0;
    if (Error E = ReadULEB128(CompressedSize))
      return E;

    const uint64_t Remaining = static_cast<uint64_t>(EndP - P);
    const bool IsCompressed = (CompressedSize != 0);
    SmallVector<uint8_t, 128> UncompressedNameStrings;
    StringRef DecodedNames;
    if (IsCompressed) {
      if (!llvm::compression::zlib::isAvailable())
        return make_error<InstrProfError>(instrprof_error::zlib_unavailable);

      // The payload must lie entirely within the input buffer.
      if (CompressedSize > Remaining)
        return make_error<InstrProfError>(instrprof_error::truncated);

      if (Error E = compression::zlib::decompress(ArrayRef(P, CompressedSize),
                                                  UncompressedNameStrings,
                                                  UncompressedSize)) {
        consumeError(std::move(E));
        return make_error<InstrProfError>(instrprof_error::uncompress_failed);
      }
      P += CompressedSize;
      DecodedNames = toStringRef(UncompressedNameStrings);
    } else {
      // The payload must lie entirely within the input buffer.
      if (UncompressedSize > Remaining)
        return make_error<InstrProfError>(instrprof_error::truncated);

      DecodedNames =
          StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
      P += UncompressedSize;
    }

    // Now parse the name strings.
    SmallVector<StringRef, 0> Names;
    DecodedNames.split(Names, getInstrProfNameSeparator());
    for (StringRef &Name : Names)
      if (Error E = NameCallback(Name))
        return E;

    // Skip the zero bytes that separate this group from the next one.
    while (P < EndP && *P == 0)
      P++;
  }
  return Error::success();
}
566
567
/// Decode \p NameStrings and add every name found as a function name.
Error InstrProfSymtab::create(StringRef NameStrings) {
  auto AddFunc = [this](StringRef Name) -> Error { return addFuncName(Name); };
  return readAndDecodeStrings(NameStrings, AddFunc);
}
572
573
/// Decode \p FuncNameStrings into function names and then
/// \p VTableNameStrings into vtable names. Stops at the first error.
Error InstrProfSymtab::create(StringRef FuncNameStrings,
                              StringRef VTableNameStrings) {
  auto AddFunc = [this](StringRef Name) -> Error { return addFuncName(Name); };
  if (Error E = readAndDecodeStrings(FuncNameStrings, AddFunc))
    return E;

  auto AddVTable = [this](StringRef Name) -> Error {
    return addVTableName(Name);
  };
  return readAndDecodeStrings(VTableNameStrings, AddVTable);
}
584
585
/// Decode \p CompressedVTableStrings and add every name found as a vtable
/// name.
Error InstrProfSymtab::initVTableNamesFromCompressedStrings(
    StringRef CompressedVTableStrings) {
  auto AddVTable = [this](StringRef Name) -> Error {
    return addVTableName(Name);
  };
  return readAndDecodeStrings(CompressedVTableStrings, AddVTable);
}
591
592
StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName) {
  // In ThinLTO, local function may have been promoted to global and have
  // suffix ".llvm." added to the function name. We need to add the
  // stripped function name to the symbol table so that we can find a match
  // from profile.
  //
  // ".__uniq." suffix is used to differentiate internal linkage functions in
  // different modules and should be kept. This is the only suffix with the
  // pattern ".xxx" which is kept before matching, other suffixes similar as
  // ".llvm." will be stripped.
  const StringRef UniqSuffix(".__uniq.");

  // Start the search for '.' right after ".__uniq." when it is present, and
  // at the beginning of the name otherwise.
  size_t SearchFrom = 0;
  if (size_t UniqPos = PGOName.find(UniqSuffix); UniqPos != StringRef::npos)
    SearchFrom = UniqPos + UniqSuffix.size();

  const size_t DotPos = PGOName.find('.', SearchFrom);
  // No '.' at all, or one at the very start, leaves the name unchanged.
  if (DotPos == StringRef::npos || DotPos == 0)
    return PGOName;

  return PGOName.substr(0, DotPos);
}
617
618
/// Register \p F under \p PGOFuncName and, when it differs, under the
/// canonical form of that name as well.
Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
  // Record one spelling of the name and append its (GUID, function) pair.
  auto RegisterName = [&](StringRef Name) -> Error {
    if (Error E = addFuncName(Name))
      return E;
    MD5FuncMap.emplace_back(Function::getGUID(Name), &F);
    return Error::success();
  };

  if (Error E = RegisterName(PGOFuncName))
    return E;

  const StringRef CanonicalName = getCanonicalName(PGOFuncName);
  if (CanonicalName == PGOFuncName)
    return Error::success();
  return RegisterName(CanonicalName);
}
634
635
uint64_t InstrProfSymtab::getVTableHashFromAddress(uint64_t Address) {
636
// Given a runtime address, look up the hash value in the interval map, and
637
// fallback to value 0 if a hash value is not found.
638
return VTableAddrMap.lookup(Address, 0);
639
}
640
641
uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) {
642
finalizeSymtab();
643
auto It = partition_point(AddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {
644
return A.first < Address;
645
});
646
// Raw function pointer collected by value profiler may be from
647
// external functions that are not instrumented. They won't have
648
// mapping data to be used by the deserializer. Force the value to
649
// be 0 in this case.
650
if (It != AddrToMD5Map.end() && It->first == Address)
651
return (uint64_t)It->second;
652
return 0;
653
}
654
655
/// Write every key of NameTab to \p OS in sorted order, one per line.
void InstrProfSymtab::dumpNames(raw_ostream &OS) const {
  SmallVector<StringRef, 0> Names(NameTab.keys());
  llvm::sort(Names);
  for (const StringRef &Name : Names)
    OS << Name << '\n';
}
661
662
/// Join \p NameStrs with the name separator and append the encoded group to
/// \p Result: ULEB128 uncompressed length, ULEB128 compressed length (0 when
/// \p DoCompression is false), then the payload, which is zlib-compressed
/// when \p DoCompression is true.
Error collectGlobalObjectNameStrings(ArrayRef<std::string> NameStrs,
                                     bool DoCompression, std::string &Result) {
  assert(!NameStrs.empty() && "No name data to emit");

  const std::string UncompressedNameStrings =
      join(NameStrs.begin(), NameStrs.end(), getInstrProfNameSeparator());

  assert(StringRef(UncompressedNameStrings)
                 .count(getInstrProfNameSeparator()) == (NameStrs.size() - 1) &&
         "PGO name is invalid (contains separator token)");

  // Decide what is written after the header and which compressed length the
  // header reports.
  SmallVector<uint8_t, 128> CompressedNameStrings;
  StringRef Payload = UncompressedNameStrings;
  size_t CompressedLen = 0;
  if (DoCompression) {
    compression::zlib::compress(arrayRefFromStringRef(UncompressedNameStrings),
                                CompressedNameStrings,
                                compression::zlib::BestSizeCompression);
    Payload = toStringRef(CompressedNameStrings);
    CompressedLen = CompressedNameStrings.size();
  }

  // The header holds the two ULEB128-encoded lengths back to back.
  uint8_t Header[20];
  uint8_t *Cursor = Header;
  Cursor += encodeULEB128(UncompressedNameStrings.length(), Cursor);
  Cursor += encodeULEB128(CompressedLen, Cursor);

  const unsigned HeaderLen = Cursor - &Header[0];
  Result.append(reinterpret_cast<char *>(&Header[0]), HeaderLen);
  Result += Payload;
  return Error::success();
}
699
700
/// Return the string stored in the initializer of \p NameVar, read as a C
/// string when the array is one and as a plain string otherwise.
StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar) {
  auto *Initializer = cast<ConstantDataArray>(NameVar->getInitializer());
  if (Initializer->isCString())
    return Initializer->getAsCString();
  return Initializer->getAsString();
}
706
707
/// Collect the names stored in \p NameVars and append their encoded form to
/// \p Result. Compression is used only when \p DoCompression is set and zlib
/// is available.
Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
                                std::string &Result, bool DoCompression) {
  std::vector<std::string> Names;
  Names.reserve(NameVars.size());
  for (GlobalVariable *NameVar : NameVars)
    Names.emplace_back(getPGOFuncNameVarInitializer(NameVar));

  const bool Compress = compression::zlib::isAvailable() && DoCompression;
  return collectGlobalObjectNameStrings(Names, Compress, Result);
}
716
717
/// Collect the PGO names of \p VTables and append their encoded form to
/// \p Result. Compression is used only when \p DoCompression is set and zlib
/// is available.
Error collectVTableStrings(ArrayRef<GlobalVariable *> VTables,
                           std::string &Result, bool DoCompression) {
  std::vector<std::string> Names;
  Names.reserve(VTables.size());
  for (GlobalVariable *VTable : VTables)
    Names.push_back(getPGOName(*VTable));

  const bool Compress = compression::zlib::isAvailable() && DoCompression;
  return collectGlobalObjectNameStrings(Names, Compress, Result);
}
726
727
/// Add this record's totals to \p Sum: the number of counters, the sum of
/// the counters, and for each value kind the sum of its value counts.
void InstrProfRecord::accumulateCounts(CountSumOrPercent &Sum) const {
  Sum.NumEntries += Counts.size();

  // Sum as integers first and fold the total into Sum in one step.
  uint64_t CounterTotal = 0;
  for (const uint64_t Counter : Counts)
    CounterTotal += Counter;
  Sum.CountSum += CounterTotal;

  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
    uint64_t KindTotal = 0;
    const uint32_t NumSites = getNumValueSites(Kind);
    for (size_t Site = 0; Site < NumSites; ++Site)
      for (const auto &VD : getValueArrayForSite(Kind, Site))
        KindTotal += VD.Count;
    Sum.ValueCounts[Kind] += KindTotal;
  }
}
744
745
void InstrProfValueSiteRecord::overlap(InstrProfValueSiteRecord &Input,
746
uint32_t ValueKind,
747
OverlapStats &Overlap,
748
OverlapStats &FuncLevelOverlap) {
749
this->sortByTargetValues();
750
Input.sortByTargetValues();
751
double Score = 0.0f, FuncLevelScore = 0.0f;
752
auto I = ValueData.begin();
753
auto IE = ValueData.end();
754
auto J = Input.ValueData.begin();
755
auto JE = Input.ValueData.end();
756
while (I != IE && J != JE) {
757
if (I->Value == J->Value) {
758
Score += OverlapStats::score(I->Count, J->Count,
759
Overlap.Base.ValueCounts[ValueKind],
760
Overlap.Test.ValueCounts[ValueKind]);
761
FuncLevelScore += OverlapStats::score(
762
I->Count, J->Count, FuncLevelOverlap.Base.ValueCounts[ValueKind],
763
FuncLevelOverlap.Test.ValueCounts[ValueKind]);
764
++I;
765
} else if (I->Value < J->Value) {
766
++I;
767
continue;
768
}
769
++J;
770
}
771
Overlap.Overlap.ValueCounts[ValueKind] += Score;
772
FuncLevelOverlap.Overlap.ValueCounts[ValueKind] += FuncLevelScore;
773
}
774
775
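// Compute the overlap of the value profile data of kind \p ValueKind between
// this record and \p Other. Both records are expected to have the same number
// of value sites for that kind.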
776
void InstrProfRecord::overlapValueProfData(uint32_t ValueKind,
777
InstrProfRecord &Other,
778
OverlapStats &Overlap,
779
OverlapStats &FuncLevelOverlap) {
780
uint32_t ThisNumValueSites = getNumValueSites(ValueKind);
781
assert(ThisNumValueSites == Other.getNumValueSites(ValueKind));
782
if (!ThisNumValueSites)
783
return;
784
785
std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
786
getOrCreateValueSitesForKind(ValueKind);
787
MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords =
788
Other.getValueSitesForKind(ValueKind);
789
for (uint32_t I = 0; I < ThisNumValueSites; I++)
790
ThisSiteRecords[I].overlap(OtherSiteRecords[I], ValueKind, Overlap,
791
FuncLevelOverlap);
792
}
793
794
void InstrProfRecord::overlap(InstrProfRecord &Other, OverlapStats &Overlap,
795
OverlapStats &FuncLevelOverlap,
796
uint64_t ValueCutoff) {
797
// FuncLevel CountSum for other should already computed and nonzero.
798
assert(FuncLevelOverlap.Test.CountSum >= 1.0f);
799
accumulateCounts(FuncLevelOverlap.Base);
800
bool Mismatch = (Counts.size() != Other.Counts.size());
801
802
// Check if the value profiles mismatch.
803
if (!Mismatch) {
804
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
805
uint32_t ThisNumValueSites = getNumValueSites(Kind);
806
uint32_t OtherNumValueSites = Other.getNumValueSites(Kind);
807
if (ThisNumValueSites != OtherNumValueSites) {
808
Mismatch = true;
809
break;
810
}
811
}
812
}
813
if (Mismatch) {
814
Overlap.addOneMismatch(FuncLevelOverlap.Test);
815
return;
816
}
817
818
// Compute overlap for value counts.
819
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
820
overlapValueProfData(Kind, Other, Overlap, FuncLevelOverlap);
821
822
double Score = 0.0;
823
uint64_t MaxCount = 0;
824
// Compute overlap for edge counts.
825
for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) {
826
Score += OverlapStats::score(Counts[I], Other.Counts[I],
827
Overlap.Base.CountSum, Overlap.Test.CountSum);
828
MaxCount = std::max(Other.Counts[I], MaxCount);
829
}
830
Overlap.Overlap.CountSum += Score;
831
Overlap.Overlap.NumEntries += 1;
832
833
if (MaxCount >= ValueCutoff) {
834
double FuncScore = 0.0;
835
for (size_t I = 0, E = Other.Counts.size(); I < E; ++I)
836
FuncScore += OverlapStats::score(Counts[I], Other.Counts[I],
837
FuncLevelOverlap.Base.CountSum,
838
FuncLevelOverlap.Test.CountSum);
839
FuncLevelOverlap.Overlap.CountSum = FuncScore;
840
FuncLevelOverlap.Overlap.NumEntries = Other.Counts.size();
841
FuncLevelOverlap.Valid = true;
842
}
843
}
844
845
void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input,
846
uint64_t Weight,
847
function_ref<void(instrprof_error)> Warn) {
848
this->sortByTargetValues();
849
Input.sortByTargetValues();
850
auto I = ValueData.begin();
851
auto IE = ValueData.end();
852
std::vector<InstrProfValueData> Merged;
853
Merged.reserve(std::max(ValueData.size(), Input.ValueData.size()));
854
for (const InstrProfValueData &J : Input.ValueData) {
855
while (I != IE && I->Value < J.Value) {
856
Merged.push_back(*I);
857
++I;
858
}
859
if (I != IE && I->Value == J.Value) {
860
bool Overflowed;
861
I->Count = SaturatingMultiplyAdd(J.Count, Weight, I->Count, &Overflowed);
862
if (Overflowed)
863
Warn(instrprof_error::counter_overflow);
864
Merged.push_back(*I);
865
++I;
866
continue;
867
}
868
Merged.push_back(J);
869
}
870
Merged.insert(Merged.end(), I, IE);
871
ValueData = std::move(Merged);
872
}
873
874
void InstrProfValueSiteRecord::scale(uint64_t N, uint64_t D,
875
function_ref<void(instrprof_error)> Warn) {
876
for (InstrProfValueData &I : ValueData) {
877
bool Overflowed;
878
I.Count = SaturatingMultiply(I.Count, N, &Overflowed) / D;
879
if (Overflowed)
880
Warn(instrprof_error::counter_overflow);
881
}
882
}
883
884
// Merge Value Profile data from Src record to this record for ValueKind.
885
// Scale merged value counts by \p Weight.
886
void InstrProfRecord::mergeValueProfData(
887
uint32_t ValueKind, InstrProfRecord &Src, uint64_t Weight,
888
function_ref<void(instrprof_error)> Warn) {
889
uint32_t ThisNumValueSites = getNumValueSites(ValueKind);
890
uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind);
891
if (ThisNumValueSites != OtherNumValueSites) {
892
Warn(instrprof_error::value_site_count_mismatch);
893
return;
894
}
895
if (!ThisNumValueSites)
896
return;
897
std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
898
getOrCreateValueSitesForKind(ValueKind);
899
MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords =
900
Src.getValueSitesForKind(ValueKind);
901
for (uint32_t I = 0; I < ThisNumValueSites; I++)
902
ThisSiteRecords[I].merge(OtherSiteRecords[I], Weight, Warn);
903
}
904
905
void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight,
906
function_ref<void(instrprof_error)> Warn) {
907
// If the number of counters doesn't match we either have bad data
908
// or a hash collision.
909
if (Counts.size() != Other.Counts.size()) {
910
Warn(instrprof_error::count_mismatch);
911
return;
912
}
913
914
// Special handling of the first count as the PseudoCount.
915
CountPseudoKind OtherKind = Other.getCountPseudoKind();
916
CountPseudoKind ThisKind = getCountPseudoKind();
917
if (OtherKind != NotPseudo || ThisKind != NotPseudo) {
918
// We don't allow the merge of a profile with pseudo counts and
919
// a normal profile (i.e. without pesudo counts).
920
// Profile supplimenation should be done after the profile merge.
921
if (OtherKind == NotPseudo || ThisKind == NotPseudo) {
922
Warn(instrprof_error::count_mismatch);
923
return;
924
}
925
if (OtherKind == PseudoHot || ThisKind == PseudoHot)
926
setPseudoCount(PseudoHot);
927
else
928
setPseudoCount(PseudoWarm);
929
return;
930
}
931
932
for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) {
933
bool Overflowed;
934
uint64_t Value =
935
SaturatingMultiplyAdd(Other.Counts[I], Weight, Counts[I], &Overflowed);
936
if (Value > getInstrMaxCountValue()) {
937
Value = getInstrMaxCountValue();
938
Overflowed = true;
939
}
940
Counts[I] = Value;
941
if (Overflowed)
942
Warn(instrprof_error::counter_overflow);
943
}
944
945
// If the number of bitmap bytes doesn't match we either have bad data
946
// or a hash collision.
947
if (BitmapBytes.size() != Other.BitmapBytes.size()) {
948
Warn(instrprof_error::bitmap_mismatch);
949
return;
950
}
951
952
// Bitmap bytes are merged by simply ORing them together.
953
for (size_t I = 0, E = Other.BitmapBytes.size(); I < E; ++I) {
954
BitmapBytes[I] = Other.BitmapBytes[I] | BitmapBytes[I];
955
}
956
957
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
958
mergeValueProfData(Kind, Other, Weight, Warn);
959
}
960
961
void InstrProfRecord::scaleValueProfData(
962
uint32_t ValueKind, uint64_t N, uint64_t D,
963
function_ref<void(instrprof_error)> Warn) {
964
for (auto &R : getValueSitesForKind(ValueKind))
965
R.scale(N, D, Warn);
966
}
967
968
void InstrProfRecord::scale(uint64_t N, uint64_t D,
969
function_ref<void(instrprof_error)> Warn) {
970
assert(D != 0 && "D cannot be 0");
971
for (auto &Count : this->Counts) {
972
bool Overflowed;
973
Count = SaturatingMultiply(Count, N, &Overflowed) / D;
974
if (Count > getInstrMaxCountValue()) {
975
Count = getInstrMaxCountValue();
976
Overflowed = true;
977
}
978
if (Overflowed)
979
Warn(instrprof_error::counter_overflow);
980
}
981
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
982
scaleValueProfData(Kind, N, D, Warn);
983
}
984
985
// Map indirect call target name hash to name string.
986
uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind,
987
InstrProfSymtab *SymTab) {
988
if (!SymTab)
989
return Value;
990
991
if (ValueKind == IPVK_IndirectCallTarget)
992
return SymTab->getFunctionHashFromAddress(Value);
993
994
if (ValueKind == IPVK_VTableTarget)
995
return SymTab->getVTableHashFromAddress(Value);
996
997
return Value;
998
}
999
1000
void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site,
1001
ArrayRef<InstrProfValueData> VData,
1002
InstrProfSymtab *ValueMap) {
1003
// Remap values.
1004
std::vector<InstrProfValueData> RemappedVD;
1005
RemappedVD.reserve(VData.size());
1006
for (const auto &V : VData) {
1007
uint64_t NewValue = remapValue(V.Value, ValueKind, ValueMap);
1008
RemappedVD.push_back({NewValue, V.Count});
1009
}
1010
1011
std::vector<InstrProfValueSiteRecord> &ValueSites =
1012
getOrCreateValueSitesForKind(ValueKind);
1013
assert(ValueSites.size() == Site);
1014
1015
// Add a new value site with remapped value profiling data.
1016
ValueSites.emplace_back(std::move(RemappedVD));
1017
}
1018
1019
void TemporalProfTraceTy::createBPFunctionNodes(
1020
ArrayRef<TemporalProfTraceTy> Traces, std::vector<BPFunctionNode> &Nodes,
1021
bool RemoveOutlierUNs) {
1022
using IDT = BPFunctionNode::IDT;
1023
using UtilityNodeT = BPFunctionNode::UtilityNodeT;
1024
UtilityNodeT MaxUN = 0;
1025
DenseMap<IDT, size_t> IdToFirstTimestamp;
1026
DenseMap<IDT, UtilityNodeT> IdToFirstUN;
1027
DenseMap<IDT, SmallVector<UtilityNodeT>> IdToUNs;
1028
// TODO: We need to use the Trace.Weight field to give more weight to more
1029
// important utilities
1030
for (auto &Trace : Traces) {
1031
size_t CutoffTimestamp = 1;
1032
for (size_t Timestamp = 0; Timestamp < Trace.FunctionNameRefs.size();
1033
Timestamp++) {
1034
IDT Id = Trace.FunctionNameRefs[Timestamp];
1035
auto [It, WasInserted] = IdToFirstTimestamp.try_emplace(Id, Timestamp);
1036
if (!WasInserted)
1037
It->getSecond() = std::min<size_t>(It->getSecond(), Timestamp);
1038
if (Timestamp >= CutoffTimestamp) {
1039
++MaxUN;
1040
CutoffTimestamp = 2 * Timestamp;
1041
}
1042
IdToFirstUN.try_emplace(Id, MaxUN);
1043
}
1044
for (auto &[Id, FirstUN] : IdToFirstUN)
1045
for (auto UN = FirstUN; UN <= MaxUN; ++UN)
1046
IdToUNs[Id].push_back(UN);
1047
++MaxUN;
1048
IdToFirstUN.clear();
1049
}
1050
1051
if (RemoveOutlierUNs) {
1052
DenseMap<UtilityNodeT, unsigned> UNFrequency;
1053
for (auto &[Id, UNs] : IdToUNs)
1054
for (auto &UN : UNs)
1055
++UNFrequency[UN];
1056
// Filter out utility nodes that are too infrequent or too prevalent to make
1057
// BalancedPartitioning more effective.
1058
for (auto &[Id, UNs] : IdToUNs)
1059
llvm::erase_if(UNs, [&](auto &UN) {
1060
return UNFrequency[UN] <= 1 || 2 * UNFrequency[UN] > IdToUNs.size();
1061
});
1062
}
1063
1064
for (auto &[Id, UNs] : IdToUNs)
1065
Nodes.emplace_back(Id, UNs);
1066
1067
// Since BalancedPartitioning is sensitive to the initial order, we explicitly
1068
// order nodes by their earliest timestamp.
1069
llvm::sort(Nodes, [&](auto &L, auto &R) {
1070
return std::make_pair(IdToFirstTimestamp[L.Id], L.Id) <
1071
std::make_pair(IdToFirstTimestamp[R.Id], R.Id);
1072
});
1073
}
1074
1075
#define INSTR_PROF_COMMON_API_IMPL
1076
#include "llvm/ProfileData/InstrProfData.inc"
1077
1078
/*!
1079
* ValueProfRecordClosure Interface implementation for InstrProfRecord
1080
* class. These C wrappers are used as adaptors so that C++ code can be
1081
* invoked as callbacks.
1082
*/
1083
uint32_t getNumValueKindsInstrProf(const void *Record) {
1084
return reinterpret_cast<const InstrProfRecord *>(Record)->getNumValueKinds();
1085
}
1086
1087
uint32_t getNumValueSitesInstrProf(const void *Record, uint32_t VKind) {
1088
return reinterpret_cast<const InstrProfRecord *>(Record)
1089
->getNumValueSites(VKind);
1090
}
1091
1092
uint32_t getNumValueDataInstrProf(const void *Record, uint32_t VKind) {
1093
return reinterpret_cast<const InstrProfRecord *>(Record)
1094
->getNumValueData(VKind);
1095
}
1096
1097
uint32_t getNumValueDataForSiteInstrProf(const void *R, uint32_t VK,
1098
uint32_t S) {
1099
const auto *IPR = reinterpret_cast<const InstrProfRecord *>(R);
1100
return IPR->getValueArrayForSite(VK, S).size();
1101
}
1102
1103
void getValueForSiteInstrProf(const void *R, InstrProfValueData *Dst,
1104
uint32_t K, uint32_t S) {
1105
const auto *IPR = reinterpret_cast<const InstrProfRecord *>(R);
1106
llvm::copy(IPR->getValueArrayForSite(K, S), Dst);
1107
}
1108
1109
/// Callback: allocate \p TotalSizeInBytes bytes with ::operator new,
/// construct a ValueProfData at the start of that storage and zero the whole
/// allocation. Ownership of the returned pointer passes to the caller.
ValueProfData *allocValueProfDataInstrProf(size_t TotalSizeInBytes) {
  void *Storage = ::operator new(TotalSizeInBytes);
  ValueProfData *Data = new (Storage) ValueProfData();
  memset(Data, 0, TotalSizeInBytes);
  return Data;
}
1115
1116
// Closure template wiring the InstrProfRecord callbacks above into the
// ValueProfRecordClosure interface. The two pointer slots are left null
// here; users set the Record member before handing the closure to the
// serialization helpers.
static ValueProfRecordClosure InstrProfRecordClosure = {
    /*Record (set per use)=*/nullptr,
    getNumValueKindsInstrProf,
    getNumValueSitesInstrProf,
    getNumValueDataInstrProf,
    getNumValueDataForSiteInstrProf,
    // NOTE(review): presumably the optional value-remapping callback;
    // confirm against the ValueProfRecordClosure declaration.
    nullptr,
    getValueForSiteInstrProf,
    allocValueProfDataInstrProf};
1125
1126
// Wrapper implementation using the closure mechanism.
1127
uint32_t ValueProfData::getSize(const InstrProfRecord &Record) {
1128
auto Closure = InstrProfRecordClosure;
1129
Closure.Record = &Record;
1130
return getValueProfDataSize(&Closure);
1131
}
1132
1133
// Wrapper implementation using the closure mechanism.
1134
std::unique_ptr<ValueProfData>
1135
ValueProfData::serializeFrom(const InstrProfRecord &Record) {
1136
InstrProfRecordClosure.Record = &Record;
1137
1138
std::unique_ptr<ValueProfData> VPD(
1139
serializeValueProfDataFrom(&InstrProfRecordClosure, nullptr));
1140
return VPD;
1141
}
1142
1143
void ValueProfRecord::deserializeTo(InstrProfRecord &Record,
1144
InstrProfSymtab *SymTab) {
1145
Record.reserveSites(Kind, NumValueSites);
1146
1147
InstrProfValueData *ValueData = getValueProfRecordValueData(this);
1148
for (uint64_t VSite = 0; VSite < NumValueSites; ++VSite) {
1149
uint8_t ValueDataCount = this->SiteCountArray[VSite];
1150
ArrayRef<InstrProfValueData> VDs(ValueData, ValueDataCount);
1151
Record.addValueData(Kind, VSite, VDs, SymTab);
1152
ValueData += ValueDataCount;
1153
}
1154
}
1155
1156
// For writing/serializing, Old is the host endianness, and New is
1157
// byte order intended on disk. For Reading/deserialization, Old
1158
// is the on-disk source endianness, and New is the host endianness.
1159
void ValueProfRecord::swapBytes(llvm::endianness Old, llvm::endianness New) {
1160
using namespace support;
1161
1162
if (Old == New)
1163
return;
1164
1165
if (llvm::endianness::native != Old) {
1166
sys::swapByteOrder<uint32_t>(NumValueSites);
1167
sys::swapByteOrder<uint32_t>(Kind);
1168
}
1169
uint32_t ND = getValueProfRecordNumValueData(this);
1170
InstrProfValueData *VD = getValueProfRecordValueData(this);
1171
1172
// No need to swap byte array: SiteCountArrray.
1173
for (uint32_t I = 0; I < ND; I++) {
1174
sys::swapByteOrder<uint64_t>(VD[I].Value);
1175
sys::swapByteOrder<uint64_t>(VD[I].Count);
1176
}
1177
if (llvm::endianness::native == Old) {
1178
sys::swapByteOrder<uint32_t>(NumValueSites);
1179
sys::swapByteOrder<uint32_t>(Kind);
1180
}
1181
}
1182
1183
void ValueProfData::deserializeTo(InstrProfRecord &Record,
1184
InstrProfSymtab *SymTab) {
1185
if (NumValueKinds == 0)
1186
return;
1187
1188
ValueProfRecord *VR = getFirstValueProfRecord(this);
1189
for (uint32_t K = 0; K < NumValueKinds; K++) {
1190
VR->deserializeTo(Record, SymTab);
1191
VR = getValueProfRecordNext(VR);
1192
}
1193
}
1194
1195
static std::unique_ptr<ValueProfData> allocValueProfData(uint32_t TotalSize) {
1196
return std::unique_ptr<ValueProfData>(new (::operator new(TotalSize))
1197
ValueProfData());
1198
}
1199
1200
// Sanity-check a ValueProfData whose fields are already in host byte order.
//
// Returns a `malformed` InstrProfError when the number of value kinds is out
// of range, the total size is not a multiple of 8 bytes, a record has an
// invalid kind, or walking the records runs past TotalSize; otherwise
// returns success.
Error ValueProfData::checkIntegrity() {
  if (NumValueKinds > IPVK_Last + 1)
    return make_error<InstrProfError>(
        instrprof_error::malformed, "number of value profile kinds is invalid");

  // Total size needs to be multiple of quadword size.
  if (TotalSize % sizeof(uint64_t) != 0)
    return make_error<InstrProfError>(
        instrprof_error::malformed, "total size is not multiples of quardword");

  const char *const Begin = reinterpret_cast<const char *>(this);
  ValueProfRecord *Current = getFirstValueProfRecord(this);
  for (uint32_t Remaining = this->NumValueKinds; Remaining != 0; --Remaining) {
    if (Current->Kind > IPVK_Last)
      return make_error<InstrProfError>(instrprof_error::malformed,
                                        "value kind is invalid");

    // The end of this record (start of the next) must stay within TotalSize.
    Current = getValueProfRecordNext(Current);
    const ptrdiff_t Consumed = reinterpret_cast<const char *>(Current) - Begin;
    if (Consumed > static_cast<ptrdiff_t>(TotalSize))
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "value profile address is greater than total size");
  }
  return Error::success();
}
1222
1223
// Read a serialized ValueProfData starting at \p D, stored in byte order
// \p Endianness, and return a host-order copy of it.
//
// Errors:
//  - truncated: fewer than sizeof(ValueProfData) bytes are available;
//  - malformed: the recorded total size is smaller than the header itself,
//    or checkIntegrity() rejects the data;
//  - too_large: the recorded total size extends past \p BufferEnd.
Expected<std::unique_ptr<ValueProfData>>
ValueProfData::getValueProfData(const unsigned char *D,
                                const unsigned char *const BufferEnd,
                                llvm::endianness Endianness) {
  using namespace support;

  if (D + sizeof(ValueProfData) > BufferEnd)
    return make_error<InstrProfError>(instrprof_error::truncated);

  const unsigned char *Header = D;
  uint32_t TotalSize = endian::readNext<uint32_t>(Header, Endianness);

  // The total size comes from the input and covers the header as well.
  // Anything smaller than the header would make allocValueProfData()
  // construct a ValueProfData in storage that is too small for it.
  if (TotalSize < sizeof(ValueProfData))
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "total size is smaller than the value profile data header");

  if (D + TotalSize > BufferEnd)
    return make_error<InstrProfError>(instrprof_error::too_large);

  std::unique_ptr<ValueProfData> VPD = allocValueProfData(TotalSize);
  memcpy(VPD.get(), D, TotalSize);
  // Byte swap.
  VPD->swapBytesToHost(Endianness);

  Error E = VPD->checkIntegrity();
  if (E)
    return std::move(E);

  return std::move(VPD);
}
1249
1250
// Convert this ValueProfData from byte order \p Endianness to host order.
// The header is swapped first so that NumValueKinds and the record walk
// below operate on host-order values. No-op when \p Endianness is already
// the host order.
void ValueProfData::swapBytesToHost(llvm::endianness Endianness) {
  using namespace support;

  if (Endianness == llvm::endianness::native)
    return;

  sys::swapByteOrder<uint32_t>(TotalSize);
  sys::swapByteOrder<uint32_t>(NumValueKinds);

  ValueProfRecord *Current = getFirstValueProfRecord(this);
  for (uint32_t Remaining = NumValueKinds; Remaining != 0; --Remaining) {
    Current->swapBytes(Endianness, llvm::endianness::native);
    Current = getValueProfRecordNext(Current);
  }
}
1265
1266
// Convert this ValueProfData from host order to byte order \p Endianness.
// Each record's successor is located before the record is swapped, while
// its fields are still in host order, and the header is swapped last.
// No-op when \p Endianness is already the host order.
void ValueProfData::swapBytesFromHost(llvm::endianness Endianness) {
  using namespace support;

  if (Endianness == llvm::endianness::native)
    return;

  ValueProfRecord *Current = getFirstValueProfRecord(this);
  for (uint32_t Remaining = NumValueKinds; Remaining != 0; --Remaining) {
    ValueProfRecord *const Next = getValueProfRecordNext(Current);
    Current->swapBytes(llvm::endianness::native, Endianness);
    Current = Next;
  }

  sys::swapByteOrder<uint32_t>(TotalSize);
  sys::swapByteOrder<uint32_t>(NumValueKinds);
}
1281
1282
void annotateValueSite(Module &M, Instruction &Inst,
1283
const InstrProfRecord &InstrProfR,
1284
InstrProfValueKind ValueKind, uint32_t SiteIdx,
1285
uint32_t MaxMDCount) {
1286
auto VDs = InstrProfR.getValueArrayForSite(ValueKind, SiteIdx);
1287
if (VDs.empty())
1288
return;
1289
uint64_t Sum = 0;
1290
for (const InstrProfValueData &V : VDs)
1291
Sum = SaturatingAdd(Sum, V.Count);
1292
annotateValueSite(M, Inst, VDs, Sum, ValueKind, MaxMDCount);
1293
}
1294
1295
void annotateValueSite(Module &M, Instruction &Inst,
1296
ArrayRef<InstrProfValueData> VDs,
1297
uint64_t Sum, InstrProfValueKind ValueKind,
1298
uint32_t MaxMDCount) {
1299
if (VDs.empty())
1300
return;
1301
LLVMContext &Ctx = M.getContext();
1302
MDBuilder MDHelper(Ctx);
1303
SmallVector<Metadata *, 3> Vals;
1304
// Tag
1305
Vals.push_back(MDHelper.createString("VP"));
1306
// Value Kind
1307
Vals.push_back(MDHelper.createConstant(
1308
ConstantInt::get(Type::getInt32Ty(Ctx), ValueKind)));
1309
// Total Count
1310
Vals.push_back(
1311
MDHelper.createConstant(ConstantInt::get(Type::getInt64Ty(Ctx), Sum)));
1312
1313
// Value Profile Data
1314
uint32_t MDCount = MaxMDCount;
1315
for (const auto &VD : VDs) {
1316
Vals.push_back(MDHelper.createConstant(
1317
ConstantInt::get(Type::getInt64Ty(Ctx), VD.Value)));
1318
Vals.push_back(MDHelper.createConstant(
1319
ConstantInt::get(Type::getInt64Ty(Ctx), VD.Count)));
1320
if (--MDCount == 0)
1321
break;
1322
}
1323
Inst.setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Vals));
1324
}
1325
1326
// Return the !prof metadata node of \p Inst if it looks like value profile
// data of kind \p ValueKind, and nullptr otherwise.
//
// The node qualifies when it has at least five operands, its first operand
// is the string "VP" and its second operand is a constant integer equal to
// \p ValueKind.
MDNode *mayHaveValueProfileOfKind(const Instruction &Inst,
                                  InstrProfValueKind ValueKind) {
  MDNode *MD = Inst.getMetadata(LLVMContext::MD_prof);
  if (!MD)
    return nullptr;

  if (MD->getNumOperands() < 5)
    return nullptr;

  // Use a checked conversion: the first operand of an arbitrary !prof node
  // is not guaranteed to be a string. cast<> would assert on a non-string
  // operand and never yields null, which made the null test below dead.
  auto *Tag = dyn_cast_or_null<MDString>(MD->getOperand(0));
  if (!Tag || Tag->getString() != "VP")
    return nullptr;

  // Now check kind:
  ConstantInt *KindInt = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1));
  if (!KindInt)
    return nullptr;
  if (KindInt->getZExtValue() != ValueKind)
    return nullptr;

  return MD;
}
1348
1349
// Extract up to \p MaxNumValueData (value, count) pairs of kind \p ValueKind
// from the "VP" !prof metadata of \p Inst.
//
// \p TotalC receives the total count operand; it is left untouched when the
// metadata is missing or its total count operand is not a constant integer.
// Pairs whose count equals NOMORE_ICP_MAGICNUM are skipped unless
// \p GetNoICPValue is set. An empty vector is returned when there is no
// matching metadata or when a pair operand is not a constant integer.
SmallVector<InstrProfValueData, 4>
getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind,
                         uint32_t MaxNumValueData, uint64_t &TotalC,
                         bool GetNoICPValue) {
  // Four inline elements seem to work well in practice. With MaxNumValueData,
  // this array won't grow very big anyway.
  SmallVector<InstrProfValueData, 4> ValueData;
  MDNode *MD = mayHaveValueProfileOfKind(Inst, ValueKind);
  if (!MD)
    return ValueData;
  const unsigned NOps = MD->getNumOperands();
  // Get total count
  ConstantInt *TotalCInt = mdconst::dyn_extract<ConstantInt>(MD->getOperand(2));
  if (!TotalCInt)
    return ValueData;
  TotalC = TotalCInt->getZExtValue();

  ValueData.reserve((NOps - 3) / 2);
  // Operands from index 3 on come in (value, count) pairs. Requiring
  // I + 1 < NOps keeps the count access in range even if the node carries a
  // trailing unpaired operand.
  for (unsigned I = 3; I + 1 < NOps; I += 2) {
    if (ValueData.size() >= MaxNumValueData)
      break;
    ConstantInt *Value = mdconst::dyn_extract<ConstantInt>(MD->getOperand(I));
    ConstantInt *Count =
        mdconst::dyn_extract<ConstantInt>(MD->getOperand(I + 1));
    if (!Value || !Count) {
      ValueData.clear();
      return ValueData;
    }
    uint64_t CntValue = Count->getZExtValue();
    if (!GetNoICPValue && (CntValue == NOMORE_ICP_MAGICNUM))
      continue;
    InstrProfValueData V;
    V.Value = Value->getZExtValue();
    V.Count = CntValue;
    ValueData.push_back(V);
  }
  return ValueData;
}
1387
1388
// Return the PGO function name metadata attached to \p F, or null if \p F
// carries none.
MDNode *getPGOFuncNameMetadata(const Function &F) {
  MDNode *NameMD = F.getMetadata(getPGOFuncNameMetadataName());
  return NameMD;
}
1391
1392
// Attach metadata named \p MetadataName holding the string \p PGOName to
// \p GO. Nothing is attached when the object's own name already equals
// \p PGOName (the metadata is only needed for internal-linkage functions or
// global variables, whose PGO name differs from their name) or when metadata
// of that name is already present.
static void createPGONameMetadata(GlobalObject &GO, StringRef MetadataName,
                                  StringRef PGOName) {
  const bool NameAlreadyMatches = GO.getName() == PGOName;
  // The second test avoids creating duplicated metadata.
  if (NameAlreadyMatches || GO.getMetadata(MetadataName))
    return;

  LLVMContext &Ctx = GO.getContext();
  GO.setMetadata(MetadataName, MDNode::get(Ctx, MDString::get(Ctx, PGOName)));
}
1407
1408
// Record \p PGOFuncName on \p F under the PGO function name metadata kind,
// subject to the conditions of the static createPGONameMetadata() helper.
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) {
  const auto MetadataName = getPGOFuncNameMetadataName();
  createPGONameMetadata(F, MetadataName, PGOFuncName);
}
1411
1412
// Record \p PGOName on \p GO under the generic PGO name metadata kind,
// subject to the conditions of the three-argument helper.
void createPGONameMetadata(GlobalObject &GO, StringRef PGOName) {
  const auto MetadataName = getPGONameMetadataName();
  createPGONameMetadata(GO, MetadataName, PGOName);
}
1415
1416
bool needsComdatForCounter(const GlobalObject &GO, const Module &M) {
1417
if (GO.hasComdat())
1418
return true;
1419
1420
if (!Triple(M.getTargetTriple()).supportsCOMDAT())
1421
return false;
1422
1423
// See createPGOFuncNameVar for more details. To avoid link errors, profile
1424
// counters for function with available_externally linkage needs to be changed
1425
// to linkonce linkage. On ELF based systems, this leads to weak symbols to be
1426
// created. Without using comdat, duplicate entries won't be removed by the
1427
// linker leading to increased data segement size and raw profile size. Even
1428
// worse, since the referenced counter from profile per-function data object
1429
// will be resolved to the common strong definition, the profile counts for
1430
// available_externally functions will end up being duplicated in raw profile
1431
// data. This can result in distorted profile as the counts of those dups
1432
// will be accumulated by the profile merger.
1433
GlobalValue::LinkageTypes Linkage = GO.getLinkage();
1434
if (Linkage != GlobalValue::ExternalWeakLinkage &&
1435
Linkage != GlobalValue::AvailableExternallyLinkage)
1436
return false;
1437
1438
return true;
1439
}
1440
1441
// Check if INSTR_PROF_RAW_VERSION_VAR is defined.
1442
bool isIRPGOFlagSet(const Module *M) {
1443
const GlobalVariable *IRInstrVar =
1444
M->getNamedGlobal(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
1445
if (!IRInstrVar || IRInstrVar->hasLocalLinkage())
1446
return false;
1447
1448
// For CSPGO+LTO, this variable might be marked as non-prevailing and we only
1449
// have the decl.
1450
if (IRInstrVar->isDeclaration())
1451
return true;
1452
1453
// Check if the flag is set.
1454
if (!IRInstrVar->hasInitializer())
1455
return false;
1456
1457
auto *InitVal = dyn_cast_or_null<ConstantInt>(IRInstrVar->getInitializer());
1458
if (!InitVal)
1459
return false;
1460
return (InitVal->getZExtValue() & VARIANT_MASK_IR_PROF) != 0;
1461
}
1462
1463
// Check if we can safely rename this Comdat function.
1464
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) {
1465
if (F.getName().empty())
1466
return false;
1467
if (!needsComdatForCounter(F, *(F.getParent())))
1468
return false;
1469
// Unsafe to rename the address-taken function (which can be used in
1470
// function comparison).
1471
if (CheckAddressTaken && F.hasAddressTaken())
1472
return false;
1473
// Only safe to do if this function may be discarded if it is not used
1474
// in the compilation unit.
1475
if (!GlobalValue::isDiscardableIfUnused(F.getLinkage()))
1476
return false;
1477
1478
// For AvailableExternallyLinkage functions.
1479
if (!F.hasComdat()) {
1480
assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
1481
return true;
1482
}
1483
return true;
1484
}
1485
1486
// Create the variable for the profile file name.
1487
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) {
1488
if (InstrProfileOutput.empty())
1489
return;
1490
Constant *ProfileNameConst =
1491
ConstantDataArray::getString(M.getContext(), InstrProfileOutput, true);
1492
GlobalVariable *ProfileNameVar = new GlobalVariable(
1493
M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage,
1494
ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
1495
ProfileNameVar->setVisibility(GlobalValue::HiddenVisibility);
1496
Triple TT(M.getTargetTriple());
1497
if (TT.supportsCOMDAT()) {
1498
ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
1499
ProfileNameVar->setComdat(M.getOrInsertComdat(
1500
StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR))));
1501
}
1502
}
1503
1504
// Read the profiles \p BaseFilename and \p TestFilename and accumulate their
// count sums into Base and Test respectively.
//
// On success the addresses of the two filename strings are stored in this
// object (they are dereferenced later by dump(), so the strings must outlive
// that use) and the stats are marked Valid. The first reader-creation error
// encountered is returned otherwise.
Error OverlapStats::accumulateCounts(const std::string &BaseFilename,
                                     const std::string &TestFilename,
                                     bool IsCS) {
  const auto SumProfile = [IsCS](const std::string &Filename,
                                 CountSumOrPercent &Sum) -> Error {
    // This function is only used from llvm-profdata that doesn't use any kind
    // of VFS. Just create a default RealFileSystem to read profiles.
    auto FS = vfs::getRealFileSystem();
    auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
    if (Error E = ReaderOrErr.takeError())
      return E;

    auto Reader = std::move(ReaderOrErr.get());
    Reader->accumulateCounts(Sum, IsCS);
    return Error::success();
  };

  if (Error E = SumProfile(BaseFilename, Base))
    return E;
  if (Error E = SumProfile(TestFilename, Test))
    return E;

  this->BaseFilename = &BaseFilename;
  this->TestFilename = &TestFilename;
  Valid = true;
  return Error::success();
}
1531
1532
void OverlapStats::addOneMismatch(const CountSumOrPercent &MismatchFunc) {
1533
Mismatch.NumEntries += 1;
1534
Mismatch.CountSum += MismatchFunc.CountSum / Test.CountSum;
1535
for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) {
1536
if (Test.ValueCounts[I] >= 1.0f)
1537
Mismatch.ValueCounts[I] +=
1538
MismatchFunc.ValueCounts[I] / Test.ValueCounts[I];
1539
}
1540
}
1541
1542
void OverlapStats::addOneUnique(const CountSumOrPercent &UniqueFunc) {
1543
Unique.NumEntries += 1;
1544
Unique.CountSum += UniqueFunc.CountSum / Test.CountSum;
1545
for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) {
1546
if (Test.ValueCounts[I] >= 1.0f)
1547
Unique.ValueCounts[I] += UniqueFunc.ValueCounts[I] / Test.ValueCounts[I];
1548
}
1549
}
1550
1551
// Print the overlap statistics to \p OS. Nothing is printed unless the stats
// are Valid.
//
// A heading depending on Level comes first, followed by the entry counts,
// the edge profile figures and, for every value kind whose base or test
// count sum is at least 1, the corresponding value profile figures.
// Mismatch and "only in test_profile" lines are printed only when the
// respective entry count is nonzero.
void OverlapStats::dump(raw_fd_ostream &OS) const {
  if (!Valid)
    return;

  const char *EntryName =
      (Level == ProgramLevel ? "functions" : "edge counters");
  if (Level == ProgramLevel) {
    OS << "Profile overlap infomation for base_profile: " << *BaseFilename
       << " and test_profile: " << *TestFilename << "\nProgram level:\n";
  } else {
    OS << "Function level:\n"
       << " Function: " << FuncName << " (Hash=" << FuncHash << ")\n";
  }

  OS << " # of " << EntryName << " overlap: " << Overlap.NumEntries << "\n";
  if (Mismatch.NumEntries)
    OS << " # of " << EntryName << " mismatch: " << Mismatch.NumEntries
       << "\n";
  if (Unique.NumEntries)
    OS << " # of " << EntryName
       << " only in test_profile: " << Unique.NumEntries << "\n";

  OS << " Edge profile overlap: " << format("%.3f%%", Overlap.CountSum * 100)
     << "\n";
  if (Mismatch.NumEntries)
    OS << " Mismatched count percentage (Edge): "
       << format("%.3f%%", Mismatch.CountSum * 100) << "\n";
  if (Unique.NumEntries)
    OS << " Percentage of Edge profile only in test_profile: "
       << format("%.3f%%", Unique.CountSum * 100) << "\n";
  OS << " Edge profile base count sum: " << format("%.0f", Base.CountSum)
     << "\n"
     << " Edge profile test count sum: " << format("%.0f", Test.CountSum)
     << "\n";

  for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) {
    if (Base.ValueCounts[I] < 1.0f && Test.ValueCounts[I] < 1.0f)
      continue;

    // Known kinds use a fixed name; only an unknown kind needs formatting.
    char FallbackName[20] = {0};
    const char *ProfileKindName = FallbackName;
    switch (I) {
    case IPVK_IndirectCallTarget:
      ProfileKindName = "IndirectCall";
      break;
    case IPVK_MemOPSize:
      ProfileKindName = "MemOP";
      break;
    case IPVK_VTableTarget:
      ProfileKindName = "VTable";
      break;
    default:
      // I is unsigned, so it is formatted with %u.
      snprintf(FallbackName, sizeof(FallbackName), "VP[%u]", I);
      break;
    }

    OS << " " << ProfileKindName
       << " profile overlap: " << format("%.3f%%", Overlap.ValueCounts[I] * 100)
       << "\n";
    if (Mismatch.NumEntries)
      OS << " Mismatched count percentage (" << ProfileKindName
         << "): " << format("%.3f%%", Mismatch.ValueCounts[I] * 100) << "\n";
    if (Unique.NumEntries)
      OS << " Percentage of " << ProfileKindName
         << " profile only in test_profile: "
         << format("%.3f%%", Unique.ValueCounts[I] * 100) << "\n";
    OS << " " << ProfileKindName
       << " profile base count sum: " << format("%.0f", Base.ValueCounts[I])
       << "\n"
       << " " << ProfileKindName
       << " profile test count sum: " << format("%.0f", Test.ValueCounts[I])
       << "\n";
  }
}
1622
1623
namespace IndexedInstrProf {
1624
// Parse an indexed profile header from \p Buffer, reading every field as a
// little-endian 64-bit value.
//
// Returns bad_magic when the magic number does not match and
// unsupported_version when the profile version is newer than CurrentVersion.
// Fields introduced by later versions are only read when the profile's
// version includes them. The buffer length is not known here, so the caller
// must ensure enough bytes are available.
Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
  using namespace support;
  static_assert(std::is_standard_layout_v<Header>,
                "Use standard layout for Header for simplicity");

  // Read the next little-endian uint64_t and advance Buffer past it.
  const auto ReadNextU64 = [&Buffer]() {
    return endian::readNext<uint64_t, llvm::endianness::little>(Buffer);
  };

  Header H;

  H.Magic = ReadNextU64();
  // Check the magic number.
  if (H.Magic != IndexedInstrProf::Magic)
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  // Read the version.
  H.Version = ReadNextU64();
  const uint64_t ProfileVersion = H.getIndexedProfileVersion();
  if (ProfileVersion > IndexedInstrProf::ProfVersion::CurrentVersion)
    return make_error<InstrProfError>(instrprof_error::unsupported_version);

  static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
                "Please update the reader as needed when a new field is added "
                "or when indexed profile version gets bumped.");

  Buffer += sizeof(uint64_t); // Skip Header.Unused field.
  H.HashType = ReadNextU64();
  H.HashOffset = ReadNextU64();
  if (ProfileVersion >= 8)
    H.MemProfOffset = ReadNextU64();
  if (ProfileVersion >= 9)
    H.BinaryIdOffset = ReadNextU64();
  // Version 11 is handled by this condition.
  if (ProfileVersion >= 10)
    H.TemporalProfTracesOffset = ReadNextU64();
  if (ProfileVersion >= 12)
    H.VTableNamesOffset = ReadNextU64();
  return H;
}
1663
1664
uint64_t Header::getIndexedProfileVersion() const {
1665
return GET_VERSION(Version);
1666
}
1667
1668
size_t Header::size() const {
1669
switch (getIndexedProfileVersion()) {
1670
// To retain backward compatibility, new fields must be appended to the end
1671
// of the header, and byte offset of existing fields shouldn't change when
1672
// indexed profile version gets incremented.
1673
static_assert(
1674
IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
1675
"Please update the size computation below if a new field has "
1676
"been added to the header; for a version bump without new "
1677
"fields, add a case statement to fall through to the latest version.");
1678
case 12ull:
1679
return 72;
1680
case 11ull:
1681
[[fallthrough]];
1682
case 10ull:
1683
return 64;
1684
case 9ull:
1685
return 56;
1686
case 8ull:
1687
return 48;
1688
default: // Version7 (when the backwards compatible header was introduced).
1689
return 40;
1690
}
1691
}
1692
1693
} // namespace IndexedInstrProf
1694
1695
} // end namespace llvm
1696
1697