Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/tools/llvm-profdata/llvm-profdata.cpp
35258 views
1
//===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// llvm-profdata merges .profdata files.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "llvm/ADT/SmallSet.h"
14
#include "llvm/ADT/SmallVector.h"
15
#include "llvm/ADT/StringRef.h"
16
#include "llvm/IR/LLVMContext.h"
17
#include "llvm/Object/Binary.h"
18
#include "llvm/ProfileData/InstrProfCorrelator.h"
19
#include "llvm/ProfileData/InstrProfReader.h"
20
#include "llvm/ProfileData/InstrProfWriter.h"
21
#include "llvm/ProfileData/MemProf.h"
22
#include "llvm/ProfileData/MemProfReader.h"
23
#include "llvm/ProfileData/ProfileCommon.h"
24
#include "llvm/ProfileData/SampleProfReader.h"
25
#include "llvm/ProfileData/SampleProfWriter.h"
26
#include "llvm/Support/BalancedPartitioning.h"
27
#include "llvm/Support/CommandLine.h"
28
#include "llvm/Support/Discriminator.h"
29
#include "llvm/Support/Errc.h"
30
#include "llvm/Support/FileSystem.h"
31
#include "llvm/Support/Format.h"
32
#include "llvm/Support/FormattedStream.h"
33
#include "llvm/Support/LLVMDriver.h"
34
#include "llvm/Support/MD5.h"
35
#include "llvm/Support/MemoryBuffer.h"
36
#include "llvm/Support/Path.h"
37
#include "llvm/Support/Regex.h"
38
#include "llvm/Support/ThreadPool.h"
39
#include "llvm/Support/Threading.h"
40
#include "llvm/Support/VirtualFileSystem.h"
41
#include "llvm/Support/WithColor.h"
42
#include "llvm/Support/raw_ostream.h"
43
#include <algorithm>
44
#include <cmath>
45
#include <optional>
46
#include <queue>
47
48
using namespace llvm;
49
using ProfCorrelatorKind = InstrProfCorrelator::ProfCorrelatorKind;
50
51
// https://llvm.org/docs/CommandGuide/llvm-profdata.html has documentations
52
// on each subcommand.
53
cl::SubCommand ShowSubcommand(
54
"show",
55
"Takes a profile data file and displays the profiles. See detailed "
56
"documentation in "
57
"https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-show");
58
cl::SubCommand OrderSubcommand(
59
"order",
60
"Reads temporal profiling traces from a profile and outputs a function "
61
"order that reduces the number of page faults for those traces. See "
62
"detailed documentation in "
63
"https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-order");
64
cl::SubCommand OverlapSubcommand(
65
"overlap",
66
"Computes and displays the overlap between two profiles. See detailed "
67
"documentation in "
68
"https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-overlap");
69
cl::SubCommand MergeSubcommand(
70
"merge",
71
"Takes several profiles and merge them together. See detailed "
72
"documentation in "
73
"https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge");
74
75
namespace {
/// Kind of profile selected on the command line. The enumerator names double
/// as the flag spellings (they are registered with clEnumVal).
enum ProfileKinds { instr, sample, memory };

/// How invalid inputs are treated; selected with -failure-mode.
enum FailureMode { warnOnly, failIfAnyAreInvalid, failIfAllAreInvalid };

/// Encoding selected for the output profile.
enum ProfileFormat {
  PF_None = 0,
  PF_Text,
  PF_Compact_Binary, // Deprecated
  PF_Ext_Binary,
  PF_GCC,
  PF_Binary
};

/// Rendering selected with --show-format.
enum class ShowFormat { Text, Json, Yaml };
} // namespace
90
91
// Common options.
92
cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
93
cl::init("-"), cl::desc("Output file"),
94
cl::sub(ShowSubcommand),
95
cl::sub(OrderSubcommand),
96
cl::sub(OverlapSubcommand),
97
cl::sub(MergeSubcommand));
98
// NOTE: cl::alias must not have cl::sub(), since aliased option's cl::sub()
99
// will be used. llvm::cl::alias::done() method asserts this condition.
100
cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
101
cl::aliasopt(OutputFilename));
102
103
// Options common to at least two commands.
104
cl::opt<ProfileKinds> ProfileKind(
105
cl::desc("Profile kind:"), cl::sub(MergeSubcommand),
106
cl::sub(OverlapSubcommand), cl::init(instr),
107
cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
108
clEnumVal(sample, "Sample profile")));
109
cl::opt<std::string> Filename(cl::Positional, cl::desc("<profdata-file>"),
110
cl::sub(ShowSubcommand),
111
cl::sub(OrderSubcommand));
112
cl::opt<unsigned> MaxDbgCorrelationWarnings(
113
"max-debug-info-correlation-warnings",
114
cl::desc("The maximum number of warnings to emit when correlating "
115
"profile from debug info (0 = no limit)"),
116
cl::sub(MergeSubcommand), cl::sub(ShowSubcommand), cl::init(5));
117
cl::opt<std::string> ProfiledBinary(
118
"profiled-binary", cl::init(""),
119
cl::desc("Path to binary from which the profile was collected."),
120
cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
121
cl::opt<std::string> DebugInfoFilename(
122
"debug-info", cl::init(""),
123
cl::desc(
124
"For show, read and extract profile metadata from debug info and show "
125
"the functions it found. For merge, use the provided debug info to "
126
"correlate the raw profile."),
127
cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
128
// Unstripped binary used by merge to correlate a raw profile.
// (Help text previously misspelled "binary" as "bianry".)
cl::opt<std::string>
    BinaryFilename("binary-file", cl::init(""),
                   cl::desc("For merge, use the provided unstripped binary to "
                            "correlate the raw profile."),
                   cl::sub(MergeSubcommand));
133
// Positive function-name filter, available to show, overlap and merge.
cl::opt<std::string> FuncNameFilter(
    "function",
    cl::desc("Only functions matching the filter are shown in the output. For "
             "overlapping CSSPGO, this takes a function name with calling "
             "context."),
    cl::sub(ShowSubcommand),
    cl::sub(OverlapSubcommand),
    cl::sub(MergeSubcommand));
140
141
// TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to
142
// factor out the common cl::sub in cl::opt constructor for subcommand-specific
143
// options.
144
145
// Options specific to merge subcommand.
146
cl::list<std::string> InputFilenames(cl::Positional, cl::sub(MergeSubcommand),
147
cl::desc("<filename...>"));
148
cl::list<std::string> WeightedInputFilenames("weighted-input",
149
cl::sub(MergeSubcommand),
150
cl::desc("<weight>,<filename>"));
151
cl::opt<ProfileFormat> OutputFormat(
152
cl::desc("Format of output profile"), cl::sub(MergeSubcommand),
153
cl::init(PF_Ext_Binary),
154
cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding"),
155
clEnumValN(PF_Ext_Binary, "extbinary",
156
"Extensible binary encoding "
157
"(default)"),
158
clEnumValN(PF_Text, "text", "Text encoding"),
159
clEnumValN(PF_GCC, "gcc",
160
"GCC encoding (only meaningful for -sample)")));
161
cl::opt<std::string>
162
InputFilenamesFile("input-files", cl::init(""), cl::sub(MergeSubcommand),
163
cl::desc("Path to file containing newline-separated "
164
"[<weight>,]<filename> entries"));
165
cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
166
cl::aliasopt(InputFilenamesFile));
167
cl::opt<bool> DumpInputFileList(
168
"dump-input-file-list", cl::init(false), cl::Hidden,
169
cl::sub(MergeSubcommand),
170
cl::desc("Dump the list of input files and their weights, then exit"));
171
cl::opt<std::string> RemappingFile("remapping-file", cl::value_desc("file"),
172
cl::sub(MergeSubcommand),
173
cl::desc("Symbol remapping file"));
174
cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
175
cl::aliasopt(RemappingFile));
176
cl::opt<bool>
177
UseMD5("use-md5", cl::init(false), cl::Hidden,
178
cl::desc("Choose to use MD5 to represent string in name table (only "
179
"meaningful for -extbinary)"),
180
cl::sub(MergeSubcommand));
181
cl::opt<bool> CompressAllSections(
182
"compress-all-sections", cl::init(false), cl::Hidden,
183
cl::sub(MergeSubcommand),
184
cl::desc("Compress all sections when writing the profile (only "
185
"meaningful for -extbinary)"));
186
cl::opt<bool> SampleMergeColdContext(
187
"sample-merge-cold-context", cl::init(false), cl::Hidden,
188
cl::sub(MergeSubcommand),
189
cl::desc(
190
"Merge context sample profiles whose count is below cold threshold"));
191
cl::opt<bool> SampleTrimColdContext(
192
"sample-trim-cold-context", cl::init(false), cl::Hidden,
193
cl::sub(MergeSubcommand),
194
cl::desc(
195
"Trim context sample profiles whose count is below cold threshold"));
196
cl::opt<uint32_t> SampleColdContextFrameDepth(
197
"sample-frame-depth-for-cold-context", cl::init(1),
198
cl::sub(MergeSubcommand),
199
cl::desc("Keep the last K frames while merging cold profile. 1 means the "
200
"context-less base profile"));
201
// Size budget for the merged output in bytes; the initial value is 0.
// (Help text previously misspelled "heuristic" as "heursitic".)
cl::opt<size_t> OutputSizeLimit(
    "output-size-limit", cl::init(0), cl::Hidden, cl::sub(MergeSubcommand),
    cl::desc("Trim cold functions until profile size is below specified "
             "limit in bytes. This uses a heuristic and functions may be "
             "excessively trimmed"));
206
cl::opt<bool> GenPartialProfile(
    "gen-partial-profile", cl::init(false), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("Generate a partial profile (only meaningful for -extbinary)"));

// Sample profile used to supplement an instr profile.
cl::opt<std::string> SupplInstrWithSample(
    "supplement-instr-with-sample", cl::init(""), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("Supplement an instr profile with sample profile, to correct "
             "the profile unrepresentativeness issue. The sample "
             "profile is the input of the flag. Output will be in instr "
             "format (The flag only works with -instr)"));

cl::opt<float> ZeroCounterThreshold(
    "zero-counter-threshold", cl::init(0.7), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("For the function which is cold in instr profile but hot in "
             "sample profile, if the ratio of the number of zero counters "
             "divided by the total number of counters is above the "
             "threshold, the profile of the function will be regarded as "
             "being harmful for performance and will be dropped."));

cl::opt<unsigned> SupplMinSizeThreshold(
    "suppl-min-size-threshold", cl::init(10), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("If the size of a function is smaller than the threshold, "
             "assume it can be inlined by PGO early inliner and it won't "
             "be adjusted based on sample profile."));

cl::opt<unsigned> InstrProfColdThreshold(
    "instr-prof-cold-threshold", cl::init(0), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("User specified cold threshold for instr profile which will "
             "override the cold threshold got from profile summary. "));

// WARNING: for simplicity this reservoir size is propagated to every indexed
// input profile. Changing the value between invocations could introduce
// sample bias.
cl::opt<uint64_t> TemporalProfTraceReservoirSize(
    "temporal-profile-trace-reservoir-size", cl::init(100),
    cl::sub(MergeSubcommand),
    cl::desc("The maximum number of stored temporal profile traces (default: "
             "100)"));

cl::opt<uint64_t> TemporalProfMaxTraceLength(
    "temporal-profile-max-trace-length", cl::init(10000),
    cl::sub(MergeSubcommand),
    cl::desc("The maximum length of a single temporal profile trace "
             "(default: 10000)"));

// Negative function-name filter.
cl::opt<std::string> FuncNameNegativeFilter(
    "no-function", cl::init(""),
    cl::sub(MergeSubcommand),
    cl::desc("Exclude functions matching the filter from the output."));
253
254
// Policy for invalid inputs; the initial value is "any".
cl::opt<FailureMode> FailMode(
    "failure-mode", cl::init(failIfAnyAreInvalid),
    cl::desc("Failure mode:"),
    cl::sub(MergeSubcommand),
    cl::values(clEnumValN(warnOnly, "warn",
                          "Do not fail and just print warnings."),
               clEnumValN(failIfAnyAreInvalid, "any",
                          "Fail if any profile is invalid."),
               clEnumValN(failIfAllAreInvalid, "all",
                          "Fail only if all profiles are invalid.")));

cl::opt<bool> OutputSparse(
    "sparse", cl::init(false), cl::sub(MergeSubcommand),
    cl::desc("Generate a sparse profile (only meaningful for -instr)"));

// Merge thread count; the initial value 0 is described as "autodetect".
cl::opt<unsigned> NumThreads(
    "num-threads", cl::init(0), cl::sub(MergeSubcommand),
    cl::desc("Number of merge threads to use (default: autodetect)"));
cl::alias NumThreadsA(
    "j", cl::desc("Alias for --num-threads"), cl::aliasopt(NumThreads));

cl::opt<std::string> ProfileSymbolListFile(
    "prof-sym-list", cl::init(""), cl::sub(MergeSubcommand),
    cl::desc("Path to file containing the list of function symbols "
             "used to populate profile symbol list"));

// Optional layout conversion of the generated sample profile.
cl::opt<SampleProfileLayout> ProfileLayout(
    "convert-sample-profile-layout",
    cl::desc("Convert the generated profile to a profile with a new layout"),
    cl::sub(MergeSubcommand),
    cl::init(SPL_None),
    cl::values(
        clEnumValN(SPL_Nest, "nest",
                   "Nested profile, the input should be CS flat profile"),
        clEnumValN(SPL_Flat, "flat",
                   "Profile with nested inlinee flatten out")));

cl::opt<bool> DropProfileSymbolList(
    "drop-profile-symbol-list", cl::init(false), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
             "(only meaningful for -sample)"));

cl::opt<bool> KeepVTableSymbols(
    "keep-vtable-symbols", cl::init(false), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("If true, keep the vtable symbols in indexed profiles"));
298
299
// Temporary support for writing the previous version of the format, to enable
300
// some forward compatibility.
301
// TODO: Consider enabling this with future version changes as well, to ease
302
// deployment of newer versions of llvm-profdata.
303
cl::opt<bool> DoWritePrevVersion(
304
"write-prev-version", cl::init(false), cl::Hidden,
305
cl::desc("Write the previous version of indexed format, to enable "
306
"some forward compatibility."));
307
308
cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
309
"memprof-version", cl::Hidden, cl::sub(MergeSubcommand),
310
cl::desc("Specify the version of the memprof format to use"),
311
cl::init(memprof::Version0),
312
cl::values(clEnumValN(memprof::Version0, "0", "version 0"),
313
clEnumValN(memprof::Version1, "1", "version 1"),
314
clEnumValN(memprof::Version2, "2", "version 2"),
315
clEnumValN(memprof::Version3, "3", "version 3")));
316
317
cl::opt<bool> MemProfFullSchema(
318
"memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),
319
cl::desc("Use the full schema for serialization"), cl::init(false));
320
321
// Options specific to overlap subcommand.
322
cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
323
cl::desc("<base profile file>"),
324
cl::sub(OverlapSubcommand));
325
cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
326
cl::desc("<test profile file>"),
327
cl::sub(OverlapSubcommand));
328
329
cl::opt<unsigned long long> SimilarityCutoff(
330
"similarity-cutoff", cl::init(0),
331
cl::desc("For sample profiles, list function names (with calling context "
332
"for csspgo) for overlapped functions "
333
"with similarities below the cutoff (percentage times 10000)."),
334
cl::sub(OverlapSubcommand));
335
336
cl::opt<bool> IsCS(
337
"cs", cl::init(false),
338
cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."),
339
cl::sub(OverlapSubcommand));
340
341
// Max-count cutoff for per-function overlap output. The initial value -1
// converts to the largest unsigned long long.
// (Help text previously read "greater then"; corrected to "greater than".)
cl::opt<unsigned long long> OverlapValueCutoff(
    "value-cutoff", cl::init(-1),
    cl::desc(
        "Function level overlap information for every function (with calling "
        "context for csspgo) in test "
        "profile with max count value greater than the parameter value"),
    cl::sub(OverlapSubcommand));
348
349
// Options specific to show subcommand.
350
cl::opt<bool> ShowCounts("counts", cl::init(false),
351
cl::desc("Show counter values for shown functions"),
352
cl::sub(ShowSubcommand));
353
cl::opt<ShowFormat>
354
SFormat("show-format", cl::init(ShowFormat::Text),
355
cl::desc("Emit output in the selected format if supported"),
356
cl::sub(ShowSubcommand),
357
cl::values(clEnumValN(ShowFormat::Text, "text",
358
"emit normal text output (default)"),
359
clEnumValN(ShowFormat::Json, "json", "emit JSON"),
360
clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML")));
361
// TODO: Consider replacing this with `--show-format=text-encoding`.
362
cl::opt<bool>
363
TextFormat("text", cl::init(false),
364
cl::desc("Show instr profile data in text dump format"),
365
cl::sub(ShowSubcommand));
366
cl::opt<bool>
367
JsonFormat("json",
368
cl::desc("Show sample profile data in the JSON format "
369
"(deprecated, please use --show-format=json)"),
370
cl::sub(ShowSubcommand));
371
cl::opt<bool> ShowIndirectCallTargets(
372
"ic-targets", cl::init(false),
373
cl::desc("Show indirect call site target values for shown functions"),
374
cl::sub(ShowSubcommand));
375
cl::opt<bool> ShowVTables("show-vtables", cl::init(false),
376
cl::desc("Show vtable names for shown functions"),
377
cl::sub(ShowSubcommand));
378
cl::opt<bool> ShowMemOPSizes(
379
"memop-sizes", cl::init(false),
380
cl::desc("Show the profiled sizes of the memory intrinsic calls "
381
"for shown functions"),
382
cl::sub(ShowSubcommand));
383
cl::opt<bool> ShowDetailedSummary("detailed-summary", cl::init(false),
384
cl::desc("Show detailed profile summary"),
385
cl::sub(ShowSubcommand));
386
cl::list<uint32_t> DetailedSummaryCutoffs(
387
cl::CommaSeparated, "detailed-summary-cutoffs",
388
cl::desc(
389
"Cutoff percentages (times 10000) for generating detailed summary"),
390
cl::value_desc("800000,901000,999999"), cl::sub(ShowSubcommand));
391
cl::opt<bool>
392
ShowHotFuncList("hot-func-list", cl::init(false),
393
cl::desc("Show profile summary of a list of hot functions"),
394
cl::sub(ShowSubcommand));
395
cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
396
cl::desc("Details for each and every function"),
397
cl::sub(ShowSubcommand));
398
cl::opt<bool> ShowCS("showcs", cl::init(false),
399
cl::desc("Show context sensitive counts"),
400
cl::sub(ShowSubcommand));
401
// Profile kind for show; unlike the merge/overlap option this also accepts
// `memory`.
cl::opt<ProfileKinds> ShowProfileKind(
    cl::desc("Profile kind supported by show:"),
    cl::sub(ShowSubcommand),
    cl::init(instr),
    cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
               clEnumVal(sample, "Sample profile"),
               clEnumVal(memory, "MemProf memory access profile")));

cl::opt<uint32_t> TopNFunctions(
    "topn", cl::init(0),
    cl::desc("Show the list of functions with the largest internal counts"),
    cl::sub(ShowSubcommand));

// Shares the flag name "value-cutoff" with the overlap subcommand's option;
// each is registered to its own subcommand.
cl::opt<uint32_t> ShowValueCutoff(
    "value-cutoff", cl::init(0),
    cl::desc("Set the count value cutoff. Functions with the maximum count "
             "less than this value will not be printed out. (Default is 0)"),
    cl::sub(ShowSubcommand));

cl::opt<bool> OnlyListBelow(
    "list-below-cutoff", cl::init(false),
    cl::desc("Only output names of functions whose max count values are "
             "below the cutoff value"),
    cl::sub(ShowSubcommand));

cl::opt<bool> ShowProfileSymbolList(
    "show-prof-sym-list", cl::init(false),
    cl::desc("Show profile symbol list if it exists in the profile. "),
    cl::sub(ShowSubcommand));

cl::opt<bool> ShowSectionInfoOnly(
    "show-sec-info-only", cl::init(false),
    cl::desc("Show the information of each section in the sample profile. "
             "The flag is only usable when the sample profile is in "
             "extbinary format"),
    cl::sub(ShowSubcommand));

cl::opt<bool> ShowBinaryIds(
    "binary-ids", cl::init(false),
    cl::desc("Show binary ids in the profile. "),
    cl::sub(ShowSubcommand));

cl::opt<bool> ShowTemporalProfTraces(
    "temporal-profile-traces",
    cl::desc("Show temporal profile traces in the profile."),
    cl::sub(ShowSubcommand));

cl::opt<bool> ShowCovered(
    "covered", cl::init(false),
    cl::desc("Show only the functions that have been executed."),
    cl::sub(ShowSubcommand));

cl::opt<bool> ShowProfileVersion(
    "profile-version", cl::init(false),
    cl::desc("Show profile version. "),
    cl::sub(ShowSubcommand));
447
448
// Options specific to order subcommand.
449
cl::opt<unsigned>
450
NumTestTraces("num-test-traces", cl::init(0),
451
cl::desc("Keep aside the last <num-test-traces> traces in "
452
"the profile when computing the function order and "
453
"instead use them to evaluate that order"),
454
cl::sub(OrderSubcommand));
455
456
// Marker string used to indicate that multiple static functions map to the
// same name.
const std::string DuplicateNameStr("----");
459
460
/// Print a warning to the error stream.
///
/// Starts the line with WithColor::warning(), then writes "<Whence>: " (only
/// when \p Whence is non-empty) followed by \p Message. A non-empty \p Hint is
/// printed afterwards as a WithColor::note() line.
static void warn(Twine Message, StringRef Whence = "", StringRef Hint = "") {
  WithColor::warning();

  const bool HasWhence = !Whence.empty();
  if (HasWhence)
    errs() << Whence << ": ";
  errs() << Message << "\n";

  if (Hint.empty())
    return;
  WithColor::note() << Hint << "\n";
}
468
469
/// Print \p E as a warning and consume it.
///
/// An InstrProfError is reported through its message(); any other error is
/// rendered with toString(), mirroring exitWithError(Error). Previously a
/// non-InstrProfError was neither printed nor consumed, so the warning was
/// lost and an unchecked llvm::Error was destroyed.
///
/// \param E      The error to report; a success value is ignored.
/// \param Whence Optional location prefix (e.g. a file name).
static void warn(Error E, StringRef Whence = "") {
  // Testing the Error marks a success value as checked; nothing to report.
  if (!E)
    return;

  if (E.isA<InstrProfError>()) {
    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
      warn(IPE.message(), Whence);
    });
    return;
  }

  warn(toString(std::move(E)), Whence);
}
476
477
/// Print an error to the error stream and terminate with exit status 1.
///
/// Starts the line with WithColor::error(), then writes "<Whence>: " (only
/// when \p Whence is non-empty) followed by \p Message. A non-empty \p Hint is
/// printed as a WithColor::note() line before exiting.
static void exitWithError(Twine Message, StringRef Whence = "",
                          StringRef Hint = "") {
  WithColor::error();

  const bool HasWhence = !Whence.empty();
  if (HasWhence)
    errs() << Whence << ": ";
  errs() << Message << "\n";

  const bool HasHint = !Hint.empty();
  if (HasHint)
    WithColor::note() << Hint << "\n";

  ::exit(1);
}
487
488
/// Report \p E through exitWithError(Twine, ...), which terminates the process.
///
/// For an InstrProfError the message comes from InstrProfError::message(), and
/// an unrecognized_format error additionally gets a hint about the
/// --sample / --memory options. Any other error is rendered with toString().
static void exitWithError(Error E, StringRef Whence = "") {
  if (!E.isA<InstrProfError>()) {
    exitWithError(toString(std::move(E)), Whence);
    return;
  }

  handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
    const bool IsUnrecognizedFormat =
        IPE.get() == instrprof_error::unrecognized_format;
    // Hint in case the user forgot to specify the profile type.
    StringRef Hint =
        IsUnrecognizedFormat
            ? "Perhaps you forgot to use the --sample or --memory option?"
            : "";
    exitWithError(IPE.message(), Whence, Hint);
  });
}
504
505
static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
506
exitWithError(EC.message(), Whence);
507
}
508
509
static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC,
510
StringRef Whence = "") {
511
if (FailMode == failIfAnyAreInvalid)
512
exitWithErrorCode(EC, Whence);
513
else
514
warn(EC.message(), Whence);
515
}
516
517
static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
518
StringRef WhenceFunction = "",
519
bool ShowHint = true) {
520
if (!WhenceFile.empty())
521
errs() << WhenceFile << ": ";
522
if (!WhenceFunction.empty())
523
errs() << WhenceFunction << ": ";
524
525
auto IPE = instrprof_error::success;
526
E = handleErrors(std::move(E),
527
[&IPE](std::unique_ptr<InstrProfError> E) -> Error {
528
IPE = E->get();
529
return Error(std::move(E));
530
});
531
errs() << toString(std::move(E)) << "\n";
532
533
if (ShowHint) {
534
StringRef Hint = "";
535
if (IPE != instrprof_error::success) {
536
switch (IPE) {
537
case instrprof_error::hash_mismatch:
538
case instrprof_error::count_mismatch:
539
case instrprof_error::value_site_count_mismatch:
540
Hint = "Make sure that all profile data to be merged is generated "
541
"from the same binary.";
542
break;
543
default:
544
break;
545
}
546
}
547
548
if (!Hint.empty())
549
errs() << Hint << "\n";
550
}
551
}
552
553
namespace {
554
/// A remapper from original symbol names to new symbol names based on a file
555
/// containing a list of mappings from old name to new name.
556
class SymbolRemapper {
557
std::unique_ptr<MemoryBuffer> File;
558
DenseMap<StringRef, StringRef> RemappingTable;
559
560
public:
561
/// Build a SymbolRemapper from a file containing a list of old/new symbols.
562
static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
563
auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
564
if (!BufOrError)
565
exitWithErrorCode(BufOrError.getError(), InputFile);
566
567
auto Remapper = std::make_unique<SymbolRemapper>();
568
Remapper->File = std::move(BufOrError.get());
569
570
for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
571
!LineIt.is_at_eof(); ++LineIt) {
572
std::pair<StringRef, StringRef> Parts = LineIt->split(' ');
573
if (Parts.first.empty() || Parts.second.empty() ||
574
Parts.second.count(' ')) {
575
exitWithError("unexpected line in remapping file",
576
(InputFile + ":" + Twine(LineIt.line_number())).str(),
577
"expected 'old_symbol new_symbol'");
578
}
579
Remapper->RemappingTable.insert(Parts);
580
}
581
return Remapper;
582
}
583
584
/// Attempt to map the given old symbol into a new symbol.
585
///
586
/// \return The new symbol, or \p Name if no such symbol was found.
587
StringRef operator()(StringRef Name) {
588
StringRef New = RemappingTable.lookup(Name);
589
return New.empty() ? Name : New;
590
}
591
592
FunctionId operator()(FunctionId Name) {
593
// MD5 name cannot be remapped.
594
if (!Name.isStringRef())
595
return Name;
596
StringRef New = RemappingTable.lookup(Name.stringRef());
597
return New.empty() ? Name : FunctionId(New);
598
}
599
};
600
}
601
602
struct WeightedFile {
603
std::string Filename;
604
uint64_t Weight;
605
};
606
typedef SmallVector<WeightedFile, 5> WeightedFileVector;
607
608
/// Keep track of merged data and reported errors.
609
struct WriterContext {
610
std::mutex Lock;
611
InstrProfWriter Writer;
612
std::vector<std::pair<Error, std::string>> Errors;
613
std::mutex &ErrLock;
614
SmallSet<instrprof_error, 4> &WriterErrorCodes;
615
616
WriterContext(bool IsSparse, std::mutex &ErrLock,
617
SmallSet<instrprof_error, 4> &WriterErrorCodes,
618
uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
619
: Writer(IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion,
620
MemProfVersionRequested, MemProfFullSchema),
621
ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
622
};
623
624
/// Computer the overlap b/w profile BaseFilename and TestFileName,
625
/// and store the program level result to Overlap.
626
static void overlapInput(const std::string &BaseFilename,
627
const std::string &TestFilename, WriterContext *WC,
628
OverlapStats &Overlap,
629
const OverlapFuncFilters &FuncFilter,
630
raw_fd_ostream &OS, bool IsCS) {
631
auto FS = vfs::getRealFileSystem();
632
auto ReaderOrErr = InstrProfReader::create(TestFilename, *FS);
633
if (Error E = ReaderOrErr.takeError()) {
634
// Skip the empty profiles by returning sliently.
635
auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
636
if (ErrorCode != instrprof_error::empty_raw_profile)
637
WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
638
TestFilename);
639
return;
640
}
641
642
auto Reader = std::move(ReaderOrErr.get());
643
for (auto &I : *Reader) {
644
OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
645
FuncOverlap.setFuncInfo(I.Name, I.Hash);
646
647
WC->Writer.overlapRecord(std::move(I), Overlap, FuncOverlap, FuncFilter);
648
FuncOverlap.dump(OS);
649
}
650
}
651
652
/// Load an input into a writer context.
653
static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
654
const InstrProfCorrelator *Correlator,
655
const StringRef ProfiledBinary, WriterContext *WC) {
656
std::unique_lock<std::mutex> CtxGuard{WC->Lock};
657
658
// Copy the filename, because llvm::ThreadPool copied the input "const
659
// WeightedFile &" by value, making a reference to the filename within it
660
// invalid outside of this packaged task.
661
std::string Filename = Input.Filename;
662
663
using ::llvm::memprof::RawMemProfReader;
664
if (RawMemProfReader::hasFormat(Input.Filename)) {
665
auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary);
666
if (!ReaderOrErr) {
667
exitWithError(ReaderOrErr.takeError(), Input.Filename);
668
}
669
std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
670
// Check if the profile types can be merged, e.g. clang frontend profiles
671
// should not be merged with memprof profiles.
672
if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
673
consumeError(std::move(E));
674
WC->Errors.emplace_back(
675
make_error<StringError>(
676
"Cannot merge MemProf profile with Clang generated profile.",
677
std::error_code()),
678
Filename);
679
return;
680
}
681
682
auto MemProfError = [&](Error E) {
683
auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
684
WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
685
Filename);
686
};
687
688
// Add the frame mappings into the writer context.
689
const auto &IdToFrame = Reader->getFrameMapping();
690
for (const auto &I : IdToFrame) {
691
bool Succeeded = WC->Writer.addMemProfFrame(
692
/*Id=*/I.first, /*Frame=*/I.getSecond(), MemProfError);
693
// If we weren't able to add the frame mappings then it doesn't make sense
694
// to try to add the records from this profile.
695
if (!Succeeded)
696
return;
697
}
698
699
// Add the call stacks into the writer context.
700
const auto &CSIdToCallStacks = Reader->getCallStacks();
701
for (const auto &I : CSIdToCallStacks) {
702
bool Succeeded = WC->Writer.addMemProfCallStack(
703
/*Id=*/I.first, /*Frame=*/I.getSecond(), MemProfError);
704
// If we weren't able to add the call stacks then it doesn't make sense
705
// to try to add the records from this profile.
706
if (!Succeeded)
707
return;
708
}
709
710
const auto &FunctionProfileData = Reader->getProfileData();
711
// Add the memprof records into the writer context.
712
for (const auto &[GUID, Record] : FunctionProfileData) {
713
WC->Writer.addMemProfRecord(GUID, Record);
714
}
715
return;
716
}
717
718
auto FS = vfs::getRealFileSystem();
719
// TODO: This only saves the first non-fatal error from InstrProfReader, and
720
// then added to WriterContext::Errors. However, this is not extensible, if
721
// we have more non-fatal errors from InstrProfReader in the future. How
722
// should this interact with different -failure-mode?
723
std::optional<std::pair<Error, std::string>> ReaderWarning;
724
auto Warn = [&](Error E) {
725
if (ReaderWarning) {
726
consumeError(std::move(E));
727
return;
728
}
729
// Only show the first time an error occurs in this file.
730
auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
731
ReaderWarning = {make_error<InstrProfError>(ErrCode, Msg), Filename};
732
};
733
auto ReaderOrErr =
734
InstrProfReader::create(Input.Filename, *FS, Correlator, Warn);
735
if (Error E = ReaderOrErr.takeError()) {
736
// Skip the empty profiles by returning silently.
737
auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
738
if (ErrCode != instrprof_error::empty_raw_profile)
739
WC->Errors.emplace_back(make_error<InstrProfError>(ErrCode, Msg),
740
Filename);
741
return;
742
}
743
744
auto Reader = std::move(ReaderOrErr.get());
745
if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
746
consumeError(std::move(E));
747
WC->Errors.emplace_back(
748
make_error<StringError>(
749
"Merge IR generated profile with Clang generated profile.",
750
std::error_code()),
751
Filename);
752
return;
753
}
754
755
for (auto &I : *Reader) {
756
if (Remapper)
757
I.Name = (*Remapper)(I.Name);
758
const StringRef FuncName = I.Name;
759
bool Reported = false;
760
WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) {
761
if (Reported) {
762
consumeError(std::move(E));
763
return;
764
}
765
Reported = true;
766
// Only show hint the first time an error occurs.
767
auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
768
std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
769
bool firstTime = WC->WriterErrorCodes.insert(ErrCode).second;
770
handleMergeWriterError(make_error<InstrProfError>(ErrCode, Msg),
771
Input.Filename, FuncName, firstTime);
772
});
773
}
774
775
if (KeepVTableSymbols) {
776
const InstrProfSymtab &symtab = Reader->getSymtab();
777
const auto &VTableNames = symtab.getVTableNames();
778
779
for (const auto &kv : VTableNames)
780
WC->Writer.addVTableName(kv.getKey());
781
}
782
783
if (Reader->hasTemporalProfile()) {
784
auto &Traces = Reader->getTemporalProfTraces(Input.Weight);
785
if (!Traces.empty())
786
WC->Writer.addTemporalProfileTraces(
787
Traces, Reader->getTemporalProfTraceStreamSize());
788
}
789
if (Reader->hasError()) {
790
if (Error E = Reader->getError()) {
791
WC->Errors.emplace_back(std::move(E), Filename);
792
return;
793
}
794
}
795
796
std::vector<llvm::object::BuildID> BinaryIds;
797
if (Error E = Reader->readBinaryIds(BinaryIds)) {
798
WC->Errors.emplace_back(std::move(E), Filename);
799
return;
800
}
801
WC->Writer.addBinaryIds(BinaryIds);
802
803
if (ReaderWarning) {
804
WC->Errors.emplace_back(std::move(ReaderWarning->first),
805
ReaderWarning->second);
806
}
807
}
808
809
/// Merge the \p Src writer context into \p Dst.
810
static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
811
for (auto &ErrorPair : Src->Errors)
812
Dst->Errors.push_back(std::move(ErrorPair));
813
Src->Errors.clear();
814
815
if (Error E = Dst->Writer.mergeProfileKind(Src->Writer.getProfileKind()))
816
exitWithError(std::move(E));
817
818
Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) {
819
auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
820
std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
821
bool firstTime = Dst->WriterErrorCodes.insert(ErrorCode).second;
822
if (firstTime)
823
warn(toString(make_error<InstrProfError>(ErrorCode, Msg)));
824
});
825
}
826
827
/// Function name of an instrumentation profile map entry: the entry's key.
static StringRef
getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type &Val) {
  StringRef Key = Val.first();
  return Key;
}
831
832
static std::string
833
getFuncName(const SampleProfileMap::value_type &Val) {
834
return Val.second.getContext().toString();
835
}
836
837
template <typename T>
838
static void filterFunctions(T &ProfileMap) {
839
bool hasFilter = !FuncNameFilter.empty();
840
bool hasNegativeFilter = !FuncNameNegativeFilter.empty();
841
if (!hasFilter && !hasNegativeFilter)
842
return;
843
844
// If filter starts with '?' it is MSVC mangled name, not a regex.
845
llvm::Regex ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
846
if (hasFilter && FuncNameFilter[0] == '?' &&
847
ProbablyMSVCMangledName.match(FuncNameFilter))
848
FuncNameFilter = llvm::Regex::escape(FuncNameFilter);
849
if (hasNegativeFilter && FuncNameNegativeFilter[0] == '?' &&
850
ProbablyMSVCMangledName.match(FuncNameNegativeFilter))
851
FuncNameNegativeFilter = llvm::Regex::escape(FuncNameNegativeFilter);
852
853
size_t Count = ProfileMap.size();
854
llvm::Regex Pattern(FuncNameFilter);
855
llvm::Regex NegativePattern(FuncNameNegativeFilter);
856
std::string Error;
857
if (hasFilter && !Pattern.isValid(Error))
858
exitWithError(Error);
859
if (hasNegativeFilter && !NegativePattern.isValid(Error))
860
exitWithError(Error);
861
862
// Handle MD5 profile, so it is still able to match using the original name.
863
std::string MD5Name = std::to_string(llvm::MD5Hash(FuncNameFilter));
864
std::string NegativeMD5Name =
865
std::to_string(llvm::MD5Hash(FuncNameNegativeFilter));
866
867
for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
868
auto Tmp = I++;
869
const auto &FuncName = getFuncName(*Tmp);
870
// Negative filter has higher precedence than positive filter.
871
if ((hasNegativeFilter &&
872
(NegativePattern.match(FuncName) ||
873
(FunctionSamples::UseMD5 && NegativeMD5Name == FuncName))) ||
874
(hasFilter && !(Pattern.match(FuncName) ||
875
(FunctionSamples::UseMD5 && MD5Name == FuncName))))
876
ProfileMap.erase(Tmp);
877
}
878
879
llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions "
880
<< "in the original profile are filtered.\n";
881
}
882
883
static void writeInstrProfile(StringRef OutputFilename,
884
ProfileFormat OutputFormat,
885
InstrProfWriter &Writer) {
886
std::error_code EC;
887
raw_fd_ostream Output(OutputFilename.data(), EC,
888
OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF
889
: sys::fs::OF_None);
890
if (EC)
891
exitWithErrorCode(EC, OutputFilename);
892
893
if (OutputFormat == PF_Text) {
894
if (Error E = Writer.writeText(Output))
895
warn(std::move(E));
896
} else {
897
if (Output.is_displayed())
898
exitWithError("cannot write a non-text format profile to the terminal");
899
if (Error E = Writer.write(Output))
900
warn(std::move(E));
901
}
902
}
903
904
static void mergeInstrProfile(const WeightedFileVector &Inputs,
905
SymbolRemapper *Remapper,
906
int MaxDbgCorrelationWarnings,
907
const StringRef ProfiledBinary) {
908
const uint64_t TraceReservoirSize = TemporalProfTraceReservoirSize.getValue();
909
const uint64_t MaxTraceLength = TemporalProfMaxTraceLength.getValue();
910
if (OutputFormat == PF_Compact_Binary)
911
exitWithError("Compact Binary is deprecated");
912
if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary &&
913
OutputFormat != PF_Text)
914
exitWithError("unknown format is specified");
915
916
// TODO: Maybe we should support correlation with mixture of different
917
// correlation modes(w/wo debug-info/object correlation).
918
if (!DebugInfoFilename.empty() && !BinaryFilename.empty())
919
exitWithError("Expected only one of -debug-info, -binary-file");
920
std::string CorrelateFilename;
921
ProfCorrelatorKind CorrelateKind = ProfCorrelatorKind::NONE;
922
if (!DebugInfoFilename.empty()) {
923
CorrelateFilename = DebugInfoFilename;
924
CorrelateKind = ProfCorrelatorKind::DEBUG_INFO;
925
} else if (!BinaryFilename.empty()) {
926
CorrelateFilename = BinaryFilename;
927
CorrelateKind = ProfCorrelatorKind::BINARY;
928
}
929
930
std::unique_ptr<InstrProfCorrelator> Correlator;
931
if (CorrelateKind != InstrProfCorrelator::NONE) {
932
if (auto Err = InstrProfCorrelator::get(CorrelateFilename, CorrelateKind)
933
.moveInto(Correlator))
934
exitWithError(std::move(Err), CorrelateFilename);
935
if (auto Err = Correlator->correlateProfileData(MaxDbgCorrelationWarnings))
936
exitWithError(std::move(Err), CorrelateFilename);
937
}
938
939
std::mutex ErrorLock;
940
SmallSet<instrprof_error, 4> WriterErrorCodes;
941
942
// If NumThreads is not specified, auto-detect a good default.
943
if (NumThreads == 0)
944
NumThreads = std::min(hardware_concurrency().compute_thread_count(),
945
unsigned((Inputs.size() + 1) / 2));
946
947
// Initialize the writer contexts.
948
SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
949
for (unsigned I = 0; I < NumThreads; ++I)
950
Contexts.emplace_back(std::make_unique<WriterContext>(
951
OutputSparse, ErrorLock, WriterErrorCodes, TraceReservoirSize,
952
MaxTraceLength));
953
954
if (NumThreads == 1) {
955
for (const auto &Input : Inputs)
956
loadInput(Input, Remapper, Correlator.get(), ProfiledBinary,
957
Contexts[0].get());
958
} else {
959
DefaultThreadPool Pool(hardware_concurrency(NumThreads));
960
961
// Load the inputs in parallel (N/NumThreads serial steps).
962
unsigned Ctx = 0;
963
for (const auto &Input : Inputs) {
964
Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary,
965
Contexts[Ctx].get());
966
Ctx = (Ctx + 1) % NumThreads;
967
}
968
Pool.wait();
969
970
// Merge the writer contexts together (~ lg(NumThreads) serial steps).
971
unsigned Mid = Contexts.size() / 2;
972
unsigned End = Contexts.size();
973
assert(Mid > 0 && "Expected more than one context");
974
do {
975
for (unsigned I = 0; I < Mid; ++I)
976
Pool.async(mergeWriterContexts, Contexts[I].get(),
977
Contexts[I + Mid].get());
978
Pool.wait();
979
if (End & 1) {
980
Pool.async(mergeWriterContexts, Contexts[0].get(),
981
Contexts[End - 1].get());
982
Pool.wait();
983
}
984
End = Mid;
985
Mid /= 2;
986
} while (Mid > 0);
987
}
988
989
// Handle deferred errors encountered during merging. If the number of errors
990
// is equal to the number of inputs the merge failed.
991
unsigned NumErrors = 0;
992
for (std::unique_ptr<WriterContext> &WC : Contexts) {
993
for (auto &ErrorPair : WC->Errors) {
994
++NumErrors;
995
warn(toString(std::move(ErrorPair.first)), ErrorPair.second);
996
}
997
}
998
if ((NumErrors == Inputs.size() && FailMode == failIfAllAreInvalid) ||
999
(NumErrors > 0 && FailMode == failIfAnyAreInvalid))
1000
exitWithError("no profile can be merged");
1001
1002
filterFunctions(Contexts[0]->Writer.getProfileData());
1003
1004
writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
1005
}
1006
1007
/// The profile entry for a function in instrumentation profile.
1008
struct InstrProfileEntry {
1009
uint64_t MaxCount = 0;
1010
uint64_t NumEdgeCounters = 0;
1011
float ZeroCounterRatio = 0.0;
1012
InstrProfRecord *ProfRecord;
1013
InstrProfileEntry(InstrProfRecord *Record);
1014
InstrProfileEntry() = default;
1015
};
1016
1017
/// Summarize the counters of \p Record: remember the record, its largest
/// counter, the number of counters and the fraction of them that are zero.
InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
  ProfRecord = Record;
  const uint64_t CntNum = Record->Counts.size();
  uint64_t ZeroCntNum = 0;
  for (const uint64_t Count : Record->Counts) {
    MaxCount = std::max(MaxCount, Count);
    ZeroCntNum += !Count;
  }
  // A record without counters would otherwise compute 0/0; keep the default
  // ratio of 0 in that case.
  if (CntNum != 0)
    ZeroCounterRatio = static_cast<float>(ZeroCntNum) / CntNum;
  NumEdgeCounters = CntNum;
}
1028
1029
/// Either set all the counters in the instr profile entry \p IFE to
1030
/// -1 / -2 /in order to drop the profile or scale up the
1031
/// counters in \p IFP to be above hot / cold threshold. We use
1032
/// the ratio of zero counters in the profile of a function to
1033
/// decide the profile is helpful or harmful for performance,
1034
/// and to choose whether to scale up or drop it.
1035
static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot,
1036
uint64_t HotInstrThreshold,
1037
uint64_t ColdInstrThreshold,
1038
float ZeroCounterThreshold) {
1039
InstrProfRecord *ProfRecord = IFE.ProfRecord;
1040
if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
1041
// If all or most of the counters of the function are zero, the
1042
// profile is unaccountable and should be dropped. Reset all the
1043
// counters to be -1 / -2 and PGO profile-use will drop the profile.
1044
// All counters being -1 also implies that the function is hot so
1045
// PGO profile-use will also set the entry count metadata to be
1046
// above hot threshold.
1047
// All counters being -2 implies that the function is warm so
1048
// PGO profile-use will also set the entry count metadata to be
1049
// above cold threshold.
1050
auto Kind =
1051
(SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm);
1052
ProfRecord->setPseudoCount(Kind);
1053
return;
1054
}
1055
1056
// Scale up the MaxCount to be multiple times above hot / cold threshold.
1057
const unsigned MultiplyFactor = 3;
1058
uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold);
1059
uint64_t Numerator = Threshold * MultiplyFactor;
1060
1061
// Make sure Threshold for warm counters is below the HotInstrThreshold.
1062
if (!SetToHot && Threshold >= HotInstrThreshold) {
1063
Threshold = (HotInstrThreshold + ColdInstrThreshold) / 2;
1064
}
1065
1066
uint64_t Denominator = IFE.MaxCount;
1067
if (Numerator <= Denominator)
1068
return;
1069
ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) {
1070
warn(toString(make_error<InstrProfError>(E)));
1071
});
1072
}
1073
1074
/// Index into ProfileSummaryBuilder::DefaultCutoffs of the cutoff used for
/// the cold threshold.
static constexpr uint64_t ColdPercentileIdx = 15;
/// Index into ProfileSummaryBuilder::DefaultCutoffs of the cutoff used for
/// the hot threshold.
static constexpr uint64_t HotPercentileIdx = 11;
1076
1077
using sampleprof::FSDiscriminatorPass;
1078
1079
// Internal options to set FSDiscriminatorPass. Used in merge and show
1080
// commands.
1081
static cl::opt<FSDiscriminatorPass> FSDiscriminatorPassOption(
1082
"fs-discriminator-pass", cl::init(PassLast), cl::Hidden,
1083
cl::desc("Zero out the discriminator bits for the FS discrimiantor "
1084
"pass beyond this value. The enum values are defined in "
1085
"Support/Discriminator.h"),
1086
cl::values(clEnumVal(Base, "Use base discriminators only"),
1087
clEnumVal(Pass1, "Use base and pass 1 discriminators"),
1088
clEnumVal(Pass2, "Use base and pass 1-2 discriminators"),
1089
clEnumVal(Pass3, "Use base and pass 1-3 discriminators"),
1090
clEnumVal(PassLast, "Use all discriminator bits (default)")));
1091
1092
static unsigned getDiscriminatorMask() {
1093
return getN1Bits(getFSPassBitEnd(FSDiscriminatorPassOption.getValue()));
1094
}
1095
1096
/// Adjust the instr profile in \p WC based on the sample profile in
1097
/// \p Reader.
1098
static void
1099
adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
1100
std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
1101
unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
1102
unsigned InstrProfColdThreshold) {
1103
// Function to its entry in instr profile.
1104
StringMap<InstrProfileEntry> InstrProfileMap;
1105
StringMap<StringRef> StaticFuncMap;
1106
InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
1107
1108
auto checkSampleProfileHasFUnique = [&Reader]() {
1109
for (const auto &PD : Reader->getProfiles()) {
1110
auto &FContext = PD.second.getContext();
1111
if (FContext.toString().find(FunctionSamples::UniqSuffix) !=
1112
std::string::npos) {
1113
return true;
1114
}
1115
}
1116
return false;
1117
};
1118
1119
bool SampleProfileHasFUnique = checkSampleProfileHasFUnique();
1120
1121
auto buildStaticFuncMap = [&StaticFuncMap,
1122
SampleProfileHasFUnique](const StringRef Name) {
1123
std::string FilePrefixes[] = {".cpp", "cc", ".c", ".hpp", ".h"};
1124
size_t PrefixPos = StringRef::npos;
1125
for (auto &FilePrefix : FilePrefixes) {
1126
std::string NamePrefix = FilePrefix + GlobalIdentifierDelimiter;
1127
PrefixPos = Name.find_insensitive(NamePrefix);
1128
if (PrefixPos == StringRef::npos)
1129
continue;
1130
PrefixPos += NamePrefix.size();
1131
break;
1132
}
1133
1134
if (PrefixPos == StringRef::npos) {
1135
return;
1136
}
1137
1138
StringRef NewName = Name.drop_front(PrefixPos);
1139
StringRef FName = Name.substr(0, PrefixPos - 1);
1140
if (NewName.size() == 0) {
1141
return;
1142
}
1143
1144
// This name should have a static linkage.
1145
size_t PostfixPos = NewName.find(FunctionSamples::UniqSuffix);
1146
bool ProfileHasFUnique = (PostfixPos != StringRef::npos);
1147
1148
// If sample profile and instrumented profile do not agree on symbol
1149
// uniqification.
1150
if (SampleProfileHasFUnique != ProfileHasFUnique) {
1151
// If instrumented profile uses -funique-internal-linkage-symbols,
1152
// we need to trim the name.
1153
if (ProfileHasFUnique) {
1154
NewName = NewName.substr(0, PostfixPos);
1155
} else {
1156
// If sample profile uses -funique-internal-linkage-symbols,
1157
// we build the map.
1158
std::string NStr =
1159
NewName.str() + getUniqueInternalLinkagePostfix(FName);
1160
NewName = StringRef(NStr);
1161
StaticFuncMap[NewName] = Name;
1162
return;
1163
}
1164
}
1165
1166
if (!StaticFuncMap.contains(NewName)) {
1167
StaticFuncMap[NewName] = Name;
1168
} else {
1169
StaticFuncMap[NewName] = DuplicateNameStr;
1170
}
1171
};
1172
1173
// We need to flatten the SampleFDO profile as the InstrFDO
1174
// profile does not have inlined callsite profiles.
1175
// One caveat is the pre-inlined function -- their samples
1176
// should be collapsed into the caller function.
1177
// Here we do a DFS traversal to get the flatten profile
1178
// info: the sum of entrycount and the max of maxcount.
1179
// Here is the algorithm:
1180
// recursive (FS, root_name) {
1181
// name = FS->getName();
1182
// get samples for FS;
1183
// if (InstrProf.find(name) {
1184
// root_name = name;
1185
// } else {
1186
// if (name is in static_func map) {
1187
// root_name = static_name;
1188
// }
1189
// }
1190
// update the Map entry for root_name;
1191
// for (subfs: FS) {
1192
// recursive(subfs, root_name);
1193
// }
1194
// }
1195
//
1196
// Here is an example.
1197
//
1198
// SampleProfile:
1199
// foo:12345:1000
1200
// 1: 1000
1201
// 2.1: 1000
1202
// 15: 5000
1203
// 4: bar:1000
1204
// 1: 1000
1205
// 2: goo:3000
1206
// 1: 3000
1207
// 8: bar:40000
1208
// 1: 10000
1209
// 2: goo:30000
1210
// 1: 30000
1211
//
1212
// InstrProfile has two entries:
1213
// foo
1214
// bar.cc;bar
1215
//
1216
// After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
1217
// {"foo", {1000, 5000}}
1218
// {"bar.cc;bar", {11000, 30000}}
1219
//
1220
// foo's has an entry count of 1000, and max body count of 5000.
1221
// bar.cc;bar has an entry count of 11000 (sum two callsites of 1000 and
1222
// 10000), and max count of 30000 (from the callsite in line 8).
1223
//
1224
// Note that goo's count will remain in bar.cc;bar() as it does not have an
1225
// entry in InstrProfile.
1226
llvm::StringMap<std::pair<uint64_t, uint64_t>> FlattenSampleMap;
1227
auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap,
1228
&InstrProfileMap](const FunctionSamples &FS,
1229
const StringRef &RootName) {
1230
auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS,
1231
const StringRef &RootName,
1232
auto &BuildImpl) -> void {
1233
std::string NameStr = FS.getFunction().str();
1234
const StringRef Name = NameStr;
1235
const StringRef *NewRootName = &RootName;
1236
uint64_t EntrySample = FS.getHeadSamplesEstimate();
1237
uint64_t MaxBodySample = FS.getMaxCountInside(/* SkipCallSite*/ true);
1238
1239
auto It = InstrProfileMap.find(Name);
1240
if (It != InstrProfileMap.end()) {
1241
NewRootName = &Name;
1242
} else {
1243
auto NewName = StaticFuncMap.find(Name);
1244
if (NewName != StaticFuncMap.end()) {
1245
It = InstrProfileMap.find(NewName->second.str());
1246
if (NewName->second != DuplicateNameStr) {
1247
NewRootName = &NewName->second;
1248
}
1249
} else {
1250
// Here the EntrySample is of an inlined function, so we should not
1251
// update the EntrySample in the map.
1252
EntrySample = 0;
1253
}
1254
}
1255
EntrySample += FlattenSampleMap[*NewRootName].first;
1256
MaxBodySample =
1257
std::max(FlattenSampleMap[*NewRootName].second, MaxBodySample);
1258
FlattenSampleMap[*NewRootName] =
1259
std::make_pair(EntrySample, MaxBodySample);
1260
1261
for (const auto &C : FS.getCallsiteSamples())
1262
for (const auto &F : C.second)
1263
BuildImpl(F.second, *NewRootName, BuildImpl);
1264
};
1265
BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl);
1266
};
1267
1268
for (auto &PD : WC->Writer.getProfileData()) {
1269
// Populate IPBuilder.
1270
for (const auto &PDV : PD.getValue()) {
1271
InstrProfRecord Record = PDV.second;
1272
IPBuilder.addRecord(Record);
1273
}
1274
1275
// If a function has multiple entries in instr profile, skip it.
1276
if (PD.getValue().size() != 1)
1277
continue;
1278
1279
// Initialize InstrProfileMap.
1280
InstrProfRecord *R = &PD.getValue().begin()->second;
1281
StringRef FullName = PD.getKey();
1282
InstrProfileMap[FullName] = InstrProfileEntry(R);
1283
buildStaticFuncMap(FullName);
1284
}
1285
1286
for (auto &PD : Reader->getProfiles()) {
1287
sampleprof::FunctionSamples &FS = PD.second;
1288
std::string Name = FS.getFunction().str();
1289
BuildMaxSampleMap(FS, Name);
1290
}
1291
1292
ProfileSummary InstrPS = *IPBuilder.getSummary();
1293
ProfileSummary SamplePS = Reader->getSummary();
1294
1295
// Compute cold thresholds for instr profile and sample profile.
1296
uint64_t HotSampleThreshold =
1297
ProfileSummaryBuilder::getEntryForPercentile(
1298
SamplePS.getDetailedSummary(),
1299
ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1300
.MinCount;
1301
uint64_t ColdSampleThreshold =
1302
ProfileSummaryBuilder::getEntryForPercentile(
1303
SamplePS.getDetailedSummary(),
1304
ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1305
.MinCount;
1306
uint64_t HotInstrThreshold =
1307
ProfileSummaryBuilder::getEntryForPercentile(
1308
InstrPS.getDetailedSummary(),
1309
ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1310
.MinCount;
1311
uint64_t ColdInstrThreshold =
1312
InstrProfColdThreshold
1313
? InstrProfColdThreshold
1314
: ProfileSummaryBuilder::getEntryForPercentile(
1315
InstrPS.getDetailedSummary(),
1316
ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1317
.MinCount;
1318
1319
// Find hot/warm functions in sample profile which is cold in instr profile
1320
// and adjust the profiles of those functions in the instr profile.
1321
for (const auto &E : FlattenSampleMap) {
1322
uint64_t SampleMaxCount = std::max(E.second.first, E.second.second);
1323
if (SampleMaxCount < ColdSampleThreshold)
1324
continue;
1325
StringRef Name = E.first();
1326
auto It = InstrProfileMap.find(Name);
1327
if (It == InstrProfileMap.end()) {
1328
auto NewName = StaticFuncMap.find(Name);
1329
if (NewName != StaticFuncMap.end()) {
1330
It = InstrProfileMap.find(NewName->second.str());
1331
if (NewName->second == DuplicateNameStr) {
1332
WithColor::warning()
1333
<< "Static function " << Name
1334
<< " has multiple promoted names, cannot adjust profile.\n";
1335
}
1336
}
1337
}
1338
if (It == InstrProfileMap.end() ||
1339
It->second.MaxCount > ColdInstrThreshold ||
1340
It->second.NumEdgeCounters < SupplMinSizeThreshold)
1341
continue;
1342
bool SetToHot = SampleMaxCount >= HotSampleThreshold;
1343
updateInstrProfileEntry(It->second, SetToHot, HotInstrThreshold,
1344
ColdInstrThreshold, ZeroCounterThreshold);
1345
}
1346
}
1347
1348
/// The main function to supplement instr profile with sample profile.
1349
/// \Inputs contains the instr profile. \p SampleFilename specifies the
1350
/// sample profile. \p OutputFilename specifies the output profile name.
1351
/// \p OutputFormat specifies the output profile format. \p OutputSparse
1352
/// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
1353
/// specifies the minimal size for the functions whose profile will be
1354
/// adjusted. \p ZeroCounterThreshold is the threshold to check whether
1355
/// a function contains too many zero counters and whether its profile
1356
/// should be dropped. \p InstrProfColdThreshold is the user specified
1357
/// cold threshold which will override the cold threshold got from the
1358
/// instr profile summary.
1359
static void supplementInstrProfile(const WeightedFileVector &Inputs,
1360
StringRef SampleFilename, bool OutputSparse,
1361
unsigned SupplMinSizeThreshold,
1362
float ZeroCounterThreshold,
1363
unsigned InstrProfColdThreshold) {
1364
if (OutputFilename == "-")
1365
exitWithError("cannot write indexed profdata format to stdout");
1366
if (Inputs.size() != 1)
1367
exitWithError("expect one input to be an instr profile");
1368
if (Inputs[0].Weight != 1)
1369
exitWithError("expect instr profile doesn't have weight");
1370
1371
StringRef InstrFilename = Inputs[0].Filename;
1372
1373
// Read sample profile.
1374
LLVMContext Context;
1375
auto FS = vfs::getRealFileSystem();
1376
auto ReaderOrErr = sampleprof::SampleProfileReader::create(
1377
SampleFilename.str(), Context, *FS, FSDiscriminatorPassOption);
1378
if (std::error_code EC = ReaderOrErr.getError())
1379
exitWithErrorCode(EC, SampleFilename);
1380
auto Reader = std::move(ReaderOrErr.get());
1381
if (std::error_code EC = Reader->read())
1382
exitWithErrorCode(EC, SampleFilename);
1383
1384
// Read instr profile.
1385
std::mutex ErrorLock;
1386
SmallSet<instrprof_error, 4> WriterErrorCodes;
1387
auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock,
1388
WriterErrorCodes);
1389
loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get());
1390
if (WC->Errors.size() > 0)
1391
exitWithError(std::move(WC->Errors[0].first), InstrFilename);
1392
1393
adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
1394
InstrProfColdThreshold);
1395
writeInstrProfile(OutputFilename, OutputFormat, WC->Writer);
1396
}
1397
1398
/// Make a copy of the given function samples with all symbol names remapped
1399
/// by the provided symbol remapper.
1400
static sampleprof::FunctionSamples
1401
remapSamples(const sampleprof::FunctionSamples &Samples,
1402
SymbolRemapper &Remapper, sampleprof_error &Error) {
1403
sampleprof::FunctionSamples Result;
1404
Result.setFunction(Remapper(Samples.getFunction()));
1405
Result.addTotalSamples(Samples.getTotalSamples());
1406
Result.addHeadSamples(Samples.getHeadSamples());
1407
for (const auto &BodySample : Samples.getBodySamples()) {
1408
uint32_t MaskedDiscriminator =
1409
BodySample.first.Discriminator & getDiscriminatorMask();
1410
Result.addBodySamples(BodySample.first.LineOffset, MaskedDiscriminator,
1411
BodySample.second.getSamples());
1412
for (const auto &Target : BodySample.second.getCallTargets()) {
1413
Result.addCalledTargetSamples(BodySample.first.LineOffset,
1414
MaskedDiscriminator,
1415
Remapper(Target.first), Target.second);
1416
}
1417
}
1418
for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
1419
sampleprof::FunctionSamplesMap &Target =
1420
Result.functionSamplesAt(CallsiteSamples.first);
1421
for (const auto &Callsite : CallsiteSamples.second) {
1422
sampleprof::FunctionSamples Remapped =
1423
remapSamples(Callsite.second, Remapper, Error);
1424
mergeSampleProfErrors(Error,
1425
Target[Remapped.getFunction()].merge(Remapped));
1426
}
1427
}
1428
return Result;
1429
}
1430
1431
// Sample profile format to use for each output format; looked up as
// FormatMap[OutputFormat] when creating the sample profile writer.
// NOTE(review): the entry order must follow the declaration order of the
// ProfileFormat enumerators, which is not visible here -- verify when either
// side changes.
static sampleprof::SampleProfileFormat FormatMap[] = {
    sampleprof::SPF_None,       // index 0
    sampleprof::SPF_Text,       // index 1
    sampleprof::SPF_None,       // index 2
    sampleprof::SPF_Ext_Binary, // index 3
    sampleprof::SPF_GCC,        // index 4
    sampleprof::SPF_Binary};    // index 5
1438
1439
static std::unique_ptr<MemoryBuffer>
1440
getInputFileBuf(const StringRef &InputFile) {
1441
if (InputFile == "")
1442
return {};
1443
1444
auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
1445
if (!BufOrError)
1446
exitWithErrorCode(BufOrError.getError(), InputFile);
1447
1448
return std::move(*BufOrError);
1449
}
1450
1451
static void populateProfileSymbolList(MemoryBuffer *Buffer,
1452
sampleprof::ProfileSymbolList &PSL) {
1453
if (!Buffer)
1454
return;
1455
1456
SmallVector<StringRef, 32> SymbolVec;
1457
StringRef Data = Buffer->getBuffer();
1458
Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1459
1460
for (StringRef SymbolStr : SymbolVec)
1461
PSL.add(SymbolStr.trim());
1462
}
1463
1464
static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
1465
ProfileFormat OutputFormat,
1466
MemoryBuffer *Buffer,
1467
sampleprof::ProfileSymbolList &WriterList,
1468
bool CompressAllSections, bool UseMD5,
1469
bool GenPartialProfile) {
1470
populateProfileSymbolList(Buffer, WriterList);
1471
if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
1472
warn("Profile Symbol list is not empty but the output format is not "
1473
"ExtBinary format. The list will be lost in the output. ");
1474
1475
Writer.setProfileSymbolList(&WriterList);
1476
1477
if (CompressAllSections) {
1478
if (OutputFormat != PF_Ext_Binary)
1479
warn("-compress-all-section is ignored. Specify -extbinary to enable it");
1480
else
1481
Writer.setToCompressAllSections();
1482
}
1483
if (UseMD5) {
1484
if (OutputFormat != PF_Ext_Binary)
1485
warn("-use-md5 is ignored. Specify -extbinary to enable it");
1486
else
1487
Writer.setUseMD5();
1488
}
1489
if (GenPartialProfile) {
1490
if (OutputFormat != PF_Ext_Binary)
1491
warn("-gen-partial-profile is ignored. Specify -extbinary to enable it");
1492
else
1493
Writer.setPartialProfile();
1494
}
1495
}
1496
1497
static void mergeSampleProfile(const WeightedFileVector &Inputs,
1498
SymbolRemapper *Remapper,
1499
StringRef ProfileSymbolListFile,
1500
size_t OutputSizeLimit) {
1501
using namespace sampleprof;
1502
SampleProfileMap ProfileMap;
1503
SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
1504
LLVMContext Context;
1505
sampleprof::ProfileSymbolList WriterList;
1506
std::optional<bool> ProfileIsProbeBased;
1507
std::optional<bool> ProfileIsCS;
1508
for (const auto &Input : Inputs) {
1509
auto FS = vfs::getRealFileSystem();
1510
auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, *FS,
1511
FSDiscriminatorPassOption);
1512
if (std::error_code EC = ReaderOrErr.getError()) {
1513
warnOrExitGivenError(FailMode, EC, Input.Filename);
1514
continue;
1515
}
1516
1517
// We need to keep the readers around until after all the files are
1518
// read so that we do not lose the function names stored in each
1519
// reader's memory. The function names are needed to write out the
1520
// merged profile map.
1521
Readers.push_back(std::move(ReaderOrErr.get()));
1522
const auto Reader = Readers.back().get();
1523
if (std::error_code EC = Reader->read()) {
1524
warnOrExitGivenError(FailMode, EC, Input.Filename);
1525
Readers.pop_back();
1526
continue;
1527
}
1528
1529
SampleProfileMap &Profiles = Reader->getProfiles();
1530
if (ProfileIsProbeBased &&
1531
ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
1532
exitWithError(
1533
"cannot merge probe-based profile with non-probe-based profile");
1534
ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
1535
if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS)
1536
exitWithError("cannot merge CS profile with non-CS profile");
1537
ProfileIsCS = FunctionSamples::ProfileIsCS;
1538
for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
1539
I != E; ++I) {
1540
sampleprof_error Result = sampleprof_error::success;
1541
FunctionSamples Remapped =
1542
Remapper ? remapSamples(I->second, *Remapper, Result)
1543
: FunctionSamples();
1544
FunctionSamples &Samples = Remapper ? Remapped : I->second;
1545
SampleContext FContext = Samples.getContext();
1546
mergeSampleProfErrors(Result,
1547
ProfileMap[FContext].merge(Samples, Input.Weight));
1548
if (Result != sampleprof_error::success) {
1549
std::error_code EC = make_error_code(Result);
1550
handleMergeWriterError(errorCodeToError(EC), Input.Filename,
1551
FContext.toString());
1552
}
1553
}
1554
1555
if (!DropProfileSymbolList) {
1556
std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
1557
Reader->getProfileSymbolList();
1558
if (ReaderList)
1559
WriterList.merge(*ReaderList);
1560
}
1561
}
1562
1563
if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
1564
// Use threshold calculated from profile summary unless specified.
1565
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1566
auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
1567
uint64_t SampleProfColdThreshold =
1568
ProfileSummaryBuilder::getColdCountThreshold(
1569
(Summary->getDetailedSummary()));
1570
1571
// Trim and merge cold context profile using cold threshold above;
1572
SampleContextTrimmer(ProfileMap)
1573
.trimAndMergeColdContextProfiles(
1574
SampleProfColdThreshold, SampleTrimColdContext,
1575
SampleMergeColdContext, SampleColdContextFrameDepth, false);
1576
}
1577
1578
if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
1579
ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS);
1580
ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1581
} else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
1582
ProfileConverter CSConverter(ProfileMap);
1583
CSConverter.convertCSProfiles();
1584
ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1585
}
1586
1587
filterFunctions(ProfileMap);
1588
1589
auto WriterOrErr =
1590
SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
1591
if (std::error_code EC = WriterOrErr.getError())
1592
exitWithErrorCode(EC, OutputFilename);
1593
1594
auto Writer = std::move(WriterOrErr.get());
1595
// WriterList will have StringRef refering to string in Buffer.
1596
// Make sure Buffer lives as long as WriterList.
1597
auto Buffer = getInputFileBuf(ProfileSymbolListFile);
1598
handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
1599
CompressAllSections, UseMD5, GenPartialProfile);
1600
1601
// If OutputSizeLimit is 0 (default), it is the same as write().
1602
if (std::error_code EC =
1603
Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
1604
exitWithErrorCode(EC);
1605
}
1606
1607
/// Parse a "<weight>,<filename>" argument into a WeightedFile. The text is
/// split at the first comma; exits with an error unless the weight is a
/// positive decimal integer.
static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
  const auto [WeightText, FileText] = WeightedFilename.split(',');

  uint64_t Weight = 0;
  const bool ParseFailed = WeightText.getAsInteger(10, Weight);
  if (ParseFailed || Weight == 0)
    exitWithError("input weight must be a positive integer");

  return {std::string(FileText), Weight};
}
1617
1618
/// Append the input described by \p WF to \p WNI.
///
/// - "-" (standard input) is appended as-is without touching the filesystem.
/// - A regular file is appended with the weight carried by \p WF.
/// - A directory is walked recursively and every regular file found inside is
///   appended with the same weight.
/// - A path that does not exist (or cannot be stat'ed) is reported through
///   exitWithErrorCode(). The error code produced by the status query is
///   reported when there is one, so that failures other than "not found" are
///   not misreported; otherwise no_such_file_or_directory is used.
static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
  StringRef Filename = WF.Filename;
  uint64_t Weight = WF.Weight;

  // If it's STDIN just pass it on.
  if (Filename == "-") {
    WNI.push_back({std::string(Filename), Weight});
    return;
  }

  llvm::sys::fs::file_status Status;
  const std::error_code StatusEC = llvm::sys::fs::status(Filename, Status);
  if (!llvm::sys::fs::exists(Status))
    exitWithErrorCode(StatusEC
                          ? StatusEC
                          : make_error_code(errc::no_such_file_or_directory),
                      Filename);

  // If it's a source file, collect it.
  if (llvm::sys::fs::is_regular_file(Status)) {
    WNI.push_back({std::string(Filename), Weight});
    return;
  }

  if (llvm::sys::fs::is_directory(Status)) {
    std::error_code EC;
    for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E;
         F != E && !EC; F.increment(EC)) {
      // Only regular files are collected; the recursive iterator itself takes
      // care of descending into sub-directories.
      if (llvm::sys::fs::is_regular_file(F->path()))
        addWeightedInput(WNI, {F->path(), Weight});
    }
    // Iteration stops early on error; surface it against the directory name.
    if (EC)
      exitWithErrorCode(EC, Filename);
  }
}
1651
1652
static void parseInputFilenamesFile(MemoryBuffer *Buffer,
1653
WeightedFileVector &WFV) {
1654
if (!Buffer)
1655
return;
1656
1657
SmallVector<StringRef, 8> Entries;
1658
StringRef Data = Buffer->getBuffer();
1659
Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1660
for (const StringRef &FileWeightEntry : Entries) {
1661
StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
1662
// Skip comments.
1663
if (SanitizedEntry.starts_with("#"))
1664
continue;
1665
// If there's no comma, it's an unweighted profile.
1666
else if (!SanitizedEntry.contains(','))
1667
addWeightedInput(WFV, {std::string(SanitizedEntry), 1});
1668
else
1669
addWeightedInput(WFV, parseWeightedFile(SanitizedEntry));
1670
}
1671
}
1672
1673
/// Entry point of the "merge" sub-command.
///
/// Gathers the inputs named by the InputFilenames, WeightedInputFilenames and
/// InputFilenamesFile options, then either lists them (DumpInputFileList),
/// supplements an instrumentation profile with a sample profile
/// (SupplInstrWithSample), or merges them as instrumentation or sample
/// profiles depending on ProfileKind. Returns 0 on every path that returns.
static int merge_main(StringRef ProgName) {
  WeightedFileVector WeightedInputs;

  // Plain inputs carry an implicit weight of 1.
  for (StringRef PlainInput : InputFilenames)
    addWeightedInput(WeightedInputs, {std::string(PlainInput), 1});
  // Explicitly weighted inputs are parsed first.
  for (StringRef WeightedSpec : WeightedInputFilenames)
    addWeightedInput(WeightedInputs, parseWeightedFile(WeightedSpec));

  // Keep the file buffer alive in this scope for as long as the weighted
  // input vector is in use.
  auto Buffer = getInputFileBuf(InputFilenamesFile);
  parseInputFilenamesFile(Buffer.get(), WeightedInputs);

  if (WeightedInputs.empty())
    exitWithError("no input files specified. See " + ProgName + " merge -help");

  // Listing mode: print "<weight>,<filename>" per input and stop.
  if (DumpInputFileList) {
    for (const auto &Input : WeightedInputs)
      outs() << Input.Weight << "," << Input.Filename << "\n";
    return 0;
  }

  std::unique_ptr<SymbolRemapper> Remapper;
  if (!RemappingFile.empty())
    Remapper = SymbolRemapper::create(RemappingFile);

  // Supplementing an instrumentation profile with samples is a separate mode.
  if (!SupplInstrWithSample.empty()) {
    if (ProfileKind != instr)
      exitWithError(
          "-supplement-instr-with-sample can only work with -instr. ");

    supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputSparse,
                           SupplMinSizeThreshold, ZeroCounterThreshold,
                           InstrProfColdThreshold);
    return 0;
  }

  if (ProfileKind != instr) {
    mergeSampleProfile(WeightedInputs, Remapper.get(), ProfileSymbolListFile,
                       OutputSizeLimit);
    return 0;
  }

  mergeInstrProfile(WeightedInputs, Remapper.get(), MaxDbgCorrelationWarnings,
                    ProfiledBinary);
  return 0;
}
1717
1718
/// Computer the overlap b/w profile BaseFilename and profile TestFilename.
1719
static void overlapInstrProfile(const std::string &BaseFilename,
1720
const std::string &TestFilename,
1721
const OverlapFuncFilters &FuncFilter,
1722
raw_fd_ostream &OS, bool IsCS) {
1723
std::mutex ErrorLock;
1724
SmallSet<instrprof_error, 4> WriterErrorCodes;
1725
WriterContext Context(false, ErrorLock, WriterErrorCodes);
1726
WeightedFile WeightedInput{BaseFilename, 1};
1727
OverlapStats Overlap;
1728
Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS);
1729
if (E)
1730
exitWithError(std::move(E), "error in getting profile count sums");
1731
if (Overlap.Base.CountSum < 1.0f) {
1732
OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
1733
exit(0);
1734
}
1735
if (Overlap.Test.CountSum < 1.0f) {
1736
OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
1737
exit(0);
1738
}
1739
loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context);
1740
overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS,
1741
IsCS);
1742
Overlap.dump(OS);
1743
}
1744
1745
namespace {
1746
struct SampleOverlapStats {
1747
SampleContext BaseName;
1748
SampleContext TestName;
1749
// Number of overlap units
1750
uint64_t OverlapCount = 0;
1751
// Total samples of overlap units
1752
uint64_t OverlapSample = 0;
1753
// Number of and total samples of units that only present in base or test
1754
// profile
1755
uint64_t BaseUniqueCount = 0;
1756
uint64_t BaseUniqueSample = 0;
1757
uint64_t TestUniqueCount = 0;
1758
uint64_t TestUniqueSample = 0;
1759
// Number of units and total samples in base or test profile
1760
uint64_t BaseCount = 0;
1761
uint64_t BaseSample = 0;
1762
uint64_t TestCount = 0;
1763
uint64_t TestSample = 0;
1764
// Number of and total samples of units that present in at least one profile
1765
uint64_t UnionCount = 0;
1766
uint64_t UnionSample = 0;
1767
// Weighted similarity
1768
double Similarity = 0.0;
1769
// For SampleOverlapStats instances representing functions, weights of the
1770
// function in base and test profiles
1771
double BaseWeight = 0.0;
1772
double TestWeight = 0.0;
1773
1774
SampleOverlapStats() = default;
1775
};
1776
} // end anonymous namespace
1777
1778
namespace {
/// Per-function sample summary: the sum of its samples, its largest single
/// sample, and its number of hot blocks. Default-constructed instances hold
/// all zeros.
struct FuncSampleStats {
  uint64_t SampleSum = 0;
  uint64_t MaxSample = 0;
  uint64_t HotBlockCount = 0;

  FuncSampleStats() = default;

  /// Build a summary from already-known values.
  FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample,
                  uint64_t HotBlockCount)
      : SampleSum{SampleSum}, MaxSample{MaxSample},
        HotBlockCount{HotBlockCount} {}
};
} // end anonymous namespace
1790
1791
namespace {
/// Outcome of comparing the current elements of two sorted sequences.
/// MS_None means that no comparison has been made yet.
enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };

/// Cursor over two sorted maps that are walked in lock-step (a sort-merge
/// join). \p T must be a map iterator type.
template <class T> class MatchStep {
public:
  MatchStep() = delete;

  /// Start at \p FirstIter / \p SecondIter; no comparison has happened yet, so
  /// the status is MS_None until updateOneStep() is called.
  MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
      : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
        SecondEnd(SecondEnd) {}

  /// True once the first container has been fully consumed.
  bool isFirstFinished() const { return FirstIter == FirstEnd; }

  /// True once the second container has been fully consumed.
  bool isSecondFinished() const { return SecondIter == SecondEnd; }

  /// True once both containers have been fully consumed.
  bool areBothFinished() const {
    return isFirstFinished() && isSecondFinished();
  }

  /// Advance according to the status of the previous step (no advance when
  /// that status is MS_None, i.e. both iterators are still at their starting
  /// position and nothing has been compared), then recompute the status from
  /// the elements the iterators now point at.
  void updateOneStep();

  /// Current position in the first container.
  T getFirstIter() const { return FirstIter; }

  /// Current position in the second container.
  T getSecondIter() const { return SecondIter; }

  /// Status computed by the most recent step.
  MatchStatus getMatchStatus() const { return Status; }

private:
  // Cursor and end of the first container.
  T FirstIter;
  T FirstEnd;
  // Cursor and end of the second container.
  T SecondIter;
  T SecondEnd;
  // Status of the current step.
  MatchStatus Status = MS_None;
};
} // end anonymous namespace
1836
1837
/// Consume the element(s) classified by the previous step, then classify the
/// elements now under the two cursors. When both containers are exhausted the
/// status is left untouched.
template <class T> void MatchStep<T>::updateOneStep() {
  // MS_Match consumed one element from each side, the "unique" statuses only
  // from their own side, and MS_None (nothing compared yet) from neither.
  if (Status == MS_Match || Status == MS_FirstUnique)
    ++FirstIter;
  if (Status == MS_Match || Status == MS_SecondUnique)
    ++SecondIter;

  const bool FirstDone = (FirstIter == FirstEnd);
  const bool SecondDone = (SecondIter == SecondEnd);
  if (FirstDone && SecondDone)
    return;

  // The side holding the smaller key (or the only side still running) is
  // unique at this step; equal keys are a match.
  if (!FirstDone && (SecondDone || FirstIter->first < SecondIter->first)) {
    Status = MS_FirstUnique;
    return;
  }
  if (!SecondDone && (FirstDone || SecondIter->first < FirstIter->first)) {
    Status = MS_SecondUnique;
    return;
  }
  Status = MS_Match;
}
1865
1866
// Return the sum of line/block samples, the max line/block sample, and the
1867
// number of line/block samples above the given threshold in a function
1868
// including its inlinees.
1869
static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
1870
FuncSampleStats &FuncStats,
1871
uint64_t HotThreshold) {
1872
for (const auto &L : Func.getBodySamples()) {
1873
uint64_t Sample = L.second.getSamples();
1874
FuncStats.SampleSum += Sample;
1875
FuncStats.MaxSample = std::max(FuncStats.MaxSample, Sample);
1876
if (Sample >= HotThreshold)
1877
++FuncStats.HotBlockCount;
1878
}
1879
1880
for (const auto &C : Func.getCallsiteSamples()) {
1881
for (const auto &F : C.second)
1882
getFuncSampleStats(F.second, FuncStats, HotThreshold);
1883
}
1884
}
1885
1886
/// Predicate that determines if a function is hot with a given threshold. We
1887
/// keep it separate from its callsites for possible extension in the future.
1888
static bool isFunctionHot(const FuncSampleStats &FuncStats,
1889
uint64_t HotThreshold) {
1890
// We intentionally compare the maximum sample count in a function with the
1891
// HotThreshold to get an approximate determination on hot functions.
1892
return (FuncStats.MaxSample >= HotThreshold);
1893
}
1894
1895
namespace {
1896
class SampleOverlapAggregator {
1897
public:
1898
SampleOverlapAggregator(const std::string &BaseFilename,
1899
const std::string &TestFilename,
1900
double LowSimilarityThreshold, double Epsilon,
1901
const OverlapFuncFilters &FuncFilter)
1902
: BaseFilename(BaseFilename), TestFilename(TestFilename),
1903
LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
1904
FuncFilter(FuncFilter) {}
1905
1906
/// Detect 0-sample input profile and report to output stream. This interface
1907
/// should be called after loadProfiles().
1908
bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
1909
1910
/// Write out function-level similarity statistics for functions specified by
1911
/// options --function, --value-cutoff, and --similarity-cutoff.
1912
void dumpFuncSimilarity(raw_fd_ostream &OS) const;
1913
1914
/// Write out program-level similarity and overlap statistics.
1915
void dumpProgramSummary(raw_fd_ostream &OS) const;
1916
1917
/// Write out hot-function and hot-block statistics for base_profile,
1918
/// test_profile, and their overlap. For both cases, the overlap HO is
1919
/// calculated as follows:
1920
/// Given the number of functions (or blocks) that are hot in both profiles
1921
/// HCommon and the number of functions (or blocks) that are hot in at
1922
/// least one profile HUnion, HO = HCommon / HUnion.
1923
void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
1924
1925
/// This function tries matching functions in base and test profiles. For each
1926
/// pair of matched functions, it aggregates the function-level
1927
/// similarity into a profile-level similarity. It also dump function-level
1928
/// similarity information of functions specified by --function,
1929
/// --value-cutoff, and --similarity-cutoff options. The program-level
1930
/// similarity PS is computed as follows:
1931
/// Given function-level similarity FS(A) for all function A, the
1932
/// weight of function A in base profile WB(A), and the weight of function
1933
/// A in test profile WT(A), compute PS(base_profile, test_profile) =
1934
/// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
1935
/// meaning no-overlap.
1936
void computeSampleProfileOverlap(raw_fd_ostream &OS);
1937
1938
/// Initialize ProfOverlap with the sum of samples in base and test
1939
/// profiles. This function also computes and keeps the sum of samples and
1940
/// max sample counts of each function in BaseStats and TestStats for later
1941
/// use to avoid re-computations.
1942
void initializeSampleProfileOverlap();
1943
1944
/// Load profiles specified by BaseFilename and TestFilename.
1945
std::error_code loadProfiles();
1946
1947
using FuncSampleStatsMap =
1948
std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>;
1949
1950
private:
1951
SampleOverlapStats ProfOverlap;
1952
SampleOverlapStats HotFuncOverlap;
1953
SampleOverlapStats HotBlockOverlap;
1954
std::string BaseFilename;
1955
std::string TestFilename;
1956
std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
1957
std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
1958
// BaseStats and TestStats hold FuncSampleStats for each function, with
1959
// function name as the key.
1960
FuncSampleStatsMap BaseStats;
1961
FuncSampleStatsMap TestStats;
1962
// Low similarity threshold in floating point number
1963
double LowSimilarityThreshold;
1964
// Block samples above BaseHotThreshold or TestHotThreshold are considered hot
1965
// for tracking hot blocks.
1966
uint64_t BaseHotThreshold;
1967
uint64_t TestHotThreshold;
1968
// A small threshold used to round the results of floating point accumulations
1969
// to resolve imprecision.
1970
const double Epsilon;
1971
std::multimap<double, SampleOverlapStats, std::greater<double>>
1972
FuncSimilarityDump;
1973
// FuncFilter carries specifications in options --value-cutoff and
1974
// --function.
1975
OverlapFuncFilters FuncFilter;
1976
// Column offsets for printing the function-level details table.
1977
static const unsigned int TestWeightCol = 15;
1978
static const unsigned int SimilarityCol = 30;
1979
static const unsigned int OverlapCol = 43;
1980
static const unsigned int BaseUniqueCol = 53;
1981
static const unsigned int TestUniqueCol = 67;
1982
static const unsigned int BaseSampleCol = 81;
1983
static const unsigned int TestSampleCol = 96;
1984
static const unsigned int FuncNameCol = 111;
1985
1986
/// Return a similarity of two line/block sample counters in the same
1987
/// function in base and test profiles. The line/block-similarity BS(i) is
1988
/// computed as follows:
1989
/// For an offsets i, given the sample count at i in base profile BB(i),
1990
/// the sample count at i in test profile BT(i), the sum of sample counts
1991
/// in this function in base profile SB, and the sum of sample counts in
1992
/// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
1993
/// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
1994
double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
1995
const SampleOverlapStats &FuncOverlap) const;
1996
1997
void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
1998
uint64_t HotBlockCount);
1999
2000
void getHotFunctions(const FuncSampleStatsMap &ProfStats,
2001
FuncSampleStatsMap &HotFunc,
2002
uint64_t HotThreshold) const;
2003
2004
void computeHotFuncOverlap();
2005
2006
/// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2007
/// Difference for two sample units in a matched function according to the
2008
/// given match status.
2009
void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
2010
uint64_t HotBlockCount,
2011
SampleOverlapStats &FuncOverlap,
2012
double &Difference, MatchStatus Status);
2013
2014
/// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2015
/// Difference for unmatched callees that only present in one profile in a
2016
/// matched caller function.
2017
void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
2018
SampleOverlapStats &FuncOverlap,
2019
double &Difference, MatchStatus Status);
2020
2021
/// This function updates sample overlap statistics of an overlap function in
2022
/// base and test profile. It also calculates a function-internal similarity
2023
/// FIS as follows:
2024
/// For offsets i that have samples in at least one profile in this
2025
/// function A, given BS(i) returned by computeBlockSimilarity(), compute
2026
/// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
2027
/// 0.0 meaning no overlap.
2028
double computeSampleFunctionInternalOverlap(
2029
const sampleprof::FunctionSamples &BaseFunc,
2030
const sampleprof::FunctionSamples &TestFunc,
2031
SampleOverlapStats &FuncOverlap);
2032
2033
/// Function-level similarity (FS) is a weighted value over function internal
2034
/// similarity (FIS). This function computes a function's FS from its FIS by
2035
/// applying the weight.
2036
double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
2037
uint64_t TestFuncSample) const;
2038
2039
/// The function-level similarity FS(A) for a function A is computed as
2040
/// follows:
2041
/// Compute a function-internal similarity FIS(A) by
2042
/// computeSampleFunctionInternalOverlap(). Then, with the weight of
2043
/// function A in base profile WB(A), and the weight of function A in test
2044
/// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
2045
/// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
2046
double
2047
computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
2048
const sampleprof::FunctionSamples *TestFunc,
2049
SampleOverlapStats *FuncOverlap,
2050
uint64_t BaseFuncSample,
2051
uint64_t TestFuncSample);
2052
2053
/// Profile-level similarity (PS) is a weighted aggregate over function-level
2054
/// similarities (FS). This method weights the FS value by the function
2055
/// weights in the base and test profiles for the aggregation.
2056
double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
2057
uint64_t TestFuncSample) const;
2058
};
2059
} // end anonymous namespace
2060
2061
bool SampleOverlapAggregator::detectZeroSampleProfile(
2062
raw_fd_ostream &OS) const {
2063
bool HaveZeroSample = false;
2064
if (ProfOverlap.BaseSample == 0) {
2065
OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
2066
HaveZeroSample = true;
2067
}
2068
if (ProfOverlap.TestSample == 0) {
2069
OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
2070
HaveZeroSample = true;
2071
}
2072
return HaveZeroSample;
2073
}
2074
2075
double SampleOverlapAggregator::computeBlockSimilarity(
2076
uint64_t BaseSample, uint64_t TestSample,
2077
const SampleOverlapStats &FuncOverlap) const {
2078
double BaseFrac = 0.0;
2079
double TestFrac = 0.0;
2080
if (FuncOverlap.BaseSample > 0)
2081
BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
2082
if (FuncOverlap.TestSample > 0)
2083
TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
2084
return 1.0 - std::fabs(BaseFrac - TestFrac);
2085
}
2086
2087
void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
2088
uint64_t TestSample,
2089
uint64_t HotBlockCount) {
2090
bool IsBaseHot = (BaseSample >= BaseHotThreshold);
2091
bool IsTestHot = (TestSample >= TestHotThreshold);
2092
if (!IsBaseHot && !IsTestHot)
2093
return;
2094
2095
HotBlockOverlap.UnionCount += HotBlockCount;
2096
if (IsBaseHot)
2097
HotBlockOverlap.BaseCount += HotBlockCount;
2098
if (IsTestHot)
2099
HotBlockOverlap.TestCount += HotBlockCount;
2100
if (IsBaseHot && IsTestHot)
2101
HotBlockOverlap.OverlapCount += HotBlockCount;
2102
}
2103
2104
void SampleOverlapAggregator::getHotFunctions(
2105
const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc,
2106
uint64_t HotThreshold) const {
2107
for (const auto &F : ProfStats) {
2108
if (isFunctionHot(F.second, HotThreshold))
2109
HotFunc.emplace(F.first, F.second);
2110
}
2111
}
2112
2113
void SampleOverlapAggregator::computeHotFuncOverlap() {
2114
FuncSampleStatsMap BaseHotFunc;
2115
getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold);
2116
HotFuncOverlap.BaseCount = BaseHotFunc.size();
2117
2118
FuncSampleStatsMap TestHotFunc;
2119
getHotFunctions(TestStats, TestHotFunc, TestHotThreshold);
2120
HotFuncOverlap.TestCount = TestHotFunc.size();
2121
HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
2122
2123
for (const auto &F : BaseHotFunc) {
2124
if (TestHotFunc.count(F.first))
2125
++HotFuncOverlap.OverlapCount;
2126
else
2127
++HotFuncOverlap.UnionCount;
2128
}
2129
}
2130
2131
void SampleOverlapAggregator::updateOverlapStatsForFunction(
2132
uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
2133
SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
2134
assert(Status != MS_None &&
2135
"Match status should be updated before updating overlap statistics");
2136
if (Status == MS_FirstUnique) {
2137
TestSample = 0;
2138
FuncOverlap.BaseUniqueSample += BaseSample;
2139
} else if (Status == MS_SecondUnique) {
2140
BaseSample = 0;
2141
FuncOverlap.TestUniqueSample += TestSample;
2142
} else {
2143
++FuncOverlap.OverlapCount;
2144
}
2145
2146
FuncOverlap.UnionSample += std::max(BaseSample, TestSample);
2147
FuncOverlap.OverlapSample += std::min(BaseSample, TestSample);
2148
Difference +=
2149
1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
2150
updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
2151
}
2152
2153
void SampleOverlapAggregator::updateForUnmatchedCallee(
2154
const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
2155
double &Difference, MatchStatus Status) {
2156
assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&
2157
"Status must be either of the two unmatched cases");
2158
FuncSampleStats FuncStats;
2159
if (Status == MS_FirstUnique) {
2160
getFuncSampleStats(Func, FuncStats, BaseHotThreshold);
2161
updateOverlapStatsForFunction(FuncStats.SampleSum, 0,
2162
FuncStats.HotBlockCount, FuncOverlap,
2163
Difference, Status);
2164
} else {
2165
getFuncSampleStats(Func, FuncStats, TestHotThreshold);
2166
updateOverlapStatsForFunction(0, FuncStats.SampleSum,
2167
FuncStats.HotBlockCount, FuncOverlap,
2168
Difference, Status);
2169
}
2170
}
2171
2172
double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
2173
const sampleprof::FunctionSamples &BaseFunc,
2174
const sampleprof::FunctionSamples &TestFunc,
2175
SampleOverlapStats &FuncOverlap) {
2176
2177
using namespace sampleprof;
2178
2179
double Difference = 0;
2180
2181
// Accumulate Difference for regular line/block samples in the function.
2182
// We match them through sort-merge join algorithm because
2183
// FunctionSamples::getBodySamples() returns a map of sample counters ordered
2184
// by their offsets.
2185
MatchStep<BodySampleMap::const_iterator> BlockIterStep(
2186
BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
2187
TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
2188
BlockIterStep.updateOneStep();
2189
while (!BlockIterStep.areBothFinished()) {
2190
uint64_t BaseSample =
2191
BlockIterStep.isFirstFinished()
2192
? 0
2193
: BlockIterStep.getFirstIter()->second.getSamples();
2194
uint64_t TestSample =
2195
BlockIterStep.isSecondFinished()
2196
? 0
2197
: BlockIterStep.getSecondIter()->second.getSamples();
2198
updateOverlapStatsForFunction(BaseSample, TestSample, 1, FuncOverlap,
2199
Difference, BlockIterStep.getMatchStatus());
2200
2201
BlockIterStep.updateOneStep();
2202
}
2203
2204
// Accumulate Difference for callsite lines in the function. We match
2205
// them through sort-merge algorithm because
2206
// FunctionSamples::getCallsiteSamples() returns a map of callsite records
2207
// ordered by their offsets.
2208
MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
2209
BaseFunc.getCallsiteSamples().cbegin(),
2210
BaseFunc.getCallsiteSamples().cend(),
2211
TestFunc.getCallsiteSamples().cbegin(),
2212
TestFunc.getCallsiteSamples().cend());
2213
CallsiteIterStep.updateOneStep();
2214
while (!CallsiteIterStep.areBothFinished()) {
2215
MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
2216
assert(CallsiteStepStatus != MS_None &&
2217
"Match status should be updated before entering loop body");
2218
2219
if (CallsiteStepStatus != MS_Match) {
2220
auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
2221
? CallsiteIterStep.getFirstIter()
2222
: CallsiteIterStep.getSecondIter();
2223
for (const auto &F : Callsite->second)
2224
updateForUnmatchedCallee(F.second, FuncOverlap, Difference,
2225
CallsiteStepStatus);
2226
} else {
2227
// There may be multiple inlinees at the same offset, so we need to try
2228
// matching all of them. This match is implemented through sort-merge
2229
// algorithm because callsite records at the same offset are ordered by
2230
// function names.
2231
MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
2232
CallsiteIterStep.getFirstIter()->second.cbegin(),
2233
CallsiteIterStep.getFirstIter()->second.cend(),
2234
CallsiteIterStep.getSecondIter()->second.cbegin(),
2235
CallsiteIterStep.getSecondIter()->second.cend());
2236
CalleeIterStep.updateOneStep();
2237
while (!CalleeIterStep.areBothFinished()) {
2238
MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
2239
if (CalleeStepStatus != MS_Match) {
2240
auto Callee = (CalleeStepStatus == MS_FirstUnique)
2241
? CalleeIterStep.getFirstIter()
2242
: CalleeIterStep.getSecondIter();
2243
updateForUnmatchedCallee(Callee->second, FuncOverlap, Difference,
2244
CalleeStepStatus);
2245
} else {
2246
// An inlined function can contain other inlinees inside, so compute
2247
// the Difference recursively.
2248
Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
2249
CalleeIterStep.getFirstIter()->second,
2250
CalleeIterStep.getSecondIter()->second,
2251
FuncOverlap);
2252
}
2253
CalleeIterStep.updateOneStep();
2254
}
2255
}
2256
CallsiteIterStep.updateOneStep();
2257
}
2258
2259
// Difference reflects the total differences of line/block samples in this
2260
// function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
2261
// reflect the similarity between function profiles in [0.0f to 1.0f].
2262
return (2.0 - Difference) / 2;
2263
}
2264
2265
double SampleOverlapAggregator::weightForFuncSimilarity(
2266
double FuncInternalSimilarity, uint64_t BaseFuncSample,
2267
uint64_t TestFuncSample) const {
2268
// Compute the weight as the distance between the function weights in two
2269
// profiles.
2270
double BaseFrac = 0.0;
2271
double TestFrac = 0.0;
2272
assert(ProfOverlap.BaseSample > 0 &&
2273
"Total samples in base profile should be greater than 0");
2274
BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
2275
assert(ProfOverlap.TestSample > 0 &&
2276
"Total samples in test profile should be greater than 0");
2277
TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
2278
double WeightDistance = std::fabs(BaseFrac - TestFrac);
2279
2280
// Take WeightDistance into the similarity.
2281
return FuncInternalSimilarity * (1 - WeightDistance);
2282
}
2283
2284
double
2285
SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
2286
uint64_t BaseFuncSample,
2287
uint64_t TestFuncSample) const {
2288
2289
double BaseFrac = 0.0;
2290
double TestFrac = 0.0;
2291
assert(ProfOverlap.BaseSample > 0 &&
2292
"Total samples in base profile should be greater than 0");
2293
BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0;
2294
assert(ProfOverlap.TestSample > 0 &&
2295
"Total samples in test profile should be greater than 0");
2296
TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0;
2297
return FuncSimilarity * (BaseFrac + TestFrac);
2298
}
2299
2300
double SampleOverlapAggregator::computeSampleFunctionOverlap(
2301
const sampleprof::FunctionSamples *BaseFunc,
2302
const sampleprof::FunctionSamples *TestFunc,
2303
SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
2304
uint64_t TestFuncSample) {
2305
// Default function internal similarity before weighted, meaning two functions
2306
// has no overlap.
2307
const double DefaultFuncInternalSimilarity = 0;
2308
double FuncSimilarity;
2309
double FuncInternalSimilarity;
2310
2311
// If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
2312
// In this case, we use DefaultFuncInternalSimilarity as the function internal
2313
// similarity.
2314
if (!BaseFunc || !TestFunc) {
2315
FuncInternalSimilarity = DefaultFuncInternalSimilarity;
2316
} else {
2317
assert(FuncOverlap != nullptr &&
2318
"FuncOverlap should be provided in this case");
2319
FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
2320
*BaseFunc, *TestFunc, *FuncOverlap);
2321
// Now, FuncInternalSimilarity may be a little less than 0 due to
2322
// imprecision of floating point accumulations. Make it zero if the
2323
// difference is below Epsilon.
2324
FuncInternalSimilarity = (std::fabs(FuncInternalSimilarity - 0) < Epsilon)
2325
? 0
2326
: FuncInternalSimilarity;
2327
}
2328
FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
2329
BaseFuncSample, TestFuncSample);
2330
return FuncSimilarity;
2331
}
2332
2333
void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
2334
using namespace sampleprof;
2335
2336
std::unordered_map<SampleContext, const FunctionSamples *,
2337
SampleContext::Hash>
2338
BaseFuncProf;
2339
const auto &BaseProfiles = BaseReader->getProfiles();
2340
for (const auto &BaseFunc : BaseProfiles) {
2341
BaseFuncProf.emplace(BaseFunc.second.getContext(), &(BaseFunc.second));
2342
}
2343
ProfOverlap.UnionCount = BaseFuncProf.size();
2344
2345
const auto &TestProfiles = TestReader->getProfiles();
2346
for (const auto &TestFunc : TestProfiles) {
2347
SampleOverlapStats FuncOverlap;
2348
FuncOverlap.TestName = TestFunc.second.getContext();
2349
assert(TestStats.count(FuncOverlap.TestName) &&
2350
"TestStats should have records for all functions in test profile "
2351
"except inlinees");
2352
FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
2353
2354
bool Matched = false;
2355
const auto Match = BaseFuncProf.find(FuncOverlap.TestName);
2356
if (Match == BaseFuncProf.end()) {
2357
const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
2358
++ProfOverlap.TestUniqueCount;
2359
ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
2360
FuncOverlap.TestUniqueSample = FuncStats.SampleSum;
2361
2362
updateHotBlockOverlap(0, FuncStats.SampleSum, FuncStats.HotBlockCount);
2363
2364
double FuncSimilarity = computeSampleFunctionOverlap(
2365
nullptr, nullptr, nullptr, 0, FuncStats.SampleSum);
2366
ProfOverlap.Similarity +=
2367
weightByImportance(FuncSimilarity, 0, FuncStats.SampleSum);
2368
2369
++ProfOverlap.UnionCount;
2370
ProfOverlap.UnionSample += FuncStats.SampleSum;
2371
} else {
2372
++ProfOverlap.OverlapCount;
2373
2374
// Two functions match with each other. Compute function-level overlap and
2375
// aggregate them into profile-level overlap.
2376
FuncOverlap.BaseName = Match->second->getContext();
2377
assert(BaseStats.count(FuncOverlap.BaseName) &&
2378
"BaseStats should have records for all functions in base profile "
2379
"except inlinees");
2380
FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum;
2381
2382
FuncOverlap.Similarity = computeSampleFunctionOverlap(
2383
Match->second, &TestFunc.second, &FuncOverlap, FuncOverlap.BaseSample,
2384
FuncOverlap.TestSample);
2385
ProfOverlap.Similarity +=
2386
weightByImportance(FuncOverlap.Similarity, FuncOverlap.BaseSample,
2387
FuncOverlap.TestSample);
2388
ProfOverlap.OverlapSample += FuncOverlap.OverlapSample;
2389
ProfOverlap.UnionSample += FuncOverlap.UnionSample;
2390
2391
// Accumulate the percentage of base unique and test unique samples into
2392
// ProfOverlap.
2393
ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample;
2394
ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample;
2395
2396
// Remove matched base functions for later reporting functions not found
2397
// in test profile.
2398
BaseFuncProf.erase(Match);
2399
Matched = true;
2400
}
2401
2402
// Print function-level similarity information if specified by options.
2403
assert(TestStats.count(FuncOverlap.TestName) &&
2404
"TestStats should have records for all functions in test profile "
2405
"except inlinees");
2406
if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
2407
(Matched && FuncOverlap.Similarity < LowSimilarityThreshold) ||
2408
(Matched && !FuncFilter.NameFilter.empty() &&
2409
FuncOverlap.BaseName.toString().find(FuncFilter.NameFilter) !=
2410
std::string::npos)) {
2411
assert(ProfOverlap.BaseSample > 0 &&
2412
"Total samples in base profile should be greater than 0");
2413
FuncOverlap.BaseWeight =
2414
static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample;
2415
assert(ProfOverlap.TestSample > 0 &&
2416
"Total samples in test profile should be greater than 0");
2417
FuncOverlap.TestWeight =
2418
static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample;
2419
FuncSimilarityDump.emplace(FuncOverlap.BaseWeight, FuncOverlap);
2420
}
2421
}
2422
2423
// Traverse through functions in base profile but not in test profile.
2424
for (const auto &F : BaseFuncProf) {
2425
assert(BaseStats.count(F.second->getContext()) &&
2426
"BaseStats should have records for all functions in base profile "
2427
"except inlinees");
2428
const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()];
2429
++ProfOverlap.BaseUniqueCount;
2430
ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
2431
2432
updateHotBlockOverlap(FuncStats.SampleSum, 0, FuncStats.HotBlockCount);
2433
2434
double FuncSimilarity = computeSampleFunctionOverlap(
2435
nullptr, nullptr, nullptr, FuncStats.SampleSum, 0);
2436
ProfOverlap.Similarity +=
2437
weightByImportance(FuncSimilarity, FuncStats.SampleSum, 0);
2438
2439
ProfOverlap.UnionSample += FuncStats.SampleSum;
2440
}
2441
2442
// Now, ProfSimilarity may be a little greater than 1 due to imprecision
2443
// of floating point accumulations. Make it 1.0 if the difference is below
2444
// Epsilon.
2445
ProfOverlap.Similarity = (std::fabs(ProfOverlap.Similarity - 1) < Epsilon)
2446
? 1
2447
: ProfOverlap.Similarity;
2448
2449
computeHotFuncOverlap();
2450
}
2451
2452
void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2453
const auto &BaseProf = BaseReader->getProfiles();
2454
for (const auto &I : BaseProf) {
2455
++ProfOverlap.BaseCount;
2456
FuncSampleStats FuncStats;
2457
getFuncSampleStats(I.second, FuncStats, BaseHotThreshold);
2458
ProfOverlap.BaseSample += FuncStats.SampleSum;
2459
BaseStats.emplace(I.second.getContext(), FuncStats);
2460
}
2461
2462
const auto &TestProf = TestReader->getProfiles();
2463
for (const auto &I : TestProf) {
2464
++ProfOverlap.TestCount;
2465
FuncSampleStats FuncStats;
2466
getFuncSampleStats(I.second, FuncStats, TestHotThreshold);
2467
ProfOverlap.TestSample += FuncStats.SampleSum;
2468
TestStats.emplace(I.second.getContext(), FuncStats);
2469
}
2470
2471
ProfOverlap.BaseName = StringRef(BaseFilename);
2472
ProfOverlap.TestName = StringRef(TestFilename);
2473
}
2474
2475
/// Print the function-level table: a header row followed by one row for each
/// entry of FuncSimilarityDump. Nothing is printed when that container is
/// empty.
void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
  using namespace sampleprof;

  if (FuncSimilarityDump.empty())
    return;

  // Num / Den as a double; 0 when the denominator is not positive.
  const auto Fraction = [](auto Num, auto Den) -> double {
    return Den > 0 ? static_cast<double>(Num) / Den : 0;
  };

  formatted_raw_ostream FOS(OS);

  // Writes a fraction as a percentage with two decimals.
  const auto EmitPercent = [&FOS](double Value) {
    FOS << format("%.2f%%", Value * 100);
  };

  // Header row.
  FOS << "Function-level details:\n";
  FOS << "Base weight";
  FOS.PadToColumn(TestWeightCol);
  FOS << "Test weight";
  FOS.PadToColumn(SimilarityCol);
  FOS << "Similarity";
  FOS.PadToColumn(OverlapCol);
  FOS << "Overlap";
  FOS.PadToColumn(BaseUniqueCol);
  FOS << "Base unique";
  FOS.PadToColumn(TestUniqueCol);
  FOS << "Test unique";
  FOS.PadToColumn(BaseSampleCol);
  FOS << "Base samples";
  FOS.PadToColumn(TestSampleCol);
  FOS << "Test samples";
  FOS.PadToColumn(FuncNameCol);
  FOS << "Function name\n";

  // One row per recorded function, in the container's iteration order.
  for (const auto &Row : FuncSimilarityDump) {
    const auto &Info = Row.second;

    EmitPercent(Info.BaseWeight);
    FOS.PadToColumn(TestWeightCol);
    EmitPercent(Info.TestWeight);
    FOS.PadToColumn(SimilarityCol);
    EmitPercent(Info.Similarity);
    FOS.PadToColumn(OverlapCol);
    EmitPercent(Fraction(Info.OverlapSample, Info.UnionSample));
    FOS.PadToColumn(BaseUniqueCol);
    EmitPercent(Fraction(Info.BaseUniqueSample, Info.BaseSample));
    FOS.PadToColumn(TestUniqueCol);
    EmitPercent(Fraction(Info.TestUniqueSample, Info.TestSample));
    FOS.PadToColumn(BaseSampleCol);
    FOS << Info.BaseSample;
    FOS.PadToColumn(TestSampleCol);
    FOS << Info.TestSample;
    FOS.PadToColumn(FuncNameCol);
    FOS << Info.TestName.toString() << "\n";
  }
}
2535
2536
/// Print the program-level overlap report: similarity, sample overlap,
/// unique-sample percentages, sample totals and function-count overlap.
/// The asserts document that every denominator is expected to be non-zero
/// by the time this is called.
void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
  // Writes "<Label><Ratio as a percentage with three decimals>\n".
  const auto PrintPercentLine = [&OS](const char *Label, double Ratio) {
    OS << Label << format("%.3f%%", Ratio * 100) << "\n";
  };

  OS << "Profile overlap infomation for base_profile: "
     << ProfOverlap.BaseName.toString()
     << " and test_profile: " << ProfOverlap.TestName.toString()
     << "\nProgram level:\n";

  PrintPercentLine(" Whole program profile similarity: ",
                   ProfOverlap.Similarity);

  assert(ProfOverlap.UnionSample > 0 &&
         "Total samples in two profile should be greater than 0");
  const double SampleOverlapRatio =
      static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample;

  assert(ProfOverlap.BaseSample > 0 &&
         "Total samples in base profile should be greater than 0");
  const double BaseUniqueRatio =
      static_cast<double>(ProfOverlap.BaseUniqueSample) /
      ProfOverlap.BaseSample;

  assert(ProfOverlap.TestSample > 0 &&
         "Total samples in test profile should be greater than 0");
  const double TestUniqueRatio =
      static_cast<double>(ProfOverlap.TestUniqueSample) /
      ProfOverlap.TestSample;

  PrintPercentLine(" Whole program sample overlap: ", SampleOverlapRatio);
  PrintPercentLine(" percentage of samples unique in base profile: ",
                   BaseUniqueRatio);
  PrintPercentLine(" percentage of samples unique in test profile: ",
                   TestUniqueRatio);
  OS << " total samples in base profile: " << ProfOverlap.BaseSample << "\n"
     << " total samples in test profile: " << ProfOverlap.TestSample << "\n";

  assert(ProfOverlap.UnionCount > 0 &&
         "There should be at least one function in two input profiles");
  const double FuncOverlapRatio =
      static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount;
  PrintPercentLine(" Function overlap: ", FuncOverlapRatio);

  OS << " overlap functions: " << ProfOverlap.OverlapCount << "\n";
  OS << " functions unique in base profile: " << ProfOverlap.BaseUniqueCount
     << "\n";
  OS << " functions unique in test profile: " << ProfOverlap.TestUniqueCount
     << "\n";
}
2579
2580
void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2581
raw_fd_ostream &OS) const {
2582
assert(HotFuncOverlap.UnionCount > 0 &&
2583
"There should be at least one hot function in two input profiles");
2584
OS << " Hot-function overlap: "
2585
<< format("%.3f%%", static_cast<double>(HotFuncOverlap.OverlapCount) /
2586
HotFuncOverlap.UnionCount * 100)
2587
<< "\n";
2588
OS << " overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n";
2589
OS << " hot functions unique in base profile: "
2590
<< HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
2591
OS << " hot functions unique in test profile: "
2592
<< HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
2593
2594
assert(HotBlockOverlap.UnionCount > 0 &&
2595
"There should be at least one hot block in two input profiles");
2596
OS << " Hot-block overlap: "
2597
<< format("%.3f%%", static_cast<double>(HotBlockOverlap.OverlapCount) /
2598
HotBlockOverlap.UnionCount * 100)
2599
<< "\n";
2600
OS << " overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n";
2601
OS << " hot blocks unique in base profile: "
2602
<< HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
2603
OS << " hot blocks unique in test profile: "
2604
<< HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
2605
}
2606
2607
std::error_code SampleOverlapAggregator::loadProfiles() {
2608
using namespace sampleprof;
2609
2610
LLVMContext Context;
2611
auto FS = vfs::getRealFileSystem();
2612
auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, *FS,
2613
FSDiscriminatorPassOption);
2614
if (std::error_code EC = BaseReaderOrErr.getError())
2615
exitWithErrorCode(EC, BaseFilename);
2616
2617
auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, *FS,
2618
FSDiscriminatorPassOption);
2619
if (std::error_code EC = TestReaderOrErr.getError())
2620
exitWithErrorCode(EC, TestFilename);
2621
2622
BaseReader = std::move(BaseReaderOrErr.get());
2623
TestReader = std::move(TestReaderOrErr.get());
2624
2625
if (std::error_code EC = BaseReader->read())
2626
exitWithErrorCode(EC, BaseFilename);
2627
if (std::error_code EC = TestReader->read())
2628
exitWithErrorCode(EC, TestFilename);
2629
if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
2630
exitWithError(
2631
"cannot compare probe-based profile with non-probe-based profile");
2632
if (BaseReader->profileIsCS() != TestReader->profileIsCS())
2633
exitWithError("cannot compare CS profile with non-CS profile");
2634
2635
// Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2636
// profile summary.
2637
ProfileSummary &BasePS = BaseReader->getSummary();
2638
ProfileSummary &TestPS = TestReader->getSummary();
2639
BaseHotThreshold =
2640
ProfileSummaryBuilder::getHotCountThreshold(BasePS.getDetailedSummary());
2641
TestHotThreshold =
2642
ProfileSummaryBuilder::getHotCountThreshold(TestPS.getDetailedSummary());
2643
2644
return std::error_code();
2645
}
2646
2647
void overlapSampleProfile(const std::string &BaseFilename,
2648
const std::string &TestFilename,
2649
const OverlapFuncFilters &FuncFilter,
2650
uint64_t SimilarityCutoff, raw_fd_ostream &OS) {
2651
using namespace sampleprof;
2652
2653
// We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2654
// report 2--3 places after decimal point in percentage numbers.
2655
SampleOverlapAggregator OverlapAggr(
2656
BaseFilename, TestFilename,
2657
static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter);
2658
if (std::error_code EC = OverlapAggr.loadProfiles())
2659
exitWithErrorCode(EC);
2660
2661
OverlapAggr.initializeSampleProfileOverlap();
2662
if (OverlapAggr.detectZeroSampleProfile(OS))
2663
return;
2664
2665
OverlapAggr.computeSampleProfileOverlap(OS);
2666
2667
OverlapAggr.dumpProgramSummary(OS);
2668
OverlapAggr.dumpHotFuncAndBlockOverlap(OS);
2669
OverlapAggr.dumpFuncSimilarity(OS);
2670
}
2671
2672
static int overlap_main() {
2673
std::error_code EC;
2674
raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
2675
if (EC)
2676
exitWithErrorCode(EC, OutputFilename);
2677
2678
if (ProfileKind == instr)
2679
overlapInstrProfile(BaseFilename, TestFilename,
2680
OverlapFuncFilters{OverlapValueCutoff, FuncNameFilter},
2681
OS, IsCS);
2682
else
2683
overlapSampleProfile(BaseFilename, TestFilename,
2684
OverlapFuncFilters{OverlapValueCutoff, FuncNameFilter},
2685
SimilarityCutoff, OS);
2686
2687
return 0;
2688
}
2689
2690
namespace {
/// Running totals collected while walking the value sites of one value kind.
struct ValueSitesStats {
  ValueSitesStats() = default;

  // Number of value sites visited.
  uint64_t TotalNumValueSites{0};
  // Number of visited sites that recorded at least one value.
  uint64_t TotalNumValueSitesWithValueProfile{0};
  // Number of values summed over all visited sites.
  uint64_t TotalNumValues{0};
  // Entry I counts the sites that recorded exactly I + 1 values.
  std::vector<unsigned> ValueSitesHistogram{};
};
} // namespace
2699
2700
static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
2701
ValueSitesStats &Stats, raw_fd_ostream &OS,
2702
InstrProfSymtab *Symtab) {
2703
uint32_t NS = Func.getNumValueSites(VK);
2704
Stats.TotalNumValueSites += NS;
2705
for (size_t I = 0; I < NS; ++I) {
2706
auto VD = Func.getValueArrayForSite(VK, I);
2707
uint32_t NV = VD.size();
2708
if (NV == 0)
2709
continue;
2710
Stats.TotalNumValues += NV;
2711
Stats.TotalNumValueSitesWithValueProfile++;
2712
if (NV > Stats.ValueSitesHistogram.size())
2713
Stats.ValueSitesHistogram.resize(NV, 0);
2714
Stats.ValueSitesHistogram[NV - 1]++;
2715
2716
uint64_t SiteSum = 0;
2717
for (const auto &V : VD)
2718
SiteSum += V.Count;
2719
if (SiteSum == 0)
2720
SiteSum = 1;
2721
2722
for (const auto &V : VD) {
2723
OS << "\t[ " << format("%2u", I) << ", ";
2724
if (Symtab == nullptr)
2725
OS << format("%4" PRIu64, V.Value);
2726
else
2727
OS << Symtab->getFuncOrVarName(V.Value);
2728
OS << ", " << format("%10" PRId64, V.Count) << " ] ("
2729
<< format("%.2f%%", (V.Count * 100.0 / SiteSum)) << ")\n";
2730
}
2731
}
2732
}
2733
2734
static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
2735
ValueSitesStats &Stats) {
2736
OS << " Total number of sites: " << Stats.TotalNumValueSites << "\n";
2737
OS << " Total number of sites with values: "
2738
<< Stats.TotalNumValueSitesWithValueProfile << "\n";
2739
OS << " Total number of profiled values: " << Stats.TotalNumValues << "\n";
2740
2741
OS << " Value sites histogram:\n\tNumTargets, SiteCount\n";
2742
for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) {
2743
if (Stats.ValueSitesHistogram[I] > 0)
2744
OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n";
2745
}
2746
}
2747
2748
/// Show the instrumentation profile named by the global Filename on \p OS.
///
/// Depending on the command-line flags this either re-emits selected records
/// in text form, lists covered functions, lists functions below the value
/// cutoff, or prints per-function counters followed by a profile summary and
/// the optional top-N / value-profile / binary-id / version / temporal-trace
/// sections. JSON and YAML formats are rejected. Returns 0; failures exit
/// through exitWithError.
static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
  using NameAndCount = std::pair<std::string, uint64_t>;

  if (SFormat == ShowFormat::Json)
    exitWithError("JSON output is not supported for instr profiles");
  if (SFormat == ShowFormat::Yaml)
    exitWithError("YAML output is not supported for instr profiles");

  auto FS = vfs::getRealFileSystem();
  auto ReaderOrErr = InstrProfReader::create(Filename, *FS);

  // Use the user-provided summary cutoffs, falling back to the defaults when
  // a detailed summary was requested without any.
  std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
  if (ShowDetailedSummary && Cutoffs.empty())
    Cutoffs = ProfileSummaryBuilder::DefaultCutoffs;
  InstrProfSummaryBuilder Builder(std::move(Cutoffs));

  if (Error E = ReaderOrErr.takeError())
    exitWithError(std::move(E), Filename);

  auto Reader = std::move(ReaderOrErr.get());
  const bool IsIRInstr = Reader->isIRLevelProfile();
  size_t ShownFunctions = 0;
  size_t BelowCutoffFunctions = 0;
  const int NumVPKind = IPVK_Last - IPVK_First + 1;
  std::vector<ValueSitesStats> VPStats(NumVPKind);

  // Orders the queue so that top() is the entry with the smallest count,
  // i.e. the one to evict when a hotter function shows up.
  auto SmallestCountOnTop = [](const NameAndCount &Lhs,
                               const NameAndCount &Rhs) {
    return Lhs.second > Rhs.second;
  };
  std::priority_queue<NameAndCount, std::vector<NameAndCount>,
                      decltype(SmallestCountOnTop)>
      HottestFuncs(SmallestCountOnTop);

  // Prints the "Counters:" banner ahead of the first shown function, bumps
  // the shown-function count and writes the name / hash / counter-count
  // lines. The caller terminates the last line.
  auto BeginFunctionEntry = [&](const auto &Record) {
    if (!ShownFunctions)
      OS << "Counters:\n";
    ++ShownFunctions;
    OS << " " << Record.Name << ":\n"
       << " Hash: " << format("0x%016" PRIx64, Record.Hash) << "\n"
       << " Counters: " << Record.Counts.size();
  };

  if (!TextFormat && OnlyListBelow)
    OS << "The list of functions with the maximum counter less than "
       << ShowValueCutoff << ":\n";

  // Add marker so that IR-level instrumentation round-trips properly.
  if (TextFormat && IsIRInstr)
    OS << ":ir\n";

  for (const auto &Func : *Reader) {
    // For IR-level profiles only records whose CS flag matches ShowCS are
    // considered.
    if (Reader->isIRLevelProfile() &&
        NamedInstrProfRecord::hasCSFlagInHash(Func.Hash) != ShowCS)
      continue;

    const bool Show =
        ShowAllFunctions ||
        (!FuncNameFilter.empty() && Func.Name.contains(FuncNameFilter));

    // Text mode: selected records are written back verbatim and take no part
    // in the summary.
    if (Show && TextFormat) {
      InstrProfWriter::writeRecordInText(Func.Name, Func.Hash, Func,
                                         Reader->getSymtab(), OS);
      continue;
    }

    assert(Func.Counts.size() > 0 && "function missing entry counter");
    Builder.addRecord(Func);

    // Coverage mode: print the name of every function with a non-zero
    // counter and nothing else.
    if (ShowCovered) {
      if (llvm::any_of(Func.Counts, [](uint64_t C) { return C; }))
        OS << Func.Name << "\n";
      continue;
    }

    // Pseudo-count records are only tagged, never folded into max/sum.
    const auto PseudoKind = Func.getCountPseudoKind();
    if (PseudoKind != InstrProfRecord::NotPseudo) {
      if (Show) {
        BeginFunctionEntry(Func);
        if (PseudoKind == InstrProfRecord::PseudoHot)
          OS << " <PseudoHot>\n";
        else if (PseudoKind == InstrProfRecord::PseudoWarm)
          OS << " <PseudoWarm>\n";
        else
          llvm_unreachable("Unknown PseudoKind");
      }
      continue;
    }

    uint64_t FuncMax = 0;
    uint64_t FuncSum = 0;
    for (const uint64_t Count : Func.Counts) {
      FuncMax = std::max(FuncMax, Count);
      FuncSum += Count;
    }

    if (FuncMax < ShowValueCutoff) {
      ++BelowCutoffFunctions;
      if (OnlyListBelow)
        OS << " " << Func.Name << ": (Max = " << FuncMax
           << " Sum = " << FuncSum << ")\n";
      continue;
    }
    if (OnlyListBelow)
      continue;

    // Keep at most TopNFunctions entries: once full, a new function only
    // gets in by displacing the current smallest one.
    if (TopNFunctions) {
      const bool QueueFull = HottestFuncs.size() == TopNFunctions;
      if (!QueueFull || HottestFuncs.top().second < FuncMax) {
        if (QueueFull)
          HottestFuncs.pop();
        HottestFuncs.emplace(std::string(Func.Name), FuncMax);
      }
    }

    if (!Show)
      continue;

    BeginFunctionEntry(Func);
    OS << "\n";
    if (!IsIRInstr)
      OS << " Function count: " << Func.Counts[0] << "\n";

    if (ShowIndirectCallTargets)
      OS << " Indirect Call Site Count: "
         << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n";

    if (ShowVTables)
      OS << " Number of instrumented vtables: "
         << Func.getNumValueSites(IPVK_VTableTarget) << "\n";

    const uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize);
    const bool ShowMemOPForFunc = ShowMemOPSizes && NumMemOPCalls > 0;
    if (ShowMemOPForFunc)
      OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls << "\n";

    if (ShowCounts) {
      OS << " Block counts: [";
      // Non-IR profiles already printed counter 0 as the function count.
      const char *Separator = "";
      for (size_t Idx = (IsIRInstr ? 0 : 1), End = Func.Counts.size();
           Idx < End; ++Idx) {
        OS << Separator << Func.Counts[Idx];
        Separator = ", ";
      }
      OS << "]\n";
    }

    if (ShowIndirectCallTargets) {
      OS << " Indirect Target Results:\n";
      traverseAllValueSites(Func, IPVK_IndirectCallTarget,
                            VPStats[IPVK_IndirectCallTarget], OS,
                            &(Reader->getSymtab()));
    }

    if (ShowVTables) {
      OS << " VTable Results:\n";
      traverseAllValueSites(Func, IPVK_VTableTarget,
                            VPStats[IPVK_VTableTarget], OS,
                            &(Reader->getSymtab()));
    }

    if (ShowMemOPForFunc) {
      OS << " Memory Intrinsic Size Results:\n";
      traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS,
                            nullptr);
    }
  }
  if (Reader->hasError())
    exitWithError(Reader->getError(), Filename);

  // Text and coverage modes print no summary.
  if (TextFormat || ShowCovered)
    return 0;

  std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
  const bool IsIRLevel = Reader->isIRLevelProfile();
  OS << "Instrumentation level: " << (IsIRLevel ? "IR" : "Front-end");
  if (IsIRLevel)
    OS << " entry_first = " << Reader->instrEntryBBEnabled();
  OS << "\n";
  if (ShowAllFunctions || !FuncNameFilter.empty())
    OS << "Functions shown: " << ShownFunctions << "\n";
  OS << "Total functions: " << PS->getNumFunctions() << "\n";
  if (ShowValueCutoff > 0) {
    OS << "Number of functions with maximum count (< " << ShowValueCutoff
       << "): " << BelowCutoffFunctions << "\n";
    OS << "Number of functions with maximum count (>= " << ShowValueCutoff
       << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
  }
  OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n";
  OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n";

  if (TopNFunctions) {
    // The queue pops its smallest entry first, so drain it and then walk the
    // drained list backwards to print the largest count first.
    std::vector<NameAndCount> Drained;
    while (!HottestFuncs.empty()) {
      Drained.emplace_back(HottestFuncs.top());
      HottestFuncs.pop();
    }
    OS << "Top " << TopNFunctions
       << " functions with the largest internal block counts: \n";
    for (auto It = Drained.rbegin(), End = Drained.rend(); It != End; ++It)
      OS << " " << It->first << ", max count = " << It->second << "\n";
  }

  if (ShownFunctions && ShowIndirectCallTargets) {
    OS << "Statistics for indirect call sites profile:\n";
    showValueSitesStats(OS, IPVK_IndirectCallTarget,
                        VPStats[IPVK_IndirectCallTarget]);
  }

  if (ShownFunctions && ShowVTables) {
    OS << "Statistics for vtable profile:\n";
    showValueSitesStats(OS, IPVK_VTableTarget, VPStats[IPVK_VTableTarget]);
  }

  if (ShownFunctions && ShowMemOPSizes) {
    OS << "Statistics for memory intrinsic calls sizes profile:\n";
    showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]);
  }

  if (ShowDetailedSummary) {
    OS << "Total number of blocks: " << PS->getNumCounts() << "\n";
    OS << "Total count: " << PS->getTotalCount() << "\n";
    PS->printDetailedSummary(OS);
  }

  if (ShowBinaryIds)
    if (Error E = Reader->printBinaryIds(OS))
      exitWithError(std::move(E), Filename);

  if (ShowProfileVersion)
    OS << "Profile version: " << Reader->getVersion() << "\n";

  if (ShowTemporalProfTraces) {
    auto &Traces = Reader->getTemporalProfTraces();
    OS << "Temporal Profile Traces (samples=" << Traces.size()
       << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n";
    unsigned TraceIdx = 0;
    for (auto &Trace : Traces) {
      OS << " Temporal Profile Trace " << TraceIdx
         << " (weight=" << Trace.Weight
         << " count=" << Trace.FunctionNameRefs.size() << "):\n";
      for (auto &NameRef : Trace.FunctionNameRefs)
        OS << " " << Reader->getSymtab().getFuncOrVarName(NameRef) << "\n";
      ++TraceIdx;
    }
  }

  return 0;
}
2996
2997
static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
2998
raw_fd_ostream &OS) {
2999
if (!Reader->dumpSectionInfo(OS)) {
3000
WithColor::warning() << "-show-sec-info-only is only supported for "
3001
<< "sample profile in extbinary format and is "
3002
<< "ignored for other formats.\n";
3003
return;
3004
}
3005
}
3006
3007
namespace {
3008
struct HotFuncInfo {
3009
std::string FuncName;
3010
uint64_t TotalCount = 0;
3011
double TotalCountPercent = 0.0f;
3012
uint64_t MaxCount = 0;
3013
uint64_t EntryCount = 0;
3014
3015
HotFuncInfo() = default;
3016
3017
HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
3018
: FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP),
3019
MaxCount(MS), EntryCount(ES) {}
3020
};
3021
} // namespace
3022
3023
// Print out detailed information about hot functions in PrintValues vector.
3024
// Users specify titles and offset of every columns through ColumnTitle and
3025
// ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
3026
// and at least 4. Besides, users can optionally give a HotFuncMetric string to
3027
// print out or let it be an empty string.
3028
static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
3029
const std::vector<int> &ColumnOffset,
3030
const std::vector<HotFuncInfo> &PrintValues,
3031
uint64_t HotFuncCount, uint64_t TotalFuncCount,
3032
uint64_t HotProfCount, uint64_t TotalProfCount,
3033
const std::string &HotFuncMetric,
3034
uint32_t TopNFunctions, raw_fd_ostream &OS) {
3035
assert(ColumnOffset.size() == ColumnTitle.size() &&
3036
"ColumnOffset and ColumnTitle should have the same size");
3037
assert(ColumnTitle.size() >= 4 &&
3038
"ColumnTitle should have at least 4 elements");
3039
assert(TotalFuncCount > 0 &&
3040
"There should be at least one function in the profile");
3041
double TotalProfPercent = 0;
3042
if (TotalProfCount > 0)
3043
TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100;
3044
3045
formatted_raw_ostream FOS(OS);
3046
FOS << HotFuncCount << " out of " << TotalFuncCount
3047
<< " functions with profile ("
3048
<< format("%.2f%%",
3049
(static_cast<double>(HotFuncCount) / TotalFuncCount * 100))
3050
<< ") are considered hot functions";
3051
if (!HotFuncMetric.empty())
3052
FOS << " (" << HotFuncMetric << ")";
3053
FOS << ".\n";
3054
FOS << HotProfCount << " out of " << TotalProfCount << " profile counts ("
3055
<< format("%.2f%%", TotalProfPercent) << ") are from hot functions.\n";
3056
3057
for (size_t I = 0; I < ColumnTitle.size(); ++I) {
3058
FOS.PadToColumn(ColumnOffset[I]);
3059
FOS << ColumnTitle[I];
3060
}
3061
FOS << "\n";
3062
3063
uint32_t Count = 0;
3064
for (const auto &R : PrintValues) {
3065
if (TopNFunctions && (Count++ == TopNFunctions))
3066
break;
3067
FOS.PadToColumn(ColumnOffset[0]);
3068
FOS << R.TotalCount << " (" << format("%.2f%%", R.TotalCountPercent) << ")";
3069
FOS.PadToColumn(ColumnOffset[1]);
3070
FOS << R.MaxCount;
3071
FOS.PadToColumn(ColumnOffset[2]);
3072
FOS << R.EntryCount;
3073
FOS.PadToColumn(ColumnOffset[3]);
3074
FOS << R.FuncName << "\n";
3075
}
3076
}
3077
3078
/// Collect the hot functions of \p Profiles and print them, hottest first,
/// through dumpHotFunctionList. The hotness threshold is the MinCount of the
/// detailed-summary entry whose Cutoff equals 990000 (0 when \p PS has no
/// such entry). \p TopN is forwarded as the row limit. Always returns 0.
static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles,
                               ProfileSummary &PS, uint32_t TopN,
                               raw_fd_ostream &OS) {
  using namespace sampleprof;

  const uint32_t HotFuncCutoff = 990000;
  uint64_t MinCountThreshold = 0;
  for (const ProfileSummaryEntry &Entry : PS.getDetailedSummary()) {
    if (Entry.Cutoff != HotFuncCutoff)
      continue;
    MinCountThreshold = Entry.MinCount;
    break;
  }

  // Hot functions keyed by their total samples in descending order; each
  // value carries the function profile and its max sample. The same pass
  // also sums the total samples of all functions, hot or not.
  using HotEntry = std::pair<const FunctionSamples *, const uint64_t>;
  std::multimap<uint64_t, HotEntry, std::greater<uint64_t>> HotFunc;
  uint64_t ProfileTotalSample = 0;
  uint64_t HotFuncSample = 0;
  uint64_t HotFuncCount = 0;

  for (const auto &NameAndProf : Profiles) {
    const FunctionSamples &Prof = NameAndProf.second;
    ProfileTotalSample += Prof.getTotalSamples();

    FuncSampleStats Stats;
    getFuncSampleStats(Prof, Stats, MinCountThreshold);
    if (!isFunctionHot(Stats, MinCountThreshold))
      continue;

    HotFunc.emplace(Prof.getTotalSamples(),
                    std::make_pair(&(NameAndProf.second), Stats.MaxSample));
    HotFuncSample += Prof.getTotalSamples();
    ++HotFuncCount;
  }

  // Turn the sorted hot functions into printable rows.
  std::vector<HotFuncInfo> PrintValues;
  for (const auto &Hot : HotFunc) {
    const FunctionSamples &Func = *Hot.second.first;
    double TotalSamplePercent = 0;
    if (ProfileTotalSample > 0)
      TotalSamplePercent =
          (Func.getTotalSamples() * 100.0) / ProfileTotalSample;
    PrintValues.push_back(HotFuncInfo(
        Func.getContext().toString(), Func.getTotalSamples(),
        TotalSamplePercent, Hot.second.second, Func.getHeadSamplesEstimate()));
  }

  const std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample",
                                             "Entry sample", "Function name"};
  const std::vector<int> ColumnOffset{0, 24, 42, 58};
  const std::string Metric =
      "max sample >= " + std::to_string(MinCountThreshold);

  dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
                      Profiles.size(), HotFuncSample, ProfileTotalSample,
                      Metric, TopN, OS);

  return 0;
}
3140
3141
/// Show the sample profile named by the global Filename on \p OS.
///
/// Depending on the command-line flags this prints only the section info,
/// or the whole profile / a single function's profile, optionally followed
/// by the profile symbol list, the detailed summary and the hot function
/// list. YAML output is rejected, and JSON output is only available when all
/// functions are printed. Returns 0; failures exit through exitWithError /
/// exitWithErrorCode.
static int showSampleProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
  if (SFormat == ShowFormat::Yaml)
    exitWithError("YAML output is not supported for sample profiles");
  using namespace sampleprof;
  LLVMContext Context;
  auto FS = vfs::getRealFileSystem();
  auto ReaderOrErr = SampleProfileReader::create(Filename, Context, *FS,
                                                 FSDiscriminatorPassOption);
  if (std::error_code EC = ReaderOrErr.getError())
    exitWithErrorCode(EC, Filename);

  auto Reader = std::move(ReaderOrErr.get());
  // Section info is dumped without reading the profile body.
  if (ShowSectionInfoOnly) {
    showSectionInfo(Reader.get(), OS);
    return 0;
  }

  if (std::error_code EC = Reader->read())
    exitWithErrorCode(EC, Filename);

  if (ShowAllFunctions || FuncNameFilter.empty()) {
    if (SFormat == ShowFormat::Json)
      Reader->dumpJson(OS);
    else
      Reader->dump(OS);
  } else {
    if (SFormat == ShowFormat::Json)
      exitWithError(
          "the JSON format is supported only when all functions are to "
          "be printed");

    // TODO: parse context string to support filtering by contexts.
    // An unknown function name is shown as an empty profile.
    FunctionSamples *Samples =
        Reader->getSamplesFor(StringRef(FuncNameFilter));
    Reader->dumpFunctionProfile(Samples ? *Samples : FunctionSamples(), OS);
  }

  if (ShowProfileSymbolList) {
    // The reader may have no symbol list to hand out; do not dereference a
    // null pointer in that case.
    std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
        Reader->getProfileSymbolList();
    if (ReaderList)
      ReaderList->dump(OS);
    else
      WithColor::warning() << "--" << ShowProfileSymbolList.ArgStr
                           << " ignored: the profile has no symbol list\n";
  }

  if (ShowDetailedSummary) {
    auto &PS = Reader->getSummary();
    PS.printSummary(OS);
    PS.printDetailedSummary(OS);
  }

  if (ShowHotFuncList || TopNFunctions)
    showHotFunctionList(Reader->getProfiles(), Reader->getSummary(),
                        TopNFunctions, OS);

  return 0;
}
3195
3196
/// Show the raw MemProf profile named by the global Filename, symbolized
/// against ProfiledBinary, as YAML on \p OS. JSON output is rejected.
/// Returns 0; failures exit through exitWithError.
static int showMemProfProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
  using llvm::memprof::RawMemProfReader;

  if (SFormat == ShowFormat::Json)
    exitWithError("JSON output is not supported for MemProf");

  auto ReaderOr =
      RawMemProfReader::create(Filename, ProfiledBinary, /*KeepNames=*/true);
  // Since the error can be related to the profile or the binary we do not
  // pass whence. Instead additional context is provided where necessary in
  // the error message.
  if (Error E = ReaderOr.takeError())
    exitWithError(std::move(E), /*Whence*/ "");

  // Take ownership of the reader out of the Expected wrapper.
  std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOr.get());

  Reader->printYAML(OS);
  return 0;
}
3213
3214
static int showDebugInfoCorrelation(const std::string &Filename,
3215
ShowFormat SFormat, raw_fd_ostream &OS) {
3216
if (SFormat == ShowFormat::Json)
3217
exitWithError("JSON output is not supported for debug info correlation");
3218
std::unique_ptr<InstrProfCorrelator> Correlator;
3219
if (auto Err =
3220
InstrProfCorrelator::get(Filename, InstrProfCorrelator::DEBUG_INFO)
3221
.moveInto(Correlator))
3222
exitWithError(std::move(Err), Filename);
3223
if (SFormat == ShowFormat::Yaml) {
3224
if (auto Err = Correlator->dumpYaml(MaxDbgCorrelationWarnings, OS))
3225
exitWithError(std::move(Err), Filename);
3226
return 0;
3227
}
3228
3229
if (auto Err = Correlator->correlateProfileData(MaxDbgCorrelationWarnings))
3230
exitWithError(std::move(Err), Filename);
3231
3232
InstrProfSymtab Symtab;
3233
if (auto Err = Symtab.create(
3234
StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize())))
3235
exitWithError(std::move(Err), Filename);
3236
3237
if (ShowProfileSymbolList)
3238
Symtab.dumpNames(OS);
3239
// TODO: Read "Profile Data Type" from debug info to compute and show how many
3240
// counters the section holds.
3241
if (ShowDetailedSummary)
3242
OS << "Counters section size: 0x"
3243
<< Twine::utohexstr(Correlator->getCountersSectionSize()) << " bytes\n";
3244
OS << "Found " << Correlator->getDataSize() << " functions\n";
3245
3246
return 0;
3247
}
3248
3249
/// Driver for the `show` subcommand.
///
/// Validates the input/output options, opens the output stream, and then
/// hands off to the printer that matches the requested input: debug info
/// correlation when a debug info file was given, otherwise the instrumentation,
/// sample, or MemProf printer selected by ShowProfileKind.
///
/// \param ProgName program name used as the prefix of diagnostics.
/// \returns 1 when the input and output file names are equal, otherwise the
/// result of the selected printer.
static int show_main(StringRef ProgName) {
  const bool HasProfileInput = !Filename.empty();
  const bool HasDebugInfoInput = !DebugInfoFilename.empty();

  if (!HasProfileInput && !HasDebugInfoInput)
    exitWithError(
        "the positional argument '<profdata-file>' is required unless '--" +
        DebugInfoFilename.ArgStr + "' is provided");

  // The output stream is opened before the input is read, so refuse to write
  // over the profile being shown.
  if (Filename == OutputFilename) {
    errs() << ProgName
           << " show: Input file name cannot be the same as the output file "
              "name!\n";
    return 1;
  }

  // The JSON flag overrides whatever show format was selected.
  if (JsonFormat)
    SFormat = ShowFormat::Json;

  std::error_code OpenEC;
  raw_fd_ostream OS(OutputFilename.data(), OpenEC, sys::fs::OF_TextWithCRLF);
  if (OpenEC)
    exitWithErrorCode(OpenEC, OutputFilename);

  if (ShowAllFunctions && !FuncNameFilter.empty())
    WithColor::warning() << "-function argument ignored: showing all functions\n";

  // A debug info file takes precedence over the profile kind.
  if (HasDebugInfoInput)
    return showDebugInfoCorrelation(DebugInfoFilename, SFormat, OS);

  if (ShowProfileKind == instr)
    return showInstrProfile(SFormat, OS);

  // Anything that is neither instr nor sample is shown as MemProf.
  return ShowProfileKind == sample ? showSampleProfile(SFormat, OS)
                                   : showMemProfProfile(SFormat, OS);
}
3281
3282
static int order_main() {
3283
std::error_code EC;
3284
raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3285
if (EC)
3286
exitWithErrorCode(EC, OutputFilename);
3287
auto FS = vfs::getRealFileSystem();
3288
auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
3289
if (Error E = ReaderOrErr.takeError())
3290
exitWithError(std::move(E), Filename);
3291
3292
auto Reader = std::move(ReaderOrErr.get());
3293
for (auto &I : *Reader) {
3294
// Read all entries
3295
(void)I;
3296
}
3297
ArrayRef Traces = Reader->getTemporalProfTraces();
3298
if (NumTestTraces && NumTestTraces >= Traces.size())
3299
exitWithError(
3300
"--" + NumTestTraces.ArgStr +
3301
" must be smaller than the total number of traces: expected: < " +
3302
Twine(Traces.size()) + ", actual: " + Twine(NumTestTraces));
3303
ArrayRef TestTraces = Traces.take_back(NumTestTraces);
3304
Traces = Traces.drop_back(NumTestTraces);
3305
3306
std::vector<BPFunctionNode> Nodes;
3307
TemporalProfTraceTy::createBPFunctionNodes(Traces, Nodes);
3308
BalancedPartitioningConfig Config;
3309
BalancedPartitioning BP(Config);
3310
BP.run(Nodes);
3311
3312
OS << "# Ordered " << Nodes.size() << " functions\n";
3313
if (!TestTraces.empty()) {
3314
// Since we don't know the symbol sizes, we assume 32 functions per page.
3315
DenseMap<BPFunctionNode::IDT, unsigned> IdToPageNumber;
3316
for (auto &Node : Nodes)
3317
IdToPageNumber[Node.Id] = IdToPageNumber.size() / 32;
3318
3319
SmallSet<unsigned, 0> TouchedPages;
3320
unsigned Area = 0;
3321
for (auto &Trace : TestTraces) {
3322
for (auto Id : Trace.FunctionNameRefs) {
3323
auto It = IdToPageNumber.find(Id);
3324
if (It == IdToPageNumber.end())
3325
continue;
3326
TouchedPages.insert(It->getSecond());
3327
Area += TouchedPages.size();
3328
}
3329
TouchedPages.clear();
3330
}
3331
OS << "# Total area under the page fault curve: " << (float)Area << "\n";
3332
}
3333
OS << "# Warning: Mach-O may prefix symbols with \"_\" depending on the "
3334
"linkage and this output does not take that into account. Some "
3335
"post-processing may be required before passing to the linker via "
3336
"-order_file.\n";
3337
for (auto &N : Nodes) {
3338
auto [Filename, ParsedFuncName] =
3339
getParsedIRPGOName(Reader->getSymtab().getFuncOrVarName(N.Id));
3340
if (!Filename.empty())
3341
OS << "# " << Filename << "\n";
3342
OS << ParsedFuncName << "\n";
3343
}
3344
return 0;
3345
}
3346
3347
int llvm_profdata_main(int argc, char **argvNonConst,
3348
const llvm::ToolContext &) {
3349
const char **argv = const_cast<const char **>(argvNonConst);
3350
3351
StringRef ProgName(sys::path::filename(argv[0]));
3352
3353
if (argc < 2) {
3354
errs() << ProgName
3355
<< ": No subcommand specified! Run llvm-profata --help for usage.\n";
3356
return 1;
3357
}
3358
3359
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data\n");
3360
3361
if (ShowSubcommand)
3362
return show_main(ProgName);
3363
3364
if (OrderSubcommand)
3365
return order_main();
3366
3367
if (OverlapSubcommand)
3368
return overlap_main();
3369
3370
if (MergeSubcommand)
3371
return merge_main(ProgName);
3372
3373
errs() << ProgName
3374
<< ": Unknown command. Run llvm-profdata --help for usage.\n";
3375
return 1;
3376
}
3377
3378