GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/ProfileData/MemProf.cpp
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/HashBuilder.h"

namespace llvm {
namespace memprof {
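// getFullSchema gathers every Meta enumerator via the MIBEntryDef X-macro:
// each MIBEntryDef(NameTag, Name, Type) entry in MIBEntryDef.inc expands to a
// List.push_back(Meta::Name) call below, so the full schema automatically
// tracks the entries listed in the .inc file.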
MemProfSchema getFullSchema() {
  MemProfSchema List;
#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name);
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
  return List;
}

MemProfSchema getHotColdSchema() {
  return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime,
          Meta::TotalLifetimeAccessDensity};
}

static size_t serializedSizeV0(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The number of frames to serialize.
  Size += sizeof(uint64_t);
  // The callstack frame ids.
  Size += sizeof(FrameId) * IAI.CallStack.size();
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

static size_t serializedSizeV2(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The CallStackId
  Size += sizeof(CallStackId);
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

static size_t serializedSizeV3(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The linear call stack ID.
  Size += sizeof(LinearCallStackId);
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
                                             IndexedVersion Version) const {
  switch (Version) {
  case Version0:
  case Version1:
    return serializedSizeV0(*this, Schema);
  case Version2:
    return serializedSizeV2(*this, Schema);
  case Version3:
    return serializedSizeV3(*this, Schema);
  }
  llvm_unreachable("unsupported MemProf version");
}

static size_t serializedSizeV0(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version0);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  for (const auto &Frames : Record.CallSites) {
    // The number of frame ids to serialize.
    Result += sizeof(uint64_t);
    Result += Frames.size() * sizeof(FrameId);
  }
  return Result;
}

static size_t serializedSizeV2(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version2);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  // The CallStackId
  Result += Record.CallSiteIds.size() * sizeof(CallStackId);
  return Result;
}

static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version3);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  // The linear call stack ID.
  Result += Record.CallSiteIds.size() * sizeof(LinearCallStackId);
  return Result;
}

size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
                                            IndexedVersion Version) const {
  switch (Version) {
  case Version0:
  case Version1:
    return serializedSizeV0(*this, Schema);
  case Version2:
    return serializedSizeV2(*this, Schema);
  case Version3:
    return serializedSizeV3(*this, Schema);
  }
  llvm_unreachable("unsupported MemProf version");
}

static void serializeV0(const IndexedMemProfRecord &Record,
                        const MemProfSchema &Schema, raw_ostream &OS) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    LE.write<uint64_t>(N.CallStack.size());
    for (const FrameId &Id : N.CallStack)
      LE.write<FrameId>(Id);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSites.size());
  for (const auto &Frames : Record.CallSites) {
    LE.write<uint64_t>(Frames.size());
    for (const FrameId &Id : Frames)
      LE.write<FrameId>(Id);
  }
}

static void serializeV2(const IndexedMemProfRecord &Record,
                        const MemProfSchema &Schema, raw_ostream &OS) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    LE.write<CallStackId>(N.CSId);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSiteIds.size());
  for (const auto &CSId : Record.CallSiteIds)
    LE.write<CallStackId>(CSId);
}

static void serializeV3(
    const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
    raw_ostream &OS,
    llvm::DenseMap<CallStackId, LinearCallStackId> &MemProfCallStackIndexes) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    assert(MemProfCallStackIndexes.contains(N.CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[N.CSId]);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSiteIds.size());
  for (const auto &CSId : Record.CallSiteIds) {
    assert(MemProfCallStackIndexes.contains(CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[CSId]);
  }
}

void IndexedMemProfRecord::serialize(
    const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version,
    llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
    const {
  switch (Version) {
  case Version0:
  case Version1:
    serializeV0(*this, Schema, OS);
    return;
  case Version2:
    serializeV2(*this, Schema, OS);
    return;
  case Version3:
    serializeV3(*this, Schema, OS, *MemProfCallStackIndexes);
    return;
  }
  llvm_unreachable("unsupported MemProf version");
}
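
// For illustration, the on-disk record layouts produced by the helpers above
// (and consumed by the deserializers below) are, schematically:
//
//   V0/V1: [NumAllocSites]
//          NumAllocSites x ([NumFrames] [FrameId ...] [MIB payload])
//          [NumCallSites]
//          NumCallSites x ([NumFrames] [FrameId ...])
//
//   V2:    [NumAllocSites] NumAllocSites x ([CallStackId] [MIB payload])
//          [NumCallSiteIds] NumCallSiteIds x [CallStackId]
//
//   V3:    [NumAllocSites] NumAllocSites x ([LinearCallStackId] [MIB payload])
//          [NumCallSiteIds] NumCallSiteIds x [LinearCallStackId]
//
// The counts are little-endian uint64_t values, and each MIB payload occupies
// PortableMemInfoBlock::serializedSize(Schema) bytes.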

static IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    const uint64_t NumFrames =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    for (uint64_t J = 0; J < NumFrames; J++) {
      const FrameId Id =
          endian::readNext<FrameId, llvm::endianness::little>(Ptr);
      Node.CallStack.push_back(Id);
    }
    Node.CSId = hashCallStack(Node.CallStack);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    const uint64_t NumFrames =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    llvm::SmallVector<FrameId> Frames;
    Frames.reserve(NumFrames);
    for (uint64_t K = 0; K < NumFrames; K++) {
      const FrameId Id =
          endian::readNext<FrameId, llvm::endianness::little>(Ptr);
      Frames.push_back(Id);
    }
    Record.CallSites.push_back(Frames);
    Record.CallSiteIds.push_back(hashCallStack(Frames));
  }

  return Record;
}

static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.AllocSites.reserve(NumNodes);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId = endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.CallSiteIds.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    CallStackId CSId =
        endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
    Record.CallSiteIds.push_back(CSId);
  }

  return Record;
}

static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.AllocSites.reserve(NumNodes);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.CallSiteIds.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    // We are storing LinearCallStackId in CallSiteIds, which is a vector of
    // CallStackId. Assert that CallStackId is no smaller than
    // LinearCallStackId.
    static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
    LinearCallStackId CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Record.CallSiteIds.push_back(CSId);
  }

  return Record;
}

IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
                                  const unsigned char *Ptr,
                                  IndexedVersion Version) {
  switch (Version) {
  case Version0:
  case Version1:
    return deserializeV0(Schema, Ptr);
  case Version2:
    return deserializeV2(Schema, Ptr);
  case Version3:
    return deserializeV3(Schema, Ptr);
  }
  llvm_unreachable("unsupported MemProf version");
}

MemProfRecord IndexedMemProfRecord::toMemProfRecord(
    llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const {
  MemProfRecord Record;

  Record.AllocSites.reserve(AllocSites.size());
  for (const IndexedAllocationInfo &IndexedAI : AllocSites) {
    AllocationInfo AI;
    AI.Info = IndexedAI.Info;
    AI.CallStack = Callback(IndexedAI.CSId);
    Record.AllocSites.push_back(std::move(AI));
  }

  Record.CallSites.reserve(CallSiteIds.size());
  for (CallStackId CSId : CallSiteIds)
    Record.CallSites.push_back(Callback(CSId));

  return Record;
}

GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
  // Canonicalize the function name to drop suffixes such as ".llvm.". Note
  // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop
  // those by default. This is by design to differentiate internal linkage
  // functions during matching. By dropping the other suffixes we can then match
  // functions in the profile use phase prior to their addition. Note that this
  // applies to both instrumented and sampled function names.
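  // For example (names are illustrative), "foo.llvm.123456" canonicalizes to
  // "foo", whereas "foo.__uniq.987654" is left unchanged.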
  StringRef CanonicalName =
      sampleprof::FunctionSamples::getCanonicalFnName(FunctionName);

  // We use the function guid which we expect to be a uint64_t. At
  // this time, it is the lower 64 bits of the md5 of the canonical
  // function name.
  return Function::getGUID(CanonicalName);
}

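// The schema is stored as a little-endian uint64_t count followed by that many
// little-endian uint64_t tags, each holding a Meta enumerator value (layout
// inferred from the reader below).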
Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
  using namespace support;

  const unsigned char *Ptr = Buffer;
  const uint64_t NumSchemaIds =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  if (NumSchemaIds > static_cast<uint64_t>(Meta::Size)) {
    return make_error<InstrProfError>(instrprof_error::malformed,
                                      "memprof schema invalid");
  }

  MemProfSchema Result;
  for (size_t I = 0; I < NumSchemaIds; I++) {
    const uint64_t Tag =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    if (Tag >= static_cast<uint64_t>(Meta::Size)) {
      return make_error<InstrProfError>(instrprof_error::malformed,
                                        "memprof schema invalid");
    }
    Result.push_back(static_cast<Meta>(Tag));
  }
  // Advance the buffer to one past the schema if we succeeded.
  Buffer = Ptr;
  return Result;
}

CallStackId hashCallStack(ArrayRef<FrameId> CS) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  for (FrameId F : CS)
    HashBuilder.add(F);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  CallStackId CSId;
  std::memcpy(&CSId, Hash.data(), sizeof(Hash));
  return CSId;
}

// Encode a call stack into RadixArray. Return the starting index within
// RadixArray. For each call stack we encode, we emit two or three components
// into RadixArray. If a given call stack doesn't have a common prefix relative
// to the previous one, we emit:
//
// - the frames in the given call stack in the root-to-leaf order
//
// - the length of the given call stack
//
// If a given call stack has a non-empty common prefix relative to the previous
// one, we emit:
//
// - the relative location of the common prefix, encoded as a negative number.
//
// - a portion of the given call stack that's beyond the common prefix
//
// - the length of the given call stack, including the length of the common
//   prefix.
//
// The resulting RadixArray requires a somewhat unintuitive backward traversal
// to reconstruct a call stack -- read the call stack length and scan backward
// while collecting frames in the leaf-to-root order. build, the caller of this
// function, reverses RadixArray in place so that we can reconstruct a call
// stack as if we were deserializing an array in a typical way -- the call stack
// length followed by the frames in the leaf-to-root order except that we need
// to handle pointers to parents along the way.
//
// To quickly determine the location of the common prefix within RadixArray,
// Indexes caches the indexes of the previous call stack's frames within
// RadixArray.
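//
// For illustration only (hypothetical FrameIds and linear frame indexes),
// suppose we encode two call stacks, written root to leaf, where
// MemProfFrameIndexes maps F1->1, F2->2, F3->3, F4->4:
//
//   B: F1 -> F2 -> F4   (encoded first, so it has no previous call stack)
//   A: F1 -> F2 -> F3   (shares the root prefix F1 -> F2 with B)
//
// Encoding B emits its frames and length:        RadixArray = [1, 2, 4, 3]
// Encoding A emits the parent offset -3 (which points back at F2), the one
// frame beyond the prefix, and the full length:  RadixArray = [1, 2, 4, 3, -3, 3, 3]
// build then reverses the array in place:        RadixArray = [3, 3, -3, 3, 4, 2, 1]
// After reversal, A is reconstructed from index 0 (length 3, frame 3, then the
// -3 entry redirects to the shared 2, 1) and B from index 3 (length 3, frames
// 4, 2, 1), both in the leaf-to-root order.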
LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(
    const llvm::SmallVector<FrameId> *CallStack,
    const llvm::SmallVector<FrameId> *Prev,
    const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes) {
  // Compute the length of the common root prefix between Prev and CallStack.
  uint32_t CommonLen = 0;
  if (Prev) {
    auto Pos = std::mismatch(Prev->rbegin(), Prev->rend(), CallStack->rbegin(),
                             CallStack->rend());
    CommonLen = std::distance(CallStack->rbegin(), Pos.second);
  }

  // Drop the portion beyond CommonLen.
  assert(CommonLen <= Indexes.size());
  Indexes.resize(CommonLen);

  // Append a pointer to the parent.
  if (CommonLen) {
    uint32_t CurrentIndex = RadixArray.size();
    uint32_t ParentIndex = Indexes.back();
    // The offset to the parent must be negative because we are pointing to an
    // element we've already added to RadixArray.
    assert(ParentIndex < CurrentIndex);
    RadixArray.push_back(ParentIndex - CurrentIndex);
  }

  // Copy the part of the call stack beyond the common prefix to RadixArray.
  assert(CommonLen <= CallStack->size());
  for (FrameId F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) {
    // Remember the index of F in RadixArray.
    Indexes.push_back(RadixArray.size());
    RadixArray.push_back(MemProfFrameIndexes.find(F)->second);
  }
  assert(CallStack->size() == Indexes.size());

  // End with the call stack length.
  RadixArray.push_back(CallStack->size());

  // Return the index within RadixArray where we can start reconstructing a
  // given call stack from.
  return RadixArray.size() - 1;
}

void CallStackRadixTreeBuilder::build(
    llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
        &&MemProfCallStackData,
    const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
    llvm::DenseMap<FrameId, FrameStat> &FrameHistogram) {
  // Take the vector portion of MemProfCallStackData. The vector is exactly
  // what we need to sort. Also, we no longer need its lookup capability.
  llvm::SmallVector<CSIdPair, 0> CallStacks = MemProfCallStackData.takeVector();

  // Return early if we have no work to do.
  if (CallStacks.empty()) {
    RadixArray.clear();
    CallStackPos.clear();
    return;
  }

  // Sorting the list of call stacks in the dictionary order is sufficient to
  // maximize the length of the common prefix between two adjacent call stacks
  // and thus minimize the length of RadixArray. However, we go one step
  // further and try to reduce the number of times we follow pointers to
  // parents during deserialization. Consider a poorly encoded radix tree:
  //
  // CallStackId 1:  f1 -> f2 -> f3
  //                  |
  // CallStackId 2:   +--- f4 -> f5
  //                        |
  // CallStackId 3:         +--> f6
  //
  // Here, f2 and f4 appear once and twice, respectively, in the call stacks.
  // Once we encode CallStackId 1 into RadixArray, every other call stack with
  // the common prefix f1 ends up pointing to CallStackId 1. Since CallStackId 3
  // shares "f1 f4" with CallStackId 2, CallStackId 3 needs to follow pointers
  // to parents twice.
  //
  // We try to alleviate the situation by sorting the list of call stacks by
  // comparing the popularity of frames rather than the integer values of
  // FrameIds. In the example above, f4 is more popular than f2, so we sort the
  // call stacks and encode them as:
  //
  // CallStackId 2:  f1 -- f4 -> f5
  //                  |     |
  // CallStackId 3:   |     +--> f6
  //                  |
  // CallStackId 1:   +--> f2 -> f3
  //
  // Notice that CallStackId 3 follows a pointer to a parent only once.
  //
  // All this is a quick-n-dirty trick to reduce the number of jumps. The
  // proper way would be to compute the weight of each radix tree node -- how
  // many call stacks use a given radix tree node -- and encode a radix tree
  // from the heaviest node first. We do not do so because that's a lot of work.
  llvm::sort(CallStacks, [&](const CSIdPair &L, const CSIdPair &R) {
    // Call stacks are stored from leaf to root. Perform comparisons from the
    // root.
    return std::lexicographical_compare(
        L.second.rbegin(), L.second.rend(), R.second.rbegin(), R.second.rend(),
        [&](FrameId F1, FrameId F2) {
          uint64_t H1 = FrameHistogram[F1].Count;
          uint64_t H2 = FrameHistogram[F2].Count;
          // Popular frames should come later because we encode call stacks
          // from the last one in the list.
          if (H1 != H2)
            return H1 < H2;
          // For sort stability.
          return F1 < F2;
        });
  });

  // Reserve some reasonable amount of storage.
  RadixArray.clear();
  RadixArray.reserve(CallStacks.size() * 8);

  // Indexes will grow as long as the longest call stack.
  Indexes.clear();
  Indexes.reserve(512);

  // CallStackPos will grow to exactly CallStacks.size() entries.
  CallStackPos.clear();
  CallStackPos.reserve(CallStacks.size());

  // Compute the radix array. We encode one call stack at a time, computing the
  // longest prefix that's shared with the previous call stack we encode. For
  // each call stack we encode, we remember a mapping from CallStackId to its
  // position within RadixArray.
  //
  // As an optimization, we encode from the last call stack in CallStacks to
  // reduce the number of times we follow pointers to the parents. Consider the
  // list of call stacks that has been sorted in the dictionary order:
  //
  // Call Stack 1: F1
  // Call Stack 2: F1 -> F2
  // Call Stack 3: F1 -> F2 -> F3
  //
  // If we traversed CallStacks in the forward order, we would end up with a
  // radix tree like:
  //
  // Call Stack 1: F1
  //               |
  // Call Stack 2: +---> F2
  //                     |
  // Call Stack 3:       +---> F3
  //
  // Notice that each call stack jumps to the previous one. However, if we
  // traverse CallStacks in the reverse order, then Call Stack 3 has the
  // complete call stack encoded without any pointers. Call Stack 1 and 2 point
  // to appropriate prefixes of Call Stack 3.
  const llvm::SmallVector<FrameId> *Prev = nullptr;
  for (const auto &[CSId, CallStack] : llvm::reverse(CallStacks)) {
    LinearCallStackId Pos =
        encodeCallStack(&CallStack, Prev, MemProfFrameIndexes);
    CallStackPos.insert({CSId, Pos});
    Prev = &CallStack;
  }

  // "RadixArray.size() - 1" below is problematic if RadixArray is empty.
  assert(!RadixArray.empty());

  // Reverse the radix array in place. We do so mostly for intuitive
  // deserialization where we would read the length field and then the call
  // stack frames proper just like any other array deserialization, except
  // that we have occasional jumps to take advantage of prefixes.
  for (size_t I = 0, J = RadixArray.size() - 1; I < J; ++I, --J)
    std::swap(RadixArray[I], RadixArray[J]);

  // "Reverse" the indexes stored in CallStackPos.
  for (auto &[K, V] : CallStackPos)
    V = RadixArray.size() - 1 - V;
}

llvm::DenseMap<FrameId, FrameStat>
computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
                          &MemProfCallStackData) {
  llvm::DenseMap<FrameId, FrameStat> Histogram;

  for (const auto &KV : MemProfCallStackData) {
    const auto &CS = KV.second;
    for (unsigned I = 0, E = CS.size(); I != E; ++I) {
      auto &S = Histogram[CS[I]];
      ++S.Count;
      S.PositionSum += I;
    }
  }
  return Histogram;
}

void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) {
  for (const auto &AS : Record.AllocSites) {
    assert(AS.CSId == hashCallStack(AS.CallStack));
    (void)AS;
  }
}

void verifyFunctionProfileData(
    const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
        &FunctionProfileData) {
  for (const auto &[GUID, Record] : FunctionProfileData) {
    (void)GUID;
    verifyIndexedMemProfRecord(Record);
  }
}

} // namespace memprof
} // namespace llvm