Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/ProfileData/IndexedMemProfData.cpp
213765 views
1
//===- IndexedMemProfData.h - MemProf format support ------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// MemProf data is serialized in writeMemProf provided in this file.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "llvm/ProfileData/DataAccessProf.h"
14
#include "llvm/ProfileData/InstrProf.h"
15
#include "llvm/ProfileData/InstrProfReader.h"
16
#include "llvm/ProfileData/MemProf.h"
17
#include "llvm/ProfileData/MemProfRadixTree.h"
18
#include "llvm/ProfileData/MemProfSummary.h"
19
#include "llvm/Support/FormatVariadic.h"
20
#include "llvm/Support/OnDiskHashTable.h"
21
22
namespace llvm {
23
24
// Serialize Schema.
25
static void writeMemProfSchema(ProfOStream &OS,
26
const memprof::MemProfSchema &Schema) {
27
OS.write(static_cast<uint64_t>(Schema.size()));
28
for (const auto Id : Schema)
29
OS.write(static_cast<uint64_t>(Id));
30
}
31
32
// Serialize MemProfRecordData. Return RecordTableOffset.
33
static uint64_t writeMemProfRecords(
34
ProfOStream &OS,
35
llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
36
&MemProfRecordData,
37
memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
38
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
39
*MemProfCallStackIndexes = nullptr) {
40
memprof::RecordWriterTrait RecordWriter(Schema, Version,
41
MemProfCallStackIndexes);
42
OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
43
RecordTableGenerator;
44
for (auto &[GUID, Record] : MemProfRecordData) {
45
// Insert the key (func hash) and value (memprof record).
46
RecordTableGenerator.insert(GUID, Record, RecordWriter);
47
}
48
// Release the memory of this MapVector as it is no longer needed.
49
MemProfRecordData.clear();
50
51
// The call to Emit invokes RecordWriterTrait::EmitData which destructs
52
// the memprof record copies owned by the RecordTableGenerator. This works
53
// because the RecordTableGenerator is not used after this point.
54
return RecordTableGenerator.Emit(OS.OS, RecordWriter);
55
}
56
57
// Serialize MemProfFrameData. Return FrameTableOffset.
58
static uint64_t writeMemProfFrames(
59
ProfOStream &OS,
60
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
61
OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
62
FrameTableGenerator;
63
for (auto &[FrameId, Frame] : MemProfFrameData) {
64
// Insert the key (frame id) and value (frame contents).
65
FrameTableGenerator.insert(FrameId, Frame);
66
}
67
// Release the memory of this MapVector as it is no longer needed.
68
MemProfFrameData.clear();
69
70
return FrameTableGenerator.Emit(OS.OS);
71
}
72
73
// Serialize MemProfFrameData. Return the mapping from FrameIds to their
74
// indexes within the frame array.
75
static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
76
writeMemProfFrameArray(
77
ProfOStream &OS,
78
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
79
llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
80
// Mappings from FrameIds to array indexes.
81
llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;
82
83
// Compute the order in which we serialize Frames. The order does not matter
84
// in terms of correctness, but we still compute it for deserialization
85
// performance. Specifically, if we serialize frequently used Frames one
86
// after another, we have better cache utilization. For two Frames that
87
// appear equally frequently, we break a tie by serializing the one that tends
88
// to appear earlier in call stacks. We implement the tie-breaking mechanism
89
// by computing the sum of indexes within call stacks for each Frame. If we
90
// still have a tie, then we just resort to compare two FrameIds, which is
91
// just for stability of output.
92
std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
93
FrameIdOrder.reserve(MemProfFrameData.size());
94
for (const auto &[Id, Frame] : MemProfFrameData)
95
FrameIdOrder.emplace_back(Id, &Frame);
96
assert(MemProfFrameData.size() == FrameIdOrder.size());
97
llvm::sort(FrameIdOrder,
98
[&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
99
const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
100
const auto &SL = FrameHistogram[L.first];
101
const auto &SR = FrameHistogram[R.first];
102
// Popular FrameIds should come first.
103
if (SL.Count != SR.Count)
104
return SL.Count > SR.Count;
105
// If they are equally popular, then the one that tends to appear
106
// earlier in call stacks should come first.
107
if (SL.PositionSum != SR.PositionSum)
108
return SL.PositionSum < SR.PositionSum;
109
// Compare their FrameIds for sort stability.
110
return L.first < R.first;
111
});
112
113
// Serialize all frames while creating mappings from linear IDs to FrameIds.
114
uint64_t Index = 0;
115
MemProfFrameIndexes.reserve(FrameIdOrder.size());
116
for (const auto &[Id, F] : FrameIdOrder) {
117
F->serialize(OS.OS);
118
MemProfFrameIndexes.insert({Id, Index});
119
++Index;
120
}
121
assert(MemProfFrameData.size() == Index);
122
assert(MemProfFrameData.size() == MemProfFrameIndexes.size());
123
124
// Release the memory of this MapVector as it is no longer needed.
125
MemProfFrameData.clear();
126
127
return MemProfFrameIndexes;
128
}
129
130
static uint64_t writeMemProfCallStacks(
131
ProfOStream &OS,
132
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
133
&MemProfCallStackData) {
134
OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
135
CallStackTableGenerator;
136
for (auto &[CSId, CallStack] : MemProfCallStackData)
137
CallStackTableGenerator.insert(CSId, CallStack);
138
// Release the memory of this vector as it is no longer needed.
139
MemProfCallStackData.clear();
140
141
return CallStackTableGenerator.Emit(OS.OS);
142
}
143
144
static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
145
writeMemProfCallStackArray(
146
ProfOStream &OS,
147
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
148
&MemProfCallStackData,
149
llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
150
&MemProfFrameIndexes,
151
llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram,
152
unsigned &NumElements) {
153
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
154
MemProfCallStackIndexes;
155
156
memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
157
Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
158
FrameHistogram);
159
for (auto I : Builder.getRadixArray())
160
OS.write32(I);
161
NumElements = Builder.getRadixArray().size();
162
MemProfCallStackIndexes = Builder.takeCallStackPos();
163
164
// Release the memory of this vector as it is no longer needed.
165
MemProfCallStackData.clear();
166
167
return MemProfCallStackIndexes;
168
}
169
170
// Write out MemProf Version2 as follows:
171
// uint64_t Version
172
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
173
// uint64_t FramePayloadOffset = Offset for the frame payload
174
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
175
// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2)
176
// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
177
// uint64_t Num schema entries
178
// uint64_t Schema entry 0
179
// uint64_t Schema entry 1
180
// ....
181
// uint64_t Schema entry N - 1
182
// OnDiskChainedHashTable MemProfRecordData
183
// OnDiskChainedHashTable MemProfFrameData
184
// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
185
static Error writeMemProfV2(ProfOStream &OS,
186
memprof::IndexedMemProfData &MemProfData,
187
bool MemProfFullSchema) {
188
OS.write(memprof::Version2);
189
uint64_t HeaderUpdatePos = OS.tell();
190
OS.write(0ULL); // Reserve space for the memprof record table offset.
191
OS.write(0ULL); // Reserve space for the memprof frame payload offset.
192
OS.write(0ULL); // Reserve space for the memprof frame table offset.
193
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
194
OS.write(0ULL); // Reserve space for the memprof call stack table offset.
195
196
auto Schema = memprof::getHotColdSchema();
197
if (MemProfFullSchema)
198
Schema = memprof::getFullSchema();
199
writeMemProfSchema(OS, Schema);
200
201
uint64_t RecordTableOffset =
202
writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);
203
204
uint64_t FramePayloadOffset = OS.tell();
205
uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);
206
207
uint64_t CallStackPayloadOffset = OS.tell();
208
uint64_t CallStackTableOffset =
209
writeMemProfCallStacks(OS, MemProfData.CallStacks);
210
211
uint64_t Header[] = {
212
RecordTableOffset, FramePayloadOffset, FrameTableOffset,
213
CallStackPayloadOffset, CallStackTableOffset,
214
};
215
OS.patch({{HeaderUpdatePos, Header}});
216
217
return Error::success();
218
}
219
220
static Error writeMemProfRadixTreeBased(
221
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
222
memprof::IndexedVersion Version, bool MemProfFullSchema,
223
std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData =
224
nullptr,
225
std::unique_ptr<memprof::MemProfSummary> MemProfSum = nullptr) {
226
assert((Version == memprof::Version3 || Version == memprof::Version4) &&
227
"Unsupported version for radix tree format");
228
229
OS.write(Version); // Write the specific version (V3 or V4)
230
uint64_t HeaderUpdatePos = OS.tell();
231
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
232
OS.write(0ULL); // Reserve space for the memprof record payload offset.
233
OS.write(0ULL); // Reserve space for the memprof record table offset.
234
if (Version >= memprof::Version4) {
235
OS.write(0ULL); // Reserve space for the data access profile offset.
236
237
MemProfSum->write(OS);
238
}
239
240
auto Schema = memprof::getHotColdSchema();
241
if (MemProfFullSchema)
242
Schema = memprof::getFullSchema();
243
writeMemProfSchema(OS, Schema);
244
245
llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
246
memprof::computeFrameHistogram(MemProfData.CallStacks);
247
assert(MemProfData.Frames.size() == FrameHistogram.size());
248
249
llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
250
writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);
251
252
uint64_t CallStackPayloadOffset = OS.tell();
253
// The number of elements in the call stack array.
254
unsigned NumElements = 0;
255
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
256
MemProfCallStackIndexes =
257
writeMemProfCallStackArray(OS, MemProfData.CallStacks,
258
MemProfFrameIndexes, FrameHistogram,
259
NumElements);
260
261
uint64_t RecordPayloadOffset = OS.tell();
262
uint64_t RecordTableOffset = writeMemProfRecords(
263
OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes);
264
265
uint64_t DataAccessProfOffset = 0;
266
if (DataAccessProfileData != nullptr) {
267
assert(Version >= memprof::Version4 &&
268
"Data access profiles are added starting from v4");
269
DataAccessProfOffset = OS.tell();
270
if (Error E = DataAccessProfileData->serialize(OS))
271
return E;
272
}
273
274
// Verify that the computation for the number of elements in the call stack
275
// array works.
276
assert(CallStackPayloadOffset +
277
NumElements * sizeof(memprof::LinearFrameId) ==
278
RecordPayloadOffset);
279
280
SmallVector<uint64_t, 4> Header = {
281
CallStackPayloadOffset,
282
RecordPayloadOffset,
283
RecordTableOffset,
284
};
285
if (Version >= memprof::Version4)
286
Header.push_back(DataAccessProfOffset);
287
288
OS.patch({{HeaderUpdatePos, Header}});
289
290
return Error::success();
291
}
292
293
// Write out MemProf Version3
294
static Error writeMemProfV3(ProfOStream &OS,
295
memprof::IndexedMemProfData &MemProfData,
296
bool MemProfFullSchema) {
297
return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3,
298
MemProfFullSchema);
299
}
300
301
// Write out MemProf Version4
302
static Error writeMemProfV4(
303
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
304
bool MemProfFullSchema,
305
std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData,
306
std::unique_ptr<memprof::MemProfSummary> MemProfSum) {
307
return writeMemProfRadixTreeBased(
308
OS, MemProfData, memprof::Version4, MemProfFullSchema,
309
std::move(DataAccessProfileData), std::move(MemProfSum));
310
}
311
312
// Write out the MemProf data in a requested version.
313
Error writeMemProf(
314
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
315
memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
316
std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData,
317
std::unique_ptr<memprof::MemProfSummary> MemProfSum) {
318
switch (MemProfVersionRequested) {
319
case memprof::Version2:
320
return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
321
case memprof::Version3:
322
return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
323
case memprof::Version4:
324
return writeMemProfV4(OS, MemProfData, MemProfFullSchema,
325
std::move(DataAccessProfileData),
326
std::move(MemProfSum));
327
}
328
329
return make_error<InstrProfError>(
330
instrprof_error::unsupported_version,
331
formatv("MemProf version {} not supported; "
332
"requires version between {} and {}, inclusive",
333
MemProfVersionRequested, memprof::MinimumSupportedVersion,
334
memprof::MaximumSupportedVersion));
335
}
336
337
Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,
338
const unsigned char *Ptr) {
339
// The value returned from RecordTableGenerator.Emit.
340
const uint64_t RecordTableOffset =
341
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
342
// The offset in the stream right before invoking
343
// FrameTableGenerator.Emit.
344
const uint64_t FramePayloadOffset =
345
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
346
// The value returned from FrameTableGenerator.Emit.
347
const uint64_t FrameTableOffset =
348
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
349
350
// The offset in the stream right before invoking
351
// CallStackTableGenerator.Emit.
352
uint64_t CallStackPayloadOffset = 0;
353
// The value returned from CallStackTableGenerator.Emit.
354
uint64_t CallStackTableOffset = 0;
355
if (Version >= memprof::Version2) {
356
CallStackPayloadOffset =
357
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
358
CallStackTableOffset =
359
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
360
}
361
362
// Read the schema.
363
auto SchemaOr = memprof::readMemProfSchema(Ptr);
364
if (!SchemaOr)
365
return SchemaOr.takeError();
366
Schema = SchemaOr.get();
367
368
// Now initialize the table reader with a pointer into data buffer.
369
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
370
/*Buckets=*/Start + RecordTableOffset,
371
/*Payload=*/Ptr,
372
/*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
373
374
// Initialize the frame table reader with the payload and bucket offsets.
375
MemProfFrameTable.reset(MemProfFrameHashTable::Create(
376
/*Buckets=*/Start + FrameTableOffset,
377
/*Payload=*/Start + FramePayloadOffset,
378
/*Base=*/Start));
379
380
if (Version >= memprof::Version2)
381
MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
382
/*Buckets=*/Start + CallStackTableOffset,
383
/*Payload=*/Start + CallStackPayloadOffset,
384
/*Base=*/Start));
385
386
return Error::success();
387
}
388
389
Error IndexedMemProfReader::deserializeRadixTreeBased(
390
const unsigned char *Start, const unsigned char *Ptr,
391
memprof::IndexedVersion Version) {
392
assert((Version == memprof::Version3 || Version == memprof::Version4) &&
393
"Unsupported version for radix tree format");
394
// The offset in the stream right before invoking
395
// CallStackTableGenerator.Emit.
396
const uint64_t CallStackPayloadOffset =
397
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
398
// The offset in the stream right before invoking RecordTableGenerator.Emit.
399
const uint64_t RecordPayloadOffset =
400
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
401
// The value returned from RecordTableGenerator.Emit.
402
const uint64_t RecordTableOffset =
403
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
404
405
uint64_t DataAccessProfOffset = 0;
406
if (Version >= memprof::Version4) {
407
DataAccessProfOffset =
408
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
409
MemProfSum = memprof::MemProfSummary::deserialize(Ptr);
410
}
411
412
// Read the schema.
413
auto SchemaOr = memprof::readMemProfSchema(Ptr);
414
if (!SchemaOr)
415
return SchemaOr.takeError();
416
Schema = SchemaOr.get();
417
418
FrameBase = Ptr;
419
CallStackBase = Start + CallStackPayloadOffset;
420
421
// Compute the number of elements in the radix tree array. Since we use this
422
// to reserve enough bits in a BitVector, it's totally OK if we overestimate
423
// this number a little bit because of padding just before the next section.
424
RadixTreeSize = (RecordPayloadOffset - CallStackPayloadOffset) /
425
sizeof(memprof::LinearFrameId);
426
427
// Now initialize the table reader with a pointer into data buffer.
428
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
429
/*Buckets=*/Start + RecordTableOffset,
430
/*Payload=*/Start + RecordPayloadOffset,
431
/*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
432
433
assert((!DataAccessProfOffset || DataAccessProfOffset > RecordTableOffset) &&
434
"Data access profile is either empty or after the record table");
435
if (DataAccessProfOffset > RecordTableOffset) {
436
DataAccessProfileData = std::make_unique<memprof::DataAccessProfData>();
437
const unsigned char *DAPPtr = Start + DataAccessProfOffset;
438
if (Error E = DataAccessProfileData->deserialize(DAPPtr))
439
return E;
440
}
441
442
return Error::success();
443
}
444
445
Error IndexedMemProfReader::deserialize(const unsigned char *Start,
446
uint64_t MemProfOffset) {
447
const unsigned char *Ptr = Start + MemProfOffset;
448
449
// Read the MemProf version number.
450
const uint64_t FirstWord =
451
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
452
453
// Check if the version is supported
454
if (FirstWord >= memprof::MinimumSupportedVersion &&
455
FirstWord <= memprof::MaximumSupportedVersion) {
456
// Everything is good. We can proceed to deserialize the rest.
457
Version = static_cast<memprof::IndexedVersion>(FirstWord);
458
} else {
459
return make_error<InstrProfError>(
460
instrprof_error::unsupported_version,
461
formatv("MemProf version {} not supported; "
462
"requires version between {} and {}, inclusive",
463
FirstWord, memprof::MinimumSupportedVersion,
464
memprof::MaximumSupportedVersion));
465
}
466
467
switch (Version) {
468
case memprof::Version2:
469
if (Error E = deserializeV2(Start, Ptr))
470
return E;
471
break;
472
case memprof::Version3:
473
case memprof::Version4:
474
// V3 and V4 share the same high-level structure (radix tree, linear IDs).
475
if (Error E = deserializeRadixTreeBased(Start, Ptr, Version))
476
return E;
477
break;
478
}
479
480
return Error::success();
481
}
482
} // namespace llvm
483
484