Path: blob/main/contrib/llvm-project/llvm/lib/ProfileData/IndexedMemProfData.cpp
213765 views
//===- IndexedMemProfData.h - MemProf format support ------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// MemProf data is serialized in writeMemProf provided in this file.9//10//===----------------------------------------------------------------------===//1112#include "llvm/ProfileData/DataAccessProf.h"13#include "llvm/ProfileData/InstrProf.h"14#include "llvm/ProfileData/InstrProfReader.h"15#include "llvm/ProfileData/MemProf.h"16#include "llvm/ProfileData/MemProfRadixTree.h"17#include "llvm/ProfileData/MemProfSummary.h"18#include "llvm/Support/FormatVariadic.h"19#include "llvm/Support/OnDiskHashTable.h"2021namespace llvm {2223// Serialize Schema.24static void writeMemProfSchema(ProfOStream &OS,25const memprof::MemProfSchema &Schema) {26OS.write(static_cast<uint64_t>(Schema.size()));27for (const auto Id : Schema)28OS.write(static_cast<uint64_t>(Id));29}3031// Serialize MemProfRecordData. Return RecordTableOffset.32static uint64_t writeMemProfRecords(33ProfOStream &OS,34llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>35&MemProfRecordData,36memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,37llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>38*MemProfCallStackIndexes = nullptr) {39memprof::RecordWriterTrait RecordWriter(Schema, Version,40MemProfCallStackIndexes);41OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>42RecordTableGenerator;43for (auto &[GUID, Record] : MemProfRecordData) {44// Insert the key (func hash) and value (memprof record).45RecordTableGenerator.insert(GUID, Record, RecordWriter);46}47// Release the memory of this MapVector as it is no longer needed.48MemProfRecordData.clear();4950// The call to Emit invokes RecordWriterTrait::EmitData which destructs51// the memprof record copies owned by the RecordTableGenerator. This works52// because the RecordTableGenerator is not used after this point.53return RecordTableGenerator.Emit(OS.OS, RecordWriter);54}5556// Serialize MemProfFrameData. Return FrameTableOffset.57static uint64_t writeMemProfFrames(58ProfOStream &OS,59llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {60OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>61FrameTableGenerator;62for (auto &[FrameId, Frame] : MemProfFrameData) {63// Insert the key (frame id) and value (frame contents).64FrameTableGenerator.insert(FrameId, Frame);65}66// Release the memory of this MapVector as it is no longer needed.67MemProfFrameData.clear();6869return FrameTableGenerator.Emit(OS.OS);70}7172// Serialize MemProfFrameData. Return the mapping from FrameIds to their73// indexes within the frame array.74static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>75writeMemProfFrameArray(76ProfOStream &OS,77llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,78llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {79// Mappings from FrameIds to array indexes.80llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;8182// Compute the order in which we serialize Frames. The order does not matter83// in terms of correctness, but we still compute it for deserialization84// performance. Specifically, if we serialize frequently used Frames one85// after another, we have better cache utilization. For two Frames that86// appear equally frequently, we break a tie by serializing the one that tends87// to appear earlier in call stacks. We implement the tie-breaking mechanism88// by computing the sum of indexes within call stacks for each Frame. If we89// still have a tie, then we just resort to compare two FrameIds, which is90// just for stability of output.91std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;92FrameIdOrder.reserve(MemProfFrameData.size());93for (const auto &[Id, Frame] : MemProfFrameData)94FrameIdOrder.emplace_back(Id, &Frame);95assert(MemProfFrameData.size() == FrameIdOrder.size());96llvm::sort(FrameIdOrder,97[&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,98const std::pair<memprof::FrameId, const memprof::Frame *> &R) {99const auto &SL = FrameHistogram[L.first];100const auto &SR = FrameHistogram[R.first];101// Popular FrameIds should come first.102if (SL.Count != SR.Count)103return SL.Count > SR.Count;104// If they are equally popular, then the one that tends to appear105// earlier in call stacks should come first.106if (SL.PositionSum != SR.PositionSum)107return SL.PositionSum < SR.PositionSum;108// Compare their FrameIds for sort stability.109return L.first < R.first;110});111112// Serialize all frames while creating mappings from linear IDs to FrameIds.113uint64_t Index = 0;114MemProfFrameIndexes.reserve(FrameIdOrder.size());115for (const auto &[Id, F] : FrameIdOrder) {116F->serialize(OS.OS);117MemProfFrameIndexes.insert({Id, Index});118++Index;119}120assert(MemProfFrameData.size() == Index);121assert(MemProfFrameData.size() == MemProfFrameIndexes.size());122123// Release the memory of this MapVector as it is no longer needed.124MemProfFrameData.clear();125126return MemProfFrameIndexes;127}128129static uint64_t writeMemProfCallStacks(130ProfOStream &OS,131llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>132&MemProfCallStackData) {133OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>134CallStackTableGenerator;135for (auto &[CSId, CallStack] : MemProfCallStackData)136CallStackTableGenerator.insert(CSId, CallStack);137// Release the memory of this vector as it is no longer needed.138MemProfCallStackData.clear();139140return CallStackTableGenerator.Emit(OS.OS);141}142143static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>144writeMemProfCallStackArray(145ProfOStream &OS,146llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>147&MemProfCallStackData,148llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>149&MemProfFrameIndexes,150llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram,151unsigned &NumElements) {152llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>153MemProfCallStackIndexes;154155memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;156Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,157FrameHistogram);158for (auto I : Builder.getRadixArray())159OS.write32(I);160NumElements = Builder.getRadixArray().size();161MemProfCallStackIndexes = Builder.takeCallStackPos();162163// Release the memory of this vector as it is no longer needed.164MemProfCallStackData.clear();165166return MemProfCallStackIndexes;167}168169// Write out MemProf Version2 as follows:170// uint64_t Version171// uint64_t RecordTableOffset = RecordTableGenerator.Emit172// uint64_t FramePayloadOffset = Offset for the frame payload173// uint64_t FrameTableOffset = FrameTableGenerator.Emit174// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2)175// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)176// uint64_t Num schema entries177// uint64_t Schema entry 0178// uint64_t Schema entry 1179// ....180// uint64_t Schema entry N - 1181// OnDiskChainedHashTable MemProfRecordData182// OnDiskChainedHashTable MemProfFrameData183// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)184static Error writeMemProfV2(ProfOStream &OS,185memprof::IndexedMemProfData &MemProfData,186bool MemProfFullSchema) {187OS.write(memprof::Version2);188uint64_t HeaderUpdatePos = OS.tell();189OS.write(0ULL); // Reserve space for the memprof record table offset.190OS.write(0ULL); // Reserve space for the memprof frame payload offset.191OS.write(0ULL); // Reserve space for the memprof frame table offset.192OS.write(0ULL); // Reserve space for the memprof call stack payload offset.193OS.write(0ULL); // Reserve space for the memprof call stack table offset.194195auto Schema = memprof::getHotColdSchema();196if (MemProfFullSchema)197Schema = memprof::getFullSchema();198writeMemProfSchema(OS, Schema);199200uint64_t RecordTableOffset =201writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);202203uint64_t FramePayloadOffset = OS.tell();204uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);205206uint64_t CallStackPayloadOffset = OS.tell();207uint64_t CallStackTableOffset =208writeMemProfCallStacks(OS, MemProfData.CallStacks);209210uint64_t Header[] = {211RecordTableOffset, FramePayloadOffset, FrameTableOffset,212CallStackPayloadOffset, CallStackTableOffset,213};214OS.patch({{HeaderUpdatePos, Header}});215216return Error::success();217}218219static Error writeMemProfRadixTreeBased(220ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,221memprof::IndexedVersion Version, bool MemProfFullSchema,222std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData =223nullptr,224std::unique_ptr<memprof::MemProfSummary> MemProfSum = nullptr) {225assert((Version == memprof::Version3 || Version == memprof::Version4) &&226"Unsupported version for radix tree format");227228OS.write(Version); // Write the specific version (V3 or V4)229uint64_t HeaderUpdatePos = OS.tell();230OS.write(0ULL); // Reserve space for the memprof call stack payload offset.231OS.write(0ULL); // Reserve space for the memprof record payload offset.232OS.write(0ULL); // Reserve space for the memprof record table offset.233if (Version >= memprof::Version4) {234OS.write(0ULL); // Reserve space for the data access profile offset.235236MemProfSum->write(OS);237}238239auto Schema = memprof::getHotColdSchema();240if (MemProfFullSchema)241Schema = memprof::getFullSchema();242writeMemProfSchema(OS, Schema);243244llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =245memprof::computeFrameHistogram(MemProfData.CallStacks);246assert(MemProfData.Frames.size() == FrameHistogram.size());247248llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =249writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);250251uint64_t CallStackPayloadOffset = OS.tell();252// The number of elements in the call stack array.253unsigned NumElements = 0;254llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>255MemProfCallStackIndexes =256writeMemProfCallStackArray(OS, MemProfData.CallStacks,257MemProfFrameIndexes, FrameHistogram,258NumElements);259260uint64_t RecordPayloadOffset = OS.tell();261uint64_t RecordTableOffset = writeMemProfRecords(262OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes);263264uint64_t DataAccessProfOffset = 0;265if (DataAccessProfileData != nullptr) {266assert(Version >= memprof::Version4 &&267"Data access profiles are added starting from v4");268DataAccessProfOffset = OS.tell();269if (Error E = DataAccessProfileData->serialize(OS))270return E;271}272273// Verify that the computation for the number of elements in the call stack274// array works.275assert(CallStackPayloadOffset +276NumElements * sizeof(memprof::LinearFrameId) ==277RecordPayloadOffset);278279SmallVector<uint64_t, 4> Header = {280CallStackPayloadOffset,281RecordPayloadOffset,282RecordTableOffset,283};284if (Version >= memprof::Version4)285Header.push_back(DataAccessProfOffset);286287OS.patch({{HeaderUpdatePos, Header}});288289return Error::success();290}291292// Write out MemProf Version3293static Error writeMemProfV3(ProfOStream &OS,294memprof::IndexedMemProfData &MemProfData,295bool MemProfFullSchema) {296return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3,297MemProfFullSchema);298}299300// Write out MemProf Version4301static Error writeMemProfV4(302ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,303bool MemProfFullSchema,304std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData,305std::unique_ptr<memprof::MemProfSummary> MemProfSum) {306return writeMemProfRadixTreeBased(307OS, MemProfData, memprof::Version4, MemProfFullSchema,308std::move(DataAccessProfileData), std::move(MemProfSum));309}310311// Write out the MemProf data in a requested version.312Error writeMemProf(313ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,314memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,315std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData,316std::unique_ptr<memprof::MemProfSummary> MemProfSum) {317switch (MemProfVersionRequested) {318case memprof::Version2:319return writeMemProfV2(OS, MemProfData, MemProfFullSchema);320case memprof::Version3:321return writeMemProfV3(OS, MemProfData, MemProfFullSchema);322case memprof::Version4:323return writeMemProfV4(OS, MemProfData, MemProfFullSchema,324std::move(DataAccessProfileData),325std::move(MemProfSum));326}327328return make_error<InstrProfError>(329instrprof_error::unsupported_version,330formatv("MemProf version {} not supported; "331"requires version between {} and {}, inclusive",332MemProfVersionRequested, memprof::MinimumSupportedVersion,333memprof::MaximumSupportedVersion));334}335336Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,337const unsigned char *Ptr) {338// The value returned from RecordTableGenerator.Emit.339const uint64_t RecordTableOffset =340support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);341// The offset in the stream right before invoking342// FrameTableGenerator.Emit.343const uint64_t FramePayloadOffset =344support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);345// The value returned from FrameTableGenerator.Emit.346const uint64_t FrameTableOffset =347support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);348349// The offset in the stream right before invoking350// CallStackTableGenerator.Emit.351uint64_t CallStackPayloadOffset = 0;352// The value returned from CallStackTableGenerator.Emit.353uint64_t CallStackTableOffset = 0;354if (Version >= memprof::Version2) {355CallStackPayloadOffset =356support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);357CallStackTableOffset =358support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);359}360361// Read the schema.362auto SchemaOr = memprof::readMemProfSchema(Ptr);363if (!SchemaOr)364return SchemaOr.takeError();365Schema = SchemaOr.get();366367// Now initialize the table reader with a pointer into data buffer.368MemProfRecordTable.reset(MemProfRecordHashTable::Create(369/*Buckets=*/Start + RecordTableOffset,370/*Payload=*/Ptr,371/*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));372373// Initialize the frame table reader with the payload and bucket offsets.374MemProfFrameTable.reset(MemProfFrameHashTable::Create(375/*Buckets=*/Start + FrameTableOffset,376/*Payload=*/Start + FramePayloadOffset,377/*Base=*/Start));378379if (Version >= memprof::Version2)380MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(381/*Buckets=*/Start + CallStackTableOffset,382/*Payload=*/Start + CallStackPayloadOffset,383/*Base=*/Start));384385return Error::success();386}387388Error IndexedMemProfReader::deserializeRadixTreeBased(389const unsigned char *Start, const unsigned char *Ptr,390memprof::IndexedVersion Version) {391assert((Version == memprof::Version3 || Version == memprof::Version4) &&392"Unsupported version for radix tree format");393// The offset in the stream right before invoking394// CallStackTableGenerator.Emit.395const uint64_t CallStackPayloadOffset =396support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);397// The offset in the stream right before invoking RecordTableGenerator.Emit.398const uint64_t RecordPayloadOffset =399support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);400// The value returned from RecordTableGenerator.Emit.401const uint64_t RecordTableOffset =402support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);403404uint64_t DataAccessProfOffset = 0;405if (Version >= memprof::Version4) {406DataAccessProfOffset =407support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);408MemProfSum = memprof::MemProfSummary::deserialize(Ptr);409}410411// Read the schema.412auto SchemaOr = memprof::readMemProfSchema(Ptr);413if (!SchemaOr)414return SchemaOr.takeError();415Schema = SchemaOr.get();416417FrameBase = Ptr;418CallStackBase = Start + CallStackPayloadOffset;419420// Compute the number of elements in the radix tree array. Since we use this421// to reserve enough bits in a BitVector, it's totally OK if we overestimate422// this number a little bit because of padding just before the next section.423RadixTreeSize = (RecordPayloadOffset - CallStackPayloadOffset) /424sizeof(memprof::LinearFrameId);425426// Now initialize the table reader with a pointer into data buffer.427MemProfRecordTable.reset(MemProfRecordHashTable::Create(428/*Buckets=*/Start + RecordTableOffset,429/*Payload=*/Start + RecordPayloadOffset,430/*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));431432assert((!DataAccessProfOffset || DataAccessProfOffset > RecordTableOffset) &&433"Data access profile is either empty or after the record table");434if (DataAccessProfOffset > RecordTableOffset) {435DataAccessProfileData = std::make_unique<memprof::DataAccessProfData>();436const unsigned char *DAPPtr = Start + DataAccessProfOffset;437if (Error E = DataAccessProfileData->deserialize(DAPPtr))438return E;439}440441return Error::success();442}443444Error IndexedMemProfReader::deserialize(const unsigned char *Start,445uint64_t MemProfOffset) {446const unsigned char *Ptr = Start + MemProfOffset;447448// Read the MemProf version number.449const uint64_t FirstWord =450support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);451452// Check if the version is supported453if (FirstWord >= memprof::MinimumSupportedVersion &&454FirstWord <= memprof::MaximumSupportedVersion) {455// Everything is good. We can proceed to deserialize the rest.456Version = static_cast<memprof::IndexedVersion>(FirstWord);457} else {458return make_error<InstrProfError>(459instrprof_error::unsupported_version,460formatv("MemProf version {} not supported; "461"requires version between {} and {}, inclusive",462FirstWord, memprof::MinimumSupportedVersion,463memprof::MaximumSupportedVersion));464}465466switch (Version) {467case memprof::Version2:468if (Error E = deserializeV2(Start, Ptr))469return E;470break;471case memprof::Version3:472case memprof::Version4:473// V3 and V4 share the same high-level structure (radix tree, linear IDs).474if (Error E = deserializeRadixTreeBased(Start, Ptr, Version))475return E;476break;477}478479return Error::success();480}481} // namespace llvm482483484