Path: blob/main/contrib/llvm-project/llvm/lib/ProfileData/InstrProfWriter.cpp
35233 views
//===- InstrProfWriter.cpp - Instrumented profiling writer ----------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file contains support for writing profiling data for clang's9// instrumentation based PGO and coverage.10//11//===----------------------------------------------------------------------===//1213#include "llvm/ProfileData/InstrProfWriter.h"14#include "llvm/ADT/STLExtras.h"15#include "llvm/ADT/SetVector.h"16#include "llvm/ADT/StringRef.h"17#include "llvm/IR/ProfileSummary.h"18#include "llvm/ProfileData/InstrProf.h"19#include "llvm/ProfileData/MemProf.h"20#include "llvm/ProfileData/ProfileCommon.h"21#include "llvm/Support/Compression.h"22#include "llvm/Support/Endian.h"23#include "llvm/Support/EndianStream.h"24#include "llvm/Support/Error.h"25#include "llvm/Support/FormatVariadic.h"26#include "llvm/Support/MemoryBuffer.h"27#include "llvm/Support/OnDiskHashTable.h"28#include "llvm/Support/raw_ostream.h"29#include <cstdint>30#include <memory>31#include <string>32#include <tuple>33#include <utility>34#include <vector>3536using namespace llvm;3738// A struct to define how the data stream should be patched. For Indexed39// profiling, only uint64_t data type is needed.40struct PatchItem {41uint64_t Pos; // Where to patch.42ArrayRef<uint64_t> D; // An array of source data.43};4445namespace llvm {4647// A wrapper class to abstract writer stream with support of bytes48// back patching.49class ProfOStream {50public:51ProfOStream(raw_fd_ostream &FD)52: IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {}53ProfOStream(raw_string_ostream &STR)54: IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {}5556[[nodiscard]] uint64_t tell() const { return OS.tell(); }57void write(uint64_t V) { LE.write<uint64_t>(V); }58void write32(uint32_t V) { LE.write<uint32_t>(V); }59void writeByte(uint8_t V) { LE.write<uint8_t>(V); }6061// \c patch can only be called when all data is written and flushed.62// For raw_string_ostream, the patch is done on the target string63// directly and it won't be reflected in the stream's internal buffer.64void patch(ArrayRef<PatchItem> P) {65using namespace support;6667if (IsFDOStream) {68raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);69const uint64_t LastPos = FDOStream.tell();70for (const auto &K : P) {71FDOStream.seek(K.Pos);72for (uint64_t Elem : K.D)73write(Elem);74}75// Reset the stream to the last position after patching so that users76// don't accidentally overwrite data. This makes it consistent with77// the string stream below which replaces the data directly.78FDOStream.seek(LastPos);79} else {80raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);81std::string &Data = SOStream.str(); // with flush82for (const auto &K : P) {83for (int I = 0, E = K.D.size(); I != E; I++) {84uint64_t Bytes =85endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);86Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t),87(const char *)&Bytes, sizeof(uint64_t));88}89}90}91}9293// If \c OS is an instance of \c raw_fd_ostream, this field will be94// true. Otherwise, \c OS will be an raw_string_ostream.95bool IsFDOStream;96raw_ostream &OS;97support::endian::Writer LE;98};99100class InstrProfRecordWriterTrait {101public:102using key_type = StringRef;103using key_type_ref = StringRef;104105using data_type = const InstrProfWriter::ProfilingData *const;106using data_type_ref = const InstrProfWriter::ProfilingData *const;107108using hash_value_type = uint64_t;109using offset_type = uint64_t;110111llvm::endianness ValueProfDataEndianness = llvm::endianness::little;112InstrProfSummaryBuilder *SummaryBuilder;113InstrProfSummaryBuilder *CSSummaryBuilder;114115InstrProfRecordWriterTrait() = default;116117static hash_value_type ComputeHash(key_type_ref K) {118return IndexedInstrProf::ComputeHash(K);119}120121static std::pair<offset_type, offset_type>122EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {123using namespace support;124125endian::Writer LE(Out, llvm::endianness::little);126127offset_type N = K.size();128LE.write<offset_type>(N);129130offset_type M = 0;131for (const auto &ProfileData : *V) {132const InstrProfRecord &ProfRecord = ProfileData.second;133M += sizeof(uint64_t); // The function hash134M += sizeof(uint64_t); // The size of the Counts vector135M += ProfRecord.Counts.size() * sizeof(uint64_t);136M += sizeof(uint64_t); // The size of the Bitmap vector137M += ProfRecord.BitmapBytes.size() * sizeof(uint64_t);138139// Value data140M += ValueProfData::getSize(ProfileData.second);141}142LE.write<offset_type>(M);143144return std::make_pair(N, M);145}146147void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N) {148Out.write(K.data(), N);149}150151void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type) {152using namespace support;153154endian::Writer LE(Out, llvm::endianness::little);155for (const auto &ProfileData : *V) {156const InstrProfRecord &ProfRecord = ProfileData.second;157if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first))158CSSummaryBuilder->addRecord(ProfRecord);159else160SummaryBuilder->addRecord(ProfRecord);161162LE.write<uint64_t>(ProfileData.first); // Function hash163LE.write<uint64_t>(ProfRecord.Counts.size());164for (uint64_t I : ProfRecord.Counts)165LE.write<uint64_t>(I);166167LE.write<uint64_t>(ProfRecord.BitmapBytes.size());168for (uint64_t I : ProfRecord.BitmapBytes)169LE.write<uint64_t>(I);170171// Write value data172std::unique_ptr<ValueProfData> VDataPtr =173ValueProfData::serializeFrom(ProfileData.second);174uint32_t S = VDataPtr->getSize();175VDataPtr->swapBytesFromHost(ValueProfDataEndianness);176Out.write((const char *)VDataPtr.get(), S);177}178}179};180181} // end namespace llvm182183InstrProfWriter::InstrProfWriter(184bool Sparse, uint64_t TemporalProfTraceReservoirSize,185uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion,186memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema)187: Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength),188TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize),189InfoObj(new InstrProfRecordWriterTrait()),190WritePrevVersion(WritePrevVersion),191MemProfVersionRequested(MemProfVersionRequested),192MemProfFullSchema(MemProfFullSchema) {}193194InstrProfWriter::~InstrProfWriter() { delete InfoObj; }195196// Internal interface for testing purpose only.197void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) {198InfoObj->ValueProfDataEndianness = Endianness;199}200201void InstrProfWriter::setOutputSparse(bool Sparse) {202this->Sparse = Sparse;203}204205void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight,206function_ref<void(Error)> Warn) {207auto Name = I.Name;208auto Hash = I.Hash;209addRecord(Name, Hash, std::move(I), Weight, Warn);210}211212void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other,213OverlapStats &Overlap,214OverlapStats &FuncLevelOverlap,215const OverlapFuncFilters &FuncFilter) {216auto Name = Other.Name;217auto Hash = Other.Hash;218Other.accumulateCounts(FuncLevelOverlap.Test);219if (!FunctionData.contains(Name)) {220Overlap.addOneUnique(FuncLevelOverlap.Test);221return;222}223if (FuncLevelOverlap.Test.CountSum < 1.0f) {224Overlap.Overlap.NumEntries += 1;225return;226}227auto &ProfileDataMap = FunctionData[Name];228bool NewFunc;229ProfilingData::iterator Where;230std::tie(Where, NewFunc) =231ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));232if (NewFunc) {233Overlap.addOneMismatch(FuncLevelOverlap.Test);234return;235}236InstrProfRecord &Dest = Where->second;237238uint64_t ValueCutoff = FuncFilter.ValueCutoff;239if (!FuncFilter.NameFilter.empty() && Name.contains(FuncFilter.NameFilter))240ValueCutoff = 0;241242Dest.overlap(Other, Overlap, FuncLevelOverlap, ValueCutoff);243}244245void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,246InstrProfRecord &&I, uint64_t Weight,247function_ref<void(Error)> Warn) {248auto &ProfileDataMap = FunctionData[Name];249250bool NewFunc;251ProfilingData::iterator Where;252std::tie(Where, NewFunc) =253ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));254InstrProfRecord &Dest = Where->second;255256auto MapWarn = [&](instrprof_error E) {257Warn(make_error<InstrProfError>(E));258};259260if (NewFunc) {261// We've never seen a function with this name and hash, add it.262Dest = std::move(I);263if (Weight > 1)264Dest.scale(Weight, 1, MapWarn);265} else {266// We're updating a function we've seen before.267Dest.merge(I, Weight, MapWarn);268}269270Dest.sortValueData();271}272273void InstrProfWriter::addMemProfRecord(274const Function::GUID Id, const memprof::IndexedMemProfRecord &Record) {275auto [Iter, Inserted] = MemProfData.Records.insert({Id, Record});276// If we inserted a new record then we are done.277if (Inserted) {278return;279}280memprof::IndexedMemProfRecord &Existing = Iter->second;281Existing.merge(Record);282}283284bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id,285const memprof::Frame &Frame,286function_ref<void(Error)> Warn) {287auto [Iter, Inserted] = MemProfData.Frames.insert({Id, Frame});288// If a mapping already exists for the current frame id and it does not289// match the new mapping provided then reset the existing contents and bail290// out. We don't support the merging of memprof data whose Frame -> Id291// mapping across profiles is inconsistent.292if (!Inserted && Iter->second != Frame) {293Warn(make_error<InstrProfError>(instrprof_error::malformed,294"frame to id mapping mismatch"));295return false;296}297return true;298}299300bool InstrProfWriter::addMemProfCallStack(301const memprof::CallStackId CSId,302const llvm::SmallVector<memprof::FrameId> &CallStack,303function_ref<void(Error)> Warn) {304auto [Iter, Inserted] = MemProfData.CallStacks.insert({CSId, CallStack});305// If a mapping already exists for the current call stack id and it does not306// match the new mapping provided then reset the existing contents and bail307// out. We don't support the merging of memprof data whose CallStack -> Id308// mapping across profiles is inconsistent.309if (!Inserted && Iter->second != CallStack) {310Warn(make_error<InstrProfError>(instrprof_error::malformed,311"call stack to id mapping mismatch"));312return false;313}314return true;315}316317void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {318llvm::append_range(BinaryIds, BIs);319}320321void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {322assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);323assert(!Trace.FunctionNameRefs.empty());324if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) {325// Simply append the trace if we have not yet hit our reservoir size limit.326TemporalProfTraces.push_back(std::move(Trace));327} else {328// Otherwise, replace a random trace in the stream.329std::uniform_int_distribution<uint64_t> Distribution(3300, TemporalProfTraceStreamSize);331uint64_t RandomIndex = Distribution(RNG);332if (RandomIndex < TemporalProfTraces.size())333TemporalProfTraces[RandomIndex] = std::move(Trace);334}335++TemporalProfTraceStreamSize;336}337338void InstrProfWriter::addTemporalProfileTraces(339SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {340for (auto &Trace : SrcTraces)341if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)342Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);343llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); });344// Assume that the source has the same reservoir size as the destination to345// avoid needing to record it in the indexed profile format.346bool IsDestSampled =347(TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize);348bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize);349if (!IsDestSampled && IsSrcSampled) {350// If one of the traces are sampled, ensure that it belongs to Dest.351std::swap(TemporalProfTraces, SrcTraces);352std::swap(TemporalProfTraceStreamSize, SrcStreamSize);353std::swap(IsDestSampled, IsSrcSampled);354}355if (!IsSrcSampled) {356// If the source stream is not sampled, we add each source trace normally.357for (auto &Trace : SrcTraces)358addTemporalProfileTrace(std::move(Trace));359return;360}361// Otherwise, we find the traces that would have been removed if we added362// the whole source stream.363SmallSetVector<uint64_t, 8> IndicesToReplace;364for (uint64_t I = 0; I < SrcStreamSize; I++) {365std::uniform_int_distribution<uint64_t> Distribution(3660, TemporalProfTraceStreamSize);367uint64_t RandomIndex = Distribution(RNG);368if (RandomIndex < TemporalProfTraces.size())369IndicesToReplace.insert(RandomIndex);370++TemporalProfTraceStreamSize;371}372// Then we insert a random sample of the source traces.373llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG);374for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces))375TemporalProfTraces[Index] = std::move(Trace);376}377378void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,379function_ref<void(Error)> Warn) {380for (auto &I : IPW.FunctionData)381for (auto &Func : I.getValue())382addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn);383384BinaryIds.reserve(BinaryIds.size() + IPW.BinaryIds.size());385for (auto &I : IPW.BinaryIds)386addBinaryIds(I);387388addTemporalProfileTraces(IPW.TemporalProfTraces,389IPW.TemporalProfTraceStreamSize);390391MemProfData.Frames.reserve(IPW.MemProfData.Frames.size());392for (auto &[FrameId, Frame] : IPW.MemProfData.Frames) {393// If we weren't able to add the frame mappings then it doesn't make sense394// to try to merge the records from this profile.395if (!addMemProfFrame(FrameId, Frame, Warn))396return;397}398399MemProfData.CallStacks.reserve(IPW.MemProfData.CallStacks.size());400for (auto &[CSId, CallStack] : IPW.MemProfData.CallStacks) {401if (!addMemProfCallStack(CSId, CallStack, Warn))402return;403}404405MemProfData.Records.reserve(IPW.MemProfData.Records.size());406for (auto &[GUID, Record] : IPW.MemProfData.Records) {407addMemProfRecord(GUID, Record);408}409}410411bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {412if (!Sparse)413return true;414for (const auto &Func : PD) {415const InstrProfRecord &IPR = Func.second;416if (llvm::any_of(IPR.Counts, [](uint64_t Count) { return Count > 0; }))417return true;418if (llvm::any_of(IPR.BitmapBytes, [](uint8_t Byte) { return Byte > 0; }))419return true;420}421return false;422}423424static void setSummary(IndexedInstrProf::Summary *TheSummary,425ProfileSummary &PS) {426using namespace IndexedInstrProf;427428const std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary();429TheSummary->NumSummaryFields = Summary::NumKinds;430TheSummary->NumCutoffEntries = Res.size();431TheSummary->set(Summary::MaxFunctionCount, PS.getMaxFunctionCount());432TheSummary->set(Summary::MaxBlockCount, PS.getMaxCount());433TheSummary->set(Summary::MaxInternalBlockCount, PS.getMaxInternalCount());434TheSummary->set(Summary::TotalBlockCount, PS.getTotalCount());435TheSummary->set(Summary::TotalNumBlocks, PS.getNumCounts());436TheSummary->set(Summary::TotalNumFunctions, PS.getNumFunctions());437for (unsigned I = 0; I < Res.size(); I++)438TheSummary->setEntry(I, Res[I]);439}440441// Serialize Schema.442static void writeMemProfSchema(ProfOStream &OS,443const memprof::MemProfSchema &Schema) {444OS.write(static_cast<uint64_t>(Schema.size()));445for (const auto Id : Schema)446OS.write(static_cast<uint64_t>(Id));447}448449// Serialize MemProfRecordData. Return RecordTableOffset.450static uint64_t writeMemProfRecords(451ProfOStream &OS,452llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>453&MemProfRecordData,454memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,455llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>456*MemProfCallStackIndexes = nullptr) {457memprof::RecordWriterTrait RecordWriter(Schema, Version,458MemProfCallStackIndexes);459OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>460RecordTableGenerator;461for (auto &[GUID, Record] : MemProfRecordData) {462// Insert the key (func hash) and value (memprof record).463RecordTableGenerator.insert(GUID, Record, RecordWriter);464}465// Release the memory of this MapVector as it is no longer needed.466MemProfRecordData.clear();467468// The call to Emit invokes RecordWriterTrait::EmitData which destructs469// the memprof record copies owned by the RecordTableGenerator. This works470// because the RecordTableGenerator is not used after this point.471return RecordTableGenerator.Emit(OS.OS, RecordWriter);472}473474// Serialize MemProfFrameData. Return FrameTableOffset.475static uint64_t writeMemProfFrames(476ProfOStream &OS,477llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {478OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>479FrameTableGenerator;480for (auto &[FrameId, Frame] : MemProfFrameData) {481// Insert the key (frame id) and value (frame contents).482FrameTableGenerator.insert(FrameId, Frame);483}484// Release the memory of this MapVector as it is no longer needed.485MemProfFrameData.clear();486487return FrameTableGenerator.Emit(OS.OS);488}489490// Serialize MemProfFrameData. Return the mapping from FrameIds to their491// indexes within the frame array.492static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>493writeMemProfFrameArray(494ProfOStream &OS,495llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,496llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {497// Mappings from FrameIds to array indexes.498llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;499500// Compute the order in which we serialize Frames. The order does not matter501// in terms of correctness, but we still compute it for deserialization502// performance. Specifically, if we serialize frequently used Frames one503// after another, we have better cache utilization. For two Frames that504// appear equally frequently, we break a tie by serializing the one that tends505// to appear earlier in call stacks. We implement the tie-breaking mechanism506// by computing the sum of indexes within call stacks for each Frame. If we507// still have a tie, then we just resort to compare two FrameIds, which is508// just for stability of output.509std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;510FrameIdOrder.reserve(MemProfFrameData.size());511for (const auto &[Id, Frame] : MemProfFrameData)512FrameIdOrder.emplace_back(Id, &Frame);513assert(MemProfFrameData.size() == FrameIdOrder.size());514llvm::sort(FrameIdOrder,515[&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,516const std::pair<memprof::FrameId, const memprof::Frame *> &R) {517const auto &SL = FrameHistogram[L.first];518const auto &SR = FrameHistogram[R.first];519// Popular FrameIds should come first.520if (SL.Count != SR.Count)521return SL.Count > SR.Count;522// If they are equally popular, then the one that tends to appear523// earlier in call stacks should come first.524if (SL.PositionSum != SR.PositionSum)525return SL.PositionSum < SR.PositionSum;526// Compare their FrameIds for sort stability.527return L.first < R.first;528});529530// Serialize all frames while creating mappings from linear IDs to FrameIds.531uint64_t Index = 0;532MemProfFrameIndexes.reserve(FrameIdOrder.size());533for (const auto &[Id, F] : FrameIdOrder) {534F->serialize(OS.OS);535MemProfFrameIndexes.insert({Id, Index});536++Index;537}538assert(MemProfFrameData.size() == Index);539assert(MemProfFrameData.size() == MemProfFrameIndexes.size());540541// Release the memory of this MapVector as it is no longer needed.542MemProfFrameData.clear();543544return MemProfFrameIndexes;545}546547static uint64_t writeMemProfCallStacks(548ProfOStream &OS,549llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>550&MemProfCallStackData) {551OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>552CallStackTableGenerator;553for (auto &[CSId, CallStack] : MemProfCallStackData)554CallStackTableGenerator.insert(CSId, CallStack);555// Release the memory of this vector as it is no longer needed.556MemProfCallStackData.clear();557558return CallStackTableGenerator.Emit(OS.OS);559}560561static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>562writeMemProfCallStackArray(563ProfOStream &OS,564llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>565&MemProfCallStackData,566llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>567&MemProfFrameIndexes,568llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {569llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>570MemProfCallStackIndexes;571572memprof::CallStackRadixTreeBuilder Builder;573Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,574FrameHistogram);575for (auto I : Builder.getRadixArray())576OS.write32(I);577MemProfCallStackIndexes = Builder.takeCallStackPos();578579// Release the memory of this vector as it is no longer needed.580MemProfCallStackData.clear();581582return MemProfCallStackIndexes;583}584585// Write out MemProf Version0 as follows:586// uint64_t RecordTableOffset = RecordTableGenerator.Emit587// uint64_t FramePayloadOffset = Offset for the frame payload588// uint64_t FrameTableOffset = FrameTableGenerator.Emit589// uint64_t Num schema entries590// uint64_t Schema entry 0591// uint64_t Schema entry 1592// ....593// uint64_t Schema entry N - 1594// OnDiskChainedHashTable MemProfRecordData595// OnDiskChainedHashTable MemProfFrameData596static Error writeMemProfV0(ProfOStream &OS,597memprof::IndexedMemProfData &MemProfData) {598uint64_t HeaderUpdatePos = OS.tell();599OS.write(0ULL); // Reserve space for the memprof record table offset.600OS.write(0ULL); // Reserve space for the memprof frame payload offset.601OS.write(0ULL); // Reserve space for the memprof frame table offset.602603auto Schema = memprof::getFullSchema();604writeMemProfSchema(OS, Schema);605606uint64_t RecordTableOffset =607writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version0);608609uint64_t FramePayloadOffset = OS.tell();610uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);611612uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset};613OS.patch({{HeaderUpdatePos, Header}});614615return Error::success();616}617618// Write out MemProf Version1 as follows:619// uint64_t Version (NEW in V1)620// uint64_t RecordTableOffset = RecordTableGenerator.Emit621// uint64_t FramePayloadOffset = Offset for the frame payload622// uint64_t FrameTableOffset = FrameTableGenerator.Emit623// uint64_t Num schema entries624// uint64_t Schema entry 0625// uint64_t Schema entry 1626// ....627// uint64_t Schema entry N - 1628// OnDiskChainedHashTable MemProfRecordData629// OnDiskChainedHashTable MemProfFrameData630static Error writeMemProfV1(ProfOStream &OS,631memprof::IndexedMemProfData &MemProfData) {632OS.write(memprof::Version1);633uint64_t HeaderUpdatePos = OS.tell();634OS.write(0ULL); // Reserve space for the memprof record table offset.635OS.write(0ULL); // Reserve space for the memprof frame payload offset.636OS.write(0ULL); // Reserve space for the memprof frame table offset.637638auto Schema = memprof::getFullSchema();639writeMemProfSchema(OS, Schema);640641uint64_t RecordTableOffset =642writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version1);643644uint64_t FramePayloadOffset = OS.tell();645uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);646647uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset};648OS.patch({{HeaderUpdatePos, Header}});649650return Error::success();651}652653// Write out MemProf Version2 as follows:654// uint64_t Version655// uint64_t RecordTableOffset = RecordTableGenerator.Emit656// uint64_t FramePayloadOffset = Offset for the frame payload657// uint64_t FrameTableOffset = FrameTableGenerator.Emit658// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2)659// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)660// uint64_t Num schema entries661// uint64_t Schema entry 0662// uint64_t Schema entry 1663// ....664// uint64_t Schema entry N - 1665// OnDiskChainedHashTable MemProfRecordData666// OnDiskChainedHashTable MemProfFrameData667// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)668static Error writeMemProfV2(ProfOStream &OS,669memprof::IndexedMemProfData &MemProfData,670bool MemProfFullSchema) {671OS.write(memprof::Version2);672uint64_t HeaderUpdatePos = OS.tell();673OS.write(0ULL); // Reserve space for the memprof record table offset.674OS.write(0ULL); // Reserve space for the memprof frame payload offset.675OS.write(0ULL); // Reserve space for the memprof frame table offset.676OS.write(0ULL); // Reserve space for the memprof call stack payload offset.677OS.write(0ULL); // Reserve space for the memprof call stack table offset.678679auto Schema = memprof::getHotColdSchema();680if (MemProfFullSchema)681Schema = memprof::getFullSchema();682writeMemProfSchema(OS, Schema);683684uint64_t RecordTableOffset =685writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);686687uint64_t FramePayloadOffset = OS.tell();688uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);689690uint64_t CallStackPayloadOffset = OS.tell();691uint64_t CallStackTableOffset =692writeMemProfCallStacks(OS, MemProfData.CallStacks);693694uint64_t Header[] = {695RecordTableOffset, FramePayloadOffset, FrameTableOffset,696CallStackPayloadOffset, CallStackTableOffset,697};698OS.patch({{HeaderUpdatePos, Header}});699700return Error::success();701}702703// Write out MemProf Version3 as follows:704// uint64_t Version705// uint64_t CallStackPayloadOffset = Offset for the call stack payload706// uint64_t RecordPayloadOffset = Offset for the record payload707// uint64_t RecordTableOffset = RecordTableGenerator.Emit708// uint64_t Num schema entries709// uint64_t Schema entry 0710// uint64_t Schema entry 1711// ....712// uint64_t Schema entry N - 1713// Frames serialized one after another714// Call stacks encoded as a radix tree715// OnDiskChainedHashTable MemProfRecordData716static Error writeMemProfV3(ProfOStream &OS,717memprof::IndexedMemProfData &MemProfData,718bool MemProfFullSchema) {719OS.write(memprof::Version3);720uint64_t HeaderUpdatePos = OS.tell();721OS.write(0ULL); // Reserve space for the memprof call stack payload offset.722OS.write(0ULL); // Reserve space for the memprof record payload offset.723OS.write(0ULL); // Reserve space for the memprof record table offset.724725auto Schema = memprof::getHotColdSchema();726if (MemProfFullSchema)727Schema = memprof::getFullSchema();728writeMemProfSchema(OS, Schema);729730llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =731memprof::computeFrameHistogram(MemProfData.CallStacks);732assert(MemProfData.Frames.size() == FrameHistogram.size());733734llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =735writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);736737uint64_t CallStackPayloadOffset = OS.tell();738llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>739MemProfCallStackIndexes = writeMemProfCallStackArray(740OS, MemProfData.CallStacks, MemProfFrameIndexes, FrameHistogram);741742uint64_t RecordPayloadOffset = OS.tell();743uint64_t RecordTableOffset =744writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,745&MemProfCallStackIndexes);746747uint64_t Header[] = {748CallStackPayloadOffset,749RecordPayloadOffset,750RecordTableOffset,751};752OS.patch({{HeaderUpdatePos, Header}});753754return Error::success();755}756757// Write out the MemProf data in a requested version.758static Error writeMemProf(ProfOStream &OS,759memprof::IndexedMemProfData &MemProfData,760memprof::IndexedVersion MemProfVersionRequested,761bool MemProfFullSchema) {762switch (MemProfVersionRequested) {763case memprof::Version0:764return writeMemProfV0(OS, MemProfData);765case memprof::Version1:766return writeMemProfV1(OS, MemProfData);767case memprof::Version2:768return writeMemProfV2(OS, MemProfData, MemProfFullSchema);769case memprof::Version3:770return writeMemProfV3(OS, MemProfData, MemProfFullSchema);771}772773return make_error<InstrProfError>(774instrprof_error::unsupported_version,775formatv("MemProf version {} not supported; "776"requires version between {} and {}, inclusive",777MemProfVersionRequested, memprof::MinimumSupportedVersion,778memprof::MaximumSupportedVersion));779}780781uint64_t InstrProfWriter::writeHeader(const IndexedInstrProf::Header &Header,782const bool WritePrevVersion,783ProfOStream &OS) {784// Only write out the first four fields.785for (int I = 0; I < 4; I++)786OS.write(reinterpret_cast<const uint64_t *>(&Header)[I]);787788// Remember the offset of the remaining fields to allow back patching later.789auto BackPatchStartOffset = OS.tell();790791// Reserve the space for back patching later.792OS.write(0); // HashOffset793OS.write(0); // MemProfOffset794OS.write(0); // BinaryIdOffset795OS.write(0); // TemporalProfTracesOffset796if (!WritePrevVersion)797OS.write(0); // VTableNamesOffset798799return BackPatchStartOffset;800}801802Error InstrProfWriter::writeVTableNames(ProfOStream &OS) {803std::vector<std::string> VTableNameStrs;804for (StringRef VTableName : VTableNames.keys())805VTableNameStrs.push_back(VTableName.str());806807std::string CompressedVTableNames;808if (!VTableNameStrs.empty())809if (Error E = collectGlobalObjectNameStrings(810VTableNameStrs, compression::zlib::isAvailable(),811CompressedVTableNames))812return E;813814const uint64_t CompressedStringLen = CompressedVTableNames.length();815816// Record the length of compressed string.817OS.write(CompressedStringLen);818819// Write the chars in compressed strings.820for (auto &c : CompressedVTableNames)821OS.writeByte(static_cast<uint8_t>(c));822823// Pad up to a multiple of 8.824// InstrProfReader could read bytes according to 'CompressedStringLen'.825const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);826827for (uint64_t K = CompressedStringLen; K < PaddedLength; K++)828OS.writeByte(0);829830return Error::success();831}832833Error InstrProfWriter::writeImpl(ProfOStream &OS) {834using namespace IndexedInstrProf;835using namespace support;836837OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator;838839InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs);840InfoObj->SummaryBuilder = &ISB;841InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs);842InfoObj->CSSummaryBuilder = &CSISB;843844// Populate the hash table generator.845SmallVector<std::pair<StringRef, const ProfilingData *>> OrderedData;846for (const auto &I : FunctionData)847if (shouldEncodeData(I.getValue()))848OrderedData.emplace_back((I.getKey()), &I.getValue());849llvm::sort(OrderedData, less_first());850for (const auto &I : OrderedData)851Generator.insert(I.first, I.second);852853// Write the header.854IndexedInstrProf::Header Header;855Header.Version = WritePrevVersion856? IndexedInstrProf::ProfVersion::Version11857: IndexedInstrProf::ProfVersion::CurrentVersion;858// The WritePrevVersion handling will either need to be removed or updated859// if the version is advanced beyond 12.860static_assert(IndexedInstrProf::ProfVersion::CurrentVersion ==861IndexedInstrProf::ProfVersion::Version12);862if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))863Header.Version |= VARIANT_MASK_IR_PROF;864if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))865Header.Version |= VARIANT_MASK_CSIR_PROF;866if (static_cast<bool>(ProfileKind &867InstrProfKind::FunctionEntryInstrumentation))868Header.Version |= VARIANT_MASK_INSTR_ENTRY;869if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))870Header.Version |= VARIANT_MASK_BYTE_COVERAGE;871if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly))872Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY;873if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf))874Header.Version |= VARIANT_MASK_MEMPROF;875if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))876Header.Version |= VARIANT_MASK_TEMPORAL_PROF;877878const uint64_t BackPatchStartOffset =879writeHeader(Header, WritePrevVersion, OS);880881// Reserve space to write profile summary data.882uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();883uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);884// Remember the summary offset.885uint64_t SummaryOffset = OS.tell();886for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)887OS.write(0);888uint64_t CSSummaryOffset = 0;889uint64_t CSSummarySize = 0;890if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {891CSSummaryOffset = OS.tell();892CSSummarySize = SummarySize / sizeof(uint64_t);893for (unsigned I = 0; I < CSSummarySize; I++)894OS.write(0);895}896897// Write the hash table.898uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj);899900// Write the MemProf profile data if we have it.901uint64_t MemProfSectionStart = 0;902if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {903MemProfSectionStart = OS.tell();904if (auto E = writeMemProf(OS, MemProfData, MemProfVersionRequested,905MemProfFullSchema))906return E;907}908909// BinaryIdSection has two parts:910// 1. uint64_t BinaryIdsSectionSize911// 2. list of binary ids that consist of:912// a. uint64_t BinaryIdLength913// b. uint8_t BinaryIdData914// c. uint8_t Padding (if necessary)915uint64_t BinaryIdSectionStart = OS.tell();916// Calculate size of binary section.917uint64_t BinaryIdsSectionSize = 0;918919// Remove duplicate binary ids.920llvm::sort(BinaryIds);921BinaryIds.erase(llvm::unique(BinaryIds), BinaryIds.end());922923for (const auto &BI : BinaryIds) {924// Increment by binary id length data type size.925BinaryIdsSectionSize += sizeof(uint64_t);926// Increment by binary id data length, aligned to 8 bytes.927BinaryIdsSectionSize += alignToPowerOf2(BI.size(), sizeof(uint64_t));928}929// Write binary ids section size.930OS.write(BinaryIdsSectionSize);931932for (const auto &BI : BinaryIds) {933uint64_t BILen = BI.size();934// Write binary id length.935OS.write(BILen);936// Write binary id data.937for (unsigned K = 0; K < BILen; K++)938OS.writeByte(BI[K]);939// Write padding if necessary.940uint64_t PaddingSize = alignToPowerOf2(BILen, sizeof(uint64_t)) - BILen;941for (unsigned K = 0; K < PaddingSize; K++)942OS.writeByte(0);943}944945uint64_t VTableNamesSectionStart = OS.tell();946947if (!WritePrevVersion)948if (Error E = writeVTableNames(OS))949return E;950951uint64_t TemporalProfTracesSectionStart = 0;952if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {953TemporalProfTracesSectionStart = OS.tell();954OS.write(TemporalProfTraces.size());955OS.write(TemporalProfTraceStreamSize);956for (auto &Trace : TemporalProfTraces) {957OS.write(Trace.Weight);958OS.write(Trace.FunctionNameRefs.size());959for (auto &NameRef : Trace.FunctionNameRefs)960OS.write(NameRef);961}962}963964// Allocate space for data to be serialized out.965std::unique_ptr<IndexedInstrProf::Summary> TheSummary =966IndexedInstrProf::allocSummary(SummarySize);967// Compute the Summary and copy the data to the data968// structure to be serialized out (to disk or buffer).969std::unique_ptr<ProfileSummary> PS = ISB.getSummary();970setSummary(TheSummary.get(), *PS);971InfoObj->SummaryBuilder = nullptr;972973// For Context Sensitive summary.974std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr;975if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {976TheCSSummary = IndexedInstrProf::allocSummary(SummarySize);977std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary();978setSummary(TheCSSummary.get(), *CSPS);979}980InfoObj->CSSummaryBuilder = nullptr;981982SmallVector<uint64_t, 8> HeaderOffsets = {HashTableStart, MemProfSectionStart,983BinaryIdSectionStart,984TemporalProfTracesSectionStart};985if (!WritePrevVersion)986HeaderOffsets.push_back(VTableNamesSectionStart);987988PatchItem PatchItems[] = {989// Patch the Header fields990{BackPatchStartOffset, HeaderOffsets},991// Patch the summary data.992{SummaryOffset,993ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheSummary.get()),994SummarySize / sizeof(uint64_t))},995{CSSummaryOffset,996ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheCSSummary.get()),997CSSummarySize)}};998999OS.patch(PatchItems);10001001for (const auto &I : FunctionData)1002for (const auto &F : I.getValue())1003if (Error E = validateRecord(F.second))1004return E;10051006return Error::success();1007}10081009Error InstrProfWriter::write(raw_fd_ostream &OS) {1010// Write the hash table.1011ProfOStream POS(OS);1012return writeImpl(POS);1013}10141015Error InstrProfWriter::write(raw_string_ostream &OS) {1016ProfOStream POS(OS);1017return writeImpl(POS);1018}10191020std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {1021std::string Data;1022raw_string_ostream OS(Data);1023// Write the hash table.1024if (Error E = write(OS))1025return nullptr;1026// Return this in an aligned memory buffer.1027return MemoryBuffer::getMemBufferCopy(Data);1028}10291030static const char *ValueProfKindStr[] = {1031#define VALUE_PROF_KIND(Enumerator, Value, Descr) #Enumerator,1032#include "llvm/ProfileData/InstrProfData.inc"1033};10341035Error InstrProfWriter::validateRecord(const InstrProfRecord &Func) {1036for (uint32_t VK = 0; VK <= IPVK_Last; VK++) {1037if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)1038continue;1039uint32_t NS = Func.getNumValueSites(VK);1040for (uint32_t S = 0; S < NS; S++) {1041DenseSet<uint64_t> SeenValues;1042for (const auto &V : Func.getValueArrayForSite(VK, S))1043if (!SeenValues.insert(V.Value).second)1044return make_error<InstrProfError>(instrprof_error::invalid_prof);1045}1046}10471048return Error::success();1049}10501051void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,1052const InstrProfRecord &Func,1053InstrProfSymtab &Symtab,1054raw_fd_ostream &OS) {1055OS << Name << "\n";1056OS << "# Func Hash:\n" << Hash << "\n";1057OS << "# Num Counters:\n" << Func.Counts.size() << "\n";1058OS << "# Counter Values:\n";1059for (uint64_t Count : Func.Counts)1060OS << Count << "\n";10611062if (Func.BitmapBytes.size() > 0) {1063OS << "# Num Bitmap Bytes:\n$" << Func.BitmapBytes.size() << "\n";1064OS << "# Bitmap Byte Values:\n";1065for (uint8_t Byte : Func.BitmapBytes) {1066OS << "0x";1067OS.write_hex(Byte);1068OS << "\n";1069}1070OS << "\n";1071}10721073uint32_t NumValueKinds = Func.getNumValueKinds();1074if (!NumValueKinds) {1075OS << "\n";1076return;1077}10781079OS << "# Num Value Kinds:\n" << Func.getNumValueKinds() << "\n";1080for (uint32_t VK = 0; VK < IPVK_Last + 1; VK++) {1081uint32_t NS = Func.getNumValueSites(VK);1082if (!NS)1083continue;1084OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n";1085OS << "# NumValueSites:\n" << NS << "\n";1086for (uint32_t S = 0; S < NS; S++) {1087auto VD = Func.getValueArrayForSite(VK, S);1088OS << VD.size() << "\n";1089for (const auto &V : VD) {1090if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)1091OS << Symtab.getFuncOrVarNameIfDefined(V.Value) << ":" << V.Count1092<< "\n";1093else1094OS << V.Value << ":" << V.Count << "\n";1095}1096}1097}10981099OS << "\n";1100}11011102Error InstrProfWriter::writeText(raw_fd_ostream &OS) {1103// Check CS first since it implies an IR level profile.1104if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))1105OS << "# CSIR level Instrumentation Flag\n:csir\n";1106else if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))1107OS << "# IR level Instrumentation Flag\n:ir\n";11081109if (static_cast<bool>(ProfileKind &1110InstrProfKind::FunctionEntryInstrumentation))1111OS << "# Always instrument the function entry block\n:entry_first\n";1112if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))1113OS << "# Instrument block coverage\n:single_byte_coverage\n";1114InstrProfSymtab Symtab;11151116using FuncPair = detail::DenseMapPair<uint64_t, InstrProfRecord>;1117using RecordType = std::pair<StringRef, FuncPair>;1118SmallVector<RecordType, 4> OrderedFuncData;11191120for (const auto &I : FunctionData) {1121if (shouldEncodeData(I.getValue())) {1122if (Error E = Symtab.addFuncName(I.getKey()))1123return E;1124for (const auto &Func : I.getValue())1125OrderedFuncData.push_back(std::make_pair(I.getKey(), Func));1126}1127}11281129for (const auto &VTableName : VTableNames)1130if (Error E = Symtab.addVTableName(VTableName.getKey()))1131return E;11321133if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))1134writeTextTemporalProfTraceData(OS, Symtab);11351136llvm::sort(OrderedFuncData, [](const RecordType &A, const RecordType &B) {1137return std::tie(A.first, A.second.first) <1138std::tie(B.first, B.second.first);1139});11401141for (const auto &record : OrderedFuncData) {1142const StringRef &Name = record.first;1143const FuncPair &Func = record.second;1144writeRecordInText(Name, Func.first, Func.second, Symtab, OS);1145}11461147for (const auto &record : OrderedFuncData) {1148const FuncPair &Func = record.second;1149if (Error E = validateRecord(Func.second))1150return E;1151}11521153return Error::success();1154}11551156void InstrProfWriter::writeTextTemporalProfTraceData(raw_fd_ostream &OS,1157InstrProfSymtab &Symtab) {1158OS << ":temporal_prof_traces\n";1159OS << "# Num Temporal Profile Traces:\n" << TemporalProfTraces.size() << "\n";1160OS << "# Temporal Profile Trace Stream Size:\n"1161<< TemporalProfTraceStreamSize << "\n";1162for (auto &Trace : TemporalProfTraces) {1163OS << "# Weight:\n" << Trace.Weight << "\n";1164for (auto &NameRef : Trace.FunctionNameRefs)1165OS << Symtab.getFuncOrVarName(NameRef) << ",";1166OS << "\n";1167}1168OS << "\n";1169}117011711172