Path: blob/main/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
35293 views
//===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "llvm/DebugInfo/PDB/Native/PDBFile.h"9#include "llvm/ADT/ArrayRef.h"10#include "llvm/DebugInfo/MSF/MSFCommon.h"11#include "llvm/DebugInfo/MSF/MappedBlockStream.h"12#include "llvm/DebugInfo/PDB/Native/DbiStream.h"13#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"14#include "llvm/DebugInfo/PDB/Native/InfoStream.h"15#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"16#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"17#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"18#include "llvm/DebugInfo/PDB/Native/RawError.h"19#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"20#include "llvm/DebugInfo/PDB/Native/TpiStream.h"21#include "llvm/Support/BinaryStream.h"22#include "llvm/Support/BinaryStreamArray.h"23#include "llvm/Support/BinaryStreamReader.h"24#include "llvm/Support/Endian.h"25#include "llvm/Support/Error.h"26#include "llvm/Support/Path.h"27#include <algorithm>28#include <cassert>29#include <cstdint>3031using namespace llvm;32using namespace llvm::codeview;33using namespace llvm::msf;34using namespace llvm::pdb;3536namespace {37typedef FixedStreamArray<support::ulittle32_t> ulittle_array;38} // end anonymous namespace3940PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,41BumpPtrAllocator &Allocator)42: FilePath(std::string(Path)), Allocator(Allocator),43Buffer(std::move(PdbFileBuffer)) {}4445PDBFile::~PDBFile() = default;4647StringRef PDBFile::getFilePath() const { return FilePath; }4849StringRef PDBFile::getFileDirectory() const {50return sys::path::parent_path(FilePath);51}5253uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }5455uint32_t PDBFile::getFreeBlockMapBlock() const {56return ContainerLayout.SB->FreeBlockMapBlock;57}5859uint32_t PDBFile::getBlockCount() const {60return ContainerLayout.SB->NumBlocks;61}6263uint32_t PDBFile::getNumDirectoryBytes() const {64return ContainerLayout.SB->NumDirectoryBytes;65}6667uint32_t PDBFile::getBlockMapIndex() const {68return ContainerLayout.SB->BlockMapAddr;69}7071uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }7273uint32_t PDBFile::getNumDirectoryBlocks() const {74return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,75ContainerLayout.SB->BlockSize);76}7778uint64_t PDBFile::getBlockMapOffset() const {79return (uint64_t)ContainerLayout.SB->BlockMapAddr *80ContainerLayout.SB->BlockSize;81}8283uint32_t PDBFile::getNumStreams() const {84return ContainerLayout.StreamSizes.size();85}8687uint32_t PDBFile::getMaxStreamSize() const {88return *llvm::max_element(ContainerLayout.StreamSizes);89}9091uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {92return ContainerLayout.StreamSizes[StreamIndex];93}9495ArrayRef<support::ulittle32_t>96PDBFile::getStreamBlockList(uint32_t StreamIndex) const {97return ContainerLayout.StreamMap[StreamIndex];98}99100uint64_t PDBFile::getFileSize() const { return Buffer->getLength(); }101102Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,103uint32_t NumBytes) const {104uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());105106ArrayRef<uint8_t> Result;107if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))108return std::move(EC);109return Result;110}111112Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,113ArrayRef<uint8_t> Data) const {114return make_error<RawError>(raw_error_code::not_writable,115"PDBFile is immutable");116}117118Error PDBFile::parseFileHeaders() {119BinaryStreamReader Reader(*Buffer);120121// Initialize SB.122const msf::SuperBlock *SB = nullptr;123if (auto EC = Reader.readObject(SB)) {124consumeError(std::move(EC));125return make_error<RawError>(raw_error_code::corrupt_file,126"MSF superblock is missing");127}128129if (auto EC = msf::validateSuperBlock(*SB))130return EC;131132if (Buffer->getLength() % SB->BlockSize != 0)133return make_error<RawError>(raw_error_code::corrupt_file,134"File size is not a multiple of block size");135ContainerLayout.SB = SB;136137// Initialize Free Page Map.138ContainerLayout.FreePageMap.resize(SB->NumBlocks);139// The Fpm exists either at block 1 or block 2 of the MSF. However, this140// allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and141// thusly an equal number of total blocks in the file. For a block size142// of 4KiB (very common), this would yield 32KiB total blocks in file, for a143// maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so144// the Fpm is split across the file at `getBlockSize()` intervals. As a145// result, every block whose index is of the form |{1,2} + getBlockSize() * k|146// for any non-negative integer k is an Fpm block. In theory, we only really147// need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but148// current versions of the MSF format already expect the Fpm to be arranged149// at getBlockSize() intervals, so we have to be compatible.150// See the function fpmPn() for more information:151// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489152auto FpmStream =153MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);154BinaryStreamReader FpmReader(*FpmStream);155ArrayRef<uint8_t> FpmBytes;156if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining()))157return EC;158uint32_t BlocksRemaining = getBlockCount();159uint32_t BI = 0;160for (auto Byte : FpmBytes) {161uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);162for (uint32_t I = 0; I < BlocksThisByte; ++I) {163if (Byte & (1 << I))164ContainerLayout.FreePageMap[BI] = true;165--BlocksRemaining;166++BI;167}168}169170Reader.setOffset(getBlockMapOffset());171if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,172getNumDirectoryBlocks()))173return EC;174175return Error::success();176}177178Error PDBFile::parseStreamData() {179assert(ContainerLayout.SB);180if (DirectoryStream)181return Error::success();182183uint32_t NumStreams = 0;184185// Normally you can't use a MappedBlockStream without having fully parsed the186// PDB file, because it accesses the directory and various other things, which187// is exactly what we are attempting to parse. By specifying a custom188// subclass of IPDBStreamData which only accesses the fields that have already189// been parsed, we can avoid this and reuse MappedBlockStream.190auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,191Allocator);192BinaryStreamReader Reader(*DS);193if (auto EC = Reader.readInteger(NumStreams))194return EC;195196if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))197return EC;198for (uint32_t I = 0; I < NumStreams; ++I) {199uint32_t StreamSize = getStreamByteSize(I);200// FIXME: What does StreamSize ~0U mean?201uint64_t NumExpectedStreamBlocks =202StreamSize == UINT32_MAX203? 0204: msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);205206// For convenience, we store the block array contiguously. This is because207// if someone calls setStreamMap(), it is more convenient to be able to call208// it with an ArrayRef instead of setting up a StreamRef. Since the209// DirectoryStream is cached in the class and thus lives for the life of the210// class, we can be guaranteed that readArray() will return a stable211// reference, even if it has to allocate from its internal pool.212ArrayRef<support::ulittle32_t> Blocks;213if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))214return EC;215for (uint32_t Block : Blocks) {216uint64_t BlockEndOffset =217(uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;218if (BlockEndOffset > getFileSize())219return make_error<RawError>(raw_error_code::corrupt_file,220"Stream block map is corrupt.");221}222ContainerLayout.StreamMap.push_back(Blocks);223}224225// We should have read exactly SB->NumDirectoryBytes bytes.226assert(Reader.bytesRemaining() == 0);227DirectoryStream = std::move(DS);228return Error::success();229}230231ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {232return ContainerLayout.DirectoryBlocks;233}234235std::unique_ptr<MappedBlockStream>236PDBFile::createIndexedStream(uint16_t SN) const {237if (SN == kInvalidStreamIndex)238return nullptr;239return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,240Allocator);241}242243MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {244MSFStreamLayout Result;245auto Blocks = getStreamBlockList(StreamIdx);246Result.Blocks.assign(Blocks.begin(), Blocks.end());247Result.Length = getStreamByteSize(StreamIdx);248return Result;249}250251msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const {252return msf::getFpmStreamLayout(ContainerLayout);253}254255Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {256if (!Globals) {257auto DbiS = getPDBDbiStream();258if (!DbiS)259return DbiS.takeError();260261auto GlobalS =262safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex());263if (!GlobalS)264return GlobalS.takeError();265auto TempGlobals = std::make_unique<GlobalsStream>(std::move(*GlobalS));266if (auto EC = TempGlobals->reload())267return std::move(EC);268Globals = std::move(TempGlobals);269}270return *Globals;271}272273Expected<InfoStream &> PDBFile::getPDBInfoStream() {274if (!Info) {275auto InfoS = safelyCreateIndexedStream(StreamPDB);276if (!InfoS)277return InfoS.takeError();278auto TempInfo = std::make_unique<InfoStream>(std::move(*InfoS));279if (auto EC = TempInfo->reload())280return std::move(EC);281Info = std::move(TempInfo);282}283return *Info;284}285286Expected<DbiStream &> PDBFile::getPDBDbiStream() {287if (!Dbi) {288auto DbiS = safelyCreateIndexedStream(StreamDBI);289if (!DbiS)290return DbiS.takeError();291auto TempDbi = std::make_unique<DbiStream>(std::move(*DbiS));292if (auto EC = TempDbi->reload(this))293return std::move(EC);294Dbi = std::move(TempDbi);295}296return *Dbi;297}298299Expected<TpiStream &> PDBFile::getPDBTpiStream() {300if (!Tpi) {301auto TpiS = safelyCreateIndexedStream(StreamTPI);302if (!TpiS)303return TpiS.takeError();304auto TempTpi = std::make_unique<TpiStream>(*this, std::move(*TpiS));305if (auto EC = TempTpi->reload())306return std::move(EC);307Tpi = std::move(TempTpi);308}309return *Tpi;310}311312Expected<TpiStream &> PDBFile::getPDBIpiStream() {313if (!Ipi) {314if (!hasPDBIpiStream())315return make_error<RawError>(raw_error_code::no_stream);316317auto IpiS = safelyCreateIndexedStream(StreamIPI);318if (!IpiS)319return IpiS.takeError();320auto TempIpi = std::make_unique<TpiStream>(*this, std::move(*IpiS));321if (auto EC = TempIpi->reload())322return std::move(EC);323Ipi = std::move(TempIpi);324}325return *Ipi;326}327328Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {329if (!Publics) {330auto DbiS = getPDBDbiStream();331if (!DbiS)332return DbiS.takeError();333334auto PublicS =335safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex());336if (!PublicS)337return PublicS.takeError();338auto TempPublics = std::make_unique<PublicsStream>(std::move(*PublicS));339if (auto EC = TempPublics->reload())340return std::move(EC);341Publics = std::move(TempPublics);342}343return *Publics;344}345346Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {347if (!Symbols) {348auto DbiS = getPDBDbiStream();349if (!DbiS)350return DbiS.takeError();351352uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();353auto SymbolS = safelyCreateIndexedStream(SymbolStreamNum);354if (!SymbolS)355return SymbolS.takeError();356357auto TempSymbols = std::make_unique<SymbolStream>(std::move(*SymbolS));358if (auto EC = TempSymbols->reload())359return std::move(EC);360Symbols = std::move(TempSymbols);361}362return *Symbols;363}364365Expected<PDBStringTable &> PDBFile::getStringTable() {366if (!Strings) {367auto NS = safelyCreateNamedStream("/names");368if (!NS)369return NS.takeError();370371auto N = std::make_unique<PDBStringTable>();372BinaryStreamReader Reader(**NS);373if (auto EC = N->reload(Reader))374return std::move(EC);375assert(Reader.bytesRemaining() == 0);376StringTableStream = std::move(*NS);377Strings = std::move(N);378}379return *Strings;380}381382Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() {383if (!InjectedSources) {384auto IJS = safelyCreateNamedStream("/src/headerblock");385if (!IJS)386return IJS.takeError();387388auto Strings = getStringTable();389if (!Strings)390return Strings.takeError();391392auto IJ = std::make_unique<InjectedSourceStream>(std::move(*IJS));393if (auto EC = IJ->reload(*Strings))394return std::move(EC);395InjectedSources = std::move(IJ);396}397return *InjectedSources;398}399400uint32_t PDBFile::getPointerSize() {401auto DbiS = getPDBDbiStream();402if (!DbiS)403return 0;404PDB_Machine Machine = DbiS->getMachineType();405if (Machine == PDB_Machine::Amd64)406return 8;407return 4;408}409410bool PDBFile::hasPDBDbiStream() const {411return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;412}413414bool PDBFile::hasPDBGlobalsStream() {415auto DbiS = getPDBDbiStream();416if (!DbiS) {417consumeError(DbiS.takeError());418return false;419}420421return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();422}423424bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); }425426bool PDBFile::hasPDBIpiStream() const {427if (!hasPDBInfoStream())428return false;429430if (StreamIPI >= getNumStreams())431return false;432433auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream());434return InfoStream.containsIdStream();435}436437bool PDBFile::hasPDBPublicsStream() {438auto DbiS = getPDBDbiStream();439if (!DbiS) {440consumeError(DbiS.takeError());441return false;442}443return DbiS->getPublicSymbolStreamIndex() < getNumStreams();444}445446bool PDBFile::hasPDBSymbolStream() {447auto DbiS = getPDBDbiStream();448if (!DbiS)449return false;450return DbiS->getSymRecordStreamIndex() < getNumStreams();451}452453bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }454455bool PDBFile::hasPDBStringTable() {456auto IS = getPDBInfoStream();457if (!IS)458return false;459Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");460if (!ExpectedNSI) {461consumeError(ExpectedNSI.takeError());462return false;463}464assert(*ExpectedNSI < getNumStreams());465return true;466}467468bool PDBFile::hasPDBInjectedSourceStream() {469auto IS = getPDBInfoStream();470if (!IS)471return false;472Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock");473if (!ExpectedNSI) {474consumeError(ExpectedNSI.takeError());475return false;476}477assert(*ExpectedNSI < getNumStreams());478return true;479}480481/// Wrapper around MappedBlockStream::createIndexedStream() that checks if a482/// stream with that index actually exists. If it does not, the return value483/// will have an MSFError with code msf_error_code::no_stream. Else, the return484/// value will contain the stream returned by createIndexedStream().485Expected<std::unique_ptr<MappedBlockStream>>486PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const {487if (StreamIndex >= getNumStreams())488// This rejects kInvalidStreamIndex with an error as well.489return make_error<RawError>(raw_error_code::no_stream);490return createIndexedStream(StreamIndex);491}492493Expected<std::unique_ptr<MappedBlockStream>>494PDBFile::safelyCreateNamedStream(StringRef Name) {495auto IS = getPDBInfoStream();496if (!IS)497return IS.takeError();498499Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name);500if (!ExpectedNSI)501return ExpectedNSI.takeError();502uint32_t NameStreamIndex = *ExpectedNSI;503504return safelyCreateIndexedStream(NameStreamIndex);505}506507508