Path: blob/main/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp
35293 views
//===- InputFile.cpp ------------------------------------------ *- C++ --*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "llvm/DebugInfo/PDB/Native/InputFile.h"910#include "llvm/ADT/StringExtras.h"11#include "llvm/BinaryFormat/Magic.h"12#include "llvm/DebugInfo/CodeView/CodeView.h"13#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"14#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"15#include "llvm/DebugInfo/MSF/MappedBlockStream.h"16#include "llvm/DebugInfo/PDB/Native/DbiStream.h"17#include "llvm/DebugInfo/PDB/Native/FormatUtil.h"18#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"19#include "llvm/DebugInfo/PDB/Native/NativeSession.h"20#include "llvm/DebugInfo/PDB/Native/PDBFile.h"21#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"22#include "llvm/DebugInfo/PDB/Native/RawError.h"23#include "llvm/DebugInfo/PDB/Native/TpiStream.h"24#include "llvm/DebugInfo/PDB/PDB.h"25#include "llvm/Object/COFF.h"26#include "llvm/Support/FileSystem.h"27#include "llvm/Support/FormatVariadic.h"2829using namespace llvm;30using namespace llvm::codeview;31using namespace llvm::object;32using namespace llvm::pdb;3334InputFile::InputFile() = default;35InputFile::~InputFile() = default;3637Expected<ModuleDebugStreamRef>38llvm::pdb::getModuleDebugStream(PDBFile &File, StringRef &ModuleName,39uint32_t Index) {40Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();41if (!DbiOrErr)42return DbiOrErr.takeError();43DbiStream &Dbi = *DbiOrErr;44const auto &Modules = Dbi.modules();45if (Index >= Modules.getModuleCount())46return make_error<RawError>(raw_error_code::index_out_of_bounds,47"Invalid module index");4849auto Modi = Modules.getModuleDescriptor(Index);5051ModuleName = Modi.getModuleName();5253uint16_t ModiStream = Modi.getModuleStreamIndex();54if (ModiStream == kInvalidStreamIndex)55return make_error<RawError>(raw_error_code::no_stream,56"Module stream not present");5758auto ModStreamData = File.createIndexedStream(ModiStream);5960ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));61if (auto EC = ModS.reload())62return make_error<RawError>(raw_error_code::corrupt_file,63"Invalid module stream");6465return std::move(ModS);66}6768Expected<ModuleDebugStreamRef> llvm::pdb::getModuleDebugStream(PDBFile &File,69uint32_t Index) {70Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();71if (!DbiOrErr)72return DbiOrErr.takeError();73DbiStream &Dbi = *DbiOrErr;74const auto &Modules = Dbi.modules();75auto Modi = Modules.getModuleDescriptor(Index);7677uint16_t ModiStream = Modi.getModuleStreamIndex();78if (ModiStream == kInvalidStreamIndex)79return make_error<RawError>(raw_error_code::no_stream,80"Module stream not present");8182auto ModStreamData = File.createIndexedStream(ModiStream);8384ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));85if (Error Err = ModS.reload())86return make_error<RawError>(raw_error_code::corrupt_file,87"Invalid module stream");8889return std::move(ModS);90}9192static inline bool isCodeViewDebugSubsection(object::SectionRef Section,93StringRef Name,94BinaryStreamReader &Reader) {95if (Expected<StringRef> NameOrErr = Section.getName()) {96if (*NameOrErr != Name)97return false;98} else {99consumeError(NameOrErr.takeError());100return false;101}102103Expected<StringRef> ContentsOrErr = Section.getContents();104if (!ContentsOrErr) {105consumeError(ContentsOrErr.takeError());106return false;107}108109Reader = BinaryStreamReader(*ContentsOrErr, llvm::endianness::little);110uint32_t Magic;111if (Reader.bytesRemaining() < sizeof(uint32_t))112return false;113cantFail(Reader.readInteger(Magic));114if (Magic != COFF::DEBUG_SECTION_MAGIC)115return false;116return true;117}118119static inline bool isDebugSSection(object::SectionRef Section,120DebugSubsectionArray &Subsections) {121BinaryStreamReader Reader;122if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))123return false;124125cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));126return true;127}128129static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {130BinaryStreamReader Reader;131if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&132!isCodeViewDebugSubsection(Section, ".debug$P", Reader))133return false;134cantFail(Reader.readArray(Types, Reader.bytesRemaining()));135return true;136}137138static std::string formatChecksumKind(FileChecksumKind Kind) {139switch (Kind) {140RETURN_CASE(FileChecksumKind, None, "None");141RETURN_CASE(FileChecksumKind, MD5, "MD5");142RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");143RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");144}145return formatUnknownEnum(Kind);146}147148template <typename... Args>149static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) {150if (Append)151Printer.format(std::forward<Args>(args)...);152else153Printer.formatLine(std::forward<Args>(args)...);154}155156SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {157if (!File)158return;159160if (File->isPdb())161initializeForPdb(GroupIndex);162else {163Name = ".debug$S";164uint32_t I = 0;165for (const auto &S : File->obj().sections()) {166DebugSubsectionArray SS;167if (!isDebugSSection(S, SS))168continue;169170if (!SC.hasChecksums() || !SC.hasStrings())171SC.initialize(SS);172173if (I == GroupIndex)174Subsections = SS;175176if (SC.hasChecksums() && SC.hasStrings())177break;178}179rebuildChecksumMap();180}181}182183StringRef SymbolGroup::name() const { return Name; }184185void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {186Subsections = SS;187}188189void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }190191void SymbolGroup::initializeForPdb(uint32_t Modi) {192assert(File && File->isPdb());193194// PDB always uses the same string table, but each module has its own195// checksums. So we only set the strings if they're not already set.196if (!SC.hasStrings()) {197auto StringTable = File->pdb().getStringTable();198if (StringTable)199SC.setStrings(StringTable->getStringTable());200else201consumeError(StringTable.takeError());202}203204SC.resetChecksums();205auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);206if (!MDS) {207consumeError(MDS.takeError());208return;209}210211DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));212Subsections = DebugStream->getSubsectionsArray();213SC.initialize(Subsections);214rebuildChecksumMap();215}216217void SymbolGroup::rebuildChecksumMap() {218if (!SC.hasChecksums())219return;220221for (const auto &Entry : SC.checksums()) {222auto S = SC.strings().getString(Entry.FileNameOffset);223if (!S)224continue;225ChecksumsByFile[*S] = Entry;226}227}228229const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {230assert(File && File->isPdb() && DebugStream);231return *DebugStream;232}233234Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {235return SC.strings().getString(Offset);236}237238Expected<StringRef> SymbolGroup::getNameFromChecksums(uint32_t Offset) const {239StringRef Name;240if (!SC.hasChecksums()) {241return std::move(Name);242}243244auto Iter = SC.checksums().getArray().at(Offset);245if (Iter == SC.checksums().getArray().end()) {246return std::move(Name);247}248249uint32_t FO = Iter->FileNameOffset;250auto ExpectedFile = getNameFromStringTable(FO);251if (!ExpectedFile) {252return std::move(Name);253}254255return *ExpectedFile;256}257258void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,259bool Append) const {260auto FC = ChecksumsByFile.find(File);261if (FC == ChecksumsByFile.end()) {262formatInternal(Printer, Append, "- (no checksum) {0}", File);263return;264}265266formatInternal(Printer, Append, "- ({0}: {1}) {2}",267formatChecksumKind(FC->getValue().Kind),268toHex(FC->getValue().Checksum), File);269}270271void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,272uint32_t Offset,273bool Append) const {274if (!SC.hasChecksums()) {275formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);276return;277}278279auto Iter = SC.checksums().getArray().at(Offset);280if (Iter == SC.checksums().getArray().end()) {281formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);282return;283}284285uint32_t FO = Iter->FileNameOffset;286auto ExpectedFile = getNameFromStringTable(FO);287if (!ExpectedFile) {288formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);289consumeError(ExpectedFile.takeError());290return;291}292if (Iter->Kind == FileChecksumKind::None) {293formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);294} else {295formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,296formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));297}298}299300Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {301InputFile IF;302if (!llvm::sys::fs::exists(Path))303return make_error<StringError>(formatv("File {0} not found", Path),304inconvertibleErrorCode());305306file_magic Magic;307if (auto EC = identify_magic(Path, Magic))308return make_error<StringError>(309formatv("Unable to identify file type for file {0}", Path), EC);310311if (Magic == file_magic::coff_object) {312Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);313if (!BinaryOrErr)314return BinaryOrErr.takeError();315316IF.CoffObject = std::move(*BinaryOrErr);317IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());318return std::move(IF);319}320321if (Magic == file_magic::pdb) {322std::unique_ptr<IPDBSession> Session;323if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))324return std::move(Err);325326IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));327IF.PdbOrObj = &IF.PdbSession->getPDBFile();328329return std::move(IF);330}331332if (!AllowUnknownFile)333return make_error<StringError>(334formatv("File {0} is not a supported file type", Path),335inconvertibleErrorCode());336337auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false,338/*RequiresNullTerminator=*/false);339if (!Result)340return make_error<StringError>(341formatv("File {0} could not be opened", Path), Result.getError());342343IF.UnknownFile = std::move(*Result);344IF.PdbOrObj = IF.UnknownFile.get();345return std::move(IF);346}347348PDBFile &InputFile::pdb() {349assert(isPdb());350return *cast<PDBFile *>(PdbOrObj);351}352353const PDBFile &InputFile::pdb() const {354assert(isPdb());355return *cast<PDBFile *>(PdbOrObj);356}357358object::COFFObjectFile &InputFile::obj() {359assert(isObj());360return *cast<object::COFFObjectFile *>(PdbOrObj);361}362363const object::COFFObjectFile &InputFile::obj() const {364assert(isObj());365return *cast<object::COFFObjectFile *>(PdbOrObj);366}367368MemoryBuffer &InputFile::unknown() {369assert(isUnknown());370return *cast<MemoryBuffer *>(PdbOrObj);371}372373const MemoryBuffer &InputFile::unknown() const {374assert(isUnknown());375return *cast<MemoryBuffer *>(PdbOrObj);376}377378StringRef InputFile::getFilePath() const {379if (isPdb())380return pdb().getFilePath();381if (isObj())382return obj().getFileName();383assert(isUnknown());384return unknown().getBufferIdentifier();385}386387bool InputFile::hasTypes() const {388if (isPdb())389return pdb().hasPDBTpiStream();390391for (const auto &Section : obj().sections()) {392CVTypeArray Types;393if (isDebugTSection(Section, Types))394return true;395}396return false;397}398399bool InputFile::hasIds() const {400if (isObj())401return false;402return pdb().hasPDBIpiStream();403}404405bool InputFile::isPdb() const { return isa<PDBFile *>(PdbOrObj); }406407bool InputFile::isObj() const {408return isa<object::COFFObjectFile *>(PdbOrObj);409}410411bool InputFile::isUnknown() const { return isa<MemoryBuffer *>(PdbOrObj); }412413codeview::LazyRandomTypeCollection &414InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {415if (Types && Kind == kTypes)416return *Types;417if (Ids && Kind == kIds)418return *Ids;419420if (Kind == kIds) {421assert(isPdb() && pdb().hasPDBIpiStream());422}423424// If the collection was already initialized, we should have just returned it425// in step 1.426if (isPdb()) {427TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;428auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()429: pdb().getPDBTpiStream());430431auto &Array = Stream.typeArray();432uint32_t Count = Stream.getNumTypeRecords();433auto Offsets = Stream.getTypeIndexOffsets();434Collection =435std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);436return *Collection;437}438439assert(isObj());440assert(Kind == kTypes);441assert(!Types);442443for (const auto &Section : obj().sections()) {444CVTypeArray Records;445if (!isDebugTSection(Section, Records))446continue;447448Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);449return *Types;450}451452Types = std::make_unique<LazyRandomTypeCollection>(100);453return *Types;454}455456codeview::LazyRandomTypeCollection &InputFile::types() {457return getOrCreateTypeCollection(kTypes);458}459460codeview::LazyRandomTypeCollection &InputFile::ids() {461// Object files have only one type stream that contains both types and ids.462// Similarly, some PDBs don't contain an IPI stream, and for those both types463// and IDs are in the same stream.464if (isObj() || !pdb().hasPDBIpiStream())465return types();466467return getOrCreateTypeCollection(kIds);468}469470iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {471return make_range<SymbolGroupIterator>(symbol_groups_begin(),472symbol_groups_end());473}474475SymbolGroupIterator InputFile::symbol_groups_begin() {476return SymbolGroupIterator(*this);477}478479SymbolGroupIterator InputFile::symbol_groups_end() {480return SymbolGroupIterator();481}482483SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}484485SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {486if (File.isObj()) {487SectionIter = File.obj().section_begin();488scanToNextDebugS();489}490}491492bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {493bool E = isEnd();494bool RE = R.isEnd();495if (E || RE)496return E == RE;497498if (Value.File != R.Value.File)499return false;500return Index == R.Index;501}502503const SymbolGroup &SymbolGroupIterator::operator*() const {504assert(!isEnd());505return Value;506}507SymbolGroup &SymbolGroupIterator::operator*() {508assert(!isEnd());509return Value;510}511512SymbolGroupIterator &SymbolGroupIterator::operator++() {513assert(Value.File && !isEnd());514++Index;515if (isEnd())516return *this;517518if (Value.File->isPdb()) {519Value.updatePdbModi(Index);520return *this;521}522523scanToNextDebugS();524return *this;525}526527void SymbolGroupIterator::scanToNextDebugS() {528assert(SectionIter);529auto End = Value.File->obj().section_end();530auto &Iter = *SectionIter;531assert(!isEnd());532533while (++Iter != End) {534DebugSubsectionArray SS;535SectionRef SR = *Iter;536if (!isDebugSSection(SR, SS))537continue;538539Value.updateDebugS(SS);540return;541}542}543544bool SymbolGroupIterator::isEnd() const {545if (!Value.File)546return true;547if (Value.File->isPdb()) {548DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());549uint32_t Count = Dbi.modules().getModuleCount();550assert(Index <= Count);551return Index == Count;552}553554assert(SectionIter);555return *SectionIter == Value.File->obj().section_end();556}557558static bool isMyCode(const SymbolGroup &Group) {559if (Group.getFile().isObj())560return true;561562StringRef Name = Group.name();563if (Name.starts_with("Import:"))564return false;565if (Name.ends_with_insensitive(".dll"))566return false;567if (Name.equals_insensitive("* linker *"))568return false;569if (Name.starts_with_insensitive("f:\\binaries\\Intermediate\\vctools"))570return false;571if (Name.starts_with_insensitive("f:\\dd\\vctools\\crt"))572return false;573return true;574}575576bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group,577const FilterOptions &Filters) {578if (Filters.JustMyCode && !isMyCode(Group))579return false;580581// If the arg was not specified on the command line, always dump all modules.582if (!Filters.DumpModi)583return true;584585// Otherwise, only dump if this is the same module specified.586return (Filters.DumpModi == Idx);587}588589590