Path: blob/main/contrib/llvm-project/llvm/lib/Object/IRSymtab.cpp
35233 views
//===- IRSymtab.cpp - implementation of IR symbol tables ------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "llvm/Object/IRSymtab.h"9#include "llvm/ADT/ArrayRef.h"10#include "llvm/ADT/DenseMap.h"11#include "llvm/ADT/SmallPtrSet.h"12#include "llvm/ADT/SmallString.h"13#include "llvm/ADT/SmallVector.h"14#include "llvm/ADT/StringRef.h"15#include "llvm/Bitcode/BitcodeReader.h"16#include "llvm/Config/llvm-config.h"17#include "llvm/IR/Comdat.h"18#include "llvm/IR/DataLayout.h"19#include "llvm/IR/GlobalAlias.h"20#include "llvm/IR/GlobalObject.h"21#include "llvm/IR/Mangler.h"22#include "llvm/IR/Metadata.h"23#include "llvm/IR/Module.h"24#include "llvm/IR/RuntimeLibcalls.h"25#include "llvm/MC/StringTableBuilder.h"26#include "llvm/Object/ModuleSymbolTable.h"27#include "llvm/Object/SymbolicFile.h"28#include "llvm/Support/Allocator.h"29#include "llvm/Support/Casting.h"30#include "llvm/Support/CommandLine.h"31#include "llvm/Support/Error.h"32#include "llvm/Support/StringSaver.h"33#include "llvm/Support/VCSRevision.h"34#include "llvm/Support/raw_ostream.h"35#include "llvm/TargetParser/Triple.h"36#include <cassert>37#include <string>38#include <utility>39#include <vector>4041using namespace llvm;42using namespace irsymtab;4344static cl::opt<bool> DisableBitcodeVersionUpgrade(45"disable-bitcode-version-upgrade", cl::Hidden,46cl::desc("Disable automatic bitcode upgrade for version mismatch"));4748static const char *PreservedSymbols[] = {49// There are global variables, so put it here instead of in50// RuntimeLibcalls.def.51// TODO: Are there similar such variables?52"__ssp_canary_word",53"__stack_chk_guard",54};5556namespace {5758const char *getExpectedProducerName() {59static char DefaultName[] = LLVM_VERSION_STRING60#ifdef LLVM_REVISION61" " LLVM_REVISION62#endif63;64// Allows for testing of the irsymtab writer and upgrade mechanism. This65// environment variable should not be set by users.66if (char *OverrideName = getenv("LLVM_OVERRIDE_PRODUCER"))67return OverrideName;68return DefaultName;69}7071const char *kExpectedProducerName = getExpectedProducerName();7273/// Stores the temporary state that is required to build an IR symbol table.74struct Builder {75SmallVector<char, 0> &Symtab;76StringTableBuilder &StrtabBuilder;77StringSaver Saver;7879// This ctor initializes a StringSaver using the passed in BumpPtrAllocator.80// The StringTableBuilder does not create a copy of any strings added to it,81// so this provides somewhere to store any strings that we create.82Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder,83BumpPtrAllocator &Alloc)84: Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {}8586DenseMap<const Comdat *, int> ComdatMap;87Mangler Mang;88Triple TT;8990std::vector<storage::Comdat> Comdats;91std::vector<storage::Module> Mods;92std::vector<storage::Symbol> Syms;93std::vector<storage::Uncommon> Uncommons;9495std::string COFFLinkerOpts;96raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts};9798std::vector<storage::Str> DependentLibraries;99100void setStr(storage::Str &S, StringRef Value) {101S.Offset = StrtabBuilder.add(Value);102S.Size = Value.size();103}104105template <typename T>106void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) {107R.Offset = Symtab.size();108R.Size = Objs.size();109Symtab.insert(Symtab.end(), reinterpret_cast<const char *>(Objs.data()),110reinterpret_cast<const char *>(Objs.data() + Objs.size()));111}112113Expected<int> getComdatIndex(const Comdat *C, const Module *M);114115Error addModule(Module *M);116Error addSymbol(const ModuleSymbolTable &Msymtab,117const SmallPtrSet<GlobalValue *, 4> &Used,118ModuleSymbolTable::Symbol Sym);119120Error build(ArrayRef<Module *> Mods);121};122123Error Builder::addModule(Module *M) {124if (M->getDataLayoutStr().empty())125return make_error<StringError>("input module has no datalayout",126inconvertibleErrorCode());127128// Symbols in the llvm.used list will get the FB_Used bit and will not be129// internalized. We do this for llvm.compiler.used as well:130//131// IR symbol table tracks module-level asm symbol references but not inline132// asm. A symbol only referenced by inline asm is not in the IR symbol table,133// so we may not know that the definition (in another translation unit) is134// referenced. That definition may have __attribute__((used)) (which lowers to135// llvm.compiler.used on ELF targets) to communicate to the compiler that it136// may be used by inline asm. The usage is perfectly fine, so we treat137// llvm.compiler.used conservatively as llvm.used to work around our own138// limitation.139SmallVector<GlobalValue *, 4> UsedV;140collectUsedGlobalVariables(*M, UsedV, /*CompilerUsed=*/false);141collectUsedGlobalVariables(*M, UsedV, /*CompilerUsed=*/true);142SmallPtrSet<GlobalValue *, 4> Used(UsedV.begin(), UsedV.end());143144ModuleSymbolTable Msymtab;145Msymtab.addModule(M);146147storage::Module Mod;148Mod.Begin = Syms.size();149Mod.End = Syms.size() + Msymtab.symbols().size();150Mod.UncBegin = Uncommons.size();151Mods.push_back(Mod);152153if (TT.isOSBinFormatCOFF()) {154if (auto E = M->materializeMetadata())155return E;156if (NamedMDNode *LinkerOptions =157M->getNamedMetadata("llvm.linker.options")) {158for (MDNode *MDOptions : LinkerOptions->operands())159for (const MDOperand &MDOption : cast<MDNode>(MDOptions)->operands())160COFFLinkerOptsOS << " " << cast<MDString>(MDOption)->getString();161}162}163164if (TT.isOSBinFormatELF()) {165if (auto E = M->materializeMetadata())166return E;167if (NamedMDNode *N = M->getNamedMetadata("llvm.dependent-libraries")) {168for (MDNode *MDOptions : N->operands()) {169const auto OperandStr =170cast<MDString>(cast<MDNode>(MDOptions)->getOperand(0))->getString();171storage::Str Specifier;172setStr(Specifier, OperandStr);173DependentLibraries.emplace_back(Specifier);174}175}176}177178for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols())179if (Error Err = addSymbol(Msymtab, Used, Msym))180return Err;181182return Error::success();183}184185Expected<int> Builder::getComdatIndex(const Comdat *C, const Module *M) {186auto P = ComdatMap.insert(std::make_pair(C, Comdats.size()));187if (P.second) {188std::string Name;189if (TT.isOSBinFormatCOFF()) {190const GlobalValue *GV = M->getNamedValue(C->getName());191if (!GV)192return make_error<StringError>("Could not find leader",193inconvertibleErrorCode());194// Internal leaders do not affect symbol resolution, therefore they do not195// appear in the symbol table.196if (GV->hasLocalLinkage()) {197P.first->second = -1;198return -1;199}200llvm::raw_string_ostream OS(Name);201Mang.getNameWithPrefix(OS, GV, false);202} else {203Name = std::string(C->getName());204}205206storage::Comdat Comdat;207setStr(Comdat.Name, Saver.save(Name));208Comdat.SelectionKind = C->getSelectionKind();209Comdats.push_back(Comdat);210}211212return P.first->second;213}214215static DenseSet<StringRef> buildPreservedSymbolsSet(const Triple &TT) {216DenseSet<StringRef> PreservedSymbolSet(std::begin(PreservedSymbols),217std::end(PreservedSymbols));218219RTLIB::RuntimeLibcallsInfo Libcalls(TT);220for (const char *Name : Libcalls.getLibcallNames()) {221if (Name)222PreservedSymbolSet.insert(Name);223}224return PreservedSymbolSet;225}226227Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,228const SmallPtrSet<GlobalValue *, 4> &Used,229ModuleSymbolTable::Symbol Msym) {230Syms.emplace_back();231storage::Symbol &Sym = Syms.back();232Sym = {};233234storage::Uncommon *Unc = nullptr;235auto Uncommon = [&]() -> storage::Uncommon & {236if (Unc)237return *Unc;238Sym.Flags |= 1 << storage::Symbol::FB_has_uncommon;239Uncommons.emplace_back();240Unc = &Uncommons.back();241*Unc = {};242setStr(Unc->COFFWeakExternFallbackName, "");243setStr(Unc->SectionName, "");244return *Unc;245};246247SmallString<64> Name;248{249raw_svector_ostream OS(Name);250Msymtab.printSymbolName(OS, Msym);251}252setStr(Sym.Name, Saver.save(Name.str()));253254auto Flags = Msymtab.getSymbolFlags(Msym);255if (Flags & object::BasicSymbolRef::SF_Undefined)256Sym.Flags |= 1 << storage::Symbol::FB_undefined;257if (Flags & object::BasicSymbolRef::SF_Weak)258Sym.Flags |= 1 << storage::Symbol::FB_weak;259if (Flags & object::BasicSymbolRef::SF_Common)260Sym.Flags |= 1 << storage::Symbol::FB_common;261if (Flags & object::BasicSymbolRef::SF_Indirect)262Sym.Flags |= 1 << storage::Symbol::FB_indirect;263if (Flags & object::BasicSymbolRef::SF_Global)264Sym.Flags |= 1 << storage::Symbol::FB_global;265if (Flags & object::BasicSymbolRef::SF_FormatSpecific)266Sym.Flags |= 1 << storage::Symbol::FB_format_specific;267if (Flags & object::BasicSymbolRef::SF_Executable)268Sym.Flags |= 1 << storage::Symbol::FB_executable;269270Sym.ComdatIndex = -1;271auto *GV = dyn_cast_if_present<GlobalValue *>(Msym);272if (!GV) {273// Undefined module asm symbols act as GC roots and are implicitly used.274if (Flags & object::BasicSymbolRef::SF_Undefined)275Sym.Flags |= 1 << storage::Symbol::FB_used;276setStr(Sym.IRName, "");277return Error::success();278}279280setStr(Sym.IRName, GV->getName());281282static const DenseSet<StringRef> PreservedSymbolsSet =283buildPreservedSymbolsSet(284llvm::Triple(GV->getParent()->getTargetTriple()));285bool IsPreservedSymbol = PreservedSymbolsSet.contains(GV->getName());286287if (Used.count(GV) || IsPreservedSymbol)288Sym.Flags |= 1 << storage::Symbol::FB_used;289if (GV->isThreadLocal())290Sym.Flags |= 1 << storage::Symbol::FB_tls;291if (GV->hasGlobalUnnamedAddr())292Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr;293if (GV->canBeOmittedFromSymbolTable())294Sym.Flags |= 1 << storage::Symbol::FB_may_omit;295Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility;296297if (Flags & object::BasicSymbolRef::SF_Common) {298auto *GVar = dyn_cast<GlobalVariable>(GV);299if (!GVar)300return make_error<StringError>("Only variables can have common linkage!",301inconvertibleErrorCode());302Uncommon().CommonSize =303GV->getDataLayout().getTypeAllocSize(GV->getValueType());304Uncommon().CommonAlign = GVar->getAlign() ? GVar->getAlign()->value() : 0;305}306307const GlobalObject *GO = GV->getAliaseeObject();308if (!GO) {309if (isa<GlobalIFunc>(GV))310GO = cast<GlobalIFunc>(GV)->getResolverFunction();311if (!GO)312return make_error<StringError>("Unable to determine comdat of alias!",313inconvertibleErrorCode());314}315if (const Comdat *C = GO->getComdat()) {316Expected<int> ComdatIndexOrErr = getComdatIndex(C, GV->getParent());317if (!ComdatIndexOrErr)318return ComdatIndexOrErr.takeError();319Sym.ComdatIndex = *ComdatIndexOrErr;320}321322if (TT.isOSBinFormatCOFF()) {323emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS, GV, TT, Mang);324325if ((Flags & object::BasicSymbolRef::SF_Weak) &&326(Flags & object::BasicSymbolRef::SF_Indirect)) {327auto *Fallback = dyn_cast<GlobalValue>(328cast<GlobalAlias>(GV)->getAliasee()->stripPointerCasts());329if (!Fallback)330return make_error<StringError>("Invalid weak external",331inconvertibleErrorCode());332std::string FallbackName;333raw_string_ostream OS(FallbackName);334Msymtab.printSymbolName(OS, Fallback);335OS.flush();336setStr(Uncommon().COFFWeakExternFallbackName, Saver.save(FallbackName));337}338}339340if (!GO->getSection().empty())341setStr(Uncommon().SectionName, Saver.save(GO->getSection()));342343return Error::success();344}345346Error Builder::build(ArrayRef<Module *> IRMods) {347storage::Header Hdr;348349assert(!IRMods.empty());350Hdr.Version = storage::Header::kCurrentVersion;351setStr(Hdr.Producer, kExpectedProducerName);352setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple());353setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName());354TT = Triple(IRMods[0]->getTargetTriple());355356for (auto *M : IRMods)357if (Error Err = addModule(M))358return Err;359360COFFLinkerOptsOS.flush();361setStr(Hdr.COFFLinkerOpts, Saver.save(COFFLinkerOpts));362363// We are about to fill in the header's range fields, so reserve space for it364// and copy it in afterwards.365Symtab.resize(sizeof(storage::Header));366writeRange(Hdr.Modules, Mods);367writeRange(Hdr.Comdats, Comdats);368writeRange(Hdr.Symbols, Syms);369writeRange(Hdr.Uncommons, Uncommons);370writeRange(Hdr.DependentLibraries, DependentLibraries);371*reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr;372return Error::success();373}374375} // end anonymous namespace376377Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,378StringTableBuilder &StrtabBuilder,379BumpPtrAllocator &Alloc) {380return Builder(Symtab, StrtabBuilder, Alloc).build(Mods);381}382383// Upgrade a vector of bitcode modules created by an old version of LLVM by384// creating an irsymtab for them in the current format.385static Expected<FileContents> upgrade(ArrayRef<BitcodeModule> BMs) {386FileContents FC;387388LLVMContext Ctx;389std::vector<Module *> Mods;390std::vector<std::unique_ptr<Module>> OwnedMods;391for (auto BM : BMs) {392Expected<std::unique_ptr<Module>> MOrErr =393BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true,394/*IsImporting*/ false);395if (!MOrErr)396return MOrErr.takeError();397398Mods.push_back(MOrErr->get());399OwnedMods.push_back(std::move(*MOrErr));400}401402StringTableBuilder StrtabBuilder(StringTableBuilder::RAW);403BumpPtrAllocator Alloc;404if (Error E = build(Mods, FC.Symtab, StrtabBuilder, Alloc))405return std::move(E);406407StrtabBuilder.finalizeInOrder();408FC.Strtab.resize(StrtabBuilder.getSize());409StrtabBuilder.write((uint8_t *)FC.Strtab.data());410411FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()},412{FC.Strtab.data(), FC.Strtab.size()}};413return std::move(FC);414}415416Expected<FileContents> irsymtab::readBitcode(const BitcodeFileContents &BFC) {417if (BFC.Mods.empty())418return make_error<StringError>("Bitcode file does not contain any modules",419inconvertibleErrorCode());420421if (!DisableBitcodeVersionUpgrade) {422if (BFC.StrtabForSymtab.empty() ||423BFC.Symtab.size() < sizeof(storage::Header))424return upgrade(BFC.Mods);425426// We cannot use the regular reader to read the version and producer,427// because it will expect the header to be in the current format. The only428// thing we can rely on is that the version and producer will be present as429// the first struct elements.430auto *Hdr = reinterpret_cast<const storage::Header *>(BFC.Symtab.data());431unsigned Version = Hdr->Version;432StringRef Producer = Hdr->Producer.get(BFC.StrtabForSymtab);433if (Version != storage::Header::kCurrentVersion ||434Producer != kExpectedProducerName)435return upgrade(BFC.Mods);436}437438FileContents FC;439FC.TheReader = {{BFC.Symtab.data(), BFC.Symtab.size()},440{BFC.StrtabForSymtab.data(), BFC.StrtabForSymtab.size()}};441442// Finally, make sure that the number of modules in the symbol table matches443// the number of modules in the bitcode file. If they differ, it may mean that444// the bitcode file was created by binary concatenation, so we need to create445// a new symbol table from scratch.446if (FC.TheReader.getNumModules() != BFC.Mods.size())447return upgrade(std::move(BFC.Mods));448449return std::move(FC);450}451452453