Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
35266 views
//===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"9#include "llvm/Analysis/BasicAliasAnalysis.h"10#include "llvm/Analysis/ModuleSummaryAnalysis.h"11#include "llvm/Analysis/ProfileSummaryInfo.h"12#include "llvm/Analysis/TypeMetadataUtils.h"13#include "llvm/Bitcode/BitcodeWriter.h"14#include "llvm/IR/Constants.h"15#include "llvm/IR/DebugInfo.h"16#include "llvm/IR/Instructions.h"17#include "llvm/IR/Intrinsics.h"18#include "llvm/IR/Module.h"19#include "llvm/IR/PassManager.h"20#include "llvm/Object/ModuleSymbolTable.h"21#include "llvm/Support/raw_ostream.h"22#include "llvm/Transforms/IPO.h"23#include "llvm/Transforms/IPO/FunctionAttrs.h"24#include "llvm/Transforms/IPO/FunctionImport.h"25#include "llvm/Transforms/IPO/LowerTypeTests.h"26#include "llvm/Transforms/Utils/Cloning.h"27#include "llvm/Transforms/Utils/ModuleUtils.h"28using namespace llvm;2930namespace {3132// Determine if a promotion alias should be created for a symbol name.33static bool allowPromotionAlias(const std::string &Name) {34// Promotion aliases are used only in inline assembly. It's safe to35// simply skip unusual names. Subset of MCAsmInfo::isAcceptableChar()36// and MCAsmInfoXCOFF::isAcceptableChar().37for (const char &C : Name) {38if (isAlnum(C) || C == '_' || C == '.')39continue;40return false;41}42return true;43}4445// Promote each local-linkage entity defined by ExportM and used by ImportM by46// changing visibility and appending the given ModuleId.47void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,48SetVector<GlobalValue *> &PromoteExtra) {49DenseMap<const Comdat *, Comdat *> RenamedComdats;50for (auto &ExportGV : ExportM.global_values()) {51if (!ExportGV.hasLocalLinkage())52continue;5354auto Name = ExportGV.getName();55GlobalValue *ImportGV = nullptr;56if (!PromoteExtra.count(&ExportGV)) {57ImportGV = ImportM.getNamedValue(Name);58if (!ImportGV)59continue;60ImportGV->removeDeadConstantUsers();61if (ImportGV->use_empty()) {62ImportGV->eraseFromParent();63continue;64}65}6667std::string OldName = Name.str();68std::string NewName = (Name + ModuleId).str();6970if (const auto *C = ExportGV.getComdat())71if (C->getName() == Name)72RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName));7374ExportGV.setName(NewName);75ExportGV.setLinkage(GlobalValue::ExternalLinkage);76ExportGV.setVisibility(GlobalValue::HiddenVisibility);7778if (ImportGV) {79ImportGV->setName(NewName);80ImportGV->setVisibility(GlobalValue::HiddenVisibility);81}8283if (isa<Function>(&ExportGV) && allowPromotionAlias(OldName)) {84// Create a local alias with the original name to avoid breaking85// references from inline assembly.86std::string Alias =87".lto_set_conditional " + OldName + "," + NewName + "\n";88ExportM.appendModuleInlineAsm(Alias);89}90}9192if (!RenamedComdats.empty())93for (auto &GO : ExportM.global_objects())94if (auto *C = GO.getComdat()) {95auto Replacement = RenamedComdats.find(C);96if (Replacement != RenamedComdats.end())97GO.setComdat(Replacement->second);98}99}100101// Promote all internal (i.e. distinct) type ids used by the module by replacing102// them with external type ids formed using the module id.103//104// Note that this needs to be done before we clone the module because each clone105// will receive its own set of distinct metadata nodes.106void promoteTypeIds(Module &M, StringRef ModuleId) {107DenseMap<Metadata *, Metadata *> LocalToGlobal;108auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {109Metadata *MD =110cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();111112if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {113Metadata *&GlobalMD = LocalToGlobal[MD];114if (!GlobalMD) {115std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str();116GlobalMD = MDString::get(M.getContext(), NewName);117}118119CI->setArgOperand(ArgNo,120MetadataAsValue::get(M.getContext(), GlobalMD));121}122};123124if (Function *TypeTestFunc =125M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {126for (const Use &U : TypeTestFunc->uses()) {127auto CI = cast<CallInst>(U.getUser());128ExternalizeTypeId(CI, 1);129}130}131132if (Function *PublicTypeTestFunc =133M.getFunction(Intrinsic::getName(Intrinsic::public_type_test))) {134for (const Use &U : PublicTypeTestFunc->uses()) {135auto CI = cast<CallInst>(U.getUser());136ExternalizeTypeId(CI, 1);137}138}139140if (Function *TypeCheckedLoadFunc =141M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {142for (const Use &U : TypeCheckedLoadFunc->uses()) {143auto CI = cast<CallInst>(U.getUser());144ExternalizeTypeId(CI, 2);145}146}147148if (Function *TypeCheckedLoadRelativeFunc = M.getFunction(149Intrinsic::getName(Intrinsic::type_checked_load_relative))) {150for (const Use &U : TypeCheckedLoadRelativeFunc->uses()) {151auto CI = cast<CallInst>(U.getUser());152ExternalizeTypeId(CI, 2);153}154}155156for (GlobalObject &GO : M.global_objects()) {157SmallVector<MDNode *, 1> MDs;158GO.getMetadata(LLVMContext::MD_type, MDs);159160GO.eraseMetadata(LLVMContext::MD_type);161for (auto *MD : MDs) {162auto I = LocalToGlobal.find(MD->getOperand(1));163if (I == LocalToGlobal.end()) {164GO.addMetadata(LLVMContext::MD_type, *MD);165continue;166}167GO.addMetadata(168LLVMContext::MD_type,169*MDNode::get(M.getContext(), {MD->getOperand(0), I->second}));170}171}172}173174// Drop unused globals, and drop type information from function declarations.175// FIXME: If we made functions typeless then there would be no need to do this.176void simplifyExternals(Module &M) {177FunctionType *EmptyFT =178FunctionType::get(Type::getVoidTy(M.getContext()), false);179180for (Function &F : llvm::make_early_inc_range(M)) {181if (F.isDeclaration() && F.use_empty()) {182F.eraseFromParent();183continue;184}185186if (!F.isDeclaration() || F.getFunctionType() == EmptyFT ||187// Changing the type of an intrinsic may invalidate the IR.188F.getName().starts_with("llvm."))189continue;190191Function *NewF =192Function::Create(EmptyFT, GlobalValue::ExternalLinkage,193F.getAddressSpace(), "", &M);194NewF->copyAttributesFrom(&F);195// Only copy function attribtues.196NewF->setAttributes(AttributeList::get(M.getContext(),197AttributeList::FunctionIndex,198F.getAttributes().getFnAttrs()));199NewF->takeName(&F);200F.replaceAllUsesWith(NewF);201F.eraseFromParent();202}203204for (GlobalIFunc &I : llvm::make_early_inc_range(M.ifuncs())) {205if (I.use_empty())206I.eraseFromParent();207else208assert(I.getResolverFunction() && "ifunc misses its resolver function");209}210211for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {212if (GV.isDeclaration() && GV.use_empty()) {213GV.eraseFromParent();214continue;215}216}217}218219static void220filterModule(Module *M,221function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {222std::vector<GlobalValue *> V;223for (GlobalValue &GV : M->global_values())224if (!ShouldKeepDefinition(&GV))225V.push_back(&GV);226227for (GlobalValue *GV : V)228if (!convertToDeclaration(*GV))229GV->eraseFromParent();230}231232void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {233if (auto *F = dyn_cast<Function>(C))234return Fn(F);235if (isa<GlobalValue>(C))236return;237for (Value *Op : C->operands())238forEachVirtualFunction(cast<Constant>(Op), Fn);239}240241// Clone any @llvm[.compiler].used over to the new module and append242// values whose defs were cloned into that module.243static void cloneUsedGlobalVariables(const Module &SrcM, Module &DestM,244bool CompilerUsed) {245SmallVector<GlobalValue *, 4> Used, NewUsed;246// First collect those in the llvm[.compiler].used set.247collectUsedGlobalVariables(SrcM, Used, CompilerUsed);248// Next build a set of the equivalent values defined in DestM.249for (auto *V : Used) {250auto *GV = DestM.getNamedValue(V->getName());251if (GV && !GV->isDeclaration())252NewUsed.push_back(GV);253}254// Finally, add them to a llvm[.compiler].used variable in DestM.255if (CompilerUsed)256appendToCompilerUsed(DestM, NewUsed);257else258appendToUsed(DestM, NewUsed);259}260261#ifndef NDEBUG262static bool enableUnifiedLTO(Module &M) {263bool UnifiedLTO = false;264if (auto *MD =265mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("UnifiedLTO")))266UnifiedLTO = MD->getZExtValue();267return UnifiedLTO;268}269#endif270271// If it's possible to split M into regular and thin LTO parts, do so and write272// a multi-module bitcode file with the two parts to OS. Otherwise, write only a273// regular LTO bitcode file to OS.274void splitAndWriteThinLTOBitcode(275raw_ostream &OS, raw_ostream *ThinLinkOS,276function_ref<AAResults &(Function &)> AARGetter, Module &M) {277std::string ModuleId = getUniqueModuleId(&M);278if (ModuleId.empty()) {279assert(!enableUnifiedLTO(M));280// We couldn't generate a module ID for this module, write it out as a281// regular LTO module with an index for summary-based dead stripping.282ProfileSummaryInfo PSI(M);283M.addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));284ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);285WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, &Index,286/*UnifiedLTO=*/false);287288if (ThinLinkOS)289// We don't have a ThinLTO part, but still write the module to the290// ThinLinkOS if requested so that the expected output file is produced.291WriteBitcodeToFile(M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,292&Index, /*UnifiedLTO=*/false);293294return;295}296297promoteTypeIds(M, ModuleId);298299// Returns whether a global or its associated global has attached type300// metadata. The former may participate in CFI or whole-program301// devirtualization, so they need to appear in the merged module instead of302// the thin LTO module. Similarly, globals that are associated with globals303// with type metadata need to appear in the merged module because they will304// reference the global's section directly.305auto HasTypeMetadata = [](const GlobalObject *GO) {306if (MDNode *MD = GO->getMetadata(LLVMContext::MD_associated))307if (auto *AssocVM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(0)))308if (auto *AssocGO = dyn_cast<GlobalObject>(AssocVM->getValue()))309if (AssocGO->hasMetadata(LLVMContext::MD_type))310return true;311return GO->hasMetadata(LLVMContext::MD_type);312};313314// Collect the set of virtual functions that are eligible for virtual constant315// propagation. Each eligible function must not access memory, must return316// an integer of width <=64 bits, must take at least one argument, must not317// use its first argument (assumed to be "this") and all arguments other than318// the first one must be of <=64 bit integer type.319//320// Note that we test whether this copy of the function is readnone, rather321// than testing function attributes, which must hold for any copy of the322// function, even a less optimized version substituted at link time. This is323// sound because the virtual constant propagation optimizations effectively324// inline all implementations of the virtual function into each call site,325// rather than using function attributes to perform local optimization.326DenseSet<const Function *> EligibleVirtualFns;327// If any member of a comdat lives in MergedM, put all members of that328// comdat in MergedM to keep the comdat together.329DenseSet<const Comdat *> MergedMComdats;330for (GlobalVariable &GV : M.globals())331if (!GV.isDeclaration() && HasTypeMetadata(&GV)) {332if (const auto *C = GV.getComdat())333MergedMComdats.insert(C);334forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {335auto *RT = dyn_cast<IntegerType>(F->getReturnType());336if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||337!F->arg_begin()->use_empty())338return;339for (auto &Arg : drop_begin(F->args())) {340auto *ArgT = dyn_cast<IntegerType>(Arg.getType());341if (!ArgT || ArgT->getBitWidth() > 64)342return;343}344if (!F->isDeclaration() &&345computeFunctionBodyMemoryAccess(*F, AARGetter(*F))346.doesNotAccessMemory())347EligibleVirtualFns.insert(F);348});349}350351ValueToValueMapTy VMap;352std::unique_ptr<Module> MergedM(353CloneModule(M, VMap, [&](const GlobalValue *GV) -> bool {354if (const auto *C = GV->getComdat())355if (MergedMComdats.count(C))356return true;357if (auto *F = dyn_cast<Function>(GV))358return EligibleVirtualFns.count(F);359if (auto *GVar =360dyn_cast_or_null<GlobalVariable>(GV->getAliaseeObject()))361return HasTypeMetadata(GVar);362return false;363}));364StripDebugInfo(*MergedM);365MergedM->setModuleInlineAsm("");366367// Clone any llvm.*used globals to ensure the included values are368// not deleted.369cloneUsedGlobalVariables(M, *MergedM, /*CompilerUsed*/ false);370cloneUsedGlobalVariables(M, *MergedM, /*CompilerUsed*/ true);371372for (Function &F : *MergedM)373if (!F.isDeclaration()) {374// Reset the linkage of all functions eligible for virtual constant375// propagation. The canonical definitions live in the thin LTO module so376// that they can be imported.377F.setLinkage(GlobalValue::AvailableExternallyLinkage);378F.setComdat(nullptr);379}380381SetVector<GlobalValue *> CfiFunctions;382for (auto &F : M)383if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F))384CfiFunctions.insert(&F);385386// Remove all globals with type metadata, globals with comdats that live in387// MergedM, and aliases pointing to such globals from the thin LTO module.388filterModule(&M, [&](const GlobalValue *GV) {389if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getAliaseeObject()))390if (HasTypeMetadata(GVar))391return false;392if (const auto *C = GV->getComdat())393if (MergedMComdats.count(C))394return false;395return true;396});397398promoteInternals(*MergedM, M, ModuleId, CfiFunctions);399promoteInternals(M, *MergedM, ModuleId, CfiFunctions);400401auto &Ctx = MergedM->getContext();402SmallVector<MDNode *, 8> CfiFunctionMDs;403for (auto *V : CfiFunctions) {404Function &F = *cast<Function>(V);405SmallVector<MDNode *, 2> Types;406F.getMetadata(LLVMContext::MD_type, Types);407408SmallVector<Metadata *, 4> Elts;409Elts.push_back(MDString::get(Ctx, F.getName()));410CfiFunctionLinkage Linkage;411if (lowertypetests::isJumpTableCanonical(&F))412Linkage = CFL_Definition;413else if (F.hasExternalWeakLinkage())414Linkage = CFL_WeakDeclaration;415else416Linkage = CFL_Declaration;417Elts.push_back(ConstantAsMetadata::get(418llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage)));419append_range(Elts, Types);420CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts));421}422423if(!CfiFunctionMDs.empty()) {424NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions");425for (auto *MD : CfiFunctionMDs)426NMD->addOperand(MD);427}428429SmallVector<MDNode *, 8> FunctionAliases;430for (auto &A : M.aliases()) {431if (!isa<Function>(A.getAliasee()))432continue;433434auto *F = cast<Function>(A.getAliasee());435436Metadata *Elts[] = {437MDString::get(Ctx, A.getName()),438MDString::get(Ctx, F->getName()),439ConstantAsMetadata::get(440ConstantInt::get(Type::getInt8Ty(Ctx), A.getVisibility())),441ConstantAsMetadata::get(442ConstantInt::get(Type::getInt8Ty(Ctx), A.isWeakForLinker())),443};444445FunctionAliases.push_back(MDTuple::get(Ctx, Elts));446}447448if (!FunctionAliases.empty()) {449NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("aliases");450for (auto *MD : FunctionAliases)451NMD->addOperand(MD);452}453454SmallVector<MDNode *, 8> Symvers;455ModuleSymbolTable::CollectAsmSymvers(M, [&](StringRef Name, StringRef Alias) {456Function *F = M.getFunction(Name);457if (!F || F->use_empty())458return;459460Symvers.push_back(MDTuple::get(461Ctx, {MDString::get(Ctx, Name), MDString::get(Ctx, Alias)}));462});463464if (!Symvers.empty()) {465NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("symvers");466for (auto *MD : Symvers)467NMD->addOperand(MD);468}469470simplifyExternals(*MergedM);471472// FIXME: Try to re-use BSI and PFI from the original module here.473ProfileSummaryInfo PSI(M);474ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);475476// Mark the merged module as requiring full LTO. We still want an index for477// it though, so that it can participate in summary-based dead stripping.478MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));479ModuleSummaryIndex MergedMIndex =480buildModuleSummaryIndex(*MergedM, nullptr, &PSI);481482SmallVector<char, 0> Buffer;483484BitcodeWriter W(Buffer);485// Save the module hash produced for the full bitcode, which will486// be used in the backends, and use that in the minimized bitcode487// produced for the full link.488ModuleHash ModHash = {{0}};489W.writeModule(M, /*ShouldPreserveUseListOrder=*/false, &Index,490/*GenerateHash=*/true, &ModHash);491W.writeModule(*MergedM, /*ShouldPreserveUseListOrder=*/false, &MergedMIndex);492W.writeSymtab();493W.writeStrtab();494OS << Buffer;495496// If a minimized bitcode module was requested for the thin link, only497// the information that is needed by thin link will be written in the498// given OS (the merged module will be written as usual).499if (ThinLinkOS) {500Buffer.clear();501BitcodeWriter W2(Buffer);502StripDebugInfo(M);503W2.writeThinLinkBitcode(M, Index, ModHash);504W2.writeModule(*MergedM, /*ShouldPreserveUseListOrder=*/false,505&MergedMIndex);506W2.writeSymtab();507W2.writeStrtab();508*ThinLinkOS << Buffer;509}510}511512// Check if the LTO Unit splitting has been enabled.513bool enableSplitLTOUnit(Module &M) {514bool EnableSplitLTOUnit = false;515if (auto *MD = mdconst::extract_or_null<ConstantInt>(516M.getModuleFlag("EnableSplitLTOUnit")))517EnableSplitLTOUnit = MD->getZExtValue();518return EnableSplitLTOUnit;519}520521// Returns whether this module needs to be split because it uses type metadata.522bool hasTypeMetadata(Module &M) {523for (auto &GO : M.global_objects()) {524if (GO.hasMetadata(LLVMContext::MD_type))525return true;526}527return false;528}529530bool writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,531function_ref<AAResults &(Function &)> AARGetter,532Module &M, const ModuleSummaryIndex *Index) {533std::unique_ptr<ModuleSummaryIndex> NewIndex = nullptr;534// See if this module has any type metadata. If so, we try to split it535// or at least promote type ids to enable WPD.536if (hasTypeMetadata(M)) {537if (enableSplitLTOUnit(M)) {538splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);539return true;540}541// Promote type ids as needed for index-based WPD.542std::string ModuleId = getUniqueModuleId(&M);543if (!ModuleId.empty()) {544promoteTypeIds(M, ModuleId);545// Need to rebuild the index so that it contains type metadata546// for the newly promoted type ids.547// FIXME: Probably should not bother building the index at all548// in the caller of writeThinLTOBitcode (which does so via the549// ModuleSummaryIndexAnalysis pass), since we have to rebuild it550// anyway whenever there is type metadata (here or in551// splitAndWriteThinLTOBitcode). Just always build it once via the552// buildModuleSummaryIndex when Module(s) are ready.553ProfileSummaryInfo PSI(M);554NewIndex = std::make_unique<ModuleSummaryIndex>(555buildModuleSummaryIndex(M, nullptr, &PSI));556Index = NewIndex.get();557}558}559560// Write it out as an unsplit ThinLTO module.561562// Save the module hash produced for the full bitcode, which will563// be used in the backends, and use that in the minimized bitcode564// produced for the full link.565ModuleHash ModHash = {{0}};566WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, Index,567/*GenerateHash=*/true, &ModHash);568// If a minimized bitcode module was requested for the thin link, only569// the information that is needed by thin link will be written in the570// given OS.571if (ThinLinkOS && Index)572writeThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash);573return false;574}575576} // anonymous namespace577extern bool WriteNewDbgInfoFormatToBitcode;578PreservedAnalyses579llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {580FunctionAnalysisManager &FAM =581AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();582583ScopedDbgInfoFormatSetter FormatSetter(M, M.IsNewDbgInfoFormat &&584WriteNewDbgInfoFormatToBitcode);585if (M.IsNewDbgInfoFormat)586M.removeDebugIntrinsicDeclarations();587588bool Changed = writeThinLTOBitcode(589OS, ThinLinkOS,590[&FAM](Function &F) -> AAResults & {591return FAM.getResult<AAManager>(F);592},593M, &AM.getResult<ModuleSummaryIndexAnalysis>(M));594595return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();596}597598599