Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements whole program optimization of virtual calls in cases
// where we know (via !type metadata) that the list of callees is fixed. This
// includes the following:
// - Single implementation devirtualization: if a virtual call has a single
//   possible callee, replace all calls with a direct call to that callee.
// - Virtual constant propagation: if the virtual function's return type is an
//   integer <=64 bits and all possible callees are readnone, for each class and
//   each list of constant arguments: evaluate the function, store the return
//   value alongside the virtual table, and rewrite each virtual call as a load
//   from the virtual table.
// - Uniform return value optimization: if the conditions for virtual constant
//   propagation hold and each function returns the same constant value, replace
//   each virtual call with that constant.
// - Unique return value optimization for i1 return values: if the conditions
//   for virtual constant propagation hold and a single vtable's function
//   returns 0, or a single vtable's function returns 1, replace each virtual
//   call with a comparison of the vptr against that vtable's address.
//
// This pass is intended to be used during the regular and thin LTO pipelines:
//
// During regular LTO, the pass determines the best optimization for each
// virtual call and applies the resolutions directly to virtual calls that are
// eligible for virtual call optimization (i.e. calls that use either of the
// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
//
// During hybrid Regular/ThinLTO, the pass operates in two phases:
// - Export phase: this is run during the thin link over a single merged module
//   that contains all vtables with !type metadata that participate in the link.
//   The pass computes a resolution for each virtual call and stores it in the
//   type identifier summary.
// - Import phase: this is run during the thin backends over the individual
//   modules. The pass applies the resolutions previously computed during the
//   export phase to each eligible virtual call.
//
// During ThinLTO, the pass operates in two phases:
// - Export phase: this is run during the thin link over the index which
//   contains a summary of all vtables with !type metadata that participate in
//   the link. It computes a resolution for each virtual call and stores it in
//   the type identifier summary. Only single implementation devirtualization
//   is supported.
// - Import phase: (same as with hybrid case above).
//
//===----------------------------------------------------------------------===//

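// Illustrative example (not part of this file; the class and function names
// are invented): consider a program built with whole program visibility, e.g.
// with clang's -flto and -fwhole-program-vtables, containing
//
//   struct Animal         { virtual int legs() const = 0; };
//   struct Snake : Animal { int legs() const override { return 0; } };
//   int count(const Animal &A) { return A.legs(); }
//
// If Snake is the only class in the link that implements Animal::legs, the
// call in count() has a single possible callee, so single implementation
// devirtualization can turn it into a direct call to Snake::legs. Because that
// callee is readnone and returns a small integer constant, the virtual
// constant propagation and uniform return value optimizations described above
// could further fold the call to the constant 0.
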
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include <algorithm>
#include <cstddef>
#include <map>
#include <set>
#include <string>

using namespace llvm;
using namespace wholeprogramdevirt;

#define DEBUG_TYPE "wholeprogramdevirt"

STATISTIC(NumDevirtTargets, "Number of whole program devirtualization targets");
STATISTIC(NumSingleImpl, "Number of single implementation devirtualizations");
STATISTIC(NumBranchFunnel, "Number of branch funnels");
STATISTIC(NumUniformRetVal, "Number of uniform return value optimizations");
STATISTIC(NumUniqueRetVal, "Number of unique return value optimizations");
STATISTIC(NumVirtConstProp1Bit,
          "Number of 1 bit virtual constant propagations");
STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");

static cl::opt<PassSummaryAction> ClSummaryAction(
    "wholeprogramdevirt-summary-action",
    cl::desc("What to do with the summary when running this pass"),
    cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
               clEnumValN(PassSummaryAction::Import, "import",
                          "Import typeid resolutions from summary and globals"),
               clEnumValN(PassSummaryAction::Export, "export",
                          "Export typeid resolutions to summary and globals")),
    cl::Hidden);

static cl::opt<std::string> ClReadSummary(
    "wholeprogramdevirt-read-summary",
    cl::desc(
        "Read summary from given bitcode or YAML file before running pass"),
    cl::Hidden);

static cl::opt<std::string> ClWriteSummary(
    "wholeprogramdevirt-write-summary",
    cl::desc("Write summary to given bitcode or YAML file after running pass. "
             "Output file format is deduced from extension: *.bc means writing "
             "bitcode, otherwise YAML"),
    cl::Hidden);

static cl::opt<unsigned>
    ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden,
                cl::init(10),
                cl::desc("Maximum number of call targets per "
                         "call site to enable branch funnels"));

static cl::opt<bool>
    PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
                       cl::desc("Print index-based devirtualization messages"));

/// Provide a way to force enable whole program visibility in tests.
/// This is needed to support legacy tests that don't contain
/// !vcall_visibility metadata (the mere presence of type tests
/// previously implied hidden visibility).
static cl::opt<bool>
    WholeProgramVisibility("whole-program-visibility", cl::Hidden,
                           cl::desc("Enable whole program visibility"));

/// Provide a way to force disable whole program visibility for debugging or
/// workarounds, when enabled via the linker.
static cl::opt<bool> DisableWholeProgramVisibility(
    "disable-whole-program-visibility", cl::Hidden,
    cl::desc("Disable whole program visibility (overrides enabling options)"));

/// Provide a way to prevent certain functions from being devirtualized.
static cl::list<std::string>
    SkipFunctionNames("wholeprogramdevirt-skip",
                      cl::desc("Prevent function(s) from being devirtualized"),
                      cl::Hidden, cl::CommaSeparated);

/// Mechanism to add runtime checking of devirtualization decisions, optionally
/// trapping or falling back to indirect call on any that are not correct.
/// Trapping mode is useful for debugging undefined behavior leading to failures
/// with WPD. Fallback mode is useful for ensuring safety when whole program
/// visibility may be compromised.
enum WPDCheckMode { None, Trap, Fallback };
static cl::opt<WPDCheckMode> DevirtCheckMode(
    "wholeprogramdevirt-check", cl::Hidden,
    cl::desc("Type of checking for incorrect devirtualizations"),
    cl::values(clEnumValN(WPDCheckMode::None, "none", "No checking"),
               clEnumValN(WPDCheckMode::Trap, "trap", "Trap when incorrect"),
               clEnumValN(WPDCheckMode::Fallback, "fallback",
                          "Fallback to indirect when incorrect")));

namespace {
struct PatternList {
  std::vector<GlobPattern> Patterns;
  template <class T> void init(const T &StringList) {
    for (const auto &S : StringList)
      if (Expected<GlobPattern> Pat = GlobPattern::create(S))
        Patterns.push_back(std::move(*Pat));
  }
  bool match(StringRef S) {
    for (const GlobPattern &P : Patterns)
      if (P.match(S))
        return true;
    return false;
  }
};
} // namespace

// Find the minimum offset that we may store a value of size Size bits at. If
// IsAfter is set, look for an offset after the object, otherwise look for an
// offset before the object.
uint64_t
wholeprogramdevirt::findLowestOffset(ArrayRef<VirtualCallTarget> Targets,
                                     bool IsAfter, uint64_t Size) {
  // Find a minimum offset taking into account only vtable sizes.
  uint64_t MinByte = 0;
  for (const VirtualCallTarget &Target : Targets) {
    if (IsAfter)
      MinByte = std::max(MinByte, Target.minAfterBytes());
    else
      MinByte = std::max(MinByte, Target.minBeforeBytes());
  }

  // Build a vector of arrays of bytes covering, for each target, a slice of the
  // used region (see AccumBitVector::BytesUsed in
  // llvm/Transforms/IPO/WholeProgramDevirt.h) starting at MinByte. Effectively,
  // this aligns the used regions to start at MinByte.
  //
  // In this example, A, B and C are vtables, # is a byte already allocated for
  // a virtual function pointer, AAAA... (etc.) are the used regions for the
  // vtables and Offset(X) is the value computed for the Offset variable below
  // for X.
  //
  //                    Offset(A)
  //                    |       |
  //                            |MinByte
  // A: ################AAAAAAAA|AAAAAAAA
  // B: ########BBBBBBBBBBBBBBBB|BBBB
  // C: ########################|CCCCCCCCCCCCCCCC
  //            |   Offset(B)   |
  //
  // This code produces the slices of A, B and C that appear after the divider
  // at MinByte.
  std::vector<ArrayRef<uint8_t>> Used;
  for (const VirtualCallTarget &Target : Targets) {
    ArrayRef<uint8_t> VTUsed = IsAfter ? Target.TM->Bits->After.BytesUsed
                                       : Target.TM->Bits->Before.BytesUsed;
    uint64_t Offset = IsAfter ? MinByte - Target.minAfterBytes()
                              : MinByte - Target.minBeforeBytes();

    // Disregard used regions that are smaller than Offset. These are
    // effectively all-free regions that do not need to be checked.
    if (VTUsed.size() > Offset)
      Used.push_back(VTUsed.slice(Offset));
  }

  if (Size == 1) {
    // Find a free bit in each member of Used.
    for (unsigned I = 0;; ++I) {
      uint8_t BitsUsed = 0;
      for (auto &&B : Used)
        if (I < B.size())
          BitsUsed |= B[I];
      if (BitsUsed != 0xff)
        return (MinByte + I) * 8 + llvm::countr_zero(uint8_t(~BitsUsed));
    }
  } else {
    // Find a free (Size/8) byte region in each member of Used.
    // FIXME: see if alignment helps.
    for (unsigned I = 0;; ++I) {
      for (auto &&B : Used) {
        unsigned Byte = 0;
        while ((I + Byte) < B.size() && Byte < (Size / 8)) {
          if (B[I + Byte])
            goto NextI;
          ++Byte;
        }
      }
      return (MinByte + I) * 8;
    NextI:;
    }
  }
}

void wholeprogramdevirt::setBeforeReturnValues(
    MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocBefore,
    unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
  if (BitWidth == 1)
    OffsetByte = -(AllocBefore / 8 + 1);
  else
    OffsetByte = -((AllocBefore + 7) / 8 + (BitWidth + 7) / 8);
  OffsetBit = AllocBefore % 8;

  for (VirtualCallTarget &Target : Targets) {
    if (BitWidth == 1)
      Target.setBeforeBit(AllocBefore);
    else
      Target.setBeforeBytes(AllocBefore, (BitWidth + 7) / 8);
  }
}

void wholeprogramdevirt::setAfterReturnValues(
    MutableArrayRef<VirtualCallTarget> Targets, uint64_t AllocAfter,
    unsigned BitWidth, int64_t &OffsetByte, uint64_t &OffsetBit) {
  if (BitWidth == 1)
    OffsetByte = AllocAfter / 8;
  else
    OffsetByte = (AllocAfter + 7) / 8;
  OffsetBit = AllocAfter % 8;

  for (VirtualCallTarget &Target : Targets) {
    if (BitWidth == 1)
      Target.setAfterBit(AllocAfter);
    else
      Target.setAfterBytes(AllocAfter, (BitWidth + 7) / 8);
  }
}

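// Worked examples for findLowestOffset and the two helpers above (all numbers
// invented for illustration):
//
// * Size == 1 search: suppose MinByte == 24 and Used holds the slices
//   {0xff, 0x07} and {0xff, 0xff, 0x01}. At I == 0 and I == 1 every in-range
//   byte ORs to 0xff, so both bytes are fully occupied. At I == 2 only the
//   second slice is long enough, BitsUsed == 0x01, and
//   countr_zero(uint8_t(~0x01)) == 1, so the function returns
//   (24 + 2) * 8 + 1 == 209, i.e. bit 1 of byte MinByte + 2.
//
// * setBeforeReturnValues with BitWidth == 1 and AllocBefore == 10 yields
//   OffsetByte == -(10 / 8 + 1) == -2 and OffsetBit == 10 % 8 == 2, i.e. bit 2
//   of the second byte in front of the vtable; setAfterReturnValues with
//   BitWidth == 32 and AllocAfter == 0 yields OffsetByte == 0 and
//   OffsetBit == 0, i.e. a 4-byte constant placed at the very start of the
//   space following the vtable.
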
VirtualCallTarget::VirtualCallTarget(GlobalValue *Fn, const TypeMemberInfo *TM)
    : Fn(Fn), TM(TM),
      IsBigEndian(Fn->getDataLayout().isBigEndian()),
      WasDevirt(false) {}

namespace {

// A slot in a set of virtual tables. The TypeID identifies the set of virtual
// tables, and the ByteOffset is the offset in bytes from the address point to
// the virtual function pointer.
struct VTableSlot {
  Metadata *TypeID;
  uint64_t ByteOffset;
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<VTableSlot> {
  static VTableSlot getEmptyKey() {
    return {DenseMapInfo<Metadata *>::getEmptyKey(),
            DenseMapInfo<uint64_t>::getEmptyKey()};
  }
  static VTableSlot getTombstoneKey() {
    return {DenseMapInfo<Metadata *>::getTombstoneKey(),
            DenseMapInfo<uint64_t>::getTombstoneKey()};
  }
  static unsigned getHashValue(const VTableSlot &I) {
    return DenseMapInfo<Metadata *>::getHashValue(I.TypeID) ^
           DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
  }
  static bool isEqual(const VTableSlot &LHS,
                      const VTableSlot &RHS) {
    return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
  }
};

template <> struct DenseMapInfo<VTableSlotSummary> {
  static VTableSlotSummary getEmptyKey() {
    return {DenseMapInfo<StringRef>::getEmptyKey(),
            DenseMapInfo<uint64_t>::getEmptyKey()};
  }
  static VTableSlotSummary getTombstoneKey() {
    return {DenseMapInfo<StringRef>::getTombstoneKey(),
            DenseMapInfo<uint64_t>::getTombstoneKey()};
  }
  static unsigned getHashValue(const VTableSlotSummary &I) {
    return DenseMapInfo<StringRef>::getHashValue(I.TypeID) ^
           DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
  }
  static bool isEqual(const VTableSlotSummary &LHS,
                      const VTableSlotSummary &RHS) {
    return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
  }
};

} // end namespace llvm

// Returns true if the function must be unreachable based on ValueInfo.
//
// In particular, identifies a function as unreachable in the following
// conditions:
// 1) All summaries are live.
// 2) All function summaries indicate it's unreachable.
// 3) There is no non-function with the same GUID (which is rare).
static bool mustBeUnreachableFunction(ValueInfo TheFnVI) {
  if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
    // Returns false if ValueInfo is absent, or the summary list is empty
    // (e.g., function declarations).
    return false;
  }

  for (const auto &Summary : TheFnVI.getSummaryList()) {
    // Conservatively returns false if any non-live functions are seen.
    // In general either all summaries should be live or all should be dead.
    if (!Summary->isLive())
      return false;
    if (auto *FS = dyn_cast<FunctionSummary>(Summary->getBaseObject())) {
      if (!FS->fflags().MustBeUnreachable)
        return false;
    }
    // Be conservative if a non-function has the same GUID (which is rare).
    else
      return false;
  }
  // All function summaries are live and all of them agree that the function is
  // unreachable.
  return true;
}

namespace {
// A virtual call site. VTable is the loaded virtual table pointer, and CB is
// the indirect virtual call.
struct VirtualCallSite {
  Value *VTable = nullptr;
  CallBase &CB;

  // If non-null, this field points to the associated unsafe use count stored in
  // the DevirtModule::NumUnsafeUsesForTypeTest map below. See the description
  // of that field for details.
  unsigned *NumUnsafeUses = nullptr;

  void
  emitRemark(const StringRef OptName, const StringRef TargetName,
             function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
    Function *F = CB.getCaller();
    DebugLoc DLoc = CB.getDebugLoc();
    BasicBlock *Block = CB.getParent();

    using namespace ore;
    OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block)
                      << NV("Optimization", OptName)
                      << ": devirtualized a call to "
                      << NV("FunctionName", TargetName));
  }

  void replaceAndErase(
      const StringRef OptName, const StringRef TargetName, bool RemarksEnabled,
      function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
      Value *New) {
    if (RemarksEnabled)
      emitRemark(OptName, TargetName, OREGetter);
    CB.replaceAllUsesWith(New);
    if (auto *II = dyn_cast<InvokeInst>(&CB)) {
      BranchInst::Create(II->getNormalDest(), CB.getIterator());
      II->getUnwindDest()->removePredecessor(II->getParent());
    }
    CB.eraseFromParent();
    // This use is no longer unsafe.
    if (NumUnsafeUses)
      --*NumUnsafeUses;
  }
};

// Call site information collected for a specific VTableSlot and possibly a list
// of constant integer arguments. The grouping by arguments is handled by the
// VTableSlotInfo class.
struct CallSiteInfo {
  /// The set of call sites for this slot. Used during regular LTO and the
  /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
  /// call sites that appear in the merged module itself); in each of these
  /// cases we are directly operating on the call sites at the IR level.
  std::vector<VirtualCallSite> CallSites;

  /// Whether all call sites represented by this CallSiteInfo, including those
  /// in summaries, have been devirtualized. This starts off as true because a
  /// default constructed CallSiteInfo represents no call sites.
  bool AllCallSitesDevirted = true;

  // These fields are used during the export phase of ThinLTO and reflect
  // information collected from function summaries.

  /// Whether any function summary contains an llvm.assume(llvm.type.test) for
  /// this slot.
  bool SummaryHasTypeTestAssumeUsers = false;

  /// CFI-specific: a vector containing the list of function summaries that use
  /// the llvm.type.checked.load intrinsic and therefore will require
  /// resolutions for llvm.type.test in order to implement CFI checks if
  /// devirtualization was unsuccessful. If devirtualization was successful, the
  /// pass will clear this vector by calling markDevirt(). If at the end of the
  /// pass the vector is non-empty, we will need to add a use of llvm.type.test
  /// to each of the function summaries in the vector.
  std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
  std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;

  bool isExported() const {
    return SummaryHasTypeTestAssumeUsers ||
           !SummaryTypeCheckedLoadUsers.empty();
  }

  void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) {
    SummaryTypeCheckedLoadUsers.push_back(FS);
    AllCallSitesDevirted = false;
  }

  void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
    SummaryTypeTestAssumeUsers.push_back(FS);
    SummaryHasTypeTestAssumeUsers = true;
    AllCallSitesDevirted = false;
  }

  void markDevirt() {
    AllCallSitesDevirted = true;

    // As explained in the comment for SummaryTypeCheckedLoadUsers.
    SummaryTypeCheckedLoadUsers.clear();
  }
};

// Call site information collected for a specific VTableSlot.
struct VTableSlotInfo {
  // The set of call sites which do not have all constant integer arguments
  // (excluding "this").
  CallSiteInfo CSInfo;

  // The set of call sites with all constant integer arguments (excluding
  // "this"), grouped by argument list.
  std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;

  void addCallSite(Value *VTable, CallBase &CB, unsigned *NumUnsafeUses);

private:
  CallSiteInfo &findCallSiteInfo(CallBase &CB);
};

CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallBase &CB) {
  std::vector<uint64_t> Args;
  auto *CBType = dyn_cast<IntegerType>(CB.getType());
  if (!CBType || CBType->getBitWidth() > 64 || CB.arg_empty())
    return CSInfo;
  for (auto &&Arg : drop_begin(CB.args())) {
    auto *CI = dyn_cast<ConstantInt>(Arg);
    if (!CI || CI->getBitWidth() > 64)
      return CSInfo;
    Args.push_back(CI->getZExtValue());
  }
  return ConstCSInfo[Args];
}

void VTableSlotInfo::addCallSite(Value *VTable, CallBase &CB,
                                 unsigned *NumUnsafeUses) {
  auto &CSI = findCallSiteInfo(CB);
  CSI.AllCallSitesDevirted = false;
  CSI.CallSites.push_back({VTable, CB, NumUnsafeUses});
}

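// Illustrative grouping example for findCallSiteInfo/addCallSite above (the
// calls and names are invented): a devirtualizable call like p->priority(7)
// that returns an integer of at most 64 bits and whose only argument after
// "this" is the constant 7 is filed under ConstCSInfo[{7}], making it a
// candidate for the per-argument-list optimizations (virtual constant
// propagation, uniform and unique return value). A call like p->priority(n)
// with a non-constant n, or one returning void or a pointer, lands in the
// plain CSInfo bucket and is only eligible for single implementation
// devirtualization and branch funnels.
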
struct DevirtModule {
  Module &M;
  function_ref<AAResults &(Function &)> AARGetter;
  function_ref<DominatorTree &(Function &)> LookupDomTree;

  ModuleSummaryIndex *ExportSummary;
  const ModuleSummaryIndex *ImportSummary;

  IntegerType *Int8Ty;
  PointerType *Int8PtrTy;
  IntegerType *Int32Ty;
  IntegerType *Int64Ty;
  IntegerType *IntPtrTy;
  /// Sizeless array type, used for imported vtables. This provides a signal
  /// to analyzers that these imports may alias, as they do for example
  /// when multiple unique return values occur in the same vtable.
  ArrayType *Int8Arr0Ty;

  bool RemarksEnabled;
  function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;

  MapVector<VTableSlot, VTableSlotInfo> CallSlots;

  // Calls that have already been optimized. We may add a call to multiple
  // VTableSlotInfos if vtable loads are coalesced, and we need to make sure
  // not to optimize a call more than once.
  SmallPtrSet<CallBase *, 8> OptimizedCalls;

  // Store calls that had their ptrauth bundle removed. They are to be deleted
  // at the end of the optimization.
  SmallVector<CallBase *, 8> CallsWithPtrAuthBundleRemoved;

  // This map keeps track of the number of "unsafe" uses of a loaded function
  // pointer. The key is the associated llvm.type.test intrinsic call generated
  // by this pass. An unsafe use is one that calls the loaded function pointer
  // directly. Every time we eliminate an unsafe use (for example, by
  // devirtualizing it or by applying virtual constant propagation), we
  // decrement the value stored in this map. If a value reaches zero, we can
  // eliminate the type check by RAUWing the associated llvm.type.test call with
  // true.
  std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
  PatternList FunctionsToSkip;

  DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
               function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
               function_ref<DominatorTree &(Function &)> LookupDomTree,
               ModuleSummaryIndex *ExportSummary,
               const ModuleSummaryIndex *ImportSummary)
      : M(M), AARGetter(AARGetter), LookupDomTree(LookupDomTree),
        ExportSummary(ExportSummary), ImportSummary(ImportSummary),
        Int8Ty(Type::getInt8Ty(M.getContext())),
        Int8PtrTy(PointerType::getUnqual(M.getContext())),
        Int32Ty(Type::getInt32Ty(M.getContext())),
        Int64Ty(Type::getInt64Ty(M.getContext())),
        IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
        Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)),
        RemarksEnabled(areRemarksEnabled()), OREGetter(OREGetter) {
    assert(!(ExportSummary && ImportSummary));
    FunctionsToSkip.init(SkipFunctionNames);
  }

  bool areRemarksEnabled();

  void
  scanTypeTestUsers(Function *TypeTestFunc,
                    DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
  void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);

  void buildTypeIdentifierMap(
      std::vector<VTableBits> &Bits,
      DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);

  bool
  tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
                            const std::set<TypeMemberInfo> &TypeMemberInfos,
                            uint64_t ByteOffset,
                            ModuleSummaryIndex *ExportSummary);

  void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
                             bool &IsExported);
  bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary,
                           MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                           VTableSlotInfo &SlotInfo,
                           WholeProgramDevirtResolution *Res);

  void applyICallBranchFunnel(VTableSlotInfo &SlotInfo, Constant *JT,
                              bool &IsExported);
  void tryICallBranchFunnel(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                            VTableSlotInfo &SlotInfo,
                            WholeProgramDevirtResolution *Res, VTableSlot Slot);

  bool tryEvaluateFunctionsWithArgs(
      MutableArrayRef<VirtualCallTarget> TargetsForSlot,
      ArrayRef<uint64_t> Args);

  void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
                             uint64_t TheRetVal);
  bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
                           CallSiteInfo &CSInfo,
                           WholeProgramDevirtResolution::ByArg *Res);

  // Returns the global symbol name that is used to export information about the
  // given vtable slot and list of arguments.
  std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
                            StringRef Name);

  bool shouldExportConstantsAsAbsoluteSymbols();

  // This function is called during the export phase to create a symbol
  // definition containing information about the given vtable slot and list of
  // arguments.
  void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
                    Constant *C);
  void exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
                      uint32_t Const, uint32_t &Storage);

  // This function is called during the import phase to create a reference to
  // the symbol definition created during the export phase.
  Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> 
Args,658StringRef Name);659Constant *importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,660StringRef Name, IntegerType *IntTy,661uint32_t Storage);662663Constant *getMemberAddr(const TypeMemberInfo *M);664665void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,666Constant *UniqueMemberAddr);667bool tryUniqueRetValOpt(unsigned BitWidth,668MutableArrayRef<VirtualCallTarget> TargetsForSlot,669CallSiteInfo &CSInfo,670WholeProgramDevirtResolution::ByArg *Res,671VTableSlot Slot, ArrayRef<uint64_t> Args);672673void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,674Constant *Byte, Constant *Bit);675bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,676VTableSlotInfo &SlotInfo,677WholeProgramDevirtResolution *Res, VTableSlot Slot);678679void rebuildGlobal(VTableBits &B);680681// Apply the summary resolution for Slot to all virtual calls in SlotInfo.682void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);683684// If we were able to eliminate all unsafe uses for a type checked load,685// eliminate the associated type tests by replacing them with true.686void removeRedundantTypeTests();687688bool run();689690// Look up the corresponding ValueInfo entry of `TheFn` in `ExportSummary`.691//692// Caller guarantees that `ExportSummary` is not nullptr.693static ValueInfo lookUpFunctionValueInfo(Function *TheFn,694ModuleSummaryIndex *ExportSummary);695696// Returns true if the function definition must be unreachable.697//698// Note if this helper function returns true, `F` is guaranteed699// to be unreachable; if it returns false, `F` might still700// be unreachable but not covered by this helper function.701//702// Implementation-wise, if function definition is present, IR is analyzed; if703// not, look up function flags from ExportSummary as a fallback.704static bool mustBeUnreachableFunction(Function *const F,705ModuleSummaryIndex *ExportSummary);706707// Lower the module using the action and summary passed as command line708// arguments. 
For testing purposes only.709static bool710runForTesting(Module &M, function_ref<AAResults &(Function &)> AARGetter,711function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,712function_ref<DominatorTree &(Function &)> LookupDomTree);713};714715struct DevirtIndex {716ModuleSummaryIndex &ExportSummary;717// The set in which to record GUIDs exported from their module by718// devirtualization, used by client to ensure they are not internalized.719std::set<GlobalValue::GUID> &ExportedGUIDs;720// A map in which to record the information necessary to locate the WPD721// resolution for local targets in case they are exported by cross module722// importing.723std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;724725MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;726727PatternList FunctionsToSkip;728729DevirtIndex(730ModuleSummaryIndex &ExportSummary,731std::set<GlobalValue::GUID> &ExportedGUIDs,732std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)733: ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),734LocalWPDTargetsMap(LocalWPDTargetsMap) {735FunctionsToSkip.init(SkipFunctionNames);736}737738bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,739const TypeIdCompatibleVtableInfo TIdInfo,740uint64_t ByteOffset);741742bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,743VTableSlotSummary &SlotSummary,744VTableSlotInfo &SlotInfo,745WholeProgramDevirtResolution *Res,746std::set<ValueInfo> &DevirtTargets);747748void run();749};750} // end anonymous namespace751752PreservedAnalyses WholeProgramDevirtPass::run(Module &M,753ModuleAnalysisManager &AM) {754auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();755auto AARGetter = [&](Function &F) -> AAResults & {756return FAM.getResult<AAManager>(F);757};758auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {759return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);760};761auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {762return FAM.getResult<DominatorTreeAnalysis>(F);763};764if (UseCommandLine) {765if (!DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))766return PreservedAnalyses::all();767return PreservedAnalyses::none();768}769if (!DevirtModule(M, AARGetter, OREGetter, LookupDomTree, ExportSummary,770ImportSummary)771.run())772return PreservedAnalyses::all();773return PreservedAnalyses::none();774}775776// Enable whole program visibility if enabled by client (e.g. linker) or777// internal option, and not force disabled.778bool llvm::hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {779return (WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility) &&780!DisableWholeProgramVisibility;781}782783static bool784typeIDVisibleToRegularObj(StringRef TypeID,785function_ref<bool(StringRef)> IsVisibleToRegularObj) {786// TypeID for member function pointer type is an internal construct787// and won't exist in IsVisibleToRegularObj. The full TypeID788// will be present and participate in invalidation.789if (TypeID.ends_with(".virtual"))790return false;791792// TypeID that doesn't start with Itanium mangling (_ZTS) will be793// non-externally visible types which cannot interact with794// external native files. See CodeGenModule::CreateMetadataIdentifierImpl.795if (!TypeID.consume_front("_ZTS"))796return false;797798// TypeID is keyed off the type name symbol (_ZTS). 
However, the native799// object may not contain this symbol if it does not contain a key800// function for the base type and thus only contains a reference to the801// type info (_ZTI). To catch this case we query using the type info802// symbol corresponding to the TypeID.803std::string typeInfo = ("_ZTI" + TypeID).str();804return IsVisibleToRegularObj(typeInfo);805}806807static bool808skipUpdateDueToValidation(GlobalVariable &GV,809function_ref<bool(StringRef)> IsVisibleToRegularObj) {810SmallVector<MDNode *, 2> Types;811GV.getMetadata(LLVMContext::MD_type, Types);812813for (auto Type : Types)814if (auto *TypeID = dyn_cast<MDString>(Type->getOperand(1).get()))815return typeIDVisibleToRegularObj(TypeID->getString(),816IsVisibleToRegularObj);817818return false;819}820821/// If whole program visibility asserted, then upgrade all public vcall822/// visibility metadata on vtable definitions to linkage unit visibility in823/// Module IR (for regular or hybrid LTO).824void llvm::updateVCallVisibilityInModule(825Module &M, bool WholeProgramVisibilityEnabledInLTO,826const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,827bool ValidateAllVtablesHaveTypeInfos,828function_ref<bool(StringRef)> IsVisibleToRegularObj) {829if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))830return;831for (GlobalVariable &GV : M.globals()) {832// Add linkage unit visibility to any variable with type metadata, which are833// the vtable definitions. We won't have an existing vcall_visibility834// metadata on vtable definitions with public visibility.835if (GV.hasMetadata(LLVMContext::MD_type) &&836GV.getVCallVisibility() == GlobalObject::VCallVisibilityPublic &&837// Don't upgrade the visibility for symbols exported to the dynamic838// linker, as we have no information on their eventual use.839!DynamicExportSymbols.count(GV.getGUID()) &&840// With validation enabled, we want to exclude symbols visible to841// regular objects. 
Local symbols will be in this group due to the842// current implementation but those with VCallVisibilityTranslationUnit843// will have already been marked in clang so are unaffected.844!(ValidateAllVtablesHaveTypeInfos &&845skipUpdateDueToValidation(GV, IsVisibleToRegularObj)))846GV.setVCallVisibilityMetadata(GlobalObject::VCallVisibilityLinkageUnit);847}848}849850void llvm::updatePublicTypeTestCalls(Module &M,851bool WholeProgramVisibilityEnabledInLTO) {852Function *PublicTypeTestFunc =853M.getFunction(Intrinsic::getName(Intrinsic::public_type_test));854if (!PublicTypeTestFunc)855return;856if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) {857Function *TypeTestFunc =858Intrinsic::getDeclaration(&M, Intrinsic::type_test);859for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {860auto *CI = cast<CallInst>(U.getUser());861auto *NewCI = CallInst::Create(862TypeTestFunc, {CI->getArgOperand(0), CI->getArgOperand(1)},863std::nullopt, "", CI->getIterator());864CI->replaceAllUsesWith(NewCI);865CI->eraseFromParent();866}867} else {868auto *True = ConstantInt::getTrue(M.getContext());869for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) {870auto *CI = cast<CallInst>(U.getUser());871CI->replaceAllUsesWith(True);872CI->eraseFromParent();873}874}875}876877/// Based on typeID string, get all associated vtable GUIDS that are878/// visible to regular objects.879void llvm::getVisibleToRegularObjVtableGUIDs(880ModuleSummaryIndex &Index,881DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols,882function_ref<bool(StringRef)> IsVisibleToRegularObj) {883for (const auto &typeID : Index.typeIdCompatibleVtableMap()) {884if (typeIDVisibleToRegularObj(typeID.first, IsVisibleToRegularObj))885for (const TypeIdOffsetVtableInfo &P : typeID.second)886VisibleToRegularObjSymbols.insert(P.VTableVI.getGUID());887}888}889890/// If whole program visibility asserted, then upgrade all public vcall891/// visibility metadata on vtable definition summaries to linkage unit892/// visibility in Module summary index (for ThinLTO).893void llvm::updateVCallVisibilityInIndex(894ModuleSummaryIndex &Index, bool WholeProgramVisibilityEnabledInLTO,895const DenseSet<GlobalValue::GUID> &DynamicExportSymbols,896const DenseSet<GlobalValue::GUID> &VisibleToRegularObjSymbols) {897if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))898return;899for (auto &P : Index) {900// Don't upgrade the visibility for symbols exported to the dynamic901// linker, as we have no information on their eventual use.902if (DynamicExportSymbols.count(P.first))903continue;904for (auto &S : P.second.SummaryList) {905auto *GVar = dyn_cast<GlobalVarSummary>(S.get());906if (!GVar ||907GVar->getVCallVisibility() != GlobalObject::VCallVisibilityPublic)908continue;909// With validation enabled, we want to exclude symbols visible to regular910// objects. 
Local symbols will be in this group due to the current911// implementation but those with VCallVisibilityTranslationUnit will have912// already been marked in clang so are unaffected.913if (VisibleToRegularObjSymbols.count(P.first))914continue;915GVar->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);916}917}918}919920void llvm::runWholeProgramDevirtOnIndex(921ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,922std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {923DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();924}925926void llvm::updateIndexWPDForExports(927ModuleSummaryIndex &Summary,928function_ref<bool(StringRef, ValueInfo)> isExported,929std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {930for (auto &T : LocalWPDTargetsMap) {931auto &VI = T.first;932// This was enforced earlier during trySingleImplDevirt.933assert(VI.getSummaryList().size() == 1 &&934"Devirt of local target has more than one copy");935auto &S = VI.getSummaryList()[0];936if (!isExported(S->modulePath(), VI))937continue;938939// It's been exported by a cross module import.940for (auto &SlotSummary : T.second) {941auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);942assert(TIdSum);943auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);944assert(WPDRes != TIdSum->WPDRes.end());945WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(946WPDRes->second.SingleImplName,947Summary.getModuleHash(S->modulePath()));948}949}950}951952static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary) {953// Check that summary index contains regular LTO module when performing954// export to prevent occasional use of index from pure ThinLTO compilation955// (-fno-split-lto-module). This kind of summary index is passed to956// DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.957const auto &ModPaths = Summary->modulePaths();958if (ClSummaryAction != PassSummaryAction::Import &&959!ModPaths.contains(ModuleSummaryIndex::getRegularLTOModuleName()))960return createStringError(961errc::invalid_argument,962"combined summary should contain Regular LTO module");963return ErrorSuccess();964}965966bool DevirtModule::runForTesting(967Module &M, function_ref<AAResults &(Function &)> AARGetter,968function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,969function_ref<DominatorTree &(Function &)> LookupDomTree) {970std::unique_ptr<ModuleSummaryIndex> Summary =971std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);972973// Handle the command-line summary arguments. This code is for testing974// purposes only, so we handle errors directly.975if (!ClReadSummary.empty()) {976ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +977": ");978auto ReadSummaryFile =979ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));980if (Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr =981getModuleSummaryIndex(*ReadSummaryFile)) {982Summary = std::move(*SummaryOrErr);983ExitOnErr(checkCombinedSummaryForTesting(Summary.get()));984} else {985// Try YAML if we've failed with bitcode.986consumeError(SummaryOrErr.takeError());987yaml::Input In(ReadSummaryFile->getBuffer());988In >> *Summary;989ExitOnErr(errorCodeToError(In.error()));990}991}992993bool Changed =994DevirtModule(M, AARGetter, OREGetter, LookupDomTree,995ClSummaryAction == PassSummaryAction::Export ? Summary.get()996: nullptr,997ClSummaryAction == PassSummaryAction::Import ? 
Summary.get()998: nullptr)999.run();10001001if (!ClWriteSummary.empty()) {1002ExitOnError ExitOnErr(1003"-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");1004std::error_code EC;1005if (StringRef(ClWriteSummary).ends_with(".bc")) {1006raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_None);1007ExitOnErr(errorCodeToError(EC));1008writeIndexToFile(*Summary, OS);1009} else {1010raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF);1011ExitOnErr(errorCodeToError(EC));1012yaml::Output Out(OS);1013Out << *Summary;1014}1015}10161017return Changed;1018}10191020void DevirtModule::buildTypeIdentifierMap(1021std::vector<VTableBits> &Bits,1022DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {1023DenseMap<GlobalVariable *, VTableBits *> GVToBits;1024Bits.reserve(M.global_size());1025SmallVector<MDNode *, 2> Types;1026for (GlobalVariable &GV : M.globals()) {1027Types.clear();1028GV.getMetadata(LLVMContext::MD_type, Types);1029if (GV.isDeclaration() || Types.empty())1030continue;10311032VTableBits *&BitsPtr = GVToBits[&GV];1033if (!BitsPtr) {1034Bits.emplace_back();1035Bits.back().GV = &GV;1036Bits.back().ObjectSize =1037M.getDataLayout().getTypeAllocSize(GV.getInitializer()->getType());1038BitsPtr = &Bits.back();1039}10401041for (MDNode *Type : Types) {1042auto TypeID = Type->getOperand(1).get();10431044uint64_t Offset =1045cast<ConstantInt>(1046cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())1047->getZExtValue();10481049TypeIdMap[TypeID].insert({BitsPtr, Offset});1050}1051}1052}10531054bool DevirtModule::tryFindVirtualCallTargets(1055std::vector<VirtualCallTarget> &TargetsForSlot,1056const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset,1057ModuleSummaryIndex *ExportSummary) {1058for (const TypeMemberInfo &TM : TypeMemberInfos) {1059if (!TM.Bits->GV->isConstant())1060return false;10611062// We cannot perform whole program devirtualization analysis on a vtable1063// with public LTO visibility.1064if (TM.Bits->GV->getVCallVisibility() ==1065GlobalObject::VCallVisibilityPublic)1066return false;10671068Function *Fn = nullptr;1069Constant *C = nullptr;1070std::tie(Fn, C) =1071getFunctionAtVTableOffset(TM.Bits->GV, TM.Offset + ByteOffset, M);10721073if (!Fn)1074return false;10751076if (FunctionsToSkip.match(Fn->getName()))1077return false;10781079// We can disregard __cxa_pure_virtual as a possible call target, as1080// calls to pure virtuals are UB.1081if (Fn->getName() == "__cxa_pure_virtual")1082continue;10831084// We can disregard unreachable functions as possible call targets, as1085// unreachable functions shouldn't be called.1086if (mustBeUnreachableFunction(Fn, ExportSummary))1087continue;10881089// Save the symbol used in the vtable to use as the devirtualization1090// target.1091auto GV = dyn_cast<GlobalValue>(C);1092assert(GV);1093TargetsForSlot.push_back({GV, &TM});1094}10951096// Give up if we couldn't find any targets.1097return !TargetsForSlot.empty();1098}10991100bool DevirtIndex::tryFindVirtualCallTargets(1101std::vector<ValueInfo> &TargetsForSlot,1102const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) {1103for (const TypeIdOffsetVtableInfo &P : TIdInfo) {1104// Find a representative copy of the vtable initializer.1105// We can have multiple available_externally, linkonce_odr and weak_odr1106// vtable initializers. 
We can also have multiple external vtable1107// initializers in the case of comdats, which we cannot check here.1108// The linker should give an error in this case.1109//1110// Also, handle the case of same-named local Vtables with the same path1111// and therefore the same GUID. This can happen if there isn't enough1112// distinguishing path when compiling the source file. In that case we1113// conservatively return false early.1114const GlobalVarSummary *VS = nullptr;1115bool LocalFound = false;1116for (const auto &S : P.VTableVI.getSummaryList()) {1117if (GlobalValue::isLocalLinkage(S->linkage())) {1118if (LocalFound)1119return false;1120LocalFound = true;1121}1122auto *CurVS = cast<GlobalVarSummary>(S->getBaseObject());1123if (!CurVS->vTableFuncs().empty() ||1124// Previously clang did not attach the necessary type metadata to1125// available_externally vtables, in which case there would not1126// be any vtable functions listed in the summary and we need1127// to treat this case conservatively (in case the bitcode is old).1128// However, we will also not have any vtable functions in the1129// case of a pure virtual base class. In that case we do want1130// to set VS to avoid treating it conservatively.1131!GlobalValue::isAvailableExternallyLinkage(S->linkage())) {1132VS = CurVS;1133// We cannot perform whole program devirtualization analysis on a vtable1134// with public LTO visibility.1135if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)1136return false;1137}1138}1139// There will be no VS if all copies are available_externally having no1140// type metadata. In that case we can't safely perform WPD.1141if (!VS)1142return false;1143if (!VS->isLive())1144continue;1145for (auto VTP : VS->vTableFuncs()) {1146if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)1147continue;11481149if (mustBeUnreachableFunction(VTP.FuncVI))1150continue;11511152TargetsForSlot.push_back(VTP.FuncVI);1153}1154}11551156// Give up if we couldn't find any targets.1157return !TargetsForSlot.empty();1158}11591160void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,1161Constant *TheFn, bool &IsExported) {1162// Don't devirtualize function if we're told to skip it1163// in -wholeprogramdevirt-skip.1164if (FunctionsToSkip.match(TheFn->stripPointerCasts()->getName()))1165return;1166auto Apply = [&](CallSiteInfo &CSInfo) {1167for (auto &&VCallSite : CSInfo.CallSites) {1168if (!OptimizedCalls.insert(&VCallSite.CB).second)1169continue;11701171if (RemarksEnabled)1172VCallSite.emitRemark("single-impl",1173TheFn->stripPointerCasts()->getName(), OREGetter);1174NumSingleImpl++;1175auto &CB = VCallSite.CB;1176assert(!CB.getCalledFunction() && "devirtualizing direct call?");1177IRBuilder<> Builder(&CB);1178Value *Callee =1179Builder.CreateBitCast(TheFn, CB.getCalledOperand()->getType());11801181// If trap checking is enabled, add support to compare the virtual1182// function pointer to the devirtualized target. 
In case of a mismatch,1183// perform a debug trap.1184if (DevirtCheckMode == WPDCheckMode::Trap) {1185auto *Cond = Builder.CreateICmpNE(CB.getCalledOperand(), Callee);1186Instruction *ThenTerm =1187SplitBlockAndInsertIfThen(Cond, &CB, /*Unreachable=*/false);1188Builder.SetInsertPoint(ThenTerm);1189Function *TrapFn = Intrinsic::getDeclaration(&M, Intrinsic::debugtrap);1190auto *CallTrap = Builder.CreateCall(TrapFn);1191CallTrap->setDebugLoc(CB.getDebugLoc());1192}11931194// If fallback checking is enabled, add support to compare the virtual1195// function pointer to the devirtualized target. In case of a mismatch,1196// fall back to indirect call.1197if (DevirtCheckMode == WPDCheckMode::Fallback) {1198MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights();1199// Version the indirect call site. If the called value is equal to the1200// given callee, 'NewInst' will be executed, otherwise the original call1201// site will be executed.1202CallBase &NewInst = versionCallSite(CB, Callee, Weights);1203NewInst.setCalledOperand(Callee);1204// Since the new call site is direct, we must clear metadata that1205// is only appropriate for indirect calls. This includes !prof and1206// !callees metadata.1207NewInst.setMetadata(LLVMContext::MD_prof, nullptr);1208NewInst.setMetadata(LLVMContext::MD_callees, nullptr);1209// Additionally, we should remove them from the fallback indirect call,1210// so that we don't attempt to perform indirect call promotion later.1211CB.setMetadata(LLVMContext::MD_prof, nullptr);1212CB.setMetadata(LLVMContext::MD_callees, nullptr);1213}12141215// In either trapping or non-checking mode, devirtualize original call.1216else {1217// Devirtualize unconditionally.1218CB.setCalledOperand(Callee);1219// Since the call site is now direct, we must clear metadata that1220// is only appropriate for indirect calls. This includes !prof and1221// !callees metadata.1222CB.setMetadata(LLVMContext::MD_prof, nullptr);1223CB.setMetadata(LLVMContext::MD_callees, nullptr);1224if (CB.getCalledOperand() &&1225CB.getOperandBundle(LLVMContext::OB_ptrauth)) {1226auto *NewCS = CallBase::removeOperandBundle(1227&CB, LLVMContext::OB_ptrauth, CB.getIterator());1228CB.replaceAllUsesWith(NewCS);1229// Schedule for deletion at the end of pass run.1230CallsWithPtrAuthBundleRemoved.push_back(&CB);1231}1232}12331234// This use is no longer unsafe.1235if (VCallSite.NumUnsafeUses)1236--*VCallSite.NumUnsafeUses;1237}1238if (CSInfo.isExported())1239IsExported = true;1240CSInfo.markDevirt();1241};1242Apply(SlotInfo.CSInfo);1243for (auto &P : SlotInfo.ConstCSInfo)1244Apply(P.second);1245}12461247static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) {1248// We can't add calls if we haven't seen a definition1249if (Callee.getSummaryList().empty())1250return false;12511252// Insert calls into the summary index so that the devirtualized targets1253// are eligible for import.1254// FIXME: Annotate type tests with hotness. 
For now, mark these as hot1255// to better ensure we have the opportunity to inline them.1256bool IsExported = false;1257auto &S = Callee.getSummaryList()[0];1258CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* HasTailCall = */ false,1259/* RelBF = */ 0);1260auto AddCalls = [&](CallSiteInfo &CSInfo) {1261for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {1262FS->addCall({Callee, CI});1263IsExported |= S->modulePath() != FS->modulePath();1264}1265for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {1266FS->addCall({Callee, CI});1267IsExported |= S->modulePath() != FS->modulePath();1268}1269};1270AddCalls(SlotInfo.CSInfo);1271for (auto &P : SlotInfo.ConstCSInfo)1272AddCalls(P.second);1273return IsExported;1274}12751276bool DevirtModule::trySingleImplDevirt(1277ModuleSummaryIndex *ExportSummary,1278MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,1279WholeProgramDevirtResolution *Res) {1280// See if the program contains a single implementation of this virtual1281// function.1282auto *TheFn = TargetsForSlot[0].Fn;1283for (auto &&Target : TargetsForSlot)1284if (TheFn != Target.Fn)1285return false;12861287// If so, update each call site to call that implementation directly.1288if (RemarksEnabled || AreStatisticsEnabled())1289TargetsForSlot[0].WasDevirt = true;12901291bool IsExported = false;1292applySingleImplDevirt(SlotInfo, TheFn, IsExported);1293if (!IsExported)1294return false;12951296// If the only implementation has local linkage, we must promote to external1297// to make it visible to thin LTO objects. We can only get here during the1298// ThinLTO export phase.1299if (TheFn->hasLocalLinkage()) {1300std::string NewName = (TheFn->getName() + ".llvm.merged").str();13011302// Since we are renaming the function, any comdats with the same name must1303// also be renamed. 
This is required when targeting COFF, as the comdat name1304// must match one of the names of the symbols in the comdat.1305if (Comdat *C = TheFn->getComdat()) {1306if (C->getName() == TheFn->getName()) {1307Comdat *NewC = M.getOrInsertComdat(NewName);1308NewC->setSelectionKind(C->getSelectionKind());1309for (GlobalObject &GO : M.global_objects())1310if (GO.getComdat() == C)1311GO.setComdat(NewC);1312}1313}13141315TheFn->setLinkage(GlobalValue::ExternalLinkage);1316TheFn->setVisibility(GlobalValue::HiddenVisibility);1317TheFn->setName(NewName);1318}1319if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID()))1320// Any needed promotion of 'TheFn' has already been done during1321// LTO unit split, so we can ignore return value of AddCalls.1322AddCalls(SlotInfo, TheFnVI);13231324Res->TheKind = WholeProgramDevirtResolution::SingleImpl;1325Res->SingleImplName = std::string(TheFn->getName());13261327return true;1328}13291330bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,1331VTableSlotSummary &SlotSummary,1332VTableSlotInfo &SlotInfo,1333WholeProgramDevirtResolution *Res,1334std::set<ValueInfo> &DevirtTargets) {1335// See if the program contains a single implementation of this virtual1336// function.1337auto TheFn = TargetsForSlot[0];1338for (auto &&Target : TargetsForSlot)1339if (TheFn != Target)1340return false;13411342// Don't devirtualize if we don't have target definition.1343auto Size = TheFn.getSummaryList().size();1344if (!Size)1345return false;13461347// Don't devirtualize function if we're told to skip it1348// in -wholeprogramdevirt-skip.1349if (FunctionsToSkip.match(TheFn.name()))1350return false;13511352// If the summary list contains multiple summaries where at least one is1353// a local, give up, as we won't know which (possibly promoted) name to use.1354for (const auto &S : TheFn.getSummaryList())1355if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)1356return false;13571358// Collect functions devirtualized at least for one call site for stats.1359if (PrintSummaryDevirt || AreStatisticsEnabled())1360DevirtTargets.insert(TheFn);13611362auto &S = TheFn.getSummaryList()[0];1363bool IsExported = AddCalls(SlotInfo, TheFn);1364if (IsExported)1365ExportedGUIDs.insert(TheFn.getGUID());13661367// Record in summary for use in devirtualization during the ThinLTO import1368// step.1369Res->TheKind = WholeProgramDevirtResolution::SingleImpl;1370if (GlobalValue::isLocalLinkage(S->linkage())) {1371if (IsExported)1372// If target is a local function and we are exporting it by1373// devirtualizing a call in another module, we need to record the1374// promoted name.1375Res->SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(1376TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));1377else {1378LocalWPDTargetsMap[TheFn].push_back(SlotSummary);1379Res->SingleImplName = std::string(TheFn.name());1380}1381} else1382Res->SingleImplName = std::string(TheFn.name());13831384// Name will be empty if this thin link driven off of serialized combined1385// index (e.g. llvm-lto). 
However, WPD is not supported/invoked for the1386// legacy LTO API anyway.1387assert(!Res->SingleImplName.empty());13881389return true;1390}13911392void DevirtModule::tryICallBranchFunnel(1393MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,1394WholeProgramDevirtResolution *Res, VTableSlot Slot) {1395Triple T(M.getTargetTriple());1396if (T.getArch() != Triple::x86_64)1397return;13981399if (TargetsForSlot.size() > ClThreshold)1400return;14011402bool HasNonDevirt = !SlotInfo.CSInfo.AllCallSitesDevirted;1403if (!HasNonDevirt)1404for (auto &P : SlotInfo.ConstCSInfo)1405if (!P.second.AllCallSitesDevirted) {1406HasNonDevirt = true;1407break;1408}14091410if (!HasNonDevirt)1411return;14121413FunctionType *FT =1414FunctionType::get(Type::getVoidTy(M.getContext()), {Int8PtrTy}, true);1415Function *JT;1416if (isa<MDString>(Slot.TypeID)) {1417JT = Function::Create(FT, Function::ExternalLinkage,1418M.getDataLayout().getProgramAddressSpace(),1419getGlobalName(Slot, {}, "branch_funnel"), &M);1420JT->setVisibility(GlobalValue::HiddenVisibility);1421} else {1422JT = Function::Create(FT, Function::InternalLinkage,1423M.getDataLayout().getProgramAddressSpace(),1424"branch_funnel", &M);1425}1426JT->addParamAttr(0, Attribute::Nest);14271428std::vector<Value *> JTArgs;1429JTArgs.push_back(JT->arg_begin());1430for (auto &T : TargetsForSlot) {1431JTArgs.push_back(getMemberAddr(T.TM));1432JTArgs.push_back(T.Fn);1433}14341435BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);1436Function *Intr =1437Intrinsic::getDeclaration(&M, llvm::Intrinsic::icall_branch_funnel, {});14381439auto *CI = CallInst::Create(Intr, JTArgs, "", BB);1440CI->setTailCallKind(CallInst::TCK_MustTail);1441ReturnInst::Create(M.getContext(), nullptr, BB);14421443bool IsExported = false;1444applyICallBranchFunnel(SlotInfo, JT, IsExported);1445if (IsExported)1446Res->TheKind = WholeProgramDevirtResolution::BranchFunnel;1447}14481449void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,1450Constant *JT, bool &IsExported) {1451auto Apply = [&](CallSiteInfo &CSInfo) {1452if (CSInfo.isExported())1453IsExported = true;1454if (CSInfo.AllCallSitesDevirted)1455return;14561457std::map<CallBase *, CallBase *> CallBases;1458for (auto &&VCallSite : CSInfo.CallSites) {1459CallBase &CB = VCallSite.CB;14601461if (CallBases.find(&CB) != CallBases.end()) {1462// When finding devirtualizable calls, it's possible to find the same1463// vtable passed to multiple llvm.type.test or llvm.type.checked.load1464// calls, which can cause duplicate call sites to be recorded in1465// [Const]CallSites. If we've already found one of these1466// call instances, just ignore it. 
It will be replaced later.1467continue;1468}14691470// Jump tables are only profitable if the retpoline mitigation is enabled.1471Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");1472if (!FSAttr.isValid() ||1473!FSAttr.getValueAsString().contains("+retpoline"))1474continue;14751476NumBranchFunnel++;1477if (RemarksEnabled)1478VCallSite.emitRemark("branch-funnel",1479JT->stripPointerCasts()->getName(), OREGetter);14801481// Pass the address of the vtable in the nest register, which is r10 on1482// x86_64.1483std::vector<Type *> NewArgs;1484NewArgs.push_back(Int8PtrTy);1485append_range(NewArgs, CB.getFunctionType()->params());1486FunctionType *NewFT =1487FunctionType::get(CB.getFunctionType()->getReturnType(), NewArgs,1488CB.getFunctionType()->isVarArg());1489PointerType *NewFTPtr = PointerType::getUnqual(NewFT);14901491IRBuilder<> IRB(&CB);1492std::vector<Value *> Args;1493Args.push_back(VCallSite.VTable);1494llvm::append_range(Args, CB.args());14951496CallBase *NewCS = nullptr;1497if (isa<CallInst>(CB))1498NewCS = IRB.CreateCall(NewFT, IRB.CreateBitCast(JT, NewFTPtr), Args);1499else1500NewCS = IRB.CreateInvoke(NewFT, IRB.CreateBitCast(JT, NewFTPtr),1501cast<InvokeInst>(CB).getNormalDest(),1502cast<InvokeInst>(CB).getUnwindDest(), Args);1503NewCS->setCallingConv(CB.getCallingConv());15041505AttributeList Attrs = CB.getAttributes();1506std::vector<AttributeSet> NewArgAttrs;1507NewArgAttrs.push_back(AttributeSet::get(1508M.getContext(), ArrayRef<Attribute>{Attribute::get(1509M.getContext(), Attribute::Nest)}));1510for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)1511NewArgAttrs.push_back(Attrs.getParamAttrs(I));1512NewCS->setAttributes(1513AttributeList::get(M.getContext(), Attrs.getFnAttrs(),1514Attrs.getRetAttrs(), NewArgAttrs));15151516CallBases[&CB] = NewCS;15171518// This use is no longer unsafe.1519if (VCallSite.NumUnsafeUses)1520--*VCallSite.NumUnsafeUses;1521}1522// Don't mark as devirtualized because there may be callers compiled without1523// retpoline mitigation, which would mean that they are lowered to1524// llvm.type.test and therefore require an llvm.type.test resolution for the1525// type identifier.15261527for (auto &[Old, New] : CallBases) {1528Old->replaceAllUsesWith(New);1529Old->eraseFromParent();1530}1531};1532Apply(SlotInfo.CSInfo);1533for (auto &P : SlotInfo.ConstCSInfo)1534Apply(P.second);1535}15361537bool DevirtModule::tryEvaluateFunctionsWithArgs(1538MutableArrayRef<VirtualCallTarget> TargetsForSlot,1539ArrayRef<uint64_t> Args) {1540// Evaluate each function and store the result in each target's RetVal1541// field.1542for (VirtualCallTarget &Target : TargetsForSlot) {1543// TODO: Skip for now if the vtable symbol was an alias to a function,1544// need to evaluate whether it would be correct to analyze the aliasee1545// function for this optimization.1546auto Fn = dyn_cast<Function>(Target.Fn);1547if (!Fn)1548return false;15491550if (Fn->arg_size() != Args.size() + 1)1551return false;15521553Evaluator Eval(M.getDataLayout(), nullptr);1554SmallVector<Constant *, 2> EvalArgs;1555EvalArgs.push_back(1556Constant::getNullValue(Fn->getFunctionType()->getParamType(0)));1557for (unsigned I = 0; I != Args.size(); ++I) {1558auto *ArgTy =1559dyn_cast<IntegerType>(Fn->getFunctionType()->getParamType(I + 1));1560if (!ArgTy)1561return false;1562EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));1563}15641565Constant *RetVal;1566if (!Eval.EvaluateFunction(Fn, RetVal, EvalArgs) ||1567!isa<ConstantInt>(RetVal))1568return false;1569Target.RetVal = 

bool DevirtModule::tryEvaluateFunctionsWithArgs(
    MutableArrayRef<VirtualCallTarget> TargetsForSlot,
    ArrayRef<uint64_t> Args) {
  // Evaluate each function and store the result in each target's RetVal
  // field.
  for (VirtualCallTarget &Target : TargetsForSlot) {
    // TODO: Skip for now if the vtable symbol was an alias to a function,
    // need to evaluate whether it would be correct to analyze the aliasee
    // function for this optimization.
    auto Fn = dyn_cast<Function>(Target.Fn);
    if (!Fn)
      return false;

    if (Fn->arg_size() != Args.size() + 1)
      return false;

    Evaluator Eval(M.getDataLayout(), nullptr);
    SmallVector<Constant *, 2> EvalArgs;
    EvalArgs.push_back(
        Constant::getNullValue(Fn->getFunctionType()->getParamType(0)));
    for (unsigned I = 0; I != Args.size(); ++I) {
      auto *ArgTy =
          dyn_cast<IntegerType>(Fn->getFunctionType()->getParamType(I + 1));
      if (!ArgTy)
        return false;
      EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
    }

    Constant *RetVal;
    if (!Eval.EvaluateFunction(Fn, RetVal, EvalArgs) ||
        !isa<ConstantInt>(RetVal))
      return false;
    Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
  }
  return true;
}

void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
                                         uint64_t TheRetVal) {
  for (auto Call : CSInfo.CallSites) {
    if (!OptimizedCalls.insert(&Call.CB).second)
      continue;
    NumUniformRetVal++;
    Call.replaceAndErase(
        "uniform-ret-val", FnName, RemarksEnabled, OREGetter,
        ConstantInt::get(cast<IntegerType>(Call.CB.getType()), TheRetVal));
  }
  CSInfo.markDevirt();
}

bool DevirtModule::tryUniformRetValOpt(
    MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
    WholeProgramDevirtResolution::ByArg *Res) {
  // Uniform return value optimization. If all functions return the same
  // constant, replace all calls with that constant.
  uint64_t TheRetVal = TargetsForSlot[0].RetVal;
  for (const VirtualCallTarget &Target : TargetsForSlot)
    if (Target.RetVal != TheRetVal)
      return false;

  if (CSInfo.isExported()) {
    Res->TheKind = WholeProgramDevirtResolution::ByArg::UniformRetVal;
    Res->Info = TheRetVal;
  }

  applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
  if (RemarksEnabled || AreStatisticsEnabled())
    for (auto &&Target : TargetsForSlot)
      Target.WasDevirt = true;
  return true;
}

std::string DevirtModule::getGlobalName(VTableSlot Slot,
                                        ArrayRef<uint64_t> Args,
                                        StringRef Name) {
  std::string FullName = "__typeid_";
  raw_string_ostream OS(FullName);
  OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
  for (uint64_t Arg : Args)
    OS << '_' << Arg;
  OS << '_' << Name;
  return FullName;
}
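
// For illustration only (hypothetical type id): getGlobalName mangles the slot
// and constant arguments into a flat symbol name. For type id "_ZTS1A", byte
// offset 8, Args {1, 2} and Name "byte" it produces
// "__typeid__ZTS1A_8_1_2_byte". These names are what the import phase looks up
// again via importGlobal/importConstant below.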

bool DevirtModule::shouldExportConstantsAsAbsoluteSymbols() {
  Triple T(M.getTargetTriple());
  return T.isX86() && T.getObjectFormat() == Triple::ELF;
}

void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
                                StringRef Name, Constant *C) {
  GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
                                        getGlobalName(Slot, Args, Name), C, &M);
  GA->setVisibility(GlobalValue::HiddenVisibility);
}

void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
                                  StringRef Name, uint32_t Const,
                                  uint32_t &Storage) {
  if (shouldExportConstantsAsAbsoluteSymbols()) {
    exportGlobal(
        Slot, Args, Name,
        ConstantExpr::getIntToPtr(ConstantInt::get(Int32Ty, Const), Int8PtrTy));
    return;
  }

  Storage = Const;
}

Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
                                     StringRef Name) {
  Constant *C =
      M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty);
  auto *GV = dyn_cast<GlobalVariable>(C);
  if (GV)
    GV->setVisibility(GlobalValue::HiddenVisibility);
  return C;
}

Constant *DevirtModule::importConstant(VTableSlot Slot, ArrayRef<uint64_t> Args,
                                       StringRef Name, IntegerType *IntTy,
                                       uint32_t Storage) {
  if (!shouldExportConstantsAsAbsoluteSymbols())
    return ConstantInt::get(IntTy, Storage);

  Constant *C = importGlobal(Slot, Args, Name);
  auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
  C = ConstantExpr::getPtrToInt(C, IntTy);

  // We only need to set metadata if the global is newly created, in which
  // case it would not have hidden visibility.
  if (GV->hasMetadata(LLVMContext::MD_absolute_symbol))
    return C;

  auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
    auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
    auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
    GV->setMetadata(LLVMContext::MD_absolute_symbol,
                    MDNode::get(M.getContext(), {MinC, MaxC}));
  };
  unsigned AbsWidth = IntTy->getBitWidth();
  if (AbsWidth == IntPtrTy->getBitWidth())
    SetAbsRange(~0ull, ~0ull); // Full set.
  else
    SetAbsRange(0, 1ull << AbsWidth);
  return C;
}
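
// For illustration only (hypothetical names): on x86 ELF, where
// shouldExportConstantsAsAbsoluteSymbols() holds, an imported constant becomes
// a reference to an absolute symbol, roughly
//   @__typeid__ZTS1A_8_1_2_bit = external hidden global [0 x i8],
//       !absolute_symbol !{i64 0, i64 256}
// and the value is taken as ptrtoint of that symbol. The [0, 2^width) range
// (or the full-set {-1, -1} range when the width matches the pointer size)
// tells later passes how large the link-time-resolved value can be.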

void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
                                        bool IsOne,
                                        Constant *UniqueMemberAddr) {
  for (auto &&Call : CSInfo.CallSites) {
    if (!OptimizedCalls.insert(&Call.CB).second)
      continue;
    IRBuilder<> B(&Call.CB);
    Value *Cmp =
        B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable,
                     B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType()));
    Cmp = B.CreateZExt(Cmp, Call.CB.getType());
    NumUniqueRetVal++;
    Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter,
                         Cmp);
  }
  CSInfo.markDevirt();
}

Constant *DevirtModule::getMemberAddr(const TypeMemberInfo *M) {
  return ConstantExpr::getGetElementPtr(Int8Ty, M->Bits->GV,
                                        ConstantInt::get(Int64Ty, M->Offset));
}

bool DevirtModule::tryUniqueRetValOpt(
    unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
    CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
    VTableSlot Slot, ArrayRef<uint64_t> Args) {
  // IsOne controls whether we look for a 0 or a 1.
  auto tryUniqueRetValOptFor = [&](bool IsOne) {
    const TypeMemberInfo *UniqueMember = nullptr;
    for (const VirtualCallTarget &Target : TargetsForSlot) {
      if (Target.RetVal == (IsOne ? 1 : 0)) {
        if (UniqueMember)
          return false;
        UniqueMember = Target.TM;
      }
    }

    // We should have found a unique member or bailed out by now. We already
    // checked for a uniform return value in tryUniformRetValOpt.
    assert(UniqueMember);

    Constant *UniqueMemberAddr = getMemberAddr(UniqueMember);
    if (CSInfo.isExported()) {
      Res->TheKind = WholeProgramDevirtResolution::ByArg::UniqueRetVal;
      Res->Info = IsOne;

      exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
    }

    // Replace each call with the comparison.
    applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
                         UniqueMemberAddr);

    // Update devirtualization statistics for targets.
    if (RemarksEnabled || AreStatisticsEnabled())
      for (auto &&Target : TargetsForSlot)
        Target.WasDevirt = true;

    return true;
  };

  if (BitWidth == 1) {
    if (tryUniqueRetValOptFor(true))
      return true;
    if (tryUniqueRetValOptFor(false))
      return true;
  }
  return false;
}
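
// For illustration only (hypothetical IR): with an i1 return where exactly one
// vtable's implementation returns true, applyUniqueRetValOpt above rewrites
//   %r = call i1 %fptr(ptr %obj)
// into a pointer comparison against that vtable's member address:
//   %cmp = icmp eq ptr %vtable, getelementptr (i8, ptr @_ZTV1B, i64 16)
//   %r   = zext i1 %cmp to i1
// The zext is a no-op for i1 and folds away; for IsOne == false the comparison
// uses icmp ne instead.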

void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
                                         Constant *Byte, Constant *Bit) {
  for (auto Call : CSInfo.CallSites) {
    if (!OptimizedCalls.insert(&Call.CB).second)
      continue;
    auto *RetType = cast<IntegerType>(Call.CB.getType());
    IRBuilder<> B(&Call.CB);
    Value *Addr = B.CreatePtrAdd(Call.VTable, Byte);
    if (RetType->getBitWidth() == 1) {
      Value *Bits = B.CreateLoad(Int8Ty, Addr);
      Value *BitsAndBit = B.CreateAnd(Bits, Bit);
      auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
      NumVirtConstProp1Bit++;
      Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
                           OREGetter, IsBitSet);
    } else {
      Value *Val = B.CreateLoad(RetType, Addr);
      NumVirtConstProp++;
      Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
                           OREGetter, Val);
    }
  }
  CSInfo.markDevirt();
}

bool DevirtModule::tryVirtualConstProp(
    MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
    WholeProgramDevirtResolution *Res, VTableSlot Slot) {
  // TODO: Skip for now if the vtable symbol was an alias to a function,
  // need to evaluate whether it would be correct to analyze the aliasee
  // function for this optimization.
  auto Fn = dyn_cast<Function>(TargetsForSlot[0].Fn);
  if (!Fn)
    return false;
  // This only works if the function returns an integer.
  auto RetType = dyn_cast<IntegerType>(Fn->getReturnType());
  if (!RetType)
    return false;
  unsigned BitWidth = RetType->getBitWidth();
  if (BitWidth > 64)
    return false;

  // Make sure that each function is defined, does not access memory, takes at
  // least one argument, does not use its first argument (which we assume is
  // 'this'), and has the same return type.
  //
  // Note that we test whether this copy of the function is readnone, rather
  // than testing function attributes, which must hold for any copy of the
  // function, even a less optimized version substituted at link time. This is
  // sound because the virtual constant propagation optimizations effectively
  // inline all implementations of the virtual function into each call site,
  // rather than using function attributes to perform local optimization.
  for (VirtualCallTarget &Target : TargetsForSlot) {
    // TODO: Skip for now if the vtable symbol was an alias to a function,
    // need to evaluate whether it would be correct to analyze the aliasee
    // function for this optimization.
    auto Fn = dyn_cast<Function>(Target.Fn);
    if (!Fn)
      return false;

    if (Fn->isDeclaration() ||
        !computeFunctionBodyMemoryAccess(*Fn, AARGetter(*Fn))
             .doesNotAccessMemory() ||
        Fn->arg_empty() || !Fn->arg_begin()->use_empty() ||
        Fn->getReturnType() != RetType)
      return false;
  }

  for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
    if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
      continue;

    WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
    if (Res)
      ResByArg = &Res->ResByArg[CSByConstantArg.first];

    if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
      continue;

    if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
                           ResByArg, Slot, CSByConstantArg.first))
      continue;

    // Find an allocation offset in bits in all vtables associated with the
    // type.
    uint64_t AllocBefore =
        findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
    uint64_t AllocAfter =
        findLowestOffset(TargetsForSlot, /*IsAfter=*/true, BitWidth);

    // Calculate the total amount of padding needed to store a value at both
    // ends of the object.
    uint64_t TotalPaddingBefore = 0, TotalPaddingAfter = 0;
    for (auto &&Target : TargetsForSlot) {
      TotalPaddingBefore += std::max<int64_t>(
          (AllocBefore + 7) / 8 - Target.allocatedBeforeBytes() - 1, 0);
      TotalPaddingAfter += std::max<int64_t>(
          (AllocAfter + 7) / 8 - Target.allocatedAfterBytes() - 1, 0);
    }

    // If the amount of padding is too large, give up.
    // FIXME: do something smarter here.
    if (std::min(TotalPaddingBefore, TotalPaddingAfter) > 128)
      continue;

    // Calculate the offset to the value as a (possibly negative) byte offset
    // and (if applicable) a bit offset, and store the values in the targets.
    int64_t OffsetByte;
    uint64_t OffsetBit;
    if (TotalPaddingBefore <= TotalPaddingAfter)
      setBeforeReturnValues(TargetsForSlot, AllocBefore, BitWidth, OffsetByte,
                            OffsetBit);
    else
      setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
                           OffsetBit);

    if (RemarksEnabled || AreStatisticsEnabled())
      for (auto &&Target : TargetsForSlot)
        Target.WasDevirt = true;

    if (CSByConstantArg.second.isExported()) {
      ResByArg->TheKind = WholeProgramDevirtResolution::ByArg::VirtualConstProp;
      exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
                     ResByArg->Byte);
      exportConstant(Slot, CSByConstantArg.first, "bit", 1ULL << OffsetBit,
                     ResByArg->Bit);
    }

    // Rewrite each call to a load from OffsetByte/OffsetBit.
    Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
    Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
    applyVirtualConstProp(CSByConstantArg.second,
                          TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
  }
  return true;
}
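
// For illustration only (hypothetical offsets): after virtual constant
// propagation, a call to a readnone virtual function with constant arguments
// is answered by a load relative to the vtable pointer. With OffsetByte == -5,
// applyVirtualConstProp rewrites
//   %r = call i32 %fptr(ptr %obj, i32 1)
// into
//   %p = getelementptr i8, ptr %vtable, i32 -5
//   %r = load i32, ptr %p
// For i1 returns, a single byte is loaded and tested against the bit mask
// (1 << OffsetBit) instead.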

void DevirtModule::rebuildGlobal(VTableBits &B) {
  if (B.Before.Bytes.empty() && B.After.Bytes.empty())
    return;

  // Align the before byte array to the global's minimum alignment so that we
  // don't break any alignment requirements on the global.
  Align Alignment = M.getDataLayout().getValueOrABITypeAlignment(
      B.GV->getAlign(), B.GV->getValueType());
  B.Before.Bytes.resize(alignTo(B.Before.Bytes.size(), Alignment));

  // Before was stored in reverse order; flip it now.
  for (size_t I = 0, Size = B.Before.Bytes.size(); I != Size / 2; ++I)
    std::swap(B.Before.Bytes[I], B.Before.Bytes[Size - 1 - I]);

  // Build an anonymous global containing the before bytes, followed by the
  // original initializer, followed by the after bytes.
  auto NewInit = ConstantStruct::getAnon(
      {ConstantDataArray::get(M.getContext(), B.Before.Bytes),
       B.GV->getInitializer(),
       ConstantDataArray::get(M.getContext(), B.After.Bytes)});
  auto NewGV =
      new GlobalVariable(M, NewInit->getType(), B.GV->isConstant(),
                         GlobalVariable::PrivateLinkage, NewInit, "", B.GV);
  NewGV->setSection(B.GV->getSection());
  NewGV->setComdat(B.GV->getComdat());
  NewGV->setAlignment(B.GV->getAlign());

  // Copy the original vtable's metadata to the anonymous global, adjusting
  // offsets as required.
  NewGV->copyMetadata(B.GV, B.Before.Bytes.size());

  // Build an alias named after the original global, pointing at the second
  // element (the original initializer).
  auto Alias = GlobalAlias::create(
      B.GV->getInitializer()->getType(), 0, B.GV->getLinkage(), "",
      ConstantExpr::getInBoundsGetElementPtr(
          NewInit->getType(), NewGV,
          ArrayRef<Constant *>{ConstantInt::get(Int32Ty, 0),
                               ConstantInt::get(Int32Ty, 1)}),
      &M);
  Alias->setVisibility(B.GV->getVisibility());
  Alias->takeName(B.GV);

  B.GV->replaceAllUsesWith(Alias);
  B.GV->eraseFromParent();
}
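
// For illustration only: rebuildGlobal replaces a vtable that received
// propagated constants with an anonymous global laid out as
//   { [B x i8] before bytes, <original vtable initializer>, [A x i8] after bytes }
// plus an alias carrying the original name that points at the middle element.
// Existing references, and the dynamic layout of the vtable itself, are
// unchanged, while the propagated constants live at small negative or positive
// byte offsets from the aliased address.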

bool DevirtModule::areRemarksEnabled() {
  const auto &FL = M.getFunctionList();
  for (const Function &Fn : FL) {
    if (Fn.empty())
      continue;
    auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &Fn.front());
    return DI.isEnabled();
  }
  return false;
}

void DevirtModule::scanTypeTestUsers(
    Function *TypeTestFunc,
    DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
  // Find all virtual calls via a virtual table pointer %p under an assumption
  // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p
  // points to a member of the type identifier %md. Group calls by (type ID,
  // offset) pair (effectively the identity of the virtual function) and store
  // to CallSlots.
  for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
    auto *CI = dyn_cast<CallInst>(U.getUser());
    if (!CI)
      continue;

    // Search for virtual calls based on %p and add them to DevirtCalls.
    SmallVector<DevirtCallSite, 1> DevirtCalls;
    SmallVector<CallInst *, 1> Assumes;
    auto &DT = LookupDomTree(*CI->getFunction());
    findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);

    Metadata *TypeId =
        cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
    // If we found any, add them to CallSlots.
    if (!Assumes.empty()) {
      Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
      for (DevirtCallSite Call : DevirtCalls)
        CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB, nullptr);
    }

    auto RemoveTypeTestAssumes = [&]() {
      // We no longer need the assumes or the type test.
      for (auto *Assume : Assumes)
        Assume->eraseFromParent();
      // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
      // may use the vtable argument later.
      if (CI->use_empty())
        CI->eraseFromParent();
    };

    // At this point we could remove all type test assume sequences, as they
    // were originally inserted for WPD. However, we can keep these in the
    // code stream for later analysis (e.g. to help drive more efficient ICP
    // sequences). They will eventually be removed by a second LowerTypeTests
    // invocation that cleans them up. In order to do this correctly, the first
    // LowerTypeTests invocation needs to know that they have "Unknown" type
    // test resolution, so that they aren't treated as Unsat and lowered to
    // False, which will break any uses on assumes. Below we remove any type
    // test assumes that will not be treated as Unknown by LTT.

    // The type test assumes will be treated by LTT as Unsat if the type id is
    // not used on a global (in which case it has no entry in the TypeIdMap).
    if (!TypeIdMap.count(TypeId))
      RemoveTypeTestAssumes();

    // For ThinLTO importing, we need to remove the type test assumes if this is
    // an MDString type id without a corresponding TypeIdSummary. Any
    // non-MDString type ids are ignored and treated as Unknown by LTT, so their
    // type test assumes can be kept. If the MDString type id is missing a
    // TypeIdSummary (e.g. because there was no use on a vcall, preventing the
    // exporting phase of WPD from analyzing it), then it would be treated as
    // Unsat by LTT and we need to remove its type test assumes here. If not
    // used on a vcall we don't need them for later optimization use in any
    // case.
    else if (ImportSummary && isa<MDString>(TypeId)) {
      const TypeIdSummary *TidSummary =
          ImportSummary->getTypeIdSummary(cast<MDString>(TypeId)->getString());
      if (!TidSummary)
        RemoveTypeTestAssumes();
      else
        // If one was created it should not be Unsat, because if we reached here
        // the type id was used on a global.
        assert(TidSummary->TTRes.TheKind != TypeTestResolution::Unsat);
    }
  }
}

void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
  Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test);

  for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
    auto *CI = dyn_cast<CallInst>(U.getUser());
    if (!CI)
      continue;

    Value *Ptr = CI->getArgOperand(0);
    Value *Offset = CI->getArgOperand(1);
    Value *TypeIdValue = CI->getArgOperand(2);
    Metadata *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();

    SmallVector<DevirtCallSite, 1> DevirtCalls;
    SmallVector<Instruction *, 1> LoadedPtrs;
    SmallVector<Instruction *, 1> Preds;
    bool HasNonCallUses = false;
    auto &DT = LookupDomTree(*CI->getFunction());
    findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
                                               HasNonCallUses, CI, DT);

    // Start by generating "pessimistic" code that explicitly loads the function
    // pointer from the vtable and performs the type check. If possible, we will
    // eliminate the load and the type check later.

    // If possible, only generate the load at the point where it is used.
    // This helps avoid unnecessary spills.
    IRBuilder<> LoadB(
        (LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);

    Value *LoadedValue = nullptr;
    if (TypeCheckedLoadFunc->getIntrinsicID() ==
        Intrinsic::type_checked_load_relative) {
      Value *GEP = LoadB.CreatePtrAdd(Ptr, Offset);
      LoadedValue = LoadB.CreateLoad(Int32Ty, GEP);
      LoadedValue = LoadB.CreateSExt(LoadedValue, IntPtrTy);
      GEP = LoadB.CreatePtrToInt(GEP, IntPtrTy);
      LoadedValue = LoadB.CreateAdd(GEP, LoadedValue);
      LoadedValue = LoadB.CreateIntToPtr(LoadedValue, Int8PtrTy);
    } else {
      Value *GEP = LoadB.CreatePtrAdd(Ptr, Offset);
      LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEP);
    }

    for (Instruction *LoadedPtr : LoadedPtrs) {
      LoadedPtr->replaceAllUsesWith(LoadedValue);
      LoadedPtr->eraseFromParent();
    }

    // Likewise for the type test.
    IRBuilder<> CallB((Preds.size() == 1 && !HasNonCallUses) ? Preds[0] : CI);
    CallInst *TypeTestCall = CallB.CreateCall(TypeTestFunc, {Ptr, TypeIdValue});

    for (Instruction *Pred : Preds) {
      Pred->replaceAllUsesWith(TypeTestCall);
      Pred->eraseFromParent();
    }

    // We have already erased any extractvalue instructions that refer to the
    // intrinsic call, but the intrinsic may have other non-extractvalue uses
    // (although this is unlikely). In that case, explicitly build a pair and
    // RAUW it.
    if (!CI->use_empty()) {
      Value *Pair = PoisonValue::get(CI->getType());
      IRBuilder<> B(CI);
      Pair = B.CreateInsertValue(Pair, LoadedValue, {0});
      Pair = B.CreateInsertValue(Pair, TypeTestCall, {1});
      CI->replaceAllUsesWith(Pair);
    }

    // The number of unsafe uses is initially the number of uses.
    auto &NumUnsafeUses = NumUnsafeUsesForTypeTest[TypeTestCall];
    NumUnsafeUses = DevirtCalls.size();

    // If the function pointer has a non-call user, we cannot eliminate the type
    // check, as one of those users may eventually call the pointer. Increment
    // the unsafe use count to make sure it cannot reach zero.
    if (HasNonCallUses)
      ++NumUnsafeUses;
    for (DevirtCallSite Call : DevirtCalls) {
      CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CB,
                                                   &NumUnsafeUses);
    }

    CI->eraseFromParent();
  }
}

void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
  auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
  if (!TypeId)
    return;
  const TypeIdSummary *TidSummary =
      ImportSummary->getTypeIdSummary(TypeId->getString());
  if (!TidSummary)
    return;
  auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
  if (ResI == TidSummary->WPDRes.end())
    return;
  const WholeProgramDevirtResolution &Res = ResI->second;

  if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
    assert(!Res.SingleImplName.empty());
    // The type of the function in the declaration is irrelevant because every
    // call site will cast it to the correct type.
    Constant *SingleImpl =
        cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
                                             Type::getVoidTy(M.getContext()))
                           .getCallee());

    // This is the import phase so we should not be exporting anything.
    bool IsExported = false;
    applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
    assert(!IsExported);
  }

  for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
    auto I = Res.ResByArg.find(CSByConstantArg.first);
    if (I == Res.ResByArg.end())
      continue;
    auto &ResByArg = I->second;
    // FIXME: We should figure out what to do about the "function name" argument
    // to the apply* functions, as the function names are unavailable during the
    // importing phase. For now we just pass the empty string. This does not
    // impact correctness because the function names are just used for remarks.
    switch (ResByArg.TheKind) {
    case WholeProgramDevirtResolution::ByArg::UniformRetVal:
      applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
      break;
    case WholeProgramDevirtResolution::ByArg::UniqueRetVal: {
      Constant *UniqueMemberAddr =
          importGlobal(Slot, CSByConstantArg.first, "unique_member");
      applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
                           UniqueMemberAddr);
      break;
    }
    case WholeProgramDevirtResolution::ByArg::VirtualConstProp: {
      Constant *Byte = importConstant(Slot, CSByConstantArg.first, "byte",
                                      Int32Ty, ResByArg.Byte);
      Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty,
                                     ResByArg.Bit);
      applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
      break;
    }
    default:
      break;
    }
  }

  if (Res.TheKind == WholeProgramDevirtResolution::BranchFunnel) {
    // The type of the function is irrelevant, because it's bitcast at calls
    // anyhow.
    Constant *JT = cast<Constant>(
        M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
                              Type::getVoidTy(M.getContext()))
            .getCallee());
    bool IsExported = false;
    applyICallBranchFunnel(SlotInfo, JT, IsExported);
    assert(!IsExported);
  }
}

void DevirtModule::removeRedundantTypeTests() {
  auto True = ConstantInt::getTrue(M.getContext());
  for (auto &&U : NumUnsafeUsesForTypeTest) {
    if (U.second == 0) {
      U.first->replaceAllUsesWith(True);
      U.first->eraseFromParent();
    }
  }
}

ValueInfo
DevirtModule::lookUpFunctionValueInfo(Function *TheFn,
                                      ModuleSummaryIndex *ExportSummary) {
  assert((ExportSummary != nullptr) &&
         "Caller guarantees ExportSummary is not nullptr");

  const auto TheFnGUID = TheFn->getGUID();
  const auto TheFnGUIDWithExportedName = GlobalValue::getGUID(TheFn->getName());
  // Look up ValueInfo with the GUID in the current linkage.
  ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFnGUID);
  // If no entry is found and GUID is different from GUID computed using
  // exported name, look up ValueInfo with the exported name unconditionally.
  // This is a fallback.
  //
  // The reason to have a fallback:
  // 1. LTO could enable global value internalization via
  //    `enable-lto-internalization`.
  // 2. The GUID in ExportedSummary is computed using exported name.
  if ((!TheFnVI) && (TheFnGUID != TheFnGUIDWithExportedName)) {
    TheFnVI = ExportSummary->getValueInfo(TheFnGUIDWithExportedName);
  }
  return TheFnVI;
}

bool DevirtModule::mustBeUnreachableFunction(
    Function *const F, ModuleSummaryIndex *ExportSummary) {
  // First, learn unreachability by analyzing function IR.
  if (!F->isDeclaration()) {
    // A function must be unreachable if its entry block ends with an
    // 'unreachable'.
    return isa<UnreachableInst>(F->getEntryBlock().getTerminator());
  }
  // Learn unreachability from ExportSummary if ExportSummary is present.
  return ExportSummary &&
         ::mustBeUnreachableFunction(
             DevirtModule::lookUpFunctionValueInfo(F, ExportSummary));
}

bool DevirtModule::run() {
  // If only some of the modules were split, we cannot correctly perform
  // this transformation. We already checked for the presence of type tests
  // with partially split modules during the thin link, and would have emitted
  // an error if any were found, so here we can simply return.
  if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
      (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
    return false;

  Function *TypeTestFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_test));
  Function *TypeCheckedLoadFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
  Function *TypeCheckedLoadRelativeFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load_relative));
  Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));

  // Normally if there are no users of the devirtualization intrinsics in the
  // module, this pass has nothing to do. But if we are exporting, we also need
  // to handle any users that appear only in the function summaries.
  if (!ExportSummary &&
      (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
       AssumeFunc->use_empty()) &&
      (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
      (!TypeCheckedLoadRelativeFunc ||
       TypeCheckedLoadRelativeFunc->use_empty()))
    return false;

  // Rebuild type metadata into a map for easy lookup.
  std::vector<VTableBits> Bits;
  DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
  buildTypeIdentifierMap(Bits, TypeIdMap);

  if (TypeTestFunc && AssumeFunc)
    scanTypeTestUsers(TypeTestFunc, TypeIdMap);

  if (TypeCheckedLoadFunc)
    scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);

  if (TypeCheckedLoadRelativeFunc)
    scanTypeCheckedLoadUsers(TypeCheckedLoadRelativeFunc);

  if (ImportSummary) {
    for (auto &S : CallSlots)
      importResolution(S.first, S.second);

    removeRedundantTypeTests();

    // We have lowered or deleted the type intrinsics, so we will no longer have
    // enough information to reason about the liveness of virtual function
    // pointers in GlobalDCE.
    for (GlobalVariable &GV : M.globals())
      GV.eraseMetadata(LLVMContext::MD_vcall_visibility);

    // The rest of the code is only necessary when exporting or during regular
    // LTO, so we are done.
    return true;
  }

  if (TypeIdMap.empty())
    return true;

  // Collect information from summary about which calls to try to devirtualize.
  if (ExportSummary) {
    DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
    for (auto &P : TypeIdMap) {
      if (auto *TypeId = dyn_cast<MDString>(P.first))
        MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
            TypeId);
    }

    for (auto &P : *ExportSummary) {
      for (auto &S : P.second.SummaryList) {
        auto *FS = dyn_cast<FunctionSummary>(S.get());
        if (!FS)
          continue;
        // FIXME: Only add live functions.
        for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VF.GUID]) {
            CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
          }
        }
        for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VF.GUID]) {
            CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
          }
        }
        for (const FunctionSummary::ConstVCall &VC :
             FS->type_test_assume_const_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
            CallSlots[{MD, VC.VFunc.Offset}]
                .ConstCSInfo[VC.Args]
                .addSummaryTypeTestAssumeUser(FS);
          }
        }
        for (const FunctionSummary::ConstVCall &VC :
             FS->type_checked_load_const_vcalls()) {
          for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
            CallSlots[{MD, VC.VFunc.Offset}]
                .ConstCSInfo[VC.Args]
                .addSummaryTypeCheckedLoadUser(FS);
          }
        }
      }
    }
  }

  // For each (type, offset) pair:
  bool DidVirtualConstProp = false;
  std::map<std::string, GlobalValue *> DevirtTargets;
  for (auto &S : CallSlots) {
    // Search each of the members of the type identifier for the virtual
    // function implementation at offset S.first.ByteOffset, and add to
    // TargetsForSlot.
    std::vector<VirtualCallTarget> TargetsForSlot;
    WholeProgramDevirtResolution *Res = nullptr;
    const std::set<TypeMemberInfo> &TypeMemberInfos = TypeIdMap[S.first.TypeID];
    if (ExportSummary && isa<MDString>(S.first.TypeID) &&
        TypeMemberInfos.size())
      // For any type id used on a global's type metadata, create the type id
      // summary resolution regardless of whether we can devirtualize, so that
      // lower type tests knows the type id is not Unsat. If it was not used on
      // a global's type metadata, the TypeIdMap entry set will be empty, and
      // we don't want to create an entry (with the default Unknown type
      // resolution), which can prevent detection of the Unsat.
      Res = &ExportSummary
                 ->getOrInsertTypeIdSummary(
                     cast<MDString>(S.first.TypeID)->getString())
                 .WPDRes[S.first.ByteOffset];
    if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos,
                                  S.first.ByteOffset, ExportSummary)) {

      if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) {
        DidVirtualConstProp |=
            tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first);

        tryICallBranchFunnel(TargetsForSlot, S.second, Res, S.first);
      }

      // Collect functions devirtualized at least for one call site for stats.
      if (RemarksEnabled || AreStatisticsEnabled())
        for (const auto &T : TargetsForSlot)
          if (T.WasDevirt)
            DevirtTargets[std::string(T.Fn->getName())] = T.Fn;
    }

    // CFI-specific: if we are exporting and any llvm.type.checked.load
    // intrinsics were *not* devirtualized, we need to add the resulting
    // llvm.type.test intrinsics to the function summaries so that the
    // LowerTypeTests pass will export them.
    if (ExportSummary && isa<MDString>(S.first.TypeID)) {
      auto GUID =
          GlobalValue::getGUID(cast<MDString>(S.first.TypeID)->getString());
      for (auto *FS : S.second.CSInfo.SummaryTypeCheckedLoadUsers)
        FS->addTypeTest(GUID);
      for (auto &CCS : S.second.ConstCSInfo)
        for (auto *FS : CCS.second.SummaryTypeCheckedLoadUsers)
          FS->addTypeTest(GUID);
    }
  }

  if (RemarksEnabled) {
    // Generate remarks for each devirtualized function.
    for (const auto &DT : DevirtTargets) {
      GlobalValue *GV = DT.second;
      auto F = dyn_cast<Function>(GV);
      if (!F) {
        auto A = dyn_cast<GlobalAlias>(GV);
        assert(A && isa<Function>(A->getAliasee()));
        F = dyn_cast<Function>(A->getAliasee());
        assert(F);
      }

      using namespace ore;
      OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
                        << "devirtualized "
                        << NV("FunctionName", DT.first));
    }
  }

  NumDevirtTargets += DevirtTargets.size();

  removeRedundantTypeTests();

  // Rebuild each global we touched as part of virtual constant propagation to
  // include the before and after bytes.
  if (DidVirtualConstProp)
    for (VTableBits &B : Bits)
      rebuildGlobal(B);

  // We have lowered or deleted the type intrinsics, so we will no longer have
  // enough information to reason about the liveness of virtual function
  // pointers in GlobalDCE.
  for (GlobalVariable &GV : M.globals())
    GV.eraseMetadata(LLVMContext::MD_vcall_visibility);

  for (auto *CI : CallsWithPtrAuthBundleRemoved)
    CI->eraseFromParent();

  return true;
}

void DevirtIndex::run() {
  if (ExportSummary.typeIdCompatibleVtableMap().empty())
    return;

  DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID;
  for (const auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
    NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first);
    // Create the type id summary resolution regardless of whether we can
    // devirtualize, so that lower type tests knows the type id is used on
    // a global and not Unsat. We do this here rather than in the loop over the
    // CallSlots, since that handling will only see type tests that directly
    // feed assumes, and we would miss any that aren't currently handled by WPD
    // (such as type tests that feed assumes via phis).
    ExportSummary.getOrInsertTypeIdSummary(P.first);
  }

  // Collect information from summary about which calls to try to devirtualize.
  for (auto &P : ExportSummary) {
    for (auto &S : P.second.SummaryList) {
      auto *FS = dyn_cast<FunctionSummary>(S.get());
      if (!FS)
        continue;
      // FIXME: Only add live functions.
      for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
        for (StringRef Name : NameByGUID[VF.GUID]) {
          CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
        }
      }
      for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
        for (StringRef Name : NameByGUID[VF.GUID]) {
          CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
        }
      }
      for (const FunctionSummary::ConstVCall &VC :
           FS->type_test_assume_const_vcalls()) {
        for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
          CallSlots[{Name, VC.VFunc.Offset}]
              .ConstCSInfo[VC.Args]
              .addSummaryTypeTestAssumeUser(FS);
        }
      }
      for (const FunctionSummary::ConstVCall &VC :
           FS->type_checked_load_const_vcalls()) {
        for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
          CallSlots[{Name, VC.VFunc.Offset}]
              .ConstCSInfo[VC.Args]
              .addSummaryTypeCheckedLoadUser(FS);
        }
      }
    }
  }

  std::set<ValueInfo> DevirtTargets;
  // For each (type, offset) pair:
  for (auto &S : CallSlots) {
    // Search each of the members of the type identifier for the virtual
    // function implementation at offset S.first.ByteOffset, and add to
    // TargetsForSlot.
    std::vector<ValueInfo> TargetsForSlot;
    auto TidSummary =
        ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
    assert(TidSummary);
    // The type id summary would have been created while building the NameByGUID
    // map earlier.
    WholeProgramDevirtResolution *Res =
        &ExportSummary.getTypeIdSummary(S.first.TypeID)
             ->WPDRes[S.first.ByteOffset];
    if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
                                  S.first.ByteOffset)) {

      if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
                               DevirtTargets))
        continue;
    }
  }

  // Optionally have the thin link print a message for each devirtualized
  // function.
  if (PrintSummaryDevirt)
    for (const auto &DT : DevirtTargets)
      errs() << "Devirtualized call to " << DT << "\n";

  NumDevirtTargets += DevirtTargets.size();
}