// Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
// 35266 views
//===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file is a part of SanitizerBinaryMetadata.9//10//===----------------------------------------------------------------------===//1112#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"13#include "llvm/ADT/SetVector.h"14#include "llvm/ADT/SmallVector.h"15#include "llvm/ADT/Statistic.h"16#include "llvm/ADT/StringExtras.h"17#include "llvm/ADT/StringRef.h"18#include "llvm/ADT/Twine.h"19#include "llvm/Analysis/CaptureTracking.h"20#include "llvm/Analysis/ValueTracking.h"21#include "llvm/IR/Constant.h"22#include "llvm/IR/DerivedTypes.h"23#include "llvm/IR/Function.h"24#include "llvm/IR/GlobalValue.h"25#include "llvm/IR/GlobalVariable.h"26#include "llvm/IR/IRBuilder.h"27#include "llvm/IR/Instruction.h"28#include "llvm/IR/Instructions.h"29#include "llvm/IR/LLVMContext.h"30#include "llvm/IR/MDBuilder.h"31#include "llvm/IR/Metadata.h"32#include "llvm/IR/Module.h"33#include "llvm/IR/Type.h"34#include "llvm/IR/Value.h"35#include "llvm/ProfileData/InstrProf.h"36#include "llvm/Support/Allocator.h"37#include "llvm/Support/CommandLine.h"38#include "llvm/Support/Debug.h"39#include "llvm/Support/SpecialCaseList.h"40#include "llvm/Support/StringSaver.h"41#include "llvm/Support/VirtualFileSystem.h"42#include "llvm/TargetParser/Triple.h"43#include "llvm/Transforms/Utils/ModuleUtils.h"4445#include <array>46#include <cstdint>47#include <memory>4849using namespace llvm;5051#define DEBUG_TYPE "sanmd"5253namespace {5455//===--- Constants --------------------------------------------------------===//5657constexpr uint32_t kVersionBase = 2; // occupies lower 16 bits58constexpr uint32_t kVersionPtrSizeRel = 
(1u << 16); // offsets are pointer-sized59constexpr int kCtorDtorPriority = 2;6061// Pairs of names of initialization callback functions and which section62// contains the relevant metadata.63class MetadataInfo {64public:65const StringRef FunctionPrefix;66const StringRef SectionSuffix;6768static const MetadataInfo Covered;69static const MetadataInfo Atomics;7071private:72// Forbid construction elsewhere.73explicit constexpr MetadataInfo(StringRef FunctionPrefix,74StringRef SectionSuffix)75: FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {}76};77const MetadataInfo MetadataInfo::Covered{78"__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection};79const MetadataInfo MetadataInfo::Atomics{80"__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection};8182// The only instances of MetadataInfo are the constants above, so a set of83// them may simply store pointers to them. To deterministically generate code,84// we need to use a set with stable iteration order, such as SetVector.85using MetadataInfoSet = SetVector<const MetadataInfo *>;8687//===--- Command-line options ---------------------------------------------===//8889cl::opt<bool> ClWeakCallbacks(90"sanitizer-metadata-weak-callbacks",91cl::desc("Declare callbacks extern weak, and only call if non-null."),92cl::Hidden, cl::init(true));93cl::opt<bool>94ClNoSanitize("sanitizer-metadata-nosanitize-attr",95cl::desc("Mark some metadata features uncovered in functions "96"with associated no_sanitize attributes."),97cl::Hidden, cl::init(true));9899cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",100cl::desc("Emit PCs for covered functions."),101cl::Hidden, cl::init(false));102cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",103cl::desc("Emit PCs for atomic operations."),104cl::Hidden, cl::init(false));105cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar",106cl::desc("Emit PCs for start of functions that are "107"subject for use-after-return checking"),108cl::Hidden, 
cl::init(false));109110//===--- Statistics -------------------------------------------------------===//111112STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");113STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");114STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions");115116//===----------------------------------------------------------------------===//117118// Apply opt overrides.119SanitizerBinaryMetadataOptions &&120transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {121Opts.Covered |= ClEmitCovered;122Opts.Atomics |= ClEmitAtomics;123Opts.UAR |= ClEmitUAR;124return std::move(Opts);125}126127class SanitizerBinaryMetadata {128public:129SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts,130std::unique_ptr<SpecialCaseList> Ignorelist)131: Mod(M), Options(transformOptionsFromCl(std::move(Opts))),132Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()),133VersionStr(utostr(getVersion())), IRB(M.getContext()) {134// FIXME: Make it work with other formats.135assert(TargetTriple.isOSBinFormatELF() && "ELF only");136assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) &&137"Device targets are not supported");138}139140bool run();141142private:143uint32_t getVersion() const {144uint32_t Version = kVersionBase;145const auto CM = Mod.getCodeModel();146if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large))147Version |= kVersionPtrSizeRel;148return Version;149}150151void runOn(Function &F, MetadataInfoSet &MIS);152153// Determines which set of metadata to collect for this instruction.154//155// Returns true if covered metadata is required to unambiguously interpret156// other metadata. 
For example, if we are interested in atomics metadata, any157// function with memory operations (atomic or not) requires covered metadata158// to determine if a memory operation is atomic or not in modules compiled159// with SanitizerBinaryMetadata.160bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,161uint64_t &FeatureMask);162163// Get start/end section marker pointer.164GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);165166// Returns the target-dependent section name.167StringRef getSectionName(StringRef SectionSuffix);168169// Returns the section start marker name.170StringRef getSectionStart(StringRef SectionSuffix);171172// Returns the section end marker name.173StringRef getSectionEnd(StringRef SectionSuffix);174175// Returns true if the access to the address should be considered "atomic".176bool pretendAtomicAccess(const Value *Addr);177178Module &Mod;179const SanitizerBinaryMetadataOptions Options;180std::unique_ptr<SpecialCaseList> Ignorelist;181const Triple TargetTriple;182const std::string VersionStr;183IRBuilder<> IRB;184BumpPtrAllocator Alloc;185UniqueStringSaver StringPool{Alloc};186};187188bool SanitizerBinaryMetadata::run() {189MetadataInfoSet MIS;190191for (Function &F : Mod)192runOn(F, MIS);193194if (MIS.empty())195return false;196197//198// Setup constructors and call all initialization functions for requested199// metadata features.200//201202auto *PtrTy = IRB.getPtrTy();203auto *Int32Ty = IRB.getInt32Ty();204const std::array<Type *, 3> InitTypes = {Int32Ty, PtrTy, PtrTy};205auto *Version = ConstantInt::get(Int32Ty, getVersion());206207for (const MetadataInfo *MI : MIS) {208const std::array<Value *, InitTypes.size()> InitArgs = {209Version,210getSectionMarker(getSectionStart(MI->SectionSuffix), PtrTy),211getSectionMarker(getSectionEnd(MI->SectionSuffix), PtrTy),212};213214// Calls to the initialization functions with different versions cannot be215// merged. 
Give the structors unique names based on the version, which will216// also be used as the COMDAT key.217const std::string StructorPrefix = (MI->FunctionPrefix + VersionStr).str();218219// We declare the _add and _del functions as weak, and only call them if220// there is a valid symbol linked. This allows building binaries with221// semantic metadata, but without having callbacks. When a tool that wants222// the metadata is linked which provides the callbacks, they will be called.223Function *Ctor =224createSanitizerCtorAndInitFunctions(225Mod, StructorPrefix + ".module_ctor",226(MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs,227/*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)228.first;229Function *Dtor =230createSanitizerCtorAndInitFunctions(231Mod, StructorPrefix + ".module_dtor",232(MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs,233/*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)234.first;235Constant *CtorComdatKey = nullptr;236Constant *DtorComdatKey = nullptr;237if (TargetTriple.supportsCOMDAT()) {238// Use COMDAT to deduplicate constructor/destructor function. 
The COMDAT239// key needs to be a non-local linkage.240Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName()));241Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName()));242Ctor->setLinkage(GlobalValue::ExternalLinkage);243Dtor->setLinkage(GlobalValue::ExternalLinkage);244// DSOs should _not_ call another constructor/destructor!245Ctor->setVisibility(GlobalValue::HiddenVisibility);246Dtor->setVisibility(GlobalValue::HiddenVisibility);247CtorComdatKey = Ctor;248DtorComdatKey = Dtor;249}250appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorComdatKey);251appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorComdatKey);252}253254return true;255}256257void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {258if (F.empty())259return;260if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))261return;262if (Ignorelist && Ignorelist->inSection("metadata", "fun", F.getName()))263return;264// Don't touch available_externally functions, their actual body is elsewhere.265if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)266return;267268MDBuilder MDB(F.getContext());269270// The metadata features enabled for this function, stored along covered271// metadata (if enabled).272uint64_t FeatureMask = 0;273// Don't emit unnecessary covered metadata for all functions to save space.274bool RequiresCovered = false;275276if (Options.Atomics || Options.UAR) {277for (BasicBlock &BB : F)278for (Instruction &I : BB)279RequiresCovered |= runOn(I, MIS, MDB, FeatureMask);280}281282if (ClNoSanitize && F.hasFnAttribute("no_sanitize_thread"))283FeatureMask &= ~kSanitizerBinaryMetadataAtomics;284if (F.isVarArg())285FeatureMask &= ~kSanitizerBinaryMetadataUAR;286if (FeatureMask & kSanitizerBinaryMetadataUAR) {287RequiresCovered = true;288NumMetadataUAR++;289}290291// Covered metadata is always emitted if explicitly requested, otherwise only292// if some other metadata requires it to unambiguously interpret it for293// modules compiled with 
SanitizerBinaryMetadata.294if (Options.Covered || (FeatureMask && RequiresCovered)) {295NumMetadataCovered++;296const auto *MI = &MetadataInfo::Covered;297MIS.insert(MI);298const StringRef Section = getSectionName(MI->SectionSuffix);299// The feature mask will be placed after the function size.300Constant *CFM = IRB.getInt64(FeatureMask);301F.setMetadata(LLVMContext::MD_pcsections,302MDB.createPCSections({{Section, {CFM}}}));303}304}305306bool isUARSafeCall(CallInst *CI) {307auto *F = CI->getCalledFunction();308// There are no intrinsic functions that leak arguments.309// If the called function does not return, the current function310// does not return as well, so no possibility of use-after-return.311// Sanitizer function also don't leak or don't return.312// It's safe to both pass pointers to local variables to them313// and to tail-call them.314return F && (F->isIntrinsic() || F->doesNotReturn() ||315F->getName().starts_with("__asan_") ||316F->getName().starts_with("__hwsan_") ||317F->getName().starts_with("__ubsan_") ||318F->getName().starts_with("__msan_") ||319F->getName().starts_with("__tsan_"));320}321322bool hasUseAfterReturnUnsafeUses(Value &V) {323for (User *U : V.users()) {324if (auto *I = dyn_cast<Instruction>(U)) {325if (I->isLifetimeStartOrEnd() || I->isDroppable())326continue;327if (auto *CI = dyn_cast<CallInst>(U)) {328if (isUARSafeCall(CI))329continue;330}331if (isa<LoadInst>(U))332continue;333if (auto *SI = dyn_cast<StoreInst>(U)) {334// If storing TO the alloca, then the address isn't taken.335if (SI->getOperand(1) == &V)336continue;337}338if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {339if (!hasUseAfterReturnUnsafeUses(*GEPI))340continue;341} else if (auto *BCI = dyn_cast<BitCastInst>(U)) {342if (!hasUseAfterReturnUnsafeUses(*BCI))343continue;344}345}346return true;347}348return false;349}350351bool useAfterReturnUnsafe(Instruction &I) {352if (isa<AllocaInst>(I))353return hasUseAfterReturnUnsafeUses(I);354// Tail-called functions are not 
necessary intercepted355// at runtime because there is no call instruction.356// So conservatively mark the caller as requiring checking.357else if (auto *CI = dyn_cast<CallInst>(&I))358return CI->isTailCall() && !isUARSafeCall(CI);359return false;360}361362bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) {363if (!Addr)364return false;365366Addr = Addr->stripInBoundsOffsets();367auto *GV = dyn_cast<GlobalVariable>(Addr);368if (!GV)369return false;370371// Some compiler-generated accesses are known racy, to avoid false positives372// in data-race analysis pretend they're atomic.373if (GV->hasSection()) {374const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat();375const auto ProfSec =376getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false);377if (GV->getSection().ends_with(ProfSec))378return true;379}380if (GV->getName().starts_with("__llvm_gcov") ||381GV->getName().starts_with("__llvm_gcda"))382return true;383384return false;385}386387// Returns true if the memory at `Addr` may be shared with other threads.388bool maybeSharedMutable(const Value *Addr) {389// By default assume memory may be shared.390if (!Addr)391return true;392393if (isa<AllocaInst>(getUnderlyingObject(Addr)) &&394!PointerMayBeCaptured(Addr, true, true))395return false; // Object is on stack but does not escape.396397Addr = Addr->stripInBoundsOffsets();398if (auto *GV = dyn_cast<GlobalVariable>(Addr)) {399if (GV->isConstant())400return false; // Shared, but not mutable.401}402403return true;404}405406bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,407MDBuilder &MDB, uint64_t &FeatureMask) {408SmallVector<const MetadataInfo *, 1> InstMetadata;409bool RequiresCovered = false;410411// Only call if at least 1 type of metadata is requested.412assert(Options.UAR || Options.Atomics);413414if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) {415if (useAfterReturnUnsafe(I))416FeatureMask |= kSanitizerBinaryMetadataUAR;417}418419if 
(Options.Atomics) {420const Value *Addr = nullptr;421if (auto *SI = dyn_cast<StoreInst>(&I))422Addr = SI->getPointerOperand();423else if (auto *LI = dyn_cast<LoadInst>(&I))424Addr = LI->getPointerOperand();425426if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) {427auto SSID = getAtomicSyncScopeID(&I);428if ((SSID.has_value() && *SSID != SyncScope::SingleThread) ||429pretendAtomicAccess(Addr)) {430NumMetadataAtomics++;431InstMetadata.push_back(&MetadataInfo::Atomics);432}433FeatureMask |= kSanitizerBinaryMetadataAtomics;434RequiresCovered = true;435}436}437438// Attach MD_pcsections to instruction.439if (!InstMetadata.empty()) {440MIS.insert(InstMetadata.begin(), InstMetadata.end());441SmallVector<MDBuilder::PCSection, 1> Sections;442for (const auto &MI : InstMetadata)443Sections.push_back({getSectionName(MI->SectionSuffix), {}});444I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections));445}446447return RequiresCovered;448}449450GlobalVariable *451SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {452// Use ExternalWeak so that if all sections are discarded due to section453// garbage collection, the linker will not report undefined symbol errors.454auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false,455GlobalVariable::ExternalWeakLinkage,456/*Initializer=*/nullptr, MarkerName);457Marker->setVisibility(GlobalValue::HiddenVisibility);458return Marker;459}460461StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {462// FIXME: Other TargetTriples.463// Request ULEB128 encoding for all integer constants.464return StringPool.save(SectionSuffix + VersionStr + "!C");465}466467StringRef SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {468// Twine only concatenates 2 strings; with >2 strings, concatenating them469// creates Twine temporaries, and returning the final Twine no longer works470// because we'd end up with a stack-use-after-return. 
So here we also use the471// StringPool to store the new string.472return StringPool.save("__start_" + SectionSuffix + VersionStr);473}474475StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {476return StringPool.save("__stop_" + SectionSuffix + VersionStr);477}478479} // namespace480481SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(482SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles)483: Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {}484485PreservedAnalyses486SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {487std::unique_ptr<SpecialCaseList> Ignorelist;488if (!IgnorelistFiles.empty()) {489Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles,490*vfs::getRealFileSystem());491if (Ignorelist->inSection("metadata", "src", M.getSourceFileName()))492return PreservedAnalyses::all();493}494495SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist));496if (Pass.run())497return PreservedAnalyses::none();498return PreservedAnalyses::all();499}500501502