Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
35266 views
//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8/// \file9/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow10/// analysis.11///12/// Unlike other Sanitizer tools, this tool is not designed to detect a specific13/// class of bugs on its own. Instead, it provides a generic dynamic data flow14/// analysis framework to be used by clients to help detect application-specific15/// issues within their own code.16///17/// The analysis is based on automatic propagation of data flow labels (also18/// known as taint labels) through a program as it performs computation.19///20/// Argument and return value labels are passed through TLS variables21/// __dfsan_arg_tls and __dfsan_retval_tls.22///23/// Each byte of application memory is backed by a shadow memory byte. The24/// shadow byte can represent up to 8 labels. 
On Linux/x86_64, memory is then25/// laid out as follows:26///27/// +--------------------+ 0x800000000000 (top of memory)28/// | application 3 |29/// +--------------------+ 0x70000000000030/// | invalid |31/// +--------------------+ 0x61000000000032/// | origin 1 |33/// +--------------------+ 0x60000000000034/// | application 2 |35/// +--------------------+ 0x51000000000036/// | shadow 1 |37/// +--------------------+ 0x50000000000038/// | invalid |39/// +--------------------+ 0x40000000000040/// | origin 3 |41/// +--------------------+ 0x30000000000042/// | shadow 3 |43/// +--------------------+ 0x20000000000044/// | origin 2 |45/// +--------------------+ 0x11000000000046/// | invalid |47/// +--------------------+ 0x10000000000048/// | shadow 2 |49/// +--------------------+ 0x01000000000050/// | application 1 |51/// +--------------------+ 0x00000000000052///53/// MEM_TO_SHADOW(mem) = mem ^ 0x50000000000054/// SHADOW_TO_ORIGIN(shadow) = shadow + 0x10000000000055///56/// For more information, please refer to the design document:57/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html58//59//===----------------------------------------------------------------------===//6061#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"62#include "llvm/ADT/DenseMap.h"63#include "llvm/ADT/DenseSet.h"64#include "llvm/ADT/DepthFirstIterator.h"65#include "llvm/ADT/SmallPtrSet.h"66#include "llvm/ADT/SmallVector.h"67#include "llvm/ADT/StringRef.h"68#include "llvm/ADT/StringSet.h"69#include "llvm/ADT/iterator.h"70#include "llvm/Analysis/DomTreeUpdater.h"71#include "llvm/Analysis/GlobalsModRef.h"72#include "llvm/Analysis/TargetLibraryInfo.h"73#include "llvm/Analysis/ValueTracking.h"74#include "llvm/IR/Argument.h"75#include "llvm/IR/AttributeMask.h"76#include "llvm/IR/Attributes.h"77#include "llvm/IR/BasicBlock.h"78#include "llvm/IR/Constant.h"79#include "llvm/IR/Constants.h"80#include "llvm/IR/DataLayout.h"81#include "llvm/IR/DerivedTypes.h"82#include 
"llvm/IR/Dominators.h"83#include "llvm/IR/Function.h"84#include "llvm/IR/GlobalAlias.h"85#include "llvm/IR/GlobalValue.h"86#include "llvm/IR/GlobalVariable.h"87#include "llvm/IR/IRBuilder.h"88#include "llvm/IR/InstVisitor.h"89#include "llvm/IR/InstrTypes.h"90#include "llvm/IR/Instruction.h"91#include "llvm/IR/Instructions.h"92#include "llvm/IR/IntrinsicInst.h"93#include "llvm/IR/MDBuilder.h"94#include "llvm/IR/Module.h"95#include "llvm/IR/PassManager.h"96#include "llvm/IR/Type.h"97#include "llvm/IR/User.h"98#include "llvm/IR/Value.h"99#include "llvm/Support/Alignment.h"100#include "llvm/Support/Casting.h"101#include "llvm/Support/CommandLine.h"102#include "llvm/Support/ErrorHandling.h"103#include "llvm/Support/SpecialCaseList.h"104#include "llvm/Support/VirtualFileSystem.h"105#include "llvm/TargetParser/Triple.h"106#include "llvm/Transforms/Instrumentation.h"107#include "llvm/Transforms/Utils/BasicBlockUtils.h"108#include "llvm/Transforms/Utils/Local.h"109#include <algorithm>110#include <cassert>111#include <cstddef>112#include <cstdint>113#include <memory>114#include <set>115#include <string>116#include <utility>117#include <vector>118119using namespace llvm;120121// This must be consistent with ShadowWidthBits.122static const Align ShadowTLSAlignment = Align(2);123124static const Align MinOriginAlignment = Align(4);125126// The size of TLS variables. These constants must be kept in sync with the ones127// in dfsan.cpp.128static const unsigned ArgTLSSize = 800;129static const unsigned RetvalTLSSize = 800;130131// The -dfsan-preserve-alignment flag controls whether this pass assumes that132// alignment requirements provided by the input IR are correct. For example,133// if the input IR contains a load with alignment 8, this flag will cause134// the shadow load to have alignment 16. 
// This flag is disabled by default as
// we have unfortunately encountered too much code (including Clang itself;
// see PR14291) which performs misaligned access.
// Hidden option; initialised to false.
static cl::opt<bool> ClPreserveAlignment(
    "dfsan-preserve-alignment",
    cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
    cl::init(false));

// The ABI list files control how shadow parameters are passed. The pass treats
// every function labelled "uninstrumented" in the ABI list file as conforming
// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
// additional annotations for those functions, a call to one of those functions
// will produce a warning message, as the labelling behaviour of the function is
// unknown. The other supported annotations for uninstrumented functions are
// "functional" and "discard", which are described below under
// DataFlowSanitizer::WrapperKind.
// Functions will often be labelled with both "uninstrumented" and one of
// "functional" or "discard".
// This will leave the function unchanged by this
// pass, and create a wrapper function that will call the original.
//
// Instrumented functions can also be annotated as "force_zero_labels", which
// will make all shadow and return values set zero labels.
// Functions should never be labelled with both "force_zero_labels" and
// "uninstrumented" or any of the uninstrumented wrapper kinds.
static cl::list<std::string> ClABIListFiles(
    "dfsan-abilist",
    cl::desc("File listing native ABI functions and how the pass treats them"),
    cl::Hidden);

// Controls whether the pass includes or ignores the labels of pointers in load
// instructions.
static cl::opt<bool> ClCombinePointerLabelsOnLoad(
    "dfsan-combine-pointer-labels-on-load",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "loading from memory."),
    cl::Hidden, cl::init(true));

// Controls whether the pass includes or ignores the labels of pointers in
// store instructions.
static cl::opt<bool> ClCombinePointerLabelsOnStore(
    "dfsan-combine-pointer-labels-on-store",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "storing in memory."),
    cl::Hidden, cl::init(false));

// Controls whether the pass propagates labels of offsets in GEP instructions.
static cl::opt<bool> ClCombineOffsetLabelsOnGEP(
    "dfsan-combine-offset-labels-on-gep",
    cl::desc(
        "Combine the label of the offset with the label of the pointer when "
        "doing pointer arithmetic."),
    cl::Hidden, cl::init(true));

// Names of constant global variables (lookup tables) for which offset and/or
// pointer taint is still combined on load even when the two options above are
// turned off. The DataFlowSanitizer constructor copies these names into
// CombineTaintLookupTableNames.
static cl::list<std::string> ClCombineTaintLookupTables(
    "dfsan-combine-taint-lookup-table",
    cl::desc(
        "When dfsan-combine-offset-labels-on-gep and/or "
        "dfsan-combine-pointer-labels-on-load are false, this flag can "
        "be used to re-enable combining offset and/or pointer taint when "
        "loading specific constant global variables (i.e. lookup tables)."),
    cl::Hidden);

// Debugging aid: requests calls to __dfsan_nonzero_label whenever a parameter,
// load or return is observed with a nonzero label.
static cl::opt<bool> ClDebugNonzeroLabels(
    "dfsan-debug-nonzero-labels",
    cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
             "load or return with a nonzero label"),
    cl::Hidden);

// Experimental feature that inserts callbacks for certain data events.
// Currently callbacks are only inserted for loads, stores, memory transfers
// (i.e. memcpy and memmove), and comparisons.
//
// If this flag is set to true, the user must provide definitions for the
// following callback functions:
//   void __dfsan_load_callback(dfsan_label Label, void* addr);
//   void __dfsan_store_callback(dfsan_label Label, void* addr);
//   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
//   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
static cl::opt<bool> ClEventCallbacks(
    "dfsan-event-callbacks",
    cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
    cl::Hidden, cl::init(false));

// Experimental feature that inserts callbacks for conditionals, including:
// conditional branch, switch, select.
// This must be true for dfsan_set_conditional_callback() to have effect.
static cl::opt<bool> ClConditionalCallbacks(
    "dfsan-conditional-callbacks",
    cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden,
    cl::init(false));

// Experimental feature that inserts callbacks for data reaching a function,
// either via function arguments or loads.
// This must be true for dfsan_set_reaches_function_callback() to have effect.
static cl::opt<bool> ClReachesFunctionCallbacks(
    "dfsan-reaches-function-callbacks",
    cl::desc("Insert calls to callback functions on data reaching a function."),
    cl::Hidden, cl::init(false));

// Controls whether the pass tracks the control flow of select instructions.
static cl::opt<bool> ClTrackSelectControlFlow(
    "dfsan-track-select-control-flow",
    cl::desc("Propagate labels from condition values of select instructions "
             "to results."),
    cl::Hidden, cl::init(true));

// TODO: This default value follows MSan. DFSan may use a different value.
static cl::opt<int> ClInstrumentWithCallThreshold(
    "dfsan-instrument-with-call-threshold",
    cl::desc("If the function being instrumented requires more than "
             "this number of origin stores, use callbacks instead of "
             "inline checks (-1 means never use callbacks)."),
    cl::Hidden, cl::init(3500));

// Controls how to track origins.
// * 0: do not track origins.
// * 1: track origins at memory store operations.
// * 2: track origins at memory load and store operations.
//      TODO: track callsites.
static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
                                   cl::desc("Track origins of labels"),
                                   cl::Hidden, cl::init(0));

// When set, a personality routine that the ABI list marks as uninstrumented
// does not get a wrapper.
static cl::opt<bool> ClIgnorePersonalityRoutine(
    "dfsan-ignore-personality-routine",
    cl::desc("If a personality routine is marked uninstrumented from the ABI "
             "list, do not create a wrapper for it."),
    cl::Hidden, cl::init(false));

/// Returns the name used to match global \p G against "type" entries of the
/// ABI list: the name of G's value type when that is a named (non-literal)
/// struct, and the placeholder "<unknown type>" otherwise.
static StringRef getGlobalTypeString(const GlobalValue &G) {
  // Inspect the global's value type, as reported by getValueType().
  Type *GType = G.getValueType();
  // For now we support excluding struct types only.
  if (StructType *SGType = dyn_cast<StructType>(GType)) {
    // Literal structs are skipped and fall through to the placeholder.
    if (!SGType->isLiteral())
      return SGType->getName();
  }
  return "<unknown type>";
}

namespace {

// Memory map parameters used in application-to-shadow address calculation.
// Offset = (Addr & ~AndMask) ^ XorMask
// Shadow = ShadowBase + Offset
// Origin = (OriginBase + Offset) & ~3ULL
struct MemoryMapParams {
  uint64_t AndMask;
  uint64_t XorMask;
  uint64_t ShadowBase;
  uint64_t OriginBase;
};

} // end anonymous namespace

// The per-target constants below use underscores in their names, so the naming
// check is suppressed for the whole group.
// NOLINTBEGIN(readability-identifier-naming)
// aarch64 Linux
const MemoryMapParams Linux_AArch64_MemoryMapParams = {
    0,               // AndMask (not used)
    0x0B00000000000, // XorMask
    0,               // ShadowBase (not used)
    0x0200000000000, // OriginBase
};

// x86_64 Linux
const MemoryMapParams Linux_X86_64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x500000000000, // XorMask
    0,              // ShadowBase (not used)
    0x100000000000, // OriginBase
};

// loongarch64 Linux
const MemoryMapParams Linux_LoongArch64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x500000000000, // XorMask
    0,              // ShadowBase (not used)
    0x100000000000, // OriginBase
};
// NOLINTEND(readability-identifier-naming)

namespace {

/// Query interface over the ABI list. Every lookup is forwarded to the
/// "dataflow" section of the wrapped SpecialCaseList.
///
/// A list must be installed with set() before any isIn() overload is called:
/// the queries dereference SCL unconditionally.
class DFSanABIList {
  std::unique_ptr<SpecialCaseList> SCL;

public:
  DFSanABIList() = default;

  /// Takes ownership of \p List and uses it for all subsequent queries.
  void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }

  /// Returns whether either this function or its source file are listed in the
  /// given category.
  bool isIn(const Function &F, StringRef Category) const {
    return isIn(*F.getParent(), Category) ||
           SCL->inSection("dataflow", "fun", F.getName(), Category);
  }

  /// Returns whether this global alias is listed in the given category.
  ///
  /// If GA aliases a function, the alias's name is matched as a function name
  /// would be.  Similarly, aliases of globals are matched like globals.
  bool isIn(const GlobalAlias &GA, StringRef Category) const {
    // A match on the enclosing module covers everything defined in it.
    if (isIn(*GA.getParent(), Category))
      return true;

    if (isa<FunctionType>(GA.getValueType()))
      return SCL->inSection("dataflow", "fun", GA.getName(), Category);

    // Non-function aliases match by global name or by the name of their type.
    return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
           SCL->inSection("dataflow", "type", getGlobalTypeString(GA),
                          Category);
  }

  /// Returns whether this module is listed in the given category.
  bool isIn(const Module &M, StringRef Category) const {
    return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
  }
};

/// TransformedFunction is used to express the result of transforming one
/// function type into another.  This struct is immutable.
It holds metadata356/// useful for updating calls of the old function to the new type.357struct TransformedFunction {358TransformedFunction(FunctionType *OriginalType, FunctionType *TransformedType,359const std::vector<unsigned> &ArgumentIndexMapping)360: OriginalType(OriginalType), TransformedType(TransformedType),361ArgumentIndexMapping(ArgumentIndexMapping) {}362363// Disallow copies.364TransformedFunction(const TransformedFunction &) = delete;365TransformedFunction &operator=(const TransformedFunction &) = delete;366367// Allow moves.368TransformedFunction(TransformedFunction &&) = default;369TransformedFunction &operator=(TransformedFunction &&) = default;370371/// Type of the function before the transformation.372FunctionType *OriginalType;373374/// Type of the function after the transformation.375FunctionType *TransformedType;376377/// Transforming a function may change the position of arguments. This378/// member records the mapping from each argument's old position to its new379/// position. Argument positions are zero-indexed. If the transformation380/// from F to F' made the first argument of F into the third argument of F',381/// then ArgumentIndexMapping[0] will equal 2.382std::vector<unsigned> ArgumentIndexMapping;383};384385/// Given function attributes from a call site for the original function,386/// return function attributes appropriate for a call to the transformed387/// function.388AttributeList389transformFunctionAttributes(const TransformedFunction &TransformedFunction,390LLVMContext &Ctx, AttributeList CallSiteAttrs) {391392// Construct a vector of AttributeSet for each function argument.393std::vector<llvm::AttributeSet> ArgumentAttributes(394TransformedFunction.TransformedType->getNumParams());395396// Copy attributes from the parameter of the original function to the397// transformed version. 
'ArgumentIndexMapping' holds the mapping from398// old argument position to new.399for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size();400I < IE; ++I) {401unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I];402ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttrs(I);403}404405// Copy annotations on varargs arguments.406for (unsigned I = TransformedFunction.OriginalType->getNumParams(),407IE = CallSiteAttrs.getNumAttrSets();408I < IE; ++I) {409ArgumentAttributes.push_back(CallSiteAttrs.getParamAttrs(I));410}411412return AttributeList::get(Ctx, CallSiteAttrs.getFnAttrs(),413CallSiteAttrs.getRetAttrs(),414llvm::ArrayRef(ArgumentAttributes));415}416417class DataFlowSanitizer {418friend struct DFSanFunction;419friend class DFSanVisitor;420421enum { ShadowWidthBits = 8, ShadowWidthBytes = ShadowWidthBits / 8 };422423enum { OriginWidthBits = 32, OriginWidthBytes = OriginWidthBits / 8 };424425/// How should calls to uninstrumented functions be handled?426enum WrapperKind {427/// This function is present in an uninstrumented form but we don't know428/// how it should be handled. Print a warning and call the function anyway.429/// Don't label the return value.430WK_Warning,431432/// This function does not write to (user-accessible) memory, and its return433/// value is unlabelled.434WK_Discard,435436/// This function does not write to (user-accessible) memory, and the label437/// of its return value is the union of the label of its arguments.438WK_Functional,439440/// Instead of calling the function, a custom wrapper __dfsw_F is called,441/// where F is the name of the function. This function may wrap the442/// original function or provide its own implementation. WK_Custom uses an443/// extra pointer argument to return the shadow. 
/// This allows the wrapped
    /// form of the function type to be expressed in C.
    WK_Custom
  };

  // Module-wide state: the module and context being processed, and the IR
  // types and constants used for shadows and origins.
  // NOTE(review): presumably filled in by initializeModule(), whose body is
  // not part of this section - confirm.
  Module *Mod;
  LLVMContext *Ctx;
  Type *Int8Ptr;
  IntegerType *OriginTy;
  PointerType *OriginPtrTy;
  ConstantInt *ZeroOrigin;
  /// The shadow type for all primitive types and vector types.
  IntegerType *PrimitiveShadowTy;
  PointerType *PrimitiveShadowPtrTy;
  IntegerType *IntptrTy;
  ConstantInt *ZeroPrimitiveShadow;

  // TLS variables through which argument / return-value shadows and origins
  // are passed.
  Constant *ArgTLS;
  ArrayType *ArgOriginTLSTy;
  Constant *ArgOriginTLS;
  Constant *RetvalTLS;
  Constant *RetvalOriginTLS;

  // Function types of the runtime and callback entry points.
  FunctionType *DFSanUnionLoadFnTy;
  FunctionType *DFSanLoadLabelAndOriginFnTy;
  FunctionType *DFSanUnimplementedFnTy;
  FunctionType *DFSanWrapperExternWeakNullFnTy;
  FunctionType *DFSanSetLabelFnTy;
  FunctionType *DFSanNonzeroLabelFnTy;
  FunctionType *DFSanVarargWrapperFnTy;
  FunctionType *DFSanConditionalCallbackFnTy;
  FunctionType *DFSanConditionalCallbackOriginFnTy;
  FunctionType *DFSanReachesFunctionCallbackFnTy;
  FunctionType *DFSanReachesFunctionCallbackOriginFnTy;
  FunctionType *DFSanCmpCallbackFnTy;
  FunctionType *DFSanLoadStoreCallbackFnTy;
  FunctionType *DFSanMemTransferCallbackFnTy;
  FunctionType *DFSanChainOriginFnTy;
  FunctionType *DFSanChainOriginIfTaintedFnTy;
  FunctionType *DFSanMemOriginTransferFnTy;
  FunctionType *DFSanMemShadowOriginTransferFnTy;
  FunctionType *DFSanMemShadowOriginConditionalExchangeFnTy;
  FunctionType *DFSanMaybeStoreOriginFnTy;

  // Callees for the runtime and callback entry points.
  FunctionCallee DFSanUnionLoadFn;
  FunctionCallee DFSanLoadLabelAndOriginFn;
  FunctionCallee DFSanUnimplementedFn;
  FunctionCallee DFSanWrapperExternWeakNullFn;
  FunctionCallee DFSanSetLabelFn;
  FunctionCallee DFSanNonzeroLabelFn;
  FunctionCallee DFSanVarargWrapperFn;
  FunctionCallee DFSanLoadCallbackFn;
  FunctionCallee DFSanStoreCallbackFn;
  FunctionCallee DFSanMemTransferCallbackFn;
  FunctionCallee DFSanConditionalCallbackFn;
  FunctionCallee DFSanConditionalCallbackOriginFn;
  FunctionCallee DFSanReachesFunctionCallbackFn;
  FunctionCallee DFSanReachesFunctionCallbackOriginFn;
  FunctionCallee DFSanCmpCallbackFn;
  FunctionCallee DFSanChainOriginFn;
  FunctionCallee DFSanChainOriginIfTaintedFn;
  FunctionCallee DFSanMemOriginTransferFn;
  FunctionCallee DFSanMemShadowOriginTransferFn;
  FunctionCallee DFSanMemShadowOriginConditionalExchangeFn;
  FunctionCallee DFSanMaybeStoreOriginFn;
  SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;

  MDNode *ColdCallWeights;
  MDNode *OriginStoreWeights;
  /// ABI list built by the constructor from the ABI list files.
  DFSanABIList ABIList;
  DenseMap<Value *, Function *> UnwrappedFnMap;
  AttributeMask ReadOnlyNoneAttrs;
  /// Names taken from the dfsan-combine-taint-lookup-table option; filled in
  /// by the constructor.
  StringSet<> CombineTaintLookupTableNames;

  /// Memory map parameters used in calculation mapping application addresses
  /// to shadow addresses and origin addresses.
  const MemoryMapParams *MapParams;

  Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB);
  Value *getShadowAddress(Value *Addr, BasicBlock::iterator Pos);
  Value *getShadowAddress(Value *Addr, BasicBlock::iterator Pos,
                          Value *ShadowOffset);
  std::pair<Value *, Value *> getShadowOriginAddress(Value *Addr,
                                                     Align InstAlignment,
                                                     BasicBlock::iterator Pos);
  bool isInstrumented(const Function *F);
  bool isInstrumented(const GlobalAlias *GA);
  bool isForceZeroLabels(const Function *F);
  /// Returns the custom-wrapper function type for \p T together with the
  /// mapping from T's parameter positions to the wrapper's.
  TransformedFunction getCustomFunctionType(FunctionType *T);
  WrapperKind getWrapperKind(Function *F);
  void addGlobalNameSuffix(GlobalValue *GV);
  void buildExternWeakCheckIfNeeded(IRBuilder<> &IRB, Function *F);
  Function *buildWrapperFunction(Function *F, StringRef NewFName,
                                 GlobalValue::LinkageTypes NewFLink,
                                 FunctionType *NewFT);
  void initializeCallbackFunctions(Module &M);
  void initializeRuntimeFunctions(Module &M);
  bool initializeModule(Module &M);

  /// Advances \p OriginAddr to point to the next 32-bit origin and then loads
  /// from it.
/// Returns the origin's loaded value.
  Value *loadNextOrigin(BasicBlock::iterator Pos, Align OriginAlign,
                        Value **OriginAddr);

  /// Returns whether the given load byte size is amenable to inlined
  /// optimization patterns.
  bool hasLoadSizeForFastPath(uint64_t Size);

  /// Returns whether the pass tracks origins. Supports only TLS ABI mode.
  bool shouldTrackOrigins();

  /// Returns a zero constant with the shadow type of OrigTy.
  ///
  /// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2),...}
  /// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
  /// getZeroShadow(other type) = ZeroPrimitiveShadow
  Constant *getZeroShadow(Type *OrigTy);
  /// Returns a zero constant with the shadow type of V's type.
  Constant *getZeroShadow(Value *V);

  /// Checks if V is a zero shadow.
  bool isZeroShadow(Value *V);

  /// Returns the shadow type of OrigTy.
  ///
  /// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
  /// getShadowTy([n x T]) = [n x getShadowTy(T)]
  /// getShadowTy(other type) = PrimitiveShadowTy
  Type *getShadowTy(Type *OrigTy);
  /// Returns the shadow type of V's type.
  Type *getShadowTy(Value *V);

  /// Number of origin-sized slots that fit in the argument TLS area
  /// (ArgTLSSize bytes divided by the byte width of one origin).
  const uint64_t NumOfElementsInArgOrgTLS = ArgTLSSize / OriginWidthBytes;

public:
  /// Builds the ABI list from \p ABIListFiles plus the files named by the
  /// dfsan-abilist option.
  DataFlowSanitizer(const std::vector<std::string> &ABIListFiles);

  bool runImpl(Module &M,
               llvm::function_ref<TargetLibraryInfo &(Function &)> GetTLI);
};

/// Per-function instrumentation state.
struct DFSanFunction {
  DataFlowSanitizer &DFS;
  Function *F;
  DominatorTree DT;
  bool IsNativeABI;
  bool IsForceZeroLabels;
  TargetLibraryInfo &TLI;
  AllocaInst *LabelReturnAlloca = nullptr;
  AllocaInst *OriginReturnAlloca = nullptr;
  DenseMap<Value *, Value *> ValShadowMap;
  DenseMap<Value *, Value *> ValOriginMap;
  DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
  DenseMap<AllocaInst *, AllocaInst *> AllocaOriginMap;

  /// A PHI node of the function paired with its shadow PHI and origin PHI.
  struct PHIFixupElement {
    PHINode *Phi;
    PHINode *ShadowPhi;
    PHINode *OriginPhi;
  };
  std::vector<PHIFixupElement> PHIFixups;

  DenseSet<Instruction *> SkipInsts;
  std::vector<Value *> NonZeroChecks;

  struct CachedShadow {
    BasicBlock *Block; // The block where Shadow is defined.
    Value *Shadow;
  };
  /// Maps a value to its latest shadow value in terms of the dominator tree.
  DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows;
  /// Maps a value to its latest collapsed shadow value it was converted to in
  /// terms of the dominator tree. When ClDebugNonzeroLabels is on, this cache
  /// is used at a post process where CFG blocks are split. So it does not cache
  /// BasicBlock like CachedShadows, but uses domination between values.
  DenseMap<Value *, Value *> CachedCollapsedShadows;
  DenseMap<Value *, std::set<Value *>> ShadowElements;

  /// Binds the per-function state to \p F and computes F's dominator tree
  /// immediately.
  DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI,
                bool IsForceZeroLabels, TargetLibraryInfo &TLI)
      : DFS(DFS), F(F), IsNativeABI(IsNativeABI),
        IsForceZeroLabels(IsForceZeroLabels), TLI(TLI) {
    DT.recalculate(*F);
  }

  /// Computes the shadow address for a given function argument.
  ///
  /// Shadow = ArgTLS+ArgOffset.
  Value *getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB);

  /// Computes the shadow address for a return value.
  Value *getRetvalTLS(Type *T, IRBuilder<> &IRB);

  /// Computes the origin address for a given function argument.
  ///
  /// Origin = ArgOriginTLS[ArgNo].
  Value *getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB);

  /// Computes the origin address for a return value.
  Value *getRetvalOriginTLS();

  Value *getOrigin(Value *V);
  void setOrigin(Instruction *I, Value *Origin);
  /// Generates IR to compute the origin of the last operand with a taint label.
  Value *combineOperandOrigins(Instruction *Inst);
  /// Before the instruction Pos, generates IR to compute the last origin with a
  /// taint label. Labels and origins are from vectors Shadows and Origins
  /// correspondingly. The generated IR is like
  ///   Sn-1 != Zero ? On-1: ...
///   S2 != Zero ? O2: S1 != Zero ? O1: O0
  /// When Zero is nullptr, it uses ZeroPrimitiveShadow. Otherwise it can be
  /// zeros with other bitwidths.
  Value *combineOrigins(const std::vector<Value *> &Shadows,
                        const std::vector<Value *> &Origins,
                        BasicBlock::iterator Pos, ConstantInt *Zero = nullptr);

  Value *getShadow(Value *V);
  void setShadow(Instruction *I, Value *Shadow);
  /// Generates IR to compute the union of the two given shadows, inserting it
  /// before Pos. The combined value has primitive type.
  Value *combineShadows(Value *V1, Value *V2, BasicBlock::iterator Pos);
  /// Combines the shadow values of V1 and V2, then converts the combined value
  /// with primitive type into a shadow value with the original type T.
  Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
                                   BasicBlock::iterator Pos);
  Value *combineOperandShadows(Instruction *Inst);

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows.
/// The returned
  /// shadow always has primitive type.
  ///
  /// When tracking loads is enabled, the returned origin is a chain at the
  /// current stack if the returned shadow is tainted.
  std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size,
                                               Align InstAlignment,
                                               BasicBlock::iterator Pos);

  void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
                                  Align InstAlignment, Value *PrimitiveShadow,
                                  Value *Origin, BasicBlock::iterator Pos);
  /// Applies PrimitiveShadow to all primitive subtypes of T, returning
  /// the expanded shadow value.
  ///
  /// EFP({T1,T2, ...}, PS) = {EFP(T1,PS),EFP(T2,PS),...}
  /// EFP([n x T], PS) = [n x EFP(T,PS)]
  /// EFP(other types, PS) = PS
  Value *expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
                                   BasicBlock::iterator Pos);
  /// Collapses Shadow into a single primitive shadow value, unioning all
  /// primitive shadow values in the process. Returns the final primitive
  /// shadow value.
  ///
  /// CTP({V1,V2, ...}) = UNION(CTP(V1),CTP(V2),...)
  /// CTP([V1,V2,...]) = UNION(CTP(V1),CTP(V2),...)
  /// CTP(V of any other type) = V
  Value *collapseToPrimitiveShadow(Value *Shadow, BasicBlock::iterator Pos);

  void storeZeroPrimitiveShadow(Value *Addr, uint64_t Size, Align ShadowAlign,
                                BasicBlock::iterator Pos);

  Align getShadowAlign(Align InstAlignment);

  // If ClConditionalCallbacks is enabled, insert a callback after a given
  // branch instruction using the given conditional expression.
  void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);

  // If ClReachesFunctionCallbacks is enabled, insert a callback for each
  // argument and load instruction.
  void addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB, Instruction &I,
                                            Value *Data);

  bool isLookupTableConstant(Value *P);

private:
  /// Collapses the shadow with aggregate type into a single primitive shadow
  /// value.
  template <class AggregateType>
  Value *collapseAggregateShadow(AggregateType *AT, Value *Shadow,
                                 IRBuilder<> &IRB);

  /// IRBuilder-based overload of collapseToPrimitiveShadow.
  Value *collapseToPrimitiveShadow(Value *Shadow, IRBuilder<> &IRB);

  /// Returns the shadow value of an argument A.
  Value *getShadowForTLSArgument(Argument *A);

  /// The fast path of loading shadows.
  std::pair<Value *, Value *>
  loadShadowFast(Value *ShadowAddr, Value *OriginAddr, uint64_t Size,
                 Align ShadowAlign, Align OriginAlign, Value *FirstOrigin,
                 BasicBlock::iterator Pos);

  Align getOriginAlign(Align InstAlignment);

  /// Because 4 contiguous bytes share one 4-byte origin, the most accurate load
  /// is __dfsan_load_label_and_origin. This function returns the union of all
  /// labels and the origin of the first taint label. However this is an
  /// additional call with many instructions. To ensure common cases are fast,
  /// checks if it is possible to load labels and origins without using the
  /// callback function.
  ///
  /// When enabling tracking load instructions, we always use
  /// __dfsan_load_label_and_origin to reduce code size.
  bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);

  /// Returns a chain at the current stack with previous origin V.
  Value *updateOrigin(Value *V, IRBuilder<> &IRB);

  /// Returns a chain at the current stack with previous origin V if Shadow is
  /// tainted.
  Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB);

  /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64.
/// Returns
  /// Origin otherwise.
  Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);

  /// Stores Origin into the address range [StoreOriginAddr, StoreOriginAddr +
  /// StoreOriginSize).
  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *StoreOriginAddr,
                   uint64_t StoreOriginSize, Align Alignment);

  /// Stores Origin in terms of its Shadow value.
  /// * Do not write origins for zero shadows because we do not trace origins
  ///   for untainted sinks.
  /// * Use __dfsan_maybe_store_origin if there are too many origin store
  ///   instrumentations.
  void storeOrigin(BasicBlock::iterator Pos, Value *Addr, uint64_t Size,
                   Value *Shadow, Value *Origin, Value *StoreOriginAddr,
                   Align InstAlignment);

  /// Convert a scalar value to an i1 by comparing with 0.
  Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &Name = "");

  bool shouldInstrumentWithCall();

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows.
/// The returned
  /// shadow always has primitive type.
  std::pair<Value *, Value *>
  loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size,
                                   Align InstAlignment,
                                   BasicBlock::iterator Pos);
  /// Counter of origin stores; starts at zero for each DFSanFunction.
  int NumOriginStores = 0;
};

/// Instruction visitor for one function; it holds a reference to that
/// function's DFSanFunction state.
class DFSanVisitor : public InstVisitor<DFSanVisitor> {
public:
  DFSanFunction &DFSF;

  DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}

  /// Returns the data layout of the function being visited.
  const DataLayout &getDataLayout() const {
    return DFSF.F->getDataLayout();
  }

  // Combines shadow values and origins for all of I's operands.
  void visitInstOperands(Instruction &I);

  // One handler per instruction kind.
  void visitUnaryOperator(UnaryOperator &UO);
  void visitBinaryOperator(BinaryOperator &BO);
  void visitBitCastInst(BitCastInst &BCI);
  void visitCastInst(CastInst &CI);
  void visitCmpInst(CmpInst &CI);
  void visitLandingPadInst(LandingPadInst &LPI);
  void visitGetElementPtrInst(GetElementPtrInst &GEPI);
  void visitLoadInst(LoadInst &LI);
  void visitStoreInst(StoreInst &SI);
  void visitAtomicRMWInst(AtomicRMWInst &I);
  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
  void visitReturnInst(ReturnInst &RI);
  void visitLibAtomicLoad(CallBase &CB);
  void visitLibAtomicStore(CallBase &CB);
  void visitLibAtomicExchange(CallBase &CB);
  void visitLibAtomicCompareExchange(CallBase &CB);
  void visitCallBase(CallBase &CB);
  void visitPHINode(PHINode &PN);
  void visitExtractElementInst(ExtractElementInst &I);
  void visitInsertElementInst(InsertElementInst &I);
  void visitShuffleVectorInst(ShuffleVectorInst &I);
  void visitExtractValueInst(ExtractValueInst &I);
  void visitInsertValueInst(InsertValueInst &I);
  void visitAllocaInst(AllocaInst &I);
  void visitSelectInst(SelectInst &I);
  void visitMemSetInst(MemSetInst &I);
  void visitMemTransferInst(MemTransferInst &I);
  void visitBranchInst(BranchInst &BR);
  void visitSwitchInst(SwitchInst &SW);

private:
  void visitCASOrRMW(Align InstAlignment, Instruction &I);

  // Returns false when this is an invoke of a custom
function.831bool visitWrappedCallBase(Function &F, CallBase &CB);832833// Combines origins for all of I's operands.834void visitInstOperandOrigins(Instruction &I);835836void addShadowArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,837IRBuilder<> &IRB);838839void addOriginArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,840IRBuilder<> &IRB);841842Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB);843Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB);844};845846bool LibAtomicFunction(const Function &F) {847// This is a bit of a hack because TargetLibraryInfo is a function pass.848// The DFSan pass would need to be refactored to be function pass oriented849// (like MSan is) in order to fit together nicely with TargetLibraryInfo.850// We need this check to prevent them from being instrumented, or wrapped.851// Match on name and number of arguments.852if (!F.hasName() || F.isVarArg())853return false;854switch (F.arg_size()) {855case 4:856return F.getName() == "__atomic_load" || F.getName() == "__atomic_store";857case 5:858return F.getName() == "__atomic_exchange";859case 6:860return F.getName() == "__atomic_compare_exchange";861default:862return false;863}864}865866} // end anonymous namespace867868DataFlowSanitizer::DataFlowSanitizer(869const std::vector<std::string> &ABIListFiles) {870std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));871llvm::append_range(AllABIListFiles, ClABIListFiles);872// FIXME: should we propagate vfs::FileSystem to this constructor?873ABIList.set(874SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));875876for (StringRef v : ClCombineTaintLookupTables)877CombineTaintLookupTableNames.insert(v);878}879880TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {881SmallVector<Type *, 4> ArgTypes;882883// Some parameters of the custom function being constructed are884// parameters of T. 
Record the mapping from parameters of T to885// parameters of the custom function, so that parameter attributes886// at call sites can be updated.887std::vector<unsigned> ArgumentIndexMapping;888for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) {889Type *ParamType = T->getParamType(I);890ArgumentIndexMapping.push_back(ArgTypes.size());891ArgTypes.push_back(ParamType);892}893for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)894ArgTypes.push_back(PrimitiveShadowTy);895if (T->isVarArg())896ArgTypes.push_back(PrimitiveShadowPtrTy);897Type *RetType = T->getReturnType();898if (!RetType->isVoidTy())899ArgTypes.push_back(PrimitiveShadowPtrTy);900901if (shouldTrackOrigins()) {902for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)903ArgTypes.push_back(OriginTy);904if (T->isVarArg())905ArgTypes.push_back(OriginPtrTy);906if (!RetType->isVoidTy())907ArgTypes.push_back(OriginPtrTy);908}909910return TransformedFunction(911T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),912ArgumentIndexMapping);913}914915bool DataFlowSanitizer::isZeroShadow(Value *V) {916Type *T = V->getType();917if (!isa<ArrayType>(T) && !isa<StructType>(T)) {918if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))919return CI->isZero();920return false;921}922923return isa<ConstantAggregateZero>(V);924}925926bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {927uint64_t ShadowSize = Size * ShadowWidthBytes;928return ShadowSize % 8 == 0 || ShadowSize == 4;929}930931bool DataFlowSanitizer::shouldTrackOrigins() {932static const bool ShouldTrackOrigins = ClTrackOrigins;933return ShouldTrackOrigins;934}935936Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {937if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))938return ZeroPrimitiveShadow;939Type *ShadowTy = getShadowTy(OrigTy);940return ConstantAggregateZero::get(ShadowTy);941}942943Constant *DataFlowSanitizer::getZeroShadow(Value *V) {944return getZeroShadow(V->getType());945}946947static Value 
*expandFromPrimitiveShadowRecursive(948Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy,949Value *PrimitiveShadow, IRBuilder<> &IRB) {950if (!isa<ArrayType>(SubShadowTy) && !isa<StructType>(SubShadowTy))951return IRB.CreateInsertValue(Shadow, PrimitiveShadow, Indices);952953if (ArrayType *AT = dyn_cast<ArrayType>(SubShadowTy)) {954for (unsigned Idx = 0; Idx < AT->getNumElements(); Idx++) {955Indices.push_back(Idx);956Shadow = expandFromPrimitiveShadowRecursive(957Shadow, Indices, AT->getElementType(), PrimitiveShadow, IRB);958Indices.pop_back();959}960return Shadow;961}962963if (StructType *ST = dyn_cast<StructType>(SubShadowTy)) {964for (unsigned Idx = 0; Idx < ST->getNumElements(); Idx++) {965Indices.push_back(Idx);966Shadow = expandFromPrimitiveShadowRecursive(967Shadow, Indices, ST->getElementType(Idx), PrimitiveShadow, IRB);968Indices.pop_back();969}970return Shadow;971}972llvm_unreachable("Unexpected shadow type");973}974975bool DFSanFunction::shouldInstrumentWithCall() {976return ClInstrumentWithCallThreshold >= 0 &&977NumOriginStores >= ClInstrumentWithCallThreshold;978}979980Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,981BasicBlock::iterator Pos) {982Type *ShadowTy = DFS.getShadowTy(T);983984if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))985return PrimitiveShadow;986987if (DFS.isZeroShadow(PrimitiveShadow))988return DFS.getZeroShadow(ShadowTy);989990IRBuilder<> IRB(Pos->getParent(), Pos);991SmallVector<unsigned, 4> Indices;992Value *Shadow = UndefValue::get(ShadowTy);993Shadow = expandFromPrimitiveShadowRecursive(Shadow, Indices, ShadowTy,994PrimitiveShadow, IRB);995996// Caches the primitive shadow value that built the shadow value.997CachedCollapsedShadows[Shadow] = PrimitiveShadow;998return Shadow;999}10001001template <class AggregateType>1002Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow,1003IRBuilder<> &IRB) {1004if (!AT->getNumElements())1005return 
DFS.ZeroPrimitiveShadow;10061007Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);1008Value *Aggregator = collapseToPrimitiveShadow(FirstItem, IRB);10091010for (unsigned Idx = 1; Idx < AT->getNumElements(); Idx++) {1011Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);1012Value *ShadowInner = collapseToPrimitiveShadow(ShadowItem, IRB);1013Aggregator = IRB.CreateOr(Aggregator, ShadowInner);1014}1015return Aggregator;1016}10171018Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,1019IRBuilder<> &IRB) {1020Type *ShadowTy = Shadow->getType();1021if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))1022return Shadow;1023if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy))1024return collapseAggregateShadow<>(AT, Shadow, IRB);1025if (StructType *ST = dyn_cast<StructType>(ShadowTy))1026return collapseAggregateShadow<>(ST, Shadow, IRB);1027llvm_unreachable("Unexpected shadow type");1028}10291030Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,1031BasicBlock::iterator Pos) {1032Type *ShadowTy = Shadow->getType();1033if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))1034return Shadow;10351036// Checks if the cached collapsed shadow value dominates Pos.1037Value *&CS = CachedCollapsedShadows[Shadow];1038if (CS && DT.dominates(CS, Pos))1039return CS;10401041IRBuilder<> IRB(Pos->getParent(), Pos);1042Value *PrimitiveShadow = collapseToPrimitiveShadow(Shadow, IRB);1043// Caches the converted primitive shadow value.1044CS = PrimitiveShadow;1045return PrimitiveShadow;1046}10471048void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I,1049Value *Condition) {1050if (!ClConditionalCallbacks) {1051return;1052}1053IRBuilder<> IRB(&I);1054Value *CondShadow = getShadow(Condition);1055CallInst *CI;1056if (DFS.shouldTrackOrigins()) {1057Value *CondOrigin = getOrigin(Condition);1058CI = IRB.CreateCall(DFS.DFSanConditionalCallbackOriginFn,1059{CondShadow, CondOrigin});1060} else {1061CI = 
IRB.CreateCall(DFS.DFSanConditionalCallbackFn, {CondShadow});1062}1063CI->addParamAttr(0, Attribute::ZExt);1064}10651066void DFSanFunction::addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB,1067Instruction &I,1068Value *Data) {1069if (!ClReachesFunctionCallbacks) {1070return;1071}1072const DebugLoc &dbgloc = I.getDebugLoc();1073Value *DataShadow = collapseToPrimitiveShadow(getShadow(Data), IRB);1074ConstantInt *CILine;1075llvm::Value *FilePathPtr;10761077if (dbgloc.get() == nullptr) {1078CILine = llvm::ConstantInt::get(I.getContext(), llvm::APInt(32, 0));1079FilePathPtr = IRB.CreateGlobalStringPtr(1080I.getFunction()->getParent()->getSourceFileName());1081} else {1082CILine = llvm::ConstantInt::get(I.getContext(),1083llvm::APInt(32, dbgloc.getLine()));1084FilePathPtr =1085IRB.CreateGlobalStringPtr(dbgloc->getFilename());1086}10871088llvm::Value *FunctionNamePtr =1089IRB.CreateGlobalStringPtr(I.getFunction()->getName());10901091CallInst *CB;1092std::vector<Value *> args;10931094if (DFS.shouldTrackOrigins()) {1095Value *DataOrigin = getOrigin(Data);1096args = { DataShadow, DataOrigin, FilePathPtr, CILine, FunctionNamePtr };1097CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackOriginFn, args);1098} else {1099args = { DataShadow, FilePathPtr, CILine, FunctionNamePtr };1100CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackFn, args);1101}1102CB->addParamAttr(0, Attribute::ZExt);1103CB->setDebugLoc(dbgloc);1104}11051106Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {1107if (!OrigTy->isSized())1108return PrimitiveShadowTy;1109if (isa<IntegerType>(OrigTy))1110return PrimitiveShadowTy;1111if (isa<VectorType>(OrigTy))1112return PrimitiveShadowTy;1113if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy))1114return ArrayType::get(getShadowTy(AT->getElementType()),1115AT->getNumElements());1116if (StructType *ST = dyn_cast<StructType>(OrigTy)) {1117SmallVector<Type *, 4> Elements;1118for (unsigned I = 0, N = ST->getNumElements(); I < N; 
++I)1119Elements.push_back(getShadowTy(ST->getElementType(I)));1120return StructType::get(*Ctx, Elements);1121}1122return PrimitiveShadowTy;1123}11241125Type *DataFlowSanitizer::getShadowTy(Value *V) {1126return getShadowTy(V->getType());1127}11281129bool DataFlowSanitizer::initializeModule(Module &M) {1130Triple TargetTriple(M.getTargetTriple());1131const DataLayout &DL = M.getDataLayout();11321133if (TargetTriple.getOS() != Triple::Linux)1134report_fatal_error("unsupported operating system");1135switch (TargetTriple.getArch()) {1136case Triple::aarch64:1137MapParams = &Linux_AArch64_MemoryMapParams;1138break;1139case Triple::x86_64:1140MapParams = &Linux_X86_64_MemoryMapParams;1141break;1142case Triple::loongarch64:1143MapParams = &Linux_LoongArch64_MemoryMapParams;1144break;1145default:1146report_fatal_error("unsupported architecture");1147}11481149Mod = &M;1150Ctx = &M.getContext();1151Int8Ptr = PointerType::getUnqual(*Ctx);1152OriginTy = IntegerType::get(*Ctx, OriginWidthBits);1153OriginPtrTy = PointerType::getUnqual(OriginTy);1154PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);1155PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy);1156IntptrTy = DL.getIntPtrType(*Ctx);1157ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0);1158ZeroOrigin = ConstantInt::getSigned(OriginTy, 0);11591160Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};1161DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs,1162/*isVarArg=*/false);1163Type *DFSanLoadLabelAndOriginArgs[2] = {Int8Ptr, IntptrTy};1164DFSanLoadLabelAndOriginFnTy =1165FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs,1166/*isVarArg=*/false);1167DFSanUnimplementedFnTy = FunctionType::get(1168Type::getVoidTy(*Ctx), PointerType::getUnqual(*Ctx), /*isVarArg=*/false);1169Type *DFSanWrapperExternWeakNullArgs[2] = {Int8Ptr, Int8Ptr};1170DFSanWrapperExternWeakNullFnTy =1171FunctionType::get(Type::getVoidTy(*Ctx), 
DFSanWrapperExternWeakNullArgs,1172/*isVarArg=*/false);1173Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy,1174PointerType::getUnqual(*Ctx), IntptrTy};1175DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),1176DFSanSetLabelArgs, /*isVarArg=*/false);1177DFSanNonzeroLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), std::nullopt,1178/*isVarArg=*/false);1179DFSanVarargWrapperFnTy = FunctionType::get(1180Type::getVoidTy(*Ctx), PointerType::getUnqual(*Ctx), /*isVarArg=*/false);1181DFSanConditionalCallbackFnTy =1182FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,1183/*isVarArg=*/false);1184Type *DFSanConditionalCallbackOriginArgs[2] = {PrimitiveShadowTy, OriginTy};1185DFSanConditionalCallbackOriginFnTy = FunctionType::get(1186Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs,1187/*isVarArg=*/false);1188Type *DFSanReachesFunctionCallbackArgs[4] = {PrimitiveShadowTy, Int8Ptr,1189OriginTy, Int8Ptr};1190DFSanReachesFunctionCallbackFnTy =1191FunctionType::get(Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackArgs,1192/*isVarArg=*/false);1193Type *DFSanReachesFunctionCallbackOriginArgs[5] = {1194PrimitiveShadowTy, OriginTy, Int8Ptr, OriginTy, Int8Ptr};1195DFSanReachesFunctionCallbackOriginFnTy = FunctionType::get(1196Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackOriginArgs,1197/*isVarArg=*/false);1198DFSanCmpCallbackFnTy =1199FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,1200/*isVarArg=*/false);1201DFSanChainOriginFnTy =1202FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false);1203Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy};1204DFSanChainOriginIfTaintedFnTy = FunctionType::get(1205OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false);1206Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits),1207Int8Ptr, IntptrTy, OriginTy};1208DFSanMaybeStoreOriginFnTy = FunctionType::get(1209Type::getVoidTy(*Ctx), DFSanMaybeStoreOriginArgs, /*isVarArg=*/false);1210Type 
*DFSanMemOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};1211DFSanMemOriginTransferFnTy = FunctionType::get(1212Type::getVoidTy(*Ctx), DFSanMemOriginTransferArgs, /*isVarArg=*/false);1213Type *DFSanMemShadowOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};1214DFSanMemShadowOriginTransferFnTy =1215FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemShadowOriginTransferArgs,1216/*isVarArg=*/false);1217Type *DFSanMemShadowOriginConditionalExchangeArgs[5] = {1218IntegerType::get(*Ctx, 8), Int8Ptr, Int8Ptr, Int8Ptr, IntptrTy};1219DFSanMemShadowOriginConditionalExchangeFnTy = FunctionType::get(1220Type::getVoidTy(*Ctx), DFSanMemShadowOriginConditionalExchangeArgs,1221/*isVarArg=*/false);1222Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr};1223DFSanLoadStoreCallbackFnTy =1224FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs,1225/*isVarArg=*/false);1226Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};1227DFSanMemTransferCallbackFnTy =1228FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,1229/*isVarArg=*/false);12301231ColdCallWeights = MDBuilder(*Ctx).createUnlikelyBranchWeights();1232OriginStoreWeights = MDBuilder(*Ctx).createUnlikelyBranchWeights();1233return true;1234}12351236bool DataFlowSanitizer::isInstrumented(const Function *F) {1237return !ABIList.isIn(*F, "uninstrumented");1238}12391240bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {1241return !ABIList.isIn(*GA, "uninstrumented");1242}12431244bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {1245return ABIList.isIn(*F, "force_zero_labels");1246}12471248DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {1249if (ABIList.isIn(*F, "functional"))1250return WK_Functional;1251if (ABIList.isIn(*F, "discard"))1252return WK_Discard;1253if (ABIList.isIn(*F, "custom"))1254return WK_Custom;12551256return WK_Warning;1257}12581259void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) 
{1260std::string GVName = std::string(GV->getName()), Suffix = ".dfsan";1261GV->setName(GVName + Suffix);12621263// Try to change the name of the function in module inline asm. We only do1264// this for specific asm directives, currently only ".symver", to try to avoid1265// corrupting asm which happens to contain the symbol name as a substring.1266// Note that the substitution for .symver assumes that the versioned symbol1267// also has an instrumented name.1268std::string Asm = GV->getParent()->getModuleInlineAsm();1269std::string SearchStr = ".symver " + GVName + ",";1270size_t Pos = Asm.find(SearchStr);1271if (Pos != std::string::npos) {1272Asm.replace(Pos, SearchStr.size(), ".symver " + GVName + Suffix + ",");1273Pos = Asm.find('@');12741275if (Pos == std::string::npos)1276report_fatal_error(Twine("unsupported .symver: ", Asm));12771278Asm.replace(Pos, 1, Suffix + "@");1279GV->getParent()->setModuleInlineAsm(Asm);1280}1281}12821283void DataFlowSanitizer::buildExternWeakCheckIfNeeded(IRBuilder<> &IRB,1284Function *F) {1285// If the function we are wrapping was ExternWeak, it may be null.1286// The original code before calling this wrapper may have checked for null,1287// but replacing with a known-to-not-be-null wrapper can break this check.1288// When replacing uses of the extern weak function with the wrapper we try1289// to avoid replacing uses in conditionals, but this is not perfect.1290// In the case where we fail, and accidentally optimize out a null check1291// for a extern weak function, add a check here to help identify the issue.1292if (GlobalValue::isExternalWeakLinkage(F->getLinkage())) {1293std::vector<Value *> Args;1294Args.push_back(F);1295Args.push_back(IRB.CreateGlobalStringPtr(F->getName()));1296IRB.CreateCall(DFSanWrapperExternWeakNullFn, Args);1297}1298}12991300Function *1301DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,1302GlobalValue::LinkageTypes NewFLink,1303FunctionType *NewFT) {1304FunctionType *FT = 
F->getFunctionType();1305Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),1306NewFName, F->getParent());1307NewF->copyAttributesFrom(F);1308NewF->removeRetAttrs(1309AttributeFuncs::typeIncompatible(NewFT->getReturnType()));13101311BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);1312if (F->isVarArg()) {1313NewF->removeFnAttr("split-stack");1314CallInst::Create(DFSanVarargWrapperFn,1315IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",1316BB);1317new UnreachableInst(*Ctx, BB);1318} else {1319auto ArgIt = pointer_iterator<Argument *>(NewF->arg_begin());1320std::vector<Value *> Args(ArgIt, ArgIt + FT->getNumParams());13211322CallInst *CI = CallInst::Create(F, Args, "", BB);1323if (FT->getReturnType()->isVoidTy())1324ReturnInst::Create(*Ctx, BB);1325else1326ReturnInst::Create(*Ctx, CI, BB);1327}13281329return NewF;1330}13311332// Initialize DataFlowSanitizer runtime functions and declare them in the module1333void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {1334LLVMContext &C = M.getContext();1335{1336AttributeList AL;1337AL = AL.addFnAttribute(C, Attribute::NoUnwind);1338AL = AL.addFnAttribute(1339C, Attribute::getWithMemoryEffects(C, MemoryEffects::readOnly()));1340AL = AL.addRetAttribute(C, Attribute::ZExt);1341DFSanUnionLoadFn =1342Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);1343}1344{1345AttributeList AL;1346AL = AL.addFnAttribute(C, Attribute::NoUnwind);1347AL = AL.addFnAttribute(1348C, Attribute::getWithMemoryEffects(C, MemoryEffects::readOnly()));1349AL = AL.addRetAttribute(C, Attribute::ZExt);1350DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction(1351"__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL);1352}1353DFSanUnimplementedFn =1354Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);1355DFSanWrapperExternWeakNullFn = Mod->getOrInsertFunction(1356"__dfsan_wrapper_extern_weak_null", DFSanWrapperExternWeakNullFnTy);1357{1358AttributeList 
AL;1359AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);1360AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);1361DFSanSetLabelFn =1362Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);1363}1364DFSanNonzeroLabelFn =1365Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);1366DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",1367DFSanVarargWrapperFnTy);1368{1369AttributeList AL;1370AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);1371AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);1372DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",1373DFSanChainOriginFnTy, AL);1374}1375{1376AttributeList AL;1377AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);1378AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);1379AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);1380DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(1381"__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);1382}1383DFSanMemOriginTransferFn = Mod->getOrInsertFunction(1384"__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy);13851386DFSanMemShadowOriginTransferFn = Mod->getOrInsertFunction(1387"__dfsan_mem_shadow_origin_transfer", DFSanMemShadowOriginTransferFnTy);13881389DFSanMemShadowOriginConditionalExchangeFn =1390Mod->getOrInsertFunction("__dfsan_mem_shadow_origin_conditional_exchange",1391DFSanMemShadowOriginConditionalExchangeFnTy);13921393{1394AttributeList AL;1395AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);1396AL = AL.addParamAttribute(M.getContext(), 3, Attribute::ZExt);1397DFSanMaybeStoreOriginFn = Mod->getOrInsertFunction(1398"__dfsan_maybe_store_origin", DFSanMaybeStoreOriginFnTy, 
AL);1399}14001401DFSanRuntimeFunctions.insert(1402DFSanUnionLoadFn.getCallee()->stripPointerCasts());1403DFSanRuntimeFunctions.insert(1404DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts());1405DFSanRuntimeFunctions.insert(1406DFSanUnimplementedFn.getCallee()->stripPointerCasts());1407DFSanRuntimeFunctions.insert(1408DFSanWrapperExternWeakNullFn.getCallee()->stripPointerCasts());1409DFSanRuntimeFunctions.insert(1410DFSanSetLabelFn.getCallee()->stripPointerCasts());1411DFSanRuntimeFunctions.insert(1412DFSanNonzeroLabelFn.getCallee()->stripPointerCasts());1413DFSanRuntimeFunctions.insert(1414DFSanVarargWrapperFn.getCallee()->stripPointerCasts());1415DFSanRuntimeFunctions.insert(1416DFSanLoadCallbackFn.getCallee()->stripPointerCasts());1417DFSanRuntimeFunctions.insert(1418DFSanStoreCallbackFn.getCallee()->stripPointerCasts());1419DFSanRuntimeFunctions.insert(1420DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts());1421DFSanRuntimeFunctions.insert(1422DFSanConditionalCallbackFn.getCallee()->stripPointerCasts());1423DFSanRuntimeFunctions.insert(1424DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts());1425DFSanRuntimeFunctions.insert(1426DFSanReachesFunctionCallbackFn.getCallee()->stripPointerCasts());1427DFSanRuntimeFunctions.insert(1428DFSanReachesFunctionCallbackOriginFn.getCallee()->stripPointerCasts());1429DFSanRuntimeFunctions.insert(1430DFSanCmpCallbackFn.getCallee()->stripPointerCasts());1431DFSanRuntimeFunctions.insert(1432DFSanChainOriginFn.getCallee()->stripPointerCasts());1433DFSanRuntimeFunctions.insert(1434DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts());1435DFSanRuntimeFunctions.insert(1436DFSanMemOriginTransferFn.getCallee()->stripPointerCasts());1437DFSanRuntimeFunctions.insert(1438DFSanMemShadowOriginTransferFn.getCallee()->stripPointerCasts());1439DFSanRuntimeFunctions.insert(1440DFSanMemShadowOriginConditionalExchangeFn.getCallee()1441->stripPointerCasts());1442DFSanRuntimeFunctions.insert(1443DFSanMaybeStoreO
riginFn.getCallee()->stripPointerCasts());1444}14451446// Initializes event callback functions and declare them in the module1447void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {1448{1449AttributeList AL;1450AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);1451DFSanLoadCallbackFn = Mod->getOrInsertFunction(1452"__dfsan_load_callback", DFSanLoadStoreCallbackFnTy, AL);1453}1454{1455AttributeList AL;1456AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);1457DFSanStoreCallbackFn = Mod->getOrInsertFunction(1458"__dfsan_store_callback", DFSanLoadStoreCallbackFnTy, AL);1459}1460DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(1461"__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);1462{1463AttributeList AL;1464AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);1465DFSanCmpCallbackFn = Mod->getOrInsertFunction("__dfsan_cmp_callback",1466DFSanCmpCallbackFnTy, AL);1467}1468{1469AttributeList AL;1470AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);1471DFSanConditionalCallbackFn = Mod->getOrInsertFunction(1472"__dfsan_conditional_callback", DFSanConditionalCallbackFnTy, AL);1473}1474{1475AttributeList AL;1476AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);1477DFSanConditionalCallbackOriginFn =1478Mod->getOrInsertFunction("__dfsan_conditional_callback_origin",1479DFSanConditionalCallbackOriginFnTy, AL);1480}1481{1482AttributeList AL;1483AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);1484DFSanReachesFunctionCallbackFn =1485Mod->getOrInsertFunction("__dfsan_reaches_function_callback",1486DFSanReachesFunctionCallbackFnTy, AL);1487}1488{1489AttributeList AL;1490AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);1491DFSanReachesFunctionCallbackOriginFn =1492Mod->getOrInsertFunction("__dfsan_reaches_function_callback_origin",1493DFSanReachesFunctionCallbackOriginFnTy, AL);1494}1495}14961497bool DataFlowSanitizer::runImpl(1498Module &M, 
llvm::function_ref<TargetLibraryInfo &(Function &)> GetTLI) {1499initializeModule(M);15001501if (ABIList.isIn(M, "skip"))1502return false;15031504const unsigned InitialGlobalSize = M.global_size();1505const unsigned InitialModuleSize = M.size();15061507bool Changed = false;15081509auto GetOrInsertGlobal = [this, &Changed](StringRef Name,1510Type *Ty) -> Constant * {1511Constant *C = Mod->getOrInsertGlobal(Name, Ty);1512if (GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {1513Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;1514G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);1515}1516return C;1517};15181519// These globals must be kept in sync with the ones in dfsan.cpp.1520ArgTLS =1521GetOrInsertGlobal("__dfsan_arg_tls",1522ArrayType::get(Type::getInt64Ty(*Ctx), ArgTLSSize / 8));1523RetvalTLS = GetOrInsertGlobal(1524"__dfsan_retval_tls",1525ArrayType::get(Type::getInt64Ty(*Ctx), RetvalTLSSize / 8));1526ArgOriginTLSTy = ArrayType::get(OriginTy, NumOfElementsInArgOrgTLS);1527ArgOriginTLS = GetOrInsertGlobal("__dfsan_arg_origin_tls", ArgOriginTLSTy);1528RetvalOriginTLS = GetOrInsertGlobal("__dfsan_retval_origin_tls", OriginTy);15291530(void)Mod->getOrInsertGlobal("__dfsan_track_origins", OriginTy, [&] {1531Changed = true;1532return new GlobalVariable(1533M, OriginTy, true, GlobalValue::WeakODRLinkage,1534ConstantInt::getSigned(OriginTy,1535shouldTrackOrigins() ? 
ClTrackOrigins : 0),1536"__dfsan_track_origins");1537});15381539initializeCallbackFunctions(M);1540initializeRuntimeFunctions(M);15411542std::vector<Function *> FnsToInstrument;1543SmallPtrSet<Function *, 2> FnsWithNativeABI;1544SmallPtrSet<Function *, 2> FnsWithForceZeroLabel;1545SmallPtrSet<Constant *, 1> PersonalityFns;1546for (Function &F : M)1547if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F) &&1548!LibAtomicFunction(F) &&1549!F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) {1550FnsToInstrument.push_back(&F);1551if (F.hasPersonalityFn())1552PersonalityFns.insert(F.getPersonalityFn()->stripPointerCasts());1553}15541555if (ClIgnorePersonalityRoutine) {1556for (auto *C : PersonalityFns) {1557assert(isa<Function>(C) && "Personality routine is not a function!");1558Function *F = cast<Function>(C);1559if (!isInstrumented(F))1560llvm::erase(FnsToInstrument, F);1561}1562}15631564// Give function aliases prefixes when necessary, and build wrappers where the1565// instrumentedness is inconsistent.1566for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {1567// Don't stop on weak. We assume people aren't playing games with the1568// instrumentedness of overridden weak aliases.1569auto *F = dyn_cast<Function>(GA.getAliaseeObject());1570if (!F)1571continue;15721573bool GAInst = isInstrumented(&GA), FInst = isInstrumented(F);1574if (GAInst && FInst) {1575addGlobalNameSuffix(&GA);1576} else if (GAInst != FInst) {1577// Non-instrumented alias of an instrumented function, or vice versa.1578// Replace the alias with a native-ABI wrapper of the aliasee. 
The pass1579// below will take care of instrumenting it.1580Function *NewF =1581buildWrapperFunction(F, "", GA.getLinkage(), F->getFunctionType());1582GA.replaceAllUsesWith(NewF);1583NewF->takeName(&GA);1584GA.eraseFromParent();1585FnsToInstrument.push_back(NewF);1586}1587}15881589// TODO: This could be more precise.1590ReadOnlyNoneAttrs.addAttribute(Attribute::Memory);15911592// First, change the ABI of every function in the module. ABI-listed1593// functions keep their original ABI and get a wrapper function.1594for (std::vector<Function *>::iterator FI = FnsToInstrument.begin(),1595FE = FnsToInstrument.end();1596FI != FE; ++FI) {1597Function &F = **FI;1598FunctionType *FT = F.getFunctionType();15991600bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&1601FT->getReturnType()->isVoidTy());16021603if (isInstrumented(&F)) {1604if (isForceZeroLabels(&F))1605FnsWithForceZeroLabel.insert(&F);16061607// Instrumented functions get a '.dfsan' suffix. This allows us to more1608// easily identify cases of mismatching ABIs. This naming scheme is1609// mangling-compatible (see Itanium ABI), using a vendor-specific suffix.1610addGlobalNameSuffix(&F);1611} else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {1612// Build a wrapper function for F. The wrapper simply calls F, and is1613// added to FnsToInstrument so that any instrumentation according to its1614// WrapperKind is done in the second pass below.16151616// If the function being wrapped has local linkage, then preserve the1617// function's linkage in the wrapper function.1618GlobalValue::LinkageTypes WrapperLinkage =1619F.hasLocalLinkage() ? F.getLinkage()1620: GlobalValue::LinkOnceODRLinkage;16211622Function *NewF = buildWrapperFunction(1623&F,1624(shouldTrackOrigins() ? 
std::string("dfso$") : std::string("dfsw$")) +1625std::string(F.getName()),1626WrapperLinkage, FT);1627NewF->removeFnAttrs(ReadOnlyNoneAttrs);16281629// Extern weak functions can sometimes be null at execution time.1630// Code will sometimes check if an extern weak function is null.1631// This could look something like:1632// declare extern_weak i8 @my_func(i8)1633// br i1 icmp ne (i8 (i8)* @my_func, i8 (i8)* null), label %use_my_func,1634// label %avoid_my_func1635// The @"dfsw$my_func" wrapper is never null, so if we replace this use1636// in the comparison, the icmp will simplify to false and we have1637// accidentally optimized away a null check that is necessary.1638// This can lead to a crash when the null extern_weak my_func is called.1639//1640// To prevent (the most common pattern of) this problem,1641// do not replace uses in comparisons with the wrapper.1642// We definitely want to replace uses in call instructions.1643// Other uses (e.g. store the function address somewhere) might be1644// called or compared or both - this case may not be handled correctly.1645// We will default to replacing with wrapper in cases we are unsure.1646auto IsNotCmpUse = [](Use &U) -> bool {1647User *Usr = U.getUser();1648if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Usr)) {1649// This is the most common case for icmp ne null1650if (CE->getOpcode() == Instruction::ICmp) {1651return false;1652}1653}1654if (Instruction *I = dyn_cast<Instruction>(Usr)) {1655if (I->getOpcode() == Instruction::ICmp) {1656return false;1657}1658}1659return true;1660};1661F.replaceUsesWithIf(NewF, IsNotCmpUse);16621663UnwrappedFnMap[NewF] = &F;1664*FI = NewF;16651666if (!F.isDeclaration()) {1667// This function is probably defining an interposition of an1668// uninstrumented function and hence needs to keep the original ABI.1669// But any functions it may call need to use the instrumented ABI, so1670// we instrument it in a mode which preserves the original 
ABI.1671FnsWithNativeABI.insert(&F);16721673// This code needs to rebuild the iterators, as they may be invalidated1674// by the push_back, taking care that the new range does not include1675// any functions added by this code.1676size_t N = FI - FnsToInstrument.begin(),1677Count = FE - FnsToInstrument.begin();1678FnsToInstrument.push_back(&F);1679FI = FnsToInstrument.begin() + N;1680FE = FnsToInstrument.begin() + Count;1681}1682// Hopefully, nobody will try to indirectly call a vararg1683// function... yet.1684} else if (FT->isVarArg()) {1685UnwrappedFnMap[&F] = &F;1686*FI = nullptr;1687}1688}16891690for (Function *F : FnsToInstrument) {1691if (!F || F->isDeclaration())1692continue;16931694removeUnreachableBlocks(*F);16951696DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F),1697FnsWithForceZeroLabel.count(F), GetTLI(*F));16981699if (ClReachesFunctionCallbacks) {1700// Add callback for arguments reaching this function.1701for (auto &FArg : F->args()) {1702Instruction *Next = &F->getEntryBlock().front();1703Value *FArgShadow = DFSF.getShadow(&FArg);1704if (isZeroShadow(FArgShadow))1705continue;1706if (Instruction *FArgShadowInst = dyn_cast<Instruction>(FArgShadow)) {1707Next = FArgShadowInst->getNextNode();1708}1709if (shouldTrackOrigins()) {1710if (Instruction *Origin =1711dyn_cast<Instruction>(DFSF.getOrigin(&FArg))) {1712// Ensure IRB insertion point is after loads for shadow and origin.1713Instruction *OriginNext = Origin->getNextNode();1714if (Next->comesBefore(OriginNext)) {1715Next = OriginNext;1716}1717}1718}1719IRBuilder<> IRB(Next);1720DFSF.addReachesFunctionCallbacksIfEnabled(IRB, *Next, &FArg);1721}1722}17231724// DFSanVisitor may create new basic blocks, which confuses df_iterator.1725// Build a copy of the list before iterating over it.1726SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock()));17271728for (BasicBlock *BB : BBList) {1729Instruction *Inst = &BB->front();1730while (true) {1731// DFSanVisitor may split the current basic 
block, changing the current1732// instruction's next pointer and moving the next instruction to the1733// tail block from which we should continue.1734Instruction *Next = Inst->getNextNode();1735// DFSanVisitor may delete Inst, so keep track of whether it was a1736// terminator.1737bool IsTerminator = Inst->isTerminator();1738if (!DFSF.SkipInsts.count(Inst))1739DFSanVisitor(DFSF).visit(Inst);1740if (IsTerminator)1741break;1742Inst = Next;1743}1744}17451746// We will not necessarily be able to compute the shadow for every phi node1747// until we have visited every block. Therefore, the code that handles phi1748// nodes adds them to the PHIFixups list so that they can be properly1749// handled here.1750for (DFSanFunction::PHIFixupElement &P : DFSF.PHIFixups) {1751for (unsigned Val = 0, N = P.Phi->getNumIncomingValues(); Val != N;1752++Val) {1753P.ShadowPhi->setIncomingValue(1754Val, DFSF.getShadow(P.Phi->getIncomingValue(Val)));1755if (P.OriginPhi)1756P.OriginPhi->setIncomingValue(1757Val, DFSF.getOrigin(P.Phi->getIncomingValue(Val)));1758}1759}17601761// -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy1762// places (i.e. instructions in basic blocks we haven't even begun visiting1763// yet). 
To make our life easier, do this work in a pass after the main1764// instrumentation.1765if (ClDebugNonzeroLabels) {1766for (Value *V : DFSF.NonZeroChecks) {1767BasicBlock::iterator Pos;1768if (Instruction *I = dyn_cast<Instruction>(V))1769Pos = std::next(I->getIterator());1770else1771Pos = DFSF.F->getEntryBlock().begin();1772while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))1773Pos = std::next(Pos->getIterator());1774IRBuilder<> IRB(Pos->getParent(), Pos);1775Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos);1776Value *Ne =1777IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow);1778BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(1779Ne, Pos, /*Unreachable=*/false, ColdCallWeights));1780IRBuilder<> ThenIRB(BI);1781ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});1782}1783}1784}17851786return Changed || !FnsToInstrument.empty() ||1787M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;1788}17891790Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {1791Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy);1792if (ArgOffset)1793Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset));1794return IRB.CreateIntToPtr(Base, PointerType::get(DFS.getShadowTy(T), 0),1795"_dfsarg");1796}17971798Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {1799return IRB.CreatePointerCast(1800DFS.RetvalTLS, PointerType::get(DFS.getShadowTy(T), 0), "_dfsret");1801}18021803Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; }18041805Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) {1806return IRB.CreateConstInBoundsGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0,1807ArgNo, "_dfsarg_o");1808}18091810Value *DFSanFunction::getOrigin(Value *V) {1811assert(DFS.shouldTrackOrigins());1812if (!isa<Argument>(V) && !isa<Instruction>(V))1813return DFS.ZeroOrigin;1814Value *&Origin = ValOriginMap[V];1815if (!Origin) {1816if (Argument *A = 
dyn_cast<Argument>(V)) {1817if (IsNativeABI)1818return DFS.ZeroOrigin;1819if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {1820Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();1821IRBuilder<> IRB(ArgOriginTLSPos);1822Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);1823Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);1824} else {1825// Overflow1826Origin = DFS.ZeroOrigin;1827}1828} else {1829Origin = DFS.ZeroOrigin;1830}1831}1832return Origin;1833}18341835void DFSanFunction::setOrigin(Instruction *I, Value *Origin) {1836if (!DFS.shouldTrackOrigins())1837return;1838assert(!ValOriginMap.count(I));1839assert(Origin->getType() == DFS.OriginTy);1840ValOriginMap[I] = Origin;1841}18421843Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {1844unsigned ArgOffset = 0;1845const DataLayout &DL = F->getDataLayout();1846for (auto &FArg : F->args()) {1847if (!FArg.getType()->isSized()) {1848if (A == &FArg)1849break;1850continue;1851}18521853unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg));1854if (A != &FArg) {1855ArgOffset += alignTo(Size, ShadowTLSAlignment);1856if (ArgOffset > ArgTLSSize)1857break; // ArgTLS overflows, uses a zero shadow.1858continue;1859}18601861if (ArgOffset + Size > ArgTLSSize)1862break; // ArgTLS overflows, uses a zero shadow.18631864Instruction *ArgTLSPos = &*F->getEntryBlock().begin();1865IRBuilder<> IRB(ArgTLSPos);1866Value *ArgShadowPtr = getArgTLS(FArg.getType(), ArgOffset, IRB);1867return IRB.CreateAlignedLoad(DFS.getShadowTy(&FArg), ArgShadowPtr,1868ShadowTLSAlignment);1869}18701871return DFS.getZeroShadow(A);1872}18731874Value *DFSanFunction::getShadow(Value *V) {1875if (!isa<Argument>(V) && !isa<Instruction>(V))1876return DFS.getZeroShadow(V);1877if (IsForceZeroLabels)1878return DFS.getZeroShadow(V);1879Value *&Shadow = ValShadowMap[V];1880if (!Shadow) {1881if (Argument *A = dyn_cast<Argument>(V)) {1882if (IsNativeABI)1883return DFS.getZeroShadow(V);1884Shadow = 
getShadowForTLSArgument(A);1885NonZeroChecks.push_back(Shadow);1886} else {1887Shadow = DFS.getZeroShadow(V);1888}1889}1890return Shadow;1891}18921893void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {1894assert(!ValShadowMap.count(I));1895ValShadowMap[I] = Shadow;1896}18971898/// Compute the integer shadow offset that corresponds to a given1899/// application address.1900///1901/// Offset = (Addr & ~AndMask) ^ XorMask1902Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) {1903assert(Addr != RetvalTLS && "Reinstrumenting?");1904Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);19051906uint64_t AndMask = MapParams->AndMask;1907if (AndMask)1908OffsetLong =1909IRB.CreateAnd(OffsetLong, ConstantInt::get(IntptrTy, ~AndMask));19101911uint64_t XorMask = MapParams->XorMask;1912if (XorMask)1913OffsetLong = IRB.CreateXor(OffsetLong, ConstantInt::get(IntptrTy, XorMask));1914return OffsetLong;1915}19161917std::pair<Value *, Value *>1918DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment,1919BasicBlock::iterator Pos) {1920// Returns ((Addr & shadow_mask) + origin_base - shadow_base) & ~4UL1921IRBuilder<> IRB(Pos->getParent(), Pos);1922Value *ShadowOffset = getShadowOffset(Addr, IRB);1923Value *ShadowLong = ShadowOffset;1924uint64_t ShadowBase = MapParams->ShadowBase;1925if (ShadowBase != 0) {1926ShadowLong =1927IRB.CreateAdd(ShadowLong, ConstantInt::get(IntptrTy, ShadowBase));1928}1929IntegerType *ShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);1930Value *ShadowPtr =1931IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));1932Value *OriginPtr = nullptr;1933if (shouldTrackOrigins()) {1934Value *OriginLong = ShadowOffset;1935uint64_t OriginBase = MapParams->OriginBase;1936if (OriginBase != 0)1937OriginLong =1938IRB.CreateAdd(OriginLong, ConstantInt::get(IntptrTy, OriginBase));1939const Align Alignment = llvm::assumeAligned(InstAlignment.value());1940// When alignment is >= 4, Addr must be aligned to 4, 
otherwise it is UB.1941// So Mask is unnecessary.1942if (Alignment < MinOriginAlignment) {1943uint64_t Mask = MinOriginAlignment.value() - 1;1944OriginLong = IRB.CreateAnd(OriginLong, ConstantInt::get(IntptrTy, ~Mask));1945}1946OriginPtr = IRB.CreateIntToPtr(OriginLong, OriginPtrTy);1947}1948return std::make_pair(ShadowPtr, OriginPtr);1949}19501951Value *DataFlowSanitizer::getShadowAddress(Value *Addr,1952BasicBlock::iterator Pos,1953Value *ShadowOffset) {1954IRBuilder<> IRB(Pos->getParent(), Pos);1955return IRB.CreateIntToPtr(ShadowOffset, PrimitiveShadowPtrTy);1956}19571958Value *DataFlowSanitizer::getShadowAddress(Value *Addr,1959BasicBlock::iterator Pos) {1960IRBuilder<> IRB(Pos->getParent(), Pos);1961Value *ShadowOffset = getShadowOffset(Addr, IRB);1962return getShadowAddress(Addr, Pos, ShadowOffset);1963}19641965Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,1966BasicBlock::iterator Pos) {1967Value *PrimitiveValue = combineShadows(V1, V2, Pos);1968return expandFromPrimitiveShadow(T, PrimitiveValue, Pos);1969}19701971// Generates IR to compute the union of the two given shadows, inserting it1972// before Pos. 
The combined value is with primitive type.1973Value *DFSanFunction::combineShadows(Value *V1, Value *V2,1974BasicBlock::iterator Pos) {1975if (DFS.isZeroShadow(V1))1976return collapseToPrimitiveShadow(V2, Pos);1977if (DFS.isZeroShadow(V2))1978return collapseToPrimitiveShadow(V1, Pos);1979if (V1 == V2)1980return collapseToPrimitiveShadow(V1, Pos);19811982auto V1Elems = ShadowElements.find(V1);1983auto V2Elems = ShadowElements.find(V2);1984if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {1985if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),1986V2Elems->second.begin(), V2Elems->second.end())) {1987return collapseToPrimitiveShadow(V1, Pos);1988}1989if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),1990V1Elems->second.begin(), V1Elems->second.end())) {1991return collapseToPrimitiveShadow(V2, Pos);1992}1993} else if (V1Elems != ShadowElements.end()) {1994if (V1Elems->second.count(V2))1995return collapseToPrimitiveShadow(V1, Pos);1996} else if (V2Elems != ShadowElements.end()) {1997if (V2Elems->second.count(V1))1998return collapseToPrimitiveShadow(V2, Pos);1999}20002001auto Key = std::make_pair(V1, V2);2002if (V1 > V2)2003std::swap(Key.first, Key.second);2004CachedShadow &CCS = CachedShadows[Key];2005if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))2006return CCS.Shadow;20072008// Converts inputs shadows to shadows with primitive types.2009Value *PV1 = collapseToPrimitiveShadow(V1, Pos);2010Value *PV2 = collapseToPrimitiveShadow(V2, Pos);20112012IRBuilder<> IRB(Pos->getParent(), Pos);2013CCS.Block = Pos->getParent();2014CCS.Shadow = IRB.CreateOr(PV1, PV2);20152016std::set<Value *> UnionElems;2017if (V1Elems != ShadowElements.end()) {2018UnionElems = V1Elems->second;2019} else {2020UnionElems.insert(V1);2021}2022if (V2Elems != ShadowElements.end()) {2023UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());2024} else {2025UnionElems.insert(V2);2026}2027ShadowElements[CCS.Shadow] = 
std::move(UnionElems);20282029return CCS.Shadow;2030}20312032// A convenience function which folds the shadows of each of the operands2033// of the provided instruction Inst, inserting the IR before Inst. Returns2034// the computed union Value.2035Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {2036if (Inst->getNumOperands() == 0)2037return DFS.getZeroShadow(Inst);20382039Value *Shadow = getShadow(Inst->getOperand(0));2040for (unsigned I = 1, N = Inst->getNumOperands(); I < N; ++I)2041Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(I)),2042Inst->getIterator());20432044return expandFromPrimitiveShadow(Inst->getType(), Shadow,2045Inst->getIterator());2046}20472048void DFSanVisitor::visitInstOperands(Instruction &I) {2049Value *CombinedShadow = DFSF.combineOperandShadows(&I);2050DFSF.setShadow(&I, CombinedShadow);2051visitInstOperandOrigins(I);2052}20532054Value *DFSanFunction::combineOrigins(const std::vector<Value *> &Shadows,2055const std::vector<Value *> &Origins,2056BasicBlock::iterator Pos,2057ConstantInt *Zero) {2058assert(Shadows.size() == Origins.size());2059size_t Size = Origins.size();2060if (Size == 0)2061return DFS.ZeroOrigin;2062Value *Origin = nullptr;2063if (!Zero)2064Zero = DFS.ZeroPrimitiveShadow;2065for (size_t I = 0; I != Size; ++I) {2066Value *OpOrigin = Origins[I];2067Constant *ConstOpOrigin = dyn_cast<Constant>(OpOrigin);2068if (ConstOpOrigin && ConstOpOrigin->isNullValue())2069continue;2070if (!Origin) {2071Origin = OpOrigin;2072continue;2073}2074Value *OpShadow = Shadows[I];2075Value *PrimitiveShadow = collapseToPrimitiveShadow(OpShadow, Pos);2076IRBuilder<> IRB(Pos->getParent(), Pos);2077Value *Cond = IRB.CreateICmpNE(PrimitiveShadow, Zero);2078Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);2079}2080return Origin ? 
Origin : DFS.ZeroOrigin;2081}20822083Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) {2084size_t Size = Inst->getNumOperands();2085std::vector<Value *> Shadows(Size);2086std::vector<Value *> Origins(Size);2087for (unsigned I = 0; I != Size; ++I) {2088Shadows[I] = getShadow(Inst->getOperand(I));2089Origins[I] = getOrigin(Inst->getOperand(I));2090}2091return combineOrigins(Shadows, Origins, Inst->getIterator());2092}20932094void DFSanVisitor::visitInstOperandOrigins(Instruction &I) {2095if (!DFSF.DFS.shouldTrackOrigins())2096return;2097Value *CombinedOrigin = DFSF.combineOperandOrigins(&I);2098DFSF.setOrigin(&I, CombinedOrigin);2099}21002101Align DFSanFunction::getShadowAlign(Align InstAlignment) {2102const Align Alignment = ClPreserveAlignment ? InstAlignment : Align(1);2103return Align(Alignment.value() * DFS.ShadowWidthBytes);2104}21052106Align DFSanFunction::getOriginAlign(Align InstAlignment) {2107const Align Alignment = llvm::assumeAligned(InstAlignment.value());2108return Align(std::max(MinOriginAlignment, Alignment));2109}21102111bool DFSanFunction::isLookupTableConstant(Value *P) {2112if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P->stripPointerCasts()))2113if (GV->isConstant() && GV->hasName())2114return DFS.CombineTaintLookupTableNames.count(GV->getName());21152116return false;2117}21182119bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,2120Align InstAlignment) {2121// When enabling tracking load instructions, we always use2122// __dfsan_load_label_and_origin to reduce code size.2123if (ClTrackOrigins == 2)2124return true;21252126assert(Size != 0);2127// * if Size == 1, it is sufficient to load its origin aligned at 4.2128// * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to2129// load its origin aligned at 4. If not, although origins may be lost, it2130// should not happen very often.2131// * if align >= 4, Addr must be aligned to 4, otherwise it is UB. 
When2132// Size % 4 == 0, it is more efficient to load origins without callbacks.2133// * Otherwise we use __dfsan_load_label_and_origin.2134// This should ensure that common cases run efficiently.2135if (Size <= 2)2136return false;21372138const Align Alignment = llvm::assumeAligned(InstAlignment.value());2139return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size);2140}21412142Value *DataFlowSanitizer::loadNextOrigin(BasicBlock::iterator Pos,2143Align OriginAlign,2144Value **OriginAddr) {2145IRBuilder<> IRB(Pos->getParent(), Pos);2146*OriginAddr =2147IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1));2148return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign);2149}21502151std::pair<Value *, Value *> DFSanFunction::loadShadowFast(2152Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,2153Align OriginAlign, Value *FirstOrigin, BasicBlock::iterator Pos) {2154const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();2155const uint64_t ShadowSize = Size * DFS.ShadowWidthBytes;21562157assert(Size >= 4 && "Not large enough load size for fast path!");21582159// Used for origin tracking.2160std::vector<Value *> Shadows;2161std::vector<Value *> Origins;21622163// Load instructions in LLVM can have arbitrary byte sizes (e.g., 3, 12, 20)2164// but this function is only used in a subset of cases that make it possible2165// to optimize the instrumentation.2166//2167// Specifically, when the shadow size in bytes (i.e., loaded bytes x shadow2168// per byte) is either:2169// - a multiple of 8 (common)2170// - equal to 4 (only for load32)2171//2172// For the second case, we can fit the wide shadow in a 32-bit integer. In all2173// other cases, we use a 64-bit integer to hold the wide shadow.2174Type *WideShadowTy =2175ShadowSize == 4 ? 
Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx);21762177IRBuilder<> IRB(Pos->getParent(), Pos);2178Value *CombinedWideShadow =2179IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);21802181unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();2182const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;21832184auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) {2185if (BytesPerWideShadow > 4) {2186assert(BytesPerWideShadow == 8);2187// The wide shadow relates to two origin pointers: one for the first four2188// application bytes, and one for the latest four. We use a left shift to2189// get just the shadow bytes that correspond to the first origin pointer,2190// and then the entire shadow for the second origin pointer (which will be2191// chosen by combineOrigins() iff the least-significant half of the wide2192// shadow was empty but the other half was not).2193Value *WideShadowLo = IRB.CreateShl(2194WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2));2195Shadows.push_back(WideShadow);2196Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr));21972198Shadows.push_back(WideShadowLo);2199Origins.push_back(Origin);2200} else {2201Shadows.push_back(WideShadow);2202Origins.push_back(Origin);2203}2204};22052206if (ShouldTrackOrigins)2207AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin);22082209// First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks) linearly;2210// then OR individual shadows within the combined WideShadow by binary ORing.2211// This is fewer instructions than ORing shadows individually, since it2212// needs logN shift/or instructions (N being the bytes of the combined wide2213// shadow).2214for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;2215ByteOfs += BytesPerWideShadow) {2216ShadowAddr = IRB.CreateGEP(WideShadowTy, ShadowAddr,2217ConstantInt::get(DFS.IntptrTy, 1));2218Value *NextWideShadow 
=2219IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);2220CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);2221if (ShouldTrackOrigins) {2222Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);2223AppendWideShadowAndOrigin(NextWideShadow, NextOrigin);2224}2225}2226for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits;2227Width >>= 1) {2228Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width);2229CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow);2230}2231return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy),2232ShouldTrackOrigins2233? combineOrigins(Shadows, Origins, Pos,2234ConstantInt::getSigned(IRB.getInt64Ty(), 0))2235: DFS.ZeroOrigin};2236}22372238std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(2239Value *Addr, uint64_t Size, Align InstAlignment, BasicBlock::iterator Pos) {2240const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();22412242// Non-escaped loads.2243if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {2244const auto SI = AllocaShadowMap.find(AI);2245if (SI != AllocaShadowMap.end()) {2246IRBuilder<> IRB(Pos->getParent(), Pos);2247Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second);2248const auto OI = AllocaOriginMap.find(AI);2249assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end());2250return {ShadowLI, ShouldTrackOrigins2251? IRB.CreateLoad(DFS.OriginTy, OI->second)2252: nullptr};2253}2254}22552256// Load from constant addresses.2257SmallVector<const Value *, 2> Objs;2258getUnderlyingObjects(Addr, Objs);2259bool AllConstants = true;2260for (const Value *Obj : Objs) {2261if (isa<Function>(Obj) || isa<BlockAddress>(Obj))2262continue;2263if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())2264continue;22652266AllConstants = false;2267break;2268}2269if (AllConstants)2270return {DFS.ZeroPrimitiveShadow,2271ShouldTrackOrigins ? 
DFS.ZeroOrigin : nullptr};22722273if (Size == 0)2274return {DFS.ZeroPrimitiveShadow,2275ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};22762277// Use callback to load if this is not an optimizable case for origin2278// tracking.2279if (ShouldTrackOrigins &&2280useCallbackLoadLabelAndOrigin(Size, InstAlignment)) {2281IRBuilder<> IRB(Pos->getParent(), Pos);2282CallInst *Call =2283IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,2284{Addr, ConstantInt::get(DFS.IntptrTy, Size)});2285Call->addRetAttr(Attribute::ZExt);2286return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),2287DFS.PrimitiveShadowTy),2288IRB.CreateTrunc(Call, DFS.OriginTy)};2289}22902291// Other cases that support loading shadows or origins in a fast way.2292Value *ShadowAddr, *OriginAddr;2293std::tie(ShadowAddr, OriginAddr) =2294DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);22952296const Align ShadowAlign = getShadowAlign(InstAlignment);2297const Align OriginAlign = getOriginAlign(InstAlignment);2298Value *Origin = nullptr;2299if (ShouldTrackOrigins) {2300IRBuilder<> IRB(Pos->getParent(), Pos);2301Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign);2302}23032304// When the byte size is small enough, we can load the shadow directly with2305// just a few instructions.2306switch (Size) {2307case 1: {2308LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);2309LI->setAlignment(ShadowAlign);2310return {LI, Origin};2311}2312case 2: {2313IRBuilder<> IRB(Pos->getParent(), Pos);2314Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,2315ConstantInt::get(DFS.IntptrTy, 1));2316Value *Load =2317IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign);2318Value *Load1 =2319IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign);2320return {combineShadows(Load, Load1, Pos), Origin};2321}2322}2323bool HasSizeForFastPath = DFS.hasLoadSizeForFastPath(Size);23242325if (HasSizeForFastPath)2326return loadShadowFast(ShadowAddr, 
OriginAddr, Size, ShadowAlign,2327OriginAlign, Origin, Pos);23282329IRBuilder<> IRB(Pos->getParent(), Pos);2330CallInst *FallbackCall = IRB.CreateCall(2331DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});2332FallbackCall->addRetAttr(Attribute::ZExt);2333return {FallbackCall, Origin};2334}23352336std::pair<Value *, Value *>2337DFSanFunction::loadShadowOrigin(Value *Addr, uint64_t Size, Align InstAlignment,2338BasicBlock::iterator Pos) {2339Value *PrimitiveShadow, *Origin;2340std::tie(PrimitiveShadow, Origin) =2341loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos);2342if (DFS.shouldTrackOrigins()) {2343if (ClTrackOrigins == 2) {2344IRBuilder<> IRB(Pos->getParent(), Pos);2345auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow);2346if (!ConstantShadow || !ConstantShadow->isZeroValue())2347Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB);2348}2349}2350return {PrimitiveShadow, Origin};2351}23522353static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) {2354switch (AO) {2355case AtomicOrdering::NotAtomic:2356return AtomicOrdering::NotAtomic;2357case AtomicOrdering::Unordered:2358case AtomicOrdering::Monotonic:2359case AtomicOrdering::Acquire:2360return AtomicOrdering::Acquire;2361case AtomicOrdering::Release:2362case AtomicOrdering::AcquireRelease:2363return AtomicOrdering::AcquireRelease;2364case AtomicOrdering::SequentiallyConsistent:2365return AtomicOrdering::SequentiallyConsistent;2366}2367llvm_unreachable("Unknown ordering");2368}23692370Value *StripPointerGEPsAndCasts(Value *V) {2371if (!V->getType()->isPointerTy())2372return V;23732374// DFSan pass should be running on valid IR, but we'll2375// keep a seen set to ensure there are no issues.2376SmallPtrSet<const Value *, 4> Visited;2377Visited.insert(V);2378do {2379if (auto *GEP = dyn_cast<GEPOperator>(V)) {2380V = GEP->getPointerOperand();2381} else if (Operator::getOpcode(V) == Instruction::BitCast) {2382V = cast<Operator>(V)->getOperand(0);2383if 
(!V->getType()->isPointerTy())2384return V;2385} else if (isa<GlobalAlias>(V)) {2386V = cast<GlobalAlias>(V)->getAliasee();2387}2388} while (Visited.insert(V).second);23892390return V;2391}23922393void DFSanVisitor::visitLoadInst(LoadInst &LI) {2394auto &DL = LI.getDataLayout();2395uint64_t Size = DL.getTypeStoreSize(LI.getType());2396if (Size == 0) {2397DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI));2398DFSF.setOrigin(&LI, DFSF.DFS.ZeroOrigin);2399return;2400}24012402// When an application load is atomic, increase atomic ordering between2403// atomic application loads and stores to ensure happen-before order; load2404// shadow data after application data; store zero shadow data before2405// application data. This ensure shadow loads return either labels of the2406// initial application data or zeros.2407if (LI.isAtomic())2408LI.setOrdering(addAcquireOrdering(LI.getOrdering()));24092410BasicBlock::iterator AfterLi = std::next(LI.getIterator());2411BasicBlock::iterator Pos = LI.getIterator();2412if (LI.isAtomic())2413Pos = std::next(Pos);24142415std::vector<Value *> Shadows;2416std::vector<Value *> Origins;2417Value *PrimitiveShadow, *Origin;2418std::tie(PrimitiveShadow, Origin) =2419DFSF.loadShadowOrigin(LI.getPointerOperand(), Size, LI.getAlign(), Pos);2420const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();2421if (ShouldTrackOrigins) {2422Shadows.push_back(PrimitiveShadow);2423Origins.push_back(Origin);2424}2425if (ClCombinePointerLabelsOnLoad ||2426DFSF.isLookupTableConstant(2427StripPointerGEPsAndCasts(LI.getPointerOperand()))) {2428Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());2429PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);2430if (ShouldTrackOrigins) {2431Shadows.push_back(PtrShadow);2432Origins.push_back(DFSF.getOrigin(LI.getPointerOperand()));2433}2434}2435if (!DFSF.DFS.isZeroShadow(PrimitiveShadow))2436DFSF.NonZeroChecks.push_back(PrimitiveShadow);24372438Value *Shadow 
=2439DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, Pos);2440DFSF.setShadow(&LI, Shadow);24412442if (ShouldTrackOrigins) {2443DFSF.setOrigin(&LI, DFSF.combineOrigins(Shadows, Origins, Pos));2444}24452446if (ClEventCallbacks) {2447IRBuilder<> IRB(Pos->getParent(), Pos);2448Value *Addr = LI.getPointerOperand();2449CallInst *CI =2450IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr});2451CI->addParamAttr(0, Attribute::ZExt);2452}24532454IRBuilder<> IRB(AfterLi->getParent(), AfterLi);2455DFSF.addReachesFunctionCallbacksIfEnabled(IRB, LI, &LI);2456}24572458Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,2459IRBuilder<> &IRB) {2460assert(DFS.shouldTrackOrigins());2461return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin});2462}24632464Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {2465if (!DFS.shouldTrackOrigins())2466return V;2467return IRB.CreateCall(DFS.DFSanChainOriginFn, V);2468}24692470Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) {2471const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;2472const DataLayout &DL = F->getDataLayout();2473unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);2474if (IntptrSize == OriginSize)2475return Origin;2476assert(IntptrSize == OriginSize * 2);2477Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false);2478return IRB.CreateOr(Origin, IRB.CreateShl(Origin, OriginSize * 8));2479}24802481void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin,2482Value *StoreOriginAddr,2483uint64_t StoreOriginSize, Align Alignment) {2484const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;2485const DataLayout &DL = F->getDataLayout();2486const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy);2487unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);2488assert(IntptrAlignment >= MinOriginAlignment);2489assert(IntptrSize >= OriginSize);24902491unsigned Ofs = 0;2492Align 
CurrentAlignment = Alignment;2493if (Alignment >= IntptrAlignment && IntptrSize > OriginSize) {2494Value *IntptrOrigin = originToIntptr(IRB, Origin);2495Value *IntptrStoreOriginPtr = IRB.CreatePointerCast(2496StoreOriginAddr, PointerType::get(DFS.IntptrTy, 0));2497for (unsigned I = 0; I < StoreOriginSize / IntptrSize; ++I) {2498Value *Ptr =2499I ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrStoreOriginPtr, I)2500: IntptrStoreOriginPtr;2501IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);2502Ofs += IntptrSize / OriginSize;2503CurrentAlignment = IntptrAlignment;2504}2505}25062507for (unsigned I = Ofs; I < (StoreOriginSize + OriginSize - 1) / OriginSize;2508++I) {2509Value *GEP = I ? IRB.CreateConstGEP1_32(DFS.OriginTy, StoreOriginAddr, I)2510: StoreOriginAddr;2511IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);2512CurrentAlignment = MinOriginAlignment;2513}2514}25152516Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB,2517const Twine &Name) {2518Type *VTy = V->getType();2519assert(VTy->isIntegerTy());2520if (VTy->getIntegerBitWidth() == 1)2521// Just converting a bool to a bool, so do nothing.2522return V;2523return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), Name);2524}25252526void DFSanFunction::storeOrigin(BasicBlock::iterator Pos, Value *Addr,2527uint64_t Size, Value *Shadow, Value *Origin,2528Value *StoreOriginAddr, Align InstAlignment) {2529// Do not write origins for zero shadows because we do not trace origins for2530// untainted sinks.2531const Align OriginAlignment = getOriginAlign(InstAlignment);2532Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos);2533IRBuilder<> IRB(Pos->getParent(), Pos);2534if (auto *ConstantShadow = dyn_cast<Constant>(CollapsedShadow)) {2535if (!ConstantShadow->isZeroValue())2536paintOrigin(IRB, updateOrigin(Origin, IRB), StoreOriginAddr, Size,2537OriginAlignment);2538return;2539}25402541if (shouldInstrumentWithCall()) 
{2542IRB.CreateCall(2543DFS.DFSanMaybeStoreOriginFn,2544{CollapsedShadow, Addr, ConstantInt::get(DFS.IntptrTy, Size), Origin});2545} else {2546Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");2547DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);2548Instruction *CheckTerm = SplitBlockAndInsertIfThen(2549Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DTU);2550IRBuilder<> IRBNew(CheckTerm);2551paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size,2552OriginAlignment);2553++NumOriginStores;2554}2555}25562557void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,2558Align ShadowAlign,2559BasicBlock::iterator Pos) {2560IRBuilder<> IRB(Pos->getParent(), Pos);2561IntegerType *ShadowTy =2562IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);2563Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);2564Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);2565IRB.CreateAlignedStore(ExtZeroShadow, ShadowAddr, ShadowAlign);2566// Do not write origins for 0 shadows because we do not trace origins for2567// untainted sinks.2568}25692570void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,2571Align InstAlignment,2572Value *PrimitiveShadow,2573Value *Origin,2574BasicBlock::iterator Pos) {2575const bool ShouldTrackOrigins = DFS.shouldTrackOrigins() && Origin;25762577if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {2578const auto SI = AllocaShadowMap.find(AI);2579if (SI != AllocaShadowMap.end()) {2580IRBuilder<> IRB(Pos->getParent(), Pos);2581IRB.CreateStore(PrimitiveShadow, SI->second);25822583// Do not write origins for 0 shadows because we do not trace origins for2584// untainted sinks.2585if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) {2586const auto OI = AllocaOriginMap.find(AI);2587assert(OI != AllocaOriginMap.end() && Origin);2588IRB.CreateStore(Origin, OI->second);2589}2590return;2591}2592}25932594const Align ShadowAlign = getShadowAlign(InstAlignment);2595if 
(DFS.isZeroShadow(PrimitiveShadow)) {2596storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, Pos);2597return;2598}25992600IRBuilder<> IRB(Pos->getParent(), Pos);2601Value *ShadowAddr, *OriginAddr;2602std::tie(ShadowAddr, OriginAddr) =2603DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);26042605const unsigned ShadowVecSize = 8;2606assert(ShadowVecSize * DFS.ShadowWidthBits <= 128 &&2607"Shadow vector is too large!");26082609uint64_t Offset = 0;2610uint64_t LeftSize = Size;2611if (LeftSize >= ShadowVecSize) {2612auto *ShadowVecTy =2613FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize);2614Value *ShadowVec = PoisonValue::get(ShadowVecTy);2615for (unsigned I = 0; I != ShadowVecSize; ++I) {2616ShadowVec = IRB.CreateInsertElement(2617ShadowVec, PrimitiveShadow,2618ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), I));2619}2620do {2621Value *CurShadowVecAddr =2622IRB.CreateConstGEP1_32(ShadowVecTy, ShadowAddr, Offset);2623IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);2624LeftSize -= ShadowVecSize;2625++Offset;2626} while (LeftSize >= ShadowVecSize);2627Offset *= ShadowVecSize;2628}2629while (LeftSize > 0) {2630Value *CurShadowAddr =2631IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset);2632IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign);2633--LeftSize;2634++Offset;2635}26362637if (ShouldTrackOrigins) {2638storeOrigin(Pos, Addr, Size, PrimitiveShadow, Origin, OriginAddr,2639InstAlignment);2640}2641}26422643static AtomicOrdering addReleaseOrdering(AtomicOrdering AO) {2644switch (AO) {2645case AtomicOrdering::NotAtomic:2646return AtomicOrdering::NotAtomic;2647case AtomicOrdering::Unordered:2648case AtomicOrdering::Monotonic:2649case AtomicOrdering::Release:2650return AtomicOrdering::Release;2651case AtomicOrdering::Acquire:2652case AtomicOrdering::AcquireRelease:2653return AtomicOrdering::AcquireRelease;2654case AtomicOrdering::SequentiallyConsistent:2655return 
AtomicOrdering::SequentiallyConsistent;2656}2657llvm_unreachable("Unknown ordering");2658}26592660void DFSanVisitor::visitStoreInst(StoreInst &SI) {2661auto &DL = SI.getDataLayout();2662Value *Val = SI.getValueOperand();2663uint64_t Size = DL.getTypeStoreSize(Val->getType());2664if (Size == 0)2665return;26662667// When an application store is atomic, increase atomic ordering between2668// atomic application loads and stores to ensure happen-before order; load2669// shadow data after application data; store zero shadow data before2670// application data. This ensure shadow loads return either labels of the2671// initial application data or zeros.2672if (SI.isAtomic())2673SI.setOrdering(addReleaseOrdering(SI.getOrdering()));26742675const bool ShouldTrackOrigins =2676DFSF.DFS.shouldTrackOrigins() && !SI.isAtomic();2677std::vector<Value *> Shadows;2678std::vector<Value *> Origins;26792680Value *Shadow =2681SI.isAtomic() ? DFSF.DFS.getZeroShadow(Val) : DFSF.getShadow(Val);26822683if (ShouldTrackOrigins) {2684Shadows.push_back(Shadow);2685Origins.push_back(DFSF.getOrigin(Val));2686}26872688Value *PrimitiveShadow;2689if (ClCombinePointerLabelsOnStore) {2690Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());2691if (ShouldTrackOrigins) {2692Shadows.push_back(PtrShadow);2693Origins.push_back(DFSF.getOrigin(SI.getPointerOperand()));2694}2695PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, SI.getIterator());2696} else {2697PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, SI.getIterator());2698}2699Value *Origin = nullptr;2700if (ShouldTrackOrigins)2701Origin = DFSF.combineOrigins(Shadows, Origins, SI.getIterator());2702DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(),2703PrimitiveShadow, Origin, SI.getIterator());2704if (ClEventCallbacks) {2705IRBuilder<> IRB(&SI);2706Value *Addr = SI.getPointerOperand();2707CallInst *CI =2708IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr});2709CI->addParamAttr(0, 
Attribute::ZExt);2710}2711}27122713void DFSanVisitor::visitCASOrRMW(Align InstAlignment, Instruction &I) {2714assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));27152716Value *Val = I.getOperand(1);2717const auto &DL = I.getDataLayout();2718uint64_t Size = DL.getTypeStoreSize(Val->getType());2719if (Size == 0)2720return;27212722// Conservatively set data at stored addresses and return with zero shadow to2723// prevent shadow data races.2724IRBuilder<> IRB(&I);2725Value *Addr = I.getOperand(0);2726const Align ShadowAlign = DFSF.getShadowAlign(InstAlignment);2727DFSF.storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, I.getIterator());2728DFSF.setShadow(&I, DFSF.DFS.getZeroShadow(&I));2729DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);2730}27312732void DFSanVisitor::visitAtomicRMWInst(AtomicRMWInst &I) {2733visitCASOrRMW(I.getAlign(), I);2734// TODO: The ordering change follows MSan. It is possible not to change2735// ordering because we always set and use 0 shadows.2736I.setOrdering(addReleaseOrdering(I.getOrdering()));2737}27382739void DFSanVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {2740visitCASOrRMW(I.getAlign(), I);2741// TODO: The ordering change follows MSan. It is possible not to change2742// ordering because we always set and use 0 shadows.2743I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));2744}27452746void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {2747visitInstOperands(UO);2748}27492750void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {2751visitInstOperands(BO);2752}27532754void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) {2755// Special case: if this is the bitcast (there is exactly 1 allowed) between2756// a musttail call and a ret, don't instrument. 
New instructions are not2757// allowed after a musttail call.2758if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))2759if (CI->isMustTailCall())2760return;2761visitInstOperands(BCI);2762}27632764void DFSanVisitor::visitCastInst(CastInst &CI) { visitInstOperands(CI); }27652766void DFSanVisitor::visitCmpInst(CmpInst &CI) {2767visitInstOperands(CI);2768if (ClEventCallbacks) {2769IRBuilder<> IRB(&CI);2770Value *CombinedShadow = DFSF.getShadow(&CI);2771CallInst *CallI =2772IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow);2773CallI->addParamAttr(0, Attribute::ZExt);2774}2775}27762777void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) {2778// We do not need to track data through LandingPadInst.2779//2780// For the C++ exceptions, if a value is thrown, this value will be stored2781// in a memory location provided by __cxa_allocate_exception(...) (on the2782// throw side) or __cxa_begin_catch(...) (on the catch side).2783// This memory will have a shadow, so with the loads and stores we will be2784// able to propagate labels on data thrown through exceptions, without any2785// special handling of the LandingPadInst.2786//2787// The second element in the pair result of the LandingPadInst is a2788// register value, but it is for a type ID and should never be tainted.2789DFSF.setShadow(&LPI, DFSF.DFS.getZeroShadow(&LPI));2790DFSF.setOrigin(&LPI, DFSF.DFS.ZeroOrigin);2791}27922793void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {2794if (ClCombineOffsetLabelsOnGEP ||2795DFSF.isLookupTableConstant(2796StripPointerGEPsAndCasts(GEPI.getPointerOperand()))) {2797visitInstOperands(GEPI);2798return;2799}28002801// Only propagate shadow/origin of base pointer value but ignore those of2802// offset operands.2803Value *BasePointer = GEPI.getPointerOperand();2804DFSF.setShadow(&GEPI, DFSF.getShadow(BasePointer));2805if (DFSF.DFS.shouldTrackOrigins())2806DFSF.setOrigin(&GEPI, DFSF.getOrigin(BasePointer));2807}28082809void 
DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {2810visitInstOperands(I);2811}28122813void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {2814visitInstOperands(I);2815}28162817void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {2818visitInstOperands(I);2819}28202821void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {2822IRBuilder<> IRB(&I);2823Value *Agg = I.getAggregateOperand();2824Value *AggShadow = DFSF.getShadow(Agg);2825Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());2826DFSF.setShadow(&I, ResShadow);2827visitInstOperandOrigins(I);2828}28292830void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {2831IRBuilder<> IRB(&I);2832Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());2833Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());2834Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());2835DFSF.setShadow(&I, Res);2836visitInstOperandOrigins(I);2837}28382839void DFSanVisitor::visitAllocaInst(AllocaInst &I) {2840bool AllLoadsStores = true;2841for (User *U : I.users()) {2842if (isa<LoadInst>(U))2843continue;28442845if (StoreInst *SI = dyn_cast<StoreInst>(U)) {2846if (SI->getPointerOperand() == &I)2847continue;2848}28492850AllLoadsStores = false;2851break;2852}2853if (AllLoadsStores) {2854IRBuilder<> IRB(&I);2855DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);2856if (DFSF.DFS.shouldTrackOrigins()) {2857DFSF.AllocaOriginMap[&I] =2858IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa");2859}2860}2861DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);2862DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);2863}28642865void DFSanVisitor::visitSelectInst(SelectInst &I) {2866Value *CondShadow = DFSF.getShadow(I.getCondition());2867Value *TrueShadow = DFSF.getShadow(I.getTrueValue());2868Value *FalseShadow = DFSF.getShadow(I.getFalseValue());2869Value *ShadowSel = nullptr;2870const bool ShouldTrackOrigins = 
DFSF.DFS.shouldTrackOrigins();2871std::vector<Value *> Shadows;2872std::vector<Value *> Origins;2873Value *TrueOrigin =2874ShouldTrackOrigins ? DFSF.getOrigin(I.getTrueValue()) : nullptr;2875Value *FalseOrigin =2876ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr;28772878DFSF.addConditionalCallbacksIfEnabled(I, I.getCondition());28792880if (isa<VectorType>(I.getCondition()->getType())) {2881ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,2882FalseShadow, I.getIterator());2883if (ShouldTrackOrigins) {2884Shadows.push_back(TrueShadow);2885Shadows.push_back(FalseShadow);2886Origins.push_back(TrueOrigin);2887Origins.push_back(FalseOrigin);2888}2889} else {2890if (TrueShadow == FalseShadow) {2891ShadowSel = TrueShadow;2892if (ShouldTrackOrigins) {2893Shadows.push_back(TrueShadow);2894Origins.push_back(TrueOrigin);2895}2896} else {2897ShadowSel = SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow,2898"", I.getIterator());2899if (ShouldTrackOrigins) {2900Shadows.push_back(ShadowSel);2901Origins.push_back(SelectInst::Create(I.getCondition(), TrueOrigin,2902FalseOrigin, "", I.getIterator()));2903}2904}2905}2906DFSF.setShadow(&I, ClTrackSelectControlFlow ? DFSF.combineShadowsThenConvert(2907I.getType(), CondShadow,2908ShadowSel, I.getIterator())2909: ShadowSel);2910if (ShouldTrackOrigins) {2911if (ClTrackSelectControlFlow) {2912Shadows.push_back(CondShadow);2913Origins.push_back(DFSF.getOrigin(I.getCondition()));2914}2915DFSF.setOrigin(&I, DFSF.combineOrigins(Shadows, Origins, I.getIterator()));2916}2917}29182919void DFSanVisitor::visitMemSetInst(MemSetInst &I) {2920IRBuilder<> IRB(&I);2921Value *ValShadow = DFSF.getShadow(I.getValue());2922Value *ValOrigin = DFSF.DFS.shouldTrackOrigins()2923? 
DFSF.getOrigin(I.getValue())2924: DFSF.DFS.ZeroOrigin;2925IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn,2926{ValShadow, ValOrigin, I.getDest(),2927IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});2928}29292930void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {2931IRBuilder<> IRB(&I);29322933// CopyOrMoveOrigin transfers origins by refering to their shadows. So we2934// need to move origins before moving shadows.2935if (DFSF.DFS.shouldTrackOrigins()) {2936IRB.CreateCall(2937DFSF.DFS.DFSanMemOriginTransferFn,2938{I.getArgOperand(0), I.getArgOperand(1),2939IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)});2940}29412942Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), I.getIterator());2943Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), I.getIterator());2944Value *LenShadow =2945IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),2946DFSF.DFS.ShadowWidthBytes));2947auto *MTI = cast<MemTransferInst>(2948IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),2949{DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));2950MTI->setDestAlignment(DFSF.getShadowAlign(I.getDestAlign().valueOrOne()));2951MTI->setSourceAlignment(DFSF.getShadowAlign(I.getSourceAlign().valueOrOne()));2952if (ClEventCallbacks) {2953IRB.CreateCall(2954DFSF.DFS.DFSanMemTransferCallbackFn,2955{DestShadow, IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});2956}2957}29582959void DFSanVisitor::visitBranchInst(BranchInst &BR) {2960if (!BR.isConditional())2961return;29622963DFSF.addConditionalCallbacksIfEnabled(BR, BR.getCondition());2964}29652966void DFSanVisitor::visitSwitchInst(SwitchInst &SW) {2967DFSF.addConditionalCallbacksIfEnabled(SW, SW.getCondition());2968}29692970static bool isAMustTailRetVal(Value *RetVal) {2971// Tail call may have a bitcast between return.2972if (auto *I = dyn_cast<BitCastInst>(RetVal)) {2973RetVal = I->getOperand(0);2974}2975if (auto *I = dyn_cast<CallInst>(RetVal)) {2976return 
I->isMustTailCall();2977}2978return false;2979}29802981void DFSanVisitor::visitReturnInst(ReturnInst &RI) {2982if (!DFSF.IsNativeABI && RI.getReturnValue()) {2983// Don't emit the instrumentation for musttail call returns.2984if (isAMustTailRetVal(RI.getReturnValue()))2985return;29862987Value *S = DFSF.getShadow(RI.getReturnValue());2988IRBuilder<> IRB(&RI);2989Type *RT = DFSF.F->getFunctionType()->getReturnType();2990unsigned Size = getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));2991if (Size <= RetvalTLSSize) {2992// If the size overflows, stores nothing. At callsite, oversized return2993// shadows are set to zero.2994IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB), ShadowTLSAlignment);2995}2996if (DFSF.DFS.shouldTrackOrigins()) {2997Value *O = DFSF.getOrigin(RI.getReturnValue());2998IRB.CreateStore(O, DFSF.getRetvalOriginTLS());2999}3000}3001}30023003void DFSanVisitor::addShadowArguments(Function &F, CallBase &CB,3004std::vector<Value *> &Args,3005IRBuilder<> &IRB) {3006FunctionType *FT = F.getFunctionType();30073008auto *I = CB.arg_begin();30093010// Adds non-variable argument shadows.3011for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)3012Args.push_back(3013DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), CB.getIterator()));30143015// Adds variable argument shadows.3016if (FT->isVarArg()) {3017auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy,3018CB.arg_size() - FT->getNumParams());3019auto *LabelVAAlloca =3020new AllocaInst(LabelVATy, getDataLayout().getAllocaAddrSpace(),3021"labelva", DFSF.F->getEntryBlock().begin());30223023for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {3024auto *LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, N);3025IRB.CreateStore(3026DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), CB.getIterator()),3027LabelVAPtr);3028}30293030Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));3031}30323033// Adds the return value shadow.3034if (!FT->getReturnType()->isVoidTy()) {3035if 
(!DFSF.LabelReturnAlloca) {3036DFSF.LabelReturnAlloca = new AllocaInst(3037DFSF.DFS.PrimitiveShadowTy, getDataLayout().getAllocaAddrSpace(),3038"labelreturn", DFSF.F->getEntryBlock().begin());3039}3040Args.push_back(DFSF.LabelReturnAlloca);3041}3042}30433044void DFSanVisitor::addOriginArguments(Function &F, CallBase &CB,3045std::vector<Value *> &Args,3046IRBuilder<> &IRB) {3047FunctionType *FT = F.getFunctionType();30483049auto *I = CB.arg_begin();30503051// Add non-variable argument origins.3052for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)3053Args.push_back(DFSF.getOrigin(*I));30543055// Add variable argument origins.3056if (FT->isVarArg()) {3057auto *OriginVATy =3058ArrayType::get(DFSF.DFS.OriginTy, CB.arg_size() - FT->getNumParams());3059auto *OriginVAAlloca =3060new AllocaInst(OriginVATy, getDataLayout().getAllocaAddrSpace(),3061"originva", DFSF.F->getEntryBlock().begin());30623063for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {3064auto *OriginVAPtr = IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, N);3065IRB.CreateStore(DFSF.getOrigin(*I), OriginVAPtr);3066}30673068Args.push_back(IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, 0));3069}30703071// Add the return value origin.3072if (!FT->getReturnType()->isVoidTy()) {3073if (!DFSF.OriginReturnAlloca) {3074DFSF.OriginReturnAlloca = new AllocaInst(3075DFSF.DFS.OriginTy, getDataLayout().getAllocaAddrSpace(),3076"originreturn", DFSF.F->getEntryBlock().begin());3077}3078Args.push_back(DFSF.OriginReturnAlloca);3079}3080}30813082bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {3083IRBuilder<> IRB(&CB);3084switch (DFSF.DFS.getWrapperKind(&F)) {3085case DataFlowSanitizer::WK_Warning:3086CB.setCalledFunction(&F);3087IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,3088IRB.CreateGlobalStringPtr(F.getName()));3089DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);3090DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));3091DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);3092return true;3093case 
DataFlowSanitizer::WK_Discard:3094CB.setCalledFunction(&F);3095DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);3096DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));3097DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);3098return true;3099case DataFlowSanitizer::WK_Functional:3100CB.setCalledFunction(&F);3101DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);3102visitInstOperands(CB);3103return true;3104case DataFlowSanitizer::WK_Custom:3105// Don't try to handle invokes of custom functions, it's too complicated.3106// Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_3107// wrapper.3108CallInst *CI = dyn_cast<CallInst>(&CB);3109if (!CI)3110return false;31113112const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();3113FunctionType *FT = F.getFunctionType();3114TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);3115std::string CustomFName = ShouldTrackOrigins ? "__dfso_" : "__dfsw_";3116CustomFName += F.getName();3117FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(3118CustomFName, CustomFn.TransformedType);3119if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {3120CustomFn->copyAttributesFrom(&F);31213122// Custom functions returning non-void will write to the return label.3123if (!FT->getReturnType()->isVoidTy()) {3124CustomFn->removeFnAttrs(DFSF.DFS.ReadOnlyNoneAttrs);3125}3126}31273128std::vector<Value *> Args;31293130// Adds non-variable arguments.3131auto *I = CB.arg_begin();3132for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) {3133Args.push_back(*I);3134}31353136// Adds shadow arguments.3137const unsigned ShadowArgStart = Args.size();3138addShadowArguments(F, CB, Args, IRB);31393140// Adds origin arguments.3141const unsigned OriginArgStart = Args.size();3142if (ShouldTrackOrigins)3143addOriginArguments(F, CB, Args, IRB);31443145// Adds variable arguments.3146append_range(Args, drop_begin(CB.args(), FT->getNumParams()));31473148CallInst *CustomCI = IRB.CreateCall(CustomF, 
Args);3149CustomCI->setCallingConv(CI->getCallingConv());3150CustomCI->setAttributes(transformFunctionAttributes(3151CustomFn, CI->getContext(), CI->getAttributes()));31523153// Update the parameter attributes of the custom call instruction to3154// zero extend the shadow parameters. This is required for targets3155// which consider PrimitiveShadowTy an illegal type.3156for (unsigned N = 0; N < FT->getNumParams(); N++) {3157const unsigned ArgNo = ShadowArgStart + N;3158if (CustomCI->getArgOperand(ArgNo)->getType() ==3159DFSF.DFS.PrimitiveShadowTy)3160CustomCI->addParamAttr(ArgNo, Attribute::ZExt);3161if (ShouldTrackOrigins) {3162const unsigned OriginArgNo = OriginArgStart + N;3163if (CustomCI->getArgOperand(OriginArgNo)->getType() ==3164DFSF.DFS.OriginTy)3165CustomCI->addParamAttr(OriginArgNo, Attribute::ZExt);3166}3167}31683169// Loads the return value shadow and origin.3170if (!FT->getReturnType()->isVoidTy()) {3171LoadInst *LabelLoad =3172IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy, DFSF.LabelReturnAlloca);3173DFSF.setShadow(CustomCI,3174DFSF.expandFromPrimitiveShadow(3175FT->getReturnType(), LabelLoad, CB.getIterator()));3176if (ShouldTrackOrigins) {3177LoadInst *OriginLoad =3178IRB.CreateLoad(DFSF.DFS.OriginTy, DFSF.OriginReturnAlloca);3179DFSF.setOrigin(CustomCI, OriginLoad);3180}3181}31823183CI->replaceAllUsesWith(CustomCI);3184CI->eraseFromParent();3185return true;3186}3187return false;3188}31893190Value *DFSanVisitor::makeAddAcquireOrderingTable(IRBuilder<> &IRB) {3191constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;3192uint32_t OrderingTable[NumOrderings] = {};31933194OrderingTable[(int)AtomicOrderingCABI::relaxed] =3195OrderingTable[(int)AtomicOrderingCABI::acquire] =3196OrderingTable[(int)AtomicOrderingCABI::consume] =3197(int)AtomicOrderingCABI::acquire;3198OrderingTable[(int)AtomicOrderingCABI::release] =3199OrderingTable[(int)AtomicOrderingCABI::acq_rel] 
=3200(int)AtomicOrderingCABI::acq_rel;3201OrderingTable[(int)AtomicOrderingCABI::seq_cst] =3202(int)AtomicOrderingCABI::seq_cst;32033204return ConstantDataVector::get(IRB.getContext(), OrderingTable);3205}32063207void DFSanVisitor::visitLibAtomicLoad(CallBase &CB) {3208// Since we use getNextNode here, we can't have CB terminate the BB.3209assert(isa<CallInst>(CB));32103211IRBuilder<> IRB(&CB);3212Value *Size = CB.getArgOperand(0);3213Value *SrcPtr = CB.getArgOperand(1);3214Value *DstPtr = CB.getArgOperand(2);3215Value *Ordering = CB.getArgOperand(3);3216// Convert the call to have at least Acquire ordering to make sure3217// the shadow operations aren't reordered before it.3218Value *NewOrdering =3219IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);3220CB.setArgOperand(3, NewOrdering);32213222IRBuilder<> NextIRB(CB.getNextNode());3223NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());32243225// TODO: Support ClCombinePointerLabelsOnLoad3226// TODO: Support ClEventCallbacks32273228NextIRB.CreateCall(3229DFSF.DFS.DFSanMemShadowOriginTransferFn,3230{DstPtr, SrcPtr, NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});3231}32323233Value *DFSanVisitor::makeAddReleaseOrderingTable(IRBuilder<> &IRB) {3234constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;3235uint32_t OrderingTable[NumOrderings] = {};32363237OrderingTable[(int)AtomicOrderingCABI::relaxed] =3238OrderingTable[(int)AtomicOrderingCABI::release] =3239(int)AtomicOrderingCABI::release;3240OrderingTable[(int)AtomicOrderingCABI::consume] =3241OrderingTable[(int)AtomicOrderingCABI::acquire] =3242OrderingTable[(int)AtomicOrderingCABI::acq_rel] =3243(int)AtomicOrderingCABI::acq_rel;3244OrderingTable[(int)AtomicOrderingCABI::seq_cst] =3245(int)AtomicOrderingCABI::seq_cst;32463247return ConstantDataVector::get(IRB.getContext(), OrderingTable);3248}32493250void DFSanVisitor::visitLibAtomicStore(CallBase &CB) {3251IRBuilder<> IRB(&CB);3252Value *Size = CB.getArgOperand(0);3253Value 
*SrcPtr = CB.getArgOperand(1);3254Value *DstPtr = CB.getArgOperand(2);3255Value *Ordering = CB.getArgOperand(3);3256// Convert the call to have at least Release ordering to make sure3257// the shadow operations aren't reordered after it.3258Value *NewOrdering =3259IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);3260CB.setArgOperand(3, NewOrdering);32613262// TODO: Support ClCombinePointerLabelsOnStore3263// TODO: Support ClEventCallbacks32643265IRB.CreateCall(3266DFSF.DFS.DFSanMemShadowOriginTransferFn,3267{DstPtr, SrcPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});3268}32693270void DFSanVisitor::visitLibAtomicExchange(CallBase &CB) {3271// void __atomic_exchange(size_t size, void *ptr, void *val, void *ret, int3272// ordering)3273IRBuilder<> IRB(&CB);3274Value *Size = CB.getArgOperand(0);3275Value *TargetPtr = CB.getArgOperand(1);3276Value *SrcPtr = CB.getArgOperand(2);3277Value *DstPtr = CB.getArgOperand(3);32783279// This operation is not atomic for the shadow and origin memory.3280// This could result in DFSan false positives or false negatives.3281// For now we will assume these operations are rare, and3282// the additional complexity to address this is not warrented.32833284// Current Target to Dest3285IRB.CreateCall(3286DFSF.DFS.DFSanMemShadowOriginTransferFn,3287{DstPtr, TargetPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});32883289// Current Src to Target (overriding)3290IRB.CreateCall(3291DFSF.DFS.DFSanMemShadowOriginTransferFn,3292{TargetPtr, SrcPtr, IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});3293}32943295void DFSanVisitor::visitLibAtomicCompareExchange(CallBase &CB) {3296// bool __atomic_compare_exchange(size_t size, void *ptr, void *expected, void3297// *desired, int success_order, int failure_order)3298Value *Size = CB.getArgOperand(0);3299Value *TargetPtr = CB.getArgOperand(1);3300Value *ExpectedPtr = CB.getArgOperand(2);3301Value *DesiredPtr = CB.getArgOperand(3);33023303// This operation is not atomic 
for the shadow and origin memory.3304// This could result in DFSan false positives or false negatives.3305// For now we will assume these operations are rare, and3306// the additional complexity to address this is not warrented.33073308IRBuilder<> NextIRB(CB.getNextNode());3309NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());33103311DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));33123313// If original call returned true, copy Desired to Target.3314// If original call returned false, copy Target to Expected.3315NextIRB.CreateCall(DFSF.DFS.DFSanMemShadowOriginConditionalExchangeFn,3316{NextIRB.CreateIntCast(&CB, NextIRB.getInt8Ty(), false),3317TargetPtr, ExpectedPtr, DesiredPtr,3318NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});3319}33203321void DFSanVisitor::visitCallBase(CallBase &CB) {3322Function *F = CB.getCalledFunction();3323if ((F && F->isIntrinsic()) || CB.isInlineAsm()) {3324visitInstOperands(CB);3325return;3326}33273328// Calls to this function are synthesized in wrappers, and we shouldn't3329// instrument them.3330if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())3331return;33323333LibFunc LF;3334if (DFSF.TLI.getLibFunc(CB, LF)) {3335// libatomic.a functions need to have special handling because there isn't3336// a good way to intercept them or compile the library with3337// instrumentation.3338switch (LF) {3339case LibFunc_atomic_load:3340if (!isa<CallInst>(CB)) {3341llvm::errs() << "DFSAN -- cannot instrument invoke of libatomic load. "3342"Ignoring!\n";3343break;3344}3345visitLibAtomicLoad(CB);3346return;3347case LibFunc_atomic_store:3348visitLibAtomicStore(CB);3349return;3350default:3351break;3352}3353}33543355// TODO: These are not supported by TLI? 
They are not in the enum.3356if (F && F->hasName() && !F->isVarArg()) {3357if (F->getName() == "__atomic_exchange") {3358visitLibAtomicExchange(CB);3359return;3360}3361if (F->getName() == "__atomic_compare_exchange") {3362visitLibAtomicCompareExchange(CB);3363return;3364}3365}33663367DenseMap<Value *, Function *>::iterator UnwrappedFnIt =3368DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand());3369if (UnwrappedFnIt != DFSF.DFS.UnwrappedFnMap.end())3370if (visitWrappedCallBase(*UnwrappedFnIt->second, CB))3371return;33723373IRBuilder<> IRB(&CB);33743375const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();3376FunctionType *FT = CB.getFunctionType();3377const DataLayout &DL = getDataLayout();33783379// Stores argument shadows.3380unsigned ArgOffset = 0;3381for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {3382if (ShouldTrackOrigins) {3383// Ignore overflowed origins3384Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));3385if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&3386!DFSF.DFS.isZeroShadow(ArgShadow))3387IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),3388DFSF.getArgOriginTLS(I, IRB));3389}33903391unsigned Size =3392DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));3393// Stop storing if arguments' size overflows. 
Inside a function, arguments3394// after overflow have zero shadow values.3395if (ArgOffset + Size > ArgTLSSize)3396break;3397IRB.CreateAlignedStore(DFSF.getShadow(CB.getArgOperand(I)),3398DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),3399ShadowTLSAlignment);3400ArgOffset += alignTo(Size, ShadowTLSAlignment);3401}34023403Instruction *Next = nullptr;3404if (!CB.getType()->isVoidTy()) {3405if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {3406if (II->getNormalDest()->getSinglePredecessor()) {3407Next = &II->getNormalDest()->front();3408} else {3409BasicBlock *NewBB =3410SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);3411Next = &NewBB->front();3412}3413} else {3414assert(CB.getIterator() != CB.getParent()->end());3415Next = CB.getNextNode();3416}34173418// Don't emit the epilogue for musttail call returns.3419if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())3420return;34213422// Loads the return value shadow.3423IRBuilder<> NextIRB(Next);3424unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));3425if (Size > RetvalTLSSize) {3426// Set overflowed return shadow to be zero.3427DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));3428} else {3429LoadInst *LI = NextIRB.CreateAlignedLoad(3430DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),3431ShadowTLSAlignment, "_dfsret");3432DFSF.SkipInsts.insert(LI);3433DFSF.setShadow(&CB, LI);3434DFSF.NonZeroChecks.push_back(LI);3435}34363437if (ShouldTrackOrigins) {3438LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.OriginTy,3439DFSF.getRetvalOriginTLS(), "_dfsret_o");3440DFSF.SkipInsts.insert(LI);3441DFSF.setOrigin(&CB, LI);3442}34433444DFSF.addReachesFunctionCallbacksIfEnabled(NextIRB, CB, &CB);3445}3446}34473448void DFSanVisitor::visitPHINode(PHINode &PN) {3449Type *ShadowTy = DFSF.DFS.getShadowTy(&PN);3450PHINode *ShadowPN = PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "",3451PN.getIterator());34523453// Give the shadow phi node valid predecessors to fool SplitEdge into 
working.3454Value *UndefShadow = UndefValue::get(ShadowTy);3455for (BasicBlock *BB : PN.blocks())3456ShadowPN->addIncoming(UndefShadow, BB);34573458DFSF.setShadow(&PN, ShadowPN);34593460PHINode *OriginPN = nullptr;3461if (DFSF.DFS.shouldTrackOrigins()) {3462OriginPN = PHINode::Create(DFSF.DFS.OriginTy, PN.getNumIncomingValues(), "",3463PN.getIterator());3464Value *UndefOrigin = UndefValue::get(DFSF.DFS.OriginTy);3465for (BasicBlock *BB : PN.blocks())3466OriginPN->addIncoming(UndefOrigin, BB);3467DFSF.setOrigin(&PN, OriginPN);3468}34693470DFSF.PHIFixups.push_back({&PN, ShadowPN, OriginPN});3471}34723473PreservedAnalyses DataFlowSanitizerPass::run(Module &M,3474ModuleAnalysisManager &AM) {3475auto GetTLI = [&](Function &F) -> TargetLibraryInfo & {3476auto &FAM =3477AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();3478return FAM.getResult<TargetLibraryAnalysis>(F);3479};3480if (!DataFlowSanitizer(ABIListFiles).runImpl(M, GetTLI))3481return PreservedAnalyses::all();34823483PreservedAnalyses PA = PreservedAnalyses::none();3484// GlobalsAA is considered stateless and does not get invalidated unless3485// explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers3486// make changes that require GlobalsAA to be invalidated.3487PA.abandon<GlobalsAA>();3488return PA;3489}349034913492