Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
35266 views
//===- PoisonChecking.cpp - -----------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// Implements a transform pass which instruments IR such that poison semantics9// are made explicit. That is, it provides a (possibly partial) executable10// semantics for every instruction w.r.t. poison as specified in the LLVM11// LangRef. There are obvious parallels to the sanitizer tools, but this pass12// is focused purely on the semantics of LLVM IR, not any particular source13// language. If you're looking for something to see if your C/C++ contains14// UB, this is not it.15//16// The rewritten semantics of each instruction will include the following17// components:18//19// 1) The original instruction, unmodified.20// 2) A propagation rule which translates dynamic information about the poison21// state of each input to whether the dynamic output of the instruction22// produces poison.23// 3) A creation rule which validates any poison producing flags on the24// instruction itself (e.g. checks for overflow on nsw).25// 4) A check rule which traps (to a handler function) if this instruction must26// execute undefined behavior given the poison state of it's inputs.27//28// This is a must analysis based transform; that is, the resulting code may29// produce a false negative result (not report UB when actually exists30// according to the LangRef spec), but should never produce a false positive31// (report UB where it doesn't exist).32//33// Use cases for this pass include:34// - Understanding (and testing!) the implications of the definition of poison35// from the LangRef.36// - Validating the output of a IR fuzzer to ensure that all programs produced37// are well defined on the specific input used.38// - Finding/confirming poison specific miscompiles by checking the poison39// status of an input/IR pair is the same before and after an optimization40// transform.41// - Checking that a bugpoint reduction does not introduce UB which didn't42// exist in the original program being reduced.43//44// The major sources of inaccuracy are currently:45// - Most validation rules not yet implemented for instructions with poison46// relavant flags. At the moment, only nsw/nuw on add/sub are supported.47// - UB which is control dependent on a branch on poison is not yet48// reported. Currently, only data flow dependence is modeled.49// - Poison which is propagated through memory is not modeled. As such,50// storing poison to memory and then reloading it will cause a false negative51// as we consider the reloaded value to not be poisoned.52// - Poison propagation across function boundaries is not modeled. At the53// moment, all arguments and return values are assumed not to be poison.54// - Undef is not modeled. In particular, the optimizer's freedom to pick55// concrete values for undef bits so as to maximize potential for producing56// poison is not modeled.57//58//===----------------------------------------------------------------------===//5960#include "llvm/Transforms/Instrumentation/PoisonChecking.h"61#include "llvm/ADT/DenseMap.h"62#include "llvm/Analysis/ValueTracking.h"63#include "llvm/IR/IRBuilder.h"64#include "llvm/IR/Module.h"65#include "llvm/Support/CommandLine.h"6667using namespace llvm;6869#define DEBUG_TYPE "poison-checking"7071static cl::opt<bool>72LocalCheck("poison-checking-function-local",73cl::init(false),74cl::desc("Check that returns are non-poison (for testing)"));757677static bool isConstantFalse(Value* V) {78assert(V->getType()->isIntegerTy(1));79if (auto *CI = dyn_cast<ConstantInt>(V))80return CI->isZero();81return false;82}8384static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {85if (Ops.size() == 0)86return B.getFalse();87unsigned i = 0;88for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}89if (i == Ops.size())90return B.getFalse();91Value *Accum = Ops[i++];92for (Value *Op : llvm::drop_begin(Ops, i))93if (!isConstantFalse(Op))94Accum = B.CreateOr(Accum, Op);95return Accum;96}9798static void generateCreationChecksForBinOp(Instruction &I,99SmallVectorImpl<Value*> &Checks) {100assert(isa<BinaryOperator>(I));101102IRBuilder<> B(&I);103Value *LHS = I.getOperand(0);104Value *RHS = I.getOperand(1);105switch (I.getOpcode()) {106default:107return;108case Instruction::Add: {109if (I.hasNoSignedWrap()) {110auto *OverflowOp =111B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);112Checks.push_back(B.CreateExtractValue(OverflowOp, 1));113}114if (I.hasNoUnsignedWrap()) {115auto *OverflowOp =116B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);117Checks.push_back(B.CreateExtractValue(OverflowOp, 1));118}119break;120}121case Instruction::Sub: {122if (I.hasNoSignedWrap()) {123auto *OverflowOp =124B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);125Checks.push_back(B.CreateExtractValue(OverflowOp, 1));126}127if (I.hasNoUnsignedWrap()) {128auto *OverflowOp =129B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);130Checks.push_back(B.CreateExtractValue(OverflowOp, 1));131}132break;133}134case Instruction::Mul: {135if (I.hasNoSignedWrap()) {136auto *OverflowOp =137B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);138Checks.push_back(B.CreateExtractValue(OverflowOp, 1));139}140if (I.hasNoUnsignedWrap()) {141auto *OverflowOp =142B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);143Checks.push_back(B.CreateExtractValue(OverflowOp, 1));144}145break;146}147case Instruction::UDiv: {148if (I.isExact()) {149auto *Check =150B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),151ConstantInt::get(LHS->getType(), 0));152Checks.push_back(Check);153}154break;155}156case Instruction::SDiv: {157if (I.isExact()) {158auto *Check =159B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),160ConstantInt::get(LHS->getType(), 0));161Checks.push_back(Check);162}163break;164}165case Instruction::AShr:166case Instruction::LShr:167case Instruction::Shl: {168Value *ShiftCheck =169B.CreateICmp(ICmpInst::ICMP_UGE, RHS,170ConstantInt::get(RHS->getType(),171LHS->getType()->getScalarSizeInBits()));172Checks.push_back(ShiftCheck);173break;174}175};176}177178/// Given an instruction which can produce poison on non-poison inputs179/// (i.e. canCreatePoison returns true), generate runtime checks to produce180/// boolean indicators of when poison would result.181static void generateCreationChecks(Instruction &I,182SmallVectorImpl<Value*> &Checks) {183IRBuilder<> B(&I);184if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())185generateCreationChecksForBinOp(I, Checks);186187// Handle non-binops separately188switch (I.getOpcode()) {189default:190// Note there are a couple of missing cases here, once implemented, this191// should become an llvm_unreachable.192break;193case Instruction::ExtractElement: {194Value *Vec = I.getOperand(0);195auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());196if (!VecVTy)197break;198Value *Idx = I.getOperand(1);199unsigned NumElts = VecVTy->getNumElements();200Value *Check =201B.CreateICmp(ICmpInst::ICMP_UGE, Idx,202ConstantInt::get(Idx->getType(), NumElts));203Checks.push_back(Check);204break;205}206case Instruction::InsertElement: {207Value *Vec = I.getOperand(0);208auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());209if (!VecVTy)210break;211Value *Idx = I.getOperand(2);212unsigned NumElts = VecVTy->getNumElements();213Value *Check =214B.CreateICmp(ICmpInst::ICMP_UGE, Idx,215ConstantInt::get(Idx->getType(), NumElts));216Checks.push_back(Check);217break;218}219};220}221222static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {223auto Itr = ValToPoison.find(V);224if (Itr != ValToPoison.end())225return Itr->second;226if (isa<Constant>(V)) {227return ConstantInt::getFalse(V->getContext());228}229// Return false for unknwon values - this implements a non-strict mode where230// unhandled IR constructs are simply considered to never produce poison. At231// some point in the future, we probably want a "strict mode" for testing if232// nothing else.233return ConstantInt::getFalse(V->getContext());234}235236static void CreateAssert(IRBuilder<> &B, Value *Cond) {237assert(Cond->getType()->isIntegerTy(1));238if (auto *CI = dyn_cast<ConstantInt>(Cond))239if (CI->isAllOnesValue())240return;241242Module *M = B.GetInsertBlock()->getModule();243M->getOrInsertFunction("__poison_checker_assert",244Type::getVoidTy(M->getContext()),245Type::getInt1Ty(M->getContext()));246Function *TrapFunc = M->getFunction("__poison_checker_assert");247B.CreateCall(TrapFunc, Cond);248}249250static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {251assert(Cond->getType()->isIntegerTy(1));252CreateAssert(B, B.CreateNot(Cond));253}254255static bool rewrite(Function &F) {256auto * const Int1Ty = Type::getInt1Ty(F.getContext());257258DenseMap<Value *, Value *> ValToPoison;259260for (BasicBlock &BB : F)261for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {262auto *OldPHI = cast<PHINode>(&*I);263auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues());264for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)265NewPHI->addIncoming(UndefValue::get(Int1Ty),266OldPHI->getIncomingBlock(i));267NewPHI->insertBefore(OldPHI);268ValToPoison[OldPHI] = NewPHI;269}270271for (BasicBlock &BB : F)272for (Instruction &I : BB) {273if (isa<PHINode>(I)) continue;274275IRBuilder<> B(cast<Instruction>(&I));276277// Note: There are many more sources of documented UB, but this pass only278// attempts to find UB triggered by propagation of poison.279SmallVector<const Value *, 4> NonPoisonOps;280SmallPtrSet<const Value *, 4> SeenNonPoisonOps;281getGuaranteedNonPoisonOps(&I, NonPoisonOps);282for (const Value *Op : NonPoisonOps)283if (SeenNonPoisonOps.insert(Op).second)284CreateAssertNot(B,285getPoisonFor(ValToPoison, const_cast<Value *>(Op)));286287if (LocalCheck)288if (auto *RI = dyn_cast<ReturnInst>(&I))289if (RI->getNumOperands() != 0) {290Value *Op = RI->getOperand(0);291CreateAssertNot(B, getPoisonFor(ValToPoison, Op));292}293294SmallVector<Value*, 4> Checks;295for (const Use &U : I.operands()) {296if (ValToPoison.count(U) && propagatesPoison(U))297Checks.push_back(getPoisonFor(ValToPoison, U));298}299300if (canCreatePoison(cast<Operator>(&I)))301generateCreationChecks(I, Checks);302ValToPoison[&I] = buildOrChain(B, Checks);303}304305for (BasicBlock &BB : F)306for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {307auto *OldPHI = cast<PHINode>(&*I);308if (!ValToPoison.count(OldPHI))309continue; // skip the newly inserted phis310auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);311for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {312auto *OldVal = OldPHI->getIncomingValue(i);313NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));314}315}316return true;317}318319320PreservedAnalyses PoisonCheckingPass::run(Module &M,321ModuleAnalysisManager &AM) {322bool Changed = false;323for (auto &F : M)324Changed |= rewrite(F);325326return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();327}328329PreservedAnalyses PoisonCheckingPass::run(Function &F,330FunctionAnalysisManager &AM) {331return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();332}333334/* Major TODO Items:335- Control dependent poison UB336- Strict mode - (i.e. must analyze every operand)337- Poison through memory338- Function ABIs339- Full coverage of intrinsics, etc.. (ouch)340341Instructions w/Unclear Semantics:342- shufflevector - It would seem reasonable for an out of bounds mask element343to produce poison, but the LangRef does not state.344- all binary ops w/vector operands - The likely interpretation would be that345any element overflowing should produce poison for the entire result, but346the LangRef does not state.347- Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems348strange that only certian flags should be documented as producing poison.349350Cases of clear poison semantics not yet implemented:351- Exact flags on ashr/lshr produce poison352- NSW/NUW flags on shl produce poison353- Inbounds flag on getelementptr produce poison354- fptosi/fptoui (out of bounds input) produce poison355- Scalable vector types for insertelement/extractelement356- Floating point binary ops w/fmf nnan/noinfs flags produce poison357*/358359360