Path: blob/main/contrib/llvm-project/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
35233 views
//===-- RandomIRBuilder.cpp -----------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "llvm/FuzzMutate/RandomIRBuilder.h"9#include "llvm/ADT/STLExtras.h"10#include "llvm/FuzzMutate/OpDescriptor.h"11#include "llvm/FuzzMutate/Random.h"12#include "llvm/IR/BasicBlock.h"13#include "llvm/IR/Constants.h"14#include "llvm/IR/DataLayout.h"15#include "llvm/IR/Dominators.h"16#include "llvm/IR/Function.h"17#include "llvm/IR/Instructions.h"18#include "llvm/IR/IntrinsicInst.h"19#include "llvm/IR/Module.h"2021using namespace llvm;22using namespace fuzzerop;2324/// Return a vector of Blocks that dominates this block, excluding current25/// block.26static std::vector<BasicBlock *> getDominators(BasicBlock *BB) {27std::vector<BasicBlock *> ret;28DominatorTree DT(*BB->getParent());29DomTreeNode *Node = DT.getNode(BB);30// It's possible that an orphan block is not in the dom tree. In that case we31// just return nothing.32if (!Node)33return ret;34Node = Node->getIDom();35while (Node && Node->getBlock()) {36ret.push_back(Node->getBlock());37// Get parent block.38Node = Node->getIDom();39}40return ret;41}4243/// Return a vector of Blocks that is dominated by this block, excluding current44/// block45static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) {46DominatorTree DT(*BB->getParent());47std::vector<BasicBlock *> ret;48DomTreeNode *Parent = DT.getNode(BB);49// It's possible that an orphan block is not in the dom tree. In that case we50// just return nothing.51if (!Parent)52return ret;53for (DomTreeNode *Child : Parent->children())54ret.push_back(Child->getBlock());55uint64_t Idx = 0;56while (Idx < ret.size()) {57DomTreeNode *Node = DT[ret[Idx]];58Idx++;59for (DomTreeNode *Child : Node->children())60ret.push_back(Child->getBlock());61}62return ret;63}6465AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty,66Value *Init) {67/// TODO: For all Allocas, maybe allocate an array.68BasicBlock *EntryBB = &F->getEntryBlock();69DataLayout DL(F->getParent());70AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A",71&*EntryBB->getFirstInsertionPt());72if (Init)73new StoreInst(Init, Alloca, Alloca->getNextNode());74return Alloca;75}7677std::pair<GlobalVariable *, bool>78RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs,79fuzzerop::SourcePred Pred) {80auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) {81// Can't directly compare GV's type, as it would be a pointer to the actual82// type.83return Pred.matches(Srcs, UndefValue::get(GV->getValueType()));84};85bool DidCreate = false;86SmallVector<GlobalVariable *, 4> GlobalVars;87for (GlobalVariable &GV : M->globals()) {88GlobalVars.push_back(&GV);89}90auto RS = makeSampler(Rand, make_filter_range(GlobalVars, MatchesPred));91RS.sample(nullptr, 1);92GlobalVariable *GV = RS.getSelection();93if (!GV) {94DidCreate = true;95using LinkageTypes = GlobalVariable::LinkageTypes;96auto TRS = makeSampler<Constant *>(Rand);97TRS.sample(Pred.generate(Srcs, KnownTypes));98Constant *Init = TRS.getSelection();99Type *Ty = Init->getType();100GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init,101"G", nullptr,102GlobalValue::ThreadLocalMode::NotThreadLocal,103M->getDataLayout().getDefaultGlobalsAddressSpace());104}105return {GV, DidCreate};106}107108Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,109ArrayRef<Instruction *> Insts) {110return findOrCreateSource(BB, Insts, {}, anyType());111}112113Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,114ArrayRef<Instruction *> Insts,115ArrayRef<Value *> Srcs,116SourcePred Pred,117bool allowConstant) {118auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Srcs, V); };119SmallVector<uint64_t, 8> SrcTys;120for (uint64_t i = 0; i < EndOfValueSource; i++)121SrcTys.push_back(i);122std::shuffle(SrcTys.begin(), SrcTys.end(), Rand);123for (uint64_t SrcTy : SrcTys) {124switch (SrcTy) {125case SrcFromInstInCurBlock: {126auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred));127if (!RS.isEmpty()) {128return RS.getSelection();129}130break;131}132case FunctionArgument: {133Function *F = BB.getParent();134SmallVector<Argument *, 8> Args;135for (uint64_t i = 0; i < F->arg_size(); i++) {136Args.push_back(F->getArg(i));137}138auto RS = makeSampler(Rand, make_filter_range(Args, MatchesPred));139if (!RS.isEmpty()) {140return RS.getSelection();141}142break;143}144case InstInDominator: {145auto Dominators = getDominators(&BB);146std::shuffle(Dominators.begin(), Dominators.end(), Rand);147for (BasicBlock *Dom : Dominators) {148SmallVector<Instruction *, 16> Instructions;149for (Instruction &I : *Dom) {150Instructions.push_back(&I);151}152auto RS =153makeSampler(Rand, make_filter_range(Instructions, MatchesPred));154// Also consider choosing no source, meaning we want a new one.155if (!RS.isEmpty()) {156return RS.getSelection();157}158}159break;160}161case SrcFromGlobalVariable: {162Module *M = BB.getParent()->getParent();163auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred);164Type *Ty = GV->getValueType();165LoadInst *LoadGV = nullptr;166if (BB.getTerminator()) {167LoadGV = new LoadInst(Ty, GV, "LGV", &*BB.getFirstInsertionPt());168} else {169LoadGV = new LoadInst(Ty, GV, "LGV", &BB);170}171// Because we might be generating new values, we have to check if it172// matches again.173if (DidCreate) {174if (Pred.matches(Srcs, LoadGV)) {175return LoadGV;176}177LoadGV->eraseFromParent();178// If no one is using this GlobalVariable, delete it too.179if (GV->use_empty()) {180GV->eraseFromParent();181}182}183break;184}185case NewConstOrStack: {186return newSource(BB, Insts, Srcs, Pred, allowConstant);187}188default:189case EndOfValueSource: {190llvm_unreachable("EndOfValueSource executed");191}192}193}194llvm_unreachable("Can't find a source");195}196197Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,198ArrayRef<Value *> Srcs, SourcePred Pred,199bool allowConstant) {200// Generate some constants to choose from.201auto RS = makeSampler<Value *>(Rand);202RS.sample(Pred.generate(Srcs, KnownTypes));203204// If we can find a pointer to load from, use it half the time.205Value *Ptr = findPointer(BB, Insts);206if (Ptr) {207// Create load from the chosen pointer208auto IP = BB.getFirstInsertionPt();209if (auto *I = dyn_cast<Instruction>(Ptr)) {210IP = ++I->getIterator();211assert(IP != BB.end() && "guaranteed by the findPointer");212}213// Pick the type independently.214Type *AccessTy = RS.getSelection()->getType();215auto *NewLoad = new LoadInst(AccessTy, Ptr, "L", &*IP);216217// Only sample this load if it really matches the descriptor218if (Pred.matches(Srcs, NewLoad))219RS.sample(NewLoad, RS.totalWeight());220else221NewLoad->eraseFromParent();222}223224Value *newSrc = RS.getSelection();225// Generate a stack alloca and store the constant to it if constant is not226// allowed, our hope is that later mutations can generate some values and227// store to this placeholder.228if (!allowConstant && isa<Constant>(newSrc)) {229Type *Ty = newSrc->getType();230Function *F = BB.getParent();231AllocaInst *Alloca = createStackMemory(F, Ty, newSrc);232if (BB.getTerminator()) {233newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", BB.getTerminator());234} else {235newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", &BB);236}237}238return newSrc;239}240241static bool isCompatibleReplacement(const Instruction *I, const Use &Operand,242const Value *Replacement) {243unsigned int OperandNo = Operand.getOperandNo();244if (Operand->getType() != Replacement->getType())245return false;246switch (I->getOpcode()) {247case Instruction::GetElementPtr:248case Instruction::ExtractElement:249case Instruction::ExtractValue:250// TODO: We could potentially validate these, but for now just leave indices251// alone.252if (OperandNo >= 1)253return false;254break;255case Instruction::InsertValue:256case Instruction::InsertElement:257case Instruction::ShuffleVector:258if (OperandNo >= 2)259return false;260break;261// For Br/Switch, we only try to modify the 1st Operand (condition).262// Modify other operands, like switch case may accidently change case from263// ConstantInt to a register, which is illegal.264case Instruction::Switch:265case Instruction::Br:266if (OperandNo >= 1)267return false;268break;269case Instruction::Call:270case Instruction::Invoke:271case Instruction::CallBr: {272const Function *Callee = cast<CallBase>(I)->getCalledFunction();273// If it's an indirect call, give up.274if (!Callee)275return false;276// If callee is not an intrinsic, operand 0 is the function to be called.277// Since we cannot assume that the replacement is a function pointer,278// we give up.279if (!Callee->getIntrinsicID() && OperandNo == 0)280return false;281return !Callee->hasParamAttribute(OperandNo, Attribute::ImmArg);282}283default:284break;285}286return true;287}288289Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB,290ArrayRef<Instruction *> Insts,291Value *V) {292SmallVector<uint64_t, 8> SinkTys;293for (uint64_t i = 0; i < EndOfValueSink; i++)294SinkTys.push_back(i);295std::shuffle(SinkTys.begin(), SinkTys.end(), Rand);296auto findSinkAndConnect =297[this, V](ArrayRef<Instruction *> Instructions) -> Instruction * {298auto RS = makeSampler<Use *>(Rand);299for (auto &I : Instructions) {300for (Use &U : I->operands())301if (isCompatibleReplacement(I, U, V))302RS.sample(&U, 1);303}304if (!RS.isEmpty()) {305Use *Sink = RS.getSelection();306User *U = Sink->getUser();307unsigned OpNo = Sink->getOperandNo();308U->setOperand(OpNo, V);309return cast<Instruction>(U);310}311return nullptr;312};313Instruction *Sink = nullptr;314for (uint64_t SinkTy : SinkTys) {315switch (SinkTy) {316case SinkToInstInCurBlock:317Sink = findSinkAndConnect(Insts);318if (Sink)319return Sink;320break;321case PointersInDominator: {322auto Dominators = getDominators(&BB);323std::shuffle(Dominators.begin(), Dominators.end(), Rand);324for (BasicBlock *Dom : Dominators) {325for (Instruction &I : *Dom) {326if (isa<PointerType>(I.getType()))327return new StoreInst(V, &I, Insts.back());328}329}330break;331}332case InstInDominatee: {333auto Dominatees = getDominatees(&BB);334std::shuffle(Dominatees.begin(), Dominatees.end(), Rand);335for (BasicBlock *Dominee : Dominatees) {336std::vector<Instruction *> Instructions;337for (Instruction &I : *Dominee)338Instructions.push_back(&I);339Sink = findSinkAndConnect(Instructions);340if (Sink) {341return Sink;342}343}344break;345}346case NewStore:347/// TODO: allocate a new stack memory.348return newSink(BB, Insts, V);349case SinkToGlobalVariable: {350Module *M = BB.getParent()->getParent();351auto [GV, DidCreate] =352findOrCreateGlobalVariable(M, {}, fuzzerop::onlyType(V->getType()));353return new StoreInst(V, GV, Insts.back());354}355case EndOfValueSink:356default:357llvm_unreachable("EndOfValueSink executed");358}359}360llvm_unreachable("Can't find a sink");361}362363Instruction *RandomIRBuilder::newSink(BasicBlock &BB,364ArrayRef<Instruction *> Insts, Value *V) {365Value *Ptr = findPointer(BB, Insts);366if (!Ptr) {367if (uniform(Rand, 0, 1)) {368Type *Ty = V->getType();369Ptr = createStackMemory(BB.getParent(), Ty, UndefValue::get(Ty));370} else {371Ptr = UndefValue::get(PointerType::get(V->getType(), 0));372}373}374375return new StoreInst(V, Ptr, Insts.back());376}377378Value *RandomIRBuilder::findPointer(BasicBlock &BB,379ArrayRef<Instruction *> Insts) {380auto IsMatchingPtr = [](Instruction *Inst) {381// Invoke instructions sometimes produce valid pointers but currently382// we can't insert loads or stores from them383if (Inst->isTerminator())384return false;385386return Inst->getType()->isPointerTy();387};388if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr)))389return RS.getSelection();390return nullptr;391}392393Type *RandomIRBuilder::randomType() {394uint64_t TyIdx = uniform<uint64_t>(Rand, 0, KnownTypes.size() - 1);395return KnownTypes[TyIdx];396}397398Function *RandomIRBuilder::createFunctionDeclaration(Module &M,399uint64_t ArgNum) {400Type *RetType = randomType();401402SmallVector<Type *, 2> Args;403for (uint64_t i = 0; i < ArgNum; i++) {404Args.push_back(randomType());405}406407Function *F = Function::Create(FunctionType::get(RetType, Args,408/*isVarArg=*/false),409GlobalValue::ExternalLinkage, "f", &M);410return F;411}412Function *RandomIRBuilder::createFunctionDeclaration(Module &M) {413return createFunctionDeclaration(414M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));415}416417Function *RandomIRBuilder::createFunctionDefinition(Module &M,418uint64_t ArgNum) {419Function *F = this->createFunctionDeclaration(M, ArgNum);420421// TODO: Some arguments and a return value would probably be more422// interesting.423LLVMContext &Context = M.getContext();424DataLayout DL(&M);425BasicBlock *BB = BasicBlock::Create(Context, "BB", F);426Type *RetTy = F->getReturnType();427if (RetTy != Type::getVoidTy(Context)) {428Instruction *RetAlloca =429new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB);430Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB);431ReturnInst::Create(Context, RetLoad, BB);432} else {433ReturnInst::Create(Context, BB);434}435436return F;437}438Function *RandomIRBuilder::createFunctionDefinition(Module &M) {439return createFunctionDefinition(440M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));441}442443444