Path: blob/main/contrib/llvm-project/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp
35291 views
//===-- DifferenceEngine.cpp - Structural function/module comparison ------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This header defines the implementation of the LLVM difference9// engine, which structurally compares global values within a module.10//11//===----------------------------------------------------------------------===//1213#include "DifferenceEngine.h"14#include "llvm/ADT/DenseMap.h"15#include "llvm/ADT/DenseSet.h"16#include "llvm/ADT/SmallString.h"17#include "llvm/ADT/SmallVector.h"18#include "llvm/ADT/StringSet.h"19#include "llvm/IR/BasicBlock.h"20#include "llvm/IR/CFG.h"21#include "llvm/IR/Constants.h"22#include "llvm/IR/Function.h"23#include "llvm/IR/Instructions.h"24#include "llvm/IR/Module.h"25#include "llvm/Support/ErrorHandling.h"26#include "llvm/Support/raw_ostream.h"27#include "llvm/Support/type_traits.h"28#include <utility>2930using namespace llvm;3132namespace {3334/// A priority queue, implemented as a heap.35template <class T, class Sorter, unsigned InlineCapacity>36class PriorityQueue {37Sorter Precedes;38llvm::SmallVector<T, InlineCapacity> Storage;3940public:41PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {}4243/// Checks whether the heap is empty.44bool empty() const { return Storage.empty(); }4546/// Insert a new value on the heap.47void insert(const T &V) {48unsigned Index = Storage.size();49Storage.push_back(V);50if (Index == 0) return;5152T *data = Storage.data();53while (true) {54unsigned Target = (Index + 1) / 2 - 1;55if (!Precedes(data[Index], data[Target])) return;56std::swap(data[Index], data[Target]);57if (Target == 0) return;58Index = Target;59}60}6162/// Remove the minimum value in the heap. Only valid on a non-empty heap.63T remove_min() {64assert(!empty());65T tmp = Storage[0];6667unsigned NewSize = Storage.size() - 1;68if (NewSize) {69// Move the slot at the end to the beginning.70if (std::is_trivially_copyable<T>::value)71Storage[0] = Storage[NewSize];72else73std::swap(Storage[0], Storage[NewSize]);7475// Bubble the root up as necessary.76unsigned Index = 0;77while (true) {78// With a 1-based index, the children would be Index*2 and Index*2+1.79unsigned R = (Index + 1) * 2;80unsigned L = R - 1;8182// If R is out of bounds, we're done after this in any case.83if (R >= NewSize) {84// If L is also out of bounds, we're done immediately.85if (L >= NewSize) break;8687// Otherwise, test whether we should swap L and Index.88if (Precedes(Storage[L], Storage[Index]))89std::swap(Storage[L], Storage[Index]);90break;91}9293// Otherwise, we need to compare with the smaller of L and R.94// Prefer R because it's closer to the end of the array.95unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R);9697// If Index is >= the min of L and R, then heap ordering is restored.98if (!Precedes(Storage[IndexToTest], Storage[Index]))99break;100101// Otherwise, keep bubbling up.102std::swap(Storage[IndexToTest], Storage[Index]);103Index = IndexToTest;104}105}106Storage.pop_back();107108return tmp;109}110};111112/// A function-scope difference engine.113class FunctionDifferenceEngine {114DifferenceEngine &Engine;115116// Some initializers may reference the variable we're currently checking. This117// can cause an infinite loop. The Saved[LR]HS ivars can be checked to prevent118// recursing.119const Value *SavedLHS;120const Value *SavedRHS;121122// The current mapping from old local values to new local values.123DenseMap<const Value *, const Value *> Values;124125// The current mapping from old blocks to new blocks.126DenseMap<const BasicBlock *, const BasicBlock *> Blocks;127128// The tentative mapping from old local values while comparing a pair of129// basic blocks. Once the pair has been processed, the tentative mapping is130// committed to the Values map.131DenseSet<std::pair<const Value *, const Value *>> TentativeValues;132133// Equivalence Assumptions134//135// For basic blocks in loops, some values in phi nodes may depend on136// values from not yet processed basic blocks in the loop. When encountering137// such values, we optimistically asssume their equivalence and store this138// assumption in a BlockDiffCandidate for the pair of compared BBs.139//140// Once we have diffed all BBs, for every BlockDiffCandidate, we check all141// stored assumptions using the Values map that stores proven equivalences142// between the old and new values, and report a diff if an assumption cannot143// be proven to be true.144//145// Note that after having made an assumption, all further determined146// equivalences implicitly depend on that assumption. These will not be147// reverted or reported if the assumption proves to be false, because these148// are considered indirect diffs caused by earlier direct diffs.149//150// We aim to avoid false negatives in llvm-diff, that is, ensure that151// whenever no diff is reported, the functions are indeed equal. If152// assumptions were made, this is not entirely clear, because in principle we153// could end up with a circular proof where the proof of equivalence of two154// nodes is depending on the assumption of their equivalence.155//156// To see that assumptions do not add false negatives, note that if we do not157// report a diff, this means that there is an equivalence mapping between old158// and new values that is consistent with all assumptions made. The circular159// dependency that exists on an IR value level does not exist at run time,160// because the values selected by the phi nodes must always already have been161// computed. Hence, we can prove equivalence of the old and new functions by162// considering step-wise parallel execution, and incrementally proving163// equivalence of every new computed value. Another way to think about it is164// to imagine cloning the loop BBs for every iteration, turning the loops165// into (possibly infinite) DAGs, and proving equivalence by induction on the166// iteration, using the computed value mapping.167168// The class BlockDiffCandidate stores pairs which either have already been169// proven to differ, or pairs whose equivalence depends on assumptions to be170// verified later.171struct BlockDiffCandidate {172const BasicBlock *LBB;173const BasicBlock *RBB;174// Maps old values to assumed-to-be-equivalent new values175SmallDenseMap<const Value *, const Value *> EquivalenceAssumptions;176// If set, we already know the blocks differ.177bool KnownToDiffer;178};179180// List of block diff candidates in the order found by processing.181// We generate reports in this order.182// For every LBB, there may only be one corresponding RBB.183SmallVector<BlockDiffCandidate> BlockDiffCandidates;184// Maps LBB to the index of its BlockDiffCandidate, if existing.185DenseMap<const BasicBlock *, uint64_t> BlockDiffCandidateIndices;186187// Note: Every LBB must always be queried together with the same RBB.188// The returned reference is not permanently valid and should not be stored.189BlockDiffCandidate &getOrCreateBlockDiffCandidate(const BasicBlock *LBB,190const BasicBlock *RBB) {191auto It = BlockDiffCandidateIndices.find(LBB);192// Check if LBB already has a diff candidate193if (It == BlockDiffCandidateIndices.end()) {194// Add new one195BlockDiffCandidateIndices[LBB] = BlockDiffCandidates.size();196BlockDiffCandidates.push_back(197{LBB, RBB, SmallDenseMap<const Value *, const Value *>(), false});198return BlockDiffCandidates.back();199}200// Use existing one201BlockDiffCandidate &Result = BlockDiffCandidates[It->second];202assert(Result.RBB == RBB && "Inconsistent basic block pairing!");203return Result;204}205206// Optionally passed to equivalence checker functions, so these can add207// assumptions in BlockDiffCandidates. Its presence controls whether208// assumptions are generated.209struct AssumptionContext {210// The two basic blocks that need the two compared values to be equivalent.211const BasicBlock *LBB;212const BasicBlock *RBB;213};214215unsigned getUnprocPredCount(const BasicBlock *Block) const {216return llvm::count_if(predecessors(Block), [&](const BasicBlock *Pred) {217return !Blocks.contains(Pred);218});219}220221typedef std::pair<const BasicBlock *, const BasicBlock *> BlockPair;222223/// A type which sorts a priority queue by the number of unprocessed224/// predecessor blocks it has remaining.225///226/// This is actually really expensive to calculate.227struct QueueSorter {228const FunctionDifferenceEngine &fde;229explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {}230231bool operator()(BlockPair &Old, BlockPair &New) {232return fde.getUnprocPredCount(Old.first)233< fde.getUnprocPredCount(New.first);234}235};236237/// A queue of unified blocks to process.238PriorityQueue<BlockPair, QueueSorter, 20> Queue;239240/// Try to unify the given two blocks. Enqueues them for processing241/// if they haven't already been processed.242///243/// Returns true if there was a problem unifying them.244bool tryUnify(const BasicBlock *L, const BasicBlock *R) {245const BasicBlock *&Ref = Blocks[L];246247if (Ref) {248if (Ref == R) return false;249250Engine.logf("successor %l cannot be equivalent to %r; "251"it's already equivalent to %r")252<< L << R << Ref;253return true;254}255256Ref = R;257Queue.insert(BlockPair(L, R));258return false;259}260261/// Unifies two instructions, given that they're known not to have262/// structural differences.263void unify(const Instruction *L, const Instruction *R) {264DifferenceEngine::Context C(Engine, L, R);265266bool Result = diff(L, R, true, true, true);267assert(!Result && "structural differences second time around?");268(void) Result;269if (!L->use_empty())270Values[L] = R;271}272273void processQueue() {274while (!Queue.empty()) {275BlockPair Pair = Queue.remove_min();276diff(Pair.first, Pair.second);277}278}279280void checkAndReportDiffCandidates() {281for (BlockDiffCandidate &BDC : BlockDiffCandidates) {282283// Check assumptions284for (const auto &[L, R] : BDC.EquivalenceAssumptions) {285auto It = Values.find(L);286if (It == Values.end() || It->second != R) {287BDC.KnownToDiffer = true;288break;289}290}291292// Run block diff if the BBs differ293if (BDC.KnownToDiffer) {294DifferenceEngine::Context C(Engine, BDC.LBB, BDC.RBB);295runBlockDiff(BDC.LBB->begin(), BDC.RBB->begin());296}297}298}299300void diff(const BasicBlock *L, const BasicBlock *R) {301DifferenceEngine::Context C(Engine, L, R);302303BasicBlock::const_iterator LI = L->begin(), LE = L->end();304BasicBlock::const_iterator RI = R->begin();305306do {307assert(LI != LE && RI != R->end());308const Instruction *LeftI = &*LI, *RightI = &*RI;309310// If the instructions differ, start the more sophisticated diff311// algorithm at the start of the block.312if (diff(LeftI, RightI, false, false, true)) {313TentativeValues.clear();314// Register (L, R) as diffing pair. Note that we could directly emit a315// block diff here, but this way we ensure all diffs are emitted in one316// consistent order, independent of whether the diffs were detected317// immediately or via invalid assumptions.318getOrCreateBlockDiffCandidate(L, R).KnownToDiffer = true;319return;320}321322// Otherwise, tentatively unify them.323if (!LeftI->use_empty())324TentativeValues.insert(std::make_pair(LeftI, RightI));325326++LI;327++RI;328} while (LI != LE); // This is sufficient: we can't get equality of329// terminators if there are residual instructions.330331// Unify everything in the block, non-tentatively this time.332TentativeValues.clear();333for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI)334unify(&*LI, &*RI);335}336337bool matchForBlockDiff(const Instruction *L, const Instruction *R);338void runBlockDiff(BasicBlock::const_iterator LI,339BasicBlock::const_iterator RI);340341bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) {342// FIXME: call attributes343AssumptionContext AC = {L.getParent(), R.getParent()};344if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand(),345&AC)) {346if (Complain) Engine.log("called functions differ");347return true;348}349if (L.arg_size() != R.arg_size()) {350if (Complain) Engine.log("argument counts differ");351return true;352}353for (unsigned I = 0, E = L.arg_size(); I != E; ++I)354if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I), &AC)) {355if (Complain)356Engine.logf("arguments %l and %r differ")357<< L.getArgOperand(I) << R.getArgOperand(I);358return true;359}360return false;361}362363// If AllowAssumptions is enabled, whenever we encounter a pair of values364// that we cannot prove to be equivalent, we assume equivalence and store that365// assumption to be checked later in BlockDiffCandidates.366bool diff(const Instruction *L, const Instruction *R, bool Complain,367bool TryUnify, bool AllowAssumptions) {368// FIXME: metadata (if Complain is set)369AssumptionContext ACValue = {L->getParent(), R->getParent()};370// nullptr AssumptionContext disables assumption generation.371const AssumptionContext *AC = AllowAssumptions ? &ACValue : nullptr;372373// Different opcodes always imply different operations.374if (L->getOpcode() != R->getOpcode()) {375if (Complain) Engine.log("different instruction types");376return true;377}378379if (isa<CmpInst>(L)) {380if (cast<CmpInst>(L)->getPredicate()381!= cast<CmpInst>(R)->getPredicate()) {382if (Complain) Engine.log("different predicates");383return true;384}385} else if (isa<CallInst>(L)) {386return diffCallSites(cast<CallInst>(*L), cast<CallInst>(*R), Complain);387} else if (isa<PHINode>(L)) {388const PHINode &LI = cast<PHINode>(*L);389const PHINode &RI = cast<PHINode>(*R);390391// This is really weird; type uniquing is broken?392if (LI.getType() != RI.getType()) {393if (!LI.getType()->isPointerTy() || !RI.getType()->isPointerTy()) {394if (Complain) Engine.log("different phi types");395return true;396}397}398399if (LI.getNumIncomingValues() != RI.getNumIncomingValues()) {400if (Complain)401Engine.log("PHI node # of incoming values differ");402return true;403}404405for (unsigned I = 0; I < LI.getNumIncomingValues(); ++I) {406if (TryUnify)407tryUnify(LI.getIncomingBlock(I), RI.getIncomingBlock(I));408409if (!equivalentAsOperands(LI.getIncomingValue(I),410RI.getIncomingValue(I), AC)) {411if (Complain)412Engine.log("PHI node incoming values differ");413return true;414}415}416417return false;418419// Terminators.420} else if (isa<InvokeInst>(L)) {421const InvokeInst &LI = cast<InvokeInst>(*L);422const InvokeInst &RI = cast<InvokeInst>(*R);423if (diffCallSites(LI, RI, Complain))424return true;425426if (TryUnify) {427tryUnify(LI.getNormalDest(), RI.getNormalDest());428tryUnify(LI.getUnwindDest(), RI.getUnwindDest());429}430return false;431432} else if (isa<CallBrInst>(L)) {433const CallBrInst &LI = cast<CallBrInst>(*L);434const CallBrInst &RI = cast<CallBrInst>(*R);435if (LI.getNumIndirectDests() != RI.getNumIndirectDests()) {436if (Complain)437Engine.log("callbr # of indirect destinations differ");438return true;439}440441// Perform the "try unify" step so that we can equate the indirect442// destinations before checking the call site.443for (unsigned I = 0; I < LI.getNumIndirectDests(); I++)444tryUnify(LI.getIndirectDest(I), RI.getIndirectDest(I));445446if (diffCallSites(LI, RI, Complain))447return true;448449if (TryUnify)450tryUnify(LI.getDefaultDest(), RI.getDefaultDest());451return false;452453} else if (isa<BranchInst>(L)) {454const BranchInst *LI = cast<BranchInst>(L);455const BranchInst *RI = cast<BranchInst>(R);456if (LI->isConditional() != RI->isConditional()) {457if (Complain) Engine.log("branch conditionality differs");458return true;459}460461if (LI->isConditional()) {462if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) {463if (Complain) Engine.log("branch conditions differ");464return true;465}466if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1));467}468if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0));469return false;470471} else if (isa<IndirectBrInst>(L)) {472const IndirectBrInst *LI = cast<IndirectBrInst>(L);473const IndirectBrInst *RI = cast<IndirectBrInst>(R);474if (LI->getNumDestinations() != RI->getNumDestinations()) {475if (Complain) Engine.log("indirectbr # of destinations differ");476return true;477}478479if (!equivalentAsOperands(LI->getAddress(), RI->getAddress(), AC)) {480if (Complain) Engine.log("indirectbr addresses differ");481return true;482}483484if (TryUnify) {485for (unsigned i = 0; i < LI->getNumDestinations(); i++) {486tryUnify(LI->getDestination(i), RI->getDestination(i));487}488}489return false;490491} else if (isa<SwitchInst>(L)) {492const SwitchInst *LI = cast<SwitchInst>(L);493const SwitchInst *RI = cast<SwitchInst>(R);494if (!equivalentAsOperands(LI->getCondition(), RI->getCondition(), AC)) {495if (Complain) Engine.log("switch conditions differ");496return true;497}498if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest());499500bool Difference = false;501502DenseMap<const ConstantInt *, const BasicBlock *> LCases;503for (auto Case : LI->cases())504LCases[Case.getCaseValue()] = Case.getCaseSuccessor();505506for (auto Case : RI->cases()) {507const ConstantInt *CaseValue = Case.getCaseValue();508const BasicBlock *LCase = LCases[CaseValue];509if (LCase) {510if (TryUnify)511tryUnify(LCase, Case.getCaseSuccessor());512LCases.erase(CaseValue);513} else if (Complain || !Difference) {514if (Complain)515Engine.logf("right switch has extra case %r") << CaseValue;516Difference = true;517}518}519if (!Difference)520for (DenseMap<const ConstantInt *, const BasicBlock *>::iterator521I = LCases.begin(),522E = LCases.end();523I != E; ++I) {524if (Complain)525Engine.logf("left switch has extra case %l") << I->first;526Difference = true;527}528return Difference;529} else if (isa<UnreachableInst>(L)) {530return false;531}532533if (L->getNumOperands() != R->getNumOperands()) {534if (Complain) Engine.log("instructions have different operand counts");535return true;536}537538for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {539Value *LO = L->getOperand(I), *RO = R->getOperand(I);540if (!equivalentAsOperands(LO, RO, AC)) {541if (Complain) Engine.logf("operands %l and %r differ") << LO << RO;542return true;543}544}545546return false;547}548549public:550bool equivalentAsOperands(const Constant *L, const Constant *R,551const AssumptionContext *AC) {552// Use equality as a preliminary filter.553if (L == R)554return true;555556if (L->getValueID() != R->getValueID())557return false;558559// Ask the engine about global values.560if (isa<GlobalValue>(L))561return Engine.equivalentAsOperands(cast<GlobalValue>(L),562cast<GlobalValue>(R));563564// Compare constant expressions structurally.565if (isa<ConstantExpr>(L))566return equivalentAsOperands(cast<ConstantExpr>(L), cast<ConstantExpr>(R),567AC);568569// Constants of the "same type" don't always actually have the same570// type; I don't know why. Just white-list them.571if (isa<ConstantPointerNull>(L) || isa<UndefValue>(L) || isa<ConstantAggregateZero>(L))572return true;573574// Block addresses only match if we've already encountered the575// block. FIXME: tentative matches?576if (isa<BlockAddress>(L))577return Blocks[cast<BlockAddress>(L)->getBasicBlock()]578== cast<BlockAddress>(R)->getBasicBlock();579580// If L and R are ConstantVectors, compare each element581if (isa<ConstantVector>(L)) {582const ConstantVector *CVL = cast<ConstantVector>(L);583const ConstantVector *CVR = cast<ConstantVector>(R);584if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements())585return false;586for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) {587if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i), AC))588return false;589}590return true;591}592593// If L and R are ConstantArrays, compare the element count and types.594if (isa<ConstantArray>(L)) {595const ConstantArray *CAL = cast<ConstantArray>(L);596const ConstantArray *CAR = cast<ConstantArray>(R);597// Sometimes a type may be equivalent, but not uniquified---e.g. it may598// contain a GEP instruction. Do a deeper comparison of the types.599if (CAL->getType()->getNumElements() != CAR->getType()->getNumElements())600return false;601602for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) {603if (!equivalentAsOperands(CAL->getAggregateElement(I),604CAR->getAggregateElement(I), AC))605return false;606}607608return true;609}610611// If L and R are ConstantStructs, compare each field and type.612if (isa<ConstantStruct>(L)) {613const ConstantStruct *CSL = cast<ConstantStruct>(L);614const ConstantStruct *CSR = cast<ConstantStruct>(R);615616const StructType *LTy = cast<StructType>(CSL->getType());617const StructType *RTy = cast<StructType>(CSR->getType());618619// The StructTypes should have the same attributes. Don't use620// isLayoutIdentical(), because that just checks the element pointers,621// which may not work here.622if (LTy->getNumElements() != RTy->getNumElements() ||623LTy->isPacked() != RTy->isPacked())624return false;625626for (unsigned I = 0; I < LTy->getNumElements(); I++) {627const Value *LAgg = CSL->getAggregateElement(I);628const Value *RAgg = CSR->getAggregateElement(I);629630if (LAgg == SavedLHS || RAgg == SavedRHS) {631if (LAgg != SavedLHS || RAgg != SavedRHS)632// If the left and right operands aren't both re-analyzing the633// variable, then the initialiers don't match, so report "false".634// Otherwise, we skip these operands..635return false;636637continue;638}639640if (!equivalentAsOperands(LAgg, RAgg, AC)) {641return false;642}643}644645return true;646}647648return false;649}650651bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R,652const AssumptionContext *AC) {653if (L == R)654return true;655656if (L->getOpcode() != R->getOpcode())657return false;658659switch (L->getOpcode()) {660case Instruction::GetElementPtr:661// FIXME: inbounds?662break;663664default:665break;666}667668if (L->getNumOperands() != R->getNumOperands())669return false;670671for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {672const auto *LOp = L->getOperand(I);673const auto *ROp = R->getOperand(I);674675if (LOp == SavedLHS || ROp == SavedRHS) {676if (LOp != SavedLHS || ROp != SavedRHS)677// If the left and right operands aren't both re-analyzing the678// variable, then the initialiers don't match, so report "false".679// Otherwise, we skip these operands..680return false;681682continue;683}684685if (!equivalentAsOperands(LOp, ROp, AC))686return false;687}688689return true;690}691692// There are cases where we cannot determine whether two values are693// equivalent, because it depends on not yet processed basic blocks -- see the694// documentation on assumptions.695//696// AC is the context in which we are currently performing a diff.697// When we encounter a pair of values for which we can neither prove698// equivalence nor the opposite, we do the following:699// * If AC is nullptr, we treat the pair as non-equivalent.700// * If AC is set, we add an assumption for the basic blocks given by AC,701// and treat the pair as equivalent. The assumption is checked later.702bool equivalentAsOperands(const Value *L, const Value *R,703const AssumptionContext *AC) {704// Fall out if the values have different kind.705// This possibly shouldn't take priority over oracles.706if (L->getValueID() != R->getValueID())707return false;708709// Value subtypes: Argument, Constant, Instruction, BasicBlock,710// InlineAsm, MDNode, MDString, PseudoSourceValue711712if (isa<Constant>(L))713return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R), AC);714715if (isa<Instruction>(L)) {716auto It = Values.find(L);717if (It != Values.end())718return It->second == R;719720if (TentativeValues.count(std::make_pair(L, R)))721return true;722723// L and R might be equivalent, this could depend on not yet processed724// basic blocks, so we cannot decide here.725if (AC) {726// Add an assumption, unless there is a conflict with an existing one727BlockDiffCandidate &BDC =728getOrCreateBlockDiffCandidate(AC->LBB, AC->RBB);729auto InsertionResult = BDC.EquivalenceAssumptions.insert({L, R});730if (!InsertionResult.second && InsertionResult.first->second != R) {731// We already have a conflicting equivalence assumption for L, so at732// least one must be wrong, and we know that there is a diff.733BDC.KnownToDiffer = true;734BDC.EquivalenceAssumptions.clear();735return false;736}737// Optimistically assume equivalence, and check later once all BBs738// have been processed.739return true;740}741742// Assumptions disabled, so pessimistically assume non-equivalence.743return false;744}745746if (isa<Argument>(L))747return Values[L] == R;748749if (isa<BasicBlock>(L))750return Blocks[cast<BasicBlock>(L)] != R;751752// Pretend everything else is identical.753return true;754}755756// Avoid a gcc warning about accessing 'this' in an initializer.757FunctionDifferenceEngine *this_() { return this; }758759public:760FunctionDifferenceEngine(DifferenceEngine &Engine,761const Value *SavedLHS = nullptr,762const Value *SavedRHS = nullptr)763: Engine(Engine), SavedLHS(SavedLHS), SavedRHS(SavedRHS),764Queue(QueueSorter(*this_())) {}765766void diff(const Function *L, const Function *R) {767assert(Values.empty() && "Multiple diffs per engine are not supported!");768769if (L->arg_size() != R->arg_size())770Engine.log("different argument counts");771772// Map the arguments.773for (Function::const_arg_iterator LI = L->arg_begin(), LE = L->arg_end(),774RI = R->arg_begin(), RE = R->arg_end();775LI != LE && RI != RE; ++LI, ++RI)776Values[&*LI] = &*RI;777778tryUnify(&*L->begin(), &*R->begin());779processQueue();780checkAndReportDiffCandidates();781}782};783784struct DiffEntry {785DiffEntry() = default;786787unsigned Cost = 0;788llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange789};790791bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L,792const Instruction *R) {793return !diff(L, R, false, false, false);794}795796void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart,797BasicBlock::const_iterator RStart) {798BasicBlock::const_iterator LE = LStart->getParent()->end();799BasicBlock::const_iterator RE = RStart->getParent()->end();800801unsigned NL = std::distance(LStart, LE);802803SmallVector<DiffEntry, 20> Paths1(NL+1);804SmallVector<DiffEntry, 20> Paths2(NL+1);805806DiffEntry *Cur = Paths1.data();807DiffEntry *Next = Paths2.data();808809const unsigned LeftCost = 2;810const unsigned RightCost = 2;811const unsigned MatchCost = 0;812813assert(TentativeValues.empty());814815// Initialize the first column.816for (unsigned I = 0; I != NL+1; ++I) {817Cur[I].Cost = I * LeftCost;818for (unsigned J = 0; J != I; ++J)819Cur[I].Path.push_back(DC_left);820}821822for (BasicBlock::const_iterator RI = RStart; RI != RE; ++RI) {823// Initialize the first row.824Next[0] = Cur[0];825Next[0].Cost += RightCost;826Next[0].Path.push_back(DC_right);827828unsigned Index = 1;829for (BasicBlock::const_iterator LI = LStart; LI != LE; ++LI, ++Index) {830if (matchForBlockDiff(&*LI, &*RI)) {831Next[Index] = Cur[Index-1];832Next[Index].Cost += MatchCost;833Next[Index].Path.push_back(DC_match);834TentativeValues.insert(std::make_pair(&*LI, &*RI));835} else if (Next[Index-1].Cost <= Cur[Index].Cost) {836Next[Index] = Next[Index-1];837Next[Index].Cost += LeftCost;838Next[Index].Path.push_back(DC_left);839} else {840Next[Index] = Cur[Index];841Next[Index].Cost += RightCost;842Next[Index].Path.push_back(DC_right);843}844}845846std::swap(Cur, Next);847}848849// We don't need the tentative values anymore; everything from here850// on out should be non-tentative.851TentativeValues.clear();852853SmallVectorImpl<char> &Path = Cur[NL].Path;854BasicBlock::const_iterator LI = LStart, RI = RStart;855856DiffLogBuilder Diff(Engine.getConsumer());857858// Drop trailing matches.859while (Path.size() && Path.back() == DC_match)860Path.pop_back();861862// Skip leading matches.863SmallVectorImpl<char>::iterator864PI = Path.begin(), PE = Path.end();865while (PI != PE && *PI == DC_match) {866unify(&*LI, &*RI);867++PI;868++LI;869++RI;870}871872for (; PI != PE; ++PI) {873switch (static_cast<DiffChange>(*PI)) {874case DC_match:875assert(LI != LE && RI != RE);876{877const Instruction *L = &*LI, *R = &*RI;878unify(L, R);879Diff.addMatch(L, R);880}881++LI; ++RI;882break;883884case DC_left:885assert(LI != LE);886Diff.addLeft(&*LI);887++LI;888break;889890case DC_right:891assert(RI != RE);892Diff.addRight(&*RI);893++RI;894break;895}896}897898// Finishing unifying and complaining about the tails of the block,899// which should be matches all the way through.900while (LI != LE) {901assert(RI != RE);902unify(&*LI, &*RI);903++LI;904++RI;905}906907// If the terminators have different kinds, but one is an invoke and the908// other is an unconditional branch immediately following a call, unify909// the results and the destinations.910const Instruction *LTerm = LStart->getParent()->getTerminator();911const Instruction *RTerm = RStart->getParent()->getTerminator();912if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) {913if (cast<BranchInst>(LTerm)->isConditional()) return;914BasicBlock::const_iterator I = LTerm->getIterator();915if (I == LStart->getParent()->begin()) return;916--I;917if (!isa<CallInst>(*I)) return;918const CallInst *LCall = cast<CallInst>(&*I);919const InvokeInst *RInvoke = cast<InvokeInst>(RTerm);920if (!equivalentAsOperands(LCall->getCalledOperand(),921RInvoke->getCalledOperand(), nullptr))922return;923if (!LCall->use_empty())924Values[LCall] = RInvoke;925tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest());926} else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) {927if (cast<BranchInst>(RTerm)->isConditional()) return;928BasicBlock::const_iterator I = RTerm->getIterator();929if (I == RStart->getParent()->begin()) return;930--I;931if (!isa<CallInst>(*I)) return;932const CallInst *RCall = cast<CallInst>(I);933const InvokeInst *LInvoke = cast<InvokeInst>(LTerm);934if (!equivalentAsOperands(LInvoke->getCalledOperand(),935RCall->getCalledOperand(), nullptr))936return;937if (!LInvoke->use_empty())938Values[LInvoke] = RCall;939tryUnify(LInvoke->getNormalDest(), RTerm->getSuccessor(0));940}941}942}943944void DifferenceEngine::Oracle::anchor() { }945946void DifferenceEngine::diff(const Function *L, const Function *R) {947Context C(*this, L, R);948949// FIXME: types950// FIXME: attributes and CC951// FIXME: parameter attributes952953// If both are declarations, we're done.954if (L->empty() && R->empty())955return;956else if (L->empty())957log("left function is declaration, right function is definition");958else if (R->empty())959log("right function is declaration, left function is definition");960else961FunctionDifferenceEngine(*this).diff(L, R);962}963964void DifferenceEngine::diff(const Module *L, const Module *R) {965StringSet<> LNames;966SmallVector<std::pair<const Function *, const Function *>, 20> Queue;967968unsigned LeftAnonCount = 0;969unsigned RightAnonCount = 0;970971for (Module::const_iterator I = L->begin(), E = L->end(); I != E; ++I) {972const Function *LFn = &*I;973StringRef Name = LFn->getName();974if (Name.empty()) {975++LeftAnonCount;976continue;977}978979LNames.insert(Name);980981if (Function *RFn = R->getFunction(LFn->getName()))982Queue.push_back(std::make_pair(LFn, RFn));983else984logf("function %l exists only in left module") << LFn;985}986987for (Module::const_iterator I = R->begin(), E = R->end(); I != E; ++I) {988const Function *RFn = &*I;989StringRef Name = RFn->getName();990if (Name.empty()) {991++RightAnonCount;992continue;993}994995if (!LNames.count(Name))996logf("function %r exists only in right module") << RFn;997}998999if (LeftAnonCount != 0 || RightAnonCount != 0) {1000SmallString<32> Tmp;1001logf(("not comparing " + Twine(LeftAnonCount) +1002" anonymous functions in the left module and " +1003Twine(RightAnonCount) + " in the right module")1004.toStringRef(Tmp));1005}10061007for (SmallVectorImpl<std::pair<const Function *, const Function *>>::iterator1008I = Queue.begin(),1009E = Queue.end();1010I != E; ++I)1011diff(I->first, I->second);1012}10131014bool DifferenceEngine::equivalentAsOperands(const GlobalValue *L,1015const GlobalValue *R) {1016if (globalValueOracle) return (*globalValueOracle)(L, R);10171018if (isa<GlobalVariable>(L) && isa<GlobalVariable>(R)) {1019const GlobalVariable *GVL = cast<GlobalVariable>(L);1020const GlobalVariable *GVR = cast<GlobalVariable>(R);1021if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() &&1022GVR->hasLocalLinkage() && GVR->hasUniqueInitializer())1023return FunctionDifferenceEngine(*this, GVL, GVR)1024.equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer(),1025nullptr);1026}10271028return L->getName() == R->getName();1029}103010311032