Path: blob/main/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
35233 views
//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// \file9// Implementation file for the IRSimilarityIdentifier for identifying10// similarities in IR including the IRInstructionMapper.11//12//===----------------------------------------------------------------------===//1314#include "llvm/Analysis/IRSimilarityIdentifier.h"15#include "llvm/ADT/DenseMap.h"16#include "llvm/ADT/SetOperations.h"17#include "llvm/IR/Intrinsics.h"18#include "llvm/IR/Operator.h"19#include "llvm/IR/User.h"20#include "llvm/InitializePasses.h"21#include "llvm/Support/SuffixTree.h"2223using namespace llvm;24using namespace IRSimilarity;2526namespace llvm {27cl::opt<bool>28DisableBranches("no-ir-sim-branch-matching", cl::init(false),29cl::ReallyHidden,30cl::desc("disable similarity matching, and outlining, "31"across branches for debugging purposes."));3233cl::opt<bool>34DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),35cl::ReallyHidden,36cl::desc("disable outlining indirect calls."));3738cl::opt<bool>39MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,40cl::desc("only allow matching call instructions if the "41"name and type signature match."));4243cl::opt<bool>44DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,45cl::desc("Don't match or outline intrinsics"));46} // namespace llvm4748IRInstructionData::IRInstructionData(Instruction &I, bool Legality,49IRInstructionDataList &IDList)50: Inst(&I), Legal(Legality), IDL(&IDList) {51initializeInstruction();52}5354void IRInstructionData::initializeInstruction() {55// We check for whether we have a comparison instruction. If it is, we56// find the "less than" version of the predicate for consistency for57// comparison instructions throught the program.58if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {59CmpInst::Predicate Predicate = predicateForConsistency(C);60if (Predicate != C->getPredicate())61RevisedPredicate = Predicate;62}6364// Here we collect the operands and their types for determining whether65// the structure of the operand use matches between two different candidates.66for (Use &OI : Inst->operands()) {67if (isa<CmpInst>(Inst) && RevisedPredicate) {68// If we have a CmpInst where the predicate is reversed, it means the69// operands must be reversed as well.70OperVals.insert(OperVals.begin(), OI.get());71continue;72}7374OperVals.push_back(OI.get());75}7677// We capture the incoming BasicBlocks as values as well as the incoming78// Values in order to check for structural similarity.79if (PHINode *PN = dyn_cast<PHINode>(Inst))80for (BasicBlock *BB : PN->blocks())81OperVals.push_back(BB);82}8384IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)85: IDL(&IDList) {}8687void IRInstructionData::setBranchSuccessors(88DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {89assert(isa<BranchInst>(Inst) && "Instruction must be branch");9091BranchInst *BI = cast<BranchInst>(Inst);92DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;9394BBNumIt = BasicBlockToInteger.find(BI->getParent());95assert(BBNumIt != BasicBlockToInteger.end() &&96"Could not find location for BasicBlock!");9798int CurrentBlockNumber = static_cast<int>(BBNumIt->second);99100for (Value *V : getBlockOperVals()) {101BasicBlock *Successor = cast<BasicBlock>(V);102BBNumIt = BasicBlockToInteger.find(Successor);103assert(BBNumIt != BasicBlockToInteger.end() &&104"Could not find number for BasicBlock!");105int OtherBlockNumber = static_cast<int>(BBNumIt->second);106107int Relative = OtherBlockNumber - CurrentBlockNumber;108RelativeBlockLocations.push_back(Relative);109}110}111112ArrayRef<Value *> IRInstructionData::getBlockOperVals() {113assert((isa<BranchInst>(Inst) ||114isa<PHINode>(Inst)) && "Instruction must be branch or PHINode");115116if (BranchInst *BI = dyn_cast<BranchInst>(Inst))117return ArrayRef<Value *>(118std::next(OperVals.begin(), BI->isConditional() ? 1 : 0),119OperVals.end()120);121122if (PHINode *PN = dyn_cast<PHINode>(Inst))123return ArrayRef<Value *>(124std::next(OperVals.begin(), PN->getNumIncomingValues()),125OperVals.end()126);127128return ArrayRef<Value *>();129}130131void IRInstructionData::setCalleeName(bool MatchByName) {132CallInst *CI = dyn_cast<CallInst>(Inst);133assert(CI && "Instruction must be call");134135CalleeName = "";136if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {137// To hash intrinsics, we use the opcode, and types like the other138// instructions, but also, the Intrinsic ID, and the Name of the139// intrinsic.140Intrinsic::ID IntrinsicID = II->getIntrinsicID();141FunctionType *FT = II->getFunctionType();142// If there is an overloaded name, we have to use the complex version143// of getName to get the entire string.144if (Intrinsic::isOverloaded(IntrinsicID))145CalleeName =146Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);147// If there is not an overloaded name, we only need to use this version.148else149CalleeName = Intrinsic::getName(IntrinsicID).str();150151return;152}153154if (!CI->isIndirectCall() && MatchByName)155CalleeName = CI->getCalledFunction()->getName().str();156}157158void IRInstructionData::setPHIPredecessors(159DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {160assert(isa<PHINode>(Inst) && "Instruction must be phi node");161162PHINode *PN = cast<PHINode>(Inst);163DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;164165BBNumIt = BasicBlockToInteger.find(PN->getParent());166assert(BBNumIt != BasicBlockToInteger.end() &&167"Could not find location for BasicBlock!");168169int CurrentBlockNumber = static_cast<int>(BBNumIt->second);170171// Convert the incoming blocks of the PHINode to an integer value, based on172// the relative distances between the current block and the incoming block.173for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {174BasicBlock *Incoming = PN->getIncomingBlock(Idx);175BBNumIt = BasicBlockToInteger.find(Incoming);176assert(BBNumIt != BasicBlockToInteger.end() &&177"Could not find number for BasicBlock!");178int OtherBlockNumber = static_cast<int>(BBNumIt->second);179180int Relative = OtherBlockNumber - CurrentBlockNumber;181RelativeBlockLocations.push_back(Relative);182}183}184185CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {186switch (CI->getPredicate()) {187case CmpInst::FCMP_OGT:188case CmpInst::FCMP_UGT:189case CmpInst::FCMP_OGE:190case CmpInst::FCMP_UGE:191case CmpInst::ICMP_SGT:192case CmpInst::ICMP_UGT:193case CmpInst::ICMP_SGE:194case CmpInst::ICMP_UGE:195return CI->getSwappedPredicate();196default:197return CI->getPredicate();198}199}200201CmpInst::Predicate IRInstructionData::getPredicate() const {202assert(isa<CmpInst>(Inst) &&203"Can only get a predicate from a compare instruction");204205if (RevisedPredicate)206return *RevisedPredicate;207208return cast<CmpInst>(Inst)->getPredicate();209}210211StringRef IRInstructionData::getCalleeName() const {212assert(isa<CallInst>(Inst) &&213"Can only get a name from a call instruction");214215assert(CalleeName && "CalleeName has not been set");216217return *CalleeName;218}219220bool IRSimilarity::isClose(const IRInstructionData &A,221const IRInstructionData &B) {222223if (!A.Legal || !B.Legal)224return false;225226// Check if we are performing the same sort of operation on the same types227// but not on the same values.228if (!A.Inst->isSameOperationAs(B.Inst)) {229// If there is a predicate, this means that either there is a swapped230// predicate, or that the types are different, we want to make sure that231// the predicates are equivalent via swapping.232if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {233234if (A.getPredicate() != B.getPredicate())235return false;236237// If the predicates are the same via swap, make sure that the types are238// still the same.239auto ZippedTypes = zip(A.OperVals, B.OperVals);240241return all_of(242ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {243return std::get<0>(R)->getType() == std::get<1>(R)->getType();244});245}246247return false;248}249250// Since any GEP Instruction operands after the first operand cannot be251// defined by a register, we must make sure that the operands after the first252// are the same in the two instructions253if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {254auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);255256// If the instructions do not have the same inbounds restrictions, we do257// not consider them the same.258if (GEP->isInBounds() != OtherGEP->isInBounds())259return false;260261auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());262263// We increment here since we do not care about the first instruction,264// we only care about the following operands since they must be the265// exact same to be considered similar.266return all_of(drop_begin(ZippedOperands),267[](std::tuple<llvm::Use &, llvm::Use &> R) {268return std::get<0>(R) == std::get<1>(R);269});270}271272// If the instructions are functions calls, we make sure that the function273// name is the same. We already know that the types are since is274// isSameOperationAs is true.275if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {276if (A.getCalleeName() != B.getCalleeName())277return false;278}279280if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&281A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())282return false;283284return true;285}286287// TODO: This is the same as the MachineOutliner, and should be consolidated288// into the same interface.289void IRInstructionMapper::convertToUnsignedVec(290BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,291std::vector<unsigned> &IntegerMapping) {292BasicBlock::iterator It = BB.begin();293294std::vector<unsigned> IntegerMappingForBB;295std::vector<IRInstructionData *> InstrListForBB;296297for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {298switch (InstClassifier.visit(*It)) {299case InstrType::Legal:300mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);301break;302case InstrType::Illegal:303mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);304break;305case InstrType::Invisible:306AddedIllegalLastTime = false;307break;308}309}310311if (AddedIllegalLastTime)312mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);313for (IRInstructionData *ID : InstrListForBB)314this->IDL->push_back(*ID);315llvm::append_range(InstrList, InstrListForBB);316llvm::append_range(IntegerMapping, IntegerMappingForBB);317}318319// TODO: This is the same as the MachineOutliner, and should be consolidated320// into the same interface.321unsigned IRInstructionMapper::mapToLegalUnsigned(322BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,323std::vector<IRInstructionData *> &InstrListForBB) {324// We added something legal, so we should unset the AddedLegalLastTime325// flag.326AddedIllegalLastTime = false;327328// If we have at least two adjacent legal instructions (which may have329// invisible instructions in between), remember that.330if (CanCombineWithPrevInstr)331HaveLegalRange = true;332CanCombineWithPrevInstr = true;333334// Get the integer for this instruction or give it the current335// LegalInstrNumber.336IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);337InstrListForBB.push_back(ID);338339if (isa<BranchInst>(*It))340ID->setBranchSuccessors(BasicBlockToInteger);341342if (isa<CallInst>(*It))343ID->setCalleeName(EnableMatchCallsByName);344345if (isa<PHINode>(*It))346ID->setPHIPredecessors(BasicBlockToInteger);347348// Add to the instruction list349bool WasInserted;350DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator351ResultIt;352std::tie(ResultIt, WasInserted) =353InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));354unsigned INumber = ResultIt->second;355356// There was an insertion.357if (WasInserted)358LegalInstrNumber++;359360IntegerMappingForBB.push_back(INumber);361362// Make sure we don't overflow or use any integers reserved by the DenseMap.363assert(LegalInstrNumber < IllegalInstrNumber &&364"Instruction mapping overflow!");365366assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&367"Tried to assign DenseMap tombstone or empty key to instruction.");368assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&369"Tried to assign DenseMap tombstone or empty key to instruction.");370371return INumber;372}373374IRInstructionData *375IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,376IRInstructionDataList &IDL) {377return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);378}379380IRInstructionData *381IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {382return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);383}384385IRInstructionDataList *386IRInstructionMapper::allocateIRInstructionDataList() {387return new (IDLAllocator->Allocate()) IRInstructionDataList();388}389390// TODO: This is the same as the MachineOutliner, and should be consolidated391// into the same interface.392unsigned IRInstructionMapper::mapToIllegalUnsigned(393BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,394std::vector<IRInstructionData *> &InstrListForBB, bool End) {395// Can't combine an illegal instruction. Set the flag.396CanCombineWithPrevInstr = false;397398// Only add one illegal number per range of legal numbers.399if (AddedIllegalLastTime)400return IllegalInstrNumber;401402IRInstructionData *ID = nullptr;403if (!End)404ID = allocateIRInstructionData(*It, false, *IDL);405else406ID = allocateIRInstructionData(*IDL);407InstrListForBB.push_back(ID);408409// Remember that we added an illegal number last time.410AddedIllegalLastTime = true;411unsigned INumber = IllegalInstrNumber;412IntegerMappingForBB.push_back(IllegalInstrNumber--);413414assert(LegalInstrNumber < IllegalInstrNumber &&415"Instruction mapping overflow!");416417assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&418"IllegalInstrNumber cannot be DenseMap tombstone or empty key!");419420assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&421"IllegalInstrNumber cannot be DenseMap tombstone or empty key!");422423return INumber;424}425426IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,427IRInstructionData *FirstInstIt,428IRInstructionData *LastInstIt)429: StartIdx(StartIdx), Len(Len) {430431assert(FirstInstIt != nullptr && "Instruction is nullptr!");432assert(LastInstIt != nullptr && "Instruction is nullptr!");433assert(StartIdx + Len > StartIdx &&434"Overflow for IRSimilarityCandidate range?");435assert(Len - 1 == static_cast<unsigned>(std::distance(436iterator(FirstInstIt), iterator(LastInstIt))) &&437"Length of the first and last IRInstructionData do not match the "438"given length");439440// We iterate over the given instructions, and map each unique value441// to a unique number in the IRSimilarityCandidate ValueToNumber and442// NumberToValue maps. A constant get its own value globally, the individual443// uses of the constants are not considered to be unique.444//445// IR: Mapping Added:446// %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2447// %add2 = add i32 %a, %1 %add2 -> 4448// %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5449//450// when replace with global values, starting from 1, would be451//452// 3 = add i32 1, 2453// 4 = add i32 1, 3454// 6 = add i32 5, 2455unsigned LocalValNumber = 1;456IRInstructionDataList::iterator ID = iterator(*FirstInstIt);457for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {458// Map the operand values to an unsigned integer if it does not already459// have an unsigned integer assigned to it.460for (Value *Arg : ID->OperVals)461if (!ValueToNumber.contains(Arg)) {462ValueToNumber.try_emplace(Arg, LocalValNumber);463NumberToValue.try_emplace(LocalValNumber, Arg);464LocalValNumber++;465}466467// Mapping the instructions to an unsigned integer if it is not already468// exist in the mapping.469if (!ValueToNumber.contains(ID->Inst)) {470ValueToNumber.try_emplace(ID->Inst, LocalValNumber);471NumberToValue.try_emplace(LocalValNumber, ID->Inst);472LocalValNumber++;473}474}475476// Setting the first and last instruction data pointers for the candidate. If477// we got through the entire for loop without hitting an assert, we know478// that both of these instructions are not nullptrs.479FirstInst = FirstInstIt;480LastInst = LastInstIt;481482// Add the basic blocks contained in the set into the global value numbering.483DenseSet<BasicBlock *> BBSet;484getBasicBlocks(BBSet);485for (BasicBlock *BB : BBSet) {486if (ValueToNumber.contains(BB))487continue;488489ValueToNumber.try_emplace(BB, LocalValNumber);490NumberToValue.try_emplace(LocalValNumber, BB);491LocalValNumber++;492}493}494495bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,496const IRSimilarityCandidate &B) {497if (A.getLength() != B.getLength())498return false;499500auto InstrDataForBoth =501zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));502503return all_of(InstrDataForBoth,504[](std::tuple<IRInstructionData &, IRInstructionData &> R) {505IRInstructionData &A = std::get<0>(R);506IRInstructionData &B = std::get<1>(R);507if (!A.Legal || !B.Legal)508return false;509return isClose(A, B);510});511}512513/// Determine if one or more of the assigned global value numbers for the514/// operands in \p TargetValueNumbers is in the current mapping set for operand515/// numbers in \p SourceOperands. The set of possible corresponding global516/// value numbers are replaced with the most recent version of compatible517/// values.518///519/// \param [in] SourceValueToNumberMapping - The mapping of a Value to global520/// value number for the source IRInstructionCandidate.521/// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source522/// IRSimilarityCandidate global value numbers to a set of possible numbers in523/// the target.524/// \param [in] SourceOperands - The operands in the original525/// IRSimilarityCandidate in the current instruction.526/// \param [in] TargetValueNumbers - The global value numbers of the operands in527/// the corresponding Instruction in the other IRSimilarityCandidate.528/// \returns true if there exists a possible mapping between the source529/// Instruction operands and the target Instruction operands, and false if not.530static bool checkNumberingAndReplaceCommutative(531const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,532DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,533ArrayRef<Value *> &SourceOperands,534DenseSet<unsigned> &TargetValueNumbers){535536DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;537538unsigned ArgVal;539bool WasInserted;540541// Iterate over the operands in the source IRSimilarityCandidate to determine542// whether there exists an operand in the other IRSimilarityCandidate that543// creates a valid mapping of Value to Value between the544// IRSimilarityCaniddates.545for (Value *V : SourceOperands) {546ArgVal = SourceValueToNumberMapping.find(V)->second;547548// Instead of finding a current mapping, we attempt to insert a set.549std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(550std::make_pair(ArgVal, TargetValueNumbers));551552// We need to iterate over the items in other IRSimilarityCandidate's553// Instruction to determine whether there is a valid mapping of554// Value to Value.555DenseSet<unsigned> NewSet;556for (unsigned &Curr : ValueMappingIt->second)557// If we can find the value in the mapping, we add it to the new set.558if (TargetValueNumbers.contains(Curr))559NewSet.insert(Curr);560561// If we could not find a Value, return 0.562if (NewSet.empty())563return false;564565// Otherwise replace the old mapping with the newly constructed one.566if (NewSet.size() != ValueMappingIt->second.size())567ValueMappingIt->second.swap(NewSet);568569// We have reached no conclusions about the mapping, and cannot remove570// any items from the other operands, so we move to check the next operand.571if (ValueMappingIt->second.size() != 1)572continue;573574unsigned ValToRemove = *ValueMappingIt->second.begin();575// When there is only one item left in the mapping for and operand, remove576// the value from the other operands. If it results in there being no577// mapping, return false, it means the mapping is wrong578for (Value *InnerV : SourceOperands) {579if (V == InnerV)580continue;581582unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;583ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);584if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())585continue;586587ValueMappingIt->second.erase(ValToRemove);588if (ValueMappingIt->second.empty())589return false;590}591}592593return true;594}595596/// Determine if operand number \p TargetArgVal is in the current mapping set597/// for operand number \p SourceArgVal.598///599/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global600/// value numbers from source IRSimilarityCandidate to target601/// IRSimilarityCandidate.602/// \param [in] SourceArgVal The global value number for an operand in the603/// in the original candidate.604/// \param [in] TargetArgVal The global value number for the corresponding605/// operand in the other candidate.606/// \returns True if there exists a mapping and false if not.607bool checkNumberingAndReplace(608DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,609unsigned SourceArgVal, unsigned TargetArgVal) {610// We are given two unsigned integers representing the global values of611// the operands in different IRSimilarityCandidates and a current mapping612// between the two.613//614// Source Operand GVN: 1615// Target Operand GVN: 2616// CurrentMapping: {1: {1, 2}}617//618// Since we have mapping, and the target operand is contained in the set, we619// update it to:620// CurrentMapping: {1: {2}}621// and can return true. But, if the mapping was622// CurrentMapping: {1: {3}}623// we would return false.624625bool WasInserted;626DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;627628std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(629std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));630631// If we created a new mapping, then we are done.632if (WasInserted)633return true;634635// If there is more than one option in the mapping set, and the target value636// is included in the mapping set replace that set with one that only includes637// the target value, as it is the only valid mapping via the non commutative638// instruction.639640DenseSet<unsigned> &TargetSet = Val->second;641if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {642TargetSet.clear();643TargetSet.insert(TargetArgVal);644return true;645}646647// Return true if we can find the value in the set.648return TargetSet.contains(TargetArgVal);649}650651bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(652OperandMapping A, OperandMapping B) {653// Iterators to keep track of where we are in the operands for each654// Instruction.655ArrayRef<Value *>::iterator VItA = A.OperVals.begin();656ArrayRef<Value *>::iterator VItB = B.OperVals.begin();657unsigned OperandLength = A.OperVals.size();658659// For each operand, get the value numbering and ensure it is consistent.660for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {661unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;662unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;663664// Attempt to add a set with only the target value. If there is no mapping665// we can create it here.666//667// For an instruction like a subtraction:668// IRSimilarityCandidateA: IRSimilarityCandidateB:669// %resultA = sub %a, %b %resultB = sub %d, %e670//671// We map %a -> %d and %b -> %e.672//673// And check to see whether their mapping is consistent in674// checkNumberingAndReplace.675676if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))677return false;678679if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))680return false;681}682return true;683}684685bool IRSimilarityCandidate::compareCommutativeOperandMapping(686OperandMapping A, OperandMapping B) {687DenseSet<unsigned> ValueNumbersA;688DenseSet<unsigned> ValueNumbersB;689690ArrayRef<Value *>::iterator VItA = A.OperVals.begin();691ArrayRef<Value *>::iterator VItB = B.OperVals.begin();692unsigned OperandLength = A.OperVals.size();693694// Find the value number sets for the operands.695for (unsigned Idx = 0; Idx < OperandLength;696Idx++, VItA++, VItB++) {697ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);698ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);699}700701// Iterate over the operands in the first IRSimilarityCandidate and make sure702// there exists a possible mapping with the operands in the second703// IRSimilarityCandidate.704if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,705A.ValueNumberMapping, A.OperVals,706ValueNumbersB))707return false;708709// Iterate over the operands in the second IRSimilarityCandidate and make sure710// there exists a possible mapping with the operands in the first711// IRSimilarityCandidate.712if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,713B.ValueNumberMapping, B.OperVals,714ValueNumbersA))715return false;716717return true;718}719720bool IRSimilarityCandidate::compareAssignmentMapping(721const unsigned InstValA, const unsigned &InstValB,722DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,723DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {724DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;725bool WasInserted;726std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(727std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));728if (!WasInserted && !ValueMappingIt->second.contains(InstValB))729return false;730else if (ValueMappingIt->second.size() != 1) {731for (unsigned OtherVal : ValueMappingIt->second) {732if (OtherVal == InstValB)733continue;734if (!ValueNumberMappingA.contains(OtherVal))735continue;736if (!ValueNumberMappingA[OtherVal].contains(InstValA))737continue;738ValueNumberMappingA[OtherVal].erase(InstValA);739}740ValueNumberMappingA.erase(ValueMappingIt);741std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(742std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));743}744745return true;746}747748bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,749RelativeLocMapping B) {750// Get the basic blocks the label refers to.751BasicBlock *ABB = cast<BasicBlock>(A.OperVal);752BasicBlock *BBB = cast<BasicBlock>(B.OperVal);753754// Get the basic blocks contained in each region.755DenseSet<BasicBlock *> BasicBlockA;756DenseSet<BasicBlock *> BasicBlockB;757A.IRSC.getBasicBlocks(BasicBlockA);758B.IRSC.getBasicBlocks(BasicBlockB);759760// Determine if the block is contained in the region.761bool AContained = BasicBlockA.contains(ABB);762bool BContained = BasicBlockB.contains(BBB);763764// Both blocks need to be contained in the region, or both need to be outside765// the region.766if (AContained != BContained)767return false;768769// If both are contained, then we need to make sure that the relative770// distance to the target blocks are the same.771if (AContained)772return A.RelativeLocation == B.RelativeLocation;773return true;774}775776bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,777const IRSimilarityCandidate &B) {778DenseMap<unsigned, DenseSet<unsigned>> MappingA;779DenseMap<unsigned, DenseSet<unsigned>> MappingB;780return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);781}782783typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,784SmallVector<int, 4> &, ArrayRef<Value *> &,785ArrayRef<Value *> &>786ZippedRelativeLocationsT;787788bool IRSimilarityCandidate::compareStructure(789const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,790DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,791DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {792if (A.getLength() != B.getLength())793return false;794795if (A.ValueToNumber.size() != B.ValueToNumber.size())796return false;797798iterator ItA = A.begin();799iterator ItB = B.begin();800801// These ValueNumber Mapping sets create a create a mapping between the values802// in one candidate to values in the other candidate. If we create a set with803// one element, and that same element maps to the original element in the804// candidate we have a good mapping.805806// Iterate over the instructions contained in each candidate807unsigned SectionLength = A.getStartIdx() + A.getLength();808for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;809ItA++, ItB++, Loc++) {810// Make sure the instructions are similar to one another.811if (!isClose(*ItA, *ItB))812return false;813814Instruction *IA = ItA->Inst;815Instruction *IB = ItB->Inst;816817if (!ItA->Legal || !ItB->Legal)818return false;819820// Get the operand sets for the instructions.821ArrayRef<Value *> OperValsA = ItA->OperVals;822ArrayRef<Value *> OperValsB = ItB->OperVals;823824unsigned InstValA = A.ValueToNumber.find(IA)->second;825unsigned InstValB = B.ValueToNumber.find(IB)->second;826827// Ensure that the mappings for the instructions exists.828if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA,829ValueNumberMappingB))830return false;831832if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB,833ValueNumberMappingA))834return false;835836// We have different paths for commutative instructions and non-commutative837// instructions since commutative instructions could allow multiple mappings838// to certain values.839if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&840!isa<IntrinsicInst>(IA)) {841if (!compareCommutativeOperandMapping(842{A, OperValsA, ValueNumberMappingA},843{B, OperValsB, ValueNumberMappingB}))844return false;845continue;846}847848// Handle the non-commutative cases.849if (!compareNonCommutativeOperandMapping(850{A, OperValsA, ValueNumberMappingA},851{B, OperValsB, ValueNumberMappingB}))852return false;853854// Here we check that between two corresponding instructions,855// when referring to a basic block in the same region, the856// relative locations are the same. And, that the instructions refer to857// basic blocks outside the region in the same corresponding locations.858859// We are able to make the assumption about blocks outside of the region860// since the target block labels are considered values and will follow the861// same number matching that we defined for the other instructions in the862// region. So, at this point, in each location we target a specific block863// outside the region, we are targeting a corresponding block in each864// analagous location in the region we are comparing to.865if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&866!(isa<PHINode>(IA) && isa<PHINode>(IB)))867continue;868869SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;870SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;871ArrayRef<Value *> ABL = ItA->getBlockOperVals();872ArrayRef<Value *> BBL = ItB->getBlockOperVals();873874// Check to make sure that the number of operands, and branching locations875// between BranchInsts is the same.876if (RelBlockLocsA.size() != RelBlockLocsB.size() &&877ABL.size() != BBL.size())878return false;879880assert(RelBlockLocsA.size() == ABL.size() &&881"Block information vectors not the same size.");882assert(RelBlockLocsB.size() == BBL.size() &&883"Block information vectors not the same size.");884885ZippedRelativeLocationsT ZippedRelativeLocations =886zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);887if (any_of(ZippedRelativeLocations,888[&A, &B](std::tuple<int, int, Value *, Value *> R) {889return !checkRelativeLocations(890{A, std::get<0>(R), std::get<2>(R)},891{B, std::get<1>(R), std::get<3>(R)});892}))893return false;894}895return true;896}897898bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,899const IRSimilarityCandidate &B) {900auto DoesOverlap = [](const IRSimilarityCandidate &X,901const IRSimilarityCandidate &Y) {902// Check:903// XXXXXX X starts before Y ends904// YYYYYYY Y starts after X starts905return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;906};907908return DoesOverlap(A, B) || DoesOverlap(B, A);909}910911void IRSimilarityIdentifier::populateMapper(912Module &M, std::vector<IRInstructionData *> &InstrList,913std::vector<unsigned> &IntegerMapping) {914915std::vector<IRInstructionData *> InstrListForModule;916std::vector<unsigned> IntegerMappingForModule;917// Iterate over the functions in the module to map each Instruction in each918// BasicBlock to an unsigned integer.919Mapper.initializeForBBs(M);920921for (Function &F : M) {922923if (F.empty())924continue;925926for (BasicBlock &BB : F) {927928// BB has potential to have similarity since it has a size greater than 2929// and can therefore match other regions greater than 2. Map it to a list930// of unsigned integers.931Mapper.convertToUnsignedVec(BB, InstrListForModule,932IntegerMappingForModule);933}934935BasicBlock::iterator It = F.begin()->end();936Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,937true);938if (InstrListForModule.size() > 0)939Mapper.IDL->push_back(*InstrListForModule.back());940}941942// Insert the InstrListForModule at the end of the overall InstrList so that943// we can have a long InstrList for the entire set of Modules being analyzed.944llvm::append_range(InstrList, InstrListForModule);945// Do the same as above, but for IntegerMapping.946llvm::append_range(IntegerMapping, IntegerMappingForModule);947}948949void IRSimilarityIdentifier::populateMapper(950ArrayRef<std::unique_ptr<Module>> &Modules,951std::vector<IRInstructionData *> &InstrList,952std::vector<unsigned> &IntegerMapping) {953954// Iterate over, and map the instructions in each module.955for (const std::unique_ptr<Module> &M : Modules)956populateMapper(*M, InstrList, IntegerMapping);957}958959/// From a repeated subsequence, find all the different instances of the960/// subsequence from the \p InstrList, and create an IRSimilarityCandidate from961/// the IRInstructionData in subsequence.962///963/// \param [in] Mapper - The instruction mapper for basic correctness checks.964/// \param [in] InstrList - The vector that holds the instruction data.965/// \param [in] IntegerMapping - The vector that holds the mapped integers.966/// \param [out] CandsForRepSubstring - The vector to store the generated967/// IRSimilarityCandidates.968static void createCandidatesFromSuffixTree(969const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,970std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,971std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {972973unsigned StringLen = RS.Length;974if (StringLen < 2)975return;976977// Create an IRSimilarityCandidate for instance of this subsequence \p RS.978for (const unsigned &StartIdx : RS.StartIndices) {979unsigned EndIdx = StartIdx + StringLen - 1;980981// Check that this subsequence does not contain an illegal instruction.982bool ContainsIllegal = false;983for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {984unsigned Key = IntegerMapping[CurrIdx];985if (Key > Mapper.IllegalInstrNumber) {986ContainsIllegal = true;987break;988}989}990991// If we have an illegal instruction, we should not create an992// IRSimilarityCandidate for this region.993if (ContainsIllegal)994continue;995996// We are getting iterators to the instructions in this region of code997// by advancing the start and end indices from the start of the998// InstrList.999std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();1000std::advance(StartIt, StartIdx);1001std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();1002std::advance(EndIt, EndIdx);10031004CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);1005}1006}10071008void IRSimilarityCandidate::createCanonicalRelationFrom(1009IRSimilarityCandidate &SourceCand,1010DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,1011DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {1012assert(SourceCand.CanonNumToNumber.size() != 0 &&1013"Base canonical relationship is empty!");1014assert(SourceCand.NumberToCanonNum.size() != 0 &&1015"Base canonical relationship is empty!");10161017assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");1018assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");10191020DenseSet<unsigned> UsedGVNs;1021// Iterate over the mappings provided from this candidate to SourceCand. We1022// are then able to map the GVN in this candidate to the same canonical number1023// given to the corresponding GVN in SourceCand.1024for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {1025unsigned SourceGVN = GVNMapping.first;10261027assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");10281029unsigned ResultGVN;1030// We need special handling if we have more than one potential value. This1031// means that there are at least two GVNs that could correspond to this GVN.1032// This could lead to potential swapping later on, so we make a decision1033// here to ensure a one-to-one mapping.1034if (GVNMapping.second.size() > 1) {1035bool Found = false;1036for (unsigned Val : GVNMapping.second) {1037// We make sure the target value number hasn't already been reserved.1038if (UsedGVNs.contains(Val))1039continue;10401041// We make sure that the opposite mapping is still consistent.1042DenseMap<unsigned, DenseSet<unsigned>>::iterator It =1043FromSourceMapping.find(Val);10441045if (!It->second.contains(SourceGVN))1046continue;10471048// We pick the first item that satisfies these conditions.1049Found = true;1050ResultGVN = Val;1051break;1052}10531054assert(Found && "Could not find matching value for source GVN");1055(void)Found;10561057} else1058ResultGVN = *GVNMapping.second.begin();10591060// Whatever GVN is found, we mark it as used.1061UsedGVNs.insert(ResultGVN);10621063unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);1064CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));1065NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));1066}10671068DenseSet<BasicBlock *> BBSet;1069getBasicBlocks(BBSet);1070// Find canonical numbers for the BasicBlocks in the current candidate.1071// This is done by finding the corresponding value for the first instruction1072// in the block in the current candidate, finding the matching value in the1073// source candidate. Then by finding the parent of this value, use the1074// canonical number of the block in the source candidate for the canonical1075// number in the current candidate.1076for (BasicBlock *BB : BBSet) {1077unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;10781079// We can skip the BasicBlock if the canonical numbering has already been1080// found in a separate instruction.1081if (NumberToCanonNum.contains(BBGVNForCurrCand))1082continue;10831084// If the basic block is the starting block, then the shared instruction may1085// not be the first instruction in the block, it will be the first1086// instruction in the similarity region.1087Value *FirstOutlineInst = BB == getStartBB()1088? frontInstruction()1089: &*BB->instructionsWithoutDebug().begin();10901091unsigned FirstInstGVN = *getGVN(FirstOutlineInst);1092unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);1093unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);1094Value *SourceV = *SourceCand.fromGVN(SourceGVN);1095BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();1096unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);1097unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);1098CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));1099NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));1100}1101}11021103void IRSimilarityCandidate::createCanonicalRelationFrom(1104IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,1105IRSimilarityCandidate &TargetCandLarge) {1106assert(!SourceCand.CanonNumToNumber.empty() &&1107"Canonical Relationship is non-empty");1108assert(!SourceCand.NumberToCanonNum.empty() &&1109"Canonical Relationship is non-empty");11101111assert(!SourceCandLarge.CanonNumToNumber.empty() &&1112"Canonical Relationship is non-empty");1113assert(!SourceCandLarge.NumberToCanonNum.empty() &&1114"Canonical Relationship is non-empty");11151116assert(!TargetCandLarge.CanonNumToNumber.empty() &&1117"Canonical Relationship is non-empty");1118assert(!TargetCandLarge.NumberToCanonNum.empty() &&1119"Canonical Relationship is non-empty");11201121assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");1122assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");11231124// We're going to use the larger candidates as a "bridge" to create the1125// canonical number for the target candidate since we have idetified two1126// candidates as subsequences of larger sequences, and therefore must be1127// structurally similar.1128for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {1129Value *CurrVal = ValueNumPair.first;1130unsigned TargetCandGVN = ValueNumPair.second;11311132// Find the numbering in the large candidate that surrounds the1133// current candidate.1134std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);1135assert(OLargeTargetGVN.has_value() && "GVN not found for Value");11361137// Get the canonical numbering in the large target candidate.1138std::optional<unsigned> OTargetCandCanon =1139TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());1140assert(OTargetCandCanon.has_value() &&1141"Canononical Number not found for GVN");11421143// Get the GVN in the large source candidate from the canonical numbering.1144std::optional<unsigned> OLargeSourceGVN =1145SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());1146assert(OLargeSourceGVN.has_value() &&1147"GVN Number not found for Canonical Number");11481149// Get the Value from the GVN in the large source candidate.1150std::optional<Value *> OLargeSourceV =1151SourceCandLarge.fromGVN(OLargeSourceGVN.value());1152assert(OLargeSourceV.has_value() && "Value not found for GVN");11531154// Get the GVN number for the Value in the source candidate.1155std::optional<unsigned> OSourceGVN =1156SourceCand.getGVN(OLargeSourceV.value());1157assert(OSourceGVN.has_value() && "GVN Number not found for Value");11581159// Get the canonical numbering from the GVN/1160std::optional<unsigned> OSourceCanon =1161SourceCand.getCanonicalNum(OSourceGVN.value());1162assert(OSourceCanon.has_value() && "Canon Number not found for GVN");11631164// Insert the canonical numbering and GVN pair into their respective1165// mappings.1166CanonNumToNumber.insert(1167std::make_pair(OSourceCanon.value(), TargetCandGVN));1168NumberToCanonNum.insert(1169std::make_pair(TargetCandGVN, OSourceCanon.value()));1170}1171}11721173void IRSimilarityCandidate::createCanonicalMappingFor(1174IRSimilarityCandidate &CurrCand) {1175assert(CurrCand.CanonNumToNumber.size() == 0 &&1176"Canonical Relationship is non-empty");1177assert(CurrCand.NumberToCanonNum.size() == 0 &&1178"Canonical Relationship is non-empty");11791180unsigned CanonNum = 0;1181// Iterate over the value numbers found, the order does not matter in this1182// case.1183for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {1184CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));1185CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));1186CanonNum++;1187}1188}11891190/// Look for larger IRSimilarityCandidates From the previously matched1191/// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is1192/// an overlap, return a pair of structurally similar, larger1193/// IRSimilarityCandidates.1194///1195/// \param [in] CandA - The first candidate we are trying to determine the1196/// structure of.1197/// \param [in] CandB - The second candidate we are trying to determine the1198/// structure of.1199/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in1200/// a circuit to the IRSimilarityCandidates that include this instruction.1201/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a1202/// number representing the structural group assigned to it.1203static std::optional<1204std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>1205CheckLargerCands(1206IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB,1207DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,1208DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) {1209DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;1210DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;1211DenseSet<unsigned> IncludedGroupsA;1212DenseSet<unsigned> IncludedGroupsB;12131214// Find the overall similarity group numbers that fully contain the candidate,1215// and record the larger candidate for each group.1216auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());1217std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>1218Result;12191220unsigned CandAStart = CandA.getStartIdx();1221unsigned CandAEnd = CandA.getEndIdx();1222unsigned CandBStart = CandB.getStartIdx();1223unsigned CandBEnd = CandB.getEndIdx();1224if (IdxToCandidateIt == IndexToIncludedCand.end())1225return Result;1226for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {1227if (MatchedCand->getStartIdx() > CandAStart ||1228(MatchedCand->getEndIdx() < CandAEnd))1229continue;1230unsigned GroupNum = CandToGroup.find(MatchedCand)->second;1231IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));1232IncludedGroupsA.insert(GroupNum);1233}12341235// Find the overall similarity group numbers that fully contain the next1236// candidate, and record the larger candidate for each group.1237IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);1238if (IdxToCandidateIt == IndexToIncludedCand.end())1239return Result;1240for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {1241if (MatchedCand->getStartIdx() > CandBStart ||1242MatchedCand->getEndIdx() < CandBEnd)1243continue;1244unsigned GroupNum = CandToGroup.find(MatchedCand)->second;1245IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));1246IncludedGroupsB.insert(GroupNum);1247}12481249// Find the intersection between the two groups, these are the groups where1250// the larger candidates exist.1251set_intersect(IncludedGroupsA, IncludedGroupsB);12521253// If there is no intersection between the sets, then we cannot determine1254// whether or not there is a match.1255if (IncludedGroupsA.empty())1256return Result;12571258// Create a pair that contains the larger candidates.1259auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());1260auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());1261Result = std::make_pair(ItA->second, ItB->second);1262return Result;1263}12641265/// From the list of IRSimilarityCandidates, perform a comparison between each1266/// IRSimilarityCandidate to determine if there are overlapping1267/// IRInstructionData, or if they do not have the same structure.1268///1269/// \param [in] CandsForRepSubstring - The vector containing the1270/// IRSimilarityCandidates.1271/// \param [out] StructuralGroups - the mapping of unsigned integers to vector1272/// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the1273/// vector are structurally similar to one another.1274/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in1275/// a circuit to the IRSimilarityCandidates that include this instruction.1276/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a1277/// number representing the structural group assigned to it.1278static void findCandidateStructures(1279std::vector<IRSimilarityCandidate> &CandsForRepSubstring,1280DenseMap<unsigned, SimilarityGroup> &StructuralGroups,1281DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,1282DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup1283) {1284std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,1285InnerCandEndIt;12861287// IRSimilarityCandidates each have a structure for operand use. It is1288// possible that two instances of the same subsequences have different1289// structure. Each type of structure found is assigned a number. This1290// DenseMap maps an IRSimilarityCandidate to which type of similarity1291// discovered it fits within.1292DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;12931294// Find the compatibility from each candidate to the others to determine1295// which candidates overlap and which have the same structure by mapping1296// each structure to a different group.1297bool SameStructure;1298bool Inserted;1299unsigned CurrentGroupNum = 0;1300unsigned OuterGroupNum;1301DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;1302DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;1303DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;13041305// Iterate over the candidates to determine its structural and overlapping1306// compatibility with other instructions1307DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;1308DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;1309for (CandIt = CandsForRepSubstring.begin(),1310CandEndIt = CandsForRepSubstring.end();1311CandIt != CandEndIt; CandIt++) {13121313// Determine if it has an assigned structural group already.1314CandToGroupIt = CandToGroup.find(&*CandIt);1315if (CandToGroupIt == CandToGroup.end()) {1316// If not, we assign it one, and add it to our mapping.1317std::tie(CandToGroupIt, Inserted) =1318CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));1319}13201321// Get the structural group number from the iterator.1322OuterGroupNum = CandToGroupIt->second;13231324// Check if we already have a list of IRSimilarityCandidates for the current1325// structural group. Create one if one does not exist.1326CurrentGroupPair = StructuralGroups.find(OuterGroupNum);1327if (CurrentGroupPair == StructuralGroups.end()) {1328IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);1329std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(1330std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));1331}13321333// Iterate over the IRSimilarityCandidates following the current1334// IRSimilarityCandidate in the list to determine whether the two1335// IRSimilarityCandidates are compatible. This is so we do not repeat pairs1336// of IRSimilarityCandidates.1337for (InnerCandIt = std::next(CandIt),1338InnerCandEndIt = CandsForRepSubstring.end();1339InnerCandIt != InnerCandEndIt; InnerCandIt++) {13401341// We check if the inner item has a group already, if it does, we skip it.1342CandToGroupItInner = CandToGroup.find(&*InnerCandIt);1343if (CandToGroupItInner != CandToGroup.end())1344continue;13451346// Check if we have found structural similarity between two candidates1347// that fully contains the first and second candidates.1348std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>1349LargerPair = CheckLargerCands(1350*CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);13511352// If a pair was found, it means that we can assume that these smaller1353// substrings are also structurally similar. Use the larger candidates to1354// determine the canonical mapping between the two sections.1355if (LargerPair.has_value()) {1356SameStructure = true;1357InnerCandIt->createCanonicalRelationFrom(1358*CandIt, *LargerPair.value().first, *LargerPair.value().second);1359CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));1360CurrentGroupPair->second.push_back(*InnerCandIt);1361continue;1362}13631364// Otherwise we determine if they have the same structure and add it to1365// vector if they match.1366ValueNumberMappingA.clear();1367ValueNumberMappingB.clear();1368SameStructure = IRSimilarityCandidate::compareStructure(1369*CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);1370if (!SameStructure)1371continue;13721373InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,1374ValueNumberMappingB);1375CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));1376CurrentGroupPair->second.push_back(*InnerCandIt);1377}1378}1379}13801381void IRSimilarityIdentifier::findCandidates(1382std::vector<IRInstructionData *> &InstrList,1383std::vector<unsigned> &IntegerMapping) {1384SuffixTree ST(IntegerMapping);13851386std::vector<IRSimilarityCandidate> CandsForRepSubstring;1387std::vector<SimilarityGroup> NewCandidateGroups;13881389DenseMap<unsigned, SimilarityGroup> StructuralGroups;1390DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand;1391DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;13921393// Iterate over the subsequences found by the Suffix Tree to create1394// IRSimilarityCandidates for each repeated subsequence and determine which1395// instances are structurally similar to one another.13961397// Sort the suffix tree from longest substring to shortest.1398std::vector<SuffixTree::RepeatedSubstring> RSes;1399for (SuffixTree::RepeatedSubstring &RS : ST)1400RSes.push_back(RS);14011402llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS,1403const SuffixTree::RepeatedSubstring &RHS) {1404return LHS.Length > RHS.Length;1405});1406for (SuffixTree::RepeatedSubstring &RS : RSes) {1407createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,1408CandsForRepSubstring);14091410if (CandsForRepSubstring.size() < 2)1411continue;14121413findCandidateStructures(CandsForRepSubstring, StructuralGroups,1414IndexToIncludedCand, CandToGroup);1415for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {1416// We only add the group if it contains more than one1417// IRSimilarityCandidate. If there is only one, that means there is no1418// other repeated subsequence with the same structure.1419if (Group.second.size() > 1) {1420SimilarityCandidates->push_back(Group.second);1421// Iterate over each candidate in the group, and add an entry for each1422// instruction included with a mapping to a set of1423// IRSimilarityCandidates that include that instruction.1424for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {1425for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();1426Idx <= Edx; ++Idx) {1427DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>>::iterator1428IdIt;1429IdIt = IndexToIncludedCand.find(Idx);1430bool Inserted = false;1431if (IdIt == IndexToIncludedCand.end())1432std::tie(IdIt, Inserted) = IndexToIncludedCand.insert(1433std::make_pair(Idx, DenseSet<IRSimilarityCandidate *>()));1434IdIt->second.insert(&IRCand);1435}1436// Add mapping of candidate to the overall similarity group number.1437CandToGroup.insert(1438std::make_pair(&IRCand, SimilarityCandidates->size() - 1));1439}1440}1441}14421443CandsForRepSubstring.clear();1444StructuralGroups.clear();1445NewCandidateGroups.clear();1446}1447}14481449SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(1450ArrayRef<std::unique_ptr<Module>> Modules) {1451resetSimilarityCandidates();14521453std::vector<IRInstructionData *> InstrList;1454std::vector<unsigned> IntegerMapping;1455Mapper.InstClassifier.EnableBranches = this->EnableBranches;1456Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;1457Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;1458Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;1459Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;14601461populateMapper(Modules, InstrList, IntegerMapping);1462findCandidates(InstrList, IntegerMapping);14631464return *SimilarityCandidates;1465}14661467SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {1468resetSimilarityCandidates();1469Mapper.InstClassifier.EnableBranches = this->EnableBranches;1470Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;1471Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;1472Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;1473Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;14741475std::vector<IRInstructionData *> InstrList;1476std::vector<unsigned> IntegerMapping;14771478populateMapper(M, InstrList, IntegerMapping);1479findCandidates(InstrList, IntegerMapping);14801481return *SimilarityCandidates;1482}14831484INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",1485"ir-similarity-identifier", false, true)14861487IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()1488: ModulePass(ID) {1489initializeIRSimilarityIdentifierWrapperPassPass(1490*PassRegistry::getPassRegistry());1491}14921493bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {1494IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,1495MatchCallsByName, !DisableIntrinsics,1496false));1497return false;1498}14991500bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {1501IRSI.reset();1502return false;1503}15041505bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {1506IRSI->findSimilarity(M);1507return false;1508}15091510AnalysisKey IRSimilarityAnalysis::Key;1511IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,1512ModuleAnalysisManager &) {1513auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,1514MatchCallsByName, !DisableIntrinsics,1515false);1516IRSI.findSimilarity(M);1517return IRSI;1518}15191520PreservedAnalyses1521IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {1522IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);1523std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =1524IRSI.getSimilarity();15251526for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {1527OS << CandVec.size() << " candidates of length "1528<< CandVec.begin()->getLength() << ". Found in: \n";1529for (IRSimilarityCandidate &Cand : CandVec) {1530OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()1531<< ", Basic Block: ";1532if (Cand.front()->Inst->getParent()->getName().str() == "")1533OS << "(unnamed)";1534else1535OS << Cand.front()->Inst->getParent()->getName().str();1536OS << "\n Start Instruction: ";1537Cand.frontInstruction()->print(OS);1538OS << "\n End Instruction: ";1539Cand.backInstruction()->print(OS);1540OS << "\n";1541}1542}15431544return PreservedAnalyses::all();1545}15461547char IRSimilarityIdentifierWrapperPass::ID = 0;154815491550