Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
35233 views
1
//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// \file
10
// Implementation file for the IRSimilarityIdentifier for identifying
11
// similarities in IR including the IRInstructionMapper.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "llvm/Analysis/IRSimilarityIdentifier.h"
16
#include "llvm/ADT/DenseMap.h"
17
#include "llvm/ADT/SetOperations.h"
18
#include "llvm/IR/Intrinsics.h"
19
#include "llvm/IR/Operator.h"
20
#include "llvm/IR/User.h"
21
#include "llvm/InitializePasses.h"
22
#include "llvm/Support/SuffixTree.h"
23
24
using namespace llvm;
25
using namespace IRSimilarity;
26
27
namespace llvm {
28
cl::opt<bool>
29
DisableBranches("no-ir-sim-branch-matching", cl::init(false),
30
cl::ReallyHidden,
31
cl::desc("disable similarity matching, and outlining, "
32
"across branches for debugging purposes."));
33
34
cl::opt<bool>
35
DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
36
cl::ReallyHidden,
37
cl::desc("disable outlining indirect calls."));
38
39
cl::opt<bool>
40
MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
41
cl::desc("only allow matching call instructions if the "
42
"name and type signature match."));
43
44
cl::opt<bool>
45
DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
46
cl::desc("Don't match or outline intrinsics"));
47
} // namespace llvm
48
49
IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
50
IRInstructionDataList &IDList)
51
: Inst(&I), Legal(Legality), IDL(&IDList) {
52
initializeInstruction();
53
}
54
55
void IRInstructionData::initializeInstruction() {
56
// We check for whether we have a comparison instruction. If it is, we
57
// find the "less than" version of the predicate for consistency for
58
// comparison instructions throught the program.
59
if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
60
CmpInst::Predicate Predicate = predicateForConsistency(C);
61
if (Predicate != C->getPredicate())
62
RevisedPredicate = Predicate;
63
}
64
65
// Here we collect the operands and their types for determining whether
66
// the structure of the operand use matches between two different candidates.
67
for (Use &OI : Inst->operands()) {
68
if (isa<CmpInst>(Inst) && RevisedPredicate) {
69
// If we have a CmpInst where the predicate is reversed, it means the
70
// operands must be reversed as well.
71
OperVals.insert(OperVals.begin(), OI.get());
72
continue;
73
}
74
75
OperVals.push_back(OI.get());
76
}
77
78
// We capture the incoming BasicBlocks as values as well as the incoming
79
// Values in order to check for structural similarity.
80
if (PHINode *PN = dyn_cast<PHINode>(Inst))
81
for (BasicBlock *BB : PN->blocks())
82
OperVals.push_back(BB);
83
}
84
85
IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
86
: IDL(&IDList) {}
87
88
void IRInstructionData::setBranchSuccessors(
89
DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
90
assert(isa<BranchInst>(Inst) && "Instruction must be branch");
91
92
BranchInst *BI = cast<BranchInst>(Inst);
93
DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
94
95
BBNumIt = BasicBlockToInteger.find(BI->getParent());
96
assert(BBNumIt != BasicBlockToInteger.end() &&
97
"Could not find location for BasicBlock!");
98
99
int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
100
101
for (Value *V : getBlockOperVals()) {
102
BasicBlock *Successor = cast<BasicBlock>(V);
103
BBNumIt = BasicBlockToInteger.find(Successor);
104
assert(BBNumIt != BasicBlockToInteger.end() &&
105
"Could not find number for BasicBlock!");
106
int OtherBlockNumber = static_cast<int>(BBNumIt->second);
107
108
int Relative = OtherBlockNumber - CurrentBlockNumber;
109
RelativeBlockLocations.push_back(Relative);
110
}
111
}
112
113
ArrayRef<Value *> IRInstructionData::getBlockOperVals() {
114
assert((isa<BranchInst>(Inst) ||
115
isa<PHINode>(Inst)) && "Instruction must be branch or PHINode");
116
117
if (BranchInst *BI = dyn_cast<BranchInst>(Inst))
118
return ArrayRef<Value *>(
119
std::next(OperVals.begin(), BI->isConditional() ? 1 : 0),
120
OperVals.end()
121
);
122
123
if (PHINode *PN = dyn_cast<PHINode>(Inst))
124
return ArrayRef<Value *>(
125
std::next(OperVals.begin(), PN->getNumIncomingValues()),
126
OperVals.end()
127
);
128
129
return ArrayRef<Value *>();
130
}
131
132
void IRInstructionData::setCalleeName(bool MatchByName) {
133
CallInst *CI = dyn_cast<CallInst>(Inst);
134
assert(CI && "Instruction must be call");
135
136
CalleeName = "";
137
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
138
// To hash intrinsics, we use the opcode, and types like the other
139
// instructions, but also, the Intrinsic ID, and the Name of the
140
// intrinsic.
141
Intrinsic::ID IntrinsicID = II->getIntrinsicID();
142
FunctionType *FT = II->getFunctionType();
143
// If there is an overloaded name, we have to use the complex version
144
// of getName to get the entire string.
145
if (Intrinsic::isOverloaded(IntrinsicID))
146
CalleeName =
147
Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
148
// If there is not an overloaded name, we only need to use this version.
149
else
150
CalleeName = Intrinsic::getName(IntrinsicID).str();
151
152
return;
153
}
154
155
if (!CI->isIndirectCall() && MatchByName)
156
CalleeName = CI->getCalledFunction()->getName().str();
157
}
158
159
void IRInstructionData::setPHIPredecessors(
160
DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
161
assert(isa<PHINode>(Inst) && "Instruction must be phi node");
162
163
PHINode *PN = cast<PHINode>(Inst);
164
DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
165
166
BBNumIt = BasicBlockToInteger.find(PN->getParent());
167
assert(BBNumIt != BasicBlockToInteger.end() &&
168
"Could not find location for BasicBlock!");
169
170
int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
171
172
// Convert the incoming blocks of the PHINode to an integer value, based on
173
// the relative distances between the current block and the incoming block.
174
for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
175
BasicBlock *Incoming = PN->getIncomingBlock(Idx);
176
BBNumIt = BasicBlockToInteger.find(Incoming);
177
assert(BBNumIt != BasicBlockToInteger.end() &&
178
"Could not find number for BasicBlock!");
179
int OtherBlockNumber = static_cast<int>(BBNumIt->second);
180
181
int Relative = OtherBlockNumber - CurrentBlockNumber;
182
RelativeBlockLocations.push_back(Relative);
183
}
184
}
185
186
CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
187
switch (CI->getPredicate()) {
188
case CmpInst::FCMP_OGT:
189
case CmpInst::FCMP_UGT:
190
case CmpInst::FCMP_OGE:
191
case CmpInst::FCMP_UGE:
192
case CmpInst::ICMP_SGT:
193
case CmpInst::ICMP_UGT:
194
case CmpInst::ICMP_SGE:
195
case CmpInst::ICMP_UGE:
196
return CI->getSwappedPredicate();
197
default:
198
return CI->getPredicate();
199
}
200
}
201
202
CmpInst::Predicate IRInstructionData::getPredicate() const {
203
assert(isa<CmpInst>(Inst) &&
204
"Can only get a predicate from a compare instruction");
205
206
if (RevisedPredicate)
207
return *RevisedPredicate;
208
209
return cast<CmpInst>(Inst)->getPredicate();
210
}
211
212
StringRef IRInstructionData::getCalleeName() const {
213
assert(isa<CallInst>(Inst) &&
214
"Can only get a name from a call instruction");
215
216
assert(CalleeName && "CalleeName has not been set");
217
218
return *CalleeName;
219
}
220
221
bool IRSimilarity::isClose(const IRInstructionData &A,
222
const IRInstructionData &B) {
223
224
if (!A.Legal || !B.Legal)
225
return false;
226
227
// Check if we are performing the same sort of operation on the same types
228
// but not on the same values.
229
if (!A.Inst->isSameOperationAs(B.Inst)) {
230
// If there is a predicate, this means that either there is a swapped
231
// predicate, or that the types are different, we want to make sure that
232
// the predicates are equivalent via swapping.
233
if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
234
235
if (A.getPredicate() != B.getPredicate())
236
return false;
237
238
// If the predicates are the same via swap, make sure that the types are
239
// still the same.
240
auto ZippedTypes = zip(A.OperVals, B.OperVals);
241
242
return all_of(
243
ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
244
return std::get<0>(R)->getType() == std::get<1>(R)->getType();
245
});
246
}
247
248
return false;
249
}
250
251
// Since any GEP Instruction operands after the first operand cannot be
252
// defined by a register, we must make sure that the operands after the first
253
// are the same in the two instructions
254
if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
255
auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
256
257
// If the instructions do not have the same inbounds restrictions, we do
258
// not consider them the same.
259
if (GEP->isInBounds() != OtherGEP->isInBounds())
260
return false;
261
262
auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
263
264
// We increment here since we do not care about the first instruction,
265
// we only care about the following operands since they must be the
266
// exact same to be considered similar.
267
return all_of(drop_begin(ZippedOperands),
268
[](std::tuple<llvm::Use &, llvm::Use &> R) {
269
return std::get<0>(R) == std::get<1>(R);
270
});
271
}
272
273
// If the instructions are functions calls, we make sure that the function
274
// name is the same. We already know that the types are since is
275
// isSameOperationAs is true.
276
if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
277
if (A.getCalleeName() != B.getCalleeName())
278
return false;
279
}
280
281
if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
282
A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
283
return false;
284
285
return true;
286
}
287
288
// TODO: This is the same as the MachineOutliner, and should be consolidated
289
// into the same interface.
290
void IRInstructionMapper::convertToUnsignedVec(
291
BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
292
std::vector<unsigned> &IntegerMapping) {
293
BasicBlock::iterator It = BB.begin();
294
295
std::vector<unsigned> IntegerMappingForBB;
296
std::vector<IRInstructionData *> InstrListForBB;
297
298
for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
299
switch (InstClassifier.visit(*It)) {
300
case InstrType::Legal:
301
mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
302
break;
303
case InstrType::Illegal:
304
mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
305
break;
306
case InstrType::Invisible:
307
AddedIllegalLastTime = false;
308
break;
309
}
310
}
311
312
if (AddedIllegalLastTime)
313
mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
314
for (IRInstructionData *ID : InstrListForBB)
315
this->IDL->push_back(*ID);
316
llvm::append_range(InstrList, InstrListForBB);
317
llvm::append_range(IntegerMapping, IntegerMappingForBB);
318
}
319
320
// TODO: This is the same as the MachineOutliner, and should be consolidated
321
// into the same interface.
322
unsigned IRInstructionMapper::mapToLegalUnsigned(
323
BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
324
std::vector<IRInstructionData *> &InstrListForBB) {
325
// We added something legal, so we should unset the AddedLegalLastTime
326
// flag.
327
AddedIllegalLastTime = false;
328
329
// If we have at least two adjacent legal instructions (which may have
330
// invisible instructions in between), remember that.
331
if (CanCombineWithPrevInstr)
332
HaveLegalRange = true;
333
CanCombineWithPrevInstr = true;
334
335
// Get the integer for this instruction or give it the current
336
// LegalInstrNumber.
337
IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
338
InstrListForBB.push_back(ID);
339
340
if (isa<BranchInst>(*It))
341
ID->setBranchSuccessors(BasicBlockToInteger);
342
343
if (isa<CallInst>(*It))
344
ID->setCalleeName(EnableMatchCallsByName);
345
346
if (isa<PHINode>(*It))
347
ID->setPHIPredecessors(BasicBlockToInteger);
348
349
// Add to the instruction list
350
bool WasInserted;
351
DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
352
ResultIt;
353
std::tie(ResultIt, WasInserted) =
354
InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
355
unsigned INumber = ResultIt->second;
356
357
// There was an insertion.
358
if (WasInserted)
359
LegalInstrNumber++;
360
361
IntegerMappingForBB.push_back(INumber);
362
363
// Make sure we don't overflow or use any integers reserved by the DenseMap.
364
assert(LegalInstrNumber < IllegalInstrNumber &&
365
"Instruction mapping overflow!");
366
367
assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
368
"Tried to assign DenseMap tombstone or empty key to instruction.");
369
assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
370
"Tried to assign DenseMap tombstone or empty key to instruction.");
371
372
return INumber;
373
}
374
375
IRInstructionData *
376
IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
377
IRInstructionDataList &IDL) {
378
return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
379
}
380
381
IRInstructionData *
382
IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
383
return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
384
}
385
386
IRInstructionDataList *
387
IRInstructionMapper::allocateIRInstructionDataList() {
388
return new (IDLAllocator->Allocate()) IRInstructionDataList();
389
}
390
391
// TODO: This is the same as the MachineOutliner, and should be consolidated
392
// into the same interface.
393
unsigned IRInstructionMapper::mapToIllegalUnsigned(
394
BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
395
std::vector<IRInstructionData *> &InstrListForBB, bool End) {
396
// Can't combine an illegal instruction. Set the flag.
397
CanCombineWithPrevInstr = false;
398
399
// Only add one illegal number per range of legal numbers.
400
if (AddedIllegalLastTime)
401
return IllegalInstrNumber;
402
403
IRInstructionData *ID = nullptr;
404
if (!End)
405
ID = allocateIRInstructionData(*It, false, *IDL);
406
else
407
ID = allocateIRInstructionData(*IDL);
408
InstrListForBB.push_back(ID);
409
410
// Remember that we added an illegal number last time.
411
AddedIllegalLastTime = true;
412
unsigned INumber = IllegalInstrNumber;
413
IntegerMappingForBB.push_back(IllegalInstrNumber--);
414
415
assert(LegalInstrNumber < IllegalInstrNumber &&
416
"Instruction mapping overflow!");
417
418
assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
419
"IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
420
421
assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
422
"IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
423
424
return INumber;
425
}
426
427
IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
428
IRInstructionData *FirstInstIt,
429
IRInstructionData *LastInstIt)
430
: StartIdx(StartIdx), Len(Len) {
431
432
assert(FirstInstIt != nullptr && "Instruction is nullptr!");
433
assert(LastInstIt != nullptr && "Instruction is nullptr!");
434
assert(StartIdx + Len > StartIdx &&
435
"Overflow for IRSimilarityCandidate range?");
436
assert(Len - 1 == static_cast<unsigned>(std::distance(
437
iterator(FirstInstIt), iterator(LastInstIt))) &&
438
"Length of the first and last IRInstructionData do not match the "
439
"given length");
440
441
// We iterate over the given instructions, and map each unique value
442
// to a unique number in the IRSimilarityCandidate ValueToNumber and
443
// NumberToValue maps. A constant get its own value globally, the individual
444
// uses of the constants are not considered to be unique.
445
//
446
// IR: Mapping Added:
447
// %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
448
// %add2 = add i32 %a, %1 %add2 -> 4
449
// %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
450
//
451
// when replace with global values, starting from 1, would be
452
//
453
// 3 = add i32 1, 2
454
// 4 = add i32 1, 3
455
// 6 = add i32 5, 2
456
unsigned LocalValNumber = 1;
457
IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
458
for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
459
// Map the operand values to an unsigned integer if it does not already
460
// have an unsigned integer assigned to it.
461
for (Value *Arg : ID->OperVals)
462
if (!ValueToNumber.contains(Arg)) {
463
ValueToNumber.try_emplace(Arg, LocalValNumber);
464
NumberToValue.try_emplace(LocalValNumber, Arg);
465
LocalValNumber++;
466
}
467
468
// Mapping the instructions to an unsigned integer if it is not already
469
// exist in the mapping.
470
if (!ValueToNumber.contains(ID->Inst)) {
471
ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
472
NumberToValue.try_emplace(LocalValNumber, ID->Inst);
473
LocalValNumber++;
474
}
475
}
476
477
// Setting the first and last instruction data pointers for the candidate. If
478
// we got through the entire for loop without hitting an assert, we know
479
// that both of these instructions are not nullptrs.
480
FirstInst = FirstInstIt;
481
LastInst = LastInstIt;
482
483
// Add the basic blocks contained in the set into the global value numbering.
484
DenseSet<BasicBlock *> BBSet;
485
getBasicBlocks(BBSet);
486
for (BasicBlock *BB : BBSet) {
487
if (ValueToNumber.contains(BB))
488
continue;
489
490
ValueToNumber.try_emplace(BB, LocalValNumber);
491
NumberToValue.try_emplace(LocalValNumber, BB);
492
LocalValNumber++;
493
}
494
}
495
496
bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
497
const IRSimilarityCandidate &B) {
498
if (A.getLength() != B.getLength())
499
return false;
500
501
auto InstrDataForBoth =
502
zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
503
504
return all_of(InstrDataForBoth,
505
[](std::tuple<IRInstructionData &, IRInstructionData &> R) {
506
IRInstructionData &A = std::get<0>(R);
507
IRInstructionData &B = std::get<1>(R);
508
if (!A.Legal || !B.Legal)
509
return false;
510
return isClose(A, B);
511
});
512
}
513
514
/// Determine if one or more of the assigned global value numbers for the
515
/// operands in \p TargetValueNumbers is in the current mapping set for operand
516
/// numbers in \p SourceOperands. The set of possible corresponding global
517
/// value numbers are replaced with the most recent version of compatible
518
/// values.
519
///
520
/// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
521
/// value number for the source IRInstructionCandidate.
522
/// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
523
/// IRSimilarityCandidate global value numbers to a set of possible numbers in
524
/// the target.
525
/// \param [in] SourceOperands - The operands in the original
526
/// IRSimilarityCandidate in the current instruction.
527
/// \param [in] TargetValueNumbers - The global value numbers of the operands in
528
/// the corresponding Instruction in the other IRSimilarityCandidate.
529
/// \returns true if there exists a possible mapping between the source
530
/// Instruction operands and the target Instruction operands, and false if not.
531
static bool checkNumberingAndReplaceCommutative(
532
const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
533
DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
534
ArrayRef<Value *> &SourceOperands,
535
DenseSet<unsigned> &TargetValueNumbers){
536
537
DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
538
539
unsigned ArgVal;
540
bool WasInserted;
541
542
// Iterate over the operands in the source IRSimilarityCandidate to determine
543
// whether there exists an operand in the other IRSimilarityCandidate that
544
// creates a valid mapping of Value to Value between the
545
// IRSimilarityCaniddates.
546
for (Value *V : SourceOperands) {
547
ArgVal = SourceValueToNumberMapping.find(V)->second;
548
549
// Instead of finding a current mapping, we attempt to insert a set.
550
std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
551
std::make_pair(ArgVal, TargetValueNumbers));
552
553
// We need to iterate over the items in other IRSimilarityCandidate's
554
// Instruction to determine whether there is a valid mapping of
555
// Value to Value.
556
DenseSet<unsigned> NewSet;
557
for (unsigned &Curr : ValueMappingIt->second)
558
// If we can find the value in the mapping, we add it to the new set.
559
if (TargetValueNumbers.contains(Curr))
560
NewSet.insert(Curr);
561
562
// If we could not find a Value, return 0.
563
if (NewSet.empty())
564
return false;
565
566
// Otherwise replace the old mapping with the newly constructed one.
567
if (NewSet.size() != ValueMappingIt->second.size())
568
ValueMappingIt->second.swap(NewSet);
569
570
// We have reached no conclusions about the mapping, and cannot remove
571
// any items from the other operands, so we move to check the next operand.
572
if (ValueMappingIt->second.size() != 1)
573
continue;
574
575
unsigned ValToRemove = *ValueMappingIt->second.begin();
576
// When there is only one item left in the mapping for and operand, remove
577
// the value from the other operands. If it results in there being no
578
// mapping, return false, it means the mapping is wrong
579
for (Value *InnerV : SourceOperands) {
580
if (V == InnerV)
581
continue;
582
583
unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
584
ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
585
if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
586
continue;
587
588
ValueMappingIt->second.erase(ValToRemove);
589
if (ValueMappingIt->second.empty())
590
return false;
591
}
592
}
593
594
return true;
595
}
596
597
/// Determine if operand number \p TargetArgVal is in the current mapping set
598
/// for operand number \p SourceArgVal.
599
///
600
/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
601
/// value numbers from source IRSimilarityCandidate to target
602
/// IRSimilarityCandidate.
603
/// \param [in] SourceArgVal The global value number for an operand in the
604
/// in the original candidate.
605
/// \param [in] TargetArgVal The global value number for the corresponding
606
/// operand in the other candidate.
607
/// \returns True if there exists a mapping and false if not.
608
bool checkNumberingAndReplace(
609
DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
610
unsigned SourceArgVal, unsigned TargetArgVal) {
611
// We are given two unsigned integers representing the global values of
612
// the operands in different IRSimilarityCandidates and a current mapping
613
// between the two.
614
//
615
// Source Operand GVN: 1
616
// Target Operand GVN: 2
617
// CurrentMapping: {1: {1, 2}}
618
//
619
// Since we have mapping, and the target operand is contained in the set, we
620
// update it to:
621
// CurrentMapping: {1: {2}}
622
// and can return true. But, if the mapping was
623
// CurrentMapping: {1: {3}}
624
// we would return false.
625
626
bool WasInserted;
627
DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
628
629
std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
630
std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
631
632
// If we created a new mapping, then we are done.
633
if (WasInserted)
634
return true;
635
636
// If there is more than one option in the mapping set, and the target value
637
// is included in the mapping set replace that set with one that only includes
638
// the target value, as it is the only valid mapping via the non commutative
639
// instruction.
640
641
DenseSet<unsigned> &TargetSet = Val->second;
642
if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
643
TargetSet.clear();
644
TargetSet.insert(TargetArgVal);
645
return true;
646
}
647
648
// Return true if we can find the value in the set.
649
return TargetSet.contains(TargetArgVal);
650
}
651
652
bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
653
OperandMapping A, OperandMapping B) {
654
// Iterators to keep track of where we are in the operands for each
655
// Instruction.
656
ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
657
ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
658
unsigned OperandLength = A.OperVals.size();
659
660
// For each operand, get the value numbering and ensure it is consistent.
661
for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
662
unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
663
unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
664
665
// Attempt to add a set with only the target value. If there is no mapping
666
// we can create it here.
667
//
668
// For an instruction like a subtraction:
669
// IRSimilarityCandidateA: IRSimilarityCandidateB:
670
// %resultA = sub %a, %b %resultB = sub %d, %e
671
//
672
// We map %a -> %d and %b -> %e.
673
//
674
// And check to see whether their mapping is consistent in
675
// checkNumberingAndReplace.
676
677
if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
678
return false;
679
680
if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
681
return false;
682
}
683
return true;
684
}
685
686
bool IRSimilarityCandidate::compareCommutativeOperandMapping(
687
OperandMapping A, OperandMapping B) {
688
DenseSet<unsigned> ValueNumbersA;
689
DenseSet<unsigned> ValueNumbersB;
690
691
ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
692
ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
693
unsigned OperandLength = A.OperVals.size();
694
695
// Find the value number sets for the operands.
696
for (unsigned Idx = 0; Idx < OperandLength;
697
Idx++, VItA++, VItB++) {
698
ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
699
ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
700
}
701
702
// Iterate over the operands in the first IRSimilarityCandidate and make sure
703
// there exists a possible mapping with the operands in the second
704
// IRSimilarityCandidate.
705
if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
706
A.ValueNumberMapping, A.OperVals,
707
ValueNumbersB))
708
return false;
709
710
// Iterate over the operands in the second IRSimilarityCandidate and make sure
711
// there exists a possible mapping with the operands in the first
712
// IRSimilarityCandidate.
713
if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
714
B.ValueNumberMapping, B.OperVals,
715
ValueNumbersA))
716
return false;
717
718
return true;
719
}
720
721
bool IRSimilarityCandidate::compareAssignmentMapping(
722
const unsigned InstValA, const unsigned &InstValB,
723
DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
724
DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
725
DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
726
bool WasInserted;
727
std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
728
std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
729
if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
730
return false;
731
else if (ValueMappingIt->second.size() != 1) {
732
for (unsigned OtherVal : ValueMappingIt->second) {
733
if (OtherVal == InstValB)
734
continue;
735
if (!ValueNumberMappingA.contains(OtherVal))
736
continue;
737
if (!ValueNumberMappingA[OtherVal].contains(InstValA))
738
continue;
739
ValueNumberMappingA[OtherVal].erase(InstValA);
740
}
741
ValueNumberMappingA.erase(ValueMappingIt);
742
std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
743
std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
744
}
745
746
return true;
747
}
748
749
bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
750
RelativeLocMapping B) {
751
// Get the basic blocks the label refers to.
752
BasicBlock *ABB = cast<BasicBlock>(A.OperVal);
753
BasicBlock *BBB = cast<BasicBlock>(B.OperVal);
754
755
// Get the basic blocks contained in each region.
756
DenseSet<BasicBlock *> BasicBlockA;
757
DenseSet<BasicBlock *> BasicBlockB;
758
A.IRSC.getBasicBlocks(BasicBlockA);
759
B.IRSC.getBasicBlocks(BasicBlockB);
760
761
// Determine if the block is contained in the region.
762
bool AContained = BasicBlockA.contains(ABB);
763
bool BContained = BasicBlockB.contains(BBB);
764
765
// Both blocks need to be contained in the region, or both need to be outside
766
// the region.
767
if (AContained != BContained)
768
return false;
769
770
// If both are contained, then we need to make sure that the relative
771
// distance to the target blocks are the same.
772
if (AContained)
773
return A.RelativeLocation == B.RelativeLocation;
774
return true;
775
}
776
777
bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
778
const IRSimilarityCandidate &B) {
779
DenseMap<unsigned, DenseSet<unsigned>> MappingA;
780
DenseMap<unsigned, DenseSet<unsigned>> MappingB;
781
return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
782
}
783
784
typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
785
SmallVector<int, 4> &, ArrayRef<Value *> &,
786
ArrayRef<Value *> &>
787
ZippedRelativeLocationsT;
788
789
bool IRSimilarityCandidate::compareStructure(
790
const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
791
DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
792
DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
793
if (A.getLength() != B.getLength())
794
return false;
795
796
if (A.ValueToNumber.size() != B.ValueToNumber.size())
797
return false;
798
799
iterator ItA = A.begin();
800
iterator ItB = B.begin();
801
802
// These ValueNumber Mapping sets create a create a mapping between the values
803
// in one candidate to values in the other candidate. If we create a set with
804
// one element, and that same element maps to the original element in the
805
// candidate we have a good mapping.
806
807
// Iterate over the instructions contained in each candidate
808
unsigned SectionLength = A.getStartIdx() + A.getLength();
809
for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
810
ItA++, ItB++, Loc++) {
811
// Make sure the instructions are similar to one another.
812
if (!isClose(*ItA, *ItB))
813
return false;
814
815
Instruction *IA = ItA->Inst;
816
Instruction *IB = ItB->Inst;
817
818
if (!ItA->Legal || !ItB->Legal)
819
return false;
820
821
// Get the operand sets for the instructions.
822
ArrayRef<Value *> OperValsA = ItA->OperVals;
823
ArrayRef<Value *> OperValsB = ItB->OperVals;
824
825
unsigned InstValA = A.ValueToNumber.find(IA)->second;
826
unsigned InstValB = B.ValueToNumber.find(IB)->second;
827
828
// Ensure that the mappings for the instructions exists.
829
if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA,
830
ValueNumberMappingB))
831
return false;
832
833
if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB,
834
ValueNumberMappingA))
835
return false;
836
837
// We have different paths for commutative instructions and non-commutative
838
// instructions since commutative instructions could allow multiple mappings
839
// to certain values.
840
if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
841
!isa<IntrinsicInst>(IA)) {
842
if (!compareCommutativeOperandMapping(
843
{A, OperValsA, ValueNumberMappingA},
844
{B, OperValsB, ValueNumberMappingB}))
845
return false;
846
continue;
847
}
848
849
// Handle the non-commutative cases.
850
if (!compareNonCommutativeOperandMapping(
851
{A, OperValsA, ValueNumberMappingA},
852
{B, OperValsB, ValueNumberMappingB}))
853
return false;
854
855
// Here we check that between two corresponding instructions,
856
// when referring to a basic block in the same region, the
857
// relative locations are the same. And, that the instructions refer to
858
// basic blocks outside the region in the same corresponding locations.
859
860
// We are able to make the assumption about blocks outside of the region
861
// since the target block labels are considered values and will follow the
862
// same number matching that we defined for the other instructions in the
863
// region. So, at this point, in each location we target a specific block
864
// outside the region, we are targeting a corresponding block in each
865
// analagous location in the region we are comparing to.
866
if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
867
!(isa<PHINode>(IA) && isa<PHINode>(IB)))
868
continue;
869
870
SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
871
SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
872
ArrayRef<Value *> ABL = ItA->getBlockOperVals();
873
ArrayRef<Value *> BBL = ItB->getBlockOperVals();
874
875
// Check to make sure that the number of operands, and branching locations
876
// between BranchInsts is the same.
877
if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
878
ABL.size() != BBL.size())
879
return false;
880
881
assert(RelBlockLocsA.size() == ABL.size() &&
882
"Block information vectors not the same size.");
883
assert(RelBlockLocsB.size() == BBL.size() &&
884
"Block information vectors not the same size.");
885
886
ZippedRelativeLocationsT ZippedRelativeLocations =
887
zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
888
if (any_of(ZippedRelativeLocations,
889
[&A, &B](std::tuple<int, int, Value *, Value *> R) {
890
return !checkRelativeLocations(
891
{A, std::get<0>(R), std::get<2>(R)},
892
{B, std::get<1>(R), std::get<3>(R)});
893
}))
894
return false;
895
}
896
return true;
897
}
898
899
bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
900
const IRSimilarityCandidate &B) {
901
auto DoesOverlap = [](const IRSimilarityCandidate &X,
902
const IRSimilarityCandidate &Y) {
903
// Check:
904
// XXXXXX X starts before Y ends
905
// YYYYYYY Y starts after X starts
906
return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
907
};
908
909
return DoesOverlap(A, B) || DoesOverlap(B, A);
910
}
911
912
void IRSimilarityIdentifier::populateMapper(
913
Module &M, std::vector<IRInstructionData *> &InstrList,
914
std::vector<unsigned> &IntegerMapping) {
915
916
std::vector<IRInstructionData *> InstrListForModule;
917
std::vector<unsigned> IntegerMappingForModule;
918
// Iterate over the functions in the module to map each Instruction in each
919
// BasicBlock to an unsigned integer.
920
Mapper.initializeForBBs(M);
921
922
for (Function &F : M) {
923
924
if (F.empty())
925
continue;
926
927
for (BasicBlock &BB : F) {
928
929
// BB has potential to have similarity since it has a size greater than 2
930
// and can therefore match other regions greater than 2. Map it to a list
931
// of unsigned integers.
932
Mapper.convertToUnsignedVec(BB, InstrListForModule,
933
IntegerMappingForModule);
934
}
935
936
BasicBlock::iterator It = F.begin()->end();
937
Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
938
true);
939
if (InstrListForModule.size() > 0)
940
Mapper.IDL->push_back(*InstrListForModule.back());
941
}
942
943
// Insert the InstrListForModule at the end of the overall InstrList so that
944
// we can have a long InstrList for the entire set of Modules being analyzed.
945
llvm::append_range(InstrList, InstrListForModule);
946
// Do the same as above, but for IntegerMapping.
947
llvm::append_range(IntegerMapping, IntegerMappingForModule);
948
}
949
950
void IRSimilarityIdentifier::populateMapper(
951
ArrayRef<std::unique_ptr<Module>> &Modules,
952
std::vector<IRInstructionData *> &InstrList,
953
std::vector<unsigned> &IntegerMapping) {
954
955
// Iterate over, and map the instructions in each module.
956
for (const std::unique_ptr<Module> &M : Modules)
957
populateMapper(*M, InstrList, IntegerMapping);
958
}
959
960
/// From a repeated subsequence, find all the different instances of the
961
/// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
962
/// the IRInstructionData in subsequence.
963
///
964
/// \param [in] Mapper - The instruction mapper for basic correctness checks.
965
/// \param [in] InstrList - The vector that holds the instruction data.
966
/// \param [in] IntegerMapping - The vector that holds the mapped integers.
967
/// \param [out] CandsForRepSubstring - The vector to store the generated
968
/// IRSimilarityCandidates.
969
static void createCandidatesFromSuffixTree(
970
const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
971
std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
972
std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
973
974
unsigned StringLen = RS.Length;
975
if (StringLen < 2)
976
return;
977
978
// Create an IRSimilarityCandidate for instance of this subsequence \p RS.
979
for (const unsigned &StartIdx : RS.StartIndices) {
980
unsigned EndIdx = StartIdx + StringLen - 1;
981
982
// Check that this subsequence does not contain an illegal instruction.
983
bool ContainsIllegal = false;
984
for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
985
unsigned Key = IntegerMapping[CurrIdx];
986
if (Key > Mapper.IllegalInstrNumber) {
987
ContainsIllegal = true;
988
break;
989
}
990
}
991
992
// If we have an illegal instruction, we should not create an
993
// IRSimilarityCandidate for this region.
994
if (ContainsIllegal)
995
continue;
996
997
// We are getting iterators to the instructions in this region of code
998
// by advancing the start and end indices from the start of the
999
// InstrList.
1000
std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
1001
std::advance(StartIt, StartIdx);
1002
std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
1003
std::advance(EndIt, EndIdx);
1004
1005
CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
1006
}
1007
}
1008
1009
void IRSimilarityCandidate::createCanonicalRelationFrom(
1010
IRSimilarityCandidate &SourceCand,
1011
DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
1012
DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
1013
assert(SourceCand.CanonNumToNumber.size() != 0 &&
1014
"Base canonical relationship is empty!");
1015
assert(SourceCand.NumberToCanonNum.size() != 0 &&
1016
"Base canonical relationship is empty!");
1017
1018
assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
1019
assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
1020
1021
DenseSet<unsigned> UsedGVNs;
1022
// Iterate over the mappings provided from this candidate to SourceCand. We
1023
// are then able to map the GVN in this candidate to the same canonical number
1024
// given to the corresponding GVN in SourceCand.
1025
for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
1026
unsigned SourceGVN = GVNMapping.first;
1027
1028
assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
1029
1030
unsigned ResultGVN;
1031
// We need special handling if we have more than one potential value. This
1032
// means that there are at least two GVNs that could correspond to this GVN.
1033
// This could lead to potential swapping later on, so we make a decision
1034
// here to ensure a one-to-one mapping.
1035
if (GVNMapping.second.size() > 1) {
1036
bool Found = false;
1037
for (unsigned Val : GVNMapping.second) {
1038
// We make sure the target value number hasn't already been reserved.
1039
if (UsedGVNs.contains(Val))
1040
continue;
1041
1042
// We make sure that the opposite mapping is still consistent.
1043
DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
1044
FromSourceMapping.find(Val);
1045
1046
if (!It->second.contains(SourceGVN))
1047
continue;
1048
1049
// We pick the first item that satisfies these conditions.
1050
Found = true;
1051
ResultGVN = Val;
1052
break;
1053
}
1054
1055
assert(Found && "Could not find matching value for source GVN");
1056
(void)Found;
1057
1058
} else
1059
ResultGVN = *GVNMapping.second.begin();
1060
1061
// Whatever GVN is found, we mark it as used.
1062
UsedGVNs.insert(ResultGVN);
1063
1064
unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
1065
CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
1066
NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
1067
}
1068
1069
DenseSet<BasicBlock *> BBSet;
1070
getBasicBlocks(BBSet);
1071
// Find canonical numbers for the BasicBlocks in the current candidate.
1072
// This is done by finding the corresponding value for the first instruction
1073
// in the block in the current candidate, finding the matching value in the
1074
// source candidate. Then by finding the parent of this value, use the
1075
// canonical number of the block in the source candidate for the canonical
1076
// number in the current candidate.
1077
for (BasicBlock *BB : BBSet) {
1078
unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
1079
1080
// We can skip the BasicBlock if the canonical numbering has already been
1081
// found in a separate instruction.
1082
if (NumberToCanonNum.contains(BBGVNForCurrCand))
1083
continue;
1084
1085
// If the basic block is the starting block, then the shared instruction may
1086
// not be the first instruction in the block, it will be the first
1087
// instruction in the similarity region.
1088
Value *FirstOutlineInst = BB == getStartBB()
1089
? frontInstruction()
1090
: &*BB->instructionsWithoutDebug().begin();
1091
1092
unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
1093
unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
1094
unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
1095
Value *SourceV = *SourceCand.fromGVN(SourceGVN);
1096
BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
1097
unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
1098
unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
1099
CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1100
NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1101
}
1102
}
1103
1104
void IRSimilarityCandidate::createCanonicalRelationFrom(
1105
IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
1106
IRSimilarityCandidate &TargetCandLarge) {
1107
assert(!SourceCand.CanonNumToNumber.empty() &&
1108
"Canonical Relationship is non-empty");
1109
assert(!SourceCand.NumberToCanonNum.empty() &&
1110
"Canonical Relationship is non-empty");
1111
1112
assert(!SourceCandLarge.CanonNumToNumber.empty() &&
1113
"Canonical Relationship is non-empty");
1114
assert(!SourceCandLarge.NumberToCanonNum.empty() &&
1115
"Canonical Relationship is non-empty");
1116
1117
assert(!TargetCandLarge.CanonNumToNumber.empty() &&
1118
"Canonical Relationship is non-empty");
1119
assert(!TargetCandLarge.NumberToCanonNum.empty() &&
1120
"Canonical Relationship is non-empty");
1121
1122
assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
1123
assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
1124
1125
// We're going to use the larger candidates as a "bridge" to create the
1126
// canonical number for the target candidate since we have idetified two
1127
// candidates as subsequences of larger sequences, and therefore must be
1128
// structurally similar.
1129
for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
1130
Value *CurrVal = ValueNumPair.first;
1131
unsigned TargetCandGVN = ValueNumPair.second;
1132
1133
// Find the numbering in the large candidate that surrounds the
1134
// current candidate.
1135
std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
1136
assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
1137
1138
// Get the canonical numbering in the large target candidate.
1139
std::optional<unsigned> OTargetCandCanon =
1140
TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
1141
assert(OTargetCandCanon.has_value() &&
1142
"Canononical Number not found for GVN");
1143
1144
// Get the GVN in the large source candidate from the canonical numbering.
1145
std::optional<unsigned> OLargeSourceGVN =
1146
SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
1147
assert(OLargeSourceGVN.has_value() &&
1148
"GVN Number not found for Canonical Number");
1149
1150
// Get the Value from the GVN in the large source candidate.
1151
std::optional<Value *> OLargeSourceV =
1152
SourceCandLarge.fromGVN(OLargeSourceGVN.value());
1153
assert(OLargeSourceV.has_value() && "Value not found for GVN");
1154
1155
// Get the GVN number for the Value in the source candidate.
1156
std::optional<unsigned> OSourceGVN =
1157
SourceCand.getGVN(OLargeSourceV.value());
1158
assert(OSourceGVN.has_value() && "GVN Number not found for Value");
1159
1160
// Get the canonical numbering from the GVN/
1161
std::optional<unsigned> OSourceCanon =
1162
SourceCand.getCanonicalNum(OSourceGVN.value());
1163
assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
1164
1165
// Insert the canonical numbering and GVN pair into their respective
1166
// mappings.
1167
CanonNumToNumber.insert(
1168
std::make_pair(OSourceCanon.value(), TargetCandGVN));
1169
NumberToCanonNum.insert(
1170
std::make_pair(TargetCandGVN, OSourceCanon.value()));
1171
}
1172
}
1173
1174
void IRSimilarityCandidate::createCanonicalMappingFor(
1175
IRSimilarityCandidate &CurrCand) {
1176
assert(CurrCand.CanonNumToNumber.size() == 0 &&
1177
"Canonical Relationship is non-empty");
1178
assert(CurrCand.NumberToCanonNum.size() == 0 &&
1179
"Canonical Relationship is non-empty");
1180
1181
unsigned CanonNum = 0;
1182
// Iterate over the value numbers found, the order does not matter in this
1183
// case.
1184
for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1185
CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1186
CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1187
CanonNum++;
1188
}
1189
}
1190
1191
/// Look for larger IRSimilarityCandidates From the previously matched
1192
/// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is
1193
/// an overlap, return a pair of structurally similar, larger
1194
/// IRSimilarityCandidates.
1195
///
1196
/// \param [in] CandA - The first candidate we are trying to determine the
1197
/// structure of.
1198
/// \param [in] CandB - The second candidate we are trying to determine the
1199
/// structure of.
1200
/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1201
/// a circuit to the IRSimilarityCandidates that include this instruction.
1202
/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1203
/// number representing the structural group assigned to it.
1204
static std::optional<
1205
std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1206
CheckLargerCands(
1207
IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB,
1208
DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1209
DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) {
1210
DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
1211
DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
1212
DenseSet<unsigned> IncludedGroupsA;
1213
DenseSet<unsigned> IncludedGroupsB;
1214
1215
// Find the overall similarity group numbers that fully contain the candidate,
1216
// and record the larger candidate for each group.
1217
auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
1218
std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1219
Result;
1220
1221
unsigned CandAStart = CandA.getStartIdx();
1222
unsigned CandAEnd = CandA.getEndIdx();
1223
unsigned CandBStart = CandB.getStartIdx();
1224
unsigned CandBEnd = CandB.getEndIdx();
1225
if (IdxToCandidateIt == IndexToIncludedCand.end())
1226
return Result;
1227
for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1228
if (MatchedCand->getStartIdx() > CandAStart ||
1229
(MatchedCand->getEndIdx() < CandAEnd))
1230
continue;
1231
unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1232
IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
1233
IncludedGroupsA.insert(GroupNum);
1234
}
1235
1236
// Find the overall similarity group numbers that fully contain the next
1237
// candidate, and record the larger candidate for each group.
1238
IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
1239
if (IdxToCandidateIt == IndexToIncludedCand.end())
1240
return Result;
1241
for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1242
if (MatchedCand->getStartIdx() > CandBStart ||
1243
MatchedCand->getEndIdx() < CandBEnd)
1244
continue;
1245
unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1246
IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
1247
IncludedGroupsB.insert(GroupNum);
1248
}
1249
1250
// Find the intersection between the two groups, these are the groups where
1251
// the larger candidates exist.
1252
set_intersect(IncludedGroupsA, IncludedGroupsB);
1253
1254
// If there is no intersection between the sets, then we cannot determine
1255
// whether or not there is a match.
1256
if (IncludedGroupsA.empty())
1257
return Result;
1258
1259
// Create a pair that contains the larger candidates.
1260
auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
1261
auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
1262
Result = std::make_pair(ItA->second, ItB->second);
1263
return Result;
1264
}
1265
1266
/// From the list of IRSimilarityCandidates, perform a comparison between each
1267
/// IRSimilarityCandidate to determine if there are overlapping
1268
/// IRInstructionData, or if they do not have the same structure.
1269
///
1270
/// \param [in] CandsForRepSubstring - The vector containing the
1271
/// IRSimilarityCandidates.
1272
/// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1273
/// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1274
/// vector are structurally similar to one another.
1275
/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1276
/// a circuit to the IRSimilarityCandidates that include this instruction.
1277
/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1278
/// number representing the structural group assigned to it.
1279
static void findCandidateStructures(
1280
std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1281
DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
1282
DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1283
DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup
1284
) {
1285
std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1286
InnerCandEndIt;
1287
1288
// IRSimilarityCandidates each have a structure for operand use. It is
1289
// possible that two instances of the same subsequences have different
1290
// structure. Each type of structure found is assigned a number. This
1291
// DenseMap maps an IRSimilarityCandidate to which type of similarity
1292
// discovered it fits within.
1293
DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1294
1295
// Find the compatibility from each candidate to the others to determine
1296
// which candidates overlap and which have the same structure by mapping
1297
// each structure to a different group.
1298
bool SameStructure;
1299
bool Inserted;
1300
unsigned CurrentGroupNum = 0;
1301
unsigned OuterGroupNum;
1302
DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;
1303
DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;
1304
DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;
1305
1306
// Iterate over the candidates to determine its structural and overlapping
1307
// compatibility with other instructions
1308
DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
1309
DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
1310
for (CandIt = CandsForRepSubstring.begin(),
1311
CandEndIt = CandsForRepSubstring.end();
1312
CandIt != CandEndIt; CandIt++) {
1313
1314
// Determine if it has an assigned structural group already.
1315
CandToGroupIt = CandToGroup.find(&*CandIt);
1316
if (CandToGroupIt == CandToGroup.end()) {
1317
// If not, we assign it one, and add it to our mapping.
1318
std::tie(CandToGroupIt, Inserted) =
1319
CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
1320
}
1321
1322
// Get the structural group number from the iterator.
1323
OuterGroupNum = CandToGroupIt->second;
1324
1325
// Check if we already have a list of IRSimilarityCandidates for the current
1326
// structural group. Create one if one does not exist.
1327
CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1328
if (CurrentGroupPair == StructuralGroups.end()) {
1329
IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
1330
std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1331
std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1332
}
1333
1334
// Iterate over the IRSimilarityCandidates following the current
1335
// IRSimilarityCandidate in the list to determine whether the two
1336
// IRSimilarityCandidates are compatible. This is so we do not repeat pairs
1337
// of IRSimilarityCandidates.
1338
for (InnerCandIt = std::next(CandIt),
1339
InnerCandEndIt = CandsForRepSubstring.end();
1340
InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1341
1342
// We check if the inner item has a group already, if it does, we skip it.
1343
CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1344
if (CandToGroupItInner != CandToGroup.end())
1345
continue;
1346
1347
// Check if we have found structural similarity between two candidates
1348
// that fully contains the first and second candidates.
1349
std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1350
LargerPair = CheckLargerCands(
1351
*CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
1352
1353
// If a pair was found, it means that we can assume that these smaller
1354
// substrings are also structurally similar. Use the larger candidates to
1355
// determine the canonical mapping between the two sections.
1356
if (LargerPair.has_value()) {
1357
SameStructure = true;
1358
InnerCandIt->createCanonicalRelationFrom(
1359
*CandIt, *LargerPair.value().first, *LargerPair.value().second);
1360
CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1361
CurrentGroupPair->second.push_back(*InnerCandIt);
1362
continue;
1363
}
1364
1365
// Otherwise we determine if they have the same structure and add it to
1366
// vector if they match.
1367
ValueNumberMappingA.clear();
1368
ValueNumberMappingB.clear();
1369
SameStructure = IRSimilarityCandidate::compareStructure(
1370
*CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1371
if (!SameStructure)
1372
continue;
1373
1374
InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1375
ValueNumberMappingB);
1376
CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1377
CurrentGroupPair->second.push_back(*InnerCandIt);
1378
}
1379
}
1380
}
1381
1382
void IRSimilarityIdentifier::findCandidates(
1383
std::vector<IRInstructionData *> &InstrList,
1384
std::vector<unsigned> &IntegerMapping) {
1385
SuffixTree ST(IntegerMapping);
1386
1387
std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1388
std::vector<SimilarityGroup> NewCandidateGroups;
1389
1390
DenseMap<unsigned, SimilarityGroup> StructuralGroups;
1391
DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand;
1392
DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1393
1394
// Iterate over the subsequences found by the Suffix Tree to create
1395
// IRSimilarityCandidates for each repeated subsequence and determine which
1396
// instances are structurally similar to one another.
1397
1398
// Sort the suffix tree from longest substring to shortest.
1399
std::vector<SuffixTree::RepeatedSubstring> RSes;
1400
for (SuffixTree::RepeatedSubstring &RS : ST)
1401
RSes.push_back(RS);
1402
1403
llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS,
1404
const SuffixTree::RepeatedSubstring &RHS) {
1405
return LHS.Length > RHS.Length;
1406
});
1407
for (SuffixTree::RepeatedSubstring &RS : RSes) {
1408
createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
1409
CandsForRepSubstring);
1410
1411
if (CandsForRepSubstring.size() < 2)
1412
continue;
1413
1414
findCandidateStructures(CandsForRepSubstring, StructuralGroups,
1415
IndexToIncludedCand, CandToGroup);
1416
for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
1417
// We only add the group if it contains more than one
1418
// IRSimilarityCandidate. If there is only one, that means there is no
1419
// other repeated subsequence with the same structure.
1420
if (Group.second.size() > 1) {
1421
SimilarityCandidates->push_back(Group.second);
1422
// Iterate over each candidate in the group, and add an entry for each
1423
// instruction included with a mapping to a set of
1424
// IRSimilarityCandidates that include that instruction.
1425
for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
1426
for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
1427
Idx <= Edx; ++Idx) {
1428
DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>>::iterator
1429
IdIt;
1430
IdIt = IndexToIncludedCand.find(Idx);
1431
bool Inserted = false;
1432
if (IdIt == IndexToIncludedCand.end())
1433
std::tie(IdIt, Inserted) = IndexToIncludedCand.insert(
1434
std::make_pair(Idx, DenseSet<IRSimilarityCandidate *>()));
1435
IdIt->second.insert(&IRCand);
1436
}
1437
// Add mapping of candidate to the overall similarity group number.
1438
CandToGroup.insert(
1439
std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
1440
}
1441
}
1442
}
1443
1444
CandsForRepSubstring.clear();
1445
StructuralGroups.clear();
1446
NewCandidateGroups.clear();
1447
}
1448
}
1449
1450
SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
1451
ArrayRef<std::unique_ptr<Module>> Modules) {
1452
resetSimilarityCandidates();
1453
1454
std::vector<IRInstructionData *> InstrList;
1455
std::vector<unsigned> IntegerMapping;
1456
Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1457
Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1458
Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1459
Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1460
Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1461
1462
populateMapper(Modules, InstrList, IntegerMapping);
1463
findCandidates(InstrList, IntegerMapping);
1464
1465
return *SimilarityCandidates;
1466
}
1467
1468
SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
1469
resetSimilarityCandidates();
1470
Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1471
Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1472
Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1473
Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1474
Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1475
1476
std::vector<IRInstructionData *> InstrList;
1477
std::vector<unsigned> IntegerMapping;
1478
1479
populateMapper(M, InstrList, IntegerMapping);
1480
findCandidates(InstrList, IntegerMapping);
1481
1482
return *SimilarityCandidates;
1483
}
1484
1485
INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
1486
"ir-similarity-identifier", false, true)
1487
1488
IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
1489
: ModulePass(ID) {
1490
initializeIRSimilarityIdentifierWrapperPassPass(
1491
*PassRegistry::getPassRegistry());
1492
}
1493
1494
bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
1495
IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1496
MatchCallsByName, !DisableIntrinsics,
1497
false));
1498
return false;
1499
}
1500
1501
bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {
1502
IRSI.reset();
1503
return false;
1504
}
1505
1506
bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
1507
IRSI->findSimilarity(M);
1508
return false;
1509
}
1510
1511
AnalysisKey IRSimilarityAnalysis::Key;
1512
IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
1513
ModuleAnalysisManager &) {
1514
auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1515
MatchCallsByName, !DisableIntrinsics,
1516
false);
1517
IRSI.findSimilarity(M);
1518
return IRSI;
1519
}
1520
1521
PreservedAnalyses
1522
IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
1523
IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);
1524
std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
1525
IRSI.getSimilarity();
1526
1527
for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1528
OS << CandVec.size() << " candidates of length "
1529
<< CandVec.begin()->getLength() << ". Found in: \n";
1530
for (IRSimilarityCandidate &Cand : CandVec) {
1531
OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
1532
<< ", Basic Block: ";
1533
if (Cand.front()->Inst->getParent()->getName().str() == "")
1534
OS << "(unnamed)";
1535
else
1536
OS << Cand.front()->Inst->getParent()->getName().str();
1537
OS << "\n Start Instruction: ";
1538
Cand.frontInstruction()->print(OS);
1539
OS << "\n End Instruction: ";
1540
Cand.backInstruction()->print(OS);
1541
OS << "\n";
1542
}
1543
}
1544
1545
return PreservedAnalyses::all();
1546
}
1547
1548
char IRSimilarityIdentifierWrapperPass::ID = 0;
1549
1550