Path: blob/main/contrib/llvm-project/llvm/lib/Analysis/DependenceAnalysis.cpp
35234 views
//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// DependenceAnalysis is an LLVM pass that analyses dependences between memory9// accesses. Currently, it is an (incomplete) implementation of the approach10// described in11//12// Practical Dependence Testing13// Goff, Kennedy, Tseng14// PLDI 199115//16// There's a single entry point that analyzes the dependence between a pair17// of memory references in a function, returning either NULL, for no dependence,18// or a more-or-less detailed description of the dependence between them.19//20// Currently, the implementation cannot propagate constraints between21// coupled RDIV subscripts and lacks a multi-subscript MIV test.22// Both of these are conservative weaknesses;23// that is, not a source of correctness problems.24//25// Since Clang linearizes some array subscripts, the dependence26// analysis is using SCEV->delinearize to recover the representation of multiple27// subscripts, and thus avoid the more expensive and less precise MIV tests. The28// delinearization is controlled by the flag -da-delinearize.29//30// We should pay some careful attention to the possibility of integer overflow31// in the implementation of the various tests. This could happen with Add,32// Subtract, or Multiply, with both APInt's and SCEV's.33//34// Some non-linear subscript pairs can be handled by the GCD test35// (and perhaps other tests).36// Should explore how often these things occur.37//38// Finally, it seems like certain test cases expose weaknesses in the SCEV39// simplification, especially in the handling of sign and zero extensions.40// It could be useful to spend time exploring these.41//42// Please note that this is work in progress and the interface is subject to43// change.44//45//===----------------------------------------------------------------------===//46// //47// In memory of Ken Kennedy, 1945 - 2007 //48// //49//===----------------------------------------------------------------------===//5051#include "llvm/Analysis/DependenceAnalysis.h"52#include "llvm/ADT/Statistic.h"53#include "llvm/Analysis/AliasAnalysis.h"54#include "llvm/Analysis/Delinearization.h"55#include "llvm/Analysis/LoopInfo.h"56#include "llvm/Analysis/ScalarEvolution.h"57#include "llvm/Analysis/ScalarEvolutionExpressions.h"58#include "llvm/Analysis/ValueTracking.h"59#include "llvm/IR/InstIterator.h"60#include "llvm/IR/Module.h"61#include "llvm/InitializePasses.h"62#include "llvm/Support/CommandLine.h"63#include "llvm/Support/Debug.h"64#include "llvm/Support/ErrorHandling.h"65#include "llvm/Support/raw_ostream.h"6667using namespace llvm;6869#define DEBUG_TYPE "da"7071//===----------------------------------------------------------------------===//72// statistics7374STATISTIC(TotalArrayPairs, "Array pairs tested");75STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");76STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");77STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");78STATISTIC(ZIVapplications, "ZIV applications");79STATISTIC(ZIVindependence, "ZIV independence");80STATISTIC(StrongSIVapplications, "Strong SIV applications");81STATISTIC(StrongSIVsuccesses, "Strong SIV successes");82STATISTIC(StrongSIVindependence, "Strong SIV independence");83STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");84STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");85STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");86STATISTIC(ExactSIVapplications, "Exact SIV applications");87STATISTIC(ExactSIVsuccesses, "Exact SIV successes");88STATISTIC(ExactSIVindependence, "Exact SIV independence");89STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");90STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");91STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");92STATISTIC(ExactRDIVapplications, "Exact RDIV applications");93STATISTIC(ExactRDIVindependence, "Exact RDIV independence");94STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");95STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");96STATISTIC(DeltaApplications, "Delta applications");97STATISTIC(DeltaSuccesses, "Delta successes");98STATISTIC(DeltaIndependence, "Delta independence");99STATISTIC(DeltaPropagations, "Delta propagations");100STATISTIC(GCDapplications, "GCD applications");101STATISTIC(GCDsuccesses, "GCD successes");102STATISTIC(GCDindependence, "GCD independence");103STATISTIC(BanerjeeApplications, "Banerjee applications");104STATISTIC(BanerjeeIndependence, "Banerjee independence");105STATISTIC(BanerjeeSuccesses, "Banerjee successes");106107static cl::opt<bool>108Delinearize("da-delinearize", cl::init(true), cl::Hidden,109cl::desc("Try to delinearize array references."));110static cl::opt<bool> DisableDelinearizationChecks(111"da-disable-delinearization-checks", cl::Hidden,112cl::desc(113"Disable checks that try to statically verify validity of "114"delinearized subscripts. Enabling this option may result in incorrect "115"dependence vectors for languages that allow the subscript of one "116"dimension to underflow or overflow into another dimension."));117118static cl::opt<unsigned> MIVMaxLevelThreshold(119"da-miv-max-level-threshold", cl::init(7), cl::Hidden,120cl::desc("Maximum depth allowed for the recursive algorithm used to "121"explore MIV direction vectors."));122123//===----------------------------------------------------------------------===//124// basics125126DependenceAnalysis::Result127DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {128auto &AA = FAM.getResult<AAManager>(F);129auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);130auto &LI = FAM.getResult<LoopAnalysis>(F);131return DependenceInfo(&F, &AA, &SE, &LI);132}133134AnalysisKey DependenceAnalysis::Key;135136INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da",137"Dependence Analysis", true, true)138INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)139INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)140INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)141INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis",142true, true)143144char DependenceAnalysisWrapperPass::ID = 0;145146DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass()147: FunctionPass(ID) {148initializeDependenceAnalysisWrapperPassPass(*PassRegistry::getPassRegistry());149}150151FunctionPass *llvm::createDependenceAnalysisWrapperPass() {152return new DependenceAnalysisWrapperPass();153}154155bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) {156auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();157auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();158auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();159info.reset(new DependenceInfo(&F, &AA, &SE, &LI));160return false;161}162163DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; }164165void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); }166167void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {168AU.setPreservesAll();169AU.addRequiredTransitive<AAResultsWrapperPass>();170AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();171AU.addRequiredTransitive<LoopInfoWrapperPass>();172}173174// Used to test the dependence analyzer.175// Looks through the function, noting instructions that may access memory.176// Calls depends() on every possible pair and prints out the result.177// Ignores all other instructions.178static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,179ScalarEvolution &SE, bool NormalizeResults) {180auto *F = DA->getFunction();181for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE;182++SrcI) {183if (SrcI->mayReadOrWriteMemory()) {184for (inst_iterator DstI = SrcI, DstE = inst_end(F);185DstI != DstE; ++DstI) {186if (DstI->mayReadOrWriteMemory()) {187OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n";188OS << " da analyze - ";189if (auto D = DA->depends(&*SrcI, &*DstI, true)) {190// Normalize negative direction vectors if required by clients.191if (NormalizeResults && D->normalize(&SE))192OS << "normalized - ";193D->dump(OS);194for (unsigned Level = 1; Level <= D->getLevels(); Level++) {195if (D->isSplitable(Level)) {196OS << " da analyze - split level = " << Level;197OS << ", iteration = " << *DA->getSplitIteration(*D, Level);198OS << "!\n";199}200}201}202else203OS << "none!\n";204}205}206}207}208}209210void DependenceAnalysisWrapperPass::print(raw_ostream &OS,211const Module *) const {212dumpExampleDependence(OS, info.get(),213getAnalysis<ScalarEvolutionWrapperPass>().getSE(), false);214}215216PreservedAnalyses217DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {218OS << "'Dependence Analysis' for function '" << F.getName() << "':\n";219dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F),220FAM.getResult<ScalarEvolutionAnalysis>(F),221NormalizeResults);222return PreservedAnalyses::all();223}224225//===----------------------------------------------------------------------===//226// Dependence methods227228// Returns true if this is an input dependence.229bool Dependence::isInput() const {230return Src->mayReadFromMemory() && Dst->mayReadFromMemory();231}232233234// Returns true if this is an output dependence.235bool Dependence::isOutput() const {236return Src->mayWriteToMemory() && Dst->mayWriteToMemory();237}238239240// Returns true if this is an flow (aka true) dependence.241bool Dependence::isFlow() const {242return Src->mayWriteToMemory() && Dst->mayReadFromMemory();243}244245246// Returns true if this is an anti dependence.247bool Dependence::isAnti() const {248return Src->mayReadFromMemory() && Dst->mayWriteToMemory();249}250251252// Returns true if a particular level is scalar; that is,253// if no subscript in the source or destination mention the induction254// variable associated with the loop at this level.255// Leave this out of line, so it will serve as a virtual method anchor256bool Dependence::isScalar(unsigned level) const {257return false;258}259260261//===----------------------------------------------------------------------===//262// FullDependence methods263264FullDependence::FullDependence(Instruction *Source, Instruction *Destination,265bool PossiblyLoopIndependent,266unsigned CommonLevels)267: Dependence(Source, Destination), Levels(CommonLevels),268LoopIndependent(PossiblyLoopIndependent) {269Consistent = true;270if (CommonLevels)271DV = std::make_unique<DVEntry[]>(CommonLevels);272}273274// FIXME: in some cases the meaning of a negative direction vector275// may not be straightforward, e.g.,276// for (int i = 0; i < 32; ++i) {277// Src: A[i] = ...;278// Dst: use(A[31 - i]);279// }280// The dependency is281// flow { Src[i] -> Dst[31 - i] : when i >= 16 } and282// anti { Dst[i] -> Src[31 - i] : when i < 16 },283// -- hence a [<>].284// As long as a dependence result contains '>' ('<>', '<=>', "*"), it285// means that a reversed/normalized dependence needs to be considered286// as well. Nevertheless, current isDirectionNegative() only returns287// true with a '>' or '>=' dependency for ease of canonicalizing the288// dependency vector, since the reverse of '<>', '<=>' and "*" is itself.289bool FullDependence::isDirectionNegative() const {290for (unsigned Level = 1; Level <= Levels; ++Level) {291unsigned char Direction = DV[Level - 1].Direction;292if (Direction == Dependence::DVEntry::EQ)293continue;294if (Direction == Dependence::DVEntry::GT ||295Direction == Dependence::DVEntry::GE)296return true;297return false;298}299return false;300}301302bool FullDependence::normalize(ScalarEvolution *SE) {303if (!isDirectionNegative())304return false;305306LLVM_DEBUG(dbgs() << "Before normalizing negative direction vectors:\n";307dump(dbgs()););308std::swap(Src, Dst);309for (unsigned Level = 1; Level <= Levels; ++Level) {310unsigned char Direction = DV[Level - 1].Direction;311// Reverse the direction vector, this means LT becomes GT312// and GT becomes LT.313unsigned char RevDirection = Direction & Dependence::DVEntry::EQ;314if (Direction & Dependence::DVEntry::LT)315RevDirection |= Dependence::DVEntry::GT;316if (Direction & Dependence::DVEntry::GT)317RevDirection |= Dependence::DVEntry::LT;318DV[Level - 1].Direction = RevDirection;319// Reverse the dependence distance as well.320if (DV[Level - 1].Distance != nullptr)321DV[Level - 1].Distance =322SE->getNegativeSCEV(DV[Level - 1].Distance);323}324325LLVM_DEBUG(dbgs() << "After normalizing negative direction vectors:\n";326dump(dbgs()););327return true;328}329330// The rest are simple getters that hide the implementation.331332// getDirection - Returns the direction associated with a particular level.333unsigned FullDependence::getDirection(unsigned Level) const {334assert(0 < Level && Level <= Levels && "Level out of range");335return DV[Level - 1].Direction;336}337338339// Returns the distance (or NULL) associated with a particular level.340const SCEV *FullDependence::getDistance(unsigned Level) const {341assert(0 < Level && Level <= Levels && "Level out of range");342return DV[Level - 1].Distance;343}344345346// Returns true if a particular level is scalar; that is,347// if no subscript in the source or destination mention the induction348// variable associated with the loop at this level.349bool FullDependence::isScalar(unsigned Level) const {350assert(0 < Level && Level <= Levels && "Level out of range");351return DV[Level - 1].Scalar;352}353354355// Returns true if peeling the first iteration from this loop356// will break this dependence.357bool FullDependence::isPeelFirst(unsigned Level) const {358assert(0 < Level && Level <= Levels && "Level out of range");359return DV[Level - 1].PeelFirst;360}361362363// Returns true if peeling the last iteration from this loop364// will break this dependence.365bool FullDependence::isPeelLast(unsigned Level) const {366assert(0 < Level && Level <= Levels && "Level out of range");367return DV[Level - 1].PeelLast;368}369370371// Returns true if splitting this loop will break the dependence.372bool FullDependence::isSplitable(unsigned Level) const {373assert(0 < Level && Level <= Levels && "Level out of range");374return DV[Level - 1].Splitable;375}376377378//===----------------------------------------------------------------------===//379// DependenceInfo::Constraint methods380381// If constraint is a point <X, Y>, returns X.382// Otherwise assert.383const SCEV *DependenceInfo::Constraint::getX() const {384assert(Kind == Point && "Kind should be Point");385return A;386}387388389// If constraint is a point <X, Y>, returns Y.390// Otherwise assert.391const SCEV *DependenceInfo::Constraint::getY() const {392assert(Kind == Point && "Kind should be Point");393return B;394}395396397// If constraint is a line AX + BY = C, returns A.398// Otherwise assert.399const SCEV *DependenceInfo::Constraint::getA() const {400assert((Kind == Line || Kind == Distance) &&401"Kind should be Line (or Distance)");402return A;403}404405406// If constraint is a line AX + BY = C, returns B.407// Otherwise assert.408const SCEV *DependenceInfo::Constraint::getB() const {409assert((Kind == Line || Kind == Distance) &&410"Kind should be Line (or Distance)");411return B;412}413414415// If constraint is a line AX + BY = C, returns C.416// Otherwise assert.417const SCEV *DependenceInfo::Constraint::getC() const {418assert((Kind == Line || Kind == Distance) &&419"Kind should be Line (or Distance)");420return C;421}422423424// If constraint is a distance, returns D.425// Otherwise assert.426const SCEV *DependenceInfo::Constraint::getD() const {427assert(Kind == Distance && "Kind should be Distance");428return SE->getNegativeSCEV(C);429}430431432// Returns the loop associated with this constraint.433const Loop *DependenceInfo::Constraint::getAssociatedLoop() const {434assert((Kind == Distance || Kind == Line || Kind == Point) &&435"Kind should be Distance, Line, or Point");436return AssociatedLoop;437}438439void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y,440const Loop *CurLoop) {441Kind = Point;442A = X;443B = Y;444AssociatedLoop = CurLoop;445}446447void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB,448const SCEV *CC, const Loop *CurLoop) {449Kind = Line;450A = AA;451B = BB;452C = CC;453AssociatedLoop = CurLoop;454}455456void DependenceInfo::Constraint::setDistance(const SCEV *D,457const Loop *CurLoop) {458Kind = Distance;459A = SE->getOne(D->getType());460B = SE->getNegativeSCEV(A);461C = SE->getNegativeSCEV(D);462AssociatedLoop = CurLoop;463}464465void DependenceInfo::Constraint::setEmpty() { Kind = Empty; }466467void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) {468SE = NewSE;469Kind = Any;470}471472#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)473// For debugging purposes. Dumps the constraint out to OS.474LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const {475if (isEmpty())476OS << " Empty\n";477else if (isAny())478OS << " Any\n";479else if (isPoint())480OS << " Point is <" << *getX() << ", " << *getY() << ">\n";481else if (isDistance())482OS << " Distance is " << *getD() <<483" (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";484else if (isLine())485OS << " Line is " << *getA() << "*X + " <<486*getB() << "*Y = " << *getC() << "\n";487else488llvm_unreachable("unknown constraint type in Constraint::dump");489}490#endif491492493// Updates X with the intersection494// of the Constraints X and Y. Returns true if X has changed.495// Corresponds to Figure 4 from the paper496//497// Practical Dependence Testing498// Goff, Kennedy, Tseng499// PLDI 1991500bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {501++DeltaApplications;502LLVM_DEBUG(dbgs() << "\tintersect constraints\n");503LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs()));504LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs()));505assert(!Y->isPoint() && "Y must not be a Point");506if (X->isAny()) {507if (Y->isAny())508return false;509*X = *Y;510return true;511}512if (X->isEmpty())513return false;514if (Y->isEmpty()) {515X->setEmpty();516return true;517}518519if (X->isDistance() && Y->isDistance()) {520LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n");521if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))522return false;523if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {524X->setEmpty();525++DeltaSuccesses;526return true;527}528// Hmmm, interesting situation.529// I guess if either is constant, keep it and ignore the other.530if (isa<SCEVConstant>(Y->getD())) {531*X = *Y;532return true;533}534return false;535}536537// At this point, the pseudo-code in Figure 4 of the paper538// checks if (X->isPoint() && Y->isPoint()).539// This case can't occur in our implementation,540// since a Point can only arise as the result of intersecting541// two Line constraints, and the right-hand value, Y, is never542// the result of an intersection.543assert(!(X->isPoint() && Y->isPoint()) &&544"We shouldn't ever see X->isPoint() && Y->isPoint()");545546if (X->isLine() && Y->isLine()) {547LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n");548const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());549const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());550if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {551// slopes are equal, so lines are parallel552LLVM_DEBUG(dbgs() << "\t\tsame slope\n");553Prod1 = SE->getMulExpr(X->getC(), Y->getB());554Prod2 = SE->getMulExpr(X->getB(), Y->getC());555if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))556return false;557if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {558X->setEmpty();559++DeltaSuccesses;560return true;561}562return false;563}564if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {565// slopes differ, so lines intersect566LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n");567const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());568const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());569const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());570const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());571const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());572const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());573const SCEVConstant *C1A2_C2A1 =574dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));575const SCEVConstant *C1B2_C2B1 =576dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));577const SCEVConstant *A1B2_A2B1 =578dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));579const SCEVConstant *A2B1_A1B2 =580dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));581if (!C1B2_C2B1 || !C1A2_C2A1 ||582!A1B2_A2B1 || !A2B1_A1B2)583return false;584APInt Xtop = C1B2_C2B1->getAPInt();585APInt Xbot = A1B2_A2B1->getAPInt();586APInt Ytop = C1A2_C2A1->getAPInt();587APInt Ybot = A2B1_A1B2->getAPInt();588LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");589LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");590LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");591LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");592APInt Xq = Xtop; // these need to be initialized, even593APInt Xr = Xtop; // though they're just going to be overwritten594APInt::sdivrem(Xtop, Xbot, Xq, Xr);595APInt Yq = Ytop;596APInt Yr = Ytop;597APInt::sdivrem(Ytop, Ybot, Yq, Yr);598if (Xr != 0 || Yr != 0) {599X->setEmpty();600++DeltaSuccesses;601return true;602}603LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");604if (Xq.slt(0) || Yq.slt(0)) {605X->setEmpty();606++DeltaSuccesses;607return true;608}609if (const SCEVConstant *CUB =610collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {611const APInt &UpperBound = CUB->getAPInt();612LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");613if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {614X->setEmpty();615++DeltaSuccesses;616return true;617}618}619X->setPoint(SE->getConstant(Xq),620SE->getConstant(Yq),621X->getAssociatedLoop());622++DeltaSuccesses;623return true;624}625return false;626}627628// if (X->isLine() && Y->isPoint()) This case can't occur.629assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");630631if (X->isPoint() && Y->isLine()) {632LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n");633const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());634const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());635const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);636if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))637return false;638if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) {639X->setEmpty();640++DeltaSuccesses;641return true;642}643return false;644}645646llvm_unreachable("shouldn't reach the end of Constraint intersection");647return false;648}649650651//===----------------------------------------------------------------------===//652// DependenceInfo methods653654// For debugging purposes. Dumps a dependence to OS.655void Dependence::dump(raw_ostream &OS) const {656bool Splitable = false;657if (isConfused())658OS << "confused";659else {660if (isConsistent())661OS << "consistent ";662if (isFlow())663OS << "flow";664else if (isOutput())665OS << "output";666else if (isAnti())667OS << "anti";668else if (isInput())669OS << "input";670unsigned Levels = getLevels();671OS << " [";672for (unsigned II = 1; II <= Levels; ++II) {673if (isSplitable(II))674Splitable = true;675if (isPeelFirst(II))676OS << 'p';677const SCEV *Distance = getDistance(II);678if (Distance)679OS << *Distance;680else if (isScalar(II))681OS << "S";682else {683unsigned Direction = getDirection(II);684if (Direction == DVEntry::ALL)685OS << "*";686else {687if (Direction & DVEntry::LT)688OS << "<";689if (Direction & DVEntry::EQ)690OS << "=";691if (Direction & DVEntry::GT)692OS << ">";693}694}695if (isPeelLast(II))696OS << 'p';697if (II < Levels)698OS << " ";699}700if (isLoopIndependent())701OS << "|<";702OS << "]";703if (Splitable)704OS << " splitable";705}706OS << "!\n";707}708709// Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their710// underlaying objects. If LocA and LocB are known to not alias (for any reason:711// tbaa, non-overlapping regions etc), then it is known there is no dependecy.712// Otherwise the underlying objects are checked to see if they point to713// different identifiable objects.714static AliasResult underlyingObjectsAlias(AAResults *AA,715const DataLayout &DL,716const MemoryLocation &LocA,717const MemoryLocation &LocB) {718// Check the original locations (minus size) for noalias, which can happen for719// tbaa, incompatible underlying object locations, etc.720MemoryLocation LocAS =721MemoryLocation::getBeforeOrAfter(LocA.Ptr, LocA.AATags);722MemoryLocation LocBS =723MemoryLocation::getBeforeOrAfter(LocB.Ptr, LocB.AATags);724if (AA->isNoAlias(LocAS, LocBS))725return AliasResult::NoAlias;726727// Check the underlying objects are the same728const Value *AObj = getUnderlyingObject(LocA.Ptr);729const Value *BObj = getUnderlyingObject(LocB.Ptr);730731// If the underlying objects are the same, they must alias732if (AObj == BObj)733return AliasResult::MustAlias;734735// We may have hit the recursion limit for underlying objects, or have736// underlying objects where we don't know they will alias.737if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj))738return AliasResult::MayAlias;739740// Otherwise we know the objects are different and both identified objects so741// must not alias.742return AliasResult::NoAlias;743}744745746// Returns true if the load or store can be analyzed. Atomic and volatile747// operations have properties which this analysis does not understand.748static749bool isLoadOrStore(const Instruction *I) {750if (const LoadInst *LI = dyn_cast<LoadInst>(I))751return LI->isUnordered();752else if (const StoreInst *SI = dyn_cast<StoreInst>(I))753return SI->isUnordered();754return false;755}756757758// Examines the loop nesting of the Src and Dst759// instructions and establishes their shared loops. Sets the variables760// CommonLevels, SrcLevels, and MaxLevels.761// The source and destination instructions needn't be contained in the same762// loop. The routine establishNestingLevels finds the level of most deeply763// nested loop that contains them both, CommonLevels. An instruction that's764// not contained in a loop is at level = 0. MaxLevels is equal to the level765// of the source plus the level of the destination, minus CommonLevels.766// This lets us allocate vectors MaxLevels in length, with room for every767// distinct loop referenced in both the source and destination subscripts.768// The variable SrcLevels is the nesting depth of the source instruction.769// It's used to help calculate distinct loops referenced by the destination.770// Here's the map from loops to levels:771// 0 - unused772// 1 - outermost common loop773// ... - other common loops774// CommonLevels - innermost common loop775// ... - loops containing Src but not Dst776// SrcLevels - innermost loop containing Src but not Dst777// ... - loops containing Dst but not Src778// MaxLevels - innermost loops containing Dst but not Src779// Consider the follow code fragment:780// for (a = ...) {781// for (b = ...) {782// for (c = ...) {783// for (d = ...) {784// A[] = ...;785// }786// }787// for (e = ...) {788// for (f = ...) {789// for (g = ...) {790// ... = A[];791// }792// }793// }794// }795// }796// If we're looking at the possibility of a dependence between the store797// to A (the Src) and the load from A (the Dst), we'll note that they798// have 2 loops in common, so CommonLevels will equal 2 and the direction799// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.800// A map from loop names to loop numbers would look like801// a - 1802// b - 2 = CommonLevels803// c - 3804// d - 4 = SrcLevels805// e - 5806// f - 6807// g - 7 = MaxLevels808void DependenceInfo::establishNestingLevels(const Instruction *Src,809const Instruction *Dst) {810const BasicBlock *SrcBlock = Src->getParent();811const BasicBlock *DstBlock = Dst->getParent();812unsigned SrcLevel = LI->getLoopDepth(SrcBlock);813unsigned DstLevel = LI->getLoopDepth(DstBlock);814const Loop *SrcLoop = LI->getLoopFor(SrcBlock);815const Loop *DstLoop = LI->getLoopFor(DstBlock);816SrcLevels = SrcLevel;817MaxLevels = SrcLevel + DstLevel;818while (SrcLevel > DstLevel) {819SrcLoop = SrcLoop->getParentLoop();820SrcLevel--;821}822while (DstLevel > SrcLevel) {823DstLoop = DstLoop->getParentLoop();824DstLevel--;825}826while (SrcLoop != DstLoop) {827SrcLoop = SrcLoop->getParentLoop();828DstLoop = DstLoop->getParentLoop();829SrcLevel--;830}831CommonLevels = SrcLevel;832MaxLevels -= CommonLevels;833}834835836// Given one of the loops containing the source, return837// its level index in our numbering scheme.838unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {839return SrcLoop->getLoopDepth();840}841842843// Given one of the loops containing the destination,844// return its level index in our numbering scheme.845unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const {846unsigned D = DstLoop->getLoopDepth();847if (D > CommonLevels)848// This tries to make sure that we assign unique numbers to src and dst when849// the memory accesses reside in different loops that have the same depth.850return D - CommonLevels + SrcLevels;851else852return D;853}854855856// Returns true if Expression is loop invariant in LoopNest.857bool DependenceInfo::isLoopInvariant(const SCEV *Expression,858const Loop *LoopNest) const {859// Unlike ScalarEvolution::isLoopInvariant() we consider an access outside of860// any loop as invariant, because we only consier expression evaluation at a861// specific position (where the array access takes place), and not across the862// entire function.863if (!LoopNest)864return true;865866// If the expression is invariant in the outermost loop of the loop nest, it867// is invariant anywhere in the loop nest.868return SE->isLoopInvariant(Expression, LoopNest->getOutermostLoop());869}870871872873// Finds the set of loops from the LoopNest that874// have a level <= CommonLevels and are referred to by the SCEV Expression.875void DependenceInfo::collectCommonLoops(const SCEV *Expression,876const Loop *LoopNest,877SmallBitVector &Loops) const {878while (LoopNest) {879unsigned Level = LoopNest->getLoopDepth();880if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))881Loops.set(Level);882LoopNest = LoopNest->getParentLoop();883}884}885886void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) {887888unsigned widestWidthSeen = 0;889Type *widestType;890891// Go through each pair and find the widest bit to which we need892// to extend all of them.893for (Subscript *Pair : Pairs) {894const SCEV *Src = Pair->Src;895const SCEV *Dst = Pair->Dst;896IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());897IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());898if (SrcTy == nullptr || DstTy == nullptr) {899assert(SrcTy == DstTy && "This function only unify integer types and "900"expect Src and Dst share the same type "901"otherwise.");902continue;903}904if (SrcTy->getBitWidth() > widestWidthSeen) {905widestWidthSeen = SrcTy->getBitWidth();906widestType = SrcTy;907}908if (DstTy->getBitWidth() > widestWidthSeen) {909widestWidthSeen = DstTy->getBitWidth();910widestType = DstTy;911}912}913914915assert(widestWidthSeen > 0);916917// Now extend each pair to the widest seen.918for (Subscript *Pair : Pairs) {919const SCEV *Src = Pair->Src;920const SCEV *Dst = Pair->Dst;921IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());922IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());923if (SrcTy == nullptr || DstTy == nullptr) {924assert(SrcTy == DstTy && "This function only unify integer types and "925"expect Src and Dst share the same type "926"otherwise.");927continue;928}929if (SrcTy->getBitWidth() < widestWidthSeen)930// Sign-extend Src to widestType931Pair->Src = SE->getSignExtendExpr(Src, widestType);932if (DstTy->getBitWidth() < widestWidthSeen) {933// Sign-extend Dst to widestType934Pair->Dst = SE->getSignExtendExpr(Dst, widestType);935}936}937}938939// removeMatchingExtensions - Examines a subscript pair.940// If the source and destination are identically sign (or zero)941// extended, it strips off the extension in an effect to simplify942// the actual analysis.943void DependenceInfo::removeMatchingExtensions(Subscript *Pair) {944const SCEV *Src = Pair->Src;945const SCEV *Dst = Pair->Dst;946if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) ||947(isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {948const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src);949const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst);950const SCEV *SrcCastOp = SrcCast->getOperand();951const SCEV *DstCastOp = DstCast->getOperand();952if (SrcCastOp->getType() == DstCastOp->getType()) {953Pair->Src = SrcCastOp;954Pair->Dst = DstCastOp;955}956}957}958959// Examine the scev and return true iff it's affine.960// Collect any loops mentioned in the set of "Loops".961bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,962SmallBitVector &Loops, bool IsSrc) {963const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);964if (!AddRec)965return isLoopInvariant(Expr, LoopNest);966967// The AddRec must depend on one of the containing loops. Otherwise,968// mapSrcLoop and mapDstLoop return indices outside the intended range. This969// can happen when a subscript in one loop references an IV from a sibling970// loop that could not be replaced with a concrete exit value by971// getSCEVAtScope.972const Loop *L = LoopNest;973while (L && AddRec->getLoop() != L)974L = L->getParentLoop();975if (!L)976return false;977978const SCEV *Start = AddRec->getStart();979const SCEV *Step = AddRec->getStepRecurrence(*SE);980const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());981if (!isa<SCEVCouldNotCompute>(UB)) {982if (SE->getTypeSizeInBits(Start->getType()) <983SE->getTypeSizeInBits(UB->getType())) {984if (!AddRec->getNoWrapFlags())985return false;986}987}988if (!isLoopInvariant(Step, LoopNest))989return false;990if (IsSrc)991Loops.set(mapSrcLoop(AddRec->getLoop()));992else993Loops.set(mapDstLoop(AddRec->getLoop()));994return checkSubscript(Start, LoopNest, Loops, IsSrc);995}996997// Examine the scev and return true iff it's linear.998// Collect any loops mentioned in the set of "Loops".999bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest,1000SmallBitVector &Loops) {1001return checkSubscript(Src, LoopNest, Loops, true);1002}10031004// Examine the scev and return true iff it's linear.1005// Collect any loops mentioned in the set of "Loops".1006bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest,1007SmallBitVector &Loops) {1008return checkSubscript(Dst, LoopNest, Loops, false);1009}101010111012// Examines the subscript pair (the Src and Dst SCEVs)1013// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.1014// Collects the associated loops in a set.1015DependenceInfo::Subscript::ClassificationKind1016DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,1017const SCEV *Dst, const Loop *DstLoopNest,1018SmallBitVector &Loops) {1019SmallBitVector SrcLoops(MaxLevels + 1);1020SmallBitVector DstLoops(MaxLevels + 1);1021if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))1022return Subscript::NonLinear;1023if (!checkDstSubscript(Dst, DstLoopNest, DstLoops))1024return Subscript::NonLinear;1025Loops = SrcLoops;1026Loops |= DstLoops;1027unsigned N = Loops.count();1028if (N == 0)1029return Subscript::ZIV;1030if (N == 1)1031return Subscript::SIV;1032if (N == 2 && (SrcLoops.count() == 0 ||1033DstLoops.count() == 0 ||1034(SrcLoops.count() == 1 && DstLoops.count() == 1)))1035return Subscript::RDIV;1036return Subscript::MIV;1037}103810391040// A wrapper around SCEV::isKnownPredicate.1041// Looks for cases where we're interested in comparing for equality.1042// If both X and Y have been identically sign or zero extended,1043// it strips off the (confusing) extensions before invoking1044// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package1045// will be similarly updated.1046//1047// If SCEV::isKnownPredicate can't prove the predicate,1048// we try simple subtraction, which seems to help in some cases1049// involving symbolics.1050bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X,1051const SCEV *Y) const {1052if (Pred == CmpInst::ICMP_EQ ||1053Pred == CmpInst::ICMP_NE) {1054if ((isa<SCEVSignExtendExpr>(X) &&1055isa<SCEVSignExtendExpr>(Y)) ||1056(isa<SCEVZeroExtendExpr>(X) &&1057isa<SCEVZeroExtendExpr>(Y))) {1058const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X);1059const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y);1060const SCEV *Xop = CX->getOperand();1061const SCEV *Yop = CY->getOperand();1062if (Xop->getType() == Yop->getType()) {1063X = Xop;1064Y = Yop;1065}1066}1067}1068if (SE->isKnownPredicate(Pred, X, Y))1069return true;1070// If SE->isKnownPredicate can't prove the condition,1071// we try the brute-force approach of subtracting1072// and testing the difference.1073// By testing with SE->isKnownPredicate first, we avoid1074// the possibility of overflow when the arguments are constants.1075const SCEV *Delta = SE->getMinusSCEV(X, Y);1076switch (Pred) {1077case CmpInst::ICMP_EQ:1078return Delta->isZero();1079case CmpInst::ICMP_NE:1080return SE->isKnownNonZero(Delta);1081case CmpInst::ICMP_SGE:1082return SE->isKnownNonNegative(Delta);1083case CmpInst::ICMP_SLE:1084return SE->isKnownNonPositive(Delta);1085case CmpInst::ICMP_SGT:1086return SE->isKnownPositive(Delta);1087case CmpInst::ICMP_SLT:1088return SE->isKnownNegative(Delta);1089default:1090llvm_unreachable("unexpected predicate in isKnownPredicate");1091}1092}10931094/// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1))1095/// with some extra checking if S is an AddRec and we can prove less-than using1096/// the loop bounds.1097bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const {1098// First unify to the same type1099auto *SType = dyn_cast<IntegerType>(S->getType());1100auto *SizeType = dyn_cast<IntegerType>(Size->getType());1101if (!SType || !SizeType)1102return false;1103Type *MaxType =1104(SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType;1105S = SE->getTruncateOrZeroExtend(S, MaxType);1106Size = SE->getTruncateOrZeroExtend(Size, MaxType);11071108// Special check for addrecs using BE taken count1109const SCEV *Bound = SE->getMinusSCEV(S, Size);1110if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) {1111if (AddRec->isAffine()) {1112const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop());1113if (!isa<SCEVCouldNotCompute>(BECount)) {1114const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE);1115if (SE->isKnownNegative(Limit))1116return true;1117}1118}1119}11201121// Check using normal isKnownNegative1122const SCEV *LimitedBound =1123SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType())));1124return SE->isKnownNegative(LimitedBound);1125}11261127bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const {1128bool Inbounds = false;1129if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr))1130Inbounds = SrcGEP->isInBounds();1131if (Inbounds) {1132if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {1133if (AddRec->isAffine()) {1134// We know S is for Ptr, the operand on a load/store, so doesn't wrap.1135// If both parts are NonNegative, the end result will be NonNegative1136if (SE->isKnownNonNegative(AddRec->getStart()) &&1137SE->isKnownNonNegative(AddRec->getOperand(1)))1138return true;1139}1140}1141}11421143return SE->isKnownNonNegative(S);1144}11451146// All subscripts are all the same type.1147// Loop bound may be smaller (e.g., a char).1148// Should zero extend loop bound, since it's always >= 0.1149// This routine collects upper bound and extends or truncates if needed.1150// Truncating is safe when subscripts are known not to wrap. Cases without1151// nowrap flags should have been rejected earlier.1152// Return null if no bound available.1153const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const {1154if (SE->hasLoopInvariantBackedgeTakenCount(L)) {1155const SCEV *UB = SE->getBackedgeTakenCount(L);1156return SE->getTruncateOrZeroExtend(UB, T);1157}1158return nullptr;1159}116011611162// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.1163// If the cast fails, returns NULL.1164const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L,1165Type *T) const {1166if (const SCEV *UB = collectUpperBound(L, T))1167return dyn_cast<SCEVConstant>(UB);1168return nullptr;1169}117011711172// testZIV -1173// When we have a pair of subscripts of the form [c1] and [c2],1174// where c1 and c2 are both loop invariant, we attack it using1175// the ZIV test. Basically, we test by comparing the two values,1176// but there are actually three possible results:1177// 1) the values are equal, so there's a dependence1178// 2) the values are different, so there's no dependence1179// 3) the values might be equal, so we have to assume a dependence.1180//1181// Return true if dependence disproved.1182bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,1183FullDependence &Result) const {1184LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");1185LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");1186++ZIVapplications;1187if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {1188LLVM_DEBUG(dbgs() << " provably dependent\n");1189return false; // provably dependent1190}1191if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {1192LLVM_DEBUG(dbgs() << " provably independent\n");1193++ZIVindependence;1194return true; // provably independent1195}1196LLVM_DEBUG(dbgs() << " possibly dependent\n");1197Result.Consistent = false;1198return false; // possibly dependent1199}120012011202// strongSIVtest -1203// From the paper, Practical Dependence Testing, Section 4.2.11204//1205// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],1206// where i is an induction variable, c1 and c2 are loop invariant,1207// and a is a constant, we can solve it exactly using the Strong SIV test.1208//1209// Can prove independence. Failing that, can compute distance (and direction).1210// In the presence of symbolic terms, we can sometimes make progress.1211//1212// If there's a dependence,1213//1214// c1 + a*i = c2 + a*i'1215//1216// The dependence distance is1217//1218// d = i' - i = (c1 - c2)/a1219//1220// A dependence only exists if d is an integer and abs(d) <= U, where U is the1221// loop's upper bound. If a dependence exists, the dependence direction is1222// defined as1223//1224// { < if d > 01225// direction = { = if d = 01226// { > if d < 01227//1228// Return true if dependence disproved.1229bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,1230const SCEV *DstConst, const Loop *CurLoop,1231unsigned Level, FullDependence &Result,1232Constraint &NewConstraint) const {1233LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");1234LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff);1235LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");1236LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst);1237LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");1238LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst);1239LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");1240++StrongSIVapplications;1241assert(0 < Level && Level <= CommonLevels && "level out of range");1242Level--;12431244const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);1245LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta);1246LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");12471248// check that |Delta| < iteration count1249if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {1250LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound);1251LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");1252const SCEV *AbsDelta =1253SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);1254const SCEV *AbsCoeff =1255SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);1256const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);1257if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {1258// Distance greater than trip count - no dependence1259++StrongSIVindependence;1260++StrongSIVsuccesses;1261return true;1262}1263}12641265// Can we compute distance?1266if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {1267APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt();1268APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt();1269APInt Distance = ConstDelta; // these need to be initialized1270APInt Remainder = ConstDelta;1271APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);1272LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");1273LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");1274// Make sure Coeff divides Delta exactly1275if (Remainder != 0) {1276// Coeff doesn't divide Distance, no dependence1277++StrongSIVindependence;1278++StrongSIVsuccesses;1279return true;1280}1281Result.DV[Level].Distance = SE->getConstant(Distance);1282NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);1283if (Distance.sgt(0))1284Result.DV[Level].Direction &= Dependence::DVEntry::LT;1285else if (Distance.slt(0))1286Result.DV[Level].Direction &= Dependence::DVEntry::GT;1287else1288Result.DV[Level].Direction &= Dependence::DVEntry::EQ;1289++StrongSIVsuccesses;1290}1291else if (Delta->isZero()) {1292// since 0/X == 01293Result.DV[Level].Distance = Delta;1294NewConstraint.setDistance(Delta, CurLoop);1295Result.DV[Level].Direction &= Dependence::DVEntry::EQ;1296++StrongSIVsuccesses;1297}1298else {1299if (Coeff->isOne()) {1300LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n");1301Result.DV[Level].Distance = Delta; // since X/1 == X1302NewConstraint.setDistance(Delta, CurLoop);1303}1304else {1305Result.Consistent = false;1306NewConstraint.setLine(Coeff,1307SE->getNegativeSCEV(Coeff),1308SE->getNegativeSCEV(Delta), CurLoop);1309}13101311// maybe we can get a useful direction1312bool DeltaMaybeZero = !SE->isKnownNonZero(Delta);1313bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);1314bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);1315bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);1316bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);1317// The double negatives above are confusing.1318// It helps to read !SE->isKnownNonZero(Delta)1319// as "Delta might be Zero"1320unsigned NewDirection = Dependence::DVEntry::NONE;1321if ((DeltaMaybePositive && CoeffMaybePositive) ||1322(DeltaMaybeNegative && CoeffMaybeNegative))1323NewDirection = Dependence::DVEntry::LT;1324if (DeltaMaybeZero)1325NewDirection |= Dependence::DVEntry::EQ;1326if ((DeltaMaybeNegative && CoeffMaybePositive) ||1327(DeltaMaybePositive && CoeffMaybeNegative))1328NewDirection |= Dependence::DVEntry::GT;1329if (NewDirection < Result.DV[Level].Direction)1330++StrongSIVsuccesses;1331Result.DV[Level].Direction &= NewDirection;1332}1333return false;1334}133513361337// weakCrossingSIVtest -1338// From the paper, Practical Dependence Testing, Section 4.2.21339//1340// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],1341// where i is an induction variable, c1 and c2 are loop invariant,1342// and a is a constant, we can solve it exactly using the1343// Weak-Crossing SIV test.1344//1345// Given c1 + a*i = c2 - a*i', we can look for the intersection of1346// the two lines, where i = i', yielding1347//1348// c1 + a*i = c2 - a*i1349// 2a*i = c2 - c11350// i = (c2 - c1)/2a1351//1352// If i < 0, there is no dependence.1353// If i > upperbound, there is no dependence.1354// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.1355// If i = upperbound, there's a dependence with distance = 0.1356// If i is integral, there's a dependence (all directions).1357// If the non-integer part = 1/2, there's a dependence (<> directions).1358// Otherwise, there's no dependence.1359//1360// Can prove independence. Failing that,1361// can sometimes refine the directions.1362// Can determine iteration for splitting.1363//1364// Return true if dependence disproved.1365bool DependenceInfo::weakCrossingSIVtest(1366const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst,1367const Loop *CurLoop, unsigned Level, FullDependence &Result,1368Constraint &NewConstraint, const SCEV *&SplitIter) const {1369LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");1370LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n");1371LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");1372LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");1373++WeakCrossingSIVapplications;1374assert(0 < Level && Level <= CommonLevels && "Level out of range");1375Level--;1376Result.Consistent = false;1377const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);1378LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");1379NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);1380if (Delta->isZero()) {1381Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;1382Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;1383++WeakCrossingSIVsuccesses;1384if (!Result.DV[Level].Direction) {1385++WeakCrossingSIVindependence;1386return true;1387}1388Result.DV[Level].Distance = Delta; // = 01389return false;1390}1391const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);1392if (!ConstCoeff)1393return false;13941395Result.DV[Level].Splitable = true;1396if (SE->isKnownNegative(ConstCoeff)) {1397ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));1398assert(ConstCoeff &&1399"dynamic cast of negative of ConstCoeff should yield constant");1400Delta = SE->getNegativeSCEV(Delta);1401}1402assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");14031404// compute SplitIter for use by DependenceInfo::getSplitIteration()1405SplitIter = SE->getUDivExpr(1406SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta),1407SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff));1408LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n");14091410const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);1411if (!ConstDelta)1412return false;14131414// We're certain that ConstCoeff > 0; therefore,1415// if Delta < 0, then no dependence.1416LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");1417LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n");1418if (SE->isKnownNegative(Delta)) {1419// No dependence, Delta < 01420++WeakCrossingSIVindependence;1421++WeakCrossingSIVsuccesses;1422return true;1423}14241425// We're certain that Delta > 0 and ConstCoeff > 0.1426// Check Delta/(2*ConstCoeff) against upper loop bound1427if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {1428LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");1429const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);1430const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),1431ConstantTwo);1432LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n");1433if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {1434// Delta too big, no dependence1435++WeakCrossingSIVindependence;1436++WeakCrossingSIVsuccesses;1437return true;1438}1439if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {1440// i = i' = UB1441Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;1442Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;1443++WeakCrossingSIVsuccesses;1444if (!Result.DV[Level].Direction) {1445++WeakCrossingSIVindependence;1446return true;1447}1448Result.DV[Level].Splitable = false;1449Result.DV[Level].Distance = SE->getZero(Delta->getType());1450return false;1451}1452}14531454// check that Coeff divides Delta1455APInt APDelta = ConstDelta->getAPInt();1456APInt APCoeff = ConstCoeff->getAPInt();1457APInt Distance = APDelta; // these need to be initialzed1458APInt Remainder = APDelta;1459APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);1460LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");1461if (Remainder != 0) {1462// Coeff doesn't divide Delta, no dependence1463++WeakCrossingSIVindependence;1464++WeakCrossingSIVsuccesses;1465return true;1466}1467LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n");14681469// if 2*Coeff doesn't divide Delta, then the equal direction isn't possible1470APInt Two = APInt(Distance.getBitWidth(), 2, true);1471Remainder = Distance.srem(Two);1472LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n");1473if (Remainder != 0) {1474// Equal direction isn't possible1475Result.DV[Level].Direction &= ~Dependence::DVEntry::EQ;1476++WeakCrossingSIVsuccesses;1477}1478return false;1479}148014811482// Kirch's algorithm, from1483//1484// Optimizing Supercompilers for Supercomputers1485// Michael Wolfe1486// MIT Press, 19891487//1488// Program 2.1, page 29.1489// Computes the GCD of AM and BM.1490// Also finds a solution to the equation ax - by = gcd(a, b).1491// Returns true if dependence disproved; i.e., gcd does not divide Delta.1492static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM,1493const APInt &Delta, APInt &G, APInt &X, APInt &Y) {1494APInt A0(Bits, 1, true), A1(Bits, 0, true);1495APInt B0(Bits, 0, true), B1(Bits, 1, true);1496APInt G0 = AM.abs();1497APInt G1 = BM.abs();1498APInt Q = G0; // these need to be initialized1499APInt R = G0;1500APInt::sdivrem(G0, G1, Q, R);1501while (R != 0) {1502APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;1503APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;1504G0 = G1; G1 = R;1505APInt::sdivrem(G0, G1, Q, R);1506}1507G = G1;1508LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n");1509X = AM.slt(0) ? -A1 : A1;1510Y = BM.slt(0) ? B1 : -B1;15111512// make sure gcd divides Delta1513R = Delta.srem(G);1514if (R != 0)1515return true; // gcd doesn't divide Delta, no dependence1516Q = Delta.sdiv(G);1517return false;1518}15191520static APInt floorOfQuotient(const APInt &A, const APInt &B) {1521APInt Q = A; // these need to be initialized1522APInt R = A;1523APInt::sdivrem(A, B, Q, R);1524if (R == 0)1525return Q;1526if ((A.sgt(0) && B.sgt(0)) ||1527(A.slt(0) && B.slt(0)))1528return Q;1529else1530return Q - 1;1531}15321533static APInt ceilingOfQuotient(const APInt &A, const APInt &B) {1534APInt Q = A; // these need to be initialized1535APInt R = A;1536APInt::sdivrem(A, B, Q, R);1537if (R == 0)1538return Q;1539if ((A.sgt(0) && B.sgt(0)) ||1540(A.slt(0) && B.slt(0)))1541return Q + 1;1542else1543return Q;1544}15451546// exactSIVtest -1547// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],1548// where i is an induction variable, c1 and c2 are loop invariant, and a11549// and a2 are constant, we can solve it exactly using an algorithm developed1550// by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in:1551//1552// Dependence Analysis for Supercomputing1553// Utpal Banerjee1554// Kluwer Academic Publishers, 19881555//1556// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),1557// so use them if possible. They're also a bit better with symbolics and,1558// in the case of the strong SIV test, can compute Distances.1559//1560// Return true if dependence disproved.1561//1562// This is a modified version of the original Banerjee algorithm. The original1563// only tested whether Dst depends on Src. This algorithm extends that and1564// returns all the dependencies that exist between Dst and Src.1565bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,1566const SCEV *SrcConst, const SCEV *DstConst,1567const Loop *CurLoop, unsigned Level,1568FullDependence &Result,1569Constraint &NewConstraint) const {1570LLVM_DEBUG(dbgs() << "\tExact SIV test\n");1571LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");1572LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");1573LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");1574LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");1575++ExactSIVapplications;1576assert(0 < Level && Level <= CommonLevels && "Level out of range");1577Level--;1578Result.Consistent = false;1579const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);1580LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");1581NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta,1582CurLoop);1583const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);1584const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);1585const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);1586if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)1587return false;15881589// find gcd1590APInt G, X, Y;1591APInt AM = ConstSrcCoeff->getAPInt();1592APInt BM = ConstDstCoeff->getAPInt();1593APInt CM = ConstDelta->getAPInt();1594unsigned Bits = AM.getBitWidth();1595if (findGCD(Bits, AM, BM, CM, G, X, Y)) {1596// gcd doesn't divide Delta, no dependence1597++ExactSIVindependence;1598++ExactSIVsuccesses;1599return true;1600}16011602LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");16031604// since SCEV construction normalizes, LM = 01605APInt UM(Bits, 1, true);1606bool UMValid = false;1607// UM is perhaps unavailable, let's check1608if (const SCEVConstant *CUB =1609collectConstantUpperBound(CurLoop, Delta->getType())) {1610UM = CUB->getAPInt();1611LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n");1612UMValid = true;1613}16141615APInt TU(APInt::getSignedMaxValue(Bits));1616APInt TL(APInt::getSignedMinValue(Bits));1617APInt TC = CM.sdiv(G);1618APInt TX = X * TC;1619APInt TY = Y * TC;1620LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");1621LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");1622LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");16231624SmallVector<APInt, 2> TLVec, TUVec;1625APInt TB = BM.sdiv(G);1626if (TB.sgt(0)) {1627TLVec.push_back(ceilingOfQuotient(-TX, TB));1628LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");1629// New bound check - modification to Banerjee's e3 check1630if (UMValid) {1631TUVec.push_back(floorOfQuotient(UM - TX, TB));1632LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");1633}1634} else {1635TUVec.push_back(floorOfQuotient(-TX, TB));1636LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");1637// New bound check - modification to Banerjee's e3 check1638if (UMValid) {1639TLVec.push_back(ceilingOfQuotient(UM - TX, TB));1640LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");1641}1642}16431644APInt TA = AM.sdiv(G);1645if (TA.sgt(0)) {1646if (UMValid) {1647TUVec.push_back(floorOfQuotient(UM - TY, TA));1648LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");1649}1650// New bound check - modification to Banerjee's e3 check1651TLVec.push_back(ceilingOfQuotient(-TY, TA));1652LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");1653} else {1654if (UMValid) {1655TLVec.push_back(ceilingOfQuotient(UM - TY, TA));1656LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");1657}1658// New bound check - modification to Banerjee's e3 check1659TUVec.push_back(floorOfQuotient(-TY, TA));1660LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");1661}16621663LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");1664LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");16651666if (TLVec.empty() || TUVec.empty())1667return false;1668TL = APIntOps::smax(TLVec.front(), TLVec.back());1669TU = APIntOps::smin(TUVec.front(), TUVec.back());1670LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");1671LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");16721673if (TL.sgt(TU)) {1674++ExactSIVindependence;1675++ExactSIVsuccesses;1676return true;1677}16781679// explore directions1680unsigned NewDirection = Dependence::DVEntry::NONE;1681APInt LowerDistance, UpperDistance;1682if (TA.sgt(TB)) {1683LowerDistance = (TY - TX) + (TA - TB) * TL;1684UpperDistance = (TY - TX) + (TA - TB) * TU;1685} else {1686LowerDistance = (TY - TX) + (TA - TB) * TU;1687UpperDistance = (TY - TX) + (TA - TB) * TL;1688}16891690LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance << "\n");1691LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance << "\n");16921693APInt Zero(Bits, 0, true);1694if (LowerDistance.sle(Zero) && UpperDistance.sge(Zero)) {1695NewDirection |= Dependence::DVEntry::EQ;1696++ExactSIVsuccesses;1697}1698if (LowerDistance.slt(0)) {1699NewDirection |= Dependence::DVEntry::GT;1700++ExactSIVsuccesses;1701}1702if (UpperDistance.sgt(0)) {1703NewDirection |= Dependence::DVEntry::LT;1704++ExactSIVsuccesses;1705}17061707// finished1708Result.DV[Level].Direction &= NewDirection;1709if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)1710++ExactSIVindependence;1711LLVM_DEBUG(dbgs() << "\t Result = ");1712LLVM_DEBUG(Result.dump(dbgs()));1713return Result.DV[Level].Direction == Dependence::DVEntry::NONE;1714}171517161717// Return true if the divisor evenly divides the dividend.1718static1719bool isRemainderZero(const SCEVConstant *Dividend,1720const SCEVConstant *Divisor) {1721const APInt &ConstDividend = Dividend->getAPInt();1722const APInt &ConstDivisor = Divisor->getAPInt();1723return ConstDividend.srem(ConstDivisor) == 0;1724}172517261727// weakZeroSrcSIVtest -1728// From the paper, Practical Dependence Testing, Section 4.2.21729//1730// When we have a pair of subscripts of the form [c1] and [c2 + a*i],1731// where i is an induction variable, c1 and c2 are loop invariant,1732// and a is a constant, we can solve it exactly using the1733// Weak-Zero SIV test.1734//1735// Given1736//1737// c1 = c2 + a*i1738//1739// we get1740//1741// (c1 - c2)/a = i1742//1743// If i is not an integer, there's no dependence.1744// If i < 0 or > UB, there's no dependence.1745// If i = 0, the direction is >= and peeling the1746// 1st iteration will break the dependence.1747// If i = UB, the direction is <= and peeling the1748// last iteration will break the dependence.1749// Otherwise, the direction is *.1750//1751// Can prove independence. Failing that, we can sometimes refine1752// the directions. Can sometimes show that first or last1753// iteration carries all the dependences (so worth peeling).1754//1755// (see also weakZeroDstSIVtest)1756//1757// Return true if dependence disproved.1758bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,1759const SCEV *SrcConst,1760const SCEV *DstConst,1761const Loop *CurLoop, unsigned Level,1762FullDependence &Result,1763Constraint &NewConstraint) const {1764// For the WeakSIV test, it's possible the loop isn't common to1765// the Src and Dst loops. If it isn't, then there's no need to1766// record a direction.1767LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");1768LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n");1769LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");1770LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");1771++WeakZeroSIVapplications;1772assert(0 < Level && Level <= MaxLevels && "Level out of range");1773Level--;1774Result.Consistent = false;1775const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);1776NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,1777CurLoop);1778LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");1779if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {1780if (Level < CommonLevels) {1781Result.DV[Level].Direction &= Dependence::DVEntry::GE;1782Result.DV[Level].PeelFirst = true;1783++WeakZeroSIVsuccesses;1784}1785return false; // dependences caused by first iteration1786}1787const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);1788if (!ConstCoeff)1789return false;1790const SCEV *AbsCoeff =1791SE->isKnownNegative(ConstCoeff) ?1792SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;1793const SCEV *NewDelta =1794SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;17951796// check that Delta/SrcCoeff < iteration count1797// really check NewDelta < count*AbsCoeff1798if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {1799LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");1800const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);1801if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {1802++WeakZeroSIVindependence;1803++WeakZeroSIVsuccesses;1804return true;1805}1806if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {1807// dependences caused by last iteration1808if (Level < CommonLevels) {1809Result.DV[Level].Direction &= Dependence::DVEntry::LE;1810Result.DV[Level].PeelLast = true;1811++WeakZeroSIVsuccesses;1812}1813return false;1814}1815}18161817// check that Delta/SrcCoeff >= 01818// really check that NewDelta >= 01819if (SE->isKnownNegative(NewDelta)) {1820// No dependence, newDelta < 01821++WeakZeroSIVindependence;1822++WeakZeroSIVsuccesses;1823return true;1824}18251826// if SrcCoeff doesn't divide Delta, then no dependence1827if (isa<SCEVConstant>(Delta) &&1828!isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {1829++WeakZeroSIVindependence;1830++WeakZeroSIVsuccesses;1831return true;1832}1833return false;1834}183518361837// weakZeroDstSIVtest -1838// From the paper, Practical Dependence Testing, Section 4.2.21839//1840// When we have a pair of subscripts of the form [c1 + a*i] and [c2],1841// where i is an induction variable, c1 and c2 are loop invariant,1842// and a is a constant, we can solve it exactly using the1843// Weak-Zero SIV test.1844//1845// Given1846//1847// c1 + a*i = c21848//1849// we get1850//1851// i = (c2 - c1)/a1852//1853// If i is not an integer, there's no dependence.1854// If i < 0 or > UB, there's no dependence.1855// If i = 0, the direction is <= and peeling the1856// 1st iteration will break the dependence.1857// If i = UB, the direction is >= and peeling the1858// last iteration will break the dependence.1859// Otherwise, the direction is *.1860//1861// Can prove independence. Failing that, we can sometimes refine1862// the directions. Can sometimes show that first or last1863// iteration carries all the dependences (so worth peeling).1864//1865// (see also weakZeroSrcSIVtest)1866//1867// Return true if dependence disproved.1868bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,1869const SCEV *SrcConst,1870const SCEV *DstConst,1871const Loop *CurLoop, unsigned Level,1872FullDependence &Result,1873Constraint &NewConstraint) const {1874// For the WeakSIV test, it's possible the loop isn't common to the1875// Src and Dst loops. If it isn't, then there's no need to record a direction.1876LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");1877LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n");1878LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");1879LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");1880++WeakZeroSIVapplications;1881assert(0 < Level && Level <= SrcLevels && "Level out of range");1882Level--;1883Result.Consistent = false;1884const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);1885NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,1886CurLoop);1887LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");1888if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {1889if (Level < CommonLevels) {1890Result.DV[Level].Direction &= Dependence::DVEntry::LE;1891Result.DV[Level].PeelFirst = true;1892++WeakZeroSIVsuccesses;1893}1894return false; // dependences caused by first iteration1895}1896const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);1897if (!ConstCoeff)1898return false;1899const SCEV *AbsCoeff =1900SE->isKnownNegative(ConstCoeff) ?1901SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;1902const SCEV *NewDelta =1903SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;19041905// check that Delta/SrcCoeff < iteration count1906// really check NewDelta < count*AbsCoeff1907if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {1908LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n");1909const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);1910if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {1911++WeakZeroSIVindependence;1912++WeakZeroSIVsuccesses;1913return true;1914}1915if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {1916// dependences caused by last iteration1917if (Level < CommonLevels) {1918Result.DV[Level].Direction &= Dependence::DVEntry::GE;1919Result.DV[Level].PeelLast = true;1920++WeakZeroSIVsuccesses;1921}1922return false;1923}1924}19251926// check that Delta/SrcCoeff >= 01927// really check that NewDelta >= 01928if (SE->isKnownNegative(NewDelta)) {1929// No dependence, newDelta < 01930++WeakZeroSIVindependence;1931++WeakZeroSIVsuccesses;1932return true;1933}19341935// if SrcCoeff doesn't divide Delta, then no dependence1936if (isa<SCEVConstant>(Delta) &&1937!isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {1938++WeakZeroSIVindependence;1939++WeakZeroSIVsuccesses;1940return true;1941}1942return false;1943}194419451946// exactRDIVtest - Tests the RDIV subscript pair for dependence.1947// Things of the form [c1 + a*i] and [c2 + b*j],1948// where i and j are induction variable, c1 and c2 are loop invariant,1949// and a and b are constants.1950// Returns true if any possible dependence is disproved.1951// Marks the result as inconsistent.1952// Works in some cases that symbolicRDIVtest doesn't, and vice versa.1953bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,1954const SCEV *SrcConst, const SCEV *DstConst,1955const Loop *SrcLoop, const Loop *DstLoop,1956FullDependence &Result) const {1957LLVM_DEBUG(dbgs() << "\tExact RDIV test\n");1958LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n");1959LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n");1960LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n");1961LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n");1962++ExactRDIVapplications;1963Result.Consistent = false;1964const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);1965LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");1966const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);1967const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);1968const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);1969if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)1970return false;19711972// find gcd1973APInt G, X, Y;1974APInt AM = ConstSrcCoeff->getAPInt();1975APInt BM = ConstDstCoeff->getAPInt();1976APInt CM = ConstDelta->getAPInt();1977unsigned Bits = AM.getBitWidth();1978if (findGCD(Bits, AM, BM, CM, G, X, Y)) {1979// gcd doesn't divide Delta, no dependence1980++ExactRDIVindependence;1981return true;1982}19831984LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n");19851986// since SCEV construction seems to normalize, LM = 01987APInt SrcUM(Bits, 1, true);1988bool SrcUMvalid = false;1989// SrcUM is perhaps unavailable, let's check1990if (const SCEVConstant *UpperBound =1991collectConstantUpperBound(SrcLoop, Delta->getType())) {1992SrcUM = UpperBound->getAPInt();1993LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n");1994SrcUMvalid = true;1995}19961997APInt DstUM(Bits, 1, true);1998bool DstUMvalid = false;1999// UM is perhaps unavailable, let's check2000if (const SCEVConstant *UpperBound =2001collectConstantUpperBound(DstLoop, Delta->getType())) {2002DstUM = UpperBound->getAPInt();2003LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n");2004DstUMvalid = true;2005}20062007APInt TU(APInt::getSignedMaxValue(Bits));2008APInt TL(APInt::getSignedMinValue(Bits));2009APInt TC = CM.sdiv(G);2010APInt TX = X * TC;2011APInt TY = Y * TC;2012LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n");2013LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n");2014LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n");20152016SmallVector<APInt, 2> TLVec, TUVec;2017APInt TB = BM.sdiv(G);2018if (TB.sgt(0)) {2019TLVec.push_back(ceilingOfQuotient(-TX, TB));2020LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");2021if (SrcUMvalid) {2022TUVec.push_back(floorOfQuotient(SrcUM - TX, TB));2023LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");2024}2025} else {2026TUVec.push_back(floorOfQuotient(-TX, TB));2027LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");2028if (SrcUMvalid) {2029TLVec.push_back(ceilingOfQuotient(SrcUM - TX, TB));2030LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");2031}2032}20332034APInt TA = AM.sdiv(G);2035if (TA.sgt(0)) {2036TLVec.push_back(ceilingOfQuotient(-TY, TA));2037LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");2038if (DstUMvalid) {2039TUVec.push_back(floorOfQuotient(DstUM - TY, TA));2040LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");2041}2042} else {2043TUVec.push_back(floorOfQuotient(-TY, TA));2044LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n");2045if (DstUMvalid) {2046TLVec.push_back(ceilingOfQuotient(DstUM - TY, TA));2047LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n");2048}2049}20502051if (TLVec.empty() || TUVec.empty())2052return false;20532054LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n");2055LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n");20562057TL = APIntOps::smax(TLVec.front(), TLVec.back());2058TU = APIntOps::smin(TUVec.front(), TUVec.back());2059LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n");2060LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n");20612062if (TL.sgt(TU))2063++ExactRDIVindependence;2064return TL.sgt(TU);2065}206620672068// symbolicRDIVtest -2069// In Section 4.5 of the Practical Dependence Testing paper,the authors2070// introduce a special case of Banerjee's Inequalities (also called the2071// Extreme-Value Test) that can handle some of the SIV and RDIV cases,2072// particularly cases with symbolics. Since it's only able to disprove2073// dependence (not compute distances or directions), we'll use it as a2074// fall back for the other tests.2075//2076// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]2077// where i and j are induction variables and c1 and c2 are loop invariants,2078// we can use the symbolic tests to disprove some dependences, serving as a2079// backup for the RDIV test. Note that i and j can be the same variable,2080// letting this test serve as a backup for the various SIV tests.2081//2082// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some2083// 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)2084// loop bounds for the i and j loops, respectively. So, ...2085//2086// c1 + a1*i = c2 + a2*j2087// a1*i - a2*j = c2 - c12088//2089// To test for a dependence, we compute c2 - c1 and make sure it's in the2090// range of the maximum and minimum possible values of a1*i - a2*j.2091// Considering the signs of a1 and a2, we have 4 possible cases:2092//2093// 1) If a1 >= 0 and a2 >= 0, then2094// a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*02095// -a2*N2 <= c2 - c1 <= a1*N12096//2097// 2) If a1 >= 0 and a2 <= 0, then2098// a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N22099// 0 <= c2 - c1 <= a1*N1 - a2*N22100//2101// 3) If a1 <= 0 and a2 >= 0, then2102// a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*02103// a1*N1 - a2*N2 <= c2 - c1 <= 02104//2105// 4) If a1 <= 0 and a2 <= 0, then2106// a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N22107// a1*N1 <= c2 - c1 <= -a2*N22108//2109// return true if dependence disproved2110bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2,2111const SCEV *C1, const SCEV *C2,2112const Loop *Loop1,2113const Loop *Loop2) const {2114++SymbolicRDIVapplications;2115LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n");2116LLVM_DEBUG(dbgs() << "\t A1 = " << *A1);2117LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");2118LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n");2119LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n");2120LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n");2121const SCEV *N1 = collectUpperBound(Loop1, A1->getType());2122const SCEV *N2 = collectUpperBound(Loop2, A1->getType());2123LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n");2124LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n");2125const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);2126const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);2127LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n");2128LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n");2129if (SE->isKnownNonNegative(A1)) {2130if (SE->isKnownNonNegative(A2)) {2131// A1 >= 0 && A2 >= 02132if (N1) {2133// make sure that c2 - c1 <= a1*N12134const SCEV *A1N1 = SE->getMulExpr(A1, N1);2135LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");2136if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {2137++SymbolicRDIVindependence;2138return true;2139}2140}2141if (N2) {2142// make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c22143const SCEV *A2N2 = SE->getMulExpr(A2, N2);2144LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");2145if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {2146++SymbolicRDIVindependence;2147return true;2148}2149}2150}2151else if (SE->isKnownNonPositive(A2)) {2152// a1 >= 0 && a2 <= 02153if (N1 && N2) {2154// make sure that c2 - c1 <= a1*N1 - a2*N22155const SCEV *A1N1 = SE->getMulExpr(A1, N1);2156const SCEV *A2N2 = SE->getMulExpr(A2, N2);2157const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);2158LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");2159if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {2160++SymbolicRDIVindependence;2161return true;2162}2163}2164// make sure that 0 <= c2 - c12165if (SE->isKnownNegative(C2_C1)) {2166++SymbolicRDIVindependence;2167return true;2168}2169}2170}2171else if (SE->isKnownNonPositive(A1)) {2172if (SE->isKnownNonNegative(A2)) {2173// a1 <= 0 && a2 >= 02174if (N1 && N2) {2175// make sure that a1*N1 - a2*N2 <= c2 - c12176const SCEV *A1N1 = SE->getMulExpr(A1, N1);2177const SCEV *A2N2 = SE->getMulExpr(A2, N2);2178const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);2179LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");2180if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {2181++SymbolicRDIVindependence;2182return true;2183}2184}2185// make sure that c2 - c1 <= 02186if (SE->isKnownPositive(C2_C1)) {2187++SymbolicRDIVindependence;2188return true;2189}2190}2191else if (SE->isKnownNonPositive(A2)) {2192// a1 <= 0 && a2 <= 02193if (N1) {2194// make sure that a1*N1 <= c2 - c12195const SCEV *A1N1 = SE->getMulExpr(A1, N1);2196LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n");2197if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {2198++SymbolicRDIVindependence;2199return true;2200}2201}2202if (N2) {2203// make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N22204const SCEV *A2N2 = SE->getMulExpr(A2, N2);2205LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n");2206if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {2207++SymbolicRDIVindependence;2208return true;2209}2210}2211}2212}2213return false;2214}221522162217// testSIV -2218// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]2219// where i is an induction variable, c1 and c2 are loop invariant, and a1 and2220// a2 are constant, we attack it with an SIV test. While they can all be2221// solved with the Exact SIV test, it's worthwhile to use simpler tests when2222// they apply; they're cheaper and sometimes more precise.2223//2224// Return true if dependence disproved.2225bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,2226FullDependence &Result, Constraint &NewConstraint,2227const SCEV *&SplitIter) const {2228LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");2229LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");2230const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);2231const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);2232if (SrcAddRec && DstAddRec) {2233const SCEV *SrcConst = SrcAddRec->getStart();2234const SCEV *DstConst = DstAddRec->getStart();2235const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);2236const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);2237const Loop *CurLoop = SrcAddRec->getLoop();2238assert(CurLoop == DstAddRec->getLoop() &&2239"both loops in SIV should be same");2240Level = mapSrcLoop(CurLoop);2241bool disproven;2242if (SrcCoeff == DstCoeff)2243disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,2244Level, Result, NewConstraint);2245else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))2246disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,2247Level, Result, NewConstraint, SplitIter);2248else2249disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,2250Level, Result, NewConstraint);2251return disproven ||2252gcdMIVtest(Src, Dst, Result) ||2253symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);2254}2255if (SrcAddRec) {2256const SCEV *SrcConst = SrcAddRec->getStart();2257const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);2258const SCEV *DstConst = Dst;2259const Loop *CurLoop = SrcAddRec->getLoop();2260Level = mapSrcLoop(CurLoop);2261return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,2262Level, Result, NewConstraint) ||2263gcdMIVtest(Src, Dst, Result);2264}2265if (DstAddRec) {2266const SCEV *DstConst = DstAddRec->getStart();2267const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);2268const SCEV *SrcConst = Src;2269const Loop *CurLoop = DstAddRec->getLoop();2270Level = mapDstLoop(CurLoop);2271return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,2272CurLoop, Level, Result, NewConstraint) ||2273gcdMIVtest(Src, Dst, Result);2274}2275llvm_unreachable("SIV test expected at least one AddRec");2276return false;2277}227822792280// testRDIV -2281// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]2282// where i and j are induction variables, c1 and c2 are loop invariant,2283// and a1 and a2 are constant, we can solve it exactly with an easy adaptation2284// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.2285// It doesn't make sense to talk about distance or direction in this case,2286// so there's no point in making special versions of the Strong SIV test or2287// the Weak-crossing SIV test.2288//2289// With minor algebra, this test can also be used for things like2290// [c1 + a1*i + a2*j][c2].2291//2292// Return true if dependence disproved.2293bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,2294FullDependence &Result) const {2295// we have 3 possible situations here:2296// 1) [a*i + b] and [c*j + d]2297// 2) [a*i + c*j + b] and [d]2298// 3) [b] and [a*i + c*j + d]2299// We need to find what we've got and get organized23002301const SCEV *SrcConst, *DstConst;2302const SCEV *SrcCoeff, *DstCoeff;2303const Loop *SrcLoop, *DstLoop;23042305LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");2306LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");2307const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);2308const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);2309if (SrcAddRec && DstAddRec) {2310SrcConst = SrcAddRec->getStart();2311SrcCoeff = SrcAddRec->getStepRecurrence(*SE);2312SrcLoop = SrcAddRec->getLoop();2313DstConst = DstAddRec->getStart();2314DstCoeff = DstAddRec->getStepRecurrence(*SE);2315DstLoop = DstAddRec->getLoop();2316}2317else if (SrcAddRec) {2318if (const SCEVAddRecExpr *tmpAddRec =2319dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {2320SrcConst = tmpAddRec->getStart();2321SrcCoeff = tmpAddRec->getStepRecurrence(*SE);2322SrcLoop = tmpAddRec->getLoop();2323DstConst = Dst;2324DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));2325DstLoop = SrcAddRec->getLoop();2326}2327else2328llvm_unreachable("RDIV reached by surprising SCEVs");2329}2330else if (DstAddRec) {2331if (const SCEVAddRecExpr *tmpAddRec =2332dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {2333DstConst = tmpAddRec->getStart();2334DstCoeff = tmpAddRec->getStepRecurrence(*SE);2335DstLoop = tmpAddRec->getLoop();2336SrcConst = Src;2337SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));2338SrcLoop = DstAddRec->getLoop();2339}2340else2341llvm_unreachable("RDIV reached by surprising SCEVs");2342}2343else2344llvm_unreachable("RDIV expected at least one AddRec");2345return exactRDIVtest(SrcCoeff, DstCoeff,2346SrcConst, DstConst,2347SrcLoop, DstLoop,2348Result) ||2349gcdMIVtest(Src, Dst, Result) ||2350symbolicRDIVtest(SrcCoeff, DstCoeff,2351SrcConst, DstConst,2352SrcLoop, DstLoop);2353}235423552356// Tests the single-subscript MIV pair (Src and Dst) for dependence.2357// Return true if dependence disproved.2358// Can sometimes refine direction vectors.2359bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst,2360const SmallBitVector &Loops,2361FullDependence &Result) const {2362LLVM_DEBUG(dbgs() << " src = " << *Src << "\n");2363LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n");2364Result.Consistent = false;2365return gcdMIVtest(Src, Dst, Result) ||2366banerjeeMIVtest(Src, Dst, Loops, Result);2367}236823692370// Given a product, e.g., 10*X*Y, returns the first constant operand,2371// in this case 10. If there is no constant part, returns NULL.2372static2373const SCEVConstant *getConstantPart(const SCEV *Expr) {2374if (const auto *Constant = dyn_cast<SCEVConstant>(Expr))2375return Constant;2376else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr))2377if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0)))2378return Constant;2379return nullptr;2380}238123822383//===----------------------------------------------------------------------===//2384// gcdMIVtest -2385// Tests an MIV subscript pair for dependence.2386// Returns true if any possible dependence is disproved.2387// Marks the result as inconsistent.2388// Can sometimes disprove the equal direction for 1 or more loops,2389// as discussed in Michael Wolfe's book,2390// High Performance Compilers for Parallel Computing, page 235.2391//2392// We spend some effort (code!) to handle cases like2393// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,2394// but M and N are just loop-invariant variables.2395// This should help us handle linearized subscripts;2396// also makes this test a useful backup to the various SIV tests.2397//2398// It occurs to me that the presence of loop-invariant variables2399// changes the nature of the test from "greatest common divisor"2400// to "a common divisor".2401bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,2402FullDependence &Result) const {2403LLVM_DEBUG(dbgs() << "starting gcd\n");2404++GCDapplications;2405unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());2406APInt RunningGCD = APInt::getZero(BitWidth);24072408// Examine Src coefficients.2409// Compute running GCD and record source constant.2410// Because we're looking for the constant at the end of the chain,2411// we can't quit the loop just because the GCD == 1.2412const SCEV *Coefficients = Src;2413while (const SCEVAddRecExpr *AddRec =2414dyn_cast<SCEVAddRecExpr>(Coefficients)) {2415const SCEV *Coeff = AddRec->getStepRecurrence(*SE);2416// If the coefficient is the product of a constant and other stuff,2417// we can use the constant in the GCD computation.2418const auto *Constant = getConstantPart(Coeff);2419if (!Constant)2420return false;2421APInt ConstCoeff = Constant->getAPInt();2422RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());2423Coefficients = AddRec->getStart();2424}2425const SCEV *SrcConst = Coefficients;24262427// Examine Dst coefficients.2428// Compute running GCD and record destination constant.2429// Because we're looking for the constant at the end of the chain,2430// we can't quit the loop just because the GCD == 1.2431Coefficients = Dst;2432while (const SCEVAddRecExpr *AddRec =2433dyn_cast<SCEVAddRecExpr>(Coefficients)) {2434const SCEV *Coeff = AddRec->getStepRecurrence(*SE);2435// If the coefficient is the product of a constant and other stuff,2436// we can use the constant in the GCD computation.2437const auto *Constant = getConstantPart(Coeff);2438if (!Constant)2439return false;2440APInt ConstCoeff = Constant->getAPInt();2441RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());2442Coefficients = AddRec->getStart();2443}2444const SCEV *DstConst = Coefficients;24452446APInt ExtraGCD = APInt::getZero(BitWidth);2447const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);2448LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n");2449const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);2450if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {2451// If Delta is a sum of products, we may be able to make further progress.2452for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) {2453const SCEV *Operand = Sum->getOperand(Op);2454if (isa<SCEVConstant>(Operand)) {2455assert(!Constant && "Surprised to find multiple constants");2456Constant = cast<SCEVConstant>(Operand);2457}2458else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) {2459// Search for constant operand to participate in GCD;2460// If none found; return false.2461const SCEVConstant *ConstOp = getConstantPart(Product);2462if (!ConstOp)2463return false;2464APInt ConstOpValue = ConstOp->getAPInt();2465ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,2466ConstOpValue.abs());2467}2468else2469return false;2470}2471}2472if (!Constant)2473return false;2474APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt();2475LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n");2476if (ConstDelta == 0)2477return false;2478RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);2479LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n");2480APInt Remainder = ConstDelta.srem(RunningGCD);2481if (Remainder != 0) {2482++GCDindependence;2483return true;2484}24852486// Try to disprove equal directions.2487// For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],2488// the code above can't disprove the dependence because the GCD = 1.2489// So we consider what happen if i = i' and what happens if j = j'.2490// If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],2491// which is infeasible, so we can disallow the = direction for the i level.2492// Setting j = j' doesn't help matters, so we end up with a direction vector2493// of [<>, *]2494//2495// Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],2496// we need to remember that the constant part is 5 and the RunningGCD should2497// be initialized to ExtraGCD = 30.2498LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n');24992500bool Improved = false;2501Coefficients = Src;2502while (const SCEVAddRecExpr *AddRec =2503dyn_cast<SCEVAddRecExpr>(Coefficients)) {2504Coefficients = AddRec->getStart();2505const Loop *CurLoop = AddRec->getLoop();2506RunningGCD = ExtraGCD;2507const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);2508const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);2509const SCEV *Inner = Src;2510while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {2511AddRec = cast<SCEVAddRecExpr>(Inner);2512const SCEV *Coeff = AddRec->getStepRecurrence(*SE);2513if (CurLoop == AddRec->getLoop())2514; // SrcCoeff == Coeff2515else {2516// If the coefficient is the product of a constant and other stuff,2517// we can use the constant in the GCD computation.2518Constant = getConstantPart(Coeff);2519if (!Constant)2520return false;2521APInt ConstCoeff = Constant->getAPInt();2522RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());2523}2524Inner = AddRec->getStart();2525}2526Inner = Dst;2527while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {2528AddRec = cast<SCEVAddRecExpr>(Inner);2529const SCEV *Coeff = AddRec->getStepRecurrence(*SE);2530if (CurLoop == AddRec->getLoop())2531DstCoeff = Coeff;2532else {2533// If the coefficient is the product of a constant and other stuff,2534// we can use the constant in the GCD computation.2535Constant = getConstantPart(Coeff);2536if (!Constant)2537return false;2538APInt ConstCoeff = Constant->getAPInt();2539RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());2540}2541Inner = AddRec->getStart();2542}2543Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);2544// If the coefficient is the product of a constant and other stuff,2545// we can use the constant in the GCD computation.2546Constant = getConstantPart(Delta);2547if (!Constant)2548// The difference of the two coefficients might not be a product2549// or constant, in which case we give up on this direction.2550continue;2551APInt ConstCoeff = Constant->getAPInt();2552RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());2553LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");2554if (RunningGCD != 0) {2555Remainder = ConstDelta.srem(RunningGCD);2556LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");2557if (Remainder != 0) {2558unsigned Level = mapSrcLoop(CurLoop);2559Result.DV[Level - 1].Direction &= ~Dependence::DVEntry::EQ;2560Improved = true;2561}2562}2563}2564if (Improved)2565++GCDsuccesses;2566LLVM_DEBUG(dbgs() << "all done\n");2567return false;2568}256925702571//===----------------------------------------------------------------------===//2572// banerjeeMIVtest -2573// Use Banerjee's Inequalities to test an MIV subscript pair.2574// (Wolfe, in the race-car book, calls this the Extreme Value Test.)2575// Generally follows the discussion in Section 2.5.2 of2576//2577// Optimizing Supercompilers for Supercomputers2578// Michael Wolfe2579//2580// The inequalities given on page 25 are simplified in that loops are2581// normalized so that the lower bound is always 0 and the stride is always 1.2582// For example, Wolfe gives2583//2584// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k2585//2586// where A_k is the coefficient of the kth index in the source subscript,2587// B_k is the coefficient of the kth index in the destination subscript,2588// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth2589// index, and N_k is the stride of the kth index. Since all loops are normalized2590// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the2591// equation to2592//2593// LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 12594// = (A^-_k - B_k)^- (U_k - 1) - B_k2595//2596// Similar simplifications are possible for the other equations.2597//2598// When we can't determine the number of iterations for a loop,2599// we use NULL as an indicator for the worst case, infinity.2600// When computing the upper bound, NULL denotes +inf;2601// for the lower bound, NULL denotes -inf.2602//2603// Return true if dependence disproved.2604bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst,2605const SmallBitVector &Loops,2606FullDependence &Result) const {2607LLVM_DEBUG(dbgs() << "starting Banerjee\n");2608++BanerjeeApplications;2609LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n');2610const SCEV *A0;2611CoefficientInfo *A = collectCoeffInfo(Src, true, A0);2612LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n');2613const SCEV *B0;2614CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);2615BoundInfo *Bound = new BoundInfo[MaxLevels + 1];2616const SCEV *Delta = SE->getMinusSCEV(B0, A0);2617LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');26182619// Compute bounds for all the * directions.2620LLVM_DEBUG(dbgs() << "\tBounds[*]\n");2621for (unsigned K = 1; K <= MaxLevels; ++K) {2622Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;2623Bound[K].Direction = Dependence::DVEntry::ALL;2624Bound[K].DirSet = Dependence::DVEntry::NONE;2625findBoundsALL(A, B, Bound, K);2626#ifndef NDEBUG2627LLVM_DEBUG(dbgs() << "\t " << K << '\t');2628if (Bound[K].Lower[Dependence::DVEntry::ALL])2629LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');2630else2631LLVM_DEBUG(dbgs() << "-inf\t");2632if (Bound[K].Upper[Dependence::DVEntry::ALL])2633LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');2634else2635LLVM_DEBUG(dbgs() << "+inf\n");2636#endif2637}26382639// Test the *, *, *, ... case.2640bool Disproved = false;2641if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) {2642// Explore the direction vector hierarchy.2643unsigned DepthExpanded = 0;2644unsigned NewDeps = exploreDirections(1, A, B, Bound,2645Loops, DepthExpanded, Delta);2646if (NewDeps > 0) {2647bool Improved = false;2648for (unsigned K = 1; K <= CommonLevels; ++K) {2649if (Loops[K]) {2650unsigned Old = Result.DV[K - 1].Direction;2651Result.DV[K - 1].Direction = Old & Bound[K].DirSet;2652Improved |= Old != Result.DV[K - 1].Direction;2653if (!Result.DV[K - 1].Direction) {2654Improved = false;2655Disproved = true;2656break;2657}2658}2659}2660if (Improved)2661++BanerjeeSuccesses;2662}2663else {2664++BanerjeeIndependence;2665Disproved = true;2666}2667}2668else {2669++BanerjeeIndependence;2670Disproved = true;2671}2672delete [] Bound;2673delete [] A;2674delete [] B;2675return Disproved;2676}267726782679// Hierarchically expands the direction vector2680// search space, combining the directions of discovered dependences2681// in the DirSet field of Bound. Returns the number of distinct2682// dependences discovered. If the dependence is disproved,2683// it will return 0.2684unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,2685CoefficientInfo *B, BoundInfo *Bound,2686const SmallBitVector &Loops,2687unsigned &DepthExpanded,2688const SCEV *Delta) const {2689// This algorithm has worst case complexity of O(3^n), where 'n' is the number2690// of common loop levels. To avoid excessive compile-time, pessimize all the2691// results and immediately return when the number of common levels is beyond2692// the given threshold.2693if (CommonLevels > MIVMaxLevelThreshold) {2694LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV "2695"direction exploration is terminated.\n");2696for (unsigned K = 1; K <= CommonLevels; ++K)2697if (Loops[K])2698Bound[K].DirSet = Dependence::DVEntry::ALL;2699return 1;2700}27012702if (Level > CommonLevels) {2703// record result2704LLVM_DEBUG(dbgs() << "\t[");2705for (unsigned K = 1; K <= CommonLevels; ++K) {2706if (Loops[K]) {2707Bound[K].DirSet |= Bound[K].Direction;2708#ifndef NDEBUG2709switch (Bound[K].Direction) {2710case Dependence::DVEntry::LT:2711LLVM_DEBUG(dbgs() << " <");2712break;2713case Dependence::DVEntry::EQ:2714LLVM_DEBUG(dbgs() << " =");2715break;2716case Dependence::DVEntry::GT:2717LLVM_DEBUG(dbgs() << " >");2718break;2719case Dependence::DVEntry::ALL:2720LLVM_DEBUG(dbgs() << " *");2721break;2722default:2723llvm_unreachable("unexpected Bound[K].Direction");2724}2725#endif2726}2727}2728LLVM_DEBUG(dbgs() << " ]\n");2729return 1;2730}2731if (Loops[Level]) {2732if (Level > DepthExpanded) {2733DepthExpanded = Level;2734// compute bounds for <, =, > at current level2735findBoundsLT(A, B, Bound, Level);2736findBoundsGT(A, B, Bound, Level);2737findBoundsEQ(A, B, Bound, Level);2738#ifndef NDEBUG2739LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n');2740LLVM_DEBUG(dbgs() << "\t <\t");2741if (Bound[Level].Lower[Dependence::DVEntry::LT])2742LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT]2743<< '\t');2744else2745LLVM_DEBUG(dbgs() << "-inf\t");2746if (Bound[Level].Upper[Dependence::DVEntry::LT])2747LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT]2748<< '\n');2749else2750LLVM_DEBUG(dbgs() << "+inf\n");2751LLVM_DEBUG(dbgs() << "\t =\t");2752if (Bound[Level].Lower[Dependence::DVEntry::EQ])2753LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ]2754<< '\t');2755else2756LLVM_DEBUG(dbgs() << "-inf\t");2757if (Bound[Level].Upper[Dependence::DVEntry::EQ])2758LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ]2759<< '\n');2760else2761LLVM_DEBUG(dbgs() << "+inf\n");2762LLVM_DEBUG(dbgs() << "\t >\t");2763if (Bound[Level].Lower[Dependence::DVEntry::GT])2764LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT]2765<< '\t');2766else2767LLVM_DEBUG(dbgs() << "-inf\t");2768if (Bound[Level].Upper[Dependence::DVEntry::GT])2769LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT]2770<< '\n');2771else2772LLVM_DEBUG(dbgs() << "+inf\n");2773#endif2774}27752776unsigned NewDeps = 0;27772778// test bounds for <, *, *, ...2779if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))2780NewDeps += exploreDirections(Level + 1, A, B, Bound,2781Loops, DepthExpanded, Delta);27822783// Test bounds for =, *, *, ...2784if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))2785NewDeps += exploreDirections(Level + 1, A, B, Bound,2786Loops, DepthExpanded, Delta);27872788// test bounds for >, *, *, ...2789if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))2790NewDeps += exploreDirections(Level + 1, A, B, Bound,2791Loops, DepthExpanded, Delta);27922793Bound[Level].Direction = Dependence::DVEntry::ALL;2794return NewDeps;2795}2796else2797return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta);2798}279928002801// Returns true iff the current bounds are plausible.2802bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level,2803BoundInfo *Bound, const SCEV *Delta) const {2804Bound[Level].Direction = DirKind;2805if (const SCEV *LowerBound = getLowerBound(Bound))2806if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta))2807return false;2808if (const SCEV *UpperBound = getUpperBound(Bound))2809if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound))2810return false;2811return true;2812}281328142815// Computes the upper and lower bounds for level K2816// using the * direction. Records them in Bound.2817// Wolfe gives the equations2818//2819// LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k2820// UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k2821//2822// Since we normalize loops, we can simplify these equations to2823//2824// LB^*_k = (A^-_k - B^+_k)U_k2825// UB^*_k = (A^+_k - B^-_k)U_k2826//2827// We must be careful to handle the case where the upper bound is unknown.2828// Note that the lower bound is always <= 02829// and the upper bound is always >= 0.2830void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B,2831BoundInfo *Bound, unsigned K) const {2832Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity.2833Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity.2834if (Bound[K].Iterations) {2835Bound[K].Lower[Dependence::DVEntry::ALL] =2836SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),2837Bound[K].Iterations);2838Bound[K].Upper[Dependence::DVEntry::ALL] =2839SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart),2840Bound[K].Iterations);2841}2842else {2843// If the difference is 0, we won't need to know the number of iterations.2844if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))2845Bound[K].Lower[Dependence::DVEntry::ALL] =2846SE->getZero(A[K].Coeff->getType());2847if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))2848Bound[K].Upper[Dependence::DVEntry::ALL] =2849SE->getZero(A[K].Coeff->getType());2850}2851}285228532854// Computes the upper and lower bounds for level K2855// using the = direction. Records them in Bound.2856// Wolfe gives the equations2857//2858// LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k2859// UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k2860//2861// Since we normalize loops, we can simplify these equations to2862//2863// LB^=_k = (A_k - B_k)^- U_k2864// UB^=_k = (A_k - B_k)^+ U_k2865//2866// We must be careful to handle the case where the upper bound is unknown.2867// Note that the lower bound is always <= 02868// and the upper bound is always >= 0.2869void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B,2870BoundInfo *Bound, unsigned K) const {2871Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity.2872Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity.2873if (Bound[K].Iterations) {2874const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);2875const SCEV *NegativePart = getNegativePart(Delta);2876Bound[K].Lower[Dependence::DVEntry::EQ] =2877SE->getMulExpr(NegativePart, Bound[K].Iterations);2878const SCEV *PositivePart = getPositivePart(Delta);2879Bound[K].Upper[Dependence::DVEntry::EQ] =2880SE->getMulExpr(PositivePart, Bound[K].Iterations);2881}2882else {2883// If the positive/negative part of the difference is 0,2884// we won't need to know the number of iterations.2885const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);2886const SCEV *NegativePart = getNegativePart(Delta);2887if (NegativePart->isZero())2888Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero2889const SCEV *PositivePart = getPositivePart(Delta);2890if (PositivePart->isZero())2891Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero2892}2893}289428952896// Computes the upper and lower bounds for level K2897// using the < direction. Records them in Bound.2898// Wolfe gives the equations2899//2900// LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k2901// UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k2902//2903// Since we normalize loops, we can simplify these equations to2904//2905// LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k2906// UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k2907//2908// We must be careful to handle the case where the upper bound is unknown.2909void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B,2910BoundInfo *Bound, unsigned K) const {2911Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity.2912Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity.2913if (Bound[K].Iterations) {2914const SCEV *Iter_1 = SE->getMinusSCEV(2915Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));2916const SCEV *NegPart =2917getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));2918Bound[K].Lower[Dependence::DVEntry::LT] =2919SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);2920const SCEV *PosPart =2921getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));2922Bound[K].Upper[Dependence::DVEntry::LT] =2923SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);2924}2925else {2926// If the positive/negative part of the difference is 0,2927// we won't need to know the number of iterations.2928const SCEV *NegPart =2929getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));2930if (NegPart->isZero())2931Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);2932const SCEV *PosPart =2933getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));2934if (PosPart->isZero())2935Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);2936}2937}293829392940// Computes the upper and lower bounds for level K2941// using the > direction. Records them in Bound.2942// Wolfe gives the equations2943//2944// LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k2945// UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k2946//2947// Since we normalize loops, we can simplify these equations to2948//2949// LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k2950// UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k2951//2952// We must be careful to handle the case where the upper bound is unknown.2953void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B,2954BoundInfo *Bound, unsigned K) const {2955Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity.2956Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity.2957if (Bound[K].Iterations) {2958const SCEV *Iter_1 = SE->getMinusSCEV(2959Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));2960const SCEV *NegPart =2961getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));2962Bound[K].Lower[Dependence::DVEntry::GT] =2963SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);2964const SCEV *PosPart =2965getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));2966Bound[K].Upper[Dependence::DVEntry::GT] =2967SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);2968}2969else {2970// If the positive/negative part of the difference is 0,2971// we won't need to know the number of iterations.2972const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));2973if (NegPart->isZero())2974Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;2975const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));2976if (PosPart->isZero())2977Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;2978}2979}298029812982// X^+ = max(X, 0)2983const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const {2984return SE->getSMaxExpr(X, SE->getZero(X->getType()));2985}298629872988// X^- = min(X, 0)2989const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const {2990return SE->getSMinExpr(X, SE->getZero(X->getType()));2991}299229932994// Walks through the subscript,2995// collecting each coefficient, the associated loop bounds,2996// and recording its positive and negative parts for later use.2997DependenceInfo::CoefficientInfo *2998DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag,2999const SCEV *&Constant) const {3000const SCEV *Zero = SE->getZero(Subscript->getType());3001CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];3002for (unsigned K = 1; K <= MaxLevels; ++K) {3003CI[K].Coeff = Zero;3004CI[K].PosPart = Zero;3005CI[K].NegPart = Zero;3006CI[K].Iterations = nullptr;3007}3008while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {3009const Loop *L = AddRec->getLoop();3010unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L);3011CI[K].Coeff = AddRec->getStepRecurrence(*SE);3012CI[K].PosPart = getPositivePart(CI[K].Coeff);3013CI[K].NegPart = getNegativePart(CI[K].Coeff);3014CI[K].Iterations = collectUpperBound(L, Subscript->getType());3015Subscript = AddRec->getStart();3016}3017Constant = Subscript;3018#ifndef NDEBUG3019LLVM_DEBUG(dbgs() << "\tCoefficient Info\n");3020for (unsigned K = 1; K <= MaxLevels; ++K) {3021LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff);3022LLVM_DEBUG(dbgs() << "\tPos Part = ");3023LLVM_DEBUG(dbgs() << *CI[K].PosPart);3024LLVM_DEBUG(dbgs() << "\tNeg Part = ");3025LLVM_DEBUG(dbgs() << *CI[K].NegPart);3026LLVM_DEBUG(dbgs() << "\tUpper Bound = ");3027if (CI[K].Iterations)3028LLVM_DEBUG(dbgs() << *CI[K].Iterations);3029else3030LLVM_DEBUG(dbgs() << "+inf");3031LLVM_DEBUG(dbgs() << '\n');3032}3033LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n');3034#endif3035return CI;3036}303730383039// Looks through all the bounds info and3040// computes the lower bound given the current direction settings3041// at each level. If the lower bound for any level is -inf,3042// the result is -inf.3043const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const {3044const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];3045for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {3046if (Bound[K].Lower[Bound[K].Direction])3047Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);3048else3049Sum = nullptr;3050}3051return Sum;3052}305330543055// Looks through all the bounds info and3056// computes the upper bound given the current direction settings3057// at each level. If the upper bound at any level is +inf,3058// the result is +inf.3059const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const {3060const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];3061for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {3062if (Bound[K].Upper[Bound[K].Direction])3063Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);3064else3065Sum = nullptr;3066}3067return Sum;3068}306930703071//===----------------------------------------------------------------------===//3072// Constraint manipulation for Delta test.30733074// Given a linear SCEV,3075// return the coefficient (the step)3076// corresponding to the specified loop.3077// If there isn't one, return 0.3078// For example, given a*i + b*j + c*k, finding the coefficient3079// corresponding to the j loop would yield b.3080const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr,3081const Loop *TargetLoop) const {3082const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);3083if (!AddRec)3084return SE->getZero(Expr->getType());3085if (AddRec->getLoop() == TargetLoop)3086return AddRec->getStepRecurrence(*SE);3087return findCoefficient(AddRec->getStart(), TargetLoop);3088}308930903091// Given a linear SCEV,3092// return the SCEV given by zeroing out the coefficient3093// corresponding to the specified loop.3094// For example, given a*i + b*j + c*k, zeroing the coefficient3095// corresponding to the j loop would yield a*i + c*k.3096const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr,3097const Loop *TargetLoop) const {3098const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);3099if (!AddRec)3100return Expr; // ignore3101if (AddRec->getLoop() == TargetLoop)3102return AddRec->getStart();3103return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),3104AddRec->getStepRecurrence(*SE),3105AddRec->getLoop(),3106AddRec->getNoWrapFlags());3107}310831093110// Given a linear SCEV Expr,3111// return the SCEV given by adding some Value to the3112// coefficient corresponding to the specified TargetLoop.3113// For example, given a*i + b*j + c*k, adding 1 to the coefficient3114// corresponding to the j loop would yield a*i + (b+1)*j + c*k.3115const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr,3116const Loop *TargetLoop,3117const SCEV *Value) const {3118const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);3119if (!AddRec) // create a new addRec3120return SE->getAddRecExpr(Expr,3121Value,3122TargetLoop,3123SCEV::FlagAnyWrap); // Worst case, with no info.3124if (AddRec->getLoop() == TargetLoop) {3125const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);3126if (Sum->isZero())3127return AddRec->getStart();3128return SE->getAddRecExpr(AddRec->getStart(),3129Sum,3130AddRec->getLoop(),3131AddRec->getNoWrapFlags());3132}3133if (SE->isLoopInvariant(AddRec, TargetLoop))3134return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap);3135return SE->getAddRecExpr(3136addToCoefficient(AddRec->getStart(), TargetLoop, Value),3137AddRec->getStepRecurrence(*SE), AddRec->getLoop(),3138AddRec->getNoWrapFlags());3139}314031413142// Review the constraints, looking for opportunities3143// to simplify a subscript pair (Src and Dst).3144// Return true if some simplification occurs.3145// If the simplification isn't exact (that is, if it is conservative3146// in terms of dependence), set consistent to false.3147// Corresponds to Figure 5 from the paper3148//3149// Practical Dependence Testing3150// Goff, Kennedy, Tseng3151// PLDI 19913152bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,3153SmallBitVector &Loops,3154SmallVectorImpl<Constraint> &Constraints,3155bool &Consistent) {3156bool Result = false;3157for (unsigned LI : Loops.set_bits()) {3158LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is");3159LLVM_DEBUG(Constraints[LI].dump(dbgs()));3160if (Constraints[LI].isDistance())3161Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);3162else if (Constraints[LI].isLine())3163Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);3164else if (Constraints[LI].isPoint())3165Result |= propagatePoint(Src, Dst, Constraints[LI]);3166}3167return Result;3168}316931703171// Attempt to propagate a distance3172// constraint into a subscript pair (Src and Dst).3173// Return true if some simplification occurs.3174// If the simplification isn't exact (that is, if it is conservative3175// in terms of dependence), set consistent to false.3176bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,3177Constraint &CurConstraint,3178bool &Consistent) {3179const Loop *CurLoop = CurConstraint.getAssociatedLoop();3180LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");3181const SCEV *A_K = findCoefficient(Src, CurLoop);3182if (A_K->isZero())3183return false;3184const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());3185Src = SE->getMinusSCEV(Src, DA_K);3186Src = zeroCoefficient(Src, CurLoop);3187LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");3188LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");3189Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));3190LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");3191if (!findCoefficient(Dst, CurLoop)->isZero())3192Consistent = false;3193return true;3194}319531963197// Attempt to propagate a line3198// constraint into a subscript pair (Src and Dst).3199// Return true if some simplification occurs.3200// If the simplification isn't exact (that is, if it is conservative3201// in terms of dependence), set consistent to false.3202bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,3203Constraint &CurConstraint,3204bool &Consistent) {3205const Loop *CurLoop = CurConstraint.getAssociatedLoop();3206const SCEV *A = CurConstraint.getA();3207const SCEV *B = CurConstraint.getB();3208const SCEV *C = CurConstraint.getC();3209LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C3210<< "\n");3211LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");3212LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");3213if (A->isZero()) {3214const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);3215const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);3216if (!Bconst || !Cconst) return false;3217APInt Beta = Bconst->getAPInt();3218APInt Charlie = Cconst->getAPInt();3219APInt CdivB = Charlie.sdiv(Beta);3220assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");3221const SCEV *AP_K = findCoefficient(Dst, CurLoop);3222// Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));3223Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));3224Dst = zeroCoefficient(Dst, CurLoop);3225if (!findCoefficient(Src, CurLoop)->isZero())3226Consistent = false;3227}3228else if (B->isZero()) {3229const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);3230const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);3231if (!Aconst || !Cconst) return false;3232APInt Alpha = Aconst->getAPInt();3233APInt Charlie = Cconst->getAPInt();3234APInt CdivA = Charlie.sdiv(Alpha);3235assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");3236const SCEV *A_K = findCoefficient(Src, CurLoop);3237Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));3238Src = zeroCoefficient(Src, CurLoop);3239if (!findCoefficient(Dst, CurLoop)->isZero())3240Consistent = false;3241}3242else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {3243const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);3244const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);3245if (!Aconst || !Cconst) return false;3246APInt Alpha = Aconst->getAPInt();3247APInt Charlie = Cconst->getAPInt();3248APInt CdivA = Charlie.sdiv(Alpha);3249assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");3250const SCEV *A_K = findCoefficient(Src, CurLoop);3251Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));3252Src = zeroCoefficient(Src, CurLoop);3253Dst = addToCoefficient(Dst, CurLoop, A_K);3254if (!findCoefficient(Dst, CurLoop)->isZero())3255Consistent = false;3256}3257else {3258// paper is incorrect here, or perhaps just misleading3259const SCEV *A_K = findCoefficient(Src, CurLoop);3260Src = SE->getMulExpr(Src, A);3261Dst = SE->getMulExpr(Dst, A);3262Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));3263Src = zeroCoefficient(Src, CurLoop);3264Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));3265if (!findCoefficient(Dst, CurLoop)->isZero())3266Consistent = false;3267}3268LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");3269LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");3270return true;3271}327232733274// Attempt to propagate a point3275// constraint into a subscript pair (Src and Dst).3276// Return true if some simplification occurs.3277bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst,3278Constraint &CurConstraint) {3279const Loop *CurLoop = CurConstraint.getAssociatedLoop();3280const SCEV *A_K = findCoefficient(Src, CurLoop);3281const SCEV *AP_K = findCoefficient(Dst, CurLoop);3282const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());3283const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());3284LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");3285Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));3286Src = zeroCoefficient(Src, CurLoop);3287LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");3288LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");3289Dst = zeroCoefficient(Dst, CurLoop);3290LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");3291return true;3292}329332943295// Update direction vector entry based on the current constraint.3296void DependenceInfo::updateDirection(Dependence::DVEntry &Level,3297const Constraint &CurConstraint) const {3298LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint =");3299LLVM_DEBUG(CurConstraint.dump(dbgs()));3300if (CurConstraint.isAny())3301; // use defaults3302else if (CurConstraint.isDistance()) {3303// this one is consistent, the others aren't3304Level.Scalar = false;3305Level.Distance = CurConstraint.getD();3306unsigned NewDirection = Dependence::DVEntry::NONE;3307if (!SE->isKnownNonZero(Level.Distance)) // if may be zero3308NewDirection = Dependence::DVEntry::EQ;3309if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive3310NewDirection |= Dependence::DVEntry::LT;3311if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative3312NewDirection |= Dependence::DVEntry::GT;3313Level.Direction &= NewDirection;3314}3315else if (CurConstraint.isLine()) {3316Level.Scalar = false;3317Level.Distance = nullptr;3318// direction should be accurate3319}3320else if (CurConstraint.isPoint()) {3321Level.Scalar = false;3322Level.Distance = nullptr;3323unsigned NewDirection = Dependence::DVEntry::NONE;3324if (!isKnownPredicate(CmpInst::ICMP_NE,3325CurConstraint.getY(),3326CurConstraint.getX()))3327// if X may be = Y3328NewDirection |= Dependence::DVEntry::EQ;3329if (!isKnownPredicate(CmpInst::ICMP_SLE,3330CurConstraint.getY(),3331CurConstraint.getX()))3332// if Y may be > X3333NewDirection |= Dependence::DVEntry::LT;3334if (!isKnownPredicate(CmpInst::ICMP_SGE,3335CurConstraint.getY(),3336CurConstraint.getX()))3337// if Y may be < X3338NewDirection |= Dependence::DVEntry::GT;3339Level.Direction &= NewDirection;3340}3341else3342llvm_unreachable("constraint has unexpected kind");3343}33443345/// Check if we can delinearize the subscripts. If the SCEVs representing the3346/// source and destination array references are recurrences on a nested loop,3347/// this function flattens the nested recurrences into separate recurrences3348/// for each loop level.3349bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,3350SmallVectorImpl<Subscript> &Pair) {3351assert(isLoadOrStore(Src) && "instruction is not load or store");3352assert(isLoadOrStore(Dst) && "instruction is not load or store");3353Value *SrcPtr = getLoadStorePointerOperand(Src);3354Value *DstPtr = getLoadStorePointerOperand(Dst);3355Loop *SrcLoop = LI->getLoopFor(Src->getParent());3356Loop *DstLoop = LI->getLoopFor(Dst->getParent());3357const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop);3358const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop);3359const SCEVUnknown *SrcBase =3360dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));3361const SCEVUnknown *DstBase =3362dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));33633364if (!SrcBase || !DstBase || SrcBase != DstBase)3365return false;33663367SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;33683369if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,3370SrcSubscripts, DstSubscripts) &&3371!tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,3372SrcSubscripts, DstSubscripts))3373return false;33743375int Size = SrcSubscripts.size();3376LLVM_DEBUG({3377dbgs() << "\nSrcSubscripts: ";3378for (int I = 0; I < Size; I++)3379dbgs() << *SrcSubscripts[I];3380dbgs() << "\nDstSubscripts: ";3381for (int I = 0; I < Size; I++)3382dbgs() << *DstSubscripts[I];3383});33843385// The delinearization transforms a single-subscript MIV dependence test into3386// a multi-subscript SIV dependence test that is easier to compute. So we3387// resize Pair to contain as many pairs of subscripts as the delinearization3388// has found, and then initialize the pairs following the delinearization.3389Pair.resize(Size);3390for (int I = 0; I < Size; ++I) {3391Pair[I].Src = SrcSubscripts[I];3392Pair[I].Dst = DstSubscripts[I];3393unifySubscriptType(&Pair[I]);3394}33953396return true;3397}33983399/// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying3400/// arrays accessed are fixed-size arrays. Return true if delinearization was3401/// successful.3402bool DependenceInfo::tryDelinearizeFixedSize(3403Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,3404const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,3405SmallVectorImpl<const SCEV *> &DstSubscripts) {3406LLVM_DEBUG({3407const SCEVUnknown *SrcBase =3408dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));3409const SCEVUnknown *DstBase =3410dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));3411assert(SrcBase && DstBase && SrcBase == DstBase &&3412"expected src and dst scev unknowns to be equal");3413});34143415SmallVector<int, 4> SrcSizes;3416SmallVector<int, 4> DstSizes;3417if (!tryDelinearizeFixedSizeImpl(SE, Src, SrcAccessFn, SrcSubscripts,3418SrcSizes) ||3419!tryDelinearizeFixedSizeImpl(SE, Dst, DstAccessFn, DstSubscripts,3420DstSizes))3421return false;34223423// Check that the two size arrays are non-empty and equal in length and3424// value.3425if (SrcSizes.size() != DstSizes.size() ||3426!std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) {3427SrcSubscripts.clear();3428DstSubscripts.clear();3429return false;3430}34313432assert(SrcSubscripts.size() == DstSubscripts.size() &&3433"Expected equal number of entries in the list of SrcSubscripts and "3434"DstSubscripts.");34353436Value *SrcPtr = getLoadStorePointerOperand(Src);3437Value *DstPtr = getLoadStorePointerOperand(Dst);34383439// In general we cannot safely assume that the subscripts recovered from GEPs3440// are in the range of values defined for their corresponding array3441// dimensions. For example some C language usage/interpretation make it3442// impossible to verify this at compile-time. As such we can only delinearize3443// iff the subscripts are positive and are less than the range of the3444// dimension.3445if (!DisableDelinearizationChecks) {3446auto AllIndicesInRange = [&](SmallVector<int, 4> &DimensionSizes,3447SmallVectorImpl<const SCEV *> &Subscripts,3448Value *Ptr) {3449size_t SSize = Subscripts.size();3450for (size_t I = 1; I < SSize; ++I) {3451const SCEV *S = Subscripts[I];3452if (!isKnownNonNegative(S, Ptr))3453return false;3454if (auto *SType = dyn_cast<IntegerType>(S->getType())) {3455const SCEV *Range = SE->getConstant(3456ConstantInt::get(SType, DimensionSizes[I - 1], false));3457if (!isKnownLessThan(S, Range))3458return false;3459}3460}3461return true;3462};34633464if (!AllIndicesInRange(SrcSizes, SrcSubscripts, SrcPtr) ||3465!AllIndicesInRange(DstSizes, DstSubscripts, DstPtr)) {3466SrcSubscripts.clear();3467DstSubscripts.clear();3468return false;3469}3470}3471LLVM_DEBUG({3472dbgs() << "Delinearized subscripts of fixed-size array\n"3473<< "SrcGEP:" << *SrcPtr << "\n"3474<< "DstGEP:" << *DstPtr << "\n";3475});3476return true;3477}34783479bool DependenceInfo::tryDelinearizeParametricSize(3480Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,3481const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,3482SmallVectorImpl<const SCEV *> &DstSubscripts) {34833484Value *SrcPtr = getLoadStorePointerOperand(Src);3485Value *DstPtr = getLoadStorePointerOperand(Dst);3486const SCEVUnknown *SrcBase =3487dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));3488const SCEVUnknown *DstBase =3489dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));3490assert(SrcBase && DstBase && SrcBase == DstBase &&3491"expected src and dst scev unknowns to be equal");34923493const SCEV *ElementSize = SE->getElementSize(Src);3494if (ElementSize != SE->getElementSize(Dst))3495return false;34963497const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase);3498const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase);34993500const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);3501const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);3502if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())3503return false;35043505// First step: collect parametric terms in both array references.3506SmallVector<const SCEV *, 4> Terms;3507collectParametricTerms(*SE, SrcAR, Terms);3508collectParametricTerms(*SE, DstAR, Terms);35093510// Second step: find subscript sizes.3511SmallVector<const SCEV *, 4> Sizes;3512findArrayDimensions(*SE, Terms, Sizes, ElementSize);35133514// Third step: compute the access functions for each subscript.3515computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes);3516computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes);35173518// Fail when there is only a subscript: that's a linearized access function.3519if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||3520SrcSubscripts.size() != DstSubscripts.size())3521return false;35223523size_t Size = SrcSubscripts.size();35243525// Statically check that the array bounds are in-range. The first subscript we3526// don't have a size for and it cannot overflow into another subscript, so is3527// always safe. The others need to be 0 <= subscript[i] < bound, for both src3528// and dst.3529// FIXME: It may be better to record these sizes and add them as constraints3530// to the dependency checks.3531if (!DisableDelinearizationChecks)3532for (size_t I = 1; I < Size; ++I) {3533if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr))3534return false;35353536if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1]))3537return false;35383539if (!isKnownNonNegative(DstSubscripts[I], DstPtr))3540return false;35413542if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1]))3543return false;3544}35453546return true;3547}35483549//===----------------------------------------------------------------------===//35503551#ifndef NDEBUG3552// For debugging purposes, dump a small bit vector to dbgs().3553static void dumpSmallBitVector(SmallBitVector &BV) {3554dbgs() << "{";3555for (unsigned VI : BV.set_bits()) {3556dbgs() << VI;3557if (BV.find_next(VI) >= 0)3558dbgs() << ' ';3559}3560dbgs() << "}\n";3561}3562#endif35633564bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,3565FunctionAnalysisManager::Invalidator &Inv) {3566// Check if the analysis itself has been invalidated.3567auto PAC = PA.getChecker<DependenceAnalysis>();3568if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())3569return true;35703571// Check transitive dependencies.3572return Inv.invalidate<AAManager>(F, PA) ||3573Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||3574Inv.invalidate<LoopAnalysis>(F, PA);3575}35763577// depends -3578// Returns NULL if there is no dependence.3579// Otherwise, return a Dependence with as many details as possible.3580// Corresponds to Section 3.1 in the paper3581//3582// Practical Dependence Testing3583// Goff, Kennedy, Tseng3584// PLDI 19913585//3586// Care is required to keep the routine below, getSplitIteration(),3587// up to date with respect to this routine.3588std::unique_ptr<Dependence>3589DependenceInfo::depends(Instruction *Src, Instruction *Dst,3590bool PossiblyLoopIndependent) {3591if (Src == Dst)3592PossiblyLoopIndependent = false;35933594if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory()))3595// if both instructions don't reference memory, there's no dependence3596return nullptr;35973598if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {3599// can only analyze simple loads and stores, i.e., no calls, invokes, etc.3600LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");3601return std::make_unique<Dependence>(Src, Dst);3602}36033604assert(isLoadOrStore(Src) && "instruction is not load or store");3605assert(isLoadOrStore(Dst) && "instruction is not load or store");3606Value *SrcPtr = getLoadStorePointerOperand(Src);3607Value *DstPtr = getLoadStorePointerOperand(Dst);36083609switch (underlyingObjectsAlias(AA, F->getDataLayout(),3610MemoryLocation::get(Dst),3611MemoryLocation::get(Src))) {3612case AliasResult::MayAlias:3613case AliasResult::PartialAlias:3614// cannot analyse objects if we don't understand their aliasing.3615LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");3616return std::make_unique<Dependence>(Src, Dst);3617case AliasResult::NoAlias:3618// If the objects noalias, they are distinct, accesses are independent.3619LLVM_DEBUG(dbgs() << "no alias\n");3620return nullptr;3621case AliasResult::MustAlias:3622break; // The underlying objects alias; test accesses for dependence.3623}36243625// establish loop nesting levels3626establishNestingLevels(Src, Dst);3627LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");3628LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");36293630FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);3631++TotalArrayPairs;36323633unsigned Pairs = 1;3634SmallVector<Subscript, 2> Pair(Pairs);3635const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);3636const SCEV *DstSCEV = SE->getSCEV(DstPtr);3637LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");3638LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");3639if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) {3640// If two pointers have different bases, trying to analyze indexes won't3641// work; we can't compare them to each other. This can happen, for example,3642// if one is produced by an LCSSA PHI node.3643//3644// We check this upfront so we don't crash in cases where getMinusSCEV()3645// returns a SCEVCouldNotCompute.3646LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");3647return std::make_unique<Dependence>(Src, Dst);3648}3649Pair[0].Src = SrcSCEV;3650Pair[0].Dst = DstSCEV;36513652if (Delinearize) {3653if (tryDelinearize(Src, Dst, Pair)) {3654LLVM_DEBUG(dbgs() << " delinearized\n");3655Pairs = Pair.size();3656}3657}36583659for (unsigned P = 0; P < Pairs; ++P) {3660Pair[P].Loops.resize(MaxLevels + 1);3661Pair[P].GroupLoops.resize(MaxLevels + 1);3662Pair[P].Group.resize(Pairs);3663removeMatchingExtensions(&Pair[P]);3664Pair[P].Classification =3665classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),3666Pair[P].Dst, LI->getLoopFor(Dst->getParent()),3667Pair[P].Loops);3668Pair[P].GroupLoops = Pair[P].Loops;3669Pair[P].Group.set(P);3670LLVM_DEBUG(dbgs() << " subscript " << P << "\n");3671LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");3672LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");3673LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");3674LLVM_DEBUG(dbgs() << "\tloops = ");3675LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops));3676}36773678SmallBitVector Separable(Pairs);3679SmallBitVector Coupled(Pairs);36803681// Partition subscripts into separable and minimally-coupled groups3682// Algorithm in paper is algorithmically better;3683// this may be faster in practice. Check someday.3684//3685// Here's an example of how it works. Consider this code:3686//3687// for (i = ...) {3688// for (j = ...) {3689// for (k = ...) {3690// for (l = ...) {3691// for (m = ...) {3692// A[i][j][k][m] = ...;3693// ... = A[0][j][l][i + j];3694// }3695// }3696// }3697// }3698// }3699//3700// There are 4 subscripts here:3701// 0 [i] and [0]3702// 1 [j] and [j]3703// 2 [k] and [l]3704// 3 [m] and [i + j]3705//3706// We've already classified each subscript pair as ZIV, SIV, etc.,3707// and collected all the loops mentioned by pair P in Pair[P].Loops.3708// In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops3709// and set Pair[P].Group = {P}.3710//3711// Src Dst Classification Loops GroupLoops Group3712// 0 [i] [0] SIV {1} {1} {0}3713// 1 [j] [j] SIV {2} {2} {1}3714// 2 [k] [l] RDIV {3,4} {3,4} {2}3715// 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3}3716//3717// For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.3718// So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.3719//3720// We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.3721// Next, 0 and 2. Again, the intersection of their GroupLoops is empty.3722// Next 0 and 3. The intersection of their GroupLoop = {1}, not empty,3723// so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added3724// to either Separable or Coupled).3725//3726// Next, we consider 1 and 2. The intersection of the GroupLoops is empty.3727// Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,3728// so Pair[3].Group = {0, 1, 3} and Done = false.3729//3730// Next, we compare 2 against 3. The intersection of the GroupLoops is empty.3731// Since Done remains true, we add 2 to the set of Separable pairs.3732//3733// Finally, we consider 3. There's nothing to compare it with,3734// so Done remains true and we add it to the Coupled set.3735// Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.3736//3737// In the end, we've got 1 separable subscript and 1 coupled group.3738for (unsigned SI = 0; SI < Pairs; ++SI) {3739if (Pair[SI].Classification == Subscript::NonLinear) {3740// ignore these, but collect loops for later3741++NonlinearSubscriptPairs;3742collectCommonLoops(Pair[SI].Src,3743LI->getLoopFor(Src->getParent()),3744Pair[SI].Loops);3745collectCommonLoops(Pair[SI].Dst,3746LI->getLoopFor(Dst->getParent()),3747Pair[SI].Loops);3748Result.Consistent = false;3749} else if (Pair[SI].Classification == Subscript::ZIV) {3750// always separable3751Separable.set(SI);3752}3753else {3754// SIV, RDIV, or MIV, so check for coupled group3755bool Done = true;3756for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {3757SmallBitVector Intersection = Pair[SI].GroupLoops;3758Intersection &= Pair[SJ].GroupLoops;3759if (Intersection.any()) {3760// accumulate set of all the loops in group3761Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;3762// accumulate set of all subscripts in group3763Pair[SJ].Group |= Pair[SI].Group;3764Done = false;3765}3766}3767if (Done) {3768if (Pair[SI].Group.count() == 1) {3769Separable.set(SI);3770++SeparableSubscriptPairs;3771}3772else {3773Coupled.set(SI);3774++CoupledSubscriptPairs;3775}3776}3777}3778}37793780LLVM_DEBUG(dbgs() << " Separable = ");3781LLVM_DEBUG(dumpSmallBitVector(Separable));3782LLVM_DEBUG(dbgs() << " Coupled = ");3783LLVM_DEBUG(dumpSmallBitVector(Coupled));37843785Constraint NewConstraint;3786NewConstraint.setAny(SE);37873788// test separable subscripts3789for (unsigned SI : Separable.set_bits()) {3790LLVM_DEBUG(dbgs() << "testing subscript " << SI);3791switch (Pair[SI].Classification) {3792case Subscript::ZIV:3793LLVM_DEBUG(dbgs() << ", ZIV\n");3794if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))3795return nullptr;3796break;3797case Subscript::SIV: {3798LLVM_DEBUG(dbgs() << ", SIV\n");3799unsigned Level;3800const SCEV *SplitIter = nullptr;3801if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,3802SplitIter))3803return nullptr;3804break;3805}3806case Subscript::RDIV:3807LLVM_DEBUG(dbgs() << ", RDIV\n");3808if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))3809return nullptr;3810break;3811case Subscript::MIV:3812LLVM_DEBUG(dbgs() << ", MIV\n");3813if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))3814return nullptr;3815break;3816default:3817llvm_unreachable("subscript has unexpected classification");3818}3819}38203821if (Coupled.count()) {3822// test coupled subscript groups3823LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n");3824LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");3825SmallVector<Constraint, 4> Constraints(MaxLevels + 1);3826for (unsigned II = 0; II <= MaxLevels; ++II)3827Constraints[II].setAny(SE);3828for (unsigned SI : Coupled.set_bits()) {3829LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { ");3830SmallBitVector Group(Pair[SI].Group);3831SmallBitVector Sivs(Pairs);3832SmallBitVector Mivs(Pairs);3833SmallBitVector ConstrainedLevels(MaxLevels + 1);3834SmallVector<Subscript *, 4> PairsInGroup;3835for (unsigned SJ : Group.set_bits()) {3836LLVM_DEBUG(dbgs() << SJ << " ");3837if (Pair[SJ].Classification == Subscript::SIV)3838Sivs.set(SJ);3839else3840Mivs.set(SJ);3841PairsInGroup.push_back(&Pair[SJ]);3842}3843unifySubscriptType(PairsInGroup);3844LLVM_DEBUG(dbgs() << "}\n");3845while (Sivs.any()) {3846bool Changed = false;3847for (unsigned SJ : Sivs.set_bits()) {3848LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");3849// SJ is an SIV subscript that's part of the current coupled group3850unsigned Level;3851const SCEV *SplitIter = nullptr;3852LLVM_DEBUG(dbgs() << "SIV\n");3853if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,3854SplitIter))3855return nullptr;3856ConstrainedLevels.set(Level);3857if (intersectConstraints(&Constraints[Level], &NewConstraint)) {3858if (Constraints[Level].isEmpty()) {3859++DeltaIndependence;3860return nullptr;3861}3862Changed = true;3863}3864Sivs.reset(SJ);3865}3866if (Changed) {3867// propagate, possibly creating new SIVs and ZIVs3868LLVM_DEBUG(dbgs() << " propagating\n");3869LLVM_DEBUG(dbgs() << "\tMivs = ");3870LLVM_DEBUG(dumpSmallBitVector(Mivs));3871for (unsigned SJ : Mivs.set_bits()) {3872// SJ is an MIV subscript that's part of the current coupled group3873LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n");3874if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,3875Constraints, Result.Consistent)) {3876LLVM_DEBUG(dbgs() << "\t Changed\n");3877++DeltaPropagations;3878Pair[SJ].Classification =3879classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),3880Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),3881Pair[SJ].Loops);3882switch (Pair[SJ].Classification) {3883case Subscript::ZIV:3884LLVM_DEBUG(dbgs() << "ZIV\n");3885if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))3886return nullptr;3887Mivs.reset(SJ);3888break;3889case Subscript::SIV:3890Sivs.set(SJ);3891Mivs.reset(SJ);3892break;3893case Subscript::RDIV:3894case Subscript::MIV:3895break;3896default:3897llvm_unreachable("bad subscript classification");3898}3899}3900}3901}3902}39033904// test & propagate remaining RDIVs3905for (unsigned SJ : Mivs.set_bits()) {3906if (Pair[SJ].Classification == Subscript::RDIV) {3907LLVM_DEBUG(dbgs() << "RDIV test\n");3908if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))3909return nullptr;3910// I don't yet understand how to propagate RDIV results3911Mivs.reset(SJ);3912}3913}39143915// test remaining MIVs3916// This code is temporary.3917// Better to somehow test all remaining subscripts simultaneously.3918for (unsigned SJ : Mivs.set_bits()) {3919if (Pair[SJ].Classification == Subscript::MIV) {3920LLVM_DEBUG(dbgs() << "MIV test\n");3921if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))3922return nullptr;3923}3924else3925llvm_unreachable("expected only MIV subscripts at this point");3926}39273928// update Result.DV from constraint vector3929LLVM_DEBUG(dbgs() << " updating\n");3930for (unsigned SJ : ConstrainedLevels.set_bits()) {3931if (SJ > CommonLevels)3932break;3933updateDirection(Result.DV[SJ - 1], Constraints[SJ]);3934if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)3935return nullptr;3936}3937}3938}39393940// Make sure the Scalar flags are set correctly.3941SmallBitVector CompleteLoops(MaxLevels + 1);3942for (unsigned SI = 0; SI < Pairs; ++SI)3943CompleteLoops |= Pair[SI].Loops;3944for (unsigned II = 1; II <= CommonLevels; ++II)3945if (CompleteLoops[II])3946Result.DV[II - 1].Scalar = false;39473948if (PossiblyLoopIndependent) {3949// Make sure the LoopIndependent flag is set correctly.3950// All directions must include equal, otherwise no3951// loop-independent dependence is possible.3952for (unsigned II = 1; II <= CommonLevels; ++II) {3953if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {3954Result.LoopIndependent = false;3955break;3956}3957}3958}3959else {3960// On the other hand, if all directions are equal and there's no3961// loop-independent dependence possible, then no dependence exists.3962bool AllEqual = true;3963for (unsigned II = 1; II <= CommonLevels; ++II) {3964if (Result.getDirection(II) != Dependence::DVEntry::EQ) {3965AllEqual = false;3966break;3967}3968}3969if (AllEqual)3970return nullptr;3971}39723973return std::make_unique<FullDependence>(std::move(Result));3974}39753976//===----------------------------------------------------------------------===//3977// getSplitIteration -3978// Rather than spend rarely-used space recording the splitting iteration3979// during the Weak-Crossing SIV test, we re-compute it on demand.3980// The re-computation is basically a repeat of the entire dependence test,3981// though simplified since we know that the dependence exists.3982// It's tedious, since we must go through all propagations, etc.3983//3984// Care is required to keep this code up to date with respect to the routine3985// above, depends().3986//3987// Generally, the dependence analyzer will be used to build3988// a dependence graph for a function (basically a map from instructions3989// to dependences). Looking for cycles in the graph shows us loops3990// that cannot be trivially vectorized/parallelized.3991//3992// We can try to improve the situation by examining all the dependences3993// that make up the cycle, looking for ones we can break.3994// Sometimes, peeling the first or last iteration of a loop will break3995// dependences, and we've got flags for those possibilities.3996// Sometimes, splitting a loop at some other iteration will do the trick,3997// and we've got a flag for that case. Rather than waste the space to3998// record the exact iteration (since we rarely know), we provide3999// a method that calculates the iteration. It's a drag that it must work4000// from scratch, but wonderful in that it's possible.4001//4002// Here's an example:4003//4004// for (i = 0; i < 10; i++)4005// A[i] = ...4006// ... = A[11 - i]4007//4008// There's a loop-carried flow dependence from the store to the load,4009// found by the weak-crossing SIV test. The dependence will have a flag,4010// indicating that the dependence can be broken by splitting the loop.4011// Calling getSplitIteration will return 5.4012// Splitting the loop breaks the dependence, like so:4013//4014// for (i = 0; i <= 5; i++)4015// A[i] = ...4016// ... = A[11 - i]4017// for (i = 6; i < 10; i++)4018// A[i] = ...4019// ... = A[11 - i]4020//4021// breaks the dependence and allows us to vectorize/parallelize4022// both loops.4023const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,4024unsigned SplitLevel) {4025assert(Dep.isSplitable(SplitLevel) &&4026"Dep should be splitable at SplitLevel");4027Instruction *Src = Dep.getSrc();4028Instruction *Dst = Dep.getDst();4029assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());4030assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());4031assert(isLoadOrStore(Src));4032assert(isLoadOrStore(Dst));4033Value *SrcPtr = getLoadStorePointerOperand(Src);4034Value *DstPtr = getLoadStorePointerOperand(Dst);4035assert(underlyingObjectsAlias(4036AA, F->getDataLayout(), MemoryLocation::get(Dst),4037MemoryLocation::get(Src)) == AliasResult::MustAlias);40384039// establish loop nesting levels4040establishNestingLevels(Src, Dst);40414042FullDependence Result(Src, Dst, false, CommonLevels);40434044unsigned Pairs = 1;4045SmallVector<Subscript, 2> Pair(Pairs);4046const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);4047const SCEV *DstSCEV = SE->getSCEV(DstPtr);4048Pair[0].Src = SrcSCEV;4049Pair[0].Dst = DstSCEV;40504051if (Delinearize) {4052if (tryDelinearize(Src, Dst, Pair)) {4053LLVM_DEBUG(dbgs() << " delinearized\n");4054Pairs = Pair.size();4055}4056}40574058for (unsigned P = 0; P < Pairs; ++P) {4059Pair[P].Loops.resize(MaxLevels + 1);4060Pair[P].GroupLoops.resize(MaxLevels + 1);4061Pair[P].Group.resize(Pairs);4062removeMatchingExtensions(&Pair[P]);4063Pair[P].Classification =4064classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),4065Pair[P].Dst, LI->getLoopFor(Dst->getParent()),4066Pair[P].Loops);4067Pair[P].GroupLoops = Pair[P].Loops;4068Pair[P].Group.set(P);4069}40704071SmallBitVector Separable(Pairs);4072SmallBitVector Coupled(Pairs);40734074// partition subscripts into separable and minimally-coupled groups4075for (unsigned SI = 0; SI < Pairs; ++SI) {4076if (Pair[SI].Classification == Subscript::NonLinear) {4077// ignore these, but collect loops for later4078collectCommonLoops(Pair[SI].Src,4079LI->getLoopFor(Src->getParent()),4080Pair[SI].Loops);4081collectCommonLoops(Pair[SI].Dst,4082LI->getLoopFor(Dst->getParent()),4083Pair[SI].Loops);4084Result.Consistent = false;4085}4086else if (Pair[SI].Classification == Subscript::ZIV)4087Separable.set(SI);4088else {4089// SIV, RDIV, or MIV, so check for coupled group4090bool Done = true;4091for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {4092SmallBitVector Intersection = Pair[SI].GroupLoops;4093Intersection &= Pair[SJ].GroupLoops;4094if (Intersection.any()) {4095// accumulate set of all the loops in group4096Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;4097// accumulate set of all subscripts in group4098Pair[SJ].Group |= Pair[SI].Group;4099Done = false;4100}4101}4102if (Done) {4103if (Pair[SI].Group.count() == 1)4104Separable.set(SI);4105else4106Coupled.set(SI);4107}4108}4109}41104111Constraint NewConstraint;4112NewConstraint.setAny(SE);41134114// test separable subscripts4115for (unsigned SI : Separable.set_bits()) {4116switch (Pair[SI].Classification) {4117case Subscript::SIV: {4118unsigned Level;4119const SCEV *SplitIter = nullptr;4120(void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,4121Result, NewConstraint, SplitIter);4122if (Level == SplitLevel) {4123assert(SplitIter != nullptr);4124return SplitIter;4125}4126break;4127}4128case Subscript::ZIV:4129case Subscript::RDIV:4130case Subscript::MIV:4131break;4132default:4133llvm_unreachable("subscript has unexpected classification");4134}4135}41364137if (Coupled.count()) {4138// test coupled subscript groups4139SmallVector<Constraint, 4> Constraints(MaxLevels + 1);4140for (unsigned II = 0; II <= MaxLevels; ++II)4141Constraints[II].setAny(SE);4142for (unsigned SI : Coupled.set_bits()) {4143SmallBitVector Group(Pair[SI].Group);4144SmallBitVector Sivs(Pairs);4145SmallBitVector Mivs(Pairs);4146SmallBitVector ConstrainedLevels(MaxLevels + 1);4147for (unsigned SJ : Group.set_bits()) {4148if (Pair[SJ].Classification == Subscript::SIV)4149Sivs.set(SJ);4150else4151Mivs.set(SJ);4152}4153while (Sivs.any()) {4154bool Changed = false;4155for (unsigned SJ : Sivs.set_bits()) {4156// SJ is an SIV subscript that's part of the current coupled group4157unsigned Level;4158const SCEV *SplitIter = nullptr;4159(void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,4160Result, NewConstraint, SplitIter);4161if (Level == SplitLevel && SplitIter)4162return SplitIter;4163ConstrainedLevels.set(Level);4164if (intersectConstraints(&Constraints[Level], &NewConstraint))4165Changed = true;4166Sivs.reset(SJ);4167}4168if (Changed) {4169// propagate, possibly creating new SIVs and ZIVs4170for (unsigned SJ : Mivs.set_bits()) {4171// SJ is an MIV subscript that's part of the current coupled group4172if (propagate(Pair[SJ].Src, Pair[SJ].Dst,4173Pair[SJ].Loops, Constraints, Result.Consistent)) {4174Pair[SJ].Classification =4175classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),4176Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),4177Pair[SJ].Loops);4178switch (Pair[SJ].Classification) {4179case Subscript::ZIV:4180Mivs.reset(SJ);4181break;4182case Subscript::SIV:4183Sivs.set(SJ);4184Mivs.reset(SJ);4185break;4186case Subscript::RDIV:4187case Subscript::MIV:4188break;4189default:4190llvm_unreachable("bad subscript classification");4191}4192}4193}4194}4195}4196}4197}4198llvm_unreachable("somehow reached end of routine");4199return nullptr;4200}420142024203