Path: blob/main/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
35266 views
//===------ PPCLoopInstrFormPrep.cpp - Loop Instr Form Prep Pass ----------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements a pass to prepare loops for ppc preferred addressing9// modes, leveraging different instruction form. (eg: DS/DQ form, D/DS form with10// update)11// Additional PHIs are created for loop induction variables used by load/store12// instructions so that preferred addressing modes can be used.13//14// 1: DS/DQ form preparation, prepare the load/store instructions so that they15// can satisfy the DS/DQ form displacement requirements.16// Generically, this means transforming loops like this:17// for (int i = 0; i < n; ++i) {18// unsigned long x1 = *(unsigned long *)(p + i + 5);19// unsigned long x2 = *(unsigned long *)(p + i + 9);20// }21//22// to look like this:23//24// unsigned NewP = p + 5;25// for (int i = 0; i < n; ++i) {26// unsigned long x1 = *(unsigned long *)(i + NewP);27// unsigned long x2 = *(unsigned long *)(i + NewP + 4);28// }29//30// 2: D/DS form with update preparation, prepare the load/store instructions so31// that we can use update form to do pre-increment.32// Generically, this means transforming loops like this:33// for (int i = 0; i < n; ++i)34// array[i] = c;35//36// to look like this:37//38// T *p = array[-1];39// for (int i = 0; i < n; ++i)40// *++p = c;41//42// 3: common multiple chains for the load/stores with same offsets in the loop,43// so that we can reuse the offsets and reduce the register pressure in the44// loop. This transformation can also increase the loop ILP as now each chain45// uses its own loop induction add/addi. But this will increase the number of46// add/addi in the loop.47//48// Generically, this means transforming loops like this:49//50// char *p;51// A1 = p + base152// A2 = p + base1 + offset53// B1 = p + base254// B2 = p + base2 + offset55//56// for (int i = 0; i < n; i++)57// unsigned long x1 = *(unsigned long *)(A1 + i);58// unsigned long x2 = *(unsigned long *)(A2 + i)59// unsigned long x3 = *(unsigned long *)(B1 + i);60// unsigned long x4 = *(unsigned long *)(B2 + i);61// }62//63// to look like this:64//65// A1_new = p + base1 // chain 166// B1_new = p + base2 // chain 2, now inside the loop, common offset is67// // reused.68//69// for (long long i = 0; i < n; i+=count) {70// unsigned long x1 = *(unsigned long *)(A1_new + i);71// unsigned long x2 = *(unsigned long *)((A1_new + i) + offset);72// unsigned long x3 = *(unsigned long *)(B1_new + i);73// unsigned long x4 = *(unsigned long *)((B1_new + i) + offset);74// }75//===----------------------------------------------------------------------===//7677#include "PPC.h"78#include "PPCSubtarget.h"79#include "PPCTargetMachine.h"80#include "llvm/ADT/DepthFirstIterator.h"81#include "llvm/ADT/SmallPtrSet.h"82#include "llvm/ADT/SmallSet.h"83#include "llvm/ADT/SmallVector.h"84#include "llvm/ADT/Statistic.h"85#include "llvm/Analysis/LoopInfo.h"86#include "llvm/Analysis/ScalarEvolution.h"87#include "llvm/Analysis/ScalarEvolutionExpressions.h"88#include "llvm/IR/BasicBlock.h"89#include "llvm/IR/CFG.h"90#include "llvm/IR/Dominators.h"91#include "llvm/IR/Instruction.h"92#include "llvm/IR/Instructions.h"93#include "llvm/IR/IntrinsicInst.h"94#include "llvm/IR/IntrinsicsPowerPC.h"95#include "llvm/IR/Module.h"96#include "llvm/IR/Type.h"97#include "llvm/IR/Value.h"98#include "llvm/InitializePasses.h"99#include "llvm/Pass.h"100#include "llvm/Support/Casting.h"101#include "llvm/Support/CommandLine.h"102#include "llvm/Support/Debug.h"103#include "llvm/Transforms/Scalar.h"104#include "llvm/Transforms/Utils.h"105#include "llvm/Transforms/Utils/BasicBlockUtils.h"106#include "llvm/Transforms/Utils/Local.h"107#include "llvm/Transforms/Utils/LoopUtils.h"108#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"109#include <cassert>110#include <cmath>111#include <iterator>112#include <utility>113114#define DEBUG_TYPE "ppc-loop-instr-form-prep"115116using namespace llvm;117118static cl::opt<unsigned>119MaxVarsPrep("ppc-formprep-max-vars", cl::Hidden, cl::init(24),120cl::desc("Potential common base number threshold per function "121"for PPC loop prep"));122123static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update",124cl::init(true), cl::Hidden,125cl::desc("prefer update form when ds form is also a update form"));126127static cl::opt<bool> EnableUpdateFormForNonConstInc(128"ppc-formprep-update-nonconst-inc", cl::init(false), cl::Hidden,129cl::desc("prepare update form when the load/store increment is a loop "130"invariant non-const value."));131132static cl::opt<bool> EnableChainCommoning(133"ppc-formprep-chain-commoning", cl::init(false), cl::Hidden,134cl::desc("Enable chain commoning in PPC loop prepare pass."));135136// Sum of following 3 per loop thresholds for all loops can not be larger137// than MaxVarsPrep.138// now the thresholds for each kind prep are exterimental values on Power9.139static cl::opt<unsigned> MaxVarsUpdateForm("ppc-preinc-prep-max-vars",140cl::Hidden, cl::init(3),141cl::desc("Potential PHI threshold per loop for PPC loop prep of update "142"form"));143144static cl::opt<unsigned> MaxVarsDSForm("ppc-dsprep-max-vars",145cl::Hidden, cl::init(3),146cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form"));147148static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars",149cl::Hidden, cl::init(8),150cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"));151152// Commoning chain will reduce the register pressure, so we don't consider about153// the PHI nodes number.154// But commoning chain will increase the addi/add number in the loop and also155// increase loop ILP. Maximum chain number should be same with hardware156// IssueWidth, because we won't benefit from ILP if the parallel chains number157// is bigger than IssueWidth. We assume there are 2 chains in one bucket, so158// there would be 4 buckets at most on P9(IssueWidth is 8).159static cl::opt<unsigned> MaxVarsChainCommon(160"ppc-chaincommon-max-vars", cl::Hidden, cl::init(4),161cl::desc("Bucket number per loop for PPC loop chain common"));162163// If would not be profitable if the common base has only one load/store, ISEL164// should already be able to choose best load/store form based on offset for165// single load/store. Set minimal profitable value default to 2 and make it as166// an option.167static cl::opt<unsigned> DispFormPrepMinThreshold("ppc-dispprep-min-threshold",168cl::Hidden, cl::init(2),169cl::desc("Minimal common base load/store instructions triggering DS/DQ form "170"preparation"));171172static cl::opt<unsigned> ChainCommonPrepMinThreshold(173"ppc-chaincommon-min-threshold", cl::Hidden, cl::init(4),174cl::desc("Minimal common base load/store instructions triggering chain "175"commoning preparation. Must be not smaller than 4"));176177STATISTIC(PHINodeAlreadyExistsUpdate, "PHI node already in pre-increment form");178STATISTIC(PHINodeAlreadyExistsDS, "PHI node already in DS form");179STATISTIC(PHINodeAlreadyExistsDQ, "PHI node already in DQ form");180STATISTIC(DSFormChainRewritten, "Num of DS form chain rewritten");181STATISTIC(DQFormChainRewritten, "Num of DQ form chain rewritten");182STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten");183STATISTIC(ChainCommoningRewritten, "Num of commoning chains");184185namespace {186struct BucketElement {187BucketElement(const SCEV *O, Instruction *I) : Offset(O), Instr(I) {}188BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}189190const SCEV *Offset;191Instruction *Instr;192};193194struct Bucket {195Bucket(const SCEV *B, Instruction *I)196: BaseSCEV(B), Elements(1, BucketElement(I)) {197ChainSize = 0;198}199200// The base of the whole bucket.201const SCEV *BaseSCEV;202203// All elements in the bucket. In the bucket, the element with the BaseSCEV204// has no offset and all other elements are stored as offsets to the205// BaseSCEV.206SmallVector<BucketElement, 16> Elements;207208// The potential chains size. This is used for chain commoning only.209unsigned ChainSize;210211// The base for each potential chain. This is used for chain commoning only.212SmallVector<BucketElement, 16> ChainBases;213};214215// "UpdateForm" is not a real PPC instruction form, it stands for dform216// load/store with update like ldu/stdu, or Prefetch intrinsic.217// For DS form instructions, their displacements must be multiple of 4.218// For DQ form instructions, their displacements must be multiple of 16.219enum PrepForm { UpdateForm = 1, DSForm = 4, DQForm = 16, ChainCommoning };220221class PPCLoopInstrFormPrep : public FunctionPass {222public:223static char ID; // Pass ID, replacement for typeid224225PPCLoopInstrFormPrep() : FunctionPass(ID) {226initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry());227}228229PPCLoopInstrFormPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {230initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry());231}232233void getAnalysisUsage(AnalysisUsage &AU) const override {234AU.addPreserved<DominatorTreeWrapperPass>();235AU.addRequired<LoopInfoWrapperPass>();236AU.addPreserved<LoopInfoWrapperPass>();237AU.addRequired<ScalarEvolutionWrapperPass>();238}239240bool runOnFunction(Function &F) override;241242private:243PPCTargetMachine *TM = nullptr;244const PPCSubtarget *ST;245DominatorTree *DT;246LoopInfo *LI;247ScalarEvolution *SE;248bool PreserveLCSSA;249bool HasCandidateForPrepare;250251/// Successful preparation number for Update/DS/DQ form in all inner most252/// loops. One successful preparation will put one common base out of loop,253/// this may leads to register presure like LICM does.254/// Make sure total preparation number can be controlled by option.255unsigned SuccPrepCount;256257bool runOnLoop(Loop *L);258259/// Check if required PHI node is already exist in Loop \p L.260bool alreadyPrepared(Loop *L, Instruction *MemI,261const SCEV *BasePtrStartSCEV,262const SCEV *BasePtrIncSCEV, PrepForm Form);263264/// Get the value which defines the increment SCEV \p BasePtrIncSCEV.265Value *getNodeForInc(Loop *L, Instruction *MemI,266const SCEV *BasePtrIncSCEV);267268/// Common chains to reuse offsets for a loop to reduce register pressure.269bool chainCommoning(Loop *L, SmallVector<Bucket, 16> &Buckets);270271/// Find out the potential commoning chains and their bases.272bool prepareBasesForCommoningChains(Bucket &BucketChain);273274/// Rewrite load/store according to the common chains.275bool276rewriteLoadStoresForCommoningChains(Loop *L, Bucket &Bucket,277SmallSet<BasicBlock *, 16> &BBChanged);278279/// Collect condition matched(\p isValidCandidate() returns true)280/// candidates in Loop \p L.281SmallVector<Bucket, 16> collectCandidates(282Loop *L,283std::function<bool(const Instruction *, Value *, const Type *)>284isValidCandidate,285std::function<bool(const SCEV *)> isValidDiff,286unsigned MaxCandidateNum);287288/// Add a candidate to candidates \p Buckets if diff between candidate and289/// one base in \p Buckets matches \p isValidDiff.290void addOneCandidate(Instruction *MemI, const SCEV *LSCEV,291SmallVector<Bucket, 16> &Buckets,292std::function<bool(const SCEV *)> isValidDiff,293unsigned MaxCandidateNum);294295/// Prepare all candidates in \p Buckets for update form.296bool updateFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets);297298/// Prepare all candidates in \p Buckets for displacement form, now for299/// ds/dq.300bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets, PrepForm Form);301302/// Prepare for one chain \p BucketChain, find the best base element and303/// update all other elements in \p BucketChain accordingly.304/// \p Form is used to find the best base element.305/// If success, best base element must be stored as the first element of306/// \p BucketChain.307/// Return false if no base element found, otherwise return true.308bool prepareBaseForDispFormChain(Bucket &BucketChain, PrepForm Form);309310/// Prepare for one chain \p BucketChain, find the best base element and311/// update all other elements in \p BucketChain accordingly.312/// If success, best base element must be stored as the first element of313/// \p BucketChain.314/// Return false if no base element found, otherwise return true.315bool prepareBaseForUpdateFormChain(Bucket &BucketChain);316317/// Rewrite load/store instructions in \p BucketChain according to318/// preparation.319bool rewriteLoadStores(Loop *L, Bucket &BucketChain,320SmallSet<BasicBlock *, 16> &BBChanged,321PrepForm Form);322323/// Rewrite for the base load/store of a chain.324std::pair<Instruction *, Instruction *>325rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,326Instruction *BaseMemI, bool CanPreInc, PrepForm Form,327SCEVExpander &SCEVE, SmallPtrSet<Value *, 16> &DeletedPtrs);328329/// Rewrite for the other load/stores of a chain according to the new \p330/// Base.331Instruction *332rewriteForBucketElement(std::pair<Instruction *, Instruction *> Base,333const BucketElement &Element, Value *OffToBase,334SmallPtrSet<Value *, 16> &DeletedPtrs);335};336337} // end anonymous namespace338339char PPCLoopInstrFormPrep::ID = 0;340static const char *name = "Prepare loop for ppc preferred instruction forms";341INITIALIZE_PASS_BEGIN(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)342INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)343INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)344INITIALIZE_PASS_END(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false)345346static constexpr StringRef PHINodeNameSuffix = ".phi";347static constexpr StringRef CastNodeNameSuffix = ".cast";348static constexpr StringRef GEPNodeIncNameSuffix = ".inc";349static constexpr StringRef GEPNodeOffNameSuffix = ".off";350351FunctionPass *llvm::createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM) {352return new PPCLoopInstrFormPrep(TM);353}354355static bool IsPtrInBounds(Value *BasePtr) {356Value *StrippedBasePtr = BasePtr;357while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr))358StrippedBasePtr = BC->getOperand(0);359if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr))360return GEP->isInBounds();361362return false;363}364365static std::string getInstrName(const Value *I, StringRef Suffix) {366assert(I && "Invalid paramater!");367if (I->hasName())368return (I->getName() + Suffix).str();369else370return "";371}372373static Value *getPointerOperandAndType(Value *MemI,374Type **PtrElementType = nullptr) {375376Value *PtrValue = nullptr;377Type *PointerElementType = nullptr;378379if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {380PtrValue = LMemI->getPointerOperand();381PointerElementType = LMemI->getType();382} else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {383PtrValue = SMemI->getPointerOperand();384PointerElementType = SMemI->getValueOperand()->getType();385} else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {386PointerElementType = Type::getInt8Ty(MemI->getContext());387if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||388IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) {389PtrValue = IMemI->getArgOperand(0);390} else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) {391PtrValue = IMemI->getArgOperand(1);392}393}394/*Get ElementType if PtrElementType is not null.*/395if (PtrElementType)396*PtrElementType = PointerElementType;397398return PtrValue;399}400401bool PPCLoopInstrFormPrep::runOnFunction(Function &F) {402if (skipFunction(F))403return false;404405LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();406SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();407auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();408DT = DTWP ? &DTWP->getDomTree() : nullptr;409PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);410ST = TM ? TM->getSubtargetImpl(F) : nullptr;411SuccPrepCount = 0;412413bool MadeChange = false;414415for (Loop *I : *LI)416for (Loop *L : depth_first(I))417MadeChange |= runOnLoop(L);418419return MadeChange;420}421422// Finding the minimal(chain_number + reusable_offset_number) is a complicated423// algorithmic problem.424// For now, the algorithm used here is simply adjusted to handle the case for425// manually unrolling cases.426// FIXME: use a more powerful algorithm to find minimal sum of chain_number and427// reusable_offset_number for one base with multiple offsets.428bool PPCLoopInstrFormPrep::prepareBasesForCommoningChains(Bucket &CBucket) {429// The minimal size for profitable chain commoning:430// A1 = base + offset1431// A2 = base + offset2 (offset2 - offset1 = X)432// A3 = base + offset3433// A4 = base + offset4 (offset4 - offset3 = X)434// ======>435// base1 = base + offset1436// base2 = base + offset3437// A1 = base1438// A2 = base1 + X439// A3 = base2440// A4 = base2 + X441//442// There is benefit because of reuse of offest 'X'.443444assert(ChainCommonPrepMinThreshold >= 4 &&445"Thredhold can not be smaller than 4!\n");446if (CBucket.Elements.size() < ChainCommonPrepMinThreshold)447return false;448449// We simply select the FirstOffset as the first reusable offset between each450// chain element 1 and element 0.451const SCEV *FirstOffset = CBucket.Elements[1].Offset;452453// Figure out how many times above FirstOffset is used in the chain.454// For a success commoning chain candidate, offset difference between each455// chain element 1 and element 0 must be also FirstOffset.456unsigned FirstOffsetReusedCount = 1;457458// Figure out how many times above FirstOffset is used in the first chain.459// Chain number is FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain460unsigned FirstOffsetReusedCountInFirstChain = 1;461462unsigned EleNum = CBucket.Elements.size();463bool SawChainSeparater = false;464for (unsigned j = 2; j != EleNum; ++j) {465if (SE->getMinusSCEV(CBucket.Elements[j].Offset,466CBucket.Elements[j - 1].Offset) == FirstOffset) {467if (!SawChainSeparater)468FirstOffsetReusedCountInFirstChain++;469FirstOffsetReusedCount++;470} else471// For now, if we meet any offset which is not FirstOffset, we assume we472// find a new Chain.473// This makes us miss some opportunities.474// For example, we can common:475//476// {OffsetA, Offset A, OffsetB, OffsetA, OffsetA, OffsetB}477//478// as two chains:479// {{OffsetA, Offset A, OffsetB}, {OffsetA, OffsetA, OffsetB}}480// FirstOffsetReusedCount = 4; FirstOffsetReusedCountInFirstChain = 2481//482// But we fail to common:483//484// {OffsetA, OffsetB, OffsetA, OffsetA, OffsetB, OffsetA}485// FirstOffsetReusedCount = 4; FirstOffsetReusedCountInFirstChain = 1486487SawChainSeparater = true;488}489490// FirstOffset is not reused, skip this bucket.491if (FirstOffsetReusedCount == 1)492return false;493494unsigned ChainNum =495FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain;496497// All elements are increased by FirstOffset.498// The number of chains should be sqrt(EleNum).499if (!SawChainSeparater)500ChainNum = (unsigned)sqrt((double)EleNum);501502CBucket.ChainSize = (unsigned)(EleNum / ChainNum);503504// If this is not a perfect chain(eg: not all elements can be put inside505// commoning chains.), skip now.506if (CBucket.ChainSize * ChainNum != EleNum)507return false;508509if (SawChainSeparater) {510// Check that the offset seqs are the same for all chains.511for (unsigned i = 1; i < CBucket.ChainSize; i++)512for (unsigned j = 1; j < ChainNum; j++)513if (CBucket.Elements[i].Offset !=514SE->getMinusSCEV(CBucket.Elements[i + j * CBucket.ChainSize].Offset,515CBucket.Elements[j * CBucket.ChainSize].Offset))516return false;517}518519for (unsigned i = 0; i < ChainNum; i++)520CBucket.ChainBases.push_back(CBucket.Elements[i * CBucket.ChainSize]);521522LLVM_DEBUG(dbgs() << "Bucket has " << ChainNum << " chains.\n");523524return true;525}526527bool PPCLoopInstrFormPrep::chainCommoning(Loop *L,528SmallVector<Bucket, 16> &Buckets) {529bool MadeChange = false;530531if (Buckets.empty())532return MadeChange;533534SmallSet<BasicBlock *, 16> BBChanged;535536for (auto &Bucket : Buckets) {537if (prepareBasesForCommoningChains(Bucket))538MadeChange |= rewriteLoadStoresForCommoningChains(L, Bucket, BBChanged);539}540541if (MadeChange)542for (auto *BB : BBChanged)543DeleteDeadPHIs(BB);544return MadeChange;545}546547bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains(548Loop *L, Bucket &Bucket, SmallSet<BasicBlock *, 16> &BBChanged) {549bool MadeChange = false;550551assert(Bucket.Elements.size() ==552Bucket.ChainBases.size() * Bucket.ChainSize &&553"invalid bucket for chain commoning!\n");554SmallPtrSet<Value *, 16> DeletedPtrs;555556BasicBlock *Header = L->getHeader();557BasicBlock *LoopPredecessor = L->getLoopPredecessor();558559SCEVExpander SCEVE(*SE, Header->getDataLayout(),560"loopprepare-chaincommon");561562for (unsigned ChainIdx = 0; ChainIdx < Bucket.ChainBases.size(); ++ChainIdx) {563unsigned BaseElemIdx = Bucket.ChainSize * ChainIdx;564const SCEV *BaseSCEV =565ChainIdx ? SE->getAddExpr(Bucket.BaseSCEV,566Bucket.Elements[BaseElemIdx].Offset)567: Bucket.BaseSCEV;568const SCEVAddRecExpr *BasePtrSCEV = cast<SCEVAddRecExpr>(BaseSCEV);569570// Make sure the base is able to expand.571if (!SCEVE.isSafeToExpand(BasePtrSCEV->getStart()))572return MadeChange;573574assert(BasePtrSCEV->isAffine() &&575"Invalid SCEV type for the base ptr for a candidate chain!\n");576577std::pair<Instruction *, Instruction *> Base = rewriteForBase(578L, BasePtrSCEV, Bucket.Elements[BaseElemIdx].Instr,579false /* CanPreInc */, ChainCommoning, SCEVE, DeletedPtrs);580581if (!Base.first || !Base.second)582return MadeChange;583584// Keep track of the replacement pointer values we've inserted so that we585// don't generate more pointer values than necessary.586SmallPtrSet<Value *, 16> NewPtrs;587NewPtrs.insert(Base.first);588589for (unsigned Idx = BaseElemIdx + 1; Idx < BaseElemIdx + Bucket.ChainSize;590++Idx) {591BucketElement &I = Bucket.Elements[Idx];592Value *Ptr = getPointerOperandAndType(I.Instr);593assert(Ptr && "No pointer operand");594if (NewPtrs.count(Ptr))595continue;596597const SCEV *OffsetSCEV =598BaseElemIdx ? SE->getMinusSCEV(Bucket.Elements[Idx].Offset,599Bucket.Elements[BaseElemIdx].Offset)600: Bucket.Elements[Idx].Offset;601602// Make sure offset is able to expand. Only need to check one time as the603// offsets are reused between different chains.604if (!BaseElemIdx)605if (!SCEVE.isSafeToExpand(OffsetSCEV))606return false;607608Value *OffsetValue = SCEVE.expandCodeFor(609OffsetSCEV, OffsetSCEV->getType(), LoopPredecessor->getTerminator());610611Instruction *NewPtr = rewriteForBucketElement(Base, Bucket.Elements[Idx],612OffsetValue, DeletedPtrs);613614assert(NewPtr && "Wrong rewrite!\n");615NewPtrs.insert(NewPtr);616}617618++ChainCommoningRewritten;619}620621// Clear the rewriter cache, because values that are in the rewriter's cache622// can be deleted below, causing the AssertingVH in the cache to trigger.623SCEVE.clear();624625for (auto *Ptr : DeletedPtrs) {626if (Instruction *IDel = dyn_cast<Instruction>(Ptr))627BBChanged.insert(IDel->getParent());628RecursivelyDeleteTriviallyDeadInstructions(Ptr);629}630631MadeChange = true;632return MadeChange;633}634635// Rewrite the new base according to BasePtrSCEV.636// bb.loop.preheader:637// %newstart = ...638// bb.loop.body:639// %phinode = phi [ %newstart, %bb.loop.preheader ], [ %add, %bb.loop.body ]640// ...641// %add = getelementptr %phinode, %inc642//643// First returned instruciton is %phinode (or a type cast to %phinode), caller644// needs this value to rewrite other load/stores in the same chain.645// Second returned instruction is %add, caller needs this value to rewrite other646// load/stores in the same chain.647std::pair<Instruction *, Instruction *>648PPCLoopInstrFormPrep::rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,649Instruction *BaseMemI, bool CanPreInc,650PrepForm Form, SCEVExpander &SCEVE,651SmallPtrSet<Value *, 16> &DeletedPtrs) {652653LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");654655assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?");656657Value *BasePtr = getPointerOperandAndType(BaseMemI);658assert(BasePtr && "No pointer operand");659660Type *I8Ty = Type::getInt8Ty(BaseMemI->getParent()->getContext());661Type *I8PtrTy =662PointerType::get(BaseMemI->getParent()->getContext(),663BasePtr->getType()->getPointerAddressSpace());664665bool IsConstantInc = false;666const SCEV *BasePtrIncSCEV = BasePtrSCEV->getStepRecurrence(*SE);667Value *IncNode = getNodeForInc(L, BaseMemI, BasePtrIncSCEV);668669const SCEVConstant *BasePtrIncConstantSCEV =670dyn_cast<SCEVConstant>(BasePtrIncSCEV);671if (BasePtrIncConstantSCEV)672IsConstantInc = true;673674// No valid representation for the increment.675if (!IncNode) {676LLVM_DEBUG(dbgs() << "Loop Increasement can not be represented!\n");677return std::make_pair(nullptr, nullptr);678}679680if (Form == UpdateForm && !IsConstantInc && !EnableUpdateFormForNonConstInc) {681LLVM_DEBUG(682dbgs()683<< "Update form prepare for non-const increment is not enabled!\n");684return std::make_pair(nullptr, nullptr);685}686687const SCEV *BasePtrStartSCEV = nullptr;688if (CanPreInc) {689assert(SE->isLoopInvariant(BasePtrIncSCEV, L) &&690"Increment is not loop invariant!\n");691BasePtrStartSCEV = SE->getMinusSCEV(BasePtrSCEV->getStart(),692IsConstantInc ? BasePtrIncConstantSCEV693: BasePtrIncSCEV);694} else695BasePtrStartSCEV = BasePtrSCEV->getStart();696697if (alreadyPrepared(L, BaseMemI, BasePtrStartSCEV, BasePtrIncSCEV, Form)) {698LLVM_DEBUG(dbgs() << "Instruction form is already prepared!\n");699return std::make_pair(nullptr, nullptr);700}701702LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");703704BasicBlock *Header = L->getHeader();705unsigned HeaderLoopPredCount = pred_size(Header);706BasicBlock *LoopPredecessor = L->getLoopPredecessor();707708PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,709getInstrName(BaseMemI, PHINodeNameSuffix));710NewPHI->insertBefore(Header->getFirstNonPHIIt());711712Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,713LoopPredecessor->getTerminator());714715// Note that LoopPredecessor might occur in the predecessor list multiple716// times, and we need to add it the right number of times.717for (auto *PI : predecessors(Header)) {718if (PI != LoopPredecessor)719continue;720721NewPHI->addIncoming(BasePtrStart, LoopPredecessor);722}723724Instruction *PtrInc = nullptr;725Instruction *NewBasePtr = nullptr;726if (CanPreInc) {727BasicBlock::iterator InsPoint = Header->getFirstInsertionPt();728PtrInc = GetElementPtrInst::Create(729I8Ty, NewPHI, IncNode, getInstrName(BaseMemI, GEPNodeIncNameSuffix),730InsPoint);731cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));732for (auto *PI : predecessors(Header)) {733if (PI == LoopPredecessor)734continue;735736NewPHI->addIncoming(PtrInc, PI);737}738if (PtrInc->getType() != BasePtr->getType())739NewBasePtr =740new BitCastInst(PtrInc, BasePtr->getType(),741getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);742else743NewBasePtr = PtrInc;744} else {745// Note that LoopPredecessor might occur in the predecessor list multiple746// times, and we need to make sure no more incoming value for them in PHI.747for (auto *PI : predecessors(Header)) {748if (PI == LoopPredecessor)749continue;750751// For the latch predecessor, we need to insert a GEP just before the752// terminator to increase the address.753BasicBlock *BB = PI;754BasicBlock::iterator InsPoint = BB->getTerminator()->getIterator();755PtrInc = GetElementPtrInst::Create(756I8Ty, NewPHI, IncNode, getInstrName(BaseMemI, GEPNodeIncNameSuffix),757InsPoint);758cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));759760NewPHI->addIncoming(PtrInc, PI);761}762PtrInc = NewPHI;763if (NewPHI->getType() != BasePtr->getType())764NewBasePtr = new BitCastInst(NewPHI, BasePtr->getType(),765getInstrName(NewPHI, CastNodeNameSuffix),766Header->getFirstInsertionPt());767else768NewBasePtr = NewPHI;769}770771BasePtr->replaceAllUsesWith(NewBasePtr);772773DeletedPtrs.insert(BasePtr);774775return std::make_pair(NewBasePtr, PtrInc);776}777778Instruction *PPCLoopInstrFormPrep::rewriteForBucketElement(779std::pair<Instruction *, Instruction *> Base, const BucketElement &Element,780Value *OffToBase, SmallPtrSet<Value *, 16> &DeletedPtrs) {781Instruction *NewBasePtr = Base.first;782Instruction *PtrInc = Base.second;783assert((NewBasePtr && PtrInc) && "base does not exist!\n");784785Type *I8Ty = Type::getInt8Ty(PtrInc->getParent()->getContext());786787Value *Ptr = getPointerOperandAndType(Element.Instr);788assert(Ptr && "No pointer operand");789790Instruction *RealNewPtr;791if (!Element.Offset ||792(isa<SCEVConstant>(Element.Offset) &&793cast<SCEVConstant>(Element.Offset)->getValue()->isZero())) {794RealNewPtr = NewBasePtr;795} else {796std::optional<BasicBlock::iterator> PtrIP = std::nullopt;797if (Instruction *I = dyn_cast<Instruction>(Ptr))798PtrIP = I->getIterator();799800if (PtrIP && isa<Instruction>(NewBasePtr) &&801cast<Instruction>(NewBasePtr)->getParent() == (*PtrIP)->getParent())802PtrIP = std::nullopt;803else if (PtrIP && isa<PHINode>(*PtrIP))804PtrIP = (*PtrIP)->getParent()->getFirstInsertionPt();805else if (!PtrIP)806PtrIP = Element.Instr->getIterator();807808assert(OffToBase && "There should be an offset for non base element!\n");809GetElementPtrInst *NewPtr = GetElementPtrInst::Create(810I8Ty, PtrInc, OffToBase,811getInstrName(Element.Instr, GEPNodeOffNameSuffix));812if (PtrIP)813NewPtr->insertBefore(*(*PtrIP)->getParent(), *PtrIP);814else815NewPtr->insertAfter(cast<Instruction>(PtrInc));816NewPtr->setIsInBounds(IsPtrInBounds(Ptr));817RealNewPtr = NewPtr;818}819820Instruction *ReplNewPtr;821if (Ptr->getType() != RealNewPtr->getType()) {822ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),823getInstrName(Ptr, CastNodeNameSuffix));824ReplNewPtr->insertAfter(RealNewPtr);825} else826ReplNewPtr = RealNewPtr;827828Ptr->replaceAllUsesWith(ReplNewPtr);829DeletedPtrs.insert(Ptr);830831return ReplNewPtr;832}833834void PPCLoopInstrFormPrep::addOneCandidate(835Instruction *MemI, const SCEV *LSCEV, SmallVector<Bucket, 16> &Buckets,836std::function<bool(const SCEV *)> isValidDiff, unsigned MaxCandidateNum) {837assert((MemI && getPointerOperandAndType(MemI)) &&838"Candidate should be a memory instruction.");839assert(LSCEV && "Invalid SCEV for Ptr value.");840841bool FoundBucket = false;842for (auto &B : Buckets) {843if (cast<SCEVAddRecExpr>(B.BaseSCEV)->getStepRecurrence(*SE) !=844cast<SCEVAddRecExpr>(LSCEV)->getStepRecurrence(*SE))845continue;846const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);847if (isValidDiff(Diff)) {848B.Elements.push_back(BucketElement(Diff, MemI));849FoundBucket = true;850break;851}852}853854if (!FoundBucket) {855if (Buckets.size() == MaxCandidateNum) {856LLVM_DEBUG(dbgs() << "Can not prepare more chains, reach maximum limit "857<< MaxCandidateNum << "\n");858return;859}860Buckets.push_back(Bucket(LSCEV, MemI));861}862}863864SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(865Loop *L,866std::function<bool(const Instruction *, Value *, const Type *)>867isValidCandidate,868std::function<bool(const SCEV *)> isValidDiff, unsigned MaxCandidateNum) {869SmallVector<Bucket, 16> Buckets;870871for (const auto &BB : L->blocks())872for (auto &J : *BB) {873Value *PtrValue = nullptr;874Type *PointerElementType = nullptr;875PtrValue = getPointerOperandAndType(&J, &PointerElementType);876877if (!PtrValue)878continue;879880if (PtrValue->getType()->getPointerAddressSpace())881continue;882883if (L->isLoopInvariant(PtrValue))884continue;885886const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);887const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);888if (!LARSCEV || LARSCEV->getLoop() != L)889continue;890891// Mark that we have candidates for preparing.892HasCandidateForPrepare = true;893894if (isValidCandidate(&J, PtrValue, PointerElementType))895addOneCandidate(&J, LSCEV, Buckets, isValidDiff, MaxCandidateNum);896}897return Buckets;898}899900bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,901PrepForm Form) {902// RemainderOffsetInfo details:903// key: value of (Offset urem DispConstraint). For DSForm, it can904// be [0, 4).905// first of pair: the index of first BucketElement whose remainder is equal906// to key. For key 0, this value must be 0.907// second of pair: number of load/stores with the same remainder.908DenseMap<unsigned, std::pair<unsigned, unsigned>> RemainderOffsetInfo;909910for (unsigned j = 0, je = BucketChain.Elements.size(); j != je; ++j) {911if (!BucketChain.Elements[j].Offset)912RemainderOffsetInfo[0] = std::make_pair(0, 1);913else {914unsigned Remainder = cast<SCEVConstant>(BucketChain.Elements[j].Offset)915->getAPInt()916.urem(Form);917if (!RemainderOffsetInfo.contains(Remainder))918RemainderOffsetInfo[Remainder] = std::make_pair(j, 1);919else920RemainderOffsetInfo[Remainder].second++;921}922}923// Currently we choose the most profitable base as the one which has the max924// number of load/store with same remainder.925// FIXME: adjust the base selection strategy according to load/store offset926// distribution.927// For example, if we have one candidate chain for DS form preparation, which928// contains following load/stores with different remainders:929// 1: 10 load/store whose remainder is 1;930// 2: 9 load/store whose remainder is 2;931// 3: 1 for remainder 3 and 0 for remainder 0;932// Now we will choose the first load/store whose remainder is 1 as base and933// adjust all other load/stores according to new base, so we will get 10 DS934// form and 10 X form.935// But we should be more clever, for this case we could use two bases, one for936// remainder 1 and the other for remainder 2, thus we could get 19 DS form and937// 1 X form.938unsigned MaxCountRemainder = 0;939for (unsigned j = 0; j < (unsigned)Form; j++)940if ((RemainderOffsetInfo.contains(j)) &&941RemainderOffsetInfo[j].second >942RemainderOffsetInfo[MaxCountRemainder].second)943MaxCountRemainder = j;944945// Abort when there are too few insts with common base.946if (RemainderOffsetInfo[MaxCountRemainder].second < DispFormPrepMinThreshold)947return false;948949// If the first value is most profitable, no needed to adjust BucketChain950// elements as they are substracted the first value when collecting.951if (MaxCountRemainder == 0)952return true;953954// Adjust load/store to the new chosen base.955const SCEV *Offset =956BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first].Offset;957BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset);958for (auto &E : BucketChain.Elements) {959if (E.Offset)960E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));961else962E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));963}964965std::swap(BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first],966BucketChain.Elements[0]);967return true;968}969970// FIXME: implement a more clever base choosing policy.971// Currently we always choose an exist load/store offset. This maybe lead to972// suboptimal code sequences. For example, for one DS chain with offsets973// {-32769, 2003, 2007, 2011}, we choose -32769 as base offset, and left disp974// for load/stores are {0, 34772, 34776, 34780}. Though each offset now is a975// multipler of 4, it cannot be represented by sint16.976bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {977// We have a choice now of which instruction's memory operand we use as the978// base for the generated PHI. Always picking the first instruction in each979// bucket does not work well, specifically because that instruction might980// be a prefetch (and there are no pre-increment dcbt variants). Otherwise,981// the choice is somewhat arbitrary, because the backend will happily982// generate direct offsets from both the pre-incremented and983// post-incremented pointer values. Thus, we'll pick the first non-prefetch984// instruction in each bucket, and adjust the recurrence and other offsets985// accordingly.986for (int j = 0, je = BucketChain.Elements.size(); j != je; ++j) {987if (auto *II = dyn_cast<IntrinsicInst>(BucketChain.Elements[j].Instr))988if (II->getIntrinsicID() == Intrinsic::prefetch)989continue;990991// If we'd otherwise pick the first element anyway, there's nothing to do.992if (j == 0)993break;994995// If our chosen element has no offset from the base pointer, there's996// nothing to do.997if (!BucketChain.Elements[j].Offset ||998cast<SCEVConstant>(BucketChain.Elements[j].Offset)->isZero())999break;10001001const SCEV *Offset = BucketChain.Elements[j].Offset;1002BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset);1003for (auto &E : BucketChain.Elements) {1004if (E.Offset)1005E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));1006else1007E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));1008}10091010std::swap(BucketChain.Elements[j], BucketChain.Elements[0]);1011break;1012}1013return true;1014}10151016bool PPCLoopInstrFormPrep::rewriteLoadStores(1017Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged,1018PrepForm Form) {1019bool MadeChange = false;10201021const SCEVAddRecExpr *BasePtrSCEV =1022cast<SCEVAddRecExpr>(BucketChain.BaseSCEV);1023if (!BasePtrSCEV->isAffine())1024return MadeChange;10251026BasicBlock *Header = L->getHeader();1027SCEVExpander SCEVE(*SE, Header->getDataLayout(),1028"loopprepare-formrewrite");1029if (!SCEVE.isSafeToExpand(BasePtrSCEV->getStart()))1030return MadeChange;10311032SmallPtrSet<Value *, 16> DeletedPtrs;10331034// For some DS form load/store instructions, it can also be an update form,1035// if the stride is constant and is a multipler of 4. Use update form if1036// prefer it.1037bool CanPreInc = (Form == UpdateForm ||1038((Form == DSForm) &&1039isa<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE)) &&1040!cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE))1041->getAPInt()1042.urem(4) &&1043PreferUpdateForm));10441045std::pair<Instruction *, Instruction *> Base =1046rewriteForBase(L, BasePtrSCEV, BucketChain.Elements.begin()->Instr,1047CanPreInc, Form, SCEVE, DeletedPtrs);10481049if (!Base.first || !Base.second)1050return MadeChange;10511052// Keep track of the replacement pointer values we've inserted so that we1053// don't generate more pointer values than necessary.1054SmallPtrSet<Value *, 16> NewPtrs;1055NewPtrs.insert(Base.first);10561057for (const BucketElement &BE : llvm::drop_begin(BucketChain.Elements)) {1058Value *Ptr = getPointerOperandAndType(BE.Instr);1059assert(Ptr && "No pointer operand");1060if (NewPtrs.count(Ptr))1061continue;10621063Instruction *NewPtr = rewriteForBucketElement(1064Base, BE,1065BE.Offset ? cast<SCEVConstant>(BE.Offset)->getValue() : nullptr,1066DeletedPtrs);1067assert(NewPtr && "wrong rewrite!\n");1068NewPtrs.insert(NewPtr);1069}10701071// Clear the rewriter cache, because values that are in the rewriter's cache1072// can be deleted below, causing the AssertingVH in the cache to trigger.1073SCEVE.clear();10741075for (auto *Ptr : DeletedPtrs) {1076if (Instruction *IDel = dyn_cast<Instruction>(Ptr))1077BBChanged.insert(IDel->getParent());1078RecursivelyDeleteTriviallyDeadInstructions(Ptr);1079}10801081MadeChange = true;10821083SuccPrepCount++;10841085if (Form == DSForm && !CanPreInc)1086DSFormChainRewritten++;1087else if (Form == DQForm)1088DQFormChainRewritten++;1089else if (Form == UpdateForm || (Form == DSForm && CanPreInc))1090UpdFormChainRewritten++;10911092return MadeChange;1093}10941095bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L,1096SmallVector<Bucket, 16> &Buckets) {1097bool MadeChange = false;1098if (Buckets.empty())1099return MadeChange;1100SmallSet<BasicBlock *, 16> BBChanged;1101for (auto &Bucket : Buckets)1102// The base address of each bucket is transformed into a phi and the others1103// are rewritten based on new base.1104if (prepareBaseForUpdateFormChain(Bucket))1105MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, UpdateForm);11061107if (MadeChange)1108for (auto *BB : BBChanged)1109DeleteDeadPHIs(BB);1110return MadeChange;1111}11121113bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L,1114SmallVector<Bucket, 16> &Buckets,1115PrepForm Form) {1116bool MadeChange = false;11171118if (Buckets.empty())1119return MadeChange;11201121SmallSet<BasicBlock *, 16> BBChanged;1122for (auto &Bucket : Buckets) {1123if (Bucket.Elements.size() < DispFormPrepMinThreshold)1124continue;1125if (prepareBaseForDispFormChain(Bucket, Form))1126MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, Form);1127}11281129if (MadeChange)1130for (auto *BB : BBChanged)1131DeleteDeadPHIs(BB);1132return MadeChange;1133}11341135// Find the loop invariant increment node for SCEV BasePtrIncSCEV.1136// bb.loop.preheader:1137// %start = ...1138// bb.loop.body:1139// %phinode = phi [ %start, %bb.loop.preheader ], [ %add, %bb.loop.body ]1140// ...1141// %add = add %phinode, %inc ; %inc is what we want to get.1142//1143Value *PPCLoopInstrFormPrep::getNodeForInc(Loop *L, Instruction *MemI,1144const SCEV *BasePtrIncSCEV) {1145// If the increment is a constant, no definition is needed.1146// Return the value directly.1147if (isa<SCEVConstant>(BasePtrIncSCEV))1148return cast<SCEVConstant>(BasePtrIncSCEV)->getValue();11491150if (!SE->isLoopInvariant(BasePtrIncSCEV, L))1151return nullptr;11521153BasicBlock *BB = MemI->getParent();1154if (!BB)1155return nullptr;11561157BasicBlock *LatchBB = L->getLoopLatch();11581159if (!LatchBB)1160return nullptr;11611162// Run through the PHIs and check their operands to find valid representation1163// for the increment SCEV.1164iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();1165for (auto &CurrentPHI : PHIIter) {1166PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);1167if (!CurrentPHINode)1168continue;11691170if (!SE->isSCEVable(CurrentPHINode->getType()))1171continue;11721173const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);11741175const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);1176if (!PHIBasePtrSCEV)1177continue;11781179const SCEV *PHIBasePtrIncSCEV = PHIBasePtrSCEV->getStepRecurrence(*SE);11801181if (!PHIBasePtrIncSCEV || (PHIBasePtrIncSCEV != BasePtrIncSCEV))1182continue;11831184// Get the incoming value from the loop latch and check if the value has1185// the add form with the required increment.1186if (CurrentPHINode->getBasicBlockIndex(LatchBB) < 0)1187continue;1188if (Instruction *I = dyn_cast<Instruction>(1189CurrentPHINode->getIncomingValueForBlock(LatchBB))) {1190Value *StrippedBaseI = I;1191while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBaseI))1192StrippedBaseI = BC->getOperand(0);11931194Instruction *StrippedI = dyn_cast<Instruction>(StrippedBaseI);1195if (!StrippedI)1196continue;11971198// LSR pass may add a getelementptr instruction to do the loop increment,1199// also search in that getelementptr instruction.1200if (StrippedI->getOpcode() == Instruction::Add ||1201(StrippedI->getOpcode() == Instruction::GetElementPtr &&1202StrippedI->getNumOperands() == 2)) {1203if (SE->getSCEVAtScope(StrippedI->getOperand(0), L) == BasePtrIncSCEV)1204return StrippedI->getOperand(0);1205if (SE->getSCEVAtScope(StrippedI->getOperand(1), L) == BasePtrIncSCEV)1206return StrippedI->getOperand(1);1207}1208}1209}1210return nullptr;1211}12121213// In order to prepare for the preferred instruction form, a PHI is added.1214// This function will check to see if that PHI already exists and will return1215// true if it found an existing PHI with the matched start and increment as the1216// one we wanted to create.1217bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI,1218const SCEV *BasePtrStartSCEV,1219const SCEV *BasePtrIncSCEV,1220PrepForm Form) {1221BasicBlock *BB = MemI->getParent();1222if (!BB)1223return false;12241225BasicBlock *PredBB = L->getLoopPredecessor();1226BasicBlock *LatchBB = L->getLoopLatch();12271228if (!PredBB || !LatchBB)1229return false;12301231// Run through the PHIs and see if we have some that looks like a preparation1232iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();1233for (auto & CurrentPHI : PHIIter) {1234PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);1235if (!CurrentPHINode)1236continue;12371238if (!SE->isSCEVable(CurrentPHINode->getType()))1239continue;12401241const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);12421243const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);1244if (!PHIBasePtrSCEV)1245continue;12461247const SCEVConstant *PHIBasePtrIncSCEV =1248dyn_cast<SCEVConstant>(PHIBasePtrSCEV->getStepRecurrence(*SE));1249if (!PHIBasePtrIncSCEV)1250continue;12511252if (CurrentPHINode->getNumIncomingValues() == 2) {1253if ((CurrentPHINode->getIncomingBlock(0) == LatchBB &&1254CurrentPHINode->getIncomingBlock(1) == PredBB) ||1255(CurrentPHINode->getIncomingBlock(1) == LatchBB &&1256CurrentPHINode->getIncomingBlock(0) == PredBB)) {1257if (PHIBasePtrIncSCEV == BasePtrIncSCEV) {1258// The existing PHI (CurrentPHINode) has the same start and increment1259// as the PHI that we wanted to create.1260if ((Form == UpdateForm || Form == ChainCommoning ) &&1261PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) {1262++PHINodeAlreadyExistsUpdate;1263return true;1264}1265if (Form == DSForm || Form == DQForm) {1266const SCEVConstant *Diff = dyn_cast<SCEVConstant>(1267SE->getMinusSCEV(PHIBasePtrSCEV->getStart(), BasePtrStartSCEV));1268if (Diff && !Diff->getAPInt().urem(Form)) {1269if (Form == DSForm)1270++PHINodeAlreadyExistsDS;1271else1272++PHINodeAlreadyExistsDQ;1273return true;1274}1275}1276}1277}1278}1279}1280return false;1281}12821283bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {1284bool MadeChange = false;12851286// Only prep. the inner-most loop1287if (!L->isInnermost())1288return MadeChange;12891290// Return if already done enough preparation.1291if (SuccPrepCount >= MaxVarsPrep)1292return MadeChange;12931294LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");12951296BasicBlock *LoopPredecessor = L->getLoopPredecessor();1297// If there is no loop predecessor, or the loop predecessor's terminator1298// returns a value (which might contribute to determining the loop's1299// iteration space), insert a new preheader for the loop.1300if (!LoopPredecessor ||1301!LoopPredecessor->getTerminator()->getType()->isVoidTy()) {1302LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);1303if (LoopPredecessor)1304MadeChange = true;1305}1306if (!LoopPredecessor) {1307LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n");1308return MadeChange;1309}1310// Check if a load/store has update form. This lambda is used by function1311// collectCandidates which can collect candidates for types defined by lambda.1312auto isUpdateFormCandidate = [&](const Instruction *I, Value *PtrValue,1313const Type *PointerElementType) {1314assert((PtrValue && I) && "Invalid parameter!");1315// There are no update forms for Altivec vector load/stores.1316if (ST && ST->hasAltivec() && PointerElementType->isVectorTy())1317return false;1318// There are no update forms for P10 lxvp/stxvp intrinsic.1319auto *II = dyn_cast<IntrinsicInst>(I);1320if (II && ((II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) ||1321II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp))1322return false;1323// See getPreIndexedAddressParts, the displacement for LDU/STDU has to1324// be 4's multiple (DS-form). For i64 loads/stores when the displacement1325// fits in a 16-bit signed field but isn't a multiple of 4, it will be1326// useless and possible to break some original well-form addressing mode1327// to make this pre-inc prep for it.1328if (PointerElementType->isIntegerTy(64)) {1329const SCEV *LSCEV = SE->getSCEVAtScope(const_cast<Value *>(PtrValue), L);1330const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV);1331if (!LARSCEV || LARSCEV->getLoop() != L)1332return false;1333if (const SCEVConstant *StepConst =1334dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) {1335const APInt &ConstInt = StepConst->getValue()->getValue();1336if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0)1337return false;1338}1339}1340return true;1341};13421343// Check if a load/store has DS form.1344auto isDSFormCandidate = [](const Instruction *I, Value *PtrValue,1345const Type *PointerElementType) {1346assert((PtrValue && I) && "Invalid parameter!");1347if (isa<IntrinsicInst>(I))1348return false;1349return (PointerElementType->isIntegerTy(64)) ||1350(PointerElementType->isFloatTy()) ||1351(PointerElementType->isDoubleTy()) ||1352(PointerElementType->isIntegerTy(32) &&1353llvm::any_of(I->users(),1354[](const User *U) { return isa<SExtInst>(U); }));1355};13561357// Check if a load/store has DQ form.1358auto isDQFormCandidate = [&](const Instruction *I, Value *PtrValue,1359const Type *PointerElementType) {1360assert((PtrValue && I) && "Invalid parameter!");1361// Check if it is a P10 lxvp/stxvp intrinsic.1362auto *II = dyn_cast<IntrinsicInst>(I);1363if (II)1364return II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp ||1365II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp;1366// Check if it is a P9 vector load/store.1367return ST && ST->hasP9Vector() && (PointerElementType->isVectorTy());1368};13691370// Check if a load/store is candidate for chain commoning.1371// If the SCEV is only with one ptr operand in its start, we can use that1372// start as a chain separator. Mark this load/store as a candidate.1373auto isChainCommoningCandidate = [&](const Instruction *I, Value *PtrValue,1374const Type *PointerElementType) {1375const SCEVAddRecExpr *ARSCEV =1376cast<SCEVAddRecExpr>(SE->getSCEVAtScope(PtrValue, L));1377if (!ARSCEV)1378return false;13791380if (!ARSCEV->isAffine())1381return false;13821383const SCEV *Start = ARSCEV->getStart();13841385// A single pointer. We can treat it as offset 0.1386if (isa<SCEVUnknown>(Start) && Start->getType()->isPointerTy())1387return true;13881389const SCEVAddExpr *ASCEV = dyn_cast<SCEVAddExpr>(Start);13901391// We need a SCEVAddExpr to include both base and offset.1392if (!ASCEV)1393return false;13941395// Make sure there is only one pointer operand(base) and all other operands1396// are integer type.1397bool SawPointer = false;1398for (const SCEV *Op : ASCEV->operands()) {1399if (Op->getType()->isPointerTy()) {1400if (SawPointer)1401return false;1402SawPointer = true;1403} else if (!Op->getType()->isIntegerTy())1404return false;1405}14061407return SawPointer;1408};14091410// Check if the diff is a constant type. This is used for update/DS/DQ form1411// preparation.1412auto isValidConstantDiff = [](const SCEV *Diff) {1413return dyn_cast<SCEVConstant>(Diff) != nullptr;1414};14151416// Make sure the diff between the base and new candidate is required type.1417// This is used for chain commoning preparation.1418auto isValidChainCommoningDiff = [](const SCEV *Diff) {1419assert(Diff && "Invalid Diff!\n");14201421// Don't mess up previous dform prepare.1422if (isa<SCEVConstant>(Diff))1423return false;14241425// A single integer type offset.1426if (isa<SCEVUnknown>(Diff) && Diff->getType()->isIntegerTy())1427return true;14281429const SCEVNAryExpr *ADiff = dyn_cast<SCEVNAryExpr>(Diff);1430if (!ADiff)1431return false;14321433for (const SCEV *Op : ADiff->operands())1434if (!Op->getType()->isIntegerTy())1435return false;14361437return true;1438};14391440HasCandidateForPrepare = false;14411442LLVM_DEBUG(dbgs() << "Start to prepare for update form.\n");1443// Collect buckets of comparable addresses used by loads and stores for update1444// form.1445SmallVector<Bucket, 16> UpdateFormBuckets = collectCandidates(1446L, isUpdateFormCandidate, isValidConstantDiff, MaxVarsUpdateForm);14471448// Prepare for update form.1449if (!UpdateFormBuckets.empty())1450MadeChange |= updateFormPrep(L, UpdateFormBuckets);1451else if (!HasCandidateForPrepare) {1452LLVM_DEBUG(1453dbgs()1454<< "No prepare candidates found, stop praparation for current loop!\n");1455// If no candidate for preparing, return early.1456return MadeChange;1457}14581459LLVM_DEBUG(dbgs() << "Start to prepare for DS form.\n");1460// Collect buckets of comparable addresses used by loads and stores for DS1461// form.1462SmallVector<Bucket, 16> DSFormBuckets = collectCandidates(1463L, isDSFormCandidate, isValidConstantDiff, MaxVarsDSForm);14641465// Prepare for DS form.1466if (!DSFormBuckets.empty())1467MadeChange |= dispFormPrep(L, DSFormBuckets, DSForm);14681469LLVM_DEBUG(dbgs() << "Start to prepare for DQ form.\n");1470// Collect buckets of comparable addresses used by loads and stores for DQ1471// form.1472SmallVector<Bucket, 16> DQFormBuckets = collectCandidates(1473L, isDQFormCandidate, isValidConstantDiff, MaxVarsDQForm);14741475// Prepare for DQ form.1476if (!DQFormBuckets.empty())1477MadeChange |= dispFormPrep(L, DQFormBuckets, DQForm);14781479// Collect buckets of comparable addresses used by loads and stores for chain1480// commoning. With chain commoning, we reuse offsets between the chains, so1481// the register pressure will be reduced.1482if (!EnableChainCommoning) {1483LLVM_DEBUG(dbgs() << "Chain commoning is not enabled.\n");1484return MadeChange;1485}14861487LLVM_DEBUG(dbgs() << "Start to prepare for chain commoning.\n");1488SmallVector<Bucket, 16> Buckets =1489collectCandidates(L, isChainCommoningCandidate, isValidChainCommoningDiff,1490MaxVarsChainCommon);14911492// Prepare for chain commoning.1493if (!Buckets.empty())1494MadeChange |= chainCommoning(L, Buckets);14951496return MadeChange;1497}149814991500