Path: blob/main/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
35294 views
//===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This pass identifies loops where we can generate the Hexagon hardware9// loop instruction. The hardware loop can perform loop branches with a10// zero-cycle overhead.11//12// The pattern that defines the induction variable can changed depending on13// prior optimizations. For example, the IndVarSimplify phase run by 'opt'14// normalizes induction variables, and the Loop Strength Reduction pass15// run by 'llc' may also make changes to the induction variable.16// The pattern detected by this phase is due to running Strength Reduction.17//18// Criteria for hardware loops:19// - Countable loops (w/ ind. var for a trip count)20// - Assumes loops are normalized by IndVarSimplify21// - Try inner-most loops first22// - No function calls in loops.23//24//===----------------------------------------------------------------------===//2526#include "HexagonInstrInfo.h"27#include "HexagonSubtarget.h"28#include "llvm/ADT/ArrayRef.h"29#include "llvm/ADT/STLExtras.h"30#include "llvm/ADT/SmallSet.h"31#include "llvm/ADT/SmallVector.h"32#include "llvm/ADT/Statistic.h"33#include "llvm/ADT/StringRef.h"34#include "llvm/CodeGen/MachineBasicBlock.h"35#include "llvm/CodeGen/MachineDominators.h"36#include "llvm/CodeGen/MachineFunction.h"37#include "llvm/CodeGen/MachineFunctionPass.h"38#include "llvm/CodeGen/MachineInstr.h"39#include "llvm/CodeGen/MachineInstrBuilder.h"40#include "llvm/CodeGen/MachineLoopInfo.h"41#include "llvm/CodeGen/MachineOperand.h"42#include "llvm/CodeGen/MachineRegisterInfo.h"43#include "llvm/CodeGen/TargetRegisterInfo.h"44#include "llvm/IR/Constants.h"45#include "llvm/IR/DebugLoc.h"46#include "llvm/InitializePasses.h"47#include "llvm/Pass.h"48#include "llvm/Support/CommandLine.h"49#include "llvm/Support/Debug.h"50#include "llvm/Support/ErrorHandling.h"51#include "llvm/Support/MathExtras.h"52#include "llvm/Support/raw_ostream.h"53#include <cassert>54#include <cstdint>55#include <cstdlib>56#include <iterator>57#include <map>58#include <set>59#include <string>60#include <utility>61#include <vector>6263using namespace llvm;6465#define DEBUG_TYPE "hwloops"6667#ifndef NDEBUG68static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));6970// Option to create preheader only for a specific function.71static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,72cl::init(""));73#endif7475// Option to create a preheader if one doesn't exist.76static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",77cl::Hidden, cl::init(true),78cl::desc("Add a preheader to a hardware loop if one doesn't exist"));7980// Turn it off by default. If a preheader block is not created here, the81// software pipeliner may be unable to find a block suitable to serve as82// a preheader. In that case SWP will not run.83static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::Hidden,84cl::desc("Allow speculation of preheader "85"instructions"));8687STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");8889namespace llvm {9091FunctionPass *createHexagonHardwareLoops();92void initializeHexagonHardwareLoopsPass(PassRegistry&);9394} // end namespace llvm9596namespace {9798class CountValue;99100struct HexagonHardwareLoops : public MachineFunctionPass {101MachineLoopInfo *MLI;102MachineRegisterInfo *MRI;103MachineDominatorTree *MDT;104const HexagonInstrInfo *TII;105const HexagonRegisterInfo *TRI;106#ifndef NDEBUG107static int Counter;108#endif109110public:111static char ID;112113HexagonHardwareLoops() : MachineFunctionPass(ID) {}114115bool runOnMachineFunction(MachineFunction &MF) override;116117StringRef getPassName() const override { return "Hexagon Hardware Loops"; }118119void getAnalysisUsage(AnalysisUsage &AU) const override {120AU.addRequired<MachineDominatorTreeWrapperPass>();121AU.addRequired<MachineLoopInfoWrapperPass>();122MachineFunctionPass::getAnalysisUsage(AU);123}124125private:126using LoopFeederMap = std::map<Register, MachineInstr *>;127128/// Kinds of comparisons in the compare instructions.129struct Comparison {130enum Kind {131EQ = 0x01,132NE = 0x02,133L = 0x04,134G = 0x08,135U = 0x40,136LTs = L,137LEs = L | EQ,138GTs = G,139GEs = G | EQ,140LTu = L | U,141LEu = L | EQ | U,142GTu = G | U,143GEu = G | EQ | U144};145146static Kind getSwappedComparison(Kind Cmp) {147assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");148if ((Cmp & L) || (Cmp & G))149return (Kind)(Cmp ^ (L|G));150return Cmp;151}152153static Kind getNegatedComparison(Kind Cmp) {154if ((Cmp & L) || (Cmp & G))155return (Kind)((Cmp ^ (L | G)) ^ EQ);156if ((Cmp & NE) || (Cmp & EQ))157return (Kind)(Cmp ^ (EQ | NE));158return (Kind)0;159}160161static bool isSigned(Kind Cmp) {162return (Cmp & (L | G) && !(Cmp & U));163}164165static bool isUnsigned(Kind Cmp) {166return (Cmp & U);167}168};169170/// Find the register that contains the loop controlling171/// induction variable.172/// If successful, it will return true and set the \p Reg, \p IVBump173/// and \p IVOp arguments. Otherwise it will return false.174/// The returned induction register is the register R that follows the175/// following induction pattern:176/// loop:177/// R = phi ..., [ R.next, LatchBlock ]178/// R.next = R + #bump179/// if (R.next < #N) goto loop180/// IVBump is the immediate value added to R, and IVOp is the instruction181/// "R.next = R + #bump".182bool findInductionRegister(MachineLoop *L, Register &Reg,183int64_t &IVBump, MachineInstr *&IVOp) const;184185/// Return the comparison kind for the specified opcode.186Comparison::Kind getComparisonKind(unsigned CondOpc,187MachineOperand *InitialValue,188const MachineOperand *Endvalue,189int64_t IVBump) const;190191/// Analyze the statements in a loop to determine if the loop192/// has a computable trip count and, if so, return a value that represents193/// the trip count expression.194CountValue *getLoopTripCount(MachineLoop *L,195SmallVectorImpl<MachineInstr *> &OldInsts);196197/// Return the expression that represents the number of times198/// a loop iterates. The function takes the operands that represent the199/// loop start value, loop end value, and induction value. Based upon200/// these operands, the function attempts to compute the trip count.201/// If the trip count is not directly available (as an immediate value,202/// or a register), the function will attempt to insert computation of it203/// to the loop's preheader.204CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,205const MachineOperand *End, Register IVReg,206int64_t IVBump, Comparison::Kind Cmp) const;207208/// Return true if the instruction is not valid within a hardware209/// loop.210bool isInvalidLoopOperation(const MachineInstr *MI,211bool IsInnerHWLoop) const;212213/// Return true if the loop contains an instruction that inhibits214/// using the hardware loop.215bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;216217/// Given a loop, check if we can convert it to a hardware loop.218/// If so, then perform the conversion and return true.219bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);220221/// Return true if the instruction is now dead.222bool isDead(const MachineInstr *MI,223SmallVectorImpl<MachineInstr *> &DeadPhis) const;224225/// Remove the instruction if it is now dead.226void removeIfDead(MachineInstr *MI);227228/// Make sure that the "bump" instruction executes before the229/// compare. We need that for the IV fixup, so that the compare230/// instruction would not use a bumped value that has not yet been231/// defined. If the instructions are out of order, try to reorder them.232bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);233234/// Return true if MO and MI pair is visited only once. If visited235/// more than once, this indicates there is recursion. In such a case,236/// return false.237bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,238const MachineOperand *MO,239LoopFeederMap &LoopFeederPhi) const;240241/// Return true if the Phi may generate a value that may underflow,242/// or may wrap.243bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,244MachineBasicBlock *MBB, MachineLoop *L,245LoopFeederMap &LoopFeederPhi) const;246247/// Return true if the induction variable may underflow an unsigned248/// value in the first iteration.249bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,250const MachineOperand *EndVal,251MachineBasicBlock *MBB, MachineLoop *L,252LoopFeederMap &LoopFeederPhi) const;253254/// Check if the given operand has a compile-time known constant255/// value. Return true if yes, and false otherwise. When returning true, set256/// Val to the corresponding constant value.257bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;258259/// Check if the operand has a compile-time known constant value.260bool isImmediate(const MachineOperand &MO) const {261int64_t V;262return checkForImmediate(MO, V);263}264265/// Return the immediate for the specified operand.266int64_t getImmediate(const MachineOperand &MO) const {267int64_t V;268if (!checkForImmediate(MO, V))269llvm_unreachable("Invalid operand");270return V;271}272273/// Reset the given machine operand to now refer to a new immediate274/// value. Assumes that the operand was already referencing an immediate275/// value, either directly, or via a register.276void setImmediate(MachineOperand &MO, int64_t Val);277278/// Fix the data flow of the induction variable.279/// The desired flow is: phi ---> bump -+-> comparison-in-latch.280/// |281/// +-> back to phi282/// where "bump" is the increment of the induction variable:283/// iv = iv + #const.284/// Due to some prior code transformations, the actual flow may look285/// like this:286/// phi -+-> bump ---> back to phi287/// |288/// +-> comparison-in-latch (against upper_bound-bump),289/// i.e. the comparison that controls the loop execution may be using290/// the value of the induction variable from before the increment.291///292/// Return true if the loop's flow is the desired one (i.e. it's293/// either been fixed, or no fixing was necessary).294/// Otherwise, return false. This can happen if the induction variable295/// couldn't be identified, or if the value in the latch's comparison296/// cannot be adjusted to reflect the post-bump value.297bool fixupInductionVariable(MachineLoop *L);298299/// Given a loop, if it does not have a preheader, create one.300/// Return the block that is the preheader.301MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);302};303304char HexagonHardwareLoops::ID = 0;305#ifndef NDEBUG306int HexagonHardwareLoops::Counter = 0;307#endif308309/// Abstraction for a trip count of a loop. A smaller version310/// of the MachineOperand class without the concerns of changing the311/// operand representation.312class CountValue {313public:314enum CountValueType {315CV_Register,316CV_Immediate317};318319private:320CountValueType Kind;321union Values {322Values() : R{Register(), 0} {}323Values(const Values&) = default;324struct {325Register Reg;326unsigned Sub;327} R;328unsigned ImmVal;329} Contents;330331public:332explicit CountValue(CountValueType t, Register v, unsigned u = 0) {333Kind = t;334if (Kind == CV_Register) {335Contents.R.Reg = v;336Contents.R.Sub = u;337} else {338Contents.ImmVal = v;339}340}341342bool isReg() const { return Kind == CV_Register; }343bool isImm() const { return Kind == CV_Immediate; }344345Register getReg() const {346assert(isReg() && "Wrong CountValue accessor");347return Contents.R.Reg;348}349350unsigned getSubReg() const {351assert(isReg() && "Wrong CountValue accessor");352return Contents.R.Sub;353}354355unsigned getImm() const {356assert(isImm() && "Wrong CountValue accessor");357return Contents.ImmVal;358}359360void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {361if (isReg()) { OS << printReg(Contents.R.Reg, TRI, Contents.R.Sub); }362if (isImm()) { OS << Contents.ImmVal; }363}364};365366} // end anonymous namespace367368INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",369"Hexagon Hardware Loops", false, false)370INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)371INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)372INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",373"Hexagon Hardware Loops", false, false)374375FunctionPass *llvm::createHexagonHardwareLoops() {376return new HexagonHardwareLoops();377}378379bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {380LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");381if (skipFunction(MF.getFunction()))382return false;383384bool Changed = false;385386MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();387MRI = &MF.getRegInfo();388MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();389const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();390TII = HST.getInstrInfo();391TRI = HST.getRegisterInfo();392393for (auto &L : *MLI)394if (L->isOutermost()) {395bool L0Used = false;396bool L1Used = false;397Changed |= convertToHardwareLoop(L, L0Used, L1Used);398}399400return Changed;401}402403bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,404Register &Reg,405int64_t &IVBump,406MachineInstr *&IVOp407) const {408MachineBasicBlock *Header = L->getHeader();409MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);410MachineBasicBlock *Latch = L->getLoopLatch();411MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();412if (!Header || !Preheader || !Latch || !ExitingBlock)413return false;414415// This pair represents an induction register together with an immediate416// value that will be added to it in each loop iteration.417using RegisterBump = std::pair<Register, int64_t>;418419// Mapping: R.next -> (R, bump), where R, R.next and bump are derived420// from an induction operation421// R.next = R + bump422// where bump is an immediate value.423using InductionMap = std::map<Register, RegisterBump>;424425InductionMap IndMap;426427using instr_iterator = MachineBasicBlock::instr_iterator;428429for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();430I != E && I->isPHI(); ++I) {431MachineInstr *Phi = &*I;432433// Have a PHI instruction. Get the operand that corresponds to the434// latch block, and see if is a result of an addition of form "reg+imm",435// where the "reg" is defined by the PHI node we are looking at.436for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {437if (Phi->getOperand(i+1).getMBB() != Latch)438continue;439440Register PhiOpReg = Phi->getOperand(i).getReg();441MachineInstr *DI = MRI->getVRegDef(PhiOpReg);442443if (DI->getDesc().isAdd()) {444// If the register operand to the add is the PHI we're looking at, this445// meets the induction pattern.446Register IndReg = DI->getOperand(1).getReg();447MachineOperand &Opnd2 = DI->getOperand(2);448int64_t V;449if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {450Register UpdReg = DI->getOperand(0).getReg();451IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));452}453}454} // for (i)455} // for (instr)456457SmallVector<MachineOperand,2> Cond;458MachineBasicBlock *TB = nullptr, *FB = nullptr;459bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);460if (NotAnalyzed)461return false;462463Register PredR;464unsigned PredPos, PredRegFlags;465if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))466return false;467468MachineInstr *PredI = MRI->getVRegDef(PredR);469if (!PredI->isCompare())470return false;471472Register CmpReg1, CmpReg2;473int64_t CmpImm = 0, CmpMask = 0;474bool CmpAnalyzed =475TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);476// Fail if the compare was not analyzed, or it's not comparing a register477// with an immediate value. Not checking the mask here, since we handle478// the individual compare opcodes (including A4_cmpb*) later on.479if (!CmpAnalyzed)480return false;481482// Exactly one of the input registers to the comparison should be among483// the induction registers.484InductionMap::iterator IndMapEnd = IndMap.end();485InductionMap::iterator F = IndMapEnd;486if (CmpReg1 != 0) {487InductionMap::iterator F1 = IndMap.find(CmpReg1);488if (F1 != IndMapEnd)489F = F1;490}491if (CmpReg2 != 0) {492InductionMap::iterator F2 = IndMap.find(CmpReg2);493if (F2 != IndMapEnd) {494if (F != IndMapEnd)495return false;496F = F2;497}498}499if (F == IndMapEnd)500return false;501502Reg = F->second.first;503IVBump = F->second.second;504IVOp = MRI->getVRegDef(F->first);505return true;506}507508// Return the comparison kind for the specified opcode.509HexagonHardwareLoops::Comparison::Kind510HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,511MachineOperand *InitialValue,512const MachineOperand *EndValue,513int64_t IVBump) const {514Comparison::Kind Cmp = (Comparison::Kind)0;515switch (CondOpc) {516case Hexagon::C2_cmpeq:517case Hexagon::C2_cmpeqi:518case Hexagon::C2_cmpeqp:519Cmp = Comparison::EQ;520break;521case Hexagon::C4_cmpneq:522case Hexagon::C4_cmpneqi:523Cmp = Comparison::NE;524break;525case Hexagon::C2_cmplt:526Cmp = Comparison::LTs;527break;528case Hexagon::C2_cmpltu:529Cmp = Comparison::LTu;530break;531case Hexagon::C4_cmplte:532case Hexagon::C4_cmpltei:533Cmp = Comparison::LEs;534break;535case Hexagon::C4_cmplteu:536case Hexagon::C4_cmplteui:537Cmp = Comparison::LEu;538break;539case Hexagon::C2_cmpgt:540case Hexagon::C2_cmpgti:541case Hexagon::C2_cmpgtp:542Cmp = Comparison::GTs;543break;544case Hexagon::C2_cmpgtu:545case Hexagon::C2_cmpgtui:546case Hexagon::C2_cmpgtup:547Cmp = Comparison::GTu;548break;549case Hexagon::C2_cmpgei:550Cmp = Comparison::GEs;551break;552case Hexagon::C2_cmpgeui:553Cmp = Comparison::GEs;554break;555default:556return (Comparison::Kind)0;557}558return Cmp;559}560561/// Analyze the statements in a loop to determine if the loop has562/// a computable trip count and, if so, return a value that represents563/// the trip count expression.564///565/// This function iterates over the phi nodes in the loop to check for566/// induction variable patterns that are used in the calculation for567/// the number of time the loop is executed.568CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,569SmallVectorImpl<MachineInstr *> &OldInsts) {570MachineBasicBlock *TopMBB = L->getTopBlock();571MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();572assert(PI != TopMBB->pred_end() &&573"Loop must have more than one incoming edge!");574MachineBasicBlock *Backedge = *PI++;575if (PI == TopMBB->pred_end()) // dead loop?576return nullptr;577MachineBasicBlock *Incoming = *PI++;578if (PI != TopMBB->pred_end()) // multiple backedges?579return nullptr;580581// Make sure there is one incoming and one backedge and determine which582// is which.583if (L->contains(Incoming)) {584if (L->contains(Backedge))585return nullptr;586std::swap(Incoming, Backedge);587} else if (!L->contains(Backedge))588return nullptr;589590// Look for the cmp instruction to determine if we can get a useful trip591// count. The trip count can be either a register or an immediate. The592// location of the value depends upon the type (reg or imm).593MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();594if (!ExitingBlock)595return nullptr;596597Register IVReg = 0;598int64_t IVBump = 0;599MachineInstr *IVOp;600bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);601if (!FoundIV)602return nullptr;603604MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);605606MachineOperand *InitialValue = nullptr;607MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);608MachineBasicBlock *Latch = L->getLoopLatch();609for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {610MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();611if (MBB == Preheader)612InitialValue = &IV_Phi->getOperand(i);613else if (MBB == Latch)614IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.615}616if (!InitialValue)617return nullptr;618619SmallVector<MachineOperand,2> Cond;620MachineBasicBlock *TB = nullptr, *FB = nullptr;621bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);622if (NotAnalyzed)623return nullptr;624625MachineBasicBlock *Header = L->getHeader();626// TB must be non-null. If FB is also non-null, one of them must be627// the header. Otherwise, branch to TB could be exiting the loop, and628// the fall through can go to the header.629assert (TB && "Exit block without a branch?");630if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {631MachineBasicBlock *LTB = nullptr, *LFB = nullptr;632SmallVector<MachineOperand,2> LCond;633bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);634if (NotAnalyzed)635return nullptr;636if (TB == Latch)637TB = (LTB == Header) ? LTB : LFB;638else639FB = (LTB == Header) ? LTB: LFB;640}641assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");642if (!TB || (FB && TB != Header && FB != Header))643return nullptr;644645// Branches of form "if (!P) ..." cause HexagonInstrInfo::analyzeBranch646// to put imm(0), followed by P in the vector Cond.647// If TB is not the header, it means that the "not-taken" path must lead648// to the header.649bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);650Register PredReg;651unsigned PredPos, PredRegFlags;652if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))653return nullptr;654MachineInstr *CondI = MRI->getVRegDef(PredReg);655unsigned CondOpc = CondI->getOpcode();656657Register CmpReg1, CmpReg2;658int64_t Mask = 0, ImmValue = 0;659bool AnalyzedCmp =660TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);661if (!AnalyzedCmp)662return nullptr;663664// The comparison operator type determines how we compute the loop665// trip count.666OldInsts.push_back(CondI);667OldInsts.push_back(IVOp);668669// Sadly, the following code gets information based on the position670// of the operands in the compare instruction. This has to be done671// this way, because the comparisons check for a specific relationship672// between the operands (e.g. is-less-than), rather than to find out673// what relationship the operands are in (as on PPC).674Comparison::Kind Cmp;675bool isSwapped = false;676const MachineOperand &Op1 = CondI->getOperand(1);677const MachineOperand &Op2 = CondI->getOperand(2);678const MachineOperand *EndValue = nullptr;679680if (Op1.isReg()) {681if (Op2.isImm() || Op1.getReg() == IVReg)682EndValue = &Op2;683else {684EndValue = &Op1;685isSwapped = true;686}687}688689if (!EndValue)690return nullptr;691692Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);693if (!Cmp)694return nullptr;695if (Negated)696Cmp = Comparison::getNegatedComparison(Cmp);697if (isSwapped)698Cmp = Comparison::getSwappedComparison(Cmp);699700if (InitialValue->isReg()) {701Register R = InitialValue->getReg();702MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();703if (!MDT->properlyDominates(DefBB, Header)) {704int64_t V;705if (!checkForImmediate(*InitialValue, V))706return nullptr;707}708OldInsts.push_back(MRI->getVRegDef(R));709}710if (EndValue->isReg()) {711Register R = EndValue->getReg();712MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();713if (!MDT->properlyDominates(DefBB, Header)) {714int64_t V;715if (!checkForImmediate(*EndValue, V))716return nullptr;717}718OldInsts.push_back(MRI->getVRegDef(R));719}720721return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);722}723724/// Helper function that returns the expression that represents the725/// number of times a loop iterates. The function takes the operands that726/// represent the loop start value, loop end value, and induction value.727/// Based upon these operands, the function attempts to compute the trip count.728CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,729const MachineOperand *Start,730const MachineOperand *End,731Register IVReg,732int64_t IVBump,733Comparison::Kind Cmp) const {734// Cannot handle comparison EQ, i.e. while (A == B).735if (Cmp == Comparison::EQ)736return nullptr;737738// Check if either the start or end values are an assignment of an immediate.739// If so, use the immediate value rather than the register.740if (Start->isReg()) {741const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());742if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||743StartValInstr->getOpcode() == Hexagon::A2_tfrpi))744Start = &StartValInstr->getOperand(1);745}746if (End->isReg()) {747const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());748if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||749EndValInstr->getOpcode() == Hexagon::A2_tfrpi))750End = &EndValInstr->getOperand(1);751}752753if (!Start->isReg() && !Start->isImm())754return nullptr;755if (!End->isReg() && !End->isImm())756return nullptr;757758bool CmpLess = Cmp & Comparison::L;759bool CmpGreater = Cmp & Comparison::G;760bool CmpHasEqual = Cmp & Comparison::EQ;761762// Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.763if (CmpLess && IVBump < 0)764// Loop going while iv is "less" with the iv value going down. Must wrap.765return nullptr;766767if (CmpGreater && IVBump > 0)768// Loop going while iv is "greater" with the iv value going up. Must wrap.769return nullptr;770771// Phis that may feed into the loop.772LoopFeederMap LoopFeederPhi;773774// Check if the initial value may be zero and can be decremented in the first775// iteration. If the value is zero, the endloop instruction will not decrement776// the loop counter, so we shouldn't generate a hardware loop in this case.777if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,778LoopFeederPhi))779return nullptr;780781if (Start->isImm() && End->isImm()) {782// Both, start and end are immediates.783int64_t StartV = Start->getImm();784int64_t EndV = End->getImm();785int64_t Dist = EndV - StartV;786if (Dist == 0)787return nullptr;788789bool Exact = (Dist % IVBump) == 0;790791if (Cmp == Comparison::NE) {792if (!Exact)793return nullptr;794if ((Dist < 0) ^ (IVBump < 0))795return nullptr;796}797798// For comparisons that include the final value (i.e. include equality799// with the final value), we need to increase the distance by 1.800if (CmpHasEqual)801Dist = Dist > 0 ? Dist+1 : Dist-1;802803// For the loop to iterate, CmpLess should imply Dist > 0. Similarly,804// CmpGreater should imply Dist < 0. These conditions could actually805// fail, for example, in unreachable code (which may still appear to be806// reachable in the CFG).807if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))808return nullptr;809810// "Normalized" distance, i.e. with the bump set to +-1.811int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump812: (-Dist + (-IVBump - 1)) / (-IVBump);813assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");814815uint64_t Count = Dist1;816817if (Count > 0xFFFFFFFFULL)818return nullptr;819820return new CountValue(CountValue::CV_Immediate, Count);821}822823// A general case: Start and End are some values, but the actual824// iteration count may not be available. If it is not, insert825// a computation of it into the preheader.826827// If the induction variable bump is not a power of 2, quit.828// Othwerise we'd need a general integer division.829if (!isPowerOf2_64(std::abs(IVBump)))830return nullptr;831832MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader);833assert (PH && "Should have a preheader by now");834MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();835DebugLoc DL;836if (InsertPos != PH->end())837DL = InsertPos->getDebugLoc();838839// If Start is an immediate and End is a register, the trip count840// will be "reg - imm". Hexagon's "subtract immediate" instruction841// is actually "reg + -imm".842843// If the loop IV is going downwards, i.e. if the bump is negative,844// then the iteration count (computed as End-Start) will need to be845// negated. To avoid the negation, just swap Start and End.846if (IVBump < 0) {847std::swap(Start, End);848IVBump = -IVBump;849}850// Cmp may now have a wrong direction, e.g. LEs may now be GEs.851// Signedness, and "including equality" are preserved.852853bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)854bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)855856int64_t StartV = 0, EndV = 0;857if (Start->isImm())858StartV = Start->getImm();859if (End->isImm())860EndV = End->getImm();861862int64_t AdjV = 0;863// To compute the iteration count, we would need this computation:864// Count = (End - Start + (IVBump-1)) / IVBump865// or, when CmpHasEqual:866// Count = (End - Start + (IVBump-1)+1) / IVBump867// The "IVBump-1" part is the adjustment (AdjV). We can avoid868// generating an instruction specifically to add it if we can adjust869// the immediate values for Start or End.870871if (CmpHasEqual) {872// Need to add 1 to the total iteration count.873if (Start->isImm())874StartV--;875else if (End->isImm())876EndV++;877else878AdjV += 1;879}880881if (Cmp != Comparison::NE) {882if (Start->isImm())883StartV -= (IVBump-1);884else if (End->isImm())885EndV += (IVBump-1);886else887AdjV += (IVBump-1);888}889890Register R = 0;891unsigned SR = 0;892if (Start->isReg()) {893R = Start->getReg();894SR = Start->getSubReg();895} else {896R = End->getReg();897SR = End->getSubReg();898}899const TargetRegisterClass *RC = MRI->getRegClass(R);900// Hardware loops cannot handle 64-bit registers. If it's a double901// register, it has to have a subregister.902if (!SR && RC == &Hexagon::DoubleRegsRegClass)903return nullptr;904const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;905906// Compute DistR (register with the distance between Start and End).907Register DistR;908unsigned DistSR;909910// Avoid special case, where the start value is an imm(0).911if (Start->isImm() && StartV == 0) {912DistR = End->getReg();913DistSR = End->getSubReg();914} else {915const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :916(RegToImm ? TII->get(Hexagon::A2_subri) :917TII->get(Hexagon::A2_addi));918if (RegToReg || RegToImm) {919Register SubR = MRI->createVirtualRegister(IntRC);920MachineInstrBuilder SubIB =921BuildMI(*PH, InsertPos, DL, SubD, SubR);922923if (RegToReg)924SubIB.addReg(End->getReg(), 0, End->getSubReg())925.addReg(Start->getReg(), 0, Start->getSubReg());926else927SubIB.addImm(EndV)928.addReg(Start->getReg(), 0, Start->getSubReg());929DistR = SubR;930} else {931// If the loop has been unrolled, we should use the original loop count932// instead of recalculating the value. This will avoid additional933// 'Add' instruction.934const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());935if (EndValInstr->getOpcode() == Hexagon::A2_addi &&936EndValInstr->getOperand(1).getSubReg() == 0 &&937EndValInstr->getOperand(2).getImm() == StartV) {938DistR = EndValInstr->getOperand(1).getReg();939} else {940Register SubR = MRI->createVirtualRegister(IntRC);941MachineInstrBuilder SubIB =942BuildMI(*PH, InsertPos, DL, SubD, SubR);943SubIB.addReg(End->getReg(), 0, End->getSubReg())944.addImm(-StartV);945DistR = SubR;946}947}948DistSR = 0;949}950951// From DistR, compute AdjR (register with the adjusted distance).952Register AdjR;953unsigned AdjSR;954955if (AdjV == 0) {956AdjR = DistR;957AdjSR = DistSR;958} else {959// Generate CountR = ADD DistR, AdjVal960Register AddR = MRI->createVirtualRegister(IntRC);961MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);962BuildMI(*PH, InsertPos, DL, AddD, AddR)963.addReg(DistR, 0, DistSR)964.addImm(AdjV);965966AdjR = AddR;967AdjSR = 0;968}969970// From AdjR, compute CountR (register with the final count).971Register CountR;972unsigned CountSR;973974if (IVBump == 1) {975CountR = AdjR;976CountSR = AdjSR;977} else {978// The IV bump is a power of two. Log_2(IV bump) is the shift amount.979unsigned Shift = Log2_32(IVBump);980981// Generate NormR = LSR DistR, Shift.982Register LsrR = MRI->createVirtualRegister(IntRC);983const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);984BuildMI(*PH, InsertPos, DL, LsrD, LsrR)985.addReg(AdjR, 0, AdjSR)986.addImm(Shift);987988CountR = LsrR;989CountSR = 0;990}991992return new CountValue(CountValue::CV_Register, CountR, CountSR);993}994995/// Return true if the operation is invalid within hardware loop.996bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,997bool IsInnerHWLoop) const {998// Call is not allowed because the callee may use a hardware loop except for999// the case when the call never returns.1000if (MI->getDesc().isCall())1001return !TII->doesNotReturn(*MI);10021003// Check if the instruction defines a hardware loop register.1004using namespace Hexagon;10051006static const Register Regs01[] = { LC0, SA0, LC1, SA1 };1007static const Register Regs1[] = { LC1, SA1 };1008auto CheckRegs = IsInnerHWLoop ? ArrayRef(Regs01) : ArrayRef(Regs1);1009for (Register R : CheckRegs)1010if (MI->modifiesRegister(R, TRI))1011return true;10121013return false;1014}10151016/// Return true if the loop contains an instruction that inhibits1017/// the use of the hardware loop instruction.1018bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,1019bool IsInnerHWLoop) const {1020LLVM_DEBUG(dbgs() << "\nhw_loop head, "1021<< printMBBReference(**L->block_begin()));1022for (MachineBasicBlock *MBB : L->getBlocks()) {1023for (const MachineInstr &MI : *MBB) {1024if (isInvalidLoopOperation(&MI, IsInnerHWLoop)) {1025LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:";1026MI.dump(););1027return true;1028}1029}1030}1031return false;1032}10331034/// Returns true if the instruction is dead. This was essentially1035/// copied from DeadMachineInstructionElim::isDead, but with special cases1036/// for inline asm, physical registers and instructions with side effects1037/// removed.1038bool HexagonHardwareLoops::isDead(const MachineInstr *MI,1039SmallVectorImpl<MachineInstr *> &DeadPhis) const {1040// Examine each operand.1041for (const MachineOperand &MO : MI->operands()) {1042if (!MO.isReg() || !MO.isDef())1043continue;10441045Register Reg = MO.getReg();1046if (MRI->use_nodbg_empty(Reg))1047continue;10481049using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator;10501051// This instruction has users, but if the only user is the phi node for the1052// parent block, and the only use of that phi node is this instruction, then1053// this instruction is dead: both it (and the phi node) can be removed.1054use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);1055use_nodbg_iterator End = MRI->use_nodbg_end();1056if (std::next(I) != End || !I->getParent()->isPHI())1057return false;10581059MachineInstr *OnePhi = I->getParent();1060for (const MachineOperand &OPO : OnePhi->operands()) {1061if (!OPO.isReg() || !OPO.isDef())1062continue;10631064Register OPReg = OPO.getReg();1065use_nodbg_iterator nextJ;1066for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);1067J != End; J = nextJ) {1068nextJ = std::next(J);1069MachineOperand &Use = *J;1070MachineInstr *UseMI = Use.getParent();10711072// If the phi node has a user that is not MI, bail.1073if (MI != UseMI)1074return false;1075}1076}1077DeadPhis.push_back(OnePhi);1078}10791080// If there are no defs with uses, the instruction is dead.1081return true;1082}10831084void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {1085// This procedure was essentially copied from DeadMachineInstructionElim.10861087SmallVector<MachineInstr*, 1> DeadPhis;1088if (isDead(MI, DeadPhis)) {1089LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI);10901091// It is possible that some DBG_VALUE instructions refer to this1092// instruction. Examine each def operand for such references;1093// if found, mark the DBG_VALUE as undef (but don't delete it).1094for (const MachineOperand &MO : MI->operands()) {1095if (!MO.isReg() || !MO.isDef())1096continue;1097Register Reg = MO.getReg();1098// We use make_early_inc_range here because setReg below invalidates the1099// iterator.1100for (MachineOperand &MO :1101llvm::make_early_inc_range(MRI->use_operands(Reg))) {1102MachineInstr *UseMI = MO.getParent();1103if (UseMI == MI)1104continue;1105if (MO.isDebug())1106MO.setReg(0U);1107}1108}11091110MI->eraseFromParent();1111for (unsigned i = 0; i < DeadPhis.size(); ++i)1112DeadPhis[i]->eraseFromParent();1113}1114}11151116/// Check if the loop is a candidate for converting to a hardware1117/// loop. If so, then perform the transformation.1118///1119/// This function works on innermost loops first. A loop can be converted1120/// if it is a counting loop; either a register value or an immediate.1121///1122/// The code makes several assumptions about the representation of the loop1123/// in llvm.1124bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,1125bool &RecL0used,1126bool &RecL1used) {1127// This is just to confirm basic correctness.1128assert(L->getHeader() && "Loop without a header?");11291130bool Changed = false;1131bool L0Used = false;1132bool L1Used = false;11331134// Process nested loops first.1135for (MachineLoop *I : *L) {1136Changed |= convertToHardwareLoop(I, RecL0used, RecL1used);1137L0Used |= RecL0used;1138L1Used |= RecL1used;1139}11401141// If a nested loop has been converted, then we can't convert this loop.1142if (Changed && L0Used && L1Used)1143return Changed;11441145unsigned LOOP_i;1146unsigned LOOP_r;1147unsigned ENDLOOP;11481149// Flag used to track loopN instruction:1150// 1 - Hardware loop is being generated for the inner most loop.1151// 0 - Hardware loop is being generated for the outer loop.1152unsigned IsInnerHWLoop = 1;11531154if (L0Used) {1155LOOP_i = Hexagon::J2_loop1i;1156LOOP_r = Hexagon::J2_loop1r;1157ENDLOOP = Hexagon::ENDLOOP1;1158IsInnerHWLoop = 0;1159} else {1160LOOP_i = Hexagon::J2_loop0i;1161LOOP_r = Hexagon::J2_loop0r;1162ENDLOOP = Hexagon::ENDLOOP0;1163}11641165#ifndef NDEBUG1166// Stop trying after reaching the limit (if any).1167int Limit = HWLoopLimit;1168if (Limit >= 0) {1169if (Counter >= HWLoopLimit)1170return false;1171Counter++;1172}1173#endif11741175// Does the loop contain any invalid instructions?1176if (containsInvalidInstruction(L, IsInnerHWLoop))1177return false;11781179MachineBasicBlock *LastMBB = L->findLoopControlBlock();1180// Don't generate hw loop if the loop has more than one exit.1181if (!LastMBB)1182return false;11831184MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();1185if (LastI == LastMBB->end())1186return false;11871188// Is the induction variable bump feeding the latch condition?1189if (!fixupInductionVariable(L))1190return false;11911192// Ensure the loop has a preheader: the loop instruction will be1193// placed there.1194MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);1195if (!Preheader) {1196Preheader = createPreheaderForLoop(L);1197if (!Preheader)1198return false;1199}12001201MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();12021203SmallVector<MachineInstr*, 2> OldInsts;1204// Are we able to determine the trip count for the loop?1205CountValue *TripCount = getLoopTripCount(L, OldInsts);1206if (!TripCount)1207return false;12081209// Is the trip count available in the preheader?1210if (TripCount->isReg()) {1211// There will be a use of the register inserted into the preheader,1212// so make sure that the register is actually defined at that point.1213MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());1214MachineBasicBlock *BBDef = TCDef->getParent();1215if (!MDT->dominates(BBDef, Preheader))1216return false;1217}12181219// Determine the loop start.1220MachineBasicBlock *TopBlock = L->getTopBlock();1221MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();1222MachineBasicBlock *LoopStart = nullptr;1223if (ExitingBlock != L->getLoopLatch()) {1224MachineBasicBlock *TB = nullptr, *FB = nullptr;1225SmallVector<MachineOperand, 2> Cond;12261227if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))1228return false;12291230if (L->contains(TB))1231LoopStart = TB;1232else if (L->contains(FB))1233LoopStart = FB;1234else1235return false;1236}1237else1238LoopStart = TopBlock;12391240// Convert the loop to a hardware loop.1241LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump());1242DebugLoc DL;1243if (InsertPos != Preheader->end())1244DL = InsertPos->getDebugLoc();12451246if (TripCount->isReg()) {1247// Create a copy of the loop count register.1248Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);1249BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)1250.addReg(TripCount->getReg(), 0, TripCount->getSubReg());1251// Add the Loop instruction to the beginning of the loop.1252BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)1253.addReg(CountReg);1254} else {1255assert(TripCount->isImm() && "Expecting immediate value for trip count");1256// Add the Loop immediate instruction to the beginning of the loop,1257// if the immediate fits in the instructions. Otherwise, we need to1258// create a new virtual register.1259int64_t CountImm = TripCount->getImm();1260if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) {1261Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);1262BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)1263.addImm(CountImm);1264BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))1265.addMBB(LoopStart).addReg(CountReg);1266} else1267BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))1268.addMBB(LoopStart).addImm(CountImm);1269}12701271// Make sure the loop start always has a reference in the CFG.1272LoopStart->setMachineBlockAddressTaken();12731274// Replace the loop branch with an endloop instruction.1275DebugLoc LastIDL = LastI->getDebugLoc();1276BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);12771278// The loop ends with either:1279// - a conditional branch followed by an unconditional branch, or1280// - a conditional branch to the loop start.1281if (LastI->getOpcode() == Hexagon::J2_jumpt ||1282LastI->getOpcode() == Hexagon::J2_jumpf) {1283// Delete one and change/add an uncond. branch to out of the loop.1284MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();1285LastI = LastMBB->erase(LastI);1286if (!L->contains(BranchTarget)) {1287if (LastI != LastMBB->end())1288LastI = LastMBB->erase(LastI);1289SmallVector<MachineOperand, 0> Cond;1290TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);1291}1292} else {1293// Conditional branch to loop start; just delete it.1294LastMBB->erase(LastI);1295}1296delete TripCount;12971298// The induction operation and the comparison may now be1299// unneeded. If these are unneeded, then remove them.1300for (unsigned i = 0; i < OldInsts.size(); ++i)1301removeIfDead(OldInsts[i]);13021303++NumHWLoops;13041305// Set RecL1used and RecL0used only after hardware loop has been1306// successfully generated. Doing it earlier can cause wrong loop instruction1307// to be used.1308if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.1309RecL1used = true;1310else1311RecL0used = true;13121313return true;1314}13151316bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,1317MachineInstr *CmpI) {1318assert (BumpI != CmpI && "Bump and compare in the same instruction?");13191320MachineBasicBlock *BB = BumpI->getParent();1321if (CmpI->getParent() != BB)1322return false;13231324using instr_iterator = MachineBasicBlock::instr_iterator;13251326// Check if things are in order to begin with.1327for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)1328if (&*I == CmpI)1329return true;13301331// Out of order.1332Register PredR = CmpI->getOperand(0).getReg();1333bool FoundBump = false;1334instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);1335for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {1336MachineInstr *In = &*I;1337for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {1338MachineOperand &MO = In->getOperand(i);1339if (MO.isReg() && MO.isUse()) {1340if (MO.getReg() == PredR) // Found an intervening use of PredR.1341return false;1342}1343}13441345if (In == BumpI) {1346BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());1347FoundBump = true;1348break;1349}1350}1351assert (FoundBump && "Cannot determine instruction order");1352return FoundBump;1353}13541355/// This function is required to break recursion. Visiting phis in a loop may1356/// result in recursion during compilation. We break the recursion by making1357/// sure that we visit a MachineOperand and its definition in a1358/// MachineInstruction only once. If we attempt to visit more than once, then1359/// there is recursion, and will return false.1360bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,1361MachineInstr *MI,1362const MachineOperand *MO,1363LoopFeederMap &LoopFeederPhi) const {1364if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {1365LLVM_DEBUG(dbgs() << "\nhw_loop head, "1366<< printMBBReference(**L->block_begin()));1367// Ignore all BBs that form Loop.1368if (llvm::is_contained(L->getBlocks(), A))1369return false;1370MachineInstr *Def = MRI->getVRegDef(MO->getReg());1371LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));1372return true;1373} else1374// Already visited node.1375return false;1376}13771378/// Return true if a Phi may generate a value that can underflow.1379/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.1380bool HexagonHardwareLoops::phiMayWrapOrUnderflow(1381MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,1382MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {1383assert(Phi->isPHI() && "Expecting a Phi.");1384// Walk through each Phi, and its used operands. Make sure that1385// if there is recursion in Phi, we won't generate hardware loops.1386for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)1387if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))1388if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,1389Phi->getParent(), L, LoopFeederPhi))1390return true;1391return false;1392}13931394/// Return true if the induction variable can underflow in the first iteration.1395/// An example, is an initial unsigned value that is 0 and is decrement in the1396/// first itertion of a do-while loop. In this case, we cannot generate a1397/// hardware loop because the endloop instruction does not decrement the loop1398/// counter if it is <= 1. We only need to perform this analysis if the1399/// initial value is a register.1400///1401/// This function assumes the initial value may underfow unless proven1402/// otherwise. If the type is signed, then we don't care because signed1403/// underflow is undefined. We attempt to prove the initial value is not1404/// zero by perfoming a crude analysis of the loop counter. This function1405/// checks if the initial value is used in any comparison prior to the loop1406/// and, if so, assumes the comparison is a range check. This is inexact,1407/// but will catch the simple cases.1408bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(1409const MachineOperand *InitVal, const MachineOperand *EndVal,1410MachineBasicBlock *MBB, MachineLoop *L,1411LoopFeederMap &LoopFeederPhi) const {1412// Only check register values since they are unknown.1413if (!InitVal->isReg())1414return false;14151416if (!EndVal->isImm())1417return false;14181419// A register value that is assigned an immediate is a known value, and it1420// won't underflow in the first iteration.1421int64_t Imm;1422if (checkForImmediate(*InitVal, Imm))1423return (EndVal->getImm() == Imm);14241425Register Reg = InitVal->getReg();14261427// We don't know the value of a physical register.1428if (!Reg.isVirtual())1429return true;14301431MachineInstr *Def = MRI->getVRegDef(Reg);1432if (!Def)1433return true;14341435// If the initial value is a Phi or copy and the operands may not underflow,1436// then the definition cannot be underflow either.1437if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),1438L, LoopFeederPhi))1439return false;1440if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),1441EndVal, Def->getParent(),1442L, LoopFeederPhi))1443return false;14441445// Iterate over the uses of the initial value. If the initial value is used1446// in a compare, then we assume this is a range check that ensures the loop1447// doesn't underflow. This is not an exact test and should be improved.1448for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),1449E = MRI->use_instr_nodbg_end(); I != E; ++I) {1450MachineInstr *MI = &*I;1451Register CmpReg1, CmpReg2;1452int64_t CmpMask = 0, CmpValue = 0;14531454if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))1455continue;14561457MachineBasicBlock *TBB = nullptr, *FBB = nullptr;1458SmallVector<MachineOperand, 2> Cond;1459if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))1460continue;14611462Comparison::Kind Cmp =1463getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);1464if (Cmp == 0)1465continue;1466if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))1467Cmp = Comparison::getNegatedComparison(Cmp);1468if (CmpReg2 != 0 && CmpReg2 == Reg)1469Cmp = Comparison::getSwappedComparison(Cmp);14701471// Signed underflow is undefined.1472if (Comparison::isSigned(Cmp))1473return false;14741475// Check if there is a comparison of the initial value. If the initial value1476// is greater than or not equal to another value, then assume this is a1477// range check.1478if ((Cmp & Comparison::G) || Cmp == Comparison::NE)1479return false;1480}14811482// OK - this is a hack that needs to be improved. We really need to analyze1483// the instructions performed on the initial value. This works on the simplest1484// cases only.1485if (!Def->isCopy() && !Def->isPHI())1486return false;14871488return true;1489}14901491bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,1492int64_t &Val) const {1493if (MO.isImm()) {1494Val = MO.getImm();1495return true;1496}1497if (!MO.isReg())1498return false;14991500// MO is a register. Check whether it is defined as an immediate value,1501// and if so, get the value of it in TV. That value will then need to be1502// processed to handle potential subregisters in MO.1503int64_t TV;15041505Register R = MO.getReg();1506if (!R.isVirtual())1507return false;1508MachineInstr *DI = MRI->getVRegDef(R);1509unsigned DOpc = DI->getOpcode();1510switch (DOpc) {1511case TargetOpcode::COPY:1512case Hexagon::A2_tfrsi:1513case Hexagon::A2_tfrpi:1514case Hexagon::CONST32:1515case Hexagon::CONST64:1516// Call recursively to avoid an extra check whether operand(1) is1517// indeed an immediate (it could be a global address, for example),1518// plus we can handle COPY at the same time.1519if (!checkForImmediate(DI->getOperand(1), TV))1520return false;1521break;1522case Hexagon::A2_combineii:1523case Hexagon::A4_combineir:1524case Hexagon::A4_combineii:1525case Hexagon::A4_combineri:1526case Hexagon::A2_combinew: {1527const MachineOperand &S1 = DI->getOperand(1);1528const MachineOperand &S2 = DI->getOperand(2);1529int64_t V1, V2;1530if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))1531return false;1532TV = V2 | (static_cast<uint64_t>(V1) << 32);1533break;1534}1535case TargetOpcode::REG_SEQUENCE: {1536const MachineOperand &S1 = DI->getOperand(1);1537const MachineOperand &S3 = DI->getOperand(3);1538int64_t V1, V3;1539if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))1540return false;1541unsigned Sub2 = DI->getOperand(2).getImm();1542unsigned Sub4 = DI->getOperand(4).getImm();1543if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)1544TV = V1 | (V3 << 32);1545else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)1546TV = V3 | (V1 << 32);1547else1548llvm_unreachable("Unexpected form of REG_SEQUENCE");1549break;1550}15511552default:1553return false;1554}15551556// By now, we should have successfully obtained the immediate value defining1557// the register referenced in MO. Handle a potential use of a subregister.1558switch (MO.getSubReg()) {1559case Hexagon::isub_lo:1560Val = TV & 0xFFFFFFFFULL;1561break;1562case Hexagon::isub_hi:1563Val = (TV >> 32) & 0xFFFFFFFFULL;1564break;1565default:1566Val = TV;1567break;1568}1569return true;1570}15711572void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {1573if (MO.isImm()) {1574MO.setImm(Val);1575return;1576}15771578assert(MO.isReg());1579Register R = MO.getReg();1580MachineInstr *DI = MRI->getVRegDef(R);15811582const TargetRegisterClass *RC = MRI->getRegClass(R);1583Register NewR = MRI->createVirtualRegister(RC);1584MachineBasicBlock &B = *DI->getParent();1585DebugLoc DL = DI->getDebugLoc();1586BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);1587MO.setReg(NewR);1588}15891590bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {1591MachineBasicBlock *Header = L->getHeader();1592MachineBasicBlock *Latch = L->getLoopLatch();1593MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();15941595if (!(Header && Latch && ExitingBlock))1596return false;15971598// These data structures follow the same concept as the corresponding1599// ones in findInductionRegister (where some comments are).1600using RegisterBump = std::pair<Register, int64_t>;1601using RegisterInduction = std::pair<Register, RegisterBump>;1602using RegisterInductionSet = std::set<RegisterInduction>;16031604// Register candidates for induction variables, with their associated bumps.1605RegisterInductionSet IndRegs;16061607// Look for induction patterns:1608// %1 = PHI ..., [ latch, %2 ]1609// %2 = ADD %1, imm1610using instr_iterator = MachineBasicBlock::instr_iterator;16111612for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();1613I != E && I->isPHI(); ++I) {1614MachineInstr *Phi = &*I;16151616// Have a PHI instruction.1617for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {1618if (Phi->getOperand(i+1).getMBB() != Latch)1619continue;16201621Register PhiReg = Phi->getOperand(i).getReg();1622MachineInstr *DI = MRI->getVRegDef(PhiReg);16231624if (DI->getDesc().isAdd()) {1625// If the register operand to the add/sub is the PHI we are looking1626// at, this meets the induction pattern.1627Register IndReg = DI->getOperand(1).getReg();1628MachineOperand &Opnd2 = DI->getOperand(2);1629int64_t V;1630if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {1631Register UpdReg = DI->getOperand(0).getReg();1632IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));1633}1634}1635} // for (i)1636} // for (instr)16371638if (IndRegs.empty())1639return false;16401641MachineBasicBlock *TB = nullptr, *FB = nullptr;1642SmallVector<MachineOperand,2> Cond;1643// analyzeBranch returns true if it fails to analyze branch.1644bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);1645if (NotAnalyzed || Cond.empty())1646return false;16471648if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {1649MachineBasicBlock *LTB = nullptr, *LFB = nullptr;1650SmallVector<MachineOperand,2> LCond;1651bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);1652if (NotAnalyzed)1653return false;16541655// Since latch is not the exiting block, the latch branch should be an1656// unconditional branch to the loop header.1657if (TB == Latch)1658TB = (LTB == Header) ? LTB : LFB;1659else1660FB = (LTB == Header) ? LTB : LFB;1661}1662if (TB != Header) {1663if (FB != Header) {1664// The latch/exit block does not go back to the header.1665return false;1666}1667// FB is the header (i.e., uncond. jump to branch header)1668// In this case, the LoopBody -> TB should not be a back edge otherwise1669// it could result in an infinite loop after conversion to hw_loop.1670// This case can happen when the Latch has two jumps like this:1671// Jmp_c OuterLoopHeader <-- TB1672// Jmp InnerLoopHeader <-- FB1673if (MDT->dominates(TB, FB))1674return false;1675}16761677// Expecting a predicate register as a condition. It won't be a hardware1678// predicate register at this point yet, just a vreg.1679// HexagonInstrInfo::analyzeBranch for negated branches inserts imm(0)1680// into Cond, followed by the predicate register. For non-negated branches1681// it's just the register.1682unsigned CSz = Cond.size();1683if (CSz != 1 && CSz != 2)1684return false;16851686if (!Cond[CSz-1].isReg())1687return false;16881689Register P = Cond[CSz - 1].getReg();1690MachineInstr *PredDef = MRI->getVRegDef(P);16911692if (!PredDef->isCompare())1693return false;16941695SmallSet<Register,2> CmpRegs;1696MachineOperand *CmpImmOp = nullptr;16971698// Go over all operands to the compare and look for immediate and register1699// operands. Assume that if the compare has a single register use and a1700// single immediate operand, then the register is being compared with the1701// immediate value.1702for (MachineOperand &MO : PredDef->operands()) {1703if (MO.isReg()) {1704// Skip all implicit references. In one case there was:1705// %140 = FCMPUGT32_rr %138, %139, implicit %usr1706if (MO.isImplicit())1707continue;1708if (MO.isUse()) {1709if (!isImmediate(MO)) {1710CmpRegs.insert(MO.getReg());1711continue;1712}1713// Consider the register to be the "immediate" operand.1714if (CmpImmOp)1715return false;1716CmpImmOp = &MO;1717}1718} else if (MO.isImm()) {1719if (CmpImmOp) // A second immediate argument? Confusing. Bail out.1720return false;1721CmpImmOp = &MO;1722}1723}17241725if (CmpRegs.empty())1726return false;17271728// Check if the compared register follows the order we want. Fix if needed.1729for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();1730I != E; ++I) {1731// This is a success. If the register used in the comparison is one that1732// we have identified as a bumped (updated) induction register, there is1733// nothing to do.1734if (CmpRegs.count(I->first))1735return true;17361737// Otherwise, if the register being compared comes out of a PHI node,1738// and has been recognized as following the induction pattern, and is1739// compared against an immediate, we can fix it.1740const RegisterBump &RB = I->second;1741if (CmpRegs.count(RB.first)) {1742if (!CmpImmOp) {1743// If both operands to the compare instruction are registers, see if1744// it can be changed to use induction register as one of the operands.1745MachineInstr *IndI = nullptr;1746MachineInstr *nonIndI = nullptr;1747MachineOperand *IndMO = nullptr;1748MachineOperand *nonIndMO = nullptr;17491750for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {1751MachineOperand &MO = PredDef->getOperand(i);1752if (MO.isReg() && MO.getReg() == RB.first) {1753LLVM_DEBUG(dbgs() << "\n DefMI(" << i1754<< ") = " << *(MRI->getVRegDef(I->first)));1755if (IndI)1756return false;17571758IndI = MRI->getVRegDef(I->first);1759IndMO = &MO;1760} else if (MO.isReg()) {1761LLVM_DEBUG(dbgs() << "\n DefMI(" << i1762<< ") = " << *(MRI->getVRegDef(MO.getReg())));1763if (nonIndI)1764return false;17651766nonIndI = MRI->getVRegDef(MO.getReg());1767nonIndMO = &MO;1768}1769}1770if (IndI && nonIndI &&1771nonIndI->getOpcode() == Hexagon::A2_addi &&1772nonIndI->getOperand(2).isImm() &&1773nonIndI->getOperand(2).getImm() == - RB.second) {1774bool Order = orderBumpCompare(IndI, PredDef);1775if (Order) {1776IndMO->setReg(I->first);1777nonIndMO->setReg(nonIndI->getOperand(1).getReg());1778return true;1779}1780}1781return false;1782}17831784// It is not valid to do this transformation on an unsigned comparison1785// because it may underflow.1786Comparison::Kind Cmp =1787getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);1788if (!Cmp || Comparison::isUnsigned(Cmp))1789return false;17901791// If the register is being compared against an immediate, try changing1792// the compare instruction to use induction register and adjust the1793// immediate operand.1794int64_t CmpImm = getImmediate(*CmpImmOp);1795int64_t V = RB.second;1796// Handle Overflow (64-bit).1797if (((V > 0) && (CmpImm > INT64_MAX - V)) ||1798((V < 0) && (CmpImm < INT64_MIN - V)))1799return false;1800CmpImm += V;1801// Most comparisons of register against an immediate value allow1802// the immediate to be constant-extended. There are some exceptions1803// though. Make sure the new combination will work.1804if (CmpImmOp->isImm() && !TII->isExtendable(*PredDef) &&1805!TII->isValidOffset(PredDef->getOpcode(), CmpImm, TRI, false))1806return false;18071808// Make sure that the compare happens after the bump. Otherwise,1809// after the fixup, the compare would use a yet-undefined register.1810MachineInstr *BumpI = MRI->getVRegDef(I->first);1811bool Order = orderBumpCompare(BumpI, PredDef);1812if (!Order)1813return false;18141815// Finally, fix the compare instruction.1816setImmediate(*CmpImmOp, CmpImm);1817for (MachineOperand &MO : PredDef->operands()) {1818if (MO.isReg() && MO.getReg() == RB.first) {1819MO.setReg(I->first);1820return true;1821}1822}1823}1824}18251826return false;1827}18281829/// createPreheaderForLoop - Create a preheader for a given loop.1830MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(1831MachineLoop *L) {1832if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))1833return TmpPH;1834if (!HWCreatePreheader)1835return nullptr;18361837MachineBasicBlock *Header = L->getHeader();1838MachineBasicBlock *Latch = L->getLoopLatch();1839MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();1840MachineFunction *MF = Header->getParent();1841DebugLoc DL;18421843#ifndef NDEBUG1844if ((!PHFn.empty()) && (PHFn != MF->getName()))1845return nullptr;1846#endif18471848if (!Latch || !ExitingBlock || Header->hasAddressTaken())1849return nullptr;18501851using instr_iterator = MachineBasicBlock::instr_iterator;18521853// Verify that all existing predecessors have analyzable branches1854// (or no branches at all).1855using MBBVector = std::vector<MachineBasicBlock *>;18561857MBBVector Preds(Header->pred_begin(), Header->pred_end());1858SmallVector<MachineOperand,2> Tmp1;1859MachineBasicBlock *TB = nullptr, *FB = nullptr;18601861if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))1862return nullptr;18631864for (MachineBasicBlock *PB : Preds) {1865bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);1866if (NotAnalyzed)1867return nullptr;1868}18691870MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();1871MF->insert(Header->getIterator(), NewPH);18721873if (Header->pred_size() > 2) {1874// Ensure that the header has only two predecessors: the preheader and1875// the loop latch. Any additional predecessors of the header should1876// join at the newly created preheader. Inspect all PHI nodes from the1877// header and create appropriate corresponding PHI nodes in the preheader.18781879for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();1880I != E && I->isPHI(); ++I) {1881MachineInstr *PN = &*I;18821883const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);1884MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);1885NewPH->insert(NewPH->end(), NewPN);18861887Register PR = PN->getOperand(0).getReg();1888const TargetRegisterClass *RC = MRI->getRegClass(PR);1889Register NewPR = MRI->createVirtualRegister(RC);1890NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));18911892// Copy all non-latch operands of a header's PHI node to the newly1893// created PHI node in the preheader.1894for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {1895Register PredR = PN->getOperand(i).getReg();1896unsigned PredRSub = PN->getOperand(i).getSubReg();1897MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();1898if (PredB == Latch)1899continue;19001901MachineOperand MO = MachineOperand::CreateReg(PredR, false);1902MO.setSubReg(PredRSub);1903NewPN->addOperand(MO);1904NewPN->addOperand(MachineOperand::CreateMBB(PredB));1905}19061907// Remove copied operands from the old PHI node and add the value1908// coming from the preheader's PHI.1909for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {1910MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();1911if (PredB != Latch) {1912PN->removeOperand(i+1);1913PN->removeOperand(i);1914}1915}1916PN->addOperand(MachineOperand::CreateReg(NewPR, false));1917PN->addOperand(MachineOperand::CreateMBB(NewPH));1918}1919} else {1920assert(Header->pred_size() == 2);19211922// The header has only two predecessors, but the non-latch predecessor1923// is not a preheader (e.g. it has other successors, etc.)1924// In such a case we don't need any extra PHI nodes in the new preheader,1925// all we need is to adjust existing PHIs in the header to now refer to1926// the new preheader.1927for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();1928I != E && I->isPHI(); ++I) {1929MachineInstr *PN = &*I;1930for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {1931MachineOperand &MO = PN->getOperand(i+1);1932if (MO.getMBB() != Latch)1933MO.setMBB(NewPH);1934}1935}1936}19371938// "Reroute" the CFG edges to link in the new preheader.1939// If any of the predecessors falls through to the header, insert a branch1940// to the new preheader in that place.1941SmallVector<MachineOperand,1> Tmp2;1942SmallVector<MachineOperand,1> EmptyCond;19431944TB = FB = nullptr;19451946for (MachineBasicBlock *PB : Preds) {1947if (PB != Latch) {1948Tmp2.clear();1949bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);1950(void)NotAnalyzed; // suppress compiler warning1951assert (!NotAnalyzed && "Should be analyzable!");1952if (TB != Header && (Tmp2.empty() || FB != Header))1953TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);1954PB->ReplaceUsesOfBlockWith(Header, NewPH);1955}1956}19571958// It can happen that the latch block will fall through into the header.1959// Insert an unconditional branch to the header.1960TB = FB = nullptr;1961bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);1962(void)LatchNotAnalyzed; // suppress compiler warning1963assert (!LatchNotAnalyzed && "Should be analyzable!");1964if (!TB && !FB)1965TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);19661967// Finally, the branch from the preheader to the header.1968TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);1969NewPH->addSuccessor(Header);19701971MachineLoop *ParentLoop = L->getParentLoop();1972if (ParentLoop)1973ParentLoop->addBasicBlockToLoop(NewPH, *MLI);19741975// Update the dominator information with the new preheader.1976if (MDT) {1977if (MachineDomTreeNode *HN = MDT->getNode(Header)) {1978if (MachineDomTreeNode *DHN = HN->getIDom()) {1979MDT->addNewBlock(NewPH, DHN->getBlock());1980MDT->changeImmediateDominator(Header, NewPH);1981}1982}1983}19841985return NewPH;1986}198719881989