Path: blob/main/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
35266 views
//===- HexagonGenInsert.cpp -----------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "BitTracker.h"9#include "HexagonBitTracker.h"10#include "HexagonInstrInfo.h"11#include "HexagonRegisterInfo.h"12#include "HexagonSubtarget.h"13#include "llvm/ADT/BitVector.h"14#include "llvm/ADT/DenseMap.h"15#include "llvm/ADT/GraphTraits.h"16#include "llvm/ADT/PostOrderIterator.h"17#include "llvm/ADT/STLExtras.h"18#include "llvm/ADT/SmallSet.h"19#include "llvm/ADT/SmallVector.h"20#include "llvm/ADT/StringRef.h"21#include "llvm/CodeGen/MachineBasicBlock.h"22#include "llvm/CodeGen/MachineDominators.h"23#include "llvm/CodeGen/MachineFunction.h"24#include "llvm/CodeGen/MachineFunctionPass.h"25#include "llvm/CodeGen/MachineInstr.h"26#include "llvm/CodeGen/MachineInstrBuilder.h"27#include "llvm/CodeGen/MachineOperand.h"28#include "llvm/CodeGen/MachineRegisterInfo.h"29#include "llvm/CodeGen/TargetRegisterInfo.h"30#include "llvm/IR/DebugLoc.h"31#include "llvm/InitializePasses.h"32#include "llvm/Pass.h"33#include "llvm/Support/CommandLine.h"34#include "llvm/Support/Debug.h"35#include "llvm/Support/MathExtras.h"36#include "llvm/Support/Timer.h"37#include "llvm/Support/raw_ostream.h"38#include <algorithm>39#include <cassert>40#include <cstdint>41#include <iterator>42#include <utility>43#include <vector>4445#define DEBUG_TYPE "hexinsert"4647using namespace llvm;4849static cl::opt<unsigned>50VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), cl::Hidden,51cl::desc("Vreg# cutoff for insert generation."));52// The distance cutoff is selected based on the precheckin-perf results:53// cutoffs 20, 25, 35, and 40 are worse than 30.54static cl::opt<unsigned>55VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden,56cl::desc("Vreg distance cutoff for insert "57"generation."));5859// Limit the container sizes for extreme cases where we run out of memory.60static cl::opt<unsigned>61MaxORLSize("insert-max-orl", cl::init(4096), cl::Hidden,62cl::desc("Maximum size of OrderedRegisterList"));63static cl::opt<unsigned> MaxIFMSize("insert-max-ifmap", cl::init(1024),64cl::Hidden,65cl::desc("Maximum size of IFMap"));6667static cl::opt<bool> OptTiming("insert-timing", cl::Hidden,68cl::desc("Enable timing of insert generation"));69static cl::opt<bool>70OptTimingDetail("insert-timing-detail", cl::Hidden,71cl::desc("Enable detailed timing of insert "72"generation"));7374static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden);75static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden);76// Whether to construct constant values via "insert". Could eliminate constant77// extenders, but often not practical.78static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden);7980// The preprocessor gets confused when the DEBUG macro is passed larger81// chunks of code. Use this function to detect debugging.82inline static bool isDebug() {83#ifndef NDEBUG84return DebugFlag && isCurrentDebugType(DEBUG_TYPE);85#else86return false;87#endif88}8990namespace {9192// Set of virtual registers, based on BitVector.93struct RegisterSet : private BitVector {94RegisterSet() = default;95explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}96RegisterSet(const RegisterSet &RS) = default;97RegisterSet &operator=(const RegisterSet &RS) = default;9899using BitVector::clear;100101unsigned find_first() const {102int First = BitVector::find_first();103if (First < 0)104return 0;105return x2v(First);106}107108unsigned find_next(unsigned Prev) const {109int Next = BitVector::find_next(v2x(Prev));110if (Next < 0)111return 0;112return x2v(Next);113}114115RegisterSet &insert(unsigned R) {116unsigned Idx = v2x(R);117ensure(Idx);118return static_cast<RegisterSet&>(BitVector::set(Idx));119}120RegisterSet &remove(unsigned R) {121unsigned Idx = v2x(R);122if (Idx >= size())123return *this;124return static_cast<RegisterSet&>(BitVector::reset(Idx));125}126127RegisterSet &insert(const RegisterSet &Rs) {128return static_cast<RegisterSet&>(BitVector::operator|=(Rs));129}130RegisterSet &remove(const RegisterSet &Rs) {131return static_cast<RegisterSet&>(BitVector::reset(Rs));132}133134reference operator[](unsigned R) {135unsigned Idx = v2x(R);136ensure(Idx);137return BitVector::operator[](Idx);138}139bool operator[](unsigned R) const {140unsigned Idx = v2x(R);141assert(Idx < size());142return BitVector::operator[](Idx);143}144bool has(unsigned R) const {145unsigned Idx = v2x(R);146if (Idx >= size())147return false;148return BitVector::test(Idx);149}150151bool empty() const {152return !BitVector::any();153}154bool includes(const RegisterSet &Rs) const {155// A.BitVector::test(B) <=> A-B != {}156return !Rs.BitVector::test(*this);157}158bool intersects(const RegisterSet &Rs) const {159return BitVector::anyCommon(Rs);160}161162private:163void ensure(unsigned Idx) {164if (size() <= Idx)165resize(std::max(Idx+1, 32U));166}167168static inline unsigned v2x(unsigned v) {169return Register::virtReg2Index(v);170}171172static inline unsigned x2v(unsigned x) {173return Register::index2VirtReg(x);174}175};176177struct PrintRegSet {178PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)179: RS(S), TRI(RI) {}180181friend raw_ostream &operator<< (raw_ostream &OS,182const PrintRegSet &P);183184private:185const RegisterSet &RS;186const TargetRegisterInfo *TRI;187};188189raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {190OS << '{';191for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))192OS << ' ' << printReg(R, P.TRI);193OS << " }";194return OS;195}196197// A convenience class to associate unsigned numbers (such as virtual198// registers) with unsigned numbers.199struct UnsignedMap : public DenseMap<unsigned,unsigned> {200UnsignedMap() = default;201202private:203using BaseType = DenseMap<unsigned, unsigned>;204};205206// A utility to establish an ordering between virtual registers:207// VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB]208// This is meant as a cache for the ordering of virtual registers defined209// by a potentially expensive comparison function, or obtained by a proce-210// dure that should not be repeated each time two registers are compared.211struct RegisterOrdering : public UnsignedMap {212RegisterOrdering() = default;213214unsigned operator[](unsigned VR) const {215const_iterator F = find(VR);216assert(F != end());217return F->second;218}219220// Add operator(), so that objects of this class can be used as221// comparators in std::sort et al.222bool operator() (unsigned VR1, unsigned VR2) const {223return operator[](VR1) < operator[](VR2);224}225};226227// Ordering of bit values. This class does not have operator[], but228// is supplies a comparison operator() for use in std:: algorithms.229// The order is as follows:230// - 0 < 1 < ref231// - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg),232// or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos.233struct BitValueOrdering {234BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {}235236bool operator() (const BitTracker::BitValue &V1,237const BitTracker::BitValue &V2) const;238239const RegisterOrdering &BaseOrd;240};241242} // end anonymous namespace243244bool BitValueOrdering::operator() (const BitTracker::BitValue &V1,245const BitTracker::BitValue &V2) const {246if (V1 == V2)247return false;248// V1==0 => true, V2==0 => false249if (V1.is(0) || V2.is(0))250return V1.is(0);251// Neither of V1,V2 is 0, and V1!=V2.252// V2==1 => false, V1==1 => true253if (V2.is(1) || V1.is(1))254return !V2.is(1);255// Both V1,V2 are refs.256unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg];257if (Ind1 != Ind2)258return Ind1 < Ind2;259// If V1.Pos==V2.Pos260assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different");261return V1.RefI.Pos < V2.RefI.Pos;262}263264namespace {265266// Cache for the BitTracker's cell map. Map lookup has a logarithmic267// complexity, this class will memoize the lookup results to reduce268// the access time for repeated lookups of the same cell.269struct CellMapShadow {270CellMapShadow(const BitTracker &T) : BT(T) {}271272const BitTracker::RegisterCell &lookup(unsigned VR) {273unsigned RInd = Register::virtReg2Index(VR);274// Grow the vector to at least 32 elements.275if (RInd >= CVect.size())276CVect.resize(std::max(RInd+16, 32U), nullptr);277const BitTracker::RegisterCell *CP = CVect[RInd];278if (CP == nullptr)279CP = CVect[RInd] = &BT.lookup(VR);280return *CP;281}282283const BitTracker &BT;284285private:286using CellVectType = std::vector<const BitTracker::RegisterCell *>;287288CellVectType CVect;289};290291// Comparator class for lexicographic ordering of virtual registers292// according to the corresponding BitTracker::RegisterCell objects.293struct RegisterCellLexCompare {294RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M)295: BitOrd(BO), CM(M) {}296297bool operator() (unsigned VR1, unsigned VR2) const;298299private:300const BitValueOrdering &BitOrd;301CellMapShadow &CM;302};303304// Comparator class for lexicographic ordering of virtual registers305// according to the specified bits of the corresponding BitTracker::306// RegisterCell objects.307// Specifically, this class will be used to compare bit B of a register308// cell for a selected virtual register R with bit N of any register309// other than R.310struct RegisterCellBitCompareSel {311RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N,312const BitValueOrdering &BO, CellMapShadow &M)313: SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {}314315bool operator() (unsigned VR1, unsigned VR2) const;316317private:318const unsigned SelR, SelB;319const unsigned BitN;320const BitValueOrdering &BitOrd;321CellMapShadow &CM;322};323324} // end anonymous namespace325326bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const {327// Ordering of registers, made up from two given orderings:328// - the ordering of the register numbers, and329// - the ordering of register cells.330// Def. R1 < R2 if:331// - cell(R1) < cell(R2), or332// - cell(R1) == cell(R2), and index(R1) < index(R2).333//334// For register cells, the ordering is lexicographic, with index 0 being335// the most significant.336if (VR1 == VR2)337return false;338339const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2);340uint16_t W1 = RC1.width(), W2 = RC2.width();341for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) {342const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i];343if (V1 != V2)344return BitOrd(V1, V2);345}346// Cells are equal up until the common length.347if (W1 != W2)348return W1 < W2;349350return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2];351}352353bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const {354if (VR1 == VR2)355return false;356const BitTracker::RegisterCell &RC1 = CM.lookup(VR1);357const BitTracker::RegisterCell &RC2 = CM.lookup(VR2);358uint16_t W1 = RC1.width(), W2 = RC2.width();359uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN;360uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN;361// If Bit1 exceeds the width of VR1, then:362// - return false, if at the same time Bit2 exceeds VR2, or363// - return true, otherwise.364// (I.e. "a bit value that does not exist is less than any bit value365// that does exist".)366if (W1 <= Bit1)367return Bit2 < W2;368// If Bit1 is within VR1, but Bit2 is not within VR2, return false.369if (W2 <= Bit2)370return false;371372const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2];373if (V1 != V2)374return BitOrd(V1, V2);375return false;376}377378namespace {379380class OrderedRegisterList {381using ListType = std::vector<unsigned>;382const unsigned MaxSize;383384public:385OrderedRegisterList(const RegisterOrdering &RO)386: MaxSize(MaxORLSize), Ord(RO) {}387388void insert(unsigned VR);389void remove(unsigned VR);390391unsigned operator[](unsigned Idx) const {392assert(Idx < Seq.size());393return Seq[Idx];394}395396unsigned size() const {397return Seq.size();398}399400using iterator = ListType::iterator;401using const_iterator = ListType::const_iterator;402403iterator begin() { return Seq.begin(); }404iterator end() { return Seq.end(); }405const_iterator begin() const { return Seq.begin(); }406const_iterator end() const { return Seq.end(); }407408// Convenience function to convert an iterator to the corresponding index.409unsigned idx(iterator It) const { return It-begin(); }410411private:412ListType Seq;413const RegisterOrdering &Ord;414};415416struct PrintORL {417PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI)418: RL(L), TRI(RI) {}419420friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P);421422private:423const OrderedRegisterList &RL;424const TargetRegisterInfo *TRI;425};426427raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) {428OS << '(';429OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end();430for (OrderedRegisterList::const_iterator I = B; I != E; ++I) {431if (I != B)432OS << ", ";433OS << printReg(*I, P.TRI);434}435OS << ')';436return OS;437}438439} // end anonymous namespace440441void OrderedRegisterList::insert(unsigned VR) {442iterator L = llvm::lower_bound(Seq, VR, Ord);443if (L == Seq.end())444Seq.push_back(VR);445else446Seq.insert(L, VR);447448unsigned S = Seq.size();449if (S > MaxSize)450Seq.resize(MaxSize);451assert(Seq.size() <= MaxSize);452}453454void OrderedRegisterList::remove(unsigned VR) {455iterator L = llvm::lower_bound(Seq, VR, Ord);456if (L != Seq.end())457Seq.erase(L);458}459460namespace {461462// A record of the insert form. The fields correspond to the operands463// of the "insert" instruction:464// ... = insert(SrcR, InsR, #Wdh, #Off)465struct IFRecord {466IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0)467: SrcR(SR), InsR(IR), Wdh(W), Off(O) {}468469unsigned SrcR, InsR;470uint16_t Wdh, Off;471};472473struct PrintIFR {474PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI)475: IFR(R), TRI(RI) {}476477private:478friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P);479480const IFRecord &IFR;481const TargetRegisterInfo *TRI;482};483484raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) {485unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR;486OS << '(' << printReg(SrcR, P.TRI) << ',' << printReg(InsR, P.TRI)487<< ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')';488return OS;489}490491using IFRecordWithRegSet = std::pair<IFRecord, RegisterSet>;492493} // end anonymous namespace494495namespace llvm {496497void initializeHexagonGenInsertPass(PassRegistry&);498FunctionPass *createHexagonGenInsert();499500} // end namespace llvm501502namespace {503504class HexagonGenInsert : public MachineFunctionPass {505public:506static char ID;507508HexagonGenInsert() : MachineFunctionPass(ID) {509initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry());510}511512StringRef getPassName() const override {513return "Hexagon generate \"insert\" instructions";514}515516void getAnalysisUsage(AnalysisUsage &AU) const override {517AU.addRequired<MachineDominatorTreeWrapperPass>();518AU.addPreserved<MachineDominatorTreeWrapperPass>();519MachineFunctionPass::getAnalysisUsage(AU);520}521522bool runOnMachineFunction(MachineFunction &MF) override;523524private:525using PairMapType = DenseMap<std::pair<unsigned, unsigned>, unsigned>;526527void buildOrderingMF(RegisterOrdering &RO) const;528void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const;529bool isIntClass(const TargetRegisterClass *RC) const;530bool isConstant(unsigned VR) const;531bool isSmallConstant(unsigned VR) const;532bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR,533uint16_t L, uint16_t S) const;534bool findSelfReference(unsigned VR) const;535bool findNonSelfReference(unsigned VR) const;536void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const;537void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const;538unsigned distance(const MachineBasicBlock *FromB,539const MachineBasicBlock *ToB, const UnsignedMap &RPO,540PairMapType &M) const;541unsigned distance(MachineBasicBlock::const_iterator FromI,542MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,543PairMapType &M) const;544bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs);545void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs);546void findRemovableRegisters(unsigned VR, IFRecord IF,547RegisterSet &RMs) const;548void computeRemovableRegisters();549550void pruneEmptyLists();551void pruneCoveredSets(unsigned VR);552void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M);553void pruneRegCopies(unsigned VR);554void pruneCandidates();555void selectCandidates();556bool generateInserts();557558bool removeDeadCode(MachineDomTreeNode *N);559560// IFRecord coupled with a set of potentially removable registers:561using IFListType = std::vector<IFRecordWithRegSet>;562using IFMapType = DenseMap<unsigned, IFListType>; // vreg -> IFListType563564void dump_map() const;565566const HexagonInstrInfo *HII = nullptr;567const HexagonRegisterInfo *HRI = nullptr;568569MachineFunction *MFN;570MachineRegisterInfo *MRI;571MachineDominatorTree *MDT;572CellMapShadow *CMS;573574RegisterOrdering BaseOrd;575RegisterOrdering CellOrd;576IFMapType IFMap;577};578579} // end anonymous namespace580581char HexagonGenInsert::ID = 0;582583void HexagonGenInsert::dump_map() const {584for (const auto &I : IFMap) {585dbgs() << " " << printReg(I.first, HRI) << ":\n";586const IFListType &LL = I.second;587for (const auto &J : LL)588dbgs() << " " << PrintIFR(J.first, HRI) << ", "589<< PrintRegSet(J.second, HRI) << '\n';590}591}592593void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {594unsigned Index = 0;595596for (const MachineBasicBlock &B : *MFN) {597if (!CMS->BT.reached(&B))598continue;599600for (const MachineInstr &MI : B) {601for (const MachineOperand &MO : MI.operands()) {602if (MO.isReg() && MO.isDef()) {603Register R = MO.getReg();604assert(MO.getSubReg() == 0 && "Unexpected subregister in definition");605if (R.isVirtual())606RO.insert(std::make_pair(R, Index++));607}608}609}610}611// Since some virtual registers may have had their def and uses eliminated,612// they are no longer referenced in the code, and so they will not appear613// in the map.614}615616void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,617RegisterOrdering &RO) const {618// Create a vector of all virtual registers (collect them from the base619// ordering RB), and then sort it using the RegisterCell comparator.620BitValueOrdering BVO(RB);621RegisterCellLexCompare LexCmp(BVO, *CMS);622623using SortableVectorType = std::vector<unsigned>;624625SortableVectorType VRs;626for (auto &I : RB)627VRs.push_back(I.first);628llvm::sort(VRs, LexCmp);629// Transfer the results to the outgoing register ordering.630for (unsigned i = 0, n = VRs.size(); i < n; ++i)631RO.insert(std::make_pair(VRs[i], i));632}633634inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const {635return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass;636}637638bool HexagonGenInsert::isConstant(unsigned VR) const {639const BitTracker::RegisterCell &RC = CMS->lookup(VR);640uint16_t W = RC.width();641for (uint16_t i = 0; i < W; ++i) {642const BitTracker::BitValue &BV = RC[i];643if (BV.is(0) || BV.is(1))644continue;645return false;646}647return true;648}649650bool HexagonGenInsert::isSmallConstant(unsigned VR) const {651const BitTracker::RegisterCell &RC = CMS->lookup(VR);652uint16_t W = RC.width();653if (W > 64)654return false;655uint64_t V = 0, B = 1;656for (uint16_t i = 0; i < W; ++i) {657const BitTracker::BitValue &BV = RC[i];658if (BV.is(1))659V |= B;660else if (!BV.is(0))661return false;662B <<= 1;663}664665// For 32-bit registers, consider: Rd = #s16.666if (W == 32)667return isInt<16>(V);668669// For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8)670return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V));671}672673bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR,674unsigned InsR, uint16_t L, uint16_t S) const {675const TargetRegisterClass *DstRC = MRI->getRegClass(DstR);676const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR);677const TargetRegisterClass *InsRC = MRI->getRegClass(InsR);678// Only integet (32-/64-bit) register classes.679if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC))680return false;681// The "source" register must be of the same class as DstR.682if (DstRC != SrcRC)683return false;684if (DstRC == InsRC)685return true;686// A 64-bit register can only be generated from other 64-bit registers.687if (DstRC == &Hexagon::DoubleRegsRegClass)688return false;689// Otherwise, the L and S cannot span 32-bit word boundary.690if (S < 32 && S+L > 32)691return false;692return true;693}694695bool HexagonGenInsert::findSelfReference(unsigned VR) const {696const BitTracker::RegisterCell &RC = CMS->lookup(VR);697for (uint16_t i = 0, w = RC.width(); i < w; ++i) {698const BitTracker::BitValue &V = RC[i];699if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR)700return true;701}702return false;703}704705bool HexagonGenInsert::findNonSelfReference(unsigned VR) const {706BitTracker::RegisterCell RC = CMS->lookup(VR);707for (uint16_t i = 0, w = RC.width(); i < w; ++i) {708const BitTracker::BitValue &V = RC[i];709if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR)710return true;711}712return false;713}714715void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,716RegisterSet &Defs) const {717for (const MachineOperand &MO : MI->operands()) {718if (!MO.isReg() || !MO.isDef())719continue;720Register R = MO.getReg();721if (!R.isVirtual())722continue;723Defs.insert(R);724}725}726727void HexagonGenInsert::getInstrUses(const MachineInstr *MI,728RegisterSet &Uses) const {729for (const MachineOperand &MO : MI->operands()) {730if (!MO.isReg() || !MO.isUse())731continue;732Register R = MO.getReg();733if (!R.isVirtual())734continue;735Uses.insert(R);736}737}738739unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB,740const MachineBasicBlock *ToB, const UnsignedMap &RPO,741PairMapType &M) const {742// Forward distance from the end of a block to the beginning of it does743// not make sense. This function should not be called with FromB == ToB.744assert(FromB != ToB);745746unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber();747// If we have already computed it, return the cached result.748PairMapType::iterator F = M.find(std::make_pair(FromN, ToN));749if (F != M.end())750return F->second;751unsigned ToRPO = RPO.lookup(ToN);752753unsigned MaxD = 0;754755for (const MachineBasicBlock *PB : ToB->predecessors()) {756// Skip back edges. Also, if FromB is a predecessor of ToB, the distance757// along that path will be 0, and we don't need to do any calculations758// on it.759if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO)760continue;761unsigned D = PB->size() + distance(FromB, PB, RPO, M);762if (D > MaxD)763MaxD = D;764}765766// Memoize the result for later lookup.767M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD));768return MaxD;769}770771unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI,772MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,773PairMapType &M) const {774const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent();775if (FB == TB)776return std::distance(FromI, ToI);777unsigned D1 = std::distance(TB->begin(), ToI);778unsigned D2 = distance(FB, TB, RPO, M);779unsigned D3 = std::distance(FromI, FB->end());780return D1+D2+D3;781}782783bool HexagonGenInsert::findRecordInsertForms(unsigned VR,784OrderedRegisterList &AVs) {785if (isDebug()) {786dbgs() << __func__ << ": " << printReg(VR, HRI)787<< " AVs: " << PrintORL(AVs, HRI) << "\n";788}789if (AVs.size() == 0)790return false;791792using iterator = OrderedRegisterList::iterator;793794BitValueOrdering BVO(BaseOrd);795const BitTracker::RegisterCell &RC = CMS->lookup(VR);796uint16_t W = RC.width();797798using RSRecord = std::pair<unsigned, uint16_t>; // (reg,shift)799using RSListType = std::vector<RSRecord>;800// Have a map, with key being the matching prefix length, and the value801// being the list of pairs (R,S), where R's prefix matches VR at S.802// (DenseMap<uint16_t,RSListType> fails to instantiate.)803using LRSMapType = DenseMap<unsigned, RSListType>;804LRSMapType LM;805806// Conceptually, rotate the cell RC right (i.e. towards the LSB) by S,807// and find matching prefixes from AVs with the rotated RC. Such a prefix808// would match a string of bits (of length L) in RC starting at S.809for (uint16_t S = 0; S < W; ++S) {810iterator B = AVs.begin(), E = AVs.end();811// The registers in AVs are ordered according to the lexical order of812// the corresponding register cells. This means that the range of regis-813// ters in AVs that match a prefix of length L+1 will be contained in814// the range that matches a prefix of length L. This means that we can815// keep narrowing the search space as the prefix length goes up. This816// helps reduce the overall complexity of the search.817uint16_t L;818for (L = 0; L < W-S; ++L) {819// Compare against VR's bits starting at S, which emulates rotation820// of VR by S.821RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS);822iterator NewB = std::lower_bound(B, E, VR, RCB);823iterator NewE = std::upper_bound(NewB, E, VR, RCB);824// For the registers that are eliminated from the next range, L is825// the longest prefix matching VR at position S (their prefixes826// differ from VR at S+L). If L>0, record this information for later827// use.828if (L > 0) {829for (iterator I = B; I != NewB; ++I)830LM[L].push_back(std::make_pair(*I, S));831for (iterator I = NewE; I != E; ++I)832LM[L].push_back(std::make_pair(*I, S));833}834B = NewB, E = NewE;835if (B == E)836break;837}838// Record the final register range. If this range is non-empty, then839// L=W-S.840assert(B == E || L == W-S);841if (B != E) {842for (iterator I = B; I != E; ++I)843LM[L].push_back(std::make_pair(*I, S));844// If B!=E, then we found a range of registers whose prefixes cover the845// rest of VR from position S. There is no need to further advance S.846break;847}848}849850if (isDebug()) {851dbgs() << "Prefixes matching register " << printReg(VR, HRI) << "\n";852for (const auto &I : LM) {853dbgs() << " L=" << I.first << ':';854const RSListType &LL = I.second;855for (const auto &J : LL)856dbgs() << " (" << printReg(J.first, HRI) << ",@" << J.second << ')';857dbgs() << '\n';858}859}860861bool Recorded = false;862863for (unsigned SrcR : AVs) {864int FDi = -1, LDi = -1; // First/last different bit.865const BitTracker::RegisterCell &AC = CMS->lookup(SrcR);866uint16_t AW = AC.width();867for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) {868if (RC[i] == AC[i])869continue;870if (FDi == -1)871FDi = i;872LDi = i;873}874if (FDi == -1)875continue; // TODO (future): Record identical registers.876// Look for a register whose prefix could patch the range [FD..LD]877// where VR and SrcR differ.878uint16_t FD = FDi, LD = LDi; // Switch to unsigned type.879uint16_t MinL = LD-FD+1;880for (uint16_t L = MinL; L < W; ++L) {881LRSMapType::iterator F = LM.find(L);882if (F == LM.end())883continue;884RSListType &LL = F->second;885for (const auto &I : LL) {886uint16_t S = I.second;887// MinL is the minimum length of the prefix. Any length above MinL888// allows some flexibility as to where the prefix can start:889// given the extra length EL=L-MinL, the prefix must start between890// max(0,FD-EL) and FD.891if (S > FD) // Starts too late.892continue;893uint16_t EL = L-MinL;894uint16_t LowS = (EL < FD) ? FD-EL : 0;895if (S < LowS) // Starts too early.896continue;897unsigned InsR = I.first;898if (!isValidInsertForm(VR, SrcR, InsR, L, S))899continue;900if (isDebug()) {901dbgs() << printReg(VR, HRI) << " = insert(" << printReg(SrcR, HRI)902<< ',' << printReg(InsR, HRI) << ",#" << L << ",#"903<< S << ")\n";904}905IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet());906IFMap[VR].push_back(RR);907Recorded = true;908}909}910}911912return Recorded;913}914915void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,916OrderedRegisterList &AVs) {917if (isDebug())918dbgs() << "visiting block " << printMBBReference(*B) << "\n";919920// First, check if this block is reachable at all. If not, the bit tracker921// will not have any information about registers in it.922if (!CMS->BT.reached(B))923return;924925bool DoConst = OptConst;926// Keep a separate set of registers defined in this block, so that we927// can remove them from the list of available registers once all DT928// successors have been processed.929RegisterSet BlockDefs, InsDefs;930for (MachineInstr &MI : *B) {931InsDefs.clear();932getInstrDefs(&MI, InsDefs);933// Leave those alone. They are more transparent than "insert".934bool Skip = MI.isCopy() || MI.isRegSequence();935936if (!Skip) {937// Visit all defined registers, and attempt to find the corresponding938// "insert" representations.939for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) {940// Do not collect registers that are known to be compile-time cons-941// tants, unless requested.942if (!DoConst && isConstant(VR))943continue;944// If VR's cell contains a reference to VR, then VR cannot be defined945// via "insert". If VR is a constant that can be generated in a single946// instruction (without constant extenders), generating it via insert947// makes no sense.948if (findSelfReference(VR) || isSmallConstant(VR))949continue;950951findRecordInsertForms(VR, AVs);952// Stop if the map size is too large.953if (IFMap.size() > MaxIFMSize)954return;955}956}957958// Insert the defined registers into the list of available registers959// after they have been processed.960for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR))961AVs.insert(VR);962BlockDefs.insert(InsDefs);963}964965for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(B))) {966MachineBasicBlock *SB = DTN->getBlock();967collectInBlock(SB, AVs);968}969970for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR))971AVs.remove(VR);972}973974void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF,975RegisterSet &RMs) const {976// For a given register VR and a insert form, find the registers that are977// used by the current definition of VR, and which would no longer be978// needed for it after the definition of VR is replaced with the insert979// form. These are the registers that could potentially become dead.980RegisterSet Regs[2];981982unsigned S = 0; // Register set selector.983Regs[S].insert(VR);984985while (!Regs[S].empty()) {986// Breadth-first search.987unsigned OtherS = 1-S;988Regs[OtherS].clear();989for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) {990Regs[S].remove(R);991if (R == IF.SrcR || R == IF.InsR)992continue;993// Check if a given register has bits that are references to any other994// registers. This is to detect situations where the instruction that995// defines register R takes register Q as an operand, but R itself does996// not contain any bits from Q. Loads are examples of how this could997// happen:998// R = load Q999// In this case (assuming we do not have any knowledge about the loaded1000// value), we must not treat R as a "conveyance" of the bits from Q.1001// (The information in BT about R's bits would have them as constants,1002// in case of zero-extending loads, or refs to R.)1003if (!findNonSelfReference(R))1004continue;1005RMs.insert(R);1006const MachineInstr *DefI = MRI->getVRegDef(R);1007assert(DefI);1008// Do not iterate past PHI nodes to avoid infinite loops. This can1009// make the final set a bit less accurate, but the removable register1010// sets are an approximation anyway.1011if (DefI->isPHI())1012continue;1013getInstrUses(DefI, Regs[OtherS]);1014}1015S = OtherS;1016}1017// The register VR is added to the list as a side-effect of the algorithm,1018// but it is not "potentially removable". A potentially removable register1019// is one that may become unused (dead) after conversion to the insert form1020// IF, and obviously VR (or its replacement) will not become dead by apply-1021// ing IF.1022RMs.remove(VR);1023}10241025void HexagonGenInsert::computeRemovableRegisters() {1026for (auto &I : IFMap) {1027IFListType &LL = I.second;1028for (auto &J : LL)1029findRemovableRegisters(I.first, J.first, J.second);1030}1031}10321033void HexagonGenInsert::pruneEmptyLists() {1034// Remove all entries from the map, where the register has no insert forms1035// associated with it.1036using IterListType = SmallVector<IFMapType::iterator, 16>;1037IterListType Prune;1038for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {1039if (I->second.empty())1040Prune.push_back(I);1041}1042for (const auto &It : Prune)1043IFMap.erase(It);1044}10451046void HexagonGenInsert::pruneCoveredSets(unsigned VR) {1047IFMapType::iterator F = IFMap.find(VR);1048assert(F != IFMap.end());1049IFListType &LL = F->second;10501051// First, examine the IF candidates for register VR whose removable-regis-1052// ter sets are empty. This means that a given candidate will not help eli-1053// minate any registers, but since "insert" is not a constant-extendable1054// instruction, using such a candidate may reduce code size if the defini-1055// tion of VR is constant-extended.1056// If there exists a candidate with a non-empty set, the ones with empty1057// sets will not be used and can be removed.1058MachineInstr *DefVR = MRI->getVRegDef(VR);1059bool DefEx = HII->isConstExtended(*DefVR);1060bool HasNE = false;1061for (const auto &I : LL) {1062if (I.second.empty())1063continue;1064HasNE = true;1065break;1066}1067if (!DefEx || HasNE) {1068// The definition of VR is not constant-extended, or there is a candidate1069// with a non-empty set. Remove all candidates with empty sets.1070auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool {1071return IR.second.empty();1072};1073llvm::erase_if(LL, IsEmpty);1074} else {1075// The definition of VR is constant-extended, and all candidates have1076// empty removable-register sets. Pick the maximum candidate, and remove1077// all others. The "maximum" does not have any special meaning here, it1078// is only so that the candidate that will remain on the list is selec-1079// ted deterministically.1080IFRecord MaxIF = LL[0].first;1081for (unsigned i = 1, n = LL.size(); i < n; ++i) {1082// If LL[MaxI] < LL[i], then MaxI = i.1083const IFRecord &IF = LL[i].first;1084unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR];1085unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR];1086if (M0 > R0)1087continue;1088if (M0 == R0) {1089if (M1 > R1)1090continue;1091if (M1 == R1) {1092if (MaxIF.Wdh > IF.Wdh)1093continue;1094if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off)1095continue;1096}1097}1098// MaxIF < IF.1099MaxIF = IF;1100}1101// Remove everything except the maximum candidate. All register sets1102// are empty, so no need to preserve anything.1103LL.clear();1104LL.push_back(std::make_pair(MaxIF, RegisterSet()));1105}11061107// Now, remove those whose sets of potentially removable registers are1108// contained in another IF candidate for VR. For example, given these1109// candidates for %45,1110// %45:1111// (%44,%41,#9,#8), { %42 }1112// (%43,%41,#9,#8), { %42 %44 }1113// remove the first one, since it is contained in the second one.1114for (unsigned i = 0, n = LL.size(); i < n; ) {1115const RegisterSet &RMi = LL[i].second;1116unsigned j = 0;1117while (j < n) {1118if (j != i && LL[j].second.includes(RMi))1119break;1120j++;1121}1122if (j == n) { // RMi not contained in anything else.1123i++;1124continue;1125}1126LL.erase(LL.begin()+i);1127n = LL.size();1128}1129}11301131void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO,1132PairMapType &M) {1133IFMapType::iterator F = IFMap.find(VR);1134assert(F != IFMap.end());1135IFListType &LL = F->second;1136unsigned Cutoff = VRegDistCutoff;1137const MachineInstr *DefV = MRI->getVRegDef(VR);11381139for (unsigned i = LL.size(); i > 0; --i) {1140unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR;1141const MachineInstr *DefS = MRI->getVRegDef(SR);1142const MachineInstr *DefI = MRI->getVRegDef(IR);1143unsigned DSV = distance(DefS, DefV, RPO, M);1144if (DSV < Cutoff) {1145unsigned DIV = distance(DefI, DefV, RPO, M);1146if (DIV < Cutoff)1147continue;1148}1149LL.erase(LL.begin()+(i-1));1150}1151}11521153void HexagonGenInsert::pruneRegCopies(unsigned VR) {1154IFMapType::iterator F = IFMap.find(VR);1155assert(F != IFMap.end());1156IFListType &LL = F->second;11571158auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool {1159return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32);1160};1161llvm::erase_if(LL, IsCopy);1162}11631164void HexagonGenInsert::pruneCandidates() {1165// Remove candidates that are not beneficial, regardless of the final1166// selection method.1167// First, remove candidates whose potentially removable set is a subset1168// of another candidate's set.1169for (const auto &I : IFMap)1170pruneCoveredSets(I.first);11711172UnsignedMap RPO;11731174using RPOTType = ReversePostOrderTraversal<const MachineFunction *>;11751176RPOTType RPOT(MFN);1177unsigned RPON = 0;1178for (const auto &I : RPOT)1179RPO[I->getNumber()] = RPON++;11801181PairMapType Memo; // Memoization map for distance calculation.1182// Remove candidates that would use registers defined too far away.1183for (const auto &I : IFMap)1184pruneUsesTooFar(I.first, RPO, Memo);11851186pruneEmptyLists();11871188for (const auto &I : IFMap)1189pruneRegCopies(I.first);1190}11911192namespace {11931194// Class for comparing IF candidates for registers that have multiple of1195// them. The smaller the candidate, according to this ordering, the better.1196// First, compare the number of zeros in the associated potentially remova-1197// ble register sets. "Zero" indicates that the register is very likely to1198// become dead after this transformation.1199// Second, compare "averages", i.e. use-count per size. The lower wins.1200// After that, it does not really matter which one is smaller. Resolve1201// the tie in some deterministic way.1202struct IFOrdering {1203IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO)1204: UseC(UC), BaseOrd(BO) {}12051206bool operator() (const IFRecordWithRegSet &A,1207const IFRecordWithRegSet &B) const;12081209private:1210void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,1211unsigned &Sum) const;12121213const UnsignedMap &UseC;1214const RegisterOrdering &BaseOrd;1215};12161217} // end anonymous namespace12181219bool IFOrdering::operator() (const IFRecordWithRegSet &A,1220const IFRecordWithRegSet &B) const {1221unsigned SizeA = 0, ZeroA = 0, SumA = 0;1222unsigned SizeB = 0, ZeroB = 0, SumB = 0;1223stats(A.second, SizeA, ZeroA, SumA);1224stats(B.second, SizeB, ZeroB, SumB);12251226// We will pick the minimum element. The more zeros, the better.1227if (ZeroA != ZeroB)1228return ZeroA > ZeroB;1229// Compare SumA/SizeA with SumB/SizeB, lower is better.1230uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA;1231if (AvgA != AvgB)1232return AvgA < AvgB;12331234// The sets compare identical so far. Resort to comparing the IF records.1235// The actual values don't matter, this is only for determinism.1236unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR];1237if (OSA != OSB)1238return OSA < OSB;1239unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR];1240if (OIA != OIB)1241return OIA < OIB;1242if (A.first.Wdh != B.first.Wdh)1243return A.first.Wdh < B.first.Wdh;1244return A.first.Off < B.first.Off;1245}12461247void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,1248unsigned &Sum) const {1249for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) {1250UnsignedMap::const_iterator F = UseC.find(R);1251assert(F != UseC.end());1252unsigned UC = F->second;1253if (UC == 0)1254Zero++;1255Sum += UC;1256Size++;1257}1258}12591260void HexagonGenInsert::selectCandidates() {1261// Some registers may have multiple valid candidates. Pick the best one1262// (or decide not to use any).12631264// Compute the "removability" measure of R:1265// For each potentially removable register R, record the number of regis-1266// ters with IF candidates, where R appears in at least one set.1267RegisterSet AllRMs;1268UnsignedMap UseC, RemC;1269IFMapType::iterator End = IFMap.end();12701271for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {1272const IFListType &LL = I->second;1273RegisterSet TT;1274for (const auto &J : LL)1275TT.insert(J.second);1276for (unsigned R = TT.find_first(); R; R = TT.find_next(R))1277RemC[R]++;1278AllRMs.insert(TT);1279}12801281for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) {1282using use_iterator = MachineRegisterInfo::use_nodbg_iterator;1283using InstrSet = SmallSet<const MachineInstr *, 16>;12841285InstrSet UIs;1286// Count as the number of instructions in which R is used, not the1287// number of operands.1288use_iterator E = MRI->use_nodbg_end();1289for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I)1290UIs.insert(I->getParent());1291unsigned C = UIs.size();1292// Calculate a measure, which is the number of instructions using R,1293// minus the "removability" count computed earlier.1294unsigned D = RemC[R];1295UseC[R] = (C > D) ? C-D : 0; // doz1296}12971298bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0;1299if (!SelectAll0 && !SelectHas0)1300SelectAll0 = true;13011302// The smaller the number UseC for a given register R, the "less used"1303// R is aside from the opportunities for removal offered by generating1304// "insert" instructions.1305// Iterate over the IF map, and for those registers that have multiple1306// candidates, pick the minimum one according to IFOrdering.1307IFOrdering IFO(UseC, BaseOrd);1308for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {1309IFListType &LL = I->second;1310if (LL.empty())1311continue;1312// Get the minimum element, remember it and clear the list. If the1313// element found is adequate, we will put it back on the list, other-1314// wise the list will remain empty, and the entry for this register1315// will be removed (i.e. this register will not be replaced by insert).1316IFListType::iterator MinI = llvm::min_element(LL, IFO);1317assert(MinI != LL.end());1318IFRecordWithRegSet M = *MinI;1319LL.clear();13201321// We want to make sure that this replacement will have a chance to be1322// beneficial, and that means that we want to have indication that some1323// register will be removed. The most likely registers to be eliminated1324// are the use operands in the definition of I->first. Accept/reject a1325// candidate based on how many of its uses it can potentially eliminate.13261327RegisterSet Us;1328const MachineInstr *DefI = MRI->getVRegDef(I->first);1329getInstrUses(DefI, Us);1330bool Accept = false;13311332if (SelectAll0) {1333bool All0 = true;1334for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {1335if (UseC[R] == 0)1336continue;1337All0 = false;1338break;1339}1340Accept = All0;1341} else if (SelectHas0) {1342bool Has0 = false;1343for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {1344if (UseC[R] != 0)1345continue;1346Has0 = true;1347break;1348}1349Accept = Has0;1350}1351if (Accept)1352LL.push_back(M);1353}13541355// Remove candidates that add uses of removable registers, unless the1356// removable registers are among replacement candidates.1357// Recompute the removable registers, since some candidates may have1358// been eliminated.1359AllRMs.clear();1360for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {1361const IFListType &LL = I->second;1362if (!LL.empty())1363AllRMs.insert(LL[0].second);1364}1365for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {1366IFListType &LL = I->second;1367if (LL.empty())1368continue;1369unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR;1370if (AllRMs[SR] || AllRMs[IR])1371LL.clear();1372}13731374pruneEmptyLists();1375}13761377bool HexagonGenInsert::generateInserts() {1378// Create a new register for each one from IFMap, and store them in the1379// map.1380UnsignedMap RegMap;1381for (auto &I : IFMap) {1382unsigned VR = I.first;1383const TargetRegisterClass *RC = MRI->getRegClass(VR);1384Register NewVR = MRI->createVirtualRegister(RC);1385RegMap[VR] = NewVR;1386}13871388// We can generate the "insert" instructions using potentially stale re-1389// gisters: SrcR and InsR for a given VR may be among other registers that1390// are also replaced. This is fine, we will do the mass "rauw" a bit later.1391for (auto &I : IFMap) {1392MachineInstr *MI = MRI->getVRegDef(I.first);1393MachineBasicBlock &B = *MI->getParent();1394DebugLoc DL = MI->getDebugLoc();1395unsigned NewR = RegMap[I.first];1396bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass;1397const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert)1398: HII->get(Hexagon::S2_insertp);1399IFRecord IF = I.second[0].first;1400unsigned Wdh = IF.Wdh, Off = IF.Off;1401unsigned InsS = 0;1402if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) {1403InsS = Hexagon::isub_lo;1404if (Off >= 32) {1405InsS = Hexagon::isub_hi;1406Off -= 32;1407}1408}1409// Advance to the proper location for inserting instructions. This could1410// be B.end().1411MachineBasicBlock::iterator At = MI;1412if (MI->isPHI())1413At = B.getFirstNonPHI();14141415BuildMI(B, At, DL, D, NewR)1416.addReg(IF.SrcR)1417.addReg(IF.InsR, 0, InsS)1418.addImm(Wdh)1419.addImm(Off);14201421MRI->clearKillFlags(IF.SrcR);1422MRI->clearKillFlags(IF.InsR);1423}14241425for (const auto &I : IFMap) {1426MachineInstr *DefI = MRI->getVRegDef(I.first);1427MRI->replaceRegWith(I.first, RegMap[I.first]);1428DefI->eraseFromParent();1429}14301431return true;1432}14331434bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {1435bool Changed = false;14361437for (auto *DTN : children<MachineDomTreeNode*>(N))1438Changed |= removeDeadCode(DTN);14391440MachineBasicBlock *B = N->getBlock();1441std::vector<MachineInstr*> Instrs;1442for (MachineInstr &MI : llvm::reverse(*B))1443Instrs.push_back(&MI);14441445for (MachineInstr *MI : Instrs) {1446unsigned Opc = MI->getOpcode();1447// Do not touch lifetime markers. This is why the target-independent DCE1448// cannot be used.1449if (Opc == TargetOpcode::LIFETIME_START ||1450Opc == TargetOpcode::LIFETIME_END)1451continue;1452bool Store = false;1453if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store))1454continue;14551456bool AllDead = true;1457SmallVector<unsigned,2> Regs;1458for (const MachineOperand &MO : MI->operands()) {1459if (!MO.isReg() || !MO.isDef())1460continue;1461Register R = MO.getReg();1462if (!R.isVirtual() || !MRI->use_nodbg_empty(R)) {1463AllDead = false;1464break;1465}1466Regs.push_back(R);1467}1468if (!AllDead)1469continue;14701471B->erase(MI);1472for (unsigned Reg : Regs)1473MRI->markUsesInDebugValueAsUndef(Reg);1474Changed = true;1475}14761477return Changed;1478}14791480bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {1481if (skipFunction(MF.getFunction()))1482return false;14831484bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail;1485bool Changed = false;14861487// Verify: one, but not both.1488assert(!OptSelectAll0 || !OptSelectHas0);14891490IFMap.clear();1491BaseOrd.clear();1492CellOrd.clear();14931494const auto &ST = MF.getSubtarget<HexagonSubtarget>();1495HII = ST.getInstrInfo();1496HRI = ST.getRegisterInfo();1497MFN = &MF;1498MRI = &MF.getRegInfo();1499MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();15001501// Clean up before any further processing, so that dead code does not1502// get used in a newly generated "insert" instruction. Have a custom1503// version of DCE that preserves lifetime markers. Without it, merging1504// of stack objects can fail to recognize and merge disjoint objects1505// leading to unnecessary stack growth.1506Changed = removeDeadCode(MDT->getRootNode());15071508const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);1509BitTracker BTLoc(HE, MF);1510BTLoc.trace(isDebug());1511BTLoc.run();1512CellMapShadow MS(BTLoc);1513CMS = &MS;15141515buildOrderingMF(BaseOrd);1516buildOrderingBT(BaseOrd, CellOrd);15171518if (isDebug()) {1519dbgs() << "Cell ordering:\n";1520for (const auto &I : CellOrd) {1521unsigned VR = I.first, Pos = I.second;1522dbgs() << printReg(VR, HRI) << " -> " << Pos << "\n";1523}1524}15251526// Collect candidates for conversion into the insert forms.1527MachineBasicBlock *RootB = MDT->getRoot();1528OrderedRegisterList AvailR(CellOrd);15291530const char *const TGName = "hexinsert";1531const char *const TGDesc = "Generate Insert Instructions";15321533{1534NamedRegionTimer _T("collection", "collection", TGName, TGDesc,1535TimingDetail);1536collectInBlock(RootB, AvailR);1537// Complete the information gathered in IFMap.1538computeRemovableRegisters();1539}15401541if (isDebug()) {1542dbgs() << "Candidates after collection:\n";1543dump_map();1544}15451546if (IFMap.empty())1547return Changed;15481549{1550NamedRegionTimer _T("pruning", "pruning", TGName, TGDesc, TimingDetail);1551pruneCandidates();1552}15531554if (isDebug()) {1555dbgs() << "Candidates after pruning:\n";1556dump_map();1557}15581559if (IFMap.empty())1560return Changed;15611562{1563NamedRegionTimer _T("selection", "selection", TGName, TGDesc, TimingDetail);1564selectCandidates();1565}15661567if (isDebug()) {1568dbgs() << "Candidates after selection:\n";1569dump_map();1570}15711572// Filter out vregs beyond the cutoff.1573if (VRegIndexCutoff.getPosition()) {1574unsigned Cutoff = VRegIndexCutoff;15751576using IterListType = SmallVector<IFMapType::iterator, 16>;15771578IterListType Out;1579for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {1580unsigned Idx = Register::virtReg2Index(I->first);1581if (Idx >= Cutoff)1582Out.push_back(I);1583}1584for (const auto &It : Out)1585IFMap.erase(It);1586}1587if (IFMap.empty())1588return Changed;15891590{1591NamedRegionTimer _T("generation", "generation", TGName, TGDesc,1592TimingDetail);1593generateInserts();1594}15951596return true;1597}15981599FunctionPass *llvm::createHexagonGenInsert() {1600return new HexagonGenInsert();1601}16021603//===----------------------------------------------------------------------===//1604// Public Constructor Functions1605//===----------------------------------------------------------------------===//16061607INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert",1608"Hexagon generate \"insert\" instructions", false, false)1609INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)1610INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert",1611"Hexagon generate \"insert\" instructions", false, false)161216131614