Path: blob/main/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
35294 views
//===- HexagonBitSimplify.cpp ---------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "BitTracker.h"9#include "HexagonBitTracker.h"10#include "HexagonInstrInfo.h"11#include "HexagonRegisterInfo.h"12#include "HexagonSubtarget.h"13#include "llvm/ADT/BitVector.h"14#include "llvm/ADT/DenseMap.h"15#include "llvm/ADT/GraphTraits.h"16#include "llvm/ADT/STLExtras.h"17#include "llvm/ADT/SmallVector.h"18#include "llvm/ADT/StringRef.h"19#include "llvm/CodeGen/MachineBasicBlock.h"20#include "llvm/CodeGen/MachineDominators.h"21#include "llvm/CodeGen/MachineFunction.h"22#include "llvm/CodeGen/MachineFunctionPass.h"23#include "llvm/CodeGen/MachineInstr.h"24#include "llvm/CodeGen/MachineInstrBuilder.h"25#include "llvm/CodeGen/MachineOperand.h"26#include "llvm/CodeGen/MachineRegisterInfo.h"27#include "llvm/CodeGen/TargetRegisterInfo.h"28#include "llvm/IR/DebugLoc.h"29#include "llvm/InitializePasses.h"30#include "llvm/MC/MCInstrDesc.h"31#include "llvm/Pass.h"32#include "llvm/Support/CommandLine.h"33#include "llvm/Support/Compiler.h"34#include "llvm/Support/Debug.h"35#include "llvm/Support/ErrorHandling.h"36#include "llvm/Support/MathExtras.h"37#include "llvm/Support/raw_ostream.h"38#include <algorithm>39#include <cassert>40#include <cstdint>41#include <deque>42#include <iterator>43#include <limits>44#include <utility>45#include <vector>4647#define DEBUG_TYPE "hexbit"4849using namespace llvm;5051static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,52cl::init(true), cl::desc("Preserve subregisters in tied operands"));53static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden,54cl::init(true), cl::desc("Generate extract instructions"));55static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden,56cl::init(true), cl::desc("Generate bitsplit instructions"));5758static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden,59cl::init(std::numeric_limits<unsigned>::max()));60static unsigned CountExtract = 0;61static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden,62cl::init(std::numeric_limits<unsigned>::max()));63static unsigned CountBitSplit = 0;6465static cl::opt<unsigned> RegisterSetLimit("hexbit-registerset-limit",66cl::Hidden, cl::init(1000));6768namespace llvm {6970void initializeHexagonBitSimplifyPass(PassRegistry& Registry);71FunctionPass *createHexagonBitSimplify();7273} // end namespace llvm7475namespace {7677// Set of virtual registers, based on BitVector.78struct RegisterSet {79RegisterSet() = default;80explicit RegisterSet(unsigned s, bool t = false) : Bits(s, t) {}81RegisterSet(const RegisterSet &RS) = default;8283void clear() {84Bits.clear();85LRU.clear();86}8788unsigned count() const {89return Bits.count();90}9192unsigned find_first() const {93int First = Bits.find_first();94if (First < 0)95return 0;96return x2v(First);97}9899unsigned find_next(unsigned Prev) const {100int Next = Bits.find_next(v2x(Prev));101if (Next < 0)102return 0;103return x2v(Next);104}105106RegisterSet &insert(unsigned R) {107unsigned Idx = v2x(R);108ensure(Idx);109bool Exists = Bits.test(Idx);110Bits.set(Idx);111if (!Exists) {112LRU.push_back(Idx);113if (LRU.size() > RegisterSetLimit) {114unsigned T = LRU.front();115Bits.reset(T);116LRU.pop_front();117}118}119return *this;120}121RegisterSet &remove(unsigned R) {122unsigned Idx = v2x(R);123if (Idx < Bits.size()) {124bool Exists = Bits.test(Idx);125Bits.reset(Idx);126if (Exists) {127auto F = llvm::find(LRU, Idx);128assert(F != LRU.end());129LRU.erase(F);130}131}132return *this;133}134135RegisterSet &insert(const RegisterSet &Rs) {136for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R))137insert(R);138return *this;139}140RegisterSet &remove(const RegisterSet &Rs) {141for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R))142remove(R);143return *this;144}145146bool operator[](unsigned R) const {147unsigned Idx = v2x(R);148return Idx < Bits.size() ? Bits[Idx] : false;149}150bool has(unsigned R) const {151unsigned Idx = v2x(R);152if (Idx >= Bits.size())153return false;154return Bits.test(Idx);155}156157bool empty() const {158return !Bits.any();159}160bool includes(const RegisterSet &Rs) const {161// A.test(B) <=> A-B != {}162return !Rs.Bits.test(Bits);163}164bool intersects(const RegisterSet &Rs) const {165return Bits.anyCommon(Rs.Bits);166}167168private:169BitVector Bits;170std::deque<unsigned> LRU;171172void ensure(unsigned Idx) {173if (Bits.size() <= Idx)174Bits.resize(std::max(Idx+1, 32U));175}176177static inline unsigned v2x(unsigned v) {178return Register::virtReg2Index(v);179}180181static inline unsigned x2v(unsigned x) {182return Register::index2VirtReg(x);183}184};185186struct PrintRegSet {187PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)188: RS(S), TRI(RI) {}189190friend raw_ostream &operator<< (raw_ostream &OS,191const PrintRegSet &P);192193private:194const RegisterSet &RS;195const TargetRegisterInfo *TRI;196};197198raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)199LLVM_ATTRIBUTE_UNUSED;200raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {201OS << '{';202for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))203OS << ' ' << printReg(R, P.TRI);204OS << " }";205return OS;206}207208class Transformation;209210class HexagonBitSimplify : public MachineFunctionPass {211public:212static char ID;213214HexagonBitSimplify() : MachineFunctionPass(ID) {}215216StringRef getPassName() const override {217return "Hexagon bit simplification";218}219220void getAnalysisUsage(AnalysisUsage &AU) const override {221AU.addRequired<MachineDominatorTreeWrapperPass>();222AU.addPreserved<MachineDominatorTreeWrapperPass>();223MachineFunctionPass::getAnalysisUsage(AU);224}225226bool runOnMachineFunction(MachineFunction &MF) override;227228static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);229static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);230static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,231const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);232static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,233uint16_t W);234static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,235uint16_t W, uint64_t &U);236static bool replaceReg(Register OldR, Register NewR,237MachineRegisterInfo &MRI);238static bool getSubregMask(const BitTracker::RegisterRef &RR,239unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);240static bool replaceRegWithSub(Register OldR, Register NewR, unsigned NewSR,241MachineRegisterInfo &MRI);242static bool replaceSubWithSub(Register OldR, unsigned OldSR, Register NewR,243unsigned NewSR, MachineRegisterInfo &MRI);244static bool parseRegSequence(const MachineInstr &I,245BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH,246const MachineRegisterInfo &MRI);247248static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,249uint16_t Begin);250static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,251uint16_t Begin, const HexagonInstrInfo &HII);252253static const TargetRegisterClass *getFinalVRegClass(254const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI);255static bool isTransparentCopy(const BitTracker::RegisterRef &RD,256const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI);257258private:259MachineDominatorTree *MDT = nullptr;260261bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);262static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,263unsigned NewSub = Hexagon::NoSubRegister);264};265266using HBS = HexagonBitSimplify;267268// The purpose of this class is to provide a common facility to traverse269// the function top-down or bottom-up via the dominator tree, and keep270// track of the available registers.271class Transformation {272public:273bool TopDown;274275Transformation(bool TD) : TopDown(TD) {}276virtual ~Transformation() = default;277278virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;279};280281} // end anonymous namespace282283char HexagonBitSimplify::ID = 0;284285INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexagon-bit-simplify",286"Hexagon bit simplification", false, false)287INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)288INITIALIZE_PASS_END(HexagonBitSimplify, "hexagon-bit-simplify",289"Hexagon bit simplification", false, false)290291bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,292RegisterSet &AVs) {293bool Changed = false;294295if (T.TopDown)296Changed = T.processBlock(B, AVs);297298RegisterSet Defs;299for (auto &I : B)300getInstrDefs(I, Defs);301RegisterSet NewAVs = AVs;302NewAVs.insert(Defs);303304for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(&B)))305Changed |= visitBlock(*(DTN->getBlock()), T, NewAVs);306307if (!T.TopDown)308Changed |= T.processBlock(B, AVs);309310return Changed;311}312313//314// Utility functions:315//316void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,317RegisterSet &Defs) {318for (auto &Op : MI.operands()) {319if (!Op.isReg() || !Op.isDef())320continue;321Register R = Op.getReg();322if (!R.isVirtual())323continue;324Defs.insert(R);325}326}327328void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,329RegisterSet &Uses) {330for (auto &Op : MI.operands()) {331if (!Op.isReg() || !Op.isUse())332continue;333Register R = Op.getReg();334if (!R.isVirtual())335continue;336Uses.insert(R);337}338}339340// Check if all the bits in range [B, E) in both cells are equal.341bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1,342uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2,343uint16_t W) {344for (uint16_t i = 0; i < W; ++i) {345// If RC1[i] is "bottom", it cannot be proven equal to RC2[i].346if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0)347return false;348// Same for RC2[i].349if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0)350return false;351if (RC1[B1+i] != RC2[B2+i])352return false;353}354return true;355}356357bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC,358uint16_t B, uint16_t W) {359assert(B < RC.width() && B+W <= RC.width());360for (uint16_t i = B; i < B+W; ++i)361if (!RC[i].is(0))362return false;363return true;364}365366bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,367uint16_t B, uint16_t W, uint64_t &U) {368assert(B < RC.width() && B+W <= RC.width());369int64_t T = 0;370for (uint16_t i = B+W; i > B; --i) {371const BitTracker::BitValue &BV = RC[i-1];372T <<= 1;373if (BV.is(1))374T |= 1;375else if (!BV.is(0))376return false;377}378U = T;379return true;380}381382bool HexagonBitSimplify::replaceReg(Register OldR, Register NewR,383MachineRegisterInfo &MRI) {384if (!OldR.isVirtual() || !NewR.isVirtual())385return false;386auto Begin = MRI.use_begin(OldR), End = MRI.use_end();387decltype(End) NextI;388for (auto I = Begin; I != End; I = NextI) {389NextI = std::next(I);390I->setReg(NewR);391}392return Begin != End;393}394395bool HexagonBitSimplify::replaceRegWithSub(Register OldR, Register NewR,396unsigned NewSR,397MachineRegisterInfo &MRI) {398if (!OldR.isVirtual() || !NewR.isVirtual())399return false;400if (hasTiedUse(OldR, MRI, NewSR))401return false;402auto Begin = MRI.use_begin(OldR), End = MRI.use_end();403decltype(End) NextI;404for (auto I = Begin; I != End; I = NextI) {405NextI = std::next(I);406I->setReg(NewR);407I->setSubReg(NewSR);408}409return Begin != End;410}411412bool HexagonBitSimplify::replaceSubWithSub(Register OldR, unsigned OldSR,413Register NewR, unsigned NewSR,414MachineRegisterInfo &MRI) {415if (!OldR.isVirtual() || !NewR.isVirtual())416return false;417if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR))418return false;419auto Begin = MRI.use_begin(OldR), End = MRI.use_end();420decltype(End) NextI;421for (auto I = Begin; I != End; I = NextI) {422NextI = std::next(I);423if (I->getSubReg() != OldSR)424continue;425I->setReg(NewR);426I->setSubReg(NewSR);427}428return Begin != End;429}430431// For a register ref (pair Reg:Sub), set Begin to the position of the LSB432// of Sub in Reg, and set Width to the size of Sub in bits. Return true,433// if this succeeded, otherwise return false.434bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,435unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) {436const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);437if (RR.Sub == 0) {438Begin = 0;439Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC);440return true;441}442443Begin = 0;444445switch (RC->getID()) {446case Hexagon::DoubleRegsRegClassID:447case Hexagon::HvxWRRegClassID:448Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 2;449if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi)450Begin = Width;451break;452default:453return false;454}455return true;456}457458459// For a REG_SEQUENCE, set SL to the low subregister and SH to the high460// subregister.461bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,462BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH,463const MachineRegisterInfo &MRI) {464assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE);465unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm();466auto &DstRC = *MRI.getRegClass(I.getOperand(0).getReg());467auto &HRI = static_cast<const HexagonRegisterInfo&>(468*MRI.getTargetRegisterInfo());469unsigned SubLo = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_lo);470unsigned SubHi = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_hi);471assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo));472if (Sub1 == SubLo && Sub2 == SubHi) {473SL = I.getOperand(1);474SH = I.getOperand(3);475return true;476}477if (Sub1 == SubHi && Sub2 == SubLo) {478SH = I.getOperand(1);479SL = I.getOperand(3);480return true;481}482return false;483}484485// All stores (except 64-bit stores) take a 32-bit register as the source486// of the value to be stored. If the instruction stores into a location487// that is shorter than 32 bits, some bits of the source register are not488// used. For each store instruction, calculate the set of used bits in489// the source register, and set appropriate bits in Bits. Return true if490// the bits are calculated, false otherwise.491bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,492uint16_t Begin) {493using namespace Hexagon;494495switch (Opc) {496// Store byte497case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32498case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new499case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32500case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32501case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32502case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32503case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new504case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new505case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new506case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new507case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32508case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new509case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32510case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32511case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32512case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32513case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new514case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new515case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new516case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new517case S4_storerb_ap: // memb(Re32=#U6)=Rt32518case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new519case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32520case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new521case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32522case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new523case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32524case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new525case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32526case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new527case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32528case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new529case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32530case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new531case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32532case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32533case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32534case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32535case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new536case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new537case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new538case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new539case S2_storerbgp: // memb(gp+#u16:0)=Rt32540case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new541case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32542case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32543case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32544case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32545case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new546case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new547case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new548case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new549Bits.set(Begin, Begin+8);550return true;551552// Store low half553case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32554case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new555case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32556case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32557case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32558case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32559case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new560case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new561case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new562case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new563case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32564case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new565case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32566case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32567case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32568case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32569case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new570case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new571case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new572case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new573case S4_storerh_ap: // memh(Re32=#U6)=Rt32574case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new575case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32576case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new577case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32578case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new579case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32580case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new581case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32582case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new583case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32584case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new585case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32586case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32587case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32588case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32589case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32590case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new591case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new592case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new593case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new594case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new595case S2_storerhgp: // memh(gp+#u16:1)=Rt32596case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new597case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32598case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32599case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32600case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32601case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new602case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new603case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new604case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new605Bits.set(Begin, Begin+16);606return true;607608// Store high half609case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32610case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32611case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32612case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32613case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32614case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32615case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32616case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32617case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32618case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32619case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32620case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32621case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32622case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32623case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32624case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32625case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32626case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32627case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32628case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32629case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32630case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32631case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32632case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32633case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32634case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32635Bits.set(Begin+16, Begin+32);636return true;637}638639return false;640}641642// For an instruction with opcode Opc, calculate the set of bits that it643// uses in a register in operand OpN. This only calculates the set of used644// bits for cases where it does not depend on any operands (as is the case645// in shifts, for example). For concrete instructions from a program, the646// operand may be a subregister of a larger register, while Bits would647// correspond to the larger register in its entirety. Because of that,648// the parameter Begin can be used to indicate which bit of Bits should be649// considered the LSB of the operand.650bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,651BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) {652using namespace Hexagon;653654const MCInstrDesc &D = HII.get(Opc);655if (D.mayStore()) {656if (OpN == D.getNumOperands()-1)657return getUsedBitsInStore(Opc, Bits, Begin);658return false;659}660661switch (Opc) {662// One register source. Used bits: R1[0-7].663case A2_sxtb:664case A2_zxtb:665case A4_cmpbeqi:666case A4_cmpbgti:667case A4_cmpbgtui:668if (OpN == 1) {669Bits.set(Begin, Begin+8);670return true;671}672break;673674// One register source. Used bits: R1[0-15].675case A2_aslh:676case A2_sxth:677case A2_zxth:678case A4_cmpheqi:679case A4_cmphgti:680case A4_cmphgtui:681if (OpN == 1) {682Bits.set(Begin, Begin+16);683return true;684}685break;686687// One register source. Used bits: R1[16-31].688case A2_asrh:689if (OpN == 1) {690Bits.set(Begin+16, Begin+32);691return true;692}693break;694695// Two register sources. Used bits: R1[0-7], R2[0-7].696case A4_cmpbeq:697case A4_cmpbgt:698case A4_cmpbgtu:699if (OpN == 1) {700Bits.set(Begin, Begin+8);701return true;702}703break;704705// Two register sources. Used bits: R1[0-15], R2[0-15].706case A4_cmpheq:707case A4_cmphgt:708case A4_cmphgtu:709case A2_addh_h16_ll:710case A2_addh_h16_sat_ll:711case A2_addh_l16_ll:712case A2_addh_l16_sat_ll:713case A2_combine_ll:714case A2_subh_h16_ll:715case A2_subh_h16_sat_ll:716case A2_subh_l16_ll:717case A2_subh_l16_sat_ll:718case M2_mpy_acc_ll_s0:719case M2_mpy_acc_ll_s1:720case M2_mpy_acc_sat_ll_s0:721case M2_mpy_acc_sat_ll_s1:722case M2_mpy_ll_s0:723case M2_mpy_ll_s1:724case M2_mpy_nac_ll_s0:725case M2_mpy_nac_ll_s1:726case M2_mpy_nac_sat_ll_s0:727case M2_mpy_nac_sat_ll_s1:728case M2_mpy_rnd_ll_s0:729case M2_mpy_rnd_ll_s1:730case M2_mpy_sat_ll_s0:731case M2_mpy_sat_ll_s1:732case M2_mpy_sat_rnd_ll_s0:733case M2_mpy_sat_rnd_ll_s1:734case M2_mpyd_acc_ll_s0:735case M2_mpyd_acc_ll_s1:736case M2_mpyd_ll_s0:737case M2_mpyd_ll_s1:738case M2_mpyd_nac_ll_s0:739case M2_mpyd_nac_ll_s1:740case M2_mpyd_rnd_ll_s0:741case M2_mpyd_rnd_ll_s1:742case M2_mpyu_acc_ll_s0:743case M2_mpyu_acc_ll_s1:744case M2_mpyu_ll_s0:745case M2_mpyu_ll_s1:746case M2_mpyu_nac_ll_s0:747case M2_mpyu_nac_ll_s1:748case M2_mpyud_acc_ll_s0:749case M2_mpyud_acc_ll_s1:750case M2_mpyud_ll_s0:751case M2_mpyud_ll_s1:752case M2_mpyud_nac_ll_s0:753case M2_mpyud_nac_ll_s1:754if (OpN == 1 || OpN == 2) {755Bits.set(Begin, Begin+16);756return true;757}758break;759760// Two register sources. Used bits: R1[0-15], R2[16-31].761case A2_addh_h16_lh:762case A2_addh_h16_sat_lh:763case A2_combine_lh:764case A2_subh_h16_lh:765case A2_subh_h16_sat_lh:766case M2_mpy_acc_lh_s0:767case M2_mpy_acc_lh_s1:768case M2_mpy_acc_sat_lh_s0:769case M2_mpy_acc_sat_lh_s1:770case M2_mpy_lh_s0:771case M2_mpy_lh_s1:772case M2_mpy_nac_lh_s0:773case M2_mpy_nac_lh_s1:774case M2_mpy_nac_sat_lh_s0:775case M2_mpy_nac_sat_lh_s1:776case M2_mpy_rnd_lh_s0:777case M2_mpy_rnd_lh_s1:778case M2_mpy_sat_lh_s0:779case M2_mpy_sat_lh_s1:780case M2_mpy_sat_rnd_lh_s0:781case M2_mpy_sat_rnd_lh_s1:782case M2_mpyd_acc_lh_s0:783case M2_mpyd_acc_lh_s1:784case M2_mpyd_lh_s0:785case M2_mpyd_lh_s1:786case M2_mpyd_nac_lh_s0:787case M2_mpyd_nac_lh_s1:788case M2_mpyd_rnd_lh_s0:789case M2_mpyd_rnd_lh_s1:790case M2_mpyu_acc_lh_s0:791case M2_mpyu_acc_lh_s1:792case M2_mpyu_lh_s0:793case M2_mpyu_lh_s1:794case M2_mpyu_nac_lh_s0:795case M2_mpyu_nac_lh_s1:796case M2_mpyud_acc_lh_s0:797case M2_mpyud_acc_lh_s1:798case M2_mpyud_lh_s0:799case M2_mpyud_lh_s1:800case M2_mpyud_nac_lh_s0:801case M2_mpyud_nac_lh_s1:802// These four are actually LH.803case A2_addh_l16_hl:804case A2_addh_l16_sat_hl:805case A2_subh_l16_hl:806case A2_subh_l16_sat_hl:807if (OpN == 1) {808Bits.set(Begin, Begin+16);809return true;810}811if (OpN == 2) {812Bits.set(Begin+16, Begin+32);813return true;814}815break;816817// Two register sources, used bits: R1[16-31], R2[0-15].818case A2_addh_h16_hl:819case A2_addh_h16_sat_hl:820case A2_combine_hl:821case A2_subh_h16_hl:822case A2_subh_h16_sat_hl:823case M2_mpy_acc_hl_s0:824case M2_mpy_acc_hl_s1:825case M2_mpy_acc_sat_hl_s0:826case M2_mpy_acc_sat_hl_s1:827case M2_mpy_hl_s0:828case M2_mpy_hl_s1:829case M2_mpy_nac_hl_s0:830case M2_mpy_nac_hl_s1:831case M2_mpy_nac_sat_hl_s0:832case M2_mpy_nac_sat_hl_s1:833case M2_mpy_rnd_hl_s0:834case M2_mpy_rnd_hl_s1:835case M2_mpy_sat_hl_s0:836case M2_mpy_sat_hl_s1:837case M2_mpy_sat_rnd_hl_s0:838case M2_mpy_sat_rnd_hl_s1:839case M2_mpyd_acc_hl_s0:840case M2_mpyd_acc_hl_s1:841case M2_mpyd_hl_s0:842case M2_mpyd_hl_s1:843case M2_mpyd_nac_hl_s0:844case M2_mpyd_nac_hl_s1:845case M2_mpyd_rnd_hl_s0:846case M2_mpyd_rnd_hl_s1:847case M2_mpyu_acc_hl_s0:848case M2_mpyu_acc_hl_s1:849case M2_mpyu_hl_s0:850case M2_mpyu_hl_s1:851case M2_mpyu_nac_hl_s0:852case M2_mpyu_nac_hl_s1:853case M2_mpyud_acc_hl_s0:854case M2_mpyud_acc_hl_s1:855case M2_mpyud_hl_s0:856case M2_mpyud_hl_s1:857case M2_mpyud_nac_hl_s0:858case M2_mpyud_nac_hl_s1:859if (OpN == 1) {860Bits.set(Begin+16, Begin+32);861return true;862}863if (OpN == 2) {864Bits.set(Begin, Begin+16);865return true;866}867break;868869// Two register sources, used bits: R1[16-31], R2[16-31].870case A2_addh_h16_hh:871case A2_addh_h16_sat_hh:872case A2_combine_hh:873case A2_subh_h16_hh:874case A2_subh_h16_sat_hh:875case M2_mpy_acc_hh_s0:876case M2_mpy_acc_hh_s1:877case M2_mpy_acc_sat_hh_s0:878case M2_mpy_acc_sat_hh_s1:879case M2_mpy_hh_s0:880case M2_mpy_hh_s1:881case M2_mpy_nac_hh_s0:882case M2_mpy_nac_hh_s1:883case M2_mpy_nac_sat_hh_s0:884case M2_mpy_nac_sat_hh_s1:885case M2_mpy_rnd_hh_s0:886case M2_mpy_rnd_hh_s1:887case M2_mpy_sat_hh_s0:888case M2_mpy_sat_hh_s1:889case M2_mpy_sat_rnd_hh_s0:890case M2_mpy_sat_rnd_hh_s1:891case M2_mpyd_acc_hh_s0:892case M2_mpyd_acc_hh_s1:893case M2_mpyd_hh_s0:894case M2_mpyd_hh_s1:895case M2_mpyd_nac_hh_s0:896case M2_mpyd_nac_hh_s1:897case M2_mpyd_rnd_hh_s0:898case M2_mpyd_rnd_hh_s1:899case M2_mpyu_acc_hh_s0:900case M2_mpyu_acc_hh_s1:901case M2_mpyu_hh_s0:902case M2_mpyu_hh_s1:903case M2_mpyu_nac_hh_s0:904case M2_mpyu_nac_hh_s1:905case M2_mpyud_acc_hh_s0:906case M2_mpyud_acc_hh_s1:907case M2_mpyud_hh_s0:908case M2_mpyud_hh_s1:909case M2_mpyud_nac_hh_s0:910case M2_mpyud_nac_hh_s1:911if (OpN == 1 || OpN == 2) {912Bits.set(Begin+16, Begin+32);913return true;914}915break;916}917918return false;919}920921// Calculate the register class that matches Reg:Sub. For example, if922// %1 is a double register, then %1:isub_hi would match the "int"923// register class.924const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(925const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {926if (!RR.Reg.isVirtual())927return nullptr;928auto *RC = MRI.getRegClass(RR.Reg);929if (RR.Sub == 0)930return RC;931auto &HRI = static_cast<const HexagonRegisterInfo&>(932*MRI.getTargetRegisterInfo());933934auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void {935(void)HRI;936assert(Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_lo) ||937Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_hi));938};939940switch (RC->getID()) {941case Hexagon::DoubleRegsRegClassID:942VerifySR(RC, RR.Sub);943return &Hexagon::IntRegsRegClass;944case Hexagon::HvxWRRegClassID:945VerifySR(RC, RR.Sub);946return &Hexagon::HvxVRRegClass;947}948return nullptr;949}950951// Check if RD could be replaced with RS at any possible use of RD.952// For example a predicate register cannot be replaced with a integer953// register, but a 64-bit register with a subregister can be replaced954// with a 32-bit register.955bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,956const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {957if (!RD.Reg.isVirtual() || !RS.Reg.isVirtual())958return false;959// Return false if one (or both) classes are nullptr.960auto *DRC = getFinalVRegClass(RD, MRI);961if (!DRC)962return false;963964return DRC == getFinalVRegClass(RS, MRI);965}966967bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,968unsigned NewSub) {969if (!PreserveTiedOps)970return false;971return llvm::any_of(MRI.use_operands(Reg),972[NewSub] (const MachineOperand &Op) -> bool {973return Op.getSubReg() != NewSub && Op.isTied();974});975}976977namespace {978979class DeadCodeElimination {980public:981DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)982: MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),983MDT(mdt), MRI(mf.getRegInfo()) {}984985bool run() {986return runOnNode(MDT.getRootNode());987}988989private:990bool isDead(unsigned R) const;991bool runOnNode(MachineDomTreeNode *N);992993MachineFunction &MF;994const HexagonInstrInfo &HII;995MachineDominatorTree &MDT;996MachineRegisterInfo &MRI;997};998999} // end anonymous namespace10001001bool DeadCodeElimination::isDead(unsigned R) const {1002for (const MachineOperand &MO : MRI.use_operands(R)) {1003const MachineInstr *UseI = MO.getParent();1004if (UseI->isDebugInstr())1005continue;1006if (UseI->isPHI()) {1007assert(!UseI->getOperand(0).getSubReg());1008Register DR = UseI->getOperand(0).getReg();1009if (DR == R)1010continue;1011}1012return false;1013}1014return true;1015}10161017bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {1018bool Changed = false;10191020for (auto *DTN : children<MachineDomTreeNode*>(N))1021Changed |= runOnNode(DTN);10221023MachineBasicBlock *B = N->getBlock();1024std::vector<MachineInstr*> Instrs;1025for (MachineInstr &MI : llvm::reverse(*B))1026Instrs.push_back(&MI);10271028for (auto *MI : Instrs) {1029unsigned Opc = MI->getOpcode();1030// Do not touch lifetime markers. This is why the target-independent DCE1031// cannot be used.1032if (Opc == TargetOpcode::LIFETIME_START ||1033Opc == TargetOpcode::LIFETIME_END)1034continue;1035bool Store = false;1036if (MI->isInlineAsm())1037continue;1038// Delete PHIs if possible.1039if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store))1040continue;10411042bool AllDead = true;1043SmallVector<unsigned,2> Regs;1044for (auto &Op : MI->operands()) {1045if (!Op.isReg() || !Op.isDef())1046continue;1047Register R = Op.getReg();1048if (!R.isVirtual() || !isDead(R)) {1049AllDead = false;1050break;1051}1052Regs.push_back(R);1053}1054if (!AllDead)1055continue;10561057B->erase(MI);1058for (unsigned Reg : Regs)1059MRI.markUsesInDebugValueAsUndef(Reg);1060Changed = true;1061}10621063return Changed;1064}10651066namespace {10671068// Eliminate redundant instructions1069//1070// This transformation will identify instructions where the output register1071// is the same as one of its input registers. This only works on instructions1072// that define a single register (unlike post-increment loads, for example).1073// The equality check is actually more detailed: the code calculates which1074// bits of the output are used, and only compares these bits with the input1075// registers.1076// If the output matches an input, the instruction is replaced with COPY.1077// The copies will be removed by another transformation.1078class RedundantInstrElimination : public Transformation {1079public:1080RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,1081const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)1082: Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}10831084bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;10851086private:1087bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,1088unsigned &LostB, unsigned &LostE);1089bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN,1090unsigned &LostB, unsigned &LostE);1091bool computeUsedBits(unsigned Reg, BitVector &Bits);1092bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits,1093uint16_t Begin);1094bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);10951096const HexagonInstrInfo &HII;1097const HexagonRegisterInfo &HRI;1098MachineRegisterInfo &MRI;1099BitTracker &BT;1100};11011102} // end anonymous namespace11031104// Check if the instruction is a lossy shift left, where the input being1105// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range1106// of bit indices that are lost.1107bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,1108unsigned OpN, unsigned &LostB, unsigned &LostE) {1109using namespace Hexagon;11101111unsigned Opc = MI.getOpcode();1112unsigned ImN, RegN, Width;1113switch (Opc) {1114case S2_asl_i_p:1115ImN = 2;1116RegN = 1;1117Width = 64;1118break;1119case S2_asl_i_p_acc:1120case S2_asl_i_p_and:1121case S2_asl_i_p_nac:1122case S2_asl_i_p_or:1123case S2_asl_i_p_xacc:1124ImN = 3;1125RegN = 2;1126Width = 64;1127break;1128case S2_asl_i_r:1129ImN = 2;1130RegN = 1;1131Width = 32;1132break;1133case S2_addasl_rrri:1134case S4_andi_asl_ri:1135case S4_ori_asl_ri:1136case S4_addi_asl_ri:1137case S4_subi_asl_ri:1138case S2_asl_i_r_acc:1139case S2_asl_i_r_and:1140case S2_asl_i_r_nac:1141case S2_asl_i_r_or:1142case S2_asl_i_r_sat:1143case S2_asl_i_r_xacc:1144ImN = 3;1145RegN = 2;1146Width = 32;1147break;1148default:1149return false;1150}11511152if (RegN != OpN)1153return false;11541155assert(MI.getOperand(ImN).isImm());1156unsigned S = MI.getOperand(ImN).getImm();1157if (S == 0)1158return false;1159LostB = Width-S;1160LostE = Width;1161return true;1162}11631164// Check if the instruction is a lossy shift right, where the input being1165// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range1166// of bit indices that are lost.1167bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,1168unsigned OpN, unsigned &LostB, unsigned &LostE) {1169using namespace Hexagon;11701171unsigned Opc = MI.getOpcode();1172unsigned ImN, RegN;1173switch (Opc) {1174case S2_asr_i_p:1175case S2_lsr_i_p:1176ImN = 2;1177RegN = 1;1178break;1179case S2_asr_i_p_acc:1180case S2_asr_i_p_and:1181case S2_asr_i_p_nac:1182case S2_asr_i_p_or:1183case S2_lsr_i_p_acc:1184case S2_lsr_i_p_and:1185case S2_lsr_i_p_nac:1186case S2_lsr_i_p_or:1187case S2_lsr_i_p_xacc:1188ImN = 3;1189RegN = 2;1190break;1191case S2_asr_i_r:1192case S2_lsr_i_r:1193ImN = 2;1194RegN = 1;1195break;1196case S4_andi_lsr_ri:1197case S4_ori_lsr_ri:1198case S4_addi_lsr_ri:1199case S4_subi_lsr_ri:1200case S2_asr_i_r_acc:1201case S2_asr_i_r_and:1202case S2_asr_i_r_nac:1203case S2_asr_i_r_or:1204case S2_lsr_i_r_acc:1205case S2_lsr_i_r_and:1206case S2_lsr_i_r_nac:1207case S2_lsr_i_r_or:1208case S2_lsr_i_r_xacc:1209ImN = 3;1210RegN = 2;1211break;12121213default:1214return false;1215}12161217if (RegN != OpN)1218return false;12191220assert(MI.getOperand(ImN).isImm());1221unsigned S = MI.getOperand(ImN).getImm();1222LostB = 0;1223LostE = S;1224return true;1225}12261227// Calculate the bit vector that corresponds to the used bits of register Reg.1228// The vector Bits has the same size, as the size of Reg in bits. If the cal-1229// culation fails (i.e. the used bits are unknown), it returns false. Other-1230// wise, it returns true and sets the corresponding bits in Bits.1231bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {1232BitVector Used(Bits.size());1233RegisterSet Visited;1234std::vector<unsigned> Pending;1235Pending.push_back(Reg);12361237for (unsigned i = 0; i < Pending.size(); ++i) {1238unsigned R = Pending[i];1239if (Visited.has(R))1240continue;1241Visited.insert(R);1242for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {1243BitTracker::RegisterRef UR = *I;1244unsigned B, W;1245if (!HBS::getSubregMask(UR, B, W, MRI))1246return false;1247MachineInstr &UseI = *I->getParent();1248if (UseI.isPHI() || UseI.isCopy()) {1249Register DefR = UseI.getOperand(0).getReg();1250if (!DefR.isVirtual())1251return false;1252Pending.push_back(DefR);1253} else {1254if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))1255return false;1256}1257}1258}1259Bits |= Used;1260return true;1261}12621263// Calculate the bits used by instruction MI in a register in operand OpN.1264// Return true/false if the calculation succeeds/fails. If is succeeds, set1265// used bits in Bits. This function does not reset any bits in Bits, so1266// subsequent calls over different instructions will result in the union1267// of the used bits in all these instructions.1268// The register in question may be used with a sub-register, whereas Bits1269// holds the bits for the entire register. To keep track of that, the1270// argument Begin indicates where in Bits is the lowest-significant bit1271// of the register used in operand OpN. For example, in instruction:1272// %1 = S2_lsr_i_r %2:isub_hi, 101273// the operand 1 is a 32-bit register, which happens to be a subregister1274// of the 64-bit register %2, and that subregister starts at position 32.1275// In this case Begin=32, since Bits[32] would be the lowest-significant bit1276// of %2:isub_hi.1277bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,1278unsigned OpN, BitVector &Bits, uint16_t Begin) {1279unsigned Opc = MI.getOpcode();1280BitVector T(Bits.size());1281bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);1282// Even if we don't have bits yet, we could still provide some information1283// if the instruction is a lossy shift: the lost bits will be marked as1284// not used.1285unsigned LB, LE;1286if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {1287assert(MI.getOperand(OpN).isReg());1288BitTracker::RegisterRef RR = MI.getOperand(OpN);1289const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);1290uint16_t Width = HRI.getRegSizeInBits(*RC);12911292if (!GotBits)1293T.set(Begin, Begin+Width);1294assert(LB <= LE && LB < Width && LE <= Width);1295T.reset(Begin+LB, Begin+LE);1296GotBits = true;1297}1298if (GotBits)1299Bits |= T;1300return GotBits;1301}13021303// Calculates the used bits in RD ("defined register"), and checks if these1304// bits in RS ("used register") and RD are identical.1305bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,1306BitTracker::RegisterRef RS) {1307const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);1308const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);13091310unsigned DB, DW;1311if (!HBS::getSubregMask(RD, DB, DW, MRI))1312return false;1313unsigned SB, SW;1314if (!HBS::getSubregMask(RS, SB, SW, MRI))1315return false;1316if (SW != DW)1317return false;13181319BitVector Used(DC.width());1320if (!computeUsedBits(RD.Reg, Used))1321return false;13221323for (unsigned i = 0; i != DW; ++i)1324if (Used[i+DB] && DC[DB+i] != SC[SB+i])1325return false;1326return true;1327}13281329bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,1330const RegisterSet&) {1331if (!BT.reached(&B))1332return false;1333bool Changed = false;13341335for (auto I = B.begin(), E = B.end(); I != E; ++I) {1336MachineInstr *MI = &*I;13371338if (MI->getOpcode() == TargetOpcode::COPY)1339continue;1340if (MI->isPHI() || MI->hasUnmodeledSideEffects() || MI->isInlineAsm())1341continue;1342unsigned NumD = MI->getDesc().getNumDefs();1343if (NumD != 1)1344continue;13451346BitTracker::RegisterRef RD = MI->getOperand(0);1347if (!BT.has(RD.Reg))1348continue;1349const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);1350auto At = MachineBasicBlock::iterator(MI);13511352// Find a source operand that is equal to the result.1353for (auto &Op : MI->uses()) {1354if (!Op.isReg())1355continue;1356BitTracker::RegisterRef RS = Op;1357if (!BT.has(RS.Reg))1358continue;1359if (!HBS::isTransparentCopy(RD, RS, MRI))1360continue;13611362unsigned BN, BW;1363if (!HBS::getSubregMask(RS, BN, BW, MRI))1364continue;13651366const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);1367if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))1368continue;13691370// If found, replace the instruction with a COPY.1371const DebugLoc &DL = MI->getDebugLoc();1372const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);1373Register NewR = MRI.createVirtualRegister(FRC);1374MachineInstr *CopyI =1375BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)1376.addReg(RS.Reg, 0, RS.Sub);1377HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);1378// This pass can create copies between registers that don't have the1379// exact same values. Updating the tracker has to involve updating1380// all dependent cells. Example:1381// %1 = inst %2 ; %1 != %2, but used bits are equal1382//1383// %3 = copy %2 ; <- inserted1384// ... = %3 ; <- replaced from %21385// Indirectly, we can create a "copy" between %1 and %2 even1386// though their exact values do not match.1387BT.visit(*CopyI);1388Changed = true;1389break;1390}1391}13921393return Changed;1394}13951396namespace {13971398// Recognize instructions that produce constant values known at compile-time.1399// Replace them with register definitions that load these constants directly.1400class ConstGeneration : public Transformation {1401public:1402ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii,1403MachineRegisterInfo &mri)1404: Transformation(true), HII(hii), MRI(mri), BT(bt) {}14051406bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;1407static bool isTfrConst(const MachineInstr &MI);14081409private:1410Register genTfrConst(const TargetRegisterClass *RC, int64_t C,1411MachineBasicBlock &B, MachineBasicBlock::iterator At,1412DebugLoc &DL);14131414const HexagonInstrInfo &HII;1415MachineRegisterInfo &MRI;1416BitTracker &BT;1417};14181419} // end anonymous namespace14201421bool ConstGeneration::isTfrConst(const MachineInstr &MI) {1422unsigned Opc = MI.getOpcode();1423switch (Opc) {1424case Hexagon::A2_combineii:1425case Hexagon::A4_combineii:1426case Hexagon::A2_tfrsi:1427case Hexagon::A2_tfrpi:1428case Hexagon::PS_true:1429case Hexagon::PS_false:1430case Hexagon::CONST32:1431case Hexagon::CONST64:1432return true;1433}1434return false;1435}14361437// Generate a transfer-immediate instruction that is appropriate for the1438// register class and the actual value being transferred.1439Register ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,1440MachineBasicBlock &B,1441MachineBasicBlock::iterator At,1442DebugLoc &DL) {1443Register Reg = MRI.createVirtualRegister(RC);1444if (RC == &Hexagon::IntRegsRegClass) {1445BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg)1446.addImm(int32_t(C));1447return Reg;1448}14491450if (RC == &Hexagon::DoubleRegsRegClass) {1451if (isInt<8>(C)) {1452BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg)1453.addImm(C);1454return Reg;1455}14561457unsigned Lo = Lo_32(C), Hi = Hi_32(C);1458if (isInt<8>(Lo) || isInt<8>(Hi)) {1459unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii1460: Hexagon::A4_combineii;1461BuildMI(B, At, DL, HII.get(Opc), Reg)1462.addImm(int32_t(Hi))1463.addImm(int32_t(Lo));1464return Reg;1465}1466MachineFunction *MF = B.getParent();1467auto &HST = MF->getSubtarget<HexagonSubtarget>();14681469// Disable CONST64 for tiny core since it takes a LD resource.1470if (!HST.isTinyCore() ||1471MF->getFunction().hasOptSize()) {1472BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg)1473.addImm(C);1474return Reg;1475}1476}14771478if (RC == &Hexagon::PredRegsRegClass) {1479unsigned Opc;1480if (C == 0)1481Opc = Hexagon::PS_false;1482else if ((C & 0xFF) == 0xFF)1483Opc = Hexagon::PS_true;1484else1485return 0;1486BuildMI(B, At, DL, HII.get(Opc), Reg);1487return Reg;1488}14891490return 0;1491}14921493bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {1494if (!BT.reached(&B))1495return false;1496bool Changed = false;1497RegisterSet Defs;14981499for (auto I = B.begin(), E = B.end(); I != E; ++I) {1500if (isTfrConst(*I))1501continue;1502Defs.clear();1503HBS::getInstrDefs(*I, Defs);1504if (Defs.count() != 1)1505continue;1506Register DR = Defs.find_first();1507if (!DR.isVirtual())1508continue;1509uint64_t U;1510const BitTracker::RegisterCell &DRC = BT.lookup(DR);1511if (HBS::getConst(DRC, 0, DRC.width(), U)) {1512int64_t C = U;1513DebugLoc DL = I->getDebugLoc();1514auto At = I->isPHI() ? B.getFirstNonPHI() : I;1515Register ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);1516if (ImmReg) {1517HBS::replaceReg(DR, ImmReg, MRI);1518BT.put(ImmReg, DRC);1519Changed = true;1520}1521}1522}1523return Changed;1524}15251526namespace {15271528// Identify pairs of available registers which hold identical values.1529// In such cases, only one of them needs to be calculated, the other one1530// will be defined as a copy of the first.1531class CopyGeneration : public Transformation {1532public:1533CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,1534const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)1535: Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}15361537bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;15381539private:1540bool findMatch(const BitTracker::RegisterRef &Inp,1541BitTracker::RegisterRef &Out, const RegisterSet &AVs);15421543const HexagonInstrInfo &HII;1544const HexagonRegisterInfo &HRI;1545MachineRegisterInfo &MRI;1546BitTracker &BT;1547RegisterSet Forbidden;1548};15491550// Eliminate register copies RD = RS, by replacing the uses of RD with1551// with uses of RS.1552class CopyPropagation : public Transformation {1553public:1554CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)1555: Transformation(false), HRI(hri), MRI(mri) {}15561557bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;15581559static bool isCopyReg(unsigned Opc, bool NoConv);15601561private:1562bool propagateRegCopy(MachineInstr &MI);15631564const HexagonRegisterInfo &HRI;1565MachineRegisterInfo &MRI;1566};15671568} // end anonymous namespace15691570/// Check if there is a register in AVs that is identical to Inp. If so,1571/// set Out to the found register. The output may be a pair Reg:Sub.1572bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,1573BitTracker::RegisterRef &Out, const RegisterSet &AVs) {1574if (!BT.has(Inp.Reg))1575return false;1576const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);1577auto *FRC = HBS::getFinalVRegClass(Inp, MRI);1578unsigned B, W;1579if (!HBS::getSubregMask(Inp, B, W, MRI))1580return false;15811582for (Register R = AVs.find_first(); R; R = AVs.find_next(R)) {1583if (!BT.has(R) || Forbidden[R])1584continue;1585const BitTracker::RegisterCell &RC = BT.lookup(R);1586unsigned RW = RC.width();1587if (W == RW) {1588if (FRC != MRI.getRegClass(R))1589continue;1590if (!HBS::isTransparentCopy(R, Inp, MRI))1591continue;1592if (!HBS::isEqual(InpRC, B, RC, 0, W))1593continue;1594Out.Reg = R;1595Out.Sub = 0;1596return true;1597}1598// Check if there is a super-register, whose part (with a subregister)1599// is equal to the input.1600// Only do double registers for now.1601if (W*2 != RW)1602continue;1603if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)1604continue;16051606if (HBS::isEqual(InpRC, B, RC, 0, W))1607Out.Sub = Hexagon::isub_lo;1608else if (HBS::isEqual(InpRC, B, RC, W, W))1609Out.Sub = Hexagon::isub_hi;1610else1611continue;1612Out.Reg = R;1613if (HBS::isTransparentCopy(Out, Inp, MRI))1614return true;1615}1616return false;1617}16181619bool CopyGeneration::processBlock(MachineBasicBlock &B,1620const RegisterSet &AVs) {1621if (!BT.reached(&B))1622return false;1623RegisterSet AVB(AVs);1624bool Changed = false;1625RegisterSet Defs;16261627for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {1628Defs.clear();1629HBS::getInstrDefs(*I, Defs);16301631unsigned Opc = I->getOpcode();1632if (CopyPropagation::isCopyReg(Opc, false) ||1633ConstGeneration::isTfrConst(*I))1634continue;16351636DebugLoc DL = I->getDebugLoc();1637auto At = I->isPHI() ? B.getFirstNonPHI() : I;16381639for (Register R = Defs.find_first(); R; R = Defs.find_next(R)) {1640BitTracker::RegisterRef MR;1641auto *FRC = HBS::getFinalVRegClass(R, MRI);16421643if (findMatch(R, MR, AVB)) {1644Register NewR = MRI.createVirtualRegister(FRC);1645BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)1646.addReg(MR.Reg, 0, MR.Sub);1647BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));1648HBS::replaceReg(R, NewR, MRI);1649Forbidden.insert(R);1650continue;1651}16521653if (FRC == &Hexagon::DoubleRegsRegClass ||1654FRC == &Hexagon::HvxWRRegClass) {1655// Try to generate REG_SEQUENCE.1656unsigned SubLo = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_lo);1657unsigned SubHi = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_hi);1658BitTracker::RegisterRef TL = { R, SubLo };1659BitTracker::RegisterRef TH = { R, SubHi };1660BitTracker::RegisterRef ML, MH;1661if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) {1662auto *FRC = HBS::getFinalVRegClass(R, MRI);1663Register NewR = MRI.createVirtualRegister(FRC);1664BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR)1665.addReg(ML.Reg, 0, ML.Sub)1666.addImm(SubLo)1667.addReg(MH.Reg, 0, MH.Sub)1668.addImm(SubHi);1669BT.put(BitTracker::RegisterRef(NewR), BT.get(R));1670HBS::replaceReg(R, NewR, MRI);1671Forbidden.insert(R);1672}1673}1674}1675}16761677return Changed;1678}16791680bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) {1681switch (Opc) {1682case TargetOpcode::COPY:1683case TargetOpcode::REG_SEQUENCE:1684case Hexagon::A4_combineir:1685case Hexagon::A4_combineri:1686return true;1687case Hexagon::A2_tfr:1688case Hexagon::A2_tfrp:1689case Hexagon::A2_combinew:1690case Hexagon::V6_vcombine:1691return NoConv;1692default:1693break;1694}1695return false;1696}16971698bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {1699bool Changed = false;1700unsigned Opc = MI.getOpcode();1701BitTracker::RegisterRef RD = MI.getOperand(0);1702assert(MI.getOperand(0).getSubReg() == 0);17031704switch (Opc) {1705case TargetOpcode::COPY:1706case Hexagon::A2_tfr:1707case Hexagon::A2_tfrp: {1708BitTracker::RegisterRef RS = MI.getOperand(1);1709if (!HBS::isTransparentCopy(RD, RS, MRI))1710break;1711if (RS.Sub != 0)1712Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);1713else1714Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);1715break;1716}1717case TargetOpcode::REG_SEQUENCE: {1718BitTracker::RegisterRef SL, SH;1719if (HBS::parseRegSequence(MI, SL, SH, MRI)) {1720const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg);1721unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);1722unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);1723Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, SL.Reg, SL.Sub, MRI);1724Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, SH.Reg, SH.Sub, MRI);1725}1726break;1727}1728case Hexagon::A2_combinew:1729case Hexagon::V6_vcombine: {1730const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg);1731unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);1732unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);1733BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);1734Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, RL.Reg, RL.Sub, MRI);1735Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, RH.Reg, RH.Sub, MRI);1736break;1737}1738case Hexagon::A4_combineir:1739case Hexagon::A4_combineri: {1740unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;1741unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::isub_lo1742: Hexagon::isub_hi;1743BitTracker::RegisterRef RS = MI.getOperand(SrcX);1744Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);1745break;1746}1747}1748return Changed;1749}17501751bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {1752std::vector<MachineInstr*> Instrs;1753for (MachineInstr &MI : llvm::reverse(B))1754Instrs.push_back(&MI);17551756bool Changed = false;1757for (auto *I : Instrs) {1758unsigned Opc = I->getOpcode();1759if (!CopyPropagation::isCopyReg(Opc, true))1760continue;1761Changed |= propagateRegCopy(*I);1762}17631764return Changed;1765}17661767namespace {17681769// Recognize patterns that can be simplified and replace them with the1770// simpler forms.1771// This is by no means complete1772class BitSimplification : public Transformation {1773public:1774BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt,1775const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri,1776MachineRegisterInfo &mri, MachineFunction &mf)1777: Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri),1778MF(mf), BT(bt) {}17791780bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;17811782private:1783struct RegHalf : public BitTracker::RegisterRef {1784bool Low; // Low/High halfword.1785};17861787bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC,1788unsigned B, RegHalf &RH);1789bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum);17901791bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC,1792BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt);1793unsigned getCombineOpcode(bool HLow, bool LLow);17941795bool genStoreUpperHalf(MachineInstr *MI);1796bool genStoreImmediate(MachineInstr *MI);1797bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD,1798const BitTracker::RegisterCell &RC);1799bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD,1800const BitTracker::RegisterCell &RC);1801bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD,1802const BitTracker::RegisterCell &RC);1803bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,1804const BitTracker::RegisterCell &RC);1805bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD,1806const BitTracker::RegisterCell &RC, const RegisterSet &AVs);1807bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,1808const BitTracker::RegisterCell &RC);1809bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,1810const BitTracker::RegisterCell &RC, const RegisterSet &AVs);1811bool simplifyRCmp0(MachineInstr *MI, BitTracker::RegisterRef RD);18121813// Cache of created instructions to avoid creating duplicates.1814// XXX Currently only used by genBitSplit.1815std::vector<MachineInstr*> NewMIs;18161817const MachineDominatorTree &MDT;1818const HexagonInstrInfo &HII;1819const HexagonRegisterInfo &HRI;1820MachineRegisterInfo &MRI;1821MachineFunction &MF;1822BitTracker &BT;1823};18241825} // end anonymous namespace18261827// Check if the bits [B..B+16) in register cell RC form a valid halfword,1828// i.e. [0..16), [16..32), etc. of some register. If so, return true and1829// set the information about the found register in RH.1830bool BitSimplification::matchHalf(unsigned SelfR,1831const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) {1832// XXX This could be searching in the set of available registers, in case1833// the match is not exact.18341835// Match 16-bit chunks, where the RC[B..B+15] references exactly one1836// register and all the bits B..B+15 match between RC and the register.1837// This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },1838// and RC = { [0]:0 [1-15]:v1[1-15]... }.1839bool Low = false;1840unsigned I = B;1841while (I < B+16 && RC[I].num())1842I++;1843if (I == B+16)1844return false;18451846Register Reg = RC[I].RefI.Reg;1847unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B.1848if (P < I-B)1849return false;1850unsigned Pos = P - (I-B);18511852if (Reg == 0 || Reg == SelfR) // Don't match "self".1853return false;1854if (!Reg.isVirtual())1855return false;1856if (!BT.has(Reg))1857return false;18581859const BitTracker::RegisterCell &SC = BT.lookup(Reg);1860if (Pos+16 > SC.width())1861return false;18621863for (unsigned i = 0; i < 16; ++i) {1864const BitTracker::BitValue &RV = RC[i+B];1865if (RV.Type == BitTracker::BitValue::Ref) {1866if (RV.RefI.Reg != Reg)1867return false;1868if (RV.RefI.Pos != i+Pos)1869return false;1870continue;1871}1872if (RC[i+B] != SC[i+Pos])1873return false;1874}18751876unsigned Sub = 0;1877switch (Pos) {1878case 0:1879Sub = Hexagon::isub_lo;1880Low = true;1881break;1882case 16:1883Sub = Hexagon::isub_lo;1884Low = false;1885break;1886case 32:1887Sub = Hexagon::isub_hi;1888Low = true;1889break;1890case 48:1891Sub = Hexagon::isub_hi;1892Low = false;1893break;1894default:1895return false;1896}18971898RH.Reg = Reg;1899RH.Sub = Sub;1900RH.Low = Low;1901// If the subregister is not valid with the register, set it to 0.1902if (!HBS::getFinalVRegClass(RH, MRI))1903RH.Sub = 0;19041905return true;1906}19071908bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc,1909unsigned OpNum) {1910auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF);1911auto *RRC = HBS::getFinalVRegClass(R, MRI);1912return OpRC->hasSubClassEq(RRC);1913}19141915// Check if RC matches the pattern of a S2_packhl. If so, return true and1916// set the inputs Rs and Rt.1917bool BitSimplification::matchPackhl(unsigned SelfR,1918const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs,1919BitTracker::RegisterRef &Rt) {1920RegHalf L1, H1, L2, H2;19211922if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1))1923return false;1924if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))1925return false;19261927// Rs = H1.L1, Rt = H2.L21928if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)1929return false;1930if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)1931return false;19321933Rs = H1;1934Rt = H2;1935return true;1936}19371938unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {1939return HLow ? LLow ? Hexagon::A2_combine_ll1940: Hexagon::A2_combine_lh1941: LLow ? Hexagon::A2_combine_hl1942: Hexagon::A2_combine_hh;1943}19441945// If MI stores the upper halfword of a register (potentially obtained via1946// shifts or extracts), replace it with a storerf instruction. This could1947// cause the "extraction" code to become dead.1948bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {1949unsigned Opc = MI->getOpcode();1950if (Opc != Hexagon::S2_storerh_io)1951return false;19521953MachineOperand &ValOp = MI->getOperand(2);1954BitTracker::RegisterRef RS = ValOp;1955if (!BT.has(RS.Reg))1956return false;1957const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);1958RegHalf H;1959unsigned B = (RS.Sub == Hexagon::isub_hi) ? 32 : 0;1960if (!matchHalf(0, RC, B, H))1961return false;1962if (H.Low)1963return false;1964MI->setDesc(HII.get(Hexagon::S2_storerf_io));1965ValOp.setReg(H.Reg);1966ValOp.setSubReg(H.Sub);1967return true;1968}19691970// If MI stores a value known at compile-time, and the value is within a range1971// that avoids using constant-extenders, replace it with a store-immediate.1972bool BitSimplification::genStoreImmediate(MachineInstr *MI) {1973unsigned Opc = MI->getOpcode();1974unsigned Align = 0;1975switch (Opc) {1976case Hexagon::S2_storeri_io:1977Align++;1978[[fallthrough]];1979case Hexagon::S2_storerh_io:1980Align++;1981[[fallthrough]];1982case Hexagon::S2_storerb_io:1983break;1984default:1985return false;1986}19871988// Avoid stores to frame-indices (due to an unknown offset).1989if (!MI->getOperand(0).isReg())1990return false;1991MachineOperand &OffOp = MI->getOperand(1);1992if (!OffOp.isImm())1993return false;19941995int64_t Off = OffOp.getImm();1996// Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).1997if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))1998return false;1999// Source register:2000BitTracker::RegisterRef RS = MI->getOperand(2);2001if (!BT.has(RS.Reg))2002return false;2003const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);2004uint64_t U;2005if (!HBS::getConst(RC, 0, RC.width(), U))2006return false;20072008// Only consider 8-bit values to avoid constant-extenders.2009int V;2010switch (Opc) {2011case Hexagon::S2_storerb_io:2012V = int8_t(U);2013break;2014case Hexagon::S2_storerh_io:2015V = int16_t(U);2016break;2017case Hexagon::S2_storeri_io:2018V = int32_t(U);2019break;2020default:2021// Opc is already checked above to be one of the three store instructions.2022// This silences a -Wuninitialized false positive on GCC 5.4.2023llvm_unreachable("Unexpected store opcode");2024}2025if (!isInt<8>(V))2026return false;20272028MI->removeOperand(2);2029switch (Opc) {2030case Hexagon::S2_storerb_io:2031MI->setDesc(HII.get(Hexagon::S4_storeirb_io));2032break;2033case Hexagon::S2_storerh_io:2034MI->setDesc(HII.get(Hexagon::S4_storeirh_io));2035break;2036case Hexagon::S2_storeri_io:2037MI->setDesc(HII.get(Hexagon::S4_storeiri_io));2038break;2039}2040MI->addOperand(MachineOperand::CreateImm(V));2041return true;2042}20432044// If MI is equivalent o S2_packhl, generate the S2_packhl. MI could be the2045// last instruction in a sequence that results in something equivalent to2046// the pack-halfwords. The intent is to cause the entire sequence to become2047// dead.2048bool BitSimplification::genPackhl(MachineInstr *MI,2049BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {2050unsigned Opc = MI->getOpcode();2051if (Opc == Hexagon::S2_packhl)2052return false;2053BitTracker::RegisterRef Rs, Rt;2054if (!matchPackhl(RD.Reg, RC, Rs, Rt))2055return false;2056if (!validateReg(Rs, Hexagon::S2_packhl, 1) ||2057!validateReg(Rt, Hexagon::S2_packhl, 2))2058return false;20592060MachineBasicBlock &B = *MI->getParent();2061Register NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);2062DebugLoc DL = MI->getDebugLoc();2063auto At = MI->isPHI() ? B.getFirstNonPHI()2064: MachineBasicBlock::iterator(MI);2065BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)2066.addReg(Rs.Reg, 0, Rs.Sub)2067.addReg(Rt.Reg, 0, Rt.Sub);2068HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);2069BT.put(BitTracker::RegisterRef(NewR), RC);2070return true;2071}20722073// If MI produces halfword of the input in the low half of the output,2074// replace it with zero-extend or extractu.2075bool BitSimplification::genExtractHalf(MachineInstr *MI,2076BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {2077RegHalf L;2078// Check for halfword in low 16 bits, zeros elsewhere.2079if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))2080return false;20812082unsigned Opc = MI->getOpcode();2083MachineBasicBlock &B = *MI->getParent();2084DebugLoc DL = MI->getDebugLoc();20852086// Prefer zxth, since zxth can go in any slot, while extractu only in2087// slots 2 and 3.2088unsigned NewR = 0;2089auto At = MI->isPHI() ? B.getFirstNonPHI()2090: MachineBasicBlock::iterator(MI);2091if (L.Low && Opc != Hexagon::A2_zxth) {2092if (validateReg(L, Hexagon::A2_zxth, 1)) {2093NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);2094BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)2095.addReg(L.Reg, 0, L.Sub);2096}2097} else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) {2098if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) {2099NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);2100BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)2101.addReg(L.Reg, 0, L.Sub)2102.addImm(16);2103}2104}2105if (NewR == 0)2106return false;2107HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);2108BT.put(BitTracker::RegisterRef(NewR), RC);2109return true;2110}21112112// If MI is equivalent to a combine(.L/.H, .L/.H) replace with with the2113// combine.2114bool BitSimplification::genCombineHalf(MachineInstr *MI,2115BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {2116RegHalf L, H;2117// Check for combine h/l2118if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))2119return false;2120// Do nothing if this is just a reg copy.2121if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)2122return false;21232124unsigned Opc = MI->getOpcode();2125unsigned COpc = getCombineOpcode(H.Low, L.Low);2126if (COpc == Opc)2127return false;2128if (!validateReg(H, COpc, 1) || !validateReg(L, COpc, 2))2129return false;21302131MachineBasicBlock &B = *MI->getParent();2132DebugLoc DL = MI->getDebugLoc();2133Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);2134auto At = MI->isPHI() ? B.getFirstNonPHI()2135: MachineBasicBlock::iterator(MI);2136BuildMI(B, At, DL, HII.get(COpc), NewR)2137.addReg(H.Reg, 0, H.Sub)2138.addReg(L.Reg, 0, L.Sub);2139HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);2140BT.put(BitTracker::RegisterRef(NewR), RC);2141return true;2142}21432144// If MI resets high bits of a register and keeps the lower ones, replace it2145// with zero-extend byte/half, and-immediate, or extractu, as appropriate.2146bool BitSimplification::genExtractLow(MachineInstr *MI,2147BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {2148unsigned Opc = MI->getOpcode();2149switch (Opc) {2150case Hexagon::A2_zxtb:2151case Hexagon::A2_zxth:2152case Hexagon::S2_extractu:2153return false;2154}2155if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) {2156int32_t Imm = MI->getOperand(2).getImm();2157if (isInt<10>(Imm))2158return false;2159}21602161if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())2162return false;2163unsigned W = RC.width();2164while (W > 0 && RC[W-1].is(0))2165W--;2166if (W == 0 || W == RC.width())2167return false;2168unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb2169: (W == 16) ? Hexagon::A2_zxth2170: (W < 10) ? Hexagon::A2_andir2171: Hexagon::S2_extractu;2172MachineBasicBlock &B = *MI->getParent();2173DebugLoc DL = MI->getDebugLoc();21742175for (auto &Op : MI->uses()) {2176if (!Op.isReg())2177continue;2178BitTracker::RegisterRef RS = Op;2179if (!BT.has(RS.Reg))2180continue;2181const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);2182unsigned BN, BW;2183if (!HBS::getSubregMask(RS, BN, BW, MRI))2184continue;2185if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))2186continue;2187if (!validateReg(RS, NewOpc, 1))2188continue;21892190Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);2191auto At = MI->isPHI() ? B.getFirstNonPHI()2192: MachineBasicBlock::iterator(MI);2193auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR)2194.addReg(RS.Reg, 0, RS.Sub);2195if (NewOpc == Hexagon::A2_andir)2196MIB.addImm((1 << W) - 1);2197else if (NewOpc == Hexagon::S2_extractu)2198MIB.addImm(W).addImm(0);2199HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);2200BT.put(BitTracker::RegisterRef(NewR), RC);2201return true;2202}2203return false;2204}22052206bool BitSimplification::genBitSplit(MachineInstr *MI,2207BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,2208const RegisterSet &AVs) {2209if (!GenBitSplit)2210return false;2211if (MaxBitSplit.getNumOccurrences()) {2212if (CountBitSplit >= MaxBitSplit)2213return false;2214}22152216unsigned Opc = MI->getOpcode();2217switch (Opc) {2218case Hexagon::A4_bitsplit:2219case Hexagon::A4_bitspliti:2220return false;2221}22222223unsigned W = RC.width();2224if (W != 32)2225return false;22262227auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned {2228unsigned Z = C.width();2229while (Z > 0 && C[Z-1].is(0))2230--Z;2231return C.width() - Z;2232};22332234// Count the number of leading zeros in the target RC.2235unsigned Z = ctlz(RC);2236if (Z == 0 || Z == W)2237return false;22382239// A simplistic analysis: assume the source register (the one being split)2240// is fully unknown, and that all its bits are self-references.2241const BitTracker::BitValue &B0 = RC[0];2242if (B0.Type != BitTracker::BitValue::Ref)2243return false;22442245unsigned SrcR = B0.RefI.Reg;2246unsigned SrcSR = 0;2247unsigned Pos = B0.RefI.Pos;22482249// All the non-zero bits should be consecutive bits from the same register.2250for (unsigned i = 1; i < W-Z; ++i) {2251const BitTracker::BitValue &V = RC[i];2252if (V.Type != BitTracker::BitValue::Ref)2253return false;2254if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i)2255return false;2256}22572258// Now, find the other bitfield among AVs.2259for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {2260// The number of leading zeros here should be the number of trailing2261// non-zeros in RC.2262unsigned SRC = MRI.getRegClass(S)->getID();2263if (SRC != Hexagon::IntRegsRegClassID &&2264SRC != Hexagon::DoubleRegsRegClassID)2265continue;2266if (!BT.has(S))2267continue;2268const BitTracker::RegisterCell &SC = BT.lookup(S);2269if (SC.width() != W || ctlz(SC) != W-Z)2270continue;2271// The Z lower bits should now match SrcR.2272const BitTracker::BitValue &S0 = SC[0];2273if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR)2274continue;2275unsigned P = S0.RefI.Pos;22762277if (Pos <= P && (Pos + W-Z) != P)2278continue;2279if (P < Pos && (P + Z) != Pos)2280continue;2281// The starting bitfield position must be at a subregister boundary.2282if (std::min(P, Pos) != 0 && std::min(P, Pos) != 32)2283continue;22842285unsigned I;2286for (I = 1; I < Z; ++I) {2287const BitTracker::BitValue &V = SC[I];2288if (V.Type != BitTracker::BitValue::Ref)2289break;2290if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I)2291break;2292}2293if (I != Z)2294continue;22952296// Generate bitsplit where S is defined.2297if (MaxBitSplit.getNumOccurrences())2298CountBitSplit++;2299MachineInstr *DefS = MRI.getVRegDef(S);2300assert(DefS != nullptr);2301DebugLoc DL = DefS->getDebugLoc();2302MachineBasicBlock &B = *DefS->getParent();2303auto At = DefS->isPHI() ? B.getFirstNonPHI()2304: MachineBasicBlock::iterator(DefS);2305if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID)2306SrcSR = (std::min(Pos, P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;2307if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1))2308continue;2309unsigned ImmOp = Pos <= P ? W-Z : Z;23102311// Find an existing bitsplit instruction if one already exists.2312unsigned NewR = 0;2313for (MachineInstr *In : NewMIs) {2314if (In->getOpcode() != Hexagon::A4_bitspliti)2315continue;2316MachineOperand &Op1 = In->getOperand(1);2317if (Op1.getReg() != SrcR || Op1.getSubReg() != SrcSR)2318continue;2319if (In->getOperand(2).getImm() != ImmOp)2320continue;2321// Check if the target register is available here.2322MachineOperand &Op0 = In->getOperand(0);2323MachineInstr *DefI = MRI.getVRegDef(Op0.getReg());2324assert(DefI != nullptr);2325if (!MDT.dominates(DefI, &*At))2326continue;23272328// Found one that can be reused.2329assert(Op0.getSubReg() == 0);2330NewR = Op0.getReg();2331break;2332}2333if (!NewR) {2334NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);2335auto NewBS = BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR)2336.addReg(SrcR, 0, SrcSR)2337.addImm(ImmOp);2338NewMIs.push_back(NewBS);2339}2340if (Pos <= P) {2341HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI);2342HBS::replaceRegWithSub(S, NewR, Hexagon::isub_hi, MRI);2343} else {2344HBS::replaceRegWithSub(S, NewR, Hexagon::isub_lo, MRI);2345HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI);2346}2347return true;2348}23492350return false;2351}23522353// Check for tstbit simplification opportunity, where the bit being checked2354// can be tracked back to another register. For example:2355// %2 = S2_lsr_i_r %1, 52356// %3 = S2_tstbit_i %2, 02357// =>2358// %3 = S2_tstbit_i %1, 52359bool BitSimplification::simplifyTstbit(MachineInstr *MI,2360BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {2361unsigned Opc = MI->getOpcode();2362if (Opc != Hexagon::S2_tstbit_i)2363return false;23642365unsigned BN = MI->getOperand(2).getImm();2366BitTracker::RegisterRef RS = MI->getOperand(1);2367unsigned F, W;2368DebugLoc DL = MI->getDebugLoc();2369if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))2370return false;2371MachineBasicBlock &B = *MI->getParent();2372auto At = MI->isPHI() ? B.getFirstNonPHI()2373: MachineBasicBlock::iterator(MI);23742375const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);2376const BitTracker::BitValue &V = SC[F+BN];2377if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) {2378const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg);2379// Need to map V.RefI.Reg to a 32-bit register, i.e. if it is2380// a double register, need to use a subregister and adjust bit2381// number.2382unsigned P = std::numeric_limits<unsigned>::max();2383BitTracker::RegisterRef RR(V.RefI.Reg, 0);2384if (TC == &Hexagon::DoubleRegsRegClass) {2385P = V.RefI.Pos;2386RR.Sub = Hexagon::isub_lo;2387if (P >= 32) {2388P -= 32;2389RR.Sub = Hexagon::isub_hi;2390}2391} else if (TC == &Hexagon::IntRegsRegClass) {2392P = V.RefI.Pos;2393}2394if (P != std::numeric_limits<unsigned>::max()) {2395Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);2396BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)2397.addReg(RR.Reg, 0, RR.Sub)2398.addImm(P);2399HBS::replaceReg(RD.Reg, NewR, MRI);2400BT.put(NewR, RC);2401return true;2402}2403} else if (V.is(0) || V.is(1)) {2404Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);2405unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true;2406BuildMI(B, At, DL, HII.get(NewOpc), NewR);2407HBS::replaceReg(RD.Reg, NewR, MRI);2408return true;2409}24102411return false;2412}24132414// Detect whether RD is a bitfield extract (sign- or zero-extended) of2415// some register from the AVs set. Create a new corresponding instruction2416// at the location of MI. The intent is to recognize situations where2417// a sequence of instructions performs an operation that is equivalent to2418// an extract operation, such as a shift left followed by a shift right.2419bool BitSimplification::simplifyExtractLow(MachineInstr *MI,2420BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,2421const RegisterSet &AVs) {2422if (!GenExtract)2423return false;2424if (MaxExtract.getNumOccurrences()) {2425if (CountExtract >= MaxExtract)2426return false;2427CountExtract++;2428}24292430unsigned W = RC.width();2431unsigned RW = W;2432unsigned Len;2433bool Signed;24342435// The code is mostly class-independent, except for the part that generates2436// the extract instruction, and establishes the source register (in case it2437// needs to use a subregister).2438const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);2439if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)2440return false;2441assert(RD.Sub == 0);24422443// Observation:2444// If the cell has a form of 00..0xx..x with k zeros and n remaining2445// bits, this could be an extractu of the n bits, but it could also be2446// an extractu of a longer field which happens to have 0s in the top2447// bit positions.2448// The same logic applies to sign-extended fields.2449//2450// Do not check for the extended extracts, since it would expand the2451// search space quite a bit. The search may be expensive as it is.24522453const BitTracker::BitValue &TopV = RC[W-1];24542455// Eliminate candidates that have self-referential bits, since they2456// cannot be extracts from other registers. Also, skip registers that2457// have compile-time constant values.2458bool IsConst = true;2459for (unsigned I = 0; I != W; ++I) {2460const BitTracker::BitValue &V = RC[I];2461if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg)2462return false;2463IsConst = IsConst && (V.is(0) || V.is(1));2464}2465if (IsConst)2466return false;24672468if (TopV.is(0) || TopV.is(1)) {2469bool S = TopV.is(1);2470for (--W; W > 0 && RC[W-1].is(S); --W)2471;2472Len = W;2473Signed = S;2474// The sign bit must be a part of the field being extended.2475if (Signed)2476++Len;2477} else {2478// This could still be a sign-extended extract.2479assert(TopV.Type == BitTracker::BitValue::Ref);2480if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1)2481return false;2482for (--W; W > 0 && RC[W-1] == TopV; --W)2483;2484// The top bits of RC are copies of TopV. One occurrence of TopV will2485// be a part of the field.2486Len = W + 1;2487Signed = true;2488}24892490// This would be just a copy. It should be handled elsewhere.2491if (Len == RW)2492return false;24932494LLVM_DEBUG({2495dbgs() << __func__ << " on reg: " << printReg(RD.Reg, &HRI, RD.Sub)2496<< ", MI: " << *MI;2497dbgs() << "Cell: " << RC << '\n';2498dbgs() << "Expected bitfield size: " << Len << " bits, "2499<< (Signed ? "sign" : "zero") << "-extended\n";2500});25012502bool Changed = false;25032504for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) {2505if (!BT.has(R))2506continue;2507const BitTracker::RegisterCell &SC = BT.lookup(R);2508unsigned SW = SC.width();25092510// The source can be longer than the destination, as long as its size is2511// a multiple of the size of the destination. Also, we would need to be2512// able to refer to the subregister in the source that would be of the2513// same size as the destination, but only check the sizes here.2514if (SW < RW || (SW % RW) != 0)2515continue;25162517// The field can start at any offset in SC as long as it contains Len2518// bits and does not cross subregister boundary (if the source register2519// is longer than the destination).2520unsigned Off = 0;2521while (Off <= SW-Len) {2522unsigned OE = (Off+Len)/RW;2523if (OE != Off/RW) {2524// The assumption here is that if the source (R) is longer than the2525// destination, then the destination is a sequence of words of2526// size RW, and each such word in R can be accessed via a subregister.2527//2528// If the beginning and the end of the field cross the subregister2529// boundary, advance to the next subregister.2530Off = OE*RW;2531continue;2532}2533if (HBS::isEqual(RC, 0, SC, Off, Len))2534break;2535++Off;2536}25372538if (Off > SW-Len)2539continue;25402541// Found match.2542unsigned ExtOpc = 0;2543if (Off == 0) {2544if (Len == 8)2545ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;2546else if (Len == 16)2547ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;2548else if (Len < 10 && !Signed)2549ExtOpc = Hexagon::A2_andir;2550}2551if (ExtOpc == 0) {2552ExtOpc =2553Signed ? (RW == 32 ? Hexagon::S4_extract : Hexagon::S4_extractp)2554: (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);2555}2556unsigned SR = 0;2557// This only recognizes isub_lo and isub_hi.2558if (RW != SW && RW*2 != SW)2559continue;2560if (RW != SW)2561SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;2562Off = Off % RW;25632564if (!validateReg({R,SR}, ExtOpc, 1))2565continue;25662567// Don't generate the same instruction as the one being optimized.2568if (MI->getOpcode() == ExtOpc) {2569// All possible ExtOpc's have the source in operand(1).2570const MachineOperand &SrcOp = MI->getOperand(1);2571if (SrcOp.getReg() == R)2572continue;2573}25742575DebugLoc DL = MI->getDebugLoc();2576MachineBasicBlock &B = *MI->getParent();2577Register NewR = MRI.createVirtualRegister(FRC);2578auto At = MI->isPHI() ? B.getFirstNonPHI()2579: MachineBasicBlock::iterator(MI);2580auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR)2581.addReg(R, 0, SR);2582switch (ExtOpc) {2583case Hexagon::A2_sxtb:2584case Hexagon::A2_zxtb:2585case Hexagon::A2_sxth:2586case Hexagon::A2_zxth:2587break;2588case Hexagon::A2_andir:2589MIB.addImm((1u << Len) - 1);2590break;2591case Hexagon::S4_extract:2592case Hexagon::S2_extractu:2593case Hexagon::S4_extractp:2594case Hexagon::S2_extractup:2595MIB.addImm(Len)2596.addImm(Off);2597break;2598default:2599llvm_unreachable("Unexpected opcode");2600}26012602HBS::replaceReg(RD.Reg, NewR, MRI);2603BT.put(BitTracker::RegisterRef(NewR), RC);2604Changed = true;2605break;2606}26072608return Changed;2609}26102611bool BitSimplification::simplifyRCmp0(MachineInstr *MI,2612BitTracker::RegisterRef RD) {2613unsigned Opc = MI->getOpcode();2614if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi)2615return false;2616MachineOperand &CmpOp = MI->getOperand(2);2617if (!CmpOp.isImm() || CmpOp.getImm() != 0)2618return false;26192620const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);2621if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)2622return false;2623assert(RD.Sub == 0);26242625MachineBasicBlock &B = *MI->getParent();2626const DebugLoc &DL = MI->getDebugLoc();2627auto At = MI->isPHI() ? B.getFirstNonPHI()2628: MachineBasicBlock::iterator(MI);2629bool KnownZ = true;2630bool KnownNZ = false;26312632BitTracker::RegisterRef SR = MI->getOperand(1);2633if (!BT.has(SR.Reg))2634return false;2635const BitTracker::RegisterCell &SC = BT.lookup(SR.Reg);2636unsigned F, W;2637if (!HBS::getSubregMask(SR, F, W, MRI))2638return false;26392640for (uint16_t I = F; I != F+W; ++I) {2641const BitTracker::BitValue &V = SC[I];2642if (!V.is(0))2643KnownZ = false;2644if (V.is(1))2645KnownNZ = true;2646}26472648auto ReplaceWithConst = [&](int C) {2649Register NewR = MRI.createVirtualRegister(FRC);2650BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR)2651.addImm(C);2652HBS::replaceReg(RD.Reg, NewR, MRI);2653BitTracker::RegisterCell NewRC(W);2654for (uint16_t I = 0; I != W; ++I) {2655NewRC[I] = BitTracker::BitValue(C & 1);2656C = unsigned(C) >> 1;2657}2658BT.put(BitTracker::RegisterRef(NewR), NewRC);2659return true;2660};26612662auto IsNonZero = [] (const MachineOperand &Op) {2663if (Op.isGlobal() || Op.isBlockAddress())2664return true;2665if (Op.isImm())2666return Op.getImm() != 0;2667if (Op.isCImm())2668return !Op.getCImm()->isZero();2669if (Op.isFPImm())2670return !Op.getFPImm()->isZero();2671return false;2672};26732674auto IsZero = [] (const MachineOperand &Op) {2675if (Op.isGlobal() || Op.isBlockAddress())2676return false;2677if (Op.isImm())2678return Op.getImm() == 0;2679if (Op.isCImm())2680return Op.getCImm()->isZero();2681if (Op.isFPImm())2682return Op.getFPImm()->isZero();2683return false;2684};26852686// If the source register is known to be 0 or non-0, the comparison can2687// be folded to a load of a constant.2688if (KnownZ || KnownNZ) {2689assert(KnownZ != KnownNZ && "Register cannot be both 0 and non-0");2690return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi));2691}26922693// Special case: if the compare comes from a C2_muxii, then we know the2694// two possible constants that can be the source value.2695MachineInstr *InpDef = MRI.getVRegDef(SR.Reg);2696if (!InpDef)2697return false;2698if (SR.Sub == 0 && InpDef->getOpcode() == Hexagon::C2_muxii) {2699MachineOperand &Src1 = InpDef->getOperand(2);2700MachineOperand &Src2 = InpDef->getOperand(3);2701// Check if both are non-zero.2702bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2);2703if (KnownNZ1 && KnownNZ2)2704return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi);2705// Check if both are zero.2706bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2);2707if (KnownZ1 && KnownZ2)2708return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi);27092710// If for both operands we know that they are either 0 or non-0,2711// replace the comparison with a C2_muxii, using the same predicate2712// register, but with operands substituted with 0/1 accordingly.2713if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) {2714Register NewR = MRI.createVirtualRegister(FRC);2715BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR)2716.addReg(InpDef->getOperand(1).getReg())2717.addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi))2718.addImm(KnownZ2 == (Opc == Hexagon::A4_rcmpeqi));2719HBS::replaceReg(RD.Reg, NewR, MRI);2720// Create a new cell with only the least significant bit unknown.2721BitTracker::RegisterCell NewRC(W);2722NewRC[0] = BitTracker::BitValue::self();2723NewRC.fill(1, W, BitTracker::BitValue::Zero);2724BT.put(BitTracker::RegisterRef(NewR), NewRC);2725return true;2726}2727}27282729return false;2730}27312732bool BitSimplification::processBlock(MachineBasicBlock &B,2733const RegisterSet &AVs) {2734if (!BT.reached(&B))2735return false;2736bool Changed = false;2737RegisterSet AVB = AVs;2738RegisterSet Defs;27392740for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {2741MachineInstr *MI = &*I;2742Defs.clear();2743HBS::getInstrDefs(*MI, Defs);27442745unsigned Opc = MI->getOpcode();2746if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)2747continue;27482749if (MI->mayStore()) {2750bool T = genStoreUpperHalf(MI);2751T = T || genStoreImmediate(MI);2752Changed |= T;2753continue;2754}27552756if (Defs.count() != 1)2757continue;2758const MachineOperand &Op0 = MI->getOperand(0);2759if (!Op0.isReg() || !Op0.isDef())2760continue;2761BitTracker::RegisterRef RD = Op0;2762if (!BT.has(RD.Reg))2763continue;2764const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);2765const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);27662767if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {2768bool T = genPackhl(MI, RD, RC);2769T = T || simplifyExtractLow(MI, RD, RC, AVB);2770Changed |= T;2771continue;2772}27732774if (FRC->getID() == Hexagon::IntRegsRegClassID) {2775bool T = genBitSplit(MI, RD, RC, AVB);2776T = T || simplifyExtractLow(MI, RD, RC, AVB);2777T = T || genExtractHalf(MI, RD, RC);2778T = T || genCombineHalf(MI, RD, RC);2779T = T || genExtractLow(MI, RD, RC);2780T = T || simplifyRCmp0(MI, RD);2781Changed |= T;2782continue;2783}27842785if (FRC->getID() == Hexagon::PredRegsRegClassID) {2786bool T = simplifyTstbit(MI, RD, RC);2787Changed |= T;2788continue;2789}2790}2791return Changed;2792}27932794bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {2795if (skipFunction(MF.getFunction()))2796return false;27972798auto &HST = MF.getSubtarget<HexagonSubtarget>();2799auto &HRI = *HST.getRegisterInfo();2800auto &HII = *HST.getInstrInfo();28012802MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();2803MachineRegisterInfo &MRI = MF.getRegInfo();2804bool Changed;28052806Changed = DeadCodeElimination(MF, *MDT).run();28072808const HexagonEvaluator HE(HRI, MRI, HII, MF);2809BitTracker BT(HE, MF);2810LLVM_DEBUG(BT.trace(true));2811BT.run();28122813MachineBasicBlock &Entry = MF.front();28142815RegisterSet AIG; // Available registers for IG.2816ConstGeneration ImmG(BT, HII, MRI);2817Changed |= visitBlock(Entry, ImmG, AIG);28182819RegisterSet ARE; // Available registers for RIE.2820RedundantInstrElimination RIE(BT, HII, HRI, MRI);2821bool Ried = visitBlock(Entry, RIE, ARE);2822if (Ried) {2823Changed = true;2824BT.run();2825}28262827RegisterSet ACG; // Available registers for CG.2828CopyGeneration CopyG(BT, HII, HRI, MRI);2829Changed |= visitBlock(Entry, CopyG, ACG);28302831RegisterSet ACP; // Available registers for CP.2832CopyPropagation CopyP(HRI, MRI);2833Changed |= visitBlock(Entry, CopyP, ACP);28342835Changed = DeadCodeElimination(MF, *MDT).run() || Changed;28362837BT.run();2838RegisterSet ABS; // Available registers for BS.2839BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF);2840Changed |= visitBlock(Entry, BitS, ABS);28412842Changed = DeadCodeElimination(MF, *MDT).run() || Changed;28432844if (Changed) {2845for (auto &B : MF)2846for (auto &I : B)2847I.clearKillInfo();2848DeadCodeElimination(MF, *MDT).run();2849}2850return Changed;2851}28522853// Recognize loops where the code at the end of the loop matches the code2854// before the entry of the loop, and the matching code is such that is can2855// be simplified. This pass relies on the bit simplification above and only2856// prepares code in a way that can be handled by the bit simplifcation.2857//2858// This is the motivating testcase (and explanation):2859//2860// {2861// loop0(.LBB0_2, r1) // %for.body.preheader2862// r5:4 = memd(r0++#8)2863// }2864// {2865// r3 = lsr(r4, #16)2866// r7:6 = combine(r5, r5)2867// }2868// {2869// r3 = insert(r5, #16, #16)2870// r7:6 = vlsrw(r7:6, #16)2871// }2872// .LBB0_2:2873// {2874// memh(r2+#4) = r52875// memh(r2+#6) = r6 # R6 is really R5.H2876// }2877// {2878// r2 = add(r2, #8)2879// memh(r2+#0) = r42880// memh(r2+#2) = r3 # R3 is really R4.H2881// }2882// {2883// r5:4 = memd(r0++#8)2884// }2885// { # "Shuffling" code that sets up R3 and R62886// r3 = lsr(r4, #16) # so that their halves can be stored in the2887// r7:6 = combine(r5, r5) # next iteration. This could be folded into2888// } # the stores if the code was at the beginning2889// { # of the loop iteration. Since the same code2890// r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved2891// r7:6 = vlsrw(r7:6, #16) # there.2892// }:endloop02893//2894//2895// The outcome:2896//2897// {2898// loop0(.LBB0_2, r1)2899// r5:4 = memd(r0++#8)2900// }2901// .LBB0_2:2902// {2903// memh(r2+#4) = r52904// memh(r2+#6) = r5.h2905// }2906// {2907// r2 = add(r2, #8)2908// memh(r2+#0) = r42909// memh(r2+#2) = r4.h2910// }2911// {2912// r5:4 = memd(r0++#8)2913// }:endloop029142915namespace llvm {29162917FunctionPass *createHexagonLoopRescheduling();2918void initializeHexagonLoopReschedulingPass(PassRegistry&);29192920} // end namespace llvm29212922namespace {29232924class HexagonLoopRescheduling : public MachineFunctionPass {2925public:2926static char ID;29272928HexagonLoopRescheduling() : MachineFunctionPass(ID) {2929initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry());2930}29312932bool runOnMachineFunction(MachineFunction &MF) override;29332934private:2935const HexagonInstrInfo *HII = nullptr;2936const HexagonRegisterInfo *HRI = nullptr;2937MachineRegisterInfo *MRI = nullptr;2938BitTracker *BTP = nullptr;29392940struct LoopCand {2941LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,2942MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}29432944MachineBasicBlock *LB, *PB, *EB;2945};2946using InstrList = std::vector<MachineInstr *>;2947struct InstrGroup {2948BitTracker::RegisterRef Inp, Out;2949InstrList Ins;2950};2951struct PhiInfo {2952PhiInfo(MachineInstr &P, MachineBasicBlock &B);29532954unsigned DefR;2955BitTracker::RegisterRef LR, PR; // Loop Register, Preheader Register2956MachineBasicBlock *LB, *PB; // Loop Block, Preheader Block2957};29582959static unsigned getDefReg(const MachineInstr *MI);2960bool isConst(unsigned Reg) const;2961bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;2962bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;2963bool isShuffleOf(unsigned OutR, unsigned InpR) const;2964bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,2965unsigned &InpR2) const;2966void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,2967MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);2968bool processLoop(LoopCand &C);2969};29702971} // end anonymous namespace29722973char HexagonLoopRescheduling::ID = 0;29742975INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",2976"Hexagon Loop Rescheduling", false, false)29772978HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,2979MachineBasicBlock &B) {2980DefR = HexagonLoopRescheduling::getDefReg(&P);2981LB = &B;2982PB = nullptr;2983for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {2984const MachineOperand &OpB = P.getOperand(i+1);2985if (OpB.getMBB() == &B) {2986LR = P.getOperand(i);2987continue;2988}2989PB = OpB.getMBB();2990PR = P.getOperand(i);2991}2992}29932994unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {2995RegisterSet Defs;2996HBS::getInstrDefs(*MI, Defs);2997if (Defs.count() != 1)2998return 0;2999return Defs.find_first();3000}30013002bool HexagonLoopRescheduling::isConst(unsigned Reg) const {3003if (!BTP->has(Reg))3004return false;3005const BitTracker::RegisterCell &RC = BTP->lookup(Reg);3006for (unsigned i = 0, w = RC.width(); i < w; ++i) {3007const BitTracker::BitValue &V = RC[i];3008if (!V.is(0) && !V.is(1))3009return false;3010}3011return true;3012}30133014bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,3015unsigned DefR) const {3016unsigned Opc = MI->getOpcode();3017switch (Opc) {3018case TargetOpcode::COPY:3019case Hexagon::S2_lsr_i_r:3020case Hexagon::S2_asr_i_r:3021case Hexagon::S2_asl_i_r:3022case Hexagon::S2_lsr_i_p:3023case Hexagon::S2_asr_i_p:3024case Hexagon::S2_asl_i_p:3025case Hexagon::S2_insert:3026case Hexagon::A2_or:3027case Hexagon::A2_orp:3028case Hexagon::A2_and:3029case Hexagon::A2_andp:3030case Hexagon::A2_combinew:3031case Hexagon::A4_combineri:3032case Hexagon::A4_combineir:3033case Hexagon::A2_combineii:3034case Hexagon::A4_combineii:3035case Hexagon::A2_combine_ll:3036case Hexagon::A2_combine_lh:3037case Hexagon::A2_combine_hl:3038case Hexagon::A2_combine_hh:3039return true;3040}3041return false;3042}30433044bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,3045unsigned InpR) const {3046for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {3047const MachineOperand &Op = MI->getOperand(i);3048if (!Op.isReg())3049continue;3050if (Op.getReg() == InpR)3051return i == n-1;3052}3053return false;3054}30553056bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {3057if (!BTP->has(OutR) || !BTP->has(InpR))3058return false;3059const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);3060for (unsigned i = 0, w = OutC.width(); i < w; ++i) {3061const BitTracker::BitValue &V = OutC[i];3062if (V.Type != BitTracker::BitValue::Ref)3063continue;3064if (V.RefI.Reg != InpR)3065return false;3066}3067return true;3068}30693070bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,3071unsigned OutR2, unsigned &InpR2) const {3072if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))3073return false;3074const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1);3075const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2);3076unsigned W = OutC1.width();3077unsigned MatchR = 0;3078if (W != OutC2.width())3079return false;3080for (unsigned i = 0; i < W; ++i) {3081const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i];3082if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One)3083return false;3084if (V1.Type != BitTracker::BitValue::Ref)3085continue;3086if (V1.RefI.Pos != V2.RefI.Pos)3087return false;3088if (V1.RefI.Reg != InpR1)3089return false;3090if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)3091return false;3092if (!MatchR)3093MatchR = V2.RefI.Reg;3094else if (V2.RefI.Reg != MatchR)3095return false;3096}3097InpR2 = MatchR;3098return true;3099}31003101void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,3102MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,3103unsigned NewPredR) {3104DenseMap<unsigned,unsigned> RegMap;31053106const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);3107Register PhiR = MRI->createVirtualRegister(PhiRC);3108BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)3109.addReg(NewPredR)3110.addMBB(&PB)3111.addReg(G.Inp.Reg)3112.addMBB(&LB);3113RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));31143115for (const MachineInstr *SI : llvm::reverse(G.Ins)) {3116unsigned DR = getDefReg(SI);3117const TargetRegisterClass *RC = MRI->getRegClass(DR);3118Register NewDR = MRI->createVirtualRegister(RC);3119DebugLoc DL = SI->getDebugLoc();31203121auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);3122for (const MachineOperand &Op : SI->operands()) {3123if (!Op.isReg()) {3124MIB.add(Op);3125continue;3126}3127if (!Op.isUse())3128continue;3129unsigned UseR = RegMap[Op.getReg()];3130MIB.addReg(UseR, 0, Op.getSubReg());3131}3132RegMap.insert(std::make_pair(DR, NewDR));3133}31343135HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);3136}31373138bool HexagonLoopRescheduling::processLoop(LoopCand &C) {3139LLVM_DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB)3140<< "\n");3141std::vector<PhiInfo> Phis;3142for (auto &I : *C.LB) {3143if (!I.isPHI())3144break;3145unsigned PR = getDefReg(&I);3146if (isConst(PR))3147continue;3148bool BadUse = false, GoodUse = false;3149for (const MachineOperand &MO : MRI->use_operands(PR)) {3150const MachineInstr *UseI = MO.getParent();3151if (UseI->getParent() != C.LB) {3152BadUse = true;3153break;3154}3155if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))3156GoodUse = true;3157}3158if (BadUse || !GoodUse)3159continue;31603161Phis.push_back(PhiInfo(I, *C.LB));3162}31633164LLVM_DEBUG({3165dbgs() << "Phis: {";3166for (auto &I : Phis) {3167dbgs() << ' ' << printReg(I.DefR, HRI) << "=phi("3168<< printReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()3169<< ',' << printReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"3170<< I.LB->getNumber() << ')';3171}3172dbgs() << " }\n";3173});31743175if (Phis.empty())3176return false;31773178bool Changed = false;3179InstrList ShufIns;31803181// Go backwards in the block: for each bit shuffling instruction, check3182// if that instruction could potentially be moved to the front of the loop:3183// the output of the loop cannot be used in a non-shuffling instruction3184// in this loop.3185for (MachineInstr &MI : llvm::reverse(*C.LB)) {3186if (MI.isTerminator())3187continue;3188if (MI.isPHI())3189break;31903191RegisterSet Defs;3192HBS::getInstrDefs(MI, Defs);3193if (Defs.count() != 1)3194continue;3195Register DefR = Defs.find_first();3196if (!DefR.isVirtual())3197continue;3198if (!isBitShuffle(&MI, DefR))3199continue;32003201bool BadUse = false;3202for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {3203MachineInstr *UseI = UI->getParent();3204if (UseI->getParent() == C.LB) {3205if (UseI->isPHI()) {3206// If the use is in a phi node in this loop, then it should be3207// the value corresponding to the back edge.3208unsigned Idx = UI.getOperandNo();3209if (UseI->getOperand(Idx+1).getMBB() != C.LB)3210BadUse = true;3211} else {3212if (!llvm::is_contained(ShufIns, UseI))3213BadUse = true;3214}3215} else {3216// There is a use outside of the loop, but there is no epilog block3217// suitable for a copy-out.3218if (C.EB == nullptr)3219BadUse = true;3220}3221if (BadUse)3222break;3223}32243225if (BadUse)3226continue;3227ShufIns.push_back(&MI);3228}32293230// Partition the list of shuffling instructions into instruction groups,3231// where each group has to be moved as a whole (i.e. a group is a chain of3232// dependent instructions). A group produces a single live output register,3233// which is meant to be the input of the loop phi node (although this is3234// not checked here yet). It also uses a single register as its input,3235// which is some value produced in the loop body. After moving the group3236// to the beginning of the loop, that input register would need to be3237// the loop-carried register (through a phi node) instead of the (currently3238// loop-carried) output register.3239using InstrGroupList = std::vector<InstrGroup>;3240InstrGroupList Groups;32413242for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {3243MachineInstr *SI = ShufIns[i];3244if (SI == nullptr)3245continue;32463247InstrGroup G;3248G.Ins.push_back(SI);3249G.Out.Reg = getDefReg(SI);3250RegisterSet Inputs;3251HBS::getInstrUses(*SI, Inputs);32523253for (unsigned j = i+1; j < n; ++j) {3254MachineInstr *MI = ShufIns[j];3255if (MI == nullptr)3256continue;3257RegisterSet Defs;3258HBS::getInstrDefs(*MI, Defs);3259// If this instruction does not define any pending inputs, skip it.3260if (!Defs.intersects(Inputs))3261continue;3262// Otherwise, add it to the current group and remove the inputs that3263// are defined by MI.3264G.Ins.push_back(MI);3265Inputs.remove(Defs);3266// Then add all registers used by MI.3267HBS::getInstrUses(*MI, Inputs);3268ShufIns[j] = nullptr;3269}32703271// Only add a group if it requires at most one register.3272if (Inputs.count() > 1)3273continue;3274auto LoopInpEq = [G] (const PhiInfo &P) -> bool {3275return G.Out.Reg == P.LR.Reg;3276};3277if (llvm::none_of(Phis, LoopInpEq))3278continue;32793280G.Inp.Reg = Inputs.find_first();3281Groups.push_back(G);3282}32833284LLVM_DEBUG({3285for (unsigned i = 0, n = Groups.size(); i < n; ++i) {3286InstrGroup &G = Groups[i];3287dbgs() << "Group[" << i << "] inp: "3288<< printReg(G.Inp.Reg, HRI, G.Inp.Sub)3289<< " out: " << printReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";3290for (const MachineInstr *MI : G.Ins)3291dbgs() << " " << MI;3292}3293});32943295for (InstrGroup &G : Groups) {3296if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))3297continue;3298auto LoopInpEq = [G] (const PhiInfo &P) -> bool {3299return G.Out.Reg == P.LR.Reg;3300};3301auto F = llvm::find_if(Phis, LoopInpEq);3302if (F == Phis.end())3303continue;3304unsigned PrehR = 0;3305if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PrehR)) {3306const MachineInstr *DefPrehR = MRI->getVRegDef(F->PR.Reg);3307unsigned Opc = DefPrehR->getOpcode();3308if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)3309continue;3310if (!DefPrehR->getOperand(1).isImm())3311continue;3312if (DefPrehR->getOperand(1).getImm() != 0)3313continue;3314const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);3315if (RC != MRI->getRegClass(F->PR.Reg)) {3316PrehR = MRI->createVirtualRegister(RC);3317unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi3318: Hexagon::A2_tfrpi;3319auto T = C.PB->getFirstTerminator();3320DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();3321BuildMI(*C.PB, T, DL, HII->get(TfrI), PrehR)3322.addImm(0);3323} else {3324PrehR = F->PR.Reg;3325}3326}3327// isSameShuffle could match with PrehR being of a wider class than3328// G.Inp.Reg, for example if G shuffles the low 32 bits of its input,3329// it would match for the input being a 32-bit register, and PrehR3330// being a 64-bit register (where the low 32 bits match). This could3331// be handled, but for now skip these cases.3332if (MRI->getRegClass(PrehR) != MRI->getRegClass(G.Inp.Reg))3333continue;3334moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PrehR);3335Changed = true;3336}33373338return Changed;3339}33403341bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {3342if (skipFunction(MF.getFunction()))3343return false;33443345auto &HST = MF.getSubtarget<HexagonSubtarget>();3346HII = HST.getInstrInfo();3347HRI = HST.getRegisterInfo();3348MRI = &MF.getRegInfo();3349const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);3350BitTracker BT(HE, MF);3351LLVM_DEBUG(BT.trace(true));3352BT.run();3353BTP = &BT;33543355std::vector<LoopCand> Cand;33563357for (auto &B : MF) {3358if (B.pred_size() != 2 || B.succ_size() != 2)3359continue;3360MachineBasicBlock *PB = nullptr;3361bool IsLoop = false;3362for (MachineBasicBlock *Pred : B.predecessors()) {3363if (Pred != &B)3364PB = Pred;3365else3366IsLoop = true;3367}3368if (!IsLoop)3369continue;33703371MachineBasicBlock *EB = nullptr;3372for (MachineBasicBlock *Succ : B.successors()) {3373if (Succ == &B)3374continue;3375// Set EP to the epilog block, if it has only 1 predecessor (i.e. the3376// edge from B to EP is non-critical.3377if (Succ->pred_size() == 1)3378EB = Succ;3379break;3380}33813382Cand.push_back(LoopCand(&B, PB, EB));3383}33843385bool Changed = false;3386for (auto &C : Cand)3387Changed |= processLoop(C);33883389return Changed;3390}33913392//===----------------------------------------------------------------------===//3393// Public Constructor Functions3394//===----------------------------------------------------------------------===//33953396FunctionPass *llvm::createHexagonLoopRescheduling() {3397return new HexagonLoopRescheduling();3398}33993400FunctionPass *llvm::createHexagonBitSimplify() {3401return new HexagonBitSimplify();3402}340334043405