Path: blob/main/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
35268 views
//===- HexagonConstExtenders.cpp ------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "HexagonInstrInfo.h"9#include "HexagonRegisterInfo.h"10#include "HexagonSubtarget.h"11#include "llvm/ADT/SetVector.h"12#include "llvm/ADT/SmallVector.h"13#include "llvm/CodeGen/MachineDominators.h"14#include "llvm/CodeGen/MachineFunctionPass.h"15#include "llvm/CodeGen/MachineInstrBuilder.h"16#include "llvm/CodeGen/MachineRegisterInfo.h"17#include "llvm/CodeGen/Register.h"18#include "llvm/InitializePasses.h"19#include "llvm/Pass.h"20#include "llvm/Support/CommandLine.h"21#include "llvm/Support/raw_ostream.h"22#include <map>23#include <set>24#include <utility>25#include <vector>2627#define DEBUG_TYPE "hexagon-cext-opt"2829using namespace llvm;3031static cl::opt<unsigned> CountThreshold(32"hexagon-cext-threshold", cl::init(3), cl::Hidden,33cl::desc("Minimum number of extenders to trigger replacement"));3435static cl::opt<unsigned>36ReplaceLimit("hexagon-cext-limit", cl::init(0), cl::Hidden,37cl::desc("Maximum number of replacements"));3839namespace llvm {40void initializeHexagonConstExtendersPass(PassRegistry&);41FunctionPass *createHexagonConstExtenders();42}4344static int32_t adjustUp(int32_t V, uint8_t A, uint8_t O) {45assert(isPowerOf2_32(A));46int32_t U = (V & -A) + O;47return U >= V ? U : U+A;48}4950static int32_t adjustDown(int32_t V, uint8_t A, uint8_t O) {51assert(isPowerOf2_32(A));52int32_t U = (V & -A) + O;53return U <= V ? U : U-A;54}5556namespace {57struct OffsetRange {58// The range of values between Min and Max that are of form Align*N+Offset,59// for some integer N. 
Min and Max are required to be of that form as well,60// except in the case of an empty range.61int32_t Min = INT_MIN, Max = INT_MAX;62uint8_t Align = 1;63uint8_t Offset = 0;6465OffsetRange() = default;66OffsetRange(int32_t L, int32_t H, uint8_t A, uint8_t O = 0)67: Min(L), Max(H), Align(A), Offset(O) {}68OffsetRange &intersect(OffsetRange A) {69if (Align < A.Align)70std::swap(*this, A);7172// Align >= A.Align.73if (Offset >= A.Offset && (Offset - A.Offset) % A.Align == 0) {74Min = adjustUp(std::max(Min, A.Min), Align, Offset);75Max = adjustDown(std::min(Max, A.Max), Align, Offset);76} else {77// Make an empty range.78Min = 0;79Max = -1;80}81// Canonicalize empty ranges.82if (Min > Max)83std::tie(Min, Max, Align) = std::make_tuple(0, -1, 1);84return *this;85}86OffsetRange &shift(int32_t S) {87Min += S;88Max += S;89Offset = (Offset+S) % Align;90return *this;91}92OffsetRange &extendBy(int32_t D) {93// If D < 0, extend Min, otherwise extend Max.94assert(D % Align == 0);95if (D < 0)96Min = (INT_MIN-D < Min) ? Min+D : INT_MIN;97else98Max = (INT_MAX-D > Max) ? 
Max+D : INT_MAX;99return *this;100}101bool empty() const {102return Min > Max;103}104bool contains(int32_t V) const {105return Min <= V && V <= Max && (V-Offset) % Align == 0;106}107bool operator==(const OffsetRange &R) const {108return Min == R.Min && Max == R.Max && Align == R.Align;109}110bool operator!=(const OffsetRange &R) const {111return !operator==(R);112}113bool operator<(const OffsetRange &R) const {114if (Min != R.Min)115return Min < R.Min;116if (Max != R.Max)117return Max < R.Max;118return Align < R.Align;119}120static OffsetRange zero() { return {0, 0, 1}; }121};122123struct RangeTree {124struct Node {125Node(const OffsetRange &R) : MaxEnd(R.Max), Range(R) {}126unsigned Height = 1;127unsigned Count = 1;128int32_t MaxEnd;129const OffsetRange &Range;130Node *Left = nullptr, *Right = nullptr;131};132133Node *Root = nullptr;134135void add(const OffsetRange &R) {136Root = add(Root, R);137}138void erase(const Node *N) {139Root = remove(Root, N);140delete N;141}142void order(SmallVectorImpl<Node*> &Seq) const {143order(Root, Seq);144}145SmallVector<Node*,8> nodesWith(int32_t P, bool CheckAlign = true) {146SmallVector<Node*,8> Nodes;147nodesWith(Root, P, CheckAlign, Nodes);148return Nodes;149}150void dump() const;151~RangeTree() {152SmallVector<Node*,8> Nodes;153order(Nodes);154for (Node *N : Nodes)155delete N;156}157158private:159void dump(const Node *N) const;160void order(Node *N, SmallVectorImpl<Node*> &Seq) const;161void nodesWith(Node *N, int32_t P, bool CheckA,162SmallVectorImpl<Node*> &Seq) const;163164Node *add(Node *N, const OffsetRange &R);165Node *remove(Node *N, const Node *D);166Node *rotateLeft(Node *Lower, Node *Higher);167Node *rotateRight(Node *Lower, Node *Higher);168unsigned height(Node *N) {169return N != nullptr ? 
N->Height : 0;170}171Node *update(Node *N) {172assert(N != nullptr);173N->Height = 1 + std::max(height(N->Left), height(N->Right));174if (N->Left)175N->MaxEnd = std::max(N->MaxEnd, N->Left->MaxEnd);176if (N->Right)177N->MaxEnd = std::max(N->MaxEnd, N->Right->MaxEnd);178return N;179}180Node *rebalance(Node *N) {181assert(N != nullptr);182int32_t Balance = height(N->Right) - height(N->Left);183if (Balance < -1)184return rotateRight(N->Left, N);185if (Balance > 1)186return rotateLeft(N->Right, N);187return N;188}189};190191struct Loc {192MachineBasicBlock *Block = nullptr;193MachineBasicBlock::iterator At;194195Loc(MachineBasicBlock *B, MachineBasicBlock::iterator It)196: Block(B), At(It) {197if (B->end() == It) {198Pos = -1;199} else {200assert(It->getParent() == B);201Pos = std::distance(B->begin(), It);202}203}204bool operator<(Loc A) const {205if (Block != A.Block)206return Block->getNumber() < A.Block->getNumber();207if (A.Pos == -1)208return Pos != A.Pos;209return Pos != -1 && Pos < A.Pos;210}211private:212int Pos = 0;213};214215struct HexagonConstExtenders : public MachineFunctionPass {216static char ID;217HexagonConstExtenders() : MachineFunctionPass(ID) {}218219void getAnalysisUsage(AnalysisUsage &AU) const override {220AU.addRequired<MachineDominatorTreeWrapperPass>();221AU.addPreserved<MachineDominatorTreeWrapperPass>();222MachineFunctionPass::getAnalysisUsage(AU);223}224225StringRef getPassName() const override {226return "Hexagon constant-extender optimization";227}228bool runOnMachineFunction(MachineFunction &MF) override;229230private:231struct Register {232Register() = default;233Register(llvm::Register R, unsigned S) : Reg(R), Sub(S) {}234Register(const MachineOperand &Op)235: Reg(Op.getReg()), Sub(Op.getSubReg()) {}236Register &operator=(const MachineOperand &Op) {237if (Op.isReg()) {238Reg = Op.getReg();239Sub = Op.getSubReg();240} else if (Op.isFI()) {241Reg = llvm::Register::index2StackSlot(Op.getIndex());242}243return *this;244}245bool isVReg() 
const {246return Reg != 0 && !Reg.isStack() && Reg.isVirtual();247}248bool isSlot() const { return Reg != 0 && Reg.isStack(); }249operator MachineOperand() const {250if (isVReg())251return MachineOperand::CreateReg(Reg, /*Def*/false, /*Imp*/false,252/*Kill*/false, /*Dead*/false, /*Undef*/false,253/*EarlyClobber*/false, Sub);254if (Reg.isStack()) {255int FI = llvm::Register::stackSlot2Index(Reg);256return MachineOperand::CreateFI(FI);257}258llvm_unreachable("Cannot create MachineOperand");259}260bool operator==(Register R) const { return Reg == R.Reg && Sub == R.Sub; }261bool operator!=(Register R) const { return !operator==(R); }262bool operator<(Register R) const {263// For std::map.264return Reg < R.Reg || (Reg == R.Reg && Sub < R.Sub);265}266llvm::Register Reg;267unsigned Sub = 0;268};269270struct ExtExpr {271// A subexpression in which the extender is used. In general, this272// represents an expression where adding D to the extender will be273// equivalent to adding D to the expression as a whole. In other274// words, expr(add(##V,D) = add(expr(##V),D).275276// The original motivation for this are the io/ur addressing modes,277// where the offset is extended. Consider the io example:278// In memw(Rs+##V), the ##V could be replaced by a register Rt to279// form the rr mode: memw(Rt+Rs<<0). In such case, however, the280// register Rt must have exactly the value of ##V. 
If there was281// another instruction memw(Rs+##V+4), it would need a different Rt.282// Now, if Rt was initialized as "##V+Rs<<0", both of these283// instructions could use the same Rt, just with different offsets.284// Here it's clear that "initializer+4" should be the same as if285// the offset 4 was added to the ##V in the initializer.286287// The only kinds of expressions that support the requirement of288// commuting with addition are addition and subtraction from ##V.289// Include shifting the Rs to account for the ur addressing mode:290// ##Val + Rs << S291// ##Val - Rs292Register Rs;293unsigned S = 0;294bool Neg = false;295296ExtExpr() = default;297ExtExpr(Register RS, bool NG, unsigned SH) : Rs(RS), S(SH), Neg(NG) {}298// Expression is trivial if it does not modify the extender.299bool trivial() const {300return Rs.Reg == 0;301}302bool operator==(const ExtExpr &Ex) const {303return Rs == Ex.Rs && S == Ex.S && Neg == Ex.Neg;304}305bool operator!=(const ExtExpr &Ex) const {306return !operator==(Ex);307}308bool operator<(const ExtExpr &Ex) const {309if (Rs != Ex.Rs)310return Rs < Ex.Rs;311if (S != Ex.S)312return S < Ex.S;313return !Neg && Ex.Neg;314}315};316317struct ExtDesc {318MachineInstr *UseMI = nullptr;319unsigned OpNum = -1u;320// The subexpression in which the extender is used (e.g. 
address321// computation).322ExtExpr Expr;323// Optional register that is assigned the value of Expr.324Register Rd;325// Def means that the output of the instruction may differ from the326// original by a constant c, and that the difference can be corrected327// by adding/subtracting c in all users of the defined register.328bool IsDef = false;329330MachineOperand &getOp() {331return UseMI->getOperand(OpNum);332}333const MachineOperand &getOp() const {334return UseMI->getOperand(OpNum);335}336};337338struct ExtRoot {339union {340const ConstantFP *CFP; // MO_FPImmediate341const char *SymbolName; // MO_ExternalSymbol342const GlobalValue *GV; // MO_GlobalAddress343const BlockAddress *BA; // MO_BlockAddress344int64_t ImmVal; // MO_Immediate, MO_TargetIndex,345// and MO_ConstantPoolIndex346} V;347unsigned Kind; // Same as in MachineOperand.348unsigned char TF; // TargetFlags.349350ExtRoot(const MachineOperand &Op);351bool operator==(const ExtRoot &ER) const {352return Kind == ER.Kind && V.ImmVal == ER.V.ImmVal;353}354bool operator!=(const ExtRoot &ER) const {355return !operator==(ER);356}357bool operator<(const ExtRoot &ER) const;358};359360struct ExtValue : public ExtRoot {361int32_t Offset;362363ExtValue(const MachineOperand &Op);364ExtValue(const ExtDesc &ED) : ExtValue(ED.getOp()) {}365ExtValue(const ExtRoot &ER, int32_t Off) : ExtRoot(ER), Offset(Off) {}366bool operator<(const ExtValue &EV) const;367bool operator==(const ExtValue &EV) const {368return ExtRoot(*this) == ExtRoot(EV) && Offset == EV.Offset;369}370bool operator!=(const ExtValue &EV) const {371return !operator==(EV);372}373explicit operator MachineOperand() const;374};375376using IndexList = SetVector<unsigned>;377using ExtenderInit = std::pair<ExtValue, ExtExpr>;378using AssignmentMap = std::map<ExtenderInit, IndexList>;379using LocDefList = std::vector<std::pair<Loc, IndexList>>;380381const HexagonSubtarget *HST = nullptr;382const HexagonInstrInfo *HII = nullptr;383const HexagonRegisterInfo *HRI = 
nullptr;384MachineDominatorTree *MDT = nullptr;385MachineRegisterInfo *MRI = nullptr;386std::vector<ExtDesc> Extenders;387std::vector<unsigned> NewRegs;388389bool isStoreImmediate(unsigned Opc) const;390bool isRegOffOpcode(unsigned ExtOpc) const ;391unsigned getRegOffOpcode(unsigned ExtOpc) const;392unsigned getDirectRegReplacement(unsigned ExtOpc) const;393OffsetRange getOffsetRange(Register R, const MachineInstr &MI) const;394OffsetRange getOffsetRange(const ExtDesc &ED) const;395OffsetRange getOffsetRange(Register Rd) const;396397void recordExtender(MachineInstr &MI, unsigned OpNum);398void collectInstr(MachineInstr &MI);399void collect(MachineFunction &MF);400void assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,401AssignmentMap &IMap);402void calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,403LocDefList &Defs);404Register insertInitializer(Loc DefL, const ExtenderInit &ExtI);405bool replaceInstrExact(const ExtDesc &ED, Register ExtR);406bool replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,407Register ExtR, int32_t &Diff);408bool replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI);409bool replaceExtenders(const AssignmentMap &IMap);410411unsigned getOperandIndex(const MachineInstr &MI,412const MachineOperand &Op) const;413const MachineOperand &getPredicateOp(const MachineInstr &MI) const;414const MachineOperand &getLoadResultOp(const MachineInstr &MI) const;415const MachineOperand &getStoredValueOp(const MachineInstr &MI) const;416417friend struct PrintRegister;418friend struct PrintExpr;419friend struct PrintInit;420friend struct PrintIMap;421friend raw_ostream &operator<< (raw_ostream &OS,422const struct PrintRegister &P);423friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintExpr &P);424friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintInit &P);425friend raw_ostream &operator<< (raw_ostream &OS, const ExtDesc &ED);426friend raw_ostream &operator<< (raw_ostream &OS, 
const ExtRoot &ER);427friend raw_ostream &operator<< (raw_ostream &OS, const ExtValue &EV);428friend raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR);429friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintIMap &P);430};431432using HCE = HexagonConstExtenders;433434LLVM_ATTRIBUTE_UNUSED435raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) {436if (OR.Min > OR.Max)437OS << '!';438OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align)439<< '+' << unsigned(OR.Offset);440return OS;441}442443struct PrintRegister {444PrintRegister(HCE::Register R, const HexagonRegisterInfo &I)445: Rs(R), HRI(I) {}446HCE::Register Rs;447const HexagonRegisterInfo &HRI;448};449450LLVM_ATTRIBUTE_UNUSED451raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &P) {452if (P.Rs.Reg != 0)453OS << printReg(P.Rs.Reg, &P.HRI, P.Rs.Sub);454else455OS << "noreg";456return OS;457}458459struct PrintExpr {460PrintExpr(const HCE::ExtExpr &E, const HexagonRegisterInfo &I)461: Ex(E), HRI(I) {}462const HCE::ExtExpr &Ex;463const HexagonRegisterInfo &HRI;464};465466LLVM_ATTRIBUTE_UNUSED467raw_ostream &operator<< (raw_ostream &OS, const PrintExpr &P) {468OS << "## " << (P.Ex.Neg ? 
"- " : "+ ");469if (P.Ex.Rs.Reg != 0)470OS << printReg(P.Ex.Rs.Reg, &P.HRI, P.Ex.Rs.Sub);471else472OS << "__";473OS << " << " << P.Ex.S;474return OS;475}476477struct PrintInit {478PrintInit(const HCE::ExtenderInit &EI, const HexagonRegisterInfo &I)479: ExtI(EI), HRI(I) {}480const HCE::ExtenderInit &ExtI;481const HexagonRegisterInfo &HRI;482};483484LLVM_ATTRIBUTE_UNUSED485raw_ostream &operator<< (raw_ostream &OS, const PrintInit &P) {486OS << '[' << P.ExtI.first << ", "487<< PrintExpr(P.ExtI.second, P.HRI) << ']';488return OS;489}490491LLVM_ATTRIBUTE_UNUSED492raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtDesc &ED) {493assert(ED.OpNum != -1u);494const MachineBasicBlock &MBB = *ED.getOp().getParent()->getParent();495const MachineFunction &MF = *MBB.getParent();496const auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();497OS << "bb#" << MBB.getNumber() << ": ";498if (ED.Rd.Reg != 0)499OS << printReg(ED.Rd.Reg, &HRI, ED.Rd.Sub);500else501OS << "__";502OS << " = " << PrintExpr(ED.Expr, HRI);503if (ED.IsDef)504OS << ", def";505return OS;506}507508LLVM_ATTRIBUTE_UNUSED509raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtRoot &ER) {510switch (ER.Kind) {511case MachineOperand::MO_Immediate:512OS << "imm:" << ER.V.ImmVal;513break;514case MachineOperand::MO_FPImmediate:515OS << "fpi:" << *ER.V.CFP;516break;517case MachineOperand::MO_ExternalSymbol:518OS << "sym:" << *ER.V.SymbolName;519break;520case MachineOperand::MO_GlobalAddress:521OS << "gad:" << ER.V.GV->getName();522break;523case MachineOperand::MO_BlockAddress:524OS << "blk:" << *ER.V.BA;525break;526case MachineOperand::MO_TargetIndex:527OS << "tgi:" << ER.V.ImmVal;528break;529case MachineOperand::MO_ConstantPoolIndex:530OS << "cpi:" << ER.V.ImmVal;531break;532case MachineOperand::MO_JumpTableIndex:533OS << "jti:" << ER.V.ImmVal;534break;535default:536OS << "???:" << ER.V.ImmVal;537break;538}539return OS;540}541542LLVM_ATTRIBUTE_UNUSED543raw_ostream &operator<< (raw_ostream &OS, const 
HCE::ExtValue &EV) {544OS << HCE::ExtRoot(EV) << " off:" << EV.Offset;545return OS;546}547548struct PrintIMap {549PrintIMap(const HCE::AssignmentMap &M, const HexagonRegisterInfo &I)550: IMap(M), HRI(I) {}551const HCE::AssignmentMap &IMap;552const HexagonRegisterInfo &HRI;553};554555LLVM_ATTRIBUTE_UNUSED556raw_ostream &operator<< (raw_ostream &OS, const PrintIMap &P) {557OS << "{\n";558for (const std::pair<const HCE::ExtenderInit, HCE::IndexList> &Q : P.IMap) {559OS << " " << PrintInit(Q.first, P.HRI) << " -> {";560for (unsigned I : Q.second)561OS << ' ' << I;562OS << " }\n";563}564OS << "}\n";565return OS;566}567}568569INITIALIZE_PASS_BEGIN(HexagonConstExtenders, "hexagon-cext-opt",570"Hexagon constant-extender optimization", false, false)571INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)572INITIALIZE_PASS_END(HexagonConstExtenders, "hexagon-cext-opt",573"Hexagon constant-extender optimization", false, false)574575static unsigned ReplaceCounter = 0;576577char HCE::ID = 0;578579#ifndef NDEBUG580LLVM_DUMP_METHOD void RangeTree::dump() const {581dbgs() << "Root: " << Root << '\n';582if (Root)583dump(Root);584}585586LLVM_DUMP_METHOD void RangeTree::dump(const Node *N) const {587dbgs() << "Node: " << N << '\n';588dbgs() << " Height: " << N->Height << '\n';589dbgs() << " Count: " << N->Count << '\n';590dbgs() << " MaxEnd: " << N->MaxEnd << '\n';591dbgs() << " Range: " << N->Range << '\n';592dbgs() << " Left: " << N->Left << '\n';593dbgs() << " Right: " << N->Right << "\n\n";594595if (N->Left)596dump(N->Left);597if (N->Right)598dump(N->Right);599}600#endif601602void RangeTree::order(Node *N, SmallVectorImpl<Node*> &Seq) const {603if (N == nullptr)604return;605order(N->Left, Seq);606Seq.push_back(N);607order(N->Right, Seq);608}609610void RangeTree::nodesWith(Node *N, int32_t P, bool CheckA,611SmallVectorImpl<Node*> &Seq) const {612if (N == nullptr || N->MaxEnd < P)613return;614nodesWith(N->Left, P, CheckA, Seq);615if (N->Range.Min <= P) {616if ((CheckA && 
N->Range.contains(P)) || (!CheckA && P <= N->Range.Max))617Seq.push_back(N);618nodesWith(N->Right, P, CheckA, Seq);619}620}621622RangeTree::Node *RangeTree::add(Node *N, const OffsetRange &R) {623if (N == nullptr)624return new Node(R);625626if (N->Range == R) {627N->Count++;628return N;629}630631if (R < N->Range)632N->Left = add(N->Left, R);633else634N->Right = add(N->Right, R);635return rebalance(update(N));636}637638RangeTree::Node *RangeTree::remove(Node *N, const Node *D) {639assert(N != nullptr);640641if (N != D) {642assert(N->Range != D->Range && "N and D should not be equal");643if (D->Range < N->Range)644N->Left = remove(N->Left, D);645else646N->Right = remove(N->Right, D);647return rebalance(update(N));648}649650// We got to the node we need to remove. If any of its children are651// missing, simply replace it with the other child.652if (N->Left == nullptr || N->Right == nullptr)653return (N->Left == nullptr) ? N->Right : N->Left;654655// Find the rightmost child of N->Left, remove it and plug it in place656// of N.657Node *M = N->Left;658while (M->Right)659M = M->Right;660M->Left = remove(N->Left, M);661M->Right = N->Right;662return rebalance(update(M));663}664665RangeTree::Node *RangeTree::rotateLeft(Node *Lower, Node *Higher) {666assert(Higher->Right == Lower);667// The Lower node is on the right from Higher. Make sure that Lower's668// balance is greater to the right. Otherwise the rotation will create669// an unbalanced tree again.670if (height(Lower->Left) > height(Lower->Right))671Lower = rotateRight(Lower->Left, Lower);672assert(height(Lower->Left) <= height(Lower->Right));673Higher->Right = Lower->Left;674update(Higher);675Lower->Left = Higher;676update(Lower);677return Lower;678}679680RangeTree::Node *RangeTree::rotateRight(Node *Lower, Node *Higher) {681assert(Higher->Left == Lower);682// The Lower node is on the left from Higher. Make sure that Lower's683// balance is greater to the left. 
Otherwise the rotation will create684// an unbalanced tree again.685if (height(Lower->Left) < height(Lower->Right))686Lower = rotateLeft(Lower->Right, Lower);687assert(height(Lower->Left) >= height(Lower->Right));688Higher->Left = Lower->Right;689update(Higher);690Lower->Right = Higher;691update(Lower);692return Lower;693}694695696HCE::ExtRoot::ExtRoot(const MachineOperand &Op) {697// Always store ImmVal, since it's the field used for comparisons.698V.ImmVal = 0;699if (Op.isImm())700; // Keep 0. Do not use Op.getImm() for value here (treat 0 as the root).701else if (Op.isFPImm())702V.CFP = Op.getFPImm();703else if (Op.isSymbol())704V.SymbolName = Op.getSymbolName();705else if (Op.isGlobal())706V.GV = Op.getGlobal();707else if (Op.isBlockAddress())708V.BA = Op.getBlockAddress();709else if (Op.isCPI() || Op.isTargetIndex() || Op.isJTI())710V.ImmVal = Op.getIndex();711else712llvm_unreachable("Unexpected operand type");713714Kind = Op.getType();715TF = Op.getTargetFlags();716}717718bool HCE::ExtRoot::operator< (const HCE::ExtRoot &ER) const {719if (Kind != ER.Kind)720return Kind < ER.Kind;721switch (Kind) {722case MachineOperand::MO_Immediate:723case MachineOperand::MO_TargetIndex:724case MachineOperand::MO_ConstantPoolIndex:725case MachineOperand::MO_JumpTableIndex:726return V.ImmVal < ER.V.ImmVal;727case MachineOperand::MO_FPImmediate: {728const APFloat &ThisF = V.CFP->getValueAPF();729const APFloat &OtherF = ER.V.CFP->getValueAPF();730return ThisF.bitcastToAPInt().ult(OtherF.bitcastToAPInt());731}732case MachineOperand::MO_ExternalSymbol:733return StringRef(V.SymbolName) < StringRef(ER.V.SymbolName);734case MachineOperand::MO_GlobalAddress:735// Do not use GUIDs, since they depend on the source path. Moving the736// source file to a different directory could cause different GUID737// values for a pair of given symbols. 
These symbols could then compare738// "less" in one directory, but "greater" in another.739assert(!V.GV->getName().empty() && !ER.V.GV->getName().empty());740return V.GV->getName() < ER.V.GV->getName();741case MachineOperand::MO_BlockAddress: {742const BasicBlock *ThisB = V.BA->getBasicBlock();743const BasicBlock *OtherB = ER.V.BA->getBasicBlock();744assert(ThisB->getParent() == OtherB->getParent());745const Function &F = *ThisB->getParent();746return std::distance(F.begin(), ThisB->getIterator()) <747std::distance(F.begin(), OtherB->getIterator());748}749}750return V.ImmVal < ER.V.ImmVal;751}752753HCE::ExtValue::ExtValue(const MachineOperand &Op) : ExtRoot(Op) {754if (Op.isImm())755Offset = Op.getImm();756else if (Op.isFPImm() || Op.isJTI())757Offset = 0;758else if (Op.isSymbol() || Op.isGlobal() || Op.isBlockAddress() ||759Op.isCPI() || Op.isTargetIndex())760Offset = Op.getOffset();761else762llvm_unreachable("Unexpected operand type");763}764765bool HCE::ExtValue::operator< (const HCE::ExtValue &EV) const {766const ExtRoot &ER = *this;767if (!(ER == ExtRoot(EV)))768return ER < EV;769return Offset < EV.Offset;770}771772HCE::ExtValue::operator MachineOperand() const {773switch (Kind) {774case MachineOperand::MO_Immediate:775return MachineOperand::CreateImm(V.ImmVal + Offset);776case MachineOperand::MO_FPImmediate:777assert(Offset == 0);778return MachineOperand::CreateFPImm(V.CFP);779case MachineOperand::MO_ExternalSymbol:780assert(Offset == 0);781return MachineOperand::CreateES(V.SymbolName, TF);782case MachineOperand::MO_GlobalAddress:783return MachineOperand::CreateGA(V.GV, Offset, TF);784case MachineOperand::MO_BlockAddress:785return MachineOperand::CreateBA(V.BA, Offset, TF);786case MachineOperand::MO_TargetIndex:787return MachineOperand::CreateTargetIndex(V.ImmVal, Offset, TF);788case MachineOperand::MO_ConstantPoolIndex:789return MachineOperand::CreateCPI(V.ImmVal, Offset, TF);790case MachineOperand::MO_JumpTableIndex:791assert(Offset == 0);792return 
MachineOperand::CreateJTI(V.ImmVal, TF);793default:794llvm_unreachable("Unhandled kind");795}796}797798bool HCE::isStoreImmediate(unsigned Opc) const {799switch (Opc) {800case Hexagon::S4_storeirbt_io:801case Hexagon::S4_storeirbf_io:802case Hexagon::S4_storeirht_io:803case Hexagon::S4_storeirhf_io:804case Hexagon::S4_storeirit_io:805case Hexagon::S4_storeirif_io:806case Hexagon::S4_storeirb_io:807case Hexagon::S4_storeirh_io:808case Hexagon::S4_storeiri_io:809return true;810default:811break;812}813return false;814}815816bool HCE::isRegOffOpcode(unsigned Opc) const {817switch (Opc) {818case Hexagon::L2_loadrub_io:819case Hexagon::L2_loadrb_io:820case Hexagon::L2_loadruh_io:821case Hexagon::L2_loadrh_io:822case Hexagon::L2_loadri_io:823case Hexagon::L2_loadrd_io:824case Hexagon::L2_loadbzw2_io:825case Hexagon::L2_loadbzw4_io:826case Hexagon::L2_loadbsw2_io:827case Hexagon::L2_loadbsw4_io:828case Hexagon::L2_loadalignh_io:829case Hexagon::L2_loadalignb_io:830case Hexagon::L2_ploadrubt_io:831case Hexagon::L2_ploadrubf_io:832case Hexagon::L2_ploadrbt_io:833case Hexagon::L2_ploadrbf_io:834case Hexagon::L2_ploadruht_io:835case Hexagon::L2_ploadruhf_io:836case Hexagon::L2_ploadrht_io:837case Hexagon::L2_ploadrhf_io:838case Hexagon::L2_ploadrit_io:839case Hexagon::L2_ploadrif_io:840case Hexagon::L2_ploadrdt_io:841case Hexagon::L2_ploadrdf_io:842case Hexagon::S2_storerb_io:843case Hexagon::S2_storerh_io:844case Hexagon::S2_storerf_io:845case Hexagon::S2_storeri_io:846case Hexagon::S2_storerd_io:847case Hexagon::S2_pstorerbt_io:848case Hexagon::S2_pstorerbf_io:849case Hexagon::S2_pstorerht_io:850case Hexagon::S2_pstorerhf_io:851case Hexagon::S2_pstorerft_io:852case Hexagon::S2_pstorerff_io:853case Hexagon::S2_pstorerit_io:854case Hexagon::S2_pstorerif_io:855case Hexagon::S2_pstorerdt_io:856case Hexagon::S2_pstorerdf_io:857case Hexagon::A2_addi:858return true;859default:860break;861}862return false;863}864865unsigned HCE::getRegOffOpcode(unsigned ExtOpc) const {866// If there 
exists an instruction that takes a register and offset,867// that corresponds to the ExtOpc, return it, otherwise return 0.868using namespace Hexagon;869switch (ExtOpc) {870case A2_tfrsi: return A2_addi;871default:872break;873}874const MCInstrDesc &D = HII->get(ExtOpc);875if (D.mayLoad() || D.mayStore()) {876uint64_t F = D.TSFlags;877unsigned AM = (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask;878switch (AM) {879case HexagonII::Absolute:880case HexagonII::AbsoluteSet:881case HexagonII::BaseLongOffset:882switch (ExtOpc) {883case PS_loadrubabs:884case L4_loadrub_ap:885case L4_loadrub_ur: return L2_loadrub_io;886case PS_loadrbabs:887case L4_loadrb_ap:888case L4_loadrb_ur: return L2_loadrb_io;889case PS_loadruhabs:890case L4_loadruh_ap:891case L4_loadruh_ur: return L2_loadruh_io;892case PS_loadrhabs:893case L4_loadrh_ap:894case L4_loadrh_ur: return L2_loadrh_io;895case PS_loadriabs:896case L4_loadri_ap:897case L4_loadri_ur: return L2_loadri_io;898case PS_loadrdabs:899case L4_loadrd_ap:900case L4_loadrd_ur: return L2_loadrd_io;901case L4_loadbzw2_ap:902case L4_loadbzw2_ur: return L2_loadbzw2_io;903case L4_loadbzw4_ap:904case L4_loadbzw4_ur: return L2_loadbzw4_io;905case L4_loadbsw2_ap:906case L4_loadbsw2_ur: return L2_loadbsw2_io;907case L4_loadbsw4_ap:908case L4_loadbsw4_ur: return L2_loadbsw4_io;909case L4_loadalignh_ap:910case L4_loadalignh_ur: return L2_loadalignh_io;911case L4_loadalignb_ap:912case L4_loadalignb_ur: return L2_loadalignb_io;913case L4_ploadrubt_abs: return L2_ploadrubt_io;914case L4_ploadrubf_abs: return L2_ploadrubf_io;915case L4_ploadrbt_abs: return L2_ploadrbt_io;916case L4_ploadrbf_abs: return L2_ploadrbf_io;917case L4_ploadruht_abs: return L2_ploadruht_io;918case L4_ploadruhf_abs: return L2_ploadruhf_io;919case L4_ploadrht_abs: return L2_ploadrht_io;920case L4_ploadrhf_abs: return L2_ploadrhf_io;921case L4_ploadrit_abs: return L2_ploadrit_io;922case L4_ploadrif_abs: return L2_ploadrif_io;923case L4_ploadrdt_abs: return 
L2_ploadrdt_io;
          case L4_ploadrdf_abs:   return L2_ploadrdf_io;
          case PS_storerbabs:
          case S4_storerb_ap:
          case S4_storerb_ur:     return S2_storerb_io;
          case PS_storerhabs:
          case S4_storerh_ap:
          case S4_storerh_ur:     return S2_storerh_io;
          case PS_storerfabs:
          case S4_storerf_ap:
          case S4_storerf_ur:     return S2_storerf_io;
          case PS_storeriabs:
          case S4_storeri_ap:
          case S4_storeri_ur:     return S2_storeri_io;
          case PS_storerdabs:
          case S4_storerd_ap:
          case S4_storerd_ur:     return S2_storerd_io;
          case S4_pstorerbt_abs:  return S2_pstorerbt_io;
          case S4_pstorerbf_abs:  return S2_pstorerbf_io;
          case S4_pstorerht_abs:  return S2_pstorerht_io;
          case S4_pstorerhf_abs:  return S2_pstorerhf_io;
          case S4_pstorerft_abs:  return S2_pstorerft_io;
          case S4_pstorerff_abs:  return S2_pstorerff_io;
          case S4_pstorerit_abs:  return S2_pstorerit_io;
          case S4_pstorerif_abs:  return S2_pstorerif_io;
          case S4_pstorerdt_abs:  return S2_pstorerdt_io;
          case S4_pstorerdf_abs:  return S2_pstorerdf_io;
          default:
            break;
        }
        break;
      case HexagonII::BaseImmOffset:
        if (!isStoreImmediate(ExtOpc))
          return ExtOpc;
        break;
      default:
        break;
    }
  }
  return 0;
}

// Return the opcode that results from replacing the extended immediate
// operand of ExtOpc with a register holding exactly the same value, or 0 if
// ExtOpc has no entry in the table below.
// Two of the returned values are generic target opcodes rather than Hexagon
// instructions: TargetOpcode::COPY (for A2_tfrsi) and
// TargetOpcode::REG_SEQUENCE (for A4_combineir/A4_combineri).
// NOTE(review): the trailing "T -> T" / "_ -> T" annotations presumably record
// whether the original and the replacement instruction have a tied operand;
// confirm against the instruction definitions.
unsigned HCE::getDirectRegReplacement(unsigned ExtOpc) const {
  switch (ExtOpc) {
    case Hexagon::A2_addi:          return Hexagon::A2_add;
    case Hexagon::A2_andir:         return Hexagon::A2_and;
    case Hexagon::A2_combineii:     return Hexagon::A4_combineri;
    case Hexagon::A2_orir:          return Hexagon::A2_or;
    case Hexagon::A2_paddif:        return Hexagon::A2_paddf;
    case Hexagon::A2_paddit:        return Hexagon::A2_paddt;
    case Hexagon::A2_subri:         return Hexagon::A2_sub;
    case Hexagon::A2_tfrsi:         return TargetOpcode::COPY;
    case Hexagon::A4_cmpbeqi:       return Hexagon::A4_cmpbeq;
    case Hexagon::A4_cmpbgti:       return Hexagon::A4_cmpbgt;
    case Hexagon::A4_cmpbgtui:      return Hexagon::A4_cmpbgtu;
    case Hexagon::A4_cmpheqi:       return Hexagon::A4_cmpheq;
    case Hexagon::A4_cmphgti:       return Hexagon::A4_cmphgt;
    case Hexagon::A4_cmphgtui:      return Hexagon::A4_cmphgtu;
    case Hexagon::A4_combineii:     return Hexagon::A4_combineir;
    case Hexagon::A4_combineir:     return TargetOpcode::REG_SEQUENCE;
    case Hexagon::A4_combineri:     return TargetOpcode::REG_SEQUENCE;
    case Hexagon::A4_rcmpeqi:       return Hexagon::A4_rcmpeq;
    case Hexagon::A4_rcmpneqi:      return Hexagon::A4_rcmpneq;
    case Hexagon::C2_cmoveif:       return Hexagon::A2_tfrpf;
    case Hexagon::C2_cmoveit:       return Hexagon::A2_tfrpt;
    case Hexagon::C2_cmpeqi:        return Hexagon::C2_cmpeq;
    case Hexagon::C2_cmpgti:        return Hexagon::C2_cmpgt;
    case Hexagon::C2_cmpgtui:       return Hexagon::C2_cmpgtu;
    case Hexagon::C2_muxii:         return Hexagon::C2_muxir;
    case Hexagon::C2_muxir:         return Hexagon::C2_mux;
    case Hexagon::C2_muxri:         return Hexagon::C2_mux;
    case Hexagon::C4_cmpltei:       return Hexagon::C4_cmplte;
    case Hexagon::C4_cmplteui:      return Hexagon::C4_cmplteu;
    case Hexagon::C4_cmpneqi:       return Hexagon::C4_cmpneq;
    case Hexagon::M2_accii:         return Hexagon::M2_acci;        // T -> T
    /* No M2_macsin */
    case Hexagon::M2_macsip:        return Hexagon::M2_maci;        // T -> T
    case Hexagon::M2_mpysin:        return Hexagon::M2_mpyi;
    case Hexagon::M2_mpysip:        return Hexagon::M2_mpyi;
    case Hexagon::M2_mpysmi:        return Hexagon::M2_mpyi;
    case Hexagon::M2_naccii:        return Hexagon::M2_nacci;       // T -> T
    case Hexagon::M4_mpyri_addi:    return Hexagon::M4_mpyri_addr;
    case Hexagon::M4_mpyri_addr:    return Hexagon::M4_mpyrr_addr;  // _ -> T
    case Hexagon::M4_mpyrr_addi:    return Hexagon::M4_mpyrr_addr;  // _ -> T
    case Hexagon::S4_addaddi:       return Hexagon::M2_acci;        // _ -> T
    case Hexagon::S4_addi_asl_ri:   return Hexagon::S2_asl_i_r_acc; // T -> T
    case Hexagon::S4_addi_lsr_ri:   return Hexagon::S2_lsr_i_r_acc; // T -> T
    case Hexagon::S4_andi_asl_ri:   return Hexagon::S2_asl_i_r_and; // T -> T
    case Hexagon::S4_andi_lsr_ri:   return Hexagon::S2_lsr_i_r_and; // T -> T
    case Hexagon::S4_ori_asl_ri:    return Hexagon::S2_asl_i_r_or;  // T -> T
    case Hexagon::S4_ori_lsr_ri:    return Hexagon::S2_lsr_i_r_or;  // T -> T
    case Hexagon::S4_subaddi:       return Hexagon::M2_subacc;      // _ -> T
    case Hexagon::S4_subi_asl_ri:   return Hexagon::S2_asl_i_r_nac; // T -> T
    case Hexagon::S4_subi_lsr_ri:   return Hexagon::S2_lsr_i_r_nac; // T -> T

    // Store-immediates:
    case Hexagon::S4_storeirbf_io:  return Hexagon::S2_pstorerbf_io;
    case Hexagon::S4_storeirb_io:   return Hexagon::S2_storerb_io;
    case Hexagon::S4_storeirbt_io:  return Hexagon::S2_pstorerbt_io;
    case Hexagon::S4_storeirhf_io:  return Hexagon::S2_pstorerhf_io;
    case Hexagon::S4_storeirh_io:   return Hexagon::S2_storerh_io;
    case Hexagon::S4_storeirht_io:  return Hexagon::S2_pstorerht_io;
    case Hexagon::S4_storeirif_io:  return Hexagon::S2_pstorerif_io;
    case Hexagon::S4_storeiri_io:   return Hexagon::S2_storeri_io;
    case Hexagon::S4_storeirit_io:  return Hexagon::S2_pstorerit_io;

    default:
      break;
  }
  return 0;
}

// Return the allowable deviation from the current value of Rb (i.e. the
// range of values that can be added to the current value) which the
// instruction MI can accommodate.
// The instruction MI is a user of register Rb, which is defined via an
// extender. It may be possible for MI to be tweaked to work for a register
// defined with a slightly different value. For example
//   ... = L2_loadrub_io Rb, 1
// can be modified to be
//   ... = L2_loadrub_io Rb', 0
// if Rb' = Rb+1.
// The range for Rb would be [Min+1, Max+1], where [Min, Max] is a range
// for L2_loadrub with offset 0.
That means that Rb could be replaced with1047// Rc, where Rc-Rb belongs to [Min+1, Max+1].1048OffsetRange HCE::getOffsetRange(Register Rb, const MachineInstr &MI) const {1049unsigned Opc = MI.getOpcode();1050// Instructions that are constant-extended may be replaced with something1051// else that no longer offers the same range as the original.1052if (!isRegOffOpcode(Opc) || HII->isConstExtended(MI))1053return OffsetRange::zero();10541055if (Opc == Hexagon::A2_addi) {1056const MachineOperand &Op1 = MI.getOperand(1), &Op2 = MI.getOperand(2);1057if (Rb != Register(Op1) || !Op2.isImm())1058return OffsetRange::zero();1059OffsetRange R = { -(1<<15)+1, (1<<15)-1, 1 };1060return R.shift(Op2.getImm());1061}10621063// HII::getBaseAndOffsetPosition returns the increment position as "offset".1064if (HII->isPostIncrement(MI))1065return OffsetRange::zero();10661067const MCInstrDesc &D = HII->get(Opc);1068assert(D.mayLoad() || D.mayStore());10691070unsigned BaseP, OffP;1071if (!HII->getBaseAndOffsetPosition(MI, BaseP, OffP) ||1072Rb != Register(MI.getOperand(BaseP)) ||1073!MI.getOperand(OffP).isImm())1074return OffsetRange::zero();10751076uint64_t F = (D.TSFlags >> HexagonII::MemAccessSizePos) &1077HexagonII::MemAccesSizeMask;1078uint8_t A = HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(F));1079unsigned L = Log2_32(A);1080unsigned S = 10+L; // sint11_L1081int32_t Min = -alignDown((1<<S)-1, A);10821083// The range will be shifted by Off. To prefer non-negative offsets,1084// adjust Max accordingly.1085int32_t Off = MI.getOperand(OffP).getImm();1086int32_t Max = Off >= 0 ? 0 : -Off;10871088OffsetRange R = { Min, Max, A };1089return R.shift(Off);1090}10911092// Return the allowable deviation from the current value of the extender ED,1093// for which the instruction corresponding to ED can be modified without1094// using an extender.1095// The instruction uses the extender directly. 
// It will be replaced with
// another instruction, say MJ, where the extender will be replaced with a
// register. MJ can allow some variability with respect to the value of
// that register, as is the case with indexed memory instructions.
OffsetRange HCE::getOffsetRange(const ExtDesc &ED) const {
  // The only way that there can be a non-zero range available is if
  // the instruction using ED will be converted to an indexed memory
  // instruction.
  unsigned IdxOpc = getRegOffOpcode(ED.UseMI->getOpcode());
  switch (IdxOpc) {
    case 0:
      return OffsetRange::zero();
    case Hexagon::A2_addi:    // s16
      return { -32767, 32767, 1 };
    case Hexagon::A2_subri:   // s10
      return { -511, 511, 1 };
  }

  if (!ED.UseMI->mayLoad() && !ED.UseMI->mayStore())
    return OffsetRange::zero();
  // The access size of the indexed replacement is used as the alignment
  // of the returned range.
  const MCInstrDesc &D = HII->get(IdxOpc);
  uint64_t F = (D.TSFlags >> HexagonII::MemAccessSizePos) &
                  HexagonII::MemAccesSizeMask;
  uint8_t A = HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(F));
  unsigned L = Log2_32(A);
  unsigned S = 10+L;  // sint11_L
  int32_t Min = -alignDown((1<<S)-1, A);
  int32_t Max = 0;  // Force non-negative offsets.
  return { Min, Max, A };
}

// Get the allowable deviation from the current value of Rd by checking
// all uses of Rd.
// The result is the intersection of the per-use ranges; a use of a register
// that does not compare equal to Rd makes the result OffsetRange::zero().
OffsetRange HCE::getOffsetRange(Register Rd) const {
  OffsetRange Range;
  for (const MachineOperand &Op : MRI->use_operands(Rd.Reg)) {
    // Make sure that the register being used by this operand is identical
    // to the register that was defined: using a different subregister
    // precludes any non-trivial range.
    if (Rd != Register(Op))
      return OffsetRange::zero();
    Range.intersect(getOffsetRange(Rd, *Op.getParent()));
  }
  return Range;
}

// Build the descriptor (ExtDesc) for the extended operand number OpNum of MI
// and append it to Extenders.
// Nothing is recorded when MI has a frame-index operand with a negative
// index, when the extender's root is a global with an empty name, or when it
// is a block address belonging to a function other than MI's.
// The trailing comments on the cases below show the shape of the recorded
// descriptor in the form (Rd: ## +/- Rs<<S), with "__" for an unused field.
void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) {
  unsigned Opc = MI.getOpcode();
  ExtDesc ED;
  ED.OpNum = OpNum;

  bool IsLoad = MI.mayLoad();
  bool IsStore = MI.mayStore();

  // Fixed stack slots have negative indexes, and they cannot be used
  // with TRI::stackSlot2Index and TRI::index2StackSlot. This is somewhat
  // unfortunate, but should not be a frequent thing.
  for (MachineOperand &Op : MI.operands())
    if (Op.isFI() && Op.getIndex() < 0)
      return;

  if (IsLoad || IsStore) {
    unsigned AM = HII->getAddrMode(MI);
    switch (AM) {
      // (Re: ##Off + Rb<<S) = Rd: ##Val
      case HexagonII::Absolute:       // (__: ## + __<<_)
        break;
      case HexagonII::AbsoluteSet:    // (Rd: ## + __<<_)
        ED.Rd = MI.getOperand(OpNum-1);
        ED.IsDef = true;
        break;
      case HexagonII::BaseImmOffset:  // (__: ## + Rs<<0)
        // Store-immediates are treated as non-memory operations, since
        // it's the value being stored that is extended (as opposed to
        // a part of the address).
        if (!isStoreImmediate(Opc))
          ED.Expr.Rs = MI.getOperand(OpNum-1);
        break;
      case HexagonII::BaseLongOffset: // (__: ## + Rs<<S)
        ED.Expr.Rs = MI.getOperand(OpNum-2);
        ED.Expr.S = MI.getOperand(OpNum-1).getImm();
        break;
      default:
        llvm_unreachable("Unhandled memory instruction");
    }
  } else {
    switch (Opc) {
      case Hexagon::A2_tfrsi:         // (Rd: ## + __<<_)
        ED.Rd = MI.getOperand(0);
        ED.IsDef = true;
        break;
      case Hexagon::A2_combineii:     // (Rd: ## + __<<_)
      case Hexagon::A4_combineir:
        ED.Rd = { MI.getOperand(0).getReg(), Hexagon::isub_hi };
        ED.IsDef = true;
        break;
      case Hexagon::A4_combineri:     // (Rd: ## + __<<_)
        ED.Rd = { MI.getOperand(0).getReg(), Hexagon::isub_lo };
        ED.IsDef = true;
        break;
      case Hexagon::A2_addi:          // (Rd: ## + Rs<<0)
        ED.Rd = MI.getOperand(0);
        ED.Expr.Rs = MI.getOperand(OpNum-1);
        break;
      case Hexagon::M2_accii:         // (__: ## + Rs<<0)
      case Hexagon::M2_naccii:
      case Hexagon::S4_addaddi:
        ED.Expr.Rs = MI.getOperand(OpNum-1);
        break;
      case Hexagon::A2_subri:         // (Rd: ## - Rs<<0)
        ED.Rd = MI.getOperand(0);
        ED.Expr.Rs = MI.getOperand(OpNum+1);
        ED.Expr.Neg = true;
        break;
      case Hexagon::S4_subaddi:       // (__: ## - Rs<<0)
        ED.Expr.Rs = MI.getOperand(OpNum+1);
        ED.Expr.Neg = true;
        break;
      default:                        // (__: ## + __<<_)
        break;
    }
  }

  ED.UseMI = &MI;

  // Ignore unnamed globals.
  ExtRoot ER(ED.getOp());
  if (ER.Kind == MachineOperand::MO_GlobalAddress)
    if (ER.V.GV->getName().empty())
      return;
  // Ignore block address that points to block in another function
  if (ER.Kind == MachineOperand::MO_BlockAddress)
    if (ER.V.BA->getFunction() != &(MI.getMF()->getFunction()))
      return;
  Extenders.push_back(ED);
}

// Record the extender of MI, if MI is constant-extended and its opcode is
// not one of those skipped below.
void HCE::collectInstr(MachineInstr &MI) {
  if (!HII->isConstExtended(MI))
    return;

  // Skip some non-convertible instructions.
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
    case Hexagon::M2_macsin:  // There is no Rx -= mpyi(Rs,Rt).
    case Hexagon::C4_addipc:
    case Hexagon::S4_or_andi:
    case Hexagon::S4_or_andix:
    case Hexagon::S4_or_ori:
      return;
  }
  recordExtender(MI, HII->getCExtOpNum(MI));
}

// Rebuild Extenders from scratch by visiting every instruction of MF,
// except those in blocks whose number is -1.
void HCE::collect(MachineFunction &MF) {
  Extenders.clear();
  for (MachineBasicBlock &MBB : MF) {
    // Skip unreachable blocks.
    if (MBB.getNumber() == -1)
      continue;
    for (MachineInstr &MI : MBB)
      collectInstr(MI);
  }
}

// For the extenders with indexes in [Begin..End), all of which have the
// root ER, select the initializers (a value plus a subexpression) and record
// in IMap, for each initializer, the indexes of the extenders that will
// use it.
void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
      AssignmentMap &IMap) {
  // Basic correctness: make sure that all extenders in the range [Begin..End)
  // share the same root ER.
  for (unsigned I = Begin; I != End; ++I)
    assert(ER == ExtRoot(Extenders[I].getOp()));

  // Construct the list of ranges, such that for each P in Ranges[I],
  // a register Reg = ER+P can be used in place of Extender[I]. If the
  // instruction allows, uses in the form of Reg+Off are considered
  // (here, Off = required_value - P).
  std::vector<OffsetRange> Ranges(End-Begin);

  // For each extender that is a def, visit all uses of the defined register,
  // and produce an offset range that works for all uses. The def doesn't
  // have to be checked, because it can become dead if all uses can be updated
  // to use a different reg/offset.
  for (unsigned I = Begin; I != End; ++I) {
    const ExtDesc &ED = Extenders[I];
    if (!ED.IsDef)
      continue;
    ExtValue EV(ED);
    LLVM_DEBUG(dbgs() << " =" << I << ". " << EV << " " << ED << '\n');
    assert(ED.Rd.Reg != 0);
    Ranges[I-Begin] = getOffsetRange(ED.Rd).shift(EV.Offset);
    // A2_tfrsi is a special case: it will be replaced with A2_addi, which
    // has a 16-bit signed offset. This means that A2_tfrsi not only has a
    // range coming from its uses, but also from the fact that its replacement
    // has a range as well.
    if (ED.UseMI->getOpcode() == Hexagon::A2_tfrsi) {
      int32_t D = alignDown(32767, Ranges[I-Begin].Align); // XXX hardcoded
      Ranges[I-Begin].extendBy(-D).extendBy(D);
    }
  }

  // Visit all non-def extenders. For each one, determine the offset range
  // available for it.
  for (unsigned I = Begin; I != End; ++I) {
    const ExtDesc &ED = Extenders[I];
    if (ED.IsDef)
      continue;
    ExtValue EV(ED);
    LLVM_DEBUG(dbgs() << " " << I << ". " << EV << " " << ED << '\n');
    OffsetRange Dev = getOffsetRange(ED);
    Ranges[I-Begin].intersect(Dev.shift(EV.Offset));
  }

  // Here for each I there is a corresponding Range[I]. Construct the
  // inverse map, that to each range will assign the set of indexes in
  // [Begin..End) that this range corresponds to.
  std::map<OffsetRange, IndexList> RangeMap;
  for (unsigned I = Begin; I != End; ++I)
    RangeMap[Ranges[I-Begin]].insert(I);

  LLVM_DEBUG({
    dbgs() << "Ranges\n";
    for (unsigned I = Begin; I != End; ++I)
      dbgs() << " " << I << ". " << Ranges[I-Begin] << '\n';
    dbgs() << "RangeMap\n";
    for (auto &P : RangeMap) {
      dbgs() << " " << P.first << " ->";
      for (unsigned I : P.second)
        dbgs() << ' ' << I;
      dbgs() << '\n';
    }
  });

  // Select the definition points, and generate the assignment between
  // these points and the uses.

  // For each candidate offset, keep a pair CandData consisting of
  // the total number of ranges containing that candidate, and the
  // vector of corresponding RangeTree nodes.
  // NOTE(review): CandMap is declared here but not referenced again in this
  // function.
  using CandData = std::pair<unsigned, SmallVector<RangeTree::Node*,8>>;
  std::map<int32_t, CandData> CandMap;

  RangeTree Tree;
  for (const OffsetRange &R : Ranges)
    Tree.add(R);
  SmallVector<RangeTree::Node*,8> Nodes;
  Tree.order(Nodes);

  // Starting from (Align, Offset), walk Nodes in order and step up to each
  // node whose range has a strictly larger alignment and an offset that is
  // compatible with the current pair. Return the final (Align, Offset).
  auto MaxAlign = [](const SmallVectorImpl<RangeTree::Node*> &Nodes,
                     uint8_t Align, uint8_t Offset) {
    for (RangeTree::Node *N : Nodes) {
      if (N->Range.Align <= Align || N->Range.Offset < Offset)
        continue;
      if ((N->Range.Offset - Offset) % Align != 0)
        continue;
      Align = N->Range.Align;
      Offset = N->Range.Offset;
    }
    return std::make_pair(Align, Offset);
  };

  // Construct the set of all potential definition points from the endpoints
  // of the ranges. If a given endpoint also belongs to a different range,
  // but with a higher alignment, also consider the more-highly-aligned
  // value of this endpoint.
  std::set<int32_t> CandSet;
  for (RangeTree::Node *N : Nodes) {
    const OffsetRange &R = N->Range;
    auto P0 = MaxAlign(Tree.nodesWith(R.Min, false), R.Align, R.Offset);
    CandSet.insert(R.Min);
    if (R.Align < P0.first)
      CandSet.insert(adjustUp(R.Min, P0.first, P0.second));
    auto P1 = MaxAlign(Tree.nodesWith(R.Max, false), R.Align, R.Offset);
    CandSet.insert(R.Max);
    if (R.Align < P1.first)
      CandSet.insert(adjustDown(R.Max, P1.first, P1.second));
  }

  // Build the assignment map: candidate C -> { list of extender indexes }.
  // This has to be done iteratively:
  // - pick the candidate that covers the maximum number of extenders,
  // - add the candidate to the map,
  // - remove the extenders from the pool.
  while (true) {
    using CMap = std::map<int32_t,unsigned>;
    CMap Counts;
    // Recount the coverage of every remaining candidate, and drop the
    // candidates that no longer cover anything.
    for (auto It = CandSet.begin(), Et = CandSet.end(); It != Et; ) {
      auto &&V = Tree.nodesWith(*It);
      unsigned N = std::accumulate(V.begin(), V.end(), 0u,
                    [](unsigned Acc, const RangeTree::Node *N) {
                      return Acc + N->Count;
                    });
      if (N != 0)
        Counts.insert({*It, N});
      It = (N != 0) ? std::next(It) : CandSet.erase(It);
    }
    if (Counts.empty())
      break;

    // Find the best candidate with respect to the number of extenders covered.
    // Ties in the count are broken by comparing the (candidate, count) pairs.
    auto BestIt = llvm::max_element(
        Counts, [](const CMap::value_type &A, const CMap::value_type &B) {
          return A.second < B.second || (A.second == B.second && A < B);
        });
    int32_t Best = BestIt->first;
    ExtValue BestV(ER, Best);
    for (RangeTree::Node *N : Tree.nodesWith(Best)) {
      for (unsigned I : RangeMap[N->Range])
        IMap[{BestV,Extenders[I].Expr}].insert(I);
      Tree.erase(N);
    }
  }

  LLVM_DEBUG(dbgs() << "IMap (before fixup) = " << PrintIMap(IMap, *HRI));

  // There is some ambiguity in what initializer should be used, if the
  // descriptor's subexpression is non-trivial: it can be the entire
  // subexpression (which is what has been done so far), or it can be
  // the extender's value itself, if all corresponding extenders have the
  // exact value of the initializer (i.e. require offset of 0).

  // To reduce the number of initializers, merge such special cases.
  for (std::pair<const ExtenderInit,IndexList> &P : IMap) {
    // Skip trivial initializers.
    if (P.first.second.trivial())
      continue;
    // If the corresponding trivial initializer does not exist, skip this
    // entry.
    const ExtValue &EV = P.first.first;
    AssignmentMap::iterator F = IMap.find({EV, ExtExpr()});
    if (F == IMap.end())
      continue;

    // Finally, check if all extenders have the same value as the initializer.
    // Make sure that extenders that are a part of a stack address are not
    // merged with those that aren't. Stack addresses need an offset field
    // (to be used by frame index elimination), while non-stack expressions
    // can be replaced with forms (such as rr) that do not have such a field.
    // Example:
    //
    // Collected 3 extenders
    //  =2. imm:0 off:32968 bb#2: %7 = ## + __ << 0, def
    //   0. imm:0 off:267 bb#0: __ = ## + SS#1 << 0
    //   1. imm:0 off:267 bb#1: __ = ## + SS#1 << 0
    // Ranges
    //   0. [-756,267]a1+0
    //   1. [-756,267]a1+0
    //   2. [201,65735]a1+0
    // RangeMap
    //   [-756,267]a1+0 -> 0 1
    //   [201,65735]a1+0 -> 2
    // IMap (before fixup) = {
    //   [imm:0 off:267, ## + __ << 0] -> { 2 }
    //   [imm:0 off:267, ## + SS#1 << 0] -> { 0 1 }
    // }
    // IMap (after fixup) = {
    //   [imm:0 off:267, ## + __ << 0] -> { 2 0 1 }
    //   [imm:0 off:267, ## + SS#1 << 0] -> { }
    // }
    // Inserted def in bb#0 for initializer: [imm:0 off:267, ## + __ << 0]
    //   %12:intregs = A2_tfrsi 267
    //
    // The result was
    //   %12:intregs = A2_tfrsi 267
    //   S4_pstorerbt_rr %3, %12, %stack.1, 0, killed %4
    // Which became
    //   r0 = #267
    //   if (p0.new) memb(r0+r29<<#4) = r2

    bool IsStack = any_of(F->second, [this](unsigned I) {
                      return Extenders[I].Expr.Rs.isSlot();
                   });
    auto SameValue = [&EV,this,IsStack](unsigned I) {
      const ExtDesc &ED = Extenders[I];
      return ED.Expr.Rs.isSlot() == IsStack &&
             ExtValue(ED).Offset == EV.Offset;
    };
    // Move the indexes over to the trivial initializer, leaving this entry
    // with an empty list.
    if (all_of(P.second, SameValue)) {
      F->second.insert(P.second.begin(), P.second.end());
      P.second.clear();
    }
  }

  LLVM_DEBUG(dbgs() << "IMap (after fixup) = " << PrintIMap(IMap, *HRI));
}

// Determine where the initializer ExtI should be defined for the extenders
// whose indexes are in Refs, and append the (location, indexes) pair to Defs.
// Does nothing if Refs is empty.
void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
      LocDefList &Defs) {
  if (Refs.empty())
    return;

  // The placement calculation is somewhat simple right now: it finds a
  // single location for the def that dominates all refs. Since this may
  // place the def far from the uses, producing several locations for
  // defs that collectively dominate all refs could be better.
  // For now only do the single one.
  DenseSet<MachineBasicBlock*> Blocks;
  DenseSet<MachineInstr*> RefMIs;
  const ExtDesc &ED0 = Extenders[Refs[0]];
  MachineBasicBlock *DomB = ED0.UseMI->getParent();
  RefMIs.insert(ED0.UseMI);
  Blocks.insert(DomB);
  for (unsigned i = 1, e = Refs.size(); i != e; ++i) {
    const ExtDesc &ED = Extenders[Refs[i]];
    MachineBasicBlock *MBB = ED.UseMI->getParent();
    RefMIs.insert(ED.UseMI);
    DomB = MDT->findNearestCommonDominator(DomB, MBB);
    Blocks.insert(MBB);
  }

#ifndef NDEBUG
  // The block DomB should be dominated by the def of each register used
  // in the initializer.
  Register Rs = ExtI.second.Rs;  // Only one reg allowed now.
  const MachineInstr *DefI = Rs.isVReg() ? MRI->getVRegDef(Rs.Reg) : nullptr;

  // This should be guaranteed given that the entire expression is used
  // at each instruction in Refs. Add an assertion just in case.
  assert(!DefI || MDT->dominates(DefI->getParent(), DomB));
#endif

  MachineBasicBlock::iterator It;
  if (Blocks.count(DomB)) {
    // Try to find the latest possible location for the def.
    // That is the first instruction in DomB that is one of the refs.
    MachineBasicBlock::iterator End = DomB->end();
    for (It = DomB->begin(); It != End; ++It)
      if (RefMIs.count(&*It))
        break;
    assert(It != End && "Should have found a ref in DomB");
  } else {
    // DomB does not contain any refs.
    It = DomB->getFirstTerminator();
  }
  Loc DefLoc(DomB, It);
  Defs.emplace_back(DefLoc, Refs);
}

// Insert, before DefL.At in the block DefL.Block, the instruction(s) that
// compute the initializer ExtI into a newly created IntRegs virtual register.
// Return that register (with no subregister).
// The form of the code depends on the subexpression of ExtI: a stack slot
// (PS_fi), a trivial expression (A2_tfrsi), a register without a shift
// (A2_addi/A2_subri), or a shifted register (a single compound instruction
// if the subtarget has them, otherwise a shift followed by addi/subri).
HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) {
  llvm::Register DefR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
  MachineBasicBlock &MBB = *DefL.Block;
  MachineBasicBlock::iterator At = DefL.At;
  DebugLoc dl = DefL.Block->findDebugLoc(DefL.At);
  const ExtValue &EV = ExtI.first;
  MachineOperand ExtOp(EV);

  const ExtExpr &Ex = ExtI.second;
  const MachineInstr *InitI = nullptr;

  if (Ex.Rs.isSlot()) {
    assert(Ex.S == 0 && "Cannot have a shift of a stack slot");
    assert(!Ex.Neg && "Cannot subtract a stack slot");
    // DefR = PS_fi Rb,##EV
    InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::PS_fi), DefR)
              .add(MachineOperand(Ex.Rs))
              .add(ExtOp);
  } else {
    assert((Ex.Rs.Reg == 0 || Ex.Rs.isVReg()) && "Expecting virtual register");
    if (Ex.trivial()) {
      // DefR = ##EV
      InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_tfrsi), DefR)
                .add(ExtOp);
    } else if (Ex.S == 0) {
      if (Ex.Neg) {
        // DefR = sub(##EV,Rb)
        InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_subri), DefR)
                  .add(ExtOp)
                  .add(MachineOperand(Ex.Rs));
      } else {
        // DefR = add(Rb,##EV)
        InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_addi), DefR)
                  .add(MachineOperand(Ex.Rs))
                  .add(ExtOp);
      }
    } else {
      if (HST->useCompound()) {
        unsigned NewOpc = Ex.Neg ? Hexagon::S4_subi_asl_ri
                                 : Hexagon::S4_addi_asl_ri;
        // DefR = add(##EV,asl(Rb,S))
        InitI = BuildMI(MBB, At, dl, HII->get(NewOpc), DefR)
                  .add(ExtOp)
                  .add(MachineOperand(Ex.Rs))
                  .addImm(Ex.S);
      } else {
        // No compounds are available. It is not clear whether we should
        // even process such extenders where the initializer cannot be
        // a single instruction, but do it for now.
        llvm::Register TmpR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
        BuildMI(MBB, At, dl, HII->get(Hexagon::S2_asl_i_r), TmpR)
          .add(MachineOperand(Ex.Rs))
          .addImm(Ex.S);
        if (Ex.Neg)
          InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_subri), DefR)
                    .add(ExtOp)
                    .add(MachineOperand(Register(TmpR, 0)));
        else
          InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_addi), DefR)
                    .add(MachineOperand(Register(TmpR, 0)))
                    .add(ExtOp);
      }
    }
  }

  assert(InitI);
  (void)InitI;
  LLVM_DEBUG(dbgs() << "Inserted def in bb#" << MBB.getNumber()
                    << " for initializer: " << PrintInit(ExtI, *HRI) << "\n "
                    << *InitI);
  return { DefR, 0 };
}

// Replace the extender described by ED with the register ExtR, where ExtR
// holds exactly the value of the extender. A new instruction is built in
// front of ED.UseMI, and ED.UseMI is erased.
// Every handled form returns true; an instruction that matches none of the
// forms below ends in llvm_unreachable.
bool HCE::replaceInstrExact(const ExtDesc &ED, Register ExtR) {
  MachineInstr &MI = *ED.UseMI;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineBasicBlock::iterator At = MI.getIterator();
  DebugLoc dl = MI.getDebugLoc();
  unsigned ExtOpc = MI.getOpcode();

  // With a few exceptions, direct replacement amounts to creating an
  // instruction with a corresponding register opcode, with all operands
  // the same, except for the register used in place of the extender.
  unsigned RegOpc = getDirectRegReplacement(ExtOpc);

  if (RegOpc == TargetOpcode::REG_SEQUENCE) {
    // ExtR takes the place of the immediate half of the combine.
    if (ExtOpc == Hexagon::A4_combineri)
      BuildMI(MBB, At, dl, HII->get(RegOpc))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(Hexagon::isub_hi)
        .add(MachineOperand(ExtR))
        .addImm(Hexagon::isub_lo);
    else if (ExtOpc == Hexagon::A4_combineir)
      BuildMI(MBB, At, dl, HII->get(RegOpc))
        .add(MI.getOperand(0))
        .add(MachineOperand(ExtR))
        .addImm(Hexagon::isub_hi)
        .add(MI.getOperand(2))
        .addImm(Hexagon::isub_lo);
    else
      llvm_unreachable("Unexpected opcode became REG_SEQUENCE");
    MBB.erase(MI);
    return true;
  }
  if (ExtOpc == Hexagon::C2_cmpgei || ExtOpc == Hexagon::C2_cmpgeui) {
    // The replacement is a "less than" comparison that takes ExtR as the
    // first source and the original register as the second.
    unsigned NewOpc = ExtOpc == Hexagon::C2_cmpgei ? Hexagon::C2_cmplt
                                                   : Hexagon::C2_cmpltu;
    BuildMI(MBB, At, dl, HII->get(NewOpc))
      .add(MI.getOperand(0))
      .add(MachineOperand(ExtR))
      .add(MI.getOperand(1));
    MBB.erase(MI);
    return true;
  }

  if (RegOpc != 0) {
    MachineInstrBuilder MIB = BuildMI(MBB, At, dl, HII->get(RegOpc));
    unsigned RegN = ED.OpNum;
    // Copy all operands except the one that has the extender.
    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      if (i != RegN)
        MIB.add(MI.getOperand(i));
      else
        MIB.add(MachineOperand(ExtR));
    }
    MIB.cloneMemRefs(MI);
    MBB.erase(MI);
    return true;
  }

  if (MI.mayLoadOrStore() && !isStoreImmediate(ExtOpc)) {
    // For memory instructions, there is an asymmetry in the addressing
    // modes. Addressing modes allowing extenders can be replaced with
    // addressing modes that use registers, but the order of operands
    // (or even their number) may be different.
    // Replacements:
    //   BaseImmOffset (io)  -> BaseRegOffset (rr)
    //   BaseLongOffset (ur) -> BaseRegOffset (rr)
    unsigned RegOpc, Shift;
    unsigned AM = HII->getAddrMode(MI);
    if (AM == HexagonII::BaseImmOffset) {
      RegOpc = HII->changeAddrMode_io_rr(ExtOpc);
      Shift = 0;
    } else if (AM == HexagonII::BaseLongOffset) {
      // Loads:  Rd = L4_loadri_ur Rs, S, ##
      // Stores: S4_storeri_ur Rs, S, ##, Rt
      RegOpc = HII->changeAddrMode_ur_rr(ExtOpc);
      Shift = MI.getOperand(MI.mayLoad() ? 2 : 1).getImm();
    } else {
      llvm_unreachable("Unexpected addressing mode");
    }
#ifndef NDEBUG
    if (RegOpc == -1u) {
      dbgs() << "\nExtOpc: " << HII->getName(ExtOpc) << " has no rr version\n";
      llvm_unreachable("No corresponding rr instruction");
    }
#endif

    unsigned BaseP, OffP;
    HII->getBaseAndOffsetPosition(MI, BaseP, OffP);

    // Build an rr instruction: (RegOff + RegBase<<0)
    MachineInstrBuilder MIB = BuildMI(MBB, At, dl, HII->get(RegOpc));
    // First, add the def for loads.
    if (MI.mayLoad())
      MIB.add(getLoadResultOp(MI));
    // Handle possible predication.
    if (HII->isPredicated(MI))
      MIB.add(getPredicateOp(MI));
    // Build the address.
    MIB.add(MachineOperand(ExtR));      // RegOff
    MIB.add(MI.getOperand(BaseP));      // RegBase
    MIB.addImm(Shift);                  // << Shift
    // Add the stored value for stores.
    if (MI.mayStore())
      MIB.add(getStoredValueOp(MI));
    MIB.cloneMemRefs(MI);
    MBB.erase(MI);
    return true;
  }

#ifndef NDEBUG
  dbgs() << '\n' << MI;
#endif
  llvm_unreachable("Unhandled exact replacement");
  return false;
}

// Replace the extender ED with a form corresponding to the initializer ExtI.
// ExtR is the register holding the value of the initializer. A new
// instruction is built in front of ED.UseMI, and ED.UseMI is erased.
// Diff is both an input and an output: on entry it is the value that has to
// be added to ExtR to obtain the original value. The A2_tfrsi case subtracts
// from it the part that was placed in the new A2_addi, the A2_addi/A2_subri
// case sets it to 0, and the remaining cases leave it unchanged.
bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
      Register ExtR, int32_t &Diff) {
  MachineInstr &MI = *ED.UseMI;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineBasicBlock::iterator At = MI.getIterator();
  DebugLoc dl = MI.getDebugLoc();
  unsigned ExtOpc = MI.getOpcode();

  if (ExtOpc == Hexagon::A2_tfrsi) {
    // A2_tfrsi is a special case: it's replaced with A2_addi, which introduces
    // another range. One range is the one that's common to all tfrsi's uses,
    // this one is the range of immediates in A2_addi. When calculating ranges,
    // the addi's 16-bit argument was included, so now we need to make it such
    // that the produced value is in the range for the uses alone.
    // Most of the time, simply adding Diff will make the addi produce exact
    // result, but if Diff is outside of the 16-bit range, some adjustment
    // will be needed.
    unsigned IdxOpc = getRegOffOpcode(ExtOpc);
    assert(IdxOpc == Hexagon::A2_addi);

    // Clamp Diff to the 16 bit range.
    int32_t D = isInt<16>(Diff) ? Diff : (Diff > 0 ? 32767 : -32768);
    if (Diff > 32767) {
      // Split Diff into two values: one that is close to min/max int16,
      // and the other being the rest, and such that both have the same
      // "alignment" as Diff.
      uint32_t UD = Diff;
      OffsetRange R = getOffsetRange(MI.getOperand(0));
      uint32_t A = std::min<uint32_t>(R.Align, 1u << llvm::countr_zero(UD));
      D &= ~(A-1);
    }
    BuildMI(MBB, At, dl, HII->get(IdxOpc))
      .add(MI.getOperand(0))
      .add(MachineOperand(ExtR))
      .addImm(D);
    Diff -= D;
#ifndef NDEBUG
    // Make sure the output is within allowable range for uses.
    // "Diff" is a difference in the "opposite direction", i.e. Ext - DefV,
    // not DefV - Ext, as the getOffsetRange would calculate.
    OffsetRange Uses = getOffsetRange(MI.getOperand(0));
    if (!Uses.contains(-Diff))
      dbgs() << "Diff: " << -Diff << " out of range " << Uses
             << " for " << MI;
    assert(Uses.contains(-Diff));
#endif
    MBB.erase(MI);
    return true;
  }

  const ExtValue &EV = ExtI.first;  (void)EV;
  const ExtExpr &Ex = ExtI.second;  (void)Ex;

  if (ExtOpc == Hexagon::A2_addi || ExtOpc == Hexagon::A2_subri) {
    // If addi/subri are replaced with the exactly matching initializer,
    // they amount to COPY.
    // Check that the initializer is an exact match (for simplicity).
#ifndef NDEBUG
    bool IsAddi = ExtOpc == Hexagon::A2_addi;
    const MachineOperand &RegOp = MI.getOperand(IsAddi ? 1 : 2);
    const MachineOperand &ImmOp = MI.getOperand(IsAddi ? 2 : 1);
    assert(Ex.Rs == RegOp && EV == ImmOp && Ex.Neg != IsAddi &&
           "Initializer mismatch");
#endif
    BuildMI(MBB, At, dl, HII->get(TargetOpcode::COPY))
      .add(MI.getOperand(0))
      .add(MachineOperand(ExtR));
    Diff = 0;
    MBB.erase(MI);
    return true;
  }
  if (ExtOpc == Hexagon::M2_accii || ExtOpc == Hexagon::M2_naccii ||
      ExtOpc == Hexagon::S4_addaddi || ExtOpc == Hexagon::S4_subaddi) {
    // M2_accii:    add(Rt,add(Rs,V)) (tied)
    // M2_naccii:   sub(Rt,add(Rs,V))
    // S4_addaddi:  add(Rt,add(Rs,V))
    // S4_subaddi:  add(Rt,sub(V,Rs))
    // Check that Rs and V match the initializer expression. The Rs+V is the
    // combination that is considered "subexpression" for V, although Rx+V
    // would also be valid.
#ifndef NDEBUG
    bool IsSub = ExtOpc == Hexagon::S4_subaddi;
    Register Rs = MI.getOperand(IsSub ? 3 : 2);
    ExtValue V = MI.getOperand(IsSub ? 2 : 3);
    assert(EV == V && Rs == Ex.Rs && IsSub == Ex.Neg && "Initializer mismatch");
#endif
    // ExtR already holds the inner subexpression, so only the outer
    // add/sub with operand 1 remains.
    unsigned NewOpc = ExtOpc == Hexagon::M2_naccii ? Hexagon::A2_sub
                                                   : Hexagon::A2_add;
    BuildMI(MBB, At, dl, HII->get(NewOpc))
      .add(MI.getOperand(0))
      .add(MI.getOperand(1))
      .add(MachineOperand(ExtR));
    MBB.erase(MI);
    return true;
  }

  if (MI.mayLoadOrStore()) {
    unsigned IdxOpc = getRegOffOpcode(ExtOpc);
    assert(IdxOpc && "Expecting indexed opcode");
    MachineInstrBuilder MIB = BuildMI(MBB, At, dl, HII->get(IdxOpc));
    // Construct the new indexed instruction.
    // First, add the def for loads.
    if (MI.mayLoad())
      MIB.add(getLoadResultOp(MI));
    // Handle possible predication.
    if (HII->isPredicated(MI))
      MIB.add(getPredicateOp(MI));
    // Build the address.
    MIB.add(MachineOperand(ExtR));
    MIB.addImm(Diff);
    // Add the stored value for stores.
    if (MI.mayStore())
      MIB.add(getStoredValueOp(MI));
    MIB.cloneMemRefs(MI);
    MBB.erase(MI);
    return true;
  }

#ifndef NDEBUG
  dbgs() << '\n' << PrintInit(ExtI, *HRI) << " " << MI;
#endif
  llvm_unreachable("Unhandled expr replacement");
  return false;
}

// Replace the extender with index Idx with the register ExtR, which holds
// the value of the initializer ExtI.
// Returns false without making any changes when the -hexagon-cext-limit
// option was given and the limit has been reached; otherwise returns the
// result of the actual replacement.
bool HCE::replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI) {
  if (ReplaceLimit.getNumOccurrences()) {
    if (ReplaceLimit <= ReplaceCounter)
      return false;
    ++ReplaceCounter;
  }
  const ExtDesc &ED = Extenders[Idx];
  assert((!ED.IsDef || ED.Rd.Reg != 0) && "Missing Rd for def");
  const ExtValue &DefV = ExtI.first;
  assert(ExtRoot(ExtValue(ED)) == ExtRoot(DefV) && "Extender root mismatch");
  const ExtExpr &DefEx = ExtI.second;

  ExtValue EV(ED);
  int32_t Diff = EV.Offset - DefV.Offset;
  const MachineInstr &MI = *ED.UseMI;
  LLVM_DEBUG(dbgs() << __func__ << " Idx:" << Idx << " ExtR:"
                    << PrintRegister(ExtR, *HRI) << " Diff:" << Diff << '\n');

  // These two addressing modes must be converted into indexed forms
  // regardless of what the initializer looks like.
  bool IsAbs = false, IsAbsSet = false;
  if (MI.mayLoadOrStore()) {
    unsigned AM = HII->getAddrMode(MI);
    IsAbs = AM == HexagonII::Absolute;
    IsAbsSet = AM == HexagonII::AbsoluteSet;
  }

  // If it's a def, remember all operands that need to be updated.
  // If ED is a def, and Diff is not 0, then all uses of the register Rd
  // defined by ED must be in the form (Rd, imm), i.e. the immediate offset
  // must follow the Rd in the operand list.
  // This has to be collected before the replacement, since the replacement
  // erases ED.UseMI and can change Diff.
  std::vector<std::pair<MachineInstr*,unsigned>> RegOps;
  if (ED.IsDef && Diff != 0) {
    for (MachineOperand &Op : MRI->use_operands(ED.Rd.Reg)) {
      MachineInstr &UI = *Op.getParent();
      RegOps.push_back({&UI, getOperandIndex(UI, Op)});
    }
  }

  // Replace the instruction.
  bool Replaced = false;
  if (Diff == 0 && DefEx.trivial() && !IsAbs && !IsAbsSet)
    Replaced = replaceInstrExact(ED, ExtR);
  else
    Replaced = replaceInstrExpr(ED, ExtI, ExtR, Diff);

  if (Diff != 0 && Replaced && ED.IsDef) {
    // Update offsets of the def's uses.
    for (std::pair<MachineInstr*,unsigned> P : RegOps) {
      unsigned J = P.second;
      assert(P.first->getNumOperands() > J+1 &&
             P.first->getOperand(J+1).isImm());
      MachineOperand &ImmOp = P.first->getOperand(J+1);
      ImmOp.setImm(ImmOp.getImm() + Diff);
    }
    // If it was an absolute-set instruction, the "set" part has been removed.
    // ExtR will now be the register with the extended value, and since all
    // users of Rd have been updated, all that needs to be done is to replace
    // Rd with ExtR.
    if (IsAbsSet) {
      assert(ED.Rd.Sub == 0 && ExtR.Sub == 0);
      MRI->replaceRegWith(ED.Rd.Reg, ExtR.Reg);
    }
  }

  return Replaced;
}

// For each initializer in IMap that is assigned to at least CountThreshold
// extenders, calculate its placement, insert its definition, and replace
// the extenders with the defined register. Every register created for an
// initializer is appended to NewRegs.
// Returns true if at least one extender was replaced.
bool HCE::replaceExtenders(const AssignmentMap &IMap) {
  LocDefList Defs;
  bool Changed = false;

  for (const std::pair<const ExtenderInit, IndexList> &P : IMap) {
    const IndexList &Idxs = P.second;
    if (Idxs.size() < CountThreshold)
      continue;

    Defs.clear();
    calculatePlacement(P.first, Idxs, Defs);
    for (const std::pair<Loc,IndexList> &Q : Defs) {
      Register DefR = insertInitializer(Q.first, P.first);
      NewRegs.push_back(DefR.Reg);
      for (unsigned I : Q.second)
        Changed |= replaceInstr(I, DefR, P.first);
    }
  }
  return Changed;
}

// Return the index of Op in the operand list of MI. Op must be one of the
// operands of MI itself (operands are compared by address).
unsigned HCE::getOperandIndex(const MachineInstr &MI,
      const MachineOperand &Op) const {
  for (unsigned i = 0, n = MI.getNumOperands(); i != n; ++i)
    if (&MI.getOperand(i) == &Op)
      return i;
  llvm_unreachable("Not an operand of MI");
}

// Return the predicate operand of the predicated instruction MI: the first
// register use whose register class is PredRegs.
const MachineOperand &HCE::getPredicateOp(const MachineInstr &MI) const {
  assert(HII->isPredicated(MI));
  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg() || !Op.isUse() ||
        MRI->getRegClass(Op.getReg()) != &Hexagon::PredRegsRegClass)
      continue;
    assert(Op.getSubReg() == 0 && "Predicate register with a subregister");
    return Op;
  }
  llvm_unreachable("Predicate operand not found");
}

// Return the operand with the loaded value (operand 0) of the load MI.
const MachineOperand &HCE::getLoadResultOp(const MachineInstr &MI) const {
  assert(MI.mayLoad());
  return MI.getOperand(0);
}

// Return the operand with the stored value (the last explicit operand) of
// the store MI.
const MachineOperand &HCE::getStoredValueOp(const MachineInstr &MI) const {
  assert(MI.mayStore());
  return MI.getOperand(MI.getNumExplicitOperands()-1);
}

bool HCE::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
  if (MF.getFunction().hasPersonalityFn()) {
    LLVM_DEBUG(dbgs() << getPassName() << ": skipping " << MF.getName()
                      << " due to exception handling\n");
    return false;
  }
  LLVM_DEBUG(MF.print(dbgs() << "Before " << getPassName() << '\n', nullptr));

  HST = &MF.getSubtarget<HexagonSubtarget>();
  HII = HST->getInstrInfo();
  HRI = HST->getRegisterInfo();
  MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  MRI = &MF.getRegInfo();
  AssignmentMap IMap;

  collect(MF);
  llvm::sort(Extenders, [this](const ExtDesc &A, const ExtDesc &B) {
    ExtValue VA(A), VB(B);
    if (VA != VB)
      return VA < VB;
    const MachineInstr *MA = A.UseMI;
    const MachineInstr *MB = B.UseMI;
    if (MA ==
MB) {1991// If it's the same instruction, compare operand numbers.1992return A.OpNum < B.OpNum;1993}19941995const MachineBasicBlock *BA = MA->getParent();1996const MachineBasicBlock *BB = MB->getParent();1997assert(BA->getNumber() != -1 && BB->getNumber() != -1);1998if (BA != BB)1999return BA->getNumber() < BB->getNumber();2000return MDT->dominates(MA, MB);2001});20022003bool Changed = false;2004LLVM_DEBUG(dbgs() << "Collected " << Extenders.size() << " extenders\n");2005for (unsigned I = 0, E = Extenders.size(); I != E; ) {2006unsigned B = I;2007const ExtRoot &T = Extenders[B].getOp();2008while (I != E && ExtRoot(Extenders[I].getOp()) == T)2009++I;20102011IMap.clear();2012assignInits(T, B, I, IMap);2013Changed |= replaceExtenders(IMap);2014}20152016LLVM_DEBUG({2017if (Changed)2018MF.print(dbgs() << "After " << getPassName() << '\n', nullptr);2019else2020dbgs() << "No changes\n";2021});2022return Changed;2023}20242025FunctionPass *llvm::createHexagonConstExtenders() {2026return new HexagonConstExtenders();2027}202820292030