Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
35269 views
//===- ConstantHoisting.cpp - Prepare code for expensive constants --------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This pass identifies expensive constants to hoist and coalesces them to9// better prepare it for SelectionDAG-based code generation. This works around10// the limitations of the basic-block-at-a-time approach.11//12// First it scans all instructions for integer constants and calculates its13// cost. If the constant can be folded into the instruction (the cost is14// TCC_Free) or the cost is just a simple operation (TCC_BASIC), then we don't15// consider it expensive and leave it alone. This is the default behavior and16// the default implementation of getIntImmCostInst will always return TCC_Free.17//18// If the cost is more than TCC_BASIC, then the integer constant can't be folded19// into the instruction and it might be beneficial to hoist the constant.20// Similar constants are coalesced to reduce register pressure and21// materialization code.22//23// When a constant is hoisted, it is also hidden behind a bitcast to force it to24// be live-out of the basic block. Otherwise the constant would be just25// duplicated and each basic block would have its own copy in the SelectionDAG.26// The SelectionDAG recognizes such constants as opaque and doesn't perform27// certain transformations on them, which would create a new expensive constant.28//29// This optimization is only applied to integer constants in instructions and30// simple (this means not nested) constant cast expressions. For example:31// %0 = load i64* inttoptr (i64 big_constant to i64*)32//===----------------------------------------------------------------------===//3334#include "llvm/Transforms/Scalar/ConstantHoisting.h"35#include "llvm/ADT/APInt.h"36#include "llvm/ADT/DenseMap.h"37#include "llvm/ADT/SmallPtrSet.h"38#include "llvm/ADT/SmallVector.h"39#include "llvm/ADT/Statistic.h"40#include "llvm/Analysis/BlockFrequencyInfo.h"41#include "llvm/Analysis/ProfileSummaryInfo.h"42#include "llvm/Analysis/TargetTransformInfo.h"43#include "llvm/IR/BasicBlock.h"44#include "llvm/IR/Constants.h"45#include "llvm/IR/DataLayout.h"46#include "llvm/IR/DebugInfoMetadata.h"47#include "llvm/IR/Dominators.h"48#include "llvm/IR/Function.h"49#include "llvm/IR/InstrTypes.h"50#include "llvm/IR/Instruction.h"51#include "llvm/IR/Instructions.h"52#include "llvm/IR/IntrinsicInst.h"53#include "llvm/IR/Operator.h"54#include "llvm/IR/Value.h"55#include "llvm/InitializePasses.h"56#include "llvm/Pass.h"57#include "llvm/Support/BlockFrequency.h"58#include "llvm/Support/Casting.h"59#include "llvm/Support/CommandLine.h"60#include "llvm/Support/Debug.h"61#include "llvm/Support/raw_ostream.h"62#include "llvm/Transforms/Scalar.h"63#include "llvm/Transforms/Utils/Local.h"64#include "llvm/Transforms/Utils/SizeOpts.h"65#include <algorithm>66#include <cassert>67#include <cstdint>68#include <iterator>69#include <tuple>70#include <utility>7172using namespace llvm;73using namespace consthoist;7475#define DEBUG_TYPE "consthoist"7677STATISTIC(NumConstantsHoisted, "Number of constants hoisted");78STATISTIC(NumConstantsRebased, "Number of constants rebased");7980static cl::opt<bool> ConstHoistWithBlockFrequency(81"consthoist-with-block-frequency", cl::init(true), cl::Hidden,82cl::desc("Enable the use of the block frequency analysis to reduce the "83"chance to execute const materialization more frequently than "84"without hoisting."));8586static cl::opt<bool> ConstHoistGEP(87"consthoist-gep", cl::init(false), cl::Hidden,88cl::desc("Try hoisting constant gep expressions"));8990static cl::opt<unsigned>91MinNumOfDependentToRebase("consthoist-min-num-to-rebase",92cl::desc("Do not rebase if number of dependent constants of a Base is less "93"than this number."),94cl::init(0), cl::Hidden);9596namespace {9798/// The constant hoisting pass.99class ConstantHoistingLegacyPass : public FunctionPass {100public:101static char ID; // Pass identification, replacement for typeid102103ConstantHoistingLegacyPass() : FunctionPass(ID) {104initializeConstantHoistingLegacyPassPass(*PassRegistry::getPassRegistry());105}106107bool runOnFunction(Function &Fn) override;108109StringRef getPassName() const override { return "Constant Hoisting"; }110111void getAnalysisUsage(AnalysisUsage &AU) const override {112AU.setPreservesCFG();113if (ConstHoistWithBlockFrequency)114AU.addRequired<BlockFrequencyInfoWrapperPass>();115AU.addRequired<DominatorTreeWrapperPass>();116AU.addRequired<ProfileSummaryInfoWrapperPass>();117AU.addRequired<TargetTransformInfoWrapperPass>();118}119120private:121ConstantHoistingPass Impl;122};123124} // end anonymous namespace125126char ConstantHoistingLegacyPass::ID = 0;127128INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist",129"Constant Hoisting", false, false)130INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)131INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)132INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)133INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)134INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist",135"Constant Hoisting", false, false)136137FunctionPass *llvm::createConstantHoistingPass() {138return new ConstantHoistingLegacyPass();139}140141/// Perform the constant hoisting optimization for the given function.142bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) {143if (skipFunction(Fn))144return false;145146LLVM_DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n");147LLVM_DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n');148149bool MadeChange =150Impl.runImpl(Fn, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn),151getAnalysis<DominatorTreeWrapperPass>().getDomTree(),152ConstHoistWithBlockFrequency153? &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI()154: nullptr,155Fn.getEntryBlock(),156&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());157158LLVM_DEBUG(dbgs() << "********** End Constant Hoisting **********\n");159160return MadeChange;161}162163void ConstantHoistingPass::collectMatInsertPts(164const RebasedConstantListType &RebasedConstants,165SmallVectorImpl<BasicBlock::iterator> &MatInsertPts) const {166for (const RebasedConstantInfo &RCI : RebasedConstants)167for (const ConstantUser &U : RCI.Uses)168MatInsertPts.emplace_back(findMatInsertPt(U.Inst, U.OpndIdx));169}170171/// Find the constant materialization insertion point.172BasicBlock::iterator ConstantHoistingPass::findMatInsertPt(Instruction *Inst,173unsigned Idx) const {174// If the operand is a cast instruction, then we have to materialize the175// constant before the cast instruction.176if (Idx != ~0U) {177Value *Opnd = Inst->getOperand(Idx);178if (auto CastInst = dyn_cast<Instruction>(Opnd))179if (CastInst->isCast())180return CastInst->getIterator();181}182183// The simple and common case. This also includes constant expressions.184if (!isa<PHINode>(Inst) && !Inst->isEHPad())185return Inst->getIterator();186187// We can't insert directly before a phi node or an eh pad. Insert before188// the terminator of the incoming or dominating block.189assert(Entry != Inst->getParent() && "PHI or landing pad in entry block!");190BasicBlock *InsertionBlock = nullptr;191if (Idx != ~0U && isa<PHINode>(Inst)) {192InsertionBlock = cast<PHINode>(Inst)->getIncomingBlock(Idx);193if (!InsertionBlock->isEHPad()) {194return InsertionBlock->getTerminator()->getIterator();195}196} else {197InsertionBlock = Inst->getParent();198}199200// This must be an EH pad. Iterate over immediate dominators until we find a201// non-EH pad. We need to skip over catchswitch blocks, which are both EH pads202// and terminators.203auto *IDom = DT->getNode(InsertionBlock)->getIDom();204while (IDom->getBlock()->isEHPad()) {205assert(Entry != IDom->getBlock() && "eh pad in entry block");206IDom = IDom->getIDom();207}208209return IDom->getBlock()->getTerminator()->getIterator();210}211212/// Given \p BBs as input, find another set of BBs which collectively213/// dominates \p BBs and have the minimal sum of frequencies. Return the BB214/// set found in \p BBs.215static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,216BasicBlock *Entry,217SetVector<BasicBlock *> &BBs) {218assert(!BBs.count(Entry) && "Assume Entry is not in BBs");219// Nodes on the current path to the root.220SmallPtrSet<BasicBlock *, 8> Path;221// Candidates includes any block 'BB' in set 'BBs' that is not strictly222// dominated by any other blocks in set 'BBs', and all nodes in the path223// in the dominator tree from Entry to 'BB'.224SmallPtrSet<BasicBlock *, 16> Candidates;225for (auto *BB : BBs) {226// Ignore unreachable basic blocks.227if (!DT.isReachableFromEntry(BB))228continue;229Path.clear();230// Walk up the dominator tree until Entry or another BB in BBs231// is reached. Insert the nodes on the way to the Path.232BasicBlock *Node = BB;233// The "Path" is a candidate path to be added into Candidates set.234bool isCandidate = false;235do {236Path.insert(Node);237if (Node == Entry || Candidates.count(Node)) {238isCandidate = true;239break;240}241assert(DT.getNode(Node)->getIDom() &&242"Entry doens't dominate current Node");243Node = DT.getNode(Node)->getIDom()->getBlock();244} while (!BBs.count(Node));245246// If isCandidate is false, Node is another Block in BBs dominating247// current 'BB'. Drop the nodes on the Path.248if (!isCandidate)249continue;250251// Add nodes on the Path into Candidates.252Candidates.insert(Path.begin(), Path.end());253}254255// Sort the nodes in Candidates in top-down order and save the nodes256// in Orders.257unsigned Idx = 0;258SmallVector<BasicBlock *, 16> Orders;259Orders.push_back(Entry);260while (Idx != Orders.size()) {261BasicBlock *Node = Orders[Idx++];262for (auto *ChildDomNode : DT.getNode(Node)->children()) {263if (Candidates.count(ChildDomNode->getBlock()))264Orders.push_back(ChildDomNode->getBlock());265}266}267268// Visit Orders in bottom-up order.269using InsertPtsCostPair =270std::pair<SetVector<BasicBlock *>, BlockFrequency>;271272// InsertPtsMap is a map from a BB to the best insertion points for the273// subtree of BB (subtree not including the BB itself).274DenseMap<BasicBlock *, InsertPtsCostPair> InsertPtsMap;275InsertPtsMap.reserve(Orders.size() + 1);276for (BasicBlock *Node : llvm::reverse(Orders)) {277bool NodeInBBs = BBs.count(Node);278auto &InsertPts = InsertPtsMap[Node].first;279BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second;280281// Return the optimal insert points in BBs.282if (Node == Entry) {283BBs.clear();284if (InsertPtsFreq > BFI.getBlockFreq(Node) ||285(InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1))286BBs.insert(Entry);287else288BBs.insert(InsertPts.begin(), InsertPts.end());289break;290}291292BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock();293// Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child294// will update its parent's ParentInsertPts and ParentPtsFreq.295auto &ParentInsertPts = InsertPtsMap[Parent].first;296BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second;297// Choose to insert in Node or in subtree of Node.298// Don't hoist to EHPad because we may not find a proper place to insert299// in EHPad.300// If the total frequency of InsertPts is the same as the frequency of the301// target Node, and InsertPts contains more than one nodes, choose hoisting302// to reduce code size.303if (NodeInBBs ||304(!Node->isEHPad() &&305(InsertPtsFreq > BFI.getBlockFreq(Node) ||306(InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1)))) {307ParentInsertPts.insert(Node);308ParentPtsFreq += BFI.getBlockFreq(Node);309} else {310ParentInsertPts.insert(InsertPts.begin(), InsertPts.end());311ParentPtsFreq += InsertPtsFreq;312}313}314}315316/// Find an insertion point that dominates all uses.317SetVector<BasicBlock::iterator>318ConstantHoistingPass::findConstantInsertionPoint(319const ConstantInfo &ConstInfo,320const ArrayRef<BasicBlock::iterator> MatInsertPts) const {321assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry.");322// Collect all basic blocks.323SetVector<BasicBlock *> BBs;324SetVector<BasicBlock::iterator> InsertPts;325326for (BasicBlock::iterator MatInsertPt : MatInsertPts)327BBs.insert(MatInsertPt->getParent());328329if (BBs.count(Entry)) {330InsertPts.insert(Entry->begin());331return InsertPts;332}333334if (BFI) {335findBestInsertionSet(*DT, *BFI, Entry, BBs);336for (BasicBlock *BB : BBs)337InsertPts.insert(BB->getFirstInsertionPt());338return InsertPts;339}340341while (BBs.size() >= 2) {342BasicBlock *BB, *BB1, *BB2;343BB1 = BBs.pop_back_val();344BB2 = BBs.pop_back_val();345BB = DT->findNearestCommonDominator(BB1, BB2);346if (BB == Entry) {347InsertPts.insert(Entry->begin());348return InsertPts;349}350BBs.insert(BB);351}352assert((BBs.size() == 1) && "Expected only one element.");353Instruction &FirstInst = (*BBs.begin())->front();354InsertPts.insert(findMatInsertPt(&FirstInst));355return InsertPts;356}357358/// Record constant integer ConstInt for instruction Inst at operand359/// index Idx.360///361/// The operand at index Idx is not necessarily the constant integer itself. It362/// could also be a cast instruction or a constant expression that uses the363/// constant integer.364void ConstantHoistingPass::collectConstantCandidates(365ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx,366ConstantInt *ConstInt) {367if (ConstInt->getType()->isVectorTy())368return;369370InstructionCost Cost;371// Ask the target about the cost of materializing the constant for the given372// instruction and operand index.373if (auto IntrInst = dyn_cast<IntrinsicInst>(Inst))374Cost = TTI->getIntImmCostIntrin(IntrInst->getIntrinsicID(), Idx,375ConstInt->getValue(), ConstInt->getType(),376TargetTransformInfo::TCK_SizeAndLatency);377else378Cost = TTI->getIntImmCostInst(379Inst->getOpcode(), Idx, ConstInt->getValue(), ConstInt->getType(),380TargetTransformInfo::TCK_SizeAndLatency, Inst);381382// Ignore cheap integer constants.383if (Cost > TargetTransformInfo::TCC_Basic) {384ConstCandMapType::iterator Itr;385bool Inserted;386ConstPtrUnionType Cand = ConstInt;387std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(Cand, 0));388if (Inserted) {389ConstIntCandVec.push_back(ConstantCandidate(ConstInt));390Itr->second = ConstIntCandVec.size() - 1;391}392ConstIntCandVec[Itr->second].addUser(Inst, Idx, *Cost.getValue());393LLVM_DEBUG(if (isa<ConstantInt>(Inst->getOperand(Idx))) dbgs()394<< "Collect constant " << *ConstInt << " from " << *Inst395<< " with cost " << Cost << '\n';396else dbgs() << "Collect constant " << *ConstInt397<< " indirectly from " << *Inst << " via "398<< *Inst->getOperand(Idx) << " with cost " << Cost399<< '\n';);400}401}402403/// Record constant GEP expression for instruction Inst at operand index Idx.404void ConstantHoistingPass::collectConstantCandidates(405ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx,406ConstantExpr *ConstExpr) {407// TODO: Handle vector GEPs408if (ConstExpr->getType()->isVectorTy())409return;410411GlobalVariable *BaseGV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));412if (!BaseGV)413return;414415// Get offset from the base GV.416PointerType *GVPtrTy = cast<PointerType>(BaseGV->getType());417IntegerType *OffsetTy = DL->getIndexType(*Ctx, GVPtrTy->getAddressSpace());418APInt Offset(DL->getTypeSizeInBits(OffsetTy), /*val*/ 0, /*isSigned*/ true);419auto *GEPO = cast<GEPOperator>(ConstExpr);420421// TODO: If we have a mix of inbounds and non-inbounds GEPs, then basing a422// non-inbounds GEP on an inbounds GEP is potentially incorrect. Restrict to423// inbounds GEP for now -- alternatively, we could drop inbounds from the424// constant expression,425if (!GEPO->isInBounds())426return;427428if (!GEPO->accumulateConstantOffset(*DL, Offset))429return;430431if (!Offset.isIntN(32))432return;433434// A constant GEP expression that has a GlobalVariable as base pointer is435// usually lowered to a load from constant pool. Such operation is unlikely436// to be cheaper than compute it by <Base + Offset>, which can be lowered to437// an ADD instruction or folded into Load/Store instruction.438InstructionCost Cost =439TTI->getIntImmCostInst(Instruction::Add, 1, Offset, OffsetTy,440TargetTransformInfo::TCK_SizeAndLatency, Inst);441ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV];442ConstCandMapType::iterator Itr;443bool Inserted;444ConstPtrUnionType Cand = ConstExpr;445std::tie(Itr, Inserted) = ConstCandMap.insert(std::make_pair(Cand, 0));446if (Inserted) {447ExprCandVec.push_back(ConstantCandidate(448ConstantInt::get(Type::getInt32Ty(*Ctx), Offset.getLimitedValue()),449ConstExpr));450Itr->second = ExprCandVec.size() - 1;451}452ExprCandVec[Itr->second].addUser(Inst, Idx, *Cost.getValue());453}454455/// Check the operand for instruction Inst at index Idx.456void ConstantHoistingPass::collectConstantCandidates(457ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx) {458Value *Opnd = Inst->getOperand(Idx);459460// Visit constant integers.461if (auto ConstInt = dyn_cast<ConstantInt>(Opnd)) {462collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);463return;464}465466// Visit cast instructions that have constant integers.467if (auto CastInst = dyn_cast<Instruction>(Opnd)) {468// Only visit cast instructions, which have been skipped. All other469// instructions should have already been visited.470if (!CastInst->isCast())471return;472473if (auto *ConstInt = dyn_cast<ConstantInt>(CastInst->getOperand(0))) {474// Pretend the constant is directly used by the instruction and ignore475// the cast instruction.476collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);477return;478}479}480481// Visit constant expressions that have constant integers.482if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {483// Handle constant gep expressions.484if (ConstHoistGEP && isa<GEPOperator>(ConstExpr))485collectConstantCandidates(ConstCandMap, Inst, Idx, ConstExpr);486487// Only visit constant cast expressions.488if (!ConstExpr->isCast())489return;490491if (auto ConstInt = dyn_cast<ConstantInt>(ConstExpr->getOperand(0))) {492// Pretend the constant is directly used by the instruction and ignore493// the constant expression.494collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);495return;496}497}498}499500/// Scan the instruction for expensive integer constants and record them501/// in the constant candidate vector.502void ConstantHoistingPass::collectConstantCandidates(503ConstCandMapType &ConstCandMap, Instruction *Inst) {504// Skip all cast instructions. They are visited indirectly later on.505if (Inst->isCast())506return;507508// Scan all operands.509for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {510// The cost of materializing the constants (defined in511// `TargetTransformInfo::getIntImmCostInst`) for instructions which only512// take constant variables is lower than `TargetTransformInfo::TCC_Basic`.513// So it's safe for us to collect constant candidates from all514// IntrinsicInsts.515if (canReplaceOperandWithVariable(Inst, Idx)) {516collectConstantCandidates(ConstCandMap, Inst, Idx);517}518} // end of for all operands519}520521/// Collect all integer constants in the function that cannot be folded522/// into an instruction itself.523void ConstantHoistingPass::collectConstantCandidates(Function &Fn) {524ConstCandMapType ConstCandMap;525for (BasicBlock &BB : Fn) {526// Ignore unreachable basic blocks.527if (!DT->isReachableFromEntry(&BB))528continue;529for (Instruction &Inst : BB)530if (!TTI->preferToKeepConstantsAttached(Inst, Fn))531collectConstantCandidates(ConstCandMap, &Inst);532}533}534535// This helper function is necessary to deal with values that have different536// bit widths (APInt Operator- does not like that). If the value cannot be537// represented in uint64 we return an "empty" APInt. This is then interpreted538// as the value is not in range.539static std::optional<APInt> calculateOffsetDiff(const APInt &V1,540const APInt &V2) {541std::optional<APInt> Res;542unsigned BW = V1.getBitWidth() > V2.getBitWidth() ?543V1.getBitWidth() : V2.getBitWidth();544uint64_t LimVal1 = V1.getLimitedValue();545uint64_t LimVal2 = V2.getLimitedValue();546547if (LimVal1 == ~0ULL || LimVal2 == ~0ULL)548return Res;549550uint64_t Diff = LimVal1 - LimVal2;551return APInt(BW, Diff, true);552}553554// From a list of constants, one needs to picked as the base and the other555// constants will be transformed into an offset from that base constant. The556// question is which we can pick best? For example, consider these constants557// and their number of uses:558//559// Constants| 2 | 4 | 12 | 42 |560// NumUses | 3 | 2 | 8 | 7 |561//562// Selecting constant 12 because it has the most uses will generate negative563// offsets for constants 2 and 4 (i.e. -10 and -8 respectively). If negative564// offsets lead to less optimal code generation, then there might be better565// solutions. Suppose immediates in the range of 0..35 are most optimally566// supported by the architecture, then selecting constant 2 is most optimal567// because this will generate offsets: 0, 2, 10, 40. Offsets 0, 2 and 10 are in568// range 0..35, and thus 3 + 2 + 8 = 13 uses are in range. Selecting 12 would569// have only 8 uses in range, so choosing 2 as a base is more optimal. Thus, in570// selecting the base constant the range of the offsets is a very important571// factor too that we take into account here. This algorithm calculates a total572// costs for selecting a constant as the base and substract the costs if573// immediates are out of range. It has quadratic complexity, so we call this574// function only when we're optimising for size and there are less than 100575// constants, we fall back to the straightforward algorithm otherwise576// which does not do all the offset calculations.577unsigned578ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,579ConstCandVecType::iterator E,580ConstCandVecType::iterator &MaxCostItr) {581unsigned NumUses = 0;582583if (!OptForSize || std::distance(S,E) > 100) {584for (auto ConstCand = S; ConstCand != E; ++ConstCand) {585NumUses += ConstCand->Uses.size();586if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)587MaxCostItr = ConstCand;588}589return NumUses;590}591592LLVM_DEBUG(dbgs() << "== Maximize constants in range ==\n");593InstructionCost MaxCost = -1;594for (auto ConstCand = S; ConstCand != E; ++ConstCand) {595auto Value = ConstCand->ConstInt->getValue();596Type *Ty = ConstCand->ConstInt->getType();597InstructionCost Cost = 0;598NumUses += ConstCand->Uses.size();599LLVM_DEBUG(dbgs() << "= Constant: " << ConstCand->ConstInt->getValue()600<< "\n");601602for (auto User : ConstCand->Uses) {603unsigned Opcode = User.Inst->getOpcode();604unsigned OpndIdx = User.OpndIdx;605Cost += TTI->getIntImmCostInst(Opcode, OpndIdx, Value, Ty,606TargetTransformInfo::TCK_SizeAndLatency);607LLVM_DEBUG(dbgs() << "Cost: " << Cost << "\n");608609for (auto C2 = S; C2 != E; ++C2) {610std::optional<APInt> Diff = calculateOffsetDiff(611C2->ConstInt->getValue(), ConstCand->ConstInt->getValue());612if (Diff) {613const InstructionCost ImmCosts =614TTI->getIntImmCodeSizeCost(Opcode, OpndIdx, *Diff, Ty);615Cost -= ImmCosts;616LLVM_DEBUG(dbgs() << "Offset " << *Diff << " "617<< "has penalty: " << ImmCosts << "\n"618<< "Adjusted cost: " << Cost << "\n");619}620}621}622LLVM_DEBUG(dbgs() << "Cumulative cost: " << Cost << "\n");623if (Cost > MaxCost) {624MaxCost = Cost;625MaxCostItr = ConstCand;626LLVM_DEBUG(dbgs() << "New candidate: " << MaxCostItr->ConstInt->getValue()627<< "\n");628}629}630return NumUses;631}632633/// Find the base constant within the given range and rebase all other634/// constants with respect to the base constant.635void ConstantHoistingPass::findAndMakeBaseConstant(636ConstCandVecType::iterator S, ConstCandVecType::iterator E,637SmallVectorImpl<consthoist::ConstantInfo> &ConstInfoVec) {638auto MaxCostItr = S;639unsigned NumUses = maximizeConstantsInRange(S, E, MaxCostItr);640641// Don't hoist constants that have only one use.642if (NumUses <= 1)643return;644645ConstantInt *ConstInt = MaxCostItr->ConstInt;646ConstantExpr *ConstExpr = MaxCostItr->ConstExpr;647ConstantInfo ConstInfo;648ConstInfo.BaseInt = ConstInt;649ConstInfo.BaseExpr = ConstExpr;650Type *Ty = ConstInt->getType();651652// Rebase the constants with respect to the base constant.653for (auto ConstCand = S; ConstCand != E; ++ConstCand) {654APInt Diff = ConstCand->ConstInt->getValue() - ConstInt->getValue();655Constant *Offset = Diff == 0 ? nullptr : ConstantInt::get(Ty, Diff);656Type *ConstTy =657ConstCand->ConstExpr ? ConstCand->ConstExpr->getType() : nullptr;658ConstInfo.RebasedConstants.push_back(659RebasedConstantInfo(std::move(ConstCand->Uses), Offset, ConstTy));660}661ConstInfoVec.push_back(std::move(ConstInfo));662}663664/// Finds and combines constant candidates that can be easily665/// rematerialized with an add from a common base constant.666void ConstantHoistingPass::findBaseConstants(GlobalVariable *BaseGV) {667// If BaseGV is nullptr, find base among candidate constant integers;668// Otherwise find base among constant GEPs that share the same BaseGV.669ConstCandVecType &ConstCandVec = BaseGV ?670ConstGEPCandMap[BaseGV] : ConstIntCandVec;671ConstInfoVecType &ConstInfoVec = BaseGV ?672ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;673674// Sort the constants by value and type. This invalidates the mapping!675llvm::stable_sort(ConstCandVec, [](const ConstantCandidate &LHS,676const ConstantCandidate &RHS) {677if (LHS.ConstInt->getType() != RHS.ConstInt->getType())678return LHS.ConstInt->getBitWidth() < RHS.ConstInt->getBitWidth();679return LHS.ConstInt->getValue().ult(RHS.ConstInt->getValue());680});681682// Simple linear scan through the sorted constant candidate vector for viable683// merge candidates.684auto MinValItr = ConstCandVec.begin();685for (auto CC = std::next(ConstCandVec.begin()), E = ConstCandVec.end();686CC != E; ++CC) {687if (MinValItr->ConstInt->getType() == CC->ConstInt->getType()) {688Type *MemUseValTy = nullptr;689for (auto &U : CC->Uses) {690auto *UI = U.Inst;691if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {692MemUseValTy = LI->getType();693break;694} else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {695// Make sure the constant is used as pointer operand of the StoreInst.696if (SI->getPointerOperand() == SI->getOperand(U.OpndIdx)) {697MemUseValTy = SI->getValueOperand()->getType();698break;699}700}701}702703// Check if the constant is in range of an add with immediate.704APInt Diff = CC->ConstInt->getValue() - MinValItr->ConstInt->getValue();705if ((Diff.getBitWidth() <= 64) &&706TTI->isLegalAddImmediate(Diff.getSExtValue()) &&707// Check if Diff can be used as offset in addressing mode of the user708// memory instruction.709(!MemUseValTy || TTI->isLegalAddressingMode(MemUseValTy,710/*BaseGV*/nullptr, /*BaseOffset*/Diff.getSExtValue(),711/*HasBaseReg*/true, /*Scale*/0)))712continue;713}714// We either have now a different constant type or the constant is not in715// range of an add with immediate anymore.716findAndMakeBaseConstant(MinValItr, CC, ConstInfoVec);717// Start a new base constant search.718MinValItr = CC;719}720// Finalize the last base constant search.721findAndMakeBaseConstant(MinValItr, ConstCandVec.end(), ConstInfoVec);722}723724/// Updates the operand at Idx in instruction Inst with the result of725/// instruction Mat. If the instruction is a PHI node then special726/// handling for duplicate values from the same incoming basic block is727/// required.728/// \return The update will always succeed, but the return value indicated if729/// Mat was used for the update or not.730static bool updateOperand(Instruction *Inst, unsigned Idx, Instruction *Mat) {731if (auto PHI = dyn_cast<PHINode>(Inst)) {732// Check if any previous operand of the PHI node has the same incoming basic733// block. This is a very odd case that happens when the incoming basic block734// has a switch statement. In this case use the same value as the previous735// operand(s), otherwise we will fail verification due to different values.736// The values are actually the same, but the variable names are different737// and the verifier doesn't like that.738BasicBlock *IncomingBB = PHI->getIncomingBlock(Idx);739for (unsigned i = 0; i < Idx; ++i) {740if (PHI->getIncomingBlock(i) == IncomingBB) {741Value *IncomingVal = PHI->getIncomingValue(i);742Inst->setOperand(Idx, IncomingVal);743return false;744}745}746}747748Inst->setOperand(Idx, Mat);749return true;750}751752/// Emit materialization code for all rebased constants and update their753/// users.754void ConstantHoistingPass::emitBaseConstants(Instruction *Base,755UserAdjustment *Adj) {756Instruction *Mat = Base;757758// The same offset can be dereferenced to different types in nested struct.759if (!Adj->Offset && Adj->Ty && Adj->Ty != Base->getType())760Adj->Offset = ConstantInt::get(Type::getInt32Ty(*Ctx), 0);761762if (Adj->Offset) {763if (Adj->Ty) {764// Constant being rebased is a ConstantExpr.765Mat = GetElementPtrInst::Create(Type::getInt8Ty(*Ctx), Base, Adj->Offset,766"mat_gep", Adj->MatInsertPt);767// Hide it behind a bitcast.768Mat = new BitCastInst(Mat, Adj->Ty, "mat_bitcast",769Adj->MatInsertPt->getIterator());770} else771// Constant being rebased is a ConstantInt.772Mat =773BinaryOperator::Create(Instruction::Add, Base, Adj->Offset,774"const_mat", Adj->MatInsertPt->getIterator());775776LLVM_DEBUG(dbgs() << "Materialize constant (" << *Base->getOperand(0)777<< " + " << *Adj->Offset << ") in BB "778<< Mat->getParent()->getName() << '\n'779<< *Mat << '\n');780Mat->setDebugLoc(Adj->User.Inst->getDebugLoc());781}782Value *Opnd = Adj->User.Inst->getOperand(Adj->User.OpndIdx);783784// Visit constant integer.785if (isa<ConstantInt>(Opnd)) {786LLVM_DEBUG(dbgs() << "Update: " << *Adj->User.Inst << '\n');787if (!updateOperand(Adj->User.Inst, Adj->User.OpndIdx, Mat) && Adj->Offset)788Mat->eraseFromParent();789LLVM_DEBUG(dbgs() << "To : " << *Adj->User.Inst << '\n');790return;791}792793// Visit cast instruction.794if (auto CastInst = dyn_cast<Instruction>(Opnd)) {795assert(CastInst->isCast() && "Expected an cast instruction!");796// Check if we already have visited this cast instruction before to avoid797// unnecessary cloning.798Instruction *&ClonedCastInst = ClonedCastMap[CastInst];799if (!ClonedCastInst) {800ClonedCastInst = CastInst->clone();801ClonedCastInst->setOperand(0, Mat);802ClonedCastInst->insertAfter(CastInst);803// Use the same debug location as the original cast instruction.804ClonedCastInst->setDebugLoc(CastInst->getDebugLoc());805LLVM_DEBUG(dbgs() << "Clone instruction: " << *CastInst << '\n'806<< "To : " << *ClonedCastInst << '\n');807}808809LLVM_DEBUG(dbgs() << "Update: " << *Adj->User.Inst << '\n');810updateOperand(Adj->User.Inst, Adj->User.OpndIdx, ClonedCastInst);811LLVM_DEBUG(dbgs() << "To : " << *Adj->User.Inst << '\n');812return;813}814815// Visit constant expression.816if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {817if (isa<GEPOperator>(ConstExpr)) {818// Operand is a ConstantGEP, replace it.819updateOperand(Adj->User.Inst, Adj->User.OpndIdx, Mat);820return;821}822823// Aside from constant GEPs, only constant cast expressions are collected.824assert(ConstExpr->isCast() && "ConstExpr should be a cast");825Instruction *ConstExprInst = ConstExpr->getAsInstruction();826ConstExprInst->insertBefore(Adj->MatInsertPt);827ConstExprInst->setOperand(0, Mat);828829// Use the same debug location as the instruction we are about to update.830ConstExprInst->setDebugLoc(Adj->User.Inst->getDebugLoc());831832LLVM_DEBUG(dbgs() << "Create instruction: " << *ConstExprInst << '\n'833<< "From : " << *ConstExpr << '\n');834LLVM_DEBUG(dbgs() << "Update: " << *Adj->User.Inst << '\n');835if (!updateOperand(Adj->User.Inst, Adj->User.OpndIdx, ConstExprInst)) {836ConstExprInst->eraseFromParent();837if (Adj->Offset)838Mat->eraseFromParent();839}840LLVM_DEBUG(dbgs() << "To : " << *Adj->User.Inst << '\n');841return;842}843}844845/// Hoist and hide the base constant behind a bitcast and emit846/// materialization code for derived constants.847bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {848bool MadeChange = false;849SmallVectorImpl<consthoist::ConstantInfo> &ConstInfoVec =850BaseGV ? ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;851for (const consthoist::ConstantInfo &ConstInfo : ConstInfoVec) {852SmallVector<BasicBlock::iterator, 4> MatInsertPts;853collectMatInsertPts(ConstInfo.RebasedConstants, MatInsertPts);854SetVector<BasicBlock::iterator> IPSet =855findConstantInsertionPoint(ConstInfo, MatInsertPts);856// We can have an empty set if the function contains unreachable blocks.857if (IPSet.empty())858continue;859860unsigned UsesNum = 0;861unsigned ReBasesNum = 0;862unsigned NotRebasedNum = 0;863for (const BasicBlock::iterator &IP : IPSet) {864// First, collect constants depending on this IP of the base.865UsesNum = 0;866SmallVector<UserAdjustment, 4> ToBeRebased;867unsigned MatCtr = 0;868for (auto const &RCI : ConstInfo.RebasedConstants) {869UsesNum += RCI.Uses.size();870for (auto const &U : RCI.Uses) {871const BasicBlock::iterator &MatInsertPt = MatInsertPts[MatCtr++];872BasicBlock *OrigMatInsertBB = MatInsertPt->getParent();873// If Base constant is to be inserted in multiple places,874// generate rebase for U using the Base dominating U.875if (IPSet.size() == 1 ||876DT->dominates(IP->getParent(), OrigMatInsertBB))877ToBeRebased.emplace_back(RCI.Offset, RCI.Ty, MatInsertPt, U);878}879}880881// If only few constants depend on this IP of base, skip rebasing,882// assuming the base and the rebased have the same materialization cost.883if (ToBeRebased.size() < MinNumOfDependentToRebase) {884NotRebasedNum += ToBeRebased.size();885continue;886}887888// Emit an instance of the base at this IP.889Instruction *Base = nullptr;890// Hoist and hide the base constant behind a bitcast.891if (ConstInfo.BaseExpr) {892assert(BaseGV && "A base constant expression must have an base GV");893Type *Ty = ConstInfo.BaseExpr->getType();894Base = new BitCastInst(ConstInfo.BaseExpr, Ty, "const", IP);895} else {896IntegerType *Ty = ConstInfo.BaseInt->getIntegerType();897Base = new BitCastInst(ConstInfo.BaseInt, Ty, "const", IP);898}899900Base->setDebugLoc(IP->getDebugLoc());901902LLVM_DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseInt903<< ") to BB " << IP->getParent()->getName() << '\n'904<< *Base << '\n');905906// Emit materialization code for rebased constants depending on this IP.907for (UserAdjustment &R : ToBeRebased) {908emitBaseConstants(Base, &R);909ReBasesNum++;910// Use the same debug location as the last user of the constant.911Base->setDebugLoc(DILocation::getMergedLocation(912Base->getDebugLoc(), R.User.Inst->getDebugLoc()));913}914assert(!Base->use_empty() && "The use list is empty!?");915assert(isa<Instruction>(Base->user_back()) &&916"All uses should be instructions.");917}918(void)UsesNum;919(void)ReBasesNum;920(void)NotRebasedNum;921// Expect all uses are rebased after rebase is done.922assert(UsesNum == (ReBasesNum + NotRebasedNum) &&923"Not all uses are rebased");924925NumConstantsHoisted++;926927// Base constant is also included in ConstInfo.RebasedConstants, so928// deduct 1 from ConstInfo.RebasedConstants.size().929NumConstantsRebased += ConstInfo.RebasedConstants.size() - 1;930931MadeChange = true;932}933return MadeChange;934}935936/// Check all cast instructions we made a copy of and remove them if they937/// have no more users.938void ConstantHoistingPass::deleteDeadCastInst() const {939for (auto const &I : ClonedCastMap)940if (I.first->use_empty())941I.first->eraseFromParent();942}943944/// Optimize expensive integer constants in the given function.945bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,946DominatorTree &DT, BlockFrequencyInfo *BFI,947BasicBlock &Entry, ProfileSummaryInfo *PSI) {948this->TTI = &TTI;949this->DT = &DT;950this->BFI = BFI;951this->DL = &Fn.getDataLayout();952this->Ctx = &Fn.getContext();953this->Entry = &Entry;954this->PSI = PSI;955this->OptForSize = Entry.getParent()->hasOptSize() ||956llvm::shouldOptimizeForSize(Entry.getParent(), PSI, BFI,957PGSOQueryType::IRPass);958959// Collect all constant candidates.960collectConstantCandidates(Fn);961962// Combine constants that can be easily materialized with an add from a common963// base constant.964if (!ConstIntCandVec.empty())965findBaseConstants(nullptr);966for (const auto &MapEntry : ConstGEPCandMap)967if (!MapEntry.second.empty())968findBaseConstants(MapEntry.first);969970// Finally hoist the base constant and emit materialization code for dependent971// constants.972bool MadeChange = false;973if (!ConstIntInfoVec.empty())974MadeChange = emitBaseConstants(nullptr);975for (const auto &MapEntry : ConstGEPInfoMap)976if (!MapEntry.second.empty())977MadeChange |= emitBaseConstants(MapEntry.first);978979980// Cleanup dead instructions.981deleteDeadCastInst();982983cleanup();984985return MadeChange;986}987988PreservedAnalyses ConstantHoistingPass::run(Function &F,989FunctionAnalysisManager &AM) {990auto &DT = AM.getResult<DominatorTreeAnalysis>(F);991auto &TTI = AM.getResult<TargetIRAnalysis>(F);992auto BFI = ConstHoistWithBlockFrequency993? &AM.getResult<BlockFrequencyAnalysis>(F)994: nullptr;995auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);996auto *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());997if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock(), PSI))998return PreservedAnalyses::all();9991000PreservedAnalyses PA;1001PA.preserveSet<CFGAnalyses>();1002return PA;1003}100410051006