Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
35294 views
//===- DivRemPairs.cpp - Hoist/[dr]ecompose division and remainder --------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This pass hoists and/or decomposes/recomposes integer division and remainder9// instructions to enable CFG improvements and better codegen.10//11//===----------------------------------------------------------------------===//1213#include "llvm/Transforms/Scalar/DivRemPairs.h"14#include "llvm/ADT/DenseMap.h"15#include "llvm/ADT/MapVector.h"16#include "llvm/ADT/Statistic.h"17#include "llvm/Analysis/GlobalsModRef.h"18#include "llvm/Analysis/TargetTransformInfo.h"19#include "llvm/Analysis/ValueTracking.h"20#include "llvm/IR/Dominators.h"21#include "llvm/IR/Function.h"22#include "llvm/IR/PatternMatch.h"23#include "llvm/Support/DebugCounter.h"24#include "llvm/Transforms/Utils/BypassSlowDivision.h"25#include <optional>2627using namespace llvm;28using namespace llvm::PatternMatch;2930#define DEBUG_TYPE "div-rem-pairs"31STATISTIC(NumPairs, "Number of div/rem pairs");32STATISTIC(NumRecomposed, "Number of instructions recomposed");33STATISTIC(NumHoisted, "Number of instructions hoisted");34STATISTIC(NumDecomposed, "Number of instructions decomposed");35DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",36"Controls transformations in div-rem-pairs pass");3738namespace {39struct ExpandedMatch {40DivRemMapKey Key;41Instruction *Value;42};43} // namespace4445/// See if we can match: (which is the form we expand into)46/// X - ((X ?/ Y) * Y)47/// which is equivalent to:48/// X ?% Y49static std::optional<ExpandedMatch> matchExpandedRem(Instruction &I) {50Value *Dividend, *XroundedDownToMultipleOfY;51if (!match(&I, m_Sub(m_Value(Dividend), m_Value(XroundedDownToMultipleOfY))))52return std::nullopt;5354Value *Divisor;55Instruction *Div;56// Look for ((X / Y) * Y)57if (!match(58XroundedDownToMultipleOfY,59m_c_Mul(m_CombineAnd(m_IDiv(m_Specific(Dividend), m_Value(Divisor)),60m_Instruction(Div)),61m_Deferred(Divisor))))62return std::nullopt;6364ExpandedMatch M;65M.Key.SignedOp = Div->getOpcode() == Instruction::SDiv;66M.Key.Dividend = Dividend;67M.Key.Divisor = Divisor;68M.Value = &I;69return M;70}7172namespace {73/// A thin wrapper to store two values that we matched as div-rem pair.74/// We want this extra indirection to avoid dealing with RAUW'ing the map keys.75struct DivRemPairWorklistEntry {76/// The actual udiv/sdiv instruction. Source of truth.77AssertingVH<Instruction> DivInst;7879/// The instruction that we have matched as a remainder instruction.80/// Should only be used as Value, don't introspect it.81AssertingVH<Instruction> RemInst;8283DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_)84: DivInst(DivInst_), RemInst(RemInst_) {85assert((DivInst->getOpcode() == Instruction::UDiv ||86DivInst->getOpcode() == Instruction::SDiv) &&87"Not a division.");88assert(DivInst->getType() == RemInst->getType() && "Types should match.");89// We can't check anything else about remainder instruction,90// it's not strictly required to be a urem/srem.91}9293/// The type for this pair, identical for both the div and rem.94Type *getType() const { return DivInst->getType(); }9596/// Is this pair signed or unsigned?97bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; }9899/// In this pair, what are the divident and divisor?100Value *getDividend() const { return DivInst->getOperand(0); }101Value *getDivisor() const { return DivInst->getOperand(1); }102103bool isRemExpanded() const {104switch (RemInst->getOpcode()) {105case Instruction::SRem:106case Instruction::URem:107return false; // single 'rem' instruction - unexpanded form.108default:109return true; // anything else means we have remainder in expanded form.110}111}112};113} // namespace114using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>;115116/// Find matching pairs of integer div/rem ops (they have the same numerator,117/// denominator, and signedness). Place those pairs into a worklist for further118/// processing. This indirection is needed because we have to use TrackingVH<>119/// because we will be doing RAUW, and if one of the rem instructions we change120/// happens to be an input to another div/rem in the maps, we'd have problems.121static DivRemWorklistTy getWorklist(Function &F) {122// Insert all divide and remainder instructions into maps keyed by their123// operands and opcode (signed or unsigned).124DenseMap<DivRemMapKey, Instruction *> DivMap;125// Use a MapVector for RemMap so that instructions are moved/inserted in a126// deterministic order.127MapVector<DivRemMapKey, Instruction *> RemMap;128for (auto &BB : F) {129for (auto &I : BB) {130if (I.getOpcode() == Instruction::SDiv)131DivMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;132else if (I.getOpcode() == Instruction::UDiv)133DivMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;134else if (I.getOpcode() == Instruction::SRem)135RemMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;136else if (I.getOpcode() == Instruction::URem)137RemMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;138else if (auto Match = matchExpandedRem(I))139RemMap[Match->Key] = Match->Value;140}141}142143// We'll accumulate the matching pairs of div-rem instructions here.144DivRemWorklistTy Worklist;145146// We can iterate over either map because we are only looking for matched147// pairs. Choose remainders for efficiency because they are usually even more148// rare than division.149for (auto &RemPair : RemMap) {150// Find the matching division instruction from the division map.151auto It = DivMap.find(RemPair.first);152if (It == DivMap.end())153continue;154155// We have a matching pair of div/rem instructions.156NumPairs++;157Instruction *RemInst = RemPair.second;158159// Place it in the worklist.160Worklist.emplace_back(It->second, RemInst);161}162163return Worklist;164}165166/// Find matching pairs of integer div/rem ops (they have the same numerator,167/// denominator, and signedness). If they exist in different basic blocks, bring168/// them together by hoisting or replace the common division operation that is169/// implicit in the remainder:170/// X % Y <--> X - ((X / Y) * Y).171///172/// We can largely ignore the normal safety and cost constraints on speculation173/// of these ops when we find a matching pair. This is because we are already174/// guaranteed that any exceptions and most cost are already incurred by the175/// first member of the pair.176///177/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or178/// SimplifyCFG, but it's split off on its own because it's different enough179/// that it doesn't quite match the stated objectives of those passes.180static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,181const DominatorTree &DT) {182bool Changed = false;183184// Get the matching pairs of div-rem instructions. We want this extra185// indirection to avoid dealing with having to RAUW the keys of the maps.186DivRemWorklistTy Worklist = getWorklist(F);187188// Process each entry in the worklist.189for (DivRemPairWorklistEntry &E : Worklist) {190if (!DebugCounter::shouldExecute(DRPCounter))191continue;192193bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned());194195auto &DivInst = E.DivInst;196auto &RemInst = E.RemInst;197198const bool RemOriginallyWasInExpandedForm = E.isRemExpanded();199(void)RemOriginallyWasInExpandedForm; // suppress unused variable warning200201if (HasDivRemOp && E.isRemExpanded()) {202// The target supports div+rem but the rem is expanded.203// We should recompose it first.204Value *X = E.getDividend();205Value *Y = E.getDivisor();206Instruction *RealRem = E.isSigned() ? BinaryOperator::CreateSRem(X, Y)207: BinaryOperator::CreateURem(X, Y);208// Note that we place it right next to the original expanded instruction,209// and letting further handling to move it if needed.210RealRem->setName(RemInst->getName() + ".recomposed");211RealRem->insertAfter(RemInst);212Instruction *OrigRemInst = RemInst;213// Update AssertingVH<> with new instruction so it doesn't assert.214RemInst = RealRem;215// And replace the original instruction with the new one.216OrigRemInst->replaceAllUsesWith(RealRem);217RealRem->setDebugLoc(OrigRemInst->getDebugLoc());218OrigRemInst->eraseFromParent();219NumRecomposed++;220// Note that we have left ((X / Y) * Y) around.221// If it had other uses we could rewrite it as X - X % Y222Changed = true;223}224225assert((!E.isRemExpanded() || !HasDivRemOp) &&226"*If* the target supports div-rem, then by now the RemInst *is* "227"Instruction::[US]Rem.");228229// If the target supports div+rem and the instructions are in the same block230// already, there's nothing to do. The backend should handle this. If the231// target does not support div+rem, then we will decompose the rem.232if (HasDivRemOp && RemInst->getParent() == DivInst->getParent())233continue;234235bool DivDominates = DT.dominates(DivInst, RemInst);236if (!DivDominates && !DT.dominates(RemInst, DivInst)) {237// We have matching div-rem pair, but they are in two different blocks,238// neither of which dominates one another.239240BasicBlock *PredBB = nullptr;241BasicBlock *DivBB = DivInst->getParent();242BasicBlock *RemBB = RemInst->getParent();243244// It's only safe to hoist if every instruction before the Div/Rem in the245// basic block is guaranteed to transfer execution.246auto IsSafeToHoist = [](Instruction *DivOrRem, BasicBlock *ParentBB) {247for (auto I = ParentBB->begin(), E = DivOrRem->getIterator(); I != E;248++I)249if (!isGuaranteedToTransferExecutionToSuccessor(&*I))250return false;251252return true;253};254255// Look for something like this256// PredBB257// | \258// | Rem259// | /260// Div261//262// If the Rem block has a single predecessor and successor, and all paths263// from PredBB go to either RemBB or DivBB, and execution of RemBB and264// DivBB will always reach the Div/Rem, we can hoist Div to PredBB. If265// we have a DivRem operation we can also hoist Rem. Otherwise we'll leave266// Rem where it is and rewrite it to mul/sub.267if (RemBB->getSingleSuccessor() == DivBB) {268PredBB = RemBB->getUniquePredecessor();269270// Look for something like this271// PredBB272// / \273// Div Rem274//275// If the Rem and Din blocks share a unique predecessor, and all276// paths from PredBB go to either RemBB or DivBB, and execution of RemBB277// and DivBB will always reach the Div/Rem, we can hoist Div to PredBB.278// If we have a DivRem operation we can also hoist Rem. By hoisting both279// ops to the same block, we reduce code size and allow the DivRem to280// issue sooner. Without a DivRem op, this transformation is281// unprofitable because we would end up performing an extra Mul+Sub on282// the Rem path.283} else if (BasicBlock *RemPredBB = RemBB->getUniquePredecessor()) {284// This hoist is only profitable when the target has a DivRem op.285if (HasDivRemOp && RemPredBB == DivBB->getUniquePredecessor())286PredBB = RemPredBB;287}288// FIXME: We could handle more hoisting cases.289290if (PredBB && !isa<CatchSwitchInst>(PredBB->getTerminator()) &&291isGuaranteedToTransferExecutionToSuccessor(PredBB->getTerminator()) &&292IsSafeToHoist(RemInst, RemBB) && IsSafeToHoist(DivInst, DivBB) &&293all_of(successors(PredBB),294[&](BasicBlock *BB) { return BB == DivBB || BB == RemBB; }) &&295all_of(predecessors(DivBB),296[&](BasicBlock *BB) { return BB == RemBB || BB == PredBB; })) {297DivDominates = true;298DivInst->moveBefore(PredBB->getTerminator());299Changed = true;300if (HasDivRemOp) {301RemInst->moveBefore(PredBB->getTerminator());302continue;303}304} else305continue;306}307308// The target does not have a single div/rem operation,309// and the rem is already in expanded form. Nothing to do.310if (!HasDivRemOp && E.isRemExpanded())311continue;312313if (HasDivRemOp) {314// The target has a single div/rem operation. Hoist the lower instruction315// to make the matched pair visible to the backend.316if (DivDominates)317RemInst->moveAfter(DivInst);318else319DivInst->moveAfter(RemInst);320NumHoisted++;321} else {322// The target does not have a single div/rem operation,323// and the rem is *not* in a already-expanded form.324// Decompose the remainder calculation as:325// X % Y --> X - ((X / Y) * Y).326327assert(!RemOriginallyWasInExpandedForm &&328"We should not be expanding if the rem was in expanded form to "329"begin with.");330331Value *X = E.getDividend();332Value *Y = E.getDivisor();333Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);334Instruction *Sub = BinaryOperator::CreateSub(X, Mul);335336// If the remainder dominates, then hoist the division up to that block:337//338// bb1:339// %rem = srem %x, %y340// bb2:341// %div = sdiv %x, %y342// -->343// bb1:344// %div = sdiv %x, %y345// %mul = mul %div, %y346// %rem = sub %x, %mul347//348// If the division dominates, it's already in the right place. The mul+sub349// will be in a different block because we don't assume that they are350// cheap to speculatively execute:351//352// bb1:353// %div = sdiv %x, %y354// bb2:355// %rem = srem %x, %y356// -->357// bb1:358// %div = sdiv %x, %y359// bb2:360// %mul = mul %div, %y361// %rem = sub %x, %mul362//363// If the div and rem are in the same block, we do the same transform,364// but any code movement would be within the same block.365366if (!DivDominates)367DivInst->moveBefore(RemInst);368Mul->insertAfter(RemInst);369Mul->setDebugLoc(RemInst->getDebugLoc());370Sub->insertAfter(Mul);371Sub->setDebugLoc(RemInst->getDebugLoc());372373// If DivInst has the exact flag, remove it. Otherwise this optimization374// may replace a well-defined value 'X % Y' with poison.375DivInst->dropPoisonGeneratingFlags();376377// If X can be undef, X should be frozen first.378// For example, let's assume that Y = 1 & X = undef:379// %div = sdiv undef, 1 // %div = undef380// %rem = srem undef, 1 // %rem = 0381// =>382// %div = sdiv undef, 1 // %div = undef383// %mul = mul %div, 1 // %mul = undef384// %rem = sub %x, %mul // %rem = undef - undef = undef385// If X is not frozen, %rem becomes undef after transformation.386if (!isGuaranteedNotToBeUndef(X, nullptr, DivInst, &DT)) {387auto *FrX =388new FreezeInst(X, X->getName() + ".frozen", DivInst->getIterator());389FrX->setDebugLoc(DivInst->getDebugLoc());390DivInst->setOperand(0, FrX);391Sub->setOperand(0, FrX);392}393// Same for Y. If X = 1 and Y = (undef | 1), %rem in src is either 1 or 0,394// but %rem in tgt can be one of many integer values.395if (!isGuaranteedNotToBeUndef(Y, nullptr, DivInst, &DT)) {396auto *FrY =397new FreezeInst(Y, Y->getName() + ".frozen", DivInst->getIterator());398FrY->setDebugLoc(DivInst->getDebugLoc());399DivInst->setOperand(1, FrY);400Mul->setOperand(1, FrY);401}402403// Now kill the explicit remainder. We have replaced it with:404// (sub X, (mul (div X, Y), Y)405Sub->setName(RemInst->getName() + ".decomposed");406Instruction *OrigRemInst = RemInst;407// Update AssertingVH<> with new instruction so it doesn't assert.408RemInst = Sub;409// And replace the original instruction with the new one.410OrigRemInst->replaceAllUsesWith(Sub);411OrigRemInst->eraseFromParent();412NumDecomposed++;413}414Changed = true;415}416417return Changed;418}419420// Pass manager boilerplate below here.421422PreservedAnalyses DivRemPairsPass::run(Function &F,423FunctionAnalysisManager &FAM) {424TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);425DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);426if (!optimizeDivRem(F, TTI, DT))427return PreservedAnalyses::all();428// TODO: This pass just hoists/replaces math ops - all analyses are preserved?429PreservedAnalyses PA;430PA.preserveSet<CFGAnalyses>();431return PA;432}433434435