Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
35271 views
//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file contains an optimization for div and rem on architectures that9// execute short instructions significantly faster than longer instructions.10// For example, on Intel Atom 32-bit divides are slow enough that during11// runtime it is profitable to check the value of the operands, and if they are12// positive and less than 256 use an unsigned 8-bit divide.13//14//===----------------------------------------------------------------------===//1516#include "llvm/Transforms/Utils/BypassSlowDivision.h"17#include "llvm/ADT/DenseMap.h"18#include "llvm/ADT/STLExtras.h"19#include "llvm/ADT/SmallPtrSet.h"20#include "llvm/Transforms/Utils/Local.h"21#include "llvm/Analysis/ValueTracking.h"22#include "llvm/IR/BasicBlock.h"23#include "llvm/IR/Constants.h"24#include "llvm/IR/DerivedTypes.h"25#include "llvm/IR/Function.h"26#include "llvm/IR/IRBuilder.h"27#include "llvm/IR/Instruction.h"28#include "llvm/IR/Instructions.h"29#include "llvm/IR/Module.h"30#include "llvm/IR/Type.h"31#include "llvm/IR/Value.h"32#include "llvm/Support/Casting.h"33#include "llvm/Support/KnownBits.h"34#include <cassert>35#include <cstdint>3637using namespace llvm;3839#define DEBUG_TYPE "bypass-slow-division"4041namespace {4243struct QuotRemPair {44Value *Quotient;45Value *Remainder;4647QuotRemPair(Value *InQuotient, Value *InRemainder)48: Quotient(InQuotient), Remainder(InRemainder) {}49};5051/// A quotient and remainder, plus a BB from which they logically "originate".52/// If you use Quotient or Remainder in a Phi node, you should use BB as its53/// corresponding predecessor.54struct QuotRemWithBB {55BasicBlock *BB = nullptr;56Value *Quotient = nullptr;57Value *Remainder = nullptr;58};5960using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;61using BypassWidthsTy = DenseMap<unsigned, unsigned>;62using VisitedSetTy = SmallPtrSet<Instruction *, 4>;6364enum ValueRange {65/// Operand definitely fits into BypassType. No runtime checks are needed.66VALRNG_KNOWN_SHORT,67/// A runtime check is required, as value range is unknown.68VALRNG_UNKNOWN,69/// Operand is unlikely to fit into BypassType. The bypassing should be70/// disabled.71VALRNG_LIKELY_LONG72};7374class FastDivInsertionTask {75bool IsValidTask = false;76Instruction *SlowDivOrRem = nullptr;77IntegerType *BypassType = nullptr;78BasicBlock *MainBB = nullptr;7980bool isHashLikeValue(Value *V, VisitedSetTy &Visited);81ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);82QuotRemWithBB createSlowBB(BasicBlock *Successor);83QuotRemWithBB createFastBB(BasicBlock *Successor);84QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,85BasicBlock *PhiBB);86Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);87std::optional<QuotRemPair> insertFastDivAndRem();8889bool isSignedOp() {90return SlowDivOrRem->getOpcode() == Instruction::SDiv ||91SlowDivOrRem->getOpcode() == Instruction::SRem;92}9394bool isDivisionOp() {95return SlowDivOrRem->getOpcode() == Instruction::SDiv ||96SlowDivOrRem->getOpcode() == Instruction::UDiv;97}9899Type *getSlowType() { return SlowDivOrRem->getType(); }100101public:102FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);103104Value *getReplacement(DivCacheTy &Cache);105};106107} // end anonymous namespace108109FastDivInsertionTask::FastDivInsertionTask(Instruction *I,110const BypassWidthsTy &BypassWidths) {111switch (I->getOpcode()) {112case Instruction::UDiv:113case Instruction::SDiv:114case Instruction::URem:115case Instruction::SRem:116SlowDivOrRem = I;117break;118default:119// I is not a div/rem operation.120return;121}122123// Skip division on vector types. Only optimize integer instructions.124IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());125if (!SlowType)126return;127128// Skip if this bitwidth is not bypassed.129auto BI = BypassWidths.find(SlowType->getBitWidth());130if (BI == BypassWidths.end())131return;132133// Get type for div/rem instruction with bypass bitwidth.134IntegerType *BT = IntegerType::get(I->getContext(), BI->second);135BypassType = BT;136137// The original basic block.138MainBB = I->getParent();139140// The instruction is indeed a slow div or rem operation.141IsValidTask = true;142}143144/// Reuses previously-computed dividend or remainder from the current BB if145/// operands and operation are identical. Otherwise calls insertFastDivAndRem to146/// perform the optimization and caches the resulting dividend and remainder.147/// If no replacement can be generated, nullptr is returned.148Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {149// First, make sure that the task is valid.150if (!IsValidTask)151return nullptr;152153// Then, look for a value in Cache.154Value *Dividend = SlowDivOrRem->getOperand(0);155Value *Divisor = SlowDivOrRem->getOperand(1);156DivRemMapKey Key(isSignedOp(), Dividend, Divisor);157auto CacheI = Cache.find(Key);158159if (CacheI == Cache.end()) {160// If previous instance does not exist, try to insert fast div.161std::optional<QuotRemPair> OptResult = insertFastDivAndRem();162// Bail out if insertFastDivAndRem has failed.163if (!OptResult)164return nullptr;165CacheI = Cache.insert({Key, *OptResult}).first;166}167168QuotRemPair &Value = CacheI->second;169return isDivisionOp() ? Value.Quotient : Value.Remainder;170}171172/// Check if a value looks like a hash.173///174/// The routine is expected to detect values computed using the most common hash175/// algorithms. Typically, hash computations end with one of the following176/// instructions:177///178/// 1) MUL with a constant wider than BypassType179/// 2) XOR instruction180///181/// And even if we are wrong and the value is not a hash, it is still quite182/// unlikely that such values will fit into BypassType.183///184/// To detect string hash algorithms like FNV we have to look through PHI-nodes.185/// It is implemented as a depth-first search for values that look neither long186/// nor hash-like.187bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {188Instruction *I = dyn_cast<Instruction>(V);189if (!I)190return false;191192switch (I->getOpcode()) {193case Instruction::Xor:194return true;195case Instruction::Mul: {196// After Constant Hoisting pass, long constants may be represented as197// bitcast instructions. As a result, some constants may look like an198// instruction at first, and an additional check is necessary to find out if199// an operand is actually a constant.200Value *Op1 = I->getOperand(1);201ConstantInt *C = dyn_cast<ConstantInt>(Op1);202if (!C && isa<BitCastInst>(Op1))203C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));204return C && C->getValue().getSignificantBits() > BypassType->getBitWidth();205}206case Instruction::PHI:207// Stop IR traversal in case of a crazy input code. This limits recursion208// depth.209if (Visited.size() >= 16)210return false;211// Do not visit nodes that have been visited already. We return true because212// it means that we couldn't find any value that doesn't look hash-like.213if (!Visited.insert(I).second)214return true;215return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {216// Ignore undef values as they probably don't affect the division217// operands.218return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||219isa<UndefValue>(V);220});221default:222return false;223}224}225226/// Check if an integer value fits into our bypass type.227ValueRange FastDivInsertionTask::getValueRange(Value *V,228VisitedSetTy &Visited) {229unsigned ShortLen = BypassType->getBitWidth();230unsigned LongLen = V->getType()->getIntegerBitWidth();231232assert(LongLen > ShortLen && "Value type must be wider than BypassType");233unsigned HiBits = LongLen - ShortLen;234235const DataLayout &DL = SlowDivOrRem->getDataLayout();236KnownBits Known(LongLen);237238computeKnownBits(V, Known, DL);239240if (Known.countMinLeadingZeros() >= HiBits)241return VALRNG_KNOWN_SHORT;242243if (Known.countMaxLeadingZeros() < HiBits)244return VALRNG_LIKELY_LONG;245246// Long integer divisions are often used in hashtable implementations. It's247// not worth bypassing such divisions because hash values are extremely248// unlikely to have enough leading zeros. The call below tries to detect249// values that are unlikely to fit BypassType (including hashes).250if (isHashLikeValue(V, Visited))251return VALRNG_LIKELY_LONG;252253return VALRNG_UNKNOWN;254}255256/// Add new basic block for slow div and rem operations and put it before257/// SuccessorBB.258QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {259QuotRemWithBB DivRemPair;260DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",261MainBB->getParent(), SuccessorBB);262IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());263Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());264265Value *Dividend = SlowDivOrRem->getOperand(0);266Value *Divisor = SlowDivOrRem->getOperand(1);267268if (isSignedOp()) {269DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);270DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);271} else {272DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);273DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);274}275276Builder.CreateBr(SuccessorBB);277return DivRemPair;278}279280/// Add new basic block for fast div and rem operations and put it before281/// SuccessorBB.282QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {283QuotRemWithBB DivRemPair;284DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",285MainBB->getParent(), SuccessorBB);286IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());287Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());288289Value *Dividend = SlowDivOrRem->getOperand(0);290Value *Divisor = SlowDivOrRem->getOperand(1);291Value *ShortDivisorV =292Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);293Value *ShortDividendV =294Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);295296// udiv/urem because this optimization only handles positive numbers.297Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);298Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);299DivRemPair.Quotient =300Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());301DivRemPair.Remainder =302Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());303Builder.CreateBr(SuccessorBB);304305return DivRemPair;306}307308/// Creates Phi nodes for result of Div and Rem.309QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,310QuotRemWithBB &RHS,311BasicBlock *PhiBB) {312IRBuilder<> Builder(PhiBB, PhiBB->begin());313Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());314PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);315QuoPhi->addIncoming(LHS.Quotient, LHS.BB);316QuoPhi->addIncoming(RHS.Quotient, RHS.BB);317PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);318RemPhi->addIncoming(LHS.Remainder, LHS.BB);319RemPhi->addIncoming(RHS.Remainder, RHS.BB);320return QuotRemPair(QuoPhi, RemPhi);321}322323/// Creates a runtime check to test whether both the divisor and dividend fit324/// into BypassType. The check is inserted at the end of MainBB. True return325/// value means that the operands fit. Either of the operands may be NULL if it326/// doesn't need a runtime check.327Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {328assert((Op1 || Op2) && "Nothing to check");329IRBuilder<> Builder(MainBB, MainBB->end());330Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());331332Value *OrV;333if (Op1 && Op2)334OrV = Builder.CreateOr(Op1, Op2);335else336OrV = Op1 ? Op1 : Op2;337338// BitMask is inverted to check if the operands are339// larger than the bypass type340uint64_t BitMask = ~BypassType->getBitMask();341Value *AndV = Builder.CreateAnd(OrV, BitMask);342343// Compare operand values344Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);345return Builder.CreateICmpEQ(AndV, ZeroV);346}347348/// Substitutes the div/rem instruction with code that checks the value of the349/// operands and uses a shorter-faster div/rem instruction when possible.350std::optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {351Value *Dividend = SlowDivOrRem->getOperand(0);352Value *Divisor = SlowDivOrRem->getOperand(1);353354VisitedSetTy SetL;355ValueRange DividendRange = getValueRange(Dividend, SetL);356if (DividendRange == VALRNG_LIKELY_LONG)357return std::nullopt;358359VisitedSetTy SetR;360ValueRange DivisorRange = getValueRange(Divisor, SetR);361if (DivisorRange == VALRNG_LIKELY_LONG)362return std::nullopt;363364bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);365bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);366367if (DividendShort && DivisorShort) {368// If both operands are known to be short then just replace the long369// division with a short one in-place. Since we're not introducing control370// flow in this case, narrowing the division is always a win, even if the371// divisor is a constant (and will later get replaced by a multiplication).372373IRBuilder<> Builder(SlowDivOrRem);374Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);375Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);376Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);377Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);378Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());379Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());380return QuotRemPair(ExtDiv, ExtRem);381}382383if (isa<ConstantInt>(Divisor)) {384// If the divisor is not a constant, DAGCombiner will convert it to a385// multiplication by a magic constant. It isn't clear if it is worth386// introducing control flow to get a narrower multiply.387return std::nullopt;388}389390// After Constant Hoisting pass, long constants may be represented as391// bitcast instructions. As a result, some constants may look like an392// instruction at first, and an additional check is necessary to find out if393// an operand is actually a constant.394if (auto *BCI = dyn_cast<BitCastInst>(Divisor))395if (BCI->getParent() == SlowDivOrRem->getParent() &&396isa<ConstantInt>(BCI->getOperand(0)))397return std::nullopt;398399IRBuilder<> Builder(MainBB, MainBB->end());400Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());401402if (DividendShort && !isSignedOp()) {403// If the division is unsigned and Dividend is known to be short, then404// either405// 1) Divisor is less or equal to Dividend, and the result can be computed406// with a short division.407// 2) Divisor is greater than Dividend. In this case, no division is needed408// at all: The quotient is 0 and the remainder is equal to Dividend.409//410// So instead of checking at runtime whether Divisor fits into BypassType,411// we emit a runtime check to differentiate between these two cases. This412// lets us entirely avoid a long div.413414// Split the basic block before the div/rem.415BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);416// Remove the unconditional branch from MainBB to SuccessorBB.417MainBB->back().eraseFromParent();418QuotRemWithBB Long;419Long.BB = MainBB;420Long.Quotient = ConstantInt::get(getSlowType(), 0);421Long.Remainder = Dividend;422QuotRemWithBB Fast = createFastBB(SuccessorBB);423QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);424Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);425Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);426return Result;427} else {428// General case. Create both slow and fast div/rem pairs and choose one of429// them at runtime.430431// Split the basic block before the div/rem.432BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);433// Remove the unconditional branch from MainBB to SuccessorBB.434MainBB->back().eraseFromParent();435QuotRemWithBB Fast = createFastBB(SuccessorBB);436QuotRemWithBB Slow = createSlowBB(SuccessorBB);437QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);438Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,439DivisorShort ? nullptr : Divisor);440Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);441return Result;442}443}444445/// This optimization identifies DIV/REM instructions in a BB that can be446/// profitably bypassed and carried out with a shorter, faster divide.447bool llvm::bypassSlowDivision(BasicBlock *BB,448const BypassWidthsTy &BypassWidths) {449DivCacheTy PerBBDivCache;450451bool MadeChange = false;452Instruction *Next = &*BB->begin();453while (Next != nullptr) {454// We may add instructions immediately after I, but we want to skip over455// them.456Instruction *I = Next;457Next = Next->getNextNode();458459// Ignore dead code to save time and avoid bugs.460if (I->hasNUses(0))461continue;462463FastDivInsertionTask Task(I, BypassWidths);464if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {465I->replaceAllUsesWith(Replacement);466I->eraseFromParent();467MadeChange = true;468}469}470471// Above we eagerly create divs and rems, as pairs, so that we can efficiently472// create divrem machine instructions. Now erase any unused divs / rems so we473// don't leave extra instructions sitting around.474for (auto &KV : PerBBDivCache)475for (Value *V : {KV.second.Quotient, KV.second.Remainder})476RecursivelyDeleteTriviallyDeadInstructions(V);477478return MadeChange;479}480481482