Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
//===- GuardWidening.cpp - ---- Guard widening ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the guard widening pass. The semantics of the
// @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails
// more often than it did before the transform. This optimization is called
// "widening" and can be used to hoist and common runtime checks in situations
// like these:
//
//   %cmp0 = 7 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   %cmp1 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ]
//   ...
//
// =>
//
//   %cmp0 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   ...
//
// If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a
// generic implementation of the same function, which will have the correct
// semantics from that point onward. It is always _legal_ to deoptimize (so
// replacing %cmp0 with false is "correct"), though it may not always be
// profitable to do so.
//
// NB! This pass is a work in progress. It hasn't been tuned to be "production
// ready" yet. It is known to have quadratic running time and will not scale
// to large numbers of guards.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/GuardWidening.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <functional>

using namespace llvm;

#define DEBUG_TYPE "guard-widening"

STATISTIC(GuardsEliminated, "Number of eliminated guards");
STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");
STATISTIC(FreezeAdded, "Number of freeze instructions introduced");

static cl::opt<bool>
    WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
                      cl::desc("Whether or not we should widen guards "
                               "expressed as branches by widenable conditions"),
                      cl::init(true));
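
// For reference, a guard "expressed as a branch by a widenable condition"
// (see WidenBranchGuards above) typically has the following IR shape, which
// the helpers from Analysis/GuardUtils recognize (illustrative sketch):
//
//   %wc = call i1 @llvm.experimental.widenable.condition()
//   %guard.cond = and i1 %cmp, %wc
//   br i1 %guard.cond, label %guarded, label %deopt
//
// where %deopt ends in a call to @llvm.experimental.deoptimize. Widening such
// a branch is meant to strengthen the explicit condition %cmp while leaving
// the widenable condition itself in place.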

namespace {

// Get the condition of \p I. It can either be a guard or a conditional branch.
static Value *getCondition(Instruction *I) {
  if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
    assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
           "Bad guard intrinsic?");
    return GI->getArgOperand(0);
  }
  Value *Cond, *WC;
  BasicBlock *IfTrueBB, *IfFalseBB;
  if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB))
    return Cond;

  return cast<BranchInst>(I)->getCondition();
}

// Set the condition for \p I to \p NewCond. \p I can either be a guard or a
// conditional branch.
static void setCondition(Instruction *I, Value *NewCond) {
  if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
    assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
           "Bad guard intrinsic?");
    GI->setArgOperand(0, NewCond);
    return;
  }
  cast<BranchInst>(I)->setCondition(NewCond);
}

// Eliminates the guard instruction properly.
static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) {
  if (MSSAU)
    MSSAU->removeMemoryAccess(GuardInst);
  GuardInst->eraseFromParent();
  ++GuardsEliminated;
}

/// Find a point at which the widened condition of \p WCOrGuard should be
/// inserted. When it is represented as an intrinsic call, we can do it right
/// before the call instruction. However, when we are dealing with a widenable
/// branch, we must account for the following situation: widening should not
/// turn a loop-invariant condition into a loop-variant one. It means that if
/// the widenable.condition() call is invariant (w.r.t. any loop), the new wide
/// condition should stay invariant. Otherwise there can be a miscompile, like
/// the one described at https://github.com/llvm/llvm-project/issues/60234. The
/// safest way to do it is to expand the new condition at WC's block.
static std::optional<BasicBlock::iterator>
findInsertionPointForWideCondition(Instruction *WCOrGuard) {
  if (isGuard(WCOrGuard))
    return WCOrGuard->getIterator();
  if (auto WC = extractWidenableCondition(WCOrGuard))
    return cast<Instruction>(WC)->getIterator();
  return std::nullopt;
}

class GuardWideningImpl {
  DominatorTree &DT;
  PostDominatorTree *PDT;
  LoopInfo &LI;
  AssumptionCache &AC;
  MemorySSAUpdater *MSSAU;

  /// Together, these describe the region of interest. This might be all of
  /// the blocks within a function, or only a given loop's blocks and preheader.
  DomTreeNode *Root;
  std::function<bool(BasicBlock*)> BlockFilter;

  /// The set of guards and conditional branches whose conditions have been
  /// widened into dominating guards.
  SmallVector<Instruction *, 16> EliminatedGuardsAndBranches;

  /// The set of guards which have been widened to include conditions to other
  /// guards.
  DenseSet<Instruction *> WidenedGuards;

  /// Try to eliminate instruction \p Instr by widening it into an earlier
  /// dominating guard.
  /// \p DFSI is the DFS iterator on the dominator tree that is currently
  /// visiting the block containing \p Instr, and \p GuardsPerBlock maps
  /// BasicBlocks to the set of guards seen in that block.
  bool eliminateInstrViaWidening(
      Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
      const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>>
          &GuardsPerBlock);

  /// Used to keep track of which widening potential is more effective.
  enum WideningScore {
    /// Don't widen.
    WS_IllegalOrNegative,

    /// Widening is performance neutral as far as the cycles spent in check
    /// conditions go (but can still help, e.g., code layout, having less
    /// deopt state).
    WS_Neutral,

    /// Widening is profitable.
    WS_Positive,

    /// Widening is very profitable. Not significantly different from \c
    /// WS_Positive, except by the order.
    WS_VeryPositive
  };

  static StringRef scoreTypeToString(WideningScore WS);

  /// Compute the score for widening the condition in \p DominatedInstr
  /// into \p WideningPoint.
  WideningScore computeWideningScore(Instruction *DominatedInstr,
                                     Instruction *ToWiden,
                                     BasicBlock::iterator WideningPoint,
                                     SmallVectorImpl<Value *> &ChecksToHoist,
                                     SmallVectorImpl<Value *> &ChecksToWiden);

  /// Helper to check if \p V can be hoisted to \p InsertPos.
  bool canBeHoistedTo(const Value *V, BasicBlock::iterator InsertPos) const {
    SmallPtrSet<const Instruction *, 8> Visited;
    return canBeHoistedTo(V, InsertPos, Visited);
  }

  bool canBeHoistedTo(const Value *V, BasicBlock::iterator InsertPos,
                      SmallPtrSetImpl<const Instruction *> &Visited) const;

  bool canBeHoistedTo(const SmallVectorImpl<Value *> &Checks,
                      BasicBlock::iterator InsertPos) const {
    return all_of(Checks,
                  [&](const Value *V) { return canBeHoistedTo(V, InsertPos); });
  }
  /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
  /// canBeHoistedTo returned true.
  void makeAvailableAt(Value *V, BasicBlock::iterator InsertPos) const;

  void makeAvailableAt(const SmallVectorImpl<Value *> &Checks,
                       BasicBlock::iterator InsertPos) const {
    for (Value *V : Checks)
      makeAvailableAt(V, InsertPos);
  }

  /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
  /// to generate an expression computing the logical AND of \p ChecksToHoist
  /// and \p ChecksToWiden. Return a non-empty result if the expression
  /// computing the AND is only as expensive as computing one of the two sets
  /// of expressions. If \p InsertPt is provided, actually generate the
  /// resulting expression, make it available at \p InsertPt, and return it
  /// (else no change to the IR is made).
  std::optional<Value *>
  mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
              SmallVectorImpl<Value *> &ChecksToWiden,
              std::optional<BasicBlock::iterator> InsertPt);

  /// Generate the logical AND of \p ChecksToHoist and \p OldCondition and make
  /// it available at \p InsertPt.
  Value *hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                     Value *OldCondition, BasicBlock::iterator InsertPt);

  /// Add a freeze of \p Orig and push it as far up as possible, very
  /// aggressively. Also replace all uses of the frozen instruction with the
  /// frozen version.
  Value *freezeAndPush(Value *Orig, BasicBlock::iterator InsertPt);

  /// Represents a range check of the form \c Base + \c Offset u< \c Length,
  /// with the constraint that \c Length is not negative. \c CheckInst is the
  /// pre-existing instruction in the IR that computes the result of this range
  /// check.
  class RangeCheck {
    const Value *Base;
    const ConstantInt *Offset;
    const Value *Length;
    ICmpInst *CheckInst;

  public:
    explicit RangeCheck(const Value *Base, const ConstantInt *Offset,
                        const Value *Length, ICmpInst *CheckInst)
        : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}

    void setBase(const Value *NewBase) { Base = NewBase; }
    void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; }

    const Value *getBase() const { return Base; }
    const ConstantInt *getOffset() const { return Offset; }
    const APInt &getOffsetValue() const { return getOffset()->getValue(); }
    const Value *getLength() const { return Length; }
    ICmpInst *getCheckInst() const { return CheckInst; }

    void print(raw_ostream &OS, bool PrintTypes = false) {
      OS << "Base: ";
      Base->printAsOperand(OS, PrintTypes);
      OS << " Offset: ";
      Offset->printAsOperand(OS, PrintTypes);
      OS << " Length: ";
      Length->printAsOperand(OS, PrintTypes);
    }

    LLVM_DUMP_METHOD void dump() {
      print(dbgs());
      dbgs() << "\n";
    }
  };
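
  // For example (illustrative, assuming %len is known to be non-negative):
  // the check `icmp ult (add i32 %i, 4), %len` would be represented as a
  // RangeCheck with Base = %i, Offset = 4 and Length = %len, once
  // parseRangeChecks below has folded the constant add into the offset.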

  /// Parse \p ToParse into a conjunction (logical-and) of range checks, and
  /// append them to \p Checks. Returns true on success; may clobber \c Checks
  /// on failure.
  bool parseRangeChecks(SmallVectorImpl<Value *> &ToParse,
                        SmallVectorImpl<RangeCheck> &Checks) {
    for (auto CheckCond : ToParse) {
      if (!parseRangeChecks(CheckCond, Checks))
        return false;
    }
    return true;
  }

  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks);

  /// Combine the checks in \p Checks into a smaller set of checks and append
  /// them into \p CombinedChecks. Return true on success (i.e. all of the
  /// checks in \p Checks were combined into \p CombinedChecks). Clobbers
  /// \p Checks and \p CombinedChecks on success and on failure.
  bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
                          SmallVectorImpl<RangeCheck> &CombinedChecks) const;

  /// Can we compute the logical AND of \p ChecksToHoist and \p ChecksToWiden
  /// for the price of computing only one of the set of expressions?
  bool isWideningCondProfitable(SmallVectorImpl<Value *> &ChecksToHoist,
                                SmallVectorImpl<Value *> &ChecksToWiden) {
    return mergeChecks(ChecksToHoist, ChecksToWiden, /*InsertPt=*/std::nullopt)
        .has_value();
  }

  /// Widen \p ChecksToWiden to fail if any of \p ChecksToHoist is false.
  void widenGuard(SmallVectorImpl<Value *> &ChecksToHoist,
                  SmallVectorImpl<Value *> &ChecksToWiden,
                  Instruction *ToWiden) {
    auto InsertPt = findInsertionPointForWideCondition(ToWiden);
    auto MergedCheck = mergeChecks(ChecksToHoist, ChecksToWiden, InsertPt);
    Value *Result = MergedCheck ? *MergedCheck
                                : hoistChecks(ChecksToHoist,
                                              getCondition(ToWiden), *InsertPt);

    if (isGuardAsWidenableBranch(ToWiden)) {
      setWidenableBranchCond(cast<BranchInst>(ToWiden), Result);
      return;
    }
    setCondition(ToWiden, Result);
  }

public:
  explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
                             LoopInfo &LI, AssumptionCache &AC,
                             MemorySSAUpdater *MSSAU, DomTreeNode *Root,
                             std::function<bool(BasicBlock *)> BlockFilter)
      : DT(DT), PDT(PDT), LI(LI), AC(AC), MSSAU(MSSAU), Root(Root),
        BlockFilter(BlockFilter) {}

  /// The entry point for this pass.
  bool run();
};
}

static bool isSupportedGuardInstruction(const Instruction *Insn) {
  if (isGuard(Insn))
    return true;
  if (WidenBranchGuards && isGuardAsWidenableBranch(Insn))
    return true;
  return false;
}

bool GuardWideningImpl::run() {
  DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
  bool Changed = false;
  for (auto DFI = df_begin(Root), DFE = df_end(Root);
       DFI != DFE; ++DFI) {
    auto *BB = (*DFI)->getBlock();
    if (!BlockFilter(BB))
      continue;

    auto &CurrentList = GuardsInBlock[BB];

    for (auto &I : *BB)
      if (isSupportedGuardInstruction(&I))
        CurrentList.push_back(cast<Instruction>(&I));

    for (auto *II : CurrentList)
      Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock);
  }

  assert(EliminatedGuardsAndBranches.empty() || Changed);
  for (auto *I : EliminatedGuardsAndBranches)
    if (!WidenedGuards.count(I)) {
      assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
      if (isSupportedGuardInstruction(I))
        eliminateGuard(I, MSSAU);
      else {
        assert(isa<BranchInst>(I) &&
               "Eliminated something other than guard or branch?");
        ++CondBranchEliminated;
      }
    }

  return Changed;
}

bool GuardWideningImpl::eliminateInstrViaWidening(
    Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
    const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>>
        &GuardsInBlock) {
  SmallVector<Value *> ChecksToHoist;
  parseWidenableGuard(Instr, ChecksToHoist);
  // Ignore trivial true or false conditions. These instructions will be
  // trivially eliminated by any cleanup pass. Do not erase them because other
  // guards can possibly be widened into them.
  if (ChecksToHoist.empty() ||
      (ChecksToHoist.size() == 1 && isa<ConstantInt>(ChecksToHoist.front())))
    return false;

  Instruction *BestSoFar = nullptr;
  auto BestScoreSoFar = WS_IllegalOrNegative;

  // In the set of dominating guards, find the one we can merge GuardInst with
  // for the most profit.
  for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) {
    auto *CurBB = DFSI.getPath(i)->getBlock();
    if (!BlockFilter(CurBB))
      break;
    assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
    const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;

    auto I = GuardsInCurBB.begin();
    auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr)
                                         : GuardsInCurBB.end();

#ifndef NDEBUG
    {
      unsigned Index = 0;
      for (auto &I : *CurBB) {
        if (Index == GuardsInCurBB.size())
          break;
        if (GuardsInCurBB[Index] == &I)
          Index++;
      }
      assert(Index == GuardsInCurBB.size() &&
             "Guards expected to be in order!");
    }
#endif

    assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");

    for (auto *Candidate : make_range(I, E)) {
      auto WideningPoint = findInsertionPointForWideCondition(Candidate);
      if (!WideningPoint)
        continue;
      SmallVector<Value *> CandidateChecks;
      parseWidenableGuard(Candidate, CandidateChecks);
      auto Score = computeWideningScore(Instr, Candidate, *WideningPoint,
                                        ChecksToHoist, CandidateChecks);
      LLVM_DEBUG(dbgs() << "Score between " << *Instr << " and " << *Candidate
                        << " is " << scoreTypeToString(Score) << "\n");
      if (Score > BestScoreSoFar) {
        BestScoreSoFar = Score;
        BestSoFar = Candidate;
      }
    }
  }

  if (BestScoreSoFar == WS_IllegalOrNegative) {
    LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n");
    return false;
  }

  assert(BestSoFar != Instr && "Should have never visited same guard!");
  assert(DT.dominates(BestSoFar, Instr) && "Should be!");

  LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar
                    << " with score " << scoreTypeToString(BestScoreSoFar)
                    << "\n");
  SmallVector<Value *> ChecksToWiden;
  parseWidenableGuard(BestSoFar, ChecksToWiden);
  widenGuard(ChecksToHoist, ChecksToWiden, BestSoFar);
  auto NewGuardCondition = ConstantInt::getTrue(Instr->getContext());
  setCondition(Instr, NewGuardCondition);
  EliminatedGuardsAndBranches.push_back(Instr);
  WidenedGuards.insert(BestSoFar);
  return true;
}

GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
    Instruction *DominatedInstr, Instruction *ToWiden,
    BasicBlock::iterator WideningPoint, SmallVectorImpl<Value *> &ChecksToHoist,
    SmallVectorImpl<Value *> &ChecksToWiden) {
  Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent());
  Loop *DominatingGuardLoop = LI.getLoopFor(WideningPoint->getParent());
  bool HoistingOutOfLoop = false;

  if (DominatingGuardLoop != DominatedInstrLoop) {
    // Be conservative and don't widen into a sibling loop. TODO: If the
    // sibling is colder, we should consider allowing this.
    if (DominatingGuardLoop &&
        !DominatingGuardLoop->contains(DominatedInstrLoop))
      return WS_IllegalOrNegative;

    HoistingOutOfLoop = true;
  }

  if (!canBeHoistedTo(ChecksToHoist, WideningPoint))
    return WS_IllegalOrNegative;
  // Later, in GuardWideningImpl::hoistChecks, the entire condition might be
  // widened, not just the parsed list of checks. So we need to check whether
  // that whole condition can be hoisted as well.
  if (!canBeHoistedTo(getCondition(ToWiden), WideningPoint))
    return WS_IllegalOrNegative;

  // If the guard was conditionally executed, it may never be reached
  // dynamically. There are two potential downsides to hoisting it out of the
  // conditionally executed region: 1) we may spuriously deopt without need and
  // 2) we have the extra cost of computing the guard condition in the common
  // case. At the moment, we really only consider the second in our heuristic
  // here. TODO: evaluate cost model for spurious deopt
  // NOTE: As written, this also lets us hoist right over another guard which
  // is essentially just another spelling for control flow.
  if (isWideningCondProfitable(ChecksToHoist, ChecksToWiden))
    return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;

  if (HoistingOutOfLoop)
    return WS_Positive;

  // For a given basic block \p BB, return the successor of \p BB which is
  // guaranteed or highly likely to be taken.
  auto GetLikelySuccessor = [](const BasicBlock *BB) -> const BasicBlock * {
    if (auto *UniqueSucc = BB->getUniqueSuccessor())
      return UniqueSucc;
    auto *Term = BB->getTerminator();
    Value *Cond = nullptr;
    const BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
    using namespace PatternMatch;
    if (!match(Term, m_Br(m_Value(Cond), m_BasicBlock(IfTrue),
                          m_BasicBlock(IfFalse))))
      return nullptr;
    // For constant conditions, only one dynamic successor is possible.
    if (auto *ConstCond = dyn_cast<ConstantInt>(Cond))
      return ConstCond->isAllOnesValue() ? IfTrue : IfFalse;
    // If one of the successors ends with deopt, the other one is likely.
    if (IfFalse->getPostdominatingDeoptimizeCall())
      return IfTrue;
    if (IfTrue->getPostdominatingDeoptimizeCall())
      return IfFalse;
    // TODO: Use branch frequency metadata to allow hoisting through non-deopt
    // branches?
    return nullptr;
  };

  // Returns true if we might be hoisting above explicit control flow into a
  // considerably hotter block. Note that this completely ignores implicit
  // control flow (guards, calls which throw, etc...). That choice appears
  // arbitrary (we assume that implicit control flow exits are all rare).
  auto MaybeHoistingToHotterBlock = [&]() {
    const auto *DominatingBlock = WideningPoint->getParent();
    const auto *DominatedBlock = DominatedInstr->getParent();

    // Descend as low as we can, always taking the likely successor.
    assert(DT.isReachableFromEntry(DominatingBlock) && "Unreached code");
    assert(DT.isReachableFromEntry(DominatedBlock) && "Unreached code");
    assert(DT.dominates(DominatingBlock, DominatedBlock) && "No dominance");
    while (DominatedBlock != DominatingBlock) {
      auto *LikelySucc = GetLikelySuccessor(DominatingBlock);
      // No likely successor?
      if (!LikelySucc)
        break;
      // Only go down the dominator tree.
      if (!DT.properlyDominates(DominatingBlock, LikelySucc))
        break;
      DominatingBlock = LikelySucc;
    }

    // Found?
    if (DominatedBlock == DominatingBlock)
      return false;
    // We followed the likely successor chain and went past the dominated
    // block. It means that the dominated guard is in dead/very cold code.
    if (!DT.dominates(DominatingBlock, DominatedBlock))
      return true;
    // TODO: diamond, triangle cases
    if (!PDT)
      return true;
    return !PDT->dominates(DominatedBlock, DominatingBlock);
  };

  return MaybeHoistingToHotterBlock() ? WS_IllegalOrNegative : WS_Neutral;
}

bool GuardWideningImpl::canBeHoistedTo(
    const Value *V, BasicBlock::iterator Loc,
    SmallPtrSetImpl<const Instruction *> &Visited) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
    return true;

  if (!isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) ||
      Inst->mayReadFromMemory())
    return false;

  Visited.insert(Inst);

  // We only want to go _up_ the dominance chain when recursing.
  assert(!isa<PHINode>(Loc) &&
         "PHIs should return false for isSafeToSpeculativelyExecute");
  assert(DT.isReachableFromEntry(Inst->getParent()) &&
         "We did a DFS from the block entry!");
  return all_of(Inst->operands(),
                [&](Value *Op) { return canBeHoistedTo(Op, Loc, Visited); });
}

void GuardWideningImpl::makeAvailableAt(Value *V,
                                        BasicBlock::iterator Loc) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc))
    return;

  assert(isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) &&
         !Inst->mayReadFromMemory() &&
         "Should've checked with canBeHoistedTo!");

  for (Value *Op : Inst->operands())
    makeAvailableAt(Op, Loc);

  Inst->moveBefore(*Loc->getParent(), Loc);
}

// Return the instruction before which we can insert a freeze for the value
// \p V, as close to the def as possible. If there is no place to add the
// freeze, return std::nullopt.
static std::optional<BasicBlock::iterator>
getFreezeInsertPt(Value *V, const DominatorTree &DT) {
  auto *I = dyn_cast<Instruction>(V);
  if (!I)
    return DT.getRoot()->getFirstNonPHIOrDbgOrAlloca()->getIterator();

  std::optional<BasicBlock::iterator> Res = I->getInsertionPointAfterDef();
  // If there is no place to add the freeze, return std::nullopt.
  if (!Res || !DT.dominates(I, &**Res))
    return std::nullopt;

  Instruction *ResInst = &**Res;

  // If there is a User dominated by original I, then it should be dominated
  // by the Freeze instruction as well.
  if (any_of(I->users(), [&](User *U) {
        Instruction *User = cast<Instruction>(U);
        return ResInst != User && DT.dominates(I, User) &&
               !DT.dominates(ResInst, User);
      }))
    return std::nullopt;
  return Res;
}

Value *GuardWideningImpl::freezeAndPush(Value *Orig,
                                        BasicBlock::iterator InsertPt) {
  if (isGuaranteedNotToBePoison(Orig, nullptr, InsertPt, &DT))
    return Orig;
  std::optional<BasicBlock::iterator> InsertPtAtDef =
      getFreezeInsertPt(Orig, DT);
  if (!InsertPtAtDef) {
    FreezeInst *FI = new FreezeInst(Orig, "gw.freeze");
    FI->insertBefore(*InsertPt->getParent(), InsertPt);
    return FI;
  }
  if (isa<Constant>(Orig) || isa<GlobalValue>(Orig)) {
    BasicBlock::iterator InsertPt = *InsertPtAtDef;
    FreezeInst *FI = new FreezeInst(Orig, "gw.freeze");
    FI->insertBefore(*InsertPt->getParent(), InsertPt);
    return FI;
  }

  SmallSet<Value *, 16> Visited;
  SmallVector<Value *, 16> Worklist;
  SmallSet<Instruction *, 16> DropPoisonFlags;
  SmallVector<Value *, 16> NeedFreeze;
  DenseMap<Value *, FreezeInst *> CacheOfFreezes;

  // These data structures are a bit overloaded: Visited contains a constant or
  // global value if we have already met it; in that case CacheOfFreezes has a
  // freeze for it, if one is required.
  auto handleConstantOrGlobal = [&](Use &U) {
    Value *Def = U.get();
    if (!isa<Constant>(Def) && !isa<GlobalValue>(Def))
      return false;

    if (Visited.insert(Def).second) {
      if (isGuaranteedNotToBePoison(Def, nullptr, InsertPt, &DT))
        return true;
      BasicBlock::iterator InsertPt = *getFreezeInsertPt(Def, DT);
      FreezeInst *FI = new FreezeInst(Def, Def->getName() + ".gw.fr");
      FI->insertBefore(*InsertPt->getParent(), InsertPt);
      CacheOfFreezes[Def] = FI;
    }

    if (CacheOfFreezes.count(Def))
      U.set(CacheOfFreezes[Def]);
    return true;
  };

  Worklist.push_back(Orig);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    if (isGuaranteedNotToBePoison(V, nullptr, InsertPt, &DT))
      continue;

    Instruction *I = dyn_cast<Instruction>(V);
    if (!I || canCreateUndefOrPoison(cast<Operator>(I),
                                     /*ConsiderFlagsAndMetadata*/ false)) {
      NeedFreeze.push_back(V);
      continue;
    }
    // Check all operands. If for any of them we cannot insert a Freeze,
    // stop here. Otherwise, iterate.
    if (any_of(I->operands(), [&](Value *Op) {
          return isa<Instruction>(Op) && !getFreezeInsertPt(Op, DT);
        })) {
      NeedFreeze.push_back(I);
      continue;
    }
    DropPoisonFlags.insert(I);
    for (Use &U : I->operands())
      if (!handleConstantOrGlobal(U))
        Worklist.push_back(U.get());
  }
  for (Instruction *I : DropPoisonFlags)
    I->dropPoisonGeneratingAnnotations();

  Value *Result = Orig;
  for (Value *V : NeedFreeze) {
    BasicBlock::iterator FreezeInsertPt = *getFreezeInsertPt(V, DT);
    FreezeInst *FI = new FreezeInst(V, V->getName() + ".gw.fr");
    FI->insertBefore(*FreezeInsertPt->getParent(), FreezeInsertPt);
    ++FreezeAdded;
    if (V == Orig)
      Result = FI;
    V->replaceUsesWithIf(
        FI, [&](const Use &U) -> bool { return U.getUser() != FI; });
  }

  return Result;
}

std::optional<Value *>
GuardWideningImpl::mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                               SmallVectorImpl<Value *> &ChecksToWiden,
                               std::optional<BasicBlock::iterator> InsertPt) {
  using namespace llvm::PatternMatch;

  Value *Result = nullptr;
  {
    // L >u C0 && L >u C1 -> L >u max(C0, C1)
    ConstantInt *RHS0, *RHS1;
    Value *LHS;
    ICmpInst::Predicate Pred0, Pred1;
    // TODO: Support searching for pairs to merge from both whole lists of
    // ChecksToHoist and ChecksToWiden.
    if (ChecksToWiden.size() == 1 && ChecksToHoist.size() == 1 &&
        match(ChecksToWiden.front(),
              m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
        match(ChecksToHoist.front(),
              m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {

      ConstantRange CR0 =
          ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue());
      ConstantRange CR1 =
          ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue());

      // Given what we're doing here and the semantics of guards, it would
      // be correct to use a subset intersection, but that may be too
      // aggressive in cases we care about.
      if (std::optional<ConstantRange> Intersect =
              CR0.exactIntersectWith(CR1)) {
        APInt NewRHSAP;
        CmpInst::Predicate Pred;
        if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) {
          if (InsertPt) {
            ConstantInt *NewRHS =
                ConstantInt::get((*InsertPt)->getContext(), NewRHSAP);
            assert(canBeHoistedTo(LHS, *InsertPt) && "must be");
            makeAvailableAt(LHS, *InsertPt);
            Result = new ICmpInst(*InsertPt, Pred, LHS, NewRHS, "wide.chk");
          }
          return Result;
        }
      }
    }
  }
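
  // Otherwise, try to interpret both sets of checks as range checks of the
  // form "Base + Offset u< Length" and combine those; this only succeeds when
  // combineRangeChecks can drop at least one of the parsed checks.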
  {
    SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
    if (parseRangeChecks(ChecksToWiden, Checks) &&
        parseRangeChecks(ChecksToHoist, Checks) &&
        combineRangeChecks(Checks, CombinedChecks)) {
      if (InsertPt) {
        for (auto &RC : CombinedChecks) {
          makeAvailableAt(RC.getCheckInst(), *InsertPt);
          if (Result)
            Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "",
                                               *InsertPt);
          else
            Result = RC.getCheckInst();
        }
        assert(Result && "Failed to find result value");
        Result->setName("wide.chk");
        Result = freezeAndPush(Result, *InsertPt);
      }
      return Result;
    }
  }
  // We were not able to compute ChecksToHoist AND ChecksToWiden for the price
  // of one.
  return std::nullopt;
}

Value *GuardWideningImpl::hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                                      Value *OldCondition,
                                      BasicBlock::iterator InsertPt) {
  assert(!ChecksToHoist.empty());
  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
  makeAvailableAt(ChecksToHoist, InsertPt);
  makeAvailableAt(OldCondition, InsertPt);
  Value *Result = Builder.CreateAnd(ChecksToHoist);
  Result = freezeAndPush(Result, InsertPt);
  Result = Builder.CreateAnd(OldCondition, Result);
  Result->setName("wide.chk");
  return Result;
}

bool GuardWideningImpl::parseRangeChecks(
    Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks) {
  using namespace llvm::PatternMatch;

  auto *IC = dyn_cast<ICmpInst>(CheckCond);
  if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() ||
      (IC->getPredicate() != ICmpInst::ICMP_ULT &&
       IC->getPredicate() != ICmpInst::ICMP_UGT))
    return false;

  const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
  if (IC->getPredicate() == ICmpInst::ICMP_UGT)
    std::swap(CmpLHS, CmpRHS);

  auto &DL = IC->getDataLayout();

  GuardWideningImpl::RangeCheck Check(
      CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())),
      CmpRHS, IC);

  if (!isKnownNonNegative(Check.getLength(), DL))
    return false;

  // What we have in \c Check now is a correct interpretation of \p CheckCond.
  // Try to see if we can move some constant offsets into the \c Offset field.

  bool Changed;
  auto &Ctx = CheckCond->getContext();

  do {
    Value *OpLHS;
    ConstantInt *OpRHS;
    Changed = false;

#ifndef NDEBUG
    auto *BaseInst = dyn_cast<Instruction>(Check.getBase());
    assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) &&
           "Unreachable instruction?");
#endif

    if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      Check.setBase(OpLHS);
      APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
      Check.setOffset(ConstantInt::get(Ctx, NewOffset));
      Changed = true;
    } else if (match(Check.getBase(),
                     m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      KnownBits Known = computeKnownBits(OpLHS, DL);
      if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
        Check.setBase(OpLHS);
        APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
        Check.setOffset(ConstantInt::get(Ctx, NewOffset));
        Changed = true;
      }
    }
  } while (Changed);

  Checks.push_back(Check);
  return true;
}
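
// Roughly, combineRangeChecks below groups the parsed checks by (Base, Length)
// and, for a group of three or more checks, keeps only the two checks with the
// smallest and the largest offsets; the proof sketch inside explains why those
// two are enough to imply the rest.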
bool GuardWideningImpl::combineRangeChecks(
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const {
  unsigned OldCount = Checks.size();
  while (!Checks.empty()) {
    // Pick all of the range checks with a specific base and length, and try to
    // merge them.
    const Value *CurrentBase = Checks.front().getBase();
    const Value *CurrentLength = Checks.front().getLength();

    SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;

    auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) {
      return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength;
    };

    copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck);
    erase_if(Checks, IsCurrentCheck);

    assert(CurrentChecks.size() != 0 && "We know we have at least one!");

    if (CurrentChecks.size() < 3) {
      llvm::append_range(RangeChecksOut, CurrentChecks);
      continue;
    }

    // CurrentChecks.size() will typically be 3 here, but so far there has been
    // no need to hard-code that fact.

    llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS,
                                  const GuardWideningImpl::RangeCheck &RHS) {
      return LHS.getOffsetValue().slt(RHS.getOffsetValue());
    });

    // Note: llvm::sort should not invalidate iterators into CurrentChecks.

    const ConstantInt *MinOffset = CurrentChecks.front().getOffset();
    const ConstantInt *MaxOffset = CurrentChecks.back().getOffset();

    unsigned BitWidth = MaxOffset->getValue().getBitWidth();
    if ((MaxOffset->getValue() - MinOffset->getValue())
            .ugt(APInt::getSignedMinValue(BitWidth)))
      return false;

    APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue();
    const APInt &HighOffset = MaxOffset->getValue();
    auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) {
      return (HighOffset - RC.getOffsetValue()).ult(MaxDiff);
    };

    if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK))
      return false;

    // We have a series of f+1 checks as:
    //
    //   I+k_0 u< L   ... Chk_0
    //   I+k_1 u< L   ... Chk_1
    //   ...
    //   I+k_f u< L   ... Chk_f
    //
    //   with forall i in [0,f]: k_f-k_i u< k_f-k_0   ... Precond_0
    //                           k_f-k_0 u< INT_MIN+k_f ... Precond_1
    //                           k_f != k_0             ... Precond_2
    //
    // Claim:
    //   Chk_0 AND Chk_f implies all the other checks
    //
    // Informal proof sketch:
    //
    // We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap
    // (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and
    // thus I+k_f is the greatest unsigned value in that range.
    //
    // This combined with Chk_f shows that everything in that range is u< L.
    // Via Precond_0 we know that all of the indices in Chk_0 through Chk_f
    // lie in [I+k_0,I+k_f], thus proving our claim.
    //
    // To see that [I+k_0,I+k_f] is not a wrapping range, note that there are
    // two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal
    // since k_0 != k_f). In the former case, [I+k_0,I+k_f] is not a wrapping
    // range by definition, and the latter case is impossible:
    //
    //   0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1)
    //   xxxxxx        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    //
    // For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted
    // with 'x' above) to be at least >u INT_MIN.
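    //
    // A concrete (illustrative) instance: with k_0 = 0, k_1 = 1, k_f = 2 the
    // checks are I u< L, I+1 u< L and I+2 u< L. Here k_f-k_1 = 1 u< 2 = k_f-k_0
    // and k_f != k_0, so keeping only Chk_0 (I u< L) and Chk_f (I+2 u< L)
    // below is enough to imply the middle check I+1 u< L.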

    RangeChecksOut.emplace_back(CurrentChecks.front());
    RangeChecksOut.emplace_back(CurrentChecks.back());
  }

  assert(RangeChecksOut.size() <= OldCount && "We pessimized!");
  return RangeChecksOut.size() != OldCount;
}

#ifndef NDEBUG
StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) {
  switch (WS) {
  case WS_IllegalOrNegative:
    return "IllegalOrNegative";
  case WS_Neutral:
    return "Neutral";
  case WS_Positive:
    return "Positive";
  case WS_VeryPositive:
    return "VeryPositive";
  }

  llvm_unreachable("Fully covered switch above!");
}
#endif

PreservedAnalyses GuardWideningPass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  // Avoid requesting analyses if there are no guards or widenable conditions.
  auto *GuardDecl = F.getParent()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty();
  auto *WCDecl = F.getParent()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_widenable_condition));
  bool HasWidenableConditions = WCDecl && !WCDecl->use_empty();
  if (!HasIntrinsicGuards && !HasWidenableConditions)
    return PreservedAnalyses::all();
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &LI = AM.getResult<LoopAnalysis>(F);
  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F);
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSAA)
    MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA());
  if (!GuardWideningImpl(DT, &PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr,
                         DT.getRootNode(), [](BasicBlock *) { return true; })
           .run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  return PA;
}

PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
                                         LoopStandardAnalysisResults &AR,
                                         LPMUpdater &U) {
  BasicBlock *RootBB = L.getLoopPredecessor();
  if (!RootBB)
    RootBB = L.getHeader();
  auto BlockFilter = [&](BasicBlock *BB) {
    return BB == RootBB || L.contains(BB);
  };
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (AR.MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
  if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.AC,
                         MSSAU ? MSSAU.get() : nullptr, AR.DT.getNode(RootBB),
                         BlockFilter)
           .run())
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}