Path: blob/main/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
35234 views
//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file "describes" induction and recurrence variables.9//10//===----------------------------------------------------------------------===//1112#include "llvm/Analysis/IVDescriptors.h"13#include "llvm/Analysis/DemandedBits.h"14#include "llvm/Analysis/LoopInfo.h"15#include "llvm/Analysis/ScalarEvolution.h"16#include "llvm/Analysis/ScalarEvolutionExpressions.h"17#include "llvm/Analysis/ValueTracking.h"18#include "llvm/IR/Dominators.h"19#include "llvm/IR/Instructions.h"20#include "llvm/IR/Module.h"21#include "llvm/IR/PatternMatch.h"22#include "llvm/IR/ValueHandle.h"23#include "llvm/Support/Debug.h"24#include "llvm/Support/KnownBits.h"2526using namespace llvm;27using namespace llvm::PatternMatch;2829#define DEBUG_TYPE "iv-descriptors"3031bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,32SmallPtrSetImpl<Instruction *> &Set) {33for (const Use &Use : I->operands())34if (!Set.count(dyn_cast<Instruction>(Use)))35return false;36return true;37}3839bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {40switch (Kind) {41default:42break;43case RecurKind::Add:44case RecurKind::Mul:45case RecurKind::Or:46case RecurKind::And:47case RecurKind::Xor:48case RecurKind::SMax:49case RecurKind::SMin:50case RecurKind::UMax:51case RecurKind::UMin:52case RecurKind::IAnyOf:53case RecurKind::FAnyOf:54return true;55}56return false;57}5859bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurKind Kind) {60return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind);61}6263/// Determines if Phi may have been type-promoted. If Phi has a single user64/// that ANDs the Phi with a type mask, return the user. RT is updated to65/// account for the narrower bit width represented by the mask, and the AND66/// instruction is added to CI.67static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,68SmallPtrSetImpl<Instruction *> &Visited,69SmallPtrSetImpl<Instruction *> &CI) {70if (!Phi->hasOneUse())71return Phi;7273const APInt *M = nullptr;74Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());7576// Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT77// with a new integer type of the corresponding bit width.78if (match(J, m_And(m_Instruction(I), m_APInt(M)))) {79int32_t Bits = (*M + 1).exactLogBase2();80if (Bits > 0) {81RT = IntegerType::get(Phi->getContext(), Bits);82Visited.insert(Phi);83CI.insert(J);84return J;85}86}87return Phi;88}8990/// Compute the minimal bit width needed to represent a reduction whose exit91/// instruction is given by Exit.92static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,93DemandedBits *DB,94AssumptionCache *AC,95DominatorTree *DT) {96bool IsSigned = false;97const DataLayout &DL = Exit->getDataLayout();98uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());99100if (DB) {101// Use the demanded bits analysis to determine the bits that are live out102// of the exit instruction, rounding up to the nearest power of two. If the103// use of demanded bits results in a smaller bit width, we know the value104// must be positive (i.e., IsSigned = false), because if this were not the105// case, the sign bit would have been demanded.106auto Mask = DB->getDemandedBits(Exit);107MaxBitWidth = Mask.getBitWidth() - Mask.countl_zero();108}109110if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {111// If demanded bits wasn't able to limit the bit width, we can try to use112// value tracking instead. This can be the case, for example, if the value113// may be negative.114auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);115auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());116MaxBitWidth = NumTypeBits - NumSignBits;117KnownBits Bits = computeKnownBits(Exit, DL);118if (!Bits.isNonNegative()) {119// If the value is not known to be non-negative, we set IsSigned to true,120// meaning that we will use sext instructions instead of zext121// instructions to restore the original type.122IsSigned = true;123// Make sure at least one sign bit is included in the result, so it124// will get properly sign-extended.125++MaxBitWidth;126}127}128MaxBitWidth = llvm::bit_ceil(MaxBitWidth);129130return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),131IsSigned);132}133134/// Collect cast instructions that can be ignored in the vectorizer's cost135/// model, given a reduction exit value and the minimal type in which the136// reduction can be represented. Also search casts to the recurrence type137// to find the minimum width used by the recurrence.138static void collectCastInstrs(Loop *TheLoop, Instruction *Exit,139Type *RecurrenceType,140SmallPtrSetImpl<Instruction *> &Casts,141unsigned &MinWidthCastToRecurTy) {142143SmallVector<Instruction *, 8> Worklist;144SmallPtrSet<Instruction *, 8> Visited;145Worklist.push_back(Exit);146MinWidthCastToRecurTy = -1U;147148while (!Worklist.empty()) {149Instruction *Val = Worklist.pop_back_val();150Visited.insert(Val);151if (auto *Cast = dyn_cast<CastInst>(Val)) {152if (Cast->getSrcTy() == RecurrenceType) {153// If the source type of a cast instruction is equal to the recurrence154// type, it will be eliminated, and should be ignored in the vectorizer155// cost model.156Casts.insert(Cast);157continue;158}159if (Cast->getDestTy() == RecurrenceType) {160// The minimum width used by the recurrence is found by checking for161// casts on its operands. The minimum width is used by the vectorizer162// when finding the widest type for in-loop reductions without any163// loads/stores.164MinWidthCastToRecurTy = std::min<unsigned>(165MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits());166continue;167}168}169// Add all operands to the work list if they are loop-varying values that170// we haven't yet visited.171for (Value *O : cast<User>(Val)->operands())172if (auto *I = dyn_cast<Instruction>(O))173if (TheLoop->contains(I) && !Visited.count(I))174Worklist.push_back(I);175}176}177178// Check if a given Phi node can be recognized as an ordered reduction for179// vectorizing floating point operations without unsafe math.180static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,181Instruction *Exit, PHINode *Phi) {182// Currently only FAdd and FMulAdd are supported.183if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)184return false;185186if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)187return false;188189if (Kind == RecurKind::FMulAdd &&190!RecurrenceDescriptor::isFMulAddIntrinsic(Exit))191return false;192193// Ensure the exit instruction has only one user other than the reduction PHI194if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))195return false;196197// The only pattern accepted is the one in which the reduction PHI198// is used as one of the operands of the exit instruction199auto *Op0 = Exit->getOperand(0);200auto *Op1 = Exit->getOperand(1);201if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)202return false;203if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)204return false;205206LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi207<< ", ExitInst: " << *Exit << "\n");208209return true;210}211212bool RecurrenceDescriptor::AddReductionVar(213PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF,214RecurrenceDescriptor &RedDes, DemandedBits *DB, AssumptionCache *AC,215DominatorTree *DT, ScalarEvolution *SE) {216if (Phi->getNumIncomingValues() != 2)217return false;218219// Reduction variables are only found in the loop header block.220if (Phi->getParent() != TheLoop->getHeader())221return false;222223// Obtain the reduction start value from the value that comes from the loop224// preheader.225Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());226227// ExitInstruction is the single value which is used outside the loop.228// We only allow for a single reduction value to be used outside the loop.229// This includes users of the reduction, variables (which form a cycle230// which ends in the phi node).231Instruction *ExitInstruction = nullptr;232233// Variable to keep last visited store instruction. By the end of the234// algorithm this variable will be either empty or having intermediate235// reduction value stored in invariant address.236StoreInst *IntermediateStore = nullptr;237238// Indicates that we found a reduction operation in our scan.239bool FoundReduxOp = false;240241// We start with the PHI node and scan for all of the users of this242// instruction. All users must be instructions that can be used as reduction243// variables (such as ADD). We must have a single out-of-block user. The cycle244// must include the original PHI.245bool FoundStartPHI = false;246247// To recognize min/max patterns formed by a icmp select sequence, we store248// the number of instruction we saw from the recognized min/max pattern,249// to make sure we only see exactly the two instructions.250unsigned NumCmpSelectPatternInst = 0;251InstDesc ReduxDesc(false, nullptr);252253// Data used for determining if the recurrence has been type-promoted.254Type *RecurrenceType = Phi->getType();255SmallPtrSet<Instruction *, 4> CastInsts;256unsigned MinWidthCastToRecurrenceType;257Instruction *Start = Phi;258bool IsSigned = false;259260SmallPtrSet<Instruction *, 8> VisitedInsts;261SmallVector<Instruction *, 8> Worklist;262263// Return early if the recurrence kind does not match the type of Phi. If the264// recurrence kind is arithmetic, we attempt to look through AND operations265// resulting from the type promotion performed by InstCombine. Vector266// operations are not limited to the legal integer widths, so we may be able267// to evaluate the reduction in the narrower width.268if (RecurrenceType->isFloatingPointTy()) {269if (!isFloatingPointRecurrenceKind(Kind))270return false;271} else if (RecurrenceType->isIntegerTy()) {272if (!isIntegerRecurrenceKind(Kind))273return false;274if (!isMinMaxRecurrenceKind(Kind))275Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);276} else {277// Pointer min/max may exist, but it is not supported as a reduction op.278return false;279}280281Worklist.push_back(Start);282VisitedInsts.insert(Start);283284// Start with all flags set because we will intersect this with the reduction285// flags from all the reduction operations.286FastMathFlags FMF = FastMathFlags::getFast();287288// The first instruction in the use-def chain of the Phi node that requires289// exact floating point operations.290Instruction *ExactFPMathInst = nullptr;291292// A value in the reduction can be used:293// - By the reduction:294// - Reduction operation:295// - One use of reduction value (safe).296// - Multiple use of reduction value (not safe).297// - PHI:298// - All uses of the PHI must be the reduction (safe).299// - Otherwise, not safe.300// - By instructions outside of the loop (safe).301// * One value may have several outside users, but all outside302// uses must be of the same value.303// - By store instructions with a loop invariant address (safe with304// the following restrictions):305// * If there are several stores, all must have the same address.306// * Final value should be stored in that loop invariant address.307// - By an instruction that is not part of the reduction (not safe).308// This is either:309// * An instruction type other than PHI or the reduction operation.310// * A PHI in the header other than the initial PHI.311while (!Worklist.empty()) {312Instruction *Cur = Worklist.pop_back_val();313314// Store instructions are allowed iff it is the store of the reduction315// value to the same loop invariant memory location.316if (auto *SI = dyn_cast<StoreInst>(Cur)) {317if (!SE) {318LLVM_DEBUG(dbgs() << "Store instructions are not processed without "319<< "Scalar Evolution Analysis\n");320return false;321}322323const SCEV *PtrScev = SE->getSCEV(SI->getPointerOperand());324// Check it is the same address as previous stores325if (IntermediateStore) {326const SCEV *OtherScev =327SE->getSCEV(IntermediateStore->getPointerOperand());328329if (OtherScev != PtrScev) {330LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses "331<< "inside the loop: " << *SI->getPointerOperand()332<< " and "333<< *IntermediateStore->getPointerOperand() << '\n');334return false;335}336}337338// Check the pointer is loop invariant339if (!SE->isLoopInvariant(PtrScev, TheLoop)) {340LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address "341<< "inside the loop: " << *SI->getPointerOperand()342<< '\n');343return false;344}345346// IntermediateStore is always the last store in the loop.347IntermediateStore = SI;348continue;349}350351// No Users.352// If the instruction has no users then this is a broken chain and can't be353// a reduction variable.354if (Cur->use_empty())355return false;356357bool IsAPhi = isa<PHINode>(Cur);358359// A header PHI use other than the original PHI.360if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())361return false;362363// Reductions of instructions such as Div, and Sub is only possible if the364// LHS is the reduction variable.365if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&366!isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&367!VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))368return false;369370// Any reduction instruction must be of one of the allowed kinds. We ignore371// the starting value (the Phi or an AND instruction if the Phi has been372// type-promoted).373if (Cur != Start) {374ReduxDesc =375isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF);376ExactFPMathInst = ExactFPMathInst == nullptr377? ReduxDesc.getExactFPMathInst()378: ExactFPMathInst;379if (!ReduxDesc.isRecurrence())380return false;381// FIXME: FMF is allowed on phi, but propagation is not handled correctly.382if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) {383FastMathFlags CurFMF = ReduxDesc.getPatternInst()->getFastMathFlags();384if (auto *Sel = dyn_cast<SelectInst>(ReduxDesc.getPatternInst())) {385// Accept FMF on either fcmp or select of a min/max idiom.386// TODO: This is a hack to work-around the fact that FMF may not be387// assigned/propagated correctly. If that problem is fixed or we388// standardize on fmin/fmax via intrinsics, this can be removed.389if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition()))390CurFMF |= FCmp->getFastMathFlags();391}392FMF &= CurFMF;393}394// Update this reduction kind if we matched a new instruction.395// TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'396// state accurate while processing the worklist?397if (ReduxDesc.getRecKind() != RecurKind::None)398Kind = ReduxDesc.getRecKind();399}400401bool IsASelect = isa<SelectInst>(Cur);402403// A conditional reduction operation must only have 2 or less uses in404// VisitedInsts.405if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&406hasMultipleUsesOf(Cur, VisitedInsts, 2))407return false;408409// A reduction operation must only have one use of the reduction value.410if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&411!isAnyOfRecurrenceKind(Kind) && hasMultipleUsesOf(Cur, VisitedInsts, 1))412return false;413414// All inputs to a PHI node must be a reduction value.415if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))416return false;417418if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::IAnyOf) &&419(isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))420++NumCmpSelectPatternInst;421if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::FAnyOf) &&422(isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))423++NumCmpSelectPatternInst;424425// Check whether we found a reduction operator.426FoundReduxOp |= !IsAPhi && Cur != Start;427428// Process users of current instruction. Push non-PHI nodes after PHI nodes429// onto the stack. This way we are going to have seen all inputs to PHI430// nodes once we get to them.431SmallVector<Instruction *, 8> NonPHIs;432SmallVector<Instruction *, 8> PHIs;433for (User *U : Cur->users()) {434Instruction *UI = cast<Instruction>(U);435436// If the user is a call to llvm.fmuladd then the instruction can only be437// the final operand.438if (isFMulAddIntrinsic(UI))439if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))440return false;441442// Check if we found the exit user.443BasicBlock *Parent = UI->getParent();444if (!TheLoop->contains(Parent)) {445// If we already know this instruction is used externally, move on to446// the next user.447if (ExitInstruction == Cur)448continue;449450// Exit if you find multiple values used outside or if the header phi451// node is being used. In this case the user uses the value of the452// previous iteration, in which case we would loose "VF-1" iterations of453// the reduction operation if we vectorize.454if (ExitInstruction != nullptr || Cur == Phi)455return false;456457// The instruction used by an outside user must be the last instruction458// before we feed back to the reduction phi. Otherwise, we loose VF-1459// operations on the value.460if (!is_contained(Phi->operands(), Cur))461return false;462463ExitInstruction = Cur;464continue;465}466467// Process instructions only once (termination). Each reduction cycle468// value must only be used once, except by phi nodes and min/max469// reductions which are represented as a cmp followed by a select.470InstDesc IgnoredVal(false, nullptr);471if (VisitedInsts.insert(UI).second) {472if (isa<PHINode>(UI)) {473PHIs.push_back(UI);474} else {475StoreInst *SI = dyn_cast<StoreInst>(UI);476if (SI && SI->getPointerOperand() == Cur) {477// Reduction variable chain can only be stored somewhere but it478// can't be used as an address.479return false;480}481NonPHIs.push_back(UI);482}483} else if (!isa<PHINode>(UI) &&484((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&485!isa<SelectInst>(UI)) ||486(!isConditionalRdxPattern(Kind, UI).isRecurrence() &&487!isAnyOfPattern(TheLoop, Phi, UI, IgnoredVal)488.isRecurrence() &&489!isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence())))490return false;491492// Remember that we completed the cycle.493if (UI == Phi)494FoundStartPHI = true;495}496Worklist.append(PHIs.begin(), PHIs.end());497Worklist.append(NonPHIs.begin(), NonPHIs.end());498}499500// This means we have seen one but not the other instruction of the501// pattern or more than just a select and cmp. Zero implies that we saw a502// llvm.min/max intrinsic, which is always OK.503if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&504NumCmpSelectPatternInst != 0)505return false;506507if (isAnyOfRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)508return false;509510if (IntermediateStore) {511// Check that stored value goes to the phi node again. This way we make sure512// that the value stored in IntermediateStore is indeed the final reduction513// value.514if (!is_contained(Phi->operands(), IntermediateStore->getValueOperand())) {515LLVM_DEBUG(dbgs() << "Not a final reduction value stored: "516<< *IntermediateStore << '\n');517return false;518}519520// If there is an exit instruction it's value should be stored in521// IntermediateStore522if (ExitInstruction &&523IntermediateStore->getValueOperand() != ExitInstruction) {524LLVM_DEBUG(dbgs() << "Last store Instruction of reduction value does not "525"store last calculated value of the reduction: "526<< *IntermediateStore << '\n');527return false;528}529530// If all uses are inside the loop (intermediate stores), then the531// reduction value after the loop will be the one used in the last store.532if (!ExitInstruction)533ExitInstruction = cast<Instruction>(IntermediateStore->getValueOperand());534}535536if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)537return false;538539const bool IsOrdered =540checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);541542if (Start != Phi) {543// If the starting value is not the same as the phi node, we speculatively544// looked through an 'and' instruction when evaluating a potential545// arithmetic reduction to determine if it may have been type-promoted.546//547// We now compute the minimal bit width that is required to represent the548// reduction. If this is the same width that was indicated by the 'and', we549// can represent the reduction in the smaller type. The 'and' instruction550// will be eliminated since it will essentially be a cast instruction that551// can be ignore in the cost model. If we compute a different type than we552// did when evaluating the 'and', the 'and' will not be eliminated, and we553// will end up with different kinds of operations in the recurrence554// expression (e.g., IntegerAND, IntegerADD). We give up if this is555// the case.556//557// The vectorizer relies on InstCombine to perform the actual558// type-shrinking. It does this by inserting instructions to truncate the559// exit value of the reduction to the width indicated by RecurrenceType and560// then extend this value back to the original width. If IsSigned is false,561// a 'zext' instruction will be generated; otherwise, a 'sext' will be562// used.563//564// TODO: We should not rely on InstCombine to rewrite the reduction in the565// smaller type. We should just generate a correctly typed expression566// to begin with.567Type *ComputedType;568std::tie(ComputedType, IsSigned) =569computeRecurrenceType(ExitInstruction, DB, AC, DT);570if (ComputedType != RecurrenceType)571return false;572}573574// Collect cast instructions and the minimum width used by the recurrence.575// If the starting value is not the same as the phi node and the computed576// recurrence type is equal to the recurrence type, the recurrence expression577// will be represented in a narrower or wider type. If there are any cast578// instructions that will be unnecessary, collect them in CastsFromRecurTy.579// Note that the 'and' instruction was already included in this list.580//581// TODO: A better way to represent this may be to tag in some way all the582// instructions that are a part of the reduction. The vectorizer cost583// model could then apply the recurrence type to these instructions,584// without needing a white list of instructions to ignore.585// This may also be useful for the inloop reductions, if it can be586// kept simple enough.587collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts,588MinWidthCastToRecurrenceType);589590// We found a reduction var if we have reached the original phi node and we591// only have a single instruction with out-of-loop users.592593// The ExitInstruction(Instruction which is allowed to have out-of-loop users)594// is saved as part of the RecurrenceDescriptor.595596// Save the description of this reduction variable.597RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,598FMF, ExactFPMathInst, RecurrenceType, IsSigned,599IsOrdered, CastInsts, MinWidthCastToRecurrenceType);600RedDes = RD;601602return true;603}604605// We are looking for loops that do something like this:606// int r = 0;607// for (int i = 0; i < n; i++) {608// if (src[i] > 3)609// r = 3;610// }611// where the reduction value (r) only has two states, in this example 0 or 3.612// The generated LLVM IR for this type of loop will be like this:613// for.body:614// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]615// ...616// %cmp = icmp sgt i32 %5, 3617// %spec.select = select i1 %cmp, i32 3, i32 %r618// ...619// In general we can support vectorization of loops where 'r' flips between620// any two non-constants, provided they are loop invariant. The only thing621// we actually care about at the end of the loop is whether or not any lane622// in the selected vector is different from the start value. The final623// across-vector reduction after the loop simply involves choosing the start624// value if nothing changed (0 in the example above) or the other selected625// value (3 in the example above).626RecurrenceDescriptor::InstDesc627RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,628Instruction *I, InstDesc &Prev) {629// We must handle the select(cmp(),x,y) as a single instruction. Advance to630// the select.631CmpInst::Predicate Pred;632if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {633if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))634return InstDesc(Select, Prev.getRecKind());635}636637if (!match(I,638m_Select(m_Cmp(Pred, m_Value(), m_Value()), m_Value(), m_Value())))639return InstDesc(false, I);640641SelectInst *SI = cast<SelectInst>(I);642Value *NonPhi = nullptr;643644if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))645NonPhi = SI->getFalseValue();646else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))647NonPhi = SI->getTrueValue();648else649return InstDesc(false, I);650651// We are looking for selects of the form:652// select(cmp(), phi, loop_invariant) or653// select(cmp(), loop_invariant, phi)654if (!Loop->isLoopInvariant(NonPhi))655return InstDesc(false, I);656657return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf658: RecurKind::FAnyOf);659}660661RecurrenceDescriptor::InstDesc662RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,663const InstDesc &Prev) {664assert((isa<CmpInst>(I) || isa<SelectInst>(I) || isa<CallInst>(I)) &&665"Expected a cmp or select or call instruction");666if (!isMinMaxRecurrenceKind(Kind))667return InstDesc(false, I);668669// We must handle the select(cmp()) as a single instruction. Advance to the670// select.671CmpInst::Predicate Pred;672if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {673if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))674return InstDesc(Select, Prev.getRecKind());675}676677// Only match select with single use cmp condition, or a min/max intrinsic.678if (!isa<IntrinsicInst>(I) &&679!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),680m_Value())))681return InstDesc(false, I);682683// Look for a min/max pattern.684if (match(I, m_UMin(m_Value(), m_Value())))685return InstDesc(Kind == RecurKind::UMin, I);686if (match(I, m_UMax(m_Value(), m_Value())))687return InstDesc(Kind == RecurKind::UMax, I);688if (match(I, m_SMax(m_Value(), m_Value())))689return InstDesc(Kind == RecurKind::SMax, I);690if (match(I, m_SMin(m_Value(), m_Value())))691return InstDesc(Kind == RecurKind::SMin, I);692if (match(I, m_OrdFMin(m_Value(), m_Value())))693return InstDesc(Kind == RecurKind::FMin, I);694if (match(I, m_OrdFMax(m_Value(), m_Value())))695return InstDesc(Kind == RecurKind::FMax, I);696if (match(I, m_UnordFMin(m_Value(), m_Value())))697return InstDesc(Kind == RecurKind::FMin, I);698if (match(I, m_UnordFMax(m_Value(), m_Value())))699return InstDesc(Kind == RecurKind::FMax, I);700if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))701return InstDesc(Kind == RecurKind::FMin, I);702if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))703return InstDesc(Kind == RecurKind::FMax, I);704if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())))705return InstDesc(Kind == RecurKind::FMinimum, I);706if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())))707return InstDesc(Kind == RecurKind::FMaximum, I);708709return InstDesc(false, I);710}711712/// Returns true if the select instruction has users in the compare-and-add713/// reduction pattern below. The select instruction argument is the last one714/// in the sequence.715///716/// %sum.1 = phi ...717/// ...718/// %cmp = fcmp pred %0, %CFP719/// %add = fadd %0, %sum.1720/// %sum.2 = select %cmp, %add, %sum.1721RecurrenceDescriptor::InstDesc722RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) {723SelectInst *SI = dyn_cast<SelectInst>(I);724if (!SI)725return InstDesc(false, I);726727CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());728// Only handle single use cases for now.729if (!CI || !CI->hasOneUse())730return InstDesc(false, I);731732Value *TrueVal = SI->getTrueValue();733Value *FalseVal = SI->getFalseValue();734// Handle only when either of operands of select instruction is a PHI735// node for now.736if ((isa<PHINode>(*TrueVal) && isa<PHINode>(*FalseVal)) ||737(!isa<PHINode>(*TrueVal) && !isa<PHINode>(*FalseVal)))738return InstDesc(false, I);739740Instruction *I1 =741isa<PHINode>(*TrueVal) ? dyn_cast<Instruction>(FalseVal)742: dyn_cast<Instruction>(TrueVal);743if (!I1 || !I1->isBinaryOp())744return InstDesc(false, I);745746Value *Op1, *Op2;747if (!(((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||748m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&749I1->isFast()) ||750(m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast())) ||751((m_Add(m_Value(Op1), m_Value(Op2)).match(I1) ||752m_Sub(m_Value(Op1), m_Value(Op2)).match(I1))) ||753(m_Mul(m_Value(Op1), m_Value(Op2)).match(I1))))754return InstDesc(false, I);755756Instruction *IPhi = isa<PHINode>(*Op1) ? dyn_cast<Instruction>(Op1)757: dyn_cast<Instruction>(Op2);758if (!IPhi || IPhi != FalseVal)759return InstDesc(false, I);760761return InstDesc(true, SI);762}763764RecurrenceDescriptor::InstDesc765RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,766Instruction *I, RecurKind Kind,767InstDesc &Prev, FastMathFlags FuncFMF) {768assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);769switch (I->getOpcode()) {770default:771return InstDesc(false, I);772case Instruction::PHI:773return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());774case Instruction::Sub:775case Instruction::Add:776return InstDesc(Kind == RecurKind::Add, I);777case Instruction::Mul:778return InstDesc(Kind == RecurKind::Mul, I);779case Instruction::And:780return InstDesc(Kind == RecurKind::And, I);781case Instruction::Or:782return InstDesc(Kind == RecurKind::Or, I);783case Instruction::Xor:784return InstDesc(Kind == RecurKind::Xor, I);785case Instruction::FDiv:786case Instruction::FMul:787return InstDesc(Kind == RecurKind::FMul, I,788I->hasAllowReassoc() ? nullptr : I);789case Instruction::FSub:790case Instruction::FAdd:791return InstDesc(Kind == RecurKind::FAdd, I,792I->hasAllowReassoc() ? nullptr : I);793case Instruction::Select:794if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul ||795Kind == RecurKind::Add || Kind == RecurKind::Mul)796return isConditionalRdxPattern(Kind, I);797[[fallthrough]];798case Instruction::FCmp:799case Instruction::ICmp:800case Instruction::Call:801if (isAnyOfRecurrenceKind(Kind))802return isAnyOfPattern(L, OrigPhi, I, Prev);803auto HasRequiredFMF = [&]() {804if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros())805return true;806if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros())807return true;808// minimum and maximum intrinsics do not require nsz and nnan flags since809// NaN and signed zeroes are propagated in the intrinsic implementation.810return match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())) ||811match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value()));812};813if (isIntMinMaxRecurrenceKind(Kind) ||814(HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))815return isMinMaxPattern(I, Kind, Prev);816else if (isFMulAddIntrinsic(I))817return InstDesc(Kind == RecurKind::FMulAdd, I,818I->hasAllowReassoc() ? nullptr : I);819return InstDesc(false, I);820}821}822823bool RecurrenceDescriptor::hasMultipleUsesOf(824Instruction *I, SmallPtrSetImpl<Instruction *> &Insts,825unsigned MaxNumUses) {826unsigned NumUses = 0;827for (const Use &U : I->operands()) {828if (Insts.count(dyn_cast<Instruction>(U)))829++NumUses;830if (NumUses > MaxNumUses)831return true;832}833834return false;835}836837bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,838RecurrenceDescriptor &RedDes,839DemandedBits *DB, AssumptionCache *AC,840DominatorTree *DT,841ScalarEvolution *SE) {842BasicBlock *Header = TheLoop->getHeader();843Function &F = *Header->getParent();844FastMathFlags FMF;845FMF.setNoNaNs(846F.getFnAttribute("no-nans-fp-math").getValueAsBool());847FMF.setNoSignedZeros(848F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());849850if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT,851SE)) {852LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");853return true;854}855if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT,856SE)) {857LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");858return true;859}860if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT,861SE)) {862LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");863return true;864}865if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT,866SE)) {867LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");868return true;869}870if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT,871SE)) {872LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");873return true;874}875if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT,876SE)) {877LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");878return true;879}880if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT,881SE)) {882LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");883return true;884}885if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT,886SE)) {887LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");888return true;889}890if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT,891SE)) {892LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");893return true;894}895if (AddReductionVar(Phi, RecurKind::IAnyOf, TheLoop, FMF, RedDes, DB, AC, DT,896SE)) {897LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI."898<< *Phi << "\n");899return true;900}901if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT,902SE)) {903LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");904return true;905}906if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT,907SE)) {908LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");909return true;910}911if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT,912SE)) {913LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");914return true;915}916if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT,917SE)) {918LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");919return true;920}921if (AddReductionVar(Phi, RecurKind::FAnyOf, TheLoop, FMF, RedDes, DB, AC, DT,922SE)) {923LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI."924<< " PHI." << *Phi << "\n");925return true;926}927if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, DT,928SE)) {929LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");930return true;931}932if (AddReductionVar(Phi, RecurKind::FMaximum, TheLoop, FMF, RedDes, DB, AC, DT,933SE)) {934LLVM_DEBUG(dbgs() << "Found a float MAXIMUM reduction PHI." << *Phi << "\n");935return true;936}937if (AddReductionVar(Phi, RecurKind::FMinimum, TheLoop, FMF, RedDes, DB, AC, DT,938SE)) {939LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n");940return true;941}942// Not a reduction of known type.943return false;944}945946bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,947DominatorTree *DT) {948949// Ensure the phi node is in the loop header and has two incoming values.950if (Phi->getParent() != TheLoop->getHeader() ||951Phi->getNumIncomingValues() != 2)952return false;953954// Ensure the loop has a preheader and a single latch block. The loop955// vectorizer will need the latch to set up the next iteration of the loop.956auto *Preheader = TheLoop->getLoopPreheader();957auto *Latch = TheLoop->getLoopLatch();958if (!Preheader || !Latch)959return false;960961// Ensure the phi node's incoming blocks are the loop preheader and latch.962if (Phi->getBasicBlockIndex(Preheader) < 0 ||963Phi->getBasicBlockIndex(Latch) < 0)964return false;965966// Get the previous value. The previous value comes from the latch edge while967// the initial value comes from the preheader edge.968auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));969970// If Previous is a phi in the header, go through incoming values from the971// latch until we find a non-phi value. Use this as the new Previous, all uses972// in the header will be dominated by the original phi, but need to be moved973// after the non-phi previous value.974SmallPtrSet<PHINode *, 4> SeenPhis;975while (auto *PrevPhi = dyn_cast_or_null<PHINode>(Previous)) {976if (PrevPhi->getParent() != Phi->getParent())977return false;978if (!SeenPhis.insert(PrevPhi).second)979return false;980Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));981}982983if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))984return false;985986// Ensure every user of the phi node (recursively) is dominated by the987// previous value. The dominance requirement ensures the loop vectorizer will988// not need to vectorize the initial value prior to the first iteration of the989// loop.990// TODO: Consider extending this sinking to handle memory instructions.991992SmallPtrSet<Value *, 8> Seen;993BasicBlock *PhiBB = Phi->getParent();994SmallVector<Instruction *, 8> WorkList;995auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {996// Cyclic dependence.997if (Previous == SinkCandidate)998return false;9991000if (!Seen.insert(SinkCandidate).second)1001return true;1002if (DT->dominates(Previous,1003SinkCandidate)) // We already are good w/o sinking.1004return true;10051006if (SinkCandidate->getParent() != PhiBB ||1007SinkCandidate->mayHaveSideEffects() ||1008SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())1009return false;10101011// If we reach a PHI node that is not dominated by Previous, we reached a1012// header PHI. No need for sinking.1013if (isa<PHINode>(SinkCandidate))1014return true;10151016// Sink User tentatively and check its users1017WorkList.push_back(SinkCandidate);1018return true;1019};10201021WorkList.push_back(Phi);1022// Try to recursively sink instructions and their users after Previous.1023while (!WorkList.empty()) {1024Instruction *Current = WorkList.pop_back_val();1025for (User *User : Current->users()) {1026if (!TryToPushSinkCandidate(cast<Instruction>(User)))1027return false;1028}1029}10301031return true;1032}10331034/// This function returns the identity element (or neutral element) for1035/// the operation K.1036Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,1037FastMathFlags FMF) const {1038switch (K) {1039case RecurKind::Xor:1040case RecurKind::Add:1041case RecurKind::Or:1042// Adding, Xoring, Oring zero to a number does not change it.1043return ConstantInt::get(Tp, 0);1044case RecurKind::Mul:1045// Multiplying a number by 1 does not change it.1046return ConstantInt::get(Tp, 1);1047case RecurKind::And:1048// AND-ing a number with an all-1 value does not change it.1049return ConstantInt::get(Tp, -1, true);1050case RecurKind::FMul:1051// Multiplying a number by 1 does not change it.1052return ConstantFP::get(Tp, 1.0L);1053case RecurKind::FMulAdd:1054case RecurKind::FAdd:1055// Adding zero to a number does not change it.1056// FIXME: Ideally we should not need to check FMF for FAdd and should always1057// use -0.0. However, this will currently result in mixed vectors of 0.0/-0.0.1058// Instead, we should ensure that 1) the FMF from FAdd are propagated to the PHI1059// nodes where possible, and 2) PHIs with the nsz flag + -0.0 use 0.0. This would1060// mean we can then remove the check for noSignedZeros() below (see D98963).1061if (FMF.noSignedZeros())1062return ConstantFP::get(Tp, 0.0L);1063return ConstantFP::get(Tp, -0.0L);1064case RecurKind::UMin:1065return ConstantInt::get(Tp, -1, true);1066case RecurKind::UMax:1067return ConstantInt::get(Tp, 0);1068case RecurKind::SMin:1069return ConstantInt::get(Tp,1070APInt::getSignedMaxValue(Tp->getIntegerBitWidth()));1071case RecurKind::SMax:1072return ConstantInt::get(Tp,1073APInt::getSignedMinValue(Tp->getIntegerBitWidth()));1074case RecurKind::FMin:1075assert((FMF.noNaNs() && FMF.noSignedZeros()) &&1076"nnan, nsz is expected to be set for FP min reduction.");1077return ConstantFP::getInfinity(Tp, false /*Negative*/);1078case RecurKind::FMax:1079assert((FMF.noNaNs() && FMF.noSignedZeros()) &&1080"nnan, nsz is expected to be set for FP max reduction.");1081return ConstantFP::getInfinity(Tp, true /*Negative*/);1082case RecurKind::FMinimum:1083return ConstantFP::getInfinity(Tp, false /*Negative*/);1084case RecurKind::FMaximum:1085return ConstantFP::getInfinity(Tp, true /*Negative*/);1086case RecurKind::IAnyOf:1087case RecurKind::FAnyOf:1088return getRecurrenceStartValue();1089break;1090default:1091llvm_unreachable("Unknown recurrence kind");1092}1093}10941095unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {1096switch (Kind) {1097case RecurKind::Add:1098return Instruction::Add;1099case RecurKind::Mul:1100return Instruction::Mul;1101case RecurKind::Or:1102return Instruction::Or;1103case RecurKind::And:1104return Instruction::And;1105case RecurKind::Xor:1106return Instruction::Xor;1107case RecurKind::FMul:1108return Instruction::FMul;1109case RecurKind::FMulAdd:1110case RecurKind::FAdd:1111return Instruction::FAdd;1112case RecurKind::SMax:1113case RecurKind::SMin:1114case RecurKind::UMax:1115case RecurKind::UMin:1116case RecurKind::IAnyOf:1117return Instruction::ICmp;1118case RecurKind::FMax:1119case RecurKind::FMin:1120case RecurKind::FMaximum:1121case RecurKind::FMinimum:1122case RecurKind::FAnyOf:1123return Instruction::FCmp;1124default:1125llvm_unreachable("Unknown recurrence operation");1126}1127}11281129SmallVector<Instruction *, 4>1130RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {1131SmallVector<Instruction *, 4> ReductionOperations;1132unsigned RedOp = getOpcode(Kind);11331134// Search down from the Phi to the LoopExitInstr, looking for instructions1135// with a single user of the correct type for the reduction.11361137// Note that we check that the type of the operand is correct for each item in1138// the chain, including the last (the loop exit value). This can come up from1139// sub, which would otherwise be treated as an add reduction. MinMax also need1140// to check for a pair of icmp/select, for which we use getNextInstruction and1141// isCorrectOpcode functions to step the right number of instruction, and1142// check the icmp/select pair.1143// FIXME: We also do not attempt to look through Select's yet, which might1144// be part of the reduction chain, or attempt to looks through And's to find a1145// smaller bitwidth. Subs are also currently not allowed (which are usually1146// treated as part of a add reduction) as they are expected to generally be1147// more expensive than out-of-loop reductions, and need to be costed more1148// carefully.1149unsigned ExpectedUses = 1;1150if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp)1151ExpectedUses = 2;11521153auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {1154for (auto *User : Cur->users()) {1155Instruction *UI = cast<Instruction>(User);1156if (isa<PHINode>(UI))1157continue;1158if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {1159// We are expecting a icmp/select pair, which we go to the next select1160// instruction if we can. We already know that Cur has 2 uses.1161if (isa<SelectInst>(UI))1162return UI;1163continue;1164}1165return UI;1166}1167return nullptr;1168};1169auto isCorrectOpcode = [&](Instruction *Cur) {1170if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {1171Value *LHS, *RHS;1172return SelectPatternResult::isMinOrMax(1173matchSelectPattern(Cur, LHS, RHS).Flavor);1174}1175// Recognize a call to the llvm.fmuladd intrinsic.1176if (isFMulAddIntrinsic(Cur))1177return true;11781179return Cur->getOpcode() == RedOp;1180};11811182// Attempt to look through Phis which are part of the reduction chain1183unsigned ExtraPhiUses = 0;1184Instruction *RdxInstr = LoopExitInstr;1185if (auto ExitPhi = dyn_cast<PHINode>(LoopExitInstr)) {1186if (ExitPhi->getNumIncomingValues() != 2)1187return {};11881189Instruction *Inc0 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(0));1190Instruction *Inc1 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(1));11911192Instruction *Chain = nullptr;1193if (Inc0 == Phi)1194Chain = Inc1;1195else if (Inc1 == Phi)1196Chain = Inc0;1197else1198return {};11991200RdxInstr = Chain;1201ExtraPhiUses = 1;1202}12031204// The loop exit instruction we check first (as a quick test) but add last. We1205// check the opcode is correct (and dont allow them to be Subs) and that they1206// have expected to have the expected number of uses. They will have one use1207// from the phi and one from a LCSSA value, no matter the type.1208if (!isCorrectOpcode(RdxInstr) || !LoopExitInstr->hasNUses(2))1209return {};12101211// Check that the Phi has one (or two for min/max) uses, plus an extra use1212// for conditional reductions.1213if (!Phi->hasNUses(ExpectedUses + ExtraPhiUses))1214return {};12151216Instruction *Cur = getNextInstruction(Phi);12171218// Each other instruction in the chain should have the expected number of uses1219// and be the correct opcode.1220while (Cur != RdxInstr) {1221if (!Cur || !isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))1222return {};12231224ReductionOperations.push_back(Cur);1225Cur = getNextInstruction(Cur);1226}12271228ReductionOperations.push_back(Cur);1229return ReductionOperations;1230}12311232InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,1233const SCEV *Step, BinaryOperator *BOp,1234SmallVectorImpl<Instruction *> *Casts)1235: StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {1236assert(IK != IK_NoInduction && "Not an induction");12371238// Start value type should match the induction kind and the value1239// itself should not be null.1240assert(StartValue && "StartValue is null");1241assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&1242"StartValue is not a pointer for pointer induction");1243assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&1244"StartValue is not an integer for integer induction");12451246// Check the Step Value. It should be non-zero integer value.1247assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&1248"Step value is zero");12491250assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&1251"StepValue is not an integer");12521253assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&1254"StepValue is not FP for FpInduction");1255assert((IK != IK_FpInduction ||1256(InductionBinOp &&1257(InductionBinOp->getOpcode() == Instruction::FAdd ||1258InductionBinOp->getOpcode() == Instruction::FSub))) &&1259"Binary opcode should be specified for FP induction");12601261if (Casts) {1262for (auto &Inst : *Casts) {1263RedundantCasts.push_back(Inst);1264}1265}1266}12671268ConstantInt *InductionDescriptor::getConstIntStepValue() const {1269if (isa<SCEVConstant>(Step))1270return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());1271return nullptr;1272}12731274bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,1275ScalarEvolution *SE,1276InductionDescriptor &D) {12771278// Here we only handle FP induction variables.1279assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");12801281if (TheLoop->getHeader() != Phi->getParent())1282return false;12831284// The loop may have multiple entrances or multiple exits; we can analyze1285// this phi if it has a unique entry value and a unique backedge value.1286if (Phi->getNumIncomingValues() != 2)1287return false;1288Value *BEValue = nullptr, *StartValue = nullptr;1289if (TheLoop->contains(Phi->getIncomingBlock(0))) {1290BEValue = Phi->getIncomingValue(0);1291StartValue = Phi->getIncomingValue(1);1292} else {1293assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&1294"Unexpected Phi node in the loop");1295BEValue = Phi->getIncomingValue(1);1296StartValue = Phi->getIncomingValue(0);1297}12981299BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);1300if (!BOp)1301return false;13021303Value *Addend = nullptr;1304if (BOp->getOpcode() == Instruction::FAdd) {1305if (BOp->getOperand(0) == Phi)1306Addend = BOp->getOperand(1);1307else if (BOp->getOperand(1) == Phi)1308Addend = BOp->getOperand(0);1309} else if (BOp->getOpcode() == Instruction::FSub)1310if (BOp->getOperand(0) == Phi)1311Addend = BOp->getOperand(1);13121313if (!Addend)1314return false;13151316// The addend should be loop invariant1317if (auto *I = dyn_cast<Instruction>(Addend))1318if (TheLoop->contains(I))1319return false;13201321// FP Step has unknown SCEV1322const SCEV *Step = SE->getUnknown(Addend);1323D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);1324return true;1325}13261327/// This function is called when we suspect that the update-chain of a phi node1328/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,1329/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime1330/// predicate P under which the SCEV expression for the phi can be the1331/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the1332/// cast instructions that are involved in the update-chain of this induction.1333/// A caller that adds the required runtime predicate can be free to drop these1334/// cast instructions, and compute the phi using \p AR (instead of some scev1335/// expression with casts).1336///1337/// For example, without a predicate the scev expression can take the following1338/// form:1339/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)1340///1341/// It corresponds to the following IR sequence:1342/// %for.body:1343/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]1344/// %casted_phi = "ExtTrunc i64 %x"1345/// %add = add i64 %casted_phi, %step1346///1347/// where %x is given in \p PN,1348/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,1349/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of1350/// several forms, for example, such as:1351/// ExtTrunc1: %casted_phi = and %x, 2^n-11352/// or:1353/// ExtTrunc2: %t = shl %x, m1354/// %casted_phi = ashr %t, m1355///1356/// If we are able to find such sequence, we return the instructions1357/// we found, namely %casted_phi and the instructions on its use-def chain up1358/// to the phi (not including the phi).1359static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,1360const SCEVUnknown *PhiScev,1361const SCEVAddRecExpr *AR,1362SmallVectorImpl<Instruction *> &CastInsts) {13631364assert(CastInsts.empty() && "CastInsts is expected to be empty.");1365auto *PN = cast<PHINode>(PhiScev->getValue());1366assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");1367const Loop *L = AR->getLoop();13681369// Find any cast instructions that participate in the def-use chain of1370// PhiScev in the loop.1371// FORNOW/TODO: We currently expect the def-use chain to include only1372// two-operand instructions, where one of the operands is an invariant.1373// createAddRecFromPHIWithCasts() currently does not support anything more1374// involved than that, so we keep the search simple. This can be1375// extended/generalized as needed.13761377auto getDef = [&](const Value *Val) -> Value * {1378const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);1379if (!BinOp)1380return nullptr;1381Value *Op0 = BinOp->getOperand(0);1382Value *Op1 = BinOp->getOperand(1);1383Value *Def = nullptr;1384if (L->isLoopInvariant(Op0))1385Def = Op1;1386else if (L->isLoopInvariant(Op1))1387Def = Op0;1388return Def;1389};13901391// Look for the instruction that defines the induction via the1392// loop backedge.1393BasicBlock *Latch = L->getLoopLatch();1394if (!Latch)1395return false;1396Value *Val = PN->getIncomingValueForBlock(Latch);1397if (!Val)1398return false;13991400// Follow the def-use chain until the induction phi is reached.1401// If on the way we encounter a Value that has the same SCEV Expr as the1402// phi node, we can consider the instructions we visit from that point1403// as part of the cast-sequence that can be ignored.1404bool InCastSequence = false;1405auto *Inst = dyn_cast<Instruction>(Val);1406while (Val != PN) {1407// If we encountered a phi node other than PN, or if we left the loop,1408// we bail out.1409if (!Inst || !L->contains(Inst)) {1410return false;1411}1412auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));1413if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))1414InCastSequence = true;1415if (InCastSequence) {1416// Only the last instruction in the cast sequence is expected to have1417// uses outside the induction def-use chain.1418if (!CastInsts.empty())1419if (!Inst->hasOneUse())1420return false;1421CastInsts.push_back(Inst);1422}1423Val = getDef(Val);1424if (!Val)1425return false;1426Inst = dyn_cast<Instruction>(Val);1427}14281429return InCastSequence;1430}14311432bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,1433PredicatedScalarEvolution &PSE,1434InductionDescriptor &D, bool Assume) {1435Type *PhiTy = Phi->getType();14361437// Handle integer and pointer inductions variables.1438// Now we handle also FP induction but not trying to make a1439// recurrent expression from the PHI node in-place.14401441if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&1442!PhiTy->isDoubleTy() && !PhiTy->isHalfTy())1443return false;14441445if (PhiTy->isFloatingPointTy())1446return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);14471448const SCEV *PhiScev = PSE.getSCEV(Phi);1449const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);14501451// We need this expression to be an AddRecExpr.1452if (Assume && !AR)1453AR = PSE.getAsAddRec(Phi);14541455if (!AR) {1456LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");1457return false;1458}14591460// Record any Cast instructions that participate in the induction update1461const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);1462// If we started from an UnknownSCEV, and managed to build an addRecurrence1463// only after enabling Assume with PSCEV, this means we may have encountered1464// cast instructions that required adding a runtime check in order to1465// guarantee the correctness of the AddRecurrence respresentation of the1466// induction.1467if (PhiScev != AR && SymbolicPhi) {1468SmallVector<Instruction *, 2> Casts;1469if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))1470return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);1471}14721473return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);1474}14751476bool InductionDescriptor::isInductionPHI(1477PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,1478InductionDescriptor &D, const SCEV *Expr,1479SmallVectorImpl<Instruction *> *CastsToIgnore) {1480Type *PhiTy = Phi->getType();1481// We only handle integer and pointer inductions variables.1482if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())1483return false;14841485// Check that the PHI is consecutive.1486const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);1487const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);14881489if (!AR) {1490LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");1491return false;1492}14931494if (AR->getLoop() != TheLoop) {1495// FIXME: We should treat this as a uniform. Unfortunately, we1496// don't currently know how to handled uniform PHIs.1497LLVM_DEBUG(1498dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");1499return false;1500}15011502// This function assumes that InductionPhi is called only on Phi nodes1503// present inside loop headers. Check for the same, and throw an assert if1504// the current Phi is not present inside the loop header.1505assert(Phi->getParent() == AR->getLoop()->getHeader()1506&& "Invalid Phi node, not present in loop header");15071508Value *StartValue =1509Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());15101511BasicBlock *Latch = AR->getLoop()->getLoopLatch();1512if (!Latch)1513return false;15141515const SCEV *Step = AR->getStepRecurrence(*SE);1516// Calculate the pointer stride and check if it is consecutive.1517// The stride may be a constant or a loop invariant integer value.1518const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);1519if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))1520return false;15211522if (PhiTy->isIntegerTy()) {1523BinaryOperator *BOp =1524dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));1525D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,1526CastsToIgnore);1527return true;1528}15291530assert(PhiTy->isPointerTy() && "The PHI must be a pointer");15311532// This allows induction variables w/non-constant steps.1533D = InductionDescriptor(StartValue, IK_PtrInduction, Step);1534return true;1535}153615371538