Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
35269 views
//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8/// \file9/// This file contains the declarations of the Vectorization Plan base classes:10/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual11/// VPBlockBase, together implementing a Hierarchical CFG;12/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained13/// within VPBasicBlocks;14/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that15/// also inherit from VPValue.16/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned17/// instruction;18/// 5. The VPlan class holding a candidate for vectorization;19/// 6. The VPlanPrinter class providing a way to print a plan in dot format;20/// These are documented in docs/VectorizationPlan.rst.21//22//===----------------------------------------------------------------------===//2324#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H2627#include "VPlanAnalysis.h"28#include "VPlanValue.h"29#include "llvm/ADT/DenseMap.h"30#include "llvm/ADT/MapVector.h"31#include "llvm/ADT/SmallBitVector.h"32#include "llvm/ADT/SmallPtrSet.h"33#include "llvm/ADT/SmallVector.h"34#include "llvm/ADT/Twine.h"35#include "llvm/ADT/ilist.h"36#include "llvm/ADT/ilist_node.h"37#include "llvm/Analysis/DomTreeUpdater.h"38#include "llvm/Analysis/IVDescriptors.h"39#include "llvm/Analysis/LoopInfo.h"40#include "llvm/Analysis/VectorUtils.h"41#include "llvm/IR/DebugLoc.h"42#include "llvm/IR/FMF.h"43#include "llvm/IR/Operator.h"44#include "llvm/Support/InstructionCost.h"45#include <algorithm>46#include <cassert>47#include <cstddef>48#include <string>4950namespace llvm {5152class BasicBlock;53class DominatorTree;54class InnerLoopVectorizer;55class IRBuilderBase;56class LoopInfo;57class raw_ostream;58class RecurrenceDescriptor;59class SCEV;60class Type;61class VPBasicBlock;62class VPRegionBlock;63class VPlan;64class VPReplicateRecipe;65class VPlanSlp;66class Value;67class LoopVectorizationCostModel;68class LoopVersioning;6970struct VPCostContext;7172namespace Intrinsic {73typedef unsigned ID;74}7576/// Returns a calculation for the total number of elements for a given \p VF.77/// For fixed width vectors this value is a constant, whereas for scalable78/// vectors it is an expression determined at runtime.79Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);8081/// Return a value for Step multiplied by VF.82Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,83int64_t Step);8485const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,86Loop *CurLoop = nullptr);8788/// A helper function that returns the reciprocal of the block probability of89/// predicated blocks. If we return X, we are assuming the predicated block90/// will execute once for every X iterations of the loop header.91///92/// TODO: We should use actual block probability here, if available. Currently,93/// we always assume predicated blocks have a 50% chance of executing.94inline unsigned getReciprocalPredBlockProb() { return 2; }9596/// A range of powers-of-2 vectorization factors with fixed start and97/// adjustable end. The range includes start and excludes end, e.g.,:98/// [1, 16) = {1, 2, 4, 8}99struct VFRange {100// A power of 2.101const ElementCount Start;102103// A power of 2. If End <= Start range is empty.104ElementCount End;105106bool isEmpty() const {107return End.getKnownMinValue() <= Start.getKnownMinValue();108}109110VFRange(const ElementCount &Start, const ElementCount &End)111: Start(Start), End(End) {112assert(Start.isScalable() == End.isScalable() &&113"Both Start and End should have the same scalable flag");114assert(isPowerOf2_32(Start.getKnownMinValue()) &&115"Expected Start to be a power of 2");116assert(isPowerOf2_32(End.getKnownMinValue()) &&117"Expected End to be a power of 2");118}119120/// Iterator to iterate over vectorization factors in a VFRange.121class iterator122: public iterator_facade_base<iterator, std::forward_iterator_tag,123ElementCount> {124ElementCount VF;125126public:127iterator(ElementCount VF) : VF(VF) {}128129bool operator==(const iterator &Other) const { return VF == Other.VF; }130131ElementCount operator*() const { return VF; }132133iterator &operator++() {134VF *= 2;135return *this;136}137};138139iterator begin() { return iterator(Start); }140iterator end() {141assert(isPowerOf2_32(End.getKnownMinValue()));142return iterator(End);143}144};145146using VPlanPtr = std::unique_ptr<VPlan>;147148/// In what follows, the term "input IR" refers to code that is fed into the149/// vectorizer whereas the term "output IR" refers to code that is generated by150/// the vectorizer.151152/// VPLane provides a way to access lanes in both fixed width and scalable153/// vectors, where for the latter the lane index sometimes needs calculating154/// as a runtime expression.155class VPLane {156public:157/// Kind describes how to interpret Lane.158enum class Kind : uint8_t {159/// For First, Lane is the index into the first N elements of a160/// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.161First,162/// For ScalableLast, Lane is the offset from the start of the last163/// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For164/// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of165/// 1 corresponds to `((vscale - 1) * N) + 1`, etc.166ScalableLast167};168169private:170/// in [0..VF)171unsigned Lane;172173/// Indicates how the Lane should be interpreted, as described above.174Kind LaneKind;175176public:177VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}178179static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }180181static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {182assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&183"trying to extract with invalid offset");184unsigned LaneOffset = VF.getKnownMinValue() - Offset;185Kind LaneKind;186if (VF.isScalable())187// In this case 'LaneOffset' refers to the offset from the start of the188// last subvector with VF.getKnownMinValue() elements.189LaneKind = VPLane::Kind::ScalableLast;190else191LaneKind = VPLane::Kind::First;192return VPLane(LaneOffset, LaneKind);193}194195static VPLane getLastLaneForVF(const ElementCount &VF) {196return getLaneFromEnd(VF, 1);197}198199/// Returns a compile-time known value for the lane index and asserts if the200/// lane can only be calculated at runtime.201unsigned getKnownLane() const {202assert(LaneKind == Kind::First);203return Lane;204}205206/// Returns an expression describing the lane index that can be used at207/// runtime.208Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;209210/// Returns the Kind of lane offset.211Kind getKind() const { return LaneKind; }212213/// Returns true if this is the first lane of the whole vector.214bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }215216/// Maps the lane to a cache index based on \p VF.217unsigned mapToCacheIndex(const ElementCount &VF) const {218switch (LaneKind) {219case VPLane::Kind::ScalableLast:220assert(VF.isScalable() && Lane < VF.getKnownMinValue());221return VF.getKnownMinValue() + Lane;222default:223assert(Lane < VF.getKnownMinValue());224return Lane;225}226}227228/// Returns the maxmimum number of lanes that we are able to consider229/// caching for \p VF.230static unsigned getNumCachedLanes(const ElementCount &VF) {231return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);232}233};234235/// VPIteration represents a single point in the iteration space of the output236/// (vectorized and/or unrolled) IR loop.237struct VPIteration {238/// in [0..UF)239unsigned Part;240241VPLane Lane;242243VPIteration(unsigned Part, unsigned Lane,244VPLane::Kind Kind = VPLane::Kind::First)245: Part(Part), Lane(Lane, Kind) {}246247VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}248249bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }250};251252/// VPTransformState holds information passed down when "executing" a VPlan,253/// needed for generating the output IR.254struct VPTransformState {255VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,256DominatorTree *DT, IRBuilderBase &Builder,257InnerLoopVectorizer *ILV, VPlan *Plan, LLVMContext &Ctx);258259/// The chosen Vectorization and Unroll Factors of the loop being vectorized.260ElementCount VF;261unsigned UF;262263/// Hold the indices to generate specific scalar instructions. Null indicates264/// that all instances are to be generated, using either scalar or vector265/// instructions.266std::optional<VPIteration> Instance;267268struct DataState {269/// A type for vectorized values in the new loop. Each value from the270/// original loop, when vectorized, is represented by UF vector values in271/// the new unrolled loop, where UF is the unroll factor.272typedef SmallVector<Value *, 2> PerPartValuesTy;273274DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;275276using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;277DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;278} Data;279280/// Get the generated vector Value for a given VPValue \p Def and a given \p281/// Part if \p IsScalar is false, otherwise return the generated scalar282/// for \p Part. \See set.283Value *get(VPValue *Def, unsigned Part, bool IsScalar = false);284285/// Get the generated Value for a given VPValue and given Part and Lane.286Value *get(VPValue *Def, const VPIteration &Instance);287288bool hasVectorValue(VPValue *Def, unsigned Part) {289auto I = Data.PerPartOutput.find(Def);290return I != Data.PerPartOutput.end() && Part < I->second.size() &&291I->second[Part];292}293294bool hasScalarValue(VPValue *Def, VPIteration Instance) {295auto I = Data.PerPartScalars.find(Def);296if (I == Data.PerPartScalars.end())297return false;298unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);299return Instance.Part < I->second.size() &&300CacheIdx < I->second[Instance.Part].size() &&301I->second[Instance.Part][CacheIdx];302}303304/// Set the generated vector Value for a given VPValue and a given Part, if \p305/// IsScalar is false. If \p IsScalar is true, set the scalar in (Part, 0).306void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar = false) {307if (IsScalar) {308set(Def, V, VPIteration(Part, 0));309return;310}311assert((VF.isScalar() || V->getType()->isVectorTy()) &&312"scalar values must be stored as (Part, 0)");313if (!Data.PerPartOutput.count(Def)) {314DataState::PerPartValuesTy Entry(UF);315Data.PerPartOutput[Def] = Entry;316}317Data.PerPartOutput[Def][Part] = V;318}319320/// Reset an existing vector value for \p Def and a given \p Part.321void reset(VPValue *Def, Value *V, unsigned Part) {322auto Iter = Data.PerPartOutput.find(Def);323assert(Iter != Data.PerPartOutput.end() &&324"need to overwrite existing value");325Iter->second[Part] = V;326}327328/// Set the generated scalar \p V for \p Def and the given \p Instance.329void set(VPValue *Def, Value *V, const VPIteration &Instance) {330auto Iter = Data.PerPartScalars.insert({Def, {}});331auto &PerPartVec = Iter.first->second;332if (PerPartVec.size() <= Instance.Part)333PerPartVec.resize(Instance.Part + 1);334auto &Scalars = PerPartVec[Instance.Part];335unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);336if (Scalars.size() <= CacheIdx)337Scalars.resize(CacheIdx + 1);338assert(!Scalars[CacheIdx] && "should overwrite existing value");339Scalars[CacheIdx] = V;340}341342/// Reset an existing scalar value for \p Def and a given \p Instance.343void reset(VPValue *Def, Value *V, const VPIteration &Instance) {344auto Iter = Data.PerPartScalars.find(Def);345assert(Iter != Data.PerPartScalars.end() &&346"need to overwrite existing value");347assert(Instance.Part < Iter->second.size() &&348"need to overwrite existing value");349unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);350assert(CacheIdx < Iter->second[Instance.Part].size() &&351"need to overwrite existing value");352Iter->second[Instance.Part][CacheIdx] = V;353}354355/// Add additional metadata to \p To that was not present on \p Orig.356///357/// Currently this is used to add the noalias annotations based on the358/// inserted memchecks. Use this for instructions that are *cloned* into the359/// vector loop.360void addNewMetadata(Instruction *To, const Instruction *Orig);361362/// Add metadata from one instruction to another.363///364/// This includes both the original MDs from \p From and additional ones (\see365/// addNewMetadata). Use this for *newly created* instructions in the vector366/// loop.367void addMetadata(Value *To, Instruction *From);368369/// Set the debug location in the builder using the debug location \p DL.370void setDebugLocFrom(DebugLoc DL);371372/// Construct the vector value of a scalarized value \p V one lane at a time.373void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance);374375/// Hold state information used when constructing the CFG of the output IR,376/// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.377struct CFGState {378/// The previous VPBasicBlock visited. Initially set to null.379VPBasicBlock *PrevVPBB = nullptr;380381/// The previous IR BasicBlock created or used. Initially set to the new382/// header BasicBlock.383BasicBlock *PrevBB = nullptr;384385/// The last IR BasicBlock in the output IR. Set to the exit block of the386/// vector loop.387BasicBlock *ExitBB = nullptr;388389/// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case390/// of replication, maps the BasicBlock of the last replica created.391SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;392393/// Updater for the DominatorTree.394DomTreeUpdater DTU;395396CFGState(DominatorTree *DT)397: DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}398399/// Returns the BasicBlock* mapped to the pre-header of the loop region400/// containing \p R.401BasicBlock *getPreheaderBBFor(VPRecipeBase *R);402} CFG;403404/// Hold a pointer to LoopInfo to register new basic blocks in the loop.405LoopInfo *LI;406407/// Hold a reference to the IRBuilder used to generate output IR code.408IRBuilderBase &Builder;409410/// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.411InnerLoopVectorizer *ILV;412413/// Pointer to the VPlan code is generated for.414VPlan *Plan;415416/// The loop object for the current parent region, or nullptr.417Loop *CurrentVectorLoop = nullptr;418419/// LoopVersioning. It's only set up (non-null) if memchecks were420/// used.421///422/// This is currently only used to add no-alias metadata based on the423/// memchecks. The actually versioning is performed manually.424LoopVersioning *LVer = nullptr;425426/// Map SCEVs to their expanded values. Populated when executing427/// VPExpandSCEVRecipes.428DenseMap<const SCEV *, Value *> ExpandedSCEVs;429430/// VPlan-based type analysis.431VPTypeAnalysis TypeAnalysis;432};433434/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.435/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.436class VPBlockBase {437friend class VPBlockUtils;438439const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).440441/// An optional name for the block.442std::string Name;443444/// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if445/// it is a topmost VPBlockBase.446VPRegionBlock *Parent = nullptr;447448/// List of predecessor blocks.449SmallVector<VPBlockBase *, 1> Predecessors;450451/// List of successor blocks.452SmallVector<VPBlockBase *, 1> Successors;453454/// VPlan containing the block. Can only be set on the entry block of the455/// plan.456VPlan *Plan = nullptr;457458/// Add \p Successor as the last successor to this block.459void appendSuccessor(VPBlockBase *Successor) {460assert(Successor && "Cannot add nullptr successor!");461Successors.push_back(Successor);462}463464/// Add \p Predecessor as the last predecessor to this block.465void appendPredecessor(VPBlockBase *Predecessor) {466assert(Predecessor && "Cannot add nullptr predecessor!");467Predecessors.push_back(Predecessor);468}469470/// Remove \p Predecessor from the predecessors of this block.471void removePredecessor(VPBlockBase *Predecessor) {472auto Pos = find(Predecessors, Predecessor);473assert(Pos && "Predecessor does not exist");474Predecessors.erase(Pos);475}476477/// Remove \p Successor from the successors of this block.478void removeSuccessor(VPBlockBase *Successor) {479auto Pos = find(Successors, Successor);480assert(Pos && "Successor does not exist");481Successors.erase(Pos);482}483484protected:485VPBlockBase(const unsigned char SC, const std::string &N)486: SubclassID(SC), Name(N) {}487488public:489/// An enumeration for keeping track of the concrete subclass of VPBlockBase490/// that are actually instantiated. Values of this enumeration are kept in the491/// SubclassID field of the VPBlockBase objects. They are used for concrete492/// type identification.493using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };494495using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;496497virtual ~VPBlockBase() = default;498499const std::string &getName() const { return Name; }500501void setName(const Twine &newName) { Name = newName.str(); }502503/// \return an ID for the concrete type of this object.504/// This is used to implement the classof checks. This should not be used505/// for any other purpose, as the values may change as LLVM evolves.506unsigned getVPBlockID() const { return SubclassID; }507508VPRegionBlock *getParent() { return Parent; }509const VPRegionBlock *getParent() const { return Parent; }510511/// \return A pointer to the plan containing the current block.512VPlan *getPlan();513const VPlan *getPlan() const;514515/// Sets the pointer of the plan containing the block. The block must be the516/// entry block into the VPlan.517void setPlan(VPlan *ParentPlan);518519void setParent(VPRegionBlock *P) { Parent = P; }520521/// \return the VPBasicBlock that is the entry of this VPBlockBase,522/// recursively, if the latter is a VPRegionBlock. Otherwise, if this523/// VPBlockBase is a VPBasicBlock, it is returned.524const VPBasicBlock *getEntryBasicBlock() const;525VPBasicBlock *getEntryBasicBlock();526527/// \return the VPBasicBlock that is the exiting this VPBlockBase,528/// recursively, if the latter is a VPRegionBlock. Otherwise, if this529/// VPBlockBase is a VPBasicBlock, it is returned.530const VPBasicBlock *getExitingBasicBlock() const;531VPBasicBlock *getExitingBasicBlock();532533const VPBlocksTy &getSuccessors() const { return Successors; }534VPBlocksTy &getSuccessors() { return Successors; }535536iterator_range<VPBlockBase **> successors() { return Successors; }537538const VPBlocksTy &getPredecessors() const { return Predecessors; }539VPBlocksTy &getPredecessors() { return Predecessors; }540541/// \return the successor of this VPBlockBase if it has a single successor.542/// Otherwise return a null pointer.543VPBlockBase *getSingleSuccessor() const {544return (Successors.size() == 1 ? *Successors.begin() : nullptr);545}546547/// \return the predecessor of this VPBlockBase if it has a single548/// predecessor. Otherwise return a null pointer.549VPBlockBase *getSinglePredecessor() const {550return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);551}552553size_t getNumSuccessors() const { return Successors.size(); }554size_t getNumPredecessors() const { return Predecessors.size(); }555556/// An Enclosing Block of a block B is any block containing B, including B557/// itself. \return the closest enclosing block starting from "this", which558/// has successors. \return the root enclosing block if all enclosing blocks559/// have no successors.560VPBlockBase *getEnclosingBlockWithSuccessors();561562/// \return the closest enclosing block starting from "this", which has563/// predecessors. \return the root enclosing block if all enclosing blocks564/// have no predecessors.565VPBlockBase *getEnclosingBlockWithPredecessors();566567/// \return the successors either attached directly to this VPBlockBase or, if568/// this VPBlockBase is the exit block of a VPRegionBlock and has no569/// successors of its own, search recursively for the first enclosing570/// VPRegionBlock that has successors and return them. If no such571/// VPRegionBlock exists, return the (empty) successors of the topmost572/// VPBlockBase reached.573const VPBlocksTy &getHierarchicalSuccessors() {574return getEnclosingBlockWithSuccessors()->getSuccessors();575}576577/// \return the hierarchical successor of this VPBlockBase if it has a single578/// hierarchical successor. Otherwise return a null pointer.579VPBlockBase *getSingleHierarchicalSuccessor() {580return getEnclosingBlockWithSuccessors()->getSingleSuccessor();581}582583/// \return the predecessors either attached directly to this VPBlockBase or,584/// if this VPBlockBase is the entry block of a VPRegionBlock and has no585/// predecessors of its own, search recursively for the first enclosing586/// VPRegionBlock that has predecessors and return them. If no such587/// VPRegionBlock exists, return the (empty) predecessors of the topmost588/// VPBlockBase reached.589const VPBlocksTy &getHierarchicalPredecessors() {590return getEnclosingBlockWithPredecessors()->getPredecessors();591}592593/// \return the hierarchical predecessor of this VPBlockBase if it has a594/// single hierarchical predecessor. Otherwise return a null pointer.595VPBlockBase *getSingleHierarchicalPredecessor() {596return getEnclosingBlockWithPredecessors()->getSinglePredecessor();597}598599/// Set a given VPBlockBase \p Successor as the single successor of this600/// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.601/// This VPBlockBase must have no successors.602void setOneSuccessor(VPBlockBase *Successor) {603assert(Successors.empty() && "Setting one successor when others exist.");604assert(Successor->getParent() == getParent() &&605"connected blocks must have the same parent");606appendSuccessor(Successor);607}608609/// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two610/// successors of this VPBlockBase. This VPBlockBase is not added as611/// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no612/// successors.613void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {614assert(Successors.empty() && "Setting two successors when others exist.");615appendSuccessor(IfTrue);616appendSuccessor(IfFalse);617}618619/// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.620/// This VPBlockBase must have no predecessors. This VPBlockBase is not added621/// as successor of any VPBasicBlock in \p NewPreds.622void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) {623assert(Predecessors.empty() && "Block predecessors already set.");624for (auto *Pred : NewPreds)625appendPredecessor(Pred);626}627628/// Set each VPBasicBlock in \p NewSuccss as successor of this VPBlockBase.629/// This VPBlockBase must have no successors. This VPBlockBase is not added630/// as predecessor of any VPBasicBlock in \p NewSuccs.631void setSuccessors(ArrayRef<VPBlockBase *> NewSuccs) {632assert(Successors.empty() && "Block successors already set.");633for (auto *Succ : NewSuccs)634appendSuccessor(Succ);635}636637/// Remove all the predecessor of this block.638void clearPredecessors() { Predecessors.clear(); }639640/// Remove all the successors of this block.641void clearSuccessors() { Successors.clear(); }642643/// The method which generates the output IR that correspond to this644/// VPBlockBase, thereby "executing" the VPlan.645virtual void execute(VPTransformState *State) = 0;646647/// Return the cost of the block.648virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;649650/// Delete all blocks reachable from a given VPBlockBase, inclusive.651static void deleteCFG(VPBlockBase *Entry);652653/// Return true if it is legal to hoist instructions into this block.654bool isLegalToHoistInto() {655// There are currently no constraints that prevent an instruction to be656// hoisted into a VPBlockBase.657return true;658}659660/// Replace all operands of VPUsers in the block with \p NewValue and also661/// replaces all uses of VPValues defined in the block with NewValue.662virtual void dropAllReferences(VPValue *NewValue) = 0;663664#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)665void printAsOperand(raw_ostream &OS, bool PrintType) const {666OS << getName();667}668669/// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines670/// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using671/// consequtive numbers.672///673/// Note that the numbering is applied to the whole VPlan, so printing674/// individual blocks is consistent with the whole VPlan printing.675virtual void print(raw_ostream &O, const Twine &Indent,676VPSlotTracker &SlotTracker) const = 0;677678/// Print plain-text dump of this VPlan to \p O.679void print(raw_ostream &O) const {680VPSlotTracker SlotTracker(getPlan());681print(O, "", SlotTracker);682}683684/// Print the successors of this block to \p O, prefixing all lines with \p685/// Indent.686void printSuccessors(raw_ostream &O, const Twine &Indent) const;687688/// Dump this VPBlockBase to dbgs().689LLVM_DUMP_METHOD void dump() const { print(dbgs()); }690#endif691692/// Clone the current block and it's recipes without updating the operands of693/// the cloned recipes, including all blocks in the single-entry single-exit694/// region for VPRegionBlocks.695virtual VPBlockBase *clone() = 0;696};697698/// A value that is used outside the VPlan. The operand of the user needs to be699/// added to the associated phi node. The incoming block from VPlan is700/// determined by where the VPValue is defined: if it is defined by a recipe701/// outside a region, its parent block is used, otherwise the middle block is702/// used.703class VPLiveOut : public VPUser {704PHINode *Phi;705706public:707VPLiveOut(PHINode *Phi, VPValue *Op)708: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}709710static inline bool classof(const VPUser *U) {711return U->getVPUserID() == VPUser::VPUserID::LiveOut;712}713714/// Fix the wrapped phi node. This means adding an incoming value to exit715/// block phi's from the vector loop via middle block (values from scalar loop716/// already reach these phi's), and updating the value to scalar header phi's717/// from the scalar preheader.718void fixPhi(VPlan &Plan, VPTransformState &State);719720/// Returns true if the VPLiveOut uses scalars of operand \p Op.721bool usesScalars(const VPValue *Op) const override {722assert(is_contained(operands(), Op) &&723"Op must be an operand of the recipe");724return true;725}726727PHINode *getPhi() const { return Phi; }728729#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)730/// Print the VPLiveOut to \p O.731void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;732#endif733};734735/// Struct to hold various analysis needed for cost computations.736struct VPCostContext {737const TargetTransformInfo &TTI;738VPTypeAnalysis Types;739LLVMContext &LLVMCtx;740LoopVectorizationCostModel &CM;741SmallPtrSet<Instruction *, 8> SkipCostComputation;742743VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy,744LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM)745: TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}746747/// Return the cost for \p UI with \p VF using the legacy cost model as748/// fallback until computing the cost of all recipes migrates to VPlan.749InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const;750751/// Return true if the cost for \p UI shouldn't be computed, e.g. because it752/// has already been pre-computed.753bool skipCostComputation(Instruction *UI, bool IsVector) const;754};755756/// VPRecipeBase is a base class modeling a sequence of one or more output IR757/// instructions. VPRecipeBase owns the VPValues it defines through VPDef758/// and is responsible for deleting its defined values. Single-value759/// recipes must inherit from VPSingleDef instead of inheriting from both760/// VPRecipeBase and VPValue separately.761class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,762public VPDef,763public VPUser {764friend VPBasicBlock;765friend class VPBlockUtils;766767/// Each VPRecipe belongs to a single VPBasicBlock.768VPBasicBlock *Parent = nullptr;769770/// The debug location for the recipe.771DebugLoc DL;772773public:774VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,775DebugLoc DL = {})776: VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}777778template <typename IterT>779VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands,780DebugLoc DL = {})781: VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}782virtual ~VPRecipeBase() = default;783784/// Clone the current recipe.785virtual VPRecipeBase *clone() = 0;786787/// \return the VPBasicBlock which this VPRecipe belongs to.788VPBasicBlock *getParent() { return Parent; }789const VPBasicBlock *getParent() const { return Parent; }790791/// The method which generates the output IR instructions that correspond to792/// this VPRecipe, thereby "executing" the VPlan.793virtual void execute(VPTransformState &State) = 0;794795/// Return the cost of this recipe, taking into account if the cost796/// computation should be skipped and the ForceTargetInstructionCost flag.797/// Also takes care of printing the cost for debugging.798virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx);799800/// Insert an unlinked recipe into a basic block immediately before801/// the specified recipe.802void insertBefore(VPRecipeBase *InsertPos);803/// Insert an unlinked recipe into \p BB immediately before the insertion804/// point \p IP;805void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);806807/// Insert an unlinked Recipe into a basic block immediately after808/// the specified Recipe.809void insertAfter(VPRecipeBase *InsertPos);810811/// Unlink this recipe from its current VPBasicBlock and insert it into812/// the VPBasicBlock that MovePos lives in, right after MovePos.813void moveAfter(VPRecipeBase *MovePos);814815/// Unlink this recipe and insert into BB before I.816///817/// \pre I is a valid iterator into BB.818void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);819820/// This method unlinks 'this' from the containing basic block, but does not821/// delete it.822void removeFromParent();823824/// This method unlinks 'this' from the containing basic block and deletes it.825///826/// \returns an iterator pointing to the element after the erased one827iplist<VPRecipeBase>::iterator eraseFromParent();828829/// Method to support type inquiry through isa, cast, and dyn_cast.830static inline bool classof(const VPDef *D) {831// All VPDefs are also VPRecipeBases.832return true;833}834835static inline bool classof(const VPUser *U) {836return U->getVPUserID() == VPUser::VPUserID::Recipe;837}838839/// Returns true if the recipe may have side-effects.840bool mayHaveSideEffects() const;841842/// Returns true for PHI-like recipes.843bool isPhi() const {844return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;845}846847/// Returns true if the recipe may read from memory.848bool mayReadFromMemory() const;849850/// Returns true if the recipe may write to memory.851bool mayWriteToMemory() const;852853/// Returns true if the recipe may read from or write to memory.854bool mayReadOrWriteMemory() const {855return mayReadFromMemory() || mayWriteToMemory();856}857858/// Returns the debug location of the recipe.859DebugLoc getDebugLoc() const { return DL; }860861protected:862/// Compute the cost of this recipe using the legacy cost model and the863/// underlying instructions.864InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;865};866867// Helper macro to define common classof implementations for recipes.868#define VP_CLASSOF_IMPL(VPDefID) \869static inline bool classof(const VPDef *D) { \870return D->getVPDefID() == VPDefID; \871} \872static inline bool classof(const VPValue *V) { \873auto *R = V->getDefiningRecipe(); \874return R && R->getVPDefID() == VPDefID; \875} \876static inline bool classof(const VPUser *U) { \877auto *R = dyn_cast<VPRecipeBase>(U); \878return R && R->getVPDefID() == VPDefID; \879} \880static inline bool classof(const VPRecipeBase *R) { \881return R->getVPDefID() == VPDefID; \882} \883static inline bool classof(const VPSingleDefRecipe *R) { \884return R->getVPDefID() == VPDefID; \885}886887/// VPSingleDef is a base class for recipes for modeling a sequence of one or888/// more output IR that define a single result VPValue.889/// Note that VPRecipeBase must be inherited from before VPValue.890class VPSingleDefRecipe : public VPRecipeBase, public VPValue {891public:892template <typename IterT>893VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})894: VPRecipeBase(SC, Operands, DL), VPValue(this) {}895896VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,897DebugLoc DL = {})898: VPRecipeBase(SC, Operands, DL), VPValue(this) {}899900template <typename IterT>901VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,902DebugLoc DL = {})903: VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}904905static inline bool classof(const VPRecipeBase *R) {906switch (R->getVPDefID()) {907case VPRecipeBase::VPDerivedIVSC:908case VPRecipeBase::VPEVLBasedIVPHISC:909case VPRecipeBase::VPExpandSCEVSC:910case VPRecipeBase::VPInstructionSC:911case VPRecipeBase::VPReductionEVLSC:912case VPRecipeBase::VPReductionSC:913case VPRecipeBase::VPReplicateSC:914case VPRecipeBase::VPScalarIVStepsSC:915case VPRecipeBase::VPVectorPointerSC:916case VPRecipeBase::VPWidenCallSC:917case VPRecipeBase::VPWidenCanonicalIVSC:918case VPRecipeBase::VPWidenCastSC:919case VPRecipeBase::VPWidenGEPSC:920case VPRecipeBase::VPWidenSC:921case VPRecipeBase::VPWidenSelectSC:922case VPRecipeBase::VPBlendSC:923case VPRecipeBase::VPPredInstPHISC:924case VPRecipeBase::VPCanonicalIVPHISC:925case VPRecipeBase::VPActiveLaneMaskPHISC:926case VPRecipeBase::VPFirstOrderRecurrencePHISC:927case VPRecipeBase::VPWidenPHISC:928case VPRecipeBase::VPWidenIntOrFpInductionSC:929case VPRecipeBase::VPWidenPointerInductionSC:930case VPRecipeBase::VPReductionPHISC:931case VPRecipeBase::VPScalarCastSC:932return true;933case VPRecipeBase::VPInterleaveSC:934case VPRecipeBase::VPBranchOnMaskSC:935case VPRecipeBase::VPWidenLoadEVLSC:936case VPRecipeBase::VPWidenLoadSC:937case VPRecipeBase::VPWidenStoreEVLSC:938case VPRecipeBase::VPWidenStoreSC:939// TODO: Widened stores don't define a value, but widened loads do. Split940// the recipes to be able to make widened loads VPSingleDefRecipes.941return false;942}943llvm_unreachable("Unhandled VPDefID");944}945946static inline bool classof(const VPUser *U) {947auto *R = dyn_cast<VPRecipeBase>(U);948return R && classof(R);949}950951virtual VPSingleDefRecipe *clone() override = 0;952953/// Returns the underlying instruction.954Instruction *getUnderlyingInstr() {955return cast<Instruction>(getUnderlyingValue());956}957const Instruction *getUnderlyingInstr() const {958return cast<Instruction>(getUnderlyingValue());959}960};961962/// Class to record LLVM IR flag for a recipe along with it.963class VPRecipeWithIRFlags : public VPSingleDefRecipe {964enum class OperationType : unsigned char {965Cmp,966OverflowingBinOp,967DisjointOp,968PossiblyExactOp,969GEPOp,970FPMathOp,971NonNegOp,972Other973};974975public:976struct WrapFlagsTy {977char HasNUW : 1;978char HasNSW : 1;979980WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}981};982983struct DisjointFlagsTy {984char IsDisjoint : 1;985DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}986};987988protected:989struct GEPFlagsTy {990char IsInBounds : 1;991GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}992};993994private:995struct ExactFlagsTy {996char IsExact : 1;997};998struct NonNegFlagsTy {999char NonNeg : 1;1000};1001struct FastMathFlagsTy {1002char AllowReassoc : 1;1003char NoNaNs : 1;1004char NoInfs : 1;1005char NoSignedZeros : 1;1006char AllowReciprocal : 1;1007char AllowContract : 1;1008char ApproxFunc : 1;10091010FastMathFlagsTy(const FastMathFlags &FMF);1011};10121013OperationType OpType;10141015union {1016CmpInst::Predicate CmpPredicate;1017WrapFlagsTy WrapFlags;1018DisjointFlagsTy DisjointFlags;1019ExactFlagsTy ExactFlags;1020GEPFlagsTy GEPFlags;1021NonNegFlagsTy NonNegFlags;1022FastMathFlagsTy FMFs;1023unsigned AllFlags;1024};10251026protected:1027void transferFlags(VPRecipeWithIRFlags &Other) {1028OpType = Other.OpType;1029AllFlags = Other.AllFlags;1030}10311032public:1033template <typename IterT>1034VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})1035: VPSingleDefRecipe(SC, Operands, DL) {1036OpType = OperationType::Other;1037AllFlags = 0;1038}10391040template <typename IterT>1041VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)1042: VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()) {1043if (auto *Op = dyn_cast<CmpInst>(&I)) {1044OpType = OperationType::Cmp;1045CmpPredicate = Op->getPredicate();1046} else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {1047OpType = OperationType::DisjointOp;1048DisjointFlags.IsDisjoint = Op->isDisjoint();1049} else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {1050OpType = OperationType::OverflowingBinOp;1051WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};1052} else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {1053OpType = OperationType::PossiblyExactOp;1054ExactFlags.IsExact = Op->isExact();1055} else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {1056OpType = OperationType::GEPOp;1057GEPFlags.IsInBounds = GEP->isInBounds();1058} else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {1059OpType = OperationType::NonNegOp;1060NonNegFlags.NonNeg = PNNI->hasNonNeg();1061} else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {1062OpType = OperationType::FPMathOp;1063FMFs = Op->getFastMathFlags();1064} else {1065OpType = OperationType::Other;1066AllFlags = 0;1067}1068}10691070template <typename IterT>1071VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,1072CmpInst::Predicate Pred, DebugLoc DL = {})1073: VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),1074CmpPredicate(Pred) {}10751076template <typename IterT>1077VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,1078WrapFlagsTy WrapFlags, DebugLoc DL = {})1079: VPSingleDefRecipe(SC, Operands, DL),1080OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}10811082template <typename IterT>1083VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,1084FastMathFlags FMFs, DebugLoc DL = {})1085: VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),1086FMFs(FMFs) {}10871088template <typename IterT>1089VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,1090DisjointFlagsTy DisjointFlags, DebugLoc DL = {})1091: VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),1092DisjointFlags(DisjointFlags) {}10931094protected:1095template <typename IterT>1096VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,1097GEPFlagsTy GEPFlags, DebugLoc DL = {})1098: VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),1099GEPFlags(GEPFlags) {}11001101public:1102static inline bool classof(const VPRecipeBase *R) {1103return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||1104R->getVPDefID() == VPRecipeBase::VPWidenSC ||1105R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||1106R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||1107R->getVPDefID() == VPRecipeBase::VPReplicateSC ||1108R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;1109}11101111static inline bool classof(const VPUser *U) {1112auto *R = dyn_cast<VPRecipeBase>(U);1113return R && classof(R);1114}11151116/// Drop all poison-generating flags.1117void dropPoisonGeneratingFlags() {1118// NOTE: This needs to be kept in-sync with1119// Instruction::dropPoisonGeneratingFlags.1120switch (OpType) {1121case OperationType::OverflowingBinOp:1122WrapFlags.HasNUW = false;1123WrapFlags.HasNSW = false;1124break;1125case OperationType::DisjointOp:1126DisjointFlags.IsDisjoint = false;1127break;1128case OperationType::PossiblyExactOp:1129ExactFlags.IsExact = false;1130break;1131case OperationType::GEPOp:1132GEPFlags.IsInBounds = false;1133break;1134case OperationType::FPMathOp:1135FMFs.NoNaNs = false;1136FMFs.NoInfs = false;1137break;1138case OperationType::NonNegOp:1139NonNegFlags.NonNeg = false;1140break;1141case OperationType::Cmp:1142case OperationType::Other:1143break;1144}1145}11461147/// Set the IR flags for \p I.1148void setFlags(Instruction *I) const {1149switch (OpType) {1150case OperationType::OverflowingBinOp:1151I->setHasNoUnsignedWrap(WrapFlags.HasNUW);1152I->setHasNoSignedWrap(WrapFlags.HasNSW);1153break;1154case OperationType::DisjointOp:1155cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);1156break;1157case OperationType::PossiblyExactOp:1158I->setIsExact(ExactFlags.IsExact);1159break;1160case OperationType::GEPOp:1161// TODO(gep_nowrap): Track the full GEPNoWrapFlags in VPlan.1162cast<GetElementPtrInst>(I)->setNoWrapFlags(1163GEPFlags.IsInBounds ? GEPNoWrapFlags::inBounds()1164: GEPNoWrapFlags::none());1165break;1166case OperationType::FPMathOp:1167I->setHasAllowReassoc(FMFs.AllowReassoc);1168I->setHasNoNaNs(FMFs.NoNaNs);1169I->setHasNoInfs(FMFs.NoInfs);1170I->setHasNoSignedZeros(FMFs.NoSignedZeros);1171I->setHasAllowReciprocal(FMFs.AllowReciprocal);1172I->setHasAllowContract(FMFs.AllowContract);1173I->setHasApproxFunc(FMFs.ApproxFunc);1174break;1175case OperationType::NonNegOp:1176I->setNonNeg(NonNegFlags.NonNeg);1177break;1178case OperationType::Cmp:1179case OperationType::Other:1180break;1181}1182}11831184CmpInst::Predicate getPredicate() const {1185assert(OpType == OperationType::Cmp &&1186"recipe doesn't have a compare predicate");1187return CmpPredicate;1188}11891190bool isInBounds() const {1191assert(OpType == OperationType::GEPOp &&1192"recipe doesn't have inbounds flag");1193return GEPFlags.IsInBounds;1194}11951196/// Returns true if the recipe has fast-math flags.1197bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }11981199FastMathFlags getFastMathFlags() const;12001201bool hasNoUnsignedWrap() const {1202assert(OpType == OperationType::OverflowingBinOp &&1203"recipe doesn't have a NUW flag");1204return WrapFlags.HasNUW;1205}12061207bool hasNoSignedWrap() const {1208assert(OpType == OperationType::OverflowingBinOp &&1209"recipe doesn't have a NSW flag");1210return WrapFlags.HasNSW;1211}12121213bool isDisjoint() const {1214assert(OpType == OperationType::DisjointOp &&1215"recipe cannot have a disjoing flag");1216return DisjointFlags.IsDisjoint;1217}12181219#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)1220void printFlags(raw_ostream &O) const;1221#endif1222};12231224/// This is a concrete Recipe that models a single VPlan-level instruction.1225/// While as any Recipe it may generate a sequence of IR instructions when1226/// executed, these instructions would always form a single-def expression as1227/// the VPInstruction is also a single def-use vertex.1228class VPInstruction : public VPRecipeWithIRFlags {1229friend class VPlanSlp;12301231public:1232/// VPlan opcodes, extending LLVM IR with idiomatics instructions.1233enum {1234FirstOrderRecurrenceSplice =1235Instruction::OtherOpsEnd + 1, // Combines the incoming and previous1236// values of a first-order recurrence.1237Not,1238SLPLoad,1239SLPStore,1240ActiveLaneMask,1241ExplicitVectorLength,1242/// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.1243/// The first operand is the incoming value from the predecessor in VPlan,1244/// the second operand is the incoming value for all other predecessors1245/// (which are currently not modeled in VPlan).1246ResumePhi,1247CalculateTripCountMinusVF,1248// Increment the canonical IV separately for each unrolled part.1249CanonicalIVIncrementForPart,1250BranchOnCount,1251BranchOnCond,1252ComputeReductionResult,1253// Takes the VPValue to extract from as first operand and the lane or part1254// to extract as second operand, counting from the end starting with 1 for1255// last. The second operand must be a positive constant and <= VF when1256// extracting from a vector or <= UF when extracting from an unrolled1257// scalar.1258ExtractFromEnd,1259LogicalAnd, // Non-poison propagating logical And.1260// Add an offset in bytes (second operand) to a base pointer (first1261// operand). Only generates scalar values (either for the first lane only or1262// for all lanes, depending on its uses).1263PtrAdd,1264};12651266private:1267typedef unsigned char OpcodeTy;1268OpcodeTy Opcode;12691270/// An optional name that can be used for the generated IR instruction.1271const std::string Name;12721273/// Returns true if this VPInstruction generates scalar values for all lanes.1274/// Most VPInstructions generate a single value per part, either vector or1275/// scalar. VPReplicateRecipe takes care of generating multiple (scalar)1276/// values per all lanes, stemming from an original ingredient. This method1277/// identifies the (rare) cases of VPInstructions that do so as well, w/o an1278/// underlying ingredient.1279bool doesGeneratePerAllLanes() const;12801281/// Returns true if we can generate a scalar for the first lane only if1282/// needed.1283bool canGenerateScalarForFirstLane() const;12841285/// Utility methods serving execute(): generates a single instance of the1286/// modeled instruction for a given part. \returns the generated value for \p1287/// Part. In some cases an existing value is returned rather than a generated1288/// one.1289Value *generatePerPart(VPTransformState &State, unsigned Part);12901291/// Utility methods serving execute(): generates a scalar single instance of1292/// the modeled instruction for a given lane. \returns the scalar generated1293/// value for lane \p Lane.1294Value *generatePerLane(VPTransformState &State, const VPIteration &Lane);12951296#if !defined(NDEBUG)1297/// Return true if the VPInstruction is a floating point math operation, i.e.1298/// has fast-math flags.1299bool isFPMathOp() const;1300#endif13011302public:1303VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,1304const Twine &Name = "")1305: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),1306Opcode(Opcode), Name(Name.str()) {}13071308VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,1309DebugLoc DL = {}, const Twine &Name = "")1310: VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}13111312VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,1313VPValue *B, DebugLoc DL = {}, const Twine &Name = "");13141315VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,1316WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")1317: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),1318Opcode(Opcode), Name(Name.str()) {}13191320VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,1321DisjointFlagsTy DisjointFlag, DebugLoc DL = {},1322const Twine &Name = "")1323: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),1324Opcode(Opcode), Name(Name.str()) {1325assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");1326}13271328VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,1329FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");13301331VP_CLASSOF_IMPL(VPDef::VPInstructionSC)13321333VPInstruction *clone() override {1334SmallVector<VPValue *, 2> Operands(operands());1335auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);1336New->transferFlags(*this);1337return New;1338}13391340unsigned getOpcode() const { return Opcode; }13411342/// Generate the instruction.1343/// TODO: We currently execute only per-part unless a specific instance is1344/// provided.1345void execute(VPTransformState &State) override;13461347#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)1348/// Print the VPInstruction to \p O.1349void print(raw_ostream &O, const Twine &Indent,1350VPSlotTracker &SlotTracker) const override;13511352/// Print the VPInstruction to dbgs() (for debugging).1353LLVM_DUMP_METHOD void dump() const;1354#endif13551356/// Return true if this instruction may modify memory.1357bool mayWriteToMemory() const {1358// TODO: we can use attributes of the called function to rule out memory1359// modifications.1360return Opcode == Instruction::Store || Opcode == Instruction::Call ||1361Opcode == Instruction::Invoke || Opcode == SLPStore;1362}13631364bool hasResult() const {1365// CallInst may or may not have a result, depending on the called function.1366// Conservatively return calls have results for now.1367switch (getOpcode()) {1368case Instruction::Ret:1369case Instruction::Br:1370case Instruction::Store:1371case Instruction::Switch:1372case Instruction::IndirectBr:1373case Instruction::Resume:1374case Instruction::CatchRet:1375case Instruction::Unreachable:1376case Instruction::Fence:1377case Instruction::AtomicRMW:1378case VPInstruction::BranchOnCond:1379case VPInstruction::BranchOnCount:1380return false;1381default:1382return true;1383}1384}13851386/// Returns true if the recipe only uses the first lane of operand \p Op.1387bool onlyFirstLaneUsed(const VPValue *Op) const override;13881389/// Returns true if the recipe only uses the first part of operand \p Op.1390bool onlyFirstPartUsed(const VPValue *Op) const override;13911392/// Returns true if this VPInstruction produces a scalar value from a vector,1393/// e.g. by performing a reduction or extracting a lane.1394bool isVectorToScalar() const;13951396/// Returns true if this VPInstruction's operands are single scalars and the1397/// result is also a single scalar.1398bool isSingleScalar() const;1399};14001401/// VPWidenRecipe is a recipe for producing a widened instruction using the1402/// opcode and operands of the recipe. This recipe covers most of the1403/// traditional vectorization cases where each recipe transforms into a1404/// vectorized version of itself.1405class VPWidenRecipe : public VPRecipeWithIRFlags {1406unsigned Opcode;14071408public:1409template <typename IterT>1410VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)1411: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),1412Opcode(I.getOpcode()) {}14131414~VPWidenRecipe() override = default;14151416VPWidenRecipe *clone() override {1417auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());1418R->transferFlags(*this);1419return R;1420}14211422VP_CLASSOF_IMPL(VPDef::VPWidenSC)14231424/// Produce a widened instruction using the opcode and operands of the recipe,1425/// processing State.VF elements.1426void execute(VPTransformState &State) override;14271428unsigned getOpcode() const { return Opcode; }14291430#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)1431/// Print the recipe.1432void print(raw_ostream &O, const Twine &Indent,1433VPSlotTracker &SlotTracker) const override;1434#endif1435};14361437/// VPWidenCastRecipe is a recipe to create vector cast instructions.1438class VPWidenCastRecipe : public VPRecipeWithIRFlags {1439/// Cast instruction opcode.1440Instruction::CastOps Opcode;14411442/// Result type for the cast.1443Type *ResultTy;14441445public:1446VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,1447CastInst &UI)1448: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),1449ResultTy(ResultTy) {1450assert(UI.getOpcode() == Opcode &&1451"opcode of underlying cast doesn't match");1452}14531454VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)1455: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),1456ResultTy(ResultTy) {}14571458~VPWidenCastRecipe() override = default;14591460VPWidenCastRecipe *clone() override {1461if (auto *UV = getUnderlyingValue())1462return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,1463*cast<CastInst>(UV));14641465return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);1466}14671468VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)14691470/// Produce widened copies of the cast.1471void execute(VPTransformState &State) override;14721473#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)1474/// Print the recipe.1475void print(raw_ostream &O, const Twine &Indent,1476VPSlotTracker &SlotTracker) const override;1477#endif14781479Instruction::CastOps getOpcode() const { return Opcode; }14801481/// Returns the result type of the cast.1482Type *getResultType() const { return ResultTy; }1483};14841485/// VPScalarCastRecipe is a recipe to create scalar cast instructions.1486class VPScalarCastRecipe : public VPSingleDefRecipe {1487Instruction::CastOps Opcode;14881489Type *ResultTy;14901491Value *generate(VPTransformState &State, unsigned Part);14921493public:1494VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)1495: VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),1496ResultTy(ResultTy) {}14971498~VPScalarCastRecipe() override = default;14991500VPScalarCastRecipe *clone() override {1501return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy);1502}15031504VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)15051506void execute(VPTransformState &State) override;15071508#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)1509void print(raw_ostream &O, const Twine &Indent,1510VPSlotTracker &SlotTracker) const override;1511#endif15121513/// Returns the result type of the cast.1514Type *getResultType() const { return ResultTy; }15151516bool onlyFirstLaneUsed(const VPValue *Op) const override {1517// At the moment, only uniform codegen is implemented.1518assert(is_contained(operands(), Op) &&1519"Op must be an operand of the recipe");1520return true;1521}1522};15231524/// A recipe for widening Call instructions.1525class VPWidenCallRecipe : public VPSingleDefRecipe {1526/// ID of the vector intrinsic to call when widening the call. If set the1527/// Intrinsic::not_intrinsic, a library call will be used instead.1528Intrinsic::ID VectorIntrinsicID;1529/// If this recipe represents a library call, Variant stores a pointer to1530/// the chosen function. There is a 1:1 mapping between a given VF and the1531/// chosen vectorized variant, so there will be a different vplan for each1532/// VF with a valid variant.1533Function *Variant;15341535public:1536template <typename IterT>1537VPWidenCallRecipe(Value *UV, iterator_range<IterT> CallArguments,1538Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},1539Function *Variant = nullptr)1540: VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, UV, DL),1541VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {1542assert(1543isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&1544"last operand must be the called function");1545}15461547~VPWidenCallRecipe() override = default;15481549VPWidenCallRecipe *clone() override {1550return new VPWidenCallRecipe(getUnderlyingValue(), operands(),1551VectorIntrinsicID, getDebugLoc(), Variant);1552}15531554VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)15551556/// Produce a widened version of the call instruction.1557void execute(VPTransformState &State) override;15581559Function *getCalledScalarFunction() const {1560return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());1561}15621563operand_range arg_operands() {1564return make_range(op_begin(), op_begin() + getNumOperands() - 1);1565}1566const_operand_range arg_operands() const {1567return make_range(op_begin(), op_begin() + getNumOperands() - 1);1568}15691570#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)1571/// Print the recipe.1572void print(raw_ostream &O, const Twine &Indent,1573VPSlotTracker &SlotTracker) const override;1574#endif1575};15761577/// A recipe for widening select instructions.1578struct VPWidenSelectRecipe : public VPSingleDefRecipe {1579template <typename IterT>1580VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)1581: VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,1582I.getDebugLoc()) {}15831584~VPWidenSelectRecipe() override = default;15851586VPWidenSelectRecipe *clone() override {1587return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),1588operands());1589}15901591VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)15921593/// Produce a widened version of the select instruction.1594void execute(VPTransformState &State) override;15951596#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)1597/// Print the recipe.1598void print(raw_ostream &O, const Twine &Indent,1599VPSlotTracker &SlotTracker) const override;1600#endif16011602VPValue *getCond() const {1603return getOperand(0);1604}16051606bool isInvariantCond() const {1607return getCond()->isDefinedOutsideVectorRegions();1608}1609};16101611/// A recipe for handling GEP instructions.1612class VPWidenGEPRecipe : public VPRecipeWithIRFlags {1613bool isPointerLoopInvariant() const {1614return getOperand(0)->isDefinedOutsideVectorRegions();1615}16161617bool isIndexLoopInvariant(unsigned I) const {1618return getOperand(I + 1)->isDefinedOutsideVectorRegions();1619}16201621bool areAllOperandsInvariant() const {1622return all_of(operands(), [](VPValue *Op) {1623return Op->isDefinedOutsideVectorRegions();1624});1625}16261627public:1628template <typename IterT>1629VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)1630: VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}16311632~VPWidenGEPRecipe() override = default;16331634VPWidenGEPRecipe *clone() override {1635return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),1636operands());1637}16381639VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)16401641/// Generate the gep nodes.1642void execute(VPTransformState &State) override;16431644#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)1645/// Print the recipe.1646void print(raw_ostream &O, const Twine &Indent,1647VPSlotTracker &SlotTracker) const override;1648#endif1649};16501651/// A recipe to compute the pointers for widened memory accesses of IndexTy for1652/// all parts. If IsReverse is true, compute pointers for accessing the input in1653/// reverse order per part.1654class VPVectorPointerRecipe : public VPRecipeWithIRFlags {1655Type *IndexedTy;1656bool IsReverse;16571658public:1659VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,1660bool IsInBounds, DebugLoc DL)1661: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),1662GEPFlagsTy(IsInBounds), DL),1663IndexedTy(IndexedTy), IsReverse(IsReverse) {}16641665VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)16661667void execute(VPTransformState &State) override;16681669bool onlyFirstLaneUsed(const VPValue *Op) const override {1670assert(is_contained(operands(), Op) &&1671"Op must be an operand of the recipe");1672return true;1673}16741675VPVectorPointerRecipe *clone() override {1676return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,1677isInBounds(), getDebugLoc());1678}16791680#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)1681/// Print the recipe.1682void print(raw_ostream &O, const Twine &Indent,1683VPSlotTracker &SlotTracker) const override;1684#endif1685};16861687/// A pure virtual base class for all recipes modeling header phis, including1688/// phis for first order recurrences, pointer inductions and reductions. The1689/// start value is the first operand of the recipe and the incoming value from1690/// the backedge is the second operand.1691///1692/// Inductions are modeled using the following sub-classes:1693/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,1694/// starting at a specified value (zero for the main vector loop, the resume1695/// value for the epilogue vector loop) and stepping by 1. The induction1696/// controls exiting of the vector loop by comparing against the vector trip1697/// count. Produces a single scalar PHI for the induction value per1698/// iteration.1699/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and1700/// floating point inductions with arbitrary start and step values. Produces1701/// a vector PHI per-part.1702/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding1703/// value of an IV with different start and step values. Produces a single1704/// scalar value per iteration1705/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a1706/// canonical or derived induction.1707/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a1708/// pointer induction. Produces either a vector PHI per-part or scalar values1709/// per-lane based on the canonical induction.1710class VPHeaderPHIRecipe : public VPSingleDefRecipe {1711protected:1712VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,1713VPValue *Start = nullptr, DebugLoc DL = {})1714: VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {1715if (Start)1716addOperand(Start);1717}17181719public:1720~VPHeaderPHIRecipe() override = default;17211722/// Method to support type inquiry through isa, cast, and dyn_cast.1723static inline bool classof(const VPRecipeBase *B) {1724return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&1725B->getVPDefID() <= VPDef::VPLastHeaderPHISC;1726}1727static inline bool classof(const VPValue *V) {1728auto *B = V->getDefiningRecipe();1729return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&1730B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;1731}17321733/// Generate the phi nodes.1734void execute(VPTransformState &State) override = 0;17351736#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)1737/// Print the recipe.1738void print(raw_ostream &O, const Twine &Indent,1739VPSlotTracker &SlotTracker) const override = 0;1740#endif17411742/// Returns the start value of the phi, if one is set.1743VPValue *getStartValue() {1744return getNumOperands() == 0 ? nullptr : getOperand(0);1745}1746VPValue *getStartValue() const {1747return getNumOperands() == 0 ? nullptr : getOperand(0);1748}17491750/// Update the start value of the recipe.1751void setStartValue(VPValue *V) { setOperand(0, V); }17521753/// Returns the incoming value from the loop backedge.1754virtual VPValue *getBackedgeValue() {1755return getOperand(1);1756}17571758/// Returns the backedge value as a recipe. The backedge value is guaranteed1759/// to be a recipe.1760virtual VPRecipeBase &getBackedgeRecipe() {1761return *getBackedgeValue()->getDefiningRecipe();1762}1763};17641765/// A recipe for handling phi nodes of integer and floating-point inductions,1766/// producing their vector values.1767class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {1768PHINode *IV;1769TruncInst *Trunc;1770const InductionDescriptor &IndDesc;17711772public:1773VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,1774const InductionDescriptor &IndDesc)1775: VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),1776Trunc(nullptr), IndDesc(IndDesc) {1777addOperand(Step);1778}17791780VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,1781const InductionDescriptor &IndDesc,1782TruncInst *Trunc)1783: VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),1784IV(IV), Trunc(Trunc), IndDesc(IndDesc) {1785addOperand(Step);1786}17871788~VPWidenIntOrFpInductionRecipe() override = default;17891790VPWidenIntOrFpInductionRecipe *clone() override {1791return new VPWidenIntOrFpInductionRecipe(IV, getStartValue(),1792getStepValue(), IndDesc, Trunc);1793}17941795VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)17961797/// Generate the vectorized and scalarized versions of the phi node as1798/// needed by their users.1799void execute(VPTransformState &State) override;18001801#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)1802/// Print the recipe.1803void print(raw_ostream &O, const Twine &Indent,1804VPSlotTracker &SlotTracker) const override;1805#endif18061807VPValue *getBackedgeValue() override {1808// TODO: All operands of base recipe must exist and be at same index in1809// derived recipe.1810llvm_unreachable(1811"VPWidenIntOrFpInductionRecipe generates its own backedge value");1812}18131814VPRecipeBase &getBackedgeRecipe() override {1815// TODO: All operands of base recipe must exist and be at same index in1816// derived recipe.1817llvm_unreachable(1818"VPWidenIntOrFpInductionRecipe generates its own backedge value");1819}18201821/// Returns the step value of the induction.1822VPValue *getStepValue() { return getOperand(1); }1823const VPValue *getStepValue() const { return getOperand(1); }18241825/// Returns the first defined value as TruncInst, if it is one or nullptr1826/// otherwise.1827TruncInst *getTruncInst() { return Trunc; }1828const TruncInst *getTruncInst() const { return Trunc; }18291830PHINode *getPHINode() { return IV; }18311832/// Returns the induction descriptor for the recipe.1833const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }18341835/// Returns true if the induction is canonical, i.e. starting at 0 and1836/// incremented by UF * VF (= the original IV is incremented by 1) and has the1837/// same type as the canonical induction.1838bool isCanonical() const;18391840/// Returns the scalar type of the induction.1841Type *getScalarType() const {1842return Trunc ? Trunc->getType() : IV->getType();1843}1844};18451846class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {1847const InductionDescriptor &IndDesc;18481849bool IsScalarAfterVectorization;18501851public:1852/// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p1853/// Start.1854VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step,1855const InductionDescriptor &IndDesc,1856bool IsScalarAfterVectorization)1857: VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),1858IndDesc(IndDesc),1859IsScalarAfterVectorization(IsScalarAfterVectorization) {1860addOperand(Start);1861addOperand(Step);1862}18631864~VPWidenPointerInductionRecipe() override = default;18651866VPWidenPointerInductionRecipe *clone() override {1867return new VPWidenPointerInductionRecipe(1868cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),1869IndDesc, IsScalarAfterVectorization);1870}18711872VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)18731874/// Generate vector values for the pointer induction.1875void execute(VPTransformState &State) override;18761877/// Returns true if only scalar values will be generated.1878bool onlyScalarsGenerated(bool IsScalable);18791880/// Returns the induction descriptor for the recipe.1881const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }18821883#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)1884/// Print the recipe.1885void print(raw_ostream &O, const Twine &Indent,1886VPSlotTracker &SlotTracker) const override;1887#endif1888};18891890/// A recipe for handling phis that are widened in the vector loop.1891/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are1892/// managed in the recipe directly.1893class VPWidenPHIRecipe : public VPSingleDefRecipe {1894/// List of incoming blocks. Only used in the VPlan native path.1895SmallVector<VPBasicBlock *, 2> IncomingBlocks;18961897public:1898/// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.1899VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)1900: VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {1901if (Start)1902addOperand(Start);1903}19041905VPWidenPHIRecipe *clone() override {1906llvm_unreachable("cloning not implemented yet");1907}19081909~VPWidenPHIRecipe() override = default;19101911VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)19121913/// Generate the phi/select nodes.1914void execute(VPTransformState &State) override;19151916#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)1917/// Print the recipe.1918void print(raw_ostream &O, const Twine &Indent,1919VPSlotTracker &SlotTracker) const override;1920#endif19211922/// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.1923void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {1924addOperand(IncomingV);1925IncomingBlocks.push_back(IncomingBlock);1926}19271928/// Returns the \p I th incoming VPBasicBlock.1929VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }19301931/// Returns the \p I th incoming VPValue.1932VPValue *getIncomingValue(unsigned I) { return getOperand(I); }1933};19341935/// A recipe for handling first-order recurrence phis. The start value is the1936/// first operand of the recipe and the incoming value from the backedge is the1937/// second operand.1938struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {1939VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)1940: VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}19411942VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)19431944static inline bool classof(const VPHeaderPHIRecipe *R) {1945return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;1946}19471948VPFirstOrderRecurrencePHIRecipe *clone() override {1949return new VPFirstOrderRecurrencePHIRecipe(1950cast<PHINode>(getUnderlyingInstr()), *getOperand(0));1951}19521953void execute(VPTransformState &State) override;19541955#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)1956/// Print the recipe.1957void print(raw_ostream &O, const Twine &Indent,1958VPSlotTracker &SlotTracker) const override;1959#endif1960};19611962/// A recipe for handling reduction phis. The start value is the first operand1963/// of the recipe and the incoming value from the backedge is the second1964/// operand.1965class VPReductionPHIRecipe : public VPHeaderPHIRecipe {1966/// Descriptor for the reduction.1967const RecurrenceDescriptor &RdxDesc;19681969/// The phi is part of an in-loop reduction.1970bool IsInLoop;19711972/// The phi is part of an ordered reduction. Requires IsInLoop to be true.1973bool IsOrdered;19741975public:1976/// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p1977/// RdxDesc.1978VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc,1979VPValue &Start, bool IsInLoop = false,1980bool IsOrdered = false)1981: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),1982RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {1983assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");1984}19851986~VPReductionPHIRecipe() override = default;19871988VPReductionPHIRecipe *clone() override {1989auto *R =1990new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,1991*getOperand(0), IsInLoop, IsOrdered);1992R->addOperand(getBackedgeValue());1993return R;1994}19951996VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)19971998static inline bool classof(const VPHeaderPHIRecipe *R) {1999return R->getVPDefID() == VPDef::VPReductionPHISC;2000}20012002/// Generate the phi/select nodes.2003void execute(VPTransformState &State) override;20042005#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2006/// Print the recipe.2007void print(raw_ostream &O, const Twine &Indent,2008VPSlotTracker &SlotTracker) const override;2009#endif20102011const RecurrenceDescriptor &getRecurrenceDescriptor() const {2012return RdxDesc;2013}20142015/// Returns true, if the phi is part of an ordered reduction.2016bool isOrdered() const { return IsOrdered; }20172018/// Returns true, if the phi is part of an in-loop reduction.2019bool isInLoop() const { return IsInLoop; }2020};20212022/// A recipe for vectorizing a phi-node as a sequence of mask-based select2023/// instructions.2024class VPBlendRecipe : public VPSingleDefRecipe {2025public:2026/// The blend operation is a User of the incoming values and of their2027/// respective masks, ordered [I0, I1, M1, I2, M2, ...]. Note that the first2028/// incoming value does not have a mask associated.2029VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)2030: VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {2031assert((Operands.size() + 1) % 2 == 0 &&2032"Expected an odd number of operands");2033}20342035VPBlendRecipe *clone() override {2036SmallVector<VPValue *> Ops(operands());2037return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);2038}20392040VP_CLASSOF_IMPL(VPDef::VPBlendSC)20412042/// Return the number of incoming values, taking into account that the first2043/// incoming value has no mask.2044unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }20452046/// Return incoming value number \p Idx.2047VPValue *getIncomingValue(unsigned Idx) const {2048return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - 1);2049}20502051/// Return mask number \p Idx.2052VPValue *getMask(unsigned Idx) const {2053assert(Idx > 0 && "First index has no mask associated.");2054return getOperand(Idx * 2);2055}20562057/// Generate the phi/select nodes.2058void execute(VPTransformState &State) override;20592060#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2061/// Print the recipe.2062void print(raw_ostream &O, const Twine &Indent,2063VPSlotTracker &SlotTracker) const override;2064#endif20652066/// Returns true if the recipe only uses the first lane of operand \p Op.2067bool onlyFirstLaneUsed(const VPValue *Op) const override {2068assert(is_contained(operands(), Op) &&2069"Op must be an operand of the recipe");2070// Recursing through Blend recipes only, must terminate at header phi's the2071// latest.2072return all_of(users(),2073[this](VPUser *U) { return U->onlyFirstLaneUsed(this); });2074}2075};20762077/// VPInterleaveRecipe is a recipe for transforming an interleave group of load2078/// or stores into one wide load/store and shuffles. The first operand of a2079/// VPInterleave recipe is the address, followed by the stored values, followed2080/// by an optional mask.2081class VPInterleaveRecipe : public VPRecipeBase {2082const InterleaveGroup<Instruction> *IG;20832084/// Indicates if the interleave group is in a conditional block and requires a2085/// mask.2086bool HasMask = false;20872088/// Indicates if gaps between members of the group need to be masked out or if2089/// unusued gaps can be loaded speculatively.2090bool NeedsMaskForGaps = false;20912092public:2093VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,2094ArrayRef<VPValue *> StoredValues, VPValue *Mask,2095bool NeedsMaskForGaps)2096: VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),2097NeedsMaskForGaps(NeedsMaskForGaps) {2098for (unsigned i = 0; i < IG->getFactor(); ++i)2099if (Instruction *I = IG->getMember(i)) {2100if (I->getType()->isVoidTy())2101continue;2102new VPValue(I, this);2103}21042105for (auto *SV : StoredValues)2106addOperand(SV);2107if (Mask) {2108HasMask = true;2109addOperand(Mask);2110}2111}2112~VPInterleaveRecipe() override = default;21132114VPInterleaveRecipe *clone() override {2115return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),2116NeedsMaskForGaps);2117}21182119VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)21202121/// Return the address accessed by this recipe.2122VPValue *getAddr() const {2123return getOperand(0); // Address is the 1st, mandatory operand.2124}21252126/// Return the mask used by this recipe. Note that a full mask is represented2127/// by a nullptr.2128VPValue *getMask() const {2129// Mask is optional and therefore the last, currently 2nd operand.2130return HasMask ? getOperand(getNumOperands() - 1) : nullptr;2131}21322133/// Return the VPValues stored by this interleave group. If it is a load2134/// interleave group, return an empty ArrayRef.2135ArrayRef<VPValue *> getStoredValues() const {2136// The first operand is the address, followed by the stored values, followed2137// by an optional mask.2138return ArrayRef<VPValue *>(op_begin(), getNumOperands())2139.slice(1, getNumStoreOperands());2140}21412142/// Generate the wide load or store, and shuffles.2143void execute(VPTransformState &State) override;21442145#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2146/// Print the recipe.2147void print(raw_ostream &O, const Twine &Indent,2148VPSlotTracker &SlotTracker) const override;2149#endif21502151const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }21522153/// Returns the number of stored operands of this interleave group. Returns 02154/// for load interleave groups.2155unsigned getNumStoreOperands() const {2156return getNumOperands() - (HasMask ? 2 : 1);2157}21582159/// The recipe only uses the first lane of the address.2160bool onlyFirstLaneUsed(const VPValue *Op) const override {2161assert(is_contained(operands(), Op) &&2162"Op must be an operand of the recipe");2163return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);2164}21652166Instruction *getInsertPos() const { return IG->getInsertPos(); }2167};21682169/// A recipe to represent inloop reduction operations, performing a reduction on2170/// a vector operand into a scalar value, and adding the result to a chain.2171/// The Operands are {ChainOp, VecOp, [Condition]}.2172class VPReductionRecipe : public VPSingleDefRecipe {2173/// The recurrence decriptor for the reduction in question.2174const RecurrenceDescriptor &RdxDesc;2175bool IsOrdered;2176/// Whether the reduction is conditional.2177bool IsConditional = false;21782179protected:2180VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,2181Instruction *I, ArrayRef<VPValue *> Operands,2182VPValue *CondOp, bool IsOrdered)2183: VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) {2184if (CondOp) {2185IsConditional = true;2186addOperand(CondOp);2187}2188}21892190public:2191VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,2192VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,2193bool IsOrdered)2194: VPReductionRecipe(VPDef::VPReductionSC, R, I,2195ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,2196IsOrdered) {}21972198~VPReductionRecipe() override = default;21992200VPReductionRecipe *clone() override {2201return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),2202getVecOp(), getCondOp(), IsOrdered);2203}22042205static inline bool classof(const VPRecipeBase *R) {2206return R->getVPDefID() == VPRecipeBase::VPReductionSC ||2207R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;2208}22092210static inline bool classof(const VPUser *U) {2211auto *R = dyn_cast<VPRecipeBase>(U);2212return R && classof(R);2213}22142215/// Generate the reduction in the loop2216void execute(VPTransformState &State) override;22172218#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2219/// Print the recipe.2220void print(raw_ostream &O, const Twine &Indent,2221VPSlotTracker &SlotTracker) const override;2222#endif22232224/// Return the recurrence decriptor for the in-loop reduction.2225const RecurrenceDescriptor &getRecurrenceDescriptor() const {2226return RdxDesc;2227}2228/// Return true if the in-loop reduction is ordered.2229bool isOrdered() const { return IsOrdered; };2230/// Return true if the in-loop reduction is conditional.2231bool isConditional() const { return IsConditional; };2232/// The VPValue of the scalar Chain being accumulated.2233VPValue *getChainOp() const { return getOperand(0); }2234/// The VPValue of the vector value to be reduced.2235VPValue *getVecOp() const { return getOperand(1); }2236/// The VPValue of the condition for the block.2237VPValue *getCondOp() const {2238return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;2239}2240};22412242/// A recipe to represent inloop reduction operations with vector-predication2243/// intrinsics, performing a reduction on a vector operand with the explicit2244/// vector length (EVL) into a scalar value, and adding the result to a chain.2245/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.2246class VPReductionEVLRecipe : public VPReductionRecipe {2247public:2248VPReductionEVLRecipe(VPReductionRecipe *R, VPValue *EVL, VPValue *CondOp)2249: VPReductionRecipe(2250VPDef::VPReductionEVLSC, R->getRecurrenceDescriptor(),2251cast_or_null<Instruction>(R->getUnderlyingValue()),2252ArrayRef<VPValue *>({R->getChainOp(), R->getVecOp(), EVL}), CondOp,2253R->isOrdered()) {}22542255~VPReductionEVLRecipe() override = default;22562257VPReductionEVLRecipe *clone() override {2258llvm_unreachable("cloning not implemented yet");2259}22602261VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)22622263/// Generate the reduction in the loop2264void execute(VPTransformState &State) override;22652266#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2267/// Print the recipe.2268void print(raw_ostream &O, const Twine &Indent,2269VPSlotTracker &SlotTracker) const override;2270#endif22712272/// The VPValue of the explicit vector length.2273VPValue *getEVL() const { return getOperand(2); }22742275/// Returns true if the recipe only uses the first lane of operand \p Op.2276bool onlyFirstLaneUsed(const VPValue *Op) const override {2277assert(is_contained(operands(), Op) &&2278"Op must be an operand of the recipe");2279return Op == getEVL();2280}2281};22822283/// VPReplicateRecipe replicates a given instruction producing multiple scalar2284/// copies of the original scalar type, one per lane, instead of producing a2285/// single copy of widened type for all lanes. If the instruction is known to be2286/// uniform only one copy, per lane zero, will be generated.2287class VPReplicateRecipe : public VPRecipeWithIRFlags {2288/// Indicator if only a single replica per lane is needed.2289bool IsUniform;22902291/// Indicator if the replicas are also predicated.2292bool IsPredicated;22932294public:2295template <typename IterT>2296VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,2297bool IsUniform, VPValue *Mask = nullptr)2298: VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),2299IsUniform(IsUniform), IsPredicated(Mask) {2300if (Mask)2301addOperand(Mask);2302}23032304~VPReplicateRecipe() override = default;23052306VPReplicateRecipe *clone() override {2307auto *Copy =2308new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,2309isPredicated() ? getMask() : nullptr);2310Copy->transferFlags(*this);2311return Copy;2312}23132314VP_CLASSOF_IMPL(VPDef::VPReplicateSC)23152316/// Generate replicas of the desired Ingredient. Replicas will be generated2317/// for all parts and lanes unless a specific part and lane are specified in2318/// the \p State.2319void execute(VPTransformState &State) override;23202321#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2322/// Print the recipe.2323void print(raw_ostream &O, const Twine &Indent,2324VPSlotTracker &SlotTracker) const override;2325#endif23262327bool isUniform() const { return IsUniform; }23282329bool isPredicated() const { return IsPredicated; }23302331/// Returns true if the recipe only uses the first lane of operand \p Op.2332bool onlyFirstLaneUsed(const VPValue *Op) const override {2333assert(is_contained(operands(), Op) &&2334"Op must be an operand of the recipe");2335return isUniform();2336}23372338/// Returns true if the recipe uses scalars of operand \p Op.2339bool usesScalars(const VPValue *Op) const override {2340assert(is_contained(operands(), Op) &&2341"Op must be an operand of the recipe");2342return true;2343}23442345/// Returns true if the recipe is used by a widened recipe via an intervening2346/// VPPredInstPHIRecipe. In this case, the scalar values should also be packed2347/// in a vector.2348bool shouldPack() const;23492350/// Return the mask of a predicated VPReplicateRecipe.2351VPValue *getMask() {2352assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");2353return getOperand(getNumOperands() - 1);2354}23552356unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }2357};23582359/// A recipe for generating conditional branches on the bits of a mask.2360class VPBranchOnMaskRecipe : public VPRecipeBase {2361public:2362VPBranchOnMaskRecipe(VPValue *BlockInMask)2363: VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {2364if (BlockInMask) // nullptr means all-one mask.2365addOperand(BlockInMask);2366}23672368VPBranchOnMaskRecipe *clone() override {2369return new VPBranchOnMaskRecipe(getOperand(0));2370}23712372VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)23732374/// Generate the extraction of the appropriate bit from the block mask and the2375/// conditional branch.2376void execute(VPTransformState &State) override;23772378#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2379/// Print the recipe.2380void print(raw_ostream &O, const Twine &Indent,2381VPSlotTracker &SlotTracker) const override {2382O << Indent << "BRANCH-ON-MASK ";2383if (VPValue *Mask = getMask())2384Mask->printAsOperand(O, SlotTracker);2385else2386O << " All-One";2387}2388#endif23892390/// Return the mask used by this recipe. Note that a full mask is represented2391/// by a nullptr.2392VPValue *getMask() const {2393assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");2394// Mask is optional.2395return getNumOperands() == 1 ? getOperand(0) : nullptr;2396}23972398/// Returns true if the recipe uses scalars of operand \p Op.2399bool usesScalars(const VPValue *Op) const override {2400assert(is_contained(operands(), Op) &&2401"Op must be an operand of the recipe");2402return true;2403}2404};24052406/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when2407/// control converges back from a Branch-on-Mask. The phi nodes are needed in2408/// order to merge values that are set under such a branch and feed their uses.2409/// The phi nodes can be scalar or vector depending on the users of the value.2410/// This recipe works in concert with VPBranchOnMaskRecipe.2411class VPPredInstPHIRecipe : public VPSingleDefRecipe {2412public:2413/// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi2414/// nodes after merging back from a Branch-on-Mask.2415VPPredInstPHIRecipe(VPValue *PredV)2416: VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}2417~VPPredInstPHIRecipe() override = default;24182419VPPredInstPHIRecipe *clone() override {2420return new VPPredInstPHIRecipe(getOperand(0));2421}24222423VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)24242425/// Generates phi nodes for live-outs as needed to retain SSA form.2426void execute(VPTransformState &State) override;24272428#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2429/// Print the recipe.2430void print(raw_ostream &O, const Twine &Indent,2431VPSlotTracker &SlotTracker) const override;2432#endif24332434/// Returns true if the recipe uses scalars of operand \p Op.2435bool usesScalars(const VPValue *Op) const override {2436assert(is_contained(operands(), Op) &&2437"Op must be an operand of the recipe");2438return true;2439}2440};24412442/// A common base class for widening memory operations. An optional mask can be2443/// provided as the last operand.2444class VPWidenMemoryRecipe : public VPRecipeBase {2445protected:2446Instruction &Ingredient;24472448/// Whether the accessed addresses are consecutive.2449bool Consecutive;24502451/// Whether the consecutive accessed addresses are in reverse order.2452bool Reverse;24532454/// Whether the memory access is masked.2455bool IsMasked = false;24562457void setMask(VPValue *Mask) {2458assert(!IsMasked && "cannot re-set mask");2459if (!Mask)2460return;2461addOperand(Mask);2462IsMasked = true;2463}24642465VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,2466std::initializer_list<VPValue *> Operands,2467bool Consecutive, bool Reverse, DebugLoc DL)2468: VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive),2469Reverse(Reverse) {2470assert((Consecutive || !Reverse) && "Reverse implies consecutive");2471}24722473public:2474VPWidenMemoryRecipe *clone() override {2475llvm_unreachable("cloning not supported");2476}24772478static inline bool classof(const VPRecipeBase *R) {2479return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||2480R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||2481R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||2482R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;2483}24842485static inline bool classof(const VPUser *U) {2486auto *R = dyn_cast<VPRecipeBase>(U);2487return R && classof(R);2488}24892490/// Return whether the loaded-from / stored-to addresses are consecutive.2491bool isConsecutive() const { return Consecutive; }24922493/// Return whether the consecutive loaded/stored addresses are in reverse2494/// order.2495bool isReverse() const { return Reverse; }24962497/// Return the address accessed by this recipe.2498VPValue *getAddr() const { return getOperand(0); }24992500/// Returns true if the recipe is masked.2501bool isMasked() const { return IsMasked; }25022503/// Return the mask used by this recipe. Note that a full mask is represented2504/// by a nullptr.2505VPValue *getMask() const {2506// Mask is optional and therefore the last operand.2507return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;2508}25092510/// Generate the wide load/store.2511void execute(VPTransformState &State) override {2512llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");2513}25142515Instruction &getIngredient() const { return Ingredient; }2516};25172518/// A recipe for widening load operations, using the address to load from and an2519/// optional mask.2520struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {2521VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,2522bool Consecutive, bool Reverse, DebugLoc DL)2523: VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,2524Reverse, DL),2525VPValue(this, &Load) {2526setMask(Mask);2527}25282529VPWidenLoadRecipe *clone() override {2530return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),2531getMask(), Consecutive, Reverse,2532getDebugLoc());2533}25342535VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);25362537/// Generate a wide load or gather.2538void execute(VPTransformState &State) override;25392540#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2541/// Print the recipe.2542void print(raw_ostream &O, const Twine &Indent,2543VPSlotTracker &SlotTracker) const override;2544#endif25452546/// Returns true if the recipe only uses the first lane of operand \p Op.2547bool onlyFirstLaneUsed(const VPValue *Op) const override {2548assert(is_contained(operands(), Op) &&2549"Op must be an operand of the recipe");2550// Widened, consecutive loads operations only demand the first lane of2551// their address.2552return Op == getAddr() && isConsecutive();2553}2554};25552556/// A recipe for widening load operations with vector-predication intrinsics,2557/// using the address to load from, the explicit vector length and an optional2558/// mask.2559struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {2560VPWidenLoadEVLRecipe(VPWidenLoadRecipe *L, VPValue *EVL, VPValue *Mask)2561: VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L->getIngredient(),2562{L->getAddr(), EVL}, L->isConsecutive(),2563L->isReverse(), L->getDebugLoc()),2564VPValue(this, &getIngredient()) {2565setMask(Mask);2566}25672568VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)25692570/// Return the EVL operand.2571VPValue *getEVL() const { return getOperand(1); }25722573/// Generate the wide load or gather.2574void execute(VPTransformState &State) override;25752576#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2577/// Print the recipe.2578void print(raw_ostream &O, const Twine &Indent,2579VPSlotTracker &SlotTracker) const override;2580#endif25812582/// Returns true if the recipe only uses the first lane of operand \p Op.2583bool onlyFirstLaneUsed(const VPValue *Op) const override {2584assert(is_contained(operands(), Op) &&2585"Op must be an operand of the recipe");2586// Widened loads only demand the first lane of EVL and consecutive loads2587// only demand the first lane of their address.2588return Op == getEVL() || (Op == getAddr() && isConsecutive());2589}2590};25912592/// A recipe for widening store operations, using the stored value, the address2593/// to store to and an optional mask.2594struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {2595VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,2596VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)2597: VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},2598Consecutive, Reverse, DL) {2599setMask(Mask);2600}26012602VPWidenStoreRecipe *clone() override {2603return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),2604getStoredValue(), getMask(), Consecutive,2605Reverse, getDebugLoc());2606}26072608VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);26092610/// Return the value stored by this recipe.2611VPValue *getStoredValue() const { return getOperand(1); }26122613/// Generate a wide store or scatter.2614void execute(VPTransformState &State) override;26152616#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2617/// Print the recipe.2618void print(raw_ostream &O, const Twine &Indent,2619VPSlotTracker &SlotTracker) const override;2620#endif26212622/// Returns true if the recipe only uses the first lane of operand \p Op.2623bool onlyFirstLaneUsed(const VPValue *Op) const override {2624assert(is_contained(operands(), Op) &&2625"Op must be an operand of the recipe");2626// Widened, consecutive stores only demand the first lane of their address,2627// unless the same operand is also stored.2628return Op == getAddr() && isConsecutive() && Op != getStoredValue();2629}2630};26312632/// A recipe for widening store operations with vector-predication intrinsics,2633/// using the value to store, the address to store to, the explicit vector2634/// length and an optional mask.2635struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {2636VPWidenStoreEVLRecipe(VPWidenStoreRecipe *S, VPValue *EVL, VPValue *Mask)2637: VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S->getIngredient(),2638{S->getAddr(), S->getStoredValue(), EVL},2639S->isConsecutive(), S->isReverse(),2640S->getDebugLoc()) {2641setMask(Mask);2642}26432644VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)26452646/// Return the address accessed by this recipe.2647VPValue *getStoredValue() const { return getOperand(1); }26482649/// Return the EVL operand.2650VPValue *getEVL() const { return getOperand(2); }26512652/// Generate the wide store or scatter.2653void execute(VPTransformState &State) override;26542655#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2656/// Print the recipe.2657void print(raw_ostream &O, const Twine &Indent,2658VPSlotTracker &SlotTracker) const override;2659#endif26602661/// Returns true if the recipe only uses the first lane of operand \p Op.2662bool onlyFirstLaneUsed(const VPValue *Op) const override {2663assert(is_contained(operands(), Op) &&2664"Op must be an operand of the recipe");2665if (Op == getEVL()) {2666assert(getStoredValue() != Op && "unexpected store of EVL");2667return true;2668}2669// Widened, consecutive memory operations only demand the first lane of2670// their address, unless the same operand is also stored. That latter can2671// happen with opaque pointers.2672return Op == getAddr() && isConsecutive() && Op != getStoredValue();2673}2674};26752676/// Recipe to expand a SCEV expression.2677class VPExpandSCEVRecipe : public VPSingleDefRecipe {2678const SCEV *Expr;2679ScalarEvolution &SE;26802681public:2682VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)2683: VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}26842685~VPExpandSCEVRecipe() override = default;26862687VPExpandSCEVRecipe *clone() override {2688return new VPExpandSCEVRecipe(Expr, SE);2689}26902691VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)26922693/// Generate a canonical vector induction variable of the vector loop, with2694void execute(VPTransformState &State) override;26952696#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2697/// Print the recipe.2698void print(raw_ostream &O, const Twine &Indent,2699VPSlotTracker &SlotTracker) const override;2700#endif27012702const SCEV *getSCEV() const { return Expr; }2703};27042705/// Canonical scalar induction phi of the vector loop. Starting at the specified2706/// start value (either 0 or the resume value when vectorizing the epilogue2707/// loop). VPWidenCanonicalIVRecipe represents the vector version of the2708/// canonical induction variable.2709class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {2710public:2711VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)2712: VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}27132714~VPCanonicalIVPHIRecipe() override = default;27152716VPCanonicalIVPHIRecipe *clone() override {2717auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());2718R->addOperand(getBackedgeValue());2719return R;2720}27212722VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)27232724static inline bool classof(const VPHeaderPHIRecipe *D) {2725return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;2726}27272728/// Generate the canonical scalar induction phi of the vector loop.2729void execute(VPTransformState &State) override;27302731#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2732/// Print the recipe.2733void print(raw_ostream &O, const Twine &Indent,2734VPSlotTracker &SlotTracker) const override;2735#endif27362737/// Returns the scalar type of the induction.2738Type *getScalarType() const {2739return getStartValue()->getLiveInIRValue()->getType();2740}27412742/// Returns true if the recipe only uses the first lane of operand \p Op.2743bool onlyFirstLaneUsed(const VPValue *Op) const override {2744assert(is_contained(operands(), Op) &&2745"Op must be an operand of the recipe");2746return true;2747}27482749/// Returns true if the recipe only uses the first part of operand \p Op.2750bool onlyFirstPartUsed(const VPValue *Op) const override {2751assert(is_contained(operands(), Op) &&2752"Op must be an operand of the recipe");2753return true;2754}27552756/// Check if the induction described by \p Kind, /p Start and \p Step is2757/// canonical, i.e. has the same start and step (of 1) as the canonical IV.2758bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,2759VPValue *Step) const;2760};27612762/// A recipe for generating the active lane mask for the vector loop that is2763/// used to predicate the vector operations.2764/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and2765/// remove VPActiveLaneMaskPHIRecipe.2766class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {2767public:2768VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)2769: VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,2770DL) {}27712772~VPActiveLaneMaskPHIRecipe() override = default;27732774VPActiveLaneMaskPHIRecipe *clone() override {2775return new VPActiveLaneMaskPHIRecipe(getOperand(0), getDebugLoc());2776}27772778VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)27792780static inline bool classof(const VPHeaderPHIRecipe *D) {2781return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;2782}27832784/// Generate the active lane mask phi of the vector loop.2785void execute(VPTransformState &State) override;27862787#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2788/// Print the recipe.2789void print(raw_ostream &O, const Twine &Indent,2790VPSlotTracker &SlotTracker) const override;2791#endif2792};27932794/// A recipe for generating the phi node for the current index of elements,2795/// adjusted in accordance with EVL value. It starts at the start value of the2796/// canonical induction and gets incremented by EVL in each iteration of the2797/// vector loop.2798class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {2799public:2800VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)2801: VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}28022803~VPEVLBasedIVPHIRecipe() override = default;28042805VPEVLBasedIVPHIRecipe *clone() override {2806llvm_unreachable("cloning not implemented yet");2807}28082809VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)28102811static inline bool classof(const VPHeaderPHIRecipe *D) {2812return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;2813}28142815/// Generate phi for handling IV based on EVL over iterations correctly.2816/// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe.2817void execute(VPTransformState &State) override;28182819/// Returns true if the recipe only uses the first lane of operand \p Op.2820bool onlyFirstLaneUsed(const VPValue *Op) const override {2821assert(is_contained(operands(), Op) &&2822"Op must be an operand of the recipe");2823return true;2824}28252826#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2827/// Print the recipe.2828void print(raw_ostream &O, const Twine &Indent,2829VPSlotTracker &SlotTracker) const override;2830#endif2831};28322833/// A Recipe for widening the canonical induction variable of the vector loop.2834class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {2835public:2836VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)2837: VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}28382839~VPWidenCanonicalIVRecipe() override = default;28402841VPWidenCanonicalIVRecipe *clone() override {2842return new VPWidenCanonicalIVRecipe(2843cast<VPCanonicalIVPHIRecipe>(getOperand(0)));2844}28452846VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)28472848/// Generate a canonical vector induction variable of the vector loop, with2849/// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and2850/// step = <VF*UF, VF*UF, ..., VF*UF>.2851void execute(VPTransformState &State) override;28522853#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2854/// Print the recipe.2855void print(raw_ostream &O, const Twine &Indent,2856VPSlotTracker &SlotTracker) const override;2857#endif2858};28592860/// A recipe for converting the input value \p IV value to the corresponding2861/// value of an IV with different start and step values, using Start + IV *2862/// Step.2863class VPDerivedIVRecipe : public VPSingleDefRecipe {2864/// Kind of the induction.2865const InductionDescriptor::InductionKind Kind;2866/// If not nullptr, the floating point induction binary operator. Must be set2867/// for floating point inductions.2868const FPMathOperator *FPBinOp;28692870public:2871VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,2872VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)2873: VPDerivedIVRecipe(2874IndDesc.getKind(),2875dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),2876Start, CanonicalIV, Step) {}28772878VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,2879const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,2880VPValue *Step)2881: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),2882FPBinOp(FPBinOp) {}28832884~VPDerivedIVRecipe() override = default;28852886VPDerivedIVRecipe *clone() override {2887return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),2888getStepValue());2889}28902891VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)28922893/// Generate the transformed value of the induction at offset StartValue (1.2894/// operand) + IV (2. operand) * StepValue (3, operand).2895void execute(VPTransformState &State) override;28962897#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2898/// Print the recipe.2899void print(raw_ostream &O, const Twine &Indent,2900VPSlotTracker &SlotTracker) const override;2901#endif29022903Type *getScalarType() const {2904return getStartValue()->getLiveInIRValue()->getType();2905}29062907VPValue *getStartValue() const { return getOperand(0); }2908VPValue *getStepValue() const { return getOperand(2); }29092910/// Returns true if the recipe only uses the first lane of operand \p Op.2911bool onlyFirstLaneUsed(const VPValue *Op) const override {2912assert(is_contained(operands(), Op) &&2913"Op must be an operand of the recipe");2914return true;2915}2916};29172918/// A recipe for handling phi nodes of integer and floating-point inductions,2919/// producing their scalar values.2920class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {2921Instruction::BinaryOps InductionOpcode;29222923public:2924VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step,2925Instruction::BinaryOps Opcode, FastMathFlags FMFs)2926: VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,2927ArrayRef<VPValue *>({IV, Step}), FMFs),2928InductionOpcode(Opcode) {}29292930VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,2931VPValue *Step)2932: VPScalarIVStepsRecipe(2933IV, Step, IndDesc.getInductionOpcode(),2934dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())2935? IndDesc.getInductionBinOp()->getFastMathFlags()2936: FastMathFlags()) {}29372938~VPScalarIVStepsRecipe() override = default;29392940VPScalarIVStepsRecipe *clone() override {2941return new VPScalarIVStepsRecipe(2942getOperand(0), getOperand(1), InductionOpcode,2943hasFastMathFlags() ? getFastMathFlags() : FastMathFlags());2944}29452946VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)29472948/// Generate the scalarized versions of the phi node as needed by their users.2949void execute(VPTransformState &State) override;29502951#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2952/// Print the recipe.2953void print(raw_ostream &O, const Twine &Indent,2954VPSlotTracker &SlotTracker) const override;2955#endif29562957VPValue *getStepValue() const { return getOperand(1); }29582959/// Returns true if the recipe only uses the first lane of operand \p Op.2960bool onlyFirstLaneUsed(const VPValue *Op) const override {2961assert(is_contained(operands(), Op) &&2962"Op must be an operand of the recipe");2963return true;2964}2965};29662967/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It2968/// holds a sequence of zero or more VPRecipe's each representing a sequence of2969/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.2970class VPBasicBlock : public VPBlockBase {2971public:2972using RecipeListTy = iplist<VPRecipeBase>;29732974protected:2975/// The VPRecipes held in the order of output instructions to generate.2976RecipeListTy Recipes;29772978VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")2979: VPBlockBase(BlockSC, Name.str()) {}29802981public:2982VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)2983: VPBlockBase(VPBasicBlockSC, Name.str()) {2984if (Recipe)2985appendRecipe(Recipe);2986}29872988~VPBasicBlock() override {2989while (!Recipes.empty())2990Recipes.pop_back();2991}29922993/// Instruction iterators...2994using iterator = RecipeListTy::iterator;2995using const_iterator = RecipeListTy::const_iterator;2996using reverse_iterator = RecipeListTy::reverse_iterator;2997using const_reverse_iterator = RecipeListTy::const_reverse_iterator;29982999//===--------------------------------------------------------------------===//3000/// Recipe iterator methods3001///3002inline iterator begin() { return Recipes.begin(); }3003inline const_iterator begin() const { return Recipes.begin(); }3004inline iterator end() { return Recipes.end(); }3005inline const_iterator end() const { return Recipes.end(); }30063007inline reverse_iterator rbegin() { return Recipes.rbegin(); }3008inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }3009inline reverse_iterator rend() { return Recipes.rend(); }3010inline const_reverse_iterator rend() const { return Recipes.rend(); }30113012inline size_t size() const { return Recipes.size(); }3013inline bool empty() const { return Recipes.empty(); }3014inline const VPRecipeBase &front() const { return Recipes.front(); }3015inline VPRecipeBase &front() { return Recipes.front(); }3016inline const VPRecipeBase &back() const { return Recipes.back(); }3017inline VPRecipeBase &back() { return Recipes.back(); }30183019/// Returns a reference to the list of recipes.3020RecipeListTy &getRecipeList() { return Recipes; }30213022/// Returns a pointer to a member of the recipe list.3023static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {3024return &VPBasicBlock::Recipes;3025}30263027/// Method to support type inquiry through isa, cast, and dyn_cast.3028static inline bool classof(const VPBlockBase *V) {3029return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||3030V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;3031}30323033void insert(VPRecipeBase *Recipe, iterator InsertPt) {3034assert(Recipe && "No recipe to append.");3035assert(!Recipe->Parent && "Recipe already in VPlan");3036Recipe->Parent = this;3037Recipes.insert(InsertPt, Recipe);3038}30393040/// Augment the existing recipes of a VPBasicBlock with an additional3041/// \p Recipe as the last recipe.3042void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }30433044/// The method which generates the output IR instructions that correspond to3045/// this VPBasicBlock, thereby "executing" the VPlan.3046void execute(VPTransformState *State) override;30473048/// Return the cost of this VPBasicBlock.3049InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;30503051/// Return the position of the first non-phi node recipe in the block.3052iterator getFirstNonPhi();30533054/// Returns an iterator range over the PHI-like recipes in the block.3055iterator_range<iterator> phis() {3056return make_range(begin(), getFirstNonPhi());3057}30583059void dropAllReferences(VPValue *NewValue) override;30603061/// Split current block at \p SplitAt by inserting a new block between the3062/// current block and its successors and moving all recipes starting at3063/// SplitAt to the new block. Returns the new block.3064VPBasicBlock *splitAt(iterator SplitAt);30653066VPRegionBlock *getEnclosingLoopRegion();30673068#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)3069/// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p3070/// SlotTracker is used to print unnamed VPValue's using consequtive numbers.3071///3072/// Note that the numbering is applied to the whole VPlan, so printing3073/// individual blocks is consistent with the whole VPlan printing.3074void print(raw_ostream &O, const Twine &Indent,3075VPSlotTracker &SlotTracker) const override;3076using VPBlockBase::print; // Get the print(raw_stream &O) version.3077#endif30783079/// If the block has multiple successors, return the branch recipe terminating3080/// the block. If there are no or only a single successor, return nullptr;3081VPRecipeBase *getTerminator();3082const VPRecipeBase *getTerminator() const;30833084/// Returns true if the block is exiting it's parent region.3085bool isExiting() const;30863087/// Clone the current block and it's recipes, without updating the operands of3088/// the cloned recipes.3089VPBasicBlock *clone() override {3090auto *NewBlock = new VPBasicBlock(getName());3091for (VPRecipeBase &R : *this)3092NewBlock->appendRecipe(R.clone());3093return NewBlock;3094}30953096protected:3097/// Execute the recipes in the IR basic block \p BB.3098void executeRecipes(VPTransformState *State, BasicBlock *BB);30993100private:3101/// Create an IR BasicBlock to hold the output instructions generated by this3102/// VPBasicBlock, and return it. Update the CFGState accordingly.3103BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);3104};31053106/// A special type of VPBasicBlock that wraps an existing IR basic block.3107/// Recipes of the block get added before the first non-phi instruction in the3108/// wrapped block.3109/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's3110/// preheader block.3111class VPIRBasicBlock : public VPBasicBlock {3112BasicBlock *IRBB;31133114public:3115VPIRBasicBlock(BasicBlock *IRBB)3116: VPBasicBlock(VPIRBasicBlockSC,3117(Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),3118IRBB(IRBB) {}31193120~VPIRBasicBlock() override {}31213122static inline bool classof(const VPBlockBase *V) {3123return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;3124}31253126/// The method which generates the output IR instructions that correspond to3127/// this VPBasicBlock, thereby "executing" the VPlan.3128void execute(VPTransformState *State) override;31293130VPIRBasicBlock *clone() override {3131auto *NewBlock = new VPIRBasicBlock(IRBB);3132for (VPRecipeBase &R : Recipes)3133NewBlock->appendRecipe(R.clone());3134return NewBlock;3135}31363137BasicBlock *getIRBasicBlock() const { return IRBB; }3138};31393140/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks3141/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.3142/// A VPRegionBlock may indicate that its contents are to be replicated several3143/// times. This is designed to support predicated scalarization, in which a3144/// scalar if-then code structure needs to be generated VF * UF times. Having3145/// this replication indicator helps to keep a single model for multiple3146/// candidate VF's. The actual replication takes place only once the desired VF3147/// and UF have been determined.3148class VPRegionBlock : public VPBlockBase {3149/// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.3150VPBlockBase *Entry;31513152/// Hold the Single Exiting block of the SESE region modelled by the3153/// VPRegionBlock.3154VPBlockBase *Exiting;31553156/// An indicator whether this region is to generate multiple replicated3157/// instances of output IR corresponding to its VPBlockBases.3158bool IsReplicator;31593160public:3161VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,3162const std::string &Name = "", bool IsReplicator = false)3163: VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),3164IsReplicator(IsReplicator) {3165assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");3166assert(Exiting->getSuccessors().empty() && "Exit block has successors.");3167Entry->setParent(this);3168Exiting->setParent(this);3169}3170VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)3171: VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),3172IsReplicator(IsReplicator) {}31733174~VPRegionBlock() override {3175if (Entry) {3176VPValue DummyValue;3177Entry->dropAllReferences(&DummyValue);3178deleteCFG(Entry);3179}3180}31813182/// Method to support type inquiry through isa, cast, and dyn_cast.3183static inline bool classof(const VPBlockBase *V) {3184return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;3185}31863187const VPBlockBase *getEntry() const { return Entry; }3188VPBlockBase *getEntry() { return Entry; }31893190/// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p3191/// EntryBlock must have no predecessors.3192void setEntry(VPBlockBase *EntryBlock) {3193assert(EntryBlock->getPredecessors().empty() &&3194"Entry block cannot have predecessors.");3195Entry = EntryBlock;3196EntryBlock->setParent(this);3197}31983199const VPBlockBase *getExiting() const { return Exiting; }3200VPBlockBase *getExiting() { return Exiting; }32013202/// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p3203/// ExitingBlock must have no successors.3204void setExiting(VPBlockBase *ExitingBlock) {3205assert(ExitingBlock->getSuccessors().empty() &&3206"Exit block cannot have successors.");3207Exiting = ExitingBlock;3208ExitingBlock->setParent(this);3209}32103211/// Returns the pre-header VPBasicBlock of the loop region.3212VPBasicBlock *getPreheaderVPBB() {3213assert(!isReplicator() && "should only get pre-header of loop regions");3214return getSinglePredecessor()->getExitingBasicBlock();3215}32163217/// An indicator whether this region is to generate multiple replicated3218/// instances of output IR corresponding to its VPBlockBases.3219bool isReplicator() const { return IsReplicator; }32203221/// The method which generates the output IR instructions that correspond to3222/// this VPRegionBlock, thereby "executing" the VPlan.3223void execute(VPTransformState *State) override;32243225// Return the cost of this region.3226InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;32273228void dropAllReferences(VPValue *NewValue) override;32293230#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)3231/// Print this VPRegionBlock to \p O (recursively), prefixing all lines with3232/// \p Indent. \p SlotTracker is used to print unnamed VPValue's using3233/// consequtive numbers.3234///3235/// Note that the numbering is applied to the whole VPlan, so printing3236/// individual regions is consistent with the whole VPlan printing.3237void print(raw_ostream &O, const Twine &Indent,3238VPSlotTracker &SlotTracker) const override;3239using VPBlockBase::print; // Get the print(raw_stream &O) version.3240#endif32413242/// Clone all blocks in the single-entry single-exit region of the block and3243/// their recipes without updating the operands of the cloned recipes.3244VPRegionBlock *clone() override;3245};32463247/// VPlan models a candidate for vectorization, encoding various decisions take3248/// to produce efficient output IR, including which branches, basic-blocks and3249/// output IR instructions to generate, and their cost. VPlan holds a3250/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry3251/// VPBasicBlock.3252class VPlan {3253friend class VPlanPrinter;3254friend class VPSlotTracker;32553256/// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the3257/// preheader of the vector loop.3258VPBasicBlock *Entry;32593260/// VPBasicBlock corresponding to the original preheader. Used to place3261/// VPExpandSCEV recipes for expressions used during skeleton creation and the3262/// rest of VPlan execution.3263VPBasicBlock *Preheader;32643265/// Holds the VFs applicable to this VPlan.3266SmallSetVector<ElementCount, 2> VFs;32673268/// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for3269/// any UF.3270SmallSetVector<unsigned, 2> UFs;32713272/// Holds the name of the VPlan, for printing.3273std::string Name;32743275/// Represents the trip count of the original loop, for folding3276/// the tail.3277VPValue *TripCount = nullptr;32783279/// Represents the backedge taken count of the original loop, for folding3280/// the tail. It equals TripCount - 1.3281VPValue *BackedgeTakenCount = nullptr;32823283/// Represents the vector trip count.3284VPValue VectorTripCount;32853286/// Represents the loop-invariant VF * UF of the vector loop region.3287VPValue VFxUF;32883289/// Holds a mapping between Values and their corresponding VPValue inside3290/// VPlan.3291Value2VPValueTy Value2VPValue;32923293/// Contains all the external definitions created for this VPlan. External3294/// definitions are VPValues that hold a pointer to their underlying IR.3295SmallVector<VPValue *, 16> VPLiveInsToFree;32963297/// Values used outside the plan. It contains live-outs that need fixing. Any3298/// live-out that is fixed outside VPlan needs to be removed. The remaining3299/// live-outs are fixed via VPLiveOut::fixPhi.3300MapVector<PHINode *, VPLiveOut *> LiveOuts;33013302/// Mapping from SCEVs to the VPValues representing their expansions.3303/// NOTE: This mapping is temporary and will be removed once all users have3304/// been modeled in VPlan directly.3305DenseMap<const SCEV *, VPValue *> SCEVToExpansion;33063307public:3308/// Construct a VPlan with original preheader \p Preheader, trip count \p TC3309/// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to3310/// be disconnected, as the bypass blocks between them are not yet modeled in3311/// VPlan.3312VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)3313: VPlan(Preheader, Entry) {3314TripCount = TC;3315}33163317/// Construct a VPlan with original preheader \p Preheader and \p Entry to3318/// the plan. At the moment, \p Preheader and \p Entry need to be3319/// disconnected, as the bypass blocks between them are not yet modeled in3320/// VPlan.3321VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)3322: Entry(Entry), Preheader(Preheader) {3323Entry->setPlan(this);3324Preheader->setPlan(this);3325assert(Preheader->getNumSuccessors() == 0 &&3326Preheader->getNumPredecessors() == 0 &&3327"preheader must be disconnected");3328}33293330~VPlan();33313332/// Create initial VPlan, having an "entry" VPBasicBlock (wrapping3333/// original scalar pre-header ) which contains SCEV expansions that need3334/// to happen before the CFG is modified; a VPBasicBlock for the vector3335/// pre-header, followed by a region for the vector loop, followed by the3336/// middle VPBasicBlock. If a check is needed to guard executing the scalar3337/// epilogue loop, it will be added to the middle block, together with3338/// VPBasicBlocks for the scalar preheader and exit blocks.3339static VPlanPtr createInitialVPlan(const SCEV *TripCount,3340ScalarEvolution &PSE,3341bool RequiresScalarEpilogueCheck,3342bool TailFolded, Loop *TheLoop);33433344/// Prepare the plan for execution, setting up the required live-in values.3345void prepareToExecute(Value *TripCount, Value *VectorTripCount,3346Value *CanonicalIVStartValue, VPTransformState &State);33473348/// Generate the IR code for this VPlan.3349void execute(VPTransformState *State);33503351/// Return the cost of this plan.3352InstructionCost cost(ElementCount VF, VPCostContext &Ctx);33533354VPBasicBlock *getEntry() { return Entry; }3355const VPBasicBlock *getEntry() const { return Entry; }33563357/// The trip count of the original loop.3358VPValue *getTripCount() const {3359assert(TripCount && "trip count needs to be set before accessing it");3360return TripCount;3361}33623363/// Resets the trip count for the VPlan. The caller must make sure all uses of3364/// the original trip count have been replaced.3365void resetTripCount(VPValue *NewTripCount) {3366assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&3367"TripCount always must be set");3368TripCount = NewTripCount;3369}33703371/// The backedge taken count of the original loop.3372VPValue *getOrCreateBackedgeTakenCount() {3373if (!BackedgeTakenCount)3374BackedgeTakenCount = new VPValue();3375return BackedgeTakenCount;3376}33773378/// The vector trip count.3379VPValue &getVectorTripCount() { return VectorTripCount; }33803381/// Returns VF * UF of the vector loop region.3382VPValue &getVFxUF() { return VFxUF; }33833384void addVF(ElementCount VF) { VFs.insert(VF); }33853386void setVF(ElementCount VF) {3387assert(hasVF(VF) && "Cannot set VF not already in plan");3388VFs.clear();3389VFs.insert(VF);3390}33913392bool hasVF(ElementCount VF) { return VFs.count(VF); }3393bool hasScalableVF() {3394return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });3395}33963397/// Returns an iterator range over all VFs of the plan.3398iterator_range<SmallSetVector<ElementCount, 2>::iterator>3399vectorFactors() const {3400return {VFs.begin(), VFs.end()};3401}34023403bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }34043405bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }34063407void setUF(unsigned UF) {3408assert(hasUF(UF) && "Cannot set the UF not already in plan");3409UFs.clear();3410UFs.insert(UF);3411}34123413/// Return a string with the name of the plan and the applicable VFs and UFs.3414std::string getName() const;34153416void setName(const Twine &newName) { Name = newName.str(); }34173418/// Gets the live-in VPValue for \p V or adds a new live-in (if none exists3419/// yet) for \p V.3420VPValue *getOrAddLiveIn(Value *V) {3421assert(V && "Trying to get or add the VPValue of a null Value");3422if (!Value2VPValue.count(V)) {3423VPValue *VPV = new VPValue(V);3424VPLiveInsToFree.push_back(VPV);3425assert(VPV->isLiveIn() && "VPV must be a live-in.");3426assert(!Value2VPValue.count(V) && "Value already exists in VPlan");3427Value2VPValue[V] = VPV;3428}34293430assert(Value2VPValue.count(V) && "Value does not exist in VPlan");3431assert(Value2VPValue[V]->isLiveIn() &&3432"Only live-ins should be in mapping");3433return Value2VPValue[V];3434}34353436/// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.3437VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }34383439#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)3440/// Print the live-ins of this VPlan to \p O.3441void printLiveIns(raw_ostream &O) const;34423443/// Print this VPlan to \p O.3444void print(raw_ostream &O) const;34453446/// Print this VPlan in DOT format to \p O.3447void printDOT(raw_ostream &O) const;34483449/// Dump the plan to stderr (for debugging).3450LLVM_DUMP_METHOD void dump() const;3451#endif34523453/// Returns the VPRegionBlock of the vector loop.3454VPRegionBlock *getVectorLoopRegion() {3455return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());3456}3457const VPRegionBlock *getVectorLoopRegion() const {3458return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());3459}34603461/// Returns the canonical induction recipe of the vector loop.3462VPCanonicalIVPHIRecipe *getCanonicalIV() {3463VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();3464if (EntryVPBB->empty()) {3465// VPlan native path.3466EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());3467}3468return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());3469}34703471void addLiveOut(PHINode *PN, VPValue *V);34723473void removeLiveOut(PHINode *PN) {3474delete LiveOuts[PN];3475LiveOuts.erase(PN);3476}34773478const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {3479return LiveOuts;3480}34813482VPValue *getSCEVExpansion(const SCEV *S) const {3483return SCEVToExpansion.lookup(S);3484}34853486void addSCEVExpansion(const SCEV *S, VPValue *V) {3487assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");3488SCEVToExpansion[S] = V;3489}34903491/// \return The block corresponding to the original preheader.3492VPBasicBlock *getPreheader() { return Preheader; }3493const VPBasicBlock *getPreheader() const { return Preheader; }34943495/// Clone the current VPlan, update all VPValues of the new VPlan and cloned3496/// recipes to refer to the clones, and return it.3497VPlan *duplicate();3498};34993500#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)3501/// VPlanPrinter prints a given VPlan to a given output stream. The printing is3502/// indented and follows the dot format.3503class VPlanPrinter {3504raw_ostream &OS;3505const VPlan &Plan;3506unsigned Depth = 0;3507unsigned TabWidth = 2;3508std::string Indent;3509unsigned BID = 0;3510SmallDenseMap<const VPBlockBase *, unsigned> BlockID;35113512VPSlotTracker SlotTracker;35133514/// Handle indentation.3515void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }35163517/// Print a given \p Block of the Plan.3518void dumpBlock(const VPBlockBase *Block);35193520/// Print the information related to the CFG edges going out of a given3521/// \p Block, followed by printing the successor blocks themselves.3522void dumpEdges(const VPBlockBase *Block);35233524/// Print a given \p BasicBlock, including its VPRecipes, followed by printing3525/// its successor blocks.3526void dumpBasicBlock(const VPBasicBlock *BasicBlock);35273528/// Print a given \p Region of the Plan.3529void dumpRegion(const VPRegionBlock *Region);35303531unsigned getOrCreateBID(const VPBlockBase *Block) {3532return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;3533}35343535Twine getOrCreateName(const VPBlockBase *Block);35363537Twine getUID(const VPBlockBase *Block);35383539/// Print the information related to a CFG edge between two VPBlockBases.3540void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,3541const Twine &Label);35423543public:3544VPlanPrinter(raw_ostream &O, const VPlan &P)3545: OS(O), Plan(P), SlotTracker(&P) {}35463547LLVM_DUMP_METHOD void dump();3548};35493550struct VPlanIngredient {3551const Value *V;35523553VPlanIngredient(const Value *V) : V(V) {}35543555void print(raw_ostream &O) const;3556};35573558inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) {3559I.print(OS);3560return OS;3561}35623563inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {3564Plan.print(OS);3565return OS;3566}3567#endif35683569//===----------------------------------------------------------------------===//3570// VPlan Utilities3571//===----------------------------------------------------------------------===//35723573/// Class that provides utilities for VPBlockBases in VPlan.3574class VPBlockUtils {3575public:3576VPBlockUtils() = delete;35773578/// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p3579/// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p3580/// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's3581/// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must3582/// have neither successors nor predecessors.3583static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {3584assert(NewBlock->getSuccessors().empty() &&3585NewBlock->getPredecessors().empty() &&3586"Can't insert new block with predecessors or successors.");3587NewBlock->setParent(BlockPtr->getParent());3588SmallVector<VPBlockBase *> Succs(BlockPtr->successors());3589for (VPBlockBase *Succ : Succs) {3590disconnectBlocks(BlockPtr, Succ);3591connectBlocks(NewBlock, Succ);3592}3593connectBlocks(BlockPtr, NewBlock);3594}35953596/// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p3597/// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p3598/// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr3599/// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors3600/// and \p IfTrue and \p IfFalse must have neither successors nor3601/// predecessors.3602static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,3603VPBlockBase *BlockPtr) {3604assert(IfTrue->getSuccessors().empty() &&3605"Can't insert IfTrue with successors.");3606assert(IfFalse->getSuccessors().empty() &&3607"Can't insert IfFalse with successors.");3608BlockPtr->setTwoSuccessors(IfTrue, IfFalse);3609IfTrue->setPredecessors({BlockPtr});3610IfFalse->setPredecessors({BlockPtr});3611IfTrue->setParent(BlockPtr->getParent());3612IfFalse->setParent(BlockPtr->getParent());3613}36143615/// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to3616/// the successors of \p From and \p From to the predecessors of \p To. Both3617/// VPBlockBases must have the same parent, which can be null. Both3618/// VPBlockBases can be already connected to other VPBlockBases.3619static void connectBlocks(VPBlockBase *From, VPBlockBase *To) {3620assert((From->getParent() == To->getParent()) &&3621"Can't connect two block with different parents");3622assert(From->getNumSuccessors() < 2 &&3623"Blocks can't have more than two successors.");3624From->appendSuccessor(To);3625To->appendPredecessor(From);3626}36273628/// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To3629/// from the successors of \p From and \p From from the predecessors of \p To.3630static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) {3631assert(To && "Successor to disconnect is null.");3632From->removeSuccessor(To);3633To->removePredecessor(From);3634}36353636/// Return an iterator range over \p Range which only includes \p BlockTy3637/// blocks. The accesses are casted to \p BlockTy.3638template <typename BlockTy, typename T>3639static auto blocksOnly(const T &Range) {3640// Create BaseTy with correct const-ness based on BlockTy.3641using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,3642const VPBlockBase, VPBlockBase>;36433644// We need to first create an iterator range over (const) BlocktTy & instead3645// of (const) BlockTy * for filter_range to work properly.3646auto Mapped =3647map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });3648auto Filter = make_filter_range(3649Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });3650return map_range(Filter, [](BaseTy &Block) -> BlockTy * {3651return cast<BlockTy>(&Block);3652});3653}3654};36553656class VPInterleavedAccessInfo {3657DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>3658InterleaveGroupMap;36593660/// Type for mapping of instruction based interleave groups to VPInstruction3661/// interleave groups3662using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,3663InterleaveGroup<VPInstruction> *>;36643665/// Recursively \p Region and populate VPlan based interleave groups based on3666/// \p IAI.3667void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,3668InterleavedAccessInfo &IAI);3669/// Recursively traverse \p Block and populate VPlan based interleave groups3670/// based on \p IAI.3671void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,3672InterleavedAccessInfo &IAI);36733674public:3675VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);36763677~VPInterleavedAccessInfo() {3678SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;3679// Avoid releasing a pointer twice.3680for (auto &I : InterleaveGroupMap)3681DelSet.insert(I.second);3682for (auto *Ptr : DelSet)3683delete Ptr;3684}36853686/// Get the interleave group that \p Instr belongs to.3687///3688/// \returns nullptr if doesn't have such group.3689InterleaveGroup<VPInstruction> *3690getInterleaveGroup(VPInstruction *Instr) const {3691return InterleaveGroupMap.lookup(Instr);3692}3693};36943695/// Class that maps (parts of) an existing VPlan to trees of combined3696/// VPInstructions.3697class VPlanSlp {3698enum class OpMode { Failed, Load, Opcode };36993700/// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as3701/// DenseMap keys.3702struct BundleDenseMapInfo {3703static SmallVector<VPValue *, 4> getEmptyKey() {3704return {reinterpret_cast<VPValue *>(-1)};3705}37063707static SmallVector<VPValue *, 4> getTombstoneKey() {3708return {reinterpret_cast<VPValue *>(-2)};3709}37103711static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {3712return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));3713}37143715static bool isEqual(const SmallVector<VPValue *, 4> &LHS,3716const SmallVector<VPValue *, 4> &RHS) {3717return LHS == RHS;3718}3719};37203721/// Mapping of values in the original VPlan to a combined VPInstruction.3722DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>3723BundleToCombined;37243725VPInterleavedAccessInfo &IAI;37263727/// Basic block to operate on. For now, only instructions in a single BB are3728/// considered.3729const VPBasicBlock &BB;37303731/// Indicates whether we managed to combine all visited instructions or not.3732bool CompletelySLP = true;37333734/// Width of the widest combined bundle in bits.3735unsigned WidestBundleBits = 0;37363737using MultiNodeOpTy =3738typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;37393740// Input operand bundles for the current multi node. Each multi node operand3741// bundle contains values not matching the multi node's opcode. They will3742// be reordered in reorderMultiNodeOps, once we completed building a3743// multi node.3744SmallVector<MultiNodeOpTy, 4> MultiNodeOps;37453746/// Indicates whether we are building a multi node currently.3747bool MultiNodeActive = false;37483749/// Check if we can vectorize Operands together.3750bool areVectorizable(ArrayRef<VPValue *> Operands) const;37513752/// Add combined instruction \p New for the bundle \p Operands.3753void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);37543755/// Indicate we hit a bundle we failed to combine. Returns nullptr for now.3756VPInstruction *markFailed();37573758/// Reorder operands in the multi node to maximize sequential memory access3759/// and commutative operations.3760SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();37613762/// Choose the best candidate to use for the lane after \p Last. The set of3763/// candidates to choose from are values with an opcode matching \p Last's3764/// or loads consecutive to \p Last.3765std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,3766SmallPtrSetImpl<VPValue *> &Candidates,3767VPInterleavedAccessInfo &IAI);37683769#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)3770/// Print bundle \p Values to dbgs().3771void dumpBundle(ArrayRef<VPValue *> Values);3772#endif37733774public:3775VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}37763777~VPlanSlp() = default;37783779/// Tries to build an SLP tree rooted at \p Operands and returns a3780/// VPInstruction combining \p Operands, if they can be combined.3781VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);37823783/// Return the width of the widest combined bundle in bits.3784unsigned getWidestBundleBits() const { return WidestBundleBits; }37853786/// Return true if all visited instruction can be combined.3787bool isCompletelySLP() const { return CompletelySLP; }3788};37893790namespace vputils {37913792/// Returns true if only the first lane of \p Def is used.3793bool onlyFirstLaneUsed(const VPValue *Def);37943795/// Returns true if only the first part of \p Def is used.3796bool onlyFirstPartUsed(const VPValue *Def);37973798/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p3799/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in3800/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's3801/// pre-header already contains a recipe expanding \p Expr, return it. If not,3802/// create a new one.3803VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,3804ScalarEvolution &SE);38053806/// Returns true if \p VPV is uniform after vectorization.3807inline bool isUniformAfterVectorization(VPValue *VPV) {3808// A value defined outside the vector region must be uniform after3809// vectorization inside a vector region.3810if (VPV->isDefinedOutsideVectorRegions())3811return true;3812VPRecipeBase *Def = VPV->getDefiningRecipe();3813assert(Def && "Must have definition for value defined inside vector region");3814if (auto Rep = dyn_cast<VPReplicateRecipe>(Def))3815return Rep->isUniform();3816if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))3817return all_of(GEP->operands(), isUniformAfterVectorization);3818if (auto *VPI = dyn_cast<VPInstruction>(Def))3819return VPI->isSingleScalar() || VPI->isVectorToScalar();3820return false;3821}38223823/// Return true if \p V is a header mask in \p Plan.3824bool isHeaderMask(VPValue *V, VPlan &Plan);3825} // end namespace vputils38263827} // end namespace llvm38283829#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H383038313832